utf.h 2.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061
  1. // utf by pietro gagliardi (andlabs) — https://github.com/andlabs/utf/
  2. // 10 november 2016
  3. #ifdef __cplusplus
  4. extern "C" {
  5. #endif
  6. #include <stddef.h>
  7. #include <stdint.h>
  8. // if nElem == 0, assume the buffer has no upper limit and is '\0' terminated
  9. // otherwise, assume buffer is NOT '\0' terminated but is bounded by nElem *elements*
  10. extern size_t utf8EncodeRune(uint32_t rune, char *encoded);
  11. extern const char *utf8DecodeRune(const char *s, size_t nElem, uint32_t *rune);
  12. extern size_t utf16EncodeRune(uint32_t rune, uint16_t *encoded);
  13. extern const uint16_t *utf16DecodeRune(const uint16_t *s, size_t nElem, uint32_t *rune);
  14. extern size_t utf8RuneCount(const char *s, size_t nElem);
  15. extern size_t utf8UTF16Count(const char *s, size_t nElem);
  16. extern size_t utf16RuneCount(const uint16_t *s, size_t nElem);
  17. extern size_t utf16UTF8Count(const uint16_t *s, size_t nElem);
  18. #ifdef __cplusplus
  19. }
  20. // Provide overloads on Windows for using these functions with wchar_t and WCHAR when wchar_t is a keyword in C++ mode (the default).
  21. // Otherwise, you'd need to cast to pass a wchar_t pointer, WCHAR pointer, or equivalent to these functions.
  22. // We use __wchar_t to be independent of the setting; see https://blogs.msdn.microsoft.com/oldnewthing/20161201-00/?p=94836 (ironically posted one day after I initially wrote this code!).
  23. // TODO check this on MinGW-w64
  24. // TODO check this under /Wall
  25. // TODO C-style casts enough? or will that fail in /Wall?
  26. // TODO same for UniChar/unichar on Mac? if both are unsigned then we have nothing to worry about
  27. #if defined(_MSC_VER)
  28. inline size_t utf16EncodeRune(uint32_t rune, __wchar_t *encoded)
  29. {
  30. return utf16EncodeRune(rune, reinterpret_cast<uint16_t *>(encoded));
  31. }
  32. inline const __wchar_t *utf16DecodeRune(const __wchar_t *s, size_t nElem, uint32_t *rune)
  33. {
  34. const uint16_t *ret;
  35. ret = utf16DecodeRune(reinterpret_cast<const uint16_t *>(s), nElem, rune);
  36. return reinterpret_cast<const __wchar_t *>(ret);
  37. }
  38. inline size_t utf16RuneCount(const __wchar_t *s, size_t nElem)
  39. {
  40. return utf16RuneCount(reinterpret_cast<const uint16_t *>(s), nElem);
  41. }
  42. inline size_t utf16UTF8Count(const __wchar_t *s, size_t nElem)
  43. {
  44. return utf16UTF8Count(reinterpret_cast<const uint16_t *>(s), nElem);
  45. }
  46. #endif
  47. #endif