// UTF-16 code unit <-> code point helpers.
//
// Provenance: src/u16.hh as created by commit
// 65860e6c873e6e056fe3d1dadd1d309b1bd66e7b (Joel Klinghed, 2025-09-04,
// "Add UTF-8, UTF-16 and Modified UTF-8 support").
//
// NOTE(review): this text was recovered from a rendering that dropped every
// angle-bracket group. The include targets, the template parameter lists, the
// `std::iter_value_t<T>` constraint, the `std::expected<uint32_t, ...>` return
// types and the `static_cast<uint16_t>` targets below are reconstructions --
// confirm them against the upstream file.
#ifndef U16_HH
#define U16_HH

#include <cstdint>
#include <expected>
#include <iterator>
#include <type_traits>

#include "u.hh"

namespace u16 {

namespace detail {

/// True when `unit` is a high (leading) surrogate, 0xD800..0xDBFF.
constexpr bool is_high_surrogate(uint16_t unit) {
  return unit >= 0xd800 && unit <= 0xdbff;
}

/// True when `unit` is a low (trailing) surrogate, 0xDC00..0xDFFF.
constexpr bool is_low_surrogate(uint16_t unit) {
  return unit >= 0xdc00 && unit <= 0xdfff;
}

/// True when [start, end) holds fewer than two elements.
///
/// Equivalent to `std::distance(start, end) < 2`, but constant time for every
/// iterator category instead of walking the whole remaining range.
template <typename T>
bool fewer_than_two(const T& start, const T& end) {
  return start == end || std::next(start) == end;
}

}  // namespace detail

/// Decode one code point from the UTF-16 code units in [start, end).
///
/// On success `start` is advanced past the one or two units consumed and the
/// code point is returned. On failure:
///  - `u::ReadError::End`: the range is empty; `start` is unchanged.
///  - `u::ReadError::Incomplete`: the last unit of the range is a high
///    surrogate; `start` is unchanged.
///  - `u::ReadError::Invalid`: a high surrogate is not followed by a low
///    surrogate (`start` is advanced past the high surrogate only), or a low
///    surrogate appears on its own (`start` is advanced past it).
template <typename T>
  requires std::is_same_v<std::iter_value_t<T>, uint16_t>
std::expected<uint32_t, u::ReadError> read(T& start, const T& end) {
  if (start == end) return std::unexpected(u::ReadError::End);
  const uint16_t lead = *start;
  if (detail::is_high_surrogate(lead)) {
    if (detail::fewer_than_two(start, end)) {
      return std::unexpected(u::ReadError::Incomplete);
    }
    ++start;
    const uint16_t trail = *start;
    if (detail::is_low_surrogate(trail)) {
      ++start;
      const uint32_t high_bits = static_cast<uint32_t>(lead - 0xd800) << 10;
      const uint32_t low_bits = static_cast<uint32_t>(trail - 0xdc00);
      return 0x10000 + (high_bits | low_bits);
    }
    // Leave `start` on the offending unit so the caller can decode it next.
    return std::unexpected(u::ReadError::Invalid);
  }
  ++start;
  if (detail::is_low_surrogate(lead)) {
    return std::unexpected(u::ReadError::Invalid);
  }
  return lead;
}

/// Like `read`, but an invalid sequence yields U+FFFD (REPLACEMENT CHARACTER)
/// instead of an error.
///
/// `u::ReadError::Incomplete` and `u::ReadError::End` are forwarded as the
/// matching `u::ReadErrorReplace` values. `start` moves exactly as in `read`.
template <typename T>
  requires std::is_same_v<std::iter_value_t<T>, uint16_t>
std::expected<uint32_t, u::ReadErrorReplace> read_replace(T& start,
                                                          const T& end) {
  const auto ret = read(start, end);
  if (ret.has_value()) return *ret;
  switch (ret.error()) {
    case u::ReadError::Incomplete:
      return std::unexpected(u::ReadErrorReplace::Incomplete);
    case u::ReadError::End:
      return std::unexpected(u::ReadErrorReplace::End);
    case u::ReadError::Invalid:
      break;
  }
  // Reached for Invalid. Returning here (rather than inside the switch) also
  // guarantees the function never flows off its end without a value.
  return 0xfffd;
}

/// Encode `code` as UTF-16 into [start, end).
///
/// Writes one unit when `code < 0x10000`, otherwise a surrogate pair, and
/// advances `start` past what was written. Returns false, writing nothing and
/// leaving `start` unchanged, when the range is too small.
///
/// NOTE(review): `code` is not validated. Values in 0xD800..0xDFFF are written
/// verbatim and values above 0x10FFFF produce units that are not a valid
/// surrogate pair; callers are presumably expected to pass valid code points --
/// confirm.
template <typename T>
  requires std::is_same_v<std::iter_value_t<T>, uint16_t>
bool write(T& start, const T& end, uint32_t code) {
  if (code < 0x10000) {
    if (start == end) return false;
    *start = static_cast<uint16_t>(code);
  } else {
    if (detail::fewer_than_two(start, end)) return false;
    code -= 0x10000;
    *start = static_cast<uint16_t>(0xd800 + (code >> 10));
    ++start;
    *start = static_cast<uint16_t>(0xdc00 + (code & 0x3ff));
  }
  ++start;
  return true;
}

/// Advance `start` past one code point without decoding it.
///
/// Returns false, leaving `start` unchanged, when the range is empty or when
/// its last unit is a high surrogate. Otherwise returns true after advancing
/// by the same number of units `read` would consume: two for a surrogate pair,
/// one for anything else -- including a high surrogate that is not followed by
/// a low surrogate, so the following unit is not swallowed.
template <typename T>
  requires std::is_same_v<std::iter_value_t<T>, uint16_t>
bool skip(T& start, const T& end) {
  if (start == end) return false;
  if (detail::is_high_surrogate(*start)) {
    if (detail::fewer_than_two(start, end)) return false;
    ++start;
    if (detail::is_low_surrogate(*start)) ++start;
    return true;
  }
  ++start;
  return true;
}

}  // namespace u16

#endif  // U16_HH