diff options
| author | Joel Klinghed <the_jk@spawned.biz> | 2025-09-04 22:24:13 +0200 |
|---|---|---|
| committer | Joel Klinghed <the_jk@spawned.biz> | 2025-09-04 22:24:13 +0200 |
| commit | 65860e6c873e6e056fe3d1dadd1d309b1bd66e7b (patch) | |
| tree | cb59ed23c72b841fc2688606d68359b3f6b1e324 /src/u16.hh | |
| parent | d75b25d50f4df655d1e69ff900cfeee823039296 (diff) | |
Add UTF-8, UTF-16 and Modified UTF-8 support
Diffstat (limited to 'src/u16.hh')
| -rw-r--r-- | src/u16.hh | 86 |
1 files changed, 86 insertions, 0 deletions
// src/u16.hh
//
// UTF-16 helpers operating on ranges of uint16_t code units: decode one code
// point (read / read_replace), encode one code point (write) and step over
// one code point without decoding it (skip).
#ifndef U16_HH
#define U16_HH

#include <cstdint>
#include <expected>
#include <iterator>
#include <type_traits>

#include "u.hh"

namespace u16 {

namespace detail {

/// True if `unit` is a high (leading) surrogate, 0xD800..0xDBFF.
constexpr bool is_high_surrogate(uint16_t unit) noexcept {
  return unit >= 0xd800 && unit <= 0xdbff;
}

/// True if `unit` is a low (trailing) surrogate, 0xDC00..0xDFFF.
constexpr bool is_low_surrogate(uint16_t unit) noexcept {
  return unit >= 0xdc00 && unit <= 0xdfff;
}

}  // namespace detail

/// Decode one code point from [start, end).
///
/// @param start  Current position; advanced past whatever was consumed.
/// @param end    End of the readable range.
/// @return The decoded code point, or:
///   - u::ReadError::End        if start == end (start unchanged);
///   - u::ReadError::Incomplete if the range ends right after a high
///     surrogate (start unchanged, so the caller can retry with more data);
///   - u::ReadError::Invalid    if a high surrogate is not followed by a low
///     surrogate (only the high surrogate is consumed), or if the unit is a
///     lone low surrogate (that unit is consumed).
template<std::forward_iterator T>
  requires std::is_same_v<std::iter_value_t<T>, uint16_t>
std::expected<uint32_t, u::ReadError> read(T& start, const T& end) {
  if (start == end) return std::unexpected(u::ReadError::End);

  const uint16_t lead = *start;
  if (detail::is_high_surrogate(lead)) {
    // Peek a single step ahead instead of calling std::distance(start, end):
    // for iterators that are not random access, std::distance walks the
    // whole remaining range on every surrogate pair.
    T second = std::next(start);
    if (second == end) return std::unexpected(u::ReadError::Incomplete);

    start = second;  // The high surrogate is consumed from here on.
    const uint16_t trail = *start;
    if (!detail::is_low_surrogate(trail)) {
      // Leave `start` on the offending unit so it is decoded by the next call.
      return std::unexpected(u::ReadError::Invalid);
    }
    ++start;

    const uint32_t high_bits = static_cast<uint32_t>(lead) - 0xd800u;
    const uint32_t low_bits = static_cast<uint32_t>(trail) - 0xdc00u;
    return 0x10000u + ((high_bits << 10) | low_bits);
  }

  ++start;
  if (detail::is_low_surrogate(lead)) {
    return std::unexpected(u::ReadError::Invalid);
  }
  return lead;
}

/// Like read(), but an invalid sequence yields U+FFFD instead of an error.
///
/// `start` is advanced exactly as read() advances it.
///
/// @return The decoded code point, 0xFFFD where read() reports
///   u::ReadError::Invalid, or u::ReadErrorReplace::End /
///   u::ReadErrorReplace::Incomplete for the corresponding read() errors.
template<std::forward_iterator T>
  requires std::is_same_v<std::iter_value_t<T>, uint16_t>
std::expected<uint32_t, u::ReadErrorReplace> read_replace(T& start,
                                                          const T& end) {
  const auto ret = read(start, end);
  if (ret.has_value()) return *ret;

  switch (ret.error()) {
    case u::ReadError::Invalid:
      return 0xfffd;
    case u::ReadError::End:
      return std::unexpected(u::ReadErrorReplace::End);
    case u::ReadError::Incomplete:
      break;
  }
  // Returning after the switch (rather than inside the last case) guarantees
  // that control can never fall off the end of this non-void function.
  return std::unexpected(u::ReadErrorReplace::Incomplete);
}

/// Encode `code` as UTF-16 into [start, end).
///
/// Values below 0x10000 are stored as a single unit without further checks
/// (surrogate values included); anything else is stored as a surrogate pair.
/// `code` is not range checked: values above 0x10FFFF produce units outside
/// the surrogate ranges, so callers are expected to pass valid code points.
///
/// @param start  Current write position; advanced past the written units on
///               success, left unchanged on failure.
/// @param end    End of the writable range.
/// @param code   Code point to encode.
/// @return true if the code point was written, false if there was not enough
///         room (nothing is written in that case).
template<std::forward_iterator T>
  requires std::is_same_v<std::iter_value_t<T>, uint16_t>
bool write(T& start, const T& end, uint32_t code) {
  if (start == end) return false;

  if (code < 0x10000) {
    *start = static_cast<uint16_t>(code);
    ++start;
    return true;
  }

  // Two units are needed; check for the second one with a single step
  // instead of measuring the whole remaining range with std::distance.
  T second = std::next(start);
  if (second == end) return false;

  const uint32_t offset = code - 0x10000;
  *start = static_cast<uint16_t>(0xd800 + (offset >> 10));
  *second = static_cast<uint16_t>(0xdc00 + (offset & 0x3ff));
  ++second;
  start = second;
  return true;
}

/// Step over one code point in [start, end) without decoding it.
///
/// Advances by exactly as many units as read() would consume: two for a
/// well-formed surrogate pair, one otherwise. In particular a high surrogate
/// that is not followed by a low surrogate is skipped on its own, so the unit
/// after it is not swallowed.
///
/// @param start  Current position; advanced on success, unchanged on failure.
/// @param end    End of the range.
/// @return false if start == end or if the range ends right after a high
///         surrogate; true otherwise.
template<std::forward_iterator T>
  requires std::is_same_v<std::iter_value_t<T>, uint16_t>
bool skip(T& start, const T& end) {
  if (start == end) return false;

  const uint16_t lead = *start;
  T next = std::next(start);
  if (detail::is_high_surrogate(lead)) {
    if (next == end) return false;
    if (detail::is_low_surrogate(*next)) ++next;
  }
  start = next;
  return true;
}

}  // namespace u16

#endif  // U16_HH
