diff options
Diffstat (limited to 'utf/src/utf16.cc')
| -rw-r--r-- | utf/src/utf16.cc | 67 |
1 files changed, 67 insertions, 0 deletions
diff --git a/utf/src/utf16.cc b/utf/src/utf16.cc new file mode 100644 index 0000000..43595bf --- /dev/null +++ b/utf/src/utf16.cc @@ -0,0 +1,67 @@ +#include "utf16.hh" + +#include "utf_error.hh" + +namespace utf { + +namespace { + +inline bool is_high_surrogate(uint16_t c) { + return c >= 0xd800 && c <= 0xdbff; +} + +inline bool is_low_surrogate(uint16_t c) { + return c >= 0xdc00 && c <= 0xdfff; +} + +} // namespace + +uint32_t read16be(std::string_view data, std::size_t& offset) { + if (offset > data.size() || data.size() - offset < 2) + return NEED_MORE; + uint16_t c = static_cast<uint16_t>(data[offset]) << 8 + | static_cast<uint16_t>(data[offset + 1] & 0xff); + if (is_high_surrogate(c)) { + if (data.size() - offset < 4) + return NEED_MORE; + uint16_t d = static_cast<uint16_t>(data[offset + 2]) << 8 + | static_cast<uint16_t>(data[offset + 3] & 0xff); + if (is_low_surrogate(d)) { + offset += 4; + return 0x10000 + + (static_cast<uint32_t>(c & 0x3ff) << 10 + | (d & 0x3ff)); + } + return INVALID; + } else if (is_low_surrogate(c)) { + return INVALID; + } + offset += 2; + return c; +} + +uint32_t read16le(std::string_view data, std::size_t& offset) { + if (offset > data.size() || data.size() - offset < 2) + return NEED_MORE; + uint16_t c = static_cast<uint16_t>(data[offset + 1]) << 8 + | static_cast<uint16_t>(data[offset] & 0xff); + if (is_high_surrogate(c)) { + if (data.size() - offset < 4) + return NEED_MORE; + uint16_t d = static_cast<uint16_t>(data[offset + 3]) << 8 + | static_cast<uint16_t>(data[offset + 2] & 0xff); + if (is_low_surrogate(d)) { + offset += 4; + return 0x10000 + + (static_cast<uint32_t>(c & 0x3ff) << 10 + | (d & 0x3ff)); + } + return INVALID; + } else if (is_low_surrogate(c)) { + return INVALID; + } + offset += 2; + return c; +} + +} // namespace utf |
