summary | refs | log | tree | commit | diff
path: root/utf/src/utf16.cc
diff options
context:
space:
mode:
Diffstat (limited to 'utf/src/utf16.cc')
-rw-r--r-- utf/src/utf16.cc 67
1 files changed, 67 insertions, 0 deletions
diff --git a/utf/src/utf16.cc b/utf/src/utf16.cc
new file mode 100644
index 0000000..43595bf
--- /dev/null
+++ b/utf/src/utf16.cc
@@ -0,0 +1,67 @@
+#include "utf16.hh"
+
+#include "utf_error.hh"
+
+namespace utf {
+
namespace {

// File-local classifiers for UTF-16 code units. Both are pure range checks,
// so they are constexpr (usable in constant expressions) and noexcept.

/// Returns true when `c` lies in the range 0xD800..0xDBFF inclusive, i.e.
/// it is the leading (high) half of a UTF-16 surrogate pair.
constexpr bool is_high_surrogate(uint16_t c) noexcept {
    return c >= 0xd800 && c <= 0xdbff;
}

/// Returns true when `c` lies in the range 0xDC00..0xDFFF inclusive, i.e.
/// it is the trailing (low) half of a UTF-16 surrogate pair.
constexpr bool is_low_surrogate(uint16_t c) noexcept {
    return c >= 0xdc00 && c <= 0xdfff;
}

}  // namespace
+
+uint32_t read16be(std::string_view data, std::size_t& offset) {
+ if (offset > data.size() || data.size() - offset < 2)
+ return NEED_MORE;
+ uint16_t c = static_cast<uint16_t>(data[offset]) << 8
+ | static_cast<uint16_t>(data[offset + 1] & 0xff);
+ if (is_high_surrogate(c)) {
+ if (data.size() - offset < 4)
+ return NEED_MORE;
+ uint16_t d = static_cast<uint16_t>(data[offset + 2]) << 8
+ | static_cast<uint16_t>(data[offset + 3] & 0xff);
+ if (is_low_surrogate(d)) {
+ offset += 4;
+ return 0x10000
+ + (static_cast<uint32_t>(c & 0x3ff) << 10
+ | (d & 0x3ff));
+ }
+ return INVALID;
+ } else if (is_low_surrogate(c)) {
+ return INVALID;
+ }
+ offset += 2;
+ return c;
+}
+
+uint32_t read16le(std::string_view data, std::size_t& offset) {
+ if (offset > data.size() || data.size() - offset < 2)
+ return NEED_MORE;
+ uint16_t c = static_cast<uint16_t>(data[offset + 1]) << 8
+ | static_cast<uint16_t>(data[offset] & 0xff);
+ if (is_high_surrogate(c)) {
+ if (data.size() - offset < 4)
+ return NEED_MORE;
+ uint16_t d = static_cast<uint16_t>(data[offset + 3]) << 8
+ | static_cast<uint16_t>(data[offset + 2] & 0xff);
+ if (is_low_surrogate(d)) {
+ offset += 4;
+ return 0x10000
+ + (static_cast<uint32_t>(c & 0x3ff) << 10
+ | (d & 0x3ff));
+ }
+ return INVALID;
+ } else if (is_low_surrogate(c)) {
+ return INVALID;
+ }
+ offset += 2;
+ return c;
+}
+
+} // namespace utf