summary | refs | log | tree | commit | diff
path: root/utf/src/utf32.cc
diff options
context:
space:
mode:
Diffstat (limited to 'utf/src/utf32.cc')
-rw-r--r-- utf/src/utf32.cc 43
1 files changed, 43 insertions, 0 deletions
diff --git a/utf/src/utf32.cc b/utf/src/utf32.cc
new file mode 100644
index 0000000..cfa29b6
--- /dev/null
+++ b/utf/src/utf32.cc
@@ -0,0 +1,43 @@
+#include "utf32.hh"
+
+#include "utf_error.hh"
+
+namespace utf {
+
+namespace {
+
+// Reports whether `c` is acceptable as a decoded code point: it must not
+// exceed 0x10ffff and must not fall in the surrogate range 0xd800..0xdfff.
+inline bool valid_codepoint(uint32_t c) {
+  // Surrogates are rejected first; everything else only needs the upper bound.
+  if (c >= 0xd800 && c <= 0xdfff)
+    return false;
+  return c <= 0x10ffff;
+}
+
+} // namespace
+
+// Reads four bytes of `data` starting at `offset` as one big-endian 32-bit
+// value.
+//
+// Returns NEED_MORE when fewer than four bytes are available at `offset`
+// (including when `offset` is already past the end), INVALID when the
+// assembled value is rejected by valid_codepoint(), and the value itself
+// otherwise. `offset` is advanced by four only when a value is returned;
+// it is left untouched on NEED_MORE and INVALID.
+uint32_t read32be(std::string_view data, std::size_t& offset) {
+  const std::size_t size = data.size();
+  // Compare against the remaining length so the subtraction cannot wrap.
+  if (offset > size || size - offset < 4)
+    return NEED_MORE;
+
+  // Most significant byte comes first: fold the bytes in storage order.
+  uint32_t value = 0;
+  for (std::size_t i = 0; i < 4; ++i) {
+    // Go through unsigned char so a negative `char` cannot sign-extend.
+    const auto byte = static_cast<unsigned char>(data[offset + i]);
+    value = (value << 8) | static_cast<uint32_t>(byte);
+  }
+
+  if (!valid_codepoint(value))
+    return INVALID;
+
+  offset += 4;
+  return value;
+}
+
+// Reads four bytes of `data` starting at `offset` as one little-endian
+// 32-bit value.
+//
+// Returns NEED_MORE when fewer than four bytes are available at `offset`
+// (including when `offset` is already past the end), INVALID when the
+// assembled value is rejected by valid_codepoint(), and the value itself
+// otherwise. `offset` is advanced by four only when a value is returned;
+// it is left untouched on NEED_MORE and INVALID.
+uint32_t read32le(std::string_view data, std::size_t& offset) {
+  const std::size_t size = data.size();
+  // Compare against the remaining length so the subtraction cannot wrap.
+  if (offset > size || size - offset < 4)
+    return NEED_MORE;
+
+  // Least significant byte comes first: byte i contributes bits 8*i..8*i+7.
+  uint32_t value = 0;
+  for (std::size_t i = 0; i < 4; ++i) {
+    // Go through unsigned char so a negative `char` cannot sign-extend.
+    const auto byte = static_cast<unsigned char>(data[offset + i]);
+    value |= static_cast<uint32_t>(byte) << (8 * i);
+  }
+
+  if (!valid_codepoint(value))
+    return INVALID;
+
+  offset += 4;
+  return value;
+}
+
+} // namespace utf