From 7dd49c6293172b494c78918507242cdb55d35137 Mon Sep 17 00:00:00 2001 From: Joel Klinghed Date: Sun, 21 Jan 2024 12:31:30 +0100 Subject: WIP --- utf/src/utf16.cc | 4 ++-- utf/src/utf32.cc | 4 ++-- utf/src/utf8.cc | 35 +++++++++++++++++++++++++++++++++-- 3 files changed, 37 insertions(+), 6 deletions(-) (limited to 'utf/src') diff --git a/utf/src/utf16.cc b/utf/src/utf16.cc index 43595bf..623c1be 100644 --- a/utf/src/utf16.cc +++ b/utf/src/utf16.cc @@ -16,7 +16,7 @@ inline bool is_low_surrogate(uint16_t c) { } // namespace -uint32_t read16be(std::string_view data, std::size_t& offset) { +uint32_t read16be(std::span data, std::size_t& offset) { if (offset > data.size() || data.size() - offset < 2) return NEED_MORE; uint16_t c = static_cast(data[offset]) << 8 @@ -40,7 +40,7 @@ uint32_t read16be(std::string_view data, std::size_t& offset) { return c; } -uint32_t read16le(std::string_view data, std::size_t& offset) { +uint32_t read16le(std::span data, std::size_t& offset) { if (offset > data.size() || data.size() - offset < 2) return NEED_MORE; uint16_t c = static_cast(data[offset + 1]) << 8 diff --git a/utf/src/utf32.cc b/utf/src/utf32.cc index cfa29b6..e33b0b4 100644 --- a/utf/src/utf32.cc +++ b/utf/src/utf32.cc @@ -12,7 +12,7 @@ inline bool valid_codepoint(uint32_t c) { } // namespace -uint32_t read32be(std::string_view data, std::size_t& offset) { +uint32_t read32be(std::span data, std::size_t& offset) { if (offset > data.size() || data.size() - offset < 4) return NEED_MORE; uint32_t c = static_cast(data[offset]) << 24 @@ -26,7 +26,7 @@ uint32_t read32be(std::string_view data, std::size_t& offset) { return INVALID; } -uint32_t read32le(std::string_view data, std::size_t& offset) { +uint32_t read32le(std::span data, std::size_t& offset) { if (offset > data.size() || data.size() - offset < 4) return NEED_MORE; uint32_t c = static_cast(data[offset + 3]) << 24 diff --git a/utf/src/utf8.cc b/utf/src/utf8.cc index 54b0296..0e444ae 100644 --- a/utf/src/utf8.cc +++ b/utf/src/utf8.cc @@ -12,12 +12,12 @@ inline bool valid_codepoint(uint32_t c) { } // namespace -uint32_t read8(std::string_view data, std::size_t& offset) { +uint32_t read8(std::span data, std::size_t& offset) { if (offset >= data.size()) return NEED_MORE; uint32_t ret; uint8_t size; - switch (static_cast(data[offset]) >> 4) { + switch (data[offset] >> 4) { case 15: if (data[offset] & 0x08) return INVALID; @@ -65,4 +65,35 @@ uint32_t read8(std::string_view data, std::size_t& offset) { return ret; } +bool write8(uint32_t codepoint, std::span data, std::size_t& offset) { + if (offset >= data.size()) UNLIKELY { + return false; + } + if (codepoint < 0x80) { + data[offset++] = codepoint; + } else if (codepoint < 0x800) { + if (data.size() - offset < 2) UNLIKELY { + return false; + } + data[offset++] = 0xc0 | (codepoint >> 6); + data[offset++] = 0x80 | (codepoint & 0x3f); + } else if (codepoint < 0x10000) { + if (data.size() - offset < 3) UNLIKELY { + return false; + } + data[offset++] = 0xe0 | (codepoint >> 12); + data[offset++] = 0x80 | ((codepoint >> 6) & 0x3f); + data[offset++] = 0x80 | (codepoint & 0x3f); + } else { + if (data.size() - offset < 4) UNLIKELY { + return false; + } + data[offset++] = 0xf0 | (codepoint >> 18); + data[offset++] = 0x80 | ((codepoint >> 12) & 0x3f); + data[offset++] = 0x80 | ((codepoint >> 6) & 0x3f); + data[offset++] = 0x80 | (codepoint & 0x3f); + } + return true; +} + } // namespace utf -- cgit v1.2.3-70-g09d2