diff options
Diffstat (limited to 'utf/src')
| -rw-r--r-- | utf/src/utf16.cc | 4 | ||||
| -rw-r--r-- | utf/src/utf32.cc | 4 | ||||
| -rw-r--r-- | utf/src/utf8.cc | 35 |
3 files changed, 37 insertions, 6 deletions
diff --git a/utf/src/utf16.cc b/utf/src/utf16.cc index 43595bf..623c1be 100644 --- a/utf/src/utf16.cc +++ b/utf/src/utf16.cc @@ -16,7 +16,7 @@ inline bool is_low_surrogate(uint16_t c) { } // namespace -uint32_t read16be(std::string_view data, std::size_t& offset) { +uint32_t read16be(std::span<uint8_t const> data, std::size_t& offset) { if (offset > data.size() || data.size() - offset < 2) return NEED_MORE; uint16_t c = static_cast<uint16_t>(data[offset]) << 8 @@ -40,7 +40,7 @@ uint32_t read16be(std::string_view data, std::size_t& offset) { return c; } -uint32_t read16le(std::string_view data, std::size_t& offset) { +uint32_t read16le(std::span<uint8_t const> data, std::size_t& offset) { if (offset > data.size() || data.size() - offset < 2) return NEED_MORE; uint16_t c = static_cast<uint16_t>(data[offset + 1]) << 8 diff --git a/utf/src/utf32.cc b/utf/src/utf32.cc index cfa29b6..e33b0b4 100644 --- a/utf/src/utf32.cc +++ b/utf/src/utf32.cc @@ -12,7 +12,7 @@ inline bool valid_codepoint(uint32_t c) { } // namespace -uint32_t read32be(std::string_view data, std::size_t& offset) { +uint32_t read32be(std::span<uint8_t const> data, std::size_t& offset) { if (offset > data.size() || data.size() - offset < 4) return NEED_MORE; uint32_t c = static_cast<uint32_t>(data[offset]) << 24 @@ -26,7 +26,7 @@ uint32_t read32be(std::string_view data, std::size_t& offset) { return INVALID; } -uint32_t read32le(std::string_view data, std::size_t& offset) { +uint32_t read32le(std::span<uint8_t const> data, std::size_t& offset) { if (offset > data.size() || data.size() - offset < 4) return NEED_MORE; uint32_t c = static_cast<uint32_t>(data[offset + 3]) << 24 diff --git a/utf/src/utf8.cc b/utf/src/utf8.cc index 54b0296..0e444ae 100644 --- a/utf/src/utf8.cc +++ b/utf/src/utf8.cc @@ -12,12 +12,12 @@ inline bool valid_codepoint(uint32_t c) { } // namespace -uint32_t read8(std::string_view data, std::size_t& offset) { +uint32_t read8(std::span<uint8_t const> data, std::size_t& offset) { if (offset >= data.size()) return NEED_MORE; uint32_t ret; uint8_t size; - switch (static_cast<uint8_t>(data[offset]) >> 4) { + switch (data[offset] >> 4) { case 15: if (data[offset] & 0x08) return INVALID; @@ -65,4 +65,35 @@ uint32_t read8(std::string_view data, std::size_t& offset) { return ret; } +bool write8(uint32_t codepoint, std::span<uint8_t> data, std::size_t& offset) { + if (offset >= data.size()) UNLIKELY { + return false; + } + if (codepoint < 0x80) { + data[offset++] = codepoint; + } else if (codepoint < 0x800) { + if (data.size() - offset < 2) UNLIKELY { + return false; + } + data[offset++] = 0xc0 | (codepoint >> 6); + data[offset++] = 0x80 | (codepoint & 0x3f); + } else if (codepoint < 0x10000) { + if (data.size() - offset < 3) UNLIKELY { + return false; + } + data[offset++] = 0xe0 | (codepoint >> 12); + data[offset++] = 0x80 | ((codepoint >> 6) & 0x3f); + data[offset++] = 0x80 | (codepoint & 0x3f); + } else { + if (data.size() - offset < 4) UNLIKELY { + return false; + } + data[offset++] = 0xf0 | (codepoint >> 18); + data[offset++] = 0x80 | ((codepoint >> 12) & 0x3f); + data[offset++] = 0x80 | ((codepoint >> 6) & 0x3f); + data[offset++] = 0x80 | (codepoint & 0x3f); + } + return true; +} + } // namespace utf |
