From 7dd49c6293172b494c78918507242cdb55d35137 Mon Sep 17 00:00:00 2001 From: Joel Klinghed Date: Sun, 21 Jan 2024 12:31:30 +0100 Subject: WIP --- utf/src/utf8.cc | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) (limited to 'utf/src/utf8.cc') diff --git a/utf/src/utf8.cc b/utf/src/utf8.cc index 54b0296..0e444ae 100644 --- a/utf/src/utf8.cc +++ b/utf/src/utf8.cc @@ -12,12 +12,12 @@ inline bool valid_codepoint(uint32_t c) { } // namespace -uint32_t read8(std::string_view data, std::size_t& offset) { +uint32_t read8(std::span data, std::size_t& offset) { if (offset >= data.size()) return NEED_MORE; uint32_t ret; uint8_t size; - switch (static_cast(data[offset]) >> 4) { + switch (data[offset] >> 4) { case 15: if (data[offset] & 0x08) return INVALID; @@ -65,4 +65,35 @@ uint32_t read8(std::string_view data, std::size_t& offset) { return ret; } +bool write8(uint32_t codepoint, std::span data, std::size_t& offset) { + if (offset >= data.size()) UNLIKELY { + return false; + } + if (codepoint < 0x80) { + data[offset++] = codepoint; + } else if (codepoint < 0x800) { + if (data.size() - offset < 2) UNLIKELY { + return false; + } + data[offset++] = 0xc0 | (codepoint >> 6); + data[offset++] = 0x80 | (codepoint & 0x3f); + } else if (codepoint < 0x10000) { + if (data.size() - offset < 3) UNLIKELY { + return false; + } + data[offset++] = 0xe0 | (codepoint >> 12); + data[offset++] = 0x80 | ((codepoint >> 6) & 0x3f); + data[offset++] = 0x80 | (codepoint & 0x3f); + } else { + if (data.size() - offset < 4) UNLIKELY { + return false; + } + data[offset++] = 0xf0 | (codepoint >> 18); + data[offset++] = 0x80 | ((codepoint >> 12) & 0x3f); + data[offset++] = 0x80 | ((codepoint >> 6) & 0x3f); + data[offset++] = 0x80 | (codepoint & 0x3f); + } + return true; +} + } // namespace utf -- cgit v1.2.3-70-g09d2