summary refs log tree commit diff
path: root/utf/src/utf8.cc
diff options
context:
space:
mode:
Diffstat (limited to 'utf/src/utf8.cc')
-rw-r--r-- utf/src/utf8.cc 35
1 files changed, 33 insertions, 2 deletions
diff --git a/utf/src/utf8.cc b/utf/src/utf8.cc
index 54b0296..0e444ae 100644
--- a/utf/src/utf8.cc
+++ b/utf/src/utf8.cc
@@ -12,12 +12,12 @@ inline bool valid_codepoint(uint32_t c) {
} // namespace
-uint32_t read8(std::string_view data, std::size_t& offset) {
+uint32_t read8(std::span<uint8_t const> data, std::size_t& offset) {
if (offset >= data.size())
return NEED_MORE;
uint32_t ret;
uint8_t size;
- switch (static_cast<uint8_t>(data[offset]) >> 4) {
+ switch (data[offset] >> 4) {
case 15:
if (data[offset] & 0x08)
return INVALID;
@@ -65,4 +65,35 @@ uint32_t read8(std::string_view data, std::size_t& offset) {
return ret;
}
+/// Encodes `codepoint` as UTF-8 into `data`, starting at index `offset`.
+///
+/// On success the 1 to 4 encoded bytes are stored at `data[offset...]`,
+/// `offset` is advanced past them and `true` is returned.
+///
+/// On failure nothing is written, `offset` is left unchanged and `false` is
+/// returned. This happens when the space remaining in `data` is too small for
+/// the encoding, or when `codepoint` is not a Unicode scalar value (a UTF-16
+/// surrogate, or a value above U+10FFFF).
+bool write8(uint32_t codepoint, std::span<uint8_t> data, std::size_t& offset) {
+  if (offset >= data.size()) UNLIKELY {
+    return false;
+  }
+  // Surrogates and values above U+10FFFF have no well-formed UTF-8 encoding.
+  // Without this guard, values >= 0x200000 would yield a lead byte with bit
+  // 0x08 set (which read8 reports as INVALID), and even larger values would
+  // be silently truncated when the lead byte is narrowed to uint8_t.
+  if (codepoint > 0x10ffff || (codepoint >= 0xd800 && codepoint <= 0xdfff)) UNLIKELY {
+    return false;
+  }
+  // Cannot underflow: offset < data.size() was established above.
+  const std::size_t room = data.size() - offset;
+  if (codepoint < 0x80) {
+    // 1 byte: 0xxxxxxx
+    data[offset++] = static_cast<uint8_t>(codepoint);
+  } else if (codepoint < 0x800) {
+    // 2 bytes: 110xxxxx 10xxxxxx
+    if (room < 2) UNLIKELY {
+      return false;
+    }
+    data[offset++] = static_cast<uint8_t>(0xc0 | (codepoint >> 6));
+    data[offset++] = static_cast<uint8_t>(0x80 | (codepoint & 0x3f));
+  } else if (codepoint < 0x10000) {
+    // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
+    if (room < 3) UNLIKELY {
+      return false;
+    }
+    data[offset++] = static_cast<uint8_t>(0xe0 | (codepoint >> 12));
+    data[offset++] = static_cast<uint8_t>(0x80 | ((codepoint >> 6) & 0x3f));
+    data[offset++] = static_cast<uint8_t>(0x80 | (codepoint & 0x3f));
+  } else {
+    // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+    if (room < 4) UNLIKELY {
+      return false;
+    }
+    data[offset++] = static_cast<uint8_t>(0xf0 | (codepoint >> 18));
+    data[offset++] = static_cast<uint8_t>(0x80 | ((codepoint >> 12) & 0x3f));
+    data[offset++] = static_cast<uint8_t>(0x80 | ((codepoint >> 6) & 0x3f));
+    data[offset++] = static_cast<uint8_t>(0x80 | (codepoint & 0x3f));
+  }
+  return true;
+}
+
} // namespace utf