summaryrefslogtreecommitdiff
path: root/utf/src
diff options
context:
space:
mode:
Diffstat (limited to 'utf/src')
-rw-r--r--utf/src/utf16.cc4
-rw-r--r--utf/src/utf32.cc4
-rw-r--r--utf/src/utf8.cc35
3 files changed, 37 insertions, 6 deletions
diff --git a/utf/src/utf16.cc b/utf/src/utf16.cc
index 43595bf..623c1be 100644
--- a/utf/src/utf16.cc
+++ b/utf/src/utf16.cc
@@ -16,7 +16,7 @@ inline bool is_low_surrogate(uint16_t c) {
} // namespace
-uint32_t read16be(std::string_view data, std::size_t& offset) {
+uint32_t read16be(std::span<uint8_t const> data, std::size_t& offset) {
if (offset > data.size() || data.size() - offset < 2)
return NEED_MORE;
uint16_t c = static_cast<uint16_t>(data[offset]) << 8
@@ -40,7 +40,7 @@ uint32_t read16be(std::string_view data, std::size_t& offset) {
return c;
}
-uint32_t read16le(std::string_view data, std::size_t& offset) {
+uint32_t read16le(std::span<uint8_t const> data, std::size_t& offset) {
if (offset > data.size() || data.size() - offset < 2)
return NEED_MORE;
uint16_t c = static_cast<uint16_t>(data[offset + 1]) << 8
diff --git a/utf/src/utf32.cc b/utf/src/utf32.cc
index cfa29b6..e33b0b4 100644
--- a/utf/src/utf32.cc
+++ b/utf/src/utf32.cc
@@ -12,7 +12,7 @@ inline bool valid_codepoint(uint32_t c) {
} // namespace
-uint32_t read32be(std::string_view data, std::size_t& offset) {
+uint32_t read32be(std::span<uint8_t const> data, std::size_t& offset) {
if (offset > data.size() || data.size() - offset < 4)
return NEED_MORE;
uint32_t c = static_cast<uint32_t>(data[offset]) << 24
@@ -26,7 +26,7 @@ uint32_t read32be(std::string_view data, std::size_t& offset) {
return INVALID;
}
-uint32_t read32le(std::string_view data, std::size_t& offset) {
+uint32_t read32le(std::span<uint8_t const> data, std::size_t& offset) {
if (offset > data.size() || data.size() - offset < 4)
return NEED_MORE;
uint32_t c = static_cast<uint32_t>(data[offset + 3]) << 24
diff --git a/utf/src/utf8.cc b/utf/src/utf8.cc
index 54b0296..0e444ae 100644
--- a/utf/src/utf8.cc
+++ b/utf/src/utf8.cc
@@ -12,12 +12,12 @@ inline bool valid_codepoint(uint32_t c) {
} // namespace
-uint32_t read8(std::string_view data, std::size_t& offset) {
+uint32_t read8(std::span<uint8_t const> data, std::size_t& offset) {
if (offset >= data.size())
return NEED_MORE;
uint32_t ret;
uint8_t size;
- switch (static_cast<uint8_t>(data[offset]) >> 4) {
+ switch (data[offset] >> 4) {
case 15:
if (data[offset] & 0x08)
return INVALID;
@@ -65,4 +65,35 @@ uint32_t read8(std::string_view data, std::size_t& offset) {
return ret;
}
+// Encodes `codepoint` as UTF-8 into `data`, starting at index `offset`.
+//
+// On success the 1 to 4 encoded bytes are stored at data[offset...] and
+// `offset` is advanced past them. On failure nothing is written and `offset`
+// is left unchanged.
+//
+// Returns true if the code point was written. Returns false if `data` does
+// not have enough room left at `offset`, or if `codepoint` has no valid
+// UTF-8 encoding (a surrogate U+D800..U+DFFF, or a value above U+10FFFF).
+bool write8(uint32_t codepoint, std::span<uint8_t> data, std::size_t& offset) {
+  if (offset >= data.size()) UNLIKELY {
+    return false;
+  }
+  // Surrogates and values past U+10FFFF are not Unicode scalar values.
+  // Without this guard they would fall into the 3- and 4-byte branches below
+  // and produce ill-formed output (for values >= 0x200000 the lead byte
+  // would even be truncated when narrowed to uint8_t).
+  if ((codepoint >= 0xd800 && codepoint <= 0xdfff) || codepoint > 0x10ffff) UNLIKELY {
+    return false;
+  }
+  // Safe from underflow: offset < data.size() was established above.
+  std::size_t const room = data.size() - offset;
+  if (codepoint < 0x80) {
+    // 0xxxxxxx
+    data[offset++] = static_cast<uint8_t>(codepoint);
+  } else if (codepoint < 0x800) {
+    // 110xxxxx 10xxxxxx
+    if (room < 2) UNLIKELY {
+      return false;
+    }
+    data[offset++] = static_cast<uint8_t>(0xc0 | (codepoint >> 6));
+    data[offset++] = static_cast<uint8_t>(0x80 | (codepoint & 0x3f));
+  } else if (codepoint < 0x10000) {
+    // 1110xxxx 10xxxxxx 10xxxxxx
+    if (room < 3) UNLIKELY {
+      return false;
+    }
+    data[offset++] = static_cast<uint8_t>(0xe0 | (codepoint >> 12));
+    data[offset++] = static_cast<uint8_t>(0x80 | ((codepoint >> 6) & 0x3f));
+    data[offset++] = static_cast<uint8_t>(0x80 | (codepoint & 0x3f));
+  } else {
+    // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx (U+10000..U+10FFFF only)
+    if (room < 4) UNLIKELY {
+      return false;
+    }
+    data[offset++] = static_cast<uint8_t>(0xf0 | (codepoint >> 18));
+    data[offset++] = static_cast<uint8_t>(0x80 | ((codepoint >> 12) & 0x3f));
+    data[offset++] = static_cast<uint8_t>(0x80 | ((codepoint >> 6) & 0x3f));
+    data[offset++] = static_cast<uint8_t>(0x80 | (codepoint & 0x3f));
+  }
+  return true;
+}
+
} // namespace utf