summaryrefslogtreecommitdiff
path: root/utf
diff options
context:
space:
mode:
Diffstat (limited to 'utf')
-rw-r--r--utf/inc/utf16.hh12
-rw-r--r--utf/inc/utf32.hh12
-rw-r--r--utf/inc/utf8.hh19
-rw-r--r--utf/meson.build6
-rw-r--r--utf/src/utf16.cc4
-rw-r--r--utf/src/utf32.cc4
-rw-r--r--utf/src/utf8.cc35
-rw-r--r--utf/tst/test_utf16.cc81
-rw-r--r--utf/tst/test_utf32.cc85
-rw-r--r--utf/tst/test_utf8.cc204
10 files changed, 270 insertions, 192 deletions
diff --git a/utf/inc/utf16.hh b/utf/inc/utf16.hh
index 344b1a2..b9229bc 100644
--- a/utf/inc/utf16.hh
+++ b/utf/inc/utf16.hh
@@ -4,27 +4,29 @@
#include "macros.hh"
#include <cstdint>
-#include <string_view>
+#include <span>
namespace utf {
-/* Read one unicode codepoint from UTF-16 BigEndian encoded data if possible.
+/**
+ * Read one unicode codepoint from UTF-16 BigEndian encoded data if possible.
* If successful, offset is incremented to point to next codepoint.
* Will fail:
* - not enough data is left in data given offset, returns NEED_MORE.
* - data is not valid UTF-16, ie. invalid or incomplete surrogate pairs,
* returns INVALID.
*/
-uint32_t HIDDEN read16be(std::string_view data, std::size_t& offset);
+uint32_t HIDDEN read16be(std::span<uint8_t const> data, std::size_t& offset);
-/* Read one unicode codepoint from UTF-16 LittleEndian encoded data if possible.
+/**
+ * Read one unicode codepoint from UTF-16 LittleEndian encoded data if possible.
* If successful, offset is incremented to point to next codepoint.
* Will fail:
* - not enough data is left in data given offset, returns NEED_MORE.
* - data is not valid UTF-16, ie. invalid or incomplete surrogate pairs,
* returns INVALID.
*/
-uint32_t HIDDEN read16le(std::string_view data, std::size_t& offset);
+uint32_t HIDDEN read16le(std::span<uint8_t const> data, std::size_t& offset);
} // namespace utf
diff --git a/utf/inc/utf32.hh b/utf/inc/utf32.hh
index 2d3088e..4ee5eac 100644
--- a/utf/inc/utf32.hh
+++ b/utf/inc/utf32.hh
@@ -4,25 +4,27 @@
#include "macros.hh"
#include <cstdint>
-#include <string_view>
+#include <span>
namespace utf {
-/* Read one unicode codepoint from UTF-32 BigEndian encoded data if possible.
+/**
+ * Read one unicode codepoint from UTF-32 BigEndian encoded data if possible.
* If successful, offset is incremented to point to next codepoint.
* Will fail:
* - not enough data is left in data given offset, returns NEED_MORE.
* - data is not valid UTF-32, ie. outside valid ranges, returns INVALID.
*/
-uint32_t HIDDEN read32be(std::string_view data, std::size_t& offset);
+uint32_t HIDDEN read32be(std::span<uint8_t const> data, std::size_t& offset);
-/* Read one unicode codepoint from UTF-32 LittleEndian encoded data if possible.
+/**
+ * Read one unicode codepoint from UTF-32 LittleEndian encoded data if possible.
* If successful, offset is incremented to point to next codepoint.
* Will fail:
* - not enough data is left in data given offset, returns NEED_MORE.
* - data is not valid UTF-32, ie. outside valid ranges, returns INVALID.
*/
-uint32_t HIDDEN read32le(std::string_view data, std::size_t& offset);
+uint32_t HIDDEN read32le(std::span<uint8_t const> data, std::size_t& offset);
} // namespace utf
diff --git a/utf/inc/utf8.hh b/utf/inc/utf8.hh
index a3ea84a..7735ecd 100644
--- a/utf/inc/utf8.hh
+++ b/utf/inc/utf8.hh
@@ -4,18 +4,29 @@
#include "macros.hh"
#include <cstdint>
-#include <string_view>
+#include <span>
namespace utf {
-/* Read one unicode codepoint from UTF-8 encoded data if possible.
- * If successfull offset is incremented to point to next codepoint.
+/**
+ * Read one unicode codepoint from UTF-8 encoded data if possible.
+ * If successful, offset is incremented to point to next codepoint.
* Will fail:
* - not enough data is left in data given offset, returns NEED_MORE.
* - data is not valid UTF-8, this includes overlong encodings and
* invalid unicode code points, returns INVALID.
*/
-uint32_t HIDDEN read8(std::string_view data, std::size_t& offset);
+uint32_t HIDDEN read8(std::span<uint8_t const> data, std::size_t& offset);
+
+/**
+ * Write one unicode codepoint to UTF-8 encoded data if possible.
+ * If successful, offset is incremented to the end of the written data
+ * and true is returned.
+ * If not successful, false is returned and neither offset nor data
+ * is modified.
+ */
+bool HIDDEN write8(uint32_t codepoint, std::span<uint8_t> data,
+ std::size_t& offset);
} // namespace utf
diff --git a/utf/meson.build b/utf/meson.build
index 64db6ff..051ddd1 100644
--- a/utf/meson.build
+++ b/utf/meson.build
@@ -23,16 +23,16 @@ test('utf8',
executable(
'test_utf8',
sources: ['tst/test_utf8.cc'],
- dependencies: [utf_dep, gtest_dep]))
+ dependencies: [utf_dep, gmock_dep, gtest_dep]))
test('utf16',
executable(
'test_utf16',
sources: ['tst/test_utf16.cc'],
- dependencies: [utf_dep, gtest_dep]))
+ dependencies: [utf_dep, gmock_dep, gtest_dep]))
test('utf32',
executable(
'test_utf32',
sources: ['tst/test_utf32.cc'],
- dependencies: [utf_dep, gtest_dep]))
+ dependencies: [utf_dep, gmock_dep, gtest_dep]))
diff --git a/utf/src/utf16.cc b/utf/src/utf16.cc
index 43595bf..623c1be 100644
--- a/utf/src/utf16.cc
+++ b/utf/src/utf16.cc
@@ -16,7 +16,7 @@ inline bool is_low_surrogate(uint16_t c) {
} // namespace
-uint32_t read16be(std::string_view data, std::size_t& offset) {
+uint32_t read16be(std::span<uint8_t const> data, std::size_t& offset) {
if (offset > data.size() || data.size() - offset < 2)
return NEED_MORE;
uint16_t c = static_cast<uint16_t>(data[offset]) << 8
@@ -40,7 +40,7 @@ uint32_t read16be(std::string_view data, std::size_t& offset) {
return c;
}
-uint32_t read16le(std::string_view data, std::size_t& offset) {
+uint32_t read16le(std::span<uint8_t const> data, std::size_t& offset) {
if (offset > data.size() || data.size() - offset < 2)
return NEED_MORE;
uint16_t c = static_cast<uint16_t>(data[offset + 1]) << 8
diff --git a/utf/src/utf32.cc b/utf/src/utf32.cc
index cfa29b6..e33b0b4 100644
--- a/utf/src/utf32.cc
+++ b/utf/src/utf32.cc
@@ -12,7 +12,7 @@ inline bool valid_codepoint(uint32_t c) {
} // namespace
-uint32_t read32be(std::string_view data, std::size_t& offset) {
+uint32_t read32be(std::span<uint8_t const> data, std::size_t& offset) {
if (offset > data.size() || data.size() - offset < 4)
return NEED_MORE;
uint32_t c = static_cast<uint32_t>(data[offset]) << 24
@@ -26,7 +26,7 @@ uint32_t read32be(std::string_view data, std::size_t& offset) {
return INVALID;
}
-uint32_t read32le(std::string_view data, std::size_t& offset) {
+uint32_t read32le(std::span<uint8_t const> data, std::size_t& offset) {
if (offset > data.size() || data.size() - offset < 4)
return NEED_MORE;
uint32_t c = static_cast<uint32_t>(data[offset + 3]) << 24
diff --git a/utf/src/utf8.cc b/utf/src/utf8.cc
index 54b0296..0e444ae 100644
--- a/utf/src/utf8.cc
+++ b/utf/src/utf8.cc
@@ -12,12 +12,12 @@ inline bool valid_codepoint(uint32_t c) {
} // namespace
-uint32_t read8(std::string_view data, std::size_t& offset) {
+uint32_t read8(std::span<uint8_t const> data, std::size_t& offset) {
if (offset >= data.size())
return NEED_MORE;
uint32_t ret;
uint8_t size;
- switch (static_cast<uint8_t>(data[offset]) >> 4) {
+ switch (data[offset] >> 4) {
case 15:
if (data[offset] & 0x08)
return INVALID;
@@ -65,4 +65,35 @@ uint32_t read8(std::string_view data, std::size_t& offset) {
return ret;
}
+/**
+ * Encode one unicode codepoint as UTF-8 into data, starting at offset.
+ * If successful, the 1 to 4 encoded bytes are stored, offset is advanced
+ * past them and true is returned.
+ * Will fail, returning false and leaving both offset and data untouched:
+ * - codepoint is not a unicode scalar value, ie. it is a surrogate
+ *   (U+D800..U+DFFF) or above U+10FFFF.
+ * - not enough room is left in data given offset.
+ */
+bool write8(uint32_t codepoint, std::span<uint8_t> data, std::size_t& offset) {
+  if (offset >= data.size()) UNLIKELY {
+    return false;
+  }
+  // Surrogates and values above U+10FFFF have no valid UTF-8 encoding.
+  // Without this check the 3- and 4-byte branches below would emit byte
+  // sequences that are not valid UTF-8 (for large values even the lead
+  // byte 0xf0 | (codepoint >> 18) stops being a 4-byte lead).
+  const bool encodable =
+      codepoint <= 0x10ffff && (codepoint < 0xd800 || codepoint > 0xdfff);
+  if (!encodable) UNLIKELY {
+    return false;
+  }
+  // Cannot underflow: offset < data.size() was established above.
+  const std::size_t room = data.size() - offset;
+  if (codepoint < 0x80) {
+    data[offset++] = static_cast<uint8_t>(codepoint);
+  } else if (codepoint < 0x800) {
+    if (room < 2) UNLIKELY {
+      return false;
+    }
+    data[offset++] = static_cast<uint8_t>(0xc0 | (codepoint >> 6));
+    data[offset++] = static_cast<uint8_t>(0x80 | (codepoint & 0x3f));
+  } else if (codepoint < 0x10000) {
+    if (room < 3) UNLIKELY {
+      return false;
+    }
+    data[offset++] = static_cast<uint8_t>(0xe0 | (codepoint >> 12));
+    data[offset++] = static_cast<uint8_t>(0x80 | ((codepoint >> 6) & 0x3f));
+    data[offset++] = static_cast<uint8_t>(0x80 | (codepoint & 0x3f));
+  } else {
+    if (room < 4) UNLIKELY {
+      return false;
+    }
+    data[offset++] = static_cast<uint8_t>(0xf0 | (codepoint >> 18));
+    data[offset++] = static_cast<uint8_t>(0x80 | ((codepoint >> 12) & 0x3f));
+    data[offset++] = static_cast<uint8_t>(0x80 | ((codepoint >> 6) & 0x3f));
+    data[offset++] = static_cast<uint8_t>(0x80 | (codepoint & 0x3f));
+  }
+  return true;
+}
+
} // namespace utf
diff --git a/utf/tst/test_utf16.cc b/utf/tst/test_utf16.cc
index c17982e..3b3c03c 100644
--- a/utf/tst/test_utf16.cc
+++ b/utf/tst/test_utf16.cc
@@ -2,156 +2,137 @@
#include "utf_error.hh"
+#include <array>
#include <gtest/gtest.h>
TEST(utf16be, sanity) {
- std::string_view str("\x00\x24", 2);
size_t offset = 0;
- auto ret = utf::read16be(str, offset);
+ auto ret = utf::read16be(std::array<uint8_t, 2>({0x00, 0x24}), offset);
EXPECT_EQ('$', ret);
EXPECT_EQ(2, offset);
- str = "\x20\xAC";
offset = 0;
- ret = utf::read16be(str, offset);
+ ret = utf::read16be(std::array<uint8_t, 2>({0x20, 0xAC}), offset);
EXPECT_EQ(0x20AC, ret);
EXPECT_EQ(2, offset);
- str = "\xD8\x01\xDC\x37";
offset = 0;
- ret = utf::read16be(str, offset);
+ ret = utf::read16be(std::array<uint8_t, 4>({0xD8, 0x01, 0xDC, 0x37}), offset);
EXPECT_EQ(0x10437, ret);
EXPECT_EQ(4, offset);
- str = "\xD8\x52\xDF\x62";
offset = 0;
- ret = utf::read16be(str, offset);
+ ret = utf::read16be(std::array<uint8_t, 4>({0xD8, 0x52, 0xDF, 0x62}), offset);
EXPECT_EQ(0x24B62, ret);
EXPECT_EQ(4, offset);
}
TEST(utf16le, sanity) {
- std::string_view str("\x24\x00", 2);
size_t offset = 0;
- auto ret = utf::read16le(str, offset);
+ auto ret = utf::read16le(std::array<uint8_t, 2>({0x24, 0x00}), offset);
EXPECT_EQ('$', ret);
EXPECT_EQ(2, offset);
- str = "\xAC\x20";
offset = 0;
- ret = utf::read16le(str, offset);
+ ret = utf::read16le(std::array<uint8_t, 2>({0xAC, 0x20}), offset);
EXPECT_EQ(0x20AC, ret);
EXPECT_EQ(2, offset);
- str = "\x01\xD8\x37\xDC";
offset = 0;
- ret = utf::read16le(str, offset);
+ ret = utf::read16le(std::array<uint8_t, 4>({0x01, 0xD8, 0x37, 0xDC}), offset);
EXPECT_EQ(0x10437, ret);
EXPECT_EQ(4, offset);
- str = "\x52\xD8\x62\xDF";
offset = 0;
- ret = utf::read16le(str, offset);
+ ret = utf::read16le(std::array<uint8_t, 4>({0x52, 0xD8, 0x62, 0xDF}), offset);
EXPECT_EQ(0x24B62, ret);
EXPECT_EQ(4, offset);
}
TEST(utf16be, bom) {
- std::string_view str("\xFE\xFF\x20\xAC");
+ std::array<uint8_t, 4> data({0xFE, 0xFF, 0x20, 0xAC});
size_t offset = 0;
- auto ret = utf::read16be(str, offset);
+ auto ret = utf::read16be(data, offset);
EXPECT_EQ(0xFEFF, ret);
- ret = utf::read16be(str, offset);
+ ret = utf::read16be(data, offset);
EXPECT_EQ(0x20AC, ret);
- ret = utf::read16be(str, offset);
+ ret = utf::read16be(data, offset);
EXPECT_EQ(utf::NEED_MORE, ret);
- EXPECT_EQ(str.size(), offset);
+ EXPECT_EQ(data.size(), offset);
}
TEST(utf16le, bom) {
- std::string_view str("\xFF\xFE\xAC\x20");
+ std::array<uint8_t, 4> data({0xFF, 0xFE, 0xAC, 0x20});
size_t offset = 0;
- auto ret = utf::read16le(str, offset);
+ auto ret = utf::read16le(data, offset);
EXPECT_EQ(0xFEFF, ret);
- ret = utf::read16le(str, offset);
+ ret = utf::read16le(data, offset);
EXPECT_EQ(0x20AC, ret);
- ret = utf::read16le(str, offset);
+ ret = utf::read16le(data, offset);
EXPECT_EQ(utf::NEED_MORE, ret);
- EXPECT_EQ(str.size(), offset);
+ EXPECT_EQ(data.size(), offset);
}
TEST(utf16be, invalid) {
- std::string_view str("\xD8");
size_t offset = 0;
- auto ret = utf::read16be(str, offset);
+ auto ret = utf::read16be(std::array<uint8_t, 1>({0xD8}), offset);
EXPECT_EQ(utf::NEED_MORE, ret);
EXPECT_EQ(0, offset);
- str = "";
offset = 0;
- ret = utf::read16be(str, offset);
+ ret = utf::read16be(std::array<uint8_t, 0>(), offset);
EXPECT_EQ(utf::NEED_MORE, ret);
EXPECT_EQ(0, offset);
- str = "\xD8\x01";
offset = 0;
- ret = utf::read16be(str, offset);
+ ret = utf::read16be(std::array<uint8_t, 2>({0xD8, 0x01}), offset);
EXPECT_EQ(utf::NEED_MORE, ret);
EXPECT_EQ(0, offset);
- str = "\xD8\x01\xDC";
offset = 0;
- ret = utf::read16be(str, offset);
+ ret = utf::read16be(std::array<uint8_t, 3>({0xD8, 0x01, 0xDC}), offset);
EXPECT_EQ(utf::NEED_MORE, ret);
EXPECT_EQ(0, offset);
- str = "\xDC\x37\xD8\x01";
offset = 0;
- ret = utf::read16be(str, offset);
+ ret = utf::read16be(std::array<uint8_t, 4>({0xDC, 0x37, 0xD8, 0x01}), offset);
EXPECT_EQ(utf::INVALID, ret);
EXPECT_EQ(0, offset);
- str = "\xD8\x01\xD8\x01";
offset = 0;
- ret = utf::read16be(str, offset);
+ ret = utf::read16be(std::array<uint8_t, 4>({0xD8, 0x01, 0xD8, 0x01}), offset);
EXPECT_EQ(utf::INVALID, ret);
EXPECT_EQ(0, offset);
}
TEST(utf16le, invalid) {
- std::string_view str("\x01");
size_t offset = 0;
- auto ret = utf::read16le(str, offset);
+ auto ret = utf::read16le(std::array<uint8_t, 1>({0x01}), offset);
EXPECT_EQ(utf::NEED_MORE, ret);
EXPECT_EQ(0, offset);
- str = "";
offset = 0;
- ret = utf::read16le(str, offset);
+ ret = utf::read16le(std::array<uint8_t, 0>(), offset);
EXPECT_EQ(utf::NEED_MORE, ret);
EXPECT_EQ(0, offset);
- str = "\x01\xD8";
offset = 0;
- ret = utf::read16le(str, offset);
+ ret = utf::read16le(std::array<uint8_t, 2>({0x01, 0xD8}), offset);
EXPECT_EQ(utf::NEED_MORE, ret);
EXPECT_EQ(0, offset);
- str = "\x01\xD8\x37";
offset = 0;
- ret = utf::read16le(str, offset);
+ ret = utf::read16le(std::array<uint8_t, 3>({0x01, 0xD8, 0x37}), offset);
EXPECT_EQ(utf::NEED_MORE, ret);
EXPECT_EQ(0, offset);
- str = "\x37\xDC\x01\xD8";
offset = 0;
- ret = utf::read16le(str, offset);
+ ret = utf::read16le(std::array<uint8_t, 4>({0x37, 0xDC, 0x01, 0xD8}), offset);
EXPECT_EQ(utf::INVALID, ret);
EXPECT_EQ(0, offset);
- str = "\x01\xD8\x01\xD8";
offset = 0;
- ret = utf::read16le(str, offset);
+ ret = utf::read16le(std::array<uint8_t, 4>({0x01, 0xD8, 0x01, 0xD8}), offset);
EXPECT_EQ(utf::INVALID, ret);
EXPECT_EQ(0, offset);
}
diff --git a/utf/tst/test_utf32.cc b/utf/tst/test_utf32.cc
index 796b4cd..447b541 100644
--- a/utf/tst/test_utf32.cc
+++ b/utf/tst/test_utf32.cc
@@ -2,144 +2,137 @@
#include "utf_error.hh"
+#include <array>
#include <gtest/gtest.h>
TEST(utf32be, sanity) {
- std::string_view str("\x00\x00\x00\x24", 4);
size_t offset = 0;
- auto ret = utf::read32be(str, offset);
+ auto ret = utf::read32be(
+ std::array<uint8_t, 4>({0x00, 0x00, 0x00, 0x24}), offset);
EXPECT_EQ('$', ret);
EXPECT_EQ(4, offset);
- str = std::string_view("\x00\x00\x20\xAC", 4);
offset = 0;
- ret = utf::read32be(str, offset);
+ ret = utf::read32be(
+ std::array<uint8_t, 4>({0x00, 0x00, 0x20, 0xAC}), offset);
EXPECT_EQ(0x20AC, ret);
EXPECT_EQ(4, offset);
- str = std::string_view("\x00\x01\x04\x37", 4);
offset = 0;
- ret = utf::read32be(str, offset);
+ ret = utf::read32be(
+ std::array<uint8_t, 4>({0x00, 0x01, 0x04, 0x37}), offset);
EXPECT_EQ(0x10437, ret);
EXPECT_EQ(4, offset);
}
TEST(utf32le, sanity) {
- std::string_view str("\x24\x00\x00\x00", 4);
size_t offset = 0;
- auto ret = utf::read32le(str, offset);
+ auto ret = utf::read32le(
+ std::array<uint8_t, 4>({0x24, 0x00, 0x00, 0x00}), offset);
EXPECT_EQ('$', ret);
EXPECT_EQ(4, offset);
- str = std::string_view("\xAC\x20\x00\x00", 4);
offset = 0;
- ret = utf::read32le(str, offset);
+ ret = utf::read32le(
+ std::array<uint8_t, 4>({0xAC, 0x20, 0x00, 0x00}), offset);
EXPECT_EQ(0x20AC, ret);
EXPECT_EQ(4, offset);
- str = std::string_view("\x37\x04\x01\x00", 4);
offset = 0;
- ret = utf::read32le(str, offset);
+ ret = utf::read32le(
+ std::array<uint8_t, 4>({0x37, 0x04, 0x01, 0x00}), offset);
EXPECT_EQ(0x10437, ret);
EXPECT_EQ(4, offset);
}
TEST(utf32be, invalid) {
- std::string_view str("\xFF\xFF\xFF\xFF");
size_t offset = 0;
- auto ret = utf::read32be(str, offset);
+ auto ret = utf::read32be(
+ std::array<uint8_t, 4>({0xFF, 0xFF, 0xFF, 0xFF}), offset);
EXPECT_EQ(utf::INVALID, ret);
EXPECT_EQ(0, offset);
- str = std::string_view("\x00\x00\xD8\x00", 4);
offset = 0;
- ret = utf::read32be(str, offset);
+ ret = utf::read32be(
+ std::array<uint8_t, 4>({0x00, 0x00, 0xD8, 0x00}), offset);
EXPECT_EQ(utf::INVALID, ret);
EXPECT_EQ(0, offset);
- str = "";
offset = 0;
- ret = utf::read32be(str, offset);
+ ret = utf::read32be(std::array<uint8_t, 1>({}), offset);
EXPECT_EQ(utf::NEED_MORE, ret);
EXPECT_EQ(0, offset);
- str = std::string_view("\x00", 1);
offset = 0;
- ret = utf::read32be(str, offset);
+ ret = utf::read32be(std::array<uint8_t, 1>({0x00}), offset);
EXPECT_EQ(utf::NEED_MORE, ret);
EXPECT_EQ(0, offset);
- str = std::string_view("\x00\x00", 2);
offset = 0;
- ret = utf::read32be(str, offset);
+ ret = utf::read32be(std::array<uint8_t, 2>({0x00, 0x00}), offset);
EXPECT_EQ(utf::NEED_MORE, ret);
EXPECT_EQ(0, offset);
- str = std::string_view("\x00\x00\x00", 3);
offset = 0;
- ret = utf::read32be(str, offset);
+ ret = utf::read32be(std::array<uint8_t, 3>({0x00, 0x00, 0x00}), offset);
EXPECT_EQ(utf::NEED_MORE, ret);
EXPECT_EQ(0, offset);
}
TEST(utf32le, invalid) {
- std::string_view str("\xFF\xFF\xFF\xFF");
size_t offset = 0;
- auto ret = utf::read32le(str, offset);
+ auto ret = utf::read32le(
+ std::array<uint8_t, 4>({0xFF, 0xFF, 0xFF, 0xFF}), offset);
EXPECT_EQ(utf::INVALID, ret);
EXPECT_EQ(0, offset);
- str = std::string_view("\x00\xD8\x00\x00", 4);
offset = 0;
- ret = utf::read32le(str, offset);
+ ret = utf::read32le(
+ std::array<uint8_t, 4>({0x00, 0xD8, 0x00, 0x00}), offset);
EXPECT_EQ(utf::INVALID, ret);
EXPECT_EQ(0, offset);
- str = "";
offset = 0;
- ret = utf::read32le(str, offset);
+ ret = utf::read32le(std::array<uint8_t, 0>(), offset);
EXPECT_EQ(utf::NEED_MORE, ret);
EXPECT_EQ(0, offset);
- str = std::string_view("\x00", 1);
offset = 0;
- ret = utf::read32le(str, offset);
+ ret = utf::read32le(std::array<uint8_t, 1>({0x00}), offset);
EXPECT_EQ(utf::NEED_MORE, ret);
EXPECT_EQ(0, offset);
- str = std::string_view("\x00\x00", 2);
offset = 0;
- ret = utf::read32le(str, offset);
+ ret = utf::read32le(std::array<uint8_t, 2>({0x00, 0x00}), offset);
EXPECT_EQ(utf::NEED_MORE, ret);
EXPECT_EQ(0, offset);
- str = std::string_view("\x00\x00\x00", 3);
offset = 0;
- ret = utf::read32le(str, offset);
+ ret = utf::read32le(std::array<uint8_t, 3>({0x00, 0x00, 0x00}), offset);
EXPECT_EQ(utf::NEED_MORE, ret);
EXPECT_EQ(0, offset);
}
TEST(utf32be, bom) {
- std::string_view str("\x00\x00\xFF\xFE\x00\x00\x20\xAC", 8);
+ std::array<uint8_t, 8> data({0x00, 0x00, 0xFF, 0xFE, 0x00, 0x00, 0x20, 0xAC});
size_t offset = 0;
- auto ret = utf::read32be(str, offset);
+ auto ret = utf::read32be(data, offset);
EXPECT_EQ(0xFFFE, ret);
- ret = utf::read32be(str, offset);
+ ret = utf::read32be(data, offset);
EXPECT_EQ(0x20AC, ret);
- ret = utf::read32be(str, offset);
+ ret = utf::read32be(data, offset);
EXPECT_EQ(utf::NEED_MORE, ret);
- EXPECT_EQ(str.size(), offset);
+ EXPECT_EQ(data.size(), offset);
}
TEST(utf32le, bom) {
- std::string_view str("\xFE\xFF\x00\x00\xAC\x20\x00\x00", 8);
+ std::array<uint8_t, 8> data({0xFE, 0xFF, 0x00, 0x00, 0xAC, 0x20, 0x00, 0x00});
size_t offset = 0;
- auto ret = utf::read32le(str, offset);
+ auto ret = utf::read32le(data, offset);
EXPECT_EQ(0xFFFE, ret);
- ret = utf::read32le(str, offset);
+ ret = utf::read32le(data, offset);
EXPECT_EQ(0x20AC, ret);
- ret = utf::read32le(str, offset);
+ ret = utf::read32le(data, offset);
EXPECT_EQ(utf::NEED_MORE, ret);
- EXPECT_EQ(str.size(), offset);
+ EXPECT_EQ(data.size(), offset);
}
diff --git a/utf/tst/test_utf8.cc b/utf/tst/test_utf8.cc
index 10df969..8bdeba4 100644
--- a/utf/tst/test_utf8.cc
+++ b/utf/tst/test_utf8.cc
@@ -2,187 +2,245 @@
#include "utf_error.hh"
+#include <array>
+#include <gmock/gmock.h>
#include <gtest/gtest.h>
+#include <span>
-TEST(utf8, sanity) {
- std::string_view str("$");
+TEST(utf8, read_sanity) {
size_t offset = 0;
- auto ret = utf::read8(str, offset);
+ auto ret = utf::read8(std::array<uint8_t, 1>({'$'}), offset);
EXPECT_EQ('$', ret);
EXPECT_EQ(1, offset);
- str = "\xC2\xA3";
offset = 0;
- ret = utf::read8(str, offset);
+ ret = utf::read8(std::array<uint8_t, 2>({0xC2, 0xA3}), offset);
EXPECT_EQ(0xa3, ret);
EXPECT_EQ(2, offset);
- str = "\xD0\x98";
offset = 0;
- ret = utf::read8(str, offset);
+ ret = utf::read8(std::array<uint8_t, 2>({0xD0, 0x98}), offset);
EXPECT_EQ(0x418, ret);
EXPECT_EQ(2, offset);
- str = "\xE0\xA4\xB9";
offset = 0;
- ret = utf::read8(str, offset);
+ ret = utf::read8(std::array<uint8_t, 3>({0xE0, 0xA4, 0xB9}), offset);
EXPECT_EQ(0x939, ret);
EXPECT_EQ(3, offset);
- str = "\xE2\x82\xAC";
offset = 0;
- ret = utf::read8(str, offset);
+ ret = utf::read8(std::array<uint8_t, 3>({0xE2, 0x82, 0xAC}), offset);
EXPECT_EQ(0x20AC, ret);
EXPECT_EQ(3, offset);
- str = "\xED\x95\x9C";
offset = 0;
- ret = utf::read8(str, offset);
+ ret = utf::read8(std::array<uint8_t, 3>({0xED, 0x95, 0x9C}), offset);
EXPECT_EQ(0xD55C, ret);
EXPECT_EQ(3, offset);
- str = "\xF0\x90\x8D\x88";
offset = 0;
- ret = utf::read8(str, offset);
+ ret = utf::read8(std::array<uint8_t, 4>({0xF0, 0x90, 0x8D, 0x88}), offset);
EXPECT_EQ(0x10348, ret);
EXPECT_EQ(4, offset);
}
-TEST(utf8, overlong) {
- std::string_view str("\xF0\x82\x82\xAC");
+TEST(utf8, write_sanity) {
+ std::array<uint8_t, 10> out;
size_t offset = 0;
- auto ret = utf::read8(str, offset);
+ EXPECT_TRUE(utf::write8('$', out, offset));
+ EXPECT_THAT(std::span(out).subspan(0, offset),
+ testing::ElementsAre('$'));
+ EXPECT_EQ(1, offset);
+
+ offset = 0;
+ EXPECT_TRUE(utf::write8(0xa3, out, offset));
+ EXPECT_THAT(std::span(out).subspan(0, offset),
+ testing::ElementsAre(0xC2, 0xA3));
+ EXPECT_EQ(2, offset);
+
+ offset = 0;
+ EXPECT_TRUE(utf::write8(0x418, out, offset));
+ EXPECT_THAT(std::span(out).subspan(0, offset),
+ testing::ElementsAre(0xD0, 0x98));
+ EXPECT_EQ(2, offset);
+
+ offset = 0;
+ EXPECT_TRUE(utf::write8(0x939, out, offset));
+ EXPECT_THAT(std::span(out).subspan(0, offset),
+ testing::ElementsAre(0xE0, 0xA4, 0xB9));
+ EXPECT_EQ(3, offset);
+
+ offset = 0;
+ EXPECT_TRUE(utf::write8(0x20AC, out, offset));
+ EXPECT_THAT(std::span(out).subspan(0, offset),
+ testing::ElementsAre(0xE2, 0x82, 0xAC));
+ EXPECT_EQ(3, offset);
+
+ offset = 0;
+ EXPECT_TRUE(utf::write8(0xD55C, out, offset));
+ EXPECT_THAT(std::span(out).subspan(0, offset),
+ testing::ElementsAre(0xED, 0x95, 0x9C));
+ EXPECT_EQ(3, offset);
+
+ offset = 0;
+ EXPECT_TRUE(utf::write8(0x10348, out, offset));
+ EXPECT_THAT(std::span(out).subspan(0, offset),
+ testing::ElementsAre(0xF0, 0x90, 0x8D, 0x88));
+ EXPECT_EQ(4, offset);
+}
+
+TEST(utf8, read_overlong) {
+ size_t offset = 0;
+ auto ret = utf::read8(
+ std::array<uint8_t, 4>({0xF0, 0x82, 0x82, 0xAC}), offset);
EXPECT_EQ(utf::INVALID, ret);
EXPECT_EQ(0, offset);
- str = "\xE0\x81\x81";
offset = 0;
- ret = utf::read8(str, offset);
+ ret = utf::read8(std::array<uint8_t, 3>({0xE0, 0x81, 0x81}), offset);
EXPECT_EQ(utf::INVALID, ret);
EXPECT_EQ(0, offset);
- str = "\xC0\x80";
offset = 0;
- ret = utf::read8(str, offset);
+ ret = utf::read8(std::array<uint8_t, 2>({0xC0, 0x80}), offset);
EXPECT_EQ(utf::INVALID, ret);
EXPECT_EQ(0, offset);
}
-TEST(utf8, invalid) {
- std::string_view str("\xED\xB0\x80");
+TEST(utf8, read_invalid) {
size_t offset = 0;
- auto ret = utf::read8(str, offset);
+ auto ret = utf::read8(std::array<uint8_t, 3>({0xED, 0xB0, 0x80}), offset);
EXPECT_EQ(utf::INVALID, ret);
EXPECT_EQ(0, offset);
- str = "\xFB\xFF\xFF";
offset = 0;
- ret = utf::read8(str, offset);
+ ret = utf::read8(std::array<uint8_t, 3>({0xFB, 0xFF, 0xFF}), offset);
EXPECT_EQ(utf::INVALID, ret);
EXPECT_EQ(0, offset);
- str = "\xFF\xFF\xFF\xFF\xFF";
offset = 0;
- ret = utf::read8(str, offset);
+ ret = utf::read8(
+ std::array<uint8_t, 5>({0xFF, 0xFF, 0xFF, 0xFF, 0xFF}), offset);
EXPECT_EQ(utf::INVALID, ret);
EXPECT_EQ(0, offset);
- str = "";
offset = 0;
- ret = utf::read8(str, offset);
+ ret = utf::read8(std::array<uint8_t, 0>(), offset);
EXPECT_EQ(utf::NEED_MORE, ret);
EXPECT_EQ(0, offset);
- str = "\x80";
offset = 0;
- ret = utf::read8(str, offset);
+ ret = utf::read8(std::array<uint8_t, 1>({0x80}), offset);
EXPECT_EQ(utf::INVALID, ret);
EXPECT_EQ(0, offset);
- str = "\xC2";
offset = 0;
- ret = utf::read8(str, offset);
+ ret = utf::read8(std::array<uint8_t, 1>({0xC2}), offset);
EXPECT_EQ(utf::NEED_MORE, ret);
EXPECT_EQ(0, offset);
- str = "\xC2\x03";
offset = 0;
- ret = utf::read8(str, offset);
+ ret = utf::read8(std::array<uint8_t, 2>({0xC2, 0x03}), offset);
EXPECT_EQ(utf::INVALID, ret);
EXPECT_EQ(0, offset);
- str = "\xE0\xA4";
offset = 0;
- ret = utf::read8(str, offset);
+ ret = utf::read8(std::array<uint8_t, 2>({0xE0, 0xA4}), offset);
EXPECT_EQ(utf::NEED_MORE, ret);
EXPECT_EQ(0, offset);
- str = "\xF0\x90\x8D";
offset = 0;
- ret = utf::read8(str, offset);
+ ret = utf::read8(std::array<uint8_t, 3>({0xF0, 0x90, 0x8D}), offset);
EXPECT_EQ(utf::NEED_MORE, ret);
EXPECT_EQ(0, offset);
}
-TEST(utf8, multiple1) {
- std::string_view str("\x4D\xC3\xAC\x6E\x68\x20\x6E\xC3\xB3\x69\x20\x74\x69"
- "\xE1\xBA\xBF\x6E\x67\x20\x56\x69\xE1\xBB\x87\x74");
+TEST(utf8, read_multiple1) {
+ std::array<uint8_t, 25> data({
+ 0x4D, 0xC3, 0xAC, 0x6E, 0x68, 0x20, 0x6E, 0xC3, 0xB3, 0x69,
+ 0x20, 0x74, 0x69, 0xE1, 0xBA, 0xBF, 0x6E, 0x67, 0x20, 0x56,
+ 0x69, 0xE1, 0xBB, 0x87, 0x74
+ });
size_t offset = 0;
- auto ret = utf::read8(str, offset);
+ auto ret = utf::read8(data, offset);
EXPECT_EQ('M', ret);
- ret = utf::read8(str, offset);
+ ret = utf::read8(data, offset);
EXPECT_EQ(0xEC, ret);
- ret = utf::read8(str, offset);
+ ret = utf::read8(data, offset);
EXPECT_EQ('n', ret);
- ret = utf::read8(str, offset);
+ ret = utf::read8(data, offset);
EXPECT_EQ('h', ret);
- ret = utf::read8(str, offset);
+ ret = utf::read8(data, offset);
EXPECT_EQ(' ', ret);
- ret = utf::read8(str, offset);
+ ret = utf::read8(data, offset);
EXPECT_EQ('n', ret);
- ret = utf::read8(str, offset);
+ ret = utf::read8(data, offset);
EXPECT_EQ(0xF3, ret);
- ret = utf::read8(str, offset);
+ ret = utf::read8(data, offset);
EXPECT_EQ('i', ret);
- ret = utf::read8(str, offset);
+ ret = utf::read8(data, offset);
EXPECT_EQ(' ', ret);
- ret = utf::read8(str, offset);
+ ret = utf::read8(data, offset);
EXPECT_EQ('t', ret);
- ret = utf::read8(str, offset);
+ ret = utf::read8(data, offset);
EXPECT_EQ('i', ret);
- ret = utf::read8(str, offset);
+ ret = utf::read8(data, offset);
EXPECT_EQ(0x1EBF, ret);
- ret = utf::read8(str, offset);
+ ret = utf::read8(data, offset);
EXPECT_EQ('n', ret);
- ret = utf::read8(str, offset);
+ ret = utf::read8(data, offset);
EXPECT_EQ('g', ret);
- ret = utf::read8(str, offset);
+ ret = utf::read8(data, offset);
EXPECT_EQ(' ', ret);
- ret = utf::read8(str, offset);
+ ret = utf::read8(data, offset);
EXPECT_EQ('V', ret);
- ret = utf::read8(str, offset);
+ ret = utf::read8(data, offset);
EXPECT_EQ('i', ret);
- ret = utf::read8(str, offset);
+ ret = utf::read8(data, offset);
EXPECT_EQ(0x1EC7, ret);
- ret = utf::read8(str, offset);
+ ret = utf::read8(data, offset);
EXPECT_EQ('t', ret);
- ret = utf::read8(str, offset);
+ ret = utf::read8(data, offset);
EXPECT_EQ(utf::NEED_MORE, ret);
- EXPECT_EQ(str.size(), offset);
+ EXPECT_EQ(data.size(), offset);
}
-TEST(utf8, multiple2) {
- std::string_view str("\xF0\xA8\x89\x9F\xE5\x91\x90\xE3\x97\x82\xE8\xB6\x8A");
+TEST(utf8, read_multiple2) {
+ std::array<uint8_t, 13> data({
+ 0xF0, 0xA8, 0x89, 0x9F, 0xE5, 0x91, 0x90, 0xE3, 0x97, 0x82,
+ 0xE8, 0xB6, 0x8A,
+ });
size_t offset = 0;
- auto ret = utf::read8(str, offset);
+ auto ret = utf::read8(data, offset);
EXPECT_EQ(0x2825F, ret);
- ret = utf::read8(str, offset);
+ ret = utf::read8(data, offset);
EXPECT_EQ(0x5450, ret);
- ret = utf::read8(str, offset);
+ ret = utf::read8(data, offset);
EXPECT_EQ(0x35C2, ret);
- ret = utf::read8(str, offset);
+ ret = utf::read8(data, offset);
EXPECT_EQ(0x8D8A, ret);
- ret = utf::read8(str, offset);
+ ret = utf::read8(data, offset);
EXPECT_EQ(utf::NEED_MORE, ret);
- EXPECT_EQ(str.size(), offset);
+ EXPECT_EQ(data.size(), offset);
+}
+
+TEST(utf8, write_no_space) {
+ std::array<uint8_t, 10> data;
+ std::span<uint8_t> out(data);
+ size_t offset = 0;
+ EXPECT_FALSE(utf::write8('$', out.subspan(0, 0), offset));
+ EXPECT_EQ(0u, offset);
+
+ EXPECT_FALSE(utf::write8(0xa3, out.subspan(0, 1), offset));
+ EXPECT_EQ(0u, offset);
+ EXPECT_FALSE(utf::write8(0x418, out.subspan(0, 0), offset));
+ EXPECT_EQ(0u, offset);
+
+ EXPECT_FALSE(utf::write8(0x939, out.subspan(0, 2), offset));
+ EXPECT_EQ(0u, offset);
+ EXPECT_FALSE(utf::write8(0x20AC, out.subspan(0, 0), offset));
+ EXPECT_EQ(0u, offset);
+
+ EXPECT_FALSE(utf::write8(0x10348, out.subspan(0, 3), offset));
+ EXPECT_EQ(0u, offset);
}