summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- .dir-locals.el 2
-rw-r--r-- meson.build 18
-rw-r--r-- src/u.hh 19
-rw-r--r-- src/u16.hh 86
-rw-r--r-- src/u8.hh 180
-rw-r--r-- src/umod8.hh 186
-rw-r--r-- test/u.cc 683
7 files changed, 1172 insertions, 2 deletions
diff --git a/.dir-locals.el b/.dir-locals.el
index 573c58e..224840b 100644
--- a/.dir-locals.el
+++ b/.dir-locals.el
@@ -7,6 +7,6 @@
(locate-dominating-file default-directory ".dir-locals.el")))
(setq-local flycheck-clangcheck-build-path
(concat project-path "build"))
- (setq-local flycheck-clang-language-standard "c++20")
+ (setq-local flycheck-clang-language-standard "c++23")
(setq-local flycheck-clang-definitions '("HAVE_CONFIG_H"))
(setq-local flycheck-clang-include-path '("../src" "../build")))))))
diff --git a/meson.build b/meson.build
index 15dc548..5d39c12 100644
--- a/meson.build
+++ b/meson.build
@@ -3,7 +3,7 @@ project(
'cpp',
version : '0.1',
meson_version : '>= 1.3.0',
- default_options : ['warning_level=3', 'cpp_std=c++20'],
+ default_options : ['warning_level=3', 'cpp_std=c++23'],
)
conf_data = configuration_data()
@@ -22,6 +22,10 @@ exe = executable(
sources: [
'src/args.cc',
'src/args.hh',
+ 'src/u.hh',
+ 'src/u16.hh',
+ 'src/u8.hh',
+ 'src/umod8.hh',
'src/main.cc',
],
include_directories: inc,
@@ -44,3 +48,15 @@ test('args', executable(
],
include_directories: inc,
dependencies : test_dependencies))
+
+# Test executable built from test/u.cc; the u, u8, u16 and umod8 headers are
+# listed as sources alongside it.
+test('u', executable(
+ 'test_u',
+ sources: [
+ 'src/u.hh',
+ 'src/u16.hh',
+ 'src/u8.hh',
+ 'src/umod8.hh',
+ 'test/u.cc',
+ ],
+ include_directories: inc,
+ dependencies : test_dependencies))
diff --git a/src/u.hh b/src/u.hh
new file mode 100644
index 0000000..583b67b
--- /dev/null
+++ b/src/u.hh
@@ -0,0 +1,19 @@
+#ifndef U_HH
+#define U_HH
+
+// Error codes shared by the readers in the u8, u16 and umod8 namespaces.
+namespace u {
+
+// Failure reasons returned by the read() functions.
+enum class ReadError {
+ Invalid, // Invalid sequence
+ End, // At end (it == end)
+ Incomplete, // Too few bytes
+};
+
+// Failure reasons returned by the read_replace() functions. There is no
+// Invalid member: those functions return 0xfffd for an invalid sequence
+// instead of failing.
+enum class ReadErrorReplace {
+ End, // At end (it == end)
+ Incomplete, // Too few bytes
+};
+
+} // namespace u
+
+#endif // U_HH
diff --git a/src/u16.hh b/src/u16.hh
new file mode 100644
index 0000000..6894a84
--- /dev/null
+++ b/src/u16.hh
@@ -0,0 +1,86 @@
+#ifndef U16_HH
+#define U16_HH
+
+#include <cstdint>
+#include <expected>
+#include <iterator>
+#include <type_traits>
+
+#include "u.hh"
+
+namespace u16 {
+
+// Decode one code point from the UTF-16 sequence [start, end).
+//
+// On success `start` is moved past the decoded unit(s). Errors:
+//  - End: `start == end`; `start` is unchanged.
+//  - Incomplete: a high surrogate is the last unit; `start` is unchanged.
+//  - Invalid: an unpaired surrogate; `start` is moved past that single unit
+//    so the unit after it is decoded by the next call.
+template<std::forward_iterator T>
+  requires std::is_same_v<std::iter_value_t<T>, uint16_t>
+std::expected<uint32_t, u::ReadError> read(T& start, const T& end) {
+  if (start == end) return std::unexpected(u::ReadError::End);
+  uint16_t const u = *start;
+  if (u >= 0xd800 && u <= 0xdbff) {
+    // Look exactly one unit ahead; std::distance() would walk the whole
+    // remaining range for iterators that are not random access.
+    auto const second = std::next(start);
+    if (second == end) {
+      return std::unexpected(u::ReadError::Incomplete);
+    }
+    start = second;
+    uint16_t const v = *start;
+    if (v >= 0xdc00 && v <= 0xdfff) {
+      ++start;
+      return 0x10000 + (((u - 0xd800) << 10) | (v - 0xdc00));
+    }
+    // Not a low surrogate: leave start on it so it is read on its own.
+    return std::unexpected(u::ReadError::Invalid);
+  }
+  ++start;
+  if (u >= 0xdc00 && u <= 0xdfff) {
+    // Low surrogate without a preceding high surrogate
+    return std::unexpected(u::ReadError::Invalid);
+  }
+  return u;
+}
+
+// Like read(), but an invalid sequence yields 0xfffd (the replacement
+// character) instead of an error. `start` is left wherever read() left it.
+// End and Incomplete are still reported as errors.
+template<std::forward_iterator T>
+  requires std::is_same_v<std::iter_value_t<T>, uint16_t>
+std::expected<uint32_t, u::ReadErrorReplace> read_replace(T& start,
+                                                          const T& end) {
+  auto const ret = read(start, end);
+  if (ret.has_value()) return *ret;
+  switch (ret.error()) {
+  case u::ReadError::Incomplete:
+    return std::unexpected(u::ReadErrorReplace::Incomplete);
+  case u::ReadError::End:
+    return std::unexpected(u::ReadErrorReplace::End);
+  case u::ReadError::Invalid:
+    break;
+  }
+  // Returning after the switch (rather than inside it) guarantees that
+  // control never flows off the end of this non-void function.
+  return 0xfffd;
+}
+
+// Encode `code` as UTF-16 at `start`.
+//
+// Returns true and moves `start` past the written unit(s) on success.
+// Returns false, writing nothing and leaving `start` unchanged, when
+// [start, end) is too short or when `code` has no UTF-16 encoding (a
+// surrogate code point, or a value above 0x10ffff).
+template<std::forward_iterator T>
+  requires std::is_same_v<std::iter_value_t<T>, uint16_t>
+bool write(T& start, const T& end, uint32_t code) {
+  // Above 0x10ffff the surrogate arithmetic below would leave the
+  // 0xd800-0xdbff range; a lone surrogate would be unreadable by read().
+  if ((code >= 0xd800 && code <= 0xdfff) || code > 0x10ffff) return false;
+  if (start == end) return false;
+  if (code < 0x10000) {
+    *start = static_cast<uint16_t>(code);
+  } else {
+    // One-step look-ahead instead of std::distance(), which is linear in
+    // the remaining range for non-random-access iterators.
+    auto const second = std::next(start);
+    if (second == end) return false;
+    code -= 0x10000;
+    *start = static_cast<uint16_t>(0xd800 + (code >> 10));
+    start = second;
+    *start = static_cast<uint16_t>(0xdc00 + (code & 0x3ff));
+  }
+  ++start;
+  return true;
+}
+
+// Step over one code point without decoding it.
+//
+// Returns false and leaves `start` unchanged at the end of the range or
+// when a high surrogate is the last unit. Otherwise moves `start` and
+// returns true. A high surrogate takes the following unit with it only when
+// that unit is a low surrogate, so skip() consumes the same units as read()
+// and never swallows a character that follows an unpaired high surrogate.
+template<std::forward_iterator T>
+  requires std::is_same_v<std::iter_value_t<T>, uint16_t>
+bool skip(T& start, const T& end) {
+  if (start == end) return false;
+  uint16_t const u = *start;
+  if (u >= 0xd800 && u <= 0xdbff) {
+    // One-step look-ahead instead of std::distance(), which is linear in
+    // the remaining range for non-random-access iterators.
+    auto const second = std::next(start);
+    if (second == end) return false;
+    start = second;
+    if (*start >= 0xdc00 && *start <= 0xdfff) ++start;
+    return true;
+  }
+  ++start;
+  return true;
+}
+
+} // namespace u16
+
+#endif // U16_HH
diff --git a/src/u8.hh b/src/u8.hh
new file mode 100644
index 0000000..413b156
--- /dev/null
+++ b/src/u8.hh
@@ -0,0 +1,180 @@
+#ifndef U8_HH
+#define U8_HH
+
+#include <cstdint>
+#include <expected>
+#include <iterator>
+#include <type_traits>
+
+#include "u.hh"
+
+namespace u8 {
+
+// Decode one code point from the UTF-8 sequence [start, end).
+//
+// On success `start` is moved past the decoded bytes. Errors:
+//  - End: `start == end`; `start` is unchanged.
+//  - Incomplete: fewer bytes remain than the lead byte announces; `start`
+//    is unchanged.
+//  - Invalid: a byte that cannot start a sequence (0x80-0xbf, 0xf8-0xff) is
+//    consumed on its own; a sequence with a bad continuation byte, an
+//    overlong form, a surrogate or a value above 0x10ffff is consumed in
+//    full (as many bytes as its lead byte announces).
+template<std::forward_iterator T>
+  requires std::is_same_v<std::iter_value_t<T>, uint8_t>
+std::expected<uint32_t, u::ReadError> read(T& start, const T& end) {
+  if (start == end) return std::unexpected(u::ReadError::End);
+  uint8_t const lead = *start;
+  int len = 0;  // Sequence length announced by the lead byte
+  switch (lead >> 4) {
+  case 0xf:
+    // 11110uvv 10vvwwww 10xxxxyy 10yyzzzz
+    // 11111xxx is not a lead byte. Only the low three bits are used below,
+    // so without this check 0xf8 0x90 0x80 0x80 would decode to 0x10000.
+    if ((lead & 0x08) != 0) {
+      ++start;
+      return std::unexpected(u::ReadError::Invalid);
+    }
+    len = 4;
+    break;
+  case 0xe:
+    // 1110wwww 10xxxxyy 10yyzzzz
+    len = 3;
+    break;
+  case 0xd:
+  case 0xc:
+    // 110xxxyy 10yyzzzz
+    len = 2;
+    break;
+  case 0xb:
+  case 0xa:
+  case 0x9:
+  case 0x8:
+    // Continuation byte where a lead byte was expected
+    ++start;
+    return std::unexpected(u::ReadError::Invalid);
+  default:
+    // 0yyyzzzz
+    ++start;
+    return lead;
+  }
+
+  // Bounded look-ahead: takes at most len steps, whereas std::distance()
+  // walks the whole remaining range for non-random-access iterators.
+  auto after = start;
+  if (std::ranges::advance(after, len, end) != 0) {
+    return std::unexpected(u::ReadError::Incomplete);
+  }
+  auto it = std::next(start);
+  // From here on the whole announced sequence is consumed, valid or not.
+  start = after;
+
+  // Payload bits of the lead byte: 0x1f, 0x0f or 0x07 for len 2, 3 or 4.
+  uint32_t u = lead & (0xff >> (len + 1));
+  for (; it != after; ++it) {
+    if ((*it & 0xc0) != 0x80) return std::unexpected(u::ReadError::Invalid);
+    u = (u << 6) | (*it & 0x3f);
+  }
+
+  // Smallest value that genuinely needs len bytes; anything lower is an
+  // overlong form.
+  uint32_t const lowest = len == 2 ? 0x80 : len == 3 ? 0x800 : 0x10000;
+  if (u < lowest || (u >= 0xd800 && u <= 0xdfff) || u > 0x10ffff) {
+    return std::unexpected(u::ReadError::Invalid);
+  }
+  return u;
+}
+
+// Like read(), but an invalid sequence yields 0xfffd (the replacement
+// character) instead of an error. `start` is left wherever read() left it.
+// End and Incomplete are still reported as errors.
+template<std::forward_iterator T>
+  requires std::is_same_v<std::iter_value_t<T>, uint8_t>
+std::expected<uint32_t, u::ReadErrorReplace> read_replace(T& start,
+                                                          const T& end) {
+  auto const ret = read(start, end);
+  if (ret.has_value()) return *ret;
+  switch (ret.error()) {
+  case u::ReadError::Incomplete:
+    return std::unexpected(u::ReadErrorReplace::Incomplete);
+  case u::ReadError::End:
+    return std::unexpected(u::ReadErrorReplace::End);
+  case u::ReadError::Invalid:
+    break;
+  }
+  // Returning after the switch (rather than inside it) guarantees that
+  // control never flows off the end of this non-void function.
+  return 0xfffd;
+}
+
+// Encode `code` as UTF-8 at `start`.
+//
+// Returns true and moves `start` past the written bytes on success.
+// Returns false, writing nothing and leaving `start` unchanged, when
+// [start, end) is too short or when `code` has no UTF-8 encoding (a
+// surrogate code point, or a value above 0x10ffff) - read() rejects both.
+template<std::forward_iterator T>
+  requires std::is_same_v<std::iter_value_t<T>, uint8_t>
+bool write(T& start, const T& end, uint32_t code) {
+  if ((code >= 0xd800 && code <= 0xdfff) || code > 0x10ffff) return false;
+  int const len = code < 0x80 ? 1 : code < 0x800 ? 2 : code < 0x10000 ? 3 : 4;
+  // Bounded room check: takes at most len steps, whereas std::distance()
+  // walks the whole remaining range for non-random-access iterators.
+  auto after = start;
+  if (std::ranges::advance(after, len, end) != 0) return false;
+  switch (len) {
+  case 1:
+    *start = static_cast<uint8_t>(code);
+    break;
+  case 2:
+    *start = static_cast<uint8_t>(0xc0 | (code >> 6));
+    *++start = static_cast<uint8_t>(0x80 | (code & 0x3f));
+    break;
+  case 3:
+    *start = static_cast<uint8_t>(0xe0 | (code >> 12));
+    *++start = static_cast<uint8_t>(0x80 | ((code >> 6) & 0x3f));
+    *++start = static_cast<uint8_t>(0x80 | (code & 0x3f));
+    break;
+  default:
+    *start = static_cast<uint8_t>(0xf0 | (code >> 18));
+    *++start = static_cast<uint8_t>(0x80 | ((code >> 12) & 0x3f));
+    *++start = static_cast<uint8_t>(0x80 | ((code >> 6) & 0x3f));
+    *++start = static_cast<uint8_t>(0x80 | (code & 0x3f));
+    break;
+  }
+  start = after;
+  return true;
+}
+
+// Step over one UTF-8 sequence without validating its continuation bytes.
+//
+// The length is taken from the lead byte alone. Returns false and leaves
+// `start` unchanged at the end of the range or when fewer bytes remain than
+// the lead byte announces; otherwise moves `start` and returns true. Bytes
+// that cannot start a sequence (0x80-0xbf, 0xf8-0xff) are stepped over one
+// at a time.
+template<std::forward_iterator T>
+  requires std::is_same_v<std::iter_value_t<T>, uint8_t>
+bool skip(T& start, const T& end) {
+  if (start == end) return false;
+  uint8_t const lead = *start;
+  int len = 1;
+  switch (lead >> 4) {
+  case 0xf:
+    // 11111xxx is never a lead byte, so it does not announce four bytes.
+    if ((lead & 0x08) == 0) len = 4;
+    break;
+  case 0xe:
+    len = 3;
+    break;
+  case 0xc:
+  case 0xd:
+    len = 2;
+    break;
+  default:
+    break;
+  }
+  // Bounded look-ahead: takes at most len steps, whereas std::distance()
+  // walks the whole remaining range for non-random-access iterators.
+  auto after = start;
+  if (std::ranges::advance(after, len, end) != 0) return false;
+  start = after;
+  return true;
+}
+
+} // namespace u8
+
+#endif // U8_HH
diff --git a/src/umod8.hh b/src/umod8.hh
new file mode 100644
index 0000000..8d4fdb2
--- /dev/null
+++ b/src/umod8.hh
@@ -0,0 +1,186 @@
+#ifndef UMOD8_HH
+#define UMOD8_HH
+
+#include <cstdint>
+#include <expected>
+#include <iterator>
+#include <type_traits>
+
+#include "u.hh"
+
+namespace umod8 {
+
+// Decode one code point from the byte sequence [start, end) in the umod8
+// encoding.
+//
+// Differences from u8::read() visible in this function: the two-byte form
+// 0xc0 0x80 decodes to 0, lead bytes 0xf0-0xff are rejected, and a code
+// point above 0xffff is stored as a surrogate pair whose halves are each a
+// three-byte sequence.
+//
+// On success `start` is moved past the decoded bytes. Errors:
+//  - End: `start == end`; `start` is unchanged.
+//  - Incomplete: a sequence, or the second half of a surrogate pair, is cut
+//    short by `end`; `start` is unchanged.
+//  - Invalid: stray bytes (0x80-0xbf, 0xf0-0xff) are consumed one at a
+//    time; a malformed or overlong sequence is consumed in full; an
+//    unpaired surrogate consumes only its own three bytes.
+template<std::forward_iterator T>
+  requires std::is_same_v<std::iter_value_t<T>, uint8_t>
+std::expected<uint32_t, u::ReadError> read(T& start, const T& end) {
+  if (start == end) return std::unexpected(u::ReadError::End);
+  auto const is_cont = [](uint8_t b) { return (b & 0xc0) == 0x80; };
+  uint8_t const lead = *start;
+  switch (lead >> 4) {
+  case 0xe: {
+    // 1110wwww 10xxxxyy 10yyzzzz
+    auto const first = start;  // Restored if the pair turns out incomplete
+    // Bounded look-ahead: takes at most three steps, whereas
+    // std::distance() walks the whole remaining range for
+    // non-random-access iterators.
+    auto after = start;
+    if (std::ranges::advance(after, 3, end) != 0) {
+      return std::unexpected(u::ReadError::Incomplete);
+    }
+    auto it = std::next(start);
+    uint8_t const b1 = *it;
+    uint8_t const b2 = *++it;
+    // From here on these three bytes are consumed, valid or not.
+    start = after;
+    if (!is_cont(b1) || !is_cont(b2)) {
+      return std::unexpected(u::ReadError::Invalid);
+    }
+    uint32_t const u = ((lead & 0x0f) << 12) | ((b1 & 0x3f) << 6) | (b2 & 0x3f);
+    if (u < 0x800) {
+      // Overlong form
+      return std::unexpected(u::ReadError::Invalid);
+    }
+    if (u >= 0xdc00 && u <= 0xdfff) {
+      // Low surrogate without a preceding high surrogate
+      return std::unexpected(u::ReadError::Invalid);
+    }
+    if (u < 0xd800 || u > 0xdbff) return u;
+
+    // High surrogate: the low half has to follow directly as another
+    // three-byte sequence. Not going recursive here as we don't want it
+    // unbounded.
+    // Lone high surrogate at the very end == invalid.
+    if (start == end) return std::unexpected(u::ReadError::Invalid);
+    if ((*start >> 4) != 0xe) {
+      return std::unexpected(u::ReadError::Invalid);
+    }
+    auto pair_end = start;
+    if (std::ranges::advance(pair_end, 3, end) != 0) {
+      start = first;
+      return std::unexpected(u::ReadError::Incomplete);
+    }
+    uint8_t const c0 = *start;
+    it = std::next(start);
+    uint8_t const c1 = *it;
+    uint8_t const c2 = *++it;
+    if (is_cont(c1) && is_cont(c2)) {
+      uint32_t const v = ((c0 & 0x0f) << 12) | ((c1 & 0x3f) << 6) | (c2 & 0x3f);
+      if (v >= 0xdc00 && v <= 0xdfff) {
+        start = pair_end;
+        return 0x10000 + (((u - 0xd800) << 10) | (v - 0xdc00));
+      }
+    }
+    // Next character may be valid, invalid, something, but we know it is
+    // not the second half of a surrogate pair, so consider this first part
+    // invalid. start stays just past the first three bytes.
+    return std::unexpected(u::ReadError::Invalid);
+  }
+  case 0xd:
+  case 0xc: {
+    // 110xxxyy 10yyzzzz
+    auto const second = std::next(start);
+    if (second == end) {
+      return std::unexpected(u::ReadError::Incomplete);
+    }
+    uint8_t const b1 = *second;
+    start = std::next(second);
+    if (!is_cont(b1)) return std::unexpected(u::ReadError::Invalid);
+    uint32_t const u = ((lead & 0x1f) << 6) | (b1 & 0x3f);
+    // Overlong, except 0xc0 0x80 which is accepted as 0.
+    if (u > 0 && u < 0x80) return std::unexpected(u::ReadError::Invalid);
+    return u;
+  }
+  case 0xf:
+  case 0xb:
+  case 0xa:
+  case 0x9:
+  case 0x8:
+    ++start;
+    return std::unexpected(u::ReadError::Invalid);
+  default:
+    // 0yyyzzzz
+    ++start;
+    return lead;
+  }
+}
+
+// Like read(), but an invalid sequence yields 0xfffd (the replacement
+// character) instead of an error. `start` is left wherever read() left it.
+// End and Incomplete are still reported as errors.
+template<std::forward_iterator T>
+  requires std::is_same_v<std::iter_value_t<T>, uint8_t>
+std::expected<uint32_t, u::ReadErrorReplace> read_replace(T& start,
+                                                          const T& end) {
+  auto const ret = read(start, end);
+  if (ret.has_value()) return *ret;
+  switch (ret.error()) {
+  case u::ReadError::Incomplete:
+    return std::unexpected(u::ReadErrorReplace::Incomplete);
+  case u::ReadError::End:
+    return std::unexpected(u::ReadErrorReplace::End);
+  case u::ReadError::Invalid:
+    break;
+  }
+  // Returning after the switch (rather than inside it) guarantees that
+  // control never flows off the end of this non-void function.
+  return 0xfffd;
+}
+
+// Encode `code` at `start` in the umod8 encoding: 0 becomes 0xc0 0x80 and
+// a code point above 0xffff becomes a surrogate pair of two three-byte
+// sequences (six bytes in total).
+//
+// Returns true and moves `start` past the written bytes on success.
+// Returns false, writing nothing and leaving `start` unchanged, when
+// [start, end) is too short or when `code` is above 0x10ffff (the surrogate
+// arithmetic would leave the surrogate ranges).
+template<std::forward_iterator T>
+  requires std::is_same_v<std::iter_value_t<T>, uint8_t>
+bool write(T& start, const T& end, uint32_t code) {
+  if (code > 0x10ffff) return false;
+  if (code >= 0x10000) {
+    // Make sure both halves fit before writing either, so that a failure
+    // cannot leave the first half behind in the buffer.
+    auto probe = start;
+    if (std::ranges::advance(probe, 6, end) != 0) return false;
+    code -= 0x10000;
+    // Both halves are below 0x10000, so each call takes the three-byte
+    // path below.
+    return write(start, end, 0xd800 + (code >> 10)) &&
+           write(start, end, 0xdc00 + (code & 0x3ff));
+  }
+  // 0 deliberately falls into the two-byte form.
+  int const len = (code > 0 && code < 0x80) ? 1 : code < 0x800 ? 2 : 3;
+  // Bounded room check: takes at most len steps, whereas std::distance()
+  // walks the whole remaining range for non-random-access iterators.
+  auto after = start;
+  if (std::ranges::advance(after, len, end) != 0) return false;
+  switch (len) {
+  case 1:
+    *start = static_cast<uint8_t>(code);
+    break;
+  case 2:
+    *start = static_cast<uint8_t>(0xc0 | (code >> 6));
+    *++start = static_cast<uint8_t>(0x80 | (code & 0x3f));
+    break;
+  default:
+    *start = static_cast<uint8_t>(0xe0 | (code >> 12));
+    *++start = static_cast<uint8_t>(0x80 | ((code >> 6) & 0x3f));
+    *++start = static_cast<uint8_t>(0x80 | (code & 0x3f));
+    break;
+  }
+  start = after;
+  return true;
+}
+
+// Step over one code point of umod8 data.
+//
+// Three-byte sequences are decoded with read(), because they may be half of
+// a surrogate pair; if read() fails for any reason `start` is restored and
+// false is returned. Two-byte sequences are stepped over without checking
+// the continuation byte (false, `start` unchanged, if the second byte is
+// missing). Every other byte is stepped over on its own. Returns false at
+// the end of the range.
+template<std::forward_iterator T>
+  requires std::is_same_v<std::iter_value_t<T>, uint8_t>
+bool skip(T& start, const T& end) {
+  if (start == end) return false;
+  switch (*start >> 4) {
+  case 0xe: {
+    auto const saved = start;
+    if (read(start, end).has_value()) return true;
+    start = saved;
+    return false;
+  }
+  case 0xc:
+  case 0xd: {
+    // One-step look-ahead instead of std::distance(), which is linear in
+    // the remaining range for non-random-access iterators.
+    auto const second = std::next(start);
+    if (second == end) return false;
+    start = std::next(second);
+    return true;
+  }
+  default:
+    ++start;
+    return true;
+  }
+}
+
+} // namespace umod8
+
+#endif // UMOD8_HH
diff --git a/test/u.cc b/test/u.cc
new file mode 100644
index 0000000..933a4f2
--- /dev/null
+++ b/test/u.cc
@@ -0,0 +1,683 @@
+#include <gtest/gtest.h>
+
+#include "u8.hh"
+#include "umod8.hh"
+#include "u16.hh"
+
+#include <vector>
+
+// Every u8 entry point has to fail on an empty range.
+TEST(u8, empty) {
+  std::vector<uint8_t> none;
+  auto pos = none.begin();
+
+  auto const decoded = u8::read(pos, none.end());
+  ASSERT_FALSE(decoded.has_value());
+  EXPECT_EQ(u::ReadError::End, decoded.error());
+
+  auto const replaced = u8::read_replace(pos, none.end());
+  ASSERT_FALSE(replaced.has_value());
+  EXPECT_EQ(u::ReadErrorReplace::End, replaced.error());
+
+  bool const wrote = u8::write(pos, none.end(), 0x40);
+  EXPECT_FALSE(wrote);
+
+  bool const skipped = u8::skip(pos, none.end());
+  EXPECT_FALSE(skipped);
+}
+
+// One sample per UTF-8 length (1-4 bytes): read() and skip() must consume
+// it completely, write() must reproduce it. The 4-byte sample is also
+// decoded with read_replace().
+TEST(u8, examples) {
+  struct Sample {
+    uint32_t code;
+    std::vector<uint8_t> bytes;
+    bool replace;  // Also decode with read_replace()
+  };
+  std::vector<Sample> const samples{
+    {0x57, {0x57}, false},
+    {0x392, {0xce, 0x92}, false},
+    {0xc704, {0xec, 0x9c, 0x84}, false},
+    {0x10345, {0xf0, 0x90, 0x8d, 0x85}, true},
+  };
+
+  // Decoding and skipping
+  for (auto const& sample : samples) {
+    auto bytes = sample.bytes;
+    auto pos = bytes.begin();
+    auto const decoded = u8::read(pos, bytes.end());
+    ASSERT_TRUE(decoded.has_value());
+    EXPECT_EQ(sample.code, *decoded);
+    EXPECT_EQ(pos, bytes.end());
+
+    if (sample.replace) {
+      pos = bytes.begin();
+      auto const replaced = u8::read_replace(pos, bytes.end());
+      ASSERT_TRUE(replaced.has_value());
+      EXPECT_EQ(sample.code, *replaced);
+      EXPECT_EQ(pos, bytes.end());
+    }
+
+    pos = bytes.begin();
+    EXPECT_TRUE(u8::skip(pos, bytes.end()));
+    EXPECT_EQ(pos, bytes.end());
+  }
+
+  // Encoding into a zeroed buffer of exactly the right size
+  for (auto const& sample : samples) {
+    std::vector<uint8_t> out(sample.bytes.size(), 0x0);
+    auto pos = out.begin();
+    EXPECT_TRUE(u8::write(pos, out.end(), sample.code));
+    EXPECT_EQ(sample.bytes, out);
+    EXPECT_EQ(pos, out.end());
+  }
+}
+
+// 0xc0 0x80 is a two-byte encoding of 0 and u8::read() must reject it.
+TEST(u8, overlong) {
+  std::vector<uint8_t> bytes{0xc0, 0x80};
+  auto pos = bytes.begin();
+  auto const decoded = u8::read(pos, bytes.end());
+  ASSERT_FALSE(decoded.has_value());
+  EXPECT_EQ(u::ReadError::Invalid, decoded.error());
+}
+
+// Every proper prefix of a multi-byte sequence must be reported as
+// Incomplete; one prefix is also checked through read_replace().
+TEST(u8, incomplete) {
+  struct Input {
+    std::vector<uint8_t> bytes;
+    bool replace;  // Also decode with read_replace()
+  };
+  std::vector<Input> const inputs{
+    {{0xce}, false},
+    {{0xec}, false},
+    {{0xec, 0x9c}, true},
+    {{0xf0}, false},
+    {{0xf0, 0x90}, false},
+    {{0xf0, 0x90, 0x8d}, false},
+  };
+
+  for (auto const& input : inputs) {
+    auto bytes = input.bytes;
+    auto pos = bytes.begin();
+    auto const decoded = u8::read(pos, bytes.end());
+    ASSERT_FALSE(decoded.has_value());
+    EXPECT_EQ(u::ReadError::Incomplete, decoded.error());
+
+    if (input.replace) {
+      pos = bytes.begin();
+      auto const replaced = u8::read_replace(pos, bytes.end());
+      ASSERT_FALSE(replaced.has_value());
+      EXPECT_EQ(u::ReadErrorReplace::Incomplete, replaced.error());
+    }
+  }
+}
+
+// Malformed input: read() must report Invalid, and read_replace() must
+// return 0xfffd having consumed the whole input.
+TEST(u8, invalid) {
+  std::vector<std::vector<uint8_t>> const inputs{
+    {0xf0, 0xf0, 0xf0, 0xf0},
+    {0xa0},
+    {0xce, 0xff},
+    {0xec, 0xff, 0x84},
+    {0xec, 0x9c, 0xff},
+    {0xf0, 0xff, 0x8d, 0x85},
+    {0xf0, 0x90, 0xff, 0x85},
+    {0xf0, 0x90, 0x8d, 0xff},
+  };
+
+  for (auto const& input : inputs) {
+    auto bytes = input;
+    auto pos = bytes.begin();
+    auto const decoded = u8::read(pos, bytes.end());
+    ASSERT_FALSE(decoded.has_value());
+    EXPECT_EQ(u::ReadError::Invalid, decoded.error());
+
+    pos = bytes.begin();
+    auto const replaced = u8::read_replace(pos, bytes.end());
+    ASSERT_TRUE(replaced.has_value());
+    EXPECT_EQ(0xfffd, *replaced);
+    EXPECT_EQ(pos, bytes.end());
+  }
+}
+
+// Every umod8 entry point has to fail on an empty range.
+TEST(umod8, empty) {
+  std::vector<uint8_t> none;
+  auto pos = none.begin();
+
+  auto const decoded = umod8::read(pos, none.end());
+  ASSERT_FALSE(decoded.has_value());
+  EXPECT_EQ(u::ReadError::End, decoded.error());
+
+  auto const replaced = umod8::read_replace(pos, none.end());
+  ASSERT_FALSE(replaced.has_value());
+  EXPECT_EQ(u::ReadErrorReplace::End, replaced.error());
+
+  bool const wrote = umod8::write(pos, none.end(), 0x40);
+  EXPECT_FALSE(wrote);
+
+  bool const skipped = umod8::skip(pos, none.end());
+  EXPECT_FALSE(skipped);
+}
+
+// Samples for each umod8 form: one byte, two bytes, a six-byte surrogate
+// pair, and 0 stored as 0xc0 0x80. read() must consume each sample
+// completely and write() must reproduce it; skip() and read_replace() are
+// exercised only where flagged.
+TEST(umod8, examples) {
+  struct Sample {
+    uint32_t code;
+    std::vector<uint8_t> bytes;
+    bool replace;  // Also decode with read_replace()
+    bool skip;     // Also step over with skip()
+  };
+  std::vector<Sample> const samples{
+    {0x45, {0x45}, false, true},
+    {0x205, {0xc8, 0x85}, false, true},
+    {0x10400, {0xed, 0xa0, 0x81, 0xed, 0xb0, 0x80}, true, true},
+    {0, {0xc0, 0x80}, false, false},
+  };
+
+  // Decoding and skipping
+  for (auto const& sample : samples) {
+    auto bytes = sample.bytes;
+    auto pos = bytes.begin();
+    auto const decoded = umod8::read(pos, bytes.end());
+    ASSERT_TRUE(decoded.has_value());
+    EXPECT_EQ(sample.code, *decoded);
+    EXPECT_EQ(pos, bytes.end());
+
+    if (sample.replace) {
+      pos = bytes.begin();
+      auto const replaced = umod8::read_replace(pos, bytes.end());
+      ASSERT_TRUE(replaced.has_value());
+      EXPECT_EQ(sample.code, *replaced);
+      EXPECT_EQ(pos, bytes.end());
+    }
+
+    if (sample.skip) {
+      pos = bytes.begin();
+      EXPECT_TRUE(umod8::skip(pos, bytes.end()));
+      EXPECT_EQ(pos, bytes.end());
+    }
+  }
+
+  // Encoding into a zeroed buffer of exactly the right size
+  for (auto const& sample : samples) {
+    std::vector<uint8_t> out(sample.bytes.size(), 0x0);
+    auto pos = out.begin();
+    EXPECT_TRUE(umod8::write(pos, out.end(), sample.code));
+    EXPECT_EQ(sample.bytes, out);
+    EXPECT_EQ(pos, out.end());
+  }
+}
+
+// Placeholder: no overlong cases are exercised for umod8 here.
+TEST(umod8, overlong) {
+}
+
+// Truncated sequences, including a surrogate pair cut inside its second
+// half, must be reported as Incomplete; one input is also checked through
+// read_replace().
+TEST(umod8, incomplete) {
+  struct Input {
+    std::vector<uint8_t> bytes;
+    bool replace;  // Also decode with read_replace()
+  };
+  std::vector<Input> const inputs{
+    {{0xc8}, false},
+    {{0xed}, false},
+    {{0xed, 0xa0}, false},
+    {{0xed, 0xa0, 0x81, 0xed}, true},
+    {{0xed, 0xa0, 0x81, 0xed, 0xb0}, false},
+  };
+
+  for (auto const& input : inputs) {
+    auto bytes = input.bytes;
+    auto pos = bytes.begin();
+    auto const decoded = umod8::read(pos, bytes.end());
+    ASSERT_FALSE(decoded.has_value());
+    EXPECT_EQ(u::ReadError::Incomplete, decoded.error());
+
+    if (input.replace) {
+      pos = bytes.begin();
+      auto const replaced = umod8::read_replace(pos, bytes.end());
+      ASSERT_FALSE(replaced.has_value());
+      EXPECT_EQ(u::ReadErrorReplace::Incomplete, replaced.error());
+    }
+  }
+}
+
+// Malformed input: read() must report Invalid. `left` is the number of
+// bytes expected to remain unread afterwards; where `replace` is set the
+// input is decoded again with read_replace(), which must return 0xfffd,
+// and `left` is checked after that call instead.
+TEST(umod8, invalid) {
+  struct Case {
+    std::vector<uint8_t> bytes;
+    bool replace;
+    int left;
+  };
+  std::vector<Case> const cases{
+    {{0xf0, 0xf0, 0xf0, 0xf0}, true, 3},
+    {{0xa0}, true, 0},
+    {{0xce, 0xff}, true, 0},
+    {{0xec, 0xff, 0x84}, true, 0},
+    {{0xec, 0x9c, 0xff}, true, 0},
+    {{0xed, 0xb0, 0x80, 0xed, 0xa0, 0x81}, false, 3},
+    {{0xed, 0xa0, 0x81}, false, 0},
+    {{0xed, 0xa0, 0x81, 0xed, 0xff, 0x80}, false, 3},
+    {{0xed, 0xa0, 0x81, 0xed, 0xb0, 0xff}, false, 3},
+  };
+
+  for (auto const& c : cases) {
+    auto bytes = c.bytes;
+    auto pos = bytes.begin();
+    auto const decoded = umod8::read(pos, bytes.end());
+    ASSERT_FALSE(decoded.has_value());
+    EXPECT_EQ(u::ReadError::Invalid, decoded.error());
+
+    if (c.replace) {
+      pos = bytes.begin();
+      auto const replaced = umod8::read_replace(pos, bytes.end());
+      ASSERT_TRUE(replaced.has_value());
+      EXPECT_EQ(0xfffd, *replaced);
+    }
+    EXPECT_EQ(c.left, bytes.end() - pos);
+  }
+}
+
+// Every u16 entry point has to fail on an empty range.
+TEST(u16, empty) {
+  std::vector<uint16_t> none;
+  auto pos = none.begin();
+
+  auto const decoded = u16::read(pos, none.end());
+  ASSERT_FALSE(decoded.has_value());
+  EXPECT_EQ(u::ReadError::End, decoded.error());
+
+  auto const replaced = u16::read_replace(pos, none.end());
+  ASSERT_FALSE(replaced.has_value());
+  EXPECT_EQ(u::ReadErrorReplace::End, replaced.error());
+
+  bool const wrote = u16::write(pos, none.end(), 0x40);
+  EXPECT_FALSE(wrote);
+
+  bool const skipped = u16::skip(pos, none.end());
+  EXPECT_FALSE(skipped);
+}
+
+// Two single-unit samples and two surrogate pairs: read() and skip() must
+// consume each completely, write() must reproduce it. The last sample is
+// also decoded with read_replace().
+TEST(u16, examples) {
+  struct Sample {
+    uint32_t code;
+    std::vector<uint16_t> units;
+    bool replace;  // Also decode with read_replace()
+  };
+  std::vector<Sample> const samples{
+    {0x24, {0x24}, false},
+    {0x20ac, {0x20ac}, false},
+    {0x10437, {0xd801, 0xdc37}, false},
+    {0x24b62, {0xd852, 0xdf62}, true},
+  };
+
+  // Decoding and skipping
+  for (auto const& sample : samples) {
+    auto units = sample.units;
+    auto pos = units.begin();
+    auto const decoded = u16::read(pos, units.end());
+    ASSERT_TRUE(decoded.has_value());
+    EXPECT_EQ(sample.code, *decoded);
+    EXPECT_EQ(pos, units.end());
+
+    if (sample.replace) {
+      pos = units.begin();
+      auto const replaced = u16::read_replace(pos, units.end());
+      ASSERT_TRUE(replaced.has_value());
+      EXPECT_EQ(sample.code, *replaced);
+      EXPECT_EQ(pos, units.end());
+    }
+
+    pos = units.begin();
+    EXPECT_TRUE(u16::skip(pos, units.end()));
+    EXPECT_EQ(pos, units.end());
+  }
+
+  // Encoding into a zeroed buffer of exactly the right size
+  for (auto const& sample : samples) {
+    std::vector<uint16_t> out(sample.units.size(), 0x0);
+    auto pos = out.begin();
+    EXPECT_TRUE(u16::write(pos, out.end(), sample.code));
+    EXPECT_EQ(sample.units, out);
+    EXPECT_EQ(pos, out.end());
+  }
+}
+
+// A high surrogate with nothing after it must be reported as Incomplete.
+TEST(u16, incomplete) {
+  std::vector<std::vector<uint16_t>> const inputs{
+    {0xd801},
+    {0xd852},
+  };
+
+  for (auto const& input : inputs) {
+    auto units = input;
+    auto pos = units.begin();
+    auto const decoded = u16::read(pos, units.end());
+    ASSERT_FALSE(decoded.has_value());
+    EXPECT_EQ(u::ReadError::Incomplete, decoded.error());
+  }
+}
+
+// Unpaired surrogates: read() must report Invalid and read_replace() must
+// return 0xfffd while consuming only the offending unit.
+TEST(u16, invalid) {
+  // Inputs whose second unit is a high surrogate, which is Incomplete once
+  // the first unit has been replaced.
+  std::vector<std::vector<uint16_t>> const high_last{
+    {0xdc37, 0xd801},
+    {0xd852, 0xd852},
+  };
+  for (auto const& input : high_last) {
+    auto units = input;
+    auto pos = units.begin();
+    auto const decoded = u16::read(pos, units.end());
+    ASSERT_FALSE(decoded.has_value());
+    EXPECT_EQ(u::ReadError::Invalid, decoded.error());
+
+    pos = units.begin();
+    auto replaced = u16::read_replace(pos, units.end());
+    ASSERT_TRUE(replaced.has_value());
+    EXPECT_EQ(0xfffd, *replaced);
+    EXPECT_NE(pos, units.end());
+
+    replaced = u16::read_replace(pos, units.end());
+    ASSERT_FALSE(replaced.has_value());
+    EXPECT_EQ(u::ReadErrorReplace::Incomplete, replaced.error());
+  }
+
+  // Two low surrogates in a row are replaced one unit at a time.
+  std::vector<uint16_t> units{0xdc37, 0xdf62};
+  auto pos = units.begin();
+  auto replaced = u16::read_replace(pos, units.end());
+  ASSERT_TRUE(replaced.has_value());
+  EXPECT_EQ(0xfffd, *replaced);
+  EXPECT_NE(pos, units.end());
+
+  replaced = u16::read_replace(pos, units.end());
+  ASSERT_TRUE(replaced.has_value());
+  EXPECT_EQ(0xfffd, *replaced);
+  EXPECT_EQ(pos, units.end());
+}