diff options
| -rw-r--r-- | .dir-locals.el | 2 | ||||
| -rw-r--r-- | meson.build | 18 | ||||
| -rw-r--r-- | src/u.hh | 19 | ||||
| -rw-r--r-- | src/u16.hh | 86 | ||||
| -rw-r--r-- | src/u8.hh | 180 | ||||
| -rw-r--r-- | src/umod8.hh | 186 | ||||
| -rw-r--r-- | test/u.cc | 683 |
7 files changed, 1172 insertions, 2 deletions
diff --git a/.dir-locals.el b/.dir-locals.el index 573c58e..224840b 100644 --- a/.dir-locals.el +++ b/.dir-locals.el @@ -7,6 +7,6 @@ (locate-dominating-file default-directory ".dir-locals.el"))) (setq-local flycheck-clangcheck-build-path (concat project-path "build")) - (setq-local flycheck-clang-language-standard "c++20") + (setq-local flycheck-clang-language-standard "c++23") (setq-local flycheck-clang-definitions '("HAVE_CONFIG_H")) (setq-local flycheck-clang-include-path '("../src" "../build"))))))) diff --git a/meson.build b/meson.build index 15dc548..5d39c12 100644 --- a/meson.build +++ b/meson.build @@ -3,7 +3,7 @@ project( 'cpp', version : '0.1', meson_version : '>= 1.3.0', - default_options : ['warning_level=3', 'cpp_std=c++20'], + default_options : ['warning_level=3', 'cpp_std=c++23'], ) conf_data = configuration_data() @@ -22,6 +22,10 @@ exe = executable( sources: [ 'src/args.cc', 'src/args.hh', + 'src/u.hh', + 'src/u16.hh', + 'src/u8.hh', + 'src/umod8.hh', 'src/main.cc', ], include_directories: inc, @@ -44,3 +48,15 @@ test('args', executable( ], include_directories: inc, dependencies : test_dependencies)) + +test('u', executable( + 'test_u', + sources: [ + 'src/u.hh', + 'src/u16.hh', + 'src/u8.hh', + 'src/umod8.hh', + 'test/u.cc', + ], + include_directories: inc, + dependencies : test_dependencies)) diff --git a/src/u.hh b/src/u.hh new file mode 100644 index 0000000..583b67b --- /dev/null +++ b/src/u.hh @@ -0,0 +1,19 @@ +#ifndef U_HH +#define U_HH + +namespace u { + +enum class ReadError { + Invalid, // Invalid sequence + End, // At end (it == end) + Incomplete, // Too few bytes +}; + +enum class ReadErrorReplace { + End, // At end (it == end) + Incomplete, // Too few bytes +}; + +} // namespace u + +#endif // U_HH diff --git a/src/u16.hh b/src/u16.hh new file mode 100644 index 0000000..6894a84 --- /dev/null +++ b/src/u16.hh @@ -0,0 +1,86 @@ +#ifndef U16_HH +#define U16_HH + +#include <cstdint> +#include <expected> +#include <iterator> +#include <type_traits> + +#include "u.hh" + +namespace u16 { + +template<std::forward_iterator T> + requires std::is_same_v<std::iter_value_t<T>, uint16_t> +std::expected<uint32_t, u::ReadError> read(T& start, const T& end) { + if (start == end) return std::unexpected(u::ReadError::End); + uint16_t u = *start; + if (u >= 0xd800 && u <= 0xdbff) { + if (std::distance(start, end) < 2) { + return std::unexpected(u::ReadError::Incomplete); + } + std::advance(start, 1); + if (*start >= 0xdc00 && *start <= 0xdfff) { + uint16_t v = *start; + std::advance(start, 1); + return 0x10000 + (((u - 0xd800) << 10) | (v - 0xdc00)); + } + return std::unexpected(u::ReadError::Invalid); + } + std::advance(start, 1); + if (u >= 0xdc00 && u <= 0xdfff) { + return std::unexpected(u::ReadError::Invalid); + } + return u; +} + +template<std::forward_iterator T> + requires std::is_same_v<std::iter_value_t<T>, uint16_t> +std::expected<uint32_t, u::ReadErrorReplace> read_replace(T& start, + const T& end) { + auto ret = read(start, end); + if (ret.has_value()) + return *ret; + switch (ret.error()) { + case u::ReadError::Incomplete: + return std::unexpected(u::ReadErrorReplace::Incomplete); + case u::ReadError::End: + return std::unexpected(u::ReadErrorReplace::End); + case u::ReadError::Invalid: + return 0xfffd; + } +} + +template<std::forward_iterator T> + requires std::is_same_v<std::iter_value_t<T>, uint16_t> +bool write(T& start, const T& end, uint32_t code) { + if (code < 0x10000) { + if (start == end) return false; + *start = static_cast<uint16_t>(code); + } else { + if (std::distance(start, end) < 2) return false; + code -= 0x10000; + *start = static_cast<uint16_t>(0xd800 + (code >> 10)); + std::advance(start, 1); + *start = static_cast<uint16_t>(0xdc00 + (code & 0x3ff)); + } + std::advance(start, 1); + return true; +} + +template<std::forward_iterator T> + requires std::is_same_v<std::iter_value_t<T>, uint16_t> +bool skip(T& start, const T& end) { + if (start == end) return false; + if (*start >= 0xd800 && *start <= 0xdbff) { + if (std::distance(start, end) < 2) return false; + std::advance(start, 2); + return true; + } + std::advance(start, 1); + return true; +} + +} // namespace u16 + +#endif // U16_HH diff --git a/src/u8.hh b/src/u8.hh new file mode 100644 index 0000000..413b156 --- /dev/null +++ b/src/u8.hh @@ -0,0 +1,180 @@ +#ifndef U8_HH +#define U8_HH + +#include <cstdint> +#include <expected> +#include <iterator> +#include <type_traits> + +#include "u.hh" + +namespace u8 { + +template<std::forward_iterator T> + requires std::is_same_v<std::iter_value_t<T>, uint8_t> +std::expected<uint32_t, u::ReadError> read(T& start, const T& end) { + if (start == end) return std::unexpected(u::ReadError::End); + uint32_t u; + switch (*start >> 4) { + case 0xf: + // 11110uvv 10vvwwww 10xxxxyy 10yyzzzz + if (std::distance(start, end) < 4) { + return std::unexpected(u::ReadError::Incomplete); + } + u = (*start & 0x07) << 18; + std::advance(start, 1); + if ((*start & 0xc0) != 0x80) { + std::advance(start, 3); + return std::unexpected(u::ReadError::Invalid); + } + u |= (*start & 0x3f) << 12; + std::advance(start, 1); + if ((*start & 0xc0) != 0x80) { + std::advance(start, 2); + return std::unexpected(u::ReadError::Invalid); + } + u |= (*start & 0x3f) << 6; + std::advance(start, 1); + if ((*start & 0xc0) != 0x80) { + std::advance(start, 1); + return std::unexpected(u::ReadError::Invalid); + } + u |= *start & 0x3f; + if (u < 0x10000 || u > 0x10ffff) { + std::advance(start, 1); + return std::unexpected(u::ReadError::Invalid); + } + break; + case 0xe: + // 1110wwww 10xxxxyy 10yyzzzz + if (std::distance(start, end) < 3) { + return std::unexpected(u::ReadError::Incomplete); + } + u = (*start & 0x0f) << 12; + std::advance(start, 1); + if ((*start & 0xc0) != 0x80) { + std::advance(start, 2); + return std::unexpected(u::ReadError::Invalid); + } + u |= (*start & 0x3f) << 6; + std::advance(start, 1); + if ((*start & 0xc0) != 0x80) { + std::advance(start, 1); + return std::unexpected(u::ReadError::Invalid); + } + u |= *start & 0x3f; + if (u < 0x800 || (u >= 0xd800 && u <= 0xdfff)) { + std::advance(start, 1); + return std::unexpected(u::ReadError::Invalid); + } + break; + case 0xd: + case 0xc: + // 110xxxyy 10yyzzzz + if (std::distance(start, end) < 2) { + return std::unexpected(u::ReadError::Incomplete); + } + u = (*start & 0x1f) << 6; + std::advance(start, 1); + if ((*start & 0xc0) != 0x80) { + std::advance(start, 1); + return std::unexpected(u::ReadError::Invalid); + } + u |= *start & 0x3f; + if (u < 0x80) { + std::advance(start, 1); + return std::unexpected(u::ReadError::Invalid); + } + break; + case 0xb: + case 0xa: + case 0x9: + case 0x8: + std::advance(start, 1); + return std::unexpected(u::ReadError::Invalid); + default: + // 0yyyzzzz + u = *start; + break; + } + std::advance(start, 1); + return u; +} + +template<std::forward_iterator T> + requires std::is_same_v<std::iter_value_t<T>, uint8_t> +std::expected<uint32_t, u::ReadErrorReplace> read_replace(T& start, + const T& end) { + auto ret = read(start, end); + if (ret.has_value()) + return *ret; + switch (ret.error()) { + case u::ReadError::Incomplete: + return std::unexpected(u::ReadErrorReplace::Incomplete); + case u::ReadError::End: + return std::unexpected(u::ReadErrorReplace::End); + case u::ReadError::Invalid: + return 0xfffd; + } +} + +template<std::forward_iterator T> + requires std::is_same_v<std::iter_value_t<T>, uint8_t> +bool write(T& start, const T& end, uint32_t code) { + if (code < 0x80) { + if (start == end) return false; + *start = static_cast<uint8_t>(code); + } else if (code < 0x800) { + if (std::distance(start, end) < 2) return false; + *start = 0xc0 | static_cast<uint8_t>(code >> 6); + std::advance(start, 1); + *start = 0x80 | static_cast<uint8_t>(code & 0x3f); + } else if (code < 0x10000) { + if (std::distance(start, end) < 3) return false; + *start = 0xe0 | static_cast<uint8_t>(code >> 12); + std::advance(start, 1); + *start = 0x80 | static_cast<uint8_t>((code >> 6) & 0x3f); + std::advance(start, 1); + *start = 0x80 | static_cast<uint8_t>(code & 0x3f); + } else { + if (std::distance(start, end) < 4) return false; + *start = 0xf0 | static_cast<uint8_t>(code >> 18); + std::advance(start, 1); + *start = 0x80 | static_cast<uint8_t>((code >> 12) & 0x3f); + std::advance(start, 1); + *start = 0x80 | static_cast<uint8_t>((code >> 6) & 0x3f); + std::advance(start, 1); + *start = 0x80 | static_cast<uint8_t>(code & 0x3f); + } + std::advance(start, 1); + return true; +} + +template<std::forward_iterator T> + requires std::is_same_v<std::iter_value_t<T>, uint8_t> +bool skip(T& start, const T& end) { + if (start == end) return false; + switch (*start >> 4) { + case 0xf: + if (std::distance(start, end) < 4) return false; + std::advance(start, 4); + break; + case 0xe: + if (std::distance(start, end) < 3) return false; + std::advance(start, 3); + break; + case 0xc: + case 0xd: + if (std::distance(start, end) < 2) return false; + std::advance(start, 2); + break; + default: + std::advance(start, 1); + break; + } + return true; +} + +} // namespace u8 + +#endif // U8_HH diff --git a/src/umod8.hh b/src/umod8.hh new file mode 100644 index 0000000..8d4fdb2 --- /dev/null +++ b/src/umod8.hh @@ -0,0 +1,186 @@ +#ifndef UMOD8_HH +#define UMOD8_HH + +#include <cstdint> +#include <expected> +#include <iterator> +#include <type_traits> + +#include "u.hh" + +namespace umod8 { + +template<std::forward_iterator T> + requires std::is_same_v<std::iter_value_t<T>, uint8_t> +std::expected<uint32_t, u::ReadError> read(T& start, const T& end) { + if (start == end) return std::unexpected(u::ReadError::End); + uint32_t u; + switch (*start >> 4) { + case 0xe: { + auto const tmp = start; + // 1110wwww 10xxxxyy 10yyzzzz + if (std::distance(start, end) < 3) { + return std::unexpected(u::ReadError::Incomplete); + } + u = (*start & 0x0f) << 12; + std::advance(start, 1); + if ((*start & 0xc0) != 0x80) { + std::advance(start, 2); + return std::unexpected(u::ReadError::Invalid); + } + u |= (*start & 0x3f) << 6; + std::advance(start, 1); + if ((*start & 0xc0) != 0x80) { + std::advance(start, 1); + return std::unexpected(u::ReadError::Invalid); + } + u |= *start & 0x3f; + if (u < 0x800) { + std::advance(start, 1); + return std::unexpected(u::ReadError::Invalid); + } + if (u >= 0xd800 && u <= 0xdbff) { + std::advance(start, 1); + // Not going recursive here as we don't want it unbounded + // Lone surrogate pair at end == invalid. + if (start == end) return std::unexpected(u::ReadError::Invalid); + if ((*start >> 4) == 0xe) { + if (std::distance(start, end) < 3) { + start = tmp; + return std::unexpected(u::ReadError::Incomplete); + } + uint32_t v = (*start & 0x0f) << 12; + std::advance(start, 1); + if ((*start & 0xc0) == 0x80) { + v |= (*start & 0x3f) << 6; + std::advance(start, 1); + if ((*start & 0xc0) == 0x80) { + v |= *start & 0x3f; + if (v >= 0xdc00 && v <= 0xdfff) { + std::advance(start, 1); + return 0x10000 + (((u - 0xd800) << 10) | (v - 0xdc00)); + } + } + } + start = std::next(tmp, 3); + } + // Next character may be valid, invalid, something, but we know + // it is not the second half of a surrogate pair, so consider + // this first part invalid. + return std::unexpected(u::ReadError::Invalid); + } + if (u >= 0xdc00 && u <= 0xdfff) { + std::advance(start, 1); + return std::unexpected(u::ReadError::Invalid); + } + break; + } + case 0xd: + case 0xc: + // 110xxxyy 10yyzzzz + if (std::distance(start, end) < 2) { + return std::unexpected(u::ReadError::Incomplete); + } + u = (*start & 0x1f) << 6; + std::advance(start, 1); + if ((*start & 0xc0) != 0x80) { + std::advance(start, 1); + return std::unexpected(u::ReadError::Invalid); + } + u |= *start & 0x3f; + if (u > 0 && u < 0x80) { + std::advance(start, 1); + return std::unexpected(u::ReadError::Invalid); + } + break; + case 0xf: + case 0xb: + case 0xa: + case 0x9: + case 0x8: + std::advance(start, 1); + return std::unexpected(u::ReadError::Invalid); + default: + // 0yyyzzzz + u = *start; + break; + } + std::advance(start, 1); + return u; +} + +template<std::forward_iterator T> + requires std::is_same_v<std::iter_value_t<T>, uint8_t> +std::expected<uint32_t, u::ReadErrorReplace> read_replace(T& start, + const T& end) { + auto ret = read(start, end); + if (ret.has_value()) + return *ret; + switch (ret.error()) { + case u::ReadError::Incomplete: + return std::unexpected(u::ReadErrorReplace::Incomplete); + case u::ReadError::End: + return std::unexpected(u::ReadErrorReplace::End); + case u::ReadError::Invalid: + return 0xfffd; + } +} + +template<std::forward_iterator T> + requires std::is_same_v<std::iter_value_t<T>, uint8_t> +bool write(T& start, const T& end, uint32_t code) { + if (code > 0 && code < 0x80) { + if (start == end) return false; + *start = static_cast<uint8_t>(code); + } else if (code < 0x800) { + if (std::distance(start, end) < 2) return false; + *start = 0xc0 | static_cast<uint8_t>(code >> 6); + std::advance(start, 1); + *start = 0x80 | static_cast<uint8_t>(code & 0x3f); + } else if (code < 0x10000) { + if (std::distance(start, end) < 3) return false; + *start = 0xe0 | static_cast<uint8_t>(code >> 12); + std::advance(start, 1); + *start = 0x80 | static_cast<uint8_t>((code >> 6) & 0x3f); + std::advance(start, 1); + *start = 0x80 | static_cast<uint8_t>(code & 0x3f); + } else { + auto tmp = start; + code -= 0x10000; + if (write(start, end, 0xd800 + (code >> 10)) && + write(start, end, 0xdc00 + (code & 0x3ff))) { + return true; + } + start = tmp; + return false; + } + std::advance(start, 1); + return true; +} + +template<std::forward_iterator T> + requires std::is_same_v<std::iter_value_t<T>, uint8_t> +bool skip(T& start, const T& end) { + if (start == end) return false; + switch (*start >> 4) { + case 0xe: { + auto tmp = start; + if (read(start, end).has_value()) return true; + start = tmp; + return false; + } + case 0xc: + case 0xd: + if (std::distance(start, end) < 2) return false; + std::advance(start, 2); + break; + default: + std::advance(start, 1); + break; + } + return true; +} + +} // namespace umod8 + +#endif // UMOD8_HH diff --git a/test/u.cc b/test/u.cc new file mode 100644 index 0000000..933a4f2 --- /dev/null +++ b/test/u.cc @@ -0,0 +1,683 @@ +#include <gtest/gtest.h> + +#include "u8.hh" +#include "umod8.hh" +#include "u16.hh" + +#include <vector> + +TEST(u8, empty) { + std::vector<uint8_t> empty; + auto it = empty.begin(); + auto ret = u8::read(it, empty.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::End, ret.error()); + + auto ret_replace = u8::read_replace(it, empty.end()); + ASSERT_FALSE(ret_replace.has_value()); + EXPECT_EQ(u::ReadErrorReplace::End, ret_replace.error()); + + EXPECT_FALSE(u8::write(it, empty.end(), 0x40)); + + EXPECT_FALSE(u8::skip(it, empty.end())); +} + +TEST(u8, examples) { + { + std::vector<uint8_t> literal{0x57}; + auto it = literal.begin(); + auto ret = u8::read(it, literal.end()); + ASSERT_TRUE(ret.has_value()); + EXPECT_EQ(0x57, *ret); + EXPECT_EQ(it, literal.end()); + + it = literal.begin(); + EXPECT_TRUE(u8::skip(it, literal.end())); + EXPECT_EQ(it, literal.end()); + } + { + std::vector<uint8_t> literal{0xce, 0x92}; + auto it = literal.begin(); + auto ret = u8::read(it, literal.end()); + ASSERT_TRUE(ret.has_value()); + EXPECT_EQ(0x392, *ret); + EXPECT_EQ(it, literal.end()); + + it = literal.begin(); + EXPECT_TRUE(u8::skip(it, literal.end())); + EXPECT_EQ(it, literal.end()); + } + { + std::vector<uint8_t> literal{0xec, 0x9c, 0x84}; + auto it = literal.begin(); + auto ret = u8::read(it, literal.end()); + ASSERT_TRUE(ret.has_value()); + EXPECT_EQ(0xc704, *ret); + EXPECT_EQ(it, literal.end()); + + it = literal.begin(); + EXPECT_TRUE(u8::skip(it, literal.end())); + EXPECT_EQ(it, literal.end()); + } + { + std::vector<uint8_t> literal{0xf0, 0x90, 0x8d, 0x85}; + auto it = literal.begin(); + auto ret = u8::read(it, literal.end()); + ASSERT_TRUE(ret.has_value()); + EXPECT_EQ(0x10345, *ret); + EXPECT_EQ(it, literal.end()); + + it = literal.begin(); + auto ret_replace = u8::read_replace(it, literal.end()); + ASSERT_TRUE(ret_replace.has_value()); + EXPECT_EQ(0x10345, *ret_replace); + EXPECT_EQ(it, literal.end()); + + it = literal.begin(); + EXPECT_TRUE(u8::skip(it, literal.end())); + EXPECT_EQ(it, literal.end()); + } + { + std::vector<uint8_t> literal(1, 0x0); + auto it = literal.begin(); + EXPECT_TRUE(u8::write(it, literal.end(), 0x57)); + EXPECT_EQ(0x57, literal[0]); + EXPECT_EQ(it, literal.end()); + } + { + std::vector<uint8_t> literal(2, 0x0); + auto it = literal.begin(); + EXPECT_TRUE(u8::write(it, literal.end(), 0x392)); + EXPECT_EQ(0xce, literal[0]); + EXPECT_EQ(0x92, literal[1]); + EXPECT_EQ(it, literal.end()); + } + { + std::vector<uint8_t> literal(3, 0x0); + auto it = literal.begin(); + EXPECT_TRUE(u8::write(it, literal.end(), 0xc704)); + EXPECT_EQ(0xec, literal[0]); + EXPECT_EQ(0x9c, literal[1]); + EXPECT_EQ(0x84, literal[2]); + EXPECT_EQ(it, literal.end()); + } + { + std::vector<uint8_t> literal(4, 0x0); + auto it = literal.begin(); + EXPECT_TRUE(u8::write(it, literal.end(), 0x10345)); + EXPECT_EQ(0xf0, literal[0]); + EXPECT_EQ(0x90, literal[1]); + EXPECT_EQ(0x8d, literal[2]); + EXPECT_EQ(0x85, literal[3]); + EXPECT_EQ(it, literal.end()); + } +} + +TEST(u8, overlong) { + { + std::vector<uint8_t> literal{0xc0, 0x80}; + auto it = literal.begin(); + auto ret = u8::read(it, literal.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::Invalid, ret.error()); + } +} + +TEST(u8, incomplete) { + { + std::vector<uint8_t> literal{0xce}; + auto it = literal.begin(); + auto ret = u8::read(it, literal.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::Incomplete, ret.error()); + } + { + std::vector<uint8_t> literal{0xec}; + auto it = literal.begin(); + auto ret = u8::read(it, literal.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::Incomplete, ret.error()); + } + { + std::vector<uint8_t> literal{0xec, 0x9c}; + auto it = literal.begin(); + auto ret = u8::read(it, literal.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::Incomplete, ret.error()); + + it = literal.begin(); + auto ret_replace = u8::read_replace(it, literal.end()); + ASSERT_FALSE(ret_replace.has_value()); + EXPECT_EQ(u::ReadErrorReplace::Incomplete, ret_replace.error()); + } + { + std::vector<uint8_t> literal{0xf0}; + auto it = literal.begin(); + auto ret = u8::read(it, literal.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::Incomplete, ret.error()); + } + { + std::vector<uint8_t> literal{0xf0, 0x90}; + auto it = literal.begin(); + auto ret = u8::read(it, literal.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::Incomplete, ret.error()); + } + { + std::vector<uint8_t> literal{0xf0, 0x90, 0x8d}; + auto it = literal.begin(); + auto ret = u8::read(it, literal.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::Incomplete, ret.error()); + } +} + +TEST(u8, invalid) { + { + std::vector<uint8_t> literal{0xf0, 0xf0, 0xf0, 0xf0}; + auto it = literal.begin(); + auto ret = u8::read(it, literal.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::Invalid, ret.error()); + it = literal.begin(); + auto ret_replace = u8::read_replace(it, literal.end()); + ASSERT_TRUE(ret_replace.has_value()); + EXPECT_EQ(0xfffd, *ret_replace); + EXPECT_EQ(it, literal.end()); + } + { + std::vector<uint8_t> literal{0xa0}; + auto it = literal.begin(); + auto ret = u8::read(it, literal.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::Invalid, ret.error()); + it = literal.begin(); + auto ret_replace = u8::read_replace(it, literal.end()); + ASSERT_TRUE(ret_replace.has_value()); + EXPECT_EQ(0xfffd, *ret_replace); + EXPECT_EQ(it, literal.end()); + } + { + std::vector<uint8_t> literal{0xce, 0xff}; + auto it = literal.begin(); + auto ret = u8::read(it, literal.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::Invalid, ret.error()); + it = literal.begin(); + auto ret_replace = u8::read_replace(it, literal.end()); + ASSERT_TRUE(ret_replace.has_value()); + EXPECT_EQ(0xfffd, *ret_replace); + EXPECT_EQ(it, literal.end()); + } + { + std::vector<uint8_t> literal{0xec, 0xff, 0x84}; + auto it = literal.begin(); + auto ret = u8::read(it, literal.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::Invalid, ret.error()); + it = literal.begin(); + auto ret_replace = u8::read_replace(it, literal.end()); + ASSERT_TRUE(ret_replace.has_value()); + EXPECT_EQ(0xfffd, *ret_replace); + EXPECT_EQ(it, literal.end()); + } + { + std::vector<uint8_t> literal{0xec, 0x9c, 0xff}; + auto it = literal.begin(); + auto ret = u8::read(it, literal.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::Invalid, ret.error()); + it = literal.begin(); + auto ret_replace = u8::read_replace(it, literal.end()); + ASSERT_TRUE(ret_replace.has_value()); + EXPECT_EQ(0xfffd, *ret_replace); + EXPECT_EQ(it, literal.end()); + } + { + std::vector<uint8_t> literal{0xf0, 0xff, 0x8d, 0x85}; + auto it = literal.begin(); + auto ret = u8::read(it, literal.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::Invalid, ret.error()); + it = literal.begin(); + auto ret_replace = u8::read_replace(it, literal.end()); + ASSERT_TRUE(ret_replace.has_value()); + EXPECT_EQ(0xfffd, *ret_replace); + EXPECT_EQ(it, literal.end()); + } + { + std::vector<uint8_t> literal{0xf0, 0x90, 0xff, 0x85}; + auto it = literal.begin(); + auto ret = u8::read(it, literal.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::Invalid, ret.error()); + it = literal.begin(); + auto ret_replace = u8::read_replace(it, literal.end()); + ASSERT_TRUE(ret_replace.has_value()); + EXPECT_EQ(0xfffd, *ret_replace); + EXPECT_EQ(it, literal.end()); + } + { + std::vector<uint8_t> literal{0xf0, 0x90, 0x8d, 0xff}; + auto it = literal.begin(); + auto ret = u8::read(it, literal.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::Invalid, ret.error()); + it = literal.begin(); + auto ret_replace = u8::read_replace(it, literal.end()); + ASSERT_TRUE(ret_replace.has_value()); + EXPECT_EQ(0xfffd, *ret_replace); + EXPECT_EQ(it, literal.end()); + } +} + +TEST(umod8, empty) { + std::vector<uint8_t> empty; + auto it = empty.begin(); + auto ret = umod8::read(it, empty.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::End, ret.error()); + + auto ret_replace = umod8::read_replace(it, empty.end()); + ASSERT_FALSE(ret_replace.has_value()); + EXPECT_EQ(u::ReadErrorReplace::End, ret_replace.error()); + + EXPECT_FALSE(umod8::write(it, empty.end(), 0x40)); + + EXPECT_FALSE(umod8::skip(it, empty.end())); +} + +TEST(umod8, examples) { + { + std::vector<uint8_t> literal{0x45}; + auto it = literal.begin(); + auto ret = umod8::read(it, literal.end()); + ASSERT_TRUE(ret.has_value()); + EXPECT_EQ(0x45, *ret); + EXPECT_EQ(it, literal.end()); + + it = literal.begin(); + EXPECT_TRUE(umod8::skip(it, literal.end())); + EXPECT_EQ(it, literal.end()); + } + { + std::vector<uint8_t> literal{0xc8, 0x85}; + auto it = literal.begin(); + auto ret = umod8::read(it, literal.end()); + ASSERT_TRUE(ret.has_value()); + EXPECT_EQ(0x205, *ret); + EXPECT_EQ(it, literal.end()); + + it = literal.begin(); + EXPECT_TRUE(umod8::skip(it, literal.end())); + EXPECT_EQ(it, literal.end()); + } + { + std::vector<uint8_t> literal{0xed, 0xa0, 0x81, 0xed, 0xb0, 0x80}; + auto it = literal.begin(); + auto ret = umod8::read(it, literal.end()); + ASSERT_TRUE(ret.has_value()); + EXPECT_EQ(0x10400, *ret); + EXPECT_EQ(it, literal.end()); + + it = literal.begin(); + auto ret_replace = umod8::read_replace(it, literal.end()); + ASSERT_TRUE(ret_replace.has_value()); + EXPECT_EQ(0x10400, *ret_replace); + EXPECT_EQ(it, literal.end()); + + it = literal.begin(); + EXPECT_TRUE(umod8::skip(it, literal.end())); + EXPECT_EQ(it, literal.end()); + } + { + std::vector<uint8_t> literal{0xc0, 0x80}; + auto it = literal.begin(); + auto ret = umod8::read(it, literal.end()); + ASSERT_TRUE(ret.has_value()); + EXPECT_EQ(0, *ret); + EXPECT_EQ(it, literal.end()); + } + { + std::vector<uint8_t> literal(1, 0x0); + auto it = literal.begin(); + EXPECT_TRUE(umod8::write(it, literal.end(), 0x45)); + EXPECT_EQ(0x45, literal[0]); + EXPECT_EQ(it, literal.end()); + } + { + std::vector<uint8_t> literal(2, 0x0); + auto it = literal.begin(); + EXPECT_TRUE(umod8::write(it, literal.end(), 0x205)); + EXPECT_EQ(0xc8, literal[0]); + EXPECT_EQ(0x85, literal[1]); + EXPECT_EQ(it, literal.end()); + } + { + std::vector<uint8_t> literal(6, 0x0); + auto it = literal.begin(); + EXPECT_TRUE(umod8::write(it, literal.end(), 0x10400)); + EXPECT_EQ(0xed, literal[0]); + EXPECT_EQ(0xa0, literal[1]); + EXPECT_EQ(0x81, literal[2]); + EXPECT_EQ(0xed, literal[3]); + EXPECT_EQ(0xb0, literal[4]); + EXPECT_EQ(0x80, literal[5]); + EXPECT_EQ(it, literal.end()); + } + { + std::vector<uint8_t> literal(2, 0x0); + auto it = literal.begin(); + EXPECT_TRUE(umod8::write(it, literal.end(), 0x0)); + EXPECT_EQ(0xc0, literal[0]); + EXPECT_EQ(0x80, literal[1]); + EXPECT_EQ(it, literal.end()); + } +} + +TEST(umod8, overlong) { +} + +TEST(umod8, incomplete) { + { + std::vector<uint8_t> literal{0xc8}; + auto it = literal.begin(); + auto ret = umod8::read(it, literal.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::Incomplete, ret.error()); + } + { + std::vector<uint8_t> literal{0xed}; + auto it = literal.begin(); + auto ret = umod8::read(it, literal.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::Incomplete, ret.error()); + } + { + std::vector<uint8_t> literal{0xed, 0xa0}; + auto it = literal.begin(); + auto ret = umod8::read(it, literal.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::Incomplete, ret.error()); + } + { + std::vector<uint8_t> literal{0xed, 0xa0, 0x81, 0xed}; + auto it = literal.begin(); + auto ret = umod8::read(it, literal.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::Incomplete, ret.error()); + + it = literal.begin(); + auto ret_replace = umod8::read_replace(it, literal.end()); + ASSERT_FALSE(ret_replace.has_value()); + EXPECT_EQ(u::ReadErrorReplace::Incomplete, ret_replace.error()); + } + { + std::vector<uint8_t> literal{0xed, 0xa0, 0x81, 0xed, 0xb0}; + auto it = literal.begin(); + auto ret = umod8::read(it, literal.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::Incomplete, ret.error()); + } +} + +TEST(umod8, invalid) { + { + std::vector<uint8_t> literal{0xf0, 0xf0, 0xf0, 0xf0}; + auto it = literal.begin(); + auto ret = umod8::read(it, literal.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::Invalid, ret.error()); + it = literal.begin(); + auto ret_replace = umod8::read_replace(it, literal.end()); + ASSERT_TRUE(ret_replace.has_value()); + EXPECT_EQ(0xfffd, *ret_replace); + EXPECT_EQ(3, literal.end() - it); + } + { + std::vector<uint8_t> literal{0xa0}; + auto it = literal.begin(); + auto ret = umod8::read(it, literal.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::Invalid, ret.error()); + it = literal.begin(); + auto ret_replace = umod8::read_replace(it, literal.end()); + ASSERT_TRUE(ret_replace.has_value()); + EXPECT_EQ(0xfffd, *ret_replace); + EXPECT_EQ(it, literal.end()); + } + { + std::vector<uint8_t> literal{0xce, 0xff}; + auto it = literal.begin(); + auto ret = umod8::read(it, literal.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::Invalid, ret.error()); + it = literal.begin(); + auto ret_replace = umod8::read_replace(it, literal.end()); + ASSERT_TRUE(ret_replace.has_value()); + EXPECT_EQ(0xfffd, *ret_replace); + EXPECT_EQ(it, literal.end()); + } + { + std::vector<uint8_t> literal{0xec, 0xff, 0x84}; + auto it = literal.begin(); + auto ret = umod8::read(it, literal.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::Invalid, ret.error()); + it = literal.begin(); + auto ret_replace = umod8::read_replace(it, literal.end()); + ASSERT_TRUE(ret_replace.has_value()); + EXPECT_EQ(0xfffd, *ret_replace); + EXPECT_EQ(it, literal.end()); + } + { + std::vector<uint8_t> literal{0xec, 0x9c, 0xff}; + auto it = literal.begin(); + auto ret = umod8::read(it, literal.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::Invalid, ret.error()); + it = literal.begin(); + auto ret_replace = umod8::read_replace(it, literal.end()); + ASSERT_TRUE(ret_replace.has_value()); + EXPECT_EQ(0xfffd, *ret_replace); + EXPECT_EQ(it, literal.end()); + } + { + std::vector<uint8_t> literal{0xed, 0xb0, 0x80, 0xed, 0xa0, 0x81}; + auto it = literal.begin(); + auto ret = umod8::read(it, literal.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::Invalid, ret.error()); + EXPECT_EQ(3, literal.end() - it); + } + { + std::vector<uint8_t> literal{0xed, 0xa0, 0x81}; + auto it = literal.begin(); + auto ret = umod8::read(it, literal.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::Invalid, ret.error()); + EXPECT_EQ(it, literal.end()); + } + { + std::vector<uint8_t> literal{0xed, 0xa0, 0x81, 0xed, 0xff, 0x80}; + auto it = literal.begin(); + auto ret = umod8::read(it, literal.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::Invalid, ret.error()); + EXPECT_EQ(3, literal.end() - it); + } + { + std::vector<uint8_t> literal{0xed, 0xa0, 0x81, 0xed, 0xb0, 0xff}; + auto it = literal.begin(); + auto ret = umod8::read(it, literal.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::Invalid, ret.error()); + EXPECT_EQ(3, literal.end() - it); + } +} + +TEST(u16, empty) { + std::vector<uint16_t> empty; + auto it = empty.begin(); + auto ret = u16::read(it, empty.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::End, ret.error()); + + auto ret_replace = u16::read_replace(it, empty.end()); + ASSERT_FALSE(ret_replace.has_value()); + EXPECT_EQ(u::ReadErrorReplace::End, ret_replace.error()); + + EXPECT_FALSE(u16::write(it, empty.end(), 0x40)); + + EXPECT_FALSE(u16::skip(it, empty.end())); +} + +TEST(u16, examples) { + { + std::vector<uint16_t> literal{0x24}; + auto it = literal.begin(); + auto ret = u16::read(it, literal.end()); + ASSERT_TRUE(ret.has_value()); + EXPECT_EQ(0x24, *ret); + EXPECT_EQ(it, literal.end()); + + it = literal.begin(); + EXPECT_TRUE(u16::skip(it, literal.end())); + EXPECT_EQ(it, literal.end()); + } + { + std::vector<uint16_t> literal{0x20ac}; + auto it = literal.begin(); + auto ret = u16::read(it, literal.end()); + ASSERT_TRUE(ret.has_value()); + EXPECT_EQ(0x20ac, *ret); + EXPECT_EQ(it, literal.end()); + + it = literal.begin(); + EXPECT_TRUE(u16::skip(it, literal.end())); + EXPECT_EQ(it, literal.end()); + } + { + std::vector<uint16_t> literal{0xd801, 0xdc37}; + auto it = literal.begin(); + auto ret = u16::read(it, literal.end()); + ASSERT_TRUE(ret.has_value()); + EXPECT_EQ(0x10437, *ret); + EXPECT_EQ(it, literal.end()); + + it = literal.begin(); + EXPECT_TRUE(u16::skip(it, literal.end())); + EXPECT_EQ(it, literal.end()); + } + { + std::vector<uint16_t> literal{0xd852, 0xdf62}; + auto it = literal.begin(); + auto ret = u16::read(it, literal.end()); + ASSERT_TRUE(ret.has_value()); + EXPECT_EQ(0x24b62, *ret); + EXPECT_EQ(it, literal.end()); + + it = literal.begin(); + auto ret_replace = u16::read_replace(it, literal.end()); + ASSERT_TRUE(ret_replace.has_value()); + EXPECT_EQ(0x24b62, *ret_replace); + EXPECT_EQ(it, literal.end()); + + it = literal.begin(); + EXPECT_TRUE(u16::skip(it, literal.end())); + EXPECT_EQ(it, literal.end()); + } + { + std::vector<uint16_t> literal(1, 0x0); + auto it = literal.begin(); + EXPECT_TRUE(u16::write(it, literal.end(), 0x24)); + EXPECT_EQ(0x24, literal[0]); + EXPECT_EQ(it, literal.end()); + } + { + std::vector<uint16_t> literal(1, 0x0); + auto it = literal.begin(); + EXPECT_TRUE(u16::write(it, literal.end(), 0x20ac)); + EXPECT_EQ(0x20ac, literal[0]); + EXPECT_EQ(it, literal.end()); + } + { + std::vector<uint16_t> literal(2, 0x0); + auto it = literal.begin(); + EXPECT_TRUE(u16::write(it, literal.end(), 0x10437)); + EXPECT_EQ(0xd801, literal[0]); + EXPECT_EQ(0xdc37, literal[1]); + EXPECT_EQ(it, literal.end()); + } + { + std::vector<uint16_t> literal(2, 0x0); + auto it = literal.begin(); + EXPECT_TRUE(u16::write(it, literal.end(), 0x24b62)); + EXPECT_EQ(0xd852, literal[0]); + EXPECT_EQ(0xdf62, literal[1]); + EXPECT_EQ(it, literal.end()); + } +} + +TEST(u16, incomplete) { + { + std::vector<uint16_t> literal{0xd801}; + auto it = literal.begin(); + auto ret = u16::read(it, literal.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::Incomplete, ret.error()); + } + { + std::vector<uint16_t> literal{0xd852}; + auto it = literal.begin(); + auto ret = u16::read(it, literal.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::Incomplete, ret.error()); + } +} + +TEST(u16, invalid) { + { + std::vector<uint16_t> literal{0xdc37, 0xd801}; + auto it = literal.begin(); + auto ret = u16::read(it, literal.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::Invalid, ret.error()); + it = literal.begin(); + auto ret_replace = u16::read_replace(it, literal.end()); + ASSERT_TRUE(ret_replace.has_value()); + EXPECT_EQ(0xfffd, *ret_replace); + EXPECT_NE(it, literal.end()); + ret_replace = u16::read_replace(it, literal.end()); + ASSERT_FALSE(ret_replace.has_value()); + EXPECT_EQ(u::ReadErrorReplace::Incomplete, ret_replace.error()); + } + { + std::vector<uint16_t> literal{0xd852, 0xd852}; + auto it = literal.begin(); + auto ret = u16::read(it, literal.end()); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(u::ReadError::Invalid, ret.error()); + it = literal.begin(); + auto ret_replace = u16::read_replace(it, literal.end()); + ASSERT_TRUE(ret_replace.has_value()); + EXPECT_EQ(0xfffd, *ret_replace); + EXPECT_NE(it, literal.end()); + ret_replace = u16::read_replace(it, literal.end()); + ASSERT_FALSE(ret_replace.has_value()); + EXPECT_EQ(u::ReadErrorReplace::Incomplete, ret_replace.error()); + } + { + std::vector<uint16_t> literal{0xdc37, 0xdf62}; + auto it = literal.begin(); + auto ret_replace = u16::read_replace(it, literal.end()); + ASSERT_TRUE(ret_replace.has_value()); + EXPECT_EQ(0xfffd, *ret_replace); + EXPECT_NE(it, literal.end()); + ret_replace = u16::read_replace(it, literal.end()); + ASSERT_TRUE(ret_replace.has_value()); + EXPECT_EQ(0xfffd, *ret_replace); + EXPECT_EQ(it, literal.end()); + } +} |
