summaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
author: Joel Klinghed <the_jk@spawned.biz> 2025-09-15 20:52:51 +0200
committer: Joel Klinghed <the_jk@spawned.biz> 2025-09-15 20:52:51 +0200
commit: 18a622f378b403788c67fc785d30f4609caa3fc7 (patch)
tree: 9d13f4ef49a06c9e4837487f61bc90b734ad9b9a /test
parent: 28c6425e4ed1cd2eab538e7cba08c18aa83d8af5 (diff)
uio: Unicode reader
Reads UTF-8 and UTF-16 into UTF-8 or UTF-16 strings. If strict is true, reading fails at the first invalid character. If strict is false, invalid characters are replaced with U+FFFD. For the replacement, I changed the behavior of uN::read_replace to only jump one byte. Otherwise, in the common invalid case where ISO-8859-1 or WIN-1252 is read as UTF-8, many characters would be skipped. If skip_bom is true, any BOM at the start of the stream is ignored. If skip_bom is false, any BOM will be included. The input format can be forced; if it is not, detection is used, which tries to guess the format and then falls back to UTF-8.
Diffstat (limited to 'test')
-rw-r--r-- test/io.cc 10
-rw-r--r-- test/u.cc 87
-rw-r--r-- test/uio.cc 620
3 files changed, 674 insertions, 43 deletions
diff --git a/test/io.cc b/test/io.cc
index 23c10d4..2441138 100644
--- a/test/io.cc
+++ b/test/io.cc
@@ -124,7 +124,7 @@ TEST_F(IoTest, read_empty) {
auto ret = io::openat(dirfd(), "test");
ASSERT_TRUE(ret.has_value());
std::string tmp(10, ' ');
- auto ret2 = ret.value()->read(tmp);
+ auto ret2 = ret.value()->read(tmp.data(), tmp.size());
ASSERT_TRUE(ret2.has_value());
EXPECT_EQ(0, ret2.value());
}
@@ -145,7 +145,7 @@ TEST_F(IoTest, read) {
auto ret = io::openat(dirfd(), "test");
ASSERT_TRUE(ret.has_value());
std::string tmp(12, ' ');
- auto ret2 = ret.value()->repeat_read(tmp);
+ auto ret2 = ret.value()->repeat_read(tmp.data(), tmp.size());
ASSERT_TRUE(ret2.has_value());
EXPECT_EQ(11, ret2.value());
tmp.resize(ret2.value());
@@ -161,7 +161,7 @@ TEST_F(IoTest, skip) {
ASSERT_TRUE(ret2.has_value());
EXPECT_EQ(6, ret2.value());
std::string tmp(12, ' ');
- auto ret3 = ret.value()->repeat_read(tmp);
+ auto ret3 = ret.value()->repeat_read(tmp.data(), tmp.size());
ASSERT_TRUE(ret3.has_value());
EXPECT_EQ(5, ret3.value());
tmp.resize(ret3.value());
@@ -175,7 +175,7 @@ TEST_F(IoTest, read_block) {
ASSERT_TRUE(ret.has_value());
auto ret2 = io_make_max_block(std::move(ret.value()), 2);
std::string tmp(12, ' ');
- auto ret3 = ret2->repeat_read(tmp);
+ auto ret3 = ret2->repeat_read(tmp.data(), tmp.size());
ASSERT_TRUE(ret3.has_value());
EXPECT_EQ(11, ret3.value());
tmp.resize(ret3.value());
@@ -192,7 +192,7 @@ TEST_F(IoTest, skip_block) {
ASSERT_TRUE(ret3.has_value());
EXPECT_EQ(6, ret3.value());
std::string tmp(12, ' ');
- auto ret4 = ret2->repeat_read(tmp);
+ auto ret4 = ret2->repeat_read(tmp.data(), tmp.size());
ASSERT_TRUE(ret4.has_value());
EXPECT_EQ(5, ret4.value());
tmp.resize(ret4.value());
diff --git a/test/u.cc b/test/u.cc
index 53455f2..dc77e7d 100644
--- a/test/u.cc
+++ b/test/u.cc
@@ -4,6 +4,7 @@
#include "umod8.hh"
#include "u16.hh"
+#include <iterator>
#include <vector>
namespace {
@@ -20,7 +21,7 @@ TEST(u8, empty) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::End, ret.error());
- auto ret_replace = u8::read_replace(it, empty.end());
+ auto ret_replace = u8::read_replace(it, empty.end(), false);
ASSERT_FALSE(ret_replace.has_value());
EXPECT_EQ(u::ReadErrorReplace::End, ret_replace.error());
@@ -75,7 +76,7 @@ TEST(u8, examples) {
EXPECT_EQ(it, literal.end());
it = literal.begin();
- auto ret_replace = u8::read_replace(it, literal.end());
+ auto ret_replace = u8::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0x10345, *ret_replace);
EXPECT_EQ(it, literal.end());
@@ -153,9 +154,14 @@ TEST(u8, incomplete) {
EXPECT_EQ(u::ReadError::Incomplete, ret.error());
it = literal.begin();
- auto ret_replace = u8::read_replace(it, literal.end());
+ auto ret_replace = u8::read_replace(it, literal.end(), false);
ASSERT_FALSE(ret_replace.has_value());
EXPECT_EQ(u::ReadErrorReplace::Incomplete, ret_replace.error());
+
+ it = literal.begin();
+ ret_replace = u8::read_replace(it, literal.end(), true);
+ ASSERT_TRUE(ret_replace.has_value());
+ EXPECT_EQ(0xfffd, ret_replace.value());
}
{
std::vector<uint8_t> literal{0xf0};
@@ -188,10 +194,10 @@ TEST(u8, invalid) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::Invalid, ret.error());
it = literal.begin();
- auto ret_replace = u8::read_replace(it, literal.end());
+ auto ret_replace = u8::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0xfffd, *ret_replace);
- EXPECT_EQ(it, literal.end());
+ EXPECT_EQ(it, std::next(literal.begin()));
}
{
std::vector<uint8_t> literal{0xa0};
@@ -200,10 +206,10 @@ TEST(u8, invalid) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::Invalid, ret.error());
it = literal.begin();
- auto ret_replace = u8::read_replace(it, literal.end());
+ auto ret_replace = u8::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0xfffd, *ret_replace);
- EXPECT_EQ(it, literal.end());
+ EXPECT_EQ(it, std::next(literal.begin()));
}
{
std::vector<uint8_t> literal{0xce, 0xff};
@@ -212,10 +218,10 @@ TEST(u8, invalid) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::Invalid, ret.error());
it = literal.begin();
- auto ret_replace = u8::read_replace(it, literal.end());
+ auto ret_replace = u8::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0xfffd, *ret_replace);
- EXPECT_EQ(it, literal.end());
+ EXPECT_EQ(it, std::next(literal.begin()));
}
{
std::vector<uint8_t> literal{0xec, 0xff, 0x84};
@@ -224,10 +230,10 @@ TEST(u8, invalid) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::Invalid, ret.error());
it = literal.begin();
- auto ret_replace = u8::read_replace(it, literal.end());
+ auto ret_replace = u8::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0xfffd, *ret_replace);
- EXPECT_EQ(it, literal.end());
+ EXPECT_EQ(it, std::next(literal.begin()));
}
{
std::vector<uint8_t> literal{0xec, 0x9c, 0xff};
@@ -236,10 +242,10 @@ TEST(u8, invalid) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::Invalid, ret.error());
it = literal.begin();
- auto ret_replace = u8::read_replace(it, literal.end());
+ auto ret_replace = u8::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0xfffd, *ret_replace);
- EXPECT_EQ(it, literal.end());
+ EXPECT_EQ(it, std::next(literal.begin()));
}
{
std::vector<uint8_t> literal{0xf0, 0xff, 0x8d, 0x85};
@@ -248,10 +254,10 @@ TEST(u8, invalid) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::Invalid, ret.error());
it = literal.begin();
- auto ret_replace = u8::read_replace(it, literal.end());
+ auto ret_replace = u8::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0xfffd, *ret_replace);
- EXPECT_EQ(it, literal.end());
+ EXPECT_EQ(it, std::next(literal.begin()));
}
{
std::vector<uint8_t> literal{0xf0, 0x90, 0xff, 0x85};
@@ -260,10 +266,10 @@ TEST(u8, invalid) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::Invalid, ret.error());
it = literal.begin();
- auto ret_replace = u8::read_replace(it, literal.end());
+ auto ret_replace = u8::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0xfffd, *ret_replace);
- EXPECT_EQ(it, literal.end());
+ EXPECT_EQ(it, std::next(literal.begin()));
}
{
std::vector<uint8_t> literal{0xf0, 0x90, 0x8d, 0xff};
@@ -272,10 +278,10 @@ TEST(u8, invalid) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::Invalid, ret.error());
it = literal.begin();
- auto ret_replace = u8::read_replace(it, literal.end());
+ auto ret_replace = u8::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0xfffd, *ret_replace);
- EXPECT_EQ(it, literal.end());
+ EXPECT_EQ(it, std::next(literal.begin()));
}
}
@@ -286,7 +292,7 @@ TEST(umod8, empty) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::End, ret.error());
- auto ret_replace = umod8::read_replace(it, empty.end());
+ auto ret_replace = umod8::read_replace(it, empty.end(), false);
ASSERT_FALSE(ret_replace.has_value());
EXPECT_EQ(u::ReadErrorReplace::End, ret_replace.error());
@@ -329,7 +335,7 @@ TEST(umod8, examples) {
EXPECT_EQ(it, literal.end());
it = literal.begin();
- auto ret_replace = umod8::read_replace(it, literal.end());
+ auto ret_replace = umod8::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0x10400, *ret_replace);
EXPECT_EQ(it, literal.end());
@@ -416,9 +422,14 @@ TEST(umod8, incomplete) {
EXPECT_EQ(u::ReadError::Incomplete, ret.error());
it = literal.begin();
- auto ret_replace = umod8::read_replace(it, literal.end());
+ auto ret_replace = umod8::read_replace(it, literal.end(), false);
ASSERT_FALSE(ret_replace.has_value());
EXPECT_EQ(u::ReadErrorReplace::Incomplete, ret_replace.error());
+
+ it = literal.begin();
+ ret_replace = umod8::read_replace(it, literal.end(), true);
+ ASSERT_TRUE(ret_replace.has_value());
+ EXPECT_EQ(0xfffd, ret_replace.value());
}
{
std::vector<uint8_t> literal{0xed, 0xa0, 0x81, 0xed, 0xb0};
@@ -437,7 +448,7 @@ TEST(umod8, invalid) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::Invalid, ret.error());
it = literal.begin();
- auto ret_replace = umod8::read_replace(it, literal.end());
+ auto ret_replace = umod8::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0xfffd, *ret_replace);
EXPECT_EQ(3, literal.end() - it);
@@ -449,7 +460,7 @@ TEST(umod8, invalid) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::Invalid, ret.error());
it = literal.begin();
- auto ret_replace = umod8::read_replace(it, literal.end());
+ auto ret_replace = umod8::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0xfffd, *ret_replace);
EXPECT_EQ(it, literal.end());
@@ -461,10 +472,10 @@ TEST(umod8, invalid) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::Invalid, ret.error());
it = literal.begin();
- auto ret_replace = umod8::read_replace(it, literal.end());
+ auto ret_replace = umod8::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0xfffd, *ret_replace);
- EXPECT_EQ(it, literal.end());
+ EXPECT_EQ(it, std::next(literal.begin()));
}
{
std::vector<uint8_t> literal{0xec, 0xff, 0x84};
@@ -473,10 +484,10 @@ TEST(umod8, invalid) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::Invalid, ret.error());
it = literal.begin();
- auto ret_replace = umod8::read_replace(it, literal.end());
+ auto ret_replace = umod8::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0xfffd, *ret_replace);
- EXPECT_EQ(it, literal.end());
+ EXPECT_EQ(it, std::next(literal.begin()));
}
{
std::vector<uint8_t> literal{0xec, 0x9c, 0xff};
@@ -485,10 +496,10 @@ TEST(umod8, invalid) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::Invalid, ret.error());
it = literal.begin();
- auto ret_replace = umod8::read_replace(it, literal.end());
+ auto ret_replace = umod8::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0xfffd, *ret_replace);
- EXPECT_EQ(it, literal.end());
+ EXPECT_EQ(it, std::next(literal.begin()));
}
{
std::vector<uint8_t> literal{0xed, 0xb0, 0x80, 0xed, 0xa0, 0x81};
@@ -531,7 +542,7 @@ TEST(u16, empty) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::End, ret.error());
- auto ret_replace = u16::read_replace(it, empty.end());
+ auto ret_replace = u16::read_replace(it, empty.end(), false);
ASSERT_FALSE(ret_replace.has_value());
EXPECT_EQ(u::ReadErrorReplace::End, ret_replace.error());
@@ -586,7 +597,7 @@ TEST(u16, examples) {
EXPECT_EQ(it, literal.end());
it = literal.begin();
- auto ret_replace = u16::read_replace(it, literal.end());
+ auto ret_replace = u16::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0x24b62, *ret_replace);
EXPECT_EQ(it, literal.end());
@@ -652,11 +663,11 @@ TEST(u16, invalid) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::Invalid, ret.error());
it = literal.begin();
- auto ret_replace = u16::read_replace(it, literal.end());
+ auto ret_replace = u16::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0xfffd, *ret_replace);
EXPECT_NE(it, literal.end());
- ret_replace = u16::read_replace(it, literal.end());
+ ret_replace = u16::read_replace(it, literal.end(), false);
ASSERT_FALSE(ret_replace.has_value());
EXPECT_EQ(u::ReadErrorReplace::Incomplete, ret_replace.error());
}
@@ -667,22 +678,22 @@ TEST(u16, invalid) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::Invalid, ret.error());
it = literal.begin();
- auto ret_replace = u16::read_replace(it, literal.end());
+ auto ret_replace = u16::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0xfffd, *ret_replace);
EXPECT_NE(it, literal.end());
- ret_replace = u16::read_replace(it, literal.end());
+ ret_replace = u16::read_replace(it, literal.end(), false);
ASSERT_FALSE(ret_replace.has_value());
EXPECT_EQ(u::ReadErrorReplace::Incomplete, ret_replace.error());
}
{
std::vector<uint16_t> literal{0xdc37, 0xdf62};
auto it = literal.begin();
- auto ret_replace = u16::read_replace(it, literal.end());
+ auto ret_replace = u16::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0xfffd, *ret_replace);
EXPECT_NE(it, literal.end());
- ret_replace = u16::read_replace(it, literal.end());
+ ret_replace = u16::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0xfffd, *ret_replace);
EXPECT_EQ(it, literal.end());
diff --git a/test/uio.cc b/test/uio.cc
new file mode 100644
index 0000000..ce666c1
--- /dev/null
+++ b/test/uio.cc
@@ -0,0 +1,620 @@
+#include <gtest/gtest.h>
+
+#include "io.hh"
+#include "io_test_helper.hh"
+#include "uio.hh"
+
+#include <string>
+
+using namespace std::literals::string_literals;
+
+TEST(uio_u8, empty) {
+ auto uio = u8::open(io::memory(""));
+ std::string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(0, ret.value());
+}
+
+TEST(uio_u16, empty) {
+ auto uio = u16::open(io::memory(""));
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(0, ret.value());
+}
+
+TEST(uio_u8, sample) {
+ auto uio = u8::open(io::memory("\xf0\x90\x8D\x85" "es"),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF8});
+ std::string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(6, ret.value());
+ EXPECT_EQ("\xf0\x90\x8D\x85" "es", tmp);
+}
+
+TEST(uio_u16, sample_be) {
+ auto uio = u16::open(io::memory("\x00\x24\xD8\x01\xDC\x37"s),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_BE});
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 5);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(3, ret.value());
+ EXPECT_EQ(0x24, tmp[0]);
+ EXPECT_EQ(0xd801, tmp[1]);
+ EXPECT_EQ(0xdc37, tmp[2]);
+}
+
+TEST(uio_u16, sample_le) {
+ auto uio = u16::open(io::memory("\x24\x00\x01\xD8\x37\xDC"s),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_LE});
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 5);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(3, ret.value());
+ EXPECT_EQ(0x24, tmp[0]);
+ EXPECT_EQ(0xd801, tmp[1]);
+ EXPECT_EQ(0xdc37, tmp[2]);
+}
+
+TEST(uio_u8, sample_detect) {
+ auto uio = u8::open(io::memory("\xf0\x90\x8D\x85" "es"));
+ std::string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(6, ret.value());
+ EXPECT_EQ("\xf0\x90\x8D\x85" "es", tmp);
+}
+
+TEST(uio_u16, sample_detect_be) {
+ auto uio = u16::open(io::memory("\x00\x24\xD8\x01\xDC\x37"s));
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 5);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(3, ret.value());
+ EXPECT_EQ(0x24, tmp[0]);
+ EXPECT_EQ(0xd801, tmp[1]);
+ EXPECT_EQ(0xdc37, tmp[2]);
+}
+
+TEST(uio_u16, sample_detect_le) {
+ auto uio = u16::open(io::memory("\x24\x00\x01\xD8\x37\xDC"s));
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 5);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(3, ret.value());
+ EXPECT_EQ(0x24, tmp[0]);
+ EXPECT_EQ(0xd801, tmp[1]);
+ EXPECT_EQ(0xdc37, tmp[2]);
+}
+
+TEST(uio_u8, invalid) {
+ auto uio = u8::open(io::memory("r\xe4ksm\xf6rg\xe5s"),
+ u::ReaderConfig{
+ .strict=true,
+ .input=u::ReaderInputFormat::UTF8,
+ });
+ std::string tmp;
+ auto ret = uio->repeat_read(tmp, 20);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(1, ret.value());
+ EXPECT_EQ("r", tmp);
+ ret = uio->repeat_read(tmp, 20);
+ ASSERT_FALSE(ret.has_value());
+ EXPECT_EQ(io::ReadError::InvalidData, ret.error());
+}
+
+TEST(uio_u8, invalid_detect) {
+ auto uio = u8::open(io::memory("r\xe4ksm\xf6rg\xe5s"),
+ u::ReaderConfig{.strict=true});
+ std::string tmp;
+ auto ret = uio->repeat_read(tmp, 20);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(1, ret.value());
+ EXPECT_EQ("r", tmp);
+ ret = uio->repeat_read(tmp, 20);
+ ASSERT_FALSE(ret.has_value());
+ EXPECT_EQ(io::ReadError::InvalidData, ret.error());
+}
+
+TEST(uio_u8, invalid_replace) {
+ auto uio = u8::open(io::memory("r\xe4ksm\xf6rg\xe5s"),
+ u::ReaderConfig{
+ .strict=false,
+ .input=u::ReaderInputFormat::UTF8,
+ });
+ std::string tmp;
+ auto ret = uio->repeat_read(tmp, 20);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(16, ret.value());
+ EXPECT_EQ("r\xef\xbf\xbdksm\xef\xbf\xbdrg\xef\xbf\xbds", tmp);
+}
+
+TEST(uio_u8, read_error) {
+ auto uio = u8::open(io_make_breaking(io::memory("\xef\xbf\xbd"), 1),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF8});
+ std::string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_FALSE(ret.has_value());
+ EXPECT_EQ(io::ReadError::Error, ret.error());
+}
+
+TEST(uio_u16, read_error) {
+ auto uio = u16::open(io_make_breaking(io::memory("\x00\x24"s), 1),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_BE});
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_FALSE(ret.has_value());
+ EXPECT_EQ(io::ReadError::Error, ret.error());
+}
+
+TEST(uio_u8, read_incomplete_strict) {
+ auto uio = u8::open(io::memory("\xef"),
+ u::ReaderConfig{
+ .strict=true,
+ .input=u::ReaderInputFormat::UTF8,
+ });
+ std::string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_FALSE(ret.has_value());
+ EXPECT_EQ(io::ReadError::InvalidData, ret.error());
+}
+
+TEST(uio_u8, read_incomplete) {
+ auto uio = u8::open(io::memory("\xef"),
+ u::ReaderConfig{
+ .strict=false,
+ .input=u::ReaderInputFormat::UTF8,
+ });
+ std::string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(3, ret.value());
+ EXPECT_EQ("\xef\xbf\xbd", tmp);
+}
+
+TEST(uio_u16, read_incomplete_strict_be) {
+ auto uio = u16::open(io::memory("\x00"s),
+ u::ReaderConfig{
+ .strict=true,
+ .input=u::ReaderInputFormat::UTF16_BE,
+ });
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_FALSE(ret.has_value());
+ EXPECT_EQ(io::ReadError::InvalidData, ret.error());
+}
+
+TEST(uio_u16, read_incomplete_be) {
+ auto uio = u16::open(io::memory("\x00"s),
+ u::ReaderConfig{
+ .strict=false,
+ .input=u::ReaderInputFormat::UTF16_BE,
+ });
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(1, ret.value());
+ EXPECT_EQ(0xfffd, tmp[0]);
+}
+
+TEST(uio_u16, read_incomplete_strict_le) {
+ auto uio = u16::open(io::memory("$"),
+ u::ReaderConfig{
+ .strict=true,
+ .input=u::ReaderInputFormat::UTF16_LE,
+ });
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_FALSE(ret.has_value());
+ EXPECT_EQ(io::ReadError::InvalidData, ret.error());
+}
+
+TEST(uio_u16, read_incomplete_le) {
+ auto uio = u16::open(io::memory("$"),
+ u::ReaderConfig{
+ .strict=false,
+ .input=u::ReaderInputFormat::UTF16_LE,
+ });
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(1, ret.value());
+ EXPECT_EQ(0xfffd, tmp[0]);
+}
+
+TEST(uio_u8, max_too_small) {
+ auto uio = u8::open(io::memory("\xf0\x90\x8D\x85" "es"),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF8});
+ std::string tmp;
+ auto ret = uio->read(tmp.data(), 0);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(0, ret.value());
+
+ ret = uio->read(tmp, 2);
+ ASSERT_FALSE(ret.has_value());
+ EXPECT_EQ(io::ReadError::MaxTooSmall, ret.error());
+}
+
+TEST(uio_u16, max_too_small_be) {
+ auto uio = u16::open(io::memory("\xD8\x01\xDC\x37"),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_BE});
+ std::u16string tmp;
+ auto ret = uio->read(tmp.data(), 0);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(0, ret.value());
+
+ ret = uio->read(tmp, 1);
+ ASSERT_FALSE(ret.has_value());
+ EXPECT_EQ(io::ReadError::MaxTooSmall, ret.error());
+}
+
+TEST(uio_u16, max_too_small_le) {
+ auto uio = u16::open(io::memory("\x01\xD8\x37\xDC"),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_LE});
+ std::u16string tmp;
+ auto ret = uio->read(tmp.data(), 0);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(0, ret.value());
+
+ ret = uio->read(tmp, 1);
+ ASSERT_FALSE(ret.has_value());
+ EXPECT_EQ(io::ReadError::MaxTooSmall, ret.error());
+}
+
+TEST(uio_u8, partial) {
+ auto uio = u8::open(io::memory("\xf0\x90\x8D\x85" "es"),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF8});
+ std::string tmp;
+ auto ret = uio->repeat_read(tmp, 4);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(4, ret.value());
+ EXPECT_EQ("\xf0\x90\x8d\x85", tmp);
+
+ ret = uio->repeat_read(tmp, 2);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(2, ret.value());
+ EXPECT_EQ("es", tmp);
+}
+
+TEST(uio_u16, partial_be) {
+ auto uio = u16::open(io::memory("\x00\x24\xD8\x01\xDC\x37"s),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_BE});
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 1);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(1, ret.value());
+ EXPECT_EQ(0x24, tmp[0]);
+
+ ret = uio->repeat_read(tmp, 2);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(2, ret.value());
+ EXPECT_EQ(0xd801, tmp[0]);
+ EXPECT_EQ(0xdc37, tmp[1]);
+}
+
+TEST(uio_u16, partial_le) {
+ auto uio = u16::open(io::memory("\x24\x00\x01\xD8\x37\xDC"s),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_LE});
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 1);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(1, ret.value());
+ EXPECT_EQ(0x24, tmp[0]);
+
+ ret = uio->repeat_read(tmp, 2);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(2, ret.value());
+ EXPECT_EQ(0xd801, tmp[0]);
+ EXPECT_EQ(0xdc37, tmp[1]);
+}
+
+TEST(uio_u16, invalid_be) {
+ auto uio = u16::open(io::memory("\x00\x24\xd8\x01"s),
+ u::ReaderConfig{
+ .strict=true,
+ .input=u::ReaderInputFormat::UTF16_BE,
+ });
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(1, ret.value());
+ EXPECT_EQ(0x24, tmp[0]);
+ ret = uio->repeat_read(tmp, 10);
+ ASSERT_FALSE(ret.has_value());
+ EXPECT_EQ(io::ReadError::InvalidData, ret.error());
+}
+
+TEST(uio_u16, invalid_detect_be) {
+ auto uio = u16::open(io::memory("\x00\x24\xd8\x01"s),
+ u::ReaderConfig{.strict=true});
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(1, ret.value());
+ EXPECT_EQ(0x24, tmp[0]);
+ ret = uio->repeat_read(tmp, 10);
+ ASSERT_FALSE(ret.has_value());
+ EXPECT_EQ(io::ReadError::InvalidData, ret.error());
+}
+
+TEST(uio_u16, invalid_replace_be) {
+ auto uio = u16::open(io::memory("\x00\x24\xd8\x01"s),
+ u::ReaderConfig{
+ .strict=false,
+ .input=u::ReaderInputFormat::UTF16_BE,
+ });
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(2, ret.value());
+ EXPECT_EQ(0x24, tmp[0]);
+ EXPECT_EQ(0xfffd, tmp[1]);
+}
+
+TEST(uio_u16, invalid_le) {
+ auto uio = u16::open(io::memory("\x24\x00\x01\xd8"s),
+ u::ReaderConfig{
+ .strict=true,
+ .input=u::ReaderInputFormat::UTF16_LE,
+ });
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(1, ret.value());
+ EXPECT_EQ(0x24, tmp[0]);
+ ret = uio->repeat_read(tmp, 10);
+ ASSERT_FALSE(ret.has_value());
+ EXPECT_EQ(io::ReadError::InvalidData, ret.error());
+}
+
+TEST(uio_u16, invalid_detect_le) {
+ auto uio = u16::open(io::memory("\x24\x00\x01\xd8"s),
+ u::ReaderConfig{.strict=true});
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(1, ret.value());
+ EXPECT_EQ(0x24, tmp[0]);
+ ret = uio->repeat_read(tmp, 10);
+ ASSERT_FALSE(ret.has_value());
+ EXPECT_EQ(io::ReadError::InvalidData, ret.error());
+}
+
+TEST(uio_u16, invalid_replace_le) {
+ auto uio = u16::open(io::memory("\x24\x00\x01\xd8"s),
+ u::ReaderConfig{
+ .strict=false,
+ .input=u::ReaderInputFormat::UTF16_LE,
+ });
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(2, ret.value());
+ EXPECT_EQ(0x24, tmp[0]);
+ EXPECT_EQ(0xfffd, tmp[1]);
+}
+
+TEST(uio_u8, bom) {
+ auto uio = u8::open(io::memory("\xef\xbb\xbf\xf0\x90\x8D\x85" "es"),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF8});
+ std::string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(6, ret.value());
+ EXPECT_EQ("\xf0\x90\x8D\x85" "es", tmp);
+}
+
+TEST(uio_u8, bom_keep) {
+ auto uio = u8::open(io::memory("\xef\xbb\xbf\xf0\x90\x8D\x85" "es"),
+ u::ReaderConfig{
+ .input=u::ReaderInputFormat::UTF8,
+ .skip_bom=false,
+ });
+ std::string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(9, ret.value());
+ EXPECT_EQ("\xef\xbb\xbf\xf0\x90\x8D\x85" "es", tmp);
+}
+
+TEST(uio_u16, bom_be) {
+ auto uio = u16::open(io::memory("\xfe\xff\x00\x24\xD8\x01\xDC\x37"s),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_BE});
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 5);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(3, ret.value());
+ EXPECT_EQ(0x24, tmp[0]);
+ EXPECT_EQ(0xd801, tmp[1]);
+ EXPECT_EQ(0xdc37, tmp[2]);
+}
+
+TEST(uio_u16, bom_le) {
+ auto uio = u16::open(io::memory("\xff\xfe\x24\x00\x01\xD8\x37\xDC"s),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_LE});
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 5);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(3, ret.value());
+ EXPECT_EQ(0x24, tmp[0]);
+ EXPECT_EQ(0xd801, tmp[1]);
+ EXPECT_EQ(0xdc37, tmp[2]);
+}
+
+TEST(uio_u16, bom_keep_be) {
+ auto uio = u16::open(io::memory("\xfe\xff\x00\x24\xD8\x01\xDC\x37"s),
+ u::ReaderConfig{
+ .input=u::ReaderInputFormat::UTF16_BE,
+ .skip_bom=false,
+ });
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 5);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(4, ret.value());
+ EXPECT_EQ(0xfeff, tmp[0]);
+ EXPECT_EQ(0x24, tmp[1]);
+ EXPECT_EQ(0xd801, tmp[2]);
+ EXPECT_EQ(0xdc37, tmp[3]);
+}
+
+TEST(uio_u16, bom_keep_le) {
+ auto uio = u16::open(io::memory("\xff\xfe\x24\x00\x01\xD8\x37\xDC"s),
+ u::ReaderConfig{
+ .input=u::ReaderInputFormat::UTF16_LE,
+ .skip_bom=false,
+ });
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 5);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(4, ret.value());
+ EXPECT_EQ(0xfeff, tmp[0]);
+ EXPECT_EQ(0x24, tmp[1]);
+ EXPECT_EQ(0xd801, tmp[2]);
+ EXPECT_EQ(0xdc37, tmp[3]);
+}
+
+TEST(uio_u8, bom_detect) {
+ auto uio = u8::open(io::memory("\xef\xbb\xbf\xf0\x90\x8D\x85" "es"));
+ std::string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(6, ret.value());
+ EXPECT_EQ("\xf0\x90\x8D\x85" "es", tmp);
+}
+
+TEST(uio_u8, bom_keep_detect) {
+ auto uio = u8::open(io::memory("\xef\xbb\xbf\xf0\x90\x8D\x85" "es"),
+ u::ReaderConfig{.skip_bom=false});
+ std::string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(9, ret.value());
+ EXPECT_EQ("\xef\xbb\xbf\xf0\x90\x8D\x85" "es", tmp);
+}
+
+TEST(uio_u16, bom_detect_be) {
+ auto uio = u16::open(io::memory("\xfe\xff\x00\x24\xD8\x01\xDC\x37"s));
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 5);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(3, ret.value());
+ EXPECT_EQ(0x24, tmp[0]);
+ EXPECT_EQ(0xd801, tmp[1]);
+ EXPECT_EQ(0xdc37, tmp[2]);
+}
+
+TEST(uio_u16, bom_detect_le) {
+ auto uio = u16::open(io::memory("\xff\xfe\x24\x00\x01\xD8\x37\xDC"s));
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 5);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(3, ret.value());
+ EXPECT_EQ(0x24, tmp[0]);
+ EXPECT_EQ(0xd801, tmp[1]);
+ EXPECT_EQ(0xdc37, tmp[2]);
+}
+
+TEST(uio_u16, bom_keep_detect_be) {
+ auto uio = u16::open(io::memory("\xfe\xff\x00\x24\xD8\x01\xDC\x37"s),
+ u::ReaderConfig{.skip_bom=false});
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 5);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(4, ret.value());
+ EXPECT_EQ(0xfeff, tmp[0]);
+ EXPECT_EQ(0x24, tmp[1]);
+ EXPECT_EQ(0xd801, tmp[2]);
+ EXPECT_EQ(0xdc37, tmp[3]);
+}
+
+TEST(uio_u16, bom_keep_detect_le) {
+ auto uio = u16::open(io::memory("\xff\xfe\x24\x00\x01\xD8\x37\xDC"s),
+ u::ReaderConfig{.skip_bom=false});
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 5);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(4, ret.value());
+ EXPECT_EQ(0xfeff, tmp[0]);
+ EXPECT_EQ(0x24, tmp[1]);
+ EXPECT_EQ(0xd801, tmp[2]);
+ EXPECT_EQ(0xdc37, tmp[3]);
+}
+
+TEST(uio_u8, input_utf16_be) {
+ auto uio = u8::open(io::memory("\x00\x24\xD8\x01\xDC\x37"s),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_BE});
+ std::string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(5, ret.value());
+ EXPECT_EQ("$\xf0\x90\x90\xb7", tmp);
+}
+
+TEST(uio_u8, input_utf16_le) {
+ auto uio = u8::open(io::memory("\x24\x00\x01\xD8\x37\xDC"s),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_LE});
+ std::string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(5, ret.value());
+ EXPECT_EQ("$\xf0\x90\x90\xb7", tmp);
+}
+
+TEST(uio_u16, input_utf8) {
+ auto uio = u16::open(io::memory("\xf0\x90\x8D\x85" "es"),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF8});
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 5);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(4, ret.value());
+ EXPECT_EQ(0xd800, tmp[0]);
+ EXPECT_EQ(0xdf45, tmp[1]);
+ EXPECT_EQ(0x65, tmp[2]);
+ EXPECT_EQ(0x73, tmp[3]);
+}
+
+TEST(uio_u8, skip) {
+ auto uio = u8::open(io::memory("\xf0\x90\x8D\x85" "es"),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF8});
+ std::string tmp;
+ auto ret = uio->repeat_skip(3);
+ ASSERT_FALSE(ret.has_value());
+ EXPECT_EQ(io::ReadError::MaxTooSmall, ret.error());
+ ret = uio->repeat_skip(4);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(4, ret.value());
+ ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(2, ret.value());
+ EXPECT_EQ("es", tmp);
+}
+
+TEST(uio_u16, skip_be) {
+ auto uio = u16::open(io::memory("\x00\x24\xD8\x01\xDC\x37"s),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_BE});
+ std::u16string tmp;
+ auto ret = uio->repeat_skip(4); // Note that this is in bytes
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(2, ret.value());
+ ret = uio->repeat_read(tmp, 5);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(2, ret.value());
+ EXPECT_EQ(0xd801, tmp[0]);
+ EXPECT_EQ(0xdc37, tmp[1]);
+}
+
+TEST(uio_u16, skip_le) {
+ auto uio = u16::open(io::memory("\x24\x00\x01\xD8\x37\xDC"s),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_LE});
+ std::u16string tmp;
+ auto ret = uio->repeat_skip(4); // Note that this is in bytes
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(2, ret.value());
+ ret = uio->repeat_read(tmp, 5);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(2, ret.value());
+ EXPECT_EQ(0xd801, tmp[0]);
+ EXPECT_EQ(0xdc37, tmp[1]);
+}