From 18a622f378b403788c67fc785d30f4609caa3fc7 Mon Sep 17 00:00:00 2001
From: Joel Klinghed
Date: Mon, 15 Sep 2025 20:52:51 +0200
Subject: uio: Unicode reader

Reads UTF-8 and UTF-16 into UTF-8 or UTF-16 strings.

If strict is true, fails at the first invalid character.
If strict is false, invalid characters are replaced with U+FFFD.

For the replacement, I changed the behavior of uN::read_replace to
only jump one byte. Otherwise a common invalid case, when ISO-8859-1
or WIN-1252 is read as UTF-8, would skip many characters.

If skip_bom is true, any BOM at the start of the stream is ignored.
If skip_bom is false, any BOM will be included.

The input format can be forced; if it is not, detection is used, which
will try to guess the format and then fall back to UTF-8.
---
 test/uio.cc | 669 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 669 insertions(+)
 create mode 100644 test/uio.cc

(limited to 'test/uio.cc')

diff --git a/test/uio.cc b/test/uio.cc
new file mode 100644
index 0000000..ce666c1
--- /dev/null
+++ b/test/uio.cc
@@ -0,0 +1,669 @@
+#include <gtest/gtest.h>
+
+#include "io.hh"
+#include "io_test_helper.hh"
+#include "uio.hh"
+
+#include <string>
+
+using namespace std::literals::string_literals;
+
+// An empty stream reads successfully and reports a count of 0.
+TEST(uio_u8, empty) {
+  auto uio = u8::open(io::memory(""));
+  std::string tmp;
+  auto ret = uio->repeat_read(tmp, 10);
+  ASSERT_TRUE(ret.has_value());
+  EXPECT_EQ(0, ret.value());
+}
+
+// An empty stream reads successfully and reports a count of 0.
+TEST(uio_u16, empty) {
+  auto uio = u16::open(io::memory(""));
+  std::u16string tmp;
+  auto ret = uio->repeat_read(tmp, 10);
+  ASSERT_TRUE(ret.has_value());
+  EXPECT_EQ(0, ret.value());
+}
+
+// Forced UTF-8 input is returned unchanged: a 4-byte sequence plus "es".
+TEST(uio_u8, sample) {
+  auto uio = u8::open(io::memory("\xf0\x90\x8D\x85" "es"),
+                      u::ReaderConfig{.input=u::ReaderInputFormat::UTF8});
+  std::string tmp;
+  auto ret = uio->repeat_read(tmp, 10);
+  ASSERT_TRUE(ret.has_value());
+  EXPECT_EQ(6, ret.value());
+  EXPECT_EQ("\xf0\x90\x8D\x85" "es", tmp);
+}
+
+// Forced UTF-16BE input: '$' plus a surrogate pair gives three code units.
+TEST(uio_u16, sample_be) {
+  auto uio = u16::open(io::memory("\x00\x24\xD8\x01\xDC\x37"s),
+                       u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_BE});
+  std::u16string tmp;
+  auto ret = uio->repeat_read(tmp, 5);
+  ASSERT_TRUE(ret.has_value());
+  ASSERT_EQ(3, ret.value());
+  EXPECT_EQ(0x24, tmp[0]);
+  EXPECT_EQ(0xd801, tmp[1]);
+  EXPECT_EQ(0xdc37, tmp[2]);
+}
+
+// Forced UTF-16LE input: '$' plus a surrogate pair gives three code units.
+TEST(uio_u16, sample_le) {
+  auto uio = u16::open(io::memory("\x24\x00\x01\xD8\x37\xDC"s),
+                       u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_LE});
+  std::u16string tmp;
+  auto ret = uio->repeat_read(tmp, 5);
+  ASSERT_TRUE(ret.has_value());
+  ASSERT_EQ(3, ret.value());
+  EXPECT_EQ(0x24, tmp[0]);
+  EXPECT_EQ(0xd801, tmp[1]);
+  EXPECT_EQ(0xdc37, tmp[2]);
+}
+
+// Default config (no forced input format): UTF-8 bytes are read as UTF-8.
+TEST(uio_u8, sample_detect) {
+  auto uio = u8::open(io::memory("\xf0\x90\x8D\x85" "es"));
+  std::string tmp;
+  auto ret = uio->repeat_read(tmp, 10);
+  ASSERT_TRUE(ret.has_value());
+  EXPECT_EQ(6, ret.value());
+  EXPECT_EQ("\xf0\x90\x8D\x85" "es", tmp);
+}
+
+// Default config: BOM-less UTF-16BE bytes are decoded as UTF-16BE.
+TEST(uio_u16, sample_detect_be) {
+  auto uio = u16::open(io::memory("\x00\x24\xD8\x01\xDC\x37"s));
+  std::u16string tmp;
+  auto ret = uio->repeat_read(tmp, 5);
+  ASSERT_TRUE(ret.has_value());
+  ASSERT_EQ(3, ret.value());
+  EXPECT_EQ(0x24, tmp[0]);
+  EXPECT_EQ(0xd801, tmp[1]);
+  EXPECT_EQ(0xdc37, tmp[2]);
+}
+
+// Default config: BOM-less UTF-16LE bytes are decoded as UTF-16LE.
+TEST(uio_u16, sample_detect_le) {
+  auto uio = u16::open(io::memory("\x24\x00\x01\xD8\x37\xDC"s));
+  std::u16string tmp;
+  auto ret = uio->repeat_read(tmp, 5);
+  ASSERT_TRUE(ret.has_value());
+  ASSERT_EQ(3, ret.value());
+  EXPECT_EQ(0x24, tmp[0]);
+  EXPECT_EQ(0xd801, tmp[1]);
+  EXPECT_EQ(0xdc37, tmp[2]);
+}
+
+// Strict UTF-8: "r" is read, then the invalid byte 0xe4 gives InvalidData.
+TEST(uio_u8, invalid) {
+  auto uio = u8::open(io::memory("r\xe4ksm\xf6rg\xe5s"),
+                      u::ReaderConfig{
+                        .strict=true,
+                        .input=u::ReaderInputFormat::UTF8,
+                      });
+  std::string tmp;
+  auto ret = uio->repeat_read(tmp, 20);
+  ASSERT_TRUE(ret.has_value());
+  EXPECT_EQ(1, ret.value());
+  EXPECT_EQ("r", tmp);
+  ret = uio->repeat_read(tmp, 20);
+  ASSERT_FALSE(ret.has_value());
+  EXPECT_EQ(io::ReadError::InvalidData, ret.error());
+}
+
+// Strict mode with format detection: "r" is read, then InvalidData.
+TEST(uio_u8, invalid_detect) {
+  auto uio = u8::open(io::memory("r\xe4ksm\xf6rg\xe5s"),
+                      u::ReaderConfig{.strict=true});
+  std::string tmp;
+  auto ret = uio->repeat_read(tmp, 20);
+  ASSERT_TRUE(ret.has_value());
+  EXPECT_EQ(1, ret.value());
+  EXPECT_EQ("r", tmp);
+  ret = uio->repeat_read(tmp, 20);
+  ASSERT_FALSE(ret.has_value());
+  EXPECT_EQ(io::ReadError::InvalidData, ret.error());
+}
+
+// Non-strict UTF-8: each invalid byte is replaced with U+FFFD (ef bf bd).
+TEST(uio_u8, invalid_replace) {
+  auto uio = u8::open(io::memory("r\xe4ksm\xf6rg\xe5s"),
+                      u::ReaderConfig{
+                        .strict=false,
+                        .input=u::ReaderInputFormat::UTF8,
+                      });
+  std::string tmp;
+  auto ret = uio->repeat_read(tmp, 20);
+  ASSERT_TRUE(ret.has_value());
+  EXPECT_EQ(16, ret.value());
+  EXPECT_EQ("r\xef\xbf\xbdksm\xef\xbf\xbdrg\xef\xbf\xbds", tmp);
+}
+
+// A stream wrapped by io_make_breaking() makes the read fail with Error.
+TEST(uio_u8, read_error) {
+  auto uio = u8::open(io_make_breaking(io::memory("\xef\xbf\xbd"), 1),
+                      u::ReaderConfig{.input=u::ReaderInputFormat::UTF8});
+  std::string tmp;
+  auto ret = uio->repeat_read(tmp, 10);
+  ASSERT_FALSE(ret.has_value());
+  EXPECT_EQ(io::ReadError::Error, ret.error());
+}
+
+// A stream wrapped by io_make_breaking() makes the read fail with Error.
+TEST(uio_u16, read_error) {
+  auto uio = u16::open(io_make_breaking(io::memory("\x00\x24"s), 1),
+                       u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_BE});
+  std::u16string tmp;
+  auto ret = uio->repeat_read(tmp, 10);
+  ASSERT_FALSE(ret.has_value());
+  EXPECT_EQ(io::ReadError::Error, ret.error());
+}
+
+// Strict UTF-8: a truncated sequence (lone 0xef) is reported as InvalidData.
+TEST(uio_u8, read_incomplete_strict) {
+  auto uio = u8::open(io::memory("\xef"),
+                      u::ReaderConfig{
+                        .strict=true,
+                        .input=u::ReaderInputFormat::UTF8,
+                      });
+  std::string tmp;
+  auto ret = uio->repeat_read(tmp, 10);
+  ASSERT_FALSE(ret.has_value());
+  EXPECT_EQ(io::ReadError::InvalidData, ret.error());
+}
+
+// Non-strict UTF-8: a truncated sequence (lone 0xef) becomes U+FFFD.
+TEST(uio_u8, read_incomplete) {
+  auto uio = u8::open(io::memory("\xef"),
+                      u::ReaderConfig{
+                        .strict=false,
+                        .input=u::ReaderInputFormat::UTF8,
+                      });
+  std::string tmp;
+  auto ret = uio->repeat_read(tmp, 10);
+  ASSERT_TRUE(ret.has_value());
+  EXPECT_EQ(3, ret.value());
+  EXPECT_EQ("\xef\xbf\xbd", tmp);
+}
+
+// Strict UTF-16BE: a single trailing byte is reported as InvalidData.
+TEST(uio_u16, read_incomplete_strict_be) {
+  auto uio = u16::open(io::memory("\x00"s),
+                       u::ReaderConfig{
+                         .strict=true,
+                         .input=u::ReaderInputFormat::UTF16_BE,
+                       });
+  std::u16string tmp;
+  auto ret = uio->repeat_read(tmp, 10);
+  ASSERT_FALSE(ret.has_value());
+  EXPECT_EQ(io::ReadError::InvalidData, ret.error());
+}
+
+// Non-strict UTF-16BE: a single trailing byte becomes U+FFFD.
+TEST(uio_u16, read_incomplete_be) {
+  auto uio = u16::open(io::memory("\x00"s),
+                       u::ReaderConfig{
+                         .strict=false,
+                         .input=u::ReaderInputFormat::UTF16_BE,
+                       });
+  std::u16string tmp;
+  auto ret = uio->repeat_read(tmp, 10);
+  ASSERT_TRUE(ret.has_value());
+  ASSERT_EQ(1, ret.value());
+  EXPECT_EQ(0xfffd, tmp[0]);
+}
+
+// Strict UTF-16LE: a single trailing byte is reported as InvalidData.
+TEST(uio_u16, read_incomplete_strict_le) {
+  auto uio = u16::open(io::memory("$"),
+                       u::ReaderConfig{
+                         .strict=true,
+                         .input=u::ReaderInputFormat::UTF16_LE,
+                       });
+  std::u16string tmp;
+  auto ret = uio->repeat_read(tmp, 10);
+  ASSERT_FALSE(ret.has_value());
+  EXPECT_EQ(io::ReadError::InvalidData, ret.error());
+}
+
+// Non-strict UTF-16LE: a single trailing byte becomes U+FFFD.
+TEST(uio_u16, read_incomplete_le) {
+  auto uio = u16::open(io::memory("$"),
+                       u::ReaderConfig{
+                         .strict=false,
+                         .input=u::ReaderInputFormat::UTF16_LE,
+                       });
+  std::u16string tmp;
+  auto ret = uio->repeat_read(tmp, 10);
+  ASSERT_TRUE(ret.has_value());
+  ASSERT_EQ(1, ret.value());
+  EXPECT_EQ(0xfffd, tmp[0]);
+}
+
+// read() with max 0 returns 0; max 2 is too small for the 4-byte character.
+TEST(uio_u8, max_too_small) {
+  auto uio = u8::open(io::memory("\xf0\x90\x8D\x85" "es"),
+                      u::ReaderConfig{.input=u::ReaderInputFormat::UTF8});
+  std::string tmp;
+  auto ret = uio->read(tmp.data(), 0);
+  ASSERT_TRUE(ret.has_value());
+  EXPECT_EQ(0, ret.value());
+
+  ret = uio->read(tmp, 2);
+  ASSERT_FALSE(ret.has_value());
+  EXPECT_EQ(io::ReadError::MaxTooSmall, ret.error());
+}
+
+// read() with max 0 returns 0; max 1 is too small for a surrogate pair.
+TEST(uio_u16, max_too_small_be) {
+  auto uio = u16::open(io::memory("\xD8\x01\xDC\x37"),
+                       u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_BE});
+  std::u16string tmp;
+  auto ret = uio->read(tmp.data(), 0);
+  ASSERT_TRUE(ret.has_value());
+  EXPECT_EQ(0, ret.value());
+
+  ret = uio->read(tmp, 1);
+  ASSERT_FALSE(ret.has_value());
+  EXPECT_EQ(io::ReadError::MaxTooSmall, ret.error());
+}
+
+// read() with max 0 returns 0; max 1 is too small for a surrogate pair.
+TEST(uio_u16, max_too_small_le) {
+  auto uio = u16::open(io::memory("\x01\xD8\x37\xDC"),
+                       u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_LE});
+  std::u16string tmp;
+  auto ret = uio->read(tmp.data(), 0);
+  ASSERT_TRUE(ret.has_value());
+  EXPECT_EQ(0, ret.value());
+
+  ret = uio->read(tmp, 1);
+  ASSERT_FALSE(ret.has_value());
+  EXPECT_EQ(io::ReadError::MaxTooSmall, ret.error());
+}
+
+// Two reads limited to 4 and 2 return the 4-byte character, then "es".
+TEST(uio_u8, partial) {
+  auto uio = u8::open(io::memory("\xf0\x90\x8D\x85" "es"),
+                      u::ReaderConfig{.input=u::ReaderInputFormat::UTF8});
+  std::string tmp;
+  auto ret = uio->repeat_read(tmp, 4);
+  ASSERT_TRUE(ret.has_value());
+  EXPECT_EQ(4, ret.value());
+  EXPECT_EQ("\xf0\x90\x8d\x85", tmp);
+
+  ret = uio->repeat_read(tmp, 2);
+  ASSERT_TRUE(ret.has_value());
+  EXPECT_EQ(2, ret.value());
+  EXPECT_EQ("es", tmp);
+}
+
+// Two reads limited to 1 and 2 return '$', then the surrogate pair.
+TEST(uio_u16, partial_be) {
+  auto uio = u16::open(io::memory("\x00\x24\xD8\x01\xDC\x37"s),
+                       u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_BE});
+  std::u16string tmp;
+  auto ret = uio->repeat_read(tmp, 1);
+  ASSERT_TRUE(ret.has_value());
+  ASSERT_EQ(1, ret.value());
+  EXPECT_EQ(0x24, tmp[0]);
+
+  ret = uio->repeat_read(tmp, 2);
+  ASSERT_TRUE(ret.has_value());
+  ASSERT_EQ(2, ret.value());
+  EXPECT_EQ(0xd801, tmp[0]);
+  EXPECT_EQ(0xdc37, tmp[1]);
+}
+
+// Two reads limited to 1 and 2 return '$', then the surrogate pair.
+TEST(uio_u16, partial_le) {
+  auto uio = u16::open(io::memory("\x24\x00\x01\xD8\x37\xDC"s),
+                       u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_LE});
+  std::u16string tmp;
+  auto ret = uio->repeat_read(tmp, 1);
+  ASSERT_TRUE(ret.has_value());
+  ASSERT_EQ(1, ret.value());
+  EXPECT_EQ(0x24, tmp[0]);
+
+  ret = uio->repeat_read(tmp, 2);
+  ASSERT_TRUE(ret.has_value());
+  ASSERT_EQ(2, ret.value());
+  EXPECT_EQ(0xd801, tmp[0]);
+  EXPECT_EQ(0xdc37, tmp[1]);
+}
+
+// Strict UTF-16BE: '$' is read, then a lone high surrogate gives InvalidData.
+TEST(uio_u16, invalid_be) {
+  auto uio = u16::open(io::memory("\x00\x24\xd8\x01"s),
+                       u::ReaderConfig{
+                         .strict=true,
+                         .input=u::ReaderInputFormat::UTF16_BE,
+                       });
+  std::u16string tmp;
+  auto ret = uio->repeat_read(tmp, 10);
+  ASSERT_TRUE(ret.has_value());
+  ASSERT_EQ(1, ret.value());
+  EXPECT_EQ(0x24, tmp[0]);
+  ret = uio->repeat_read(tmp, 10);
+  ASSERT_FALSE(ret.has_value());
+  EXPECT_EQ(io::ReadError::InvalidData, ret.error());
+}
+
+// Strict mode with format detection (BE data): '$', then InvalidData.
+TEST(uio_u16, invalid_detect_be) {
+  auto uio = u16::open(io::memory("\x00\x24\xd8\x01"s),
+                       u::ReaderConfig{.strict=true});
+  std::u16string tmp;
+  auto ret = uio->repeat_read(tmp, 10);
+  ASSERT_TRUE(ret.has_value());
+  ASSERT_EQ(1, ret.value());
+  EXPECT_EQ(0x24, tmp[0]);
+  ret = uio->repeat_read(tmp, 10);
+  ASSERT_FALSE(ret.has_value());
+  EXPECT_EQ(io::ReadError::InvalidData, ret.error());
+}
+
+// Non-strict UTF-16BE: a lone high surrogate is replaced with U+FFFD.
+TEST(uio_u16, invalid_replace_be) {
+  auto uio = u16::open(io::memory("\x00\x24\xd8\x01"s),
+                       u::ReaderConfig{
+                         .strict=false,
+                         .input=u::ReaderInputFormat::UTF16_BE,
+                       });
+  std::u16string tmp;
+  auto ret = uio->repeat_read(tmp, 10);
+  ASSERT_TRUE(ret.has_value());
+  ASSERT_EQ(2, ret.value());
+  EXPECT_EQ(0x24, tmp[0]);
+  EXPECT_EQ(0xfffd, tmp[1]);
+}
+
+// Strict UTF-16LE: '$' is read, then a lone high surrogate gives InvalidData.
+TEST(uio_u16, invalid_le) {
+  auto uio = u16::open(io::memory("\x24\x00\x01\xd8"s),
+                       u::ReaderConfig{
+                         .strict=true,
+                         .input=u::ReaderInputFormat::UTF16_LE,
+                       });
+  std::u16string tmp;
+  auto ret = uio->repeat_read(tmp, 10);
+  ASSERT_TRUE(ret.has_value());
+  ASSERT_EQ(1, ret.value());
+  EXPECT_EQ(0x24, tmp[0]);
+  ret = uio->repeat_read(tmp, 10);
+  ASSERT_FALSE(ret.has_value());
+  EXPECT_EQ(io::ReadError::InvalidData, ret.error());
+}
+
+// Strict mode with format detection (LE data): '$', then InvalidData.
+TEST(uio_u16, invalid_detect_le) {
+  auto uio = u16::open(io::memory("\x24\x00\x01\xd8"s),
+                       u::ReaderConfig{.strict=true});
+  std::u16string tmp;
+  auto ret = uio->repeat_read(tmp, 10);
+  ASSERT_TRUE(ret.has_value());
+  ASSERT_EQ(1, ret.value());
+  EXPECT_EQ(0x24, tmp[0]);
+  ret = uio->repeat_read(tmp, 10);
+  ASSERT_FALSE(ret.has_value());
+  EXPECT_EQ(io::ReadError::InvalidData, ret.error());
+}
+
+// Non-strict UTF-16LE: a lone high surrogate is replaced with U+FFFD.
+TEST(uio_u16, invalid_replace_le) {
+  auto uio = u16::open(io::memory("\x24\x00\x01\xd8"s),
+                       u::ReaderConfig{
+                         .strict=false,
+                         .input=u::ReaderInputFormat::UTF16_LE,
+                       });
+  std::u16string tmp;
+  auto ret = uio->repeat_read(tmp, 10);
+  ASSERT_TRUE(ret.has_value());
+  ASSERT_EQ(2, ret.value());
+  EXPECT_EQ(0x24, tmp[0]);
+  EXPECT_EQ(0xfffd, tmp[1]);
+}
+
+// Forced UTF-8 with default config: the leading BOM (ef bb bf) is dropped.
+TEST(uio_u8, bom) {
+  auto uio = u8::open(io::memory("\xef\xbb\xbf\xf0\x90\x8D\x85" "es"),
+                      u::ReaderConfig{.input=u::ReaderInputFormat::UTF8});
+  std::string tmp;
+  auto ret = uio->repeat_read(tmp, 10);
+  ASSERT_TRUE(ret.has_value());
+  EXPECT_EQ(6, ret.value());
+  EXPECT_EQ("\xf0\x90\x8D\x85" "es", tmp);
+}
+
+// skip_bom=false: the leading UTF-8 BOM is kept in the output.
+TEST(uio_u8, bom_keep) {
+  auto uio = u8::open(io::memory("\xef\xbb\xbf\xf0\x90\x8D\x85" "es"),
+                      u::ReaderConfig{
+                        .input=u::ReaderInputFormat::UTF8,
+                        .skip_bom=false,
+                      });
+  std::string tmp;
+  auto ret = uio->repeat_read(tmp, 10);
+  ASSERT_TRUE(ret.has_value());
+  EXPECT_EQ(9, ret.value());
+  EXPECT_EQ("\xef\xbb\xbf\xf0\x90\x8D\x85" "es", tmp);
+}
+
+// Forced UTF-16BE with default config: the leading BOM (fe ff) is dropped.
+TEST(uio_u16, bom_be) {
+  auto uio = u16::open(io::memory("\xfe\xff\x00\x24\xD8\x01\xDC\x37"s),
+                       u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_BE});
+  std::u16string tmp;
+  auto ret = uio->repeat_read(tmp, 5);
+  ASSERT_TRUE(ret.has_value());
+  ASSERT_EQ(3, ret.value());
+  EXPECT_EQ(0x24, tmp[0]);
+  EXPECT_EQ(0xd801, tmp[1]);
+  EXPECT_EQ(0xdc37, tmp[2]);
+}
+
+// Forced UTF-16LE with default config: the leading BOM (ff fe) is dropped.
+TEST(uio_u16, bom_le) {
+  auto uio = u16::open(io::memory("\xff\xfe\x24\x00\x01\xD8\x37\xDC"s),
+                       u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_LE});
+  std::u16string tmp;
+  auto ret = uio->repeat_read(tmp, 5);
+  ASSERT_TRUE(ret.has_value());
+  ASSERT_EQ(3, ret.value());
+  EXPECT_EQ(0x24, tmp[0]);
+  EXPECT_EQ(0xd801, tmp[1]);
+  EXPECT_EQ(0xdc37, tmp[2]);
+}
+
+// skip_bom=false: the UTF-16BE BOM is kept and read as U+FEFF.
+TEST(uio_u16, bom_keep_be) {
+  auto uio = u16::open(io::memory("\xfe\xff\x00\x24\xD8\x01\xDC\x37"s),
+                       u::ReaderConfig{
+                         .input=u::ReaderInputFormat::UTF16_BE,
+                         .skip_bom=false,
+                       });
+  std::u16string tmp;
+  auto ret = uio->repeat_read(tmp, 5);
+  ASSERT_TRUE(ret.has_value());
+  ASSERT_EQ(4, ret.value());
+  EXPECT_EQ(0xfeff, tmp[0]);
+  EXPECT_EQ(0x24, tmp[1]);
+  EXPECT_EQ(0xd801, tmp[2]);
+  EXPECT_EQ(0xdc37, tmp[3]);
+}
+
+// skip_bom=false: the UTF-16LE BOM is kept and read as U+FEFF.
+TEST(uio_u16, bom_keep_le) {
+  auto uio = u16::open(io::memory("\xff\xfe\x24\x00\x01\xD8\x37\xDC"s),
+                       u::ReaderConfig{
+                         .input=u::ReaderInputFormat::UTF16_LE,
+                         .skip_bom=false,
+                       });
+  std::u16string tmp;
+  auto ret = uio->repeat_read(tmp, 5);
+  ASSERT_TRUE(ret.has_value());
+  ASSERT_EQ(4, ret.value());
+  EXPECT_EQ(0xfeff, tmp[0]);
+  EXPECT_EQ(0x24, tmp[1]);
+  EXPECT_EQ(0xd801, tmp[2]);
+  EXPECT_EQ(0xdc37, tmp[3]);
+}
+
+// Format detection with default config: the UTF-8 BOM is dropped.
+TEST(uio_u8, bom_detect) {
+  auto uio = u8::open(io::memory("\xef\xbb\xbf\xf0\x90\x8D\x85" "es"));
+  std::string tmp;
+  auto ret = uio->repeat_read(tmp, 10);
+  ASSERT_TRUE(ret.has_value());
+  EXPECT_EQ(6, ret.value());
+  EXPECT_EQ("\xf0\x90\x8D\x85" "es", tmp);
+}
+
+// Format detection with skip_bom=false: the UTF-8 BOM is kept.
+TEST(uio_u8, bom_keep_detect) {
+  auto uio = u8::open(io::memory("\xef\xbb\xbf\xf0\x90\x8D\x85" "es"),
+                      u::ReaderConfig{.skip_bom=false});
+  std::string tmp;
+  auto ret = uio->repeat_read(tmp, 10);
+  ASSERT_TRUE(ret.has_value());
+  EXPECT_EQ(9, ret.value());
+  EXPECT_EQ("\xef\xbb\xbf\xf0\x90\x8D\x85" "es", tmp);
+}
+
+// Format detection with default config: the UTF-16BE BOM is dropped.
+TEST(uio_u16, bom_detect_be) {
+  auto uio = u16::open(io::memory("\xfe\xff\x00\x24\xD8\x01\xDC\x37"s));
+  std::u16string tmp;
+  auto ret = uio->repeat_read(tmp, 5);
+  ASSERT_TRUE(ret.has_value());
+  ASSERT_EQ(3, ret.value());
+  EXPECT_EQ(0x24, tmp[0]);
+  EXPECT_EQ(0xd801, tmp[1]);
+  EXPECT_EQ(0xdc37, tmp[2]);
+}
+
+// Format detection with default config: the UTF-16LE BOM is dropped.
+TEST(uio_u16, bom_detect_le) {
+  auto uio = u16::open(io::memory("\xff\xfe\x24\x00\x01\xD8\x37\xDC"s));
+  std::u16string tmp;
+  auto ret = uio->repeat_read(tmp, 5);
+  ASSERT_TRUE(ret.has_value());
+  ASSERT_EQ(3, ret.value());
+  EXPECT_EQ(0x24, tmp[0]);
+  EXPECT_EQ(0xd801, tmp[1]);
+  EXPECT_EQ(0xdc37, tmp[2]);
+}
+
+// Format detection with skip_bom=false: the UTF-16BE BOM is kept as U+FEFF.
+TEST(uio_u16, bom_keep_detect_be) {
+  auto uio = u16::open(io::memory("\xfe\xff\x00\x24\xD8\x01\xDC\x37"s),
+                       u::ReaderConfig{.skip_bom=false});
+  std::u16string tmp;
+  auto ret = uio->repeat_read(tmp, 5);
+  ASSERT_TRUE(ret.has_value());
+  ASSERT_EQ(4, ret.value());
+  EXPECT_EQ(0xfeff, tmp[0]);
+  EXPECT_EQ(0x24, tmp[1]);
+  EXPECT_EQ(0xd801, tmp[2]);
+  EXPECT_EQ(0xdc37, tmp[3]);
+}
+
+// Format detection with skip_bom=false: the UTF-16LE BOM is kept as U+FEFF.
+TEST(uio_u16, bom_keep_detect_le) {
+  auto uio = u16::open(io::memory("\xff\xfe\x24\x00\x01\xD8\x37\xDC"s),
+                       u::ReaderConfig{.skip_bom=false});
+  std::u16string tmp;
+  auto ret = uio->repeat_read(tmp, 5);
+  ASSERT_TRUE(ret.has_value());
+  ASSERT_EQ(4, ret.value());
+  EXPECT_EQ(0xfeff, tmp[0]);
+  EXPECT_EQ(0x24, tmp[1]);
+  EXPECT_EQ(0xd801, tmp[2]);
+  EXPECT_EQ(0xdc37, tmp[3]);
+}
+
+// UTF-16BE input read through the UTF-8 reader is transcoded to UTF-8.
+TEST(uio_u8, input_utf16_be) {
+  auto uio = u8::open(io::memory("\x00\x24\xD8\x01\xDC\x37"s),
+                      u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_BE});
+  std::string tmp;
+  auto ret = uio->repeat_read(tmp, 10);
+  ASSERT_TRUE(ret.has_value());
+  EXPECT_EQ(5, ret.value());
+  EXPECT_EQ("$\xf0\x90\x90\xb7", tmp);
+}
+
+// UTF-16LE input read through the UTF-8 reader is transcoded to UTF-8.
+TEST(uio_u8, input_utf16_le) {
+  auto uio = u8::open(io::memory("\x24\x00\x01\xD8\x37\xDC"s),
+                      u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_LE});
+  std::string tmp;
+  auto ret = uio->repeat_read(tmp, 10);
+  ASSERT_TRUE(ret.has_value());
+  EXPECT_EQ(5, ret.value());
+  EXPECT_EQ("$\xf0\x90\x90\xb7", tmp);
+}
+
+// UTF-8 input read through the UTF-16 reader: a surrogate pair, then "es".
+TEST(uio_u16, input_utf8) {
+  auto uio = u16::open(io::memory("\xf0\x90\x8D\x85" "es"),
+                       u::ReaderConfig{.input=u::ReaderInputFormat::UTF8});
+  std::u16string tmp;
+  auto ret = uio->repeat_read(tmp, 5);
+  ASSERT_TRUE(ret.has_value());
+  ASSERT_EQ(4, ret.value());
+  EXPECT_EQ(0xd800, tmp[0]);
+  EXPECT_EQ(0xdf45, tmp[1]);
+  EXPECT_EQ(0x65, tmp[2]);
+  EXPECT_EQ(0x73, tmp[3]);
+}
+
+// Skipping 3 cannot cover the 4-byte character; skipping 4 leaves "es".
+TEST(uio_u8, skip) {
+  auto uio = u8::open(io::memory("\xf0\x90\x8D\x85" "es"),
+                      u::ReaderConfig{.input=u::ReaderInputFormat::UTF8});
+  std::string tmp;
+  auto ret = uio->repeat_skip(3);
+  ASSERT_FALSE(ret.has_value());
+  EXPECT_EQ(io::ReadError::MaxTooSmall, ret.error());
+  ret = uio->repeat_skip(4);
+  ASSERT_TRUE(ret.has_value());
+  EXPECT_EQ(4, ret.value());
+  ret = uio->repeat_read(tmp, 10);
+  ASSERT_TRUE(ret.has_value());
+  EXPECT_EQ(2, ret.value());
+  EXPECT_EQ("es", tmp);
+}
+
+// Skipping 4 bytes consumes only '$' (returns 2); the surrogate pair remains.
+TEST(uio_u16, skip_be) {
+  auto uio = u16::open(io::memory("\x00\x24\xD8\x01\xDC\x37"s),
+                       u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_BE});
+  std::u16string tmp;
+  auto ret = uio->repeat_skip(4);  // Note that this is in bytes
+  ASSERT_TRUE(ret.has_value());
+  EXPECT_EQ(2, ret.value());
+  ret = uio->repeat_read(tmp, 5);
+  ASSERT_TRUE(ret.has_value());
+  ASSERT_EQ(2, ret.value());
+  EXPECT_EQ(0xd801, tmp[0]);
+  EXPECT_EQ(0xdc37, tmp[1]);
+}
+
+// Skipping 4 bytes consumes only '$' (returns 2); the surrogate pair remains.
+TEST(uio_u16, skip_le) {
+  auto uio = u16::open(io::memory("\x24\x00\x01\xD8\x37\xDC"s),
+                       u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_LE});
+  std::u16string tmp;
+  auto ret = uio->repeat_skip(4);  // Note that this is in bytes
+  ASSERT_TRUE(ret.has_value());
+  EXPECT_EQ(2, ret.value());
+  ret = uio->repeat_read(tmp, 5);
+  ASSERT_TRUE(ret.has_value());
+  ASSERT_EQ(2, ret.value());
+  EXPECT_EQ(0xd801, tmp[0]);
+  EXPECT_EQ(0xdc37, tmp[1]);
+}
-- 
cgit v1.3