From 18a622f378b403788c67fc785d30f4609caa3fc7 Mon Sep 17 00:00:00 2001 From: Joel Klinghed Date: Mon, 15 Sep 2025 20:52:51 +0200 Subject: uio: Unicode reader Reads UTF-8 and UTF-16 into UTF-8 or UTF-16 strings. If strict is true, fails at first invalid character. If strict is false, invalid characters are replaced with U+FFFD. For the replacement, I changed behavior of uN::read_replace to only jump one byte. Otherwise a common invalid case when ISO-8859-1 or WIN-1252 are read as UTF-8 would skip many characters. If skip_bom is true any bom at start of stream is ignored. If skip_bom is false any bom will be included. Input format can be forced; if not, detection is used, which will try to guess and then fall back to UTF-8. --- test/io.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'test/io.cc') diff --git a/test/io.cc b/test/io.cc index 23c10d4..2441138 100644 --- a/test/io.cc +++ b/test/io.cc @@ -124,7 +124,7 @@ TEST_F(IoTest, read_empty) { auto ret = io::openat(dirfd(), "test"); ASSERT_TRUE(ret.has_value()); std::string tmp(10, ' '); - auto ret2 = ret.value()->read(tmp); + auto ret2 = ret.value()->read(tmp.data(), tmp.size()); ASSERT_TRUE(ret2.has_value()); EXPECT_EQ(0, ret2.value()); } @@ -145,7 +145,7 @@ TEST_F(IoTest, read) { auto ret = io::openat(dirfd(), "test"); ASSERT_TRUE(ret.has_value()); std::string tmp(12, ' '); - auto ret2 = ret.value()->repeat_read(tmp); + auto ret2 = ret.value()->repeat_read(tmp.data(), tmp.size()); ASSERT_TRUE(ret2.has_value()); EXPECT_EQ(11, ret2.value()); tmp.resize(ret2.value()); @@ -161,7 +161,7 @@ TEST_F(IoTest, skip) { ASSERT_TRUE(ret2.has_value()); EXPECT_EQ(6, ret2.value()); std::string tmp(12, ' '); - auto ret3 = ret.value()->repeat_read(tmp); + auto ret3 = ret.value()->repeat_read(tmp.data(), tmp.size()); ASSERT_TRUE(ret3.has_value()); EXPECT_EQ(5, ret3.value()); tmp.resize(ret3.value()); @@ -175,7 +175,7 @@ TEST_F(IoTest, read_block) { ASSERT_TRUE(ret.has_value()); auto ret2 = 
io_make_max_block(std::move(ret.value()), 2); std::string tmp(12, ' '); - auto ret3 = ret2->repeat_read(tmp); + auto ret3 = ret2->repeat_read(tmp.data(), tmp.size()); ASSERT_TRUE(ret3.has_value()); EXPECT_EQ(11, ret3.value()); tmp.resize(ret3.value()); @@ -192,7 +192,7 @@ TEST_F(IoTest, skip_block) { ASSERT_TRUE(ret3.has_value()); EXPECT_EQ(6, ret3.value()); std::string tmp(12, ' '); - auto ret4 = ret2->repeat_read(tmp); + auto ret4 = ret2->repeat_read(tmp.data(), tmp.size()); ASSERT_TRUE(ret4.has_value()); EXPECT_EQ(5, ret4.value()); tmp.resize(ret4.value()); -- cgit v1.3