From 50348284f5d82ccfd65b0c803ba0ba895912ceff Mon Sep 17 00:00:00 2001 From: Joel Klinghed Date: Thu, 18 Sep 2025 23:57:56 +0200 Subject: java::uescape: Unicode reader that knows about Java's \uXXXX escapes --- test/java_uescape.cc | 262 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 262 insertions(+) create mode 100644 test/java_uescape.cc (limited to 'test') diff --git a/test/java_uescape.cc b/test/java_uescape.cc new file mode 100644 index 0000000..a6657d8 --- /dev/null +++ b/test/java_uescape.cc @@ -0,0 +1,262 @@ +#include "java_uescape.hh" + +#include "io_test_helper.hh" +#include "uline.hh" + +#include +#include + +TEST(java_uescape_u8, escaped_escape) { + auto reader = u8::java::open(io::memory(R"(\\u2122=\u2122)")); + std::string tmp; + auto ret = reader->repeat_read(tmp, 20); + ASSERT_TRUE(ret.has_value()); + EXPECT_EQ(11, ret.value()); + EXPECT_EQ(R"(\\u2122=)" + "\xe2\x84\xa2", + tmp); +} + +TEST(java_uescape_u8, no_double_escape) { + auto reader = u8::java::open(io::memory(R"(\u005cu005a)")); + std::string tmp; + auto ret = reader->repeat_read(tmp, 10); + ASSERT_TRUE(ret.has_value()); + EXPECT_EQ(6, ret.value()); + EXPECT_EQ(R"(\u005a)", tmp); +} + +TEST(java_uescape_u8, double_u) { + auto reader = u8::java::open(io::memory(R"(\uu005a)")); + std::string tmp; + auto ret = reader->repeat_read(tmp, 10); + ASSERT_TRUE(ret.has_value()); + EXPECT_EQ(1, ret.value()); + EXPECT_EQ("Z", tmp); +} + +TEST(java_uescape_u8, one_by_one) { + auto reader = u8::java::open(io_make_max_block( + io::memory(R"(\u0066\u006f\u006f\u000d\u000a\u0062\u0061\u0072)"), 1)); + std::string tmp; + auto ret = reader->repeat_read(tmp, 20); + ASSERT_TRUE(ret.has_value()); + EXPECT_EQ(8, ret.value()); + EXPECT_EQ("foo\r\nbar", tmp); +} + +TEST(java_uescape_u8, line) { + auto reader = u8::line::open(u8::java::open( + io::memory(R"(\u0066\u006f\u006f\u000d\u000a\u0062\u0061\u0072)"))); + auto ret = reader->read(); + ASSERT_TRUE(ret.has_value()); + EXPECT_EQ("foo", ret.value()); + ret = reader->read(); + ASSERT_TRUE(ret.has_value()); + EXPECT_EQ("bar", ret.value()); + ret = reader->read(); + EXPECT_FALSE(ret.has_value()); +} + +TEST(java_uescape_u8, incomplete) { + auto reader = u8::java::open(io::memory(R"(\)")); + std::string tmp; + auto ret = reader->repeat_read(tmp, 10); + ASSERT_TRUE(ret.has_value()); + EXPECT_EQ(1, ret.value()); + EXPECT_EQ(R"(\)", tmp); + + reader = u8::java::open(io::memory(R"(\u)")); + ret = reader->repeat_read(tmp, 10); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(io::ReadError::InvalidData, ret.error()); + + reader = u8::java::open(io::memory(R"(\\\u)")); + ret = reader->repeat_read(tmp, 10); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(io::ReadError::InvalidData, ret.error()); + + reader = u8::java::open(io::memory(R"(\uu)")); + ret = reader->repeat_read(tmp, 10); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(io::ReadError::InvalidData, ret.error()); + + reader = u8::java::open(io::memory(R"(\u0)")); + ret = reader->repeat_read(tmp, 10); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(io::ReadError::InvalidData, ret.error()); + + reader = u8::java::open(io::memory(R"(\u00)")); + ret = reader->repeat_read(tmp, 10); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(io::ReadError::InvalidData, ret.error()); + + reader = u8::java::open(io::memory(R"(\u006)")); + ret = reader->repeat_read(tmp, 10); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(io::ReadError::InvalidData, ret.error()); +} + +TEST(java_uescape_u8, surrogate) { + auto reader = u8::java::open(io::memory(R"(\ud801\udc37)")); + std::string tmp; + auto ret = reader->repeat_read(tmp, 10); + ASSERT_TRUE(ret.has_value()); + EXPECT_EQ(4, ret.value()); + EXPECT_EQ("\xf0\x90\x90\xb7", tmp); +} + +TEST(java_uescape_u8, bad_surrogate) { + auto reader = u8::java::open(io::memory(R"(\udc37\ud801)")); + std::string tmp; + auto ret = reader->repeat_read(tmp, 10); + ASSERT_TRUE(ret.has_value()); + EXPECT_EQ(6, ret.value()); + EXPECT_EQ("\xef\xbf\xbd\xef\xbf\xbd", tmp); + + reader = u8::java::open(io::memory(R"(\udc37)")); + ret = reader->repeat_read(tmp, 10); + ASSERT_TRUE(ret.has_value()); + EXPECT_EQ(3, ret.value()); + EXPECT_EQ("\xef\xbf\xbd", tmp); + + reader = u8::java::open(io::memory(R"(\ud801)")); + ret = reader->repeat_read(tmp, 10); + ASSERT_TRUE(ret.has_value()); + EXPECT_EQ(3, ret.value()); + EXPECT_EQ("\xef\xbf\xbd", tmp); + + reader = u8::java::open(io::memory(R"(\ud801\u)")); + ret = reader->repeat_read(tmp, 10); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(io::ReadError::InvalidData, ret.error()); +} + +TEST(java_uescape_u16, escaped_escape) { + auto reader = u16::java::open(io::memory(R"(\\u2122=\u2122)")); + std::u16string tmp; + auto ret = reader->repeat_read(tmp, 20); + ASSERT_TRUE(ret.has_value()); + EXPECT_EQ(9, ret.value()); + EXPECT_EQ( + uR"(\\u2122=)" + u"\u2122", + tmp); +} + +TEST(java_uescape_u16, no_double_escape) { + auto reader = u16::java::open(io::memory(R"(\u005cu005a)")); + std::u16string tmp; + auto ret = reader->repeat_read(tmp, 10); + ASSERT_TRUE(ret.has_value()); + EXPECT_EQ(6, ret.value()); + EXPECT_EQ(uR"(\u005a)", tmp); +} + +TEST(java_uescape_u16, double_u) { + auto reader = u16::java::open(io::memory(R"(\uu005a)")); + std::u16string tmp; + auto ret = reader->repeat_read(tmp, 10); + ASSERT_TRUE(ret.has_value()); + EXPECT_EQ(1, ret.value()); + EXPECT_EQ(u"Z", tmp); +} + +TEST(java_uescape_u16, one_by_one) { + auto reader = u16::java::open(io_make_max_block( + io::memory(R"(\u0066\u006f\u006f\u000d\u000a\u0062\u0061\u0072)"), 1)); + std::u16string tmp; + auto ret = reader->repeat_read(tmp, 20); + ASSERT_TRUE(ret.has_value()); + EXPECT_EQ(8, ret.value()); + EXPECT_EQ(u"foo\r\nbar", tmp); +} + +TEST(java_uescape_u16, line) { + auto reader = u16::line::open(u16::java::open( + io::memory(R"(\u0066\u006f\u006f\u000d\u000a\u0062\u0061\u0072)"))); + auto ret = reader->read(); + ASSERT_TRUE(ret.has_value()); + EXPECT_EQ(u"foo", ret.value()); + ret = reader->read(); + ASSERT_TRUE(ret.has_value()); + EXPECT_EQ(u"bar", ret.value()); + ret = reader->read(); + EXPECT_FALSE(ret.has_value()); +} + +TEST(java_uescape_u16, incomplete) { + auto reader = u16::java::open(io::memory(R"(\)")); + std::u16string tmp; + auto ret = reader->repeat_read(tmp, 10); + ASSERT_TRUE(ret.has_value()); + EXPECT_EQ(1, ret.value()); + EXPECT_EQ(uR"(\)", tmp); + + reader = u16::java::open(io::memory(R"(\u)")); + ret = reader->repeat_read(tmp, 10); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(io::ReadError::InvalidData, ret.error()); + + reader = u16::java::open(io::memory(R"(\\\u)")); + ret = reader->repeat_read(tmp, 10); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(io::ReadError::InvalidData, ret.error()); + + reader = u16::java::open(io::memory(R"(\uu)")); + ret = reader->repeat_read(tmp, 10); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(io::ReadError::InvalidData, ret.error()); + + reader = u16::java::open(io::memory(R"(\u0)")); + ret = reader->repeat_read(tmp, 10); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(io::ReadError::InvalidData, ret.error()); + + reader = u16::java::open(io::memory(R"(\u00)")); + ret = reader->repeat_read(tmp, 10); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(io::ReadError::InvalidData, ret.error()); + + reader = u16::java::open(io::memory(R"(\u006)")); + ret = reader->repeat_read(tmp, 10); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(io::ReadError::InvalidData, ret.error()); +} + +TEST(java_uescape_u16, surrogate) { + auto reader = u16::java::open(io::memory(R"(\ud801\udc37)")); + std::u16string tmp; + auto ret = reader->repeat_read(tmp, 10); + ASSERT_TRUE(ret.has_value()); + EXPECT_EQ(2, ret.value()); + EXPECT_EQ(0xd801, tmp[0]); + EXPECT_EQ(0xdc37, tmp[1]); +} + +TEST(java_uescape_u16, bad_surrogate) { + auto reader = u16::java::open(io::memory(R"(\udc37\ud801)")); + std::u16string tmp; + auto ret = reader->repeat_read(tmp, 10); + ASSERT_TRUE(ret.has_value()); + EXPECT_EQ(2, ret.value()); + EXPECT_EQ(0xfffd, tmp[0]); + EXPECT_EQ(0xfffd, tmp[1]); + + reader = u16::java::open(io::memory(R"(\udc37)")); + ret = reader->repeat_read(tmp, 10); + ASSERT_TRUE(ret.has_value()); + EXPECT_EQ(1, ret.value()); + EXPECT_EQ(0xfffd, tmp[0]); + + reader = u16::java::open(io::memory(R"(\ud801)")); + ret = reader->repeat_read(tmp, 10); + ASSERT_TRUE(ret.has_value()); + EXPECT_EQ(1, ret.value()); + EXPECT_EQ(0xfffd, tmp[0]); + + reader = u16::java::open(io::memory(R"(\ud801\u)")); + ret = reader->repeat_read(tmp, 10); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(io::ReadError::InvalidData, ret.error()); +} -- cgit v1.3