// Unit tests for the UTF-16 readers utf::read16be and utf::read16le.
//
// Every call passes a byte buffer plus an in/out byte offset. The cases below
// expect a successful read to return the decoded code point and to advance the
// offset past the bytes it consumed, and a failed read (utf::NEED_MORE or
// utf::INVALID) to leave the offset where it was.
//
// Expected offsets are written as unsigned literals so that they are compared
// against the size_t offset without mixing signed and unsigned operands.

#include "utf16.hh"
#include "utf_error.hh"

#include <gtest/gtest.h>

#include <cstddef>
#include <string_view>

// Big-endian: well-formed input, one code point per buffer.
TEST(utf16be, sanity) {
  // U+0024. The length is explicit because the literal starts with a NUL byte.
  std::string_view str("\x00\x24", 2);
  size_t offset = 0;
  auto ret = utf::read16be(str, offset);
  EXPECT_EQ('$', ret);
  EXPECT_EQ(2u, offset);

  // U+20AC, encoded as a single 16-bit unit.
  str = "\x20\xAC";
  offset = 0;
  ret = utf::read16be(str, offset);
  EXPECT_EQ(0x20AC, ret);
  EXPECT_EQ(2u, offset);

  // U+10437, encoded as the surrogate pair D801 DC37.
  str = "\xD8\x01\xDC\x37";
  offset = 0;
  ret = utf::read16be(str, offset);
  EXPECT_EQ(0x10437, ret);
  EXPECT_EQ(4u, offset);

  // U+24B62, encoded as the surrogate pair D852 DF62.
  str = "\xD8\x52\xDF\x62";
  offset = 0;
  ret = utf::read16be(str, offset);
  EXPECT_EQ(0x24B62, ret);
  EXPECT_EQ(4u, offset);
}

// Little-endian: same code points as above with the bytes of each unit swapped.
TEST(utf16le, sanity) {
  // U+0024. The length is explicit because the literal contains a NUL byte.
  std::string_view str("\x24\x00", 2);
  size_t offset = 0;
  auto ret = utf::read16le(str, offset);
  EXPECT_EQ('$', ret);
  EXPECT_EQ(2u, offset);

  // U+20AC, encoded as a single 16-bit unit.
  str = "\xAC\x20";
  offset = 0;
  ret = utf::read16le(str, offset);
  EXPECT_EQ(0x20AC, ret);
  EXPECT_EQ(2u, offset);

  // U+10437, encoded as the surrogate pair D801 DC37.
  str = "\x01\xD8\x37\xDC";
  offset = 0;
  ret = utf::read16le(str, offset);
  EXPECT_EQ(0x10437, ret);
  EXPECT_EQ(4u, offset);

  // U+24B62, encoded as the surrogate pair D852 DF62.
  str = "\x52\xD8\x62\xDF";
  offset = 0;
  ret = utf::read16le(str, offset);
  EXPECT_EQ(0x24B62, ret);
  EXPECT_EQ(4u, offset);
}

// Big-endian: a leading U+FEFF is returned like any other code point, and
// consecutive reads walk the buffer using the same offset.
TEST(utf16be, bom) {
  std::string_view str("\xFE\xFF\x20\xAC");
  size_t offset = 0;

  auto ret = utf::read16be(str, offset);
  EXPECT_EQ(0xFEFF, ret);

  ret = utf::read16be(str, offset);
  EXPECT_EQ(0x20AC, ret);

  // Buffer exhausted: the reader asks for more and the offset sits at the end.
  ret = utf::read16be(str, offset);
  EXPECT_EQ(utf::NEED_MORE, ret);
  EXPECT_EQ(str.size(), offset);
}

// Little-endian counterpart of the BOM test above.
TEST(utf16le, bom) {
  std::string_view str("\xFF\xFE\xAC\x20");
  size_t offset = 0;

  auto ret = utf::read16le(str, offset);
  EXPECT_EQ(0xFEFF, ret);

  ret = utf::read16le(str, offset);
  EXPECT_EQ(0x20AC, ret);

  // Buffer exhausted: the reader asks for more and the offset sits at the end.
  ret = utf::read16le(str, offset);
  EXPECT_EQ(utf::NEED_MORE, ret);
  EXPECT_EQ(str.size(), offset);
}

// Big-endian: truncated and ill-formed input. The offset must stay at 0.
TEST(utf16be, invalid) {
  // A single byte is only half of a 16-bit unit.
  std::string_view str("\xD8");
  size_t offset = 0;
  auto ret = utf::read16be(str, offset);
  EXPECT_EQ(utf::NEED_MORE, ret);
  EXPECT_EQ(0u, offset);

  // Empty buffer.
  str = "";
  offset = 0;
  ret = utf::read16be(str, offset);
  EXPECT_EQ(utf::NEED_MORE, ret);
  EXPECT_EQ(0u, offset);

  // High surrogate D801 with nothing after it.
  str = "\xD8\x01";
  offset = 0;
  ret = utf::read16be(str, offset);
  EXPECT_EQ(utf::NEED_MORE, ret);
  EXPECT_EQ(0u, offset);

  // High surrogate D801 followed by only one byte of the next unit.
  str = "\xD8\x01\xDC";
  offset = 0;
  ret = utf::read16be(str, offset);
  EXPECT_EQ(utf::NEED_MORE, ret);
  EXPECT_EQ(0u, offset);

  // Low surrogate DC37 appears first, without a preceding high surrogate.
  str = "\xDC\x37\xD8\x01";
  offset = 0;
  ret = utf::read16be(str, offset);
  EXPECT_EQ(utf::INVALID, ret);
  EXPECT_EQ(0u, offset);

  // High surrogate D801 followed by another high surrogate instead of a low one.
  str = "\xD8\x01\xD8\x01";
  offset = 0;
  ret = utf::read16be(str, offset);
  EXPECT_EQ(utf::INVALID, ret);
  EXPECT_EQ(0u, offset);
}

// Little-endian: truncated and ill-formed input. The offset must stay at 0.
TEST(utf16le, invalid) {
  // A single byte is only half of a 16-bit unit.
  std::string_view str("\x01");
  size_t offset = 0;
  auto ret = utf::read16le(str, offset);
  EXPECT_EQ(utf::NEED_MORE, ret);
  EXPECT_EQ(0u, offset);

  // Empty buffer.
  str = "";
  offset = 0;
  ret = utf::read16le(str, offset);
  EXPECT_EQ(utf::NEED_MORE, ret);
  EXPECT_EQ(0u, offset);

  // High surrogate D801 with nothing after it.
  str = "\x01\xD8";
  offset = 0;
  ret = utf::read16le(str, offset);
  EXPECT_EQ(utf::NEED_MORE, ret);
  EXPECT_EQ(0u, offset);

  // High surrogate D801 followed by only one byte of the next unit.
  str = "\x01\xD8\x37";
  offset = 0;
  ret = utf::read16le(str, offset);
  EXPECT_EQ(utf::NEED_MORE, ret);
  EXPECT_EQ(0u, offset);

  // Low surrogate DC37 appears first, without a preceding high surrogate.
  str = "\x37\xDC\x01\xD8";
  offset = 0;
  ret = utf::read16le(str, offset);
  EXPECT_EQ(utf::INVALID, ret);
  EXPECT_EQ(0u, offset);

  // High surrogate D801 followed by another high surrogate instead of a low one.
  str = "\x01\xD8\x01\xD8";
  offset = 0;
  ret = utf::read16le(str, offset);
  EXPECT_EQ(utf::INVALID, ret);
  EXPECT_EQ(0u, offset);
}