// Unit tests for the UTF-8 helpers utf::read8 (decode one code point from a
// byte range, advancing an offset) and utf::write8 (encode one code point into
// an output range, advancing an offset). The tests are written with the
// GoogleTest / GoogleMock macros (TEST, EXPECT_*, testing::ElementsAre).
//
// NOTE(review): in this copy of the file the four directives after
// "utf_error.hh" carry no header name, and every std::array / std::span is
// spelled without template arguments. Presumably the <...> parts were lost
// somewhere along the way — restore them from the original before building.
#include "utf8.hh"
#include "utf_error.hh"
#include
#include
#include
#include

// Decoding of single well-formed sequences of 1 to 4 bytes. Each case starts
// from offset 0 and expects both the decoded code point and an offset equal to
// the number of bytes in the sequence.
TEST(utf8, read_sanity) {
  size_t offset = 0;
  // 1 byte: '$'.
  auto ret = utf::read8(std::array({'$'}), offset);
  EXPECT_EQ('$', ret);
  EXPECT_EQ(1, offset);

  // 2 bytes: C2 A3 -> U+00A3.
  offset = 0;
  ret = utf::read8(std::array({0xC2, 0xA3}), offset);
  EXPECT_EQ(0xa3, ret);
  EXPECT_EQ(2, offset);

  // 2 bytes: D0 98 -> U+0418.
  offset = 0;
  ret = utf::read8(std::array({0xD0, 0x98}), offset);
  EXPECT_EQ(0x418, ret);
  EXPECT_EQ(2, offset);

  // 3 bytes: E0 A4 B9 -> U+0939.
  offset = 0;
  ret = utf::read8(std::array({0xE0, 0xA4, 0xB9}), offset);
  EXPECT_EQ(0x939, ret);
  EXPECT_EQ(3, offset);

  // 3 bytes: E2 82 AC -> U+20AC.
  offset = 0;
  ret = utf::read8(std::array({0xE2, 0x82, 0xAC}), offset);
  EXPECT_EQ(0x20AC, ret);
  EXPECT_EQ(3, offset);

  // 3 bytes: ED 95 9C -> U+D55C (lead byte ED, but below the surrogate range).
  offset = 0;
  ret = utf::read8(std::array({0xED, 0x95, 0x9C}), offset);
  EXPECT_EQ(0xD55C, ret);
  EXPECT_EQ(3, offset);

  // 4 bytes: F0 90 8D 88 -> U+10348.
  offset = 0;
  ret = utf::read8(std::array({0xF0, 0x90, 0x8D, 0x88}), offset);
  EXPECT_EQ(0x10348, ret);
  EXPECT_EQ(4, offset);
}

// Encoding counterpart of read_sanity: the same code points are written with
// utf::write8, which is expected to return true, produce exactly the bytes
// listed, and leave offset at the encoded length.
TEST(utf8, write_sanity) {
  // NOTE(review): element type and size of `out` are not visible in this copy;
  // it has to hold at least the 4 bytes of the last case — confirm.
  std::array out;
  size_t offset = 0;
  // Only the first `offset` bytes of `out` are compared in each case.
  EXPECT_TRUE(utf::write8('$', out, offset));
  EXPECT_THAT(std::span(out).subspan(0, offset), testing::ElementsAre('$'));
  EXPECT_EQ(1, offset);

  // U+00A3 -> C2 A3.
  offset = 0;
  EXPECT_TRUE(utf::write8(0xa3, out, offset));
  EXPECT_THAT(std::span(out).subspan(0, offset),
              testing::ElementsAre(0xC2, 0xA3));
  EXPECT_EQ(2, offset);

  // U+0418 -> D0 98.
  offset = 0;
  EXPECT_TRUE(utf::write8(0x418, out, offset));
  EXPECT_THAT(std::span(out).subspan(0, offset),
              testing::ElementsAre(0xD0, 0x98));
  EXPECT_EQ(2, offset);

  // U+0939 -> E0 A4 B9.
  offset = 0;
  EXPECT_TRUE(utf::write8(0x939, out, offset));
  EXPECT_THAT(std::span(out).subspan(0, offset),
              testing::ElementsAre(0xE0, 0xA4, 0xB9));
  EXPECT_EQ(3, offset);

  // U+20AC -> E2 82 AC.
  offset = 0;
  EXPECT_TRUE(utf::write8(0x20AC, out, offset));
  EXPECT_THAT(std::span(out).subspan(0, offset),
              testing::ElementsAre(0xE2, 0x82, 0xAC));
  EXPECT_EQ(3, offset);

  // U+D55C -> ED 95 9C.
  offset = 0;
  EXPECT_TRUE(utf::write8(0xD55C, out, offset));
  EXPECT_THAT(std::span(out).subspan(0, offset),
              testing::ElementsAre(0xED, 0x95, 0x9C));
  EXPECT_EQ(3, offset);

  // U+10348 -> F0 90 8D 88.
  offset = 0;
  EXPECT_TRUE(utf::write8(0x10348, out, offset));
  EXPECT_THAT(std::span(out).subspan(0, offset),
              testing::ElementsAre(0xF0, 0x90, 0x8D, 0x88));
  EXPECT_EQ(4, offset);
}

// Overlong encodings (a code point spelled with more bytes than necessary) are
// expected to be rejected with utf::INVALID, leaving offset at 0.
TEST(utf8, read_overlong) {
  size_t offset = 0;
  // 4-byte form whose payload bits are 0x20AC (normally 3 bytes).
  auto ret = utf::read8(
      std::array({0xF0, 0x82, 0x82, 0xAC}), offset);
  EXPECT_EQ(utf::INVALID, ret);
  EXPECT_EQ(0, offset);

  // 3-byte form whose payload bits are 0x41 (normally 1 byte).
  offset = 0;
  ret = utf::read8(std::array({0xE0, 0x81, 0x81}), offset);
  EXPECT_EQ(utf::INVALID, ret);
  EXPECT_EQ(0, offset);

  // 2-byte form whose payload bits are 0x00 (normally 1 byte).
  offset = 0;
  ret = utf::read8(std::array({0xC0, 0x80}), offset);
  EXPECT_EQ(utf::INVALID, ret);
  EXPECT_EQ(0, offset);
}

// Malformed and truncated input. Malformed bytes are expected to yield
// utf::INVALID, input that merely ends too early utf::NEED_MORE; in every case
// offset is expected to stay at 0.
TEST(utf8, read_invalid) {
  size_t offset = 0;
  // ED B0 80 carries the payload 0xDC00, which lies in the surrogate range.
  auto ret = utf::read8(std::array({0xED, 0xB0, 0x80}), offset);
  EXPECT_EQ(utf::INVALID, ret);
  EXPECT_EQ(0, offset);

  // 0xFB is not a valid UTF-8 lead byte.
  offset = 0;
  ret = utf::read8(std::array({0xFB, 0xFF, 0xFF}), offset);
  EXPECT_EQ(utf::INVALID, ret);
  EXPECT_EQ(0, offset);

  // 0xFF never appears in UTF-8.
  offset = 0;
  ret = utf::read8(
      std::array({0xFF, 0xFF, 0xFF, 0xFF, 0xFF}), offset);
  EXPECT_EQ(utf::INVALID, ret);
  EXPECT_EQ(0, offset);

  // Empty input: nothing to decode yet.
  offset = 0;
  ret = utf::read8(std::array(), offset);
  EXPECT_EQ(utf::NEED_MORE, ret);
  EXPECT_EQ(0, offset);

  // A continuation byte (0x80) with no lead byte before it.
  offset = 0;
  ret = utf::read8(std::array({0x80}), offset);
  EXPECT_EQ(utf::INVALID, ret);
  EXPECT_EQ(0, offset);

  // 2-byte lead with its continuation byte missing.
  offset = 0;
  ret = utf::read8(std::array({0xC2}), offset);
  EXPECT_EQ(utf::NEED_MORE, ret);
  EXPECT_EQ(0, offset);

  // 2-byte lead followed by 0x03, which is not a continuation byte.
  offset = 0;
  ret = utf::read8(std::array({0xC2, 0x03}), offset);
  EXPECT_EQ(utf::INVALID, ret);
  EXPECT_EQ(0, offset);

  // 3-byte sequence cut off after two bytes.
  offset = 0;
  ret = utf::read8(std::array({0xE0, 0xA4}), offset);
  EXPECT_EQ(utf::NEED_MORE, ret);
  EXPECT_EQ(0, offset);

  // 4-byte sequence cut off after three bytes.
  offset = 0;
  ret = utf::read8(std::array({0xF0, 0x90, 0x8D}), offset);
  EXPECT_EQ(utf::NEED_MORE, ret);
  EXPECT_EQ(0, offset);
}

// Sequential decoding of a mixed 1-, 2- and 3-byte stream. The bytes are the
// UTF-8 form of "Mình nói tiếng Việt". offset is deliberately NOT reset
// between calls: each read is expected to continue where the previous one
// stopped, so the order of the statements below matters.
TEST(utf8, read_multiple1) {
  std::array data({
      0x4D, 0xC3, 0xAC, 0x6E, 0x68, 0x20, 0x6E, 0xC3, 0xB3,
      0x69, 0x20, 0x74, 0x69, 0xE1, 0xBA, 0xBF, 0x6E, 0x67,
      0x20, 0x56, 0x69, 0xE1, 0xBB, 0x87, 0x74
  });
  size_t offset = 0;
  auto ret = utf::read8(data, offset);
  EXPECT_EQ('M', ret);
  ret = utf::read8(data, offset);
  EXPECT_EQ(0xEC, ret);  // C3 AC
  ret = utf::read8(data, offset);
  EXPECT_EQ('n', ret);
  ret = utf::read8(data, offset);
  EXPECT_EQ('h', ret);
  ret = utf::read8(data, offset);
  EXPECT_EQ(' ', ret);
  ret = utf::read8(data, offset);
  EXPECT_EQ('n', ret);
  ret = utf::read8(data, offset);
  EXPECT_EQ(0xF3, ret);  // C3 B3
  ret = utf::read8(data, offset);
  EXPECT_EQ('i', ret);
  ret = utf::read8(data, offset);
  EXPECT_EQ(' ', ret);
  ret = utf::read8(data, offset);
  EXPECT_EQ('t', ret);
  ret = utf::read8(data, offset);
  EXPECT_EQ('i', ret);
  ret = utf::read8(data, offset);
  EXPECT_EQ(0x1EBF, ret);  // E1 BA BF
  ret = utf::read8(data, offset);
  EXPECT_EQ('n', ret);
  ret = utf::read8(data, offset);
  EXPECT_EQ('g', ret);
  ret = utf::read8(data, offset);
  EXPECT_EQ(' ', ret);
  ret = utf::read8(data, offset);
  EXPECT_EQ('V', ret);
  ret = utf::read8(data, offset);
  EXPECT_EQ('i', ret);
  ret = utf::read8(data, offset);
  EXPECT_EQ(0x1EC7, ret);  // E1 BB 87
  ret = utf::read8(data, offset);
  EXPECT_EQ('t', ret);
  // One read past the last character: the input is exhausted, so NEED_MORE is
  // expected and offset must sit exactly at the end of the data.
  ret = utf::read8(data, offset);
  EXPECT_EQ(utf::NEED_MORE, ret);
  EXPECT_EQ(data.size(), offset);
}

// Sequential decoding of a stream made only of multi-byte sequences (one
// 4-byte sequence followed by three 3-byte ones), again sharing one offset
// across the calls.
TEST(utf8, read_multiple2) {
  std::array data({
      0xF0, 0xA8, 0x89, 0x9F, 0xE5, 0x91, 0x90, 0xE3, 0x97, 0x82, 0xE8, 0xB6,
      0x8A,
  });
  size_t offset = 0;
  auto ret = utf::read8(data, offset);
  EXPECT_EQ(0x2825F, ret);  // F0 A8 89 9F
  ret = utf::read8(data, offset);
  EXPECT_EQ(0x5450, ret);  // E5 91 90
  ret = utf::read8(data, offset);
  EXPECT_EQ(0x35C2, ret);  // E3 97 82
  ret = utf::read8(data, offset);
  EXPECT_EQ(0x8D8A, ret);  // E8 B6 8A
  // Input exhausted: NEED_MORE, with offset at the end of the data.
  ret = utf::read8(data, offset);
  EXPECT_EQ(utf::NEED_MORE, ret);
  EXPECT_EQ(data.size(), offset);
}

// Writing into an output range that is too short for the encoding. Every call
// is expected to return false and leave offset at 0; offset is therefore never
// reset between the cases.
TEST(utf8, write_no_space) {
  // NOTE(review): element type and size of `data` are not visible in this
  // copy; subspan(0, 3) below needs at least 3 elements — confirm.
  std::array data;
  std::span out(data);
  size_t offset = 0;
  // 1-byte encoding, 0 bytes available.
  EXPECT_FALSE(utf::write8('$', out.subspan(0, 0), offset));
  EXPECT_EQ(0u, offset);
  // 2-byte encoding, 1 byte available.
  EXPECT_FALSE(utf::write8(0xa3, out.subspan(0, 1), offset));
  EXPECT_EQ(0u, offset);
  // 2-byte encoding, 0 bytes available.
  EXPECT_FALSE(utf::write8(0x418, out.subspan(0, 0), offset));
  EXPECT_EQ(0u, offset);
  // 3-byte encoding, 2 bytes available.
  EXPECT_FALSE(utf::write8(0x939, out.subspan(0, 2), offset));
  EXPECT_EQ(0u, offset);
  // 3-byte encoding, 0 bytes available.
  EXPECT_FALSE(utf::write8(0x20AC, out.subspan(0, 0), offset));
  EXPECT_EQ(0u, offset);
  // 4-byte encoding, 3 bytes available.
  EXPECT_FALSE(utf::write8(0x10348,
                           out.subspan(0, 3), offset));
  EXPECT_EQ(0u, offset);
}