diff options
| author | Joel Klinghed <the_jk@spawned.biz> | 2023-06-13 10:07:16 +0200 |
|---|---|---|
| committer | Joel Klinghed <the_jk@spawned.biz> | 2023-06-13 10:07:16 +0200 |
| commit | fc4547b412e28164af1bf8981234c6af959ccc0b (patch) | |
| tree | 061253e7a4f6abaca282223b36d10f0bed8cad23 /utf/tst/test_utf8.cc | |
WIP
Diffstat (limited to 'utf/tst/test_utf8.cc')
| -rw-r--r-- | utf/tst/test_utf8.cc | 188 |
1 files changed, 188 insertions, 0 deletions
diff --git a/utf/tst/test_utf8.cc b/utf/tst/test_utf8.cc new file mode 100644 index 0000000..10df969 --- /dev/null +++ b/utf/tst/test_utf8.cc @@ -0,0 +1,188 @@ +#include "utf8.hh" + +#include "utf_error.hh" + +#include <gtest/gtest.h> + +TEST(utf8, sanity) { + std::string_view str("$"); + size_t offset = 0; + auto ret = utf::read8(str, offset); + EXPECT_EQ('$', ret); + EXPECT_EQ(1, offset); + + str = "\xC2\xA3"; + offset = 0; + ret = utf::read8(str, offset); + EXPECT_EQ(0xa3, ret); + EXPECT_EQ(2, offset); + + str = "\xD0\x98"; + offset = 0; + ret = utf::read8(str, offset); + EXPECT_EQ(0x418, ret); + EXPECT_EQ(2, offset); + + str = "\xE0\xA4\xB9"; + offset = 0; + ret = utf::read8(str, offset); + EXPECT_EQ(0x939, ret); + EXPECT_EQ(3, offset); + + str = "\xE2\x82\xAC"; + offset = 0; + ret = utf::read8(str, offset); + EXPECT_EQ(0x20AC, ret); + EXPECT_EQ(3, offset); + + str = "\xED\x95\x9C"; + offset = 0; + ret = utf::read8(str, offset); + EXPECT_EQ(0xD55C, ret); + EXPECT_EQ(3, offset); + + str = "\xF0\x90\x8D\x88"; + offset = 0; + ret = utf::read8(str, offset); + EXPECT_EQ(0x10348, ret); + EXPECT_EQ(4, offset); +} + +TEST(utf8, overlong) { + std::string_view str("\xF0\x82\x82\xAC"); + size_t offset = 0; + auto ret = utf::read8(str, offset); + EXPECT_EQ(utf::INVALID, ret); + EXPECT_EQ(0, offset); + + str = "\xE0\x81\x81"; + offset = 0; + ret = utf::read8(str, offset); + EXPECT_EQ(utf::INVALID, ret); + EXPECT_EQ(0, offset); + + str = "\xC0\x80"; + offset = 0; + ret = utf::read8(str, offset); + EXPECT_EQ(utf::INVALID, ret); + EXPECT_EQ(0, offset); +} + +TEST(utf8, invalid) { + std::string_view str("\xED\xB0\x80"); + size_t offset = 0; + auto ret = utf::read8(str, offset); + EXPECT_EQ(utf::INVALID, ret); + EXPECT_EQ(0, offset); + + str = "\xFB\xFF\xFF"; + offset = 0; + ret = utf::read8(str, offset); + EXPECT_EQ(utf::INVALID, ret); + EXPECT_EQ(0, offset); + + str = "\xFF\xFF\xFF\xFF\xFF"; + offset = 0; + ret = utf::read8(str, offset); + EXPECT_EQ(utf::INVALID, ret); + EXPECT_EQ(0, offset); + + str = ""; + offset = 0; + ret = utf::read8(str, offset); + EXPECT_EQ(utf::NEED_MORE, ret); + EXPECT_EQ(0, offset); + + str = "\x80"; + offset = 0; + ret = utf::read8(str, offset); + EXPECT_EQ(utf::INVALID, ret); + EXPECT_EQ(0, offset); + + str = "\xC2"; + offset = 0; + ret = utf::read8(str, offset); + EXPECT_EQ(utf::NEED_MORE, ret); + EXPECT_EQ(0, offset); + + str = "\xC2\x03"; + offset = 0; + ret = utf::read8(str, offset); + EXPECT_EQ(utf::INVALID, ret); + EXPECT_EQ(0, offset); + + str = "\xE0\xA4"; + offset = 0; + ret = utf::read8(str, offset); + EXPECT_EQ(utf::NEED_MORE, ret); + EXPECT_EQ(0, offset); + + str = "\xF0\x90\x8D"; + offset = 0; + ret = utf::read8(str, offset); + EXPECT_EQ(utf::NEED_MORE, ret); + EXPECT_EQ(0, offset); +} + +TEST(utf8, multiple1) { + std::string_view str("\x4D\xC3\xAC\x6E\x68\x20\x6E\xC3\xB3\x69\x20\x74\x69" + "\xE1\xBA\xBF\x6E\x67\x20\x56\x69\xE1\xBB\x87\x74"); + size_t offset = 0; + auto ret = utf::read8(str, offset); + EXPECT_EQ('M', ret); + ret = utf::read8(str, offset); + EXPECT_EQ(0xEC, ret); + ret = utf::read8(str, offset); + EXPECT_EQ('n', ret); + ret = utf::read8(str, offset); + EXPECT_EQ('h', ret); + ret = utf::read8(str, offset); + EXPECT_EQ(' ', ret); + ret = utf::read8(str, offset); + EXPECT_EQ('n', ret); + ret = utf::read8(str, offset); + EXPECT_EQ(0xF3, ret); + ret = utf::read8(str, offset); + EXPECT_EQ('i', ret); + ret = utf::read8(str, offset); + EXPECT_EQ(' ', ret); + ret = utf::read8(str, offset); + EXPECT_EQ('t', ret); + ret = utf::read8(str, offset); + EXPECT_EQ('i', ret); + ret = utf::read8(str, offset); + EXPECT_EQ(0x1EBF, ret); + ret = utf::read8(str, offset); + EXPECT_EQ('n', ret); + ret = utf::read8(str, offset); + EXPECT_EQ('g', ret); + ret = utf::read8(str, offset); + EXPECT_EQ(' ', ret); + ret = utf::read8(str, offset); + EXPECT_EQ('V', ret); + ret = utf::read8(str, offset); + EXPECT_EQ('i', ret); + ret = utf::read8(str, offset); + EXPECT_EQ(0x1EC7, ret); + ret = utf::read8(str, offset); + EXPECT_EQ('t', ret); + ret = utf::read8(str, offset); + EXPECT_EQ(utf::NEED_MORE, ret); + EXPECT_EQ(str.size(), offset); +} + +TEST(utf8, multiple2) { + std::string_view str("\xF0\xA8\x89\x9F\xE5\x91\x90\xE3\x97\x82\xE8\xB6\x8A"); + size_t offset = 0; + auto ret = utf::read8(str, offset); + EXPECT_EQ(0x2825F, ret); + ret = utf::read8(str, offset); + EXPECT_EQ(0x5450, ret); + ret = utf::read8(str, offset); + EXPECT_EQ(0x35C2, ret); + ret = utf::read8(str, offset); + EXPECT_EQ(0x8D8A, ret); + ret = utf::read8(str, offset); + EXPECT_EQ(utf::NEED_MORE, ret); + EXPECT_EQ(str.size(), offset); +} |
