diff options
Diffstat (limited to 'utf/tst')
| -rw-r--r-- | utf/tst/test_utf16.cc | 157 | ||||
| -rw-r--r-- | utf/tst/test_utf32.cc | 145 | ||||
| -rw-r--r-- | utf/tst/test_utf8.cc | 188 |
3 files changed, 490 insertions, 0 deletions
diff --git a/utf/tst/test_utf16.cc b/utf/tst/test_utf16.cc
new file mode 100644
index 0000000..c17982e
--- /dev/null
+++ b/utf/tst/test_utf16.cc
@@ -0,0 +1,157 @@
+#include "utf16.hh"
+
+#include "utf_error.hh"
+
+#include <gtest/gtest.h>
+
+TEST(utf16be, sanity) {  // well-formed big-endian input decodes completely
+  std::string_view str("\x00\x24", 2);  // explicit size: data starts with NUL
+  size_t offset = 0;
+  auto ret = utf::read16be(str, offset);
+  EXPECT_EQ('$', ret);
+  EXPECT_EQ(2u, offset);  // unsigned literal matches size_t (no sign-compare)
+
+  str = "\x20\xAC";  // one code unit
+  offset = 0;
+  ret = utf::read16be(str, offset);
+  EXPECT_EQ(0x20AC, ret);
+  EXPECT_EQ(2u, offset);
+
+  str = "\xD8\x01\xDC\x37";  // surrogate pair
+  offset = 0;
+  ret = utf::read16be(str, offset);
+  EXPECT_EQ(0x10437, ret);
+  EXPECT_EQ(4u, offset);
+
+  str = "\xD8\x52\xDF\x62";  // surrogate pair
+  offset = 0;
+  ret = utf::read16be(str, offset);
+  EXPECT_EQ(0x24B62, ret);
+  EXPECT_EQ(4u, offset);
+}
+
+TEST(utf16le, sanity) {  // same code points as utf16be.sanity, bytes swapped
+  std::string_view str("\x24\x00", 2);  // explicit size: data ends with NUL
+  size_t offset = 0;
+  auto ret = utf::read16le(str, offset);
+  EXPECT_EQ('$', ret);
+  EXPECT_EQ(2u, offset);
+
+  str = "\xAC\x20";  // one code unit
+  offset = 0;
+  ret = utf::read16le(str, offset);
+  EXPECT_EQ(0x20AC, ret);
+  EXPECT_EQ(2u, offset);
+
+  str = "\x01\xD8\x37\xDC";  // surrogate pair
+  offset = 0;
+  ret = utf::read16le(str, offset);
+  EXPECT_EQ(0x10437, ret);
+  EXPECT_EQ(4u, offset);
+
+  str = "\x52\xD8\x62\xDF";  // surrogate pair
+  offset = 0;
+  ret = utf::read16le(str, offset);
+  EXPECT_EQ(0x24B62, ret);
+  EXPECT_EQ(4u, offset);
+}
+
+TEST(utf16be, bom) {  // a leading BOM is expected back as plain U+FEFF
+  std::string_view str("\xFE\xFF\x20\xAC");
+  size_t offset = 0;
+  auto ret = utf::read16be(str, offset);
+  EXPECT_EQ(0xFEFF, ret);
+  ret = utf::read16be(str, offset);  // offset carries over between reads
+  EXPECT_EQ(0x20AC, ret);
+  ret = utf::read16be(str, offset);  // nothing left to decode
+  EXPECT_EQ(utf::NEED_MORE, ret);
+  EXPECT_EQ(str.size(), offset);
+}
+
+TEST(utf16le, bom) {  // a leading BOM is expected back as plain U+FEFF
+  std::string_view str("\xFF\xFE\xAC\x20");
+  size_t offset = 0;
+  auto ret = utf::read16le(str, offset);
+  EXPECT_EQ(0xFEFF, ret);
+  ret = utf::read16le(str, offset);  // offset carries over between reads
+  EXPECT_EQ(0x20AC, ret);
+  ret = utf::read16le(str, offset);  // nothing left to decode
+  EXPECT_EQ(utf::NEED_MORE, ret);
+  EXPECT_EQ(str.size(), offset);
+}
+
+TEST(utf16be, invalid) {  // truncated or ill-formed input leaves offset at 0
+  std::string_view str("\xD8");  // half a code unit
+  size_t offset = 0;
+  auto ret = utf::read16be(str, offset);
+  EXPECT_EQ(utf::NEED_MORE, ret);
+  EXPECT_EQ(0u, offset);
+
+  str = "";  // empty input
+  offset = 0;
+  ret = utf::read16be(str, offset);
+  EXPECT_EQ(utf::NEED_MORE, ret);
+  EXPECT_EQ(0u, offset);
+
+  str = "\xD8\x01";  // high surrogate with no partner yet
+  offset = 0;
+  ret = utf::read16be(str, offset);
+  EXPECT_EQ(utf::NEED_MORE, ret);
+  EXPECT_EQ(0u, offset);
+
+  str = "\xD8\x01\xDC";  // second unit of the pair is cut short
+  offset = 0;
+  ret = utf::read16be(str, offset);
+  EXPECT_EQ(utf::NEED_MORE, ret);
+  EXPECT_EQ(0u, offset);
+
+  str = "\xDC\x37\xD8\x01";  // low surrogate comes first
+  offset = 0;
+  ret = utf::read16be(str, offset);
+  EXPECT_EQ(utf::INVALID, ret);
+  EXPECT_EQ(0u, offset);
+
+  str = "\xD8\x01\xD8\x01";  // high surrogate followed by another high one
+  offset = 0;
+  ret = utf::read16be(str, offset);
+  EXPECT_EQ(utf::INVALID, ret);
+  EXPECT_EQ(0u, offset);
+}
+
+TEST(utf16le, invalid) {  // truncated or ill-formed input leaves offset at 0
+  std::string_view str("\x01");  // half a code unit
+  size_t offset = 0;
+  auto ret = utf::read16le(str, offset);
+  EXPECT_EQ(utf::NEED_MORE, ret);
+  EXPECT_EQ(0u, offset);
+
+  str = "";  // empty input
+  offset = 0;
+  ret = utf::read16le(str, offset);
+  EXPECT_EQ(utf::NEED_MORE, ret);
+  EXPECT_EQ(0u, offset);
+
+  str = "\x01\xD8";  // high surrogate with no partner yet
+  offset = 0;
+  ret = utf::read16le(str, offset);
+  EXPECT_EQ(utf::NEED_MORE, ret);
+  EXPECT_EQ(0u, offset);
+
+  str = "\x01\xD8\x37";  // second unit of the pair is cut short
+  offset = 0;
+  ret = utf::read16le(str, offset);
+  EXPECT_EQ(utf::NEED_MORE, ret);
+  EXPECT_EQ(0u, offset);
+
+  str = "\x37\xDC\x01\xD8";  // low surrogate comes first
+  offset = 0;
+  ret = utf::read16le(str, offset);
+  EXPECT_EQ(utf::INVALID, ret);
+  EXPECT_EQ(0u, offset);
+
+  str = "\x01\xD8\x01\xD8";  // high surrogate followed by another high one
+  offset = 0;
+  ret = utf::read16le(str, offset);
+  EXPECT_EQ(utf::INVALID, ret);
+  EXPECT_EQ(0u, offset);
+}
diff --git a/utf/tst/test_utf32.cc b/utf/tst/test_utf32.cc
new file mode 100644
index 0000000..796b4cd
--- /dev/null
+++ b/utf/tst/test_utf32.cc
@@ -0,0 +1,145 @@
+#include "utf32.hh"
+
+#include "utf_error.hh"
+
+#include <gtest/gtest.h>
+
+TEST(utf32be, sanity) {  // well-formed big-endian code points
+  std::string_view str("\x00\x00\x00\x24", 4);  // explicit size: embedded NULs
+  size_t offset = 0;
+  auto ret = utf::read32be(str, offset);
+  EXPECT_EQ('$', ret);
+  EXPECT_EQ(4u, offset);  // unsigned literal matches size_t (no sign-compare)
+
+  str = std::string_view("\x00\x00\x20\xAC", 4);
+  offset = 0;
+  ret = utf::read32be(str, offset);
+  EXPECT_EQ(0x20AC, ret);
+  EXPECT_EQ(4u, offset);
+
+  str = std::string_view("\x00\x01\x04\x37", 4);
+  offset = 0;
+  ret = utf::read32be(str, offset);
+  EXPECT_EQ(0x10437, ret);
+  EXPECT_EQ(4u, offset);
+}
+
+TEST(utf32le, sanity) {  // same code points as utf32be.sanity, bytes reversed
+  std::string_view str("\x24\x00\x00\x00", 4);  // explicit size: embedded NULs
+  size_t offset = 0;
+  auto ret = utf::read32le(str, offset);
+  EXPECT_EQ('$', ret);
+  EXPECT_EQ(4u, offset);
+
+  str = std::string_view("\xAC\x20\x00\x00", 4);
+  offset = 0;
+  ret = utf::read32le(str, offset);
+  EXPECT_EQ(0x20AC, ret);
+  EXPECT_EQ(4u, offset);
+
+  str = std::string_view("\x37\x04\x01\x00", 4);
+  offset = 0;
+  ret = utf::read32le(str, offset);
+  EXPECT_EQ(0x10437, ret);
+  EXPECT_EQ(4u, offset);
+}
+
+TEST(utf32be, invalid) {  // out-of-range, surrogate and truncated input
+  std::string_view str("\xFF\xFF\xFF\xFF");  // far above U+10FFFF
+  size_t offset = 0;
+  auto ret = utf::read32be(str, offset);
+  EXPECT_EQ(utf::INVALID, ret);
+  EXPECT_EQ(0u, offset);
+
+  str = std::string_view("\x00\x00\xD8\x00", 4);  // surrogate U+D800
+  offset = 0;
+  ret = utf::read32be(str, offset);
+  EXPECT_EQ(utf::INVALID, ret);
+  EXPECT_EQ(0u, offset);
+
+  str = "";  // empty input
+  offset = 0;
+  ret = utf::read32be(str, offset);
+  EXPECT_EQ(utf::NEED_MORE, ret);
+  EXPECT_EQ(0u, offset);
+
+  str = std::string_view("\x00", 1);  // 1 of 4 bytes
+  offset = 0;
+  ret = utf::read32be(str, offset);
+  EXPECT_EQ(utf::NEED_MORE, ret);
+  EXPECT_EQ(0u, offset);
+
+  str = std::string_view("\x00\x00", 2);  // 2 of 4 bytes
+  offset = 0;
+  ret = utf::read32be(str, offset);
+  EXPECT_EQ(utf::NEED_MORE, ret);
+  EXPECT_EQ(0u, offset);
+
+  str = std::string_view("\x00\x00\x00", 3);  // 3 of 4 bytes
+  offset = 0;
+  ret = utf::read32be(str, offset);
+  EXPECT_EQ(utf::NEED_MORE, ret);
+  EXPECT_EQ(0u, offset);
+}
+
+TEST(utf32le, invalid) {  // out-of-range, surrogate and truncated input
+  std::string_view str("\xFF\xFF\xFF\xFF");  // far above U+10FFFF
+  size_t offset = 0;
+  auto ret = utf::read32le(str, offset);
+  EXPECT_EQ(utf::INVALID, ret);
+  EXPECT_EQ(0u, offset);
+
+  str = std::string_view("\x00\xD8\x00\x00", 4);  // surrogate U+D800
+  offset = 0;
+  ret = utf::read32le(str, offset);
+  EXPECT_EQ(utf::INVALID, ret);
+  EXPECT_EQ(0u, offset);
+
+  str = "";  // empty input
+  offset = 0;
+  ret = utf::read32le(str, offset);
+  EXPECT_EQ(utf::NEED_MORE, ret);
+  EXPECT_EQ(0u, offset);
+
+  str = std::string_view("\x00", 1);  // 1 of 4 bytes
+  offset = 0;
+  ret = utf::read32le(str, offset);
+  EXPECT_EQ(utf::NEED_MORE, ret);
+  EXPECT_EQ(0u, offset);
+
+  str = std::string_view("\x00\x00", 2);  // 2 of 4 bytes
+  offset = 0;
+  ret = utf::read32le(str, offset);
+  EXPECT_EQ(utf::NEED_MORE, ret);
+  EXPECT_EQ(0u, offset);
+
+  str = std::string_view("\x00\x00\x00", 3);  // 3 of 4 bytes
+  offset = 0;
+  ret = utf::read32le(str, offset);
+  EXPECT_EQ(utf::NEED_MORE, ret);
+  EXPECT_EQ(0u, offset);
+}
+
+TEST(utf32be, bom) {  // BOM is U+FEFF (00 00 FE FF), not the swapped U+FFFE
+  std::string_view str("\x00\x00\xFE\xFF\x00\x00\x20\xAC", 8);
+  size_t offset = 0;
+  auto ret = utf::read32be(str, offset);
+  EXPECT_EQ(0xFEFF, ret);
+  ret = utf::read32be(str, offset);  // offset carries over between reads
+  EXPECT_EQ(0x20AC, ret);
+  ret = utf::read32be(str, offset);  // nothing left to decode
+  EXPECT_EQ(utf::NEED_MORE, ret);
+  EXPECT_EQ(str.size(), offset);
+}
+
+TEST(utf32le, bom) {  // BOM is U+FEFF (FF FE 00 00), not the swapped U+FFFE
+  std::string_view str("\xFF\xFE\x00\x00\xAC\x20\x00\x00", 8);
+  size_t offset = 0;
+  auto ret = utf::read32le(str, offset);
+  EXPECT_EQ(0xFEFF, ret);
+  ret = utf::read32le(str, offset);  // offset carries over between reads
+  EXPECT_EQ(0x20AC, ret);
+  ret = utf::read32le(str, offset);  // nothing left to decode
+  EXPECT_EQ(utf::NEED_MORE, ret);
+  EXPECT_EQ(str.size(), offset);
+}
diff --git a/utf/tst/test_utf8.cc b/utf/tst/test_utf8.cc
new file mode 100644
index 0000000..10df969
--- /dev/null
+++ b/utf/tst/test_utf8.cc
@@ -0,0 +1,188 @@
+#include "utf8.hh"
+
+#include "utf_error.hh"
+
+#include <gtest/gtest.h>
+
+TEST(utf8, sanity) {  // valid sequences of every length, 1 to 4 bytes
+  std::string_view str("$");  // 1 byte
+  size_t offset = 0;
+  auto ret = utf::read8(str, offset);
+  EXPECT_EQ('$', ret);
+  EXPECT_EQ(1u, offset);  // unsigned literal matches size_t (no sign-compare)
+
+  str = "\xC2\xA3";  // 2 bytes
+  offset = 0;
+  ret = utf::read8(str, offset);
+  EXPECT_EQ(0xa3, ret);
+  EXPECT_EQ(2u, offset);
+
+  str = "\xD0\x98";  // 2 bytes
+  offset = 0;
+  ret = utf::read8(str, offset);
+  EXPECT_EQ(0x418, ret);
+  EXPECT_EQ(2u, offset);
+
+  str = "\xE0\xA4\xB9";  // 3 bytes
+  offset = 0;
+  ret = utf::read8(str, offset);
+  EXPECT_EQ(0x939, ret);
+  EXPECT_EQ(3u, offset);
+
+  str = "\xE2\x82\xAC";  // 3 bytes
+  offset = 0;
+  ret = utf::read8(str, offset);
+  EXPECT_EQ(0x20AC, ret);
+  EXPECT_EQ(3u, offset);
+
+  str = "\xED\x95\x9C";  // 3 bytes
+  offset = 0;
+  ret = utf::read8(str, offset);
+  EXPECT_EQ(0xD55C, ret);
+  EXPECT_EQ(3u, offset);
+
+  str = "\xF0\x90\x8D\x88";  // 4 bytes
+  offset = 0;
+  ret = utf::read8(str, offset);
+  EXPECT_EQ(0x10348, ret);
+  EXPECT_EQ(4u, offset);
+}
+
+TEST(utf8, overlong) {  // non-shortest encodings are expected to be INVALID
+  std::string_view str("\xF0\x82\x82\xAC");  // U+20AC padded to 4 bytes
+  size_t offset = 0;
+  auto ret = utf::read8(str, offset);
+  EXPECT_EQ(utf::INVALID, ret);
+  EXPECT_EQ(0u, offset);
+
+  str = "\xE0\x81\x81";  // U+0041 padded to 3 bytes
+  offset = 0;
+  ret = utf::read8(str, offset);
+  EXPECT_EQ(utf::INVALID, ret);
+  EXPECT_EQ(0u, offset);
+
+  str = "\xC0\x80";  // U+0000 padded to 2 bytes
+  offset = 0;
+  ret = utf::read8(str, offset);
+  EXPECT_EQ(utf::INVALID, ret);
+  EXPECT_EQ(0u, offset);
+}
+
+TEST(utf8, invalid) {  // ill-formed bytes: INVALID; truncation: NEED_MORE
+  std::string_view str("\xED\xB0\x80");  // encodes surrogate U+DC00
+  size_t offset = 0;
+  auto ret = utf::read8(str, offset);
+  EXPECT_EQ(utf::INVALID, ret);
+  EXPECT_EQ(0u, offset);
+
+  str = "\xFB\xFF\xFF";  // 0xFB would start a 5-byte sequence
+  offset = 0;
+  ret = utf::read8(str, offset);
+  EXPECT_EQ(utf::INVALID, ret);
+  EXPECT_EQ(0u, offset);
+
+  str = "\xFF\xFF\xFF\xFF\xFF";  // 0xFF never appears in UTF-8
+  offset = 0;
+  ret = utf::read8(str, offset);
+  EXPECT_EQ(utf::INVALID, ret);
+  EXPECT_EQ(0u, offset);
+
+  str = "";  // empty input
+  offset = 0;
+  ret = utf::read8(str, offset);
+  EXPECT_EQ(utf::NEED_MORE, ret);
+  EXPECT_EQ(0u, offset);
+
+  str = "\x80";  // continuation byte with no lead byte
+  offset = 0;
+  ret = utf::read8(str, offset);
+  EXPECT_EQ(utf::INVALID, ret);
+  EXPECT_EQ(0u, offset);
+
+  str = "\xC2";  // 2-byte lead, continuation missing
+  offset = 0;
+  ret = utf::read8(str, offset);
+  EXPECT_EQ(utf::NEED_MORE, ret);
+  EXPECT_EQ(0u, offset);
+
+  str = "\xC2\x03";  // lead byte followed by a non-continuation byte
+  offset = 0;
+  ret = utf::read8(str, offset);
+  EXPECT_EQ(utf::INVALID, ret);
+  EXPECT_EQ(0u, offset);
+
+  str = "\xE0\xA4";  // 3-byte sequence missing its last byte
+  offset = 0;
+  ret = utf::read8(str, offset);
+  EXPECT_EQ(utf::NEED_MORE, ret);
+  EXPECT_EQ(0u, offset);
+
+  str = "\xF0\x90\x8D";  // 4-byte sequence missing its last byte
+  offset = 0;
+  ret = utf::read8(str, offset);
+  EXPECT_EQ(utf::NEED_MORE, ret);
+  EXPECT_EQ(0u, offset);
+}
+
+TEST(utf8, multiple1) {  // mixed 1-, 2- and 3-byte sequences read in a row
+  std::string_view str("\x4D\xC3\xAC\x6E\x68\x20\x6E\xC3\xB3\x69\x20\x74\x69"
+                       "\xE1\xBA\xBF\x6E\x67\x20\x56\x69\xE1\xBB\x87\x74");
+  size_t offset = 0;  // never reset: each read resumes where the last stopped
+  auto ret = utf::read8(str, offset);
+  EXPECT_EQ('M', ret);
+  ret = utf::read8(str, offset);
+  EXPECT_EQ(0xEC, ret);
+  ret = utf::read8(str, offset);
+  EXPECT_EQ('n', ret);
+  ret = utf::read8(str, offset);
+  EXPECT_EQ('h', ret);
+  ret = utf::read8(str, offset);
+  EXPECT_EQ(' ', ret);
+  ret = utf::read8(str, offset);
+  EXPECT_EQ('n', ret);
+  ret = utf::read8(str, offset);
+  EXPECT_EQ(0xF3, ret);
+  ret = utf::read8(str, offset);
+  EXPECT_EQ('i', ret);
+  ret = utf::read8(str, offset);
+  EXPECT_EQ(' ', ret);
+  ret = utf::read8(str, offset);
+  EXPECT_EQ('t', ret);
+  ret = utf::read8(str, offset);
+  EXPECT_EQ('i', ret);
+  ret = utf::read8(str, offset);
+  EXPECT_EQ(0x1EBF, ret);
+  ret = utf::read8(str, offset);
+  EXPECT_EQ('n', ret);
+  ret = utf::read8(str, offset);
+  EXPECT_EQ('g', ret);
+  ret = utf::read8(str, offset);
+  EXPECT_EQ(' ', ret);
+  ret = utf::read8(str, offset);
+  EXPECT_EQ('V', ret);
+  ret = utf::read8(str, offset);
+  EXPECT_EQ('i', ret);
+  ret = utf::read8(str, offset);
+  EXPECT_EQ(0x1EC7, ret);
+  ret = utf::read8(str, offset);
+  EXPECT_EQ('t', ret);
+  ret = utf::read8(str, offset);  // nothing left to decode
+  EXPECT_EQ(utf::NEED_MORE, ret);
+  EXPECT_EQ(str.size(), offset);
+}
+
+TEST(utf8, multiple2) {  // one 4-byte sequence followed by three 3-byte ones
+  std::string_view str("\xF0\xA8\x89\x9F\xE5\x91\x90\xE3\x97\x82\xE8\xB6\x8A");
+  size_t offset = 0;  // never reset: each read resumes where the last stopped
+  auto ret = utf::read8(str, offset);
+  EXPECT_EQ(0x2825F, ret);
+  ret = utf::read8(str, offset);
+  EXPECT_EQ(0x5450, ret);
+  ret = utf::read8(str, offset);
+  EXPECT_EQ(0x35C2, ret);
+  ret = utf::read8(str, offset);
+  EXPECT_EQ(0x8D8A, ret);
+  ret = utf::read8(str, offset);  // nothing left to decode
+  EXPECT_EQ(utf::NEED_MORE, ret);
+  EXPECT_EQ(str.size(), offset);
+}
