From 32e14551a90e85000e41b3f0445d34d58a1431e4 Mon Sep 17 00:00:00 2001 From: Joel Klinghed Date: Wed, 10 Sep 2025 22:12:22 +0200 Subject: Add unicode general category lookup Generate the lookup tables from UnicodeData.txt. To do that, add gen_ugc, which uses csv, buffers, line, io and other modules to do the job. --- test/buffer.cc | 65 +++++++++++++++++ test/csv.cc | 90 ++++++++++++++++++++++++ test/decompress.cc | 72 +++++++++++++++++++ test/io.cc | 142 ++++++++++++++++++++++++++++++++++++++ test/io_test_helper.cc | 82 ++++++++++++++++++++++ test/io_test_helper.hh | 18 +++++ test/line.cc | 184 +++++++++++++++++++++++++++++++++++++++++++++++++ test/str.cc | 38 ++++++++++ test/u.cc | 42 +++++++++++ 9 files changed, 733 insertions(+) create mode 100644 test/buffer.cc create mode 100644 test/csv.cc create mode 100644 test/decompress.cc create mode 100644 test/io.cc create mode 100644 test/io_test_helper.cc create mode 100644 test/io_test_helper.hh create mode 100644 test/line.cc create mode 100644 test/str.cc (limited to 'test') diff --git a/test/buffer.cc b/test/buffer.cc new file mode 100644 index 0000000..869e781 --- /dev/null +++ b/test/buffer.cc @@ -0,0 +1,65 @@ +#include + +#include "buffer.hh" + +#include + +TEST(buffer_fixed, empty) { + auto buffer = Buffer::fixed(10); + EXPECT_TRUE(buffer->empty()); + EXPECT_FALSE(buffer->full()); + size_t avail; + buffer->rptr(avail); + EXPECT_EQ(0, avail); + buffer->wptr(avail); + EXPECT_EQ(10, avail); +} + +TEST(buffer_dynamic, empty) { + auto buffer = Buffer::dynamic(10, 100); + EXPECT_TRUE(buffer->empty()); + EXPECT_FALSE(buffer->full()); + size_t avail; + buffer->rptr(avail); + EXPECT_EQ(0, avail); + buffer->wptr(avail); + EXPECT_EQ(10, avail); +} + +TEST(buffer_fixed, write_read) { + auto buffer = Buffer::fixed(10); + size_t avail; + auto* wptr = buffer->wptr(avail); + EXPECT_EQ(10, avail); + memcpy(wptr, "Hello", 6); + buffer->commit(6); + EXPECT_FALSE(buffer->empty()); + auto* rptr = buffer->rptr(avail); + 
EXPECT_EQ(6, avail); + EXPECT_STREQ("Hello", reinterpret_cast(rptr)); + buffer->consume(3); + rptr = buffer->rptr(avail); + EXPECT_EQ(3, avail); + EXPECT_STREQ("lo", reinterpret_cast(rptr)); + buffer->consume(3); + EXPECT_TRUE(buffer->empty()); +} + +TEST(buffer_dynamic, write_read) { + auto buffer = Buffer::dynamic(10, 100); + size_t avail; + auto* wptr = buffer->wptr(avail); + EXPECT_EQ(10, avail); + memcpy(wptr, "Hello", 6); + buffer->commit(6); + EXPECT_FALSE(buffer->empty()); + auto* rptr = buffer->rptr(avail); + EXPECT_EQ(6, avail); + EXPECT_STREQ("Hello", reinterpret_cast(rptr)); + buffer->consume(3); + rptr = buffer->rptr(avail); + EXPECT_EQ(3, avail); + EXPECT_STREQ("lo", reinterpret_cast(rptr)); + buffer->consume(3); + EXPECT_TRUE(buffer->empty()); +} diff --git a/test/csv.cc b/test/csv.cc new file mode 100644 index 0000000..49fe540 --- /dev/null +++ b/test/csv.cc @@ -0,0 +1,90 @@ +#include + +#include "csv.hh" + +TEST(csv, empty) { + auto csv = csv::open(io::memory("")); + auto line = csv->read(); + ASSERT_TRUE(line.has_value()); + EXPECT_EQ(0, line.value().size()); +} + +TEST(csv, one_value) { + auto csv = csv::open(io::memory("foo")); + auto line = csv->read(); + ASSERT_TRUE(line.has_value()); + ASSERT_EQ(1, line.value().size()); + EXPECT_EQ("foo", line.value()[0]); + line = csv->read(); + ASSERT_TRUE(line.has_value()); + EXPECT_EQ(0, line.value().size()); +} + +TEST(csv, two_value) { + auto csv = csv::open(io::memory("foo,bar")); + auto line = csv->read(); + ASSERT_TRUE(line.has_value()); + ASSERT_EQ(2, line.value().size()); + EXPECT_EQ("foo", line.value()[0]); + EXPECT_EQ("bar", line.value()[1]); + line = csv->read(); + ASSERT_TRUE(line.has_value()); + EXPECT_EQ(0, line.value().size()); +} + +TEST(csv, empty_value) { + auto csv = csv::open(io::memory("foo,,bar,")); + auto line = csv->read(); + ASSERT_TRUE(line.has_value()); + ASSERT_EQ(4, line.value().size()); + EXPECT_EQ("foo", line.value()[0]); + EXPECT_EQ("", line.value()[1]); + EXPECT_EQ("bar", 
line.value()[2]); + EXPECT_EQ("", line.value()[3]); + line = csv->read(); + ASSERT_TRUE(line.has_value()); + EXPECT_EQ(0, line.value().size()); +} + +TEST(csv, many_lines) { + auto csv = csv::open(io::memory("foo,bar\nfoobar\nf,o,o,")); + auto line = csv->read(); + ASSERT_TRUE(line.has_value()); + ASSERT_EQ(2, line.value().size()); + EXPECT_EQ("foo", line.value()[0]); + EXPECT_EQ("bar", line.value()[1]); + line = csv->read(); + ASSERT_TRUE(line.has_value()); + ASSERT_EQ(1, line.value().size()); + EXPECT_EQ("foobar", line.value()[0]); + line = csv->read(); + ASSERT_TRUE(line.has_value()); + ASSERT_EQ(4, line.value().size()); + EXPECT_EQ("f", line.value()[0]); + EXPECT_EQ("o", line.value()[1]); + EXPECT_EQ("o", line.value()[2]); + EXPECT_EQ("", line.value()[3]); + line = csv->read(); + ASSERT_TRUE(line.has_value()); + EXPECT_EQ(0, line.value().size()); +} + +TEST(csv, blank_lines) { + auto csv = csv::open(io::memory("foo,bar\n\nbar,foo\n\n")); + auto line = csv->read(); + ASSERT_TRUE(line.has_value()); + ASSERT_EQ(2, line.value().size()); + EXPECT_EQ("foo", line.value()[0]); + EXPECT_EQ("bar", line.value()[1]); + EXPECT_EQ(1, csv->number()); + line = csv->read(); + ASSERT_TRUE(line.has_value()); + ASSERT_EQ(2, line.value().size()); + EXPECT_EQ("bar", line.value()[0]); + EXPECT_EQ("foo", line.value()[1]); + EXPECT_EQ(3, csv->number()); + line = csv->read(); + ASSERT_TRUE(line.has_value()); + EXPECT_EQ(0, line.value().size()); + EXPECT_EQ(4, csv->number()); +} diff --git a/test/decompress.cc b/test/decompress.cc new file mode 100644 index 0000000..35c4477 --- /dev/null +++ b/test/decompress.cc @@ -0,0 +1,72 @@ +#include + +#include "decompress.hh" + +TEST(z_decompress, empty) { + static const unsigned char data[] = { + 0x1f, 0x8b, 0x08, 0x08, 0x33, 0xd4, 0xbd, 0x68, + 0x02, 0x03, 0x65, 0x6d, 0x70, 0x74, 0x79, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00 + }; + auto reader = decompress::gzip(io::memory(std::string( + reinterpret_cast(data), 
sizeof(data)))); + char buf[10]; + auto got = reader->read(buf, sizeof(buf)); + ASSERT_TRUE(got.has_value()); + EXPECT_EQ(0, got.value()); +} + +TEST(z_decompress, hello) { + static const unsigned char data[] = { + 0x1f, 0x8b, 0x08, 0x08, 0xf7, 0xd5, 0xbd, 0x68, + 0x02, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00, + 0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0x07, 0x00, 0x82, + 0x89, 0xd1, 0xf7, 0x05, 0x00, 0x00, 0x00, + }; + auto reader = decompress::gzip(io::memory(std::string( + reinterpret_cast(data), sizeof(data)))); + char buf[10]; + auto got = reader->read(buf, sizeof(buf)); + ASSERT_TRUE(got.has_value()); + EXPECT_EQ(5, got.value()); + buf[5] = '\0'; + EXPECT_STREQ("Hello", buf); +} + +TEST(xz_decompress, empty) { + static const unsigned char data[] = { + 0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00, 0x00, 0x04, + 0xe6, 0xd6, 0xb4, 0x46, 0x00, 0x00, 0x00, 0x00, + 0x1c, 0xdf, 0x44, 0x21, 0x1f, 0xb6, 0xf3, 0x7d, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x04, 0x59, 0x5a + }; + auto reader = decompress::xz(io::memory(std::string( + reinterpret_cast(data), sizeof(data)))); + char buf[10]; + auto got = reader->read(buf, sizeof(buf)); + ASSERT_TRUE(got.has_value()); + EXPECT_EQ(0, got.value()); +} + +TEST(xz_decompress, hello) { + static const unsigned char data[] = { + 0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00, 0x00, 0x04, + 0xe6, 0xd6, 0xb4, 0x46, 0x04, 0xc0, 0x09, 0x05, + 0x21, 0x01, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x76, 0xe9, 0x07, 0x70, + 0x01, 0x00, 0x04, 0x48, 0x65, 0x6c, 0x6c, 0x6f, + 0x00, 0x00, 0x00, 0x00, 0xc8, 0xac, 0x7b, 0xc8, + 0x3b, 0x5c, 0xcf, 0x51, 0x00, 0x01, 0x25, 0x05, + 0x43, 0x91, 0x1f, 0xb8, 0x1f, 0xb6, 0xf3, 0x7d, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x04, 0x59, 0x5a, + }; + auto reader = decompress::xz(io::memory(std::string( + reinterpret_cast(data), sizeof(data)))); + char buf[10]; + auto got = reader->read(buf, sizeof(buf)); + ASSERT_TRUE(got.has_value()); + EXPECT_EQ(5, got.value()); + buf[5] = '\0'; + EXPECT_STREQ("Hello", buf); +} diff --git 
a/test/io.cc b/test/io.cc new file mode 100644 index 0000000..ad192ed --- /dev/null +++ b/test/io.cc @@ -0,0 +1,142 @@ +#include + +#include "io.hh" + +#include +#include +#include +#include +#include +#include + +namespace { + +bool remove_recursive(int fd) { + auto* dir = fdopendir(fd); + if (!dir) return false; + while (auto* ent = readdir(dir)) { + if (ent->d_name[0] == '.') { + if (ent->d_name[1] == '\0') continue; + if (ent->d_name[1] == '.' && ent->d_name[2] == '\0') continue; + } + bool is_dir; + if (ent->d_type == DT_DIR) { + is_dir = true; + } else if (ent->d_type == DT_UNKNOWN) { + struct stat buf; + if (fstatat(dirfd(dir), ent->d_name, &buf, AT_SYMLINK_NOFOLLOW) == 0) { + is_dir = S_ISDIR(buf.st_mode); + } else { + if (errno != ENOENT) { + closedir(dir); + return false; + } + is_dir = false; + } + } else { + is_dir = false; + } + + if (is_dir) { + int fd2 = openat(dirfd(dir), ent->d_name, O_RDONLY | O_DIRECTORY); + if (fd2 == -1) { + if (errno != ENOENT) { + closedir(dir); + return false; + } + } else { + if (!remove_recursive(fd2)) { + closedir(dir); + return false; + } + } + } + if (unlinkat(dirfd(dir), ent->d_name, is_dir ? 
AT_REMOVEDIR : 0)) { + if (errno != ENOENT) { + closedir(dir); + return false; + } + } + } + closedir(dir); + return true; +} + +class IoTest : public testing::Test { + protected: + void SetUp() override { + // NOLINTNEXTLINE(misc-include-cleaner) + tmpdir_ = P_tmpdir "/jkc-test-io-XXXXXX"; + // NOLINTNEXTLINE(misc-include-cleaner) + auto* ret = mkdtemp(tmpdir_.data()); + ASSERT_EQ(ret, tmpdir_.data()); + dirfd_ = open(tmpdir_.c_str(), O_PATH | O_DIRECTORY); + ASSERT_NE(-1, dirfd_); + } + + void TearDown() override { + int fd = openat(dirfd_, ".", O_RDONLY | O_DIRECTORY); + EXPECT_NE(-1, fd); + if (fd != -1) { + EXPECT_TRUE(remove_recursive(fd)); + } + close(dirfd_); + rmdir(tmpdir_.c_str()); + } + + [[nodiscard]] int dirfd() const { + return dirfd_; + } + + void touch(const std::string& name, const std::string& value = "") { + auto fd = openat(dirfd(), name.c_str(), O_CREAT | O_WRONLY | O_TRUNC, 0700); + EXPECT_NE(-1, fd); + if (fd == -1) return; + size_t offset = 0; + while (offset < value.size()) { + auto ret = write(fd, value.data() + offset, value.size() - offset); + EXPECT_LT(0, ret); + if (ret <= 0) { + break; + } + offset += ret; + } + close(fd); + } + + private: + int dirfd_; + std::string tmpdir_; +}; + +} // namespace + +TEST_F(IoTest, no_such_file) { + auto ret = io::openat(dirfd(), "no-such-file"); + ASSERT_FALSE(ret.has_value()); + EXPECT_EQ(io::OpenError::NoSuchFile, ret.error()); +} + +TEST_F(IoTest, read_empty) { + touch("test"); + + auto ret = io::openat(dirfd(), "test"); + ASSERT_TRUE(ret.has_value()); + std::string tmp(10, ' '); + auto ret2 = ret.value()->read(tmp); + ASSERT_TRUE(ret2.has_value()); + EXPECT_EQ(0, ret2.value()); +} + +TEST_F(IoTest, read) { + touch("test", "hello world"); + + auto ret = io::openat(dirfd(), "test"); + ASSERT_TRUE(ret.has_value()); + std::string tmp(12, ' '); + auto ret2 = ret.value()->repeat_read(tmp); + ASSERT_TRUE(ret2.has_value()); + EXPECT_EQ(11, ret2.value()); + tmp.resize(ret2.value()); + EXPECT_EQ("hello 
world", tmp); +} diff --git a/test/io_test_helper.cc b/test/io_test_helper.cc new file mode 100644 index 0000000..514e888 --- /dev/null +++ b/test/io_test_helper.cc @@ -0,0 +1,82 @@ +#include "io_test_helper.hh" + +#include "io.hh" + +#include +#include +#include +#include +#include + +namespace { + +class BreakingReader : public io::Reader { + public: + BreakingReader(std::unique_ptr reader, size_t offset, + io::ReadError error) + : reader_(std::move(reader)), offset_(offset), error_(error) {} + + [[nodiscard]] + std::expected read(void* dst, size_t max) override { + if (offset_ == 0) + return std::unexpected(error_); + size_t avail = std::min(offset_, max); + auto ret = reader_->read(dst, avail); + if (ret.has_value()) { + offset_ -= ret.value(); + } + return ret; + } + + [[nodiscard]] + std::expected skip(size_t max) override { + if (offset_ == 0) + return std::unexpected(error_); + size_t avail = std::min(offset_, max); + auto ret = reader_->skip(avail); + if (ret.has_value()) { + offset_ -= ret.value(); + } + return ret; + } + + private: + std::unique_ptr reader_; + size_t offset_; + io::ReadError const error_; +}; + +class MaxBlockReader : public io::Reader { + public: + MaxBlockReader(std::unique_ptr reader, size_t max_block_size) + : reader_(std::move(reader)), max_block_size_(max_block_size) {} + + [[nodiscard]] + std::expected read(void* dst, size_t max) override { + size_t avail = std::min(max_block_size_, max); + return reader_->read(dst, avail); + } + + [[nodiscard]] + std::expected skip(size_t max) override { + size_t avail = std::min(max_block_size_, max); + return reader_->skip(avail); + } + + private: + std::unique_ptr reader_; + size_t const max_block_size_; +}; + +} // namespace + +std::unique_ptr io_make_breaking( + std::unique_ptr reader, size_t offset, + io::ReadError error) { + return std::make_unique(std::move(reader), offset, error); +} + +std::unique_ptr io_make_max_block( + std::unique_ptr reader, size_t max_block_size) { + return 
std::make_unique(std::move(reader), max_block_size); +} diff --git a/test/io_test_helper.hh b/test/io_test_helper.hh new file mode 100644 index 0000000..ce191cf --- /dev/null +++ b/test/io_test_helper.hh @@ -0,0 +1,18 @@ +#ifndef IO_TEST_HELPER_HH +#define IO_TEST_HELPER_HH + +#include "io.hh" // IWYU pragma: export + +#include +#include + +[[nodiscard]] +std::unique_ptr io_make_breaking( + std::unique_ptr reader, size_t offset = 0, + io::ReadError error = io::ReadError::Error); + +[[nodiscard]] +std::unique_ptr io_make_max_block( + std::unique_ptr reader, size_t max_block_size); + +#endif // IO_TEST_HELPER_HH diff --git a/test/line.cc b/test/line.cc new file mode 100644 index 0000000..0f90723 --- /dev/null +++ b/test/line.cc @@ -0,0 +1,184 @@ +#include + +#include "io_test_helper.hh" +#include "line.hh" + +#include +#include +#include + +TEST(line, empty) { + auto reader = line::open(io::memory("")); + EXPECT_EQ(0, reader->number()); + auto line = reader->read(); + ASSERT_FALSE(line.has_value()); + EXPECT_TRUE(line.error().eof); + EXPECT_EQ(0, reader->number()); +} + +TEST(line, one_line) { + auto reader = line::open(io::memory("foo")); + EXPECT_EQ(0, reader->number()); + auto line = reader->read(); + ASSERT_TRUE(line.has_value()); + EXPECT_EQ("foo", line.value()); + EXPECT_EQ(1, reader->number()); + line = reader->read(); + ASSERT_FALSE(line.has_value()); + EXPECT_TRUE(line.error().eof); + EXPECT_EQ(1, reader->number()); +} + +TEST(line, many_lines) { + auto reader = line::open(io::memory("foo\nbar\nfoobar\n")); + EXPECT_EQ(0, reader->number()); + auto line = reader->read(); + ASSERT_TRUE(line.has_value()); + EXPECT_EQ("foo", line.value()); + EXPECT_EQ(1, reader->number()); + line = reader->read(); + ASSERT_TRUE(line.has_value()); + EXPECT_EQ("bar", line.value()); + EXPECT_EQ(2, reader->number()); + line = reader->read(); + ASSERT_TRUE(line.has_value()); + EXPECT_EQ("foobar", line.value()); + EXPECT_EQ(3, reader->number()); + line = reader->read(); + 
ASSERT_FALSE(line.has_value()); + EXPECT_TRUE(line.error().eof); + EXPECT_EQ(3, reader->number()); +} + +TEST(line, many_lines_mixed) { + auto reader = line::open(io::memory("foo\r\nbar\rfoobar\n")); + EXPECT_EQ(0, reader->number()); + auto line = reader->read(); + ASSERT_TRUE(line.has_value()); + EXPECT_EQ("foo", line.value()); + EXPECT_EQ(1, reader->number()); + line = reader->read(); + ASSERT_TRUE(line.has_value()); + EXPECT_EQ("bar", line.value()); + EXPECT_EQ(2, reader->number()); + line = reader->read(); + ASSERT_TRUE(line.has_value()); + EXPECT_EQ("foobar", line.value()); + EXPECT_EQ(3, reader->number()); + line = reader->read(); + ASSERT_FALSE(line.has_value()); + EXPECT_TRUE(line.error().eof); + EXPECT_EQ(3, reader->number()); +} + +TEST(line, empty_line) { + auto reader = line::open(io::memory("\n")); + EXPECT_EQ(0, reader->number()); + auto line = reader->read(); + ASSERT_TRUE(line.has_value()); + EXPECT_EQ("", line.value()); + EXPECT_EQ(1, reader->number()); + line = reader->read(); + ASSERT_FALSE(line.has_value()); + EXPECT_TRUE(line.error().eof); + EXPECT_EQ(1, reader->number()); +} + +TEST(line, max_line) { + auto reader = line::open(io::memory("012345678901234567890123456789"), 10); + EXPECT_EQ(0, reader->number()); + auto line = reader->read(); + ASSERT_TRUE(line.has_value()); + EXPECT_EQ("0123456789", line.value()); + EXPECT_EQ(1, reader->number()); + line = reader->read(); + ASSERT_TRUE(line.has_value()); + EXPECT_EQ("0123456789", line.value()); + EXPECT_EQ(2, reader->number()); + line = reader->read(); + ASSERT_TRUE(line.has_value()); + EXPECT_EQ("0123456789", line.value()); + EXPECT_EQ(3, reader->number()); + line = reader->read(); + ASSERT_FALSE(line.has_value()); + EXPECT_TRUE(line.error().eof); + EXPECT_EQ(3, reader->number()); +} + +TEST(line, read_error) { + auto reader = line::open( + io_make_breaking(io::memory("foo bar fum\nfim zam"), /* offset */ 5)); + auto line = reader->read(); + ASSERT_FALSE(line.has_value()); + 
EXPECT_FALSE(line.error().eof); + EXPECT_EQ(io::ReadError::Error, line.error().io_error.value()); +} + +TEST(line, read_error_newline) { + auto reader = line::open( + io_make_breaking(io::memory("foo bar\r\nfim zam"), /* offset */ 8)); + auto line = reader->read(); + ASSERT_FALSE(line.has_value()); + EXPECT_FALSE(line.error().eof); + EXPECT_EQ(io::ReadError::Error, line.error().io_error.value()); +} + +TEST(line, blocky) { + auto reader = line::open( + io_make_max_block(io::memory("foo bar\r\nfim zam"), + /* max_block_size */ 1)); + auto line = reader->read(); + ASSERT_TRUE(line.has_value()); + EXPECT_EQ("foo bar", line.value()); + line = reader->read(); + ASSERT_TRUE(line.has_value()); + EXPECT_EQ("fim zam", line.value()); + line = reader->read(); + ASSERT_FALSE(line.has_value()); + EXPECT_TRUE(line.error().eof); +} + +TEST(line, blocky_newline) { + auto reader = line::open( + io_make_max_block(io::memory("foo bar\r\nfim zam"), + /* max_block_size */ 8)); + auto line = reader->read(); + ASSERT_TRUE(line.has_value()); + EXPECT_EQ("foo bar", line.value()); + line = reader->read(); + ASSERT_TRUE(line.has_value()); + EXPECT_EQ("fim zam", line.value()); + line = reader->read(); + ASSERT_FALSE(line.has_value()); + EXPECT_TRUE(line.error().eof); +} + +TEST(line, eof_newline) { + auto reader = line::open(io::memory("foo bar\r")); + auto line = reader->read(); + ASSERT_TRUE(line.has_value()); + EXPECT_EQ("foo bar", line.value()); + line = reader->read(); + ASSERT_FALSE(line.has_value()); + EXPECT_TRUE(line.error().eof); +} + +TEST(line, max_newline) { + auto reader = line::open(io::memory("foo bar\r"), 6); + auto line = reader->read(); + ASSERT_TRUE(line.has_value()); + EXPECT_EQ("foo ba", line.value()); + line = reader->read(); + ASSERT_TRUE(line.has_value()); + EXPECT_EQ("r", line.value()); + line = reader->read(); + ASSERT_FALSE(line.has_value()); + EXPECT_TRUE(line.error().eof); +} + +TEST(line, max_line_overflow) { + EXPECT_DEATH_IF_SUPPORTED({ + std::ignore = 
line::open(io::memory(""), + std::numeric_limits::max()); + }, ""); +} diff --git a/test/str.cc b/test/str.cc new file mode 100644 index 0000000..35d70d7 --- /dev/null +++ b/test/str.cc @@ -0,0 +1,38 @@ +#include + +#include "str.hh" + +TEST(str, split) { + auto ret = str::split(""); + EXPECT_EQ(0, ret.size()); + + ret = str::split("", ' ', true); + ASSERT_EQ(1, ret.size()); + EXPECT_EQ("", ret[0]); + + ret = str::split(" "); + EXPECT_EQ(0, ret.size()); + + ret = str::split(" ", ' ', true); + ASSERT_EQ(2, ret.size()); + EXPECT_EQ("", ret[0]); + EXPECT_EQ("", ret[1]); + + ret = str::split(" a b "); + ASSERT_EQ(2, ret.size()); + EXPECT_EQ("a", ret[0]); + EXPECT_EQ("b", ret[1]); + + ret = str::split(" a b ", ' ', true); + ASSERT_EQ(4, ret.size()); + EXPECT_EQ("", ret[0]); + EXPECT_EQ("a", ret[1]); + EXPECT_EQ("b", ret[2]); + EXPECT_EQ("", ret[3]); + + ret = str::split(" a b", ' ', true); + ASSERT_EQ(3, ret.size()); + EXPECT_EQ("", ret[0]); + EXPECT_EQ("a", ret[1]); + EXPECT_EQ("b", ret[2]); +} diff --git a/test/u.cc b/test/u.cc index 933a4f2..de04e39 100644 --- a/test/u.cc +++ b/test/u.cc @@ -681,3 +681,45 @@ TEST(u16, invalid) { EXPECT_EQ(it, literal.end()); } } + +TEST(u, lookup_gc) { + EXPECT_EQ(u::lookup_gc(0x41), u::GeneralCategory::LETTER_UPPERCASE); + EXPECT_EQ(u::lookup_gc(0x61), u::GeneralCategory::LETTER_LOWERCASE); + EXPECT_EQ(u::lookup_gc(0x1c5), u::GeneralCategory::LETTER_TITLECASE); + EXPECT_EQ(u::lookup_gc(0x374), u::GeneralCategory::LETTER_MODIFIER); + EXPECT_EQ(u::lookup_gc(0x34ff), u::GeneralCategory::LETTER_OTHER); + + EXPECT_EQ(u::lookup_gc(0x483), u::GeneralCategory::MARK_NONSPACING); + EXPECT_EQ(u::lookup_gc(0x93b), u::GeneralCategory::MARK_SPACING_COMBINDING); + EXPECT_EQ(u::lookup_gc(0x20de), u::GeneralCategory::MARK_SPACING_ENCLOSING); + + EXPECT_EQ(u::lookup_gc(0xa620), u::GeneralCategory::NUMBER_DIGIT); + EXPECT_EQ(u::lookup_gc(0xa6e6), u::GeneralCategory::NUMBER_LETTER); + EXPECT_EQ(u::lookup_gc(0xa830), u::GeneralCategory::NUMBER_OTHER); + 
+ EXPECT_EQ(u::lookup_gc(0xfe33), u::GeneralCategory::PUNCTUATION_CONNECTOR); + EXPECT_EQ(u::lookup_gc(0xfe58), u::GeneralCategory::PUNCTUATION_DASH); + EXPECT_EQ(u::lookup_gc(0xff08), u::GeneralCategory::PUNCTUATION_OPEN); + EXPECT_EQ(u::lookup_gc(0xff09), u::GeneralCategory::PUNCTUATION_CLOSE); + EXPECT_EQ(u::lookup_gc(0xab), u::GeneralCategory::PUNCTUATION_INITIAL_QUOTE); + EXPECT_EQ(u::lookup_gc(0xbb), u::GeneralCategory::PUNCTUATION_FINAL_QUOTE); + EXPECT_EQ(u::lookup_gc(0xff1a), u::GeneralCategory::PUNCTUATION_OTHER); + + EXPECT_EQ(u::lookup_gc(0xd7), u::GeneralCategory::SYMBOL_MATH); + EXPECT_EQ(u::lookup_gc(0x58f), u::GeneralCategory::SYMBOL_CURRENCY); + EXPECT_EQ(u::lookup_gc(0x5e), u::GeneralCategory::SYMBOL_MODIFIER); + EXPECT_EQ(u::lookup_gc(0xf03), u::GeneralCategory::SYMBOL_OTHER); + + EXPECT_EQ(u::lookup_gc(0x20), u::GeneralCategory::SEPARATOR_SPACE); + EXPECT_EQ(u::lookup_gc(0x2028), u::GeneralCategory::SEPARATOR_LINE); + EXPECT_EQ(u::lookup_gc(0x2029), u::GeneralCategory::SEPARATOR_PARAGRAPH); + + EXPECT_EQ(u::lookup_gc(0xa), u::GeneralCategory::OTHER_CONTROL); + EXPECT_EQ(u::lookup_gc(0x202d), u::GeneralCategory::OTHER_FORMAT); + EXPECT_EQ(u::lookup_gc(0xd800), u::GeneralCategory::OTHER_SURROGATE); + EXPECT_EQ(u::lookup_gc(0xdbff), u::GeneralCategory::OTHER_SURROGATE); + EXPECT_EQ(u::lookup_gc(0xdfff), u::GeneralCategory::OTHER_SURROGATE); + EXPECT_EQ(u::lookup_gc(0xe000), u::GeneralCategory::OTHER_PRIVATE_USE); + + EXPECT_EQ(u::lookup_gc(0xffffffff), u::GeneralCategory::OTHER_UNASSIGNED); +} -- cgit v1.3