From 32e14551a90e85000e41b3f0445d34d58a1431e4 Mon Sep 17 00:00:00 2001 From: Joel Klinghed Date: Wed, 10 Sep 2025 22:12:22 +0200 Subject: Add unicode general category lookup Generate the lookup tables from UnicodeData.txt, do to that, add gen_ugc, which uses csv, buffers, line, io and other modules to do the job. --- test/u.cc | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'test/u.cc') diff --git a/test/u.cc b/test/u.cc index 933a4f2..de04e39 100644 --- a/test/u.cc +++ b/test/u.cc @@ -681,3 +681,45 @@ TEST(u16, invalid) { EXPECT_EQ(it, literal.end()); } } + +TEST(u, lookup_gc) { + EXPECT_EQ(u::lookup_gc(0x41), u::GeneralCategory::LETTER_UPPERCASE); + EXPECT_EQ(u::lookup_gc(0x61), u::GeneralCategory::LETTER_LOWERCASE); + EXPECT_EQ(u::lookup_gc(0x1c5), u::GeneralCategory::LETTER_TITLECASE); + EXPECT_EQ(u::lookup_gc(0x374), u::GeneralCategory::LETTER_MODIFIER); + EXPECT_EQ(u::lookup_gc(0x34ff), u::GeneralCategory::LETTER_OTHER); + + EXPECT_EQ(u::lookup_gc(0x483), u::GeneralCategory::MARK_NONSPACING); + EXPECT_EQ(u::lookup_gc(0x93b), u::GeneralCategory::MARK_SPACING_COMBINDING); + EXPECT_EQ(u::lookup_gc(0x20de), u::GeneralCategory::MARK_SPACING_ENCLOSING); + + EXPECT_EQ(u::lookup_gc(0xa620), u::GeneralCategory::NUMBER_DIGIT); + EXPECT_EQ(u::lookup_gc(0xa6e6), u::GeneralCategory::NUMBER_LETTER); + EXPECT_EQ(u::lookup_gc(0xa830), u::GeneralCategory::NUMBER_OTHER); + + EXPECT_EQ(u::lookup_gc(0xfe33), u::GeneralCategory::PUNCTUATION_CONNECTOR); + EXPECT_EQ(u::lookup_gc(0xfe58), u::GeneralCategory::PUNCTUATION_DASH); + EXPECT_EQ(u::lookup_gc(0xff08), u::GeneralCategory::PUNCTUATION_OPEN); + EXPECT_EQ(u::lookup_gc(0xff09), u::GeneralCategory::PUNCTUATION_CLOSE); + EXPECT_EQ(u::lookup_gc(0xab), u::GeneralCategory::PUNCTUATION_INITIAL_QUOTE); + EXPECT_EQ(u::lookup_gc(0xbb), u::GeneralCategory::PUNCTUATION_FINAL_QUOTE); + EXPECT_EQ(u::lookup_gc(0xff1a), u::GeneralCategory::PUNCTUATION_OTHER); + + EXPECT_EQ(u::lookup_gc(0xd7), u::GeneralCategory::SYMBOL_MATH); + EXPECT_EQ(u::lookup_gc(0x58f), u::GeneralCategory::SYMBOL_CURRENCY); + EXPECT_EQ(u::lookup_gc(0x5e), u::GeneralCategory::SYMBOL_MODIFIER); + EXPECT_EQ(u::lookup_gc(0xf03), u::GeneralCategory::SYMBOL_OTHER); + + EXPECT_EQ(u::lookup_gc(0x20), u::GeneralCategory::SEPARATOR_SPACE); + EXPECT_EQ(u::lookup_gc(0x2028), u::GeneralCategory::SEPARATOR_LINE); + EXPECT_EQ(u::lookup_gc(0x2029), u::GeneralCategory::SEPARATOR_PARAGRAPH); + + EXPECT_EQ(u::lookup_gc(0xa), u::GeneralCategory::OTHER_CONTROL); + EXPECT_EQ(u::lookup_gc(0x202d), u::GeneralCategory::OTHER_FORMAT); + EXPECT_EQ(u::lookup_gc(0xd800), u::GeneralCategory::OTHER_SURROGATE); + EXPECT_EQ(u::lookup_gc(0xdbff), u::GeneralCategory::OTHER_SURROGATE); + EXPECT_EQ(u::lookup_gc(0xdfff), u::GeneralCategory::OTHER_SURROGATE); + EXPECT_EQ(u::lookup_gc(0xe000), u::GeneralCategory::OTHER_PRIVATE_USE); + + EXPECT_EQ(u::lookup_gc(0xffffffff), u::GeneralCategory::OTHER_UNASSIGNED); +} -- cgit v1.3