// Unicode general category enumeration (src/ugc.hh).
//
// Provenance: introduced by commit 32e14551a90e85000e41b3f0445d34d58a1431e4
// (Joel Klinghed, 2025-09-10), "Add unicode general category lookup".
// Per that commit, the lookup tables are generated from UnicodeData.txt; to
// do that, the commit adds the gen_ugc tool, which uses the csv, buffers,
// line, io and other modules.
#ifndef UGC_HH
#define UGC_HH

#include <cstdint>

namespace u {

/// General category of a code point.
///
/// The enumerators are grouped by major class (letter, mark, number,
/// punctuation, symbol, separator, other). The two-letter abbreviations in
/// the trailing comments are the corresponding Unicode General_Category
/// short names.
///
/// NOTE(review): the generated lookup tables presumably store these values,
/// so keep the order and numeric values stable; only add value-preserving
/// aliases or append new enumerators at the end.
enum class GeneralCategory : std::uint8_t {
  LETTER_UPPERCASE,  // Lu
  LETTER_LOWERCASE,  // Ll
  LETTER_TITLECASE,  // Lt
  LETTER_MODIFIER,   // Lm
  LETTER_OTHER,      // Lo

  MARK_NONSPACING,  // Mn
  // Mc. The original spelling is kept for backward compatibility; prefer the
  // correctly spelt alias declared right after it.
  MARK_SPACING_COMBINDING,
  MARK_SPACING_COMBINING = MARK_SPACING_COMBINDING,
  // Presumably Me (Enclosing Mark): it is the only mark category left after
  // Mn and Mc. The original name is kept for backward compatibility;
  // MARK_ENCLOSING is a value-identical alias with the conventional name.
  MARK_SPACING_ENCLOSING,
  MARK_ENCLOSING = MARK_SPACING_ENCLOSING,

  NUMBER_DIGIT,   // Nd
  NUMBER_LETTER,  // Nl
  NUMBER_OTHER,   // No

  PUNCTUATION_CONNECTOR,      // Pc
  PUNCTUATION_DASH,           // Pd
  PUNCTUATION_OPEN,           // Ps
  PUNCTUATION_CLOSE,          // Pe
  PUNCTUATION_INITIAL_QUOTE,  // Pi
  PUNCTUATION_FINAL_QUOTE,    // Pf
  PUNCTUATION_OTHER,          // Po

  SYMBOL_MATH,      // Sm
  SYMBOL_CURRENCY,  // Sc
  SYMBOL_MODIFIER,  // Sk
  SYMBOL_OTHER,     // So

  SEPARATOR_SPACE,      // Zs
  SEPARATOR_LINE,       // Zl
  SEPARATOR_PARAGRAPH,  // Zp

  OTHER_CONTROL,      // Cc
  OTHER_FORMAT,       // Cf
  OTHER_SURROGATE,    // Cs
  OTHER_PRIVATE_USE,  // Co
  OTHER_UNASSIGNED,   // Cn
};

}  // namespace u

#endif  // UGC_HH