From 32e14551a90e85000e41b3f0445d34d58a1431e4 Mon Sep 17 00:00:00 2001
From: Joel Klinghed
Date: Wed, 10 Sep 2025 22:12:22 +0200
Subject: Add unicode general category lookup

Generate the lookup tables from UnicodeData.txt. To do that, add gen_ugc,
which uses csv, buffers, line, io and other modules to do the job.
---
 src/u.cc | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 src/u.cc

(limited to 'src/u.cc')

diff --git a/src/u.cc b/src/u.cc
new file mode 100644
index 0000000..3c06ba8
--- /dev/null
+++ b/src/u.cc
@@ -0,0 +1,54 @@
+#include "u.hh"
+
+#include <cstdint>
+#include <utility>
+
+namespace u {
+
+// Per-version lookup functions, one for each Version handled by lookup_gc.
+// These are generated by gen_ugc; only their declarations appear here.
+GeneralCategory u6_2_0_lookup_gc(uint32_t code);
+GeneralCategory u8_0_0_lookup_gc(uint32_t code);
+GeneralCategory u10_0_0_lookup_gc(uint32_t code);
+GeneralCategory u11_0_0_lookup_gc(uint32_t code);
+GeneralCategory u12_1_0_lookup_gc(uint32_t code);
+GeneralCategory u13_0_0_lookup_gc(uint32_t code);
+GeneralCategory u14_0_0_lookup_gc(uint32_t code);
+GeneralCategory u15_0_0_lookup_gc(uint32_t code);
+GeneralCategory u15_1_0_lookup_gc(uint32_t code);
+GeneralCategory u16_0_0_lookup_gc(uint32_t code);
+
+// Looks up the Unicode general category of `code` by forwarding to the
+// gen_ugc-generated uX_Y_Z_lookup_gc function that matches `version`, and
+// returns that function's result unchanged.
+//
+// There is no default case: control only leaves the switch when `version`
+// matches none of the cases below, and that path is marked
+// std::unreachable(), i.e. undefined behaviour if it is ever taken.
+GeneralCategory lookup_gc(uint32_t code, Version version) {
+  switch (version) {
+    case Version::u6_2_0:
+      return u6_2_0_lookup_gc(code);
+    case Version::u8_0_0:
+      return u8_0_0_lookup_gc(code);
+    case Version::u10_0_0:
+      return u10_0_0_lookup_gc(code);
+    case Version::u11_0_0:
+      return u11_0_0_lookup_gc(code);
+    case Version::u12_1_0:
+      return u12_1_0_lookup_gc(code);
+    case Version::u13_0_0:
+      return u13_0_0_lookup_gc(code);
+    case Version::u14_0_0:
+      return u14_0_0_lookup_gc(code);
+    case Version::u15_0_0:
+      return u15_0_0_lookup_gc(code);
+    case Version::u15_1_0:
+      return u15_1_0_lookup_gc(code);
+    case Version::u16_0_0:
+      return u16_0_0_lookup_gc(code);
+  }
+  std::unreachable();
+}
+
+}  // namespace u
-- 
cgit v1.3