// src/str.cc - string splitting helpers.
//
// Created by commit 32e14551a90e85000e41b3f0445d34d58a1431e4
// ("Add unicode general category lookup", Joel Klinghed, 2025-09-10).

// The project header is pulled in whenever it is reachable; the guard only
// lets this translation unit be compiled on its own as well.
#if __has_include("str.hh")
#include "str.hh"
#endif

#include <cstddef>
#include <string_view>
#include <vector>

namespace str {

// Splits `str` at every occurrence of `separator` and stores the pieces in
// `out`, in order of appearance.
//
// - `out` is cleared first, so any previous content is discarded.
// - With `keep_empty == true`, empty pieces (leading, trailing, or between two
//   adjacent separators) are kept; an empty input yields one empty piece.
// - With `keep_empty == false`, empty pieces are dropped; an empty input (or
//   one made up only of separators) yields no pieces at all.
//
// The stored views refer to the same character data as `str`; they are only
// valid for as long as that data is.
void split(std::string_view str, std::vector<std::string_view>& out,
           char separator, bool keep_empty) {
  out.clear();

  std::size_t offset = 0;  // Start of the piece currently being scanned.
  while (true) {
    const auto next = str.find(separator, offset);
    if (next == std::string_view::npos) {
      // No separator left: whatever remains is the last piece. It is empty
      // when the input is empty or ends with a separator.
      if (keep_empty || offset < str.size())
        out.push_back(str.substr(offset));
      break;
    }
    // `offset == next` means two separators are adjacent (or the input starts
    // with one), i.e. the piece is empty.
    if (keep_empty || offset < next)
      out.push_back(str.substr(offset, next - offset));
    offset = next + 1;  // Continue just past the separator.
  }
}

// Convenience overload of split() that returns the pieces in a new vector.
// The same rules apply, including the lifetime of the returned views.
std::vector<std::string_view> split(std::string_view str,
                                    char separator, bool keep_empty) {
  std::vector<std::string_view> vec;
  split(str, vec, separator, keep_empty);
  return vec;
}

}  // namespace str