From 18a622f378b403788c67fc785d30f4609caa3fc7 Mon Sep 17 00:00:00 2001 From: Joel Klinghed Date: Mon, 15 Sep 2025 20:52:51 +0200 Subject: uio: Unicode reader Reads UTF-8 and UTF-16 into UTF-8 or UTF-16 strings. If strict is true, fails at the first invalid character. If strict is false, invalid characters are replaced with U+FFFD. For the replacement, I changed the behavior of uN::read_replace to only jump one byte. Otherwise, the common invalid case where ISO-8859-1 or WIN-1252 is read as UTF-8 would skip many characters. If skip_bom is true, any BOM at the start of the stream is ignored. If skip_bom is false, any BOM will be included. The input format can be forced; if not, detect is used, which will try to guess and then fall back to UTF-8. --- meson.build | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'meson.build') diff --git a/meson.build b/meson.build index b0708c2..94519bc 100644 --- a/meson.build +++ b/meson.build @@ -149,6 +149,20 @@ unicode_lib = library( ) unicode_dep = declare_dependency(link_with: unicode_lib) +uio_lib = library( + 'uio', + sources: [ + 'src/uio.cc', + 'src/uio.hh', + ], + include_directories: inc, + dependencies: [buffer_dep, io_dep, unicode_dep], +) +uio_dep = declare_dependency( + link_with: uio_lib, + dependencies: [buffer_dep, io_dep, unicode_dep], +) + jkc = executable( 'jkc', sources: [ @@ -265,6 +279,17 @@ test('decompress', executable( ], )) +test('uio', executable( + 'test_uio', + sources: ['test/uio.cc'], + include_directories: inc, + dependencies: [ + io_test_helper_dep, + uio_dep, + test_dependencies, + ], +)) + run_clang_tidy = find_program('run-clang-tidy', required: false) if run_clang_tidy.found() -- cgit v1.3