From 18a622f378b403788c67fc785d30f4609caa3fc7 Mon Sep 17 00:00:00 2001 From: Joel Klinghed Date: Mon, 15 Sep 2025 20:52:51 +0200 Subject: uio: Unicode reader Reads UTF-8 and UTF-16 into UTF-8 or UTF-16 strings. If strict is true, reading fails at the first invalid character. If strict is false, invalid characters are replaced with U+FFFD. For the replacement, I changed the behavior of uN::read_replace to only skip one byte. Otherwise the common invalid case where ISO-8859-1 or WIN-1252 is read as UTF-8 would skip many characters. If skip_bom is true, any BOM at the start of the stream is ignored. If skip_bom is false, any BOM will be included. The input format can be forced; if not, detect is used, which will try to guess and then fall back to UTF-8. --- src/io.hh | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'src/io.hh') diff --git a/src/io.hh b/src/io.hh index 315d0bb..e93b72b 100644 --- a/src/io.hh +++ b/src/io.hh @@ -10,7 +10,8 @@ namespace io { enum class ReadError { Error, - InvalidData, // Used by decompress and such + InvalidData, // invalid data read (not used by raw file) + MaxTooSmall, // max argument needs to be bigger (not used by raw file) }; enum class OpenError { @@ -27,11 +28,8 @@ class Reader { size_t max) = 0; [[nodiscard]] virtual std::expected skip(size_t max) = 0; - [[nodiscard]] std::expected read(std::string& str); - [[nodiscard]] std::expected repeat_read(void* dst, size_t max); - [[nodiscard]] std::expected repeat_read(std::string& str); [[nodiscard]] std::expected repeat_skip(size_t max); protected: -- cgit v1.3