uio: Unicode reader

Reads UTF-8 and UTF-16 into UTF-8 or UTF-16 strings. If strict is true, reading fails at the first invalid character. If strict is false, invalid characters are replaced with U+FFFD. For the replacement, I changed the behavior of uN::read_replace to only advance one byte; otherwise the common invalid case, where ISO-8859-1 or Windows-1252 text is read as UTF-8, would skip many characters. If skip_bom is true, any BOM at the start of the stream is ignored. If skip_bom is false, any BOM will be included. The input format can be forced; if it is not, detect is used, which will try to guess the format and then fall back to UTF-8.
author: Joel Klinghed <the_jk@spawned.biz> 2025-09-15 20:52:51 +0200
committer: Joel Klinghed <the_jk@spawned.biz> 2025-09-15 20:52:51 +0200
commit: 18a622f378b403788c67fc785d30f4609caa3fc7 (patch)
tree: 9d13f4ef49a06c9e4837487f61bc90b734ad9b9a /src/io.cc
parent: 28c6425e4ed1cd2eab538e7cba08c18aa83d8af5 (diff)
1 files changed, 0 insertions, 8 deletions
diff --git a/src/io.cc b/src/io.cc
index baf162a..e0ab787 100644
--- a/src/io.cc
+++ b/src/io.cc
@@ -149,10 +149,6 @@ class StringReader : public MemoryReader {
 
 }  // namespace
 
-std::expected<size_t, ReadError> Reader::read(std::string& str) {
-  return read(str.data(), str.size());
-}
-
 std::expected<size_t, ReadError> Reader::repeat_read(void* dst, size_t max) {
   auto ret = read(dst, max);
   if (!ret.has_value() || ret.value() == 0 || ret.value() == max)
@@ -171,10 +167,6 @@ std::expected<size_t, ReadError> Reader::repeat_read(void* dst, size_t max) {
   return offset;
 }
 
-std::expected<size_t, ReadError> Reader::repeat_read(std::string& str) {
-  return repeat_read(str.data(), str.size());
-}
-
 std::expected<size_t, ReadError> Reader::repeat_skip(size_t max) {
   auto ret = skip(max);
   if (!ret.has_value() || ret.value() == 0 || ret.value() == max)
author	Joel Klinghed <the_jk@spawned.biz>	2025-09-15 20:52:51 +0200
committer	Joel Klinghed <the_jk@spawned.biz>	2025-09-15 20:52:51 +0200
commit	18a622f378b403788c67fc785d30f4609caa3fc7 (patch)
tree	9d13f4ef49a06c9e4837487f61bc90b734ad9b9a /src/io.cc
parent	28c6425e4ed1cd2eab538e7cba08c18aa83d8af5 (diff)