summaryrefslogtreecommitdiff
path: root/src/io.hh
diff options
context:
space:
mode:
authorJoel Klinghed <the_jk@spawned.biz>2025-09-15 20:52:51 +0200
committerJoel Klinghed <the_jk@spawned.biz>2025-09-15 20:52:51 +0200
commit18a622f378b403788c67fc785d30f4609caa3fc7 (patch)
tree9d13f4ef49a06c9e4837487f61bc90b734ad9b9a /src/io.hh
parent28c6425e4ed1cd2eab538e7cba08c18aa83d8af5 (diff)
uio: Unicode reader
Reads UTF-8 and UTF-16 into UTF-8 or UTF-16 strings. If strict is true, fails at the first invalid character. If strict is false, invalid characters are replaced with U+FFFD. For the replacement, I changed the behavior of uN::read_replace to only jump one byte. Otherwise, a common invalid case, when ISO-8859-1 or WIN-1252 is read as UTF-8, would skip many characters. If skip_bom is true, any BOM at the start of the stream is ignored. If skip_bom is false, any BOM will be included. Input format can be forced; if not, detect is used, which will try to guess and then fall back to UTF-8.
Diffstat (limited to 'src/io.hh')
-rw-r--r--src/io.hh6
1 files changed, 2 insertions, 4 deletions
diff --git a/src/io.hh b/src/io.hh
index 315d0bb..e93b72b 100644
--- a/src/io.hh
+++ b/src/io.hh
@@ -10,7 +10,8 @@ namespace io {
enum class ReadError {
Error,
- InvalidData, // Used by decompress and such
+ InvalidData, // invalid data read (not used by raw file)
+ MaxTooSmall, // max argument needs to be bigger (not used by raw file)
};
enum class OpenError {
@@ -27,11 +28,8 @@ class Reader {
size_t max) = 0;
[[nodiscard]] virtual std::expected<size_t, ReadError> skip(size_t max) = 0;
- [[nodiscard]] std::expected<size_t, ReadError> read(std::string& str);
-
[[nodiscard]] std::expected<size_t, ReadError> repeat_read(void* dst,
size_t max);
- [[nodiscard]] std::expected<size_t, ReadError> repeat_read(std::string& str);
[[nodiscard]] std::expected<size_t, ReadError> repeat_skip(size_t max);
protected: