From 18a622f378b403788c67fc785d30f4609caa3fc7 Mon Sep 17 00:00:00 2001 From: Joel Klinghed Date: Mon, 15 Sep 2025 20:52:51 +0200 Subject: uio: Unicode reader Reads UTF-8 and UTF-16 into UTF-8 or UTF-16 strings. If strict is true, fails at first invalid character. If strict is false, invalid characters are replaced with U+FFFD. For the replacement, I changed the behavior of uN::read_replace to only jump one byte. Otherwise a common invalid case when ISO-8859-1 or WIN-1252 are read as UTF-8 would skip many characters. If skip_bom is true any BOM at start of stream is ignored. If skip_bom is false any BOM will be included. Input format can be forced; if not, detection is used, which will try to guess and then fall back to UTF-8. --- src/gen_ugc.cc | 2 + src/io.cc | 8 - src/io.hh | 6 +- src/u16.hh | 11 +- src/u8.hh | 11 +- src/uio.cc | 700 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/uio.hh | 78 +++++++ src/umod8.hh | 11 +- 8 files changed, 806 insertions(+), 21 deletions(-) create mode 100644 src/uio.cc create mode 100644 src/uio.hh (limited to 'src') diff --git a/src/gen_ugc.cc b/src/gen_ugc.cc index e9bce11..7583272 100644 --- a/src/gen_ugc.cc +++ b/src/gen_ugc.cc @@ -149,6 +149,8 @@ std::string_view ioerr2str(io::ReadError error) { return "Invalid (compressed) data"; case io::ReadError::Error: return "Fatal error"; + case io::ReadError::MaxTooSmall: + return "Too small buffer"; } std::unreachable(); } diff --git a/src/io.cc b/src/io.cc index baf162a..e0ab787 100644 --- a/src/io.cc +++ b/src/io.cc @@ -149,10 +149,6 @@ class StringReader : public MemoryReader { } // namespace -std::expected Reader::read(std::string& str) { - return read(str.data(), str.size()); -} - std::expected Reader::repeat_read(void* dst, size_t max) { auto ret = read(dst, max); if (!ret.has_value() || ret.value() == 0 || ret.value() == max) @@ -171,10 +167,6 @@ std::expected Reader::repeat_read(void* dst, size_t max) { return offset; } -std::expected Reader::repeat_read(std::string& str) 
{ - return repeat_read(str.data(), str.size()); -} - std::expected Reader::repeat_skip(size_t max) { auto ret = skip(max); if (!ret.has_value() || ret.value() == 0 || ret.value() == max) diff --git a/src/io.hh b/src/io.hh index 315d0bb..e93b72b 100644 --- a/src/io.hh +++ b/src/io.hh @@ -10,7 +10,8 @@ namespace io { enum class ReadError { Error, - InvalidData, // Used by decompress and such + InvalidData, // invalid data read (not used by raw file) + MaxTooSmall, // max argument needs to be bigger (not used by raw file) }; enum class OpenError { @@ -27,11 +28,8 @@ class Reader { size_t max) = 0; [[nodiscard]] virtual std::expected skip(size_t max) = 0; - [[nodiscard]] std::expected read(std::string& str); - [[nodiscard]] std::expected repeat_read(void* dst, size_t max); - [[nodiscard]] std::expected repeat_read(std::string& str); [[nodiscard]] std::expected repeat_skip(size_t max); protected: diff --git a/src/u16.hh b/src/u16.hh index 781e6a4..d6a3672 100644 --- a/src/u16.hh +++ b/src/u16.hh @@ -38,19 +38,24 @@ std::expected read(T& start, const T& end) { template requires std::is_same_v, uint16_t> std::expected read_replace(T& start, - const T& end) { + const T& end, + bool eof) { + auto const tmp = start; auto ret = read(start, end); if (ret.has_value()) return *ret; switch (ret.error()) { case u::ReadError::Incomplete: + if (eof) + break; return std::unexpected(u::ReadErrorReplace::Incomplete); case u::ReadError::End: return std::unexpected(u::ReadErrorReplace::End); case u::ReadError::Invalid: - return 0xfffd; + break; } - std::unreachable(); + start = tmp + 1; + return 0xfffd; } template diff --git a/src/u8.hh b/src/u8.hh index 3c1d19e..b89f80f 100644 --- a/src/u8.hh +++ b/src/u8.hh @@ -105,19 +105,24 @@ std::expected read(T& start, const T& end) { template requires std::is_same_v, uint8_t> std::expected read_replace(T& start, - const T& end) { + const T& end, + bool eof) { + auto const tmp = start; auto ret = read(start, end); if (ret.has_value()) return *ret; 
switch (ret.error()) { case u::ReadError::Incomplete: + if (eof) + break; return std::unexpected(u::ReadErrorReplace::Incomplete); case u::ReadError::End: return std::unexpected(u::ReadErrorReplace::End); case u::ReadError::Invalid: - return 0xfffd; + break; } - std::unreachable(); + start = tmp + 1; + return 0xfffd; } template diff --git a/src/uio.cc b/src/uio.cc new file mode 100644 index 0000000..1bf5e40 --- /dev/null +++ b/src/uio.cc @@ -0,0 +1,700 @@ +#include "uio.hh" + +#include "buffer.hh" +#include "u8.hh" +#include "u16.hh" + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace { + +constexpr u::ReaderInputFormat kU16NativeInputFormat = + (std::endian::native == std::endian::big) + ? u::ReaderInputFormat::UTF16_BE + : u::ReaderInputFormat::UTF16_LE; + +constexpr u::ReaderInputFormat kU16SwapInputFormat = + (std::endian::native == std::endian::big) + ? u::ReaderInputFormat::UTF16_LE + : u::ReaderInputFormat::UTF16_BE; + +constexpr size_t kByteBufferSize = 65535; +constexpr size_t kUnicodeBufferSize = 8192; +constexpr size_t kUSwapBufferSize = kByteBufferSize / 4; + +template +class UnicodeReader : public io::Reader { + public: + UnicodeReader(std::unique_ptr in, u::ReaderConfig config) + : in_(std::move(in)), config_(config), skip_bom_(config_.skip_bom) {} + + std::expected read(void* dst, size_t max) override { + auto err = fill(); + if (err.has_value()) + return std::unexpected(err.value()); + + auto* in = reinterpret_cast(in_ptr_); + auto read_err = reader_(in, in + in_avail_, in_eof_, + u_buffer_wptr_, + u_buffer_ + kUnicodeBufferSize); + byte_buffer_->consume(in - reinterpret_cast(in_ptr_)); + if (read_err.has_value()) { + switch (read_err.value()) { + case u::ReadError::Invalid: + // Only return error if we have no bytes to output. 
+ if (u_buffer_wptr_ == u_buffer_) + return std::unexpected(io::ReadError::InvalidData); + break; + case u::ReadError::End: + break; + case u::ReadError::Incomplete: + if (in_eof_) { + // Only return error if we have no bytes to output. + if (u_buffer_wptr_ == u_buffer_) + return std::unexpected(io::ReadError::InvalidData); + } else { + // We clearly need more data, call fill again. + if (u_buffer_wptr_ == u_buffer_) + return read(dst, max); + } + break; + } + } + + if (skip_bom_ && u_buffer_wptr_ > u_buffer_) { + if (u_buffer_[0] == 0xfeff) { + --u_buffer_wptr_; + memmove(u_buffer_, u_buffer_ + 1, + (u_buffer_wptr_ - u_buffer_) * sizeof(uint32_t)); + } + skip_bom_ = false; + } + + auto* u_out = const_cast(u_buffer_); + auto* d_out = dst; + if (writer_(u_out, u_buffer_wptr_, d_out, max)) { + assert(u_out == u_buffer_wptr_); + u_buffer_wptr_ = u_buffer_; + } else if (u_out == u_buffer_) { + // Unable to write anything. + if (max == 0) return 0; + return std::unexpected(io::ReadError::MaxTooSmall); + } else { + size_t left = u_buffer_wptr_ - u_out; + memmove(u_buffer_, u_out, left * sizeof(uint32_t)); + u_buffer_wptr_ = u_buffer_ + left; + } + + return reinterpret_cast(d_out) - reinterpret_cast(dst); + } + + std::expected skip(size_t max) override { + auto tmp = std::make_unique_for_overwrite(max); + return read(tmp.get(), max); + } + + private: + std::optional fill() { + in_ptr_ = byte_buffer_->rptr(in_avail_); + if (!in_eof_ && in_avail_ < kByteBufferSize / 2) { + auto* wptr = byte_buffer_->wptr(in_avail_); + auto got = in_->read(wptr, in_avail_); + if (got.has_value()) { + byte_buffer_->commit(got.value()); + if (got.value() == 0) + in_eof_ = true; + } else { + return got.error(); + } + in_ptr_ = byte_buffer_->rptr(in_avail_); + } + return std::nullopt; + } + + std::unique_ptr in_; + u::ReaderConfig const config_; + UReader reader_; + UWriter writer_; + bool skip_bom_; + void const* in_ptr_{nullptr}; + size_t in_avail_{0}; + bool in_eof_{false}; + std::unique_ptr 
byte_buffer_{Buffer::fixed(kByteBufferSize)}; + uint32_t u_buffer_[kUnicodeBufferSize]; + uint32_t* u_buffer_wptr_{u_buffer_}; +}; + +struct U8ReaderStrict { + std::optional operator()(uint8_t const* &in, + uint8_t const* in_end, + bool /* in_eof */, + uint32_t* &out, + uint32_t const* out_end) { + std::optional ret; + while (out < out_end) { + auto const tmp = in; + auto c = u8::read(in, in_end); + if (c.has_value()) { + *(out++) = c.value(); + } else { + ret = c.error(); + in = tmp; + break; + } + } + return ret; + } +}; + +struct U8Reader { + std::optional operator()(uint8_t const* &in, + uint8_t const* in_end, + bool in_eof, uint32_t* &out, + uint32_t const* out_end) { + std::optional ret; + while (out < out_end) { + auto c = u8::read_replace(in, in_end, in_eof); + if (c.has_value()) { + *(out++) = c.value(); + } else { + switch (c.error()) { + case u::ReadErrorReplace::End: + ret = u::ReadError::End; + break; + case u::ReadErrorReplace::Incomplete: + ret = u::ReadError::Incomplete; + break; + } + break; + } + } + return ret; + } +}; + +struct U16NativeReaderStrict { + std::optional operator()(uint8_t const* &in, + uint8_t const* in_end, + bool /* in_eof */, + uint32_t* &out, + uint32_t const* out_end) { + auto* it = reinterpret_cast(in); + auto* const end = it + ((in_end - in) / 2); + if (it == end && in < in_end) + return u::ReadError::Incomplete; + std::optional ret; + while (out < out_end) { + auto const tmp = in; + auto c = u16::read(it, end); + if (c.has_value()) { + *(out++) = c.value(); + } else { + ret = c.error(); + in = tmp; + break; + } + } + in = reinterpret_cast(it); + return ret; + } +}; + +struct U16SwapReaderStrict { + std::optional operator()(uint8_t const* &in, + uint8_t const* in_end, + bool /* in_eof */, + uint32_t* &out, + uint32_t const* out_end) { + auto* it = buffer_; + auto* const end = it + ((in_end - in) / 2); + if (it == end && in < in_end) + return u::ReadError::Incomplete; + { + auto* in2 = reinterpret_cast(in); + for (auto* it2 = 
it; it2 != end; ++it2) *it2 = std::byteswap(*(in2++)); + } + std::optional ret; + while (out < out_end) { + auto const tmp = in; + auto c = u16::read(it, end); + if (c.has_value()) { + *(out++) = c.value(); + } else { + ret = c.error(); + in = tmp; + break; + } + } + in += (it - buffer_) * 2; + return ret; + } + + private: + uint16_t buffer_[kUSwapBufferSize]; +}; + +struct U16NativeReader { + std::optional operator()(uint8_t const* &in, + uint8_t const* in_end, + bool in_eof, + uint32_t* &out, + uint32_t const* out_end) { + auto* it = reinterpret_cast(in); + auto* const end = it + ((in_end - in) / 2); + if (it == end && in < in_end) { + if (out == out_end) + return std::nullopt; + if (in_eof) { + *(out++) = 0xfffd; + in = in_end; + return std::nullopt; + } + return u::ReadError::Incomplete; + } + std::optional ret; + while (out < out_end) { + auto c = u16::read_replace(it, end, in_eof); + if (c.has_value()) { + *(out++) = c.value(); + } else { + switch (c.error()) { + case u::ReadErrorReplace::End: + ret = u::ReadError::End; + break; + case u::ReadErrorReplace::Incomplete: + ret = u::ReadError::Incomplete; + break; + } + break; + } + } + in = reinterpret_cast(it); + return ret; + } +}; + +struct U16SwapReader { + std::optional operator()(uint8_t const* &in, + uint8_t const* in_end, + bool in_eof, + uint32_t* &out, + uint32_t const* out_end) { + auto* it = buffer_; + auto* const end = it + ((in_end - in) / 2); + if (it == end && in < in_end) { + if (out == out_end) + return std::nullopt; + if (in_eof) { + *(out++) = 0xfffd; + in = in_end; + return std::nullopt; + } + return u::ReadError::Incomplete; + } + { + auto* in2 = reinterpret_cast(in); + for (auto* it2 = it; it2 != end; ++it2) *it2 = std::byteswap(*(in2++)); + } + std::optional ret; + while (out < out_end) { + auto c = u16::read_replace(it, end, in_eof); + if (c.has_value()) { + *(out++) = c.value(); + } else { + switch (c.error()) { + case u::ReadErrorReplace::End: + ret = u::ReadError::End; + break; + case 
u::ReadErrorReplace::Incomplete: + ret = u::ReadError::Incomplete; + break; + } + break; + } + } + in += (it - buffer_) * 2; + return ret; + } + + private: + uint16_t buffer_[kUSwapBufferSize]; +}; + +bool detect(uint8_t const* in, uint8_t const* in_end, bool in_eof, + u::ReaderInputFormat &format) { + if (in == in_end) { + if (in_eof) { + // Doesn't matter, go with UTF-8 just to get out of "detect" + format = u::ReaderInputFormat::UTF8; + return true; + } + return false; + } + + // UTF-8 BOM ? + if (in_end - in >= 3) { + if (in[0] == 0xef && in[1] == 0xbb && in[2] == 0xbf) { + format = u::ReaderInputFormat::UTF8; + return true; + } + } + + // UTF-16 BOM ? + if (in_end - in >= 2) { + auto* data = reinterpret_cast(in); + if (data[0] == 0xFEFF) { + format = kU16NativeInputFormat; + return true; + } + if (data[0] == 0xFFFE) { + format = kU16SwapInputFormat; + return true; + } + } + + // Check for zero bytes, not allowed in UTF-8 and likely for UTF-16 + // encoding western characters. + if (in_end - in >= 2) { + if (in[0] == 0x00 && in[1] != 0x00) { + format = u::ReaderInputFormat::UTF16_BE; + return true; + } + if (in[0] != 0x00 && in[1] == 0x00) { + format = u::ReaderInputFormat::UTF16_LE; + return true; + } + } + + if (in_end - in >= 2 || in_eof) { + // We have no idea what it is, fallback to UTF-8 and let it "handle" + // whatever the input data actually is. 
+ format = u::ReaderInputFormat::UTF8; + return true; + } + return false; +} + +struct DetectReaderStrict { + std::optional operator()(uint8_t const* &in, + uint8_t const* in_end, + bool in_eof, + uint32_t* &out, + uint32_t const* out_end) { + switch (format_) { + case u::ReaderInputFormat::DETECT: + if (detect(in, in_end, in_eof, format_)) { + return operator()(in, in_end, in_eof, out, out_end); + } + return u::ReadError::Incomplete; + case u::ReaderInputFormat::UTF8: + return u8_reader_(in, in_end, in_eof, out, out_end); + case kU16NativeInputFormat: + return u16_native_reader_(in, in_end, in_eof, out, out_end); + case kU16SwapInputFormat: + return u16_swap_reader_(in, in_end, in_eof, out, out_end); + } + std::unreachable(); + } + + private: + u::ReaderInputFormat format_{u::ReaderInputFormat::DETECT}; + U8ReaderStrict u8_reader_; + U16NativeReaderStrict u16_native_reader_; + U16SwapReaderStrict u16_swap_reader_; +}; + +struct DetectReader { + std::optional operator()(uint8_t const* &in, + uint8_t const* in_end, + bool in_eof, + uint32_t* &out, + uint32_t const* out_end) { + switch (format_) { + case u::ReaderInputFormat::DETECT: + if (detect(in, in_end, in_eof, format_)) { + return operator()(in, in_end, in_eof, out, out_end); + } + return u::ReadError::Incomplete; + case u::ReaderInputFormat::UTF8: + return u8_reader_(in, in_end, in_eof, out, out_end); + case kU16NativeInputFormat: + return u16_native_reader_(in, in_end, in_eof, out, out_end); + case kU16SwapInputFormat: + return u16_swap_reader_(in, in_end, in_eof, out, out_end); + } + std::unreachable(); + } + + private: + u::ReaderInputFormat format_{u::ReaderInputFormat::DETECT}; + U8Reader u8_reader_; + U16NativeReader u16_native_reader_; + U16SwapReader u16_swap_reader_; +}; + +struct U8Writer { + bool operator()(uint32_t const* &in, uint32_t const* in_end, + void* &out, size_t out_avail) { + auto* it = reinterpret_cast(out); + auto* const end = it + out_avail; + bool ret = true; + while (in < in_end) { + 
if (!u8::write(it, end, *in)) { + ret = false; + break; + } + ++in; + } + out = it; + return ret; + } +}; + +struct U16NativeWriter { + public: + bool operator()(uint32_t const* &in, uint32_t const* in_end, + void* &out, size_t out_avail) { + auto* it = reinterpret_cast(out); + auto* const end = it + (out_avail / 2); + bool ret = true; + while (in < in_end) { + if (!u16::write(it, end, *in)) { + ret = false; + break; + } + ++in; + } + out = it; + return ret; + } +}; + +/* +struct U16SwapWriter { + bool operator()(uint32_t const* &in, uint32_t const* in_end, + void* &out, size_t out_avail) { + auto* it = reinterpret_cast(out); + auto* const end = it + (out_avail / 2); + bool ret = true; + while (in < in_end) { + auto tmp = it; + if (!u16::write(it, end, *in)) { + ret = false; + break; + } + ++in; + *tmp = std::byteswap(*tmp); + if (++tmp != it) *tmp = std::byteswap(*tmp); + } + out = it; + return ret; + } +}; +*/ + +} // namespace + +namespace u8 { + +namespace { + +template +class UnicodeReaderU8Writer : public UnicodeReader, + public virtual Reader { + public: + UnicodeReaderU8Writer(std::unique_ptr in, + u::ReaderConfig config) + : UnicodeReader(std::move(in), config) {} + + std::expected read(void* dst, size_t max) override { + return UnicodeReader::read(dst, max); + } + + std::expected skip(size_t max) override { + return UnicodeReader::skip(max); + } +}; + +} // namespace + + +std::expected Reader::read(std::string& data, + size_t max) { + if (max > data.size()) + data.resize(max); + auto ret = read(data.data(), max); + if (ret.has_value()) { + data.resize(ret.value()); + } + return ret; +} + +std::expected Reader::repeat_read( + std::string& data, size_t max) { + if (max > data.size()) + data.resize(max); + auto ret = repeat_read(data.data(), max); + if (ret.has_value()) { + data.resize(ret.value()); + } + return ret; +} + +std::unique_ptr open( + std::unique_ptr reader, u::ReaderConfig config) { + switch (config.input) { + case u::ReaderInputFormat::UTF8: + 
if (config.strict) + return std::make_unique>( + std::move(reader), config); + return std::make_unique>( + std::move(reader), config); + case kU16NativeInputFormat: + if (config.strict) + return std::make_unique>( + std::move(reader), config); + return std::make_unique>( + std::move(reader), config); + break; + case kU16SwapInputFormat: + if (config.strict) + return std::make_unique>( + std::move(reader), config); + return std::make_unique>( + std::move(reader), config); + break; + case u::ReaderInputFormat::DETECT: + if (config.strict) + return std::make_unique>( + std::move(reader), config); + return std::make_unique>( + std::move(reader), config); + break; + } + std::unreachable(); +} + +std::expected, io::OpenError> open( + const std::string& file_path, u::ReaderConfig config) { + auto ret = io::open(file_path); + if (ret.has_value()) + return open(std::move(ret.value()), config); + return std::unexpected(ret.error()); +} + +std::expected, io::OpenError> openat( + int dirfd, const std::string& file_path, u::ReaderConfig config) { + auto ret = io::openat(dirfd, file_path); + if (ret.has_value()) + return open(std::move(ret.value()), config); + return std::unexpected(ret.error()); +} + +} // namespace u8 + +namespace u16 { + +namespace { + +template +class UnicodeReaderU16NativeWriter : public UnicodeReader, + public virtual Reader { + public: + UnicodeReaderU16NativeWriter(std::unique_ptr in, + u::ReaderConfig config) + : UnicodeReader(std::move(in), config) {} + + std::expected read(void* dst, size_t max) override { + return UnicodeReader::read(dst, max); + } + + std::expected skip(size_t max) override { + return UnicodeReader::skip(max); + } +}; + +} // namespace + +std::expected Reader::read(std::u16string& data, + size_t max) { + if (max > data.size()) + data.resize(max); + auto ret = read(data.data(), max * 2); + if (ret.has_value()) { + data.resize(ret.value()); + return ret.value() / 2; + } + return ret; +} + +std::expected Reader::repeat_read( + 
std::u16string& data, size_t max) { + if (max > data.size()) + data.resize(max); + auto ret = repeat_read(data.data(), max * 2); + if (ret.has_value()) { + data.resize(ret.value()); + return ret.value() / 2; + } + return ret; +} + +std::unique_ptr open( + std::unique_ptr reader, u::ReaderConfig config) { + switch (config.input) { + case u::ReaderInputFormat::UTF8: + if (config.strict) + return std::make_unique>( + std::move(reader), config); + return std::make_unique>( + std::move(reader), config); + case kU16NativeInputFormat: + if (config.strict) + return std::make_unique>(std::move(reader), config); + return std::make_unique>( + std::move(reader), config); + break; + case kU16SwapInputFormat: + if (config.strict) + return std::make_unique>(std::move(reader), config); + return std::make_unique>( + std::move(reader), config); + break; + case u::ReaderInputFormat::DETECT: + if (config.strict) + return std::make_unique>(std::move(reader), config); + return std::make_unique>( + std::move(reader), config); + break; + } + std::unreachable(); +} + +std::expected, io::OpenError> open( + const std::string& file_path, u::ReaderConfig config) { + auto ret = io::open(file_path); + if (ret.has_value()) + return open(std::move(ret.value()), config); + return std::unexpected(ret.error()); +} + +std::expected, io::OpenError> openat( + int dirfd, const std::string& file_path, u::ReaderConfig config) { + auto ret = io::openat(dirfd, file_path); + if (ret.has_value()) + return open(std::move(ret.value()), config); + return std::unexpected(ret.error()); +} + +} // namespace u16 diff --git a/src/uio.hh b/src/uio.hh new file mode 100644 index 0000000..a0911a1 --- /dev/null +++ b/src/uio.hh @@ -0,0 +1,78 @@ +#ifndef UIO_HH +#define UIO_HH + +#include "io.hh" // IWYU pragma: export + +#include +#include +#include + +namespace u { + +enum class ReaderInputFormat { + UTF8, + UTF16_BE, + UTF16_LE, + DETECT, +}; + +struct ReaderConfig { + // If false (default), invalid data is replaced with 
U+FFFD + bool strict{false}; + // Input format + ReaderInputFormat input{ReaderInputFormat::DETECT}; + // If true (default), any BOM found at start of stream will be skipped + bool skip_bom{true}; +}; + +} // namespace u + +namespace u8 { + +class Reader : public io::Reader { + public: + using io::Reader::read; + using io::Reader::repeat_read; + + [[nodiscard]] std::expected read( + std::string& data, size_t max); + + [[nodiscard]] std::expected repeat_read( + std::string& data, size_t max); +}; + +[[nodiscard]] std::unique_ptr open( + std::unique_ptr reader, u::ReaderConfig config = {}); + +[[nodiscard]] std::expected, io::OpenError> open( + const std::string& file_path, u::ReaderConfig config = {}); +[[nodiscard]] std::expected, io::OpenError> openat( + int dirfd, const std::string& file_path, u::ReaderConfig config = {}); + +} // namespace u8 + +namespace u16 { + +class Reader : public io::Reader { + public: + using io::Reader::read; + using io::Reader::repeat_read; + + [[nodiscard]] std::expected read( + std::u16string& data, size_t max); + + [[nodiscard]] std::expected repeat_read( + std::u16string& data, size_t max); +}; + +[[nodiscard]] std::unique_ptr open( + std::unique_ptr reader, u::ReaderConfig config = {}); + +[[nodiscard]] std::expected, io::OpenError> open( + const std::string& file_path, u::ReaderConfig config = {}); +[[nodiscard]] std::expected, io::OpenError> openat( + int dirfd, const std::string& file_path, u::ReaderConfig config = {}); + +} // namespace u16 + +#endif // UIO_HH diff --git a/src/umod8.hh b/src/umod8.hh index 117591f..b91b199 100644 --- a/src/umod8.hh +++ b/src/umod8.hh @@ -113,19 +113,24 @@ std::expected read(T& start, const T& end) { template requires std::is_same_v, uint8_t> std::expected read_replace(T& start, - const T& end) { + const T& end, + bool eof) { + auto const tmp = start; auto ret = read(start, end); if (ret.has_value()) return *ret; switch (ret.error()) { case u::ReadError::Incomplete: + if (eof) + break; return 
std::unexpected(u::ReadErrorReplace::Incomplete); case u::ReadError::End: return std::unexpected(u::ReadErrorReplace::End); case u::ReadError::Invalid: - return 0xfffd; + break; } - std::unreachable(); + start = tmp + 1; + return 0xfffd; } template -- cgit v1.3