summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--meson.build25
-rw-r--r--src/gen_ugc.cc2
-rw-r--r--src/io.cc8
-rw-r--r--src/io.hh6
-rw-r--r--src/u16.hh11
-rw-r--r--src/u8.hh11
-rw-r--r--src/uio.cc700
-rw-r--r--src/uio.hh78
-rw-r--r--src/umod8.hh11
-rw-r--r--test/io.cc10
-rw-r--r--test/u.cc87
-rw-r--r--test/uio.cc620
12 files changed, 1505 insertions, 64 deletions
diff --git a/meson.build b/meson.build
index b0708c2..94519bc 100644
--- a/meson.build
+++ b/meson.build
@@ -149,6 +149,20 @@ unicode_lib = library(
)
unicode_dep = declare_dependency(link_with: unicode_lib)
+uio_lib = library(
+ 'uio',
+ sources: [
+ 'src/uio.cc',
+ 'src/uio.hh',
+ ],
+ include_directories: inc,
+ dependencies: [buffer_dep, io_dep, unicode_dep],
+)
+uio_dep = declare_dependency(
+ link_with: uio_lib,
+ dependencies: [buffer_dep, io_dep, unicode_dep],
+)
+
jkc = executable(
'jkc',
sources: [
@@ -265,6 +279,17 @@ test('decompress', executable(
],
))
+test('uio', executable(
+ 'test_uio',
+ sources: ['test/uio.cc'],
+ include_directories: inc,
+ dependencies: [
+ io_test_helper_dep,
+ uio_dep,
+ test_dependencies,
+ ],
+))
+
run_clang_tidy = find_program('run-clang-tidy', required: false)
if run_clang_tidy.found()
diff --git a/src/gen_ugc.cc b/src/gen_ugc.cc
index e9bce11..7583272 100644
--- a/src/gen_ugc.cc
+++ b/src/gen_ugc.cc
@@ -149,6 +149,8 @@ std::string_view ioerr2str(io::ReadError error) {
return "Invalid (compressed) data";
case io::ReadError::Error:
return "Fatal error";
+ case io::ReadError::MaxTooSmall:
+ return "Too small buffer";
}
std::unreachable();
}
diff --git a/src/io.cc b/src/io.cc
index baf162a..e0ab787 100644
--- a/src/io.cc
+++ b/src/io.cc
@@ -149,10 +149,6 @@ class StringReader : public MemoryReader {
} // namespace
-std::expected<size_t, ReadError> Reader::read(std::string& str) {
- return read(str.data(), str.size());
-}
-
std::expected<size_t, ReadError> Reader::repeat_read(void* dst, size_t max) {
auto ret = read(dst, max);
if (!ret.has_value() || ret.value() == 0 || ret.value() == max)
@@ -171,10 +167,6 @@ std::expected<size_t, ReadError> Reader::repeat_read(void* dst, size_t max) {
return offset;
}
-std::expected<size_t, ReadError> Reader::repeat_read(std::string& str) {
- return repeat_read(str.data(), str.size());
-}
-
std::expected<size_t, ReadError> Reader::repeat_skip(size_t max) {
auto ret = skip(max);
if (!ret.has_value() || ret.value() == 0 || ret.value() == max)
diff --git a/src/io.hh b/src/io.hh
index 315d0bb..e93b72b 100644
--- a/src/io.hh
+++ b/src/io.hh
@@ -10,7 +10,8 @@ namespace io {
enum class ReadError {
Error,
- InvalidData, // Used by decompress and such
+ InvalidData, // invalid data read (not used by raw file)
+ MaxTooSmall, // max argument needs to be bigger (not used by raw file)
};
enum class OpenError {
@@ -27,11 +28,8 @@ class Reader {
size_t max) = 0;
[[nodiscard]] virtual std::expected<size_t, ReadError> skip(size_t max) = 0;
- [[nodiscard]] std::expected<size_t, ReadError> read(std::string& str);
-
[[nodiscard]] std::expected<size_t, ReadError> repeat_read(void* dst,
size_t max);
- [[nodiscard]] std::expected<size_t, ReadError> repeat_read(std::string& str);
[[nodiscard]] std::expected<size_t, ReadError> repeat_skip(size_t max);
protected:
diff --git a/src/u16.hh b/src/u16.hh
index 781e6a4..d6a3672 100644
--- a/src/u16.hh
+++ b/src/u16.hh
@@ -38,19 +38,24 @@ std::expected<uint32_t, u::ReadError> read(T& start, const T& end) {
+// Like read(), but never reports Invalid: invalid input yields U+FFFD and
+// `start` ends up exactly one element past where it began. When `eof` is
+// true an Incomplete sequence is replaced the same way; otherwise Incomplete
+// is reported to the caller. End is always reported.
template<std::forward_iterator T>
requires std::is_same_v<std::iter_value_t<T>, uint16_t>
std::expected<uint32_t, u::ReadErrorReplace> read_replace(T& start,
- const T& end) {
+ const T& end,
+ bool eof) {
+ auto const tmp = start;
auto ret = read(start, end);
if (ret.has_value())
return *ret;
switch (ret.error()) {
case u::ReadError::Incomplete:
+ if (eof)
+ break;
return std::unexpected(u::ReadErrorReplace::Incomplete);
case u::ReadError::End:
return std::unexpected(u::ReadErrorReplace::End);
case u::ReadError::Invalid:
- return 0xfffd;
+ break;
}
- std::unreachable();
+ // std::next() rather than `tmp + 1`: T is only required to be a forward
+ // iterator, which need not support operator+.
+ start = std::next(tmp);
+ return 0xfffd;
}
template<std::forward_iterator T>
diff --git a/src/u8.hh b/src/u8.hh
index 3c1d19e..b89f80f 100644
--- a/src/u8.hh
+++ b/src/u8.hh
@@ -105,19 +105,24 @@ std::expected<uint32_t, u::ReadError> read(T& start, const T& end) {
+// Like read(), but never reports Invalid: invalid input yields U+FFFD and
+// `start` ends up exactly one element past where it began. When `eof` is
+// true an Incomplete sequence is replaced the same way; otherwise Incomplete
+// is reported to the caller. End is always reported.
template<std::forward_iterator T>
requires std::is_same_v<std::iter_value_t<T>, uint8_t>
std::expected<uint32_t, u::ReadErrorReplace> read_replace(T& start,
- const T& end) {
+ const T& end,
+ bool eof) {
+ auto const tmp = start;
auto ret = read(start, end);
if (ret.has_value())
return *ret;
switch (ret.error()) {
case u::ReadError::Incomplete:
+ if (eof)
+ break;
return std::unexpected(u::ReadErrorReplace::Incomplete);
case u::ReadError::End:
return std::unexpected(u::ReadErrorReplace::End);
case u::ReadError::Invalid:
- return 0xfffd;
+ break;
}
- std::unreachable();
+ // std::next() rather than `tmp + 1`: T is only required to be a forward
+ // iterator, which need not support operator+.
+ start = std::next(tmp);
+ return 0xfffd;
}
template<std::forward_iterator T>
diff --git a/src/uio.cc b/src/uio.cc
new file mode 100644
index 0000000..1bf5e40
--- /dev/null
+++ b/src/uio.cc
@@ -0,0 +1,700 @@
+#include "uio.hh"
+
+#include "buffer.hh"
+#include "u8.hh"
+#include "u16.hh"
+
+#include <bit>
+#include <cassert>
+#include <cstring>
+#include <expected>
+#include <memory>
+#include <optional>
+#include <string>
+#include <utility>
+
+namespace {
+
+// True when the host stores the most significant byte first.
+constexpr bool kHostIsBigEndian = std::endian::native == std::endian::big;
+
+// UTF-16 byte order that matches the host, so code units can be used as-is.
+constexpr u::ReaderInputFormat kU16NativeInputFormat =
+    kHostIsBigEndian ? u::ReaderInputFormat::UTF16_BE
+                     : u::ReaderInputFormat::UTF16_LE;
+
+// UTF-16 byte order opposite to the host; code units need a byte swap.
+constexpr u::ReaderInputFormat kU16SwapInputFormat =
+    kHostIsBigEndian ? u::ReaderInputFormat::UTF16_LE
+                     : u::ReaderInputFormat::UTF16_BE;
+
+// Capacity, in bytes, of the buffer holding undecoded input.
+constexpr size_t kByteBufferSize = 65535;
+// Capacity, in code points, of the buffer holding decoded input.
+constexpr size_t kUnicodeBufferSize = 8192;
+// Capacity, in 16-bit units, of the scratch buffer used for byte swapping.
+// NOTE(review): this is less than kByteBufferSize / 2, the number of units
+// the byte buffer can hold -- confirm users never swap more than this.
+constexpr size_t kUSwapBufferSize = kByteBufferSize / 4;
+
+// Adapter that presents re-encoded text as an io::Reader.
+//
+// Bytes from the wrapped reader are decoded into code points by UReader and
+// the code points are encoded into the caller's buffer by UWriter.
+//
+// UReader is invoked as reader(in, in_end, in_eof, out, out_end); it advances
+// `in` past the bytes it consumed and `out` past the code points it stored,
+// and may return a u::ReadError describing why it stopped.
+// UWriter is invoked as writer(in, in_end, out, out_avail); it advances `in`
+// and `out` and returns true when every code point was written.
+template<typename UReader, typename UWriter>
+class UnicodeReader : public io::Reader {
+ public:
+  UnicodeReader(std::unique_ptr<io::Reader> in, u::ReaderConfig config)
+      : in_(std::move(in)), config_(config), skip_bom_(config_.skip_bom) {}
+
+  // Writes at most `max` bytes of re-encoded text to `dst` and returns the
+  // number of bytes written.
+  //
+  // Errors:
+  //   - any error the wrapped reader reports while refilling,
+  //   - InvalidData when decoding fails and no code point is pending,
+  //   - MaxTooSmall when `max` is non-zero but nothing could be written.
+  std::expected<size_t, io::ReadError> read(void* dst, size_t max) override {
+    if (auto err = fill(); err.has_value())
+      return std::unexpected(err.value());
+
+    auto* const in_begin = static_cast<uint8_t const*>(in_ptr_);
+    auto* in = in_begin;
+    auto read_err = reader_(in, in_begin + in_avail_, in_eof_, u_buffer_wptr_,
+                            u_buffer_ + kUnicodeBufferSize);
+    // Release exactly the bytes the decoder consumed.
+    byte_buffer_->consume(in - in_begin);
+    if (read_err.has_value()) {
+      switch (read_err.value()) {
+        case u::ReadError::Invalid:
+          // Only return error if we have no code points to output.
+          if (u_buffer_wptr_ == u_buffer_)
+            return std::unexpected(io::ReadError::InvalidData);
+          break;
+        case u::ReadError::End:
+          break;
+        case u::ReadError::Incomplete:
+          if (in_eof_) {
+            // Only return error if we have no code points to output.
+            if (u_buffer_wptr_ == u_buffer_)
+              return std::unexpected(io::ReadError::InvalidData);
+          } else {
+            // We clearly need more data, call fill again.
+            if (u_buffer_wptr_ == u_buffer_)
+              return read(dst, max);
+          }
+          break;
+      }
+    }
+
+    // The BOM check is done once, on the first code point ever decoded.
+    if (skip_bom_ && u_buffer_wptr_ > u_buffer_) {
+      if (u_buffer_[0] == 0xfeff) {
+        --u_buffer_wptr_;
+        std::memmove(u_buffer_, u_buffer_ + 1,
+                     (u_buffer_wptr_ - u_buffer_) * sizeof(uint32_t));
+      }
+      skip_bom_ = false;
+    }
+
+    uint32_t const* u_out = u_buffer_;
+    void* d_out = dst;
+    if (writer_(u_out, u_buffer_wptr_, d_out, max)) {
+      // Everything pending was written; the code point buffer is empty again.
+      assert(u_out == u_buffer_wptr_);
+      u_buffer_wptr_ = u_buffer_;
+    } else if (u_out == u_buffer_) {
+      // Unable to write anything.
+      if (max == 0) return 0;
+      return std::unexpected(io::ReadError::MaxTooSmall);
+    } else {
+      // Keep the code points that did not fit for the next call.
+      size_t const left = u_buffer_wptr_ - u_out;
+      std::memmove(u_buffer_, u_out, left * sizeof(uint32_t));
+      u_buffer_wptr_ = u_buffer_ + left;
+    }
+
+    return static_cast<char*>(d_out) - static_cast<char*>(dst);
+  }
+
+  // Discards up to `max` bytes of re-encoded output and returns how many
+  // were discarded. The output is produced by read() into a scratch area of
+  // fixed size, so a single call discards at most kSkipChunkSize bytes and
+  // never allocates, however large `max` is.
+  std::expected<size_t, io::ReadError> skip(size_t max) override {
+    // Aligned because writers may store 16-bit units into the destination.
+    alignas(uint32_t) char scratch[kSkipChunkSize];
+    return read(scratch, max < sizeof(scratch) ? max : sizeof(scratch));
+  }
+
+ private:
+  static constexpr size_t kSkipChunkSize = 4096;
+
+  // Refreshes in_ptr_/in_avail_ and, unless the input already ended or at
+  // least half a buffer is pending, reads more bytes from the wrapped reader.
+  // A read of zero bytes marks end of input. Returns the wrapped reader's
+  // error, if any.
+  std::optional<io::ReadError> fill() {
+    in_ptr_ = byte_buffer_->rptr(in_avail_);
+    if (!in_eof_ && in_avail_ < kByteBufferSize / 2) {
+      auto* wptr = byte_buffer_->wptr(in_avail_);
+      auto got = in_->read(wptr, in_avail_);
+      if (got.has_value()) {
+        byte_buffer_->commit(got.value());
+        if (got.value() == 0)
+          in_eof_ = true;
+      } else {
+        return got.error();
+      }
+      in_ptr_ = byte_buffer_->rptr(in_avail_);
+    }
+    return std::nullopt;
+  }
+
+  std::unique_ptr<io::Reader> in_;
+  u::ReaderConfig const config_;
+  UReader reader_;
+  UWriter writer_;
+  // True until the first decoded code point has been checked for a BOM.
+  bool skip_bom_;
+  // Start and length of the undecoded bytes, as of the last fill().
+  void const* in_ptr_{nullptr};
+  size_t in_avail_{0};
+  bool in_eof_{false};
+  std::unique_ptr<Buffer> byte_buffer_{Buffer::fixed(kByteBufferSize)};
+  // Decoded code points not yet written out: [u_buffer_, u_buffer_wptr_).
+  uint32_t u_buffer_[kUnicodeBufferSize];
+  uint32_t* u_buffer_wptr_{u_buffer_};
+};
+
+// UTF-8 decoder without replacement: decodes until the output is full or
+// u8::read() reports an error, which is returned as-is. On error `in` is put
+// back to where the failed read started.
+struct U8ReaderStrict {
+  std::optional<u::ReadError> operator()(uint8_t const* &in,
+                                         uint8_t const* in_end,
+                                         bool /* in_eof */,
+                                         uint32_t* &out,
+                                         uint32_t const* out_end) {
+    while (out < out_end) {
+      auto const checkpoint = in;
+      auto const decoded = u8::read(in, in_end);
+      if (!decoded.has_value()) {
+        // Leave the bytes of the failed read unconsumed.
+        in = checkpoint;
+        return decoded.error();
+      }
+      *out = decoded.value();
+      ++out;
+    }
+    return std::nullopt;
+  }
+};
+
+// UTF-8 decoder built on u8::read_replace(): decodes until the output is
+// full or read_replace() reports End or Incomplete, which is translated to
+// the matching u::ReadError.
+struct U8Reader {
+  std::optional<u::ReadError> operator()(uint8_t const* &in,
+                                         uint8_t const* in_end,
+                                         bool in_eof, uint32_t* &out,
+                                         uint32_t const* out_end) {
+    while (out < out_end) {
+      auto const decoded = u8::read_replace(in, in_end, in_eof);
+      if (decoded.has_value()) {
+        *out = decoded.value();
+        ++out;
+        continue;
+      }
+      switch (decoded.error()) {
+        case u::ReadErrorReplace::End:
+          return u::ReadError::End;
+        case u::ReadErrorReplace::Incomplete:
+          return u::ReadError::Incomplete;
+      }
+      return std::nullopt;
+    }
+    return std::nullopt;
+  }
+};
+
+// UTF-16 decoder (host byte order) without replacement: decodes until the
+// output is full or u16::read() reports an error, which is returned as-is.
+// On return `in` points at the first byte that was not decoded; the units of
+// a failed read are left unconsumed.
+struct U16NativeReaderStrict {
+  std::optional<u::ReadError> operator()(uint8_t const* &in,
+                                         uint8_t const* in_end,
+                                         bool /* in_eof */,
+                                         uint32_t* &out,
+                                         uint32_t const* out_end) {
+    auto* it = reinterpret_cast<uint16_t const*>(in);
+    auto* const end = it + ((in_end - in) / 2);
+    // Input is present but shorter than one 16-bit unit.
+    if (it == end && in < in_end)
+      return u::ReadError::Incomplete;
+    std::optional<u::ReadError> ret;
+    while (out < out_end) {
+      // Checkpoint the unit cursor (not `in`, which does not move inside
+      // the loop) so a failed read can be undone.
+      auto* const tmp = it;
+      auto c = u16::read(it, end);
+      if (c.has_value()) {
+        *(out++) = c.value();
+      } else {
+        ret = c.error();
+        it = tmp;
+        break;
+      }
+    }
+    in = reinterpret_cast<uint8_t const*>(it);
+    return ret;
+  }
+};
+
+// UTF-16 decoder (byte order opposite to the host) without replacement.
+// Input units are byte-swapped into a scratch buffer and decoded from there
+// until the output is full or u16::read() reports an error, which is
+// returned as-is. On return `in` has advanced by two bytes per decoded unit;
+// the units of a failed read are left unconsumed.
+struct U16SwapReaderStrict {
+  std::optional<u::ReadError> operator()(uint8_t const* &in,
+                                         uint8_t const* in_end,
+                                         bool /* in_eof */,
+                                         uint32_t* &out,
+                                         uint32_t const* out_end) {
+    // Never swap more units than the scratch buffer holds; whatever lies
+    // beyond the window is picked up by a later call.
+    size_t const avail = static_cast<size_t>(in_end - in) / 2;
+    size_t const units = avail < kUSwapBufferSize ? avail : kUSwapBufferSize;
+    auto* it = buffer_;
+    auto* const end = it + units;
+    // Input is present but shorter than one 16-bit unit.
+    if (it == end && in < in_end)
+      return u::ReadError::Incomplete;
+    {
+      auto* in2 = reinterpret_cast<uint16_t const*>(in);
+      for (auto* it2 = it; it2 != end; ++it2) *it2 = std::byteswap(*(in2++));
+    }
+    std::optional<u::ReadError> ret;
+    while (out < out_end) {
+      // Checkpoint the unit cursor (not `in`, which does not move inside
+      // the loop) so a failed read can be undone.
+      auto* const tmp = it;
+      auto c = u16::read(it, end);
+      if (c.has_value()) {
+        *(out++) = c.value();
+      } else {
+        ret = c.error();
+        it = tmp;
+        break;
+      }
+    }
+    in += (it - buffer_) * 2;
+    return ret;
+  }
+
+ private:
+  uint16_t buffer_[kUSwapBufferSize];
+};
+
+// UTF-16 decoder (host byte order) built on u16::read_replace(): decodes
+// until the output is full or read_replace() reports End or Incomplete,
+// which is translated to the matching u::ReadError.
+struct U16NativeReader {
+  std::optional<u::ReadError> operator()(uint8_t const* &in,
+                                         uint8_t const* in_end,
+                                         bool in_eof,
+                                         uint32_t* &out,
+                                         uint32_t const* out_end) {
+    auto* cursor = reinterpret_cast<uint16_t const*>(in);
+    auto* const limit = cursor + ((in_end - in) / 2);
+    if (cursor == limit && in < in_end) {
+      // Input is present but shorter than one 16-bit unit.
+      if (out == out_end)
+        return std::nullopt;
+      if (!in_eof)
+        return u::ReadError::Incomplete;
+      // Nothing more will arrive: replace the leftover with U+FFFD.
+      *out = 0xfffd;
+      ++out;
+      in = in_end;
+      return std::nullopt;
+    }
+    std::optional<u::ReadError> status;
+    while (out < out_end) {
+      auto const decoded = u16::read_replace(cursor, limit, in_eof);
+      if (decoded.has_value()) {
+        *out = decoded.value();
+        ++out;
+        continue;
+      }
+      switch (decoded.error()) {
+        case u::ReadErrorReplace::End:
+          status = u::ReadError::End;
+          break;
+        case u::ReadErrorReplace::Incomplete:
+          status = u::ReadError::Incomplete;
+          break;
+      }
+      break;
+    }
+    in = reinterpret_cast<uint8_t const*>(cursor);
+    return status;
+  }
+};
+
+// UTF-16 decoder (byte order opposite to the host) built on
+// u16::read_replace(). Input units are byte-swapped into a scratch buffer
+// and decoded from there until the output is full or read_replace() reports
+// End or Incomplete, which is translated to the matching u::ReadError.
+// On return `in` has advanced by two bytes per consumed unit.
+struct U16SwapReader {
+  std::optional<u::ReadError> operator()(uint8_t const* &in,
+                                         uint8_t const* in_end,
+                                         bool in_eof,
+                                         uint32_t* &out,
+                                         uint32_t const* out_end) {
+    // Never swap more units than the scratch buffer holds; whatever lies
+    // beyond the window is picked up by a later call.
+    size_t const avail = static_cast<size_t>(in_end - in) / 2;
+    bool const truncated = avail > kUSwapBufferSize;
+    auto* it = buffer_;
+    auto* const end = it + (truncated ? kUSwapBufferSize : avail);
+    if (it == end && in < in_end) {
+      // Input is present but shorter than one 16-bit unit.
+      if (out == out_end)
+        return std::nullopt;
+      if (in_eof) {
+        // Nothing more will arrive: replace the leftover with U+FFFD.
+        *(out++) = 0xfffd;
+        in = in_end;
+        return std::nullopt;
+      }
+      return u::ReadError::Incomplete;
+    }
+    {
+      auto* in2 = reinterpret_cast<uint16_t const*>(in);
+      for (auto* it2 = it; it2 != end; ++it2) *it2 = std::byteswap(*(in2++));
+    }
+    // The end of a truncated window is not the end of the input, so an
+    // incomplete sequence there must not be replaced.
+    bool const window_eof = in_eof && !truncated;
+    std::optional<u::ReadError> ret;
+    while (out < out_end) {
+      auto c = u16::read_replace(it, end, window_eof);
+      if (c.has_value()) {
+        *(out++) = c.value();
+      } else {
+        switch (c.error()) {
+          case u::ReadErrorReplace::End:
+            ret = u::ReadError::End;
+            break;
+          case u::ReadErrorReplace::Incomplete:
+            ret = u::ReadError::Incomplete;
+            break;
+        }
+        break;
+      }
+    }
+    in += (it - buffer_) * 2;
+    return ret;
+  }
+
+ private:
+  uint16_t buffer_[kUSwapBufferSize];
+};
+
+// Guesses the encoding of the bytes in [in, in_end).
+//
+// Returns true and stores the guess in `format` once a decision was made;
+// returns false when more bytes are needed. `in_eof` tells that no more
+// bytes will follow, in which case a decision is always made.
+bool detect(uint8_t const* in, uint8_t const* in_end, bool in_eof,
+            u::ReaderInputFormat &format) {
+  auto const avail = in_end - in;
+  if (avail == 0) {
+    if (in_eof) {
+      // Doesn't matter, go with UTF-8 just to get out of "detect"
+      format = u::ReaderInputFormat::UTF8;
+      return true;
+    }
+    return false;
+  }
+
+  // UTF-8 BOM ?
+  if (avail >= 3 && in[0] == 0xef && in[1] == 0xbb && in[2] == 0xbf) {
+    format = u::ReaderInputFormat::UTF8;
+    return true;
+  }
+
+  if (avail >= 2) {
+    // UTF-16 BOM ? Copy the first unit out instead of dereferencing a
+    // uint16_t pointer, as `in` is a byte pointer with no alignment promise.
+    uint16_t first_unit;
+    std::memcpy(&first_unit, in, sizeof(first_unit));
+    if (first_unit == 0xFEFF) {
+      format = kU16NativeInputFormat;
+      return true;
+    }
+    if (first_unit == 0xFFFE) {
+      format = kU16SwapInputFormat;
+      return true;
+    }
+
+    // Check for zero bytes, not allowed in UTF-8 and likely for UTF-16
+    // encoding western characters.
+    if (in[0] == 0x00 && in[1] != 0x00) {
+      format = u::ReaderInputFormat::UTF16_BE;
+      return true;
+    }
+    if (in[0] != 0x00 && in[1] == 0x00) {
+      format = u::ReaderInputFormat::UTF16_LE;
+      return true;
+    }
+  }
+
+  if (avail >= 2 || in_eof) {
+    // We have no idea what it is, fallback to UTF-8 and let it "handle"
+    // whatever the input data actually is.
+    format = u::ReaderInputFormat::UTF8;
+    return true;
+  }
+  return false;
+}
+
+// Decoder that picks the input encoding with detect() on first use and then
+// forwards every call to the strict decoder for that encoding. Reports
+// Incomplete while detect() cannot decide yet.
+struct DetectReaderStrict {
+  std::optional<u::ReadError> operator()(uint8_t const* &in,
+                                         uint8_t const* in_end,
+                                         bool in_eof,
+                                         uint32_t* &out,
+                                         uint32_t const* out_end) {
+    if (format_ == u::ReaderInputFormat::DETECT &&
+        !detect(in, in_end, in_eof, format_)) {
+      return u::ReadError::Incomplete;
+    }
+    switch (format_) {
+      case u::ReaderInputFormat::UTF8:
+        return u8_reader_(in, in_end, in_eof, out, out_end);
+      case kU16NativeInputFormat:
+        return u16_native_reader_(in, in_end, in_eof, out, out_end);
+      case kU16SwapInputFormat:
+        return u16_swap_reader_(in, in_end, in_eof, out, out_end);
+      case u::ReaderInputFormat::DETECT:
+        // detect() always stores a concrete format when it succeeds.
+        break;
+    }
+    std::unreachable();
+  }
+
+ private:
+  u::ReaderInputFormat format_{u::ReaderInputFormat::DETECT};
+  U8ReaderStrict u8_reader_;
+  U16NativeReaderStrict u16_native_reader_;
+  U16SwapReaderStrict u16_swap_reader_;
+};
+
+// Decoder that picks the input encoding with detect() on first use and then
+// forwards every call to the replacing decoder for that encoding. Reports
+// Incomplete while detect() cannot decide yet.
+struct DetectReader {
+  std::optional<u::ReadError> operator()(uint8_t const* &in,
+                                         uint8_t const* in_end,
+                                         bool in_eof,
+                                         uint32_t* &out,
+                                         uint32_t const* out_end) {
+    if (format_ == u::ReaderInputFormat::DETECT &&
+        !detect(in, in_end, in_eof, format_)) {
+      return u::ReadError::Incomplete;
+    }
+    switch (format_) {
+      case u::ReaderInputFormat::UTF8:
+        return u8_reader_(in, in_end, in_eof, out, out_end);
+      case kU16NativeInputFormat:
+        return u16_native_reader_(in, in_end, in_eof, out, out_end);
+      case kU16SwapInputFormat:
+        return u16_swap_reader_(in, in_end, in_eof, out, out_end);
+      case u::ReaderInputFormat::DETECT:
+        // detect() always stores a concrete format when it succeeds.
+        break;
+    }
+    std::unreachable();
+  }
+
+ private:
+  u::ReaderInputFormat format_{u::ReaderInputFormat::DETECT};
+  U8Reader u8_reader_;
+  U16NativeReader u16_native_reader_;
+  U16SwapReader u16_swap_reader_;
+};
+
+// Encodes code points from [in, in_end) with u8::write() into the
+// `out_avail` bytes at `out`. `in` and `out` are advanced past what was
+// written. Returns false as soon as u8::write() refuses a code point, true
+// when every code point was written.
+struct U8Writer {
+  bool operator()(uint32_t const* &in, uint32_t const* in_end,
+                  void* &out, size_t out_avail) {
+    auto* cursor = static_cast<uint8_t*>(out);
+    auto* const limit = cursor + out_avail;
+    bool complete = true;
+    for (; in < in_end; ++in) {
+      if (!u8::write(cursor, limit, *in)) {
+        complete = false;
+        break;
+      }
+    }
+    out = cursor;
+    return complete;
+  }
+};
+
+// Encodes code points from [in, in_end) with u16::write() into the
+// `out_avail` bytes at `out`, using whole 16-bit units only. `in` and `out`
+// are advanced past what was written. Returns false as soon as u16::write()
+// refuses a code point, true when every code point was written.
+struct U16NativeWriter {
+ public:
+  bool operator()(uint32_t const* &in, uint32_t const* in_end,
+                  void* &out, size_t out_avail) {
+    auto* cursor = static_cast<uint16_t*>(out);
+    auto* const limit = cursor + (out_avail / 2);
+    bool complete = true;
+    for (; in < in_end; ++in) {
+      if (!u16::write(cursor, limit, *in)) {
+        complete = false;
+        break;
+      }
+    }
+    out = cursor;
+    return complete;
+  }
+};
+
+/*
+struct U16SwapWriter {
+ bool operator()(uint32_t const* &in, uint32_t const* in_end,
+ void* &out, size_t out_avail) {
+ auto* it = reinterpret_cast<uint16_t*>(out);
+ auto* const end = it + (out_avail / 2);
+ bool ret = true;
+ while (in < in_end) {
+ auto tmp = it;
+ if (!u16::write(it, end, *in)) {
+ ret = false;
+ break;
+ }
+ ++in;
+ *tmp = std::byteswap(*tmp);
+ if (++tmp != it) *tmp = std::byteswap(*tmp);
+ }
+ out = it;
+ return ret;
+ }
+};
+*/
+
+} // namespace
+
+namespace u8 {
+
+namespace {
+
+// Concrete u8::Reader: decoding is done by UReader, output is UTF-8.
+// read() and skip() are forwarded to the UnicodeReader base.
+template<typename UReader>
+class UnicodeReaderU8Writer : public UnicodeReader<UReader, U8Writer>,
+                              public virtual Reader {
+  using Impl = UnicodeReader<UReader, U8Writer>;
+
+ public:
+  UnicodeReaderU8Writer(std::unique_ptr<io::Reader> in,
+                        u::ReaderConfig config)
+      : Impl(std::move(in), config) {}
+
+  std::expected<size_t, io::ReadError> read(void* dst, size_t max) override {
+    return Impl::read(dst, max);
+  }
+
+  std::expected<size_t, io::ReadError> skip(size_t max) override {
+    return Impl::skip(max);
+  }
+};
+
+} // namespace
+
+
+// Reads at most `max` bytes into `data`. `data` is grown to `max` bytes if
+// it is shorter and, on success, resized to the number of bytes read.
+std::expected<size_t, io::ReadError> Reader::read(std::string& data,
+                                                  size_t max) {
+  if (data.size() < max)
+    data.resize(max);
+  auto result = read(data.data(), max);
+  if (!result.has_value())
+    return result;
+  data.resize(result.value());
+  return result;
+}
+
+// Same as read(std::string&, size_t) but goes through repeat_read() of the
+// byte-oriented interface.
+std::expected<size_t, io::ReadError> Reader::repeat_read(
+    std::string& data, size_t max) {
+  if (data.size() < max)
+    data.resize(max);
+  auto result = repeat_read(data.data(), max);
+  if (!result.has_value())
+    return result;
+  data.resize(result.value());
+  return result;
+}
+
+// Wraps `reader` in a reader producing UTF-8, choosing the decoder from
+// config.input and config.strict.
+std::unique_ptr<Reader> open(
+    std::unique_ptr<io::Reader> reader, u::ReaderConfig config) {
+  bool const strict = config.strict;
+  switch (config.input) {
+    case u::ReaderInputFormat::UTF8: {
+      if (strict) {
+        return std::make_unique<UnicodeReaderU8Writer<U8ReaderStrict>>(
+            std::move(reader), config);
+      }
+      return std::make_unique<UnicodeReaderU8Writer<U8Reader>>(
+          std::move(reader), config);
+    }
+    case kU16NativeInputFormat: {
+      if (strict) {
+        return std::make_unique<UnicodeReaderU8Writer<U16NativeReaderStrict>>(
+            std::move(reader), config);
+      }
+      return std::make_unique<UnicodeReaderU8Writer<U16NativeReader>>(
+          std::move(reader), config);
+    }
+    case kU16SwapInputFormat: {
+      if (strict) {
+        return std::make_unique<UnicodeReaderU8Writer<U16SwapReaderStrict>>(
+            std::move(reader), config);
+      }
+      return std::make_unique<UnicodeReaderU8Writer<U16SwapReader>>(
+          std::move(reader), config);
+    }
+    case u::ReaderInputFormat::DETECT: {
+      if (strict) {
+        return std::make_unique<UnicodeReaderU8Writer<DetectReaderStrict>>(
+            std::move(reader), config);
+      }
+      return std::make_unique<UnicodeReaderU8Writer<DetectReader>>(
+          std::move(reader), config);
+    }
+  }
+  std::unreachable();
+}
+
+// Opens `file_path` with io::open() and wraps the result in a reader
+// producing UTF-8. The io::open() error is passed through on failure.
+std::expected<std::unique_ptr<Reader>, io::OpenError> open(
+    const std::string& file_path, u::ReaderConfig config) {
+  auto file = io::open(file_path);
+  if (!file.has_value())
+    return std::unexpected(file.error());
+  return open(std::move(file.value()), config);
+}
+
+// Opens `file_path` with io::openat() using `dirfd` and wraps the result in
+// a reader producing UTF-8. The io::openat() error is passed through on
+// failure.
+std::expected<std::unique_ptr<Reader>, io::OpenError> openat(
+    int dirfd, const std::string& file_path, u::ReaderConfig config) {
+  auto file = io::openat(dirfd, file_path);
+  if (!file.has_value())
+    return std::unexpected(file.error());
+  return open(std::move(file.value()), config);
+}
+
+} // namespace u8
+
+namespace u16 {
+
+namespace {
+
+// Concrete u16::Reader: decoding is done by UReader, output is UTF-16 as
+// written by U16NativeWriter. read() and skip() are forwarded to the
+// UnicodeReader base.
+template<typename UReader>
+class UnicodeReaderU16NativeWriter
+    : public UnicodeReader<UReader, U16NativeWriter>,
+      public virtual Reader {
+  using Impl = UnicodeReader<UReader, U16NativeWriter>;
+
+ public:
+  UnicodeReaderU16NativeWriter(std::unique_ptr<io::Reader> in,
+                               u::ReaderConfig config)
+      : Impl(std::move(in), config) {}
+
+  std::expected<size_t, io::ReadError> read(void* dst, size_t max) override {
+    return Impl::read(dst, max);
+  }
+
+  std::expected<size_t, io::ReadError> skip(size_t max) override {
+    return Impl::skip(max);
+  }
+};
+
+} // namespace
+
+// Reads at most `max` UTF-16 code units into `data`. `data` is grown to
+// `max` units if it is shorter and, on success, resized to the number of
+// units read, which is also the return value.
+std::expected<size_t, io::ReadError> Reader::read(std::u16string& data,
+                                                  size_t max) {
+  if (max > data.size())
+    data.resize(max);
+  // The byte-oriented overload counts bytes: two per code unit.
+  auto ret = read(data.data(), max * 2);
+  if (!ret.has_value())
+    return ret;
+  // Convert the byte count back to code units before resizing; resizing to
+  // the byte count would leave `data` twice as long as what was read.
+  size_t const units = ret.value() / 2;
+  data.resize(units);
+  return units;
+}
+
+// Same as read(std::u16string&, size_t) but goes through repeat_read() of
+// the byte-oriented interface. Returns the number of code units read.
+std::expected<size_t, io::ReadError> Reader::repeat_read(
+    std::u16string& data, size_t max) {
+  if (max > data.size())
+    data.resize(max);
+  // The byte-oriented overload counts bytes: two per code unit.
+  auto ret = repeat_read(data.data(), max * 2);
+  if (!ret.has_value())
+    return ret;
+  // Convert the byte count back to code units before resizing; resizing to
+  // the byte count would leave `data` twice as long as what was read.
+  size_t const units = ret.value() / 2;
+  data.resize(units);
+  return units;
+}
+
+// Wraps `reader` in a reader producing UTF-16, choosing the decoder from
+// config.input and config.strict.
+std::unique_ptr<Reader> open(
+    std::unique_ptr<io::Reader> reader, u::ReaderConfig config) {
+  bool const strict = config.strict;
+  switch (config.input) {
+    case u::ReaderInputFormat::UTF8: {
+      if (strict) {
+        return std::make_unique<UnicodeReaderU16NativeWriter<U8ReaderStrict>>(
+            std::move(reader), config);
+      }
+      return std::make_unique<UnicodeReaderU16NativeWriter<U8Reader>>(
+          std::move(reader), config);
+    }
+    case kU16NativeInputFormat: {
+      if (strict) {
+        return std::make_unique<
+            UnicodeReaderU16NativeWriter<U16NativeReaderStrict>>(
+            std::move(reader), config);
+      }
+      return std::make_unique<UnicodeReaderU16NativeWriter<U16NativeReader>>(
+          std::move(reader), config);
+    }
+    case kU16SwapInputFormat: {
+      if (strict) {
+        return std::make_unique<
+            UnicodeReaderU16NativeWriter<U16SwapReaderStrict>>(
+            std::move(reader), config);
+      }
+      return std::make_unique<UnicodeReaderU16NativeWriter<U16SwapReader>>(
+          std::move(reader), config);
+    }
+    case u::ReaderInputFormat::DETECT: {
+      if (strict) {
+        return std::make_unique<
+            UnicodeReaderU16NativeWriter<DetectReaderStrict>>(
+            std::move(reader), config);
+      }
+      return std::make_unique<UnicodeReaderU16NativeWriter<DetectReader>>(
+          std::move(reader), config);
+    }
+  }
+  std::unreachable();
+}
+
+// Opens `file_path` with io::open() and wraps the result in a reader
+// producing UTF-16. The io::open() error is passed through on failure.
+std::expected<std::unique_ptr<Reader>, io::OpenError> open(
+    const std::string& file_path, u::ReaderConfig config) {
+  auto file = io::open(file_path);
+  if (!file.has_value())
+    return std::unexpected(file.error());
+  return open(std::move(file.value()), config);
+}
+
+// Opens `file_path` with io::openat() using `dirfd` and wraps the result in
+// a reader producing UTF-16. The io::openat() error is passed through on
+// failure.
+std::expected<std::unique_ptr<Reader>, io::OpenError> openat(
+    int dirfd, const std::string& file_path, u::ReaderConfig config) {
+  auto file = io::openat(dirfd, file_path);
+  if (!file.has_value())
+    return std::unexpected(file.error());
+  return open(std::move(file.value()), config);
+}
+
+} // namespace u16
diff --git a/src/uio.hh b/src/uio.hh
new file mode 100644
index 0000000..a0911a1
--- /dev/null
+++ b/src/uio.hh
@@ -0,0 +1,78 @@
+#ifndef UIO_HH
+#define UIO_HH
+
+#include "io.hh" // IWYU pragma: export
+
+#include <cstddef>
+#include <expected>
+#include <string>
+
+namespace u {
+
+// Encoding of the bytes handed to a unicode reader.
+enum class ReaderInputFormat {
+  // UTF-8.
+  UTF8,
+  // UTF-16, big endian.
+  UTF16_BE,
+  // UTF-16, little endian.
+  UTF16_LE,
+  // Not known in advance; the reader guesses from the data it sees.
+  DETECT,
+};
+
+// Options for the unicode readers created by u8::open() and u16::open().
+struct ReaderConfig {
+  // When false (the default) invalid data is replaced with U+FFFD.
+  bool strict = false;
+  // Encoding of the input.
+  ReaderInputFormat input = ReaderInputFormat::DETECT;
+  // When true (the default) a BOM at the start of the stream is skipped.
+  bool skip_bom = true;
+};
+
+}  // namespace u
+
+namespace u8 {
+
+// An io::Reader with convenience overloads that read into a std::string.
+class Reader : public io::Reader {
+ public:
+  // Keep the byte-oriented overloads visible next to the ones added here.
+  using io::Reader::read;
+  using io::Reader::repeat_read;
+
+  // Reads at most `max` bytes into `data` using read(void*, size_t). On
+  // success `data` is resized and the number of bytes read is returned.
+  [[nodiscard]] std::expected<size_t, io::ReadError> read(
+      std::string& data, size_t max);
+
+  // As above, but using repeat_read(void*, size_t).
+  [[nodiscard]] std::expected<size_t, io::ReadError> repeat_read(
+      std::string& data, size_t max);
+};
+
+// Wraps `reader`, taking ownership of it, in a u8::Reader configured by
+// `config`.
+[[nodiscard]] std::unique_ptr<Reader> open(
+    std::unique_ptr<io::Reader> reader, u::ReaderConfig config = {});
+
+// Opens the file at `file_path` and wraps it in a u8::Reader configured by
+// `config`. Fails with the io::OpenError of the underlying open.
+[[nodiscard]] std::expected<std::unique_ptr<Reader>, io::OpenError> open(
+    const std::string& file_path, u::ReaderConfig config = {});
+
+// As open(file_path, config), with `dirfd` passed on to io::openat().
+[[nodiscard]] std::expected<std::unique_ptr<Reader>, io::OpenError> openat(
+    int dirfd, const std::string& file_path, u::ReaderConfig config = {});
+
+} // namespace u8
+
+namespace u16 {
+
+// An io::Reader with convenience overloads that read into a std::u16string.
+class Reader : public io::Reader {
+ public:
+  // Keep the byte-oriented overloads visible next to the ones added here.
+  using io::Reader::read;
+  using io::Reader::repeat_read;
+
+  // Reads at most `max` UTF-16 code units into `data` using
+  // read(void*, size_t). On success `data` is resized and the number of
+  // code units read is returned.
+  [[nodiscard]] std::expected<size_t, io::ReadError> read(
+      std::u16string& data, size_t max);
+
+  // As above, but using repeat_read(void*, size_t).
+  [[nodiscard]] std::expected<size_t, io::ReadError> repeat_read(
+      std::u16string& data, size_t max);
+};
+
+// Wraps `reader`, taking ownership of it, in a u16::Reader configured by
+// `config`.
+[[nodiscard]] std::unique_ptr<Reader> open(
+    std::unique_ptr<io::Reader> reader, u::ReaderConfig config = {});
+
+// Opens the file at `file_path` and wraps it in a u16::Reader configured by
+// `config`. Fails with the io::OpenError of the underlying open.
+[[nodiscard]] std::expected<std::unique_ptr<Reader>, io::OpenError> open(
+    const std::string& file_path, u::ReaderConfig config = {});
+
+// As open(file_path, config), with `dirfd` passed on to io::openat().
+[[nodiscard]] std::expected<std::unique_ptr<Reader>, io::OpenError> openat(
+    int dirfd, const std::string& file_path, u::ReaderConfig config = {});
+
+} // namespace u16
+
+#endif // UIO_HH
diff --git a/src/umod8.hh b/src/umod8.hh
index 117591f..b91b199 100644
--- a/src/umod8.hh
+++ b/src/umod8.hh
@@ -113,19 +113,24 @@ std::expected<uint32_t, u::ReadError> read(T& start, const T& end) {
+// Like read(), but never reports Invalid: invalid input yields U+FFFD and
+// `start` ends up exactly one element past where it began. When `eof` is
+// true an Incomplete sequence is replaced the same way; otherwise Incomplete
+// is reported to the caller. End is always reported.
template<std::forward_iterator T>
requires std::is_same_v<std::iter_value_t<T>, uint8_t>
std::expected<uint32_t, u::ReadErrorReplace> read_replace(T& start,
- const T& end) {
+ const T& end,
+ bool eof) {
+ auto const tmp = start;
auto ret = read(start, end);
if (ret.has_value())
return *ret;
switch (ret.error()) {
case u::ReadError::Incomplete:
+ if (eof)
+ break;
return std::unexpected(u::ReadErrorReplace::Incomplete);
case u::ReadError::End:
return std::unexpected(u::ReadErrorReplace::End);
case u::ReadError::Invalid:
- return 0xfffd;
+ break;
}
- std::unreachable();
+ // std::next() rather than `tmp + 1`: T is only required to be a forward
+ // iterator, which need not support operator+.
+ start = std::next(tmp);
+ return 0xfffd;
}
template<std::forward_iterator T>
diff --git a/test/io.cc b/test/io.cc
index 23c10d4..2441138 100644
--- a/test/io.cc
+++ b/test/io.cc
@@ -124,7 +124,7 @@ TEST_F(IoTest, read_empty) {
auto ret = io::openat(dirfd(), "test");
ASSERT_TRUE(ret.has_value());
std::string tmp(10, ' ');
- auto ret2 = ret.value()->read(tmp);
+ auto ret2 = ret.value()->read(tmp.data(), tmp.size());
ASSERT_TRUE(ret2.has_value());
EXPECT_EQ(0, ret2.value());
}
@@ -145,7 +145,7 @@ TEST_F(IoTest, read) {
auto ret = io::openat(dirfd(), "test");
ASSERT_TRUE(ret.has_value());
std::string tmp(12, ' ');
- auto ret2 = ret.value()->repeat_read(tmp);
+ auto ret2 = ret.value()->repeat_read(tmp.data(), tmp.size());
ASSERT_TRUE(ret2.has_value());
EXPECT_EQ(11, ret2.value());
tmp.resize(ret2.value());
@@ -161,7 +161,7 @@ TEST_F(IoTest, skip) {
ASSERT_TRUE(ret2.has_value());
EXPECT_EQ(6, ret2.value());
std::string tmp(12, ' ');
- auto ret3 = ret.value()->repeat_read(tmp);
+ auto ret3 = ret.value()->repeat_read(tmp.data(), tmp.size());
ASSERT_TRUE(ret3.has_value());
EXPECT_EQ(5, ret3.value());
tmp.resize(ret3.value());
@@ -175,7 +175,7 @@ TEST_F(IoTest, read_block) {
ASSERT_TRUE(ret.has_value());
auto ret2 = io_make_max_block(std::move(ret.value()), 2);
std::string tmp(12, ' ');
- auto ret3 = ret2->repeat_read(tmp);
+ auto ret3 = ret2->repeat_read(tmp.data(), tmp.size());
ASSERT_TRUE(ret3.has_value());
EXPECT_EQ(11, ret3.value());
tmp.resize(ret3.value());
@@ -192,7 +192,7 @@ TEST_F(IoTest, skip_block) {
ASSERT_TRUE(ret3.has_value());
EXPECT_EQ(6, ret3.value());
std::string tmp(12, ' ');
- auto ret4 = ret2->repeat_read(tmp);
+ auto ret4 = ret2->repeat_read(tmp.data(), tmp.size());
ASSERT_TRUE(ret4.has_value());
EXPECT_EQ(5, ret4.value());
tmp.resize(ret4.value());
diff --git a/test/u.cc b/test/u.cc
index 53455f2..dc77e7d 100644
--- a/test/u.cc
+++ b/test/u.cc
@@ -4,6 +4,7 @@
#include "umod8.hh"
#include "u16.hh"
+#include <iterator>
#include <vector>
namespace {
@@ -20,7 +21,7 @@ TEST(u8, empty) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::End, ret.error());
- auto ret_replace = u8::read_replace(it, empty.end());
+ auto ret_replace = u8::read_replace(it, empty.end(), false);
ASSERT_FALSE(ret_replace.has_value());
EXPECT_EQ(u::ReadErrorReplace::End, ret_replace.error());
@@ -75,7 +76,7 @@ TEST(u8, examples) {
EXPECT_EQ(it, literal.end());
it = literal.begin();
- auto ret_replace = u8::read_replace(it, literal.end());
+ auto ret_replace = u8::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0x10345, *ret_replace);
EXPECT_EQ(it, literal.end());
@@ -153,9 +154,14 @@ TEST(u8, incomplete) {
EXPECT_EQ(u::ReadError::Incomplete, ret.error());
it = literal.begin();
- auto ret_replace = u8::read_replace(it, literal.end());
+ auto ret_replace = u8::read_replace(it, literal.end(), false);
ASSERT_FALSE(ret_replace.has_value());
EXPECT_EQ(u::ReadErrorReplace::Incomplete, ret_replace.error());
+
+ it = literal.begin();
+ ret_replace = u8::read_replace(it, literal.end(), true);
+ ASSERT_TRUE(ret_replace.has_value());
+ EXPECT_EQ(0xfffd, ret_replace.value());
}
{
std::vector<uint8_t> literal{0xf0};
@@ -188,10 +194,10 @@ TEST(u8, invalid) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::Invalid, ret.error());
it = literal.begin();
- auto ret_replace = u8::read_replace(it, literal.end());
+ auto ret_replace = u8::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0xfffd, *ret_replace);
- EXPECT_EQ(it, literal.end());
+ EXPECT_EQ(it, std::next(literal.begin()));
}
{
std::vector<uint8_t> literal{0xa0};
@@ -200,10 +206,10 @@ TEST(u8, invalid) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::Invalid, ret.error());
it = literal.begin();
- auto ret_replace = u8::read_replace(it, literal.end());
+ auto ret_replace = u8::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0xfffd, *ret_replace);
- EXPECT_EQ(it, literal.end());
+ EXPECT_EQ(it, std::next(literal.begin()));
}
{
std::vector<uint8_t> literal{0xce, 0xff};
@@ -212,10 +218,10 @@ TEST(u8, invalid) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::Invalid, ret.error());
it = literal.begin();
- auto ret_replace = u8::read_replace(it, literal.end());
+ auto ret_replace = u8::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0xfffd, *ret_replace);
- EXPECT_EQ(it, literal.end());
+ EXPECT_EQ(it, std::next(literal.begin()));
}
{
std::vector<uint8_t> literal{0xec, 0xff, 0x84};
@@ -224,10 +230,10 @@ TEST(u8, invalid) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::Invalid, ret.error());
it = literal.begin();
- auto ret_replace = u8::read_replace(it, literal.end());
+ auto ret_replace = u8::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0xfffd, *ret_replace);
- EXPECT_EQ(it, literal.end());
+ EXPECT_EQ(it, std::next(literal.begin()));
}
{
std::vector<uint8_t> literal{0xec, 0x9c, 0xff};
@@ -236,10 +242,10 @@ TEST(u8, invalid) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::Invalid, ret.error());
it = literal.begin();
- auto ret_replace = u8::read_replace(it, literal.end());
+ auto ret_replace = u8::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0xfffd, *ret_replace);
- EXPECT_EQ(it, literal.end());
+ EXPECT_EQ(it, std::next(literal.begin()));
}
{
std::vector<uint8_t> literal{0xf0, 0xff, 0x8d, 0x85};
@@ -248,10 +254,10 @@ TEST(u8, invalid) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::Invalid, ret.error());
it = literal.begin();
- auto ret_replace = u8::read_replace(it, literal.end());
+ auto ret_replace = u8::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0xfffd, *ret_replace);
- EXPECT_EQ(it, literal.end());
+ EXPECT_EQ(it, std::next(literal.begin()));
}
{
std::vector<uint8_t> literal{0xf0, 0x90, 0xff, 0x85};
@@ -260,10 +266,10 @@ TEST(u8, invalid) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::Invalid, ret.error());
it = literal.begin();
- auto ret_replace = u8::read_replace(it, literal.end());
+ auto ret_replace = u8::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0xfffd, *ret_replace);
- EXPECT_EQ(it, literal.end());
+ EXPECT_EQ(it, std::next(literal.begin()));
}
{
std::vector<uint8_t> literal{0xf0, 0x90, 0x8d, 0xff};
@@ -272,10 +278,10 @@ TEST(u8, invalid) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::Invalid, ret.error());
it = literal.begin();
- auto ret_replace = u8::read_replace(it, literal.end());
+ auto ret_replace = u8::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0xfffd, *ret_replace);
- EXPECT_EQ(it, literal.end());
+ EXPECT_EQ(it, std::next(literal.begin()));
}
}
@@ -286,7 +292,7 @@ TEST(umod8, empty) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::End, ret.error());
- auto ret_replace = umod8::read_replace(it, empty.end());
+ auto ret_replace = umod8::read_replace(it, empty.end(), false);
ASSERT_FALSE(ret_replace.has_value());
EXPECT_EQ(u::ReadErrorReplace::End, ret_replace.error());
@@ -329,7 +335,7 @@ TEST(umod8, examples) {
EXPECT_EQ(it, literal.end());
it = literal.begin();
- auto ret_replace = umod8::read_replace(it, literal.end());
+ auto ret_replace = umod8::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0x10400, *ret_replace);
EXPECT_EQ(it, literal.end());
@@ -416,9 +422,14 @@ TEST(umod8, incomplete) {
EXPECT_EQ(u::ReadError::Incomplete, ret.error());
it = literal.begin();
- auto ret_replace = umod8::read_replace(it, literal.end());
+ auto ret_replace = umod8::read_replace(it, literal.end(), false);
ASSERT_FALSE(ret_replace.has_value());
EXPECT_EQ(u::ReadErrorReplace::Incomplete, ret_replace.error());
+
+ it = literal.begin();
+ ret_replace = umod8::read_replace(it, literal.end(), true);
+ ASSERT_TRUE(ret_replace.has_value());
+ EXPECT_EQ(0xfffd, ret_replace.value());
}
{
std::vector<uint8_t> literal{0xed, 0xa0, 0x81, 0xed, 0xb0};
@@ -437,7 +448,7 @@ TEST(umod8, invalid) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::Invalid, ret.error());
it = literal.begin();
- auto ret_replace = umod8::read_replace(it, literal.end());
+ auto ret_replace = umod8::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0xfffd, *ret_replace);
EXPECT_EQ(3, literal.end() - it);
@@ -449,7 +460,7 @@ TEST(umod8, invalid) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::Invalid, ret.error());
it = literal.begin();
- auto ret_replace = umod8::read_replace(it, literal.end());
+ auto ret_replace = umod8::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0xfffd, *ret_replace);
EXPECT_EQ(it, literal.end());
@@ -461,10 +472,10 @@ TEST(umod8, invalid) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::Invalid, ret.error());
it = literal.begin();
- auto ret_replace = umod8::read_replace(it, literal.end());
+ auto ret_replace = umod8::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0xfffd, *ret_replace);
- EXPECT_EQ(it, literal.end());
+ EXPECT_EQ(it, std::next(literal.begin()));
}
{
std::vector<uint8_t> literal{0xec, 0xff, 0x84};
@@ -473,10 +484,10 @@ TEST(umod8, invalid) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::Invalid, ret.error());
it = literal.begin();
- auto ret_replace = umod8::read_replace(it, literal.end());
+ auto ret_replace = umod8::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0xfffd, *ret_replace);
- EXPECT_EQ(it, literal.end());
+ EXPECT_EQ(it, std::next(literal.begin()));
}
{
std::vector<uint8_t> literal{0xec, 0x9c, 0xff};
@@ -485,10 +496,10 @@ TEST(umod8, invalid) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::Invalid, ret.error());
it = literal.begin();
- auto ret_replace = umod8::read_replace(it, literal.end());
+ auto ret_replace = umod8::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0xfffd, *ret_replace);
- EXPECT_EQ(it, literal.end());
+ EXPECT_EQ(it, std::next(literal.begin()));
}
{
std::vector<uint8_t> literal{0xed, 0xb0, 0x80, 0xed, 0xa0, 0x81};
@@ -531,7 +542,7 @@ TEST(u16, empty) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::End, ret.error());
- auto ret_replace = u16::read_replace(it, empty.end());
+ auto ret_replace = u16::read_replace(it, empty.end(), false);
ASSERT_FALSE(ret_replace.has_value());
EXPECT_EQ(u::ReadErrorReplace::End, ret_replace.error());
@@ -586,7 +597,7 @@ TEST(u16, examples) {
EXPECT_EQ(it, literal.end());
it = literal.begin();
- auto ret_replace = u16::read_replace(it, literal.end());
+ auto ret_replace = u16::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0x24b62, *ret_replace);
EXPECT_EQ(it, literal.end());
@@ -652,11 +663,11 @@ TEST(u16, invalid) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::Invalid, ret.error());
it = literal.begin();
- auto ret_replace = u16::read_replace(it, literal.end());
+ auto ret_replace = u16::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0xfffd, *ret_replace);
EXPECT_NE(it, literal.end());
- ret_replace = u16::read_replace(it, literal.end());
+ ret_replace = u16::read_replace(it, literal.end(), false);
ASSERT_FALSE(ret_replace.has_value());
EXPECT_EQ(u::ReadErrorReplace::Incomplete, ret_replace.error());
}
@@ -667,22 +678,22 @@ TEST(u16, invalid) {
ASSERT_FALSE(ret.has_value());
EXPECT_EQ(u::ReadError::Invalid, ret.error());
it = literal.begin();
- auto ret_replace = u16::read_replace(it, literal.end());
+ auto ret_replace = u16::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0xfffd, *ret_replace);
EXPECT_NE(it, literal.end());
- ret_replace = u16::read_replace(it, literal.end());
+ ret_replace = u16::read_replace(it, literal.end(), false);
ASSERT_FALSE(ret_replace.has_value());
EXPECT_EQ(u::ReadErrorReplace::Incomplete, ret_replace.error());
}
{
std::vector<uint16_t> literal{0xdc37, 0xdf62};
auto it = literal.begin();
- auto ret_replace = u16::read_replace(it, literal.end());
+ auto ret_replace = u16::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0xfffd, *ret_replace);
EXPECT_NE(it, literal.end());
- ret_replace = u16::read_replace(it, literal.end());
+ ret_replace = u16::read_replace(it, literal.end(), false);
ASSERT_TRUE(ret_replace.has_value());
EXPECT_EQ(0xfffd, *ret_replace);
EXPECT_EQ(it, literal.end());
diff --git a/test/uio.cc b/test/uio.cc
new file mode 100644
index 0000000..ce666c1
--- /dev/null
+++ b/test/uio.cc
@@ -0,0 +1,620 @@
+#include <gtest/gtest.h>
+
+#include "io.hh"
+#include "io_test_helper.hh"
+#include "uio.hh"
+
+#include <string>
+
+using namespace std::literals::string_literals;
+
+TEST(uio_u8, empty) {
+ auto uio = u8::open(io::memory(""));
+ std::string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(0, ret.value());
+}
+
+TEST(uio_u16, empty) {
+ auto uio = u16::open(io::memory(""));
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(0, ret.value());
+}
+
+TEST(uio_u8, sample) {
+ auto uio = u8::open(io::memory("\xf0\x90\x8D\x85" "es"),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF8});
+ std::string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(6, ret.value());
+ EXPECT_EQ("\xf0\x90\x8D\x85" "es", tmp);
+}
+
+TEST(uio_u16, sample_be) {
+ auto uio = u16::open(io::memory("\x00\x24\xD8\x01\xDC\x37"s),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_BE});
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 5);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(3, ret.value());
+ EXPECT_EQ(0x24, tmp[0]);
+ EXPECT_EQ(0xd801, tmp[1]);
+ EXPECT_EQ(0xdc37, tmp[2]);
+}
+
+TEST(uio_u16, sample_le) {
+ auto uio = u16::open(io::memory("\x24\x00\x01\xD8\x37\xDC"s),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_LE});
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 5);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(3, ret.value());
+ EXPECT_EQ(0x24, tmp[0]);
+ EXPECT_EQ(0xd801, tmp[1]);
+ EXPECT_EQ(0xdc37, tmp[2]);
+}
+
+TEST(uio_u8, sample_detect) {
+ auto uio = u8::open(io::memory("\xf0\x90\x8D\x85" "es"));
+ std::string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(6, ret.value());
+ EXPECT_EQ("\xf0\x90\x8D\x85" "es", tmp);
+}
+
+TEST(uio_u16, sample_detect_be) {
+ auto uio = u16::open(io::memory("\x00\x24\xD8\x01\xDC\x37"s));
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 5);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(3, ret.value());
+ EXPECT_EQ(0x24, tmp[0]);
+ EXPECT_EQ(0xd801, tmp[1]);
+ EXPECT_EQ(0xdc37, tmp[2]);
+}
+
+TEST(uio_u16, sample_detect_le) {
+ auto uio = u16::open(io::memory("\x24\x00\x01\xD8\x37\xDC"s));
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 5);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(3, ret.value());
+ EXPECT_EQ(0x24, tmp[0]);
+ EXPECT_EQ(0xd801, tmp[1]);
+ EXPECT_EQ(0xdc37, tmp[2]);
+}
+
+TEST(uio_u8, invalid) {
+ auto uio = u8::open(io::memory("r\xe4ksm\xf6rg\xe5s"),
+ u::ReaderConfig{
+ .strict=true,
+ .input=u::ReaderInputFormat::UTF8,
+ });
+ std::string tmp;
+ auto ret = uio->repeat_read(tmp, 20);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(1, ret.value());
+ EXPECT_EQ("r", tmp);
+ ret = uio->repeat_read(tmp, 20);
+ ASSERT_FALSE(ret.has_value());
+ EXPECT_EQ(io::ReadError::InvalidData, ret.error());
+}
+
+TEST(uio_u8, invalid_detect) {
+ auto uio = u8::open(io::memory("r\xe4ksm\xf6rg\xe5s"),
+ u::ReaderConfig{.strict=true});
+ std::string tmp;
+ auto ret = uio->repeat_read(tmp, 20);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(1, ret.value());
+ EXPECT_EQ("r", tmp);
+ ret = uio->repeat_read(tmp, 20);
+ ASSERT_FALSE(ret.has_value());
+ EXPECT_EQ(io::ReadError::InvalidData, ret.error());
+}
+
+TEST(uio_u8, invalid_replace) {
+ auto uio = u8::open(io::memory("r\xe4ksm\xf6rg\xe5s"),
+ u::ReaderConfig{
+ .strict=false,
+ .input=u::ReaderInputFormat::UTF8,
+ });
+ std::string tmp;
+ auto ret = uio->repeat_read(tmp, 20);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(16, ret.value());
+ EXPECT_EQ("r\xef\xbf\xbdksm\xef\xbf\xbdrg\xef\xbf\xbds", tmp);
+}
+
+TEST(uio_u8, read_error) {
+ auto uio = u8::open(io_make_breaking(io::memory("\xef\xbf\xbd"), 1),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF8});
+ std::string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_FALSE(ret.has_value());
+ EXPECT_EQ(io::ReadError::Error, ret.error());
+}
+
+TEST(uio_u16, read_error) {
+ auto uio = u16::open(io_make_breaking(io::memory("\x00\x24"s), 1),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_BE});
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_FALSE(ret.has_value());
+ EXPECT_EQ(io::ReadError::Error, ret.error());
+}
+
+TEST(uio_u8, read_incomplete_strict) {
+ auto uio = u8::open(io::memory("\xef"),
+ u::ReaderConfig{
+ .strict=true,
+ .input=u::ReaderInputFormat::UTF8,
+ });
+ std::string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_FALSE(ret.has_value());
+ EXPECT_EQ(io::ReadError::InvalidData, ret.error());
+}
+
+TEST(uio_u8, read_incomplete) {
+ auto uio = u8::open(io::memory("\xef"),
+ u::ReaderConfig{
+ .strict=false,
+ .input=u::ReaderInputFormat::UTF8,
+ });
+ std::string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(3, ret.value());
+ EXPECT_EQ("\xef\xbf\xbd", tmp);
+}
+
+TEST(uio_u16, read_incomplete_strict_be) {
+ auto uio = u16::open(io::memory("\x00"s),
+ u::ReaderConfig{
+ .strict=true,
+ .input=u::ReaderInputFormat::UTF16_BE,
+ });
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_FALSE(ret.has_value());
+ EXPECT_EQ(io::ReadError::InvalidData, ret.error());
+}
+
+TEST(uio_u16, read_incomplete_be) {
+ auto uio = u16::open(io::memory("\x00"s),
+ u::ReaderConfig{
+ .strict=false,
+ .input=u::ReaderInputFormat::UTF16_BE,
+ });
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(1, ret.value());
+ EXPECT_EQ(0xfffd, tmp[0]);
+}
+
+TEST(uio_u16, read_incomplete_strict_le) {
+ auto uio = u16::open(io::memory("$"),
+ u::ReaderConfig{
+ .strict=true,
+ .input=u::ReaderInputFormat::UTF16_LE,
+ });
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_FALSE(ret.has_value());
+ EXPECT_EQ(io::ReadError::InvalidData, ret.error());
+}
+
+TEST(uio_u16, read_incomplete_le) {
+ auto uio = u16::open(io::memory("$"),
+ u::ReaderConfig{
+ .strict=false,
+ .input=u::ReaderInputFormat::UTF16_LE,
+ });
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(1, ret.value());
+ EXPECT_EQ(0xfffd, tmp[0]);
+}
+
+TEST(uio_u8, max_too_small) {
+ auto uio = u8::open(io::memory("\xf0\x90\x8D\x85" "es"),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF8});
+ std::string tmp;
+ auto ret = uio->read(tmp.data(), 0);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(0, ret.value());
+
+ ret = uio->read(tmp, 2);
+ ASSERT_FALSE(ret.has_value());
+ EXPECT_EQ(io::ReadError::MaxTooSmall, ret.error());
+}
+
+TEST(uio_u16, max_too_small_be) {
+ auto uio = u16::open(io::memory("\xD8\x01\xDC\x37"),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_BE});
+ std::u16string tmp;
+ auto ret = uio->read(tmp.data(), 0);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(0, ret.value());
+
+ ret = uio->read(tmp, 1);
+ ASSERT_FALSE(ret.has_value());
+ EXPECT_EQ(io::ReadError::MaxTooSmall, ret.error());
+}
+
+TEST(uio_u16, max_too_small_le) {
+ auto uio = u16::open(io::memory("\x01\xD8\x37\xDC"),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_LE});
+ std::u16string tmp;
+ auto ret = uio->read(tmp.data(), 0);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(0, ret.value());
+
+ ret = uio->read(tmp, 1);
+ ASSERT_FALSE(ret.has_value());
+ EXPECT_EQ(io::ReadError::MaxTooSmall, ret.error());
+}
+
+TEST(uio_u8, partial) {
+ auto uio = u8::open(io::memory("\xf0\x90\x8D\x85" "es"),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF8});
+ std::string tmp;
+ auto ret = uio->repeat_read(tmp, 4);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(4, ret.value());
+ EXPECT_EQ("\xf0\x90\x8d\x85", tmp);
+
+ ret = uio->repeat_read(tmp, 2);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(2, ret.value());
+ EXPECT_EQ("es", tmp);
+}
+
+TEST(uio_u16, partial_be) {
+ auto uio = u16::open(io::memory("\x00\x24\xD8\x01\xDC\x37"s),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_BE});
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 1);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(1, ret.value());
+ EXPECT_EQ(0x24, tmp[0]);
+
+ ret = uio->repeat_read(tmp, 2);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(2, ret.value());
+ EXPECT_EQ(0xd801, tmp[0]);
+ EXPECT_EQ(0xdc37, tmp[1]);
+}
+
+TEST(uio_u16, partial_le) {
+ auto uio = u16::open(io::memory("\x24\x00\x01\xD8\x37\xDC"s),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_LE});
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 1);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(1, ret.value());
+ EXPECT_EQ(0x24, tmp[0]);
+
+ ret = uio->repeat_read(tmp, 2);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(2, ret.value());
+ EXPECT_EQ(0xd801, tmp[0]);
+ EXPECT_EQ(0xdc37, tmp[1]);
+}
+
+TEST(uio_u16, invalid_be) {
+ auto uio = u16::open(io::memory("\x00\x24\xd8\x01"s),
+ u::ReaderConfig{
+ .strict=true,
+ .input=u::ReaderInputFormat::UTF16_BE,
+ });
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(1, ret.value());
+ EXPECT_EQ(0x24, tmp[0]);
+ ret = uio->repeat_read(tmp, 10);
+ ASSERT_FALSE(ret.has_value());
+ EXPECT_EQ(io::ReadError::InvalidData, ret.error());
+}
+
+TEST(uio_u16, invalid_detect_be) {
+ auto uio = u16::open(io::memory("\x00\x24\xd8\x01"s),
+ u::ReaderConfig{.strict=true});
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(1, ret.value());
+ EXPECT_EQ(0x24, tmp[0]);
+ ret = uio->repeat_read(tmp, 10);
+ ASSERT_FALSE(ret.has_value());
+ EXPECT_EQ(io::ReadError::InvalidData, ret.error());
+}
+
+TEST(uio_u16, invalid_replace_be) {
+ auto uio = u16::open(io::memory("\x00\x24\xd8\x01"s),
+ u::ReaderConfig{
+ .strict=false,
+ .input=u::ReaderInputFormat::UTF16_BE,
+ });
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(2, ret.value());
+ EXPECT_EQ(0x24, tmp[0]);
+ EXPECT_EQ(0xfffd, tmp[1]);
+}
+
+TEST(uio_u16, invalid_le) {
+ auto uio = u16::open(io::memory("\x24\x00\x01\xd8"s),
+ u::ReaderConfig{
+ .strict=true,
+ .input=u::ReaderInputFormat::UTF16_LE,
+ });
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(1, ret.value());
+ EXPECT_EQ(0x24, tmp[0]);
+ ret = uio->repeat_read(tmp, 10);
+ ASSERT_FALSE(ret.has_value());
+ EXPECT_EQ(io::ReadError::InvalidData, ret.error());
+}
+
+TEST(uio_u16, invalid_detect_le) {
+ auto uio = u16::open(io::memory("\x24\x00\x01\xd8"s),
+ u::ReaderConfig{.strict=true});
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(1, ret.value());
+ EXPECT_EQ(0x24, tmp[0]);
+ ret = uio->repeat_read(tmp, 10);
+ ASSERT_FALSE(ret.has_value());
+ EXPECT_EQ(io::ReadError::InvalidData, ret.error());
+}
+
+TEST(uio_u16, invalid_replace_le) {
+ auto uio = u16::open(io::memory("\x24\x00\x01\xd8"s),
+ u::ReaderConfig{
+ .strict=false,
+ .input=u::ReaderInputFormat::UTF16_LE,
+ });
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(2, ret.value());
+ EXPECT_EQ(0x24, tmp[0]);
+ EXPECT_EQ(0xfffd, tmp[1]);
+}
+
+TEST(uio_u8, bom) {
+ auto uio = u8::open(io::memory("\xef\xbb\xbf\xf0\x90\x8D\x85" "es"),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF8});
+ std::string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(6, ret.value());
+ EXPECT_EQ("\xf0\x90\x8D\x85" "es", tmp);
+}
+
+TEST(uio_u8, bom_keep) {
+ auto uio = u8::open(io::memory("\xef\xbb\xbf\xf0\x90\x8D\x85" "es"),
+ u::ReaderConfig{
+ .input=u::ReaderInputFormat::UTF8,
+ .skip_bom=false,
+ });
+ std::string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(9, ret.value());
+ EXPECT_EQ("\xef\xbb\xbf\xf0\x90\x8D\x85" "es", tmp);
+}
+
+TEST(uio_u16, bom_be) {
+ auto uio = u16::open(io::memory("\xfe\xff\x00\x24\xD8\x01\xDC\x37"s),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_BE});
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 5);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(3, ret.value());
+ EXPECT_EQ(0x24, tmp[0]);
+ EXPECT_EQ(0xd801, tmp[1]);
+ EXPECT_EQ(0xdc37, tmp[2]);
+}
+
+TEST(uio_u16, bom_le) {
+ auto uio = u16::open(io::memory("\xff\xfe\x24\x00\x01\xD8\x37\xDC"s),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_LE});
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 5);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(3, ret.value());
+ EXPECT_EQ(0x24, tmp[0]);
+ EXPECT_EQ(0xd801, tmp[1]);
+ EXPECT_EQ(0xdc37, tmp[2]);
+}
+
+TEST(uio_u16, bom_keep_be) {
+ auto uio = u16::open(io::memory("\xfe\xff\x00\x24\xD8\x01\xDC\x37"s),
+ u::ReaderConfig{
+ .input=u::ReaderInputFormat::UTF16_BE,
+ .skip_bom=false,
+ });
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 5);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(4, ret.value());
+ EXPECT_EQ(0xfeff, tmp[0]);
+ EXPECT_EQ(0x24, tmp[1]);
+ EXPECT_EQ(0xd801, tmp[2]);
+ EXPECT_EQ(0xdc37, tmp[3]);
+}
+
+TEST(uio_u16, bom_keep_le) {
+ auto uio = u16::open(io::memory("\xff\xfe\x24\x00\x01\xD8\x37\xDC"s),
+ u::ReaderConfig{
+ .input=u::ReaderInputFormat::UTF16_LE,
+ .skip_bom=false,
+ });
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 5);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(4, ret.value());
+ EXPECT_EQ(0xfeff, tmp[0]);
+ EXPECT_EQ(0x24, tmp[1]);
+ EXPECT_EQ(0xd801, tmp[2]);
+ EXPECT_EQ(0xdc37, tmp[3]);
+}
+
+TEST(uio_u8, bom_detect) {
+ auto uio = u8::open(io::memory("\xef\xbb\xbf\xf0\x90\x8D\x85" "es"));
+ std::string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(6, ret.value());
+ EXPECT_EQ("\xf0\x90\x8D\x85" "es", tmp);
+}
+
+TEST(uio_u8, bom_keep_detect) {
+ auto uio = u8::open(io::memory("\xef\xbb\xbf\xf0\x90\x8D\x85" "es"),
+ u::ReaderConfig{.skip_bom=false});
+ std::string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(9, ret.value());
+ EXPECT_EQ("\xef\xbb\xbf\xf0\x90\x8D\x85" "es", tmp);
+}
+
+TEST(uio_u16, bom_detect_be) {
+ auto uio = u16::open(io::memory("\xfe\xff\x00\x24\xD8\x01\xDC\x37"s));
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 5);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(3, ret.value());
+ EXPECT_EQ(0x24, tmp[0]);
+ EXPECT_EQ(0xd801, tmp[1]);
+ EXPECT_EQ(0xdc37, tmp[2]);
+}
+
+TEST(uio_u16, bom_detect_le) {
+ auto uio = u16::open(io::memory("\xff\xfe\x24\x00\x01\xD8\x37\xDC"s));
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 5);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(3, ret.value());
+ EXPECT_EQ(0x24, tmp[0]);
+ EXPECT_EQ(0xd801, tmp[1]);
+ EXPECT_EQ(0xdc37, tmp[2]);
+}
+
+TEST(uio_u16, bom_keep_detect_be) {
+ auto uio = u16::open(io::memory("\xfe\xff\x00\x24\xD8\x01\xDC\x37"s),
+ u::ReaderConfig{.skip_bom=false});
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 5);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(4, ret.value());
+ EXPECT_EQ(0xfeff, tmp[0]);
+ EXPECT_EQ(0x24, tmp[1]);
+ EXPECT_EQ(0xd801, tmp[2]);
+ EXPECT_EQ(0xdc37, tmp[3]);
+}
+
+TEST(uio_u16, bom_keep_detect_le) {
+ auto uio = u16::open(io::memory("\xff\xfe\x24\x00\x01\xD8\x37\xDC"s),
+ u::ReaderConfig{.skip_bom=false});
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 5);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(4, ret.value());
+ EXPECT_EQ(0xfeff, tmp[0]);
+ EXPECT_EQ(0x24, tmp[1]);
+ EXPECT_EQ(0xd801, tmp[2]);
+ EXPECT_EQ(0xdc37, tmp[3]);
+}
+
+TEST(uio_u8, input_utf16_be) {
+ auto uio = u8::open(io::memory("\x00\x24\xD8\x01\xDC\x37"s),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_BE});
+ std::string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(5, ret.value());
+ EXPECT_EQ("$\xf0\x90\x90\xb7", tmp);
+}
+
+TEST(uio_u8, input_utf16_le) {
+ auto uio = u8::open(io::memory("\x24\x00\x01\xD8\x37\xDC"s),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_LE});
+ std::string tmp;
+ auto ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(5, ret.value());
+ EXPECT_EQ("$\xf0\x90\x90\xb7", tmp);
+}
+
+TEST(uio_u16, input_utf8) {
+ auto uio = u16::open(io::memory("\xf0\x90\x8D\x85" "es"),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF8});
+ std::u16string tmp;
+ auto ret = uio->repeat_read(tmp, 5);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(4, ret.value());
+ EXPECT_EQ(0xd800, tmp[0]);
+ EXPECT_EQ(0xdf45, tmp[1]);
+ EXPECT_EQ(0x65, tmp[2]);
+ EXPECT_EQ(0x73, tmp[3]);
+}
+
+TEST(uio_u8, skip) {
+ auto uio = u8::open(io::memory("\xf0\x90\x8D\x85" "es"),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF8});
+ std::string tmp;
+ auto ret = uio->repeat_skip(3);
+ ASSERT_FALSE(ret.has_value());
+ EXPECT_EQ(io::ReadError::MaxTooSmall, ret.error());
+ ret = uio->repeat_skip(4);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(4, ret.value());
+ ret = uio->repeat_read(tmp, 10);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(2, ret.value());
+ EXPECT_EQ("es", tmp);
+}
+
+TEST(uio_u16, skip_be) {
+ auto uio = u16::open(io::memory("\x00\x24\xD8\x01\xDC\x37"s),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_BE});
+ std::u16string tmp;
+ auto ret = uio->repeat_skip(4); // Note that this is in bytes
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(2, ret.value());
+ ret = uio->repeat_read(tmp, 5);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(2, ret.value());
+ EXPECT_EQ(0xd801, tmp[0]);
+ EXPECT_EQ(0xdc37, tmp[1]);
+}
+
+TEST(uio_u16, skip_le) {
+ auto uio = u16::open(io::memory("\x24\x00\x01\xD8\x37\xDC"s),
+ u::ReaderConfig{.input=u::ReaderInputFormat::UTF16_LE});
+ std::u16string tmp;
+ auto ret = uio->repeat_skip(4); // Note that this is in bytes
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(2, ret.value());
+ ret = uio->repeat_read(tmp, 5);
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(2, ret.value());
+ EXPECT_EQ(0xd801, tmp[0]);
+ EXPECT_EQ(0xdc37, tmp[1]);
+}