diff options
| author | Joel Klinghed <the_jk@spawned.biz> | 2025-09-17 00:48:46 +0200 |
|---|---|---|
| committer | Joel Klinghed <the_jk@spawned.biz> | 2025-09-17 00:48:46 +0200 |
| commit | 2a9e59adb5db8630ab7bdbdeedac623e3397989b (patch) | |
| tree | 44043f182f12ba9feb2c42ff2f4962ef1c452a96 /src | |
| parent | d091dc78154d1c6341251f1ed13bfe3b33f10773 (diff) | |
uline: Add unicode line reader
Diffstat (limited to 'src')
| -rw-r--r-- | src/uline.cc | 198 | ||||
| -rw-r--r-- | src/uline.hh | 72 |
2 files changed, 270 insertions, 0 deletions
// ===========================================================================
// src/uline.cc (new file)
// ===========================================================================
#include "uline.hh"

#include "check.hh"

#include <algorithm>
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <expected>
#include <memory>
#include <string_view>
#include <utility>

namespace {

// Buffered line splitter shared by the UTF-8 and UTF-16 line readers.
//
// T is the code unit type (char / char16_t) and Reader is the underlying
// source, which is only used through `read(T* dst, size_t bytes)`.
//
// The buffer holds `max_len + 2` code units: room for one maximum length
// line plus a two unit terminator. Lines are recognised by
// `line_terminators`, where [0] is the "first half" of a possible two unit
// terminator and [1] is the unit that may follow it (CR and LF in both
// instantiations below). A lone [0], a lone [1] and the pair [0][1] all end
// a line. Lines longer than `max_len` are returned in `max_len` sized pieces.
template <typename T, typename Reader>
class UnicodeReader {
 public:
  UnicodeReader(std::unique_ptr<Reader> reader, size_t max_len,
                std::array<T, 2> line_terminators)
      : reader_(std::move(reader)),
        max_len_(max_len),
        line_terminators_(line_terminators),
        buffer_(std::make_unique_for_overwrite<T[]>(
            check::add(max_len, static_cast<size_t>(2)))),
        rptr_(buffer_.get()),
        wptr_(buffer_.get()),
        search_(rptr_),
        end_(buffer_.get() + check::add(max_len, static_cast<size_t>(2))) {}

  // Returns the next line without its terminator, or an error.
  //
  // The returned view points into the internal buffer and is only valid
  // until the next call to read().
  //
  // A default constructed line::ReadError is returned when the source
  // reports zero bytes and nothing is buffered (presumably "end of file" --
  // confirm against line.hh); errors from the underlying reader are wrapped
  // in a line::ReadError.
  [[nodiscard]]
  std::expected<std::basic_string_view<T>, line::ReadError> read() {
    while (true) {
      // Continue scanning where the previous iteration stopped, everything
      // in [rptr_, search_) is already known to be terminator free.
      search_ = std::find_first_of(search_, wptr_, line_terminators_.begin(),
                                   line_terminators_.end());
      if (search_ < wptr_) {
        // Terminator found, but too far away: hand out a max_len_ piece and
        // leave the rest (including the terminator) for the next call.
        if (std::cmp_greater(search_ - rptr_, max_len_)) {
          return line(max_len_, 0);
        }

        size_t tlen;
        if (*search_ == line_terminators_[1]) {
          tlen = 1;
        } else {
          // Found terminator [0]; need to peek at the following unit to know
          // if this is a one or two unit terminator.
          if (search_ + 1 == wptr_) {
            make_space_if_needed();
            auto got = fill();
            if (got.has_value()) {
              if (got.value() == 0) {
                // Source is exhausted, so [0] was the last unit.
                return line(search_ - rptr_, 1);
              }
            } else {
              return std::unexpected(line::ReadError(got.error()));
            }
          }
          if (search_[1] == line_terminators_[1]) {
            tlen = 2;
          } else {
            tlen = 1;
          }
        }
        return line(search_ - rptr_, tlen);
      }

      // No terminator buffered. Only split the line once there are MORE than
      // max_len_ units without a terminator, mirroring the check above. Using
      // >= here would make a line of exactly max_len_ units be returned
      // either with its terminator consumed or followed by a spurious empty
      // line, depending on how the underlying reads happened to be chunked.
      if (std::cmp_greater(wptr_ - rptr_, max_len_)) {
        return line(max_len_, 0);
      }

      // At most max_len_ units are buffered here, so after compaction there
      // is always room for at least two more units.
      make_space_if_needed();
      auto got = fill();
      if (got.has_value()) {
        if (got.value() == 0) {
          if (rptr_ == wptr_) {
            return std::unexpected(line::ReadError());
          }
          // Last line in the source lacks a terminator.
          return line(wptr_ - rptr_, 0);
        }
      } else {
        return std::unexpected(line::ReadError(got.error()));
      }
    }
  }

  // Number of lines returned by read() so far.
  [[nodiscard]] uint64_t number() const { return number_; }

 private:
  // Consume `len` units of line data plus `terminator_len` units of
  // terminator from the front of the buffer and return a view of the data.
  std::basic_string_view<T> line(size_t len, size_t terminator_len) {
    assert(len <= max_len_);
    auto ret = std::basic_string_view<T>(rptr_, len);
    rptr_ += len + terminator_len;
    search_ = rptr_;
    ++number_;
    return ret;
  }

  // Move unread data to the start of the buffer when the space left at the
  // end is getting small (<= 1024 units) and there is consumed space at the
  // front to reclaim. All cursors are shifted accordingly.
  void make_space_if_needed() {
    size_t free = rptr_ - buffer_.get();
    if (free == 0)
      return;
    size_t avail = end_ - wptr_;
    if (avail > 1024)
      return;
    std::memmove(buffer_.get(), rptr_, (wptr_ - rptr_) * sizeof(T));
    search_ -= free;
    wptr_ -= free;
    rptr_ = buffer_.get();
  }

  // Read more data into the free space at the end of the buffer.
  //
  // The reader is given a size in bytes and its result is converted back to
  // code units by dividing with sizeof(T); the untouched reader result is
  // returned, so zero means the reader delivered nothing.
  // NOTE(review): this assumes the reader only ever returns whole code
  // units (a multiple of sizeof(T) bytes) -- confirm against uio.hh.
  std::expected<size_t, io::ReadError> fill() {
    auto ret = reader_->read(wptr_, (end_ - wptr_) * sizeof(T));
    if (ret.has_value())
      wptr_ += ret.value() / sizeof(T);
    return ret;
  }

  std::unique_ptr<Reader> reader_;
  size_t const max_len_;
  std::array<T, 2> const line_terminators_;
  uint64_t number_{0};
  std::unique_ptr<T[]> buffer_;
  T* rptr_;    // Start of unread data.
  T* wptr_;    // End of buffered data, next write position.
  T* search_;  // Next position to look for a terminator, in [rptr_, wptr_].
  T* const end_;  // One past the end of buffer_.
};

}  // namespace

namespace u8 {

namespace line {

namespace {

// u8::line::Reader implemented by UnicodeReader over char, using CR and LF
// as line terminators.
class ReaderImpl final : public UnicodeReader<char, u8::Reader>,
                         public virtual Reader {
 public:
  ReaderImpl(std::unique_ptr<u8::Reader> reader, size_t max_len)
      : UnicodeReader<char, u8::Reader>(std::move(reader), max_len,
                                        {'\r', '\n'}) {}

  [[nodiscard]]
  std::expected<std::string_view, ::line::ReadError> read() override {
    return UnicodeReader<char, u8::Reader>::read();
  }

  [[nodiscard]]
  uint64_t number() const override {
    return UnicodeReader<char, u8::Reader>::number();
  }
};

}  // namespace

// Create a line reader on top of `reader`. Lines longer than `max_len` code
// units are returned in pieces of `max_len` units.
[[nodiscard]]
std::unique_ptr<Reader> open(std::unique_ptr<u8::Reader> reader,
                             size_t max_len) {
  return std::make_unique<ReaderImpl>(std::move(reader), max_len);
}

}  // namespace line

}  // namespace u8

namespace u16 {

namespace line {

namespace {

// u16::line::Reader implemented by UnicodeReader over char16_t, using CR and
// LF as line terminators.
class ReaderImpl final : public UnicodeReader<char16_t, u16::Reader>,
                         public virtual Reader {
 public:
  ReaderImpl(std::unique_ptr<u16::Reader> reader, size_t max_len)
      : UnicodeReader<char16_t, u16::Reader>(std::move(reader), max_len,
                                             {u'\r', u'\n'}) {}

  [[nodiscard]]
  std::expected<std::u16string_view, ::line::ReadError> read() override {
    return UnicodeReader<char16_t, u16::Reader>::read();
  }

  [[nodiscard]]
  uint64_t number() const override {
    return UnicodeReader<char16_t, u16::Reader>::number();
  }
};

}  // namespace

// Create a line reader on top of `reader`. Lines longer than `max_len` code
// units are returned in pieces of `max_len` units.
[[nodiscard]]
std::unique_ptr<Reader> open(std::unique_ptr<u16::Reader> reader,
                             size_t max_len) {
  return std::make_unique<ReaderImpl>(std::move(reader), max_len);
}

}  // namespace line

}  // namespace u16

// ===========================================================================
// src/uline.hh (new file)
// ===========================================================================
#ifndef ULINE_HH
#define ULINE_HH

#include "line.hh"  // IWYU pragma: export
#include "uio.hh"  // IWYU pragma: export

#include <cstddef>
#include <cstdint>
#include <expected>
#include <memory>
#include <string_view>

namespace u8 {

namespace line {

// Reads lines of UTF-8 code units from a u8::Reader.
class Reader {
 public:
  virtual ~Reader() = default;

  // Returns the next line, without line terminator.
  // Returned view is only valid until next call to read.
  [[nodiscard]]
  virtual std::expected<std::string_view, ::line::ReadError> read() = 0;

  // Number of the line the next call to read will return, starting at zero.
  // So, before first read it is zero, after first read it is one.
  [[nodiscard]] virtual uint64_t number() const = 0;

 protected:
  Reader() = default;

  Reader(Reader const&) = delete;
  Reader& operator=(Reader const&) = delete;
};

// Create a line reader on top of `reader`. `max_len` is the maximum length,
// in code units, of a line returned by Reader::read.
[[nodiscard]] std::unique_ptr<Reader> open(std::unique_ptr<u8::Reader> reader,
                                           size_t max_len = 8192);

}  // namespace line

}  // namespace u8

namespace u16 {

namespace line {

// Reads lines of UTF-16 code units from a u16::Reader.
class Reader {
 public:
  virtual ~Reader() = default;

  // Returns the next line, without line terminator.
  // Returned view is only valid until next call to read.
  [[nodiscard]]
  virtual std::expected<std::u16string_view, ::line::ReadError> read() = 0;

  // Number of the line the next call to read will return, starting at zero.
  // So, before first read it is zero, after first read it is one.
  [[nodiscard]] virtual uint64_t number() const = 0;

 protected:
  Reader() = default;

  Reader(Reader const&) = delete;
  Reader& operator=(Reader const&) = delete;
};

// Create a line reader on top of `reader`. `max_len` is the maximum length,
// in code units, of a line returned by Reader::read.
[[nodiscard]] std::unique_ptr<Reader> open(std::unique_ptr<u16::Reader> reader,
                                           size_t max_len = 8192);

}  // namespace line

}  // namespace u16

#endif  // ULINE_HH
