summary | refs | log | tree | commit | diff
path: root/src/uline.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/uline.cc')
-rw-r--r-- src/uline.cc 198
1 files changed, 198 insertions, 0 deletions
diff --git a/src/uline.cc b/src/uline.cc
new file mode 100644
index 0000000..21927b3
--- /dev/null
+++ b/src/uline.cc
@@ -0,0 +1,198 @@
+#include "uline.hh"
+
+#include "check.hh"
+
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <expected>
+#include <memory>
+#include <string_view>
+#include <utility>
+
+namespace {
+
+/// Buffered line splitter shared by the UTF-8 and UTF-16 readers below.
+/// Pulls units of type T from Reader into one buffer of max_len + 2 units and
+/// returns each line as a view into it. A line ends at either terminator, or
+/// at line_terminators[0] directly followed by line_terminators[1]; a longer
+/// run without terminator is cut into pieces of at most max_len units.
+template <typename T, typename Reader>
+class UnicodeReader {
+ public:
+  UnicodeReader(std::unique_ptr<Reader> reader, size_t max_len,
+                std::array<T, 2> line_terminators)
+      : reader_(std::move(reader)),
+        max_len_(max_len),
+        line_terminators_(line_terminators),
+        buffer_(std::make_unique_for_overwrite<T[]>(
+            check::add(max_len, static_cast<size_t>(2)))),
+        rptr_(buffer_.get()),
+        wptr_(buffer_.get()),
+        search_(rptr_),
+        end_(buffer_.get() + check::add(max_len, static_cast<size_t>(2))) {}
+
+  /// Returns the next line without its terminator. The view points into the
+  /// internal buffer, which later calls may overwrite. Fails with a default
+  /// line::ReadError once the reader yields 0 and nothing is buffered, or
+  /// with the reader's own error wrapped in a line::ReadError.
+  [[nodiscard]]
+  std::expected<std::basic_string_view<T>, line::ReadError> read() {
+    while (true) {
+      search_ = std::find_first_of(search_, wptr_, line_terminators_.begin(),
+                                   line_terminators_.end());
+      if (search_ < wptr_) {
+        if (std::cmp_greater(search_ - rptr_, max_len_)) {
+          return line(max_len_, 0);
+        }
+
+        size_t tlen = 1;
+        if (*search_ != line_terminators_[1]) {  // may be first of a pair
+          if (search_ + 1 == wptr_) {
+            make_space_if_needed();
+            auto got = fill();
+            if (!got.has_value()) {
+              return std::unexpected(line::ReadError(got.error()));
+            }
+            if (got.value() == 0) return line(search_ - rptr_, 1);
+          }
+          if (search_[1] == line_terminators_[1]) tlen = 2;
+        }
+        return line(search_ - rptr_, tlen);
+      }
+      // No terminator buffered. Cut only when more than max_len_ units are
+      // pending: at exactly max_len_ the terminator may just not be read yet,
+      // and cutting would add an empty line depending on the reader's chunks.
+      if (std::cmp_greater(wptr_ - rptr_, max_len_)) {
+        return line(max_len_, 0);
+      }
+
+      make_space_if_needed();
+      auto got = fill();
+      if (!got.has_value()) {
+        return std::unexpected(line::ReadError(got.error()));
+      }
+      if (got.value() == 0) {
+        if (rptr_ == wptr_) return std::unexpected(line::ReadError());
+        return line(wptr_ - rptr_, 0);
+      }
+    }
+  }
+
+  /// Number of lines read() has returned so far.
+  [[nodiscard]] uint64_t number() const { return number_; }
+
+ private:
+  /// Returns len units at rptr_ as a line, then skips terminator_len more.
+  std::basic_string_view<T> line(size_t len, size_t terminator_len) {
+    assert(len <= max_len_);
+    auto ret = std::basic_string_view<T>(rptr_, len);
+    rptr_ += len + terminator_len;
+    search_ = rptr_;
+    ++number_;
+    return ret;
+  }
+
+  /// Moves unread data to the buffer start once the free tail is <= 1024.
+  void make_space_if_needed() {
+    size_t const consumed = rptr_ - buffer_.get();
+    size_t const avail = end_ - wptr_;
+    if (consumed == 0 || avail > 1024) return;
+    std::memmove(buffer_.get(), rptr_, (wptr_ - rptr_) * sizeof(T));
+    search_ -= consumed;
+    wptr_ -= consumed;
+    rptr_ = buffer_.get();
+  }
+
+  /// Reads into the free tail; the reader's sizes are scaled by sizeof(T).
+  std::expected<size_t, io::ReadError> fill() {
+    auto ret = reader_->read(wptr_, (end_ - wptr_) * sizeof(T));
+    if (ret.has_value()) wptr_ += ret.value() / sizeof(T);
+    return ret;
+  }
+
+  std::unique_ptr<Reader> reader_;
+  size_t const max_len_;
+  std::array<T, 2> const line_terminators_;
+  uint64_t number_{0};
+  std::unique_ptr<T[]> buffer_;
+  T* rptr_;       // start of data not yet returned
+  T* wptr_;       // end of data read so far
+  T* search_;     // where the terminator scan resumes
+  T* const end_;  // one past the end of buffer_
+};
+
+} // namespace
+
+namespace u8 {
+
+namespace line {
+
+namespace {
+
+/// Exposes UnicodeReader<char, u8::Reader> through the Reader interface.
+class ReaderImpl : public UnicodeReader<char, u8::Reader>,
+                   public virtual Reader {
+  using Base = UnicodeReader<char, u8::Reader>;
+
+ public:
+  ReaderImpl(std::unique_ptr<u8::Reader> reader, size_t max_len)
+      : Base(std::move(reader), max_len, {'\r', '\n'}) {}
+
+  [[nodiscard]]
+  std::expected<std::string_view, ::line::ReadError> read() override {
+    return Base::read();
+  }
+
+  [[nodiscard]]
+  uint64_t number() const override { return Base::number(); }
+};
+
+} // namespace
+
+/// Returns a line reader over `reader`; max_len is passed on to ReaderImpl.
+[[nodiscard]] std::unique_ptr<Reader> open(std::unique_ptr<u8::Reader> reader,
+                                           size_t max_len) {
+  return std::make_unique<ReaderImpl>(std::move(reader), max_len);
+}
+
+} // namespace line
+
+} // namespace u8
+
+namespace u16 {
+
+namespace line {
+
+namespace {
+
+/// Exposes UnicodeReader<char16_t, u16::Reader> through the Reader interface.
+class ReaderImpl : public UnicodeReader<char16_t, u16::Reader>,
+                   public virtual Reader {
+  using Base = UnicodeReader<char16_t, u16::Reader>;
+
+ public:
+  ReaderImpl(std::unique_ptr<u16::Reader> reader, size_t max_len)
+      : Base(std::move(reader), max_len, {u'\r', u'\n'}) {}
+
+  [[nodiscard]]
+  std::expected<std::u16string_view, ::line::ReadError> read() override {
+    return Base::read();
+  }
+
+  [[nodiscard]]
+  uint64_t number() const override { return Base::number(); }
+};
+
+} // namespace
+
+/// Returns a line reader over `reader`; max_len is passed on to ReaderImpl.
+[[nodiscard]] std::unique_ptr<Reader> open(std::unique_ptr<u16::Reader> reader,
+                                           size_t max_len) {
+  return std::make_unique<ReaderImpl>(std::move(reader), max_len);
+}
+
+} // namespace line
+
+} // namespace u16