diff options
| -rw-r--r-- | meson.build | 25 | ||||
| -rw-r--r-- | src/uline.cc | 198 | ||||
| -rw-r--r-- | src/uline.hh | 72 | ||||
| -rw-r--r-- | test/uline.cc | 386 |
4 files changed, 681 insertions, 0 deletions
Add uline: line readers on top of the u8/u16 readers.

Summary of what this patch contains:
  * meson.build: new 'uline' library (src/uline.cc, src/uline.hh) with a
    matching uline_dep, plus a 'uline' test executable.
  * src/uline.cc: UnicodeReader<T, Reader>, a buffered line splitter shared
    by u8::line and u16::line. It owns a buffer of max_len + 2 code units,
    treats "\n", "\r" and "\r\n" as line terminators, and returns a line of
    exactly max_len units (without consuming a terminator) when no
    terminator is found within max_len units.
  * src/uline.hh: abstract u8::line::Reader / u16::line::Reader and the
    open() factories (default max_len is 8192).
  * test/uline.cc: gtest cases for both readers.

Review notes (not applied, because they would change the hunk line counts):
  * src/uline.cc uses std::array but does not include <array>; presumably
    it arrives through a transitive include -- TODO confirm.
  * src/uline.hh uses uint64_t but does not include <cstdint>.
  * test/uline.cc uses std::string and std::ignore but does not include
    <string> or <tuple>.
  * With max_len == 0 read() returns an empty line on every call without
    ever reading from the underlying reader.

Formatting note: the line breaks follow the '+' markers of the original
patch; indentation and the blank context lines in the meson.build hunks were
reconstructed and should be checked against the tree before applying. The
index hashes are those of the original commit.

diff --git a/meson.build b/meson.build
index 94519bc..ee265db 100644
--- a/meson.build
+++ b/meson.build
@@ -163,6 +163,20 @@ uio_dep = declare_dependency(
   dependencies: [buffer_dep, io_dep, unicode_dep],
 )
 
+uline_lib = library(
+  'uline',
+  sources: [
+    'src/uline.cc',
+    'src/uline.hh',
+  ],
+  include_directories: inc,
+  dependencies: [buffer_dep, uio_dep],
+)
+uline_dep = declare_dependency(
+  link_with: uline_lib,
+  dependencies: [buffer_dep, uio_dep],
+)
+
 jkc = executable(
   'jkc',
   sources: [
@@ -290,6 +304,17 @@ test('uio', executable(
   ],
 ))
 
+test('uline', executable(
+  'test_uline',
+  sources: ['test/uline.cc'],
+  include_directories: inc,
+  dependencies: [
+    uline_dep,
+    io_test_helper_dep,
+    test_dependencies,
+  ],
+))
+
 run_clang_tidy = find_program('run-clang-tidy', required: false)
 
 if run_clang_tidy.found()
diff --git a/src/uline.cc b/src/uline.cc
new file mode 100644
index 0000000..21927b3
--- /dev/null
+++ b/src/uline.cc
@@ -0,0 +1,198 @@
+#include "uline.hh"
+
+#include "check.hh"
+
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <expected>
+#include <memory>
+#include <string_view>
+#include <utility>
+
+namespace {
+
+template <typename T, typename Reader>
+class UnicodeReader {
+ public:
+  UnicodeReader(std::unique_ptr<Reader> reader, size_t max_len,
+                std::array<T, 2> line_terminators)
+      : reader_(std::move(reader)),
+        max_len_(max_len),
+        line_terminators_(line_terminators),
+        buffer_(std::make_unique_for_overwrite<T[]>(
+            check::add(max_len, static_cast<size_t>(2)))),
+        rptr_(buffer_.get()),
+        wptr_(buffer_.get()),
+        search_(rptr_),
+        end_(buffer_.get() + check::add(max_len, static_cast<size_t>(2))) {}
+
+  [[nodiscard]]
+  std::expected<std::basic_string_view<T>, line::ReadError> read() {
+    while (true) {
+      search_ = std::find_first_of(search_, wptr_, line_terminators_.begin(),
+                                   line_terminators_.end());
+      if (search_ < wptr_) {
+        if (std::cmp_greater(search_ - rptr_, max_len_)) {
+          return line(max_len_, 0);
+        }
+
+        size_t tlen;
+        if (*search_ == line_terminators_[1]) {
+          tlen = 1;
+        } else {
+          if (search_ + 1 == wptr_) {
+            make_space_if_needed();
+            auto got = fill();
+            if (got.has_value()) {
+              if (got.value() == 0) {
+                return line(search_ - rptr_, 1);
+              }
+            } else {
+              return std::unexpected(line::ReadError(got.error()));
+            }
+          }
+          if (search_[1] == line_terminators_[1]) {
+            tlen = 2;
+          } else {
+            tlen = 1;
+          }
+        }
+        return line(search_ - rptr_, tlen);
+      }
+      if (std::cmp_greater_equal(wptr_ - rptr_, max_len_)) {
+        return line(max_len_, 0);
+      }
+
+      make_space_if_needed();
+      auto got = fill();
+      if (got.has_value()) {
+        if (got.value() == 0) {
+          if (rptr_ == wptr_) {
+            return std::unexpected(line::ReadError());
+          }
+          return line(wptr_ - rptr_, 0);
+        }
+      } else {
+        return std::unexpected(line::ReadError(got.error()));
+      }
+    }
+  }
+
+  [[nodiscard]] uint64_t number() const { return number_; }
+
+ private:
+  std::basic_string_view<T> line(size_t len, size_t terminator_len) {
+    assert(len <= max_len_);
+    auto ret = std::basic_string_view<T>(rptr_, len);
+    rptr_ += len + terminator_len;
+    search_ = rptr_;
+    ++number_;
+    return ret;
+  }
+
+  void make_space_if_needed() {
+    size_t free = rptr_ - buffer_.get();
+    if (free == 0)
+      return;
+    size_t avail = end_ - wptr_;
+    if (avail > 1024)
+      return;
+    std::memmove(buffer_.get(), rptr_, (wptr_ - rptr_) * sizeof(T));
+    search_ -= free;
+    wptr_ -= free;
+    rptr_ = buffer_.get();
+  }
+
+  std::expected<size_t, io::ReadError> fill() {
+    auto ret = reader_->read(wptr_, (end_ - wptr_) * sizeof(T));
+    if (ret.has_value())
+      wptr_ += ret.value() / sizeof(T);
+    return ret;
+  }
+
+  std::unique_ptr<Reader> reader_;
+  size_t const max_len_;
+  std::array<T, 2> const line_terminators_;
+  uint64_t number_{0};
+  std::unique_ptr<T[]> buffer_;
+  T* rptr_;
+  T* wptr_;
+  T* search_;
+  T* const end_;
+};
+
+}  // namespace
+
+namespace u8 {
+
+namespace line {
+
+namespace {
+
+class ReaderImpl : public UnicodeReader<char, u8::Reader>,
+                   public virtual Reader {
+ public:
+  ReaderImpl(std::unique_ptr<u8::Reader> reader, size_t max_len)
+      : UnicodeReader<char, u8::Reader>(std::move(reader), max_len,
+                                        {'\r', '\n'}) {}
+
+  [[nodiscard]]
+  std::expected<std::string_view, ::line::ReadError> read() override {
+    return UnicodeReader<char, u8::Reader>::read();
+  }
+
+  [[nodiscard]]
+  uint64_t number() const override {
+    return UnicodeReader<char, u8::Reader>::number();
+  }
+};
+
+}  // namespace
+
+[[nodiscard]]
+std::unique_ptr<Reader> open(std::unique_ptr<u8::Reader> reader,
+                             size_t max_len) {
+  return std::make_unique<ReaderImpl>(std::move(reader), max_len);
+}
+
+}  // namespace line
+
+}  // namespace u8
+
+namespace u16 {
+
+namespace line {
+
+namespace {
+
+class ReaderImpl : public UnicodeReader<char16_t, u16::Reader>,
+                   public virtual Reader {
+ public:
+  ReaderImpl(std::unique_ptr<u16::Reader> reader, size_t max_len)
+      : UnicodeReader<char16_t, u16::Reader>(std::move(reader), max_len,
+                                             {u'\r', u'\n'}) {}
+
+  [[nodiscard]]
+  std::expected<std::u16string_view, ::line::ReadError> read() override {
+    return UnicodeReader<char16_t, u16::Reader>::read();
+  }
+
+  [[nodiscard]]
+  uint64_t number() const override {
+    return UnicodeReader<char16_t, u16::Reader>::number();
+  }
+};
+
+}  // namespace
+
+[[nodiscard]]
+std::unique_ptr<Reader> open(std::unique_ptr<u16::Reader> reader,
+                             size_t max_len) {
+  return std::make_unique<ReaderImpl>(std::move(reader), max_len);
+}
+
+}  // namespace line
+
+}  // namespace u16
diff --git a/src/uline.hh b/src/uline.hh
new file mode 100644
index 0000000..a3bcef3
--- /dev/null
+++ b/src/uline.hh
@@ -0,0 +1,72 @@
+#ifndef ULINE_HH
+#define ULINE_HH
+
+#include "line.hh"  // IWYU pragma: export
+#include "uio.hh"   // IWYU pragma: export
+
+#include <cstddef>
+#include <expected>
+#include <memory>
+#include <string_view>
+
+namespace u8 {
+
+namespace line {
+
+class Reader {
+ public:
+  virtual ~Reader() = default;
+
+  // The returned view is only valid until the next call to read().
+  [[nodiscard]]
+  virtual std::expected<std::string_view, ::line::ReadError> read() = 0;
+
+  // Zero-based number of the line the next read() will return.
+  // So before the first read it is zero, and after the first read it is one.
+  [[nodiscard]] virtual uint64_t number() const = 0;
+
+ protected:
+  Reader() = default;
+
+  Reader(Reader const&) = delete;
+  Reader& operator=(Reader const&) = delete;
+};
+
+[[nodiscard]] std::unique_ptr<Reader> open(std::unique_ptr<u8::Reader> reader,
+                                           size_t max_len = 8192);
+
+}  // namespace line
+
+}  // namespace u8
+
+namespace u16 {
+
+namespace line {
+
+class Reader {
+ public:
+  virtual ~Reader() = default;
+
+  // The returned view is only valid until the next call to read().
+  [[nodiscard]]
+  virtual std::expected<std::u16string_view, ::line::ReadError> read() = 0;
+
+  // Zero-based number of the line the next read() will return.
+  // So before the first read it is zero, and after the first read it is one.
+  [[nodiscard]] virtual uint64_t number() const = 0;
+
+ protected:
+  Reader() = default;
+
+  Reader(Reader const&) = delete;
+  Reader& operator=(Reader const&) = delete;
+};
+
+[[nodiscard]] std::unique_ptr<Reader> open(std::unique_ptr<u16::Reader> reader,
+                                           size_t max_len = 8192);
+
+}  // namespace line
+
+}  // namespace u16
+
+#endif  // ULINE_HH
diff --git a/test/uline.cc b/test/uline.cc
new file mode 100644
index 0000000..ca3f2bb
--- /dev/null
+++ b/test/uline.cc
@@ -0,0 +1,386 @@
+#include "uline.hh"
+
+#include "io_test_helper.hh"
+
+#include <cstddef>
+#include <gtest/gtest.h>
+#include <limits>
+#include <string_view>
+#include <utility>
+
+using namespace std::literals::string_view_literals;
+
+TEST(u8line, empty) {
+  auto reader = u8::line::open(u8::open(io::memory("")));
+  EXPECT_EQ(0, reader->number());
+  auto line = reader->read();
+  ASSERT_FALSE(line.has_value());
+  EXPECT_TRUE(line.error().eof);
+  EXPECT_EQ(0, reader->number());
+}
+
+TEST(u8line, one_line) {
+  auto reader =
+      u8::line::open(u8::open(io::memory("r\xc3\xa4ksm\xc3\xb6rg\xc3\xa5s")));
+  EXPECT_EQ(0, reader->number());
+  auto line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ("r\xc3\xa4ksm\xc3\xb6rg\xc3\xa5s", line.value());
+  EXPECT_EQ(1, reader->number());
+  line = reader->read();
+  ASSERT_FALSE(line.has_value());
+  EXPECT_TRUE(line.error().eof);
+  EXPECT_EQ(1, reader->number());
+}
+
+TEST(u8line, many_lines) {
+  auto reader = u8::line::open(u8::open(io::memory("foo\nbar\nfoobar\n")));
+  EXPECT_EQ(0, reader->number());
+  auto line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ("foo", line.value());
+  EXPECT_EQ(1, reader->number());
+  line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ("bar", line.value());
+  EXPECT_EQ(2, reader->number());
+  line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ("foobar", line.value());
+  EXPECT_EQ(3, reader->number());
+  line = reader->read();
+  ASSERT_FALSE(line.has_value());
+  EXPECT_TRUE(line.error().eof);
+  EXPECT_EQ(3, reader->number());
+}
+
+TEST(u8line, many_lines_mixed) {
+  auto reader = u8::line::open(u8::open(io::memory("foo\r\nbar\rfoobar\n")));
+  EXPECT_EQ(0, reader->number());
+  auto line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ("foo", line.value());
+  EXPECT_EQ(1, reader->number());
+  line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ("bar", line.value());
+  EXPECT_EQ(2, reader->number());
+  line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ("foobar", line.value());
+  EXPECT_EQ(3, reader->number());
+  line = reader->read();
+  ASSERT_FALSE(line.has_value());
+  EXPECT_TRUE(line.error().eof);
+  EXPECT_EQ(3, reader->number());
+}
+
+TEST(u8line, empty_line) {
+  auto reader = u8::line::open(u8::open(io::memory("\n")));
+  EXPECT_EQ(0, reader->number());
+  auto line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ("", line.value());
+  EXPECT_EQ(1, reader->number());
+  line = reader->read();
+  ASSERT_FALSE(line.has_value());
+  EXPECT_TRUE(line.error().eof);
+  EXPECT_EQ(1, reader->number());
+}
+
+TEST(u8line, max_line) {
+  auto reader = u8::line::open(
+      u8::open(io::memory("012345678901234567890123456789")), 10);
+  EXPECT_EQ(0, reader->number());
+  auto line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ("0123456789", line.value());
+  EXPECT_EQ(1, reader->number());
+  line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ("0123456789", line.value());
+  EXPECT_EQ(2, reader->number());
+  line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ("0123456789", line.value());
+  EXPECT_EQ(3, reader->number());
+  line = reader->read();
+  ASSERT_FALSE(line.has_value());
+  EXPECT_TRUE(line.error().eof);
+  EXPECT_EQ(3, reader->number());
+}
+
+TEST(u8line, read_error) {
+  auto reader = u8::line::open(u8::open(
+      io_make_breaking(io::memory("foo bar fum\nfim zam"), /* offset */ 5)));
+  auto line = reader->read();
+  ASSERT_FALSE(line.has_value());
+  EXPECT_FALSE(line.error().eof);
+  EXPECT_EQ(io::ReadError::Error, line.error().io_error.value());
+}
+
+TEST(u8line, read_error_newline) {
+  auto reader = u8::line::open(u8::open(
+      io_make_breaking(io::memory("foo bar\r\nfim zam"), /* offset */ 8)));
+  auto line = reader->read();
+  ASSERT_FALSE(line.has_value());
+  EXPECT_FALSE(line.error().eof);
+  EXPECT_EQ(io::ReadError::Error, line.error().io_error.value());
+}
+
+TEST(u8line, blocky) {
+  auto reader = u8::line::open(
+      u8::open(io_make_max_block(io::memory("foo bar\r\nfim zam"),
+                                 /* max_block_size */ 1)));
+  auto line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ("foo bar", line.value());
+  line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ("fim zam", line.value());
+  line = reader->read();
+  ASSERT_FALSE(line.has_value());
+  EXPECT_TRUE(line.error().eof);
+}
+
+TEST(u8line, blocky_newline) {
+  auto reader = u8::line::open(
+      u8::open(io_make_max_block(io::memory("foo bar\r\nfim zam"),
+                                 /* max_block_size */ 8)));
+  auto line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ("foo bar", line.value());
+  line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ("fim zam", line.value());
+  line = reader->read();
+  ASSERT_FALSE(line.has_value());
+  EXPECT_TRUE(line.error().eof);
+}
+
+TEST(u8line, eof_newline) {
+  auto reader = u8::line::open(u8::open(io::memory("foo bar\r")));
+  auto line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ("foo bar", line.value());
+  line = reader->read();
+  ASSERT_FALSE(line.has_value());
+  EXPECT_TRUE(line.error().eof);
+}
+
+TEST(u8line, max_newline) {
+  auto reader = u8::line::open(u8::open(io::memory("foo bar\r")), 6);
+  auto line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ("foo ba", line.value());
+  line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ("r", line.value());
+  line = reader->read();
+  ASSERT_FALSE(line.has_value());
+  EXPECT_TRUE(line.error().eof);
+}
+
+TEST(u8line, max_line_overflow) {
+  EXPECT_DEATH_IF_SUPPORTED(
+      {
+        std::ignore = u8::line::open(u8::open(io::memory("")),
+                                     std::numeric_limits<size_t>::max());
+      },
+      "");
+}
+
+TEST(u16line, empty) {
+  auto reader = u16::line::open(u16::open(io::memory("")));
+  EXPECT_EQ(0, reader->number());
+  auto line = reader->read();
+  ASSERT_FALSE(line.has_value());
+  EXPECT_TRUE(line.error().eof);
+  EXPECT_EQ(0, reader->number());
+}
+
+TEST(u16line, one_line) {
+  auto reader =
+      u16::line::open(u16::open(io::memory("r\xc3\xa4ksm\xc3\xb6rg\xc3\xa5s")));
+  EXPECT_EQ(0, reader->number());
+  auto line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ(u"r\u00e4ksm\u00f6rg\u00e5s", line.value());
+  EXPECT_EQ(1, reader->number());
+  line = reader->read();
+  ASSERT_FALSE(line.has_value());
+  EXPECT_TRUE(line.error().eof);
+  EXPECT_EQ(1, reader->number());
+}
+
+TEST(u16line, one_line_u16) {
+  auto u16tmp = u"r\u00e4ksm\u00f6rg\u00e5s"sv;
+  auto reader = u16::line::open(u16::open(io::memory(std::string(
+      reinterpret_cast<const char*>(u16tmp.data()), u16tmp.size() * 2))));
+  EXPECT_EQ(0, reader->number());
+  auto line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ(u"r\u00e4ksm\u00f6rg\u00e5s", line.value());
+  EXPECT_EQ(1, reader->number());
+  line = reader->read();
+  ASSERT_FALSE(line.has_value());
+  EXPECT_TRUE(line.error().eof);
+  EXPECT_EQ(1, reader->number());
+}
+
+TEST(u16line, many_lines) {
+  auto reader = u16::line::open(u16::open(io::memory("foo\nbar\nfoobar\n")));
+  EXPECT_EQ(0, reader->number());
+  auto line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ(u"foo", line.value());
+  EXPECT_EQ(1, reader->number());
+  line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ(u"bar", line.value());
+  EXPECT_EQ(2, reader->number());
+  line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ(u"foobar", line.value());
+  EXPECT_EQ(3, reader->number());
+  line = reader->read();
+  ASSERT_FALSE(line.has_value());
+  EXPECT_TRUE(line.error().eof);
+  EXPECT_EQ(3, reader->number());
+}
+
+TEST(u16line, many_lines_mixed) {
+  auto reader = u16::line::open(u16::open(io::memory("foo\r\nbar\rfoobar\n")));
+  EXPECT_EQ(0, reader->number());
+  auto line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ(u"foo", line.value());
+  EXPECT_EQ(1, reader->number());
+  line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ(u"bar", line.value());
+  EXPECT_EQ(2, reader->number());
+  line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ(u"foobar", line.value());
+  EXPECT_EQ(3, reader->number());
+  line = reader->read();
+  ASSERT_FALSE(line.has_value());
+  EXPECT_TRUE(line.error().eof);
+  EXPECT_EQ(3, reader->number());
+}
+
+TEST(u16line, empty_line) {
+  auto reader = u16::line::open(u16::open(io::memory("\n")));
+  EXPECT_EQ(0, reader->number());
+  auto line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ(u"", line.value());
+  EXPECT_EQ(1, reader->number());
+  line = reader->read();
+  ASSERT_FALSE(line.has_value());
+  EXPECT_TRUE(line.error().eof);
+  EXPECT_EQ(1, reader->number());
+}
+
+TEST(u16line, max_line) {
+  auto reader = u16::line::open(
+      u16::open(io::memory("012345678901234567890123456789")), 10);
+  EXPECT_EQ(0, reader->number());
+  auto line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ(u"0123456789", line.value());
+  EXPECT_EQ(1, reader->number());
+  line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ(u"0123456789", line.value());
+  EXPECT_EQ(2, reader->number());
+  line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ(u"0123456789", line.value());
+  EXPECT_EQ(3, reader->number());
+  line = reader->read();
+  ASSERT_FALSE(line.has_value());
+  EXPECT_TRUE(line.error().eof);
+  EXPECT_EQ(3, reader->number());
+}
+
+TEST(u16line, read_error) {
+  auto reader = u16::line::open(u16::open(
+      io_make_breaking(io::memory("foo bar fum\nfim zam"), /* offset */ 5)));
+  auto line = reader->read();
+  ASSERT_FALSE(line.has_value());
+  EXPECT_FALSE(line.error().eof);
+  EXPECT_EQ(io::ReadError::Error, line.error().io_error.value());
+}
+
+TEST(u16line, read_error_newline) {
+  auto reader = u16::line::open(u16::open(
+      io_make_breaking(io::memory("foo bar\r\nfim zam"), /* offset */ 8)));
+  auto line = reader->read();
+  ASSERT_FALSE(line.has_value());
+  EXPECT_FALSE(line.error().eof);
+  EXPECT_EQ(io::ReadError::Error, line.error().io_error.value());
+}
+
+TEST(u16line, blocky) {
+  auto reader = u16::line::open(
+      u16::open(io_make_max_block(io::memory("foo bar\r\nfim zam"),
+                                  /* max_block_size */ 1)));
+  auto line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ(u"foo bar", line.value());
+  line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ(u"fim zam", line.value());
+  line = reader->read();
+  ASSERT_FALSE(line.has_value());
+  EXPECT_TRUE(line.error().eof);
+}
+
+TEST(u16line, blocky_newline) {
+  auto reader = u16::line::open(
+      u16::open(io_make_max_block(io::memory("foo bar\r\nfim zam"),
+                                  /* max_block_size */ 8)));
+  auto line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ(u"foo bar", line.value());
+  line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ(u"fim zam", line.value());
+  line = reader->read();
+  ASSERT_FALSE(line.has_value());
+  EXPECT_TRUE(line.error().eof);
+}
+
+TEST(u16line, eof_newline) {
+  auto reader = u16::line::open(u16::open(io::memory("foo bar\r")));
+  auto line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ(u"foo bar", line.value());
+  line = reader->read();
+  ASSERT_FALSE(line.has_value());
+  EXPECT_TRUE(line.error().eof);
+}
+
+TEST(u16line, max_newline) {
+  auto reader = u16::line::open(u16::open(io::memory("foo bar\r")), 6);
+  auto line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ(u"foo ba", line.value());
+  line = reader->read();
+  ASSERT_TRUE(line.has_value());
+  EXPECT_EQ(u"r", line.value());
+  line = reader->read();
+  ASSERT_FALSE(line.has_value());
+  EXPECT_TRUE(line.error().eof);
+}
+
+TEST(u16line, max_line_overflow) {
+  EXPECT_DEATH_IF_SUPPORTED(
+      {
+        std::ignore = u16::line::open(u16::open(io::memory("")),
+                                      std::numeric_limits<size_t>::max());
+      },
+      "");
+}
