WIP

author: Joel Klinghed <the_jk@spawned.biz> 2024-01-21 12:31:30 +0100
committer: Joel Klinghed <the_jk@spawned.biz> 2024-01-21 12:31:30 +0100
commit: 7dd49c6293172b494c78918507242cdb55d35137 (patch)
tree: 9c8ab822ab9501a5ea2f937e609144e00ea091c4
parent: fc4547b412e28164af1bf8981234c6af959ccc0b (diff)
31 files changed, 2928 insertions, 414 deletions
diff --git a/base/meson.build b/base/meson.build
index 71faace..7668487 100644
--- a/base/meson.build
+++ b/base/meson.build
@@ -8,7 +8,6 @@ if cpp.compiles('''int foo() {
   return 0;
 }''', name: 'C++20 unlikely attribute')
   cpp_flags += '-DHAVE_ATTRIBUTE_UNLIKELY'
-  cpp_flags += '-Wno-c++20-attribute-extensions'
 endif
 
 inc = include_directories('inc')
diff --git a/meson.build b/meson.build
index 2d571dc..d8a9641 100644
--- a/meson.build
+++ b/meson.build
@@ -4,7 +4,7 @@ project(
   meson_version: '>= 0.58',
   default_options : [
     'warning_level=3',
-    'cpp_std=c++17',
+    'cpp_std=c++20',
     'cpp_rtti=false',
     'cpp_eh=none',
     'b_ndebug=if-release',
diff --git a/sax/inc/sax_attributes.hh b/sax/inc/sax_attributes.hh
new file mode 100644
index 0000000..4ab1a44
--- /dev/null
+++ b/sax/inc/sax_attributes.hh
@@ -0,0 +1,146 @@
+#ifndef SAX_ATTRIBUTES_HH
+#define SAX_ATTRIBUTES_HH
+
+#include <iterator>
+#include <optional>
+#include <string_view>
+
+namespace modxml {
+namespace sax {
+
+struct Attribute {
+  std::string_view name;
+  std::string_view value;
+
+  Attribute(std::string_view name, std::string_view value);
+};
+
+/**
+ * A view of attributes, with utility functions.
+ */
+class Attributes {
+ public:
+  virtual ~Attributes() = default;
+
+  class iterator {
+   public:
+    using iterator_category = std::random_access_iterator_tag;
+    using difference_type = std::ptrdiff_t;
+    using element_type = Attribute;
+    using pointer = element_type const *;
+    using reference = element_type const &;
+
+    iterator()
+        : attributes_(nullptr), index_(0) {}
+    iterator(iterator const& it)
+        : attributes_(it.attributes_), index_(it.index_) {}
+    iterator& operator=(iterator const& it) {
+      attributes_ = it.attributes_;
+      index_ = it.index_;
+      return *this;
+    }
+
+    /**
+     * Comparing two iterators from different Attributes instances is undefined.
+     */
+    bool operator==(iterator const& it) const {
+      return index_ == it.index_;
+    }
+    std::strong_ordering operator<=>(iterator const& it) const {
+      return index_ <=> it.index_;
+    }
+
+    pointer operator->() const { return &attributes_->at(index_); }
+    reference operator*() const { return attributes_->at(index_); }
+    reference operator[](difference_type i) const {
+      return attributes_->at(index_ + i);
+    }
+
+    iterator& operator++() {
+      ++index_;
+      return *this;
+    }
+    iterator operator++(int) {
+      auto ret = *this;
+      ++index_;
+      return ret;
+    }
+    iterator& operator+=(difference_type i) {
+      index_ += i;
+      return *this;
+    }
+    iterator operator+(difference_type i) const {
+      return iterator(attributes_, index_ + i);
+    }
+    friend iterator operator+(difference_type i, iterator const &it) {
+      return iterator(it.attributes_, it.index_ + i);
+    }
+    iterator& operator--() {
+      --index_;
+      return *this;
+    }
+    iterator operator--(int) {
+      auto ret = *this;
+      --index_;
+      return ret;
+    }
+    iterator& operator-=(difference_type i) {
+      index_ -= i;
+      return *this;
+    }
+    difference_type operator-(iterator const& it) const {
+      return index_ - it.index_;
+    }
+    iterator operator-(difference_type i) const {
+      return iterator(attributes_, index_ - i);
+    }
+
+   protected:
+    iterator(Attributes const* attributes, std::size_t index)
+        : attributes_(attributes), index_(index) {}
+
+   private:
+    Attributes const* attributes_;
+    std::size_t index_;
+  };
+
+  static_assert(std::random_access_iterator<iterator>);
+
+  virtual iterator begin() const = 0;
+  virtual iterator end() const = 0;
+
+  virtual std::size_t size() const = 0;
+  /**
+   * name and value of attribute are valid as long as Attributes instance is.
+   */
+  virtual Attribute const& at(std::size_t index) const = 0;
+
+  Attribute const& operator[](std::size_t index) const { return at(index); }
+
+  /**
+   * Return the first attribute with name, if any.
+   */
+  virtual std::optional<std::string_view> find_first(
+      std::string_view name) const;
+
+  /**
+   * Return the last attribute with name, if any.
+   */
+  virtual std::optional<std::string_view> find_last(
+      std::string_view name) const;
+
+  /**
+   * Return the index of the attribute with name, starting the search at index.
+   */
+  virtual std::optional<std::size_t> find(std::string_view name,
+                                          std::size_t index = 0) const;
+
+ protected:
+  Attributes() = default;
+};
+
+}  // namespace sax
+}  // namespace modxml
+
+
+#endif  // SAX_ATTRIBUTES_HH
diff --git a/sax/inc/sax_decoder.hh b/sax/inc/sax_decoder.hh
index 40a56c9..8b2490c 100644
--- a/sax/inc/sax_decoder.hh
+++ b/sax/inc/sax_decoder.hh
@@ -1,16 +1,15 @@
 #ifndef SAX_DECODER_HH
 #define SAX_DECODER_HH
 
-#include <memory>
-#include <string>
-#include <string_view>
+#include <cstdint>
+#include <span>
 
 namespace modxml {
 namespace sax {
 
 /**
  * Decoder returned by DecoderFactory. Used by Processor to turn bytes into
- * unicode characters.
+ * unicode characters encoded as UTF-8.
  */
 class Decoder {
  public:
@@ -18,9 +17,9 @@ class Decoder {
 
   enum class State {
     GOOD = 0,
-    // too little data was given to advance
+    // too little data was given to decode
     NEED_MORE,
-    // invalid data was given to advance
+    // invalid data was given to decode
     INVALID,
   };
 
@@ -29,23 +28,22 @@ class Decoder {
    * write them to out (start at out_offset) as UTF-8.
    * All written code points must be valid per Unicode, so inside the
    * range U+0 to U+10FFFF and not a surrogate pair (U+D800-U+DFFF).
-   * No partial output, only write to out if the whole UTF-8 sequence is
-   * going to fit.
-   * The is always at least 4 bytes available (out.size() - out_offset) when
+   * No partial code point output, only write to out if the whole UTF-8
+   * sequence for the code point is going to fit.
+   * There are always at least 4 bytes available (out.size() - out_offset) when
    * called.
-   * Advance in_offset for data consumed.
+   * Advance in_offset for data consumed. Do NOT read past in.size().
    * Advance out_offset for code points written. Do NOT write past out.size().
-   * Do NOT resize out.
    * If at least one code point is decoded and written to out, return GOOD.
    * If it is not possible to decode a single code point, in_offset and
    * out_offset should not be advanced and something other than GOOD returned.
    * Do not keep any references to any of the parameters after returning, next
-   * advance() call will point to the following bytes, but all parameters
+   * decode() call will point to the following bytes, but all parameters
    * may have changed as they are subject to the buffer implementations of the
    * Processor.
    */
-  virtual State decode(std::string_view in, std::size_t& in_offset,
-                       std::string& out, std::size_t& out_offset) = 0;
+  virtual State decode(std::span<uint8_t const> in, std::size_t& in_offset,
+                       std::span<uint8_t> out, std::size_t& out_offset) = 0;
 
  protected:
   Decoder() = default;
diff --git a/sax/inc/sax_decoder_factory.hh b/sax/inc/sax_decoder_factory.hh
index 80f1af3..2361ac3 100644
--- a/sax/inc/sax_decoder_factory.hh
+++ b/sax/inc/sax_decoder_factory.hh
@@ -2,7 +2,7 @@
 #define SAX_DECODER_FACTORY_HH
 
 #include <memory>
-#include <string>
+#include <string_view>
 
 namespace modxml {
 namespace sax {
@@ -23,7 +23,7 @@ class DecoderFactory {
    * Note that encoding value isn't cleaned up or validated in any way, it is
    * reported EXACTLY as found (even if not valid per XML spec).
    */
-  virtual std::unique_ptr<Decoder> create(std::string const& encoding) = 0;
+  virtual std::unique_ptr<Decoder> create(std::string_view encoding) = 0;
 
  protected:
   DecoderFactory() = default;
diff --git a/sax/inc/sax_delegate.hh b/sax/inc/sax_delegate.hh
index ba63e72..59af2b7 100644
--- a/sax/inc/sax_delegate.hh
+++ b/sax/inc/sax_delegate.hh
@@ -1,9 +1,14 @@
 #ifndef MODXML_SAX_DELEGATE_HH
 #define MODXML_SAX_DELEGATE_HH
 
+#include <cstdint>
+#include <string_view>
+
 namespace modxml {
 namespace sax {
 
+class Attributes;
+
 /**
  * Delegate for processor.
  * Implement to handle events.
@@ -12,6 +17,23 @@ class Delegate {
  public:
   virtual ~Delegate() = default;
 
+  virtual void start_element(std::string_view name,
+                             Attributes const& attributes);
+
+  virtual void end_element(std::string_view name);
+
+  virtual void empty_element(std::string_view name,
+                             Attributes const& attributes);
+
+  virtual void character_data(std::string_view data);
+
+  virtual void processing_instruction(std::string_view target,
+                                      std::string_view data);
+
+  virtual void comment(std::string_view data);
+
+  virtual void error(std::string_view message);
+
  protected:
   Delegate() = default;
 };
diff --git a/sax/inc/sax_processor.hh b/sax/inc/sax_processor.hh
index 7ca32f7..cf53807 100644
--- a/sax/inc/sax_processor.hh
+++ b/sax/inc/sax_processor.hh
@@ -2,6 +2,7 @@
 #define MODXML_SAX_PROCESSOR_HH
 
 #include <memory>
+#include <span>
 
 namespace modxml {
 namespace sax {
@@ -23,6 +24,23 @@ class Processor {
    */
   static std::unique_ptr<Processor> create(std::shared_ptr<Delegate> delegate);
 
+  /**
+   * Process (consume) input data.
+   * Returns bytes consumed, can be zero.
+   */
+  virtual std::size_t process(std::span<uint8_t const> data,
+                              std::size_t offset = 0) = 0;
+
+  /**
+   * When called from delegate, points to the start of the element that
+   * triggered the callback.
+   * When called otherwise, points to the last element that was processed.
+   * Lines start at 1.
+   * Columns start at 0.
+   */
+  virtual uint64_t line() const = 0;
+  virtual uint64_t column() const = 0;
+
  protected:
   Processor() = default;
 
diff --git a/sax/inc/sax_processor_builder.hh b/sax/inc/sax_processor_builder.hh
index 070fbbf..8b114e4 100644
--- a/sax/inc/sax_processor_builder.hh
+++ b/sax/inc/sax_processor_builder.hh
@@ -48,7 +48,7 @@ class ProcessorBuilder {
    * If you give a too small buffer size (such as zero) it will be ignored
    * and a implementation specific minimum will be used instead.
    * This is meant as a possible optimization and can be completely ignored.
-   * Note that the processor will allocate more data if it needed.
+   * Note that the processor will allocate more data if it needs to.
    */
   virtual ProcessorBuilder* set_default_buffer_size(std::size_t size) = 0;
 
diff --git a/sax/meson.build b/sax/meson.build
index ccbdef4..8797c41 100644
--- a/sax/meson.build
+++ b/sax/meson.build
@@ -6,7 +6,11 @@ deps = [
 inc = include_directories('inc')
 lib = shared_library(
   'modxmlsax',
+  'src/buffer.cc',
   'src/decoder.cc',
+  'src/guessing_decoder.cc',
+  'src/sax_attributes.cc',
+  'src/sax_delegate.cc',
   'src/sax_processor.cc',
   'src/sax_processor_builder.cc',
   'src/utils.cc',
@@ -20,3 +24,19 @@ sax_dep = declare_dependency(
   include_directories: inc,
   link_with: lib,
 )
+
+test('buffer',
+     executable(
+       'test_buffer',
+       sources: [
+         'src/buffer.cc',
+         'tst/test_buffer.cc',
+       ],
+       include_directories: 'src',
+       dependencies: [base_dep, gmock_dep, gtest_dep]))
+
+test('decoder',
+     executable(
+       'test_decoder',
+       sources: ['tst/test_decoder.cc'],
+       dependencies: [sax_dep, gtest_dep]))
diff --git a/sax/src/buffer.cc b/sax/src/buffer.cc
new file mode 100644
index 0000000..964865d
--- /dev/null
+++ b/sax/src/buffer.cc
@@ -0,0 +1,398 @@
+#include "buffer.hh"
+
+#include <algorithm>
+#include <cassert>
+#include <memory>
+#include <limits>
+
+namespace modxml {
+namespace sax {
+
+namespace {
+
+class DynamicBuffer : public Buffer {
+ public:
+  DynamicBuffer(std::size_t default_size, std::size_t max_size)
+      : default_size_(std::min(default_size, max_size)), max_size_(max_size),
+        data_(std::make_unique_for_overwrite<uint8_t[]>(default_size_)),
+        size_(default_size_) {}
+
+  std::span<uint8_t> wspan(std::size_t need) override {
+    auto avail = size_ - (offset_ + fill_);
+    if (need > avail) {
+      if (max_size_ - fill_ < need) // Early exit if need is never possible
+        return {};
+      if (offset_ > 0) {
+        std::copy_n(data_.get() + offset_, fill_, data_.get());
+        offset_ = 0;
+      }
+      avail = size_ - fill_;
+      if (need > avail) {
+        auto const max = std::numeric_limits<std::size_t>::max() / 2;
+        std::size_t new_size = std::max<std::size_t>(size_, 1);  // 0 * 2 == 0
+        while (true) {
+          if (new_size <= max) {
+            new_size *= 2;
+          } else {
+            new_size = std::numeric_limits<std::size_t>::max();
+          }
+          if (new_size >= max_size_) {
+            new_size = max_size_;
+            break;
+          }
+          if (new_size - fill_ >= need)
+            break;
+        }
+        // Using new as it has std::nothrow which make_unique lacks.
+        // Easy enough to keep track of the pointers here anyway.
+        auto* tmp = new(std::nothrow) uint8_t[new_size];
+        if (tmp == nullptr)
+          return {};
+        std::copy_n(data_.get(), fill_, tmp);
+        size_ = new_size;
+        data_.reset(tmp);
+      }
+    }
+    return {data_.get() + offset_ + fill_, size_ - (offset_ + fill_)};
+  }
+
+  void commit(std::size_t size) override {
+    assert(size_ - (offset_ + fill_) >= size);
+    fill_ += size;
+  }
+
+  std::span<uint8_t const> rspan(std::size_t) override {
+    return {data_.get() + offset_, fill_};
+  }
+
+  void consume(std::size_t size) override {
+    if (size == 0)
+      return;
+    assert(fill_ >= size);
+    fill_ -= size;
+    if (fill_ == 0) {
+      reset();
+    } else {
+      offset_ += size;
+    }
+  }
+
+  std::span<uint8_t> mspan(std::size_t) override {
+    return {data_.get() + offset_, fill_};
+  }
+
+  std::size_t uncommit(std::size_t size) override {
+    auto ret = std::min(size, fill_);
+    fill_ -= ret;
+    if (fill_ == 0) {
+      reset();
+    }
+    return ret;
+  }
+
+  bool empty() const override {
+    return fill_ == 0;
+  }
+
+  bool full() const override {
+    return fill_ >= max_size_;
+  }
+
+  void reset() override {
+    if (size_ != default_size_)
+      data_ = std::make_unique_for_overwrite<uint8_t[]>(size_ = default_size_);
+    offset_ = 0;
+    fill_ = 0;
+  }
+
+ private:
+  std::size_t const default_size_;
+  std::size_t const max_size_;
+  std::unique_ptr<uint8_t[]> data_;
+  std::size_t size_;
+  std::size_t offset_{0};
+  std::size_t fill_{0};
+};
+
+class FixedBuffer : public Buffer {
+ public:
+  explicit FixedBuffer(std::size_t size)
+      : size_(size), data_(std::make_unique<uint8_t[]>(size_)) {}
+
+  std::span<uint8_t> wspan(std::size_t need) override {
+    auto avail = wavail();
+    if (need > avail) {
+      if (need > size_ - ravail())  // Early exit if need will never fit
+        return {};
+      if (rptr_ < wptr_ || (rptr_ == wptr_ && !full_)) {
+        rotate();
+        avail = wavail();
+      } else {
+        return {};
+      }
+    }
+    return {data_.get() + wptr_, avail};
+  }
+
+  void commit(std::size_t size) override {
+    if (size == 0)
+      return;
+    assert(wavail() >= size);
+    wptr_ += size;
+    if (wptr_ == size_)
+      wptr_ = 0;
+    if (rptr_ == wptr_)
+      full_ = true;
+  }
+
+  std::span<uint8_t const> rspan(std::size_t want) override {
+    return mspan(want);
+  }
+
+  void consume(std::size_t size) override {
+    if (size == 0)
+      return;
+    assert(ravail() >= size);
+    full_ = false;
+    rptr_ += size;
+    if (rptr_ == size_)
+      rptr_ = 0;
+    if (rptr_ == wptr_)
+      reset();
+  }
+
+  std::span<uint8_t> mspan(std::size_t want) override {
+    auto avail = ravail();
+    if (want > avail) {
+      if (rptr_ > wptr_ || (rptr_ == wptr_ && full_)) {
+        rotate();
+        avail = ravail();
+      }
+    }
+    return {data_.get() + rptr_, avail};
+  }
+
+  std::size_t uncommit(std::size_t size) override {
+    if (size == 0)
+      return 0;
+    auto ret = do_uncommit(size);
+    if (ret < size) {
+      ret += do_uncommit(size - ret);
+    }
+    return ret;
+  }
+
+  bool empty() const override {
+    return rptr_ == wptr_ && !full_;
+  }
+
+  bool full() const override {
+    return size_ == 0 || (rptr_ == wptr_ && full_);  // Zero-sized is full.
+  }
+
+  void reset() override {
+    rptr_ = 0;
+    wptr_ = 0;
+    full_ = false;
+  }
+
+ private:
+  std::size_t ravail() const {
+    if (rptr_ < wptr_)
+      return wptr_ - rptr_;
+    if (rptr_ == wptr_ && !full_)
+      return 0;
+    return size_ - rptr_;
+  }
+
+  std::size_t wavail() const {
+    if (rptr_ > wptr_)
+      return rptr_ - wptr_;
+    if (rptr_ == wptr_ && full_)
+      return 0;
+    return size_ - wptr_;
+  }
+
+  std::size_t do_uncommit(std::size_t size) {
+    if (size == 0 || (rptr_ == wptr_ && !full_))
+      return 0;
+
+    full_ = false;
+
+    if (wptr_ == 0)
+      wptr_ = size_;
+
+    auto avail = rptr_ < wptr_ ? wptr_ - rptr_ : wptr_;
+    avail = std::min(avail, size);
+    wptr_ -= avail;
+    return avail;
+  }
+
+  void rotate() {
+    if (rptr_ == 0)
+      return;  // Data already starts at the front, nothing to move.
+    if (rptr_ < wptr_) {
+      std::copy(data_.get() + rptr_, data_.get() + wptr_, data_.get());
+      wptr_ -= rptr_;
+      rptr_ = 0;
+    } else if (wptr_ < rptr_ || (wptr_ == rptr_ && full_)) {
+      auto left = wptr_;
+      auto right = size_ - rptr_;
+      // TODO: Can we do this without allocations?
+      if (left <= right) {
+        auto tmp = std::make_unique<uint8_t[]>(left);
+        std::copy_n(data_.get(), left, tmp.get());
+        std::copy_n(data_.get() + rptr_, right, data_.get());
+        std::copy_n(tmp.get(), left, data_.get() + right);
+      } else {
+        auto tmp = std::make_unique<uint8_t[]>(right);
+        std::copy_n(data_.get() + rptr_, right, tmp.get());
+        std::copy_backward(data_.get(), data_.get() + left,
+                           data_.get() + left + right);
+        std::copy_n(tmp.get(), right, data_.get());
+      }
+      wptr_ = left + right;
+      if (wptr_ == size_)
+        wptr_ = 0;
+      rptr_ = 0;
+    } else {
+      rptr_ = wptr_ = 0;  // Empty (rptr_ == wptr_, !full_): nothing to move.
+    }
+  }
+
+  std::size_t const size_;
+  std::unique_ptr<uint8_t[]> data_;
+  std::size_t rptr_{0};
+  std::size_t wptr_{0};
+  bool full_{false};
+};
+
+class ReadViewBufferImpl : public ReadViewBuffer {
+ public:
+  explicit ReadViewBufferImpl(std::unique_ptr<Buffer> buffer)
+      : buffer_(std::move(buffer)) {}
+
+  std::size_t consumed() const override {
+    return offset_;
+  }
+
+  std::unique_ptr<Buffer> release() override {
+    return std::move(buffer_);
+  }
+
+  std::span<uint8_t> wspan(std::size_t need) override {
+    return buffer_->wspan(need);
+  }
+
+  void commit(std::size_t size) override {
+    return buffer_->commit(size);
+  }
+
+  std::span<uint8_t const> rspan(std::size_t want) override {
+    auto ret = buffer_->rspan(offset_ + want);
+    if (ret.size() <= offset_)
+      return ret.subspan(0, 0);
+    return ret.subspan(offset_, ret.size() - offset_);
+  }
+
+  void consume(std::size_t size) override {
+    offset_ += size;
+  }
+
+  std::span<uint8_t> mspan(std::size_t want) override {
+    auto ret = buffer_->mspan(offset_ + want);
+    if (ret.size() <= offset_)
+      return ret.subspan(0, 0);
+    return ret.subspan(offset_, ret.size() - offset_);
+  }
+
+  std::size_t uncommit(std::size_t size) override {
+    return buffer_->uncommit(size);
+  }
+
+  bool empty() const override {
+    if (buffer_->empty())
+      return true;
+    auto data = buffer_->rspan(offset_ + 1);
+    return data.size() <= offset_;
+  }
+
+  bool full() const override {
+    return buffer_->full();
+  }
+
+  void reset() override {
+    offset_ = 0;
+  }
+
+ private:
+  std::unique_ptr<Buffer> buffer_;
+  std::size_t offset_{0};
+};
+
+}  // namespace
+
+std::unique_ptr<Buffer> make_buffer(std::size_t default_size,
+                                    std::size_t max_size) {
+  if (default_size >= max_size)
+    return std::make_unique<FixedBuffer>(max_size);
+
+  return std::make_unique<DynamicBuffer>(default_size, max_size);
+}
+
+std::unique_ptr<ReadViewBuffer> make_read_view_buffer(
+    std::unique_ptr<Buffer> buffer) {
+  return std::make_unique<ReadViewBufferImpl>(std::move(buffer));
+}
+
+std::size_t Buffer::write(std::span<uint8_t const> data) {
+  std::size_t offset = 0;
+  while (offset < data.size()) {
+    auto target = wspan();
+    if (target.empty())
+      break;
+    auto size = std::min(data.size() - offset, target.size());
+    std::copy_n(data.data() + offset, size, target.data());
+    commit(size);
+    offset += size;
+  }
+  return offset;
+}
+
+bool Buffer::write_all(std::span<uint8_t const> data) {
+  if (data.empty())
+    return true;
+  auto target = wspan(data.size());
+  if (target.empty())
+    return false;
+  std::copy(data.begin(), data.end(), target.begin());
+  commit(data.size());
+  return true;
+}
+
+std::size_t Buffer::read(std::span<uint8_t> data) {
+  std::size_t offset = 0;
+  while (offset < data.size()) {
+    auto source = rspan();
+    if (source.empty())
+      break;
+    auto size = std::min(data.size() - offset, source.size());
+    std::copy_n(source.data(), size, data.data() + offset);
+    consume(size);
+    offset += size;
+  }
+  return offset;
+}
+
+bool Buffer::read_all(std::span<uint8_t> data) {
+  auto source = rspan(data.size());
+  if (source.size() < data.size())
+    return false;
+  std::copy_n(source.begin(), data.size(), data.begin());
+  consume(data.size());
+  return true;
+}
+
+}  // namespace sax
+}  // namespace modxml
+
diff --git a/sax/src/buffer.hh b/sax/src/buffer.hh
new file mode 100644
index 0000000..d9fb9fc
--- /dev/null
+++ b/sax/src/buffer.hh
@@ -0,0 +1,108 @@
+#ifndef BUFFER_HH
+#define BUFFER_HH
+
+#include "macros.hh"
+
+#include <memory>
+#include <span>
+
+namespace modxml {
+namespace sax {
+
+class HIDDEN Buffer {
+ public:
+  virtual ~Buffer() = default;
+
+  Buffer(Buffer const&) = delete;
+  Buffer& operator=(Buffer const&) = delete;
+
+  // Returns a writable span, either at least need large or in case
+  // the buffer is full, an empty span.
+  // Returned span is valid until any other method is called on the buffer.
+  virtual std::span<uint8_t> wspan(std::size_t need = 1) = 0;
+  // Commit size data from the last returned wspan. size must be <= span.size.
+  // Remember that the span is now invalid and you need to call wspan again
+  // to write more.
+  virtual void commit(std::size_t size) = 0;
+
+  // Returns a readable span of all readily available data in buffer.
+  // If there is enough data in the buffer to satisfy want, the returned
+  // span is at least as large.
+  // Returned span is valid until any other method is called on the buffer.
+  virtual std::span<uint8_t const> rspan(std::size_t want = 1) = 0;
+  // Consume size data from buffer. size must be <= span.size.
+  // Remember that the span is now invalid and you need to call rspan again
+  // to read more.
+  virtual void consume(std::size_t size) = 0;
+
+  // Returns the same span as rspan but this is writable, you can modify
+  // the content. You cannot change the size of the span.
+  // If you wish to append data, use wspan() + commit().
+  // If you wish to remove data, use uncommit().
+  // If you wish to insert you have to be clever.
+  // Returned span is valid until any other method is called on the buffer.
+  virtual std::span<uint8_t> mspan(std::size_t want = 1) = 0;
+
+  // Uncommit the last size bytes in the buffer. Returns the bytes
+  // removed. If you used wspan() + commit() to add ten (10) bytes say and then
+  // call uncommit() with a size of seven (7) the first three (3) bytes written
+  // will be left in the buffer.
+  virtual std::size_t uncommit(std::size_t size) = 0;
+
+  // Returns true if buffer is empty.
+  virtual bool empty() const = 0;
+
+  // Returns true if buffer is full. This means filled to max_size.
+  virtual bool full() const = 0;
+
+  // Clear buffer, reset back to initial state.
+  virtual void reset() = 0;
+
+  // Write as much as possible of data to buffer.
+  // Returns bytes written (may be zero).
+  std::size_t write(std::span<uint8_t const> data);
+
+  // Either write all of the data to buffer or none. Returns true if data was
+  // written or data was empty.
+  bool write_all(std::span<uint8_t const> data);
+
+  // Read as much as possible from buffer to data.
+  // Returns bytes read (may be zero).
+  std::size_t read(std::span<uint8_t> data);
+
+  // Either fill data with data from buffer or return false.
+  bool read_all(std::span<uint8_t> data);
+
+ protected:
+  Buffer() = default;
+};
+
+// Create a buffer. default_size is used as a hint but generally that
+// will be the initial size of the buffer. max_size is a hard limit.
+// max_size == 0 is valid but will return an always full and empty buffer.
+std::unique_ptr<Buffer> HIDDEN make_buffer(std::size_t default_size,
+                                           std::size_t max_size);
+
+class ReadViewBuffer : public Buffer {
+ public:
+  // Returns bytes consumed in this buffer.
+  virtual std::size_t consumed() const = 0;
+
+  // Take ownership back of the wrapped buffer from the read view.
+  // The read view is now unusable.
+  virtual std::unique_ptr<Buffer> release() = 0;
+
+ protected:
+  ReadViewBuffer() = default;
+};
+
+// Create a read view buffer. Writing will go to wrapped buffer. Reading
+// is done on the read view buffer without moving the wrapped buffers read
+// pointer. These views are lightweight.
+std::unique_ptr<ReadViewBuffer> HIDDEN make_read_view_buffer(
+    std::unique_ptr<Buffer> buffer);
+
+}  // namespace sax
+}  // namespace modxml
+
+#endif  // BUFFER_HH
diff --git a/sax/src/decoder.cc b/sax/src/decoder.cc
index 30b1735..35b9b46 100644
--- a/sax/src/decoder.cc
+++ b/sax/src/decoder.cc
@@ -12,273 +12,233 @@ namespace sax {
 
 namespace {
 
-class UtfDecoder : public Decoder {
+class KnownEndianDecoder : public Decoder {
  public:
-  State decode(std::string_view in, std::size_t& in_offset,
-               uint32_t* out, std::size_t out_size,
-               std::size_t& out_offset) override {
-    std::size_t const out_start = out_offset;
+  State decode(std::span<uint8_t const> in, std::size_t& in_offset,
+               std::span<uint8_t> out, std::size_t& out_offset) override {
+    std::size_t tmp = in_offset;
+    uint32_t ret = read(in, tmp);
+    if (ret == utf::NEED_MORE)
+      return State::NEED_MORE;
+    if (ret == utf::INVALID)
+      return State::INVALID;
+
     if (bom_ == -1) UNLIKELY {
-      std::size_t tmp = in_offset;
-      uint32_t ret = read(in, tmp);
-      if (ret == utf::NEED_MORE) {
-        return State::NEED_MORE;
-      }
-      if (ret == utf::INVALID) {
-        return State::INVALID;
-      }
       if (ret == 0xfeff) {
         // To allow offset to advance and to return, we need to
         // read at least one more character completely.
         ret = read(in, tmp);
-        if (ret == utf::NEED_MORE) {
+        if (ret == utf::NEED_MORE)
           return State::NEED_MORE;
-        }
-        if (ret == utf::INVALID) {
+        if (ret == utf::INVALID)
           return State::INVALID;
-        }
         bom_ = 1;
       } else {
         bom_ = 0;
       }
-      in_offset = tmp;
-      out[out_offset++] = ret;
-      if (out_offset == out_size)
-        return State::GOOD;
+      if (!utf::write8(ret, out, out_offset)) {
+        bom_ = -1;
+        return State::NEED_MORE;
+      }
+    } else {
+      if (!utf::write8(ret, out, out_offset))
+        return State::NEED_MORE;
     }
+    in_offset = tmp;
 
-    do {
-      uint32_t ret = read(in, in_offset);
-      if (ret == utf::NEED_MORE) {
-        return out_offset > out_start ? State::GOOD : State::NEED_MORE;
-      }
-      if (ret == utf::INVALID) {
-        return out_offset > out_start ? State::GOOD : State::INVALID;
-      }
-      out[out_offset++] = ret;
-    } while (out_offset < out_size);
-    return State::GOOD;
+    while (true) {
+      ret = read(in, tmp);
+      if (ret == utf::NEED_MORE || ret == utf::INVALID)
+        return State::GOOD;
+      if (!utf::write8(ret, out, out_offset))
+        return State::GOOD;
+      in_offset = tmp;
+    }
   }
 
  protected:
-  UtfDecoder() = default;
+  KnownEndianDecoder() = default;
 
-  virtual uint32_t read(std::string_view data, std::size_t& offset) const = 0;
+  virtual uint32_t read(
+      std::span<uint8_t const> data, std::size_t& offset) const = 0;
 
  private:
   int8_t bom_{-1};
 };
 
-class Utf8Decoder : public UtfDecoder {
+class Utf8Decoder : public KnownEndianDecoder {
  public:
   Utf8Decoder() = default;
 
-  uint32_t read(std::string_view data, std::size_t& offset) const override {
+  uint32_t read(
+      std::span<uint8_t const> data, std::size_t& offset) const override {
     return utf::read8(data, offset);
   }
 };
 
-class Utf16BeDecoder : public UtfDecoder {
+class Utf16BeDecoder : public KnownEndianDecoder {
  public:
   Utf16BeDecoder() = default;
 
-  uint32_t read(std::string_view data, std::size_t& offset) const override {
+  uint32_t read(
+      std::span<uint8_t const> data, std::size_t& offset) const override {
     return utf::read16be(data, offset);
   }
 };
 
-class Utf16LeDecoder : public UtfDecoder {
+class Utf16LeDecoder : public KnownEndianDecoder {
  public:
   Utf16LeDecoder() = default;
 
-  uint32_t read(std::string_view data, std::size_t& offset) const override {
+  uint32_t read(
+      std::span<uint8_t const> data, std::size_t& offset) const override {
     return utf::read16le(data, offset);
   }
 };
 
-class Utf32BeDecoder : public UtfDecoder {
+class Utf32BeDecoder : public KnownEndianDecoder {
  public:
   Utf32BeDecoder() = default;
 
-  uint32_t read(std::string_view data, std::size_t& offset) const override {
+  uint32_t read(
+      std::span<uint8_t const> data, std::size_t& offset) const override {
     return utf::read32be(data, offset);
   }
 };
 
-class Utf32LeDecoder : public UtfDecoder {
+class Utf32LeDecoder : public KnownEndianDecoder {
  public:
   Utf32LeDecoder() = default;
 
-  uint32_t read(std::string_view data, std::size_t& offset) const override {
+  uint32_t read(
+      std::span<uint8_t const> data, std::size_t& offset) const override {
     return utf::read32le(data, offset);
   }
 };
 
-class Utf16Decoder : public Decoder {
+class UnknownEndianDecoder : public Decoder {
  public:
-  Utf16Decoder() = default;
-
-  State decode(std::string_view in, std::size_t& in_offset,
-               uint32_t* out, std::size_t out_size,
-               std::size_t& out_offset) override {
-    std::size_t const out_start = out_offset;
+  State decode(std::span<uint8_t const> in, std::size_t& in_offset,
+               std::span<uint8_t> out, std::size_t& out_offset) override {
+    std::size_t tmp = in_offset;
     if (endian_ == -1) UNLIKELY {
-      std::size_t tmp = in_offset;
-      uint32_t ret = utf::read16be(in, tmp);
-      int8_t endian;
-      if (ret == utf::NEED_MORE) {
+      uint32_t ret = readbe(in, tmp);
+      if (ret == utf::NEED_MORE)
         return State::NEED_MORE;
-      }
-      if (ret == utf::INVALID) {
+      if (ret == utf::INVALID)
         return State::INVALID;
-      }
       if (ret == 0xfeff) {
-        endian = 1;  // Big endian
+        endian_ = 1;
       } else if (ret == 0xfffe) {
-        endian = 0;  // Little endian
+        endian_ = 0;
       } else {
         return State::INVALID;
       }
+      in_offset = tmp;
+    }
 
-      // To allow offset to advance and to return, we need to
-      // read at least one more character completely.
-      ret = endian == 1 ? utf::read16be(in, tmp) : utf::read16le(in, tmp);
-      if (ret == utf::NEED_MORE) {
+    if (endian_ == 0) {
+      uint32_t ret = readle(in, tmp);
+      if (ret == utf::NEED_MORE)
         return State::NEED_MORE;
-      }
-      if (ret == utf::INVALID) {
+      if (ret == utf::INVALID)
         return State::INVALID;
-      }
+      if (!utf::write8(ret, out, out_offset))
+        return State::NEED_MORE;
+      in_offset = tmp;
 
-      endian_ = endian;
+      while (true) {
+        ret = readle(in, tmp);
+        if (ret == utf::NEED_MORE || ret == utf::INVALID)
+          return State::GOOD;
+        if (!utf::write8(ret, out, out_offset))
+          return State::GOOD;
+        in_offset = tmp;
+      }
+    } else /* if (endian_ == 1) */ {
+      uint32_t ret = readbe(in, tmp);
+      if (ret == utf::NEED_MORE)
+        return State::NEED_MORE;
+      if (ret == utf::INVALID)
+        return State::INVALID;
+      if (!utf::write8(ret, out, out_offset))
+        return State::NEED_MORE;
       in_offset = tmp;
-      out[out_offset++] = ret;
-      if (out_offset == out_size)
-        return State::GOOD;
-    }
 
-    if (endian_ == 1) {
-      do {
-        uint32_t ret = utf::read16be(in, in_offset);
-        if (ret == utf::NEED_MORE) {
-          return out_offset > out_start ? State::GOOD : State::NEED_MORE;
-        }
-        if (ret == utf::INVALID) {
-          return out_offset > out_start ? State::GOOD : State::INVALID;
-        }
-        out[out_offset++] = ret;
-      } while (out_offset < out_size);
-    } else {
-      do {
-        uint32_t ret = utf::read16le(in, in_offset);
-        if (ret == utf::NEED_MORE) {
-          return out_offset > out_start ? State::GOOD : State::NEED_MORE;
-        }
-        if (ret == utf::INVALID) {
-          return out_offset > out_start ? State::GOOD : State::INVALID;
-        }
-        out[out_offset++] = ret;
-      } while (out_offset < out_size);
+      while (true) {
+        ret = readbe(in, tmp);
+        if (ret == utf::NEED_MORE || ret == utf::INVALID)
+          return State::GOOD;
+        if (!utf::write8(ret, out, out_offset))
+          return State::GOOD;
+        in_offset = tmp;
+      }
     }
-    return State::GOOD;
   }
 
+ protected:
+  UnknownEndianDecoder() = default;
+
+  virtual uint32_t readle(
+      std::span<uint8_t const> data, std::size_t& offset) const = 0;
+  virtual uint32_t readbe(
+      std::span<uint8_t const> data, std::size_t& offset) const = 0;
+
  private:
   int8_t endian_{-1};
 };
 
-class Utf32Decoder : public Decoder {
+class Utf16Decoder : public UnknownEndianDecoder {
  public:
-  Utf32Decoder() = default;
+  Utf16Decoder() = default;
 
-  State decode(std::string_view in, std::size_t& in_offset,
-               uint32_t* out, std::size_t out_size,
-               std::size_t& out_offset) override {
-    std::size_t const out_start = out_offset;
-    if (endian_ == -1) UNLIKELY {
-      std::size_t tmp = in_offset;
-      uint32_t ret = utf::read32be(in, tmp);
-      int8_t endian;
-      if (ret == utf::NEED_MORE) {
-        return State::NEED_MORE;
-      }
-      if (ret == utf::INVALID) {
-        tmp = in_offset;
-        ret = utf::read32le(in, tmp);
-        if (ret == 0xfeff) {
-          endian = 0;  // Little endian
-        } else {
-          return State::INVALID;
-        }
-      } else if (ret == 0xfeff) {
-        endian = 1;  // Big endian
-      } else {
-        return State::INVALID;
-      }
+  uint32_t readle(
+      std::span<uint8_t const> data, std::size_t& offset) const override {
+    return utf::read16le(data, offset);
+  }
 
-      // To allow offset to advance and to return, we need to
-      // read the next character completely.
-      ret = endian == 1 ? utf::read32be(in, tmp) : utf::read32le(in, tmp);
-      if (ret == utf::NEED_MORE) {
-        return State::NEED_MORE;
-      }
-      if (ret == utf::INVALID) {
-        return State::INVALID;
-      }
+  uint32_t readbe(
+      std::span<uint8_t const> data, std::size_t& offset) const override {
+    return utf::read16be(data, offset);
+  }
+};
 
-      endian_ = endian;
-      in_offset = tmp;
-      out[out_offset++] = ret;
-      if (out_offset == out_size)
-        return State::GOOD;
-    }
+class Utf32Decoder : public UnknownEndianDecoder {
+ public:
+  Utf32Decoder() = default;
 
-    if (endian_ == 1) {
-      do {
-        uint32_t ret = utf::read32be(in, in_offset);
-        if (ret == utf::NEED_MORE) {
-          return out_offset > out_start ? State::GOOD : State::NEED_MORE;
-        }
-        if (ret == utf::INVALID) {
-          return out_offset > out_start ? State::GOOD : State::INVALID;
-        }
-        out[out_offset++] = ret;
-      } while (out_offset < out_size);
-    } else {
-      do {
-        uint32_t ret = utf::read32le(in, in_offset);
-        if (ret == utf::NEED_MORE) {
-          return out_offset > out_start ? State::GOOD : State::NEED_MORE;
-        }
-        if (ret == utf::INVALID) {
-          return out_offset > out_start ? State::GOOD : State::INVALID;
-        }
-        out[out_offset++] = ret;
-      } while (out_offset < out_size);
-    }
-    return State::GOOD;
+  uint32_t readle(
+      std::span<uint8_t const> data, std::size_t& offset) const override {
+    return utf::read32le(data, offset);
   }
 
- private:
-  int8_t endian_{-1};
+  uint32_t readbe(
+      std::span<uint8_t const> data, std::size_t& offset) const override {
+    return utf::read32be(data, offset);
+  }
 };
 
 class AsciiDecoder : public Decoder {
  public:
   AsciiDecoder() = default;
 
-  State decode(std::string_view in, std::size_t& in_offset,
-               uint32_t* out, std::size_t out_size,
-               std::size_t& out_offset) override {
-    std::size_t const out_start = out_offset;
-    do {
-      if (in_offset == in.size())
-        return out_offset > out_start ? State::GOOD : State::NEED_MORE;
-      if (in[in_offset] & 0x80)
-        return out_offset > out_start ? State::GOOD : State::INVALID;
-      out[out_offset++] = in[in_offset++];
-    } while (out_offset < out_size);
-    return State::GOOD;
+  State decode(std::span<uint8_t const> in, std::size_t& in_offset,
+               std::span<uint8_t> out, std::size_t& out_offset) override {
+    if (in_offset >= in.size())
+      return State::NEED_MORE;
+    if (in[in_offset] & 0x80)
+      return State::INVALID;
+    if (!utf::write8(in[in_offset], out, out_offset))
+      return State::NEED_MORE;
+    ++in_offset;
+
+    while (true) {
+      if (in_offset >= in.size() || in[in_offset] & 0x80)
+        return State::GOOD;
+      if (!utf::write8(in[in_offset], out, out_offset))
+        return State::GOOD;
+      ++in_offset;
+    }
   }
 };
 
diff --git a/sax/src/guessing_decoder.cc b/sax/src/guessing_decoder.cc
new file mode 100644
index 0000000..e72dab3
--- /dev/null
+++ b/sax/src/guessing_decoder.cc
@@ -0,0 +1,92 @@
+#include "guessing_decoder.hh"
+
+#include "decoder.hh"
+#include "sax_decoder.hh"
+#include "utf8.hh"
+#include "utf_error.hh"
+
+#include <cassert>
+
+using namespace std::string_view_literals;
+
+namespace modxml {
+namespace sax {
+
+namespace {
+
+// True if a, from a_offset, starts with b. Bytes are compared unsigned since
+// plain char may be signed ('\xef' would otherwise never equal 0xef).
+bool eq(std::span<uint8_t const> a, std::size_t a_offset, std::string_view b) {
+  if (a_offset > a.size() || a.size() - a_offset < b.size()) return false;
+  for (std::size_t i = 0; i < b.size(); ++i)
+    if (a[a_offset + i] != static_cast<uint8_t>(b[i])) return false;
+  return true;
+}
+
+// Decoder that picks the real decoder from a BOM or the first bytes of input.
+class GuessingDecoder : public Decoder {
+ public:
+  State decode(std::span<uint8_t const> in, std::size_t& in_offset,
+               std::span<uint8_t> out, std::size_t& out_offset) override {
+    assert(in_offset <= in.size());
+    if (!decided_) {
+      if (eq(in, in_offset, "\xef\xbb\xbf"sv)) {
+        decided_ = create_utf8_decoder();  // in_offset is left on the BOM.
+      } else if (eq(in, in_offset, "\x00\x00\xfe\xff"sv)) {
+        in_offset += 4;
+        decided_ = create_utf32be_decoder();
+      } else if (eq(in, in_offset, "\xff\xfe\x00\x00"sv)) {
+        in_offset += 4;
+        decided_ = create_utf32le_decoder();
+      } else if (eq(in, in_offset, "\xff\xfe"sv)) {
+        // Could be the start of an UTF-32LE BOM, need more data to decide
+        // (note, an xml document encoded in UTF-16 that is less than 4 bytes
+        //  is rather impossible).
+        if (in.size() - in_offset < 4)
+          return State::NEED_MORE;
+        in_offset += 2;
+        decided_ = create_utf16le_decoder();
+      } else if (eq(in, in_offset, "\xfe\xff"sv)) {
+        in_offset += 2;
+        decided_ = create_utf16be_decoder();
+      } else {
+        auto avail = in.size() - in_offset;
+        if (avail == 0)
+          return State::NEED_MORE;
+        if (avail >= 4 && in[in_offset] == 0 && in[in_offset + 1] == 0
+            && in[in_offset + 2] == 0 && in[in_offset + 3] != 0) {
+          decided_ = create_utf32be_decoder();  // e.g. 00 00 00 3C
+        } else if (avail >= 4 && in[in_offset] != 0 && in[in_offset + 1] == 0
+                   && in[in_offset + 2] == 0 && in[in_offset + 3] == 0) {
+          decided_ = create_utf32le_decoder();  // e.g. 3C 00 00 00
+        } else if (avail >= 2 && in[in_offset] == 0 && in[in_offset + 1] != 0) {
+          decided_ = create_utf16be_decoder();  // e.g. 00 3C
+        } else if (avail >= 2 && in[in_offset] != 0 && in[in_offset + 1] == 0) {
+          decided_ = create_utf16le_decoder();  // e.g. 3C 00
+        } else {
+          auto tmp = in_offset;
+          auto ret = utf::read8(in, tmp);
+          if (ret == utf::NEED_MORE)
+            return State::NEED_MORE;
+          if (ret == utf::INVALID)
+            return State::INVALID;
+          // UTF-8 should be good enough to read the XML declaration.
+          decided_ = create_utf8_decoder();
+        }
+      }
+    }
+    return decided_->decode(in, in_offset, out, out_offset);
+  }
+
+ private:
+  std::unique_ptr<Decoder> decided_;
+};
+
+}  // namespace
+
+std::unique_ptr<Decoder> create_guessing_decoder() {
+  return std::make_unique<GuessingDecoder>();
+}
+
+}  // namespace sax
+}  // namespace modxml
diff --git a/sax/src/guessing_decoder.hh b/sax/src/guessing_decoder.hh
new file mode 100644
index 0000000..0f42c3b
--- /dev/null
+++ b/sax/src/guessing_decoder.hh
@@ -0,0 +1,21 @@
+#ifndef GUESSING_DECODER_HH
+#define GUESSING_DECODER_HH
+
+#include "macros.hh"
+
+#include <memory>
+
+namespace modxml {
+namespace sax {
+
+class Decoder;
+
+// Decoder that tries to figure out which encoding is used, from a BOM
+// or, failing that, from the byte pattern of the first character;
+// optimized for the first character being '<'.
+std::unique_ptr<Decoder> HIDDEN create_guessing_decoder();
+
+}  // namespace sax
+}  // namespace modxml
+
+#endif  // GUESSING_DECODER_HH
diff --git a/sax/src/sax_attributes.cc b/sax/src/sax_attributes.cc
new file mode 100644
index 0000000..230c677
--- /dev/null
+++ b/sax/src/sax_attributes.cc
@@ -0,0 +1,38 @@
+#include "sax_attributes.hh"
+
+namespace modxml {
+namespace sax {
+
+Attribute::Attribute(std::string_view name, std::string_view value)
+    : name(name), value(value) {}
+
+// Value of the first attribute called name, or nullopt if there is none.
+std::optional<std::string_view> Attributes::find_first(std::string_view name)
+    const {
+  for (auto pos = begin(); pos != end(); ++pos)
+    if (pos->name == name)
+      return pos->value;
+  return std::nullopt;
+}
+
+// Value of the last attribute called name, or nullopt if there is none.
+std::optional<std::string_view> Attributes::find_last(std::string_view name)
+    const {
+  for (auto index = size(); index-- > 0;) {
+    if (auto const& attr = at(index); attr.name == name)
+      return attr.value;
+  }
+  return std::nullopt;
+}
+
+// Index of the first attribute called name at or after index, if any.
+std::optional<std::size_t> Attributes::find(std::string_view name,
+                                            std::size_t index) const {
+  while (index < size()) {
+    if (at(index).name == name) return index;
+    ++index;
+  }
+  return std::nullopt;
+}
+
+}  // namespace sax
+}  // namespace modxml
diff --git a/sax/src/sax_delegate.cc b/sax/src/sax_delegate.cc
new file mode 100644
index 0000000..2c2cfcd
--- /dev/null
+++ b/sax/src/sax_delegate.cc
@@ -0,0 +1,21 @@
+#include "sax_delegate.hh"
+
+namespace modxml {
+namespace sax {
+
+void Delegate::start_element(std::string_view, Attributes const&) {}
+
+void Delegate::empty_element(std::string_view, Attributes const&) {}
+
+void Delegate::end_element(std::string_view) {}
+
+void Delegate::character_data(std::string_view) {}
+
+void Delegate::processing_instruction(std::string_view, std::string_view) {}
+
+void Delegate::comment(std::string_view) {}
+
+void Delegate::error(std::string_view) {}
+
+}  // namespace sax
+}  // namespace modxml
diff --git a/sax/src/sax_processor.cc b/sax/src/sax_processor.cc
index ea9f753..afc9d3b 100644
--- a/sax/src/sax_processor.cc
+++ b/sax/src/sax_processor.cc
@@ -1,18 +1,41 @@
 #include "sax_processor.hh"
 
-#include "sax_decoder.hh"
+#include <iostream>
+
+#include "buffer.hh"
+#include "guessing_decoder.hh"
 #include "processor.hh"
+#include "sax_attributes.hh"
+#include "sax_decoder.hh"
+#include "sax_decoder_factory.hh"
+#include "sax_delegate.hh"
+#include "utf8.hh"
+#include "utf_error.hh"
 #include "utils.hh"
 
 #include <algorithm>
+#include <cassert>
+#include <charconv>
+#include <format>
+#include <map>
 #include <optional>
 #include <utility>
+#include <vector>
+
+using namespace std::string_view_literals;
 
 namespace modxml {
 namespace sax {
 
 namespace {
 
+constexpr std::size_t kDefaultBufferSize = 8192;
+constexpr std::size_t kMinBufferSize = 128;
+
+inline bool is_digit(char c) {
+  return c >= '0' && c <= '9';
+}
+
 // 2.2 Characters
 // [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
 
@@ -75,12 +98,185 @@ inline bool is_namechar(uint32_t c) {
       (c >= 0x300 && c <= 0x36f) || (c >= 0x203f && c <= 0x2040);
 }
 
-/* [5]   	Name	   ::=   	NameStartChar (NameChar)*
+/*
+[5]   	Name	   ::=   	NameStartChar (NameChar)*
 [6]   	Names	   ::=   	Name (#x20 Name)*
 [7]   	Nmtoken	   ::=   	(NameChar)+
 [8]   	Nmtokens	   ::=   	Nmtoken (#x20 Nmtoken)*
 */
 
+inline char ascii_lowercase(char c) {  // Folds A-Z only; locale independent.
+  return (c >= 'A' && c <= 'Z') ? static_cast<char>(c | 0x20) : c;
+}
+
+// Compares a, passed character by character through ascii_lowercase(),
+// with b. b is used as-is, so callers supply it already in lower case.
+bool eq_lowercase(std::string_view a, std::string_view b) {
+  return a.size() == b.size() &&
+      std::equal(a.begin(), a.end(), b.begin(), [](char lhs, char rhs) {
+        return ascii_lowercase(lhs) == rhs;
+      });
+}
+
+// View the bytes of span as characters; the result aliases span's storage.
+inline std::string_view make_string_view(std::span<uint8_t const> span) {
+  return {reinterpret_cast<char const*>(span.data()), span.size()};
+}
+
+// Replacement text for references, looked up by the text between '&' and ';'.
+class Entities {
+ public:
+  // Starts out knowing lt, gt, amp, apos and quot.
+  Entities() {
+    data_.emplace("lt", "<");
+    data_.emplace("gt", ">");
+    data_.emplace("amp", "&");
+    data_.emplace("apos", "'");
+    data_.emplace("quot", "\"");
+  }
+
+  // Returns the replacement for entity: the bytes utf::write8 produces for
+  // "#<decimal>" and "#x<hex>", else the registered text. Returns nullopt for
+  // unknown names and for malformed or unencodable character references.
+  std::optional<std::string> get(std::string const& entity) const {
+    if (entity.empty())
+      return std::nullopt;
+    if (entity.front() == '#') {
+      bool const hex = entity.size() > 1 && entity[1] == 'x';
+      char const* const start = entity.data() + (hex ? 2 : 1);
+      char const* const end = entity.data() + entity.size();
+      uint32_t value;
+      auto [ptr, ec] = std::from_chars(start, end, value, hex ? 16 : 10);
+      // ptr != end rejects trailing characters that are not digits.
+      if (ec != std::errc() || ptr != end)
+        return std::nullopt;
+      uint8_t tmp[4];
+      std::size_t offset = 0;
+      // Do not return a replacement for a value write8 refuses to encode.
+      if (!utf::write8(value, tmp, offset))
+        return std::nullopt;
+      return std::string(reinterpret_cast<char*>(tmp), offset);
+    }
+    auto it = data_.find(entity);
+    if (it == data_.end())
+      return std::nullopt;
+    return it->second;
+  }
+
+ private:
+  // Entity name to replacement text.
+  std::map<std::string, std::string> data_;
+};
+
+// Expand every "&name;" in str from index last on. Returns false, possibly
+// leaving str partially expanded, on an unterminated or unknown reference.
+bool deamp(Entities const& entities, std::string& str, std::size_t last = 0) {
+  while ((last = str.find('&', last)) != std::string::npos) {
+    auto const semicolon = str.find(';', last + 1);
+    if (semicolon == std::string::npos)
+      return false;
+    auto const value = entities.get(str.substr(last + 1, semicolon - last - 1));
+    if (!value.has_value())
+      return false;
+    str.replace(last, semicolon + 1 - last, *value);
+    last += value->size();  // Never rescan the inserted text.
+  }
+  return true;
+}
+
+// Drops the enclosing quotes and runs deamp() on the rest; nullopt if it fails.
+std::optional<std::string> unquote(Entities const& entities,
+                                   std::string_view quoted) {
+  assert(quoted.size() >= 2);
+  assert(quoted.front() == quoted.back());
+  std::string inner(quoted.begin() + 1, quoted.end() - 1);
+  if (!deamp(entities, inner)) return std::nullopt;
+  return inner;
+}
+
+// Drops the enclosing quotes. Without any '&' the result aliases quoted and
+// tmp is untouched; otherwise tmp gets a copy that is run through deamp() and
+// the result views tmp. Returns nullopt if deamp() fails.
+std::optional<std::string_view> unquote_if_needed(Entities const& entities,
+                                                  std::string_view quoted,
+                                                  std::string& tmp) {
+  assert(quoted.size() >= 2);
+  assert(quoted.front() == quoted.back());
+  auto const inner = quoted.substr(1, quoted.size() - 2);
+  auto const first_amp = inner.find('&');
+  if (first_amp == std::string_view::npos) return inner;
+  tmp.assign(inner);
+  if (!deamp(entities, tmp, first_amp)) return std::nullopt;
+  return tmp; }
+
+// Attributes of one tag. Names and plain values are views into the data
+// passed to init(); values that held references view strings owned here.
+class AttributesImpl : public Attributes {
+ public:
+  AttributesImpl() = default;
+
+  // Builds the attribute list from groups of four entries in offsets,
+  // starting at index first. Returns false if a value fails to unquote.
+  bool init(Entities const& entities,
+            std::span<const uint8_t> data,
+            std::vector<size_t> const& offsets,
+            std::size_t first) {
+    std::size_t a = first;
+    attr_.reserve((offsets.size() - first) / 4);
+    while (a + 4 <= offsets.size()) {
+      auto name = make_string_view(data.subspan(offsets[a], offsets[a + 1]));
+      std::string tmp;
+      auto value = unquote_if_needed(
+          entities,
+          make_string_view(data.subspan(offsets[a + 2], offsets[a + 3])),
+          tmp);
+      if (!value.has_value())
+        return false;
+      if (tmp.empty()) {
+        attr_.emplace_back(name, *value);
+      } else {
+        attr_.emplace_back(name, *value, std::move(tmp));
+      }
+      a += 4;
+    }
+    return true;
+  }
+
+  iterator begin() const override { return Iterator(this, 0); }
+  iterator end() const override { return Iterator(this, attr_.size()); }
+  std::size_t size() const override { return attr_.size(); }
+  Attribute const& at(std::size_t index) const override {
+    return attr_[index];
+  }
+
+ private:
+  class Iterator : public iterator {
+   public:
+    Iterator(Attributes const* attributes, std::size_t index)
+        : iterator(attributes, index) {}
+  };
+
+  struct AttributeImpl : public Attribute {
+    AttributeImpl(std::string_view name, std::string_view value)
+        : Attribute(name, value) {}
+
+    // The value passed in views all of tmp. tmp is moved to the heap and
+    // value re-pointed at it: moving a short std::string may copy its
+    // characters, which would leave a view of the old string dangling.
+    AttributeImpl(std::string_view name, std::string_view, std::string&& tmp)
+        : Attribute(name, {}),
+          tmp_(std::make_unique<std::string>(std::move(tmp))) {
+      value = *tmp_;
+    }
+
+   private:
+    std::unique_ptr<std::string> tmp_;
+  };
+
+  std::span<const uint8_t> data_;
+  std::vector<AttributeImpl> attr_;
+};
+
 class ProcessorImpl : public Processor {
  public:
   ProcessorImpl(std::shared_ptr<Delegate> delegate,
@@ -91,15 +287,898 @@ class ProcessorImpl : public Processor {
       : delegate_(std::move(delegate)),
         decoder_factory_(std::move(decoder_factory)),
         decoder_(std::move(decoder)),
-        default_buffer_size_(default_buffer_size),
-        max_buffer_size_(max_buffer_size) {}
+        forced_decoder_(decoder_),
+        buffer_(make_buffer(default_buffer_size, max_buffer_size)) {
+    if (!decoder_)
+      decoder_ = create_guessing_decoder();
+
+    expect_document();
+  }
+
+  std::size_t process(std::span<uint8_t const> data,
+                      std::size_t offset) override {
+    cmds_.emplace_back(Command::FILL_BUFFER, Count::ZERO_OR_ONE);
+
+    std::size_t consumed = 0;
+
+    while (true) {
+      if (cmds_.empty()) {
+        if (!buffer_->empty()) {
+          std::cerr << make_string_view(buffer_->rspan()) << std::endl;
+          delegate_->error("Extra data at end");
+        }
+        return consumed;
+      }
+
+      auto current = cmds_.back();
+      auto const old_size = cmds_.size();
+      cmds_.pop_back();
+      Process ret;
+      switch (current.command) {
+        case Command::FILL_BUFFER:
+          ret = fill_buffer(data, offset, consumed);
+          break;
+        case Command::MISC:
+          ret = process_misc(current);
+          break;
+        case Command::SPACE:
+          ret = process_space(current);
+          break;
+        case Command::ELEMENT:
+          ret = process_element(current);
+          break;
+        case Command::COMMENT:
+          ret = process_comment(current);
+          break;
+        case Command::PROCESSING_INSTRUCTION:
+          ret = process_processing_instruction(current);
+          break;
+        case Command::XMLDECL:
+          ret = process_xmldecl(current);
+          break;
+        case Command::ATTRIBUTE:
+          ret = process_attribute(current);
+          break;
+        case Command::NAME:
+          ret = process_name(current);
+          break;
+        case Command::ATTRIBUTE_VALUE:
+          ret = process_attribute_value(current);
+          break;
+        case Command::EQUAL:
+          ret = process_equal(current);
+          break;
+        case Command::START_OR_EMPTY_TAG:
+          ret = process_start_or_empty_tag(current);
+          break;
+        case Command::END_TAG:
+          ret = process_end_tag(current);
+          break;
+      }
+
+      switch (ret) {
+        case Process::NEED_MORE:
+        case Process::ERROR:
+          cmds_.push_back(current);
+          assert(cmds_.size() == old_size);
+          return consumed;
+        case Process::CONTINUE:
+          break;
+      }
+    }
+  }
+
+  uint64_t line() const override { return line_; }
+
+  uint64_t column() const override { return column_; }
 
  private:
+  enum class Process {
+    NEED_MORE,
+    ERROR,
+    CONTINUE,
+  };
+
+  enum class Match {
+    FULL_MATCH,
+    PARTIAL_MATCH,
+    NO_MATCH,
+  };
+
+  enum class Command {
+    FILL_BUFFER,
+
+    ATTRIBUTE,
+    ATTRIBUTE_VALUE,
+    COMMENT,
+    ELEMENT,
+    END_TAG,
+    EQUAL,
+    MISC,
+    NAME,
+    PROCESSING_INSTRUCTION,
+    SPACE,
+    START_OR_EMPTY_TAG,
+    XMLDECL,
+  };
+
+  enum class Count {
+    ONE,
+    ONE_OR_MANY,
+    ZERO_OR_ONE,
+    ZERO_OR_MANY,
+  };
+
+  struct CommandItem {
+    Command const command;
+    Count const count;
+    std::size_t offset;
+
+    CommandItem(Command command, Count count, std::size_t offset = 0)
+        : command(command), count(count), offset(offset) {}
+  };
+
+  struct StackItem {
+    std::vector<std::size_t> offsets;
+  };
+
+  // Decodes data from offset into buffer_. consumed is advanced by the input
+  // bytes the decoder used, which it may do even when it needs more data.
+  Process fill_buffer(std::span<uint8_t const> data, std::size_t offset,
+                      std::size_t& consumed) {
+    if (offset >= data.size())
+      return Process::NEED_MORE;
+
+    std::size_t in = offset;
+    std::size_t out = 0;
+    auto const state = decoder_->decode(data, in, buffer_->wspan(4), out);
+    consumed += in - offset;
+    if (state == Decoder::State::NEED_MORE)
+      return Process::NEED_MORE;
+    if (state == Decoder::State::INVALID) {
+      delegate_->error("Invalid data");
+      return Process::ERROR;
+    }
+    buffer_->commit(out);
+    return Process::CONTINUE;
+  }
+
+  void expect_document() {
+    // document := prolog element Misc*
+    expect_misc(Count::ZERO_OR_MANY);
+    expect_element(Count::ONE);
+    expect_prolog();
+  }
+
+  void expect_misc(Count count) {
+    cmds_.emplace_back(Command::MISC, count);
+  }
+
+  void expect_element(Count count) {
+    // element ::= EmptyElemTag | STag content ETag
+    cmds_.emplace_back(Command::START_OR_EMPTY_TAG, count);
+  }
+
+  void expect_end_tag(Count count) {
+    cmds_.emplace_back(Command::END_TAG, count);
+  }
+
+  void expect_prolog() {
+    // prolog := XMLDecl? Misc* (doctypedecl Misc*)?
+    expect_misc(Count::ZERO_OR_MANY);
+    expect_doctypedecl(Count::ZERO_OR_ONE);
+    expect_misc(Count::ZERO_OR_MANY);
+    expect_xmldecl(Count::ZERO_OR_ONE);
+  }
+
+  void expect_xmldecl(Count count) {
+    cmds_.emplace_back(Command::XMLDECL, count);
+  }
+
+  void expect_doctypedecl(Count) {
+    // TODO
+  }
+
+  void expect_comment(Count count, std::size_t start_offset = 0) {
+    // Comment should never be more than one, should be MISC that is repeated.
+    assert(count == Count::ONE);
+    cmds_.emplace_back(Command::COMMENT, count, start_offset);
+  }
+
+  void expect_content(Count) {
+    // TODO
+  }
+
+  void expect_pi(Count count, std::size_t start_offset = 0) {
+    // PI should never be more than one, should be MISC that is repeated.
+    assert(count == Count::ONE);
+    cmds_.emplace_back(Command::PROCESSING_INSTRUCTION, count, start_offset);
+  }
+
+  void expect_space(Count count) {
+    // There is no way to have SS as S is continuous, so we should never
+    // ask for more than one or zero.
+    assert(count == Count::ZERO_OR_ONE || count == Count::ONE);
+    cmds_.emplace_back(Command::SPACE, count);
+  }
+
+  void expect_attribute(Count count) {
+    switch (count) {
+      case Count::ONE_OR_MANY:
+        cmds_.emplace_back(Command::ATTRIBUTE, Count::ZERO_OR_MANY);
+        [[fallthrough]];  // The first attribute is mandatory.
+      case Count::ONE:  // S Name Eq AttValue, pushed last-to-first.
+        expect_attribute_value(Count::ONE);
+        expect_equal(Count::ONE);
+        expect_name(Count::ONE);
+        expect_space(Count::ONE);
+        break;
+      case Count::ZERO_OR_ONE:
+      case Count::ZERO_OR_MANY:
+        cmds_.emplace_back(Command::ATTRIBUTE, count);
+        break;
+    }
+  }
+
+  void expect_attribute_value(Count count) {
+    cmds_.emplace_back(Command::ATTRIBUTE_VALUE, count);
+  }
+
+  void expect_equal(Count count) {
+    // Eq ::= S? '=' S?
+    expect_space(Count::ZERO_OR_ONE);
+    cmds_.emplace_back(Command::EQUAL, count);
+    expect_space(Count::ZERO_OR_ONE);
+  }
+
+  void expect_name(Count count) {
+    cmds_.emplace_back(Command::NAME, count);
+  }
+
+  // Misc ::= Comment | PI | S
+  //
+  // Peeks at the buffer (nothing is consumed here) to decide which of the
+  // three alternatives comes next and queues the matching command. The
+  // alternatives are tried in order: comment, processing instruction,
+  // whitespace. A partial match of any of them asks for more data.
+  Process process_misc(CommandItem const& item) {
+    assert(item.offset == 0);
+
+    auto const comment = match("<!--");
+    if (comment == Match::PARTIAL_MATCH)
+      return Process::NEED_MORE;
+    if (comment == Match::FULL_MATCH) {
+      add_if_more(item);
+      expect_comment(Count::ONE, 3);
+      return Process::CONTINUE;
+    }
+
+    auto const pi = match("<?");
+    if (pi == Match::PARTIAL_MATCH)
+      return Process::NEED_MORE;
+    if (pi == Match::FULL_MATCH) {
+      add_if_more(item);
+      expect_pi(Count::ONE, 2);
+      return Process::CONTINUE;
+    }
+
+    auto const space = match_s();
+    if (space == Match::PARTIAL_MATCH)
+      return Process::NEED_MORE;
+    if (space == Match::FULL_MATCH) {
+      add_if_more(item);
+      expect_space(Count::ONE);
+      return Process::CONTINUE;
+    }
+
+    // None of the alternatives is present.
+    return no_match(item);
+  }
+
+  // Parses one optional "S Attribute" and, for repeating counts, re-queues
+  // itself so that (S Attribute)* is handled one attribute at a time.
+  //
+  // item.offset counts the whitespace characters consumed so far; it is
+  // kept across NEED_MORE retries.
+  Process process_attribute(CommandItem& item) {
+    // This actually parses (S Attribute)* when followed by S?
+    // for Attribute parsing see expect_attribute()
+    // So we need to figure out if the S means start of attribute
+    // or just an S. We do this by checking if the first non-S is
+    // a namestart or something else. We consume the S.
+    uint32_t last_char;
+    auto ret = consume_space(item.offset, last_char);
+    if (ret != Process::CONTINUE)
+      return ret;
+
+    // No S, cannot be followed by an attribute then.
+    if (item.offset == 0)
+      return no_match(item);
+
+    // First character after S isn't a valid first character of a name,
+    // cannot be followed by an attribute then.
+    if (!is_namestartchar(last_char))
+      return no_match(item);
+
+    // Queue ourselves again first so the next attribute is looked for only
+    // after the one queued below has been parsed. Without this a
+    // ZERO_OR_MANY item would stop after a single attribute.
+    add_if_more(item);
+
+    // Attribute ::= Name Eq AttValue (pushed in reverse input order).
+    expect_attribute_value(Count::ONE);
+    expect_equal(Count::ONE);
+    expect_name(Count::ONE);
+    return Process::CONTINUE;
+  }
+
+  // Matches and consumes the '=' of Eq ::= S? '=' S?. The surrounding
+  // optional whitespace is queued separately by expect_equal().
+  Process process_equal(CommandItem const& item) {
+    switch (match_consume("=")) {
+      case Match::FULL_MATCH:
+        add_if_more(item);
+        return Process::CONTINUE;
+      case Match::PARTIAL_MATCH:
+        return Process::NEED_MORE;
+      case Match::NO_MATCH:
+        return no_match(item);
+    }
+    // Every Match value returns above; this keeps control from running off
+    // the end of a non-void function.
+    assert(false);
+    return Process::ERROR;
+  }
+
+  // Parses a Name and records where it was found.
+  //
+  // item.offset is the number of bytes of the name accepted so far, so the
+  // scan can resume after a NEED_MORE. On success the pair
+  // (read_view->consumed(), item.offset) is appended to the offsets of the
+  // innermost stack entry and the name is consumed from the buffer.
+  Process process_name(CommandItem& item) {
+    // Name ::= NameStartChar (NameChar)*
+    // Ask for what has been scanned so far plus 4 more bytes (presumably
+    // room for one more UTF-8 sequence).
+    auto data = buffer_->rspan(item.offset + 4);
+    while (true) {
+      // Decode into a scratch offset so item.offset only advances past
+      // characters that were accepted as part of the name.
+      std::size_t tmp = item.offset;
+      auto c = utf::read8(data, tmp);
+      if (c == utf::NEED_MORE)
+        return Process::NEED_MORE;
+      if (c == utf::INVALID || !valid_char(c))
+        return invalid_char(data, tmp);
+      if (item.offset == 0) {
+        if (!is_namestartchar(c))
+          return no_match(item);
+      } else {
+        // First character that cannot continue the name ends it; it is
+        // left in the buffer.
+        if (!is_namechar(c))
+          break;
+      }
+      item.offset = tmp;
+    }
+
+    // Names are only parsed inside a construct that called add_to_stack(),
+    // which is also what wrapped buffer_ in a read view.
+    assert(!stack_.empty());
+    auto* read_view = static_cast<ReadViewBuffer*>(buffer_.get());
+    stack_.back().offsets.push_back(read_view->consumed());
+    stack_.back().offsets.push_back(item.offset);
+    buffer_->consume(item.offset);
+    return Process::CONTINUE;
+  }
+
+  // Parses a quoted attribute value and records where it was found.
+  //
+  // item.offset is the number of bytes scanned so far, including the
+  // opening quote, so the scan can resume after a NEED_MORE. On success
+  // the pair (read_view->consumed(), item.offset) is appended to the
+  // offsets of the innermost stack entry; the recorded span includes both
+  // quote characters.
+  Process process_attribute_value(CommandItem& item) {
+    // AttValue ::= '"' ([^<&"] | Reference)* '"'
+    //              | "'" ([^<&'] | Reference)* "'"
+
+    uint32_t end_char;
+    auto data = buffer_->rspan(item.offset + 4);
+
+    if (item.offset == 0) {
+      // First call: the value must start with a quote, which also decides
+      // the terminating character.
+      std::size_t tmp = item.offset;
+      auto c = utf::read8(data, tmp);
+      if (c == utf::NEED_MORE)
+        return Process::NEED_MORE;
+      if (c == utf::INVALID || !valid_char(c))
+        return invalid_char(data, tmp);
+      if (c != '"' && c != '\'')
+        return no_match(item);
+      item.offset = tmp;
+      end_char = c;
+    } else {
+      // Resuming: the opening quote is still the first byte in the buffer.
+      assert(!data.empty());
+      end_char = data[0];  // ok as both " and ' are ASCII
+    }
+
+    while (true) {
+      auto c = utf::read8(data, item.offset);
+      if (c == utf::NEED_MORE)
+        return Process::NEED_MORE;
+      if (c == utf::INVALID || !valid_char(c))
+        return invalid_char(data, item.offset);
+      if (c == end_char)
+        break;
+      // TODO: Should we validate reference already here or do we let
+      // unquote take care of that? As Reference can't contain end_char
+      // only checking for end_char is safe here.
+    }
+
+    assert(!stack_.empty());
+    auto* read_view = static_cast<ReadViewBuffer*>(buffer_.get());
+    stack_.back().offsets.push_back(read_view->consumed());
+    stack_.back().offsets.push_back(item.offset);
+    buffer_->consume(item.offset);
+    return Process::CONTINUE;
+  }
+
+  // Comment ::= '<!--' ... '-->'
+  //
+  // The whole comment, including "<!--", stays in the buffer until the
+  // terminator has been found; item.offset is the byte index from which
+  // the search for "-->" continues. An item queued with an offset inside
+  // the start marker (or 0) first has the marker checked and is then
+  // moved to just past it, so the marker's own dashes can never be taken
+  // for part of the terminator.
+  //
+  // Relies on find() leaving the index of the first byte of the match in
+  // its offset argument on FULL_MATCH.
+  Process process_comment(CommandItem& item) {
+    constexpr std::string_view kStart = "<!--";
+    constexpr std::string_view kEnd = "-->";
+
+    if (item.offset < kStart.size()) {
+      // Peek only: the offsets used below are relative to the start of
+      // the marker, so it must not be consumed yet.
+      switch (match(kStart)) {
+        case Match::FULL_MATCH:
+          item.offset = kStart.size();
+          break;
+        case Match::PARTIAL_MATCH:
+          return Process::NEED_MORE;
+        case Match::NO_MATCH:
+          return no_match(item);
+      }
+    }
+
+    switch (find(kEnd, item.offset)) {
+      case Match::FULL_MATCH: {
+        assert(item.offset >= kStart.size());
+        auto data = buffer_->rspan(item.offset + kEnd.size());
+        assert(data.size() >= item.offset + kEnd.size());
+        // Report the text between the two markers.
+        delegate_->comment(make_string_view(
+            data.subspan(kStart.size(), item.offset - kStart.size())));
+        buffer_->consume(item.offset + kEnd.size());
+        return Process::CONTINUE;
+      }
+      case Match::NO_MATCH:
+      case Match::PARTIAL_MATCH:
+        // item.offset has been advanced by find(); resume from there.
+        return Process::NEED_MORE;
+    }
+    // Every Match value returns above.
+    assert(false);
+    return Process::ERROR;
+  }
+
+  // Handles a processing instruction. Only the "<?" start is recognised
+  // so far; anything that gets past it is reported as an error because
+  // processing instructions are not implemented yet.
+  Process process_processing_instruction(CommandItem& item) {
+    if (item.offset == 0) {
+      // Not reached via process_misc(), which queues this command with
+      // offset 2 after peeking at "<?".
+      switch (match_consume("<?")) {
+        case Match::FULL_MATCH:
+          item.offset += 2;
+          break;
+        case Match::PARTIAL_MATCH:
+          return Process::NEED_MORE;
+        case Match::NO_MATCH:
+          return no_match(item);
+      }
+    }
+
+    // TODO
+    delegate_->error("PI not supported");
+    return Process::ERROR;
+  }
+
+  // Starts a nested parse for `item`.
+  //
+  // Re-queues the command with `offset` as its new offset (the process_*
+  // functions use that to recognise their second phase), opens a new stack
+  // entry for recorded offsets, wraps buffer_ in a read view buffer and
+  // consumes `offset` bytes from that view. pop_stack() undoes this.
+  void add_to_stack(CommandItem const& item, std::size_t offset) {
+    cmds_.emplace_back(item.command, item.count, offset);
+    stack_.emplace_back();
+    buffer_ = make_read_view_buffer(std::move(buffer_));
+    buffer_->consume(offset);
+  }
+
+  // Ends the nested parse started by add_to_stack().
+  //
+  // Swaps the offsets recorded in the innermost stack entry into `attr`,
+  // replaces the read view with the buffer it releases and returns the
+  // value of the view's consumed() at that point.
+  std::size_t pop_stack(std::vector<std::size_t>& attr) {
+    assert(!stack_.empty());
+    std::swap(attr, stack_.back().offsets);
+
+    auto* read_view = static_cast<ReadViewBuffer*>(buffer_.get());
+    // Read before release(): assigning to buffer_ below destroys the view.
+    auto consumed = read_view->consumed();
+
+    buffer_ = read_view->release();
+    stack_.pop_back();
+
+    return consumed;
+  }
+
+  // Parses the XML declaration in two phases, told apart by item.offset:
+  //  * offset 0: peek for "<?xml", re-queue this command with offset 5 on
+  //    top of a read view (add_to_stack) and queue generic attribute
+  //    parsing.
+  //  * offset 5: the attributes have been parsed; expect "?>", drop the
+  //    read view and validate the recorded attributes: version first, then
+  //    an optional encoding, then an optional standalone.
+  Process process_xmldecl(CommandItem const& item) {
+    // XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
+    if (item.offset == 0) {
+      switch (match("<?xml")) {
+        case Match::FULL_MATCH:
+          add_to_stack(item, /* offset */ 5);
+          expect_space(Count::ZERO_OR_ONE);
+          // Parsing as generic "Attribute" here and doing validation later.
+          expect_attribute(Count::ONE_OR_MANY);
+          return Process::CONTINUE;
+        case Match::PARTIAL_MATCH:
+          return Process::NEED_MORE;
+        case Match::NO_MATCH:
+          return no_match(item);
+      }
+    }
+
+    assert(item.offset == 5);
+
+    // Remember that this is still reading for the read view buffer.
+    switch (match_consume("?>")) {
+      case Match::FULL_MATCH:
+        break;
+      case Match::PARTIAL_MATCH:
+        return Process::NEED_MORE;
+      case Match::NO_MATCH:
+        delegate_->error(std::format("Expected end of {}",
+                                     command_name(item.command)));
+        return Process::ERROR;
+    }
+
+    // attr receives the (offset, length) pairs pushed by process_name() and
+    // process_attribute_value(): name, value, name, value, ...
+    std::vector<std::size_t> attr;
+    auto const consumed = pop_stack(attr);
+
+    // Now we're back to the real buffer
+    auto data = buffer_->rspan(consumed);
+    // Index into attr of the next attribute to validate; each attribute
+    // takes four entries.
+    std::size_t a = 0;
+
+    // Recorded value spans include the surrounding quotes, hence the +1 on
+    // the offset and -2 on the length below.
+    if (a + 4 <= attr.size() &&
+        make_string_view(data.subspan(attr[a + 0],
+                                      attr[a + 1])) == "version") {
+      auto version = make_string_view(data.subspan(attr[a + 2] + 1,
+                                                   attr[a + 3] - 2));
+      if (!valid_version(version)) {
+        delegate_->error(std::format("Unsupported xmldecl version, {}",
+                                     version));
+        return Process::ERROR;
+      }
+      a += 4;
+    } else {
+      // No version
+      delegate_->error("Invalid xmldecl, must have a version attribute first.");
+      return Process::ERROR;
+    }
+
+    if (a + 4 <= attr.size() &&
+        make_string_view(data.subspan(attr[a + 0],
+                                      attr[a + 1])) == "encoding") {
+      auto encoding = make_string_view(data.subspan(attr[a + 2] + 1,
+                                                    attr[a + 3] - 2));
+      if (forced_decoder_) {
+        // encoding value is ignored
+        // TODO: Should we check that it is valid anyway?
+      } else {
+        // Built-in decoders first, then the user supplied factory.
+        auto decoder = pick_decoder_for_encoding(encoding, nullptr);
+        if (!decoder && decoder_factory_)
+          decoder = decoder_factory_->create(encoding);
+        if (!decoder) {
+          delegate_->error(std::format("Unknown encoding {}", encoding));
+          return Process::ERROR;
+        }
+        std::swap(decoder_, decoder);
+        // TODO: Re-decode the rest of the buffer?
+      }
+      a += 4;
+    }
+
+    if (a + 4 <= attr.size() &&
+        make_string_view(data.subspan(attr[a + 0],
+                                      attr[a + 1])) == "standalone") {
+      auto sd = make_string_view(data.subspan(attr[a + 2] + 1,
+                                              attr[a + 3] - 2));
+      if (sd == "yes") {
+        // TODO: Handle standalone == yes
+      } else if (sd == "no") {
+        // TODO: Handle standalone == no
+      } else {
+        delegate_->error(std::format(
+            "Invalid xmldecl, standalone attribute has unsupported value, {}",
+            sd));
+        return Process::ERROR;
+      }
+      a += 4;
+    }
+
+    // Anything left over is an unknown, repeated or out-of-order attribute.
+    if (a < attr.size()) {
+      delegate_->error(
+          std::format("Invalid xmldecl, unknown attribute, {}",
+                      make_string_view(data.subspan(attr[a + 0],
+                                                    attr[a + 1]))));
+      return Process::ERROR;
+    }
+
+    // The declaration is only consumed from the real buffer once it has
+    // been fully validated.
+    buffer_->consume(consumed);
+    return Process::CONTINUE;
+  }
+
+  // Parses a start tag or an empty-element tag in two phases, told apart
+  // by item.offset:
+  //  * offset 0: peek for "<", re-queue this command with offset 1 on top
+  //    of a read view (add_to_stack) and queue name, attributes and
+  //    optional whitespace.
+  //  * offset 1: name and attributes have been parsed; expect "/>" or ">",
+  //    drop the read view and report the element to the delegate.
+  Process process_start_or_empty_tag(CommandItem const& item) {
+    // EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
+    // STag         ::= '<' Name (S Attribute)* S? '>'
+    if (item.offset == 0) {
+      switch (match("<")) {
+        case Match::FULL_MATCH:
+          add_to_stack(item, /* offset */ 1);
+          expect_space(Count::ZERO_OR_ONE);
+          expect_attribute(Count::ZERO_OR_MANY);
+          expect_name(Count::ONE);
+          return Process::CONTINUE;
+        case Match::PARTIAL_MATCH:
+          return Process::NEED_MORE;
+        case Match::NO_MATCH:
+          return no_match(item);
+      }
+    }
+
+    assert(item.offset == 1);
+
+    // Set on every path that reaches the code after the switch.
+    bool empty_tag;
+
+    // Remember that this is still reading for the read view buffer.
+    switch (match_consume("/>")) {
+      case Match::FULL_MATCH:
+        empty_tag = true;
+        break;
+      case Match::PARTIAL_MATCH:
+        return Process::NEED_MORE;
+      case Match::NO_MATCH:
+        switch (match_consume(">")) {
+          case Match::FULL_MATCH:
+            empty_tag = false;
+            break;
+          case Match::PARTIAL_MATCH:
+            return Process::NEED_MORE;
+          case Match::NO_MATCH:
+            delegate_->error(std::format("Expected end of {}",
+                                         command_name(item.command)));
+            return Process::ERROR;
+        }
+        break;
+    }
+
+    // attr receives the recorded (offset, length) pairs: the element name
+    // first, then name/value pairs for each attribute.
+    std::vector<std::size_t> attr;
+    auto const consumed = pop_stack(attr);
+
+    // Now we're back to the real buffer
+    auto data = buffer_->rspan(consumed);
+
+    assert(attr.size() >= 2);
+    // name views into data, so it stays usable after attr is moved below.
+    auto name = make_string_view(data.subspan(attr[0], attr[1]));
+
+    AttributesImpl attributes;
+    if (!attributes.init(entities_, data, std::move(attr), 2)) {
+      delegate_->error("Invalid references in attribute values");
+      return Process::ERROR;
+    }
+
+    // Re-queue before queuing content and end tag, so that any repeat of
+    // this command is looked for only after this element is complete.
+    add_if_more(item);
+
+    if (empty_tag) {
+      delegate_->empty_element(name, attributes);
+    } else {
+      delegate_->start_element(name, attributes);
+      expect_end_tag(Count::ONE);
+      expect_content(Count::ONE);
+    }
+
+    // name and attributes refer to buffered bytes, so the tag is consumed
+    // only after the delegate has been called.
+    buffer_->consume(consumed);
+    return Process::CONTINUE;
+  }
+
+  // Parses an end tag in two phases, told apart by item.offset:
+  //  * offset 0: peek for "</", re-queue this command with offset 2 on top
+  //    of a read view (add_to_stack) and queue the name and optional
+  //    whitespace.
+  //  * offset 2: the name has been parsed; expect ">", drop the read view
+  //    and report the end of the element to the delegate.
+  Process process_end_tag(CommandItem const& item) {
+    // ETag ::= '</' Name S? '>'
+    if (item.offset == 0) {
+      switch (match("</")) {
+        case Match::FULL_MATCH:
+          add_to_stack(item, /* offset */ 2);
+          expect_space(Count::ZERO_OR_ONE);
+          expect_name(Count::ONE);
+          return Process::CONTINUE;
+        case Match::PARTIAL_MATCH:
+          return Process::NEED_MORE;
+        case Match::NO_MATCH:
+          return no_match(item);
+      }
+    }
+
+    // Must agree with the offset handed to add_to_stack() above.
+    assert(item.offset == 2);
+
+    // Remember that this is still reading for the read view buffer.
+    switch (match_consume(">")) {
+      case Match::FULL_MATCH:
+        break;
+      case Match::PARTIAL_MATCH:
+        return Process::NEED_MORE;
+      case Match::NO_MATCH:
+        delegate_->error(std::format("Expected end of {}",
+                                     command_name(item.command)));
+        return Process::ERROR;
+    }
+
+    // attr receives the single (offset, length) pair of the element name.
+    std::vector<std::size_t> attr;
+    auto const consumed = pop_stack(attr);
+
+    // Now we're back to the real buffer
+    auto data = buffer_->rspan(consumed);
+
+    assert(attr.size() == 2);
+    auto name = make_string_view(data.subspan(attr[0], attr[1]));
+
+    add_if_more(item);
+
+    delegate_->end_element(name);
+
+    // name refers to buffered bytes, so the tag is consumed only after the
+    // delegate has been called.
+    buffer_->consume(consumed);
+    return Process::CONTINUE;
+  }
+
+  // Returns true when version is "1." followed by one or more digits
+  // (as classified by is_digit()).
+  static bool valid_version(std::string_view version) {
+    if (version.size() < 3 || !version.starts_with("1."))
+      return false;
+
+    // Everything after the "1." prefix has to be a digit.
+    for (std::size_t pos = version.size(); pos > 2; --pos) {
+      if (!is_digit(version[pos - 1]))
+        return false;
+    }
+    return true;
+  }
+
+  // Handles an ELEMENT command. Not implemented yet: always reports an
+  // error to the delegate. The parameter is left unnamed until it is
+  // needed, as it would otherwise trigger an unused-parameter warning.
+  Process process_element(CommandItem& /* item */) {
+    // TODO
+    delegate_->error("Element is not yet supported");
+    return Process::ERROR;
+  }
+
+  // Consumes leading whitespace from the buffer.
+  //
+  // count is incremented once per whitespace character; it is not reset
+  // here, so callers can accumulate across NEED_MORE retries. last_char is
+  // only written when Process::CONTINUE is returned and then holds the
+  // first non-whitespace character, which is left in the buffer.
+  //
+  // Returns NEED_MORE when the available data ends before a non-whitespace
+  // character is seen, and the result of invalid_char() on undecodable or
+  // disallowed input. In the error case the whitespace seen so far has
+  // been counted but not consumed.
+  Process consume_space(std::size_t& count, uint32_t& last_char) {
+    auto data = buffer_->rspan(4);
+    // Bytes of whitespace accepted so far in this call.
+    std::size_t consumed = 0;
+    while (true) {
+      // Decode with a scratch offset so that `consumed` never includes the
+      // character that ends the run.
+      std::size_t offset = consumed;
+      auto c = utf::read8(data, offset);
+      if (c == utf::NEED_MORE) {
+        buffer_->consume(consumed);
+        return Process::NEED_MORE;
+      }
+      if (c == utf::INVALID || !valid_char(c))
+        return invalid_char(data, offset);
+      if (!is_ws(c)) {
+        last_char = c;
+        buffer_->consume(consumed);
+        return Process::CONTINUE;
+      }
+      ++count;
+      handle_ws(c);
+      consumed = offset;
+    }
+  }
+
+  // S ::= (#x20 | #x9 | #xD | #xA)+
+  //
+  // Whitespace is consumed from the buffer as soon as it is seen, so
+  // item.offset is not a buffer position here: it only counts how many
+  // whitespace characters have been consumed, across retries.
+  Process process_space(CommandItem& item) {
+    uint32_t ignored;
+    if (auto const status = consume_space(item.offset, ignored);
+        status != Process::CONTINUE) {
+      return status;
+    }
+
+    // Nothing consumed at all means there was no S here.
+    if (item.offset == 0)
+      return no_match(item);
+
+    add_if_more(item);
+    return Process::CONTINUE;
+  }
+
+  // Re-queues item's command after a successful match when its count
+  // allows repetition. The repeat is always queued as ZERO_OR_MANY, since
+  // the mandatory first occurrence (if any) has just been matched.
+  // Non-repeating counts (ONE, ZERO_OR_ONE) queue nothing.
+  void add_if_more(CommandItem const& item) {
+    bool const repeats = item.count == Count::ONE_OR_MANY ||
+                         item.count == Count::ZERO_OR_MANY;
+    if (repeats)
+      cmds_.emplace_back(item.command, Count::ZERO_OR_MANY);
+  }
+
+  // Searches the buffered data for str, starting at byte index offset.
+  //
+  // On return offset tells the caller where things stand:
+  //  * FULL_MATCH: offset is the index of the first byte of the match.
+  //  * PARTIAL_MATCH: the available data ends in a proper prefix of str;
+  //    offset is the index where that prefix starts, so the search can be
+  //    resumed from there once more data has arrived.
+  //  * NO_MATCH: all available data was searched; offset is the end of
+  //    the searched data (or unchanged if it was already past it).
+  Match find(std::string_view str, std::size_t& offset) {
+    if (str.empty())
+      return Match::FULL_MATCH;
+
+    auto data = buffer_->rspan(offset + str.size());
+    // offset is always the start of the current candidate and `matched`
+    // the number of bytes of str verified there. On a mismatch the
+    // candidate moves forward by a single byte, so overlapping
+    // candidates (e.g. "-->" inside "--->") are not skipped.
+    std::size_t matched = 0;
+    while (offset + matched < data.size()) {
+      if (str[matched] != data[offset + matched]) {
+        ++offset;
+        matched = 0;
+        continue;
+      }
+      if (++matched == str.size())
+        return Match::FULL_MATCH;
+    }
+    return matched > 0 ? Match::PARTIAL_MATCH : Match::NO_MATCH;
+  }
+
+  // Checks, without consuming anything, whether the buffered data at
+  // `offset` starts with str.
+  //
+  // Returns FULL_MATCH when all of str is present, PARTIAL_MATCH when the
+  // available data is a (possibly empty) prefix of str and more data is
+  // needed to decide, and NO_MATCH as soon as a byte differs.
+  Match match(std::string_view str, std::size_t offset = 0) {
+    auto const data = buffer_->rspan(offset + str.size());
+    if (offset >= data.size())
+      return Match::PARTIAL_MATCH;
+
+    auto const window = data.subspan(offset);
+    auto const comparable = std::min(str.size(), window.size());
+    std::size_t pos = 0;
+    while (pos < comparable) {
+      if (str[pos] != window[pos])
+        return Match::NO_MATCH;
+      ++pos;
+    }
+    return comparable == str.size() ? Match::FULL_MATCH
+                                    : Match::PARTIAL_MATCH;
+  }
+
+  // Like match() at offset 0, but additionally consumes str from the
+  // buffer when it is fully matched. Nothing is consumed otherwise.
+  Match match_consume(std::string_view str) {
+    auto const result = match(str);
+    if (result != Match::FULL_MATCH)
+      return result;
+
+    buffer_->consume(str.size());
+    return result;
+  }
+
+  // Checks, without consuming anything, whether the next character in the
+  // buffer is whitespace.
+  //
+  // Returns PARTIAL_MATCH only when no data is available at all; an
+  // incomplete, invalid or non-whitespace character is a NO_MATCH.
+  Match match_s() {
+    auto data = buffer_->rspan(4);
+    std::size_t pos = 0;
+    auto const c = utf::read8(data, pos);
+    if (c == utf::NEED_MORE)
+      return data.size() == 0 ? Match::PARTIAL_MATCH : Match::NO_MATCH;
+
+    bool const is_space = c != utf::INVALID && valid_char(c) && is_ws(c);
+    return is_space ? Match::FULL_MATCH : Match::NO_MATCH;
+  }
+
+  // Decides what a failed match means for item: a command that had to
+  // occur at least once (ONE, ONE_OR_MANY) is reported to the delegate as
+  // an error, an optional one (ZERO_OR_ONE, ZERO_OR_MANY) is skipped.
+  Process no_match(CommandItem const& item) {
+    bool const required =
+        item.count == Count::ONE || item.count == Count::ONE_OR_MANY;
+    if (!required)
+      return Process::CONTINUE;
+
+    delegate_->error(std::format("Expected {}",
+                                 command_name(item.command)));
+    return Process::ERROR;
+  }
+
+  // Updates the line/column bookkeeping for one whitespace character:
+  // '\n' starts a new line at column 0, anything else advances the column.
+  void handle_ws(uint32_t c) {
+    if (c != '\n') {
+      ++column_;
+      return;
+    }
+    ++line_;
+    column_ = 0;
+  }
+
+  // Reports the byte at data[offset] to the delegate as an invalid
+  // character (two hex digits) and returns Process::ERROR.
+  // NOTE(review): callers pass the offset they handed to utf::read8();
+  // confirm that it still indexes the offending byte (and is inside data)
+  // when read8 reports an error.
+  Process invalid_char(std::span<uint8_t const> data, std::size_t offset) {
+    delegate_->error(std::format("Invalid char {:02x}", data[offset]));
+    return Process::ERROR;
+  }
+
+  // Returns the human readable name of command, as used in the
+  // "Expected ..." error messages sent to the delegate.
+  static std::string_view command_name(Command command) {
+    switch (command) {
+      case Command::MISC:
+        return "misc"sv;
+      case Command::FILL_BUFFER:
+        return "more data"sv;
+      case Command::ELEMENT:
+        return "element"sv;
+      case Command::SPACE:
+        return "whitespace"sv;
+      case Command::COMMENT:
+        return "comment"sv;
+      case Command::PROCESSING_INSTRUCTION:
+        return "processing instruction"sv;
+      case Command::XMLDECL:
+        return "xml declaration"sv;
+      case Command::ATTRIBUTE:
+        return "attribute"sv;
+      case Command::ATTRIBUTE_VALUE:
+        return "attribute value"sv;
+      case Command::NAME:
+        return "name"sv;
+      case Command::EQUAL:
+        return "equal sign (=)"sv;
+      case Command::START_OR_EMPTY_TAG:
+        // Same user-facing name as ELEMENT.
+        return "element"sv;
+      case Command::END_TAG:
+        return "end tag"sv;
+    }
+    // Only reachable with a value that is not a Command enumerator.
+    assert(false);
+    return {};
+  }
+
   std::shared_ptr<Delegate> delegate_;
   std::shared_ptr<DecoderFactory> decoder_factory_;
   std::unique_ptr<Decoder> decoder_;
-  std::size_t default_buffer_size_;
-  std::size_t max_buffer_size_;
+  bool const forced_decoder_;
+  std::unique_ptr<Buffer> buffer_;
+  Entities entities_;
+  std::vector<CommandItem> cmds_;
+  std::vector<StackItem> stack_;
+  uint64_t line_{1};
+  uint64_t column_{0};
 };
 
 }  // namespace
@@ -117,9 +1196,9 @@ std::unique_ptr<Processor> create_processor(
                                         decoder_factory.get());
   }
 
-  std::size_t default_buffer_size = 8192;
+  std::size_t default_buffer_size = kDefaultBufferSize;
   if (opt_default_buffer_size.has_value())
-    default_buffer_size = std::max(static_cast<std::size_t>(128),
+    default_buffer_size = std::max(kMinBufferSize,
                                    opt_default_buffer_size.value());
   // This value is documented in public headers. Do NOT change.
   std::size_t max_buffer_size = 10 * 1024 * 1024;
@@ -136,7 +1215,8 @@ std::unique_ptr<Processor> create_processor(
                                          max_buffer_size);
 }
 
-std::unique_ptr<Processor> create(std::shared_ptr<Delegate> delegate) {
+std::unique_ptr<Processor>
+Processor::create(std::shared_ptr<Delegate> delegate) {
   return create_processor(std::move(delegate), nullptr,
                           std::nullopt, std::nullopt, std::nullopt);
 }
diff --git a/sax/src/utils.cc b/sax/src/utils.cc
index f0366d5..e3a53b1 100644
--- a/sax/src/utils.cc
+++ b/sax/src/utils.cc
@@ -9,7 +9,7 @@ namespace sax {
 
 namespace {
 
-std::string cleanup_encoding(std::string const& str) {
+std::string cleanup_encoding(std::string_view str) {
   std::string ret;
   ret.reserve(str.size());
   for (auto c : str) {
@@ -29,29 +29,29 @@ std::string cleanup_encoding(std::string const& str) {
 // Names inspired by:
 // https://www.iana.org/assignments/character-sets/character-sets.xhtml
 std::unique_ptr<Decoder> pick_decoder_for_encoding(
-    std::string const& encoding, DecoderFactory* factory) {
+    std::string_view encoding, DecoderFactory* factory) {
   auto clean_enc = cleanup_encoding(encoding);
-  if (clean_enc == "utf-8" || clean_enc == "utf8") {
+  if (clean_enc == "utf-8" || clean_enc == "utf8")
     return create_utf8_decoder();
-  }
-  if (clean_enc == "utf-16" || clean_enc == "utf16") {
+
+  if (clean_enc == "utf-16" || clean_enc == "utf16")
     return create_utf16_decoder();
-  }
-  if (clean_enc == "utf-16be" || clean_enc == "utf16be") {
+
+  if (clean_enc == "utf-16be" || clean_enc == "utf16be")
     return create_utf16be_decoder();
-  }
-  if (clean_enc == "utf-16le" || clean_enc == "utf16le") {
+
+  if (clean_enc == "utf-16le" || clean_enc == "utf16le")
     return create_utf16le_decoder();
-  }
-  if (clean_enc == "utf-32" || clean_enc == "utf32") {
+
+  if (clean_enc == "utf-32" || clean_enc == "utf32")
     return create_utf32_decoder();
-  }
-  if (clean_enc == "utf-32be" || clean_enc == "utf32be") {
+
+  if (clean_enc == "utf-32be" || clean_enc == "utf32be")
     return create_utf32be_decoder();
-  }
-  if (clean_enc == "utf-32le" || clean_enc == "utf32le") {
+
+  if (clean_enc == "utf-32le" || clean_enc == "utf32le")
     return create_utf32le_decoder();
-  }
+
   if (clean_enc == "ascii" || clean_enc == "us-ascii" ||
       clean_enc == "usascii" || clean_enc == "iso-ir-6" ||
       clean_enc == "ansi-x3-4-1968" || clean_enc == "ansi-x3-4-1986" ||
@@ -59,9 +59,10 @@ std::unique_ptr<Decoder> pick_decoder_for_encoding(
       clean_enc == "us" || clean_enc == "ibm367" || clean_enc == "cp367") {
     return create_ascii_decoder();
   }
-  if (factory) {
+
+  if (factory)
     return factory->create(encoding);
-  }
+
   return nullptr;
 }
 
diff --git a/sax/src/utils.hh b/sax/src/utils.hh
index 206d003..074f0c0 100644
--- a/sax/src/utils.hh
+++ b/sax/src/utils.hh
@@ -4,7 +4,7 @@
 #include "macros.hh"
 
 #include <memory>
-#include <string>
+#include <string_view>
 
 namespace modxml {
 namespace sax {
@@ -13,7 +13,7 @@ class Decoder;
 class DecoderFactory;
 
 std::unique_ptr<Decoder> HIDDEN pick_decoder_for_encoding(
-    std::string const& encoding,
+    std::string_view encoding,
     DecoderFactory* factory);
 
 }  // namespace sax
diff --git a/sax/tst/test_buffer.cc b/sax/tst/test_buffer.cc
new file mode 100644
index 0000000..13bc6d4
--- /dev/null
+++ b/sax/tst/test_buffer.cc
@@ -0,0 +1,272 @@
+#include "buffer.hh"
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+namespace {
+
+// Test parameter selecting how BufferTest::make_buffer() sizes the buffer.
+enum class BufferType {
+  // make_buffer(size, size)
+  FIXED,
+  // make_buffer(size / 2, size)
+  DYNAMIC,
+};
+
+// Fixture that runs every TEST_P below once per BufferType.
+class BufferTest : public testing::TestWithParam<BufferType> {
+ protected:
+  // Creates the buffer under test. Both variants pass `size` as the second
+  // argument of modxml::sax::make_buffer(); they differ in the first one
+  // (size for FIXED, size / 2 for DYNAMIC).
+  std::unique_ptr<modxml::sax::Buffer> make_buffer(std::size_t size) {
+    switch (GetParam()) {
+      case BufferType::FIXED:
+        return modxml::sax::make_buffer(size, size);
+      case BufferType::DYNAMIC:
+        return modxml::sax::make_buffer(size / 2, size);
+    }
+    // Only reachable with a value that is not a BufferType enumerator.
+    return nullptr;
+  }
+};
+
+// Payloads used by the tests: ten 'A' bytes and five 'B' bytes.
+std::array<uint8_t, 10> AAAAAAAAAA{
+  'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'};
+std::array<uint8_t, 5> BBBBB{
+  'B', 'B', 'B', 'B', 'B'};
+
+}  // namespace
+
+// Basic round trip on a buffer created with make_buffer(10): empty()/full()
+// reporting, all-or-nothing write_all()/read_all(), and partial
+// write()/read() returning the number of bytes actually transferred.
+TEST_P(BufferTest, sanity) {
+  auto buf = make_buffer(10);
+  EXPECT_TRUE(buf->empty());
+  EXPECT_FALSE(buf->full());
+
+  EXPECT_TRUE(buf->write_all(AAAAAAAAAA));
+  EXPECT_TRUE(buf->full());
+  EXPECT_FALSE(buf->empty());
+
+  // No room left: write_all() must refuse.
+  EXPECT_FALSE(buf->write_all(AAAAAAAAAA));
+
+  std::array<uint8_t, 10> tmp10;
+  EXPECT_TRUE(buf->read_all(tmp10));
+  EXPECT_THAT(tmp10, testing::ContainerEq(AAAAAAAAAA));
+  EXPECT_TRUE(buf->empty());
+  EXPECT_FALSE(buf->full());
+
+  EXPECT_TRUE(buf->write_all(BBBBB));
+  EXPECT_FALSE(buf->full());
+  EXPECT_FALSE(buf->empty());
+
+  // Only five of the ten bytes fit.
+  EXPECT_EQ(5u, buf->write(AAAAAAAAAA));
+  EXPECT_TRUE(buf->full());
+  EXPECT_FALSE(buf->empty());
+
+  std::array<uint8_t, 3> tmp3;
+  EXPECT_TRUE(buf->read_all(tmp3));
+  EXPECT_THAT(tmp3, testing::ElementsAre('B', 'B', 'B'));
+
+  // Three bytes were freed by the read above.
+  EXPECT_EQ(3u, buf->write(BBBBB));
+
+  EXPECT_TRUE(buf->read_all(tmp3));
+  EXPECT_THAT(tmp3, testing::ElementsAre('B', 'B', 'A'));
+
+  std::array<uint8_t, 5> tmp5;
+  EXPECT_TRUE(buf->read_all(tmp5));
+  EXPECT_THAT(tmp5, testing::ElementsAre('A', 'A', 'A', 'A', 'B'));
+
+  // Two bytes remain: read_all() of three fails, read() returns two and
+  // leaves the last element of the destination untouched.
+  EXPECT_FALSE(buf->read_all(tmp3));
+  tmp3[2] = 'X';
+  EXPECT_EQ(2u, buf->read(tmp3));
+  EXPECT_THAT(tmp3, testing::ElementsAre('B', 'B', 'X'));
+}
+
+// Zero-length operations succeed and leave an empty buffer empty.
+TEST_P(BufferTest, noop) {
+  auto buf = make_buffer(10);
+  EXPECT_TRUE(buf->empty());
+
+  std::array<uint8_t, 0> empty;
+  EXPECT_EQ(0u, buf->write(empty));
+  EXPECT_EQ(0u, buf->read(empty));
+
+  EXPECT_TRUE(buf->write_all(empty));
+  EXPECT_TRUE(buf->read_all(empty));
+
+  buf->commit(0);
+  buf->consume(0);
+
+  EXPECT_TRUE(buf->empty());
+}
+
+// Writes the values 0..20 one byte at a time and checks that they are read
+// back in the same order. Reading starts once ten bytes have been written,
+// so the buffer is full before each of those reads.
+TEST_P(BufferTest, one_byte_filler) {
+  auto buf = make_buffer(10);
+
+  std::array<uint8_t, 1> tmp1;
+  // Next value expected from the buffer.
+  uint8_t out = 0;
+  for (uint8_t in = 0; in <= 20; ++in) {
+    tmp1[0] = in;
+    EXPECT_TRUE(buf->write_all(tmp1));
+    if (in >= 9) {
+      EXPECT_TRUE(buf->read_all(tmp1));
+      EXPECT_EQ(tmp1[0], out);
+      ++out;
+    }
+  }
+  // Drain what is left.
+  for (; out <= 20; ++out) {
+    EXPECT_TRUE(buf->read_all(tmp1));
+    EXPECT_EQ(tmp1[0], out);
+  }
+  EXPECT_TRUE(buf->empty());
+}
+
+// Fills the buffer, frees the first half, refills it and then reads all
+// ten bytes in one go; the name suggests that read crosses the wrap point
+// of the underlying storage.
+TEST_P(BufferTest, read_wrap) {
+  auto buf = make_buffer(10);
+
+  EXPECT_TRUE(buf->write_all(BBBBB));
+  EXPECT_EQ(5u, buf->write(AAAAAAAAAA));
+
+  std::array<uint8_t, 5> tmp5;
+  EXPECT_TRUE(buf->read_all(tmp5));
+  EXPECT_THAT(tmp5, testing::ContainerEq(BBBBB));
+
+  EXPECT_EQ(5u, buf->write(AAAAAAAAAA));
+
+  std::array<uint8_t, 10> tmp10;
+  EXPECT_TRUE(buf->read_all(tmp10));
+  EXPECT_THAT(tmp10, testing::ContainerEq(AAAAAAAAAA));
+}
+
+// Same sequence as read_wrap, but discards the data with consume()
+// instead of reading it.
+TEST_P(BufferTest, skip_wrap) {
+  auto buf = make_buffer(10);
+
+  EXPECT_TRUE(buf->write_all(BBBBB));
+  EXPECT_EQ(5u, buf->write(AAAAAAAAAA));
+
+  buf->consume(5);
+  EXPECT_FALSE(buf->empty());
+
+  EXPECT_EQ(5u, buf->write(AAAAAAAAAA));
+
+  buf->consume(10);
+  EXPECT_TRUE(buf->empty());
+}
+
+// Writes ten bytes into a 12 byte buffer that already had five bytes
+// written and three read, then checks that the remaining twelve bytes
+// come back in order.
+TEST_P(BufferTest, write_wrap) {
+  auto buf = make_buffer(12);
+
+  EXPECT_TRUE(buf->write_all(BBBBB));
+
+  std::array<uint8_t, 3> tmp3;
+  EXPECT_TRUE(buf->read_all(tmp3));
+  EXPECT_THAT(tmp3, testing::ElementsAre('B', 'B', 'B'));
+
+  EXPECT_TRUE(buf->write_all(AAAAAAAAAA));
+
+  std::array<uint8_t, 12> tmp12;
+  EXPECT_EQ(12u, buf->read(tmp12));
+  EXPECT_THAT(tmp12, testing::ElementsAre(
+      'B', 'B', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'));
+}
+
+// Leaves three bytes in a 12 byte buffer, tops it up with two writes (the
+// second one only partially fits) and reads all twelve bytes in one go.
+TEST_P(BufferTest, read_wrap2) {
+  auto buf = make_buffer(12);
+
+  EXPECT_TRUE(buf->write_all(AAAAAAAAAA));
+
+  std::array<uint8_t, 7> tmp7;
+  EXPECT_TRUE(buf->read_all(tmp7));
+  EXPECT_THAT(tmp7, testing::ElementsAre('A', 'A', 'A', 'A', 'A', 'A', 'A'));
+
+  EXPECT_EQ(5u, buf->write(BBBBB));
+  // Only four bytes of room are left.
+  EXPECT_EQ(4u, buf->write(BBBBB));
+
+  std::array<uint8_t, 12> tmp12;
+  EXPECT_TRUE(buf->read_all(tmp12));
+  EXPECT_THAT(tmp12, testing::ElementsAre(
+      'A', 'A', 'A', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B'));
+}
+
+// A buffer created with make_buffer(10, 1000) accepts two 30 byte writes
+// and returns the 60 bytes unchanged and in order.
+TEST(Buffer, dynamic_resize) {
+  auto buf = modxml::sax::make_buffer(10, 1000);
+
+  std::array<uint8_t, 30> tmp30;
+  for (uint8_t i = 0; i < 30; ++i)
+    tmp30[i] = i;
+
+  EXPECT_TRUE(buf->write_all(tmp30));
+  EXPECT_TRUE(buf->write_all(tmp30));
+
+  std::array<uint8_t, 60> tmp60;
+  EXPECT_TRUE(buf->read_all(tmp60));
+  for (uint8_t i = 0; i < 60; ++i)
+    EXPECT_EQ(i % 30, tmp60[i]) << i;
+}
+
+// wspan() returns an empty span when the requested size cannot be
+// provided, here a request for the largest possible std::size_t.
+TEST(Buffer, dynamic_overalloc) {
+  // This test can fail, but in most configurations trying to allocate
+  // std::numeric_limits<std::size_t>::max() will fail.
+  auto buf = modxml::sax::make_buffer(10, std::numeric_limits<std::size_t>::max());
+  EXPECT_FALSE(buf->wspan(10000).empty());
+  EXPECT_TRUE(buf->wspan(std::numeric_limits<std::size_t>::max()).empty());
+}
+
+// Bytes changed through the span returned by mspan() are visible to
+// subsequent reads.
+TEST_P(BufferTest, modify) {
+  auto buf = make_buffer(10);
+
+  EXPECT_TRUE(buf->write_all(AAAAAAAAAA));
+
+  // Five bytes are asked for; the span covers all ten buffered bytes.
+  auto span = buf->mspan(5);
+  EXPECT_EQ(10u, span.size());
+  auto len = std::min(static_cast<std::size_t>(5), span.size());
+  for (uint8_t i = 0; i < len; ++i)
+    span[i] = 'C';
+
+  std::array<uint8_t, 10> tmp10;
+  EXPECT_TRUE(buf->read_all(tmp10));
+  EXPECT_THAT(tmp10, testing::ElementsAre(
+      'C', 'C', 'C', 'C', 'C', 'A', 'A', 'A', 'A', 'A'));
+}
+
+// uncommit(n) drops the n most recently written bytes and returns how
+// many were dropped.
+TEST_P(BufferTest, uncommit) {
+  auto buf = make_buffer(10);
+
+  EXPECT_TRUE(buf->write_all(BBBBB));
+
+  EXPECT_EQ(0u, buf->uncommit(0));
+
+  EXPECT_EQ(5u, buf->write(AAAAAAAAAA));
+
+  std::array<uint8_t, 2> tmp2;
+  EXPECT_TRUE(buf->read_all(tmp2));
+  EXPECT_THAT(tmp2, testing::ElementsAre('B', 'B'));
+
+  // Removes the last three 'A's, leaving B B B A A.
+  EXPECT_EQ(3u, buf->uncommit(3));
+  std::array<uint8_t, 5> tmp5;
+  EXPECT_TRUE(buf->read_all(tmp5));
+  EXPECT_THAT(tmp5, testing::ElementsAre('B', 'B', 'B', 'A', 'A'));
+
+  // Nothing is left to uncommit once the buffer has been drained.
+  EXPECT_EQ(0u, buf->uncommit(2));
+}
+
+// uncommit() of more bytes than the last write added: eight bytes are
+// dropped (the five 'B's and three of the remaining 'A's), leaving two
+// 'A's.
+TEST_P(BufferTest, uncommit_wrap) {
+  auto buf = make_buffer(10);
+
+  EXPECT_TRUE(buf->write_all(AAAAAAAAAA));
+  std::array<uint8_t, 5> tmp5;
+  EXPECT_TRUE(buf->read_all(tmp5));
+
+  EXPECT_TRUE(buf->write_all(BBBBB));
+
+  EXPECT_EQ(8u, buf->uncommit(8));
+  std::array<uint8_t, 2> tmp2;
+  EXPECT_TRUE(buf->read_all(tmp2));
+  EXPECT_THAT(tmp2, testing::ElementsAre('A', 'A'));
+}
+
+// Runs every BufferTest case for both buffer variants; the lambda supplies
+// the per-parameter name used for the instantiated tests.
+INSTANTIATE_TEST_SUITE_P(
+    BufferTests,
+    BufferTest,
+    testing::Values(BufferType::FIXED, BufferType::DYNAMIC),
+    [](auto& info) {
+      switch (info.param) {
+        case BufferType::FIXED:
+          return "fixed";
+        case BufferType::DYNAMIC:
+          return "dynamic";
+      }
+      // Only reachable with a value that is not a BufferType enumerator.
+      return "";
+    }
+);
diff --git a/sax/tst/test_decoder.cc b/sax/tst/test_decoder.cc
new file mode 100644
index 0000000..86f230b
--- /dev/null
+++ b/sax/tst/test_decoder.cc
@@ -0,0 +1,242 @@
+#include "sax_decoder.hh"
+#include "sax_decoder_factory.hh"
+#include "sax_processor.hh"
+#include "sax_delegate.hh"
+
+#include <memory>
+#include <gtest/gtest.h>
+
+namespace {
+
+class TestDelegate : public modxml::sax::Delegate {
+ public:
+  ~TestDelegate() override = default;
+
+  // Every empty element must be named "root" and may be seen only once.
+  void empty_element(std::string_view name,
+                     modxml::sax::Attributes const&) override {
+    EXPECT_EQ(name, "root");
+    if (name != "root")
+      return;
+    EXPECT_FALSE(have_root_);
+    have_root_ = true;
+  }
+  // Any reported error is remembered and fails the current test.
+  void error(std::string_view message) override {
+    have_error_ = true;
+    FAIL() << message;
+  }
+
+  bool have_root() const { return have_root_; }
+  bool have_error() const { return have_error_; }
+
+ private:
+  bool have_root_ = false;
+  bool have_error_ = false;
+};
+
+bool process_all(modxml::sax::Processor& processor,
+                 TestDelegate& delegate,
+                 std::span<uint8_t const> data) {
+  // Feed data until all of it is consumed; give up on no progress or error.
+  for (std::size_t pos = 0; pos < data.size();) {
+    auto const used = processor.process(data, pos);
+    if (used == 0 || delegate.have_error())
+      return false;
+    pos += used;
+  }
+  return true;
+}
+
+}  // namespace
+
+TEST(sax, decoder_utf8) {
+  auto delegate = std::make_shared<TestDelegate>();
+  auto processor = modxml::sax::Processor::create(delegate);
+  // UTF-8 encoded document without a byte order mark.
+  std::string xml = R"(<?xml version="1.0" encoding="utf-8"?><root />)";
+  std::cerr << xml << std::endl;
+  std::span<uint8_t const> const bytes(
+      reinterpret_cast<uint8_t const*>(xml.data()), xml.size());
+  EXPECT_TRUE(process_all(*processor.get(), *delegate.get(), bytes));
+
+  EXPECT_TRUE(delegate->have_root());
+}
+
+TEST(sax, decoder_utf8_bom) {
+  auto delegate = std::make_shared<TestDelegate>();
+  auto processor = modxml::sax::Processor::create(delegate);
+  // UTF-8 document preceded by the UTF-8 encoded byte order mark (EF BB BF).
+  std::string xml =
+      "\xef\xbb\xbf" R"(<?xml version="1.0" encoding="utf-8"?><root />)";
+  std::cerr << xml << std::endl;
+  std::span<uint8_t const> const bytes(
+      reinterpret_cast<uint8_t const*>(xml.data()), xml.size());
+  EXPECT_TRUE(process_all(*processor.get(), *delegate.get(), bytes));
+
+  EXPECT_TRUE(delegate->have_root());
+}
+
+TEST(sax, decoder_utf16) {
+  auto delegate = std::make_shared<TestDelegate>();
+  auto processor = modxml::sax::Processor::create(delegate);
+  // The code units are passed on exactly as they are laid out in memory.
+  std::u16string xml = uR"(<?xml version="1.0" encoding="utf-16"?><root />)";
+  std::span<uint8_t const> const bytes(
+      reinterpret_cast<uint8_t const*>(xml.data()),
+      xml.size() * sizeof(char16_t));
+  EXPECT_TRUE(process_all(*processor.get(), *delegate.get(), bytes));
+  EXPECT_TRUE(delegate->have_root());
+}
+
+TEST(sax, decoder_utf16be) {
+  auto delegate = std::make_shared<TestDelegate>();
+  auto processor = modxml::sax::Processor::create(delegate);
+  std::u16string xml = uR"(<?xml version="1.0" encoding="utf-16"?><root />)";
+  // Serialize every code unit most significant byte first.
+  std::vector<uint8_t> bytes;
+  for (char16_t const unit : xml) {
+    bytes.push_back(static_cast<uint8_t>(unit >> 8));
+    bytes.push_back(static_cast<uint8_t>(unit & 0xff));
+  }
+  EXPECT_TRUE(process_all(
+      *processor.get(), *delegate.get(),
+      std::span<uint8_t const>(bytes.data(), bytes.size())));
+  EXPECT_TRUE(delegate->have_root());
+}
+
+TEST(sax, decoder_utf16le) {
+  auto delegate = std::make_shared<TestDelegate>();
+  auto processor = modxml::sax::Processor::create(delegate);
+  std::u16string xml = uR"(<?xml version="1.0" encoding="utf-16"?><root />)";
+  // Serialize every code unit least significant byte first.
+  std::vector<uint8_t> bytes;
+  for (char16_t const unit : xml) {
+    bytes.push_back(static_cast<uint8_t>(unit & 0xff));
+    bytes.push_back(static_cast<uint8_t>(unit >> 8));
+  }
+  EXPECT_TRUE(process_all(
+      *processor.get(), *delegate.get(),
+      std::span<uint8_t const>(bytes.data(), bytes.size())));
+  EXPECT_TRUE(delegate->have_root());
+}
+
+TEST(sax, decoder_utf16be_bom) {
+  auto delegate = std::make_shared<TestDelegate>();
+  auto processor = modxml::sax::Processor::create(delegate);
+  // The byte order mark is U+FEFF; big endian serializes it as FE FF.
+  std::u16string str =
+      u"\ufeff" uR"(<?xml version="1.0" encoding="utf-16"?><root />)";
+  std::vector<uint8_t> input;
+  for (char16_t c : str) {
+    input.push_back(c >> 8);
+    input.push_back(c & 0xff);
+  }
+  EXPECT_TRUE(process_all(
+      *processor.get(), *delegate.get(),
+      std::span<uint8_t const>(input.data(), input.size())));
+  EXPECT_TRUE(delegate->have_root());
+}
+
+TEST(sax, decoder_utf16le_bom) {
+  auto delegate = std::make_shared<TestDelegate>();
+  auto processor = modxml::sax::Processor::create(delegate);
+  // The byte order mark is U+FEFF; little endian serializes it as FF FE.
+  std::u16string str =
+      u"\ufeff" uR"(<?xml version="1.0" encoding="utf-16"?><root />)";
+  std::vector<uint8_t> input;
+  for (char16_t c : str) {
+    input.push_back(c & 0xff);
+    input.push_back(c >> 8);
+  }
+  EXPECT_TRUE(process_all(
+      *processor.get(), *delegate.get(),
+      std::span<uint8_t const>(input.data(), input.size())));
+  EXPECT_TRUE(delegate->have_root());
+}
+
+TEST(sax, decoder_utf32) {
+  auto delegate = std::make_shared<TestDelegate>();
+  auto processor = modxml::sax::Processor::create(delegate);
+  // The code units are passed on exactly as they are laid out in memory.
+  std::u32string xml = UR"(<?xml version="1.0" encoding="utf-32"?><root />)";
+  std::span<uint8_t const> const bytes(
+      reinterpret_cast<uint8_t const*>(xml.data()),
+      xml.size() * sizeof(char32_t));
+  EXPECT_TRUE(process_all(*processor.get(), *delegate.get(), bytes));
+  EXPECT_TRUE(delegate->have_root());
+}
+
+TEST(sax, decoder_utf32be) {
+  auto delegate = std::make_shared<TestDelegate>();
+  auto processor = modxml::sax::Processor::create(delegate);
+  std::u32string xml = UR"(<?xml version="1.0" encoding="utf-32"?><root />)";
+  // Serialize every code unit most significant byte first.
+  std::vector<uint8_t> bytes;
+  for (char32_t const unit : xml) {
+    bytes.push_back(static_cast<uint8_t>(unit >> 24));
+    bytes.push_back(static_cast<uint8_t>((unit >> 16) & 0xff));
+    bytes.push_back(static_cast<uint8_t>((unit >> 8) & 0xff));
+    bytes.push_back(static_cast<uint8_t>(unit & 0xff));
+  }
+  EXPECT_TRUE(process_all(
+      *processor.get(), *delegate.get(),
+      std::span<uint8_t const>(bytes.data(), bytes.size())));
+  EXPECT_TRUE(delegate->have_root());
+}
+
+TEST(sax, decoder_utf32le) {
+  auto delegate = std::make_shared<TestDelegate>();
+  auto processor = modxml::sax::Processor::create(delegate);
+  std::u32string xml = UR"(<?xml version="1.0" encoding="utf-32"?><root />)";
+  // Serialize every code unit least significant byte first.
+  std::vector<uint8_t> bytes;
+  for (char32_t const unit : xml) {
+    bytes.push_back(static_cast<uint8_t>(unit & 0xff));
+    bytes.push_back(static_cast<uint8_t>((unit >> 8) & 0xff));
+    bytes.push_back(static_cast<uint8_t>((unit >> 16) & 0xff));
+    bytes.push_back(static_cast<uint8_t>(unit >> 24));
+  }
+  EXPECT_TRUE(process_all(
+      *processor.get(), *delegate.get(),
+      std::span<uint8_t const>(bytes.data(), bytes.size())));
+  EXPECT_TRUE(delegate->have_root());
+}
+
+TEST(sax, decoder_utf32be_bom) {
+  auto delegate = std::make_shared<TestDelegate>();
+  auto processor = modxml::sax::Processor::create(delegate);
+  // The byte order mark is U+FEFF; big endian serializes it as 00 00 FE FF.
+  std::u32string str =
+      U"\ufeff" UR"(<?xml version="1.0" encoding="utf-32"?><root />)";
+  std::vector<uint8_t> input;
+  for (char32_t c : str) {
+    input.push_back(c >> 24);
+    input.push_back((c >> 16) & 0xff);
+    input.push_back((c >> 8) & 0xff);
+    input.push_back(c & 0xff);
+  }
+  EXPECT_TRUE(process_all(
+      *processor.get(), *delegate.get(),
+      std::span<uint8_t const>(input.data(), input.size())));
+  EXPECT_TRUE(delegate->have_root());
+}
+
+TEST(sax, decoder_utf32le_bom) {
+  auto delegate = std::make_shared<TestDelegate>();
+  auto processor = modxml::sax::Processor::create(delegate);
+  // The byte order mark is U+FEFF; little endian serializes it as FF FE 00 00.
+  std::u32string str =
+      U"\ufeff" UR"(<?xml version="1.0" encoding="utf-32"?><root />)";
+  std::vector<uint8_t> input;
+  for (char32_t c : str) {
+    input.push_back(c & 0xff);
+    input.push_back((c >> 8) & 0xff);
+    input.push_back((c >> 16) & 0xff);
+    input.push_back(c >> 24);
+  }
+  EXPECT_TRUE(process_all(
+      *processor.get(), *delegate.get(),
+      std::span<uint8_t const>(input.data(), input.size())));
+  EXPECT_TRUE(delegate->have_root());
+}
diff --git a/utf/inc/utf16.hh b/utf/inc/utf16.hh
index 344b1a2..b9229bc 100644
--- a/utf/inc/utf16.hh
+++ b/utf/inc/utf16.hh
@@ -4,27 +4,29 @@
 #include "macros.hh"
 
 #include <cstdint>
-#include <string_view>
+#include <span>
 
 namespace utf {
 
-/* Read one unicode codepoint from UTF-16 BigEndian encoded data if possible.
+/**
+ * Read one unicode codepoint from UTF-16 BigEndian encoded data if possible.
  * If successfull offset is incremented to point to next codepoint.
  * Will fail:
  * - not enough data is left in data given offset, returns NEED_MORE.
  * - data is not valid UTF-16, ie. invalid or incomplete surrogate pairs,
  *   returns INVALID.
  */
-uint32_t HIDDEN read16be(std::string_view data, std::size_t& offset);
+uint32_t HIDDEN read16be(std::span<uint8_t const> data, std::size_t& offset);
 
-/* Read one unicode codepoint from UTF-16 LittleEndian encoded data if possible.
+/**
+ * Read one unicode codepoint from UTF-16 LittleEndian encoded data if possible.
  * If successfull offset is incremented to point to next codepoint.
  * Will fail:
  * - not enough data is left in data given offset, returns NEED_MORE.
  * - data is not valid UTF-16, ie. invalid or incomplete surrogate pairs,
  *   returns INVALID.
  */
-uint32_t HIDDEN read16le(std::string_view data, std::size_t& offset);
+uint32_t HIDDEN read16le(std::span<uint8_t const> data, std::size_t& offset);
 
 }  // namespace utf
 
diff --git a/utf/inc/utf32.hh b/utf/inc/utf32.hh
index 2d3088e..4ee5eac 100644
--- a/utf/inc/utf32.hh
+++ b/utf/inc/utf32.hh
@@ -4,25 +4,27 @@
 #include "macros.hh"
 
 #include <cstdint>
-#include <string_view>
+#include <span>
 
 namespace utf {
 
-/* Read one unicode codepoint from UTF-32 BigEndian encoded data if possible.
+/**
+ * Read one unicode codepoint from UTF-32 BigEndian encoded data if possible.
  * If successfull offset is incremented to point to next codepoint.
  * Will fail:
  * - not enough data is left in data given offset, returns NEED_MORE.
  * - data is not valid UTF-32, ie. outside valid ranges, returns INVALID.
  */
-uint32_t HIDDEN read32be(std::string_view data, std::size_t& offset);
+uint32_t HIDDEN read32be(std::span<uint8_t const> data, std::size_t& offset);
 
-/* Read one unicode codepoint from UTF-32 LittleEndian encoded data if possible.
+/**
+ * Read one unicode codepoint from UTF-32 LittleEndian encoded data if possible.
  * If successfull offset is incremented to point to next codepoint.
  * Will fail:
  * - not enough data is left in data given offset, returns NEED_MORE.
  * - data is not valid UTF-32, ie. outside valid ranges, returns INVALID.
  */
-uint32_t HIDDEN read32le(std::string_view data, std::size_t& offset);
+uint32_t HIDDEN read32le(std::span<uint8_t const> data, std::size_t& offset);
 
 }  // namespace utf
 
diff --git a/utf/inc/utf8.hh b/utf/inc/utf8.hh
index a3ea84a..7735ecd 100644
--- a/utf/inc/utf8.hh
+++ b/utf/inc/utf8.hh
@@ -4,18 +4,29 @@
 #include "macros.hh"
 
 #include <cstdint>
-#include <string_view>
+#include <span>
 
 namespace utf {
 
-/* Read one unicode codepoint from UTF-8 encoded data if possible.
- * If successfull offset is incremented to point to next codepoint.
+/**
+ * Read one unicode codepoint from UTF-8 encoded data if possible.
+ * If successful, offset is incremented to point to next codepoint.
  * Will fail:
  * - not enough data is left in data given offset, returns NEED_MORE.
  * - data is not valid UTF-8, this includes overlong encodings and
  *   invalid unicode code points, returns INVALID.
  */
-uint32_t HIDDEN read8(std::string_view data, std::size_t& offset);
+uint32_t HIDDEN read8(std::span<uint8_t const> data, std::size_t& offset);
+
+/**
+ * Write one unicode codepoint to UTF-8 encoded data if possible.
+ * If successful, offset is incremented to the end of the written data
+ * and true is returned.
+ * If not successful, offset is not incremented, data is not modified
+ * and false is returned.
+ */
+bool HIDDEN write8(uint32_t codepoint, std::span<uint8_t> data,
+                   std::size_t& offset);
 
 }  // namespace utf
 
diff --git a/utf/meson.build b/utf/meson.build
index 64db6ff..051ddd1 100644
--- a/utf/meson.build
+++ b/utf/meson.build
@@ -23,16 +23,16 @@ test('utf8',
      executable(
        'test_utf8',
        sources: ['tst/test_utf8.cc'],
-       dependencies: [utf_dep, gtest_dep]))
+       dependencies: [utf_dep, gmock_dep, gtest_dep]))
 
 test('utf16',
      executable(
        'test_utf16',
        sources: ['tst/test_utf16.cc'],
-       dependencies: [utf_dep, gtest_dep]))
+       dependencies: [utf_dep, gmock_dep, gtest_dep]))
 
 test('utf32',
      executable(
        'test_utf32',
        sources: ['tst/test_utf32.cc'],
-       dependencies: [utf_dep, gtest_dep]))
+       dependencies: [utf_dep, gmock_dep, gtest_dep]))
diff --git a/utf/src/utf16.cc b/utf/src/utf16.cc
index 43595bf..623c1be 100644
--- a/utf/src/utf16.cc
+++ b/utf/src/utf16.cc
@@ -16,7 +16,7 @@ inline bool is_low_surrogate(uint16_t c) {
 
 }  // namespace
 
-uint32_t read16be(std::string_view data, std::size_t& offset) {
+uint32_t read16be(std::span<uint8_t const> data, std::size_t& offset) {
   if (offset > data.size() || data.size() - offset < 2)
     return NEED_MORE;
   uint16_t c = static_cast<uint16_t>(data[offset]) << 8
@@ -40,7 +40,7 @@ uint32_t read16be(std::string_view data, std::size_t& offset) {
   return c;
 }
 
-uint32_t read16le(std::string_view data, std::size_t& offset) {
+uint32_t read16le(std::span<uint8_t const> data, std::size_t& offset) {
   if (offset > data.size() || data.size() - offset < 2)
     return NEED_MORE;
   uint16_t c = static_cast<uint16_t>(data[offset + 1]) << 8
diff --git a/utf/src/utf32.cc b/utf/src/utf32.cc
index cfa29b6..e33b0b4 100644
--- a/utf/src/utf32.cc
+++ b/utf/src/utf32.cc
@@ -12,7 +12,7 @@ inline bool valid_codepoint(uint32_t c) {
 
 }  // namespace
 
-uint32_t read32be(std::string_view data, std::size_t& offset) {
+uint32_t read32be(std::span<uint8_t const> data, std::size_t& offset) {
   if (offset > data.size() || data.size() - offset < 4)
     return NEED_MORE;
   uint32_t c = static_cast<uint32_t>(data[offset]) << 24
@@ -26,7 +26,7 @@ uint32_t read32be(std::string_view data, std::size_t& offset) {
   return INVALID;
 }
 
-uint32_t read32le(std::string_view data, std::size_t& offset) {
+uint32_t read32le(std::span<uint8_t const> data, std::size_t& offset) {
   if (offset > data.size() || data.size() - offset < 4)
     return NEED_MORE;
   uint32_t c = static_cast<uint32_t>(data[offset + 3]) << 24
diff --git a/utf/src/utf8.cc b/utf/src/utf8.cc
index 54b0296..0e444ae 100644
--- a/utf/src/utf8.cc
+++ b/utf/src/utf8.cc
@@ -12,12 +12,12 @@ inline bool valid_codepoint(uint32_t c) {
 
 }  // namespace
 
-uint32_t read8(std::string_view data, std::size_t& offset) {
+uint32_t read8(std::span<uint8_t const> data, std::size_t& offset) {
   if (offset >= data.size())
     return NEED_MORE;
   uint32_t ret;
   uint8_t size;
-  switch (static_cast<uint8_t>(data[offset]) >> 4) {
+  switch (data[offset] >> 4) {
     case 15:
       if (data[offset] & 0x08)
         return INVALID;
@@ -65,4 +65,35 @@ uint32_t read8(std::string_view data, std::size_t& offset) {
   return ret;
 }
 
+bool write8(uint32_t codepoint, std::span<uint8_t> data, std::size_t& offset) {
+  if (offset >= data.size()) UNLIKELY {
+    return false;  // Not even room for a single byte.
+  }
+  if (codepoint < 0x80) {  // 1 byte: 0xxxxxxx
+    data[offset++] = codepoint;
+  } else if (codepoint < 0x800) {  // 2 bytes: 110xxxxx 10xxxxxx
+    if (data.size() - offset < 2) UNLIKELY {
+      return false;
+    }
+    data[offset++] = 0xc0 | (codepoint >> 6);
+    data[offset++] = 0x80 | (codepoint & 0x3f);
+  } else if (codepoint < 0x10000) {  // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
+    if (data.size() - offset < 3 || (codepoint & 0xf800) == 0xd800) UNLIKELY {
+      return false;  // No room, or a surrogate (U+D800..U+DFFF).
+    }
+    data[offset++] = 0xe0 | (codepoint >> 12);
+    data[offset++] = 0x80 | ((codepoint >> 6) & 0x3f);
+    data[offset++] = 0x80 | (codepoint & 0x3f);
+  } else {  // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+    if (data.size() - offset < 4 || codepoint > 0x10ffff) UNLIKELY {
+      return false;  // No room, or beyond the last code point U+10FFFF.
+    }
+    data[offset++] = 0xf0 | (codepoint >> 18);
+    data[offset++] = 0x80 | ((codepoint >> 12) & 0x3f);
+    data[offset++] = 0x80 | ((codepoint >> 6) & 0x3f);
+    data[offset++] = 0x80 | (codepoint & 0x3f);
+  }
+  return true;
+}
+
 }  // namespace utf
diff --git a/utf/tst/test_utf16.cc b/utf/tst/test_utf16.cc
index c17982e..3b3c03c 100644
--- a/utf/tst/test_utf16.cc
+++ b/utf/tst/test_utf16.cc
@@ -2,156 +2,137 @@
 
 #include "utf_error.hh"
 
+#include <array>
 #include <gtest/gtest.h>
 
 TEST(utf16be, sanity) {
-  std::string_view str("\x00\x24", 2);
   size_t offset = 0;
-  auto ret = utf::read16be(str, offset);
+  auto ret = utf::read16be(std::array<uint8_t, 2>({0x00, 0x24}), offset);
   EXPECT_EQ('$', ret);
   EXPECT_EQ(2, offset);
 
-  str = "\x20\xAC";
   offset = 0;
-  ret = utf::read16be(str, offset);
+  ret = utf::read16be(std::array<uint8_t, 2>({0x20, 0xAC}), offset);
   EXPECT_EQ(0x20AC, ret);
   EXPECT_EQ(2, offset);
 
-  str = "\xD8\x01\xDC\x37";
   offset = 0;
-  ret = utf::read16be(str, offset);
+  ret = utf::read16be(std::array<uint8_t, 4>({0xD8, 0x01, 0xDC, 0x37}), offset);
   EXPECT_EQ(0x10437, ret);
   EXPECT_EQ(4, offset);
 
-  str = "\xD8\x52\xDF\x62";
   offset = 0;
-  ret = utf::read16be(str, offset);
+  ret = utf::read16be(std::array<uint8_t, 4>({0xD8, 0x52, 0xDF, 0x62}), offset);
   EXPECT_EQ(0x24B62, ret);
   EXPECT_EQ(4, offset);
 }
 
 TEST(utf16le, sanity) {
-  std::string_view str("\x24\x00", 2);
   size_t offset = 0;
-  auto ret = utf::read16le(str, offset);
+  auto ret = utf::read16le(std::array<uint8_t, 2>({0x24, 0x00}), offset);
   EXPECT_EQ('$', ret);
   EXPECT_EQ(2, offset);
 
-  str = "\xAC\x20";
   offset = 0;
-  ret = utf::read16le(str, offset);
+  ret = utf::read16le(std::array<uint8_t, 2>({0xAC, 0x20}), offset);
   EXPECT_EQ(0x20AC, ret);
   EXPECT_EQ(2, offset);
 
-  str = "\x01\xD8\x37\xDC";
   offset = 0;
-  ret = utf::read16le(str, offset);
+  ret = utf::read16le(std::array<uint8_t, 4>({0x01, 0xD8, 0x37, 0xDC}), offset);
   EXPECT_EQ(0x10437, ret);
   EXPECT_EQ(4, offset);
 
-  str = "\x52\xD8\x62\xDF";
   offset = 0;
-  ret = utf::read16le(str, offset);
+  ret = utf::read16le(std::array<uint8_t, 4>({0x52, 0xD8, 0x62, 0xDF}), offset);
   EXPECT_EQ(0x24B62, ret);
   EXPECT_EQ(4, offset);
 }
 
 TEST(utf16be, bom) {
-  std::string_view str("\xFE\xFF\x20\xAC");
+  std::array<uint8_t, 4> data({0xFE, 0xFF, 0x20, 0xAC});
   size_t offset = 0;
-  auto ret = utf::read16be(str, offset);
+  auto ret = utf::read16be(data, offset);
   EXPECT_EQ(0xFEFF, ret);
-  ret = utf::read16be(str, offset);
+  ret = utf::read16be(data, offset);
   EXPECT_EQ(0x20AC, ret);
-  ret = utf::read16be(str, offset);
+  ret = utf::read16be(data, offset);
   EXPECT_EQ(utf::NEED_MORE, ret);
-  EXPECT_EQ(str.size(), offset);
+  EXPECT_EQ(data.size(), offset);
 }
 
 TEST(utf16le, bom) {
-  std::string_view str("\xFF\xFE\xAC\x20");
+  std::array<uint8_t, 4> data({0xFF, 0xFE, 0xAC, 0x20});
   size_t offset = 0;
-  auto ret = utf::read16le(str, offset);
+  auto ret = utf::read16le(data, offset);
   EXPECT_EQ(0xFEFF, ret);
-  ret = utf::read16le(str, offset);
+  ret = utf::read16le(data, offset);
   EXPECT_EQ(0x20AC, ret);
-  ret = utf::read16le(str, offset);
+  ret = utf::read16le(data, offset);
   EXPECT_EQ(utf::NEED_MORE, ret);
-  EXPECT_EQ(str.size(), offset);
+  EXPECT_EQ(data.size(), offset);
 }
 
 TEST(utf16be, invalid) {
-  std::string_view str("\xD8");
   size_t offset = 0;
-  auto ret = utf::read16be(str, offset);
+  auto ret = utf::read16be(std::array<uint8_t, 1>({0xD8}), offset);
   EXPECT_EQ(utf::NEED_MORE, ret);
   EXPECT_EQ(0, offset);
 
-  str = "";
   offset = 0;
-  ret = utf::read16be(str, offset);
+  ret = utf::read16be(std::array<uint8_t, 0>(), offset);
   EXPECT_EQ(utf::NEED_MORE, ret);
   EXPECT_EQ(0, offset);
 
-  str = "\xD8\x01";
   offset = 0;
-  ret = utf::read16be(str, offset);
+  ret = utf::read16be(std::array<uint8_t, 2>({0xD8, 0x01}), offset);
   EXPECT_EQ(utf::NEED_MORE, ret);
   EXPECT_EQ(0, offset);
 
-  str = "\xD8\x01\xDC";
   offset = 0;
-  ret = utf::read16be(str, offset);
+  ret = utf::read16be(std::array<uint8_t, 3>({0xD8, 0x01, 0xDC}), offset);
   EXPECT_EQ(utf::NEED_MORE, ret);
   EXPECT_EQ(0, offset);
 
-  str = "\xDC\x37\xD8\x01";
   offset = 0;
-  ret = utf::read16be(str, offset);
+  ret = utf::read16be(std::array<uint8_t, 4>({0xDC, 0x37, 0xD8, 0x01}), offset);
   EXPECT_EQ(utf::INVALID, ret);
   EXPECT_EQ(0, offset);
 
-  str = "\xD8\x01\xD8\x01";
   offset = 0;
-  ret = utf::read16be(str, offset);
+  ret = utf::read16be(std::array<uint8_t, 4>({0xD8, 0x01, 0xD8, 0x01}), offset);
   EXPECT_EQ(utf::INVALID, ret);
   EXPECT_EQ(0, offset);
 }
 
 TEST(utf16le, invalid) {
-  std::string_view str("\x01");
   size_t offset = 0;
-  auto ret = utf::read16le(str, offset);
+  auto ret = utf::read16le(std::array<uint8_t, 1>({0x01}), offset);
   EXPECT_EQ(utf::NEED_MORE, ret);
   EXPECT_EQ(0, offset);
 
-  str = "";
   offset = 0;
-  ret = utf::read16le(str, offset);
+  ret = utf::read16le(std::array<uint8_t, 0>(), offset);
   EXPECT_EQ(utf::NEED_MORE, ret);
   EXPECT_EQ(0, offset);
 
-  str = "\x01\xD8";
   offset = 0;
-  ret = utf::read16le(str, offset);
+  ret = utf::read16le(std::array<uint8_t, 2>({0x01, 0xD8}), offset);
   EXPECT_EQ(utf::NEED_MORE, ret);
   EXPECT_EQ(0, offset);
 
-  str = "\x01\xD8\x37";
   offset = 0;
-  ret = utf::read16le(str, offset);
+  ret = utf::read16le(std::array<uint8_t, 3>({0x01, 0xD8, 0x37}), offset);
   EXPECT_EQ(utf::NEED_MORE, ret);
   EXPECT_EQ(0, offset);
 
-  str = "\x37\xDC\x01\xD8";
   offset = 0;
-  ret = utf::read16le(str, offset);
+  ret = utf::read16le(std::array<uint8_t, 4>({0x37, 0xDC, 0x01, 0xD8}), offset);
   EXPECT_EQ(utf::INVALID, ret);
   EXPECT_EQ(0, offset);
 
-  str = "\x01\xD8\x01\xD8";
   offset = 0;
-  ret = utf::read16le(str, offset);
+  ret = utf::read16le(std::array<uint8_t, 4>({0x01, 0xD8, 0x01, 0xD8}), offset);
   EXPECT_EQ(utf::INVALID, ret);
   EXPECT_EQ(0, offset);
 }
diff --git a/utf/tst/test_utf32.cc b/utf/tst/test_utf32.cc
index 796b4cd..447b541 100644
--- a/utf/tst/test_utf32.cc
+++ b/utf/tst/test_utf32.cc
@@ -2,144 +2,137 @@
 
 #include "utf_error.hh"
 
+#include <array>
 #include <gtest/gtest.h>
 
 TEST(utf32be, sanity) {
-  std::string_view str("\x00\x00\x00\x24", 4);
   size_t offset = 0;
-  auto ret = utf::read32be(str, offset);
+  auto ret = utf::read32be(
+      std::array<uint8_t, 4>({0x00, 0x00, 0x00, 0x24}), offset);
   EXPECT_EQ('$', ret);
   EXPECT_EQ(4, offset);
 
-  str = std::string_view("\x00\x00\x20\xAC", 4);
   offset = 0;
-  ret = utf::read32be(str, offset);
+  ret = utf::read32be(
+      std::array<uint8_t, 4>({0x00, 0x00, 0x20, 0xAC}), offset);
   EXPECT_EQ(0x20AC, ret);
   EXPECT_EQ(4, offset);
 
-  str = std::string_view("\x00\x01\x04\x37", 4);
   offset = 0;
-  ret = utf::read32be(str, offset);
+  ret = utf::read32be(
+      std::array<uint8_t, 4>({0x00, 0x01, 0x04, 0x37}), offset);
   EXPECT_EQ(0x10437, ret);
   EXPECT_EQ(4, offset);
 }
 
 TEST(utf32le, sanity) {
-  std::string_view str("\x24\x00\x00\x00", 4);
   size_t offset = 0;
-  auto ret = utf::read32le(str, offset);
+  auto ret = utf::read32le(
+      std::array<uint8_t, 4>({0x24, 0x00, 0x00, 0x00}), offset);
   EXPECT_EQ('$', ret);
   EXPECT_EQ(4, offset);
 
-  str = std::string_view("\xAC\x20\x00\x00", 4);
   offset = 0;
-  ret = utf::read32le(str, offset);
+  ret = utf::read32le(
+      std::array<uint8_t, 4>({0xAC, 0x20, 0x00, 0x00}), offset);
   EXPECT_EQ(0x20AC, ret);
   EXPECT_EQ(4, offset);
 
-  str = std::string_view("\x37\x04\x01\x00", 4);
   offset = 0;
-  ret = utf::read32le(str, offset);
+  ret = utf::read32le(
+      std::array<uint8_t, 4>({0x37, 0x04, 0x01, 0x00}), offset);
   EXPECT_EQ(0x10437, ret);
   EXPECT_EQ(4, offset);
 }
 
 TEST(utf32be, invalid) {
-  std::string_view str("\xFF\xFF\xFF\xFF");
   size_t offset = 0;
-  auto ret = utf::read32be(str, offset);
+  auto ret = utf::read32be(
+      std::array<uint8_t, 4>({0xFF, 0xFF, 0xFF, 0xFF}), offset);
   EXPECT_EQ(utf::INVALID, ret);
   EXPECT_EQ(0, offset);
 
-  str = std::string_view("\x00\x00\xD8\x00", 4);
   offset = 0;
-  ret = utf::read32be(str, offset);
+  ret = utf::read32be(
+      std::array<uint8_t, 4>({0x00, 0x00, 0xD8, 0x00}), offset);
   EXPECT_EQ(utf::INVALID, ret);
   EXPECT_EQ(0, offset);
 
-  str = "";
   offset = 0;
-  ret = utf::read32be(str, offset);
+  ret = utf::read32be(std::array<uint8_t, 0>(), offset);  // empty input
   EXPECT_EQ(utf::NEED_MORE, ret);
   EXPECT_EQ(0, offset);
 
-  str = std::string_view("\x00", 1);
   offset = 0;
-  ret = utf::read32be(str, offset);
+  ret = utf::read32be(std::array<uint8_t, 1>({0x00}), offset);
   EXPECT_EQ(utf::NEED_MORE, ret);
   EXPECT_EQ(0, offset);
 
-  str = std::string_view("\x00\x00", 2);
   offset = 0;
-  ret = utf::read32be(str, offset);
+  ret = utf::read32be(std::array<uint8_t, 2>({0x00, 0x00}), offset);
   EXPECT_EQ(utf::NEED_MORE, ret);
   EXPECT_EQ(0, offset);
 
-  str = std::string_view("\x00\x00\x00", 3);
   offset = 0;
-  ret = utf::read32be(str, offset);
+  ret = utf::read32be(std::array<uint8_t, 3>({0x00, 0x00, 0x00}), offset);
   EXPECT_EQ(utf::NEED_MORE, ret);
   EXPECT_EQ(0, offset);
 }
 
 TEST(utf32le, invalid) {
-  std::string_view str("\xFF\xFF\xFF\xFF");
   size_t offset = 0;
-  auto ret = utf::read32le(str, offset);
+  auto ret = utf::read32le(
+      std::array<uint8_t, 4>({0xFF, 0xFF, 0xFF, 0xFF}), offset);
   EXPECT_EQ(utf::INVALID, ret);
   EXPECT_EQ(0, offset);
 
-  str = std::string_view("\x00\xD8\x00\x00", 4);
   offset = 0;
-  ret = utf::read32le(str, offset);
+  ret = utf::read32le(
+      std::array<uint8_t, 4>({0x00, 0xD8, 0x00, 0x00}), offset);
   EXPECT_EQ(utf::INVALID, ret);
   EXPECT_EQ(0, offset);
 
-  str = "";
   offset = 0;
-  ret = utf::read32le(str, offset);
+  ret = utf::read32le(std::array<uint8_t, 0>(), offset);
   EXPECT_EQ(utf::NEED_MORE, ret);
   EXPECT_EQ(0, offset);
 
-  str = std::string_view("\x00", 1);
   offset = 0;
-  ret = utf::read32le(str, offset);
+  ret = utf::read32le(std::array<uint8_t, 1>({0x00}), offset);
   EXPECT_EQ(utf::NEED_MORE, ret);
   EXPECT_EQ(0, offset);
 
-  str = std::string_view("\x00\x00", 2);
   offset = 0;
-  ret = utf::read32le(str, offset);
+  ret = utf::read32le(std::array<uint8_t, 2>({0x00, 0x00}), offset);
   EXPECT_EQ(utf::NEED_MORE, ret);
   EXPECT_EQ(0, offset);
 
-  str = std::string_view("\x00\x00\x00", 3);
   offset = 0;
-  ret = utf::read32le(str, offset);
+  ret = utf::read32le(std::array<uint8_t, 3>({0x00, 0x00, 0x00}), offset);
   EXPECT_EQ(utf::NEED_MORE, ret);
   EXPECT_EQ(0, offset);
 }
 
 TEST(utf32be, bom) {
-  std::string_view str("\x00\x00\xFF\xFE\x00\x00\x20\xAC", 8);
+  std::array<uint8_t, 8> data({0x00, 0x00, 0xFF, 0xFE, 0x00, 0x00, 0x20, 0xAC});
   size_t offset = 0;
-  auto ret = utf::read32be(str, offset);
+  auto ret = utf::read32be(data, offset);
   EXPECT_EQ(0xFFFE, ret);
-  ret = utf::read32be(str, offset);
+  ret = utf::read32be(data, offset);
   EXPECT_EQ(0x20AC, ret);
-  ret = utf::read32be(str, offset);
+  ret = utf::read32be(data, offset);
   EXPECT_EQ(utf::NEED_MORE, ret);
-  EXPECT_EQ(str.size(), offset);
+  EXPECT_EQ(data.size(), offset);
 }
 
 TEST(utf32le, bom) {
-  std::string_view str("\xFE\xFF\x00\x00\xAC\x20\x00\x00", 8);
+  std::array<uint8_t, 8> data({0xFE, 0xFF, 0x00, 0x00, 0xAC, 0x20, 0x00, 0x00});
   size_t offset = 0;
-  auto ret = utf::read32le(str, offset);
+  auto ret = utf::read32le(data, offset);
   EXPECT_EQ(0xFFFE, ret);
-  ret = utf::read32le(str, offset);
+  ret = utf::read32le(data, offset);
   EXPECT_EQ(0x20AC, ret);
-  ret = utf::read32le(str, offset);
+  ret = utf::read32le(data, offset);
   EXPECT_EQ(utf::NEED_MORE, ret);
-  EXPECT_EQ(str.size(), offset);
+  EXPECT_EQ(data.size(), offset);
 }
diff --git a/utf/tst/test_utf8.cc b/utf/tst/test_utf8.cc
index 10df969..8bdeba4 100644
--- a/utf/tst/test_utf8.cc
+++ b/utf/tst/test_utf8.cc
@@ -2,187 +2,245 @@
 
 #include "utf_error.hh"
 
+#include <array>
+#include <gmock/gmock.h>
 #include <gtest/gtest.h>
+#include <span>
 
-TEST(utf8, sanity) {
-  std::string_view str("$");
+TEST(utf8, read_sanity) {  // One well-formed sequence per case; expects the code point and offset == sequence length.
   size_t offset = 0;
-  auto ret = utf::read8(str, offset);
+  auto ret = utf::read8(std::array<uint8_t, 1>({'$'}), offset);  // 1-byte sequence
   EXPECT_EQ('$', ret);
   EXPECT_EQ(1, offset);
 
-  str = "\xC2\xA3";
   offset = 0;
-  ret = utf::read8(str, offset);
+  ret = utf::read8(std::array<uint8_t, 2>({0xC2, 0xA3}), offset);  // 2-byte sequence
   EXPECT_EQ(0xa3, ret);
   EXPECT_EQ(2, offset);
 
-  str = "\xD0\x98";
   offset = 0;
-  ret = utf::read8(str, offset);
+  ret = utf::read8(std::array<uint8_t, 2>({0xD0, 0x98}), offset);  // 2-byte sequence
   EXPECT_EQ(0x418, ret);
   EXPECT_EQ(2, offset);
 
-  str = "\xE0\xA4\xB9";
   offset = 0;
-  ret = utf::read8(str, offset);
+  ret = utf::read8(std::array<uint8_t, 3>({0xE0, 0xA4, 0xB9}), offset);  // 3-byte sequence
   EXPECT_EQ(0x939, ret);
   EXPECT_EQ(3, offset);
 
-  str = "\xE2\x82\xAC";
   offset = 0;
-  ret = utf::read8(str, offset);
+  ret = utf::read8(std::array<uint8_t, 3>({0xE2, 0x82, 0xAC}), offset);  // 3-byte sequence
   EXPECT_EQ(0x20AC, ret);
   EXPECT_EQ(3, offset);
 
-  str = "\xED\x95\x9C";
   offset = 0;
-  ret = utf::read8(str, offset);
+  ret = utf::read8(std::array<uint8_t, 3>({0xED, 0x95, 0x9C}), offset);  // 3-byte sequence with lead byte 0xED
   EXPECT_EQ(0xD55C, ret);
   EXPECT_EQ(3, offset);
 
-  str = "\xF0\x90\x8D\x88";
   offset = 0;
-  ret = utf::read8(str, offset);
+  ret = utf::read8(std::array<uint8_t, 4>({0xF0, 0x90, 0x8D, 0x88}), offset);  // 4-byte sequence
   EXPECT_EQ(0x10348, ret);
   EXPECT_EQ(4, offset);
 }
 
-TEST(utf8, overlong) {
-  std::string_view str("\xF0\x82\x82\xAC");
+TEST(utf8, write_sanity) {  // Encodes one code point per case; expects the exact bytes and offset == bytes written.
+  std::array<uint8_t, 10> out;  // scratch buffer, larger than any sequence written below
   size_t offset = 0;
-  auto ret = utf::read8(str, offset);
+  EXPECT_TRUE(utf::write8('$', out, offset));  // expects 1 byte
+  EXPECT_THAT(std::span(out).subspan(0, offset),
+              testing::ElementsAre('$'));
+  EXPECT_EQ(1, offset);
+
+  offset = 0;  // reset so the next write is checked from out[0]
+  EXPECT_TRUE(utf::write8(0xa3, out, offset));  // expects 2 bytes
+  EXPECT_THAT(std::span(out).subspan(0, offset),
+              testing::ElementsAre(0xC2, 0xA3));
+  EXPECT_EQ(2, offset);
+
+  offset = 0;
+  EXPECT_TRUE(utf::write8(0x418, out, offset));  // expects 2 bytes
+  EXPECT_THAT(std::span(out).subspan(0, offset),
+              testing::ElementsAre(0xD0, 0x98));
+  EXPECT_EQ(2, offset);
+
+  offset = 0;
+  EXPECT_TRUE(utf::write8(0x939, out, offset));  // expects 3 bytes
+  EXPECT_THAT(std::span(out).subspan(0, offset),
+              testing::ElementsAre(0xE0, 0xA4, 0xB9));
+  EXPECT_EQ(3, offset);
+
+  offset = 0;
+  EXPECT_TRUE(utf::write8(0x20AC, out, offset));  // expects 3 bytes
+  EXPECT_THAT(std::span(out).subspan(0, offset),
+              testing::ElementsAre(0xE2, 0x82, 0xAC));
+  EXPECT_EQ(3, offset);
+
+  offset = 0;
+  EXPECT_TRUE(utf::write8(0xD55C, out, offset));  // expects 3 bytes
+  EXPECT_THAT(std::span(out).subspan(0, offset),
+              testing::ElementsAre(0xED, 0x95, 0x9C));
+  EXPECT_EQ(3, offset);
+
+  offset = 0;
+  EXPECT_TRUE(utf::write8(0x10348, out, offset));  // expects 4 bytes
+  EXPECT_THAT(std::span(out).subspan(0, offset),
+              testing::ElementsAre(0xF0, 0x90, 0x8D, 0x88));
+  EXPECT_EQ(4, offset);
+}
+
+TEST(utf8, read_overlong) {  // Non-shortest-form encodings are expected to yield INVALID and leave offset at 0.
+  size_t offset = 0;
+  auto ret = utf::read8(
+      std::array<uint8_t, 4>({0xF0, 0x82, 0x82, 0xAC}), offset);  // 4-byte form whose payload bits equal 0x20AC
   EXPECT_EQ(utf::INVALID, ret);
   EXPECT_EQ(0, offset);
 
-  str = "\xE0\x81\x81";
   offset = 0;
-  ret = utf::read8(str, offset);
+  ret = utf::read8(std::array<uint8_t, 3>({0xE0, 0x81, 0x81}), offset);  // 3-byte form whose payload bits equal 0x41
   EXPECT_EQ(utf::INVALID, ret);
   EXPECT_EQ(0, offset);
 
-  str = "\xC0\x80";
   offset = 0;
-  ret = utf::read8(str, offset);
+  ret = utf::read8(std::array<uint8_t, 2>({0xC0, 0x80}), offset);  // 2-byte form whose payload bits equal 0x00
   EXPECT_EQ(utf::INVALID, ret);
   EXPECT_EQ(0, offset);
 }
 
-TEST(utf8, invalid) {
-  std::string_view str("\xED\xB0\x80");
+TEST(utf8, read_invalid) {  // Malformed input expects INVALID, truncated input expects NEED_MORE; offset stays 0 in every case.
   size_t offset = 0;
-  auto ret = utf::read8(str, offset);
+  auto ret = utf::read8(std::array<uint8_t, 3>({0xED, 0xB0, 0x80}), offset);  // payload bits equal 0xDC00, a surrogate value
   EXPECT_EQ(utf::INVALID, ret);
   EXPECT_EQ(0, offset);
 
-  str = "\xFB\xFF\xFF";
   offset = 0;
-  ret = utf::read8(str, offset);
+  ret = utf::read8(std::array<uint8_t, 3>({0xFB, 0xFF, 0xFF}), offset);  // 0xFB is not a legal UTF-8 lead byte
   EXPECT_EQ(utf::INVALID, ret);
   EXPECT_EQ(0, offset);
 
-  str = "\xFF\xFF\xFF\xFF\xFF";
   offset = 0;
-  ret = utf::read8(str, offset);
+  ret = utf::read8(
+      std::array<uint8_t, 5>({0xFF, 0xFF, 0xFF, 0xFF, 0xFF}), offset);  // 0xFF never appears in well-formed UTF-8
   EXPECT_EQ(utf::INVALID, ret);
   EXPECT_EQ(0, offset);
 
-  str = "";
   offset = 0;
-  ret = utf::read8(str, offset);
+  ret = utf::read8(std::array<uint8_t, 0>(), offset);  // empty input
   EXPECT_EQ(utf::NEED_MORE, ret);
   EXPECT_EQ(0, offset);
 
-  str = "\x80";
   offset = 0;
-  ret = utf::read8(str, offset);
+  ret = utf::read8(std::array<uint8_t, 1>({0x80}), offset);  // continuation byte with no lead byte
   EXPECT_EQ(utf::INVALID, ret);
   EXPECT_EQ(0, offset);
 
-  str = "\xC2";
   offset = 0;
-  ret = utf::read8(str, offset);
+  ret = utf::read8(std::array<uint8_t, 1>({0xC2}), offset);  // 2-byte lead with its continuation byte missing
   EXPECT_EQ(utf::NEED_MORE, ret);
   EXPECT_EQ(0, offset);
 
-  str = "\xC2\x03";
   offset = 0;
-  ret = utf::read8(str, offset);
+  ret = utf::read8(std::array<uint8_t, 2>({0xC2, 0x03}), offset);  // second byte is not a continuation byte
   EXPECT_EQ(utf::INVALID, ret);
   EXPECT_EQ(0, offset);
 
-  str = "\xE0\xA4";
   offset = 0;
-  ret = utf::read8(str, offset);
+  ret = utf::read8(std::array<uint8_t, 2>({0xE0, 0xA4}), offset);  // first 2 bytes of a 3-byte sequence
   EXPECT_EQ(utf::NEED_MORE, ret);
   EXPECT_EQ(0, offset);
 
-  str = "\xF0\x90\x8D";
   offset = 0;
-  ret = utf::read8(str, offset);
+  ret = utf::read8(std::array<uint8_t, 3>({0xF0, 0x90, 0x8D}), offset);  // first 3 bytes of a 4-byte sequence
   EXPECT_EQ(utf::NEED_MORE, ret);
   EXPECT_EQ(0, offset);
 }
 
-TEST(utf8, multiple1) {
-  std::string_view str("\x4D\xC3\xAC\x6E\x68\x20\x6E\xC3\xB3\x69\x20\x74\x69"
-                       "\xE1\xBA\xBF\x6E\x67\x20\x56\x69\xE1\xBB\x87\x74");
+TEST(utf8, read_multiple1) {  // Walks one buffer with a shared offset: 19 code points, then NEED_MORE.
+  std::array<uint8_t, 25> data({  // same bytes as the removed string literal; mixes 1-, 2- and 3-byte sequences
+      0x4D, 0xC3, 0xAC, 0x6E, 0x68, 0x20, 0x6E, 0xC3, 0xB3, 0x69,
+      0x20, 0x74, 0x69, 0xE1, 0xBA, 0xBF, 0x6E, 0x67, 0x20, 0x56,
+      0x69, 0xE1, 0xBB, 0x87, 0x74
+  });
   size_t offset = 0;
-  auto ret = utf::read8(str, offset);
+  auto ret = utf::read8(data, offset);
   EXPECT_EQ('M', ret);
-  ret = utf::read8(str, offset);
+  ret = utf::read8(data, offset);  // 2-byte sequence C3 AC
   EXPECT_EQ(0xEC, ret);
-  ret = utf::read8(str, offset);
+  ret = utf::read8(data, offset);
   EXPECT_EQ('n', ret);
-  ret = utf::read8(str, offset);
+  ret = utf::read8(data, offset);
   EXPECT_EQ('h', ret);
-  ret = utf::read8(str, offset);
+  ret = utf::read8(data, offset);
   EXPECT_EQ(' ', ret);
-  ret = utf::read8(str, offset);
+  ret = utf::read8(data, offset);
   EXPECT_EQ('n', ret);
-  ret = utf::read8(str, offset);
+  ret = utf::read8(data, offset);  // 2-byte sequence C3 B3
   EXPECT_EQ(0xF3, ret);
-  ret = utf::read8(str, offset);
+  ret = utf::read8(data, offset);
   EXPECT_EQ('i', ret);
-  ret = utf::read8(str, offset);
+  ret = utf::read8(data, offset);
   EXPECT_EQ(' ', ret);
-  ret = utf::read8(str, offset);
+  ret = utf::read8(data, offset);
   EXPECT_EQ('t', ret);
-  ret = utf::read8(str, offset);
+  ret = utf::read8(data, offset);
   EXPECT_EQ('i', ret);
-  ret = utf::read8(str, offset);
+  ret = utf::read8(data, offset);  // 3-byte sequence E1 BA BF
   EXPECT_EQ(0x1EBF, ret);
-  ret = utf::read8(str, offset);
+  ret = utf::read8(data, offset);
   EXPECT_EQ('n', ret);
-  ret = utf::read8(str, offset);
+  ret = utf::read8(data, offset);
   EXPECT_EQ('g', ret);
-  ret = utf::read8(str, offset);
+  ret = utf::read8(data, offset);
   EXPECT_EQ(' ', ret);
-  ret = utf::read8(str, offset);
+  ret = utf::read8(data, offset);
   EXPECT_EQ('V', ret);
-  ret = utf::read8(str, offset);
+  ret = utf::read8(data, offset);
   EXPECT_EQ('i', ret);
-  ret = utf::read8(str, offset);
+  ret = utf::read8(data, offset);  // 3-byte sequence E1 BB 87
   EXPECT_EQ(0x1EC7, ret);
-  ret = utf::read8(str, offset);
+  ret = utf::read8(data, offset);
   EXPECT_EQ('t', ret);
-  ret = utf::read8(str, offset);
+  ret = utf::read8(data, offset);  // buffer exhausted: NEED_MORE is expected
   EXPECT_EQ(utf::NEED_MORE, ret);
-  EXPECT_EQ(str.size(), offset);
+  EXPECT_EQ(data.size(), offset);  // every byte must have been consumed
 }
 
-TEST(utf8, multiple2) {
-  std::string_view str("\xF0\xA8\x89\x9F\xE5\x91\x90\xE3\x97\x82\xE8\xB6\x8A");
+TEST(utf8, read_multiple2) {  // Walks one buffer with a shared offset: 4 code points, then NEED_MORE.
+  std::array<uint8_t, 13> data({  // one 4-byte sequence followed by three 3-byte sequences
+      0xF0, 0xA8, 0x89, 0x9F, 0xE5, 0x91, 0x90, 0xE3, 0x97, 0x82,
+      0xE8, 0xB6, 0x8A,
+  });
   size_t offset = 0;
-  auto ret = utf::read8(str, offset);
+  auto ret = utf::read8(data, offset);  // F0 A8 89 9F
   EXPECT_EQ(0x2825F, ret);
-  ret = utf::read8(str, offset);
+  ret = utf::read8(data, offset);  // E5 91 90
   EXPECT_EQ(0x5450, ret);
-  ret = utf::read8(str, offset);
+  ret = utf::read8(data, offset);  // E3 97 82
   EXPECT_EQ(0x35C2, ret);
-  ret = utf::read8(str, offset);
+  ret = utf::read8(data, offset);  // E8 B6 8A
   EXPECT_EQ(0x8D8A, ret);
-  ret = utf::read8(str, offset);
+  ret = utf::read8(data, offset);  // buffer exhausted: NEED_MORE is expected
   EXPECT_EQ(utf::NEED_MORE, ret);
-  EXPECT_EQ(str.size(), offset);
+  EXPECT_EQ(data.size(), offset);  // every byte must have been consumed
+}
+
+TEST(utf8, write_no_space) {  // write8 is expected to return false and leave offset at 0 when the span is too short.
+  std::array<uint8_t, 10> data;
+  std::span<uint8_t> out(data);  // view over data; each case passes a shortened prefix of it
+  size_t offset = 0;
+  EXPECT_FALSE(utf::write8('$', out.subspan(0, 0), offset));  // empty span
+  EXPECT_EQ(0u, offset);
+
+  EXPECT_FALSE(utf::write8(0xa3, out.subspan(0, 1), offset));  // 1 byte offered; write_sanity shows 0xa3 takes 2
+  EXPECT_EQ(0u, offset);
+  EXPECT_FALSE(utf::write8(0x418, out.subspan(0, 0), offset));  // empty span
+  EXPECT_EQ(0u, offset);
+
+  EXPECT_FALSE(utf::write8(0x939, out.subspan(0, 2), offset));  // 2 bytes offered; write_sanity shows 0x939 takes 3
+  EXPECT_EQ(0u, offset);
+  EXPECT_FALSE(utf::write8(0x20AC, out.subspan(0, 0), offset));  // empty span
+  EXPECT_EQ(0u, offset);
+
+  EXPECT_FALSE(utf::write8(0x10348, out.subspan(0, 3), offset));  // 3 bytes offered; write_sanity shows 0x10348 takes 4
+  EXPECT_EQ(0u, offset);
 }
author	Joel Klinghed <the_jk@spawned.biz>	2024-01-21 12:31:30 +0100
committer	Joel Klinghed <the_jk@spawned.biz>	2024-01-21 12:31:30 +0100
commit	7dd49c6293172b494c78918507242cdb55d35137 (patch)
tree	9c8ab822ab9501a5ea2f937e609144e00ea091c4
parent	fc4547b412e28164af1bf8981234c6af959ccc0b (diff)