summaryrefslogtreecommitdiff
path: root/sax/inc
diff options
context:
space:
mode:
authorJoel Klinghed <the_jk@spawned.biz>2024-01-21 12:31:30 +0100
committerJoel Klinghed <the_jk@spawned.biz>2024-01-21 12:31:30 +0100
commit7dd49c6293172b494c78918507242cdb55d35137 (patch)
tree9c8ab822ab9501a5ea2f937e609144e00ea091c4 /sax/inc
parentfc4547b412e28164af1bf8981234c6af959ccc0b (diff)
WIP
Diffstat (limited to 'sax/inc')
-rw-r--r--sax/inc/sax_attributes.hh146
-rw-r--r--sax/inc/sax_decoder.hh26
-rw-r--r--sax/inc/sax_decoder_factory.hh4
-rw-r--r--sax/inc/sax_delegate.hh22
-rw-r--r--sax/inc/sax_processor.hh18
-rw-r--r--sax/inc/sax_processor_builder.hh2
6 files changed, 201 insertions, 17 deletions
diff --git a/sax/inc/sax_attributes.hh b/sax/inc/sax_attributes.hh
new file mode 100644
index 0000000..4ab1a44
--- /dev/null
+++ b/sax/inc/sax_attributes.hh
@@ -0,0 +1,146 @@
+#ifndef SAX_ATTRIBUTES_HH
+#define SAX_ATTRIBUTES_HH
+
+#include <iterator>
+#include <optional>
+#include <string_view>
+
+namespace modxml {
+namespace sax {
+
+/**
+ * Name and value of a single attribute, both held as std::string_view
+ * (views only, the character data is stored elsewhere).
+ */
+struct Attribute {
+ std::string_view name;
+ std::string_view value;
+
+ // Declared only; the definition is not part of this header.
+ Attribute(std::string_view name, std::string_view value);
+};
+
+/**
+ * A view of attributes, with utility functions.
+ *
+ * Abstract interface: begin(), end(), size() and at() are pure virtual and
+ * have to be provided by the implementation. Instances can only be created
+ * through a derived class (the default constructor is protected).
+ */
+class Attributes {
+ public:
+ virtual ~Attributes() = default;
+
+ /**
+ * Random access iterator over an Attributes instance.
+ * Stores a pointer to the Attributes and an index; every dereference is
+ * forwarded to Attributes::at().
+ */
+ class iterator {
+ public:
+ using iterator_category = std::random_access_iterator_tag;
+ using difference_type = std::ptrdiff_t;
+ using element_type = Attribute;
+ using pointer = element_type const *;
+ using reference = element_type const &;
+
+ /**
+ * Default constructed iterator has no Attributes instance (nullptr) and
+ * index 0. Dereferencing it would call at() through a null pointer.
+ */
+ iterator()
+ : attributes_(nullptr), index_(0) {}
+ iterator(iterator const& it)
+ : attributes_(it.attributes_), index_(it.index_) {}
+ iterator& operator=(iterator const& it) {
+ attributes_ = it.attributes_;
+ index_ = it.index_;
+ return *this;
+ }
+
+ /**
+ * Comparing two iterators from different Attributes instances is undefined.
+ * Only the index is compared, the Attributes pointer is not looked at.
+ */
+ bool operator==(iterator const& it) const {
+ return index_ == it.index_;
+ }
+ // NOTE(review): std::strong_ordering is declared in <compare>, which this
+ // header does not include directly (only <iterator>, <optional> and
+ // <string_view>) - consider including it explicitly.
+ std::strong_ordering operator<=>(iterator const& it) const {
+ return index_ <=> it.index_;
+ }
+
+ // All element access goes through the virtual Attributes::at().
+ pointer operator->() const { return &attributes_->at(index_); }
+ reference operator*() const { return attributes_->at(index_); }
+ reference operator[](difference_type i) const {
+ // index_ is std::size_t and i is signed; a negative i relies on unsigned
+ // wrap-around to move backwards.
+ return attributes_->at(index_ + i);
+ }
+
+ // None of the increment/decrement/arithmetic operators below check the
+ // resulting index against size().
+ iterator& operator++() {
+ ++index_;
+ return *this;
+ }
+ iterator operator++(int) {
+ auto ret = *this;
+ ++index_;
+ return ret;
+ }
+ iterator& operator+=(difference_type i) {
+ index_ += i;
+ return *this;
+ }
+ iterator operator+(difference_type i) const {
+ return iterator(attributes_, index_ + i);
+ }
+ friend iterator operator+(difference_type i, iterator const &it) {
+ return iterator(it.attributes_, it.index_ + i);
+ }
+ iterator& operator--() {
+ --index_;
+ return *this;
+ }
+ iterator operator--(int) {
+ auto ret = *this;
+ --index_;
+ return ret;
+ }
+ iterator& operator-=(difference_type i) {
+ index_ -= i;
+ return *this;
+ }
+ /**
+ * Distance between two iterators. The subtraction is done on std::size_t
+ * and then converted to difference_type.
+ */
+ difference_type operator-(iterator const& it) const {
+ return index_ - it.index_;
+ }
+ iterator operator-(difference_type i) const {
+ return iterator(attributes_, index_ - i);
+ }
+
+ protected:
+ // NOTE(review): being protected in iterator, this constructor is only
+ // accessible from iterator itself and classes derived from iterator, not
+ // from classes derived from Attributes - confirm how begin()/end()
+ // implementations are expected to create iterators.
+ iterator(Attributes const* attributes, std::size_t index)
+ : attributes_(attributes), index_(index) {}
+
+ private:
+ Attributes const* attributes_;
+ std::size_t index_;
+ };
+
+ // Compile time check that iterator models the C++20 concept.
+ static_assert(std::random_access_iterator<iterator>);
+
+ virtual iterator begin() const = 0;
+ virtual iterator end() const = 0;
+
+ virtual std::size_t size() const = 0;
+ /**
+ * name and value of attribute are valid as long as Attributes instance is.
+ */
+ virtual Attribute const& at(std::size_t index) const = 0;
+
+ /**
+ * Same as at(index).
+ */
+ Attribute const& operator[](std::size_t index) const { return at(index); }
+
+ /**
+ * Return the first attribute with name, if any.
+ */
+ virtual std::optional<std::string_view> find_first(
+ std::string_view name) const;
+
+ /**
+ * Return the last attribute with name, if any.
+ */
+ virtual std::optional<std::string_view> find_last(
+ std::string_view name) const;
+
+ /**
+ * Return the index of the attribute with name, starting the search at index
+ * (default 0, the first attribute).
+ */
+ virtual std::optional<std::size_t> find(std::string_view name,
+ std::size_t index = 0) const;
+
+ protected:
+ Attributes() = default;
+};
+
+} // namespace sax
+} // namespace modxml
+
+
+#endif // SAX_ATTRIBUTES_HH
diff --git a/sax/inc/sax_decoder.hh b/sax/inc/sax_decoder.hh
index 40a56c9..8b2490c 100644
--- a/sax/inc/sax_decoder.hh
+++ b/sax/inc/sax_decoder.hh
@@ -1,16 +1,15 @@
#ifndef SAX_DECODER_HH
#define SAX_DECODER_HH
-#include <memory>
-#include <string>
-#include <string_view>
+#include <cstdint>
+#include <span>
namespace modxml {
namespace sax {
/**
* Decoder returned by DecoderFactory. Used by Processor to turn bytes into
- * unicode characters.
+ * unicode characters encoded as UTF-8.
*/
class Decoder {
public:
@@ -18,9 +17,9 @@ class Decoder {
enum class State {
GOOD = 0,
- // too little data was given to advance
+ // too little data was given to decode
NEED_MORE,
- // invalid data was given to advance
+ // invalid data was given to decode
INVALID,
};
@@ -29,23 +28,22 @@ class Decoder {
* write them to out (start at out_offset) as UTF-8.
* All written code points must be valid per Unicode, so inside the
* range U+0 to U+10FFFF and not a surrogate pair (U+D800-U+DFFF).
- * No partial output, only write to out if the whole UTF-8 sequence is
- * going to fit.
- * The is always at least 4 bytes available (out.size() - out_offset) when
+ * No partial code point output, only write to out if the whole UTF-8
+ * sequence for the code point is going to fit.
 * There will always be at least 4 bytes available (out.size() - out_offset) when
* called.
- * Advance in_offset for data consumed.
+ * Advance in_offset for data consumed. Do NOT read past in.size().
* Advance out_offset for code points written. Do NOT write past out.size().
- * Do NOT resize out.
* If at least one code point is decoded and written to out, return GOOD.
* If it is not possible to decode a single code point, in_offset and
* out_offset should not be advanced and something other than GOOD returned.
* Do not keep any references to any of the parameters after returning, next
- * advance() call will point to the following bytes, but all parameters
+ * decode() call will point to the following bytes, but all parameters
* may have changed as they are subject to the buffer implementations of the
* Processor.
*/
- virtual State decode(std::string_view in, std::size_t& in_offset,
- std::string& out, std::size_t& out_offset) = 0;
+ virtual State decode(std::span<uint8_t const> in, std::size_t& in_offset,
+ std::span<uint8_t> out, std::size_t& out_offset) = 0;
protected:
Decoder() = default;
diff --git a/sax/inc/sax_decoder_factory.hh b/sax/inc/sax_decoder_factory.hh
index 80f1af3..2361ac3 100644
--- a/sax/inc/sax_decoder_factory.hh
+++ b/sax/inc/sax_decoder_factory.hh
@@ -2,7 +2,7 @@
#define SAX_DECODER_FACTORY_HH
#include <memory>
-#include <string>
+#include <string_view>
namespace modxml {
namespace sax {
@@ -23,7 +23,7 @@ class DecoderFactory {
* Note that encoding value isn't cleaned up or validated in any way, it is
* reported EXACTLY as found (even if not valid per XML spec).
*/
- virtual std::unique_ptr<Decoder> create(std::string const& encoding) = 0;
+ virtual std::unique_ptr<Decoder> create(std::string_view encoding) = 0;
protected:
DecoderFactory() = default;
diff --git a/sax/inc/sax_delegate.hh b/sax/inc/sax_delegate.hh
index ba63e72..59af2b7 100644
--- a/sax/inc/sax_delegate.hh
+++ b/sax/inc/sax_delegate.hh
@@ -1,9 +1,14 @@
#ifndef MODXML_SAX_DELEGATE_HH
#define MODXML_SAX_DELEGATE_HH
+#include <cstdint>
+#include <string_view>
+
namespace modxml {
namespace sax {
+class Attributes;
+
/**
* Delegate for processor.
* Implement to handle events.
@@ -12,6 +17,23 @@ class Delegate {
public:
virtual ~Delegate() = default;
+ virtual void start_element(std::string_view name,
+ Attributes const& attributes);
+
+ virtual void end_element(std::string_view name);
+
+ virtual void empty_element(std::string_view name,
+ Attributes const& attributes);
+
+ virtual void character_data(std::string_view data);
+
+ virtual void processing_instruction(std::string_view target,
+ std::string_view data);
+
+ virtual void comment(std::string_view data);
+
+ virtual void error(std::string_view message);
+
protected:
Delegate() = default;
};
diff --git a/sax/inc/sax_processor.hh b/sax/inc/sax_processor.hh
index 7ca32f7..cf53807 100644
--- a/sax/inc/sax_processor.hh
+++ b/sax/inc/sax_processor.hh
@@ -2,6 +2,7 @@
#define MODXML_SAX_PROCESSOR_HH
#include <memory>
+#include <span>
namespace modxml {
namespace sax {
@@ -23,6 +24,23 @@ class Processor {
*/
static std::unique_ptr<Processor> create(std::shared_ptr<Delegate> delegate);
+ /**
+ * Process (consume) input data.
+ * Returns bytes consumed, can be zero.
+ */
+ virtual std::size_t process(std::span<uint8_t const> data,
+ std::size_t offset = 0) = 0;
+
+ /**
+ * When called from delegate, points to the start of the element that
+ * triggered the callback.
+ * When called otherwise, points to the last element that was processed.
+ * Lines start at 1.
+ * Columns start at 0.
+ */
+ virtual uint64_t line() const = 0;
+ virtual uint64_t column() const = 0;
+
protected:
Processor() = default;
diff --git a/sax/inc/sax_processor_builder.hh b/sax/inc/sax_processor_builder.hh
index 070fbbf..8b114e4 100644
--- a/sax/inc/sax_processor_builder.hh
+++ b/sax/inc/sax_processor_builder.hh
@@ -48,7 +48,7 @@ class ProcessorBuilder {
* If you give a too small buffer size (such as zero) it will be ignored
* and a implementation specific minimum will be used instead.
* This is meant as a possible optimization and can be completely ignored.
- * Note that the processor will allocate more data if it needed.
+ * Note that the processor will allocate more data if it needs to.
*/
virtual ProcessorBuilder* set_default_buffer_size(std::size_t size) = 0;