diff options
| author | Joel Klinghed <the_jk@spawned.biz> | 2024-01-21 12:31:30 +0100 |
|---|---|---|
| committer | Joel Klinghed <the_jk@spawned.biz> | 2024-01-21 12:31:30 +0100 |
| commit | 7dd49c6293172b494c78918507242cdb55d35137 (patch) | |
| tree | 9c8ab822ab9501a5ea2f937e609144e00ea091c4 /sax/inc | |
| parent | fc4547b412e28164af1bf8981234c6af959ccc0b (diff) | |
WIP
Diffstat (limited to 'sax/inc')
| -rw-r--r-- | sax/inc/sax_attributes.hh | 146 | ||||
| -rw-r--r-- | sax/inc/sax_decoder.hh | 26 | ||||
| -rw-r--r-- | sax/inc/sax_decoder_factory.hh | 4 | ||||
| -rw-r--r-- | sax/inc/sax_delegate.hh | 22 | ||||
| -rw-r--r-- | sax/inc/sax_processor.hh | 18 | ||||
| -rw-r--r-- | sax/inc/sax_processor_builder.hh | 2 |
6 files changed, 201 insertions, 17 deletions
diff --git a/sax/inc/sax_attributes.hh b/sax/inc/sax_attributes.hh new file mode 100644 index 0000000..4ab1a44 --- /dev/null +++ b/sax/inc/sax_attributes.hh @@ -0,0 +1,146 @@ +#ifndef SAX_ATTRIBUTES_HH +#define SAX_ATTRIBUTES_HH + +#include <iterator> +#include <optional> +#include <string_view> + +namespace modxml { +namespace sax { + +struct Attribute { + std::string_view name; + std::string_view value; + + Attribute(std::string_view name, std::string_view value); +}; + +/** + * A view of attributes, with utility functions. + */ +class Attributes { + public: + virtual ~Attributes() = default; + + class iterator { + public: + using iterator_category = std::random_access_iterator_tag; + using difference_type = std::ptrdiff_t; + using element_type = Attribute; + using pointer = element_type const *; + using reference = element_type const &; + + iterator() + : attributes_(nullptr), index_(0) {} + iterator(iterator const& it) + : attributes_(it.attributes_), index_(it.index_) {} + iterator& operator=(iterator const& it) { + attributes_ = it.attributes_; + index_ = it.index_; + return *this; + } + + /** + * Comparing two iterators from different Attributes instances is undefined. 
+ */ + bool operator==(iterator const& it) const { + return index_ == it.index_; + } + std::strong_ordering operator<=>(iterator const& it) const { + return index_ <=> it.index_; + } + + pointer operator->() const { return &attributes_->at(index_); } + reference operator*() const { return attributes_->at(index_); } + reference operator[](difference_type i) const { + return attributes_->at(index_ + i); + } + + iterator& operator++() { + ++index_; + return *this; + } + iterator operator++(int) { + auto ret = *this; + ++index_; + return ret; + } + iterator& operator+=(difference_type i) { + index_ += i; + return *this; + } + iterator operator+(difference_type i) const { + return iterator(attributes_, index_ + i); + } + friend iterator operator+(difference_type i, iterator const &it) { + return iterator(it.attributes_, it.index_ + i); + } + iterator& operator--() { + --index_; + return *this; + } + iterator operator--(int) { + auto ret = *this; + --index_; + return ret; + } + iterator& operator-=(difference_type i) { + index_ -= i; + return *this; + } + difference_type operator-(iterator const& it) const { + return index_ - it.index_; + } + iterator operator-(difference_type i) const { + return iterator(attributes_, index_ - i); + } + + protected: + iterator(Attributes const* attributes, std::size_t index) + : attributes_(attributes), index_(index) {} + + private: + Attributes const* attributes_; + std::size_t index_; + }; + + static_assert(std::random_access_iterator<iterator>); + + virtual iterator begin() const = 0; + virtual iterator end() const = 0; + + virtual std::size_t size() const = 0; + /** + * name and value of attribute are valid as long as Attributes instance is. + */ + virtual Attribute const& at(std::size_t index) const = 0; + + Attribute const& operator[](std::size_t index) const { return at(index); } + + /** + * Return the first attribute with name, if any. 
+ */ + virtual std::optional<std::string_view> find_first( + std::string_view name) const; + + /** + * Return the last attribute with name, if any. + */ + virtual std::optional<std::string_view> find_last( + std::string_view name) const; + + /** + * Return the index of the attribute with name, starting the search at index. + */ + virtual std::optional<std::size_t> find(std::string_view name, + std::size_t index = 0) const; + + protected: + Attributes() = default; +}; + +} // namespace sax +} // namespace modxml + + +#endif // SAX_ATTRIBUTES_HH diff --git a/sax/inc/sax_decoder.hh b/sax/inc/sax_decoder.hh index 40a56c9..8b2490c 100644 --- a/sax/inc/sax_decoder.hh +++ b/sax/inc/sax_decoder.hh @@ -1,16 +1,15 @@ #ifndef SAX_DECODER_HH #define SAX_DECODER_HH -#include <memory> -#include <string> -#include <string_view> +#include <cstdint> +#include <span> namespace modxml { namespace sax { /** * Decoder returned by DecoderFactory. Used by Processor to turn bytes into - * unicode characters. + * unicode characters encoded as UTF-8. */ class Decoder { public: @@ -18,9 +17,9 @@ class Decoder { enum class State { GOOD = 0, - // too little data was given to advance + // too little data was given to decode NEED_MORE, - // invalid data was given to advance + // invalid data was given to decode INVALID, }; @@ -29,23 +28,22 @@ class Decoder { * write them to out (start at out_offset) as UTF-8. * All written code points must be valid per Unicode, so inside the * range U+0 to U+10FFFF and not a surrogate pair (U+D800-U+DFFF). - * No partial output, only write to out if the whole UTF-8 sequence is - * going to fit. - * The is always at least 4 bytes available (out.size() - out_offset) when + * No partial code point output, only write to out if the whole UTF-8 + * sequence for the code point is going to fit. + * There will always be at least 4 bytes available (out.size() - out_offset) when * called. - * Advance in_offset for data consumed. 
Do NOT read past in.size(). * Advance out_offset for code points written. Do NOT write past out.size(). - * Do NOT resize out. * If at least one code point is decoded and written to out, return GOOD. * If it is not possible to decode a single code point, in_offset and * out_offset should not be advanced and something other than GOOD returned. * Do not keep any references to any of the parameters after returning, next - * advance() call will point to the following bytes, but all parameters + * decode() call will point to the following bytes, but all parameters * may have changed as they are subject to the buffer implementations of the * Processor. */ - virtual State decode(std::string_view in, std::size_t& in_offset, - std::string& out, std::size_t& out_offset) = 0; + virtual State decode(std::span<uint8_t const> in, std::size_t& in_offset, + std::span<uint8_t> out, std::size_t& out_offset) = 0; protected: Decoder() = default; diff --git a/sax/inc/sax_decoder_factory.hh b/sax/inc/sax_decoder_factory.hh index 80f1af3..2361ac3 100644 --- a/sax/inc/sax_decoder_factory.hh +++ b/sax/inc/sax_decoder_factory.hh @@ -2,7 +2,7 @@ #define SAX_DECODER_FACTORY_HH #include <memory> -#include <string> +#include <string_view> namespace modxml { namespace sax { @@ -23,7 +23,7 @@ class DecoderFactory { * Note that encoding value isn't cleaned up or validated in any way, it is * reported EXACTLY as found (even if not valid per XML spec). */ - virtual std::unique_ptr<Decoder> create(std::string const& encoding) = 0; + virtual std::unique_ptr<Decoder> create(std::string_view encoding) = 0; protected: DecoderFactory() = default; diff --git a/sax/inc/sax_delegate.hh b/sax/inc/sax_delegate.hh index ba63e72..59af2b7 100644 --- a/sax/inc/sax_delegate.hh +++ b/sax/inc/sax_delegate.hh @@ -1,9 +1,14 @@ #ifndef MODXML_SAX_DELEGATE_HH #define MODXML_SAX_DELEGATE_HH +#include <cstdint> +#include <string_view> + namespace modxml { namespace sax { +class Attributes; + /** * Delegate for processor. 
* Implement to handle events. @@ -12,6 +17,23 @@ class Delegate { public: virtual ~Delegate() = default; + virtual void start_element(std::string_view name, + Attributes const& attributes); + + virtual void end_element(std::string_view name); + + virtual void empty_element(std::string_view name, + Attributes const& attributes); + + virtual void character_data(std::string_view data); + + virtual void processing_instruction(std::string_view target, + std::string_view data); + + virtual void comment(std::string_view data); + + virtual void error(std::string_view message); + protected: Delegate() = default; }; diff --git a/sax/inc/sax_processor.hh b/sax/inc/sax_processor.hh index 7ca32f7..cf53807 100644 --- a/sax/inc/sax_processor.hh +++ b/sax/inc/sax_processor.hh @@ -2,6 +2,7 @@ #define MODXML_SAX_PROCESSOR_HH #include <memory> +#include <span> namespace modxml { namespace sax { @@ -23,6 +24,23 @@ class Processor { */ static std::unique_ptr<Processor> create(std::shared_ptr<Delegate> delegate); + /** + * Process (consume) input data. + * Returns bytes consumed, can be zero. + */ + virtual std::size_t process(std::span<uint8_t const> data, + std::size_t offset = 0) = 0; + + /** + * When called from delegate, points to the start of the element that + * triggered the callback. + * When called otherwise, points to the last element that was processed. + * Lines start at 1. + * Columns start at 0. + */ + virtual uint64_t line() const = 0; + virtual uint64_t column() const = 0; + protected: Processor() = default; diff --git a/sax/inc/sax_processor_builder.hh b/sax/inc/sax_processor_builder.hh index 070fbbf..8b114e4 100644 --- a/sax/inc/sax_processor_builder.hh +++ b/sax/inc/sax_processor_builder.hh @@ -48,7 +48,7 @@ class ProcessorBuilder { * If you give a too small buffer size (such as zero) it will be ignored * and an implementation-specific minimum will be used instead. * This is meant as a possible optimization and can be completely ignored. 
- * Note that the processor will allocate more data if it needed. + * Note that the processor will allocate more data if it needs to. */ virtual ProcessorBuilder* set_default_buffer_size(std::size_t size) = 0; |
