diff options
Diffstat (limited to 'sax/inc')
| -rw-r--r-- | sax/inc/sax_decoder.hh | 57 | ||||
| -rw-r--r-- | sax/inc/sax_decoder_factory.hh | 35 | ||||
| -rw-r--r-- | sax/inc/sax_delegate.hh | 22 | ||||
| -rw-r--r-- | sax/inc/sax_error.hh | 36 | ||||
| -rw-r--r-- | sax/inc/sax_processor.hh | 37 | ||||
| -rw-r--r-- | sax/inc/sax_processor_builder.hh | 82 |
6 files changed, 269 insertions, 0 deletions
diff --git a/sax/inc/sax_decoder.hh b/sax/inc/sax_decoder.hh new file mode 100644 index 0000000..40a56c9 --- /dev/null +++ b/sax/inc/sax_decoder.hh @@ -0,0 +1,57 @@ +#ifndef SAX_DECODER_HH +#define SAX_DECODER_HH + +#include <memory> +#include <string> +#include <string_view> + +namespace modxml { +namespace sax { + +/** + * Decoder returned by DecoderFactory. Used by Processor to turn bytes into + * Unicode characters. + */ +class Decoder { + public: + virtual ~Decoder() = default; + + enum class State { + GOOD = 0, + // too little data was given to advance + NEED_MORE, + // invalid data was given to advance + INVALID, + }; + + /** + * Decode as many code points as possible from in (start at in_offset) and + * write them to out (start at out_offset) as UTF-8. + * All written code points must be valid per Unicode, so inside the + * range U+0 to U+10FFFF and not a surrogate code point (U+D800-U+DFFF). + * No partial output, only write to out if the whole UTF-8 sequence is + * going to fit. + * There are always at least 4 bytes available (out.size() - out_offset) when + * called. + * Advance in_offset for data consumed. + * Advance out_offset for code points written. Do NOT write past out.size(). + * Do NOT resize out. + * If at least one code point is decoded and written to out, return GOOD. + * If it is not possible to decode a single code point, in_offset and + * out_offset should not be advanced and something other than GOOD returned. + * Do not keep any references to any of the parameters after returning, next + * decode() call will point to the following bytes, but all parameters + * may have changed as they are subject to the buffer implementations of the + * Processor. 
+ */ + virtual State decode(std::string_view in, std::size_t& in_offset, + std::string& out, std::size_t& out_offset) = 0; + + protected: + Decoder() = default; +}; + +} // namespace sax +} // namespace modxml + +#endif // SAX_DECODER_HH diff --git a/sax/inc/sax_decoder_factory.hh b/sax/inc/sax_decoder_factory.hh new file mode 100644 index 0000000..80f1af3 --- /dev/null +++ b/sax/inc/sax_decoder_factory.hh @@ -0,0 +1,35 @@ +#ifndef SAX_DECODER_FACTORY_HH +#define SAX_DECODER_FACTORY_HH + +#include <memory> +#include <string> + +namespace modxml { +namespace sax { + +class Decoder; + +/** + * Factory for decoders. You can give one to ProcessorBuilder. + */ +class DecoderFactory { + public: + virtual ~DecoderFactory() = default; + + /** + * If encoding is supported, return a decoder for that encoding. + * Return nullptr if not supported and Processor will return + * UNKNOWN_ENCODING error. + * Note that encoding value isn't cleaned up or validated in any way, it is + * reported EXACTLY as found (even if not valid per XML spec). + */ + virtual std::unique_ptr<Decoder> create(std::string const& encoding) = 0; + + protected: + DecoderFactory() = default; +}; + +} // namespace sax +} // namespace modxml + +#endif // SAX_DECODER_FACTORY_HH diff --git a/sax/inc/sax_delegate.hh b/sax/inc/sax_delegate.hh new file mode 100644 index 0000000..ba63e72 --- /dev/null +++ b/sax/inc/sax_delegate.hh @@ -0,0 +1,22 @@ +#ifndef MODXML_SAX_DELEGATE_HH +#define MODXML_SAX_DELEGATE_HH + +namespace modxml { +namespace sax { + +/** + * Delegate for processor. + * Implement to handle events. 
+ */ +class Delegate { + public: + virtual ~Delegate() = default; + + protected: + Delegate() = default; +}; + +} // namespace sax +} // namespace modxml + +#endif // MODXML_SAX_DELEGATE_HH diff --git a/sax/inc/sax_error.hh b/sax/inc/sax_error.hh new file mode 100644 index 0000000..748f995 --- /dev/null +++ b/sax/inc/sax_error.hh @@ -0,0 +1,36 @@ +#ifndef MODXML_SAX_ERROR_HH +#define MODXML_SAX_ERROR_HH + +namespace modxml { +namespace sax { + +enum class Error { + /** + * The XML spec has a list of characters that are never allowed in a document. + */ + INVALID_CHAR, + /** + * If the document encoding is unsupported or unknown. + */ + UNKNOWN_ENCODING, + /** + * If the document is incomplete. This is one of the few recoverable errors, + * if you call the processor with more data it will continue. + */ + INCOMPLETE, + /** + * An entity in the document exceeded max buffer size (either set by + * ProcessorBuilder or the default 10 MiB). + */ + MAX_MEMORY_EXCEEDED, + /** + * A memory allocation failed. Note that this doesn't protect against + * usage of overallocated memory. + */ + OUT_OF_MEMORY, +}; + +} // namespace sax +} // namespace modxml + +#endif // MODXML_SAX_ERROR_HH diff --git a/sax/inc/sax_processor.hh b/sax/inc/sax_processor.hh new file mode 100644 index 0000000..7ca32f7 --- /dev/null +++ b/sax/inc/sax_processor.hh @@ -0,0 +1,37 @@ +#ifndef MODXML_SAX_PROCESSOR_HH +#define MODXML_SAX_PROCESSOR_HH + +#include <memory> + +namespace modxml { +namespace sax { + +class Delegate; + +/** + * The XML processor, or parser if you like that term better. + * Feed it data and the processor will give the delegate calls with events or + * possibly errors. + */ +class Processor { + public: + virtual ~Processor() = default; + + /** + * Construct a Processor. Same as creating a ProcessorBuilder + * and not changing any options and just calling build. 
+ */ + static std::unique_ptr<Processor> create(std::shared_ptr<Delegate> delegate); + + protected: + Processor() = default; + + private: + Processor(Processor const&) = delete; + Processor& operator=(Processor const&) = delete; +}; + +} // namespace sax +} // namespace modxml + +#endif // MODXML_SAX_PROCESSOR_HH diff --git a/sax/inc/sax_processor_builder.hh b/sax/inc/sax_processor_builder.hh new file mode 100644 index 0000000..070fbbf --- /dev/null +++ b/sax/inc/sax_processor_builder.hh @@ -0,0 +1,82 @@ +#ifndef MODXML_SAX_PROCESSOR_BUILDER_HH +#define MODXML_SAX_PROCESSOR_BUILDER_HH + +#include <memory> +#include <string> + +namespace modxml { +namespace sax { + +class DecoderFactory; +class Delegate; +class Processor; + +/** + * Used to construct Processors with options set if needed. + */ +class ProcessorBuilder { + public: + virtual ~ProcessorBuilder() = default; + + /** + * Construct a ProcessorBuilder. All options are set to default. + */ + static std::unique_ptr<ProcessorBuilder> create(); + + /** + * If you know the encoding of the data sent to the processor set it here, + * this will stop the processor from trying to autodetect and will ignore + * encoding in any XML declaration if found. + * If the encoding is unsupported/unknown the processor will fail with + * an error indicating this, same as if it read an XML declaration with + * an unsupported or unknown encoding. + */ + virtual ProcessorBuilder* force_encoding(std::string const& str) = 0; + + /** + * Set a decoder factory for encodings not supported by library. + * Library only calls this for encodings it doesn't support itself. + * Library supports UTF-8, UTF-16, UTF-32 and US-ASCII. + * If you want to force the decoder factory to be used, force a custom + * encoding with force_encoding above. + */ + virtual ProcessorBuilder* custom_decoder_factory( + std::shared_ptr<DecoderFactory> custom_decoder_factory) = 0; + + /** + * Set the default buffer size the processor should use. 
+ * If you give a too small buffer size (such as zero) it will be ignored + * and an implementation-specific minimum will be used instead. + * This is meant as a possible optimization and can be completely ignored. + * Note that the processor will allocate more data if needed. + */ + virtual ProcessorBuilder* set_default_buffer_size(std::size_t size) = 0; + + /** + * Set the max buffer size the processor should use. + * If you have memory constraints this will block the processing of CDATA, + * or other entities from allocating more than the given size. + * Default is 10 MiB. + */ + virtual ProcessorBuilder* set_max_buffer_size(std::size_t size) = 0; + + /** + * Call to construct a Processor with the options setup in this builder, + * using the delegate given as parameter. + * May be called multiple times, will create a unique Processor each time. + */ + virtual std::unique_ptr<Processor> build( + std::shared_ptr<Delegate> delegate) const = 0; + + protected: + ProcessorBuilder() = default; + + private: + ProcessorBuilder(ProcessorBuilder const&) = delete; + ProcessorBuilder& operator=(ProcessorBuilder const&) = delete; +}; + +} // namespace sax +} // namespace modxml + +#endif // MODXML_SAX_PROCESSOR_BUILDER_HH |
