summaryrefslogtreecommitdiff
path: root/sax/inc
diff options
context:
space:
mode:
Diffstat (limited to 'sax/inc')
-rw-r--r--sax/inc/sax_decoder.hh57
-rw-r--r--sax/inc/sax_decoder_factory.hh35
-rw-r--r--sax/inc/sax_delegate.hh22
-rw-r--r--sax/inc/sax_error.hh36
-rw-r--r--sax/inc/sax_processor.hh37
-rw-r--r--sax/inc/sax_processor_builder.hh82
6 files changed, 269 insertions, 0 deletions
diff --git a/sax/inc/sax_decoder.hh b/sax/inc/sax_decoder.hh
new file mode 100644
index 0000000..40a56c9
--- /dev/null
+++ b/sax/inc/sax_decoder.hh
@@ -0,0 +1,57 @@
+#ifndef SAX_DECODER_HH
+#define SAX_DECODER_HH
+
+#include <memory>
+#include <string>
+#include <string_view>
+
+namespace modxml {
+namespace sax {
+
+/**
+ * Decoder returned by DecoderFactory. Used by Processor to turn bytes into
+ * Unicode code points (written out as UTF-8, see decode()).
+ */
+class Decoder {
+ public:
+ virtual ~Decoder() = default;
+
+ enum class State {
+ GOOD = 0,
+ // too little data was given for decode() to make progress
+ NEED_MORE,
+ // invalid data was given to decode()
+ INVALID,
+ };
+
+ /**
+ * Decode as many code points as possible from in (starting at in_offset)
+ * and write them to out (starting at out_offset) as UTF-8.
+ * All written code points must be valid per Unicode, so inside the
+ * range U+0 to U+10FFFF and not a surrogate (U+D800-U+DFFF).
+ * No partial output, only write to out if the whole UTF-8 sequence is
+ * going to fit.
+ * There are always at least 4 bytes available (out.size() - out_offset)
+ * when called.
+ * Advance in_offset for data consumed.
+ * Advance out_offset for code points written. Do NOT write past out.size().
+ * Do NOT resize out.
+ * If at least one code point is decoded and written to out, return GOOD.
+ * If it is not possible to decode a single code point, in_offset and
+ * out_offset should not be advanced and something other than GOOD returned.
+ * Do not keep any references to any of the parameters after returning; the
+ * next decode() call will point to the following bytes, but all parameters
+ * may have changed as they are subject to the buffer implementations of the
+ * Processor.
+ */
+ virtual State decode(std::string_view in, std::size_t& in_offset,
+ std::string& out, std::size_t& out_offset) = 0;
+
+ protected:
+ Decoder() = default;
+};
+
+} // namespace sax
+} // namespace modxml
+
+#endif // SAX_DECODER_HH
diff --git a/sax/inc/sax_decoder_factory.hh b/sax/inc/sax_decoder_factory.hh
new file mode 100644
index 0000000..80f1af3
--- /dev/null
+++ b/sax/inc/sax_decoder_factory.hh
@@ -0,0 +1,35 @@
+#ifndef SAX_DECODER_FACTORY_HH
+#define SAX_DECODER_FACTORY_HH
+
+#include <memory>
+#include <string>
+
+namespace modxml {
+namespace sax {
+
+class Decoder;
+
+/**
+ * Factory for decoders. You can give one to ProcessorBuilder.
+ */
+class DecoderFactory {
+ public:
+ virtual ~DecoderFactory() = default;
+
+ /**
+ * If encoding is supported, return a decoder for that encoding.
+ * Return nullptr if it is not supported and Processor will report an
+ * UNKNOWN_ENCODING error.
+ * Note that the encoding value isn't cleaned up or validated in any way, it
+ * is reported EXACTLY as found (even if not valid per XML spec).
+ */
+ virtual std::unique_ptr<Decoder> create(std::string const& encoding) = 0;
+
+ protected:
+ DecoderFactory() = default;
+};
+
+} // namespace sax
+} // namespace modxml
+
+#endif // SAX_DECODER_FACTORY_HH
diff --git a/sax/inc/sax_delegate.hh b/sax/inc/sax_delegate.hh
new file mode 100644
index 0000000..ba63e72
--- /dev/null
+++ b/sax/inc/sax_delegate.hh
@@ -0,0 +1,22 @@
+#ifndef MODXML_SAX_DELEGATE_HH
+#define MODXML_SAX_DELEGATE_HH
+
+namespace modxml {
+namespace sax {
+
+/**
+ * Delegate for Processor.
+ * Implement (subclass) to handle the events reported by the Processor.
+ */
+class Delegate {
+ public:
+ virtual ~Delegate() = default;
+
+ protected:
+ Delegate() = default;
+};
+
+} // namespace sax
+} // namespace modxml
+
+#endif // MODXML_SAX_DELEGATE_HH
diff --git a/sax/inc/sax_error.hh b/sax/inc/sax_error.hh
new file mode 100644
index 0000000..748f995
--- /dev/null
+++ b/sax/inc/sax_error.hh
@@ -0,0 +1,36 @@
+#ifndef MODXML_SAX_ERROR_HH
+#define MODXML_SAX_ERROR_HH
+
+namespace modxml {
+namespace sax {
+
+enum class Error {
+ /**
+ * The XML spec has a list of characters that are never allowed in a document.
+ */
+ INVALID_CHAR,
+ /**
+ * If the document encoding is unsupported or unknown.
+ */
+ UNKNOWN_ENCODING,
+ /**
+ * If the document is incomplete. This is one of the few recoverable errors,
+ * if you call the processor with more data it will continue.
+ */
+ INCOMPLETE,
+ /**
+ * An entity in the document exceeded the max buffer size (either set by
+ * ProcessorBuilder or the default 10 MiB).
+ */
+ MAX_MEMORY_EXCEEDED,
+ /**
+ * A memory allocation failed. Note that this doesn't protect against
+ * usage of overallocated memory.
+ */
+ OUT_OF_MEMORY,
+};
+
+} // namespace sax
+} // namespace modxml
+
+#endif // MODXML_SAX_ERROR_HH
diff --git a/sax/inc/sax_processor.hh b/sax/inc/sax_processor.hh
new file mode 100644
index 0000000..7ca32f7
--- /dev/null
+++ b/sax/inc/sax_processor.hh
@@ -0,0 +1,37 @@
+#ifndef MODXML_SAX_PROCESSOR_HH
+#define MODXML_SAX_PROCESSOR_HH
+
+#include <memory>
+
+namespace modxml {
+namespace sax {
+
+class Delegate;
+
+/**
+ * The XML processor, or parser if you like that term better.
+ * Feed it data and the processor will call the delegate with events or
+ * possibly errors.
+ */
+class Processor {
+ public:
+ virtual ~Processor() = default;
+
+ /**
+ * Construct a Processor. Same as creating a ProcessorBuilder,
+ * not changing any options and just calling build() with delegate.
+ */
+ static std::unique_ptr<Processor> create(std::shared_ptr<Delegate> delegate);
+
+ protected:
+ Processor() = default;
+
+ private:
+ Processor(Processor const&) = delete;  // not copy-constructible
+ Processor& operator=(Processor const&) = delete;  // not copy-assignable
+};
+
+} // namespace sax
+} // namespace modxml
+
+#endif // MODXML_SAX_PROCESSOR_HH
diff --git a/sax/inc/sax_processor_builder.hh b/sax/inc/sax_processor_builder.hh
new file mode 100644
index 0000000..070fbbf
--- /dev/null
+++ b/sax/inc/sax_processor_builder.hh
@@ -0,0 +1,82 @@
+#ifndef MODXML_SAX_PROCESSOR_BUILDER_HH
+#define MODXML_SAX_PROCESSOR_BUILDER_HH
+
+#include <memory>
+#include <string>
+
+namespace modxml {
+namespace sax {
+
+class DecoderFactory;
+class Delegate;
+class Processor;
+
+/**
+ * Used to construct Processors with options set if needed.
+ */
+class ProcessorBuilder {
+ public:
+ virtual ~ProcessorBuilder() = default;
+
+ /**
+ * Construct a ProcessorBuilder. All options are set to default.
+ */
+ static std::unique_ptr<ProcessorBuilder> create();
+
+ /**
+ * If you know the encoding of the data sent to the processor set it here,
+ * this will stop the processor from trying to autodetect and will ignore
+ * encoding in any XML declaration if found.
+ * If the encoding is unsupported/unknown the processor will fail with
+ * an error indicating this, same as if it read an XML declaration with
+ * an unsupported or unknown encoding.
+ */
+ virtual ProcessorBuilder* force_encoding(std::string const& str) = 0;
+
+ /**
+ * Set a decoder factory for encodings not supported by library.
+ * Library only calls this for encodings it doesn't support itself.
+ * Library supports UTF-8, UTF-16, UTF-32 and US-ASCII.
+ * If you want to force the decoder factory to be used, force a custom
+ * encoding with force_encoding above.
+ */
+ virtual ProcessorBuilder* custom_decoder_factory(
+ std::shared_ptr<DecoderFactory> custom_decoder_factory) = 0;
+
+ /**
+ * Set the default buffer size the processor should use.
+ * If you give a too small buffer size (such as zero) it will be ignored
+ * and an implementation-specific minimum will be used instead.
+ * This is meant as a possible optimization and can be completely ignored.
+ * Note that the processor will allocate more data if needed.
+ */
+ virtual ProcessorBuilder* set_default_buffer_size(std::size_t size) = 0;
+
+ /**
+ * Set the max buffer size the processor should use.
+ * If you have memory constraints this will block the processing of CDATA,
+ * or other entities from allocating more than the given size.
+ * Default is 10MiB.
+ */
+ virtual ProcessorBuilder* set_max_buffer_size(std::size_t size) = 0;
+
+ /**
+ * Call to construct a Processor with the options setup in this builder,
+ * using the delegate given as parameter.
+ * May be called multiple times, will create a unique Processor each time.
+ */
+ virtual std::unique_ptr<Processor> build(
+ std::shared_ptr<Delegate> delegate) const = 0;
+
+ protected:
+ ProcessorBuilder() = default;
+
+ private:
+ ProcessorBuilder(ProcessorBuilder const&) = delete;
+ ProcessorBuilder& operator=(ProcessorBuilder const&) = delete;
+};
+
+} // namespace sax
+} // namespace modxml
+
+#endif // MODXML_SAX_PROCESSOR_BUILDER_HH