summaryrefslogtreecommitdiff
path: root/sax/inc/sax_decoder.hh
diff options
context:
space:
mode:
Diffstat (limited to 'sax/inc/sax_decoder.hh')
-rw-r--r--sax/inc/sax_decoder.hh26
1 files changed, 12 insertions, 14 deletions
diff --git a/sax/inc/sax_decoder.hh b/sax/inc/sax_decoder.hh
index 40a56c9..8b2490c 100644
--- a/sax/inc/sax_decoder.hh
+++ b/sax/inc/sax_decoder.hh
@@ -1,16 +1,15 @@
#ifndef SAX_DECODER_HH
#define SAX_DECODER_HH
-#include <memory>
-#include <string>
-#include <string_view>
+#include <cstdint>
+#include <span>
namespace modxml {
namespace sax {
/**
* Decoder returned by DecoderFactory. Used by Processor to turn bytes into
- * unicode characters.
+ * unicode characters encoded as UTF-8.
*/
class Decoder {
public:
@@ -18,9 +17,9 @@ class Decoder {
enum class State {
GOOD = 0,
- // too little data was given to advance
+ // too little data was given to decode
NEED_MORE,
- // invalid data was given to advance
+ // invalid data was given to decode
INVALID,
};
@@ -29,23 +28,22 @@ class Decoder {
* write them to out (start at out_offset) as UTF-8.
* All written code points must be valid per Unicode, so inside the
* range U+0 to U+10FFFF and not a surrogate code point (U+D800-U+DFFF).
- * No partial output, only write to out if the whole UTF-8 sequence is
- * going to fit.
- * The is always at least 4 bytes available (out.size() - out_offset) when
+ * No partial code point output, only write to out if the whole UTF-8
+ * sequence for the code point is going to fit.
+ * There will always be at least 4 bytes available (out.size() - out_offset) when
* called.
- * Advance in_offset for data consumed.
+ * Advance in_offset for data consumed. Do NOT read past in.size().
* Advance out_offset for code points written. Do NOT write past out.size().
- * Do NOT resize out.
* If at least one code point is decoded and written to out, return GOOD.
* If it is not possible to decode a single code point, in_offset and
* out_offset should not be advanced and something other than GOOD returned.
* Do not keep any references to any of the parameters after returning, next
- * advance() call will point to the following bytes, but all parameters
+ * decode() call will point to the following bytes, but all parameters
* may have changed as they are subject to the buffer implementations of the
* Processor.
*/
- virtual State decode(std::string_view in, std::size_t& in_offset,
- std::string& out, std::size_t& out_offset) = 0;
+ virtual State decode(std::span<uint8_t const> in, std::size_t& in_offset,
+ std::span<uint8_t> out, std::size_t& out_offset) = 0;
protected:
Decoder() = default;