diff options
Diffstat (limited to 'sax/inc/sax_decoder.hh')
| -rw-r--r-- | sax/inc/sax_decoder.hh | 26 |
1 files changed, 12 insertions, 14 deletions
diff --git a/sax/inc/sax_decoder.hh b/sax/inc/sax_decoder.hh index 40a56c9..8b2490c 100644 --- a/sax/inc/sax_decoder.hh +++ b/sax/inc/sax_decoder.hh @@ -1,16 +1,15 @@ #ifndef SAX_DECODER_HH #define SAX_DECODER_HH -#include <memory> -#include <string> -#include <string_view> +#include <cstdint> +#include <span> namespace modxml { namespace sax { /** * Decoder returned by DecoderFactory. Used by Processor to turn bytes into - * unicode characters. + * unicode characters encoded as UTF-8. */ class Decoder { public: @@ -18,9 +17,9 @@ class Decoder { enum class State { GOOD = 0, - // too little data was given to advance + // too little data was given to decode NEED_MORE, - // invalid data was given to advance + // invalid data was given to decode INVALID, }; @@ -29,23 +28,22 @@ class Decoder { * write them to out (start at out_offset) as UTF-8. * All written code points must be valid per Unicode, so inside the * range U+0 to U+10FFFF and not a surrogate pair (U+D800-U+DFFF). - * No partial output, only write to out if the whole UTF-8 sequence is - * going to fit. - * The is always at least 4 bytes available (out.size() - out_offset) when + * No partial code point output, only write to out if the whole UTF-8 + * sequence for the code point is going to fit. + * There will always be at least 4 bytes available (out.size() - out_offset) when * called. - * Advance in_offset for data consumed. + * Advance in_offset for data consumed. Do NOT read past in.size(). * Advance out_offset for code points written. Do NOT write past out.size(). - * Do NOT resize out. * If at least one code point is decoded and written to out, return GOOD. * If it is not possible to decode a single code point, in_offset and * out_offset should not be advanced and something other than GOOD returned. 
* Do not keep any references to any of the parameters after returning, next - * advance() call will point to the following bytes, but all parameters + * decode() call will point to the following bytes, but all parameters * may have changed as they are subject to the buffer implementations of the * Processor. */ - virtual State decode(std::string_view in, std::size_t& in_offset, - std::string& out, std::size_t& out_offset) = 0; + virtual State decode(std::span<uint8_t const> in, std::size_t& in_offset, + std::span<uint8_t> out, std::size_t& out_offset) = 0; protected: Decoder() = default; |
