#ifndef SAX_DECODER_HH #define SAX_DECODER_HH #include #include namespace modxml { namespace sax { /** * Decoder returned by DecoderFactory. Used by Processor to turn bytes into * unicode characters encoded as UTF-8. */ class Decoder { public: virtual ~Decoder() = default; enum class State { GOOD = 0, // too little data was given to decode NEED_MORE, // invalid data was given to decode INVALID, }; /** * Decode as many code points as possible from in (start at in_offset) and * write them to out (start at out_offset) as UTF-8. * All written code points must be valid per Unicode, so inside the * range U+0 to U+10FFFF and not a surrogate pair (U+D800-U+DFFF). * No partial code point output, only write to out if the whole UTF-8 * sequence for the code point is going to fit. * There will always at least 4 bytes available (out.size() - out_offset) when * called. * Advance in_offset for data consumed. Do NOT read past in.size(). * Advance out_offset for code points written. Do NOT write past out.size(). * If at least one code point is decoded and written to out, return GOOD. * If it is not possible to decode a single code point, in_offset and * out_offset should not be advanced and something other than GOOD returned. * Do not keep any references to any of the parameters after returning, next * decode() call will point to the following bytes, but all parameters * may have changed as they are subject to the buffer implementations of the * Processor. */ virtual State decode(std::span in, std::size_t& in_offset, std::span out, std::size_t& out_offset) = 0; protected: Decoder() = default; }; } // namespace sax } // namespace modxml #endif // SAX_DECODER_HH