#ifndef SAX_DECODER_HH
#define SAX_DECODER_HH

#include <cstddef>
#include <string>
#include <string_view>

namespace modxml {
namespace sax {

/**
 * Decoder returned by DecoderFactory. Used by Processor to turn bytes into
 * Unicode characters.
 *
 * This is a pure interface: concrete decoders derive from it and implement
 * decode(). Instances are destroyed through the base class, hence the
 * public virtual destructor; construction is only possible from a derived
 * class.
 */
class Decoder {
 public:
  virtual ~Decoder() = default;

  /** Outcome of a single decode() call. */
  enum class State {
    /** At least one code point was decoded and written to the output. */
    GOOD = 0,
    /** Too little data was given to advance. */
    NEED_MORE,
    /** Invalid data was given to advance. */
    INVALID,
  };

  /**
   * Decode as many code points as possible from `in` (starting at
   * `in_offset`) and write them to `out` (starting at `out_offset`) as UTF-8.
   *
   * Requirements on the implementation:
   *  - All written code points must be valid per Unicode, i.e. inside the
   *    range U+0 to U+10FFFF and not a surrogate code point
   *    (U+D800-U+DFFF).
   *  - No partial output: only write to `out` if the whole UTF-8 sequence is
   *    going to fit.
   *  - Do NOT write past out.size() and do NOT resize `out`.
   *  - Do not keep any references to any of the parameters after returning.
   *    The next decode() call will point to the following bytes, but all
   *    parameters may have changed as they are subject to the buffer
   *    implementations of the Processor.
   *
   * Guarantee from the caller: there are always at least 4 bytes available
   * (out.size() - out_offset) when decode() is called.
   *
   * @param in          Input bytes to decode.
   * @param in_offset   Position in `in` to start reading at; advanced by the
   *                    amount of data consumed.
   * @param out         Output buffer receiving UTF-8.
   * @param out_offset  Position in `out` to start writing at; advanced for
   *                    the code points written.
   * @return State::GOOD if at least one code point was decoded and written
   *         to `out`. If it is not possible to decode a single code point,
   *         `in_offset` and `out_offset` should not be advanced and something
   *         other than State::GOOD is returned.
   */
  virtual State decode(std::string_view in, std::size_t& in_offset,
                       std::string& out, std::size_t& out_offset) = 0;

 protected:
  Decoder() = default;
};

}  // namespace sax
}  // namespace modxml

#endif  // SAX_DECODER_HH