summaryrefslogtreecommitdiff
path: root/sax/inc/sax_decoder.hh
blob: 40a56c9eba35c5b5ea867a8e740a590c09268ead (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#ifndef SAX_DECODER_HH
#define SAX_DECODER_HH

#include <memory>
#include <string>
#include <string_view>

namespace modxml {
namespace sax {

/**
 * Interface for the decoders handed out by DecoderFactory. Processor uses a
 * Decoder to convert raw bytes into unicode characters.
 */
class Decoder {
 public:
  /** Outcome of a decode() call. */
  enum class State {
    /** At least one code point was decoded and written to the output. */
    GOOD = 0,
    /** Not enough input was supplied to make any progress. */
    NEED_MORE = 1,
    /** The supplied input was invalid, so no progress could be made. */
    INVALID = 2,
  };

  virtual ~Decoder() = default;

  /**
   * Decode as many code points as possible and store them as UTF-8.
   *
   * Input is read from in beginning at in_offset; output is written into out
   * beginning at out_offset.
   *
   * Requirements on the output:
   *  - Every code point written must be valid per Unicode: within
   *    U+0..U+10FFFF and not a surrogate code point (U+D800-U+DFFF).
   *  - Never emit a partial UTF-8 sequence; only write a code point when its
   *    whole encoding fits in the remaining space.
   *  - Never write past out.size() and never resize out. The caller
   *    guarantees that at least 4 bytes (out.size() - out_offset) are
   *    available on entry.
   *
   * Offsets:
   *  - in_offset is advanced past the input that was consumed.
   *  - out_offset is advanced past the code points that were written
   *    (TODO confirm: presumably counted in bytes of out).
   *  - If not even a single code point could be decoded, neither offset may
   *    be advanced.
   *
   * Lifetime: do not hold on to references to any of the parameters after
   * returning. The next call continues with the bytes that follow, but every
   * parameter may have changed, since they are subject to the buffer
   * implementations of the Processor.
   *
   * @return GOOD if at least one code point was decoded and written to out,
   *         otherwise something other than GOOD.
   */
  virtual State decode(std::string_view in, std::size_t& in_offset,
                       std::string& out, std::size_t& out_offset) = 0;

 protected:
  // Only constructible through a derived class.
  Decoder() = default;
};

}  // namespace sax
}  // namespace modxml

#endif  // SAX_DECODER_HH