diff options
| author | Joel Klinghed <the_jk@spawned.biz> | 2024-01-21 14:32:27 +0100 |
|---|---|---|
| committer | Joel Klinghed <the_jk@spawned.biz> | 2024-01-21 14:32:27 +0100 |
| commit | 507a3936aa2f7a32f7f45c13734ffbe0ed4a2078 (patch) | |
| tree | 40b83de62173e3112a0a6fe35ba66a517cef4894 /sax/src | |
| parent | 7dd49c6293172b494c78918507242cdb55d35137 (diff) | |
Diffstat (limited to 'sax/src')
| -rw-r--r-- | sax/src/guessing_decoder.cc | 26 | ||||
| -rw-r--r-- | sax/src/sax_processor.cc | 9 |
2 files changed, 17 insertions, 18 deletions
diff --git a/sax/src/guessing_decoder.cc b/sax/src/guessing_decoder.cc index e72dab3..0e3b628 100644 --- a/sax/src/guessing_decoder.cc +++ b/sax/src/guessing_decoder.cc @@ -18,7 +18,7 @@ bool eq(std::span<uint8_t const> a, std::size_t& a_offset, std::string_view b) { if (a.size() - a_offset < b.size()) return false; for (size_t i = 0; i < b.size(); ++i) - if (a[a_offset + i] != b[i]) + if (a[a_offset + i] != static_cast<uint8_t>(b[i])) return false; return true; } @@ -32,37 +32,37 @@ class GuessingDecoder : public Decoder { if (!decided_) { if (eq(in, in_offset, "\xef\xbb\xbf"sv)) { decided_ = create_utf8_decoder(); - } else if (eq(in, in_offset, "\xfe\xff\x00\x00"sv)) { + } else if (eq(in, in_offset, "\xff\xfe\x00\x00"sv)) { in_offset += 4; - decided_ = create_utf32be_decoder(); - } else if (eq(in, in_offset, "\xfe\xff"sv)) { + decided_ = create_utf32le_decoder(); + } else if (eq(in, in_offset, "\xff\xfe"sv)) { // Could be UTF-32 BOM, need more data to decide // (note, an xml document encoded in UTF-16 that is less than 4 bytes // is rather impossible). if (in.size() - in_offset < 4) return State::NEED_MORE; in_offset += 2; - decided_ = create_utf16be_decoder(); - } else if (eq(in, in_offset, "\xff\xfe"sv)) { - in_offset += 2; decided_ = create_utf16le_decoder(); - } else if (eq(in, in_offset, "\x00\x00\xff\xfe"sv)) { + } else if (eq(in, in_offset, "\xfe\xff"sv)) { + in_offset += 2; + decided_ = create_utf16be_decoder(); + } else if (eq(in, in_offset, "\x00\x00\xfe\xff"sv)) { in_offset += 4; - decided_ = create_utf32le_decoder(); + decided_ = create_utf32be_decoder(); } else { auto avail = in.size() - in_offset; if (avail == 0) return State::NEED_MORE; if (avail >= 4 && in[in_offset] == 0 && in[in_offset + 1] == 0 && in[in_offset + 2] == 0 && in[in_offset + 3] != 0) { - decided_ = create_utf32le_decoder(); + decided_ = create_utf32be_decoder(); } else if (avail >= 4 && in[in_offset] != 0 && in[in_offset + 1] == 0 && in[in_offset + 2] == 0 && in[in_offset + 3] == 0) { - decided_ = create_utf32be_decoder(); + decided_ = create_utf32le_decoder(); } else if (avail >= 2 && in[in_offset] == 0 && in[in_offset + 1] != 0) { - decided_ = create_utf16le_decoder(); - } else if (avail >= 2 && in[in_offset] != 0 && in[in_offset + 1] == 0) { decided_ = create_utf16be_decoder(); + } else if (avail >= 2 && in[in_offset] != 0 && in[in_offset + 1] == 0) { + decided_ = create_utf16le_decoder(); } else { auto tmp = in_offset; auto ret = utf::read8(in, tmp); diff --git a/sax/src/sax_processor.cc b/sax/src/sax_processor.cc index afc9d3b..2b627e0 100644 --- a/sax/src/sax_processor.cc +++ b/sax/src/sax_processor.cc @@ -1,7 +1,5 @@ #include "sax_processor.hh" -#include <iostream> - #include "buffer.hh" #include "guessing_decoder.hh" #include "processor.hh" @@ -304,7 +302,6 @@ class ProcessorImpl : public Processor { while (true) { if (cmds_.empty()) { if (!buffer_->empty()) { - std::cerr << make_string_view(buffer_->rspan()) << std::endl; delegate_->error("Extra data at end"); } return consumed; @@ -430,7 +427,8 @@ class ProcessorImpl : public Processor { std::size_t tmp = offset; auto wspan = buffer_->wspan(4); - switch (decoder_->decode(data, tmp, wspan, consumed)) { + std::size_t wrote = 0; + switch (decoder_->decode(data, tmp, wspan, wrote)) { case Decoder::State::GOOD: break; case Decoder::State::NEED_MORE: @@ -439,7 +437,8 @@ class ProcessorImpl : public Processor { delegate_->error("Invalid data"); return Process::ERROR; } - buffer_->commit(consumed); + buffer_->commit(wrote); + consumed = tmp - offset; return Process::CONTINUE; } |
