summary | refs | log | tree | commit | diff
path: root/sax/src
diff options
context:
space:
mode:
author Joel Klinghed <the_jk@spawned.biz> 2024-01-21 14:32:27 +0100
committer Joel Klinghed <the_jk@spawned.biz> 2024-01-21 14:32:27 +0100
commit 507a3936aa2f7a32f7f45c13734ffbe0ed4a2078 (patch)
tree 40b83de62173e3112a0a6fe35ba66a517cef4894 /sax/src
parent 7dd49c6293172b494c78918507242cdb55d35137 (diff)
Diffstat (limited to 'sax/src')
-rw-r--r-- sax/src/guessing_decoder.cc 26
-rw-r--r-- sax/src/sax_processor.cc 9
2 files changed, 17 insertions, 18 deletions
diff --git a/sax/src/guessing_decoder.cc b/sax/src/guessing_decoder.cc
index e72dab3..0e3b628 100644
--- a/sax/src/guessing_decoder.cc
+++ b/sax/src/guessing_decoder.cc
@@ -18,7 +18,7 @@ bool eq(std::span<uint8_t const> a, std::size_t& a_offset, std::string_view b) {
if (a.size() - a_offset < b.size())
return false;
for (size_t i = 0; i < b.size(); ++i)
- if (a[a_offset + i] != b[i])
+ if (a[a_offset + i] != static_cast<uint8_t>(b[i]))
return false;
return true;
}
@@ -32,37 +32,37 @@ class GuessingDecoder : public Decoder {
if (!decided_) {
if (eq(in, in_offset, "\xef\xbb\xbf"sv)) {
decided_ = create_utf8_decoder();
- } else if (eq(in, in_offset, "\xfe\xff\x00\x00"sv)) {
+ } else if (eq(in, in_offset, "\xff\xfe\x00\x00"sv)) {
in_offset += 4;
- decided_ = create_utf32be_decoder();
- } else if (eq(in, in_offset, "\xfe\xff"sv)) {
+ decided_ = create_utf32le_decoder();
+ } else if (eq(in, in_offset, "\xff\xfe"sv)) {
// Could be UTF-32 BOM, need more data to decide
// (note, an xml document encoded in UTF-16 that is less than 4 bytes
// is rather impossible).
if (in.size() - in_offset < 4)
return State::NEED_MORE;
in_offset += 2;
- decided_ = create_utf16be_decoder();
- } else if (eq(in, in_offset, "\xff\xfe"sv)) {
- in_offset += 2;
decided_ = create_utf16le_decoder();
- } else if (eq(in, in_offset, "\x00\x00\xff\xfe"sv)) {
+ } else if (eq(in, in_offset, "\xfe\xff"sv)) {
+ in_offset += 2;
+ decided_ = create_utf16be_decoder();
+ } else if (eq(in, in_offset, "\x00\x00\xfe\xff"sv)) {
in_offset += 4;
- decided_ = create_utf32le_decoder();
+ decided_ = create_utf32be_decoder();
} else {
auto avail = in.size() - in_offset;
if (avail == 0)
return State::NEED_MORE;
if (avail >= 4 && in[in_offset] == 0 && in[in_offset + 1] == 0
&& in[in_offset + 2] == 0 && in[in_offset + 3] != 0) {
- decided_ = create_utf32le_decoder();
+ decided_ = create_utf32be_decoder();
} else if (avail >= 4 && in[in_offset] != 0 && in[in_offset + 1] == 0
&& in[in_offset + 2] == 0 && in[in_offset + 3] == 0) {
- decided_ = create_utf32be_decoder();
+ decided_ = create_utf32le_decoder();
} else if (avail >= 2 && in[in_offset] == 0 && in[in_offset + 1] != 0) {
- decided_ = create_utf16le_decoder();
- } else if (avail >= 2 && in[in_offset] != 0 && in[in_offset + 1] == 0) {
decided_ = create_utf16be_decoder();
+ } else if (avail >= 2 && in[in_offset] != 0 && in[in_offset + 1] == 0) {
+ decided_ = create_utf16le_decoder();
} else {
auto tmp = in_offset;
auto ret = utf::read8(in, tmp);
diff --git a/sax/src/sax_processor.cc b/sax/src/sax_processor.cc
index afc9d3b..2b627e0 100644
--- a/sax/src/sax_processor.cc
+++ b/sax/src/sax_processor.cc
@@ -1,7 +1,5 @@
#include "sax_processor.hh"
-#include <iostream>
-
#include "buffer.hh"
#include "guessing_decoder.hh"
#include "processor.hh"
@@ -304,7 +302,6 @@ class ProcessorImpl : public Processor {
while (true) {
if (cmds_.empty()) {
if (!buffer_->empty()) {
- std::cerr << make_string_view(buffer_->rspan()) << std::endl;
delegate_->error("Extra data at end");
}
return consumed;
@@ -430,7 +427,8 @@ class ProcessorImpl : public Processor {
std::size_t tmp = offset;
auto wspan = buffer_->wspan(4);
- switch (decoder_->decode(data, tmp, wspan, consumed)) {
+ std::size_t wrote = 0;
+ switch (decoder_->decode(data, tmp, wspan, wrote)) {
case Decoder::State::GOOD:
break;
case Decoder::State::NEED_MORE:
@@ -439,7 +437,8 @@ class ProcessorImpl : public Processor {
delegate_->error("Invalid data");
return Process::ERROR;
}
- buffer_->commit(consumed);
+ buffer_->commit(wrote);
+ consumed = tmp - offset;
return Process::CONTINUE;
}