summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoel Klinghed <the_jk@spawned.biz>2024-01-21 14:32:27 +0100
committerJoel Klinghed <the_jk@spawned.biz>2024-01-21 14:32:27 +0100
commit507a3936aa2f7a32f7f45c13734ffbe0ed4a2078 (patch)
tree40b83de62173e3112a0a6fe35ba66a517cef4894
parent7dd49c6293172b494c78918507242cdb55d35137 (diff)
-rw-r--r--sax/src/guessing_decoder.cc26
-rw-r--r--sax/src/sax_processor.cc9
-rw-r--r--sax/tst/test_buffer.cc2
-rw-r--r--sax/tst/test_decoder.cc10
4 files changed, 23 insertions, 24 deletions
diff --git a/sax/src/guessing_decoder.cc b/sax/src/guessing_decoder.cc
index e72dab3..0e3b628 100644
--- a/sax/src/guessing_decoder.cc
+++ b/sax/src/guessing_decoder.cc
@@ -18,7 +18,7 @@ bool eq(std::span<uint8_t const> a, std::size_t& a_offset, std::string_view b) {
if (a.size() - a_offset < b.size())
return false;
for (size_t i = 0; i < b.size(); ++i)
- if (a[a_offset + i] != b[i])
+ if (a[a_offset + i] != static_cast<uint8_t>(b[i]))
return false;
return true;
}
@@ -32,37 +32,37 @@ class GuessingDecoder : public Decoder {
if (!decided_) {
if (eq(in, in_offset, "\xef\xbb\xbf"sv)) {
decided_ = create_utf8_decoder();
- } else if (eq(in, in_offset, "\xfe\xff\x00\x00"sv)) {
+ } else if (eq(in, in_offset, "\xff\xfe\x00\x00"sv)) {
in_offset += 4;
- decided_ = create_utf32be_decoder();
- } else if (eq(in, in_offset, "\xfe\xff"sv)) {
+ decided_ = create_utf32le_decoder();
+ } else if (eq(in, in_offset, "\xff\xfe"sv)) {
// Could be UTF-32 BOM, need more data to decide
// (note, an xml document encoded in UTF-16 that is less than 4 bytes
// is rather impossible).
if (in.size() - in_offset < 4)
return State::NEED_MORE;
in_offset += 2;
- decided_ = create_utf16be_decoder();
- } else if (eq(in, in_offset, "\xff\xfe"sv)) {
- in_offset += 2;
decided_ = create_utf16le_decoder();
- } else if (eq(in, in_offset, "\x00\x00\xff\xfe"sv)) {
+ } else if (eq(in, in_offset, "\xfe\xff"sv)) {
+ in_offset += 2;
+ decided_ = create_utf16be_decoder();
+ } else if (eq(in, in_offset, "\x00\x00\xfe\xff"sv)) {
in_offset += 4;
- decided_ = create_utf32le_decoder();
+ decided_ = create_utf32be_decoder();
} else {
auto avail = in.size() - in_offset;
if (avail == 0)
return State::NEED_MORE;
if (avail >= 4 && in[in_offset] == 0 && in[in_offset + 1] == 0
&& in[in_offset + 2] == 0 && in[in_offset + 3] != 0) {
- decided_ = create_utf32le_decoder();
+ decided_ = create_utf32be_decoder();
} else if (avail >= 4 && in[in_offset] != 0 && in[in_offset + 1] == 0
&& in[in_offset + 2] == 0 && in[in_offset + 3] == 0) {
- decided_ = create_utf32be_decoder();
+ decided_ = create_utf32le_decoder();
} else if (avail >= 2 && in[in_offset] == 0 && in[in_offset + 1] != 0) {
- decided_ = create_utf16le_decoder();
- } else if (avail >= 2 && in[in_offset] != 0 && in[in_offset + 1] == 0) {
decided_ = create_utf16be_decoder();
+ } else if (avail >= 2 && in[in_offset] != 0 && in[in_offset + 1] == 0) {
+ decided_ = create_utf16le_decoder();
} else {
auto tmp = in_offset;
auto ret = utf::read8(in, tmp);
diff --git a/sax/src/sax_processor.cc b/sax/src/sax_processor.cc
index afc9d3b..2b627e0 100644
--- a/sax/src/sax_processor.cc
+++ b/sax/src/sax_processor.cc
@@ -1,7 +1,5 @@
#include "sax_processor.hh"
-#include <iostream>
-
#include "buffer.hh"
#include "guessing_decoder.hh"
#include "processor.hh"
@@ -304,7 +302,6 @@ class ProcessorImpl : public Processor {
while (true) {
if (cmds_.empty()) {
if (!buffer_->empty()) {
- std::cerr << make_string_view(buffer_->rspan()) << std::endl;
delegate_->error("Extra data at end");
}
return consumed;
@@ -430,7 +427,8 @@ class ProcessorImpl : public Processor {
std::size_t tmp = offset;
auto wspan = buffer_->wspan(4);
- switch (decoder_->decode(data, tmp, wspan, consumed)) {
+ std::size_t wrote = 0;
+ switch (decoder_->decode(data, tmp, wspan, wrote)) {
case Decoder::State::GOOD:
break;
case Decoder::State::NEED_MORE:
@@ -439,7 +437,8 @@ class ProcessorImpl : public Processor {
delegate_->error("Invalid data");
return Process::ERROR;
}
- buffer_->commit(consumed);
+ buffer_->commit(wrote);
+ consumed = tmp - offset;
return Process::CONTINUE;
}
diff --git a/sax/tst/test_buffer.cc b/sax/tst/test_buffer.cc
index 13bc6d4..3a40792 100644
--- a/sax/tst/test_buffer.cc
+++ b/sax/tst/test_buffer.cc
@@ -140,6 +140,8 @@ TEST_P(BufferTest, skip_wrap) {
EXPECT_EQ(5u, buf->write(AAAAAAAAAA));
+ auto data = buf->rspan(10);
+ EXPECT_EQ(10u, data.size());
buf->consume(10);
EXPECT_TRUE(buf->empty());
}
diff --git a/sax/tst/test_decoder.cc b/sax/tst/test_decoder.cc
index 86f230b..24434f5 100644
--- a/sax/tst/test_decoder.cc
+++ b/sax/tst/test_decoder.cc
@@ -54,7 +54,6 @@ TEST(sax, decoder_utf8) {
auto delegate = std::make_shared<TestDelegate>();
auto processor = modxml::sax::Processor::create(delegate);
std::string input = R"(<?xml version="1.0" encoding="utf-8"?><root />)";
- std::cerr << input << std::endl;
EXPECT_TRUE(process_all(
*processor.get(),
*delegate.get(),
@@ -68,7 +67,6 @@ TEST(sax, decoder_utf8_bom) {
auto processor = modxml::sax::Processor::create(delegate);
std::string input =
"\xef\xbb\xbf" R"(<?xml version="1.0" encoding="utf-8"?><root />)";
- std::cerr << input << std::endl;
EXPECT_TRUE(process_all(
*processor.get(),
*delegate.get(),
@@ -125,7 +123,7 @@ TEST(sax, decoder_utf16be_bom) {
auto delegate = std::make_shared<TestDelegate>();
auto processor = modxml::sax::Processor::create(delegate);
std::u16string str =
- u"\ufffe" uR"(<?xml version="1.0" encoding="utf-16"?><root />)";
+ u"\ufeff" uR"(<?xml version="1.0" encoding="utf-16"?><root />)";
std::vector<uint8_t> input;
for (char16_t c : str) {
input.push_back(c >> 8);
@@ -142,7 +140,7 @@ TEST(sax, decoder_utf16le_bom) {
auto delegate = std::make_shared<TestDelegate>();
auto processor = modxml::sax::Processor::create(delegate);
std::u16string str =
- u"\ufffe" uR"(<?xml version="1.0" encoding="utf-16"?><root />)";
+ u"\ufeff" uR"(<?xml version="1.0" encoding="utf-16"?><root />)";
std::vector<uint8_t> input;
for (char16_t c : str) {
input.push_back(c & 0xff);
@@ -207,7 +205,7 @@ TEST(sax, decoder_utf32be_bom) {
auto delegate = std::make_shared<TestDelegate>();
auto processor = modxml::sax::Processor::create(delegate);
std::u32string str =
- U"\ufffe" UR"(<?xml version="1.0" encoding="utf-32"?><root />)";
+ U"\ufeff" UR"(<?xml version="1.0" encoding="utf-32"?><root />)";
std::vector<uint8_t> input;
for (char32_t c : str) {
input.push_back(c >> 24);
@@ -226,7 +224,7 @@ TEST(sax, decoder_utf32le_bom) {
auto delegate = std::make_shared<TestDelegate>();
auto processor = modxml::sax::Processor::create(delegate);
std::u32string str =
- U"\ufffe" R"(<?xml version="1.0" encoding="utf-32"?><root />)";
+ U"\ufeff" R"(<?xml version="1.0" encoding="utf-32"?><root />)";
std::vector<uint8_t> input;
for (char32_t c : str) {
input.push_back(c & 0xff);