diff options
| -rw-r--r-- | sax/src/guessing_decoder.cc | 26 | ||||
| -rw-r--r-- | sax/src/sax_processor.cc | 9 | ||||
| -rw-r--r-- | sax/tst/test_buffer.cc | 2 | ||||
| -rw-r--r-- | sax/tst/test_decoder.cc | 10 |
4 files changed, 23 insertions, 24 deletions
diff --git a/sax/src/guessing_decoder.cc b/sax/src/guessing_decoder.cc index e72dab3..0e3b628 100644 --- a/sax/src/guessing_decoder.cc +++ b/sax/src/guessing_decoder.cc @@ -18,7 +18,7 @@ bool eq(std::span<uint8_t const> a, std::size_t& a_offset, std::string_view b) { if (a.size() - a_offset < b.size()) return false; for (size_t i = 0; i < b.size(); ++i) - if (a[a_offset + i] != b[i]) + if (a[a_offset + i] != static_cast<uint8_t>(b[i])) return false; return true; } @@ -32,37 +32,37 @@ class GuessingDecoder : public Decoder { if (!decided_) { if (eq(in, in_offset, "\xef\xbb\xbf"sv)) { decided_ = create_utf8_decoder(); - } else if (eq(in, in_offset, "\xfe\xff\x00\x00"sv)) { + } else if (eq(in, in_offset, "\xff\xfe\x00\x00"sv)) { in_offset += 4; - decided_ = create_utf32be_decoder(); - } else if (eq(in, in_offset, "\xfe\xff"sv)) { + decided_ = create_utf32le_decoder(); + } else if (eq(in, in_offset, "\xff\xfe"sv)) { // Could be UTF-32 BOM, need more data to decide // (note, an xml document encoded in UTF-16 that is less than 4 bytes // is rather impossible). if (in.size() - in_offset < 4) return State::NEED_MORE; in_offset += 2; - decided_ = create_utf16be_decoder(); - } else if (eq(in, in_offset, "\xff\xfe"sv)) { - in_offset += 2; decided_ = create_utf16le_decoder(); - } else if (eq(in, in_offset, "\x00\x00\xff\xfe"sv)) { + } else if (eq(in, in_offset, "\xfe\xff"sv)) { + in_offset += 2; + decided_ = create_utf16be_decoder(); + } else if (eq(in, in_offset, "\x00\x00\xfe\xff"sv)) { in_offset += 4; - decided_ = create_utf32le_decoder(); + decided_ = create_utf32be_decoder(); } else { auto avail = in.size() - in_offset; if (avail == 0) return State::NEED_MORE; if (avail >= 4 && in[in_offset] == 0 && in[in_offset + 1] == 0 && in[in_offset + 2] == 0 && in[in_offset + 3] != 0) { - decided_ = create_utf32le_decoder(); + decided_ = create_utf32be_decoder(); } else if (avail >= 4 && in[in_offset] != 0 && in[in_offset + 1] == 0 && in[in_offset + 2] == 0 && in[in_offset + 3] == 0) { - decided_ = create_utf32be_decoder(); + decided_ = create_utf32le_decoder(); } else if (avail >= 2 && in[in_offset] == 0 && in[in_offset + 1] != 0) { - decided_ = create_utf16le_decoder(); - } else if (avail >= 2 && in[in_offset] != 0 && in[in_offset + 1] == 0) { decided_ = create_utf16be_decoder(); + } else if (avail >= 2 && in[in_offset] != 0 && in[in_offset + 1] == 0) { + decided_ = create_utf16le_decoder(); } else { auto tmp = in_offset; auto ret = utf::read8(in, tmp); diff --git a/sax/src/sax_processor.cc b/sax/src/sax_processor.cc index afc9d3b..2b627e0 100644 --- a/sax/src/sax_processor.cc +++ b/sax/src/sax_processor.cc @@ -1,7 +1,5 @@ #include "sax_processor.hh" -#include <iostream> - #include "buffer.hh" #include "guessing_decoder.hh" #include "processor.hh" @@ -304,7 +302,6 @@ class ProcessorImpl : public Processor { while (true) { if (cmds_.empty()) { if (!buffer_->empty()) { - std::cerr << make_string_view(buffer_->rspan()) << std::endl; delegate_->error("Extra data at end"); } return consumed; @@ -430,7 +427,8 @@ class ProcessorImpl : public Processor { std::size_t tmp = offset; auto wspan = buffer_->wspan(4); - switch (decoder_->decode(data, tmp, wspan, consumed)) { + std::size_t wrote = 0; + switch (decoder_->decode(data, tmp, wspan, wrote)) { case Decoder::State::GOOD: break; case Decoder::State::NEED_MORE: @@ -439,7 +437,8 @@ class ProcessorImpl : public Processor { delegate_->error("Invalid data"); return Process::ERROR; } - buffer_->commit(consumed); + buffer_->commit(wrote); + consumed = tmp - offset; return Process::CONTINUE; } diff --git a/sax/tst/test_buffer.cc b/sax/tst/test_buffer.cc index 13bc6d4..3a40792 100644 --- a/sax/tst/test_buffer.cc +++ b/sax/tst/test_buffer.cc @@ -140,6 +140,8 @@ TEST_P(BufferTest, skip_wrap) { EXPECT_EQ(5u, buf->write(AAAAAAAAAA)); + auto data = buf->rspan(10); + EXPECT_EQ(10u, data.size()); buf->consume(10); EXPECT_TRUE(buf->empty()); } diff --git a/sax/tst/test_decoder.cc b/sax/tst/test_decoder.cc index 86f230b..24434f5 100644 --- a/sax/tst/test_decoder.cc +++ b/sax/tst/test_decoder.cc @@ -54,7 +54,6 @@ TEST(sax, decoder_utf8) { auto delegate = std::make_shared<TestDelegate>(); auto processor = modxml::sax::Processor::create(delegate); std::string input = R"(<?xml version="1.0" encoding="utf-8"?><root />)"; - std::cerr << input << std::endl; EXPECT_TRUE(process_all( *processor.get(), *delegate.get(), @@ -68,7 +67,6 @@ TEST(sax, decoder_utf8_bom) { auto processor = modxml::sax::Processor::create(delegate); std::string input = "\xef\xbb\xbf" R"(<?xml version="1.0" encoding="utf-8"?><root />)"; - std::cerr << input << std::endl; EXPECT_TRUE(process_all( *processor.get(), *delegate.get(), @@ -125,7 +123,7 @@ TEST(sax, decoder_utf16be_bom) { auto delegate = std::make_shared<TestDelegate>(); auto processor = modxml::sax::Processor::create(delegate); std::u16string str = - u"\ufffe" uR"(<?xml version="1.0" encoding="utf-16"?><root />)"; + u"\ufeff" uR"(<?xml version="1.0" encoding="utf-16"?><root />)"; std::vector<uint8_t> input; for (char16_t c : str) { input.push_back(c >> 8); @@ -142,7 +140,7 @@ TEST(sax, decoder_utf16le_bom) { auto delegate = std::make_shared<TestDelegate>(); auto processor = modxml::sax::Processor::create(delegate); std::u16string str = - u"\ufffe" uR"(<?xml version="1.0" encoding="utf-16"?><root />)"; + u"\ufeff" uR"(<?xml version="1.0" encoding="utf-16"?><root />)"; std::vector<uint8_t> input; for (char16_t c : str) { input.push_back(c & 0xff); @@ -207,7 +205,7 @@ TEST(sax, decoder_utf32be_bom) { auto delegate = std::make_shared<TestDelegate>(); auto processor = modxml::sax::Processor::create(delegate); std::u32string str = - U"\ufffe" UR"(<?xml version="1.0" encoding="utf-32"?><root />)"; + U"\ufeff" UR"(<?xml version="1.0" encoding="utf-32"?><root />)"; std::vector<uint8_t> input; for (char32_t c : str) { input.push_back(c >> 24); @@ -226,7 +224,7 @@ TEST(sax, decoder_utf32le_bom) { auto delegate = std::make_shared<TestDelegate>(); auto processor = modxml::sax::Processor::create(delegate); std::u32string str = - U"\ufffe" R"(<?xml version="1.0" encoding="utf-32"?><root />)"; + U"\ufeff" R"(<?xml version="1.0" encoding="utf-32"?><root />)"; std::vector<uint8_t> input; for (char32_t c : str) { input.push_back(c & 0xff); |
