diff options
| author | Joel Klinghed <the_jk@yahoo.com> | 2017-08-09 20:54:57 +0200 |
|---|---|---|
| committer | Joel Klinghed <the_jk@yahoo.com> | 2017-08-09 20:54:57 +0200 |
| commit | 582cf3f5efab85a6316cb5936d529ffa78cb3f4e (patch) | |
| tree | 9803872e63e929226bb7a63c58a3fbc884b9b1a2 | |
| parent | b8112a1304e36dc8a308d2b09d5cd49c9271b2cc (diff) | |
Improve handling of non-utf8 or empty text http message body
If an HTTP message body is expected to be text but turns out not to
be valid UTF-8, we fall back to hex output.
Two bugs in that code:
1) An empty message body was considered invalid UTF-8 as it didn't
have any valid UTF-8 characters ...
2) When restarting the output as hex, all the content already written
to the text, including the headers, was re-emitted through the hex output.
| -rw-r--r-- | src/http_protocol.cc | 45 |
1 files changed, 21 insertions, 24 deletions
diff --git a/src/http_protocol.cc b/src/http_protocol.cc index ad81050..f0e3cb3 100644 --- a/src/http_protocol.cc +++ b/src/http_protocol.cc @@ -326,7 +326,7 @@ private: class TextOutput : public OutputFilter { public: TextOutput(AttributedText* text) - : text_(text) { + : text_(text), checked_(0) { } void write(void const* data, size_t size, bool last) override { @@ -334,49 +334,46 @@ public: hex_->write(data, size, last); return; } - auto d = reinterpret_cast<char const*>(data); - if (!buf_.empty()) { - buf_.append(d, size); - for (size_t i = 0; i < 4; ++i) { - if (i >= buf_.size()) break; - if (valid_utf8(buf_.data(), buf_.size() - i)) { - if (last && i > 0) break; - text_->append(buf_.data(), buf_.size() - i); - buf_.erase(0, buf_.size() - i); - return; - } - } - } else { - for (size_t i = 0; i < 4; ++i) { - if (i >= size) break; - if (valid_utf8(d, size - i)) { - if (last && i > 0) break; - text_->append(d, size - i); - if (i > 0) buf_.append(d + size - i, i); - return; + + if (size == 0 && (!last || buf_.empty())) return; + + buf_.append(reinterpret_cast<char const*>(data), size); + for (size_t i = 0; i < 4; ++i) { + if (checked_ + i >= buf_.size()) break; + if (valid_utf8(buf_.data() + checked_, buf_.size() - checked_ - i)) { + if (last) { + if (i > 0) break; + text_->append(buf_); + buf_.clear(); + checked_ = 0; + } else { + checked_ = buf_.size() - i; } + return; } } - buf_.assign(text_->text()); - buf_.append(d, size); - text_->clear(); hex_.reset(new HexOutput(text_)); hex_->write(buf_.data(), buf_.size(), last); buf_.clear(); } void error() override { + text_->append(buf_.data(), checked_); + buf_.clear(); text_->append("\nDecoding failed, invalid data\n"); } void incomplete() override { + text_->append(buf_.data(), checked_); + buf_.clear(); text_->append("\nNeed more data...\n"); } private: AttributedText* const text_; std::string buf_; + size_t checked_; std::unique_ptr<HexOutput> hex_; }; |
