// Origin: commit 32e14551a90e85000e41b3f0445d34d58a1431e4
// Author: Joel Klinghed, Wed, 10 Sep 2025 22:12:22 +0200
// Subject: Add unicode general category lookup
//
//   Generate the lookup tables from UnicodeData.txt; to do that, add gen_ugc,
//   which uses csv, buffers, line, io and other modules to do the job.
//
// This is src/decompress_z.cc as created by that commit (new file).
//
// NOTE(review): the text this file was recovered from had lost everything
// written between angle brackets (header names, template arguments). They
// were filled in from how each value is used below -- confirm against the
// upstream file before merging.

#include "decompress.hh"

#include "buffer.hh"

#define ZLIB_CONST
#include <zlib.h>

#include <algorithm>
#include <cstddef>
#include <expected>
#include <limits>
#include <memory>
#include <optional>
#include <utility>

namespace decompress {

namespace {

// Capacity of the buffer holding compressed input that has been read from the
// wrapped reader but not yet handed to inflate().
const size_t kBufferSizeZ = static_cast<size_t>(1024) * 1024;

// Upper bound for the scratch allocation made by one skip() call.
const size_t kSkipChunkZ = static_cast<size_t>(64) * 1024;

// z_stream keeps its avail_in/avail_out counters in a uInt; any size_t handed
// to it has to be clamped to this first.
// NOLINTNEXTLINE(misc-include-cleaner)
const size_t kMaxZCount = static_cast<size_t>(std::numeric_limits<uInt>::max());

// io::Reader that inflates the data produced by another reader.
//
// Compressed bytes are pulled from the wrapped reader into `buffer_` and fed
// to zlib's inflate(). The z_stream is created lazily on the first byte of a
// member and torn down again on Z_STREAM_END, so input made of several
// back-to-back members is decoded one member after the other.
class DecompressReader final : public io::Reader {
 public:
  // `reader` supplies the compressed bytes. `gzip` selects gzip framing when
  // true and zlib framing when false.
  DecompressReader(std::unique_ptr<io::Reader> reader, bool gzip)
      : reader_(std::move(reader)), gzip_(gzip) {}

  // stream_ holds zlib-internal state, so the object must not be duplicated.
  DecompressReader(DecompressReader const&) = delete;
  DecompressReader& operator=(DecompressReader const&) = delete;

  ~DecompressReader() override {
    if (initialized_)
      inflateEnd(&stream_);
  }

  // Inflates up to `max` bytes into `dst`.
  //
  // Returns the number of bytes written. 0 is returned only when `max` is 0
  // or when the input ended cleanly between members. Errors from the wrapped
  // reader are passed through unchanged; Z_DATA_ERROR from zlib becomes
  // io::ReadError::InvalidData and every other zlib failure (including input
  // that ends in the middle of a member) becomes io::ReadError::Error.
  std::expected<size_t, io::ReadError> read(void* dst, size_t max) override {
    // Clamp rather than assign `max` directly: avail_out is a uInt, and a
    // silently truncated value would make the byte count below wrong.
    const size_t want = std::min(kMaxZCount, max);
    if (want == 0)
      return 0;

    // inflate() may consume input without producing output (header bytes, or
    // the tail of a member). Keep going in that case so that a return value
    // of 0 is never mistaken for end of stream.
    for (;;) {
      auto err = fill();
      if (err.has_value())
        return std::unexpected(err.value());

      if (!initialized_) {
        if (in_eof_ && buffer_->empty())
          return 0;

        stream_.zalloc = Z_NULL;
        stream_.zfree = Z_NULL;
        stream_.opaque = Z_NULL;
        // windowBits 16 asks zlib for gzip-only decoding; 0 makes it use the
        // window size announced in the zlib header.
        if (inflateInit2(&stream_, gzip_ ? 16 : 0) != Z_OK) {
          return std::unexpected(io::ReadError::Error);
        }
        initialized_ = true;
      }

      // NOLINTNEXTLINE(misc-include-cleaner)
      stream_.next_out = static_cast<Bytef*>(dst);
      // NOLINTNEXTLINE(misc-include-cleaner)
      stream_.avail_out = static_cast<uInt>(want);

      const auto* const in_before = stream_.next_in;
      // Always Z_NO_FLUSH: with Z_FINISH zlib answers Z_BUF_ERROR whenever
      // `dst` cannot hold all remaining output, which would turn an ordinary
      // partial read at the end of the input into an error.
      const int ret = inflate(&stream_, Z_NO_FLUSH);
      if (ret != Z_OK && ret != Z_STREAM_END) {
        return std::unexpected(
            ret == Z_DATA_ERROR
            ? io::ReadError::InvalidData : io::ReadError::Error);
      }

      // fill() re-points next_in at the start of the buffer on every call, so
      // whatever inflate() has eaten must be dropped from the buffer now --
      // also after the source hit EOF, otherwise the same bytes are fed twice.
      buffer_->consume(static_cast<size_t>(stream_.next_in - in_before));

      if (ret == Z_STREAM_END) {
        inflateEnd(&stream_);
        initialized_ = false;
      }

      const size_t got = want - stream_.avail_out;
      if (got > 0)
        return got;
    }
  }

  // Discards up to `max` bytes of decompressed output by inflating them into
  // a scratch buffer. The scratch buffer is capped at kSkipChunkZ, so, like
  // read(), this may skip fewer bytes than requested; callers repeat the call
  // for the remainder.
  std::expected<size_t, io::ReadError> skip(size_t max) override {
    const size_t chunk = std::min(kSkipChunkZ, max);
    auto tmp = std::make_unique_for_overwrite<char[]>(chunk);
    return read(tmp.get(), chunk);
  }

 private:
  // Tops up `buffer_` from the wrapped reader when it is less than half full
  // and the reader has not reported end of input yet (a read of 0 bytes is
  // taken as end of input), then points the z_stream input at the buffered
  // bytes. Returns the reader's error if the read failed.
  std::optional<io::ReadError> fill() {
    size_t readable{0};
    auto* rptr = buffer_->rptr(readable);
    if (!in_eof_ && readable < kBufferSizeZ / 2) {
      size_t writable{0};
      auto* wptr = buffer_->wptr(writable);
      auto got = reader_->read(wptr, writable);
      if (!got.has_value())
        return got.error();
      buffer_->commit(got.value());
      if (got.value() == 0)
        in_eof_ = true;
      // Fetch the read window again now that more data was committed.
      rptr = buffer_->rptr(readable);
    }
    // NOLINTNEXTLINE(misc-include-cleaner)
    stream_.next_in = reinterpret_cast<const Bytef*>(rptr);
    // NOLINTNEXTLINE(misc-include-cleaner)
    stream_.avail_in = static_cast<uInt>(std::min(kMaxZCount, readable));
    return std::nullopt;
  }

  std::unique_ptr<io::Reader> reader_;
  bool const gzip_;
  // Set once the wrapped reader has returned 0 bytes.
  bool in_eof_{false};
  std::unique_ptr<Buffer> buffer_{Buffer::fixed(kBufferSizeZ)};
  // True while stream_ holds a live inflate state that needs inflateEnd().
  bool initialized_{false};
  z_stream stream_{};
};

}  // namespace

// Wraps `reader` so that reads return the inflated content of zlib-framed
// data.
std::unique_ptr<io::Reader> zlib(std::unique_ptr<io::Reader> reader) {
  return std::make_unique<DecompressReader>(std::move(reader),
                                            /* gzip = */ false);
}

// Wraps `reader` so that reads return the inflated content of gzip-framed
// data.
std::unique_ptr<io::Reader> gzip(std::unique_ptr<io::Reader> reader) {
  return std::make_unique<DecompressReader>(std::move(reader),
                                            /* gzip = */ true);
}

}  // namespace decompress