From 32e14551a90e85000e41b3f0445d34d58a1431e4 Mon Sep 17 00:00:00 2001 From: Joel Klinghed Date: Wed, 10 Sep 2025 22:12:22 +0200 Subject: Add unicode general category lookup Generate the lookup tables from UnicodeData.txt, do to that, add gen_ugc, which uses csv, buffers, line, io and other modules to do the job. --- src/decompress_lzma.cc | 110 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 src/decompress_lzma.cc (limited to 'src/decompress_lzma.cc') diff --git a/src/decompress_lzma.cc b/src/decompress_lzma.cc new file mode 100644 index 0000000..6baea18 --- /dev/null +++ b/src/decompress_lzma.cc @@ -0,0 +1,110 @@ +#include "decompress.hh" + +#include "buffer.hh" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace decompress { + +namespace { + +const size_t kBufferSizeXz = static_cast(1024) * 1024; + +class XzReader : public io::Reader { + public: + explicit XzReader(std::unique_ptr reader) + : reader_(std::move(reader)) {} + + ~XzReader() override { + if (initialized_) + lzma_end(&stream_); + } + + std::expected read(void* dst, size_t max) override { + auto err = fill(); + if (err.has_value()) + return std::unexpected(err.value()); + + stream_.next_out = reinterpret_cast(dst); + stream_.avail_out = max; + + if (!initialized_) { + if (in_eof_ && buffer_->empty()) + return 0; + + lzma_mt options; + memset(&options, 0, sizeof(options)); + options.threads = std::max(static_cast(1), lzma_cputhreads()); + options.memlimit_threading = lzma_physmem() / 4; + options.memlimit_stop = lzma_physmem() / 4; + auto ret = lzma_stream_decoder_mt(&stream_, &options); + if (ret != LZMA_OK) + return std::unexpected(io::ReadError::Error); + initialized_ = true; + } + + auto* const rptr = stream_.next_in; + auto ret = lzma_code(&stream_, in_eof_ ? LZMA_FINISH : LZMA_RUN); + auto got = max - stream_.avail_out; + if (ret == LZMA_STREAM_END) { + lzma_end(&stream_); + initialized_ = false; + buffer_->consume(stream_.next_in - rptr); + } else if (ret == LZMA_OK) { + if (!in_eof_) + buffer_->consume(stream_.next_in - rptr); + } else { + return std::unexpected( + ret == LZMA_DATA_ERROR + ? io::ReadError::InvalidData : io::ReadError::Error); + } + return got; + } + + std::expected skip(size_t max) override { + auto tmp = std::make_unique_for_overwrite(max); + return read(tmp.get(), max); + } + + private: + std::optional fill() { + auto* rptr = buffer_->rptr(stream_.avail_in); + if (!in_eof_ && stream_.avail_in < kBufferSizeXz / 2) { + auto* wptr = buffer_->wptr(stream_.avail_in); + auto got = reader_->read(wptr, stream_.avail_in); + if (got.has_value()) { + buffer_->commit(got.value()); + if (got.value() == 0) + in_eof_ = true; + } else { + return got.error(); + } + rptr = buffer_->rptr(stream_.avail_in); + } + stream_.next_in = reinterpret_cast(rptr); + return std::nullopt; + } + + std::unique_ptr reader_; + bool in_eof_{false}; + std::unique_ptr buffer_{Buffer::fixed(kBufferSizeXz)}; + bool initialized_{false}; + lzma_stream stream_ = LZMA_STREAM_INIT; +}; + +} // namespace + +std::unique_ptr xz(std::unique_ptr reader) { + return std::make_unique(std::move(reader)); +} + +} // namespace decompress -- cgit v1.3