diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/buffer.cc | 213 | ||||
| -rw-r--r-- | src/buffer.hh | 31 | ||||
| -rw-r--r-- | src/check.hh | 39 | ||||
| -rw-r--r-- | src/csv.cc | 63 | ||||
| -rw-r--r-- | src/csv.hh | 44 | ||||
| -rw-r--r-- | src/decompress.hh | 19 | ||||
| -rw-r--r-- | src/decompress_lzma.cc | 110 | ||||
| -rw-r--r-- | src/decompress_z.cc | 120 | ||||
| -rw-r--r-- | src/gen_ugc.cc | 317 | ||||
| -rw-r--r-- | src/io.cc | 238 | ||||
| -rw-r--r-- | src/io.hh | 52 | ||||
| -rw-r--r-- | src/line.cc | 133 | ||||
| -rw-r--r-- | src/line.hh | 44 | ||||
| -rw-r--r-- | src/str.cc | 34 | ||||
| -rw-r--r-- | src/str.hh | 18 | ||||
| -rw-r--r-- | src/u.cc | 46 | ||||
| -rw-r--r-- | src/u.hh | 18 | ||||
| -rw-r--r-- | src/ugc.hh | 49 | ||||
| -rw-r--r-- | src/unique_fd.cc | 9 | ||||
| -rw-r--r-- | src/unique_fd.hh | 45 |
20 files changed, 1642 insertions, 0 deletions
diff --git a/src/buffer.cc b/src/buffer.cc new file mode 100644 index 0000000..65c6757 --- /dev/null +++ b/src/buffer.cc @@ -0,0 +1,213 @@ +#include "buffer.hh" + +#include <algorithm> +#include <cassert> +#include <cstring> +#include <memory> +#include <utility> + +namespace { + +class FixedBuffer : public Buffer { + public: + explicit FixedBuffer(size_t size) + : size_(size) {} + + void const* rptr(size_t& avail, size_t need) override { + if (rptr_ < wptr_) { + avail = wptr_ - rptr_; + } else if (rptr_ == wptr_ && !full_) { + avail = 0; + } else { + avail = (data_.get() + size_) - rptr_; + if (avail < need) { + rotate(); + return rptr(avail, need); + } + } + return rptr_; + } + + void consume(size_t size) override { + if (size == 0) return; + if (rptr_ < wptr_) { + assert(std::cmp_greater_equal(wptr_ - rptr_, size)); + rptr_ += size; + if (rptr_ == wptr_) + reset(); + } else { + assert(rptr_ != wptr_ || !full_); + assert(std::cmp_greater_equal((data_.get() + size_) - rptr_, size)); + rptr_ += size; + if (rptr_ == data_.get() + size_) { + rptr_ = data_.get(); + if (rptr_ == wptr_) + reset(); + } + } + } + + void* wptr(size_t& avail, size_t need) override { + if (wptr_ == nullptr) { + data_ = std::make_unique_for_overwrite<char[]>(size_); + rptr_ = wptr_ = data_.get(); + } + + if (wptr_ < rptr_) { + avail = rptr_ - wptr_; + } else if (rptr_ == wptr_ && full_) { + avail = 0; + } else { + avail = (data_.get() + size_) - wptr_; + if (avail < need) { + rotate(); + return wptr(avail, need); + } + } + return wptr_; + } + + void commit(size_t size) override { + if (size == 0) return; + if (wptr_ < rptr_) { + assert(std::cmp_greater_equal(rptr_ - wptr_, size)); + wptr_ += size; + if (wptr_ == rptr_) { + full_ = true; + } + } else { + assert(rptr_ != wptr_ || !full_); + assert(std::cmp_greater_equal((data_.get() + size_) - wptr_, size)); + wptr_ += size; + if (wptr_ == data_.get() + size_) { + wptr_ = data_.get(); + if (wptr_ == rptr_) + full_ = true; + } + } + } + + 
[[nodiscard]] bool full() const override { + return rptr_ == wptr_ && full_; + } + + [[nodiscard]] bool empty() const override { + return rptr_ == wptr_ && !full_; + } + + private: + void reset() { + rptr_ = wptr_ = data_.get(); + full_ = false; + } + + void rotate() { + size_t to_move = (data_.get() + size_) - rptr_; + if (wptr_ + to_move > rptr_) { + auto tmp = std::make_unique_for_overwrite<char[]>(to_move); + memcpy(tmp.get(), rptr_, to_move); + memmove(data_.get() + to_move, data_.get(), wptr_ - data_.get()); + memcpy(data_.get(), tmp.get(), to_move); + } else { + memmove(data_.get() + to_move, data_.get(), wptr_ - data_.get()); + memcpy(data_.get(), rptr_, to_move); + } + rptr_ = data_.get(); + wptr_ += to_move; + } + + size_t const size_; + std::unique_ptr<char[]> data_; + char* rptr_{nullptr}; + char* wptr_{nullptr}; + bool full_{false}; +}; + +class DynamicBuffer : public Buffer { + public: + DynamicBuffer(size_t start_size, size_t max_size) + : start_size_(start_size), max_size_(max_size) {} + + void const* rptr(size_t& avail, size_t /* need */) override { + avail = wptr_ - rptr_; + return rptr_; + } + + void consume(size_t size) override { + assert(std::cmp_greater_equal(wptr_ - rptr_, size)); + rptr_ += size; + if (rptr_ == wptr_) { + reset(); + } + } + + void* wptr(size_t& avail, size_t need) override { + avail = end_ - wptr_; + if (avail < need) { + if (end_ == nullptr) { + size_t size = std::min(max_size_, std::max(need, start_size_)); + data_ = std::make_unique_for_overwrite<char[]>(size); + end_ = data_.get() + size; + rptr_ = wptr_ = data_.get(); + avail = end_ - wptr_; + } else if (std::cmp_greater_equal(rptr_ - data_.get(), need - avail)) { + memmove(data_.get(), rptr_, wptr_ - rptr_); + wptr_ = data_.get() + (wptr_ - rptr_); + rptr_ = data_.get(); + avail = end_ - wptr_; + } else if (std::cmp_less(end_ - data_.get(), max_size_)) { + size_t current_size = end_ - data_.get(); + size_t new_size = std::min(max_size_, + current_size + std::max(need 
- avail, + current_size)); + auto tmp = std::make_unique_for_overwrite<char[]>(new_size); + memcpy(tmp.get(), rptr_, wptr_ - rptr_); + end_ = tmp.get() + new_size; + wptr_ = tmp.get() + (wptr_ - rptr_); + rptr_ = tmp.get(); + data_ = std::move(tmp); + avail = end_ - wptr_; + } + } + return wptr_; + } + + void commit(size_t size) override { + assert(std::cmp_greater_equal(end_ - wptr_, size)); + wptr_ += size; + } + + [[nodiscard]] bool full() const override { + return rptr_ == data_.get() && wptr_ == end_ && + std::cmp_equal(end_ - data_.get(), max_size_); + } + + [[nodiscard]] bool empty() const override { + return rptr_ == wptr_; + } + + private: + void reset() { + if (std::cmp_greater(end_ - data_.get(), start_size_)) { + data_ = std::make_unique_for_overwrite<char[]>(start_size_); + } + rptr_ = wptr_ = data_.get(); + } + + size_t const start_size_; + size_t const max_size_; + std::unique_ptr<char[]> data_; + char* end_{nullptr}; + char* rptr_{nullptr}; + char* wptr_{nullptr}; +}; + +} // namespace + +std::unique_ptr<Buffer> Buffer::fixed(size_t size) { + return std::make_unique<FixedBuffer>(size); +} + +std::unique_ptr<Buffer> Buffer::dynamic(size_t start_size, size_t max_size) { + return std::make_unique<DynamicBuffer>(start_size, max_size); +} diff --git a/src/buffer.hh b/src/buffer.hh new file mode 100644 index 0000000..685cd36 --- /dev/null +++ b/src/buffer.hh @@ -0,0 +1,31 @@ +#ifndef BUFFER_HH +#define BUFFER_HH + +#include <cstddef> +#include <memory> + +class Buffer { + public: + virtual ~Buffer() = default; + + virtual void const* rptr(size_t& avail, size_t need = 1) = 0; + virtual void consume(size_t size) = 0; + + virtual void* wptr(size_t& avail, size_t need = 1) = 0; + virtual void commit(size_t size) = 0; + + [[nodiscard]] virtual bool full() const = 0; + [[nodiscard]] virtual bool empty() const = 0; + + [[nodiscard]] + static std::unique_ptr<Buffer> fixed(size_t size); + [[nodiscard]] + static std::unique_ptr<Buffer> dynamic(size_t start_size, 
size_t max_size); + + protected: + Buffer() = default; + Buffer(Buffer const&) = delete; + Buffer& operator=(Buffer const&) = delete; +}; + +#endif // BUFFER_HH diff --git a/src/check.hh b/src/check.hh new file mode 100644 index 0000000..be65437 --- /dev/null +++ b/src/check.hh @@ -0,0 +1,39 @@ +#ifndef CHECK_HH +#define CHECK_HH + +#include <cstdlib> +#include <stdckdint.h> +#include <type_traits> + +namespace check { + +template<typename T> +requires std::is_arithmetic_v<T> +T add(T a, T b) { + T ret; + if (ckd_add(&ret, a, b)) + abort(); + return ret; +} + +template<typename T> +requires std::is_arithmetic_v<T> +T sub(T a, T b) { + T ret; + if (ckd_sub(&ret, a, b)) + abort(); + return ret; +} + +template<typename T> +requires std::is_arithmetic_v<T> +T mul(T a, T b) { + T ret; + if (ckd_mul(&ret, a, b)) + abort(); + return ret; +} + +} // namespace check + +#endif // CHECK_HH diff --git a/src/csv.cc b/src/csv.cc new file mode 100644 index 0000000..4135555 --- /dev/null +++ b/src/csv.cc @@ -0,0 +1,63 @@ +#include "csv.hh" + +#include "line.hh" +#include "str.hh" + +#include <cstdint> +#include <expected> +#include <memory> +#include <span> +#include <string_view> +#include <utility> +#include <vector> + +namespace csv { + +namespace { + +class ReaderImpl : public Reader { + public: + ReaderImpl(std::unique_ptr<line::Reader> reader, char separator) + : reader_(std::move(reader)), separator_(separator) { + } + + [[nodiscard]] + std::expected<std::span<std::string_view>, io::ReadError> read() override { + while (true) { + auto line = reader_->read(); + if (line.has_value()) { + str::split(line.value(), line_, separator_, /* keep_empty */ true); + if (line_.size() == 1 && line_[0].empty()) + continue; + return line_; + } + if (line.error().eof) { + return {}; + } + return std::unexpected(line.error().io_error.value()); + } + } + + [[nodiscard]] uint64_t number() const override { + return reader_->number(); + } + + private: + std::unique_ptr<line::Reader> reader_; + 
char const separator_; + std::vector<std::string_view> line_; +}; + +} // namespace + +std::unique_ptr<Reader> open(std::unique_ptr<line::Reader> reader, + char separator) { + return std::make_unique<ReaderImpl>(std::move(reader), separator); +} + +std::unique_ptr<Reader> open(std::unique_ptr<io::Reader> reader, + char separator) { + return open(line::open(std::move(reader)), separator); +} + +} // namespace csv diff --git a/src/csv.hh b/src/csv.hh new file mode 100644 index 0000000..8c47ceb --- /dev/null +++ b/src/csv.hh @@ -0,0 +1,44 @@ +#ifndef CSV_HH +#define CSV_HH + +#include "io.hh" // IWYU pragma: export +#include "line.hh" + +#include <expected> +#include <memory> +#include <span> +#include <string_view> + +namespace csv { + +// Note that this reader is very simple, no quotes or escapes. +// Empty lines are ignored. +class Reader { + public: + virtual ~Reader() = default; + + // Returned span is only valid until next call to read. + // Returns empty span at end-of-file and only then. + [[nodiscard]] + virtual std::expected<std::span<std::string_view>, io::ReadError> read() = 0; + + // Starts at zero. Returns next line. + // So, before first read it is zero, after first read it is one. 
+ [[nodiscard]] virtual uint64_t number() const = 0; + + protected: + Reader() = default; + + Reader(Reader const&) = delete; + Reader& operator=(Reader const&) = delete; +}; + +[[nodiscard]] std::unique_ptr<Reader> open(std::unique_ptr<line::Reader> reader, + char separator = ','); + +[[nodiscard]] std::unique_ptr<Reader> open(std::unique_ptr<io::Reader> reader, + char separator = ','); + +} // namespace csv + +#endif // CSV_HH diff --git a/src/decompress.hh b/src/decompress.hh new file mode 100644 index 0000000..a15efdc --- /dev/null +++ b/src/decompress.hh @@ -0,0 +1,19 @@ +#ifndef DECOMPRESS_HH +#define DECOMPRESS_HH + +#include "io.hh" // IWYU pragma: export + +namespace decompress { + +// zlib format +std::unique_ptr<io::Reader> zlib(std::unique_ptr<io::Reader> reader); + +// gzip (.gz) format +std::unique_ptr<io::Reader> gzip(std::unique_ptr<io::Reader> reader); + +// xz format +std::unique_ptr<io::Reader> xz(std::unique_ptr<io::Reader> reader); + +} // namespace decompress + +#endif // DECOMPRESS_HH diff --git a/src/decompress_lzma.cc b/src/decompress_lzma.cc new file mode 100644 index 0000000..6baea18 --- /dev/null +++ b/src/decompress_lzma.cc @@ -0,0 +1,110 @@ +#include "decompress.hh" + +#include "buffer.hh" + +#include <lzma.h> + +#include <algorithm> +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <expected> +#include <memory> +#include <optional> +#include <utility> + +namespace decompress { + +namespace { + +const size_t kBufferSizeXz = static_cast<size_t>(1024) * 1024; + +class XzReader : public io::Reader { + public: + explicit XzReader(std::unique_ptr<io::Reader> reader) + : reader_(std::move(reader)) {} + + ~XzReader() override { + if (initialized_) + lzma_end(&stream_); + } + + std::expected<size_t, io::ReadError> read(void* dst, size_t max) override { + auto err = fill(); + if (err.has_value()) + return std::unexpected(err.value()); + + stream_.next_out = reinterpret_cast<unsigned char*>(dst); + stream_.avail_out = max; + 
+ if (!initialized_) { + if (in_eof_ && buffer_->empty()) + return 0; + + lzma_mt options; + memset(&options, 0, sizeof(options)); + options.threads = std::max(static_cast<uint32_t>(1), lzma_cputhreads()); + options.memlimit_threading = lzma_physmem() / 4; + options.memlimit_stop = lzma_physmem() / 4; + auto ret = lzma_stream_decoder_mt(&stream_, &options); + if (ret != LZMA_OK) + return std::unexpected(io::ReadError::Error); + initialized_ = true; + } + + auto* const rptr = stream_.next_in; + auto ret = lzma_code(&stream_, in_eof_ ? LZMA_FINISH : LZMA_RUN); + auto got = max - stream_.avail_out; + if (ret == LZMA_STREAM_END) { + lzma_end(&stream_); + initialized_ = false; + buffer_->consume(stream_.next_in - rptr); + } else if (ret == LZMA_OK) { + if (!in_eof_) + buffer_->consume(stream_.next_in - rptr); + } else { + return std::unexpected( + ret == LZMA_DATA_ERROR + ? io::ReadError::InvalidData : io::ReadError::Error); + } + return got; + } + + std::expected<size_t, io::ReadError> skip(size_t max) override { + auto tmp = std::make_unique_for_overwrite<char[]>(max); + return read(tmp.get(), max); + } + + private: + std::optional<io::ReadError> fill() { + auto* rptr = buffer_->rptr(stream_.avail_in); + if (!in_eof_ && stream_.avail_in < kBufferSizeXz / 2) { + auto* wptr = buffer_->wptr(stream_.avail_in); + auto got = reader_->read(wptr, stream_.avail_in); + if (got.has_value()) { + buffer_->commit(got.value()); + if (got.value() == 0) + in_eof_ = true; + } else { + return got.error(); + } + rptr = buffer_->rptr(stream_.avail_in); + } + stream_.next_in = reinterpret_cast<const unsigned char*>(rptr); + return std::nullopt; + } + + std::unique_ptr<io::Reader> reader_; + bool in_eof_{false}; + std::unique_ptr<Buffer> buffer_{Buffer::fixed(kBufferSizeXz)}; + bool initialized_{false}; + lzma_stream stream_ = LZMA_STREAM_INIT; +}; + +} // namespace + +std::unique_ptr<io::Reader> xz(std::unique_ptr<io::Reader> reader) { + return 
std::make_unique<XzReader>(std::move(reader)); +} + +} // namespace decompress diff --git a/src/decompress_z.cc b/src/decompress_z.cc new file mode 100644 index 0000000..f9f87ae --- /dev/null +++ b/src/decompress_z.cc @@ -0,0 +1,120 @@ +#include "decompress.hh" + +#include "buffer.hh" + +#define ZLIB_CONST +#include <zlib.h> + +#include <algorithm> +#include <cstddef> +#include <expected> +#include <limits> +#include <memory> +#include <optional> +#include <utility> + +namespace decompress { + +namespace { + +const size_t kBufferSizeZ = static_cast<size_t>(1024) * 1024; + +class DecompressReader : public io::Reader { + public: + DecompressReader(std::unique_ptr<io::Reader> reader, bool gzip) + : reader_(std::move(reader)), gzip_(gzip) {} + + ~DecompressReader() override { + if (initialized_) + inflateEnd(&stream_); + } + + std::expected<size_t, io::ReadError> read(void* dst, size_t max) override { + auto err = fill(); + if (err.has_value()) + return std::unexpected(err.value()); + + // NOLINTNEXTLINE(misc-include-cleaner) + stream_.next_out = reinterpret_cast<Bytef*>(dst); + stream_.avail_out = max; + + if (!initialized_) { + if (in_eof_ && buffer_->empty()) + return 0; + + stream_.zalloc = Z_NULL; + stream_.zfree = Z_NULL; + stream_.opaque = Z_NULL; + if (inflateInit2(&stream_, gzip_ ? 16 : 0) != Z_OK) { + return std::unexpected(io::ReadError::Error); + } + initialized_ = true; + } + + auto* const rptr = stream_.next_in; + auto ret = inflate(&stream_, in_eof_ ? Z_FINISH : Z_NO_FLUSH); + auto got = max - stream_.avail_out; + if (ret == Z_STREAM_END) { + inflateEnd(&stream_); + initialized_ = false; + buffer_->consume(stream_.next_in - rptr); + } else if (ret == Z_OK) { + if (!in_eof_) + buffer_->consume(stream_.next_in - rptr); + } else { + return std::unexpected( + ret == Z_DATA_ERROR + ? 
io::ReadError::InvalidData : io::ReadError::Error); + } + return got; + } + + std::expected<size_t, io::ReadError> skip(size_t max) override { + auto tmp = std::make_unique_for_overwrite<char[]>(max); + return read(tmp.get(), max); + } + + private: + std::optional<io::ReadError> fill() { + size_t avail; + auto* rptr = buffer_->rptr(avail); + if (!in_eof_ && avail < kBufferSizeZ / 2) { + auto* wptr = buffer_->wptr(avail); + auto got = reader_->read(wptr, avail); + if (got.has_value()) { + buffer_->commit(got.value()); + if (got.value() == 0) + in_eof_ = true; + } else { + return got.error(); + } + rptr = buffer_->rptr(avail); + } + // NOLINTNEXTLINE(misc-include-cleaner) + stream_.next_in = reinterpret_cast<z_const Bytef*>(rptr); + stream_.avail_in = std::min( + // NOLINTNEXTLINE(misc-include-cleaner) + static_cast<size_t>(std::numeric_limits<uInt>::max()), avail); + return std::nullopt; + } + + std::unique_ptr<io::Reader> reader_; + bool const gzip_; + bool in_eof_{false}; + std::unique_ptr<Buffer> buffer_{Buffer::fixed(kBufferSizeZ)}; + bool initialized_{false}; + z_stream stream_; +}; + +} // namespace + +std::unique_ptr<io::Reader> zlib(std::unique_ptr<io::Reader> reader) { + return std::make_unique<DecompressReader>(std::move(reader), /* gzip = */ false); +} + +std::unique_ptr<io::Reader> gzip(std::unique_ptr<io::Reader> reader) { + return std::make_unique<DecompressReader>(std::move(reader), /* gzip = */ true); +} + + +} // namespace decompress diff --git a/src/gen_ugc.cc b/src/gen_ugc.cc new file mode 100644 index 0000000..e9bce11 --- /dev/null +++ b/src/gen_ugc.cc @@ -0,0 +1,317 @@ +#include "args.hh" +#include "csv.hh" +#include "decompress.hh" +#include "ugc.hh" + +#include <charconv> +#include <cstdint> +#include <expected> +#include <format> +#include <fstream> +#include <functional> +#include <iostream> +#include <map> +#include <span> +#include <string> +#include <string_view> +#include <system_error> +#include <utility> +#include <vector> + +namespace 
{

// Maps the two-letter general category abbreviation used in UnicodeData
// files to the corresponding enum value. Uses a transparent comparator so
// lookups with std::string_view do not allocate.
std::map<std::string, u::GeneralCategory, std::less<>> str2gc{
  {"Lu", u::GeneralCategory::LETTER_UPPERCASE},
  {"Ll", u::GeneralCategory::LETTER_LOWERCASE},
  {"Lt", u::GeneralCategory::LETTER_TITLECASE},
  {"Lm", u::GeneralCategory::LETTER_MODIFIER},
  {"Lo", u::GeneralCategory::LETTER_OTHER},

  {"Mn", u::GeneralCategory::MARK_NONSPACING},
  {"Mc", u::GeneralCategory::MARK_SPACING_COMBINDING},
  {"Me", u::GeneralCategory::MARK_SPACING_ENCLOSING},

  {"Nd", u::GeneralCategory::NUMBER_DIGIT},
  {"Nl", u::GeneralCategory::NUMBER_LETTER},
  {"No", u::GeneralCategory::NUMBER_OTHER},

  {"Pc", u::GeneralCategory::PUNCTUATION_CONNECTOR},
  {"Pd", u::GeneralCategory::PUNCTUATION_DASH},
  {"Ps", u::GeneralCategory::PUNCTUATION_OPEN},
  {"Pe", u::GeneralCategory::PUNCTUATION_CLOSE},
  {"Pi", u::GeneralCategory::PUNCTUATION_INITIAL_QUOTE},
  {"Pf", u::GeneralCategory::PUNCTUATION_FINAL_QUOTE},
  {"Po", u::GeneralCategory::PUNCTUATION_OTHER},

  {"Sm", u::GeneralCategory::SYMBOL_MATH},
  {"Sc", u::GeneralCategory::SYMBOL_CURRENCY},
  {"Sk", u::GeneralCategory::SYMBOL_MODIFIER},
  {"So", u::GeneralCategory::SYMBOL_OTHER},

  {"Zs", u::GeneralCategory::SEPARATOR_SPACE},
  {"Zl", u::GeneralCategory::SEPARATOR_LINE},
  {"Zp", u::GeneralCategory::SEPARATOR_PARAGRAPH},

  {"Cc", u::GeneralCategory::OTHER_CONTROL},
  {"Cf", u::GeneralCategory::OTHER_FORMAT},
  {"Cs", u::GeneralCategory::OTHER_SURROGATE},
  {"Co", u::GeneralCategory::OTHER_PRIVATE_USE},
  {"Cn", u::GeneralCategory::OTHER_UNASSIGNED},
};

// Writes the includes, the namespace opening and the signature of the
// generated lookup function. `prefix` is prepended to the function name.
void print_header(std::ostream& out, std::string_view prefix) {
  out << "#include \"ugc.hh\"\n"
      << "\n"
      << "#include <array>\n"
      << "#include <cstddef>\n"
      << "#include <cstdint>\n"
      << "\n"
      << "namespace u {\n"
      << "\n"
      << "extern GeneralCategory " << prefix << "lookup_gc(uint32_t code) {\n";
}

// Writes the body of the generated lookup function.
//
// Consecutive code points that share a category are merged into inclusive
// [start, end] ranges. `codes` holds start/end pairs (two entries per range)
// and `categories` one entry per range; the generated code binary-searches
// the ranges. An empty `data` yields empty tables and a function that always
// returns OTHER_UNASSIGNED.
void print_body(std::ostream& out,
                std::map<uint32_t, u::GeneralCategory> const& data) {
  std::vector<uint32_t> codes;
  std::vector<u::GeneralCategory> categories;

  // Code point that would extend the currently open range.
  uint32_t next = 0;

  for (auto const& [code, category] : data) {
    bool const have_open_range = !categories.empty();
    if (have_open_range && code == next && categories.back() == category) {
      ++next;
      continue;
    }
    if (have_open_range) {
      codes.emplace_back(next - 1);  // Close the previous range.
    }
    codes.emplace_back(code);
    categories.emplace_back(category);
    next = code + 1;
  }

  // Close the last range. Guarded so that an empty map does not read
  // data.begin() or emit an unpaired end value.
  if (!categories.empty()) {
    codes.emplace_back(next - 1);
  }

  out << " static std::array<uint32_t, " << codes.size() << "> codes{";
  for (auto code : codes) {
    out << code << ",";
  }
  out << " };\n";
  out << " static std::array<uint8_t, " << categories.size()
      << "> categories{";
  for (auto category : categories) {
    // Cast to a wider integer so the value is printed as a number and not
    // as a character.
    out << static_cast<uint16_t>(category) << ",";
  }
  out << "};\n";

  out << " size_t low = 0;\n"
      << " size_t high = " << (codes.size() / 2) << ";\n"
      << " while (low < high) {\n"
      << " size_t m = (low + high) / 2;\n"
      << " uint32_t start = codes[m * 2];\n"
      << " if (code < start) {\n"
      << " high = m;\n"
      << " } else {\n"
      << " uint32_t end = codes[(m * 2) + 1];\n"
      << " if (code <= end) {\n"
      << " return static_cast<u::GeneralCategory>(categories[m]);\n"
      << " }\n"
      << " low = m + 1;\n"
      << " }\n"
      << " }\n"
      << " return u::GeneralCategory::OTHER_UNASSIGNED;\n";
}

// Closes the generated function and namespace.
void print_footer(std::ostream& out, std::string_view /* prefix */) {
  out << "}\n"
      << "\n"
      << "} // namespace u\n";
}

// Human readable text for an error from io::open.
std::string_view ioerr2str(io::OpenError error) {
  switch (error) {
    case io::OpenError::NoSuchFile:
      return "No such file";
    case io::OpenError::NoAccess:
      return "No access";
    case io::OpenError::Error:
      return "Fatal error";
  }
  std::unreachable();
}

// Human readable text for an error from an io::Reader.
std::string_view ioerr2str(io::ReadError error) {
  switch (error) {
    case io::ReadError::InvalidData:
      return "Invalid (compressed) data";
    case io::ReadError::Error:
      return "Fatal error";
  }
  std::unreachable();
}

std::expected<std::pair<uint32_t, u::GeneralCategory>, std::string>
parse_row(
    std::span<std::string_view> row) {
  // Parses one UnicodeData row into (code point, general category).
  // Returns a description of the problem if the row is malformed.
  //
  // [code];[name];[gc];[cc];[bc];[decomposition];[nv-dec];[nv-dig];[nv-num];[bm];[alias];;[upper case];[lower case];[title case]
  if (row.size() != 15) {
    return std::unexpected(std::format("Invalid row ({} columns)", row.size()));
  }
  auto code_col = row[0];
  auto category_col = row[2];

  uint32_t code;
  auto [ptr, ec] = std::from_chars(code_col.data(),
                                   code_col.data() + code_col.size(), code,
                                   /* base */ 16);
  // Require that the whole column was consumed, not just a hex prefix.
  if (ec != std::errc() || ptr != code_col.data() + code_col.size()) {
    return std::unexpected(std::format("Invalid code value {}", code_col));
  }

  auto it = str2gc.find(category_col);
  if (it == str2gc.end()) {
    return std::unexpected(std::format("Invalid general category {}",
                                       category_col));
  }

  return std::make_pair(code, it->second);
}

// Reads a UnicodeData file (optionally .gz or .xz compressed, decided by the
// file name suffix) and returns a map from code point to general category.
//
// Ranges are given in the file as two consecutive rows whose names end in
// ", First>" and ", Last>"; every code point in such a range is added to the
// map. On failure a message prefixed with "file:line:" is returned.
std::expected<std::map<uint32_t, u::GeneralCategory>, std::string> read(
    std::string_view filename) {
  auto maybe_reader = io::open(std::string(filename));
  if (!maybe_reader.has_value()) {
    return std::unexpected(std::format(
        "Unable to open {} for reading: {}",
        filename, ioerr2str(maybe_reader.error())));
  }
  auto reader = std::move(maybe_reader.value());
  if (filename.ends_with(".gz")) {
    reader = decompress::gzip(std::move(reader));
  } else if (filename.ends_with(".xz")) {
    reader = decompress::xz(std::move(reader));
  }

  std::map<uint32_t, u::GeneralCategory> ret;
  auto csv_reader = csv::open(std::move(reader), ';');
  while (true) {
    auto row = csv_reader->read();
    if (!row.has_value()) {
      return std::unexpected(std::format(
          "{}:{}: Error reading file: {}",
          filename, csv_reader->number(), ioerr2str(row.error())));
    }
    // An empty row means end of file.
    if (row->empty())
      break;

    auto pair = parse_row(row.value());
    if (!pair.has_value()) {
      return std::unexpected(std::format(
          "{}:{}: {}", filename, csv_reader->number(), pair.error()));
    }
    auto name_col = (*row)[1];

    if (name_col.ends_with(", First>")) {
      // Copy the prefix; name_col points into the reader's buffer, which is
      // only valid until the next call to read().
      std::string prefix(name_col.substr(0, name_col.size() - 8));
      row = csv_reader->read();
      if (!row.has_value()) {
        return std::unexpected(std::format(
            "{}:{}: Error reading file: {}",
            filename, csv_reader->number(), ioerr2str(row.error())));
      }

      // Check the result for *this* row. At end of file the row is empty and
      // parse_row reports it as an invalid row, so this check also keeps the
      // indexing below in bounds.
      auto second_pair = parse_row(row.value());
      if (!second_pair.has_value()) {
        return std::unexpected(std::format(
            "{}:{}: {}", filename, csv_reader->number(), second_pair.error()));
      }

      name_col = (*row)[1];
      if (name_col.ends_with(", Last>") &&
          name_col.substr(0, name_col.size() - 7) == prefix) {
        if (pair->second != second_pair->second) {
          return std::unexpected(std::format(
              "{}:{}: Invalid range, general category doesn't match",
              filename, csv_reader->number()));
        }
        if (second_pair->first < pair->first) {
          return std::unexpected(std::format(
              "{}:{}: Invalid range, last code is before first code",
              filename, csv_reader->number()));
        }

        for (uint32_t c = pair->first; c <= second_pair->first; ++c) {
          auto emplace_ret = ret.emplace(c, pair->second);
          if (!emplace_ret.second) {
            return std::unexpected(std::format(
                "{}:{}: Duplicate value for {:#08x}",
                filename, csv_reader->number(), c));
          }
        }
      } else {
        return std::unexpected(std::format(
            "{}:{}: Invalid range, {} doesn't match {}",
            filename, csv_reader->number(), prefix, name_col));
      }
    } else {
      auto emplace_ret = ret.emplace(std::move(pair.value()));
      if (!emplace_ret.second) {
        return std::unexpected(std::format(
            "{}:{}: Duplicate value for {:#08x}",
            filename, csv_reader->number(), emplace_ret.first->first));
      }
    }
  }

  return ret;
}

}  // namespace

int main(int argc, char** argv) {
  auto args = Args::create();
  auto opt_help = args->option('h', "help", "display this text and exit");
  auto opt_prefix =
      args->option_argument('p', "prefix", "ARG", "Prefix for exported method");
  std::vector<std::string_view> arguments;
  if (!args->run(argc, argv, &arguments)) {
    std::cerr << "Try `gen_u --help` for usage\n";
    return 1;
  }
  if (opt_help->is_set()) {
    std::cout << "Usage: `gen_u [OPTIONS...] 
UnicodeData [OUTPUT]`\n" + << "Generates a method for getting the general category for a " + << "code point.\n" + << "\n"; + args->print_help(std::cout); + return 0; + } + if (!opt_prefix->is_set()) { + std::cerr << "No prefix given.\n" + << "Try `gen_u --help` for usage\n"; + return 1; + } + auto prefix = opt_prefix->argument(); + if (arguments.empty() || arguments.size() > 2) { + std::cerr << "Expecting one or two argument. No more, no less.\n" + << "Try `gen_u --help` for usage\n"; + return 1; + } + + auto general_categories = read(arguments[0]); + if (!general_categories.has_value()) { + std::cerr << general_categories.error() << '\n'; + return 1; + } + + if (arguments.size() < 2 || arguments[1] == "-") { + print_header(std::cout, prefix); + print_body(std::cout, general_categories.value()); + print_footer(std::cout, prefix); + } else { + std::fstream out{std::string(arguments[1]), + std::fstream::trunc | std::fstream::out}; + print_header(out, prefix); + print_body(out, general_categories.value()); + print_footer(out, prefix); + } + return 0; +} diff --git a/src/io.cc b/src/io.cc new file mode 100644 index 0000000..baf162a --- /dev/null +++ b/src/io.cc @@ -0,0 +1,238 @@ +#include "io.hh" + +#include "unique_fd.hh" + +#include <algorithm> +#include <cerrno> +#include <cstdio> +#include <cstring> +#include <expected> +#include <fcntl.h> +#include <limits> +#include <memory> +#include <optional> +#include <string> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> +#include <utility> + +namespace io { + +namespace { + +class BasicReader : public Reader { + public: + explicit BasicReader(unique_fd fd) + : fd_(std::move(fd)) { + } + + [[nodiscard]] + std::expected<size_t, ReadError> read(void* dst, size_t max) override { + ssize_t ret = ::read( + fd_.get(), dst, std::min(static_cast<size_t>( + std::numeric_limits<ssize_t>::max()), max)); + if (ret < 0) { + switch (errno) { + case EINTR: + return read(dst, max); + default: + 
return std::unexpected(ReadError::Error); + } + } + offset_ += ret; + return ret; + } + + [[nodiscard]] + std::expected<size_t, ReadError> skip(size_t max) override { + off_t ret; + if (sizeof(size_t) > sizeof(off_t)) { + // NOLINTNEXTLINE(bugprone-narrowing-conversions) + ret = lseek(fd_.get(), std::min(static_cast<size_t>( + std::numeric_limits<off_t>::max()), max), SEEK_CUR); + } else { + ret = lseek(fd_.get(), static_cast<off_t>(max), SEEK_CUR); + } + if (ret < 0) { + return std::unexpected(ReadError::Error); + } + // Don't want skip to go past (cached) file end. + if (!size_.has_value() || ret > size_.value()) { + // When going past end, double check that it still is the end. + off_t ret2 = lseek(fd_.get(), 0, SEEK_END); + if (ret2 < 0) { + // We're screwed, but try to go back to original position and then + // return error. + size_.reset(); + lseek(fd_.get(), offset_, SEEK_SET); + return std::unexpected(ReadError::Error); + } + size_ = ret2; + if (ret > ret2) { + auto distance = ret2 - offset_; + offset_ = ret2; + return distance; + } + // Seek back to where we should be + if (lseek(fd_.get(), ret, SEEK_SET) < 0) { + return std::unexpected(ReadError::Error); + } + } + auto distance = ret - offset_; + offset_ = ret; + return distance; + } + + private: + unique_fd fd_; + off_t offset_{0}; + std::optional<off_t> size_; +}; + +class MemoryReader : public Reader { + public: + MemoryReader(void* ptr, size_t size) + : ptr_(ptr), size_(size) { + } + + [[nodiscard]] + std::expected<size_t, ReadError> read(void* dst, size_t max) override { + size_t avail = size_ - offset_; + size_t ret = std::min(max, avail); + memcpy(dst, reinterpret_cast<char*>(ptr_) + offset_, ret); + offset_ += ret; + return ret; + } + + [[nodiscard]] + std::expected<size_t, ReadError> skip(size_t max) override { + size_t avail = size_ - offset_; + size_t ret = std::min(max, avail); + offset_ += ret; + return ret; + } + + protected: + void* ptr_; + size_t const size_; + + private: + size_t 
offset_{0};
+};
+
+class MmapReader : public MemoryReader {
+ public:
+  MmapReader(unique_fd fd, void* ptr, size_t size)
+      : MemoryReader(ptr, size), fd_(std::move(fd)) {
+  }
+
+  ~MmapReader() override {
+    munmap(ptr_, size_);
+  }
+
+ private:
+  unique_fd fd_;
+};
+
+class StringReader : public MemoryReader {
+ public:
+  explicit StringReader(std::string data)
+      : MemoryReader(nullptr, data.size()), data_(std::move(data)) {
+    ptr_ = data_.data();
+  }
+
+ private:
+  std::string data_;
+};
+
+}  // namespace
+
+std::expected<size_t, ReadError> Reader::read(std::string& str) {
+  return read(str.data(), str.size());
+}
+
+std::expected<size_t, ReadError> Reader::repeat_read(void* dst, size_t max) {
+  auto ret = read(dst, max);
+  if (!ret.has_value() || ret.value() == 0 || ret.value() == max)
+    return ret;
+
+  char* d = static_cast<char*>(dst);
+  size_t offset = ret.value();
+  while (true) {
+    ret = read(d + offset, max - offset);
+    if (!ret.has_value() || ret.value() == 0)
+      break;
+    offset += ret.value();
+    if (offset == max)
+      break;
+  }
+  return offset;
+}
+
+std::expected<size_t, ReadError> Reader::repeat_read(std::string& str) {
+  return repeat_read(str.data(), str.size());
+}
+
+std::expected<size_t, ReadError> Reader::repeat_skip(size_t max) {
+  auto ret = skip(max);
+  if (!ret.has_value() || ret.value() == 0 || ret.value() == max)
+    return ret;
+
+  size_t offset = ret.value();
+  while (true) {
+    ret = skip(max - offset);
+    if (!ret.has_value() || ret.value() == 0)
+      break;
+    offset += ret.value();
+    if (offset == max)
+      break;
+  }
+  return offset;
+}
+
+std::expected<std::unique_ptr<Reader>, OpenError> open(
+    const std::string& file_path) {
+  return openat(AT_FDCWD, file_path);
+}
+
+std::expected<std::unique_ptr<Reader>, OpenError> openat(
+    int dirfd, const std::string& file_path) {
+  unique_fd fd(::openat(dirfd, file_path.c_str(), O_RDONLY));
+  if (fd) {
+    struct stat buf;
+    if (fstat(fd.get(), &buf) == 0) {
+      if (std::cmp_less_equal(buf.st_size,
+                              std::numeric_limits<size_t>::max())) {
+        auto size = static_cast<size_t>(buf.st_size);
+        void* ptr = mmap(nullptr, size, PROT_READ, MAP_PRIVATE, fd.get(), 0);
+        if (ptr != MAP_FAILED) {
+          return std::make_unique<MmapReader>(std::move(fd), ptr, size);
+        }
+      }
+    }
+    return std::make_unique<BasicReader>(std::move(fd));
+  }
+  OpenError err;
+  switch (errno) {
+    case EINTR:
+      return openat(dirfd, file_path);
+    case EACCES:
+      err = OpenError::NoAccess;
+      break;
+    case ENOENT:
+      err = OpenError::NoSuchFile;
+      break;
+    default:
+      err = OpenError::Error;
+      break;
+  }
+  return std::unexpected(err);
+}
+
+std::unique_ptr<Reader> memory(std::string data) {
+  return std::make_unique<StringReader>(std::move(data));
+}
+
+}  // namespace io
diff --git a/src/io.hh b/src/io.hh
new file mode 100644
index 0000000..315d0bb
--- /dev/null
+++ b/src/io.hh
@@ -0,0 +1,72 @@
+#ifndef IO_HH
+#define IO_HH
+
+#include <cstddef>
+#include <expected>
+#include <memory>
+#include <string>
+
+namespace io {
+
+enum class ReadError {
+  Error,
+  InvalidData,  // Used by decompress and such
+};
+
+enum class OpenError {
+  NoSuchFile,
+  NoAccess,
+  Error,
+};
+
+// Abstract source of bytes. The repeat_* helpers stop as soon as read() or
+// skip() returns 0.
+class Reader {
+ public:
+  virtual ~Reader() = default;
+
+  // Reads up to max bytes into dst. Callers treat the returned value as the
+  // number of bytes stored at dst.
+  [[nodiscard]] virtual std::expected<size_t, ReadError> read(void* dst,
+                                                              size_t max) = 0;
+  // Presumably discards up to max bytes and returns how many were skipped.
+  [[nodiscard]] virtual std::expected<size_t, ReadError> skip(size_t max) = 0;
+
+  // Reads into the existing storage of str (at most str.size() bytes); str is
+  // not resized, so the caller has to size it first.
+  [[nodiscard]] std::expected<size_t, ReadError> read(std::string& str);
+
+  // Calls read() repeatedly until max bytes have been read, read() returns 0
+  // or read() fails. A failure of the first call is returned as-is; a failure
+  // of a later call only ends the loop and the number of bytes read so far is
+  // returned.
+  [[nodiscard]] std::expected<size_t, ReadError> repeat_read(void* dst,
+                                                             size_t max);
+  // Same as above, filling the existing storage of str.
+  [[nodiscard]] std::expected<size_t, ReadError> repeat_read(std::string& str);
+  // Same retry logic as repeat_read(), but calling skip().
+  [[nodiscard]] std::expected<size_t, ReadError> repeat_skip(size_t max);
+
+ protected:
+  Reader() = default;
+
+  Reader(Reader const&) = delete;
+  Reader& operator=(Reader const&) = delete;
+};
+
+// Opens file_path read-only relative to the current working directory; same
+// as openat(AT_FDCWD, file_path).
+[[nodiscard]] std::expected<std::unique_ptr<Reader>, OpenError> open(
+    const std::string& file_path);
+// Opens file_path read-only relative to dirfd. The file is memory mapped when
+// fstat() and mmap() succeed, otherwise the descriptor is handed to a
+// BasicReader. EINTR is retried; EACCES maps to NoAccess, ENOENT to
+// NoSuchFile and any other errno to Error.
+[[nodiscard]] std::expected<std::unique_ptr<Reader>, OpenError> openat(
+    int dirfd, const std::string& file_path);
+// Returns a reader over data; the reader takes ownership of the string.
+[[nodiscard]] std::unique_ptr<Reader> memory(std::string data);
+
+}  // namespace io
+
+#endif  // IO_HH
diff --git a/src/line.cc b/src/line.cc
new file mode 100644
index 0000000..2eeb116
--- /dev/null
+++ b/src/line.cc
@@ -0,0 +1,147 @@
+#include "line.hh"
+
+#include "check.hh"
+
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <expected>
+#include <memory>
+#include <string_view>
+#include <utility>
+
+namespace line {
+
+namespace {
+
+const char kLineTerminators[] = "\r\n";
+
+// Splits the bytes of an io::Reader into lines. The buffer holds max_len + 2
+// bytes.
+class ReaderImpl : public Reader {
+ public:
+  ReaderImpl(std::unique_ptr<io::Reader> reader, size_t max_len)
+      : reader_(std::move(reader)), max_len_(max_len),
+        buffer_(std::make_unique_for_overwrite<char[]>(
+            check::add(max_len, static_cast<size_t>(2)))),
+        rptr_(buffer_.get()), wptr_(buffer_.get()), search_(rptr_),
+        end_(buffer_.get() + check::add(max_len, static_cast<size_t>(2))) {}
+
+  // Returns the next line without its terminator ("\n", "\r\n" or "\r").
+  // Lines longer than max_len are returned in pieces of max_len bytes. At end
+  // of input a last unterminated line is returned before the EOF error.
+  [[nodiscard]] std::expected<std::string_view, ReadError> read() override {
+    while (true) {
+      search_ = std::find_first_of(search_, wptr_,
+                                   kLineTerminators, kLineTerminators + 2);
+      if (search_ < wptr_) {
+        if (std::cmp_greater(search_ - rptr_, max_len_)) {
+          return line(max_len_, 0);
+        }
+
+        size_t tlen;
+        if (*search_ == '\n') {
+          tlen = 1;
+        } else {
+          // If '\r' is the last buffered byte, read more to learn whether a
+          // '\n' follows. At end of input the '\r' alone ends the line.
+          if (search_ + 1 == wptr_) {
+            make_space_if_needed();
+            auto got = fill();
+            if (got.has_value()) {
+              if (got.value() == 0) {
+                return line(search_ - rptr_, 1);
+              }
+            } else {
+              return std::unexpected(ReadError(got.error()));
+            }
+          }
+          if (search_[1] == '\n') {
+            tlen = 2;
+          } else {
+            tlen = 1;
+          }
+        }
+        return line(search_ - rptr_, tlen);
+      }
+      // No terminator buffered yet. Only split once more than max_len bytes
+      // are pending: with exactly max_len bytes the terminator may simply not
+      // have been read yet, and splitting would yield a spurious empty line.
+      if (std::cmp_greater(wptr_ - rptr_, max_len_)) {
+        return line(max_len_, 0);
+      }
+
+      make_space_if_needed();
+      auto got = fill();
+      if (got.has_value()) {
+        if (got.value() == 0) {
+          if (rptr_ == wptr_) {
+            return std::unexpected(ReadError());
+          }
+          return line(wptr_ - rptr_, 0);
+        }
+      } else {
+        return std::unexpected(ReadError(got.error()));
+      }
+    }
+  }
+
+  [[nodiscard]] uint64_t number() const override { return number_; }
+
+ private:
+  // Returns a view of the len bytes at rptr_, then consumes them together
+  // with terminator_len terminator bytes and counts the line.
+  std::string_view line(size_t len, size_t terminator_len) {
+    assert(len <= max_len_);
+    auto ret = std::string_view(rptr_, len);
+    rptr_ += len + terminator_len;
+    search_ = rptr_;
+    ++number_;
+    return ret;
+  }
+
+  // Moves the unread bytes to the start of the buffer, but only if bytes were
+  // already consumed and at most 1024 bytes are free at the end.
+  void make_space_if_needed() {
+    size_t free = rptr_ - buffer_.get();
+    if (free == 0) return;
+    size_t avail = end_ - wptr_;
+    if (avail > 1024) return;
+    memmove(buffer_.get(), rptr_, wptr_ - rptr_);
+    search_ -= free;
+    wptr_ -= free;
+    rptr_ = buffer_.get();
+  }
+
+  std::expected<size_t, io::ReadError> fill() {
+    auto ret = reader_->read(wptr_, end_ - wptr_);
+    if (ret.has_value())
+      wptr_ += ret.value();
+    return ret;
+  }
+
+  std::unique_ptr<io::Reader> reader_;
+  size_t const max_len_;
+  uint64_t number_{0};
+  std::unique_ptr<char[]> buffer_;
+  char* rptr_;
+  char* wptr_;
+  char* search_;
+  char* const end_;
+};
+
+}  // namespace
+
+ReadError::ReadError()
+    : eof(true) {}
+
+ReadError::ReadError(io::ReadError error)
+    : eof(false), io_error(error) {}
+
+std::unique_ptr<Reader> open(std::unique_ptr<io::Reader> reader,
+                             size_t max_len) {
+  return std::make_unique<ReaderImpl>(std::move(reader), max_len);
+}
+
+}  // namespace line
diff --git a/src/line.hh b/src/line.hh
new file mode 100644
index 0000000..94e3646
--- /dev/null
+++ b/src/line.hh
@@ -0,0 +1,49 @@
+#ifndef LINE_HH
+#define LINE_HH
+
+#include "io.hh"  // IWYU pragma: export
+
+#include <cstddef>
+#include <cstdint>
+#include <expected>
+#include <memory>
+#include <optional>
+#include <string_view>
+
+namespace line {
+
+// Reason why read() produced no line: end of input (eof is true, io_error is
+// empty) or a failure of the underlying io::Reader (eof is false).
+struct ReadError {
+  bool eof;
+  std::optional<io::ReadError> io_error;
+
+  ReadError();
+  explicit ReadError(io::ReadError error);
+};
+
+class Reader {
+ public:
+  virtual ~Reader() = default;
+
+  // Returned view is only valid until next call to read.
+  [[nodiscard]] virtual std::expected<std::string_view, ReadError> read() = 0;
+  // Number of lines returned by read() so far, i.e. the zero-based index of
+  // the next line: zero before the first read, one after it.
+  [[nodiscard]] virtual uint64_t number() const = 0;
+
+ protected:
+  Reader() = default;
+
+  Reader(Reader const&) = delete;
+  Reader& operator=(Reader const&) = delete;
+};
+
+// Wraps reader in a line reader. Lines longer than max_len bytes are returned
+// in pieces of max_len bytes.
+[[nodiscard]] std::unique_ptr<Reader> open(std::unique_ptr<io::Reader> reader,
+                                           size_t max_len = 8192);
+
+}  // namespace line
+
+#endif  // LINE_HH
diff --git a/src/str.cc b/src/str.cc
new file mode 100644
index 0000000..f81617d
--- /dev/null
+++ b/src/str.cc
@@ -0,0 +1,34 @@
+#include "str.hh"
+
+#include <cstddef>
+#include <string_view>
+#include <vector>
+
+namespace str {
+
+void split(std::string_view str, std::vector<std::string_view>& out,
+           char separator, bool keep_empty) {
+  out.clear();
+
+  size_t offset = 0;
+  while (true) {
+    auto next = str.find(separator, offset);
+    if (next == std::string_view::npos) {
+      if (keep_empty || offset < str.size())
+        out.push_back(str.substr(offset));
+      break;
+    }
+    if (keep_empty || offset < next)
+      out.push_back(str.substr(offset, next - offset));
+    offset = next + 1;
+  }
+}
+
+std::vector<std::string_view> split(std::string_view str,
+                                    char separator, bool keep_empty) {
+  std::vector<std::string_view> vec;
+  split(str, vec, separator, keep_empty);
+  return vec;
+}
+
+}  // namespace str
diff --git a/src/str.hh b/src/str.hh
new file mode 100644
index 0000000..58d5d32
--- /dev/null
+++ b/src/str.hh
@@ -0,0 +1,22 @@
+#ifndef STR_HH
+#define STR_HH
+
+#include <string_view>
+#include <vector>
+
+namespace str {
+
+// Splits str at every separator and stores the pieces in out, which is
+// cleared first. Empty pieces are dropped unless keep_empty is true. The
+// pieces are views into str.
+void split(std::string_view str, std::vector<std::string_view>& out,
+           char separator = ' ', bool keep_empty = false);
+
+// Same as above, returning the pieces in a new vector.
+[[nodiscard]] std::vector<std::string_view> split(std::string_view str,
+                                                  char separator = ' ',
+                                                  bool keep_empty = false);
+
+}  // namespace str
+
+#endif  // STR_HH
diff --git a/src/u.cc b/src/u.cc
new file mode 100644
index 0000000..3c06ba8
--- /dev/null
+++ b/src/u.cc
@@ -0,0 +1,46 @@
+#include "u.hh"
+
+#include <cstdint>
+#include <utility>
+
+namespace u {
+
+// These are generated by gen_ugc
+GeneralCategory u6_2_0_lookup_gc(uint32_t code);
+GeneralCategory u8_0_0_lookup_gc(uint32_t code);
+GeneralCategory u10_0_0_lookup_gc(uint32_t code);
+GeneralCategory u11_0_0_lookup_gc(uint32_t code);
+GeneralCategory u12_1_0_lookup_gc(uint32_t code);
+GeneralCategory u13_0_0_lookup_gc(uint32_t code);
+GeneralCategory u14_0_0_lookup_gc(uint32_t code);
+GeneralCategory u15_0_0_lookup_gc(uint32_t code);
+GeneralCategory u15_1_0_lookup_gc(uint32_t code);
+GeneralCategory u16_0_0_lookup_gc(uint32_t code);
+
+GeneralCategory lookup_gc(uint32_t code, Version version) {
+  switch (version) {
+    case Version::u6_2_0:
+      return u6_2_0_lookup_gc(code);
+    case Version::u8_0_0:
+      return u8_0_0_lookup_gc(code);
+    case Version::u10_0_0:
+      return u10_0_0_lookup_gc(code);
+    case Version::u11_0_0:
+      return u11_0_0_lookup_gc(code);
+    case Version::u12_1_0:
+      return u12_1_0_lookup_gc(code);
+    case Version::u13_0_0:
+      return u13_0_0_lookup_gc(code);
+    case Version::u14_0_0:
+      return u14_0_0_lookup_gc(code);
+    case Version::u15_0_0:
+      return u15_0_0_lookup_gc(code);
+    case Version::u15_1_0:
+      return u15_1_0_lookup_gc(code);
+    case Version::u16_0_0:
+      return u16_0_0_lookup_gc(code);
+  }
+  std::unreachable();
+}
+
+}  // namespace u
diff --git a/src/u.hh b/src/u.hh
--- a/src/u.hh
+++ b/src/u.hh
@@ -1,6 +1,8 @@
 #ifndef U_HH
 #define U_HH
 
+#include "ugc.hh"  // IWYU pragma: export
+
 namespace u {
 
 enum class ReadError : uint8_t {
@@ -14,6 +16,22 @@ enum class ReadErrorReplace : uint8_t {
   Incomplete,  // Too few bytes
 };
 
+enum class Version : uint8_t {
+  u6_2_0,
+  u8_0_0,
+  u10_0_0,
+  u11_0_0,
+  u12_1_0,
+  u13_0_0,
+  u14_0_0,
+  u15_0_0,
+  u15_1_0,
+  u16_0_0,
+  LATEST = u16_0_0,
+};
+
+GeneralCategory lookup_gc(uint32_t code, Version version = Version::LATEST);
+
 }  // namespace u
 
 #endif  // U_HH
diff --git a/src/ugc.hh b/src/ugc.hh
new file mode 100644
index 0000000..c49d50f
--- /dev/null
+++ b/src/ugc.hh
@@ -0,0 +1,54 @@
+#ifndef UGC_HH
+#define UGC_HH
+
+#include <cstdint>
+
+namespace u {
+
+// Unicode General_Category values. The trailing comments give the
+// presumably matching two-letter abbreviations from the Unicode Character
+// Database.
+// NOTE(review): "COMBINDING" looks like a misspelling of "COMBINING"; the
+// enumerator names are left untouched because other files may use them.
+enum class GeneralCategory : uint8_t {
+  LETTER_UPPERCASE,  // Lu
+  LETTER_LOWERCASE,  // Ll
+  LETTER_TITLECASE,  // Lt
+  LETTER_MODIFIER,  // Lm
+  LETTER_OTHER,  // Lo
+
+  MARK_NONSPACING,  // Mn
+  MARK_SPACING_COMBINDING,  // Mc
+  MARK_SPACING_ENCLOSING,  // Me
+
+  NUMBER_DIGIT,  // Nd
+  NUMBER_LETTER,  // Nl
+  NUMBER_OTHER,  // No
+
+  PUNCTUATION_CONNECTOR,  // Pc
+  PUNCTUATION_DASH,  // Pd
+  PUNCTUATION_OPEN,  // Ps
+  PUNCTUATION_CLOSE,  // Pe
+  PUNCTUATION_INITIAL_QUOTE,  // Pi
+  PUNCTUATION_FINAL_QUOTE,  // Pf
+  PUNCTUATION_OTHER,  // Po
+
+  SYMBOL_MATH,  // Sm
+  SYMBOL_CURRENCY,  // Sc
+  SYMBOL_MODIFIER,  // Sk
+  SYMBOL_OTHER,  // So
+
+  SEPARATOR_SPACE,  // Zs
+  SEPARATOR_LINE,  // Zl
+  SEPARATOR_PARAGRAPH,  // Zp
+
+  OTHER_CONTROL,  // Cc
+  OTHER_FORMAT,  // Cf
+  OTHER_SURROGATE,  // Cs
+  OTHER_PRIVATE_USE,  // Co
+  OTHER_UNASSIGNED,  // Cn
+};
+
+}  // namespace u
+
+#endif  // UGC_HH
diff --git a/src/unique_fd.cc b/src/unique_fd.cc
new file mode 100644
index 0000000..135a449
--- /dev/null
+++ b/src/unique_fd.cc
@@ -0,0 +1,10 @@
+#include "unique_fd.hh"
+
+#include <unistd.h>
+
+// Closes the currently held descriptor (if any) and takes ownership of fd.
+void unique_fd::reset(int fd) {
+  if (fd_ != -1)
+    close(fd_);
+  fd_ = fd;
+}
diff --git a/src/unique_fd.hh b/src/unique_fd.hh
new file mode 100644
index 0000000..189d513
--- /dev/null
+++ b/src/unique_fd.hh
@@ -0,0 +1,49 @@
+#ifndef UNIQUE_FD_HH
+#define UNIQUE_FD_HH
+
+// Move-only owner of a file descriptor; -1 means "no descriptor". The held
+// descriptor is closed by reset() and by the destructor.
+class unique_fd {
+ public:
+  constexpr unique_fd()
+      : fd_(-1) {}
+  explicit constexpr unique_fd(int fd)
+      : fd_(fd) {}
+  unique_fd(unique_fd const& fd) = delete;
+  unique_fd& operator=(unique_fd const& fd) = delete;
+  unique_fd(unique_fd&& fd) noexcept
+      : fd_(fd.release()) {}
+  unique_fd& operator=(unique_fd&& fd) noexcept {
+    reset(fd.release());
+    return *this;
+  }
+  ~unique_fd() {
+    reset();
+  }
+
+  bool operator==(unique_fd const& fd) const {
+    return get() == fd.get();
+  }
+  bool operator!=(unique_fd const& fd) const {
+    return get() != fd.get();
+  }
+
+  int get() const { return fd_; }
+  explicit operator bool() const { return fd_ != -1; }
+  int operator*() const { return fd_; }
+
+  // Gives up ownership without closing; returns the descriptor (or -1).
+  int release() noexcept {
+    int ret = fd_;
+    fd_ = -1;
+    return ret;
+  }
+
+  // Closes the held descriptor, if any, then takes ownership of fd.
+  void reset(int fd = -1);
+
+ private:
+  int fd_;
+};
+
+#endif  // UNIQUE_FD_HH
