diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/main.cc | 1 | ||||
| -rw-r--r-- | src/u.hh | 21 | ||||
| -rw-r--r-- | src/u8.hh | 196 | ||||
| -rw-r--r-- | src/uri.cc | 70 | ||||
| -rw-r--r-- | src/uri.hh | 17 |
5 files changed, 305 insertions, 0 deletions
// ===========================================================================
// Listing of the files added/changed by this commit, in diff order.
// Project-local includes are shown as comments because the headers they name
// are inlined in this same listing.
// ===========================================================================

// ===== src/main.cc (excerpt: only this part of the include list is shown) ===
// #include "logger.hh"
// #include "looper.hh"
// #include "signals.hh"
// #include "uri.hh"        // <-- line added by this commit
// #include "websocket.hh"
//
// #include <cerrno>

// ===== src/u.hh =============================================================
#ifndef U_HH
#define U_HH

#include <cstdint>

namespace u {

// Failure reasons reported by the strict readers (e.g. u8::read).
enum class ReadError : uint8_t {
  Invalid,     // Invalid sequence
  End,         // At end (it == end)
  Incomplete,  // Too few bytes
};

// Failure reasons reported by the replacing readers (e.g. u8::read_replace),
// which turn invalid input into U+FFFD instead of reporting it.
enum class ReadErrorReplace : uint8_t {
  End,         // At end (it == end)
  Incomplete,  // Too few bytes
};

}  // namespace u

#endif  // U_HH

// ===== src/u8.hh ============================================================
#ifndef U8_HH
#define U8_HH

// #include "u.hh"  // IWYU pragma: export

#include <cstdint>  // IWYU pragma: export
#include <expected>
#include <iterator>
#include <type_traits>
#include <utility>

namespace u8 {

// Decodes one UTF-8 encoded code point from [start, end).
//
// On success the code point is returned and `start` is moved past the bytes
// that were consumed.
//
// Errors:
//  * End        - start == end; `start` is not moved.
//  * Incomplete - the lead byte announces more bytes than are available;
//                 `start` is not moved.
//  * Invalid    - stray continuation byte, impossible lead byte (0xF8..0xFF),
//                 bad continuation byte, overlong encoding, surrogate
//                 (U+D800..U+DFFF) or value above U+10FFFF. For a stray
//                 continuation byte or an impossible lead byte `start` is
//                 moved one byte forward; otherwise it is moved past the full
//                 length announced by the lead byte.
template <std::forward_iterator T>
  requires std::is_same_v<std::iter_value_t<T>, uint8_t>
std::expected<uint32_t, u::ReadError> read(T& start, T const& end) {
  if (start == end)
    return std::unexpected(u::ReadError::End);
  uint32_t code{0};
  switch (*start >> 4) {
    case 0xf:
      // 11110uvv 10vvwwww 10xxxxyy 10yyzzzz
      if ((*start & 0x08) != 0) {
        // 11111xxx (0xF8..0xFF) is never a valid lead byte. Without this
        // check the 0x07 mask below would silently drop the offending bit
        // and e.g. F8 90 80 80 would be accepted as U+10000.
        std::advance(start, 1);
        return std::unexpected(u::ReadError::Invalid);
      }
      if (std::distance(start, end) < 4) {
        return std::unexpected(u::ReadError::Incomplete);
      }
      code = static_cast<uint32_t>(*start & 0x07) << 18;
      std::advance(start, 1);
      if ((*start & 0xc0) != 0x80) {
        std::advance(start, 3);
        return std::unexpected(u::ReadError::Invalid);
      }
      code |= static_cast<uint32_t>(*start & 0x3f) << 12;
      std::advance(start, 1);
      if ((*start & 0xc0) != 0x80) {
        std::advance(start, 2);
        return std::unexpected(u::ReadError::Invalid);
      }
      code |= static_cast<uint32_t>(*start & 0x3f) << 6;
      std::advance(start, 1);
      if ((*start & 0xc0) != 0x80) {
        std::advance(start, 1);
        return std::unexpected(u::ReadError::Invalid);
      }
      code |= static_cast<uint32_t>(*start & 0x3f);
      // Reject overlong encodings and values beyond the Unicode range.
      if (code < 0x10000 || code > 0x10ffff) {
        std::advance(start, 1);
        return std::unexpected(u::ReadError::Invalid);
      }
      break;
    case 0xe:
      // 1110wwww 10xxxxyy 10yyzzzz
      if (std::distance(start, end) < 3) {
        return std::unexpected(u::ReadError::Incomplete);
      }
      code = static_cast<uint32_t>(*start & 0x0f) << 12;
      std::advance(start, 1);
      if ((*start & 0xc0) != 0x80) {
        std::advance(start, 2);
        return std::unexpected(u::ReadError::Invalid);
      }
      code |= static_cast<uint32_t>(*start & 0x3f) << 6;
      std::advance(start, 1);
      if ((*start & 0xc0) != 0x80) {
        std::advance(start, 1);
        return std::unexpected(u::ReadError::Invalid);
      }
      code |= static_cast<uint32_t>(*start & 0x3f);
      // Reject overlong encodings and UTF-16 surrogates.
      if (code < 0x800 || (code >= 0xd800 && code <= 0xdfff)) {
        std::advance(start, 1);
        return std::unexpected(u::ReadError::Invalid);
      }
      break;
    case 0xd:
    case 0xc:
      // 110xxxyy 10yyzzzz
      if (std::distance(start, end) < 2) {
        return std::unexpected(u::ReadError::Incomplete);
      }
      code = static_cast<uint32_t>(*start & 0x1f) << 6;
      std::advance(start, 1);
      if ((*start & 0xc0) != 0x80) {
        std::advance(start, 1);
        return std::unexpected(u::ReadError::Invalid);
      }
      code |= static_cast<uint32_t>(*start & 0x3f);
      // Reject overlong encodings (lead bytes 0xC0 and 0xC1).
      if (code < 0x80) {
        std::advance(start, 1);
        return std::unexpected(u::ReadError::Invalid);
      }
      break;
    case 0xb:
    case 0xa:
    case 0x9:
    case 0x8:
      // 10xxxxxx: continuation byte where a lead byte was expected.
      std::advance(start, 1);
      return std::unexpected(u::ReadError::Invalid);
    default:
      // 0yyyzzzz
      code = *start;
      break;
  }
  std::advance(start, 1);
  return code;
}

// Like read(), but never reports Invalid: an invalid sequence yields U+FFFD
// and consumes exactly one byte, so decoding resynchronises on the next byte.
//
// `eof` tells whether more input may follow after `end`. When it is true a
// truncated trailing sequence is also replaced by U+FFFD (one byte consumed);
// when it is false Incomplete is returned and `start` is left untouched.
template <std::forward_iterator T>
  requires std::is_same_v<std::iter_value_t<T>, uint8_t>
std::expected<uint32_t, u::ReadErrorReplace> read_replace(T& start,
                                                          T const& end,
                                                          bool eof) {
  auto const first = start;
  auto const ret = read(start, end);
  if (ret.has_value())
    return *ret;
  switch (ret.error()) {
    case u::ReadError::Incomplete:
      if (eof)
        break;
      return std::unexpected(u::ReadErrorReplace::Incomplete);
    case u::ReadError::End:
      return std::unexpected(u::ReadErrorReplace::End);
    case u::ReadError::Invalid:
      break;
  }
  // std::next rather than `first + 1`: T is only required to be a forward
  // iterator, which does not provide operator+.
  start = std::next(first);
  return 0xfffd;
}

// Encodes `code` as UTF-8 into [start, end).
//
// Returns false, writing nothing and leaving `start` untouched, when the
// encoding does not fit in the remaining space. Otherwise writes 1-4 bytes,
// moves `start` past them and returns true.
//
// NOTE: `code` is not validated here; surrogates and values above U+10FFFF
// are encoded as-is, so callers must pass a valid code point.
template <std::forward_iterator T>
  requires std::is_same_v<std::iter_value_t<T>, uint8_t>
bool write(T& start, T const& end, uint32_t code) {
  if (code < 0x80) {
    if (start == end)
      return false;
    *start = static_cast<uint8_t>(code);
  } else if (code < 0x800) {
    if (std::distance(start, end) < 2)
      return false;
    *start = 0xc0 | static_cast<uint8_t>(code >> 6);
    std::advance(start, 1);
    *start = 0x80 | static_cast<uint8_t>(code & 0x3f);
  } else if (code < 0x10000) {
    if (std::distance(start, end) < 3)
      return false;
    *start = 0xe0 | static_cast<uint8_t>(code >> 12);
    std::advance(start, 1);
    *start = 0x80 | static_cast<uint8_t>((code >> 6) & 0x3f);
    std::advance(start, 1);
    *start = 0x80 | static_cast<uint8_t>(code & 0x3f);
  } else {
    if (std::distance(start, end) < 4)
      return false;
    *start = 0xf0 | static_cast<uint8_t>(code >> 18);
    std::advance(start, 1);
    *start = 0x80 | static_cast<uint8_t>((code >> 12) & 0x3f);
    std::advance(start, 1);
    *start = 0x80 | static_cast<uint8_t>((code >> 6) & 0x3f);
    std::advance(start, 1);
    *start = 0x80 | static_cast<uint8_t>(code & 0x3f);
  }
  std::advance(start, 1);
  return true;
}

// Moves `start` past one encoded code point, judging its length from the
// lead byte only; the bytes themselves are not validated.
//
// Returns false, leaving `start` untouched, when start == end or when fewer
// bytes remain than the lead byte announces.
template <std::forward_iterator T>
  requires std::is_same_v<std::iter_value_t<T>, uint8_t>
bool skip(T& start, T const& end) {
  if (start == end)
    return false;
  switch (*start >> 4) {
    case 0xf:
      if (std::distance(start, end) < 4)
        return false;
      std::advance(start, 4);
      break;
    case 0xe:
      if (std::distance(start, end) < 3)
        return false;
      std::advance(start, 3);
      break;
    case 0xc:
    case 0xd:
      if (std::distance(start, end) < 2)
        return false;
      std::advance(start, 2);
      break;
    default:
      // ASCII, or a stray continuation byte: step over a single byte.
      std::advance(start, 1);
      break;
  }
  return true;
}

}  // namespace u8

#endif  // U8_HH

// ===== src/uri.cc ===========================================================
// #include "uri.hh"

// #include "u8.hh"

#include <cstddef>
#include <optional>
#include <span>
#include <string>
#include <string_view>

namespace uri {

namespace {

// Value of one hexadecimal digit (either case), or nullopt if `c` is not one.
inline std::optional<uint8_t> hex(char c) {
  if (c >= '0' && c <= '9')
    return static_cast<uint8_t>(c - '0');
  if (c >= 'A' && c <= 'F')
    return static_cast<uint8_t>(10 + (c - 'A'));
  if (c >= 'a' && c <= 'f')
    return static_cast<uint8_t>(10 + (c - 'a'));
  return std::nullopt;
}

}  // namespace

// Percent-decodes `input`.
//
// If `input` contains no '%' it is returned as-is and `dst` is not touched.
// Otherwise the decoded text is built in `dst` and a view of `dst` is
// returned. nullopt is returned when a '%' is not followed by two hex digits,
// or when an escape produced a byte >= 0x80 and the decoded result is not
// valid UTF-8; `dst` may hold partial output in that case.
std::optional<std::string_view> decode(std::string_view input,
                                       std::string& dst) {
  auto i = input.find('%');
  if (i == std::string_view::npos)
    return input;

  dst.clear();
  size_t last = 0;
  bool check_utf8 = false;
  while (true) {
    // A complete escape needs the '%' plus two hex digits.
    if (input.size() - i < 3)
      return std::nullopt;
    auto const hi = hex(input[i + 1]);
    auto const lo = hex(input[i + 2]);
    if (!hi.has_value() || !lo.has_value())
      return std::nullopt;
    // Copy the literal text between the previous escape and this one.
    dst.append(input, last, i - last);
    auto const byte = (hi.value() << 4) | lo.value();
    if (byte & 0x80)
      check_utf8 = true;
    dst.push_back(static_cast<char>(byte));
    last = i + 3;
    i = input.find('%', last);
    if (i == std::string_view::npos) {
      dst.append(input, last);
      break;
    }
  }

  // Only escapes that produced non-ASCII bytes trigger UTF-8 validation.
  if (check_utf8) {
    std::span<uint8_t const> data{reinterpret_cast<uint8_t const*>(dst.data()),
                                  dst.size()};
    auto it = data.begin();
    while (it != data.end()) {
      auto const ret = u8::read(it, data.end());
      if (!ret.has_value())
        return std::nullopt;
    }
  }

  return dst;
}

}  // namespace uri

// ===== src/uri.hh ===========================================================
#ifndef URI_HH
#define URI_HH

#include <optional>
#include <string>
#include <string_view>

namespace uri {

// If input needs no decoding, input is returned. Otherwise dst
// is modified and returned. If invalid encoding is found, nullopt is returned.
std::optional<std::string_view> decode(std::string_view input,
                                       std::string& dst);

}  // namespace uri

#endif  // URI_HH
