diff options
| author | Joel Klinghed <the_jk@spawned.biz> | 2025-10-20 22:01:05 +0200 |
|---|---|---|
| committer | Joel Klinghed <the_jk@spawned.biz> | 2025-10-21 23:15:29 +0200 |
| commit | 4dddfd622977f84f0cf41847aec9e728d02bec65 (patch) | |
| tree | 27bc4ceca8cd18bd3583107b2213368134b931f5 /src/uri.cc | |
| parent | e8dc8edad7cdf194091f0479b70b154e872f57ef (diff) | |
uri: New module
Decode a URI-encoded (percent-encoded) string, validating both the hex
digits of each escape and that the decoded data is valid UTF-8.
Diffstat (limited to 'src/uri.cc')
| -rw-r--r-- | src/uri.cc | 70 |
1 files changed, 70 insertions, 0 deletions
diff --git a/src/uri.cc b/src/uri.cc new file mode 100644 index 0000000..b7a3edf --- /dev/null +++ b/src/uri.cc @@ -0,0 +1,70 @@ +#include "uri.hh" + +#include "u8.hh" + +#include <cstddef> +#include <optional> +#include <span> +#include <string> +#include <string_view> + +namespace uri { + +namespace { + +inline std::optional<uint8_t> hex(char c) { + if (c >= '0' && c <= '9') + return c - '0'; + if (c >= 'A' && c <= 'F') + return 10 + (c - 'A'); + if (c >= 'a' && c <= 'f') + return 10 + (c - 'a'); + return std::nullopt; +} + +} // namespace + +std::optional<std::string_view> decode(std::string_view input, + std::string& dst) { + auto i = input.find('%'); + if (i == std::string_view::npos) + return input; + + dst.clear(); + size_t last = 0; + bool check_utf8 = false; + while (true) { + if (input.size() - i < 3) + return std::nullopt; + auto a = hex(input[i + 1]); + auto b = hex(input[i + 2]); + if (!a.has_value() || !b.has_value()) + return std::nullopt; + dst.append(input, last, i - last); + auto c = (a.value() << 4) | b.value(); + if (c & 0x80) + check_utf8 = true; + dst.push_back(static_cast<char>(c)); + last = i + 3; + i = input.find('%', last); + if (i == std::string::npos) { + dst.append(input, last); + break; + } + } + + if (check_utf8) { + std::span<uint8_t const> data{reinterpret_cast<uint8_t const*>(dst.data()), + dst.size()}; + auto it = data.begin(); + while (it != data.end()) { + auto ret = u8::read(it, data.end()); + if (!ret.has_value()) + return std::nullopt; + } + } + + return dst; +} + +} // namespace uri |
