summary | refs | log | tree | commit | diff
path: root/src/uri.cc
diff options
context:
space:
mode:
author Joel Klinghed <the_jk@spawned.biz> 2025-10-20 22:01:05 +0200
committer Joel Klinghed <the_jk@spawned.biz> 2025-10-21 23:15:29 +0200
commit 4dddfd622977f84f0cf41847aec9e728d02bec65 (patch)
tree 27bc4ceca8cd18bd3583107b2213368134b931f5 /src/uri.cc
parent e8dc8edad7cdf194091f0479b70b154e872f57ef (diff)
uri: New module
Decode a URI-encoded (percent-encoded) string, validating both the hex digits and that the decoded data is valid UTF-8.
Diffstat (limited to 'src/uri.cc')
-rw-r--r-- src/uri.cc 70
1 files changed, 70 insertions, 0 deletions
diff --git a/src/uri.cc b/src/uri.cc
new file mode 100644
index 0000000..b7a3edf
--- /dev/null
+++ b/src/uri.cc
@@ -0,0 +1,70 @@
+#include "uri.hh"
+
+#include "u8.hh"
+
+#include <cstddef>
+#include <optional>
+#include <span>
+#include <string>
+#include <string_view>
+
+namespace uri {
+
+namespace {
+
/// Maps a single hexadecimal digit to its numeric value.
///
/// @param c  Character to interpret. '0'-'9', 'A'-'F' and 'a'-'f' are
///           accepted; upper and lower case letters are equivalent.
/// @return The digit's value (0-15), or std::nullopt when `c` is not a
///         hexadecimal digit.
///
/// The function is constexpr and noexcept: it only compares and subtracts
/// character values, so it can be evaluated at compile time.
inline constexpr std::optional<uint8_t> hex(char c) noexcept {
  if (c >= '0' && c <= '9')
    return static_cast<uint8_t>(c - '0');
  if (c >= 'A' && c <= 'F')
    return static_cast<uint8_t>(10 + (c - 'A'));
  if (c >= 'a' && c <= 'f')
    return static_cast<uint8_t>(10 + (c - 'a'));
  return std::nullopt;
}
+
+} // namespace
+
+std::optional<std::string_view> decode(std::string_view input,
+ std::string& dst) {
+ auto i = input.find('%');
+ if (i == std::string_view::npos)
+ return input;
+
+ dst.clear();
+ size_t last = 0;
+ bool check_utf8 = false;
+ while (true) {
+ if (input.size() - i < 3)
+ return std::nullopt;
+ auto a = hex(input[i + 1]);
+ auto b = hex(input[i + 2]);
+ if (!a.has_value() || !b.has_value())
+ return std::nullopt;
+ dst.append(input, last, i - last);
+ auto c = (a.value() << 4) | b.value();
+ if (c & 0x80)
+ check_utf8 = true;
+ dst.push_back(static_cast<char>(c));
+ last = i + 3;
+ i = input.find('%', last);
+ if (i == std::string::npos) {
+ dst.append(input, last);
+ break;
+ }
+ }
+
+ if (check_utf8) {
+ std::span<uint8_t const> data{reinterpret_cast<uint8_t const*>(dst.data()),
+ dst.size()};
+ auto it = data.begin();
+ while (it != data.end()) {
+ auto ret = u8::read(it, data.end());
+ if (!ret.has_value())
+ return std::nullopt;
+ }
+ }
+
+ return dst;
+}
+
+} // namespace uri