summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--meson.build25
-rw-r--r--src/main.cc1
-rw-r--r--src/u.hh21
-rw-r--r--src/u8.hh196
-rw-r--r--src/uri.cc70
-rw-r--r--src/uri.hh17
-rw-r--r--test/uri.cc23
7 files changed, 353 insertions, 0 deletions
diff --git a/meson.build b/meson.build
index 93e6534..fef8e92 100644
--- a/meson.build
+++ b/meson.build
@@ -230,6 +230,20 @@ sha1_dep = declare_dependency(
dependencies: [sha1_inner_dep],
)
+# Library with the percent-decoding code in src/uri.cc; the headers it
+# includes are listed next to it as sources.
+uri_lib = library(
+ 'uri',
+ sources: [
+ 'src/u.hh',
+ 'src/u8.hh',
+ 'src/uri.cc',
+ 'src/uri.hh',
+ ],
+ include_directories: inc,
+)
+# Dependency object that other targets list to link against uri_lib.
+uri_dep = declare_dependency(
+ link_with: uri_lib,
+)
+
websocket_lib = library(
'websocket',
sources: [
@@ -259,6 +273,7 @@ bluetooth_jukebox = executable(
json_dep,
looper_dep,
signals_dep,
+ uri_dep,
websocket_dep,
],
)
@@ -409,6 +424,16 @@ test('sha1', executable(
],
))
+# Unit test named 'uri', built from test/uri.cc and linked against uri_dep
+# together with the shared test_dependencies.
+test('uri', executable(
+ 'test_uri',
+ sources: ['test/uri.cc'],
+ include_directories: inc,
+ dependencies : [
+ uri_dep,
+ test_dependencies,
+ ],
+))
+
run_clang_tidy = find_program('run-clang-tidy', required: false)
if run_clang_tidy.found()
diff --git a/src/main.cc b/src/main.cc
index 83fd905..d19a434 100644
--- a/src/main.cc
+++ b/src/main.cc
@@ -7,6 +7,7 @@
#include "logger.hh"
#include "looper.hh"
#include "signals.hh"
+#include "uri.hh"
#include "websocket.hh"
#include <cerrno>
diff --git a/src/u.hh b/src/u.hh
new file mode 100644
index 0000000..439d6dc
--- /dev/null
+++ b/src/u.hh
@@ -0,0 +1,21 @@
+#ifndef U_HH
+#define U_HH
+
+#include <cstdint>
+
+// Error codes shared by the code point readers.
+namespace u {
+
+// Reasons a strict read of a single code point can fail.
+enum class ReadError : uint8_t {
+ Invalid, // The bytes do not form a valid sequence
+ End, // Nothing left to read (start == end)
+ Incomplete, // The sequence needs more bytes than are available
+};
+
+// Reasons a replacing read can fail. There is no Invalid here:
+// u8::read_replace returns U+FFFD for invalid input instead of an error.
+enum class ReadErrorReplace : uint8_t {
+ End, // Nothing left to read (start == end)
+ Incomplete, // The sequence needs more bytes than are available
+};
+
+} // namespace u
+
+#endif // U_HH
diff --git a/src/u8.hh b/src/u8.hh
new file mode 100644
index 0000000..d673caa
--- /dev/null
+++ b/src/u8.hh
@@ -0,0 +1,196 @@
+#ifndef U8_HH
+#define U8_HH
+
+#include "u.hh" // IWYU pragma: export
+
+#include <cstdint> // IWYU pragma: export
+#include <expected>
+#include <iterator>
+#include <type_traits>
+#include <utility>
+
+namespace u8 {
+
+// Decodes a single UTF-8 encoded code point from [start, end).
+//
+// On success the code point is returned and start is moved past the bytes
+// that were consumed. On failure one of these errors is returned:
+//  - u::ReadError::End: start == end. start is not moved.
+//  - u::ReadError::Incomplete: the lead byte announces more bytes than are
+//    left before end. start is not moved.
+//  - u::ReadError::Invalid: stray continuation byte, impossible lead byte,
+//    bad continuation byte, overlong encoding, surrogate or a value above
+//    U+10FFFF. start is moved past the whole sequence announced by the lead
+//    byte (one byte for a stray continuation or an impossible lead byte).
+template <std::forward_iterator T>
+  requires std::is_same_v<std::iter_value_t<T>, uint8_t>
+std::expected<uint32_t, u::ReadError> read(T& start, T const& end) {
+  if (start == end)
+    return std::unexpected(u::ReadError::End);
+  uint32_t code;
+  // The high nibble of the lead byte selects the sequence length.
+  switch (*start >> 4) {
+    case 0xf:
+      // 11110uvv 10vvwwww 10xxxxyy 10yyzzzz
+      if (*start & 0x08) {
+        // 0xf8-0xff never starts a sequence. Without this check the masked
+        // payload bits of e.g. 0xfc would decode to a value in range.
+        std::advance(start, 1);
+        return std::unexpected(u::ReadError::Invalid);
+      }
+      if (std::distance(start, end) < 4) {
+        return std::unexpected(u::ReadError::Incomplete);
+      }
+      code = (*start & 0x07) << 18;
+      std::advance(start, 1);
+      if ((*start & 0xc0) != 0x80) {
+        std::advance(start, 3);
+        return std::unexpected(u::ReadError::Invalid);
+      }
+      code |= (*start & 0x3f) << 12;
+      std::advance(start, 1);
+      if ((*start & 0xc0) != 0x80) {
+        std::advance(start, 2);
+        return std::unexpected(u::ReadError::Invalid);
+      }
+      code |= (*start & 0x3f) << 6;
+      std::advance(start, 1);
+      if ((*start & 0xc0) != 0x80) {
+        std::advance(start, 1);
+        return std::unexpected(u::ReadError::Invalid);
+      }
+      code |= *start & 0x3f;
+      // Reject overlong encodings and values above U+10FFFF.
+      if (code < 0x10000 || code > 0x10ffff) {
+        std::advance(start, 1);
+        return std::unexpected(u::ReadError::Invalid);
+      }
+      break;
+    case 0xe:
+      // 1110wwww 10xxxxyy 10yyzzzz
+      if (std::distance(start, end) < 3) {
+        return std::unexpected(u::ReadError::Incomplete);
+      }
+      code = (*start & 0x0f) << 12;
+      std::advance(start, 1);
+      if ((*start & 0xc0) != 0x80) {
+        std::advance(start, 2);
+        return std::unexpected(u::ReadError::Invalid);
+      }
+      code |= (*start & 0x3f) << 6;
+      std::advance(start, 1);
+      if ((*start & 0xc0) != 0x80) {
+        std::advance(start, 1);
+        return std::unexpected(u::ReadError::Invalid);
+      }
+      code |= *start & 0x3f;
+      // Reject overlong encodings and UTF-16 surrogates.
+      if (code < 0x800 || (code >= 0xd800 && code <= 0xdfff)) {
+        std::advance(start, 1);
+        return std::unexpected(u::ReadError::Invalid);
+      }
+      break;
+    case 0xd:
+    case 0xc:
+      // 110xxxyy 10yyzzzz
+      if (std::distance(start, end) < 2) {
+        return std::unexpected(u::ReadError::Incomplete);
+      }
+      code = (*start & 0x1f) << 6;
+      std::advance(start, 1);
+      if ((*start & 0xc0) != 0x80) {
+        std::advance(start, 1);
+        return std::unexpected(u::ReadError::Invalid);
+      }
+      code |= *start & 0x3f;
+      // Reject overlong encodings (lead bytes 0xc0 and 0xc1).
+      if (code < 0x80) {
+        std::advance(start, 1);
+        return std::unexpected(u::ReadError::Invalid);
+      }
+      break;
+    case 0xb:
+    case 0xa:
+    case 0x9:
+    case 0x8:
+      // 10xxxxxx: continuation byte without a lead byte.
+      std::advance(start, 1);
+      return std::unexpected(u::ReadError::Invalid);
+    default:
+      // 0yyyzzzz
+      code = *start;
+      break;
+  }
+  // Step over the last byte of the sequence.
+  std::advance(start, 1);
+  return code;
+}
+
+// Like read(), but never reports invalid input as an error: an invalid
+// sequence yields U+FFFD (REPLACEMENT CHARACTER) and start is moved exactly
+// one byte forward from where it was, so decoding resumes at the next byte.
+//
+// eof tells whether [start, end) is the final piece of the input. A
+// sequence cut short by end is replaced with U+FFFD when eof is true;
+// otherwise u::ReadErrorReplace::Incomplete is returned and start is left
+// where it was. u::ReadErrorReplace::End is returned when start == end.
+template <std::forward_iterator T>
+  requires std::is_same_v<std::iter_value_t<T>, uint8_t>
+std::expected<uint32_t, u::ReadErrorReplace> read_replace(T& start,
+                                                          T const& end,
+                                                          bool eof) {
+  auto const first = start;
+  auto const result = read(start, end);
+  if (result.has_value())
+    return *result;
+  switch (result.error()) {
+    case u::ReadError::Incomplete:
+      if (eof)
+        break;
+      return std::unexpected(u::ReadErrorReplace::Incomplete);
+    case u::ReadError::End:
+      return std::unexpected(u::ReadErrorReplace::End);
+    case u::ReadError::Invalid:
+      break;
+  }
+  // read() may have moved start several bytes; rewind to one byte past the
+  // original position. std::next keeps this valid for forward iterators,
+  // which have no operator+.
+  start = std::next(first);
+  return 0xfffd;
+}
+
+// Encodes code as UTF-8 into [start, end).
+//
+// Returns true and moves start past the written bytes on success. Returns
+// false, writing nothing and leaving start untouched, when the encoded
+// sequence does not fit before end or when code is not a Unicode scalar
+// value (a surrogate or a value above U+10FFFF).
+template <std::forward_iterator T>
+  requires std::is_same_v<std::iter_value_t<T>, uint8_t>
+bool write(T& start, T const& end, uint32_t code) {
+  // These values have no valid UTF-8 form; encoding them anyway would
+  // truncate high bits and emit bytes that read() rejects.
+  if (code > 0x10ffff || (code >= 0xd800 && code <= 0xdfff))
+    return false;
+  if (code < 0x80) {
+    // 0yyyzzzz
+    if (start == end)
+      return false;
+    *start = static_cast<uint8_t>(code);
+  } else if (code < 0x800) {
+    // 110xxxyy 10yyzzzz
+    if (std::distance(start, end) < 2)
+      return false;
+    *start = 0xc0 | static_cast<uint8_t>(code >> 6);
+    std::advance(start, 1);
+    *start = 0x80 | static_cast<uint8_t>(code & 0x3f);
+  } else if (code < 0x10000) {
+    // 1110wwww 10xxxxyy 10yyzzzz
+    if (std::distance(start, end) < 3)
+      return false;
+    *start = 0xe0 | static_cast<uint8_t>(code >> 12);
+    std::advance(start, 1);
+    *start = 0x80 | static_cast<uint8_t>((code >> 6) & 0x3f);
+    std::advance(start, 1);
+    *start = 0x80 | static_cast<uint8_t>(code & 0x3f);
+  } else {
+    // 11110uvv 10vvwwww 10xxxxyy 10yyzzzz
+    if (std::distance(start, end) < 4)
+      return false;
+    *start = 0xf0 | static_cast<uint8_t>(code >> 18);
+    std::advance(start, 1);
+    *start = 0x80 | static_cast<uint8_t>((code >> 12) & 0x3f);
+    std::advance(start, 1);
+    *start = 0x80 | static_cast<uint8_t>((code >> 6) & 0x3f);
+    std::advance(start, 1);
+    *start = 0x80 | static_cast<uint8_t>(code & 0x3f);
+  }
+  // Step over the last byte written.
+  std::advance(start, 1);
+  return true;
+}
+
+// Moves start past one UTF-8 sequence without validating it. The length is
+// taken from the lead byte alone. Returns false, leaving start untouched,
+// when start == end or when fewer bytes remain than the lead byte announces.
+template <std::forward_iterator T>
+  requires std::is_same_v<std::iter_value_t<T>, uint8_t>
+bool skip(T& start, T const& end) {
+  if (start == end)
+    return false;
+
+  auto const lead = *start >> 4;
+  int length = 1;
+  if (lead == 0xf) {
+    length = 4;
+  } else if (lead == 0xe) {
+    length = 3;
+  } else if (lead == 0xc || lead == 0xd) {
+    length = 2;
+  }
+
+  // A single byte is always available here since start != end.
+  if (length > 1 && std::distance(start, end) < length)
+    return false;
+
+  std::advance(start, length);
+  return true;
+}
+
+} // namespace u8
+
+#endif // U8_HH
diff --git a/src/uri.cc b/src/uri.cc
new file mode 100644
index 0000000..b7a3edf
--- /dev/null
+++ b/src/uri.cc
@@ -0,0 +1,70 @@
+#include "uri.hh"
+
+#include "u8.hh"
+
+#include <cstddef>
+#include <optional>
+#include <span>
+#include <string>
+#include <string_view>
+
+namespace uri {
+
+namespace {
+
+// Value of the hexadecimal digit c (0-9, A-F or a-f), or nullopt when c is
+// not a hexadecimal digit.
+inline std::optional<uint8_t> hex(char c) {
+  // Maps c to base + offset when it lies within [first, last].
+  auto const in_range = [c](char first, char last,
+                            int base) -> std::optional<uint8_t> {
+    if (c < first || c > last)
+      return std::nullopt;
+    return static_cast<uint8_t>(base + (c - first));
+  };
+
+  if (auto const digit = in_range('0', '9', 0))
+    return digit;
+  if (auto const upper = in_range('A', 'F', 10))
+    return upper;
+  return in_range('a', 'f', 10);
+}
+
+} // namespace
+
+// Percent-decodes input.
+//
+// Without any '%' in input, input itself is returned and dst is not
+// touched. Otherwise dst is cleared, filled with the decoded bytes and
+// returned as a view. nullopt is returned when a '%' is not followed by two
+// hexadecimal digits, or when an escape produced a byte >= 0x80 and the
+// decoded result is not valid UTF-8; dst may hold partial output then.
+std::optional<std::string_view> decode(std::string_view input,
+                                       std::string& dst) {
+  auto percent = input.find('%');
+  if (percent == std::string_view::npos)
+    return input;
+
+  dst.clear();
+  bool saw_high_bit = false;
+  size_t copied = 0;
+  do {
+    // An escape is '%' plus two hexadecimal digits.
+    if (input.size() - percent < 3)
+      return std::nullopt;
+    auto const high = hex(input[percent + 1]);
+    auto const low = hex(input[percent + 2]);
+    if (!high.has_value() || !low.has_value())
+      return std::nullopt;
+
+    // Literal text between the previous escape and this one.
+    dst.append(input, copied, percent - copied);
+
+    auto const byte = (high.value() << 4) | low.value();
+    if (byte & 0x80)
+      saw_high_bit = true;
+    dst.push_back(static_cast<char>(byte));
+
+    copied = percent + 3;
+    percent = input.find('%', copied);
+  } while (percent != std::string_view::npos);
+  // Literal text after the last escape.
+  dst.append(input, copied);
+
+  // Only escapes that produced non-ASCII bytes trigger UTF-8 validation.
+  if (saw_high_bit) {
+    std::span<uint8_t const> bytes{reinterpret_cast<uint8_t const*>(dst.data()),
+                                   dst.size()};
+    auto pos = bytes.begin();
+    while (pos != bytes.end()) {
+      if (!u8::read(pos, bytes.end()).has_value())
+        return std::nullopt;
+    }
+  }
+
+  return dst;
+}
+
+} // namespace uri
diff --git a/src/uri.hh b/src/uri.hh
new file mode 100644
index 0000000..6b92694
--- /dev/null
+++ b/src/uri.hh
@@ -0,0 +1,17 @@
+#ifndef URI_HH
+#define URI_HH
+
+#include <optional>
+#include <string>
+#include <string_view>
+
+namespace uri {
+
+// Percent-decodes input. If input contains no '%' it needs no decoding and
+// input itself is returned, leaving dst untouched. Otherwise dst is
+// overwritten with the decoded bytes and a view of dst is returned. If an
+// invalid encoding is found, nullopt is returned; dst may have been
+// modified in that case. The returned view refers to either input or dst,
+// so it is only usable while that storage is.
+std::optional<std::string_view> decode(std::string_view input,
+ std::string& dst);
+
+} // namespace uri
+
+#endif // URI_HH
diff --git a/test/uri.cc b/test/uri.cc
new file mode 100644
index 0000000..44366b9
--- /dev/null
+++ b/test/uri.cc
@@ -0,0 +1,23 @@
+#include "uri.hh"
+
+#include <gtest/gtest.h>
+#include <string>
+
+// An empty input has nothing to decode and comes back as an empty view.
+TEST(uri, empty) {
+  std::string scratch;
+  auto const decoded = uri::decode("", scratch);
+  EXPECT_EQ("", decoded.value_or("error"));
+}
+
+// Typical inputs: ASCII escapes, escaped multi-byte UTF-8, escapes mixed
+// with literal text, and a truncated escape that must be rejected.
+TEST(uri, example) {
+  std::string scratch;
+  // Decodes into scratch; "error" stands in for a failed decode.
+  auto const decode_or_error = [&scratch](std::string_view encoded) {
+    return uri::decode(encoded, scratch).value_or("error");
+  };
+
+  EXPECT_EQ("?x=test", decode_or_error("%3Fx%3Dtest"));
+  EXPECT_EQ("шеллы", decode_or_error("%D1%88%D0%B5%D0%BB%D0%BB%D1%8B"));
+  EXPECT_EQ("JavaScript_шеллы",
+            decode_or_error("JavaScript_%D1%88%D0%B5%D0%BB%D0%BB%D1%8B"));
+
+  auto const truncated = uri::decode("%E0%A4%A", scratch);
+  EXPECT_FALSE(truncated.has_value());
+
+  EXPECT_EQ("search+query (correct)",
+            decode_or_error("search+query%20%28correct%29"));
+}