summaryrefslogtreecommitdiff
path: root/utf/inc
diff options
context:
space:
mode:
Diffstat (limited to 'utf/inc')
-rw-r--r--utf/inc/utf16.hh31
-rw-r--r--utf/inc/utf32.hh29
-rw-r--r--utf/inc/utf8.hh22
-rw-r--r--utf/inc/utf_error.hh13
4 files changed, 95 insertions, 0 deletions
diff --git a/utf/inc/utf16.hh b/utf/inc/utf16.hh
new file mode 100644
index 0000000..344b1a2
--- /dev/null
+++ b/utf/inc/utf16.hh
@@ -0,0 +1,31 @@
+#ifndef UTF_UTF16_HH
+#define UTF_UTF16_HH
+
+#include "macros.hh"
+
+#include <cstdint>
+#include <string_view>
+
+namespace utf {
+
+/* Read one Unicode code point from UTF-16 big-endian encoded data, if possible.
+ * On success, offset is advanced to point to the next code point.
+ * Fails when:
+ * - not enough data is left in data at the given offset; returns NEED_MORE.
+ * - data is not valid UTF-16, i.e. invalid or incomplete surrogate pairs;
+ *   returns INVALID.
+ */
+uint32_t HIDDEN read16be(std::string_view data, std::size_t& offset);
+
+/* Read one Unicode code point from UTF-16 little-endian encoded data, if possible.
+ * On success, offset is advanced to point to the next code point.
+ * Fails when:
+ * - not enough data is left in data at the given offset; returns NEED_MORE.
+ * - data is not valid UTF-16, i.e. invalid or incomplete surrogate pairs;
+ *   returns INVALID.
+ */
+uint32_t HIDDEN read16le(std::string_view data, std::size_t& offset);
+
+} // namespace utf
+
+#endif // UTF_UTF16_HH
diff --git a/utf/inc/utf32.hh b/utf/inc/utf32.hh
new file mode 100644
index 0000000..2d3088e
--- /dev/null
+++ b/utf/inc/utf32.hh
@@ -0,0 +1,29 @@
+#ifndef UTF_UTF32_HH
+#define UTF_UTF32_HH
+
+#include "macros.hh"
+
+#include <cstdint>
+#include <string_view>
+
+namespace utf {
+
+/* Read one Unicode code point from UTF-32 big-endian encoded data, if possible.
+ * On success, offset is advanced to point to the next code point.
+ * Fails when:
+ * - not enough data is left in data at the given offset; returns NEED_MORE.
+ * - data is not valid UTF-32, i.e. outside the valid ranges; returns INVALID.
+ */
+uint32_t HIDDEN read32be(std::string_view data, std::size_t& offset);
+
+/* Read one Unicode code point from UTF-32 little-endian encoded data, if possible.
+ * On success, offset is advanced to point to the next code point.
+ * Fails when:
+ * - not enough data is left in data at the given offset; returns NEED_MORE.
+ * - data is not valid UTF-32, i.e. outside the valid ranges; returns INVALID.
+ */
+uint32_t HIDDEN read32le(std::string_view data, std::size_t& offset);
+
+} // namespace utf
+
+#endif // UTF_UTF32_HH
diff --git a/utf/inc/utf8.hh b/utf/inc/utf8.hh
new file mode 100644
index 0000000..a3ea84a
--- /dev/null
+++ b/utf/inc/utf8.hh
@@ -0,0 +1,22 @@
+#ifndef UTF_UTF8_HH
+#define UTF_UTF8_HH
+
+#include "macros.hh"
+
+#include <cstdint>
+#include <string_view>
+
+namespace utf {
+
+/* Read one Unicode code point from UTF-8 encoded data, if possible.
+ * On success, offset is advanced to point to the next code point.
+ * Fails when:
+ * - not enough data is left in data at the given offset; returns NEED_MORE.
+ * - data is not valid UTF-8 (this includes overlong encodings and
+ *   invalid Unicode code points); returns INVALID.
+ */
+uint32_t HIDDEN read8(std::string_view data, std::size_t& offset);
+
+} // namespace utf
+
+#endif // UTF_UTF8_HH
diff --git a/utf/inc/utf_error.hh b/utf/inc/utf_error.hh
new file mode 100644
index 0000000..079fa43
--- /dev/null
+++ b/utf/inc/utf_error.hh
@@ -0,0 +1,13 @@
+#ifndef UTF_ERROR_HH
+#define UTF_ERROR_HH
+
+#include <cstdint>
+
+namespace utf {
+
+constexpr uint32_t NEED_MORE = 0xfffffffe; // reader result: not enough data left at offset
+constexpr uint32_t INVALID = 0xffffffff; // reader result: data is not validly encoded
+
+} // namespace utf
+
+#endif // UTF_ERROR_HH