From fc4547b412e28164af1bf8981234c6af959ccc0b Mon Sep 17 00:00:00 2001 From: Joel Klinghed Date: Tue, 13 Jun 2023 10:07:16 +0200 Subject: WIP --- utf/inc/utf16.hh | 31 +++++++++++++++++++++++++++++++ utf/inc/utf32.hh | 29 +++++++++++++++++++++++++++++ utf/inc/utf8.hh | 22 ++++++++++++++++++++++ utf/inc/utf_error.hh | 13 +++++++++++++ 4 files changed, 95 insertions(+) create mode 100644 utf/inc/utf16.hh create mode 100644 utf/inc/utf32.hh create mode 100644 utf/inc/utf8.hh create mode 100644 utf/inc/utf_error.hh (limited to 'utf/inc') diff --git a/utf/inc/utf16.hh b/utf/inc/utf16.hh new file mode 100644 index 0000000..344b1a2 --- /dev/null +++ b/utf/inc/utf16.hh @@ -0,0 +1,31 @@ +#ifndef UTF_UTF16_HH +#define UTF_UTF16_HH + +#include "macros.hh" + +#include <cstdint> +#include <string_view> + +namespace utf { + +/* Read one unicode codepoint from UTF-16 BigEndian encoded data if possible. + * If successful, offset is incremented to point to next codepoint. + * Will fail: + * - not enough data is left in data given offset, returns NEED_MORE. + * - data is not valid UTF-16, i.e. invalid or incomplete surrogate pairs, + * returns INVALID. + */ +uint32_t HIDDEN read16be(std::string_view data, std::size_t& offset); + +/* Read one unicode codepoint from UTF-16 LittleEndian encoded data if possible. + * If successful, offset is incremented to point to next codepoint. + * Will fail: + * - not enough data is left in data given offset, returns NEED_MORE. + * - data is not valid UTF-16, i.e. invalid or incomplete surrogate pairs, + * returns INVALID. + */ +uint32_t HIDDEN read16le(std::string_view data, std::size_t& offset); + +} // namespace utf + +#endif // UTF_UTF16_HH diff --git a/utf/inc/utf32.hh b/utf/inc/utf32.hh new file mode 100644 index 0000000..2d3088e --- /dev/null +++ b/utf/inc/utf32.hh @@ -0,0 +1,29 @@ +#ifndef UTF_UTF32_HH +#define UTF_UTF32_HH + +#include "macros.hh" + +#include <cstdint> +#include <string_view> + +namespace utf { + +/* Read one unicode codepoint from UTF-32 BigEndian encoded data if possible. 
+ * If successful, offset is incremented to point to next codepoint. + * Will fail: + * - not enough data is left in data given offset, returns NEED_MORE. + * - data is not valid UTF-32, i.e. outside valid ranges, returns INVALID. + */ +uint32_t HIDDEN read32be(std::string_view data, std::size_t& offset); + +/* Read one unicode codepoint from UTF-32 LittleEndian encoded data if possible. + * If successful, offset is incremented to point to next codepoint. + * Will fail: + * - not enough data is left in data given offset, returns NEED_MORE. + * - data is not valid UTF-32, i.e. outside valid ranges, returns INVALID. + */ +uint32_t HIDDEN read32le(std::string_view data, std::size_t& offset); + +} // namespace utf + +#endif // UTF_UTF32_HH diff --git a/utf/inc/utf8.hh b/utf/inc/utf8.hh new file mode 100644 index 0000000..a3ea84a --- /dev/null +++ b/utf/inc/utf8.hh @@ -0,0 +1,22 @@ +#ifndef UTF_UTF8_HH +#define UTF_UTF8_HH + +#include "macros.hh" + +#include <cstdint> +#include <string_view> + +namespace utf { + +/* Read one unicode codepoint from UTF-8 encoded data if possible. + * If successful, offset is incremented to point to next codepoint. + * Will fail: + * - not enough data is left in data given offset, returns NEED_MORE. + * - data is not valid UTF-8, this includes overlong encodings and + * invalid unicode code points, returns INVALID. + */ +uint32_t HIDDEN read8(std::string_view data, std::size_t& offset); + +} // namespace utf + +#endif // UTF_UTF8_HH diff --git a/utf/inc/utf_error.hh b/utf/inc/utf_error.hh new file mode 100644 index 0000000..079fa43 --- /dev/null +++ b/utf/inc/utf_error.hh @@ -0,0 +1,13 @@ +#ifndef UTF_ERROR_HH +#define UTF_ERROR_HH + +#include <cstdint> + +namespace utf { + +constexpr uint32_t NEED_MORE = 0xfffffffe; +constexpr uint32_t INVALID = 0xffffffff; + +} // namespace utf + +#endif // UTF_ERROR_HH -- cgit v1.2.3-70-g09d2