From fc4547b412e28164af1bf8981234c6af959ccc0b Mon Sep 17 00:00:00 2001 From: Joel Klinghed Date: Tue, 13 Jun 2023 10:07:16 +0200 Subject: WIP --- utf/inc/utf8.hh | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 utf/inc/utf8.hh (limited to 'utf/inc/utf8.hh') diff --git a/utf/inc/utf8.hh b/utf/inc/utf8.hh new file mode 100644 index 0000000..a3ea84a --- /dev/null +++ b/utf/inc/utf8.hh @@ -0,0 +1,22 @@ +#ifndef UTF_UTF8_HH +#define UTF_UTF8_HH + +#include "macros.hh" + +#include <cstdint> +#include <string_view> + +namespace utf { + +/* Read one unicode codepoint from UTF-8 encoded data if possible. + * If successful, offset is incremented to point to the next codepoint. + * Will fail if: + * - not enough data is left in data given offset, returns NEED_MORE. + * - data is not valid UTF-8, this includes overlong encodings and + * invalid unicode code points, returns INVALID. + */ +uint32_t HIDDEN read8(std::string_view data, std::size_t& offset); + +} // namespace utf + +#endif // UTF_UTF8_HH -- cgit v1.2.3-70-g09d2