diff options
Diffstat (limited to 'utf/inc')
| -rw-r--r-- | utf/inc/utf16.hh | 31 | ||||
| -rw-r--r-- | utf/inc/utf32.hh | 29 | ||||
| -rw-r--r-- | utf/inc/utf8.hh | 22 | ||||
| -rw-r--r-- | utf/inc/utf_error.hh | 13 |
4 files changed, 95 insertions, 0 deletions
diff --git a/utf/inc/utf16.hh b/utf/inc/utf16.hh new file mode 100644 index 0000000..344b1a2 --- /dev/null +++ b/utf/inc/utf16.hh @@ -0,0 +1,31 @@ +#ifndef UTF_UTF16_HH +#define UTF_UTF16_HH + +#include "macros.hh" + +#include <cstdint> +#include <string_view> + +namespace utf { + +/* Read one unicode codepoint from UTF-16 BigEndian encoded data if possible. + * If successful, offset is incremented to point to the next codepoint. + * Will fail: + * - not enough data is left in data given offset, returns NEED_MORE. + * - data is not valid UTF-16, ie. invalid or incomplete surrogate pairs, + * returns INVALID. + */ +uint32_t HIDDEN read16be(std::string_view data, std::size_t& offset); + +/* Read one unicode codepoint from UTF-16 LittleEndian encoded data if possible. + * If successful, offset is incremented to point to the next codepoint. + * Will fail: + * - not enough data is left in data given offset, returns NEED_MORE. + * - data is not valid UTF-16, ie. invalid or incomplete surrogate pairs, + * returns INVALID. + */ +uint32_t HIDDEN read16le(std::string_view data, std::size_t& offset); + +} // namespace utf + +#endif // UTF_UTF16_HH diff --git a/utf/inc/utf32.hh b/utf/inc/utf32.hh new file mode 100644 index 0000000..2d3088e --- /dev/null +++ b/utf/inc/utf32.hh @@ -0,0 +1,29 @@ +#ifndef UTF_UTF32_HH +#define UTF_UTF32_HH + +#include "macros.hh" + +#include <cstdint> +#include <string_view> + +namespace utf { + +/* Read one unicode codepoint from UTF-32 BigEndian encoded data if possible. + * If successful, offset is incremented to point to the next codepoint. + * Will fail: + * - not enough data is left in data given offset, returns NEED_MORE. + * - data is not valid UTF-32, ie. outside valid ranges, returns INVALID. + */ +uint32_t HIDDEN read32be(std::string_view data, std::size_t& offset); + +/* Read one unicode codepoint from UTF-32 LittleEndian encoded data if possible. + * If successful, offset is incremented to point to the next codepoint. 
+ * Will fail: + * - not enough data is left in data given offset, returns NEED_MORE. + * - data is not valid UTF-32, ie. outside valid ranges, returns INVALID. + */ +uint32_t HIDDEN read32le(std::string_view data, std::size_t& offset); + +} // namespace utf + +#endif // UTF_UTF32_HH diff --git a/utf/inc/utf8.hh b/utf/inc/utf8.hh new file mode 100644 index 0000000..a3ea84a --- /dev/null +++ b/utf/inc/utf8.hh @@ -0,0 +1,22 @@ +#ifndef UTF_UTF8_HH +#define UTF_UTF8_HH + +#include "macros.hh" + +#include <cstdint> +#include <string_view> + +namespace utf { + +/* Read one unicode codepoint from UTF-8 encoded data if possible. + * If successful, offset is incremented to point to the next codepoint. + * Will fail: + * - not enough data is left in data given offset, returns NEED_MORE. + * - data is not valid UTF-8, this includes overlong encodings and + * invalid unicode code points, returns INVALID. + */ +uint32_t HIDDEN read8(std::string_view data, std::size_t& offset); + +} // namespace utf + +#endif // UTF_UTF8_HH diff --git a/utf/inc/utf_error.hh b/utf/inc/utf_error.hh new file mode 100644 index 0000000..079fa43 --- /dev/null +++ b/utf/inc/utf_error.hh @@ -0,0 +1,13 @@ +#ifndef UTF_ERROR_HH +#define UTF_ERROR_HH + +#include <cstdint> + +namespace utf { + +constexpr uint32_t NEED_MORE = 0xfffffffe; +constexpr uint32_t INVALID = 0xffffffff; + +} // namespace utf + +#endif // UTF_ERROR_HH |
