#ifndef U8_HH
#define U8_HH

// UTF-8 encode/decode primitives operating on iterators over uint8_t.
//
// NOTE(review): the angle-bracket spans of this header (include names,
// template parameter lists, std::expected arguments, static_cast targets)
// were reconstructed from how the names are used below -- confirm them
// against the canonical copy of this file.

#include "u.hh" // IWYU pragma: export

#include <expected> // IWYU pragma: export

#include <cstdint>
#include <iterator>
#include <type_traits>
// NOTE(review): the original listed one further standard header here whose
// name could not be recovered; re-add it if anything below relies on it.

namespace u8 {

/// Decodes one UTF-8 encoded code point from [start, end).
///
/// @param start  Current read position; updated as described below.
/// @param end    One past the last readable byte.
/// @return The decoded code point, or:
///   - u::ReadError::End        if start == end (start unchanged);
///   - u::ReadError::Incomplete if the lead byte announces more bytes than
///     are available (start unchanged);
///   - u::ReadError::Invalid    for a stray continuation byte or an
///     impossible lead byte 0xf8..0xff (start advanced by one byte), or for
///     a bad continuation byte, an overlong encoding, a surrogate, or a
///     value above 0x10ffff (start advanced by the length announced by the
///     lead byte).
/// On success start points just past the decoded sequence.
template <typename T>
    requires std::is_same_v<std::iter_value_t<T>, uint8_t>
std::expected<uint32_t, u::ReadError> read(T& start, T const& end)
{
    if (start == end)
        return std::unexpected(u::ReadError::End);

    uint32_t u;
    switch (*start >> 4) {
    case 0xf: // 11110uvv 10vvwwww 10xxxxyy 10yyzzzz
        // 11111xxx (0xf8..0xff) never starts a UTF-8 sequence. Without this
        // check the 0x07 mask below silently drops bit 3, so e.g.
        // f8 90 80 80 would decode as U+10000. Treat it like a stray
        // continuation byte: consume one byte and report Invalid.
        if ((*start & 0x08) != 0) {
            std::advance(start, 1);
            return std::unexpected(u::ReadError::Invalid);
        }
        if (std::distance(start, end) < 4) {
            return std::unexpected(u::ReadError::Incomplete);
        }
        u = (*start & 0x07) << 18;
        std::advance(start, 1);
        if ((*start & 0xc0) != 0x80) {
            std::advance(start, 3);
            return std::unexpected(u::ReadError::Invalid);
        }
        u |= (*start & 0x3f) << 12;
        std::advance(start, 1);
        if ((*start & 0xc0) != 0x80) {
            std::advance(start, 2);
            return std::unexpected(u::ReadError::Invalid);
        }
        u |= (*start & 0x3f) << 6;
        std::advance(start, 1);
        if ((*start & 0xc0) != 0x80) {
            std::advance(start, 1);
            return std::unexpected(u::ReadError::Invalid);
        }
        u |= *start & 0x3f;
        // Reject overlong forms and values beyond the Unicode range.
        if (u < 0x10000 || u > 0x10ffff) {
            std::advance(start, 1);
            return std::unexpected(u::ReadError::Invalid);
        }
        break;

    case 0xe: // 1110wwww 10xxxxyy 10yyzzzz
        if (std::distance(start, end) < 3) {
            return std::unexpected(u::ReadError::Incomplete);
        }
        u = (*start & 0x0f) << 12;
        std::advance(start, 1);
        if ((*start & 0xc0) != 0x80) {
            std::advance(start, 2);
            return std::unexpected(u::ReadError::Invalid);
        }
        u |= (*start & 0x3f) << 6;
        std::advance(start, 1);
        if ((*start & 0xc0) != 0x80) {
            std::advance(start, 1);
            return std::unexpected(u::ReadError::Invalid);
        }
        u |= *start & 0x3f;
        // Reject overlong forms and UTF-16 surrogates.
        if (u < 0x800 || (u >= 0xd800 && u <= 0xdfff)) {
            std::advance(start, 1);
            return std::unexpected(u::ReadError::Invalid);
        }
        break;

    case 0xd:
    case 0xc: // 110xxxyy 10yyzzzz
        if (std::distance(start, end) < 2) {
            return std::unexpected(u::ReadError::Incomplete);
        }
        u = (*start & 0x1f) << 6;
        std::advance(start, 1);
        if ((*start & 0xc0) != 0x80) {
            std::advance(start, 1);
            return std::unexpected(u::ReadError::Invalid);
        }
        u |= *start & 0x3f;
        // Reject overlong forms (lead bytes 0xc0 and 0xc1).
        if (u < 0x80) {
            std::advance(start, 1);
            return std::unexpected(u::ReadError::Invalid);
        }
        break;

    case 0xb:
    case 0xa:
    case 0x9:
    case 0x8:
        // 10xxxxxx: continuation byte where a lead byte was expected.
        std::advance(start, 1);
        return std::unexpected(u::ReadError::Invalid);

    default: // 0yyyzzzz
        u = *start;
        break;
    }

    // Step over the last byte of the sequence that was just decoded.
    std::advance(start, 1);
    return u;
}

/// Like read(), but maps invalid input to U+FFFD instead of failing.
///
/// @param start  Current read position; updated as described below.
/// @param end    One past the last readable byte.
/// @param eof    When true, a truncated sequence at the end of the range is
///               replaced as well instead of being reported as Incomplete.
/// @return The decoded code point, 0xfffd for replaced input, or
///         u::ReadErrorReplace::End / u::ReadErrorReplace::Incomplete.
/// When a replacement is produced, start is set exactly one byte past where
/// it was on entry, regardless of how far read() advanced it.
template <typename T>
    requires std::is_same_v<std::iter_value_t<T>, uint8_t>
std::expected<uint32_t, u::ReadErrorReplace> read_replace(T& start, T const& end, bool eof)
{
    auto const tmp = start;
    auto ret = read(start, end);
    if (ret.has_value())
        return *ret;

    switch (ret.error()) {
    case u::ReadError::Incomplete:
        if (eof)
            break;
        return std::unexpected(u::ReadErrorReplace::Incomplete);
    case u::ReadError::End:
        return std::unexpected(u::ReadErrorReplace::End);
    case u::ReadError::Invalid:
        break;
    }

    // std::next rather than `tmp + 1`, so the same iterator categories that
    // std::distance/std::advance accept elsewhere in this file work here.
    start = std::next(tmp);
    return 0xfffd;
}

/// Encodes `code` as UTF-8 into [start, end).
///
/// @param start  Current write position; on success it is advanced past the
///               bytes written, on failure it is left unchanged.
/// @param end    One past the last writable byte.
/// @param code   Value to encode.
/// @return false if the range is too small for the encoding, true otherwise.
///
/// NOTE(review): `code` is not validated; surrogates and values above
/// 0x10ffff are encoded as-is and yield bytes that read() rejects.
template <typename T>
    requires std::is_same_v<std::iter_value_t<T>, uint8_t>
bool write(T& start, T const& end, uint32_t code)
{
    if (code < 0x80) {
        if (start == end)
            return false;
        *start = static_cast<uint8_t>(code);
    } else if (code < 0x800) {
        if (std::distance(start, end) < 2)
            return false;
        *start = 0xc0 | static_cast<uint8_t>(code >> 6);
        std::advance(start, 1);
        *start = 0x80 | static_cast<uint8_t>(code & 0x3f);
    } else if (code < 0x10000) {
        if (std::distance(start, end) < 3)
            return false;
        *start = 0xe0 | static_cast<uint8_t>(code >> 12);
        std::advance(start, 1);
        *start = 0x80 | static_cast<uint8_t>((code >> 6) & 0x3f);
        std::advance(start, 1);
        *start = 0x80 | static_cast<uint8_t>(code & 0x3f);
    } else {
        if (std::distance(start, end) < 4)
            return false;
        *start = 0xf0 | static_cast<uint8_t>(code >> 18);
        std::advance(start, 1);
        *start = 0x80 | static_cast<uint8_t>((code >> 12) & 0x3f);
        std::advance(start, 1);
        *start = 0x80 | static_cast<uint8_t>((code >> 6) & 0x3f);
        std::advance(start, 1);
        *start = 0x80 | static_cast<uint8_t>(code & 0x3f);
    }

    // Step over the final byte written by whichever branch ran.
    std::advance(start, 1);
    return true;
}

/// Advances `start` over one encoded sequence without decoding it.
///
/// The length is taken from the high nibble of the lead byte only (0xf -> 4,
/// 0xe -> 3, 0xc/0xd -> 2, anything else -> 1); continuation bytes are not
/// inspected.
///
/// @param start  Current position; advanced on success, unchanged on failure.
/// @param end    One past the last readable byte.
/// @return false if start == end or fewer bytes remain than the lead byte
///         announces, true otherwise.
template <typename T>
    requires std::is_same_v<std::iter_value_t<T>, uint8_t>
bool skip(T& start, T const& end)
{
    if (start == end)
        return false;

    switch (*start >> 4) {
    case 0xf:
        if (std::distance(start, end) < 4)
            return false;
        std::advance(start, 4);
        break;
    case 0xe:
        if (std::distance(start, end) < 3)
            return false;
        std::advance(start, 3);
        break;
    case 0xc:
    case 0xd:
        if (std::distance(start, end) < 2)
            return false;
        std::advance(start, 2);
        break;
    default:
        std::advance(start, 1);
        break;
    }
    return true;
}

} // namespace u8

#endif // U8_HH