#ifndef UMOD8_HH
#define UMOD8_HH

#include "u.hh"  // IWYU pragma: export

#include <expected>  // IWYU pragma: export

#include <cstdint>
#include <iterator>
#include <type_traits>
#include <utility>

// Reading, writing and skipping of code points in a byte encoding where
//   * 1..0x7f is stored as one byte,
//   * 0 and 0x80..0x7ff are stored as two bytes (0 is written as 0xc0 0x80),
//   * 0x800..0xffff is stored as three bytes, and
//   * 0x10000..0x10ffff is stored as a surrogate pair, each half taking three
//     bytes (six bytes in total).
// Four-byte sequences are never produced and are rejected when read.
namespace umod8 {

// Decodes one code point from [start, end).
//
// On success returns the code point and leaves `start` just past the bytes
// that were consumed.
//
// Errors:
//   * ReadError::End        - start == end; `start` is unchanged.
//   * ReadError::Incomplete - the sequence is cut off by `end`; `start` is
//                             left at the beginning of the sequence.
//   * ReadError::Invalid    - malformed, overlong or unpaired-surrogate
//                             input; `start` has been moved past the
//                             offending bytes so the caller can continue.
template <typename T>
  requires std::is_same_v<std::iter_value_t<T>, uint8_t>
std::expected<uint32_t, u::ReadError> read(T& start, const T& end) {
  if (start == end) return std::unexpected(u::ReadError::End);

  uint32_t cp;
  switch (*start >> 4) {
    case 0xe: {
      // 1110wwww 10xxxxyy 10yyzzzz
      auto const origin = start;
      if (std::distance(start, end) < 3) {
        return std::unexpected(u::ReadError::Incomplete);
      }
      cp = (*start & 0x0f) << 12;
      std::advance(start, 1);
      if ((*start & 0xc0) != 0x80) {
        // Bad first continuation byte: drop the whole three-byte slot.
        std::advance(start, 2);
        return std::unexpected(u::ReadError::Invalid);
      }
      cp |= (*start & 0x3f) << 6;
      std::advance(start, 1);
      if ((*start & 0xc0) != 0x80) {
        std::advance(start, 1);
        return std::unexpected(u::ReadError::Invalid);
      }
      cp |= *start & 0x3f;
      if (cp < 0x800) {
        // Overlong: the value would have fit in a shorter sequence.
        std::advance(start, 1);
        return std::unexpected(u::ReadError::Invalid);
      }
      if (cp >= 0xd800 && cp <= 0xdbff) {
        // High surrogate: a low surrogate must follow immediately.
        std::advance(start, 1);
        // Not going recursive here as we don't want it unbounded.
        // A lone high surrogate at the end of the input is invalid.
        if (start == end) return std::unexpected(u::ReadError::Invalid);
        if ((*start >> 4) == 0xe) {
          if (std::distance(start, end) < 3) {
            // The second half may still arrive; rewind to the first half.
            start = origin;
            return std::unexpected(u::ReadError::Incomplete);
          }
          uint32_t low = (*start & 0x0f) << 12;
          std::advance(start, 1);
          if ((*start & 0xc0) == 0x80) {
            low |= (*start & 0x3f) << 6;
            std::advance(start, 1);
            if ((*start & 0xc0) == 0x80) {
              low |= *start & 0x3f;
              if (low >= 0xdc00 && low <= 0xdfff) {
                std::advance(start, 1);
                return 0x10000 + (((cp - 0xd800) << 10) | (low - 0xdc00));
              }
            }
          }
          // Whatever follows is not a low surrogate; consume only the
          // high surrogate and let the next call look at the rest.
          start = std::next(origin, 3);
        }
        // Next character may be valid, invalid, something, but we know
        // it is not the second half of a surrogate pair, so consider
        // this first part invalid.
        return std::unexpected(u::ReadError::Invalid);
      }
      if (cp >= 0xdc00 && cp <= 0xdfff) {
        // Low surrogate without a preceding high surrogate.
        std::advance(start, 1);
        return std::unexpected(u::ReadError::Invalid);
      }
      break;
    }
    case 0xd:
    case 0xc:
      // 110xxxyy 10yyzzzz
      if (std::distance(start, end) < 2) {
        return std::unexpected(u::ReadError::Incomplete);
      }
      cp = (*start & 0x1f) << 6;
      std::advance(start, 1);
      if ((*start & 0xc0) != 0x80) {
        std::advance(start, 1);
        return std::unexpected(u::ReadError::Invalid);
      }
      cp |= *start & 0x3f;
      // Overlong forms are rejected, except for 0 which is allowed to be
      // stored in two bytes.
      if (cp > 0 && cp < 0x80) {
        std::advance(start, 1);
        return std::unexpected(u::ReadError::Invalid);
      }
      break;
    case 0xf:
    case 0xb:
    case 0xa:
    case 0x9:
    case 0x8:
      // Four-byte lead or a stray continuation byte: skip just this byte.
      std::advance(start, 1);
      return std::unexpected(u::ReadError::Invalid);
    default:
      // 0yyyzzzz
      cp = *start;
      break;
  }
  // Step past the final byte of the sequence that was just decoded.
  std::advance(start, 1);
  return cp;
}

// Same as read(), except that ReadError::Invalid is turned into the value
// 0xfffd instead of an error. Incomplete and End are passed through as the
// corresponding ReadErrorReplace values.
template <typename T>
  requires std::is_same_v<std::iter_value_t<T>, uint8_t>
std::expected<uint32_t, u::ReadErrorReplace> read_replace(T& start,
                                                          const T& end) {
  auto const decoded = read(start, end);
  if (decoded.has_value()) return *decoded;

  switch (decoded.error()) {
    case u::ReadError::Incomplete:
      return std::unexpected(u::ReadErrorReplace::Incomplete);
    case u::ReadError::End:
      return std::unexpected(u::ReadErrorReplace::End);
    case u::ReadError::Invalid:
      return 0xfffd;
  }
  std::unreachable();
}

// Encodes `code` into [start, end).
//
// Returns true and leaves `start` just past the written bytes on success.
// Returns false, without writing anything and without moving `start`, when
// the remaining space is too small or when `code` is above 0x10ffff.
//
// Values in 0xd800..0xdfff are not rejected; they are stored as a plain
// three-byte sequence.
template <typename T>
  requires std::is_same_v<std::iter_value_t<T>, uint8_t>
bool write(T& start, const T& end, uint32_t code) {
  // Anything above 0x10ffff cannot be expressed as a surrogate pair.
  if (code > 0x10ffff) return false;

  // Stores a 16-bit unit as 1110wwww 10xxxxyy 10yyzzzz and steps past it.
  // The caller is responsible for checking that three bytes are available.
  auto const put3 = [&start](uint32_t unit) {
    *start = static_cast<uint8_t>(0xe0 | (unit >> 12));
    std::advance(start, 1);
    *start = static_cast<uint8_t>(0x80 | ((unit >> 6) & 0x3f));
    std::advance(start, 1);
    *start = static_cast<uint8_t>(0x80 | (unit & 0x3f));
    std::advance(start, 1);
  };

  if (code > 0 && code < 0x80) {
    if (start == end) return false;
    *start = static_cast<uint8_t>(code);
    std::advance(start, 1);
  } else if (code < 0x800) {
    // Also taken for code == 0, which is stored as 0xc0 0x80.
    if (std::distance(start, end) < 2) return false;
    *start = static_cast<uint8_t>(0xc0 | (code >> 6));
    std::advance(start, 1);
    *start = static_cast<uint8_t>(0x80 | (code & 0x3f));
    std::advance(start, 1);
  } else if (code < 0x10000) {
    if (std::distance(start, end) < 3) return false;
    put3(code);
  } else {
    // Surrogate pair: check for both halves up front so that a short
    // buffer is left untouched.
    if (std::distance(start, end) < 6) return false;
    uint32_t const offset = code - 0x10000;
    put3(0xd800 + (offset >> 10));
    put3(0xdc00 + (offset & 0x3ff));
  }
  return true;
}

// Moves `start` past one encoded character without returning its value.
//
// Returns false and leaves `start` unchanged when start == end, when a
// two-byte sequence is cut off by `end`, or when read() fails on a sequence
// whose lead byte has the high nibble 0xe. Two-byte sequences and all other
// lead bytes are stepped over without validating what follows.
//
// NOTE(review): an Invalid three-byte sequence makes this return false,
// whereas other malformed bytes are skipped and reported as true - confirm
// that this asymmetry is intended.
template <typename T>
  requires std::is_same_v<std::iter_value_t<T>, uint8_t>
bool skip(T& start, const T& end) {
  if (start == end) return false;

  switch (*start >> 4) {
    case 0xe: {
      // Delegate to read() so that a surrogate pair counts as one character.
      auto const origin = start;
      if (read(start, end).has_value()) return true;
      start = origin;
      return false;
    }
    case 0xc:
    case 0xd:
      if (std::distance(start, end) < 2) return false;
      std::advance(start, 2);
      break;
    default:
      std::advance(start, 1);
      break;
  }
  return true;
}

}  // namespace umod8

#endif  // UMOD8_HH