blob: 54b02968ab1f0d351c37be6d5c5ea798186c185a (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
|
#include "utf8.hh"
#include "utf_error.hh"
namespace utf {
namespace {
// Reports whether `c` is acceptable as a decoded code point: it must not
// exceed 0x10ffff and must not lie in the surrogate range 0xd800..0xdfff.
inline bool valid_codepoint(uint32_t c) {
    if (c > 0x10ffff)
        return false;
    const bool is_surrogate = (c >= 0xd800) && (c <= 0xdfff);
    return !is_surrogate;
}
} // namespace
// Decodes a single UTF-8 encoded code point from `data`, beginning at byte
// index `offset`.
//
// On success the decoded value is returned and `offset` is advanced past the
// bytes that were consumed. On failure `offset` is left unchanged and one of
// two sentinel values is returned:
//   NEED_MORE - `offset` is at or beyond the end of `data`, or the sequence
//               announced by the lead byte runs past the end of `data`.
//   INVALID   - the lead byte is not a legal sequence start, a continuation
//               byte is malformed, the encoding is overlong, or the decoded
//               value is rejected by valid_codepoint().
uint32_t read8(std::string_view data, std::size_t& offset) {
    if (offset >= data.size())
        return NEED_MORE;

    const uint8_t lead = static_cast<uint8_t>(data[offset]);

    // High bit clear: a one-byte sequence that stands for itself.
    if ((lead & 0x80) == 0) {
        ++offset;
        return lead;
    }

    // Classify the lead byte: total sequence length, the smallest value that
    // legitimately needs that many bytes, and the payload bits it carries.
    uint8_t length = 0;
    uint32_t minimum = 0;
    uint32_t codepoint = 0;
    if ((lead & 0xe0) == 0xc0) {            // 110xxxxx
        length = 2;
        minimum = 0x80;
        codepoint = lead & 0x1f;
    } else if ((lead & 0xf0) == 0xe0) {     // 1110xxxx
        length = 3;
        minimum = 0x800;
        codepoint = lead & 0x0f;
    } else if ((lead & 0xf8) == 0xf0) {     // 11110xxx
        length = 4;
        minimum = 0x10000;
        codepoint = lead & 0x07;
    } else {
        // Either a stray continuation byte (10xxxxxx) or 11111xxx.
        return INVALID;
    }

    // The truncation check comes before any continuation byte is examined.
    if (data.size() - offset < length)
        return NEED_MORE;

    // Fold in six payload bits from each continuation byte (10xxxxxx).
    const std::size_t end = offset + length;
    for (std::size_t pos = offset + 1; pos < end; ++pos) {
        const uint8_t byte = static_cast<uint8_t>(data[pos]);
        if ((byte & 0xc0) != 0x80)
            return INVALID;
        codepoint = (codepoint << 6) | static_cast<uint32_t>(byte & 0x3f);
    }

    // Reject out-of-range values, surrogates, and overlong encodings (a value
    // that would have fit in a shorter sequence).
    if (!valid_codepoint(codepoint) || codepoint < minimum)
        return INVALID;

    offset = end;
    return codepoint;
}
} // namespace utf
|