summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Joel Klinghed <the_jk@spawned.biz> 2025-09-29 09:39:49 +0200
committer Joel Klinghed <the_jk@spawned.biz> 2025-09-29 09:50:47 +0200
commit d196d51e07f50f3510c43ad375c5559b58860023 (patch)
tree 3432b8e99e306d0ece9f29ddad1e2945f88a1481
parent 1e9e51dae1c01bab7562911b958c47528b8011c8 (diff)
java: Add tokens support for Java 21
Java 21 adds some new keywords. I opted to modify the java-8 grammar to use the new names as well, even though they are not going to match anything there; this makes the tokenizer easier to write.
-rw-r--r-- data/java-21/tokens.grammar 423
-rw-r--r-- data/java-8/tokens.grammar 12
-rw-r--r-- meson.build 2
-rw-r--r-- src/gen_tokens.cc 78
-rw-r--r-- src/grammar.cc 37
-rw-r--r-- src/java_tokens.cc 61
-rw-r--r-- src/java_tokens.hh 5
-rw-r--r-- src/java_version.hh 3
-rw-r--r-- test/java_tokens.cc 56
9 files changed, 654 insertions, 23 deletions
diff --git a/data/java-21/tokens.grammar b/data/java-21/tokens.grammar
new file mode 100644
index 0000000..db935b2
--- /dev/null
+++ b/data/java-21/tokens.grammar
@@ -0,0 +1,423 @@
+InputElement:
+ WhiteSpace
+ Comment
+ Token
+
+Token:
+ Identifier
+ Keyword
+ Literal
+ Separator
+ Operator
+
+Comment:
+ TraditionalComment
+ EndOfLineComment
+
+TraditionalComment:
+ / * CommentTail
+
+CommentTail:
+ * CommentTailStar
+ NotStar CommentTail
+
+CommentTailStar:
+ /
+ * CommentTailStar
+ NotStarNotSlash CommentTail
+
+NotStar:
+ InputCharacter but not *
+ LineTerminator
+
+NotStarNotSlash:
+ InputCharacter but not * or /
+ LineTerminator
+
+EndOfLineComment:
+ / / {InputCharacter}
+
+Identifier:
+ IdentifierChars but not a ReservedKeyword or BooleanLiteral or NullLiteral
+
+IdentifierChars:
+ JavaLetter {JavaLetterOrDigit}
+
+Keyword:
+ ReservedKeyword
+ ContextualKeyword
+
+ReservedKeyword:
+ abstract
+ assert
+ boolean
+ break
+ byte
+ case
+ catch
+ char
+ class
+ const
+ continue
+ default
+ do
+ double
+ else
+ enum
+ extends
+ final
+ finally
+ float
+ for
+ goto
+ if
+ implements
+ import
+ instanceof
+ int
+ interface
+ long
+ native
+ new
+ package
+ private
+ protected
+ public
+ return
+ short
+ static
+ strictfp
+ super
+ switch
+ synchronized
+ this
+ throw
+ throws
+ transient
+ try
+ void
+ volatile
+ while
+ _
+
+ContextualKeyword:
+ exports
+ module
+ non-sealed
+ open
+ opens
+ permits
+ provides
+ record
+ requires
+ sealed
+ to
+ transitive
+ uses
+ var
+ when
+ with
+ yield
+
+Literal:
+ IntegerLiteral
+ FloatingPointLiteral
+ BooleanLiteral
+ CharacterLiteral
+ StringLiteral
+ TextBlock
+ NullLiteral
+
+IntegerLiteral:
+ DecimalIntegerLiteral
+ HexIntegerLiteral
+ OctalIntegerLiteral
+ BinaryIntegerLiteral
+
+DecimalIntegerLiteral:
+ DecimalNumeral [IntegerTypeSuffix]
+
+HexIntegerLiteral:
+ HexNumeral [IntegerTypeSuffix]
+
+OctalIntegerLiteral:
+ OctalNumeral [IntegerTypeSuffix]
+
+BinaryIntegerLiteral:
+ BinaryNumeral [IntegerTypeSuffix]
+
+IntegerTypeSuffix:
+ l
+ L
+
+DecimalNumeral:
+ 0
+ NonZeroDigit [Digits]
+ NonZeroDigit Underscores Digits
+
+NonZeroDigit:
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+
+Digits:
+ Digit
+ Digit [DigitsAndUnderscores] Digit
+
+Digit:
+ 0
+ NonZeroDigit
+
+DigitsAndUnderscores:
+ DigitOrUnderscore {DigitOrUnderscore}
+
+DigitOrUnderscore:
+ Digit
+ _
+
+Underscores:
+ _ {_}
+
+HexNumeral:
+ 0 x HexDigits
+ 0 X HexDigits
+
+HexDigits:
+ HexDigit
+ HexDigit [HexDigitsAndUnderscores] HexDigit
+
+HexDigit:
+ 0
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+ a
+ b
+ c
+ d
+ e
+ f
+ A
+ B
+ C
+ D
+ E
+ F
+
+HexDigitsAndUnderscores:
+ HexDigitOrUnderscore {HexDigitOrUnderscore}
+
+HexDigitOrUnderscore:
+ HexDigit
+ _
+
+OctalNumeral:
+ 0 OctalDigits
+ 0 Underscores OctalDigits
+
+OctalDigits:
+ OctalDigit
+ OctalDigit [OctalDigitsAndUnderscores] OctalDigit
+
+OctalDigit:
+ 0
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+
+OctalDigitsAndUnderscores:
+ OctalDigitOrUnderscore {OctalDigitOrUnderscore}
+
+OctalDigitOrUnderscore:
+ OctalDigit
+ _
+
+BinaryNumeral:
+ 0 b BinaryDigits
+ 0 B BinaryDigits
+
+BinaryDigits:
+ BinaryDigit
+ BinaryDigit [BinaryDigitsAndUnderscores] BinaryDigit
+
+BinaryDigit:
+ 0
+ 1
+
+BinaryDigitsAndUnderscores:
+ BinaryDigitOrUnderscore {BinaryDigitOrUnderscore}
+
+BinaryDigitOrUnderscore:
+ BinaryDigit
+ _
+
+FloatingPointLiteral:
+ DecimalFloatingPointLiteral
+ HexadecimalFloatingPointLiteral
+
+DecimalFloatingPointLiteral:
+ Digits . [Digits] [ExponentPart] [FloatTypeSuffix]
+ . Digits [ExponentPart] [FloatTypeSuffix]
+ Digits ExponentPart [FloatTypeSuffix]
+ Digits [ExponentPart] FloatTypeSuffix
+
+ExponentPart:
+ ExponentIndicator SignedInteger
+
+ExponentIndicator:
+ e
+ E
+
+SignedInteger:
+ [Sign] Digits
+
+Sign:
+ +
+ -
+
+FloatTypeSuffix:
+ f
+ F
+ d
+ D
+
+HexadecimalFloatingPointLiteral:
+ HexSignificand BinaryExponent [FloatTypeSuffix]
+
+HexSignificand:
+ HexNumeral [.]
+ 0 x [HexDigits] . HexDigits
+ 0 X [HexDigits] . HexDigits
+
+BinaryExponent:
+ BinaryExponentIndicator SignedInteger
+
+BinaryExponentIndicator:
+ p
+ P
+
+BooleanLiteral:
+ true
+ false
+
+CharacterLiteral:
+ ' SingleCharacter '
+ ' EscapeSequence '
+
+SingleCharacter:
+ InputCharacter but not ' or \
+
+StringLiteral:
+ " {StringCharacter} "
+
+StringCharacter:
+ InputCharacter but not " or \
+ EscapeSequence
+
+TextBlock:
+ " " " {TextBlockWhiteSpace} LineTerminator {TextBlockCharacter} " " "
+
+TextBlockWhiteSpace:
+ WhiteSpace but not LineTerminator
+
+TextBlockCharacter:
+ InputCharacter but not \
+ EscapeSequence
+ LineTerminator
+
+EscapeSequence:
+ \ b
+ \ s
+ \ t
+ \ n
+ \ f
+ \ r
+ \ LineTerminator
+ \ "
+ \ '
+ \ \
+ OctalEscape
+
+OctalEscape:
+ \ OctalDigit
+ \ OctalDigit OctalDigit
+ \ ZeroToThree OctalDigit OctalDigit
+
+ZeroToThree:
+ 0
+ 1
+ 2
+ 3
+
+NullLiteral:
+ null
+
+Separator:
+ (
+ )
+ {
+ }
+ [
+ ]
+ ;
+ ,
+ .
+ ...
+ @
+ ::
+
+Operator:
+ =
+ >
+ <
+ !
+ ~
+ ?
+ :
+ ->
+ ==
+ >=
+ <=
+ !=
+ &&
+ ||
+ ++
+ --
+ +
+ -
+ *
+ /
+ &
+ |
+ ^
+ %
+ <<
+ >>
+ >>>
+ +=
+ -=
+ *=
+ /=
+ &=
+ |=
+ ^=
+ %=
+ <<=
+ >>=
+ >>>=
diff --git a/data/java-8/tokens.grammar b/data/java-8/tokens.grammar
index 3521ac0..3941b94 100644
--- a/data/java-8/tokens.grammar
+++ b/data/java-8/tokens.grammar
@@ -43,7 +43,13 @@ Identifier:
IdentifierChars:
JavaLetter {JavaLetterOrDigit}
+# Java 8 only has reserved keywords, but use modern names
+# here to make a shared tokenizer simpler.
Keyword:
+ ReservedKeyword
+ ContextualKeyword
+
+ReservedKeyword:
abstract
continue
for
@@ -95,14 +101,20 @@ Keyword:
super
while
+ContextualKeyword:
+
Literal:
IntegerLiteral
FloatingPointLiteral
BooleanLiteral
CharacterLiteral
StringLiteral
+ TextBlock
NullLiteral
+# Java 8 doesn't have TextBlock, but add it as newer grammars have it
+TextBlock:
+
IntegerLiteral:
DecimalIntegerLiteral
HexIntegerLiteral
diff --git a/meson.build b/meson.build
index 7782285..a8f4b97 100644
--- a/meson.build
+++ b/meson.build
@@ -263,10 +263,12 @@ gen_tokens = executable(
java_versions = [
'java-8',
+ 'java-21',
]
java_unicode_versions = {
'java-8': '6.2.0',
+ 'java-21': '15.0.0',
}
java_tokens_sources = []
diff --git a/src/gen_tokens.cc b/src/gen_tokens.cc
index ef0fce7..cc8c06d 100644
--- a/src/gen_tokens.cc
+++ b/src/gen_tokens.cc
@@ -88,7 +88,8 @@ class Generator {
// Find the Elements that has at least one terminal or character class as symbol
// These will be the different tokens the tokenizer can return
void Generator::find_specific_elements(grammar::Element const& root) {
- if (std::ranges::any_of(root.definitions, [](auto const& definition) {
+ if (root.definitions.empty() ||
+ std::ranges::any_of(root.definitions, [](auto const& definition) {
return definition.symbols.size() > 1 ||
definition.symbols[0].type == grammar::Symbol::Type::kTerminal;
})) {
@@ -233,7 +234,7 @@ void write_character_class_matchers(std::ostream& out,
<< "std::optional<size_t> TokenMatcher::matchLineTerminator"
<< "(std::string_view str) {\n"
// Tokenizer normally reads one line at a time, there is only
- // one construct (traditional comment) that needs it.
+ // a small set of constructs (traditional comment, text block) that needs it.
// So match synthetic '\n' or report that it was needed if we are at
// end of string.
<< " if (str.empty()) {\n"
@@ -361,6 +362,22 @@ void write_character_class_matchers(std::ostream& out,
}
std::ostream& quote(std::ostream& out, std::string_view in) {
+ int use_raw_string = 1;
+ for (auto c : in) {
+ if (c == '"' || c == '\\' || c == '\n') {
+ use_raw_string = 2;
+ } else if (c < ' ' || (c & 0x80)) {
+ use_raw_string = 0;
+ break;
+ }
+ }
+ if (use_raw_string == 2) {
+ out << "R\"(";
+ out << in;
+ out << ")\"";
+ return out;
+ }
+
out << '"';
bool avoid_digit = false;
for (auto c : in) {
@@ -569,6 +586,7 @@ bool Generator::write_matcher(std::ostream& out,
bool have_internal = next_internal;
next_internal = false;
ReturnType symbol_return_type = return_type;
+ bool zero_or_more_with_terminal = false;
if (symbol.optional != grammar::Symbol::Optional::kRequired &&
i + 1 < definition.symbols.size() &&
@@ -613,6 +631,33 @@ bool Generator::write_matcher(std::ostream& out,
}
out << indent << "while (true) {\n";
indent2 += " ";
+
+ if (i + 1 < definition.symbols.size() &&
+ definition.symbols[i + 1].optional ==
+ grammar::Symbol::Optional::kRequired &&
+ definition.symbols[i + 1].type ==
+ grammar::Symbol::Type::kTerminal) {
+ if (symbol_return_type == return_type) {
+ out << indent2 << "ret = ";
+ } else {
+ out << indent2 << "ret_internal = ";
+ }
+ write_matcher(out, definition.symbols[i + 1], symbol_return_type,
+ "str.substr(tot)");
+ out << ";\n";
+ if (symbol_return_type != return_type) {
+ out << indent2 << "ret = ret_internal";
+ match_return_type(out, symbol_return_type, "", return_type);
+ out << ";\n";
+ }
+ out << indent2 << "if (ret.has_value()) {\n"
+ << indent2 << " tot += ret" << size_suffix << ";\n";
+ if (last_internal)
+ out << indent2 << " last_internal = ret->first;\n";
+ out << indent2 << " break;\n" << indent2 << "}\n";
+
+ zero_or_more_with_terminal = true;
+ }
break;
case grammar::Symbol::Optional::kExcluded:
std::cerr << "Excluded mixed with conditional\n";
@@ -678,9 +723,15 @@ bool Generator::write_matcher(std::ostream& out,
<< indent2 << " last_internal = ret->first;\n";
break;
case grammar::Symbol::Optional::kZeroOrMore:
- out << indent2 << "if (!ret.has_value())\n"
- << indent2 << " break;\n"
- << indent2 << "tot += ret" << size_suffix << ";\n";
+ out << indent2 << "if (!ret.has_value())\n";
+ if (zero_or_more_with_terminal) {
+ out << indent2 << " return ret;\n";
+ // Skip next symbol as it was already used to terminate the loop
+ ++i;
+ } else {
+ out << indent2 << " break;\n";
+ }
+ out << indent2 << "tot += ret" << size_suffix << ";\n";
if (last_internal)
out << indent2 << "last_internal = ret->first;\n";
out << indent << "}\n";
@@ -742,13 +793,12 @@ bool Generator::write_matcher(std::ostream& out,
switch (return_type) {
case ReturnType::kSize:
out << "[[nodiscard]]\n"
- << "std::optional<size_t> TokenMatcher::match" << element.name
- << "(std::string_view str) {\n";
+ << "std::optional<size_t> TokenMatcher::match" << element.name;
break;
case ReturnType::kTokenAndSize:
out << "[[nodiscard]]\n"
<< "std::optional<std::pair<Token, size_t>> TokenMatcher::match"
- << element.name << "(std::string_view str) {\n";
+ << element.name;
if (specific_tokens_.contains(element.name)) {
sub_return_type = ReturnType::kSize;
@@ -759,11 +809,19 @@ bool Generator::write_matcher(std::ostream& out,
out << "[[nodiscard]]\n"
<< "std::optional<std::pair<TokenMatcher::Internal, size_t>> "
"TokenMatcher::match"
- << element.name << "(std::string_view str) {\n";
+ << element.name;
break;
}
- if (element.definitions.size() == 1) {
+ if (element.definitions.empty()) {
+ out << "(std::string_view /* str */) {\n";
+ } else {
+ out << "(std::string_view str) {\n";
+ }
+
+ if (element.definitions.empty()) {
+ out << " return std::nullopt;\n";
+ } else if (element.definitions.size() == 1) {
if (make_token) {
out << " auto ret = [this, str]() -> std::optional<size_t> {\n";
if (!write_matcher(out, element.definitions[0], sub_return_type,
diff --git a/src/grammar.cc b/src/grammar.cc
index 25c4d64..6ed2766 100644
--- a/src/grammar.cc
+++ b/src/grammar.cc
@@ -127,11 +127,6 @@ class GrammarLoader {
auto it = second_pass_elements.begin();
for (auto const& pair : first_pass_elements) {
auto const& element = *it++;
- if (pair.second.definitions.empty()) {
- errors_.err(pair.second.loc,
- std::format("No definitions for {}", pair.first));
- continue;
- }
std::vector<std::string_view> in_symbols;
for (auto const& in_definition : pair.second.definitions) {
str::split(in_definition, in_symbols);
@@ -247,10 +242,42 @@ class GrammarLoader {
"No root element found");
}
+ optimize(second_pass_elements);
+
return std::make_unique<GrammarImpl>(std::move(second_pass_elements));
}
private:
+ static void optimize(std::vector<std::unique_ptr<Element>> const& elements) {
+ merge_terminals(elements);
+ }
+
+ static void merge_terminals(std::vector<std::unique_ptr<Element>> const& elements) {
+ for (auto const& element : elements) {
+ for (auto& definition : element->definitions) {
+ auto it = definition.symbols.begin();
+ while (it != definition.symbols.end()) {
+ if (it->type != Symbol::Type::kTerminal) {
+ ++it;
+ continue;
+ }
+
+ auto it2 = it + 1;
+ if (it2 == definition.symbols.end())
+ break;
+ if (it2->type != Symbol::Type::kTerminal ||
+ it->optional != it2->optional) {
+ ++it;
+ continue;
+ }
+
+ it->value += it2->value;
+ definition.symbols.erase(it2);
+ }
+ }
+ }
+ }
+
std::unique_ptr<line::Reader> reader_;
std::vector<std::string> const& character_classes_;
src::Errors& errors_;
diff --git a/src/java_tokens.cc b/src/java_tokens.cc
index 1ba40a3..42c310b 100644
--- a/src/java_tokens.cc
+++ b/src/java_tokens.cc
@@ -1,6 +1,7 @@
#include "java_tokens.hh"
#include "errors.hh"
+#include "java_tokens_java-21.hh"
#include "java_tokens_java-8.hh"
#include "java_uescape.hh"
#include "str.hh"
@@ -62,9 +63,13 @@ class TokensImpl : public Tokens {
break;
line_tmp_.append(maybe_line.value());
got_any = true;
- // Simple check, it might not actually end the comment but if so tokenizer will complain
+ // Simple check: this might not actually end the token, but if so the tokenizer will complain
// about reaching line_end again.
- if (maybe_line->contains("*/"))
+ auto stop = (maybe_token_pair.has_value() &&
+ maybe_token_pair->first == MatchToken::kStringLiteral)
+ ? R"(""")"
+ : "*/";
+ if (maybe_line->contains(stop))
break;
line_tmp_.push_back('\n');
}
@@ -118,8 +123,11 @@ class TokensImpl : public Tokens {
case MatchToken::kIdentifier:
token.type = Token::Type::kIdentifier;
break;
- case MatchToken::kKeyword:
- token.type = Token::Type::kKeyword;
+ case MatchToken::kReservedKeyword:
+ token.type = Token::Type::kReservedKeyword;
+ break;
+ case MatchToken::kContextualKeyword:
+ token.type = Token::Type::kContextualKeyword;
break;
case MatchToken::kNullLiteral:
token.type = Token::Type::kLiteralNull;
@@ -138,6 +146,13 @@ class TokensImpl : public Tokens {
token.str =
unescape_if_needed(token.str.substr(1, token.str.size() - 2));
break;
+ case MatchToken::kTextBlock: {
+ token.type = Token::Type::kLiteralString;
+ auto start = token.str.find('\n', 3) + 1;
+ token.str = unescape_if_needed(trim_indent(
+ token.str.substr(start, token.str.size() - 3 - start)));
+ break;
+ }
case MatchToken::kTraditionalComment: {
token.type = Token::Type::kComment;
size_t s = 2;
@@ -368,6 +383,39 @@ class TokensImpl : public Tokens {
return count;
}
+ static size_t indent(std::string_view str) {
+ size_t i = 0;
+ while (i < str.size() &&
+ (str[i] == ' ' || str[i] == '\t' || str[i] == '\f'))
+ ++i;
+ return i;
+ }
+
+ std::string_view trim_indent(std::string_view str) {
+ auto lines = str::split(str, '\n', /* keep_empty */ true);
+ auto it = lines.begin();
+ auto min_indent = indent(*it);
+ if (min_indent == 0)
+ return str;
+ for (++it; it != lines.end(); ++it) {
+ auto i = indent(*it);
+ if (i < min_indent) {
+ if (i == 0)
+ return str;
+ min_indent = i;
+ }
+ }
+ trim_tmp_.clear();
+ trim_tmp_.reserve(str.size());
+ for (auto line : lines) {
+ trim_tmp_.append(line, min_indent);
+ trim_tmp_.push_back('\n');
+ }
+ // remove last '\n'
+ trim_tmp_.resize(trim_tmp_.size() - 1);
+ return trim_tmp_;
+ }
+
std::unique_ptr<u8::line::Reader> reader_;
std::unique_ptr<src::Errors> errors_;
TokensConfig const config_;
@@ -376,6 +424,7 @@ class TokensImpl : public Tokens {
std::string line_tmp_;
Location location_;
std::string unescape_tmp_;
+ std::string trim_tmp_;
};
} // namespace
@@ -387,6 +436,10 @@ std::unique_ptr<Tokens> open(std::unique_ptr<io::Reader> reader,
case Version::kJava8:
return std::make_unique<TokensImpl<java_8::TokenMatcher, java_8::Token>>(
std::move(reader), std::move(errors), config);
+ case Version::kJava21:
+ return std::make_unique<
+ TokensImpl<java_21::TokenMatcher, java_21::Token>>(
+ std::move(reader), std::move(errors), config);
}
std::unreachable();
}
diff --git a/src/java_tokens.hh b/src/java_tokens.hh
index 6fbefcb..c4e27c0 100644
--- a/src/java_tokens.hh
+++ b/src/java_tokens.hh
@@ -25,7 +25,10 @@ struct Token {
kIdentifier,
// str is keyword, int_value is Keyword index
- kKeyword,
+ kReservedKeyword,
+
+ // str is keyword, int_value is Keyword index
+ kContextualKeyword,
// str is separator, int_value is Separator index
kSeparator,
diff --git a/src/java_version.hh b/src/java_version.hh
index 444ae36..4877263 100644
--- a/src/java_version.hh
+++ b/src/java_version.hh
@@ -7,8 +7,9 @@ namespace java {
enum class Version : uint8_t {
kJava8 = 8,
+ kJava21 = 21,
- kMax = kJava8,
+ kMax = kJava21,
};
} // namespace java
diff --git a/test/java_tokens.cc b/test/java_tokens.cc
index 1c69196..cb1ae73 100644
--- a/test/java_tokens.cc
+++ b/test/java_tokens.cc
@@ -29,7 +29,7 @@ TEST_P(JavaTokens, empty_class) {
java::TokensConfig{.version = GetParam()});
auto ret = tokens->read();
ASSERT_TRUE(ret.has_value());
- EXPECT_EQ(java::Token::Type::kKeyword, ret->type);
+ EXPECT_EQ(java::Token::Type::kReservedKeyword, ret->type);
EXPECT_EQ("class", ret->str);
EXPECT_EQ(1, ret->loc.line);
EXPECT_EQ(0, ret->loc.column);
@@ -602,5 +602,57 @@ TEST_P(JavaTokens, null) {
EXPECT_EQ(io::ReadError::Eof, ret.error());
}
+TEST_P(JavaTokens, textblock) {
+ auto input = io::memory(R"(String html = """
+ <html>
+ <body>
+ <p>Hello, world</p>
+ </body>
+ </html>
+ """;)");
+ auto tokens = java::open(std::move(input), make_errors(),
+ java::TokensConfig{.version = GetParam()});
+
+ auto ret = tokens->read();
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(java::Token::Type::kIdentifier, ret->type);
+ EXPECT_EQ("String", ret->str);
+ ret = tokens->read();
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(java::Token::Type::kIdentifier, ret->type);
+ EXPECT_EQ("html", ret->str);
+ ret = tokens->read();
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(java::Token::Type::kOperator, ret->type);
+ EXPECT_EQ("=", ret->str);
+ ret = tokens->read();
+ if (std::to_underlying(GetParam()) >= 15) {
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(java::Token::Type::kLiteralString, ret->type);
+ EXPECT_EQ(R"(<html>
+ <body>
+ <p>Hello, world</p>
+ </body>
+</html>
+)",
+ ret->str);
+ ret = tokens->read();
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(java::Token::Type::kSeparator, ret->type);
+ EXPECT_EQ(";", ret->str);
+ ret = tokens->read();
+ ASSERT_FALSE(ret.has_value());
+ EXPECT_EQ(io::ReadError::Eof, ret.error());
+ } else {
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(java::Token::Type::kLiteralString, ret->type);
+ EXPECT_EQ("", ret->str);
+ ret = tokens->read();
+ ASSERT_TRUE(ret.has_value());
+ EXPECT_EQ(java::Token::Type::kError, ret->type);
+ }
+}
+
INSTANTIATE_TEST_SUITE_P(AllVersions, JavaTokens,
- testing::Values(java::Version::kJava8));
+ testing::Values(java::Version::kJava8,
+ java::Version::kJava21));