diff options
| author | Joel Klinghed <the_jk@spawned.biz> | 2025-09-29 09:39:49 +0200 |
|---|---|---|
| committer | Joel Klinghed <the_jk@spawned.biz> | 2025-09-29 09:50:47 +0200 |
| commit | d196d51e07f50f3510c43ad375c5559b58860023 (patch) | |
| tree | 3432b8e99e306d0ece9f29ddad1e2945f88a1481 /src/java_tokens.cc | |
| parent | 1e9e51dae1c01bab7562911b958c47528b8011c8 (diff) | |
java: Add tokens support for Java 21
There are some new keywords. I opted to modify the java-8 grammar to use
the new names too, even though they are not going to match anything there;
this makes the tokenizer easier to write.
Diffstat (limited to 'src/java_tokens.cc')
| -rw-r--r-- | src/java_tokens.cc | 61 |
1 files changed, 57 insertions, 4 deletions
diff --git a/src/java_tokens.cc b/src/java_tokens.cc index 1ba40a3..42c310b 100644 --- a/src/java_tokens.cc +++ b/src/java_tokens.cc @@ -1,6 +1,7 @@ #include "java_tokens.hh" #include "errors.hh" +#include "java_tokens_java-21.hh" #include "java_tokens_java-8.hh" #include "java_uescape.hh" #include "str.hh" @@ -62,9 +63,13 @@ class TokensImpl : public Tokens { break; line_tmp_.append(maybe_line.value()); got_any = true; - // Simple check, it might not actually end the comment but if so tokenizer will complain + // Simple check, it might not actually be true but if so tokenizer will complain // about reaching line_end again. - if (maybe_line->contains("*/")) + auto stop = (maybe_token_pair.has_value() && + maybe_token_pair->first == MatchToken::kStringLiteral) + ? R"(""")" + : "*/"; + if (maybe_line->contains(stop)) break; line_tmp_.push_back('\n'); } @@ -118,8 +123,11 @@ class TokensImpl : public Tokens { case MatchToken::kIdentifier: token.type = Token::Type::kIdentifier; break; - case MatchToken::kKeyword: - token.type = Token::Type::kKeyword; + case MatchToken::kReservedKeyword: + token.type = Token::Type::kReservedKeyword; + break; + case MatchToken::kContextualKeyword: + token.type = Token::Type::kContextualKeyword; break; case MatchToken::kNullLiteral: token.type = Token::Type::kLiteralNull; @@ -138,6 +146,13 @@ class TokensImpl : public Tokens { token.str = unescape_if_needed(token.str.substr(1, token.str.size() - 2)); break; + case MatchToken::kTextBlock: { + token.type = Token::Type::kLiteralString; + auto start = token.str.find('\n', 3) + 1; + token.str = unescape_if_needed(trim_indent( + token.str.substr(start, token.str.size() - 3 - start))); + break; + } case MatchToken::kTraditionalComment: { token.type = Token::Type::kComment; size_t s = 2; @@ -368,6 +383,39 @@ class TokensImpl : public Tokens { return count; } + static size_t indent(std::string_view str) { + size_t i = 0; + while (i < str.size() && + (str[i] == ' ' || str[i] == '\t' || str[i] == 
'\f')) + ++i; + return i; + } + + std::string_view trim_indent(std::string_view str) { + auto lines = str::split(str, '\n', /* keep_empty */ true); + auto it = lines.begin(); + auto min_indent = indent(*it); + if (min_indent == 0) + return str; + for (++it; it != lines.end(); ++it) { + auto i = indent(*it); + if (i < min_indent) { + if (i == 0) + return str; + min_indent = i; + } + } + trim_tmp_.clear(); + trim_tmp_.reserve(str.size()); + for (auto line : lines) { + trim_tmp_.append(line, min_indent); + trim_tmp_.push_back('\n'); + } + // remove last '\n' + trim_tmp_.resize(trim_tmp_.size() - 1); + return trim_tmp_; + } + std::unique_ptr<u8::line::Reader> reader_; std::unique_ptr<src::Errors> errors_; TokensConfig const config_; @@ -376,6 +424,7 @@ class TokensImpl : public Tokens { std::string line_tmp_; Location location_; std::string unescape_tmp_; + std::string trim_tmp_; }; } // namespace @@ -387,6 +436,10 @@ std::unique_ptr<Tokens> open(std::unique_ptr<io::Reader> reader, case Version::kJava8: return std::make_unique<TokensImpl<java_8::TokenMatcher, java_8::Token>>( std::move(reader), std::move(errors), config); + case Version::kJava21: + return std::make_unique< + TokensImpl<java_21::TokenMatcher, java_21::Token>>( + std::move(reader), std::move(errors), config); } std::unreachable(); } |
