summaryrefslogtreecommitdiff
path: root/src/java_tokens.cc
diff options
context:
space:
mode:
authorJoel Klinghed <the_jk@spawned.biz>2025-09-29 09:39:49 +0200
committerJoel Klinghed <the_jk@spawned.biz>2025-09-29 09:50:47 +0200
commitd196d51e07f50f3510c43ad375c5559b58860023 (patch)
tree3432b8e99e306d0ece9f29ddad1e2945f88a1481 /src/java_tokens.cc
parent1e9e51dae1c01bab7562911b958c47528b8011c8 (diff)
java: Add tokens support for Java 21
Some new keywords, I opted to modify java-8 grammar to use the new names, even if they are not going to match anything. Makes the tokenizer easier to write.
Diffstat (limited to 'src/java_tokens.cc')
-rw-r--r--src/java_tokens.cc61
1 files changed, 57 insertions, 4 deletions
diff --git a/src/java_tokens.cc b/src/java_tokens.cc
index 1ba40a3..42c310b 100644
--- a/src/java_tokens.cc
+++ b/src/java_tokens.cc
@@ -1,6 +1,7 @@
#include "java_tokens.hh"
#include "errors.hh"
+#include "java_tokens_java-21.hh"
#include "java_tokens_java-8.hh"
#include "java_uescape.hh"
#include "str.hh"
@@ -62,9 +63,13 @@ class TokensImpl : public Tokens {
break;
line_tmp_.append(maybe_line.value());
got_any = true;
- // Simple check, it might not actually end the comment but if so tokenizer will complain
+ // Simple check, it might not actually be true but if so tokenizer will complain
// about reaching line_end again.
- if (maybe_line->contains("*/"))
+ auto stop = (maybe_token_pair.has_value() &&
+ maybe_token_pair->first == MatchToken::kStringLiteral)
+ ? R"(""")"
+ : "*/";
+ if (maybe_line->contains(stop))
break;
line_tmp_.push_back('\n');
}
@@ -118,8 +123,11 @@ class TokensImpl : public Tokens {
case MatchToken::kIdentifier:
token.type = Token::Type::kIdentifier;
break;
- case MatchToken::kKeyword:
- token.type = Token::Type::kKeyword;
+ case MatchToken::kReservedKeyword:
+ token.type = Token::Type::kReservedKeyword;
+ break;
+ case MatchToken::kContextualKeyword:
+ token.type = Token::Type::kContextualKeyword;
break;
case MatchToken::kNullLiteral:
token.type = Token::Type::kLiteralNull;
@@ -138,6 +146,13 @@ class TokensImpl : public Tokens {
token.str =
unescape_if_needed(token.str.substr(1, token.str.size() - 2));
break;
+ case MatchToken::kTextBlock: {
+ token.type = Token::Type::kLiteralString;
+ auto start = token.str.find('\n', 3) + 1;
+ token.str = unescape_if_needed(trim_indent(
+ token.str.substr(start, token.str.size() - 3 - start)));
+ break;
+ }
case MatchToken::kTraditionalComment: {
token.type = Token::Type::kComment;
size_t s = 2;
@@ -368,6 +383,39 @@ class TokensImpl : public Tokens {
return count;
}
+ static size_t indent(std::string_view str) {
+ size_t i = 0;
+ while (i < str.size() &&
+ (str[i] == ' ' || str[i] == '\t' || str[i] == '\f'))
+ ++i;
+ return i;
+ }
+
+ std::string_view trim_indent(std::string_view str) {
+ auto lines = str::split(str, '\n', /* keep_empty */ true);
+ auto it = lines.begin();
+ auto min_indent = indent(*it);
+ if (min_indent == 0)
+ return str;
+ for (++it; it != lines.end(); ++it) {
+ auto i = indent(*it);
+ if (i < min_indent) {
+ if (i == 0)
+ return str;
+ min_indent = i;
+ }
+ }
+ trim_tmp_.clear();
+ trim_tmp_.reserve(str.size());
+ for (auto line : lines) {
+ trim_tmp_.append(line, min_indent);
+ trim_tmp_.push_back('\n');
+ }
+ // remove last '\n'
+ trim_tmp_.resize(trim_tmp_.size() - 1);
+ return trim_tmp_;
+ }
+
std::unique_ptr<u8::line::Reader> reader_;
std::unique_ptr<src::Errors> errors_;
TokensConfig const config_;
@@ -376,6 +424,7 @@ class TokensImpl : public Tokens {
std::string line_tmp_;
Location location_;
std::string unescape_tmp_;
+ std::string trim_tmp_;
};
} // namespace
@@ -387,6 +436,10 @@ std::unique_ptr<Tokens> open(std::unique_ptr<io::Reader> reader,
case Version::kJava8:
return std::make_unique<TokensImpl<java_8::TokenMatcher, java_8::Token>>(
std::move(reader), std::move(errors), config);
+ case Version::kJava21:
+ return std::make_unique<
+ TokensImpl<java_21::TokenMatcher, java_21::Token>>(
+ std::move(reader), std::move(errors), config);
}
std::unreachable();
}