From d196d51e07f50f3510c43ad375c5559b58860023 Mon Sep 17 00:00:00 2001
From: Joel Klinghed
Date: Mon, 29 Sep 2025 09:39:49 +0200
Subject: java: Add tokens support for Java 21

Some new keywords, I opted to modify java-8 grammar to use the new names,
even if they are not going to match anything. Makes the tokenizer easier
to write.
---
 src/grammar.cc | 37 ++++++++++++++++++++++++++++++++-----
 1 file changed, 32 insertions(+), 5 deletions(-)

(limited to 'src/grammar.cc')

diff --git a/src/grammar.cc b/src/grammar.cc
index 25c4d64..6ed2766 100644
--- a/src/grammar.cc
+++ b/src/grammar.cc
@@ -127,11 +127,6 @@ class GrammarLoader {
     auto it = second_pass_elements.begin();
     for (auto const& pair : first_pass_elements) {
       auto const& element = *it++;
-      if (pair.second.definitions.empty()) {
-        errors_.err(pair.second.loc,
-                    std::format("No definitions for {}", pair.first));
-        continue;
-      }
       std::vector in_symbols;
       for (auto const& in_definition : pair.second.definitions) {
         str::split(in_definition, in_symbols);
@@ -247,10 +242,42 @@ class GrammarLoader {
                     "No root element found");
     }
 
+    optimize(second_pass_elements);  // Post-process the loaded elements before they are moved into the result.
+
     return std::make_unique(std::move(second_pass_elements));
   }
 
  private:
+  static void optimize(std::vector> const& elements) {  // Runs the optimization passes over `elements`; currently that is only merge_terminals().
+    merge_terminals(elements);
+  }
+
+  static void merge_terminals(std::vector> const& elements) {  // Concatenates each run of adjacent terminal symbols that share the same `optional` flag into a single symbol, in place.
+    for (auto const& element : elements) {
+      for (auto& definition : element->definitions) {  // Non-const reference: the symbol list reached through `element->` is edited below.
+        auto it = definition.symbols.begin();
+        while (it != definition.symbols.end()) {
+          if (it->type != Symbol::Type::kTerminal) {  // Only a terminal can be the left-hand side of a merge.
+            ++it;
+            continue;
+          }
+
+          auto it2 = it + 1;  // Right-hand neighbour that may be folded into *it.
+          if (it2 == definition.symbols.end())  // *it is the last symbol, so this definition is done.
+            break;
+          if (it2->type != Symbol::Type::kTerminal ||
+              it->optional != it2->optional) {  // NOTE(review): two optional terminals pass this test and get merged; if `optional` means "may be omitted", "a"? "b"? would become "ab"? - confirm this is intended.
+            ++it;
+            continue;
+          }
+
+          it->value += it2->value;  // Append the neighbour's text to the current terminal...
+          definition.symbols.erase(it2);  // ...and remove the neighbour. `it` is not advanced, so the merged symbol is compared with its new neighbour next (presumably `symbols` is a std::vector, where iterators before the erased position stay valid - verify).
+        }
+      }
+    }
+  }
+
   std::unique_ptr reader_;
   std::vector const& character_classes_;
   src::Errors& errors_;
-- 
cgit v1.3