diff options
| author | Joel Klinghed <the_jk@spawned.biz> | 2026-04-28 22:24:11 +0200 |
|---|---|---|
| committer | Joel Klinghed <the_jk@spawned.biz> | 2026-04-28 22:26:12 +0200 |
| commit | ed00d6111ec63faffe1aee21efb9d66df69a0df3 (patch) | |
| tree | 050dad3068a54ddb9a0e7e4250f4ade2a4b0feef | |
| parent | 42643b5543408b9c0b62b16b86834211db1c05e8 (diff) | |
grammar: Expand parsing to handle syntax grammar
Introduce anonymous elements to handle more complicated symbols
| -rw-r--r-- | src/grammar.cc | 212 |
1 file changed, 145 insertions, 67 deletions
diff --git a/src/grammar.cc b/src/grammar.cc index c94a86f..3526681 100644 --- a/src/grammar.cc +++ b/src/grammar.cc @@ -124,81 +124,22 @@ class GrammarLoader { second_pass_elements.emplace_back(std::move(element)); } - auto it = second_pass_elements.begin(); + size_t i = 0; for (auto const& pair : first_pass_elements) { - auto const& element = *it++; - std::vector<std::string_view> in_symbols; for (auto const& in_definition : pair.second.definitions) { - str::split(in_definition, in_symbols); - - std::vector<Symbol> out_symbols; - bool exclude = false; - bool expect_not = false; - for (auto in_symbol : in_symbols) { - Symbol out_symbol; - if (exclude) { - if (in_symbol == "or") - continue; - out_symbol.optional = Symbol::Optional::kExcluded; - } else if (expect_not) { - expect_not = false; - if (in_symbol == "not") { - exclude = true; - } else { - errors_.err(pair.second.loc, "but is not followed by not"); - } - continue; - } - if (in_symbol == "but") { - expect_not = true; - continue; - } - if (in_symbol.front() == '{' && in_symbol.back() == '}') { - if (exclude) { - errors_.err(pair.second.loc, - "Optional and exclude doesn't work together"); - } else { - out_symbol.optional = Symbol::Optional::kZeroOrMore; - } - in_symbol = in_symbol.substr(1, in_symbol.size() - 2); - } else if (in_symbol.front() == '[' && in_symbol.back() == ']') { - if (exclude) { - errors_.err(pair.second.loc, - "Optional and exclude doesn't work together"); - } else { - out_symbol.optional = Symbol::Optional::kZeroOrOne; - } - in_symbol = in_symbol.substr(1, in_symbol.size() - 2); - } - auto it2 = second_pass_lookup.find(in_symbol); - if (it2 != second_pass_lookup.end()) { - out_symbol.type = Symbol::Type::kNonTerminal; - out_symbol.element = second_pass_elements[it2->second].get(); - } else { - auto it3 = cc_lookup.find(in_symbol); - if (it3 != cc_lookup.end()) { - out_symbol.type = Symbol::Type::kCharacterClass; - out_symbol.char_class = it3->second; - } else { - out_symbol.type = 
Symbol::Type::kTerminal; - out_symbol.value = in_symbol; - } - } - out_symbols.emplace_back(std::move(out_symbol)); - } - - if (expect_not) { - errors_.err(pair.second.loc, "but is not followed by not"); - } + auto out_symbols = + parse_definition(pair.second.loc, second_pass_elements, + second_pass_lookup, cc_lookup, in_definition); if (out_symbols.empty()) { errors_.err(pair.second.loc, "no symbols found in definition"); continue; } - element->definitions.emplace_back( + second_pass_elements[i]->definitions.emplace_back( Definition{.symbols = std::move(out_symbols)}); } + ++i; } // Find root and move it first (if needed) @@ -211,7 +152,8 @@ class GrammarLoader { case Symbol::Type::kCharacterClass: break; case Symbol::Type::kNonTerminal: - used[second_pass_lookup.find(symbol.element->name)->second]++; + if (!symbol.element->name.empty()) + used[second_pass_lookup.find(symbol.element->name)->second]++; break; } } @@ -220,7 +162,7 @@ class GrammarLoader { std::optional<size_t> root_index; for (size_t i = 0; i < used.size(); ++i) { - if (used[i] == 0) { + if (used[i] == 0 && !second_pass_elements[i]->name.empty()) { if (root_index.has_value()) { errors_.warn(first_pass_elements.find(second_pass_elements[i]->name) ->second.loc, @@ -279,6 +221,142 @@ class GrammarLoader { } } + std::vector<Symbol> parse_definition( + src::Location const& loc, + std::vector<std::unique_ptr<Element>>& non_terminal, + std::map<std::string_view, size_t, std::less<>>& non_terminal_lookup, + std::map<std::string_view, uint8_t, std::less<>> const& cc_lookup, + std::string_view in_definition) { + std::vector<Symbol> out_symbols; + bool exclude = false; + bool expect_not = false; + while (!in_definition.empty()) { + auto symbol = parse_symbol(loc, non_terminal, non_terminal_lookup, + cc_lookup, in_definition, exclude, expect_not); + if (symbol.has_value()) { + out_symbols.push_back(std::move(symbol.value())); + } + } + return out_symbols; + } + + std::optional<Symbol> parse_symbol( + 
src::Location const& loc, + std::vector<std::unique_ptr<Element>>& non_terminal, + std::map<std::string_view, size_t, std::less<>>& non_terminal_lookup, + std::map<std::string_view, uint8_t, std::less<>> const& cc_lookup, + std::string_view& input, bool& exclude, bool& expect_not, + char extra_terminator = '\0') { + Symbol out_symbol; + + if ((input[0] == '{' || input[0] == '[') && input.size() > 1 && + input[1] != ' ') { + char end = input[0] == '{' ? '}' : ']'; + std::vector<Symbol> inner_symbols; + bool inner_exclude = false; + bool inner_expect_not = false; + input = input.substr(1); + while (!input.empty() && input.front() != end) { + auto symbol = + parse_symbol(loc, non_terminal, non_terminal_lookup, cc_lookup, + input, inner_exclude, inner_expect_not, end); + if (symbol.has_value()) { + inner_symbols.push_back(std::move(symbol.value())); + } + } + + if (input.empty()) { + errors_.err(loc, "unclosed sub-symbol"); + return std::nullopt; + } + if (input.size() == 1 || input[1] != ' ') { + input = input.substr(1); + } else { + input = input.substr(2); + } + + if (inner_symbols.empty()) { + errors_.err(loc, "empty sub-symbol"); + return std::nullopt; + } + if (inner_symbols.size() == 1) { + out_symbol = inner_symbols[0]; + } else { + auto anon_element = std::make_unique<Element>(); + anon_element->definitions.emplace_back( + Definition{.symbols = std::move(inner_symbols)}); + non_terminal.push_back(std::move(anon_element)); + out_symbol.type = Symbol::Type::kNonTerminal; + out_symbol.element = non_terminal.back().get(); + } + + if (exclude) { + errors_.err(loc, "Optional and exclude doesn't work together"); + } else if (end == '}') { + out_symbol.optional = Symbol::Optional::kZeroOrMore; + } else /* if (end == '[') */ { + out_symbol.optional = Symbol::Optional::kZeroOrOne; + } + + return out_symbol; + } + + std::string_view::size_type end; + if (extra_terminator) { + char tmp[2]; + tmp[0] = extra_terminator; + tmp[1] = ' '; + end = 
input.find_first_of(std::string_view{tmp, 2}); + } else { + end = input.find(' '); + } + std::string_view in_symbol; + if (end == std::string_view::npos) { + in_symbol = input; + input = std::string_view{}; + } else { + in_symbol = input.substr(0, end); + if (input[end] == ' ') { + input = input.substr(end + 1); + } else { + input = input.substr(end); + } + } + + if (exclude) { + if (in_symbol == "or") + return std::nullopt; + out_symbol.optional = Symbol::Optional::kExcluded; + } else if (expect_not) { + expect_not = false; + if (in_symbol == "not") { + exclude = true; + } else { + errors_.err(loc, "but is not followed by not"); + } + return std::nullopt; + } + if (in_symbol == "but") { + expect_not = true; + return std::nullopt; + } + auto it2 = non_terminal_lookup.find(in_symbol); + if (it2 != non_terminal_lookup.end()) { + out_symbol.type = Symbol::Type::kNonTerminal; + out_symbol.element = non_terminal[it2->second].get(); + } else { + auto it3 = cc_lookup.find(in_symbol); + if (it3 != cc_lookup.end()) { + out_symbol.type = Symbol::Type::kCharacterClass; + out_symbol.char_class = it3->second; + } else { + out_symbol.type = Symbol::Type::kTerminal; + out_symbol.value = in_symbol; + } + } + return out_symbol; + } + std::unique_ptr<line::Reader> reader_; std::vector<std::string> const& character_classes_; src::Errors& errors_; |
