summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Joel Klinghed <the_jk@spawned.biz> 2026-04-28 22:24:11 +0200
committer Joel Klinghed <the_jk@spawned.biz> 2026-04-28 22:26:12 +0200
commit ed00d6111ec63faffe1aee21efb9d66df69a0df3 (patch)
tree 050dad3068a54ddb9a0e7e4250f4ade2a4b0feef
parent 42643b5543408b9c0b62b16b86834211db1c05e8 (diff)
grammar: Expand parsing to handle syntax grammar
Introduce anonymous elements to handle more complicated symbols
-rw-r--r-- src/grammar.cc 212
1 files changed, 145 insertions, 67 deletions
diff --git a/src/grammar.cc b/src/grammar.cc
index c94a86f..3526681 100644
--- a/src/grammar.cc
+++ b/src/grammar.cc
@@ -124,81 +124,22 @@ class GrammarLoader {
second_pass_elements.emplace_back(std::move(element));
}
- auto it = second_pass_elements.begin();
+ size_t i = 0;
for (auto const& pair : first_pass_elements) {
- auto const& element = *it++;
- std::vector<std::string_view> in_symbols;
for (auto const& in_definition : pair.second.definitions) {
- str::split(in_definition, in_symbols);
-
- std::vector<Symbol> out_symbols;
- bool exclude = false;
- bool expect_not = false;
- for (auto in_symbol : in_symbols) {
- Symbol out_symbol;
- if (exclude) {
- if (in_symbol == "or")
- continue;
- out_symbol.optional = Symbol::Optional::kExcluded;
- } else if (expect_not) {
- expect_not = false;
- if (in_symbol == "not") {
- exclude = true;
- } else {
- errors_.err(pair.second.loc, "but is not followed by not");
- }
- continue;
- }
- if (in_symbol == "but") {
- expect_not = true;
- continue;
- }
- if (in_symbol.front() == '{' && in_symbol.back() == '}') {
- if (exclude) {
- errors_.err(pair.second.loc,
- "Optional and exclude doesn't work together");
- } else {
- out_symbol.optional = Symbol::Optional::kZeroOrMore;
- }
- in_symbol = in_symbol.substr(1, in_symbol.size() - 2);
- } else if (in_symbol.front() == '[' && in_symbol.back() == ']') {
- if (exclude) {
- errors_.err(pair.second.loc,
- "Optional and exclude doesn't work together");
- } else {
- out_symbol.optional = Symbol::Optional::kZeroOrOne;
- }
- in_symbol = in_symbol.substr(1, in_symbol.size() - 2);
- }
- auto it2 = second_pass_lookup.find(in_symbol);
- if (it2 != second_pass_lookup.end()) {
- out_symbol.type = Symbol::Type::kNonTerminal;
- out_symbol.element = second_pass_elements[it2->second].get();
- } else {
- auto it3 = cc_lookup.find(in_symbol);
- if (it3 != cc_lookup.end()) {
- out_symbol.type = Symbol::Type::kCharacterClass;
- out_symbol.char_class = it3->second;
- } else {
- out_symbol.type = Symbol::Type::kTerminal;
- out_symbol.value = in_symbol;
- }
- }
- out_symbols.emplace_back(std::move(out_symbol));
- }
-
- if (expect_not) {
- errors_.err(pair.second.loc, "but is not followed by not");
- }
+ auto out_symbols =
+ parse_definition(pair.second.loc, second_pass_elements,
+ second_pass_lookup, cc_lookup, in_definition);
if (out_symbols.empty()) {
errors_.err(pair.second.loc, "no symbols found in definition");
continue;
}
- element->definitions.emplace_back(
+ second_pass_elements[i]->definitions.emplace_back(
Definition{.symbols = std::move(out_symbols)});
}
+ ++i;
}
// Find root and move it first (if needed)
@@ -211,7 +152,8 @@ class GrammarLoader {
case Symbol::Type::kCharacterClass:
break;
case Symbol::Type::kNonTerminal:
- used[second_pass_lookup.find(symbol.element->name)->second]++;
+ if (!symbol.element->name.empty())
+ used[second_pass_lookup.find(symbol.element->name)->second]++;
break;
}
}
@@ -220,7 +162,7 @@ class GrammarLoader {
std::optional<size_t> root_index;
for (size_t i = 0; i < used.size(); ++i) {
- if (used[i] == 0) {
+ if (used[i] == 0 && !second_pass_elements[i]->name.empty()) {
if (root_index.has_value()) {
errors_.warn(first_pass_elements.find(second_pass_elements[i]->name)
->second.loc,
@@ -279,6 +221,142 @@ class GrammarLoader {
}
}
+ // Parses one definition string into its sequence of symbols.
+ //
+ // The text is handed to parse_symbol() repeatedly until it is consumed.
+ // Calls that consume a token without producing a symbol (parse_symbol()
+ // returns std::nullopt) contribute nothing to the result.
+ //
+ // loc                  location passed to parse_symbol() and used for errors
+ //                      reported here.
+ // non_terminal         element list; forwarded to parse_symbol(), which may
+ //                      append to it.
+ // non_terminal_lookup  name -> index into non_terminal, forwarded as is.
+ // cc_lookup            name -> character class id, forwarded as is.
+ // in_definition        the definition text; taken by value, so the caller's
+ //                      view is not modified.
+ //
+ // Returns the symbols in source order; the result is empty when the
+ // definition produced no symbol at all.
+ std::vector<Symbol> parse_definition(
+     src::Location const& loc,
+     std::vector<std::unique_ptr<Element>>& non_terminal,
+     std::map<std::string_view, size_t, std::less<>>& non_terminal_lookup,
+     std::map<std::string_view, uint8_t, std::less<>> const& cc_lookup,
+     std::string_view in_definition) {
+   std::vector<Symbol> out_symbols;
+   // Keyword state shared by every token of this definition.
+   bool exclude = false;
+   bool expect_not = false;
+   while (!in_definition.empty()) {
+     auto symbol = parse_symbol(loc, non_terminal, non_terminal_lookup,
+                                cc_lookup, in_definition, exclude, expect_not);
+     if (symbol.has_value()) {
+       out_symbols.push_back(std::move(*symbol));
+     }
+   }
+   // A "but" at the very end never got its "not". The token loop cannot
+   // notice that because there is no following token, so report it here.
+   if (expect_not) {
+     errors_.err(loc, "but is not followed by not");
+   }
+   return out_symbols;
+ }
+
+ // Parses the next symbol from the front of `input` and consumes it.
+ //
+ // Two forms are recognised:
+ //  * A bracket group, "{...}" or "[...]", whose opening bracket is directly
+ //    followed by a non-space character. The content is parsed recursively
+ //    with its own keyword state. "{...}" marks the result kZeroOrMore and
+ //    "[...]" marks it kZeroOrOne. A group that cannot be represented by its
+ //    single inner symbol is wrapped in a new unnamed Element appended to
+ //    `non_terminal`.
+ //  * A plain token, ending at the next space or at `extra_terminator`. A
+ //    terminating space is consumed; the extra terminator is left in `input`
+ //    for the caller. The token is resolved against `non_terminal_lookup`,
+ //    then `cc_lookup`, and is otherwise taken as a terminal.
+ //
+ // The keywords "but", "not" and "or" update `expect_not` / `exclude` and do
+ // not produce a symbol.
+ //
+ // loc                  location used for every reported error.
+ // non_terminal         element list; may grow by one unnamed element per
+ //                      bracket group.
+ // non_terminal_lookup  name -> index into non_terminal.
+ // cc_lookup            name -> character class id.
+ // input                remaining text; must not be empty. Advanced past
+ //                      whatever was consumed.
+ // exclude, expect_not  keyword state carried between calls by the caller.
+ // extra_terminator     additional character that ends a plain token; '\0'
+ //                      means none.
+ //
+ // Returns the parsed symbol, or std::nullopt when the consumed text was a
+ // keyword, was empty, or was rejected with an error.
+ std::optional<Symbol> parse_symbol(
+     src::Location const& loc,
+     std::vector<std::unique_ptr<Element>>& non_terminal,
+     std::map<std::string_view, size_t, std::less<>>& non_terminal_lookup,
+     std::map<std::string_view, uint8_t, std::less<>> const& cc_lookup,
+     std::string_view& input, bool& exclude, bool& expect_not,
+     char extra_terminator = '\0') {
+   Symbol out_symbol;
+
+   // A bracket that is alone or followed by a space is an ordinary token and
+   // falls through to the plain-token path below.
+   if ((input[0] == '{' || input[0] == '[') && input.size() > 1 &&
+       input[1] != ' ') {
+     char const closer = input[0] == '{' ? '}' : ']';
+     std::vector<Symbol> inner_symbols;
+     bool inner_exclude = false;
+     bool inner_expect_not = false;
+     input = input.substr(1);
+     while (!input.empty() && input.front() != closer) {
+       auto symbol =
+           parse_symbol(loc, non_terminal, non_terminal_lookup, cc_lookup,
+                        input, inner_exclude, inner_expect_not, closer);
+       if (symbol.has_value()) {
+         inner_symbols.push_back(std::move(*symbol));
+       }
+     }
+
+     if (input.empty()) {
+       errors_.err(loc, "unclosed sub-symbol");
+       return std::nullopt;
+     }
+     // Drop the closing bracket together with one separating space, if any.
+     if (input.size() == 1 || input[1] != ' ') {
+       input = input.substr(1);
+     } else {
+       input = input.substr(2);
+     }
+
+     // A "but" that was the last token inside the group has no "not".
+     if (inner_expect_not) {
+       errors_.err(loc, "but is not followed by not");
+     }
+     // Only the keyword "not" may follow "but"; a group in that position is
+     // rejected like any other token would be, and the pending state is
+     // cleared so later tokens are not misread.
+     if (expect_not) {
+       expect_not = false;
+       errors_.err(loc, "but is not followed by not");
+       return std::nullopt;
+     }
+
+     if (inner_symbols.empty()) {
+       errors_.err(loc, "empty sub-symbol");
+       return std::nullopt;
+     }
+     // A lone inner symbol can stand in for the group only while it carries
+     // no modifier of its own; otherwise the modifier assigned below would
+     // overwrite it (e.g. "[{a}]" would lose its repetition).
+     // NOTE(review): Symbol{}.optional is taken to be the "no modifier"
+     // state, since that is what a plain token is returned with further
+     // down - confirm against the Symbol declaration.
+     bool const can_flatten =
+         inner_symbols.size() == 1 &&
+         inner_symbols.front().optional == Symbol{}.optional;
+     if (can_flatten) {
+       out_symbol = std::move(inner_symbols.front());
+     } else {
+       auto anon_element = std::make_unique<Element>();
+       anon_element->definitions.emplace_back(
+           Definition{.symbols = std::move(inner_symbols)});
+       non_terminal.push_back(std::move(anon_element));
+       out_symbol.type = Symbol::Type::kNonTerminal;
+       out_symbol.element = non_terminal.back().get();
+     }
+
+     if (exclude) {
+       errors_.err(loc, "Optional and exclude doesn't work together");
+     } else if (closer == '}') {
+       out_symbol.optional = Symbol::Optional::kZeroOrMore;
+     } else /* closer == ']' */ {
+       out_symbol.optional = Symbol::Optional::kZeroOrOne;
+     }
+
+     return out_symbol;
+   }
+
+   // Plain token: find where it ends.
+   std::string_view::size_type token_end;
+   if (extra_terminator) {
+     char const terminators[2] = {extra_terminator, ' '};
+     token_end = input.find_first_of(std::string_view{terminators, 2});
+   } else {
+     token_end = input.find(' ');
+   }
+   std::string_view in_symbol;
+   if (token_end == std::string_view::npos) {
+     in_symbol = input;
+     input = std::string_view{};
+   } else {
+     in_symbol = input.substr(0, token_end);
+     if (input[token_end] == ' ') {
+       input = input.substr(token_end + 1);
+     } else {
+       // Stopped at the extra terminator: leave it for the caller.
+       input = input.substr(token_end);
+     }
+   }
+
+   // Leading or repeated spaces yield an empty token; it is only a separator
+   // and must neither become an empty terminal nor disturb keyword state.
+   if (in_symbol.empty()) {
+     return std::nullopt;
+   }
+
+   if (exclude) {
+     if (in_symbol == "or")
+       return std::nullopt;
+     out_symbol.optional = Symbol::Optional::kExcluded;
+   } else if (expect_not) {
+     expect_not = false;
+     if (in_symbol == "not") {
+       exclude = true;
+     } else {
+       errors_.err(loc, "but is not followed by not");
+     }
+     return std::nullopt;
+   }
+   if (in_symbol == "but") {
+     expect_not = true;
+     return std::nullopt;
+   }
+   // Resolution order: named element, character class, literal terminal.
+   auto it2 = non_terminal_lookup.find(in_symbol);
+   if (it2 != non_terminal_lookup.end()) {
+     out_symbol.type = Symbol::Type::kNonTerminal;
+     out_symbol.element = non_terminal[it2->second].get();
+   } else {
+     auto it3 = cc_lookup.find(in_symbol);
+     if (it3 != cc_lookup.end()) {
+       out_symbol.type = Symbol::Type::kCharacterClass;
+       out_symbol.char_class = it3->second;
+     } else {
+       out_symbol.type = Symbol::Type::kTerminal;
+       out_symbol.value = in_symbol;
+     }
+   }
+   return out_symbol;
+ }
+
std::unique_ptr<line::Reader> reader_;
std::vector<std::string> const& character_classes_;
src::Errors& errors_;