From 14ce0f48bf5780ed808af300bd5ae1050469ea36 Mon Sep 17 00:00:00 2001 From: Ahmedelsa3eed <73740339+Ahmedelsa3eed@users.noreply.github.com> Date: Mon, 25 Dec 2023 18:38:28 +0200 Subject: [PATCH] Fixing end of input while parsing stack not empty (#39) * ANYCC 29 - Fix 'dash' print. * handle end of input while stack not empty * ANYCC 29 - Generate First & Follow Markdown files * ANYCC 29 - Printing Grammar * handle end of input and adjust readme output * ANYCC 47 - Fix prioritize direct production for each first in first set * ANYCC 47 - Fix Predictive Table MD * ANYCC 47 - Put project program * ANYCC 47 - Put test program * fix end of input or end of stack * ANYCC 47 - Modify print * ANYCC 47 - Add Default test --------- Co-authored-by: bazina --- include/Parser/FirstAndFollowGenerator.h | 4 +- include/Parser/Parser.h | 2 + include/Parser/PredictiveTopDownParser.h | 6 +- program.txt | 10 +- src/Parser/FirstAndFollowGenerator.cpp | 46 ++++++--- src/Parser/FirstAndFollowGeneratorUtility.cpp | 98 ++++++++++--------- src/Parser/Parser.cpp | 24 ++++- src/Parser/PredictiveTable.cpp | 22 ++++- src/Parser/PredictiveTopDownParser.cpp | 73 +++++++++----- 9 files changed, 182 insertions(+), 103 deletions(-) diff --git a/include/Parser/FirstAndFollowGenerator.h b/include/Parser/FirstAndFollowGenerator.h index 9113229..eb45dd1 100644 --- a/include/Parser/FirstAndFollowGenerator.h +++ b/include/Parser/FirstAndFollowGenerator.h @@ -36,9 +36,7 @@ class FirstAndFollowGenerator { void compute(); - void printFirstSets(); - - void printFollowSets(); + void generateMarkdownFirstAndFollowSets(const std::string &filename); private: std::vector productionVector; diff --git a/include/Parser/Parser.h b/include/Parser/Parser.h index 728d9d7..4ee7ae6 100644 --- a/include/Parser/Parser.h +++ b/include/Parser/Parser.h @@ -51,6 +51,8 @@ class Parser { * @brief Build the predictive top down parser */ void buildPredictiveTopDownParser(); + + static void printGrammar(std::unordered_map>> &grammar); }; diff --git a/include/Parser/PredictiveTopDownParser.h b/include/Parser/PredictiveTopDownParser.h index 5e3d900..9e0e899 100644 --- a/include/Parser/PredictiveTopDownParser.h +++ b/include/Parser/PredictiveTopDownParser.h @@ -53,10 +53,12 @@ class PredictiveTopDownParser { bool handleMatchOrError(const StackItem& top, Token*& curr_token); void handleMatch(const StackItem& top, Token*& curr_token); void handleNonTerminal(const StackItem& top, Token*& curr_token); - void handleMissingTerminal(const StackItem& top); + void handleMissingTerminal(const StackItem& top, Token *&curr_token); + void handleSyncEntry(const StackItem &top); void handleEmptyEntry(const StackItem& top, Token*& curr_token); - void handleSyncEntry(const StackItem& top); void handleValidProduction(const StackItem& top, const CellValue* cellValue); + void handleEndOfInput(const StackItem &top); + void handleEndOfStack(Token *&curr_token); void pushProductionToStack(const std::vector& production); void setNextDerivation(const StackItem& top, std::vector &curr_production); }; diff --git a/program.txt b/program.txt index f5799f8..d54f9b0 100644 --- a/program.txt +++ b/program.txt @@ -1,8 +1,8 @@ -int x; -x = 5; -if (x > 2) -{ - x = 0; +int x; +x = 5; +if (x > 2) +{ + x = 0; } else { diff --git a/src/Parser/FirstAndFollowGenerator.cpp b/src/Parser/FirstAndFollowGenerator.cpp index bf8e9ce..bc890a3 100644 --- a/src/Parser/FirstAndFollowGenerator.cpp +++ b/src/Parser/FirstAndFollowGenerator.cpp @@ -31,7 +31,16 @@ FirstAndFollowGenerator::computeFirst(const std::string &nonTerminal) { symbolStr); // insert all the first from nonTerminalFirstSet but with the production in the for loop not the one coming with the variable for (const auto &pair: nonTerminalFirstSet) { - firstSet.insert({pair.first, {nonTerminal, {production}}}); + bool foundBefore = false; + for (const auto &it: firstSet) { + if (it.first == pair.first) { + foundBefore = true; + break; + } + } + if (!foundBefore) { + firstSet.insert({pair.first, {nonTerminal, {production}}}); + } } // Check if the non-terminal has an epsilon production @@ -41,8 +50,13 @@ FirstAndFollowGenerator::computeFirst(const std::string &nonTerminal) { } } else { // Handle terminal symbols + for (auto it = firstSet.begin(); it != firstSet.end(); ++it) { + if (it->first == symbol) { + firstSet.erase(it); + break; + } + } firstSet.insert({symbol, {nonTerminal, {production}}}); - //firstSet.insert(symbol); // Break the loop for terminal symbols break; } @@ -252,30 +266,34 @@ void FirstAndFollowGenerator::compute() { computeFollowSets(computedFollowSets); } -void FirstAndFollowGenerator::printFirstSets() { - std::cout << "First Sets:\n"; +void FirstAndFollowGenerator::generateMarkdownFirstAndFollowSets(const std::string &filename) { + std::ofstream out(filename); + out << "# First and Follow Sets\n\n"; + out << "## First Sets\n\n"; + out << "| Non-Terminal | First Set |\n"; + out << "| ------------ | --------- |\n"; for (const auto &entry: computedFirstSets) { const std::string &non_terminal = entry.first; const std::set, CompareFirst> &first_set = entry.second; - std::cout << non_terminal << ": { "; + out << "| " << non_terminal << " | "; for (const std::pair &symbol: first_set) { - std::cout << symbol.first << ' '; + out << '`' << symbol.first << "` "; } - std::cout << "}\n"; + out << " |\n"; } -} - -void FirstAndFollowGenerator::printFollowSets() { - std::cout << "\nFollow Sets:\n"; + out << "\n## Follow Sets\n\n"; + out << "| Non-Terminal | Follow Set |\n"; + out << "| ------------ | ---------- |\n"; for (const auto &entry: computedFollowSets) { const std::string &non_terminal = entry.first; const std::set &follow_set = entry.second; - std::cout << non_terminal << ": { "; + out << "| " << non_terminal << " | "; for (const std::string &symbol: follow_set) { - std::cout << symbol << ' '; + out << '`' << symbol << "` "; } - std::cout << "}\n"; + out << " |\n"; } + out.close(); } diff --git a/src/Parser/FirstAndFollowGeneratorUtility.cpp b/src/Parser/FirstAndFollowGeneratorUtility.cpp index d9492fb..873e023 100644 --- a/src/Parser/FirstAndFollowGeneratorUtility.cpp +++ b/src/Parser/FirstAndFollowGeneratorUtility.cpp @@ -1,69 +1,71 @@ #include "Parser/FirstAndFollowGeneratorUtility.h" #include +#include -std::vector findAllLongestSubstringIndices(const std::string& input, const std::set& substrings) { - std::vector substringInfoVec; +std::vector +findAllLongestSubstringIndices(const std::string &input, const std::set &substrings) { + std::vector substringInfoVec; - for (const std::string& substring : substrings) { - size_t pos = input.find(substring); - while (pos != std::string::npos) { - int endIndex = static_cast(pos + substring.length()); - bool found = false; + for (const std::string &substring: substrings) { + size_t pos = input.find(substring); + while (pos != std::string::npos) { + int endIndex = static_cast(pos + substring.length()); + bool found = false; - for (auto& info : substringInfoVec) { - if (info.start == static_cast(pos) && endIndex > info.end) { - info.end = endIndex; - found = true; - break; - } - } + for (auto &info: substringInfoVec) { + if (info.start == static_cast(pos) && endIndex > info.end) { + info.end = endIndex; + found = true; + break; + } + } - if (!found) { - substringInfoVec.push_back({static_cast(pos), endIndex}); - } + if (!found) { + substringInfoVec.push_back({static_cast(pos), endIndex}); + } - pos = input.find(substring, pos + 1); // Move to the next occurrence + pos = input.find(substring, pos + 1); // Move to the next occurrence + } } - } - std::sort(substringInfoVec.begin(), substringInfoVec.end(), compareSubstringInfo); + std::sort(substringInfoVec.begin(), substringInfoVec.end(), compareSubstringInfo); - return substringInfoVec; + return substringInfoVec; } -bool isNT(const std::string& s, std::set& nonTerminals) { - return nonTerminals.find(s) != nonTerminals.end(); +bool isNT(const std::string &s, std::set &nonTerminals) { + return nonTerminals.find(s) != nonTerminals.end(); } -std::set collectNonTerminals(const std::vector& grammar) { - std::set nonTerminals; - for (const Production& rule : grammar) { - nonTerminals.insert(rule.nonTerminal); - } - return nonTerminals; +std::set collectNonTerminals(const std::vector &grammar) { + std::set nonTerminals; + for (const Production &rule: grammar) { + nonTerminals.insert(rule.nonTerminal); + } + return nonTerminals; } -std::string getLongestUpperCaseSequence(const std::string& str) { - std::string currentSequence; - std::string longestSequence; +std::string getLongestUpperCaseSequence(const std::string &str) { + std::string currentSequence; + std::string longestSequence; - for (char ch : str) { - if (isupper(ch)) { - currentSequence += ch; - } else { - // Check if the current sequence is longer than the longest - if (currentSequence.length() > longestSequence.length()) { - longestSequence = currentSequence; - } - // Reset the current sequence - currentSequence.clear(); + for (char ch: str) { + if (isupper(ch)) { + currentSequence += ch; + } else { + // Check if the current sequence is longer than the longest + if (currentSequence.length() > longestSequence.length()) { + longestSequence = currentSequence; + } + // Reset the current sequence + currentSequence.clear(); + } } - } - // Check if the last sequence is longer than the longest - if (currentSequence.length() > longestSequence.length()) { - longestSequence = currentSequence; - } + // Check if the last sequence is longer than the longest + if (currentSequence.length() > longestSequence.length()) { + longestSequence = currentSequence; + } - return longestSequence; + return longestSequence; } diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp index 91944a1..07c8395 100644 --- a/src/Parser/Parser.cpp +++ b/src/Parser/Parser.cpp @@ -1,5 +1,4 @@ #include "Parser/Parser.h" -#include "Utilities.h" #include "Parser/CFGReader.h" #include "LeftRecursionRemover.h" #include "LeftFactorer.h" @@ -31,14 +30,14 @@ std::unordered_map>> Parser::b auto grammar = CFGReader::parseCFGInput("../CFG.txt"); auto lr_free_grammar = LeftRecursionRemover::removeLR(grammar); auto left_factored_grammar = LeftFactorer::leftFactor(lr_free_grammar); + printGrammar(left_factored_grammar); return left_factored_grammar; } void Parser::buildFirstAndFollowSets(std::unordered_map>> &grammar) { firstAndFollowGenerator = new FirstAndFollowGenerator(grammar); firstAndFollowGenerator->compute(); - firstAndFollowGenerator->printFirstSets(); - firstAndFollowGenerator->printFollowSets(); + firstAndFollowGenerator->generateMarkdownFirstAndFollowSets("../FirstAndFollowSets.md"); } void Parser::buildPredictiveTable() { @@ -46,8 +45,6 @@ void Parser::buildPredictiveTable() { firstAndFollowGenerator->getFollowSets(), firstAndFollowGenerator->getNonTerminals()); predictiveTable->buildPredictiveTable(); - std::cout << "\nPredictive Table:\n"; - predictiveTable->printPredictiveTable(); predictiveTable->generateMarkdownTable("../PredictiveTable.md"); } @@ -57,3 +54,20 @@ void Parser::buildPredictiveTopDownParser() { firstAndFollowGenerator->getNonTerminals(), "../LL1ParsingOutput.md"); } + +void Parser::printGrammar(std::unordered_map>> &grammar) { + std::cout << "Grammar:\n"; + for (auto &non_terminal: grammar) { + std::cout << non_terminal.first << " --> "; + int i = 0; + for (auto &production: non_terminal.second) { + i++; + for (auto &symbol: production) { + std::cout << symbol << " "; + } + if (i < non_terminal.second.size()) + std::cout << "| "; + } + std::cout << "\n"; + } +} diff --git a/src/Parser/PredictiveTable.cpp b/src/Parser/PredictiveTable.cpp index 3def57b..ce203eb 100644 --- a/src/Parser/PredictiveTable.cpp +++ b/src/Parser/PredictiveTable.cpp @@ -172,25 +172,39 @@ void PredictiveTable::generateMarkdownTable(const std::string &outputFilePath) { } outputFile << "\n"; - // Iterate through non-terminals and terminals to fill in the table + // create table with empty cells then replace them with the correct values like replace strings + std::vector> table; for (const auto &non_terminal: non_terminals) { - outputFile << "| **" << non_terminal << "** |"; + std::vector row; for (const auto &terminal: terminals) { const CellValue *cellValue = lookUp(non_terminal, terminal); if (hasProduction(non_terminal, terminal)) { const auto &production = cellValue->getProduction(); std::string productionStr; if (!production.productions.empty()) { + productionStr.push_back('`'); for (const auto &symbol: production.productions[0]) { productionStr += symbol + " "; } productionStr.pop_back(); // Remove the extra space + productionStr.push_back('`'); } - outputFile << " `" << productionStr << "` |"; + row.push_back(productionStr); } else if (isSynchronizing(non_terminal, terminal)) { - outputFile << " `Synch` |"; + row.push_back("`Synch`"); + } else { + row.push_back(""); } } + table.push_back(row); + } + + // Write table rows + for (int i = 0; i < non_terminals.size(); ++i) { + outputFile << "| **" << *std::next(non_terminals.begin(), i) << "** |"; + for (int j = 0; j < terminals.size(); ++j) { + outputFile << " " << table[i][j] << " |"; + } outputFile << "\n"; } diff --git a/src/Parser/PredictiveTopDownParser.cpp b/src/Parser/PredictiveTopDownParser.cpp index 828d2f8..0289d1e 100644 --- a/src/Parser/PredictiveTopDownParser.cpp +++ b/src/Parser/PredictiveTopDownParser.cpp @@ -16,21 +16,19 @@ PredictiveTopDownParser::PredictiveTopDownParser( left_most_derivation.push_back({CFGReader::start_symbol}); parsingFile.open(filename); - if (!parsingFile.is_open()) { - std::cerr << "Error opening file: output.md" << std::endl; + std::cerr << "Error opening file: LL1ParsingOutput.md" << std::endl; } } -PredictiveTopDownParser::~PredictiveTopDownParser() { -} +PredictiveTopDownParser::~PredictiveTopDownParser() {} void PredictiveTopDownParser::parseInputTokens() { std::cout << "Parsing input tokens..." << std::endl; - parsingFile << "Parsing input tokens...\n\n"; - parsingFile << "| Stack | Current Token | Output |\n"; - parsingFile << "|--------|---------------|------------------------|\n"; + parsingFile << "*Parsing input tokens...*\n\n"; + parsingFile << "| Stack Top | Current Token | Output |\n"; + parsingFile << "|-----------|---------------|------------------------|\n"; Token *curr_token = lex.getNextToken(); @@ -49,8 +47,6 @@ void PredictiveTopDownParser::parseInputTokens() { handleNonTerminal(top, curr_token); parsingFile << "\n"; } - // Accept the grammar if the stack is empty - std::cout << "Accept -_-" << std::endl; parsingFile.close(); } @@ -58,23 +54,38 @@ bool PredictiveTopDownParser::handleMatchOrError(const StackItem &top, Token *&c if (top.isTerminal) { if (top.token == *(curr_token->getKey())) { handleMatch(top, curr_token); - return true; + } else if (top.token == "$") { + handleEndOfStack(curr_token); } else { - handleMissingTerminal(top); - return true; + handleMissingTerminal(top, curr_token); } + return true; } return false; } void PredictiveTopDownParser::handleMatch(const StackItem &top, Token *&curr_token) { - parsingFile << "match " << *(curr_token->getKey()) << " |"; + parsingFile << "match ``" << *(curr_token->getKey()) << "`` |"; std::cout << "Match " << *(curr_token->getKey()) << std::endl; stk.pop(); curr_token = lex.getNextToken(); // Advance to the next token } +void PredictiveTopDownParser::handleEndOfStack(Token *&curr_token) { + parsingFile << "Error: ``" << *(curr_token->getKey()) << "`` discarded |\n"; + curr_token = lex.getNextToken(); + while (*(curr_token->getKey()) != "$") { + parsingFile << "| $ | " << *(curr_token->getKey()) + << " | Error: ``" << *(curr_token->getKey()) << "`` discarded |"; + + std::cerr << "Error: " << *(curr_token->getKey()) << " discarded" << std::endl; + curr_token = lex.getNextToken(); + if (*(curr_token->getKey()) != "$") { + parsingFile << "\n"; + } + } +} void PredictiveTopDownParser::handleNonTerminal(const StackItem &top, Token *&curr_token) { const CellValue *cellValue = predictive_table.lookUp(top.token, *(curr_token->getKey())); @@ -96,34 +107,52 @@ void PredictiveTopDownParser::handleNonTerminal(const StackItem &top, Token *&cu } // Helper functions for error handling and stack operations -void PredictiveTopDownParser::handleMissingTerminal(const StackItem &top) { - parsingFile << "Error: missing " << top.token << ", discarded" << " |"; +void PredictiveTopDownParser::handleMissingTerminal(const StackItem &top, Token *&curr_token) { + parsingFile << "Error: missing ``" << top.token << "`` inserted" << " |"; - std::cerr << "Error: missing " << top.token << ", discarded" << std::endl; + std::cerr << "Error: missing '" << top.token << "' inserted" << std::endl; stk.pop(); } +/** + * @brief discard the current token (input symbol) and advance to the next token + * */ void PredictiveTopDownParser::handleEmptyEntry(const StackItem &top, Token *&curr_token) { - parsingFile << "Error:(illegal " << top.token << ") at line(" + if (*(curr_token->getKey()) == "$") { + handleEndOfInput(top); // report error and pop stack + return; + } + parsingFile << "Error: (illegal ``" << top.token << "``) at line(" << curr_token->getPosition()->line_number << ") column(" - << curr_token->getPosition()->column_number << ") – discard " << curr_token << " |"; + << curr_token->getPosition()->column_number << ") - discard ``" << *(curr_token->getKey()) << "`` |"; - std::cerr << "Error:(illegal " << top.token << ") at line(" + std::cerr << "Error: (illegal " << top.token << ") at line(" << curr_token->getPosition()->line_number << ") column(" - << curr_token->getPosition()->column_number << ") – discard " << curr_token << std::endl; + << curr_token->getPosition()->column_number << ") - discard " << *(curr_token->getKey()) << std::endl; curr_token = lex.getNextToken(); } +void PredictiveTopDownParser::handleEndOfInput(const StackItem &top) { + parsingFile << "Error: ``No more input`` " << ", discard " << top.token << "|"; + + std::cerr << "Error: No more input " << "discard " << top.token << std::endl; + stk.pop(); +} + void PredictiveTopDownParser::handleSyncEntry(const StackItem &top) { - parsingFile << "Error: missing " << top.token << ", discarded" << " |"; + parsingFile << "Error: ``Sync`` " << ",discard " << top.token << "|"; - std::cerr << "Error: missing " << top.token << ", discarded" << std::endl; + std::cerr << "Error: Sync " << "discard " << top.token << std::endl; stk.pop(); } void PredictiveTopDownParser::handleValidProduction(const StackItem &top, const CellValue *cellValue) { + parsingFile << top.token << " → "; stk.pop(); auto production = cellValue->getProduction().productions[0]; + for (auto &symbol: production) { + parsingFile << symbol << " "; + } setNextDerivation(top, production); pushProductionToStack(production); }