From 55c31a5f3dd951a72097393ba3d213980303dfe4 Mon Sep 17 00:00:00 2001 From: Samiro Discher Date: Fri, 30 Aug 2024 21:04:58 +0200 Subject: [PATCH] revisit helper tools' code (temporary) --- main.cpp | 1647 ++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 1466 insertions(+), 181 deletions(-) diff --git a/main.cpp b/main.cpp index 5fcca1c..02fddca 100644 --- a/main.cpp +++ b/main.cpp @@ -13,6 +13,9 @@ #include #endif +//#include +//#include + using namespace std; using namespace xamidi::helper; using namespace xamidi::metamath; @@ -191,93 +194,401 @@ static const map& cmdInfo() { return _; } +#if 0 +#include "logic/DlCore.h" +#include "tree/TreeNode.h" +string prover9Formula(const string& theorem, bool showLabel = false, bool printDot = true, bool TPTPConvention = false) { + shared_ptr consequent; + if (!DlCore::fromPolishNotation(consequent, theorem)) { + cerr << "Failed to parse " << theorem << "." << endl; + return 0; + } + + static const unordered_map operatorNames = { { DlCore::terminalStr_and(), "K" }, { DlCore::terminalStr_or(), "A" }, { DlCore::terminalStr_nand(), "D" }, { DlCore::terminalStr_nor(), "X" }, { DlCore::terminalStr_imply(), "i" }, { DlCore::terminalStr_implied(), "B" }, { DlCore::terminalStr_nimply(), "F" }, { DlCore::terminalStr_nimplied(), "G" }, { DlCore::terminalStr_equiv(), "E" }, { DlCore::terminalStr_xor(), "J" }, { DlCore::terminalStr_com(), "S" }, { DlCore::terminalStr_app(), "U" }, { DlCore::terminalStr_not(), "n" }, { DlCore::terminalStr_nece(), "L" }, { DlCore::terminalStr_poss(), "M" }, { DlCore::terminalStr_obli(), "Z" }, { DlCore::terminalStr_perm(), "P" }, { DlCore::terminalStr_top(), "V" }, { DlCore::terminalStr_bot(), "O" } }; + map operatorTranslations; + map variableTranslations; + unsigned next = 0; + auto recurse = [&](const shared_ptr& node, const auto& me) -> string { + auto valToString = [&](const string& s) -> string { + // 1. Operator names + map::const_iterator itOperator = operatorTranslations.find(s); + if (itOperator == operatorTranslations.end()) { + unordered_map::const_iterator searchResult = operatorNames.find(s); + if (searchResult != operatorNames.end()) + return operatorTranslations[s] = searchResult->second; + } else + return itOperator->second; + if (DlCore::dlOperators().count(s)) + return operatorTranslations[s] = "<" + s + ">"; // unsupported operator + + // 2. Variable names + map::const_iterator itVariable = variableTranslations.find(s); + if (itVariable == variableTranslations.end()) + return variableTranslations[s] = (TPTPConvention ? "X" : "x") + to_string(next++); + else + return itVariable->second; + }; + string str = valToString(node->getValue()->value); + bool leaf = node->getChildren().empty(); + if (!leaf) + str += "("; + bool first= true; + for (size_t i = 0; i < node->getChildren().size(); i++) { + if (first) + first = false; + else + str += ","; + str += me(node->getChildren()[i], me); + } + return str + (leaf ? "" : ")"); + }; + return "t(" + recurse(consequent, recurse) + ")" + (showLabel ? " #label(" + theorem + ")" : "") + (printDot ? "." : ""); +} +#endif + +//#include +//#include +//#include int main(int argc, char* argv[]) { // argc = 1 + N, argv = { , , ..., } - // Custom tools' code - (v1.2) : https://github.com/xamidi/pmGenerator/commit/018ac7854f3e620406ba04726802a77acbd6d461 -#if 0 //### ./formulasFromSummaries ; extract comma-separated ordered list of formulas in normal Polish notation that are used in the given proof summaries - // NOTE: Requires '#include "logic/DlCore.h"' - // e.g. ./formulasFromSummaries data/summary-formulas.txt data/m.txt,data/w1.txt,data/w2.txt,data/w3.txt,data/w4.txt,data/w5.txt,data/w6.txt,data/s5.txt - if (argc <= 2) { - cerr << "Need 1. path for output file, and 2. comma-separated list of paths to proof summary files." << endl; +#if 0 //### ./CNFFromTopList ; create proof databases from smallest conclusion lists in TPTP's CNF format + // e.g. ./CNFFromTopList CpCCqCprCCNrCCNstqCsr data/db25c49b13fec26ecf32e40bde65e4e2273f23b3c022cfd0fa986cff/top1000SmallestConclusions_1to43Steps.txt data/w2-top1000-cnf.txt + if (argc <= 3) { + cerr << "Need 1. axioms in normal Polish notation, 2. comma-separated list of paths to smallest conclusion list files, and 3. path for output file. Optional: 4. minimal proof length, 5. minimal conclusion length" << endl; return 0; } - string tmpFile = "data/tmp.txt"; - string outputFile = argv[1]; + size_t minProofLen = 0; + if (argc >= 5) { + try { + minProofLen = stoll(argv[4]); + cout << "Minimal proof length set to " << minProofLen << "." << endl; + } catch (...) { + cerr << "Invalid format for \"4. minimal proof length\"." << endl; + return 0; + } + } + size_t minConcLen = 0; + if (argc >= 6) { + try { + minConcLen = stoll(argv[5]); + cout << "Minimal conclusion length set to " << minConcLen << "." << endl; + } catch (...) { + cerr << "Invalid format for \"5. minimal conclusion length\"." << endl; + return 0; + } + } + vector axioms = FctHelper::stringSplit(argv[1], ","); + DlProofEnumerator::resetRepresentativesFor(&axioms, true); + vector inputFiles = FctHelper::stringSplit(argv[2], ","); - set formulas; - for (const string& proofSummaryFile : inputFiles) { - string filterForTheorems = "."; - DlProofEnumerator::recombineProofSummary(proofSummaryFile, true, nullptr, 1, SIZE_MAX, true, false, &filterForTheorems, true, SIZE_MAX, 134217728, false, false, &tmpFile, false); - set conclusions; + string outputFile = argv[3]; + vector contents(inputFiles.size()); + size_t i = 0; + for (const string& inputFile : inputFiles) { + if (!FctHelper::readFile(inputFile, contents[i])) { + cerr << "Invalid file path \"" << inputFile << "\"" << endl; + return 0; + } + i++; + } + + //map dProofsWithConclusions; + map conclusionsWithDProofs; + for (const string& content : contents) + for (string line : FctHelper::stringSplitAndSkip(content, "\n", "%", true)) { + string::size_type posA = line.find_first_not_of(' ', line.find(' ', line.find_first_not_of(' ', line.find(' ') + 1) + 1) + 1); + string::size_type posB = line.find(' ', posA + 1); + string conclusion = line.substr(posA, posB - posA); + string::size_type posC = line.find_first_not_of(' ', posB + 1); + string::size_type posD = line.find(':', posC + 1); + string dProof = line.substr(posC, posD - posC); + if (conclusion.length() >= minConcLen && dProof.length() >= minProofLen) { +#if 0 // filter for useful formulas + bool use = false; + { + map> occurrences; + size_t i = 0; + for (char c : conclusion) { + occurrences[c].emplace(i); + i++; + } + size_t goodVars = 0; + for (const pair>& p : occurrences) { + const set& occurrence = p.second; + char c = p.first; + if ('a' <= c && c <= 'z' && occurrence.size() >= 2) { + if (occurrence.size() >= 3 || *occurrence.begin() + 1 < *next(occurrence.begin())) + goodVars++; + } + if (goodVars >= 3) + use = true; + } + } +#else + bool use = true; +#endif + //dProofsWithConclusions.emplace(dProof, conclusion); + if (use) + conclusionsWithDProofs.emplace(conclusion, dProof); + } + } + + //size_t tmpI = 0; + size_t c = 0; + stringstream ss; + for (const pair& p : conclusionsWithDProofs) { + ss << "% " << p.first << " (" << p.first.length() << " symbols, " << p.second.length() << " step" << (p.second.length() == 1 ? "" : "s") << ")\n"; + ss << "cnf(eax" << c++ << ", axiom, " << prover9Formula(p.first, false, false, true) << " ).\n"; + //tmpI++; + //if (tmpI > 5) + // exit(0); + } + FctHelper::writeToFile(outputFile, ss.str()); + + return 0; +#endif //### +#if 0 //### ./DBExtractBySummary ; extract those proofs from a proof database which appear in a given proof summary + // NOTE: Requires '#include "logic/DlCore.h"'. + // e.g. ./DBExtractBySummary data/exs/m-topDB.txt data/m.txt data/exs/m-relevantDB.txt + if (argc <= 3) { + cerr << "Need 1. path to proof database (with conclusions commented in normal Polish notation), 2. path to proof summary, and 3. path for output file." << endl; + return 0; + } + string proofDBFile = argv[1]; + string proofSummaryFile = argv[2]; + string outputFile = argv[3]; + + // 1. Obtain all conclusions used by proof summary (i.e. all which are relevant). + string filterForTheorems = "."; + DlProofEnumerator::recombineProofSummary(proofSummaryFile, true, nullptr, 1, SIZE_MAX, true, false, &filterForTheorems, true, SIZE_MAX, 134217728, false, false, &outputFile, false); + unordered_set conclusions; + vector customAxioms; + { + //vector customAxioms; + vector abstractDProof; vector requiredIntermediateResults; - DlProofEnumerator::convertProofSummaryToAbstractDProof(tmpFile, nullptr, nullptr, &requiredIntermediateResults, true, true, false); - for (const DRuleParser::AxiomInfo& info : requiredIntermediateResults) { - const shared_ptr& f = info.refinedAxiom; - conclusions.emplace(DlCore::toPolishNotation(f)); - //conclusions.emplace(DlCore::toPolishNotation_noRename(f)); + DlProofEnumerator::convertProofSummaryToAbstractDProof(outputFile, &customAxioms, &abstractDProof, &requiredIntermediateResults, true, true, false); + //#cout << "abstractDProof = " << FctHelper::vectorString(abstractDProof) << endl; + //#cout << "|abstractDProof| = " << abstractDProof.size() << endl; + //#cout << "requiredIntermediateResults = " << FctHelper::vectorStringF(requiredIntermediateResults, [](const DRuleParser::AxiomInfo& ax) { return DlCore::toPolishNotation(ax.refinedAxiom); }) << endl; + //#cout << "|requiredIntermediateResults| = " << requiredIntermediateResults.size() << endl; + for (const DRuleParser::AxiomInfo& info : requiredIntermediateResults) + conclusions.emplace(DlCore::toPolishNotation(info.refinedAxiom)); + } + cout << "Found " << conclusions.size() << " relevant conclusions." << endl; + + vector dProofNamesInFile; + vector dProofsInFile = DRuleParser::readFromMmsolitaireFile(proofDBFile, &dProofNamesInFile, true); + + // 2. Copy relevant conclusion's D-proofs into new proof database. + vector relevantIndices; + string result; + for (size_t i = 0; i < dProofNamesInFile.size(); i++) { + string dProof = dProofsInFile[i]; + //vector rawParseData = DRuleParser::parseDProof_raw(dProof, &customAxioms); + //const shared_ptr& conclusion = get<0>(rawParseData.back().second).back(); + //string f_ = DlCore::toPolishNotation(conclusion); + string dProofName = dProofNamesInFile[i]; + string::size_type pos = dProofName.find("; ! "); + if (pos == string::npos) { + cerr << "Invalid DB file" << endl; + return 0; + } + string::size_type posEnd = dProofName.find(' ', pos + 5); + string f = dProofName.substr(pos + 4, posEnd == string::npos ? string::npos : posEnd - pos - 4); + //if (f != f_) { + // cerr << "f: " << f << ", f_: " << f_ << ", for " << dProofNamesInFile[i] << endl; + // return 0; + //} + if (conclusions.count(f)) { + result += DRuleParser::toDBProof(dProof, &customAxioms, posEnd == string::npos ? f : dProofName.substr(pos + 4)) + "\n"; + relevantIndices.push_back(i); } - cout << "Found " << conclusions.size() << " different intermediate conclusion" << (conclusions.size() == 1 ? "" : "s") << " in \"" << proofSummaryFile << "\"." << endl; - formulas.insert(conclusions.begin(), conclusions.end()); } - cout << "In total, found " << formulas.size() << " different formula" << (formulas.size() == 1 ? "" : "s") << " in " << inputFiles.size() << " file" << (inputFiles.size() == 1 ? "" : "s") << "." << endl; - if (!formulas.empty()) - cout << "Smallest formula: " << *formulas.begin() << "\n (length " << formulas.begin()->length() << ")\nLargest formula: " << *formulas.rbegin() << "\n (length " << formulas.rbegin()->length() << ")" << endl; - string result = FctHelper::setString(formulas, { }, { }, ","); - FctHelper::writeToFile(outputFile, result); - cout << result.length() << " bytes written to \"" << outputFile<< "\"." << endl; + cout << "Copied for " << relevantIndices.size() << " relevant indices: " << FctHelper::vectorString(relevantIndices) << endl; + + if (!FctHelper::writeToFile(outputFile, result)) + cerr << "Failed." << endl; + else + cout << result.length() << " bytes written to \"" << outputFile<< "\"." << endl; return 0; -#endif -#if 0 //### ./uniteLists ; combine comma-separated ordered lists of strings - // e.g. ./uniteLists data/summary-formulas.txt data/summary-formulas1.txt,data/summary-formulas2.txt,data/summary-formulas3.txt - if (argc <= 2) { - cerr << "Need 1. path for output file, and 2. comma-separated list of paths to files with comma-separated elements." << endl; +#endif //### +#if 0 //### ./DBFromTopList ; create proof databases from smallest conclusion lists + if (argc <= 3) { + cerr << "Need 1. axioms in normal Polish notation, 2. comma-separated list of paths to smallest conclusion list files, and 3. path for output file. Optional: 4. minimal proof length" << endl; return 0; } - string outputFile = argv[1]; + size_t minProofLen = 0; + if (argc >= 5) { + try { + minProofLen = stoll(argv[4]); + cout << "Minimal proof length set to " << minProofLen << "." << endl; + } catch (...) { + cerr << "Invalid format for \"4. minimal proof length\"." << endl; + return 0; + } + } + vector axioms = FctHelper::stringSplit(argv[1], ","); + DlProofEnumerator::resetRepresentativesFor(&axioms, true); + // default: + // ./DBFromTopList CpCqp,CCpCqrCCpqCpr,CCNpNqCqp data/top1000SmallestConclusions_1to39Steps.txt data/topDB.txt + // m: + // ./DBFromTopList CCCCCpqCNrNsrtCCtpCsp data/478804cd4793bc7f87041d99326aff4595662146d8a68175dda22bed/top1000SmallestConclusions_1to83Steps.txt data/m-topDB.txt + // ./DBFromTopList CCCCCpqCNrNsrtCCtpCsp "data/exs/m/top131032SmallestConclusions_1to133Steps.txt,data/exs/m/top15254SmallestConclusions_1to203Steps.txt" data/exs/m-topDB-exs.txt + // w1: + // ./DBFromTopList CCpCCNpqrCsCCNtCrtCpt data/02974777ff5f71e12ef58ccebedeef133584aad66e06a2a13b2b4b2c/top1000SmallestConclusions_1to161Steps.txt data/w1-topDB.txt + // ./DBFromTopList CCpCCNpqrCsCCNtCrtCpt "data/exs/w1/top371445SmallestConclusions_1to211Steps.txt,data/exs/w1/top79976SmallestConclusions_1to337Steps.txt,data/exs/w1/top21935SmallestConclusions_1to433Steps.txt,data/exs/w1/top11238SmallestConclusions_1to491Steps.txt" data/exs/w1-topDB-exs.txt + // w2: + // ./DBFromTopList CpCCqCprCCNrCCNstqCsr data/db25c49b13fec26ecf32e40bde65e4e2273f23b3c022cfd0fa986cff/top1000SmallestConclusions_1to43Steps.txt data/w2-topDB.txt + // ./DBFromTopList CpCCqCprCCNrCCNstqCsr "data/exs/w2/top100000SmallestConclusions_1to139Steps.txt,data/exs/w2/top100000SmallestConclusions_1to177Steps.txt,data/exs/w2/top63668SmallestConclusions_1to277Steps.txt,data/exs/w2/top15790SmallestConclusions_1to313Steps.txt,data/exs/w2/top13235SmallestConclusions_1to339Steps.txt" data/exs/w2-topDB-exs.txt + // w3: + // ./DBFromTopList CpCCNqCCNrsCptCCtqCrq data/0df075acc552c62513b49b6ed674bfcde1c1b018e532c665be229314/top1000SmallestConclusions_1to73Steps.txt data/w3-topDB.txt + // ./DBFromTopList CpCCNqCCNrsCptCCtqCrq "data/exs/w3/top61944SmallestConclusions_1to159Steps.txt,data/exs/w3/top50818SmallestConclusions_1to231Steps.txt,data/exs/w3/top18515SmallestConclusions_1to257Steps.txt,data/exs/w3/top16062SmallestConclusions_1to277Steps.txt,data/exs/w3/top4006SmallestConclusions_1to309Steps.txt" data/exs/w3-topDB-exs.txt + // w4: + // ./DBFromTopList CpCCNqCCNrsCtqCCrtCrq data/fe7117b8aad7634fae344172b9fee05f77e5e23b035276b17d8c6ec9/top1000SmallestConclusions_1to169Steps.txt data/w4-topDB.txt + // ./DBFromTopList CpCCNqCCNrsCtqCCrtCrq "data/exs/w4/top160102SmallestConclusions_1to261Steps.txt,data/exs/w4/top6717SmallestConclusions_1to495Steps.txt" data/exs/w4-topDB-exs.txt + // w5: + // ./DBFromTopList CCpqCCCrCstCqCNsNpCps data/1d5f27494b1a2312e223b7f8dd3551abf717590ceef694c08dcbed72/top1000SmallestConclusions_1to55Steps.txt data/w5-topDB.txt + // ./DBFromTopList CCpqCCCrCstCqCNsNpCps "data/exs/w5/top684361SmallestConclusions_1to111Steps.txt,data/exs/w5/top100000SmallestConclusions_1to149Steps.txt,data/exs/w5/top100000SmallestConclusions_1to159Steps.txt,data/exs/w5/top135154SmallestConclusions_1to161Steps.txt" data/exs/w5-topDB-exs.txt + // w6: + // ./DBFromTopList CCCpqCCCNrNsrtCCtpCsp data/7f473b6ba952b3deadf36cd7f1c4b5286ef32fef64808d14fff70a69/top1000SmallestConclusions_1to95Steps.txt data/w6-topDB.txt + // ./DBFromTopList CCCpqCCCNrNsrtCCtpCsp "data/exs/w6/top297996SmallestConclusions_1to149Steps.txt,data/exs/w6/top16393SmallestConclusions_1to327Steps.txt" data/exs/w6-topDB-exs.txt + // S5: + // ./DBFromTopList CpCqp,CCpCqrCCpqCpr,CCNpNqCqp,CLpp,CLCpqCLpLq,CNLNpLNLNp data/d03a044ec35d4d9a3f6d0f5118bc4f8a02a08e61fe7815b2002d007f/top1000SmallestConclusions_1to30Steps.txt data/s5-topDB.txt + // ./DBFromTopList CpCqp,CCpCqrCCpqCpr,CCNpNqCqp,CLpp,CLCpqCLpLq,CNLNpLNLNp "data/exs/s5/top244329SmallestConclusions_1to45Steps.txt,data/exs/s5/top39663SmallestConclusions_1to85Steps.txt" data/exs/s5-topDB-exs.txt vector inputFiles = FctHelper::stringSplit(argv[2], ","); - set unionOfLists; + string outputFile = argv[3]; + vector contents(inputFiles.size()); + size_t i = 0; for (const string& inputFile : inputFiles) { - string fileString; - if (!FctHelper::readFile(inputFile, fileString)) - throw runtime_error("Failed to read file \"" + inputFile + "\"."); + if (!FctHelper::readFile(inputFile, contents[i])) { + cerr << "Invalid file path \"" << inputFile << "\"" << endl; + return 0; + } + i++; + } + set dProofs; + for (const string& content : contents) + for (string line : FctHelper::stringSplitAndSkip(content, "\n", "%", true)) { + string::size_type posC = line.find_first_not_of(' ', line.find(' ', line.find_first_not_of(' ', line.find(' ', line.find_first_not_of(' ', line.find(' ') + 1) + 1) + 1) + 1) + 1); + string::size_type posD = line.find(':', posC + 1); + string dProof = line.substr(posC, posD - posC); + if (dProof.length() >= minProofLen) + dProofs.emplace(dProof); + } + //#cout << FctHelper::setString(dProofs) << endl; - // Erase all '\r', '\n', '\t', ' ', and lines starting with '%'. ; NOTE: Much faster than using regular expressions. - bool startOfLine = true; - bool erasingLine = false; - fileString.erase(remove_if(fileString.begin(), fileString.end(), [&](const char c) { - switch (c) { - case '\r': - case '\n': - startOfLine = true; - erasingLine = false; - return true; - case '\t': - case ' ': + string result; + for (const string& dProof : dProofs) + result += DRuleParser::toDBProof(dProof, DlProofEnumerator::getCustomAxioms()) + "\n"; + if (!FctHelper::writeToFile(outputFile, result)) + cerr << "Failed." << endl; + else + cout << result.length() << " bytes written to \"" << outputFile<< "\"." << endl; + return 0; +#endif //### +#if 0 //### ./dProofsFromDB ; extract comma-separated list of proofs from all proofs explicitly given in a proof database + // e.g. ./dProofsFromDB data/s5proofs.txt data/s5-dProofs.txt 1 1 + if (argc <= 2) { + cerr << "Need 1. path to proof database and 2. path for output file. Optional: 3. whether to only keep unique proofs (default: 0), 4. whether to remove proofs of length 1 (which are invalid in abstract proofs, default: 0)" << endl; + return 0; + } + string outputFile = argv[2]; + bool filter = false; + bool removeTrivial = false; + if (argc >= 4) { + string s = argv[3]; + filter = (s != "0" && s != "false"); + } + if (argc >= 5) { + string s = argv[4]; + removeTrivial = (s != "0" && s != "false"); + } + vector dProofs = DRuleParser::readFromMmsolitaireFile(argv[1], nullptr, true); + if (filter) { + set s; + vector uniqueDProofs; + for (const string& dProof : dProofs) + if (s.emplace(dProof).second) + uniqueDProofs.push_back(dProof); + dProofs = uniqueDProofs; + } + if (removeTrivial) { + for (vector::iterator it = dProofs.begin(); it != dProofs.end();) + if (it->length() == 1) + it = dProofs.erase(it); + else + it++; + } + stringstream ss; + for (size_t i = 0; i < dProofs.size(); i++) { + if (i) + ss << ",\n"; + ss << dProofs[i]; + } + ss << "\n"; + if (!FctHelper::writeToFile(outputFile, ss.str())) + cerr << "Failed." << endl; + else + cout << ss.str().length() << " bytes written to \"" << outputFile<< "\"." << endl; + return 0; +#endif //### +#if 0 //### ./dProofsToDB ; convert comma-separated proofs to proof database + // e.g. ./dProofsToDB CpCqp,CCpCqrCCpqCpr,CCNpNqCqp,CLpp,CLCpqCLpLq,CNLNpLNLNp data/s5-dProofs.txt data/s5proofs-part.txt + if (argc <= 3) { + cerr << "Need 1. axioms in normal Polish notation, 2. path to file with comma-separated proofs, and 3. path for new proof database." << endl; + return 0; + } + vector axioms = FctHelper::stringSplit(argv[1], ","); + DlProofEnumerator::resetRepresentativesFor(&axioms, true); + chrono::time_point startTime; + string fileString; + startTime = chrono::steady_clock::now(); + if (!FctHelper::readFile(argv[2], fileString)) { + cerr << "Invalid file path" << endl; + return 0; + } + string::size_type len = fileString.length(); + + // Erase all '\r', '\n', '\t', ' ', and lines starting with '%'. ; NOTE: Much faster than using regular expressions. + bool startOfLine = true; + bool erasingLine = false; + fileString.erase(remove_if(fileString.begin(), fileString.end(), [&](const char c) { + switch (c) { + case '\r': + case '\n': + startOfLine = true; + erasingLine = false; + return true; + case '\t': + case ' ': + startOfLine = false; + return true; + case '%': + if (startOfLine) { startOfLine = false; + erasingLine = true; return true; - case '%': - if (startOfLine) { - startOfLine = false; - erasingLine = true; - } - return erasingLine; - default: + } else { startOfLine = false; - return erasingLine; + return false; } - }), fileString.end()); + default: + startOfLine = false; + return erasingLine; + } + }), fileString.end()); - vector elements = FctHelper::stringSplit(fileString, ","); - cout << "Found " << elements.size() << " separated element" << (elements.size() == 1 ? "" : "s") << " in \"" << inputFile << "\"." << endl; - unionOfLists.insert(elements.begin(), elements.end()); - } - cout << "In total, found " << unionOfLists.size() << " different element" << (unionOfLists.size() == 1 ? "" : "s") << " in " << inputFiles.size() << " file" << (inputFiles.size() == 1 ? "" : "s") << "." << endl; - if (!unionOfLists.empty()) - cout << "Smallest element: " << *unionOfLists.begin() << "\n (length " << unionOfLists.begin()->length() << ")\nLargest element: " << *unionOfLists.rbegin() << "\n (length " << unionOfLists.rbegin()->length() << ")" << endl; - string result = FctHelper::setString(unionOfLists, { }, { }, ","); - FctHelper::writeToFile(outputFile, result); - cout << result.length() << " bytes written to \"" << outputFile<< "\"." << endl; + vector dProofsFromFile = FctHelper::stringSplit(fileString, ","); + cout << FctHelper::durationStringMs(chrono::duration_cast(chrono::steady_clock::now() - startTime)) << " taken to read and convert " << len << " bytes from \"" << argv[2] << "\"." << endl; + string result; + for (const string& dProof : dProofsFromFile) + result += DRuleParser::toDBProof(dProof, DlProofEnumerator::getCustomAxioms()) + "\n"; + if (!FctHelper::writeToFile(argv[3], result)) + cerr << "Failed." << endl; + else + cout << result.length() << " bytes written to \"" << argv[3] << "\"." << endl; return 0; -#endif +#endif //### #if 0 //### ./excludeLists ; exclude comma-separated ordered lists of strings // e.g. ./excludeLists data/summary-formulasWithoutABC.txt data/summary-formulasA.txt,data/summary-formulasB.txt,data/summary-formulasC.txt if (argc <= 2) { @@ -339,63 +650,18 @@ int main(int argc, char* argv[]) { // argc = 1 + N, argv = { , , FctHelper::writeToFile(referenceFile, result); cout << result.length() << " bytes written to \"" << referenceFile<< "\"." << endl; return 0; -#endif -#if 0 //### ./dProofsFromDB ; extract comma-separated list of proofs from all proofs explicitly given in a proof database - if (argc <= 2) { - cerr << "Need 1. path to proof database and 2. path for output file. Optional: 3. whether to only keep unique proofs (default: 0), 4. whether to remove proofs of length 1 (which are invalid in abstract proofs, default: 0)" << endl; - return 0; - } - string outputFile = argv[2]; - bool filter = false; - bool removeTrivial = false; - if (argc >= 4) { - string s = argv[3]; - filter = (s != "0" && s != "false"); - } - if (argc >= 5) { - string s = argv[4]; - removeTrivial = (s != "0" && s != "false"); - } - vector dProofs = DRuleParser::readFromMmsolitaireFile(argv[1], nullptr, true); - if (filter) { - set s; - vector uniqueDProofs; - for (const string& dProof : dProofs) - if (s.emplace(dProof).second) - uniqueDProofs.push_back(dProof); - dProofs = uniqueDProofs; - } - if (removeTrivial) { - for (vector::iterator it = dProofs.begin(); it != dProofs.end();) - if (it->length() == 1) - it = dProofs.erase(it); - else - it++; - } - stringstream ss; - for (size_t i = 0; i < dProofs.size(); i++) { - if (i) - ss << ",\n"; - ss << dProofs[i]; - } - ss << "\n"; - if (!FctHelper::writeToFile(outputFile, ss.str())) - cerr << "Failed." << endl; - else - cout << ss.str().length() << " bytes written to \"" << outputFile<< "\"." << endl; - return 0; -#endif +#endif //### #if 0 //### ./findCompactSummary ; determine which args for '--transform -f -n -t -j -1 -s ' yields a small output (in bytes) // NOTE: assumes that every formula is derived by a unique proof - // e.g.: ./findCompactSummary data/w3.txt CpCqp,CCpCqrCCpqCpr,CCNpNqCqp,Cpp,CCpqCCqrCpr,CCNppp,CpCNpq + // e.g. ./findCompactSummary data/w3.txt CpCqp,CCpCqrCCpqCpr,CCNpNqCqp,Cpp,CCpqCCqrCpr,CCNppp,CpCNpq if (argc <= 2) { - cerr << "Need 1. path to proof summary, and 2. requested theorem list in normal Polish notation" << endl; + cerr << "Need 1. path to proof summary, and 2. requested theorem list in normal Polish notation." << endl; return 0; } string tmpFile = "data/tmp.txt"; string inputFile = argv[1]; - unsigned minUseAmountToCreateHelperProof = 2; + unsigned minUseAmountToCreateHelperProof = 2; // NOTE: Set initially to 1 for './findCompactSummary_1' variant, which in some cases is required to find improvements. size_t maxLengthToKeepProof = SIZE_MAX; string filterForTheorems = argv[2]; size_t maxLengthToComputeDProof = 134217728; @@ -436,7 +702,7 @@ int main(int argc, char* argv[]) { // argc = 1 + N, argv = { , , #if 0 // NOTE: In case the function does not work properly (e.g. because there are redundancies in the abstract proof, causing it // to be greater when using '--transform -s' such that '--transform -j 2' never gets undercut by single modifications), - // a pre-defined list can be used here as a starting point for sucessful reduction. + // a pre-defined list can be used here as a starting point for successful reduction. string startList = "CCCpCCqrCsrtCCCqrCsrt,CCCCCpCCqrCsrtCCCqrCsrtuCvu,CCCpCqrsCCqrs,CCCCCpqrCqrsCts,CCpqCCCpqrCsr,CCCpqrCqr,CCCNpqrCpr,CCNpCCNqrCCCCCCstuCtuvCCvwCxwyCCypCqp,CCCCNpqCrqpCsp,CpCCpqCrq,CpCqCrp,CCCCCCpqCrqsNttCut,CCCNpqCCCCNrsCCtCutvCCvwCrwxCCxyCpy,CCCCCpqrCsrtCqt,CCCpCqrsCrs,CNNCpqCrCpq,CCNCCpCqpNrsCrs,CCCpqCNprCsCNpr,CpCCNCqrrCqr,CCCpCqpNCNNCNrrsCtr,CCpqCNNpq,CCCpqrCNpr,CCNNpqCpq,CCNpqCNCrpq,CCNpqCNCrCspq,CCpqCCNppq,CNCpqCrCsp,CCpCpqCpq,CCpqCCNqpq,CpCCpqq,CCpCqrCqCpr"; vector startList_vec = FctHelper::stringSplit(startList, ","); set usedConclusions = set(startList_vec.begin(), startList_vec.end()); @@ -511,7 +777,1040 @@ int main(int argc, char* argv[]) { // argc = 1 + N, argv = { , , } while (oldSmallestResultLen != smallestResultLen); return 0; #endif //### -#if 0 //### ./searchShorterSubproofs ; search shorter proofs for conclusions used in a given proof summary ; TODO: Use as concept for a low-memory proof reduction function. +#if 0 //### ./formulasFromProofs ; intermediate conclusions from comma-separated proofs in normal polish notation + // NOTE: Requires '#include '. + // e.g. ./formulasFromProofs CpCqp,CCpCqrCCpqCpr,CCNpNqCqp,CLpp,CLCpqCLpLq,CNLNpLNLNp data/s5-dProofs.txt data/s5-formulas.txt + if (argc <= 3) { + cerr << "Need 1. axioms in normal Polish notation, 2. path to file with comma-separated proofs, and 3. path for output file." << endl; + return 0; + } + vector axioms = FctHelper::stringSplit(argv[1], ","); + DlProofEnumerator::resetRepresentativesFor(&axioms, true); + string inputFile = argv[2]; // e.g. "D:/Dropbox/eclipse/pmGenerator/Release64bitDynamicGcc11.3/data/w5-in.txt" + string outputFile = argv[3]; + DlProofEnumerator::printProofs( { }, DlFormulaStyle::PolishStandard, false, true, 1, true, &inputFile, &outputFile, true); + string fileString; + if (!FctHelper::readFile(outputFile, fileString)) { + cerr << "Invalid file path" << endl; + return 0; + } + fileString = regex_replace(fileString, regex(" [^\n]+\n"), ""); + fileString = regex_replace(fileString, regex("\\[[0-9]+\\] "), ""); + fileString = regex_replace(fileString, regex(" = [^\n]+\n"), "\n"); + //#cout << fileString << flush; + vector formulas = FctHelper::stringSplit(fileString, "\n"); + stringstream ss; + bool first = true; + for (const string& f : formulas) + if (!f.empty()) { + if (first) + first = false; + else + ss << ","; + ss << f; + } + ss << "\n"; + if (!FctHelper::writeToFile(outputFile, ss.str())) + cerr << "Failed." << endl; + else + cout << ss.str().length() << " bytes written to \"" << outputFile << "\"." << endl; + return 0; +#endif //### +#if 0 //### ./formulasFromSummaries ; extract comma-separated ordered list of formulas in normal Polish notation that are used in the given proof summaries + // NOTE: Requires '#include "logic/DlCore.h"' + // e.g. ./formulasFromSummaries data/summary-formulas.txt data/m.txt,data/w1.txt,data/w2.txt,data/w3.txt,data/w4.txt,data/w5.txt,data/w6.txt,data/s5.txt + if (argc <= 2) { + cerr << "Need 1. path for output file, and 2. comma-separated list of paths to proof summary files." << endl; + return 0; + } + string tmpFile = "data/tmp.txt"; + string outputFile = argv[1]; + vector inputFiles = FctHelper::stringSplit(argv[2], ","); + set formulas; + for (const string& proofSummaryFile : inputFiles) { + string filterForTheorems = "."; + DlProofEnumerator::recombineProofSummary(proofSummaryFile, true, nullptr, 1, SIZE_MAX, true, false, &filterForTheorems, true, SIZE_MAX, 134217728, false, false, &tmpFile, false); + set conclusions; + vector requiredIntermediateResults; + DlProofEnumerator::convertProofSummaryToAbstractDProof(tmpFile, nullptr, nullptr, &requiredIntermediateResults, true, true, false); + for (const DRuleParser::AxiomInfo& info : requiredIntermediateResults) { + const shared_ptr& f = info.refinedAxiom; + conclusions.emplace(DlCore::toPolishNotation(f)); + //conclusions.emplace(DlCore::toPolishNotation_noRename(f)); + } + cout << "Found " << conclusions.size() << " different intermediate conclusion" << (conclusions.size() == 1 ? "" : "s") << " in \"" << proofSummaryFile << "\"." << endl; + formulas.insert(conclusions.begin(), conclusions.end()); + } + cout << "In total, found " << formulas.size() << " different formula" << (formulas.size() == 1 ? "" : "s") << " in " << inputFiles.size() << " file" << (inputFiles.size() == 1 ? "" : "s") << "." << endl; + if (!formulas.empty()) + cout << "Smallest formula: " << *formulas.begin() << "\n (length " << formulas.begin()->length() << ")\nLargest formula: " << *formulas.rbegin() << "\n (length " << formulas.rbegin()->length() << ")" << endl; + string result = FctHelper::setString(formulas, { }, { }, ","); + FctHelper::writeToFile(outputFile, result); + cout << result.length() << " bytes written to \"" << outputFile<< "\"." << endl; + return 0; +#endif //### +#if 0 //### ./gkcHintsFromProof ; extract hints from GKC output, parsed with "strategy":["hyper"] + // e.g. ./gkcHintsFromProof gkc-out.txt + if (argc <= 1) { + cerr << "Need path to file with GKC output (with strategy 'hyper' used)." << endl; + return 0; + } + string content; + if (!FctHelper::readFile(argv[1]/*"D:/Dropbox/eclipse/pmGenerator/Release64bitDynamicGcc11.3/gkc-out.txt"*/, content)) { + cerr << "Invalid file path" << endl; + return 0; + } + boost::replace_all(content, "\r\n", "\n"); + vector lines = FctHelper::stringSplit(content, "\n"); + { + vector _lines; + for (const string& line : lines) + if (line.starts_with(" ") && line.find('-') == string::npos) { + string::size_type pos = line.find(" t("); + if (pos != string::npos) + _lines.push_back(line.substr(pos + 1)); + } + lines = _lines; + } + cout << "formulas(hints).\n\n" << FctHelper::stringJoin("\n", lines) << "\n\nend_of_list." << endl; + return 0; +#endif //### +#if 0 //### ./importExtractionFromDB ; import proofs from DB into proof files + // NOTE: Requires '#include ' and '#include ' before 'using namespace' directives. + // e.g. ./importExtractionFromDB CpCCqCprCCNrCCNstqCsr data/exs/w2-topDB-shaky.txt + // ./importExtractionFromDB CpCCqCprCCNrCCNstqCsr data/exs/w2-topDB-modifications-shaky.txt + if (argc <= 2) { + cerr << "Need 1. axioms in normal Polish notation, and 2. path to proof database. Optional: 3. necessitation limit (or -1)" << endl; + return 0; + } + vector axioms = FctHelper::stringSplit(argv[1], ","); + string pathProofDB = argv[2]; + uint32_t necessitationLimit = 0; + if (argc >= 4) { + try { + necessitationLimit = stoi(argv[3]); + cout << "Necessitation limit set to " << necessitationLimit << "." << endl; + } catch (...) { + cerr << "Invalid format for \"3. necessitation limit (or -1)\"." << endl; + return 0; + } + } + vector dProofsInFile = DRuleParser::readFromMmsolitaireFile(pathProofDB, nullptr, true); + DlProofEnumerator::resetRepresentativesFor(&axioms, true, necessitationLimit); + vector conclusionsForFile(dProofsInFile.size()); + + chrono::time_point startTime = chrono::steady_clock::now(); + cout << "Going to parse " << dProofsInFile.size() << " D-proofs." << endl; + tbb::parallel_for(size_t(0), dProofsInFile.size(), [&dProofsInFile, &conclusionsForFile](size_t i) { // NOTE: Counts from i = start := 0 until i < end := representativesOfWordLengthLimit.size(). + const string& dProof = dProofsInFile[i]; + vector rawParseData = DRuleParser::parseDProof_raw(dProof, DlProofEnumerator::getCustomAxioms(), 1); + const shared_ptr& conclusion = get<0>(rawParseData.back().second).back(); + conclusionsForFile[i] = DlCore::toPolishNotation_noRename(conclusion); + }); + //#cout << "conclusionsForFile = " << conclusionsForFile[0] << ", " << conclusionsForFile[1] << ", " << conclusionsForFile[2] << ", ..." << endl; + cout << FctHelper::durationStringMs(chrono::duration_cast(chrono::steady_clock::now() - startTime)) << " taken to parse " << dProofsInFile.size() << " D-proofs." << endl; + + string axiomProofList; + for (size_t i = 1; i <= axioms.size(); i++) { + if (i > 1) + axiomProofList += ","; + axiomProofList += to_string(i); + } + //#cout << "axiomProofList = " << axiomProofList << endl; + string targetDir; + DlProofEnumerator::extractConclusions(ExtractionMethod::ProofSystemFromString, 0, &axiomProofList, true, false, 0, 0, true, &targetDir); + targetDir += "dProofs-withConclusions/"; + if (!FctHelper::ensureDirExists(targetDir)) { + cerr << "Created extraction folder, but failed to create \"" << targetDir << "\"." << endl; + return 0; + } + cout << "Created target directory at \"" << targetDir << "\"." << endl; + + cout << "Going to arrange " << dProofsInFile.size() << " D-proofs with conclusions for new proof files." << endl; + tbb::concurrent_map> collections; + startTime = chrono::steady_clock::now(); + tbb::parallel_for(size_t(0), dProofsInFile.size(), [&dProofsInFile, &conclusionsForFile, &collections](size_t i) { // NOTE: Counts from i = start := 0 until i < end := representativesOfWordLengthLimit.size(). + const string& dProof = dProofsInFile[i]; + collections[dProof.length()][dProof] = conclusionsForFile[i]; + }); + cout << FctHelper::durationStringMs(chrono::duration_cast(chrono::steady_clock::now() - startTime)) << " taken to arrange " << dProofsInFile.size() << " D-proofs into " << collections.size() << " collections." << endl; + map collectionSizes; + for (tbb::concurrent_map>::iterator it = collections.begin(); it != collections.end(); ++it) + collectionSizes.emplace(it->first, it->second.size()); + cout << "\":\"-pairs: " << FctHelper::mapStringF(collectionSizes, [](const pair& p) { return to_string(p.first) + ":" + to_string(p.second); }, { }, { }) << endl; + const uint32_t c = necessitationLimit ? 1 : 2; + if (!collections.empty()) { + size_t maxProofLen = prev(collectionSizes.end())->first; + //#cout << "maxProofLen = " << maxProofLen << endl; + size_t oldColSize = collections.size(); + for (size_t len = 1 + c; len < maxProofLen; len += c) + collections[len]; + if (oldColSize < collections.size()) + cout << "Inserted " << collections.size() - oldColSize << " missing (empty) collections. There are now " << collections.size() << " collections." << endl; + } + + cout << "Going to write " << dProofsInFile.size() << " D-proofs with conclusions into new proof files." << endl; + startTime = chrono::steady_clock::now(); + mutex mtx_cout; + tbb::parallel_for(collections.range(), [&](tbb::concurrent_map>::range_type& range) { + chrono::time_point startTime; + for (tbb::concurrent_map>::const_iterator it_range = range.begin(); it_range != range.end(); ++it_range) { + string targetFile = "dProofs" + to_string(it_range->first) + ".txt"; + const tbb::concurrent_map& collection = it_range->second; + startTime = chrono::steady_clock::now(); + { + ofstream fout(targetDir + targetFile, fstream::out | fstream::binary); // print directly to file to save memory + for (tbb::concurrent_map::const_iterator it = collection.begin(); it != collection.end(); ++it) { + if (it != collection.begin()) + fout << "\n"; + fout << it->first << ":" << it->second; // : + } + } + chrono::microseconds dur = chrono::duration_cast(chrono::steady_clock::now() - startTime); + lock_guard lock(mtx_cout); + cout << FctHelper::durationStringMs(dur) << " taken to write " << collection.size() << " D-proofs with conclusions to " << targetFile << "." << endl; + } + }); + cout << FctHelper::durationStringMs(chrono::duration_cast(chrono::steady_clock::now() - startTime)) << " taken to write all " << dProofsInFile.size() << " D-proofs." << endl; + return 0; +#endif //### +#if 0 //### ./logVerifier ; log file verifier + // NOTE: Requires '#include ' and '#include '. + // To be called from project directory (which contains 'log/') to run log file checks. + for (const filesystem::directory_entry& entry : filesystem::recursive_directory_iterator(filesystem::current_path())) { + if (entry.path().string().find("\\log\\") == string::npos && entry.path().string().find("/log/") == string::npos) + continue; + string file; + string ext = entry.path().extension().string(); + if (ext == ".txt") { + string filename = entry.path().filename().string(); + if (filename == "jobsRSS.txt") { + cout << entry.path().string() << ":" << endl; + if (!FctHelper::readFile(entry.path().string(), file)) { + cerr << "[FATAL ERROR] Failed to read." << endl; + return 1; + } + regex r("\\n.*:\\n"); + size_t lastNum = 0; + for (sregex_iterator it = sregex_iterator(file.begin(), file.end(), r); it != sregex_iterator(); ++it) { + const smatch& m = *it; + string numstr = m.str().substr(1, m.str().length() - 3); + string::size_type dash = numstr.find('-'); + size_t num; + try { + num = stoll(dash == string::npos ? numstr : numstr.substr(0, dash)); + } catch (...) { + cerr << "[ERROR] \"" << numstr << "\" should be a proof length." << endl; + continue; + } + if (lastNum > num) + cerr << "[WARNING] " << num << " < " << lastNum << " ; bad ordering" << endl; + string remainder = file.substr(m.position() + m.length()); + string::size_type posLF = remainder.find('\n'); + string sacct = posLF == string::npos ? remainder : remainder.substr(0, posLF); + smatch m_; + if (posLF == string::npos || !regex_match(sacct, m_, regex("\\$ sacct -j [0-9]{8} --format=\"JobID,Partition,AllocCPUS,State,ExitCode,Elapsed,MaxRSS(%[0-9]+|)\""))) + cerr << "[WARNING] Bad or missing sacct command after \"" << numstr << "\": " << sacct << endl; + else if (remainder.substr(posLF, 3) != "\n> ") + cerr << "[WARNING] Bad or missing sacct output after \"" << numstr << "\"." << endl; + else { + string::size_type pos = posLF + 3; + vector sacctOutput; + size_t len = 0; + while ((posLF = remainder.find('\n', pos)) != string::npos) { + sacctOutput.push_back(remainder.substr(pos, posLF - pos)); + if (!len) + len = sacctOutput.back().length(); + else if (sacctOutput.back().length() != len) + cerr << "[WARNING] Uneven sacct output after \"" << numstr << "\"." << endl; + if (remainder.substr(posLF, 3) != "\n> ") + break; + pos = posLF + 3; + } + //#cout << sacctOutput.size() << " lines:\n" << FctHelper::vectorString(sacctOutput, { }, { }, "\n") << endl; + string relevantOutput; + if (sacctOutput.size() == 6) + relevantOutput = sacctOutput[5]; + else if (sacctOutput.size() == 5) + relevantOutput = sacctOutput[3]; + else { + cerr << "[ERROR] Found " << sacctOutput.size() << " lines for sacct output." << endl; + continue; + } + if (!sacctOutput[0].ends_with("MaxRSS ")) { + cerr << "[ERROR] First line of sacct output should end with \"MaxRSS \"." << endl; + continue; + } + string::size_type end; + string::size_type begin = relevantOutput.substr(0, (end = relevantOutput.find_last_not_of(' '))).find_last_of(' '); + relevantOutput = relevantOutput.substr(begin + 1, end - begin); + if (relevantOutput.empty() || (relevantOutput.back() != 'K' && relevantOutput.back() != 'M')) { + cerr << "[ERROR] Found \"" << relevantOutput << "\" as relevant MaxRSS entry." << endl; + continue; + } + string correctPrefix = relevantOutput.substr(0, relevantOutput.length() - 1); + size_t ram; + try { + ram = stoll(correctPrefix); + } catch (...) { + cerr << "[ERROR] \"" << correctPrefix << "\" should be memory number." << endl; + continue; + } + bool K = relevantOutput.back() == 'K'; + correctPrefix += string(" / 1024^") + (K ? "2" : "1") + " = " + to_string(ram / (K ? 1024 * 1024 : 1024)); + pos = posLF + 1; + posLF = remainder.find('\n', pos + 1); + string equation = remainder.substr(pos, posLF - pos); + string correctPostfix = " ≈ " + FctHelper::round((long double) ram / (K ? 1024 * 1024 : 1024), 2); + if (!equation.starts_with(correctPrefix) || !equation.ends_with(correctPostfix)) { + cerr << "[WARNING] Is:" << equation << endl; + cerr << " Should:" << correctPrefix << "[...]" << correctPostfix << endl; + } + //#cerr << "[NOTE] Should:" << correctPrefix << "[...]" << correctPostfix << endl; + } + } + } + } else if (ext == ".log") { + string filename = entry.path().filename().string(); + if (filename.starts_with("dProof")) { // e.g. "dProofs29_6node_288cpu.log" + cout << entry.path().string() << ":" << endl; + if (!FctHelper::readFile(entry.path().string(), file)) { + cerr << "[FATAL ERROR] Failed to read." << endl; + return 1; + } + size_t node = 1; + size_t cpu = 0; + smatch m; + if (regex_search(filename, m, regex("_[0-9]+node"))) + node = stoll(m.str().substr(1, m.str().length() - 5)); + if (regex_search(filename, m, regex("_[0-9]+cpu"))) + cpu = stoll(m.str().substr(1, m.str().length() - 4)); + else { + cerr << "[ERROR] No cpu amount in file name." << endl; + continue; + } + vector lines = FctHelper::stringSplit(file, "\n"); + if (lines.size() < 9) { + cerr << "[ERROR] Bad line amount." << endl; + continue; + } + if (!lines[1].starts_with(" The run was executed on " + (node == 1 ? "a" : to_string(node)))) { + cerr << "[ERROR] Bad node amount." << endl; + continue; + } + while (lines[lines.size() - 2].starts_with("srun: ") || lines[lines.size() - 2].starts_with("slurmstepd: ")) { + lines.pop_back(); + lines.back() = ""; + if (lines.size() < 9) { + cerr << "[ERROR] Bad line amount." << endl; + continue; + } + } + size_t cpuPerNode = cpu / node; + if (cpuPerNode * node != cpu) { + cerr << "[ERROR] Bad cpu / node amounts." << endl; + continue; + } else if (lines[2].find("(" + to_string(cpuPerNode) + " cores total per node)") == string::npos) { + cerr << "[ERROR] Missing or invalid 'cores total per node'." << endl; + continue; + } else if (node > 1 && file.find("; " + to_string(node) + " processes]") == string::npos) { + cerr << "[ERROR] Missing or invalid process amount via MPI." << endl; + continue; + } else if (!lines[4].starts_with(" Wall-clock time: ")) { + cerr << "[ERROR] Missing wall-clock time." << endl; + continue; + } else if (!lines[5].starts_with(" CPU utilization: ")) { + cerr << "[ERROR] Missing CPU utilization." << endl; + continue; + } else if (lines[6].find(": Process started") == string::npos) { + cerr << "[ERROR] Missing 'Process started'." << endl; + continue; + } else if (!lines.back().empty()) { + cerr << "[ERROR] Missing final empty line." << endl; + continue; + } else if (lines[lines.size() - 2].find(": Process terminated") == string::npos) { + cerr << "[ERROR] Missing 'Process terminated'." << endl; + continue; + } + string ct = lines[4].substr(19); + string::size_type ct_offset; + ct = ct.substr(ct_offset = ct.find_first_not_of(' ')); + ct = ct.substr(0, ct.find_first_of(' ')); + string ut = lines[5].substr(19); + string::size_type ut_offset; + ut = ut.substr(ut_offset = ut.find_first_not_of(' ')); + if (ut_offset != 0) { + cerr << "[ERROR] Extra offset for CPU utilization." << endl; + continue; + } + ut = ut.substr(0, ut.find_first_of(' ')); + if (!lines[4].substr(19 + ct_offset + ct.length()).starts_with(" h")) { + cerr << "[ERROR] Missing or invalid ' h'." << endl; + continue; + } else if (!lines[5].substr(19 + ut.length()).starts_with(" core-h")) { + cerr << "[ERROR] Missing or invalid ' core-h'." << endl; + continue; + } + string::size_type ct_dot = ct.find('.') != string::npos ? lines[4].find(ct, 19) + ct.find('.') : lines[4].find(ct, 19) + ct.size(); + string::size_type ut_dot = ut.find('.') != string::npos ? lines[5].find(ut, 19) + ut.find('.') : lines[5].find(ut, 19) + ut.size(); + if (ct_dot != ut_dot) { + cerr << "[ERROR] Uneven offsets." << endl; + continue; + } + string::size_type ct_dots = ct.find("…"); + string::size_type ut_dots = ut.find("…"); + ct = ct.substr(0, ct_dots); + ut = ut.substr(0, ut_dots); + if (ut_dots != string::npos) + ut += ut.substr(ut.size() - 3); + long double ut_num = stold(ut); + stringstream ss_ct_calc; + ss_ct_calc << setprecision(static_cast(ct.length())) << ut_num / cpu; + if ((ct_dots == string::npos && ss_ct_calc.str() != ct) || (ct_dots != string::npos && ss_ct_calc.str().substr(0, ct.length() - 1) != ct.substr(0, ct.length() - 1))) + cerr << "[WARNING] Unmatching read and calculated wall-clock times " << (ct_dots == string::npos ? ss_ct_calc.str() + " and " + ct : ss_ct_calc.str().substr(0, ct.length() - 1) + " and " + ct.substr(0, ct.length() - 1)) << "." << endl; + size_t coreSeconds = static_cast(round(ut_num * 3600)); + string dateStart = lines[6].substr(0, lines[6].find(": Process started")).substr(4); + string dateEnd = lines[lines.size() - 2].substr(0, lines[lines.size() - 2].find(": Process terminated")).substr(4); + auto strToTime = [](string s) -> chrono::time_point { if (s[4] == ' ') s[4] = '0'; tm time = { }; stringstream ss(s); ss >> get_time(&time, "%b %d %H:%M:%S %Y"); return chrono::system_clock::from_time_t(mktime(&time)); }; + chrono::seconds durSecondsProvided = chrono::duration_cast(strToTime(dateEnd) - strToTime(dateStart)); + if (coreSeconds != durSecondsProvided.count() * cpu) + cerr << "[WARNING] Unmatching read and calculated core-s durations " << coreSeconds << " and " << durSecondsProvided.count() * cpu << ". (from dateStart " << dateStart << " to " << dateEnd << " are " << durSecondsProvided.count() << " seconds, and " << durSecondsProvided.count() << " * " << cpu << " = " << durSecondsProvided.count() * cpu << ")" << endl; + } else { // e.g. "29.log" or "31-31.log" + smatch m; + if (regex_match(filename, m, regex("[0-9]+(-[0-9]+|)\\.log"))) { + cout << entry.path().string() << ":" << endl; + if (!FctHelper::readFile(entry.path().string(), file)) { + cerr << "[FATAL ERROR] Failed to read." << endl; + return 1; + } + vector lines = FctHelper::stringSplit(file, "\n"); + if (lines.size() < 20) { + cerr << "[ERROR] Bad line amount." << endl; + continue; + } + if (!lines[1].starts_with(" The run was executed on a ")) { + cerr << "[ERROR] Bad node amount." << endl; + continue; + } + while (lines[lines.size() - 2].starts_with("srun: ") || lines[lines.size() - 2].starts_with("slurmstepd: ")) { + lines.pop_back(); + lines.back() = ""; + if (lines.size() < 20) { + cerr << "[ERROR] Bad line amount." << endl; + continue; + } + } + string maxRssBehind = lines[9].substr(lines[9].size() - 2); + if (maxRssBehind != "K " && maxRssBehind != "M ") { + cerr << "[ERROR] Unknown suffix \"" << maxRssBehind << "\"." << endl; + continue; + } else if (!regex_match(lines[5], m, regex(" \\$ sacct --format=\"JobID,Partition,AllocCPUS,State,ExitCode,Elapsed,MaxRSS(%[0-9]+|)\""))) { + cerr << "[ERROR] Missing or invalid sacct command." << endl; + continue; + } else if (lines[6].length() != lines[7].length() || lines[7].length() != lines[8].length() || lines[8].length() != lines[9].length() || lines[9].length() != lines[10].length()) { + cerr << "[ERROR] Invalid sacct output." << endl; + continue; + } else if (lines[12].find(": Process started") == string::npos) { + cerr << "[ERROR] Missing 'Process started'." << endl; + continue; + } else if (!lines.back().empty()) { + cerr << "[ERROR] Missing final empty line." << endl; + continue; + } else if (lines[lines.size() - 2].find(": Process terminated") == string::npos) { + cerr << "[ERROR] Missing 'Process terminated'." << endl; + continue; + } else if (lines[0].starts_with("( This log file was generated by executing 'pmGenerator -c")) { + string::size_type pos = lines[0].find(" -s ", 58); + if (pos != string::npos) { + string system = lines[0].substr(pos + 4, lines[0].find_first_of(' ', pos + 4) - pos - 4); + if (lines[0].find(" -s " + system + " --iterate -u'") == string::npos) + cerr << "[WARNING] Missing or invalid call information." << endl; + if (entry.path().string().find(system + "\\") == string::npos && entry.path().string().find(system + "/") == string::npos) + cerr << "[WARNING] System " << system << " missing from path." << endl; + if (!lines[14].starts_with("1. resetRepresentativesFor(\"" + system + "\"")) { + cerr << "[ERROR] Conflicting system declaration and selection." << endl; + continue; + } + } else + cerr << "[WARNING] Missing or invalid '-s '." << endl; + } else { + cerr << "[NOTE] No 'pmGenerator -c [...]' comment." << endl; + if (lines[0].find("'DlProofEnumerator::countNextIterationAmount(false, true)'") == string::npos && lines[0].find("'pmGenerator --iterate -u'") == string::npos) + cerr << "[WARNING] Missing or invalid call information." << endl; + } + string correctPrefix = lines[9].substr(0, lines[9].size() - 2); + correctPrefix = correctPrefix.substr(correctPrefix.find_last_of(' ') + 1); + size_t ram; + try { + ram = stoll(correctPrefix); + } catch (...) { + cerr << "[ERROR] \"" << correctPrefix << "\" should be memory number." << endl; + continue; + } + bool K = maxRssBehind == "K "; + correctPrefix = " By " + to_string(ram) + (K ? " KiB" : " MiB") + " = (" + to_string(ram) + " / 1024^" + (K ? "2" : "1") + ") GiB = " + to_string(ram / (K ? 1024 * 1024 : 1024)); + string correctPostfix = " GiB, it used approximately " + FctHelper::round((long double) ram / (K ? 1024 * 1024 : 1024), 2) + " gibibytes of memory. )"; + if (!lines[11].starts_with(correctPrefix) || !lines[11].ends_with(correctPostfix)) { + cerr << "[WARNING] Is:" << lines[11] << endl; + cerr << " Should:" << correctPrefix << "[...]" << correctPostfix << endl; + } + } + } + } + } + return 0; +#endif //### +#if 0 //### ./prover9Formulas ; formulas in normal polish notation to OTTER / Prover9 format + // e.g. ./prover9Formulas CpCqp,CCpCqrCCpqCpr,CCNpNqCqp,CLpp,CLCpqCLpLq,CNLNpLNLNp + if (argc <= 1) { + cerr << "Need formulas in normal polish notation." << endl; + return 0; + } + vector formulas = FctHelper::stringSplit(argv[1], ","); + for (const string& f : formulas) + cout << prover9Formula(f) << endl; + return 0; +#endif //### +#if 0 //### ./prover9HintsFromProofs ; intermediate conclusions from comma-separated proofs in normal polish notation to OTTER / Prover9 format + // NOTE: Requires '#include '. + // e.g. ./prover9HintsFromProofs CpCqp,CCpCqrCCpqCpr,CCNpNqCqp,CLpp,CLCpqCLpLq,CNLNpLNLNp data/s5-dProofs.txt data/s5-formulas-p9.txt + if (argc <= 3) { + cerr << "Need 1. axioms in normal Polish notation, 2. path to file with comma-separated proofs, and 3. path for output file." << endl; + return 0; + } + vector axioms = FctHelper::stringSplit(argv[1], ","); + DlProofEnumerator::resetRepresentativesFor(&axioms, true); + string inputFile = argv[2]; // e.g. "D:/Dropbox/eclipse/pmGenerator/Release64bitDynamicGcc11.3/data/w5-in.txt" + string outputFile = argv[3]; + DlProofEnumerator::printProofs( { }, DlFormulaStyle::PolishStandard, false, true, 1, true, &inputFile, &outputFile, true); + string fileString; + if (!FctHelper::readFile(outputFile, fileString)) { + cerr << "Invalid file path" << endl; + return 0; + } + fileString = regex_replace(fileString, regex(" [^\n]+\n"), ""); + fileString = regex_replace(fileString, regex("\\[[0-9]+\\] "), ""); + fileString = regex_replace(fileString, regex(" = [^\n]+\n"), "\n"); + //#cout << fileString << flush; + vector formulas = FctHelper::stringSplit(fileString, "\n"); + stringstream ss; + ss << "formulas(hints).\n\n"; + for (const string& f : formulas) + if (!f.empty()) + ss << prover9Formula(f) << "\n"; + ss << "\nend_of_list.\n"; + if (!FctHelper::writeToFile(outputFile, ss.str())) + cerr << "Failed." << endl; + else + cout << ss.str().length() << " bytes written to \"" << outputFile << "\"." << endl; + return 0; +#endif //### +#if 0 //### ./prover9HintsFromSeparateProofs ; individual intermediate conclusions from comma-separated proofs in normal polish notation to OTTER / Prover9 format + // NOTE: Requires '#include '. + // e.g. ./prover9HintsFromSeparateProofs CpCqp,CCpCqrCCpqCpr,CCNpNqCqp,CLpp,CLCpqCLpLq,CNLNpLNLNp data/s5-dProofs.txt data/s5-formulas-p9.txt + if (argc <= 3) { + cerr << "Need 1. axioms in normal Polish notation, 2. path to file with comma-separated proofs, and 3. path for output file." << endl; + return 0; + } + vector axioms = FctHelper::stringSplit(argv[1], ","); + DlProofEnumerator::resetRepresentativesFor(&axioms, true); + string inputFile = argv[2]; // e.g. "D:/Dropbox/eclipse/pmGenerator/Release64bitDynamicGcc11.3/data/w5-in.txt" + string outputFile = argv[3]; + //### + string fileString; + chrono::time_point startTime = chrono::steady_clock::now(); + if (!FctHelper::readFile(inputFile, fileString)) + throw runtime_error("Failed to read file \"" + inputFile + "\"."); + string::size_type len = fileString.length(); + + // Erase all '\r', '\n', '\t', ' ', and lines starting with '%'. ; NOTE: Much faster than using regular expressions. + bool startOfLine = true; + bool erasingLine = false; + fileString.erase(remove_if(fileString.begin(), fileString.end(), [&](const char c) { + switch (c) { + case '\r': + case '\n': + startOfLine = true; + erasingLine = false; + return true; + case '\t': + case ' ': + startOfLine = false; + return true; + case '%': + if (startOfLine) { + startOfLine = false; + erasingLine = true; + return true; + } else { + startOfLine = false; + return false; + } + default: + startOfLine = false; + return erasingLine; + } + }), fileString.end()); + + vector dProofsFromFile = FctHelper::stringSplit(fileString, ","); + cout << FctHelper::durationStringMs(chrono::duration_cast(chrono::steady_clock::now() - startTime)) << " taken to read and convert " << len << " bytes from \"" << inputFile << "\"." << endl; + vector> allFormulas; + //### + for (const string& dProof : dProofsFromFile) { + DlProofEnumerator::printProofs( { dProof }, DlFormulaStyle::PolishStandard, false, true, 1, true, nullptr, &outputFile, true); + string fileString; + if (!FctHelper::readFile(outputFile, fileString)) { + cerr << "Invalid file path" << endl; + return 0; + } + fileString = regex_replace(fileString, regex(" [^\n]+\n"), ""); + fileString = regex_replace(fileString, regex("\\[[0-9]+\\] "), ""); + fileString = regex_replace(fileString, regex(" = [^\n]+\n"), "\n"); + //#cout << fileString << flush; + allFormulas.resize(allFormulas.size() + 1); + allFormulas.back() = FctHelper::stringSplit(fileString, "\n"); + } + stringstream ss; + ss << "formulas(hints).\n\n"; + bool first = true; + for (const vector& formulas : allFormulas) { + if (first) + first = false; + else + ss << "\n"; + for (const string& f : formulas) + if (!f.empty()) + ss << prover9Formula(f) << "\n"; + } + ss << "\nend_of_list.\n"; + if (!FctHelper::writeToFile(outputFile, ss.str())) + cerr << "Failed." << endl; + else + cout << ss.str().length() << " bytes written to \"" << outputFile << "\"." << endl; + return 0; +#endif //### +#if 0 //### ./prover9HintsFromTopList ; create hints from smallest conclusion lists + // NOTE: Requires '#include '. + if (argc <= 3) { + //cerr << "Need 1. axioms in normal Polish notation, 2. path to file with comma-separated proofs, and 3. path for output file." << endl; + cerr << "Need 1. axioms in normal Polish notation, 2. path to smallest conclusion list file, and 3. path for output file." << endl; + return 0; + } + vector axioms = FctHelper::stringSplit(argv[1], ","); + DlProofEnumerator::resetRepresentativesFor(&axioms, true); + // default: + // "data/top1000SmallestConclusions_1to39Steps.txt" + // m: + // ./prover9HintsFromTopList CCCCCpqCNrNsrtCCtpCsp data/478804cd4793bc7f87041d99326aff4595662146d8a68175dda22bed/top1000SmallestConclusions_1to83Steps.txt data/m-topHints.txt + // w1: + // ./prover9HintsFromTopList CCpCCNpqrCsCCNtCrtCpt data/02974777ff5f71e12ef58ccebedeef133584aad66e06a2a13b2b4b2c/top1000SmallestConclusions_1to161Steps.txt data/w1-topHints.txt + // w2: + // ./prover9HintsFromTopList CpCCqCprCCNrCCNstqCsr data/db25c49b13fec26ecf32e40bde65e4e2273f23b3c022cfd0fa986cff/top1000SmallestConclusions_1to43Steps.txt data/w2-topHints.txt + // w3: + // ./prover9HintsFromTopList CpCCNqCCNrsCptCCtqCrq data/0df075acc552c62513b49b6ed674bfcde1c1b018e532c665be229314/top1000SmallestConclusions_1to73Steps.txt data/w3-topHints.txt + // w4: + // ./prover9HintsFromTopList CpCCNqCCNrsCtqCCrtCrq data/fe7117b8aad7634fae344172b9fee05f77e5e23b035276b17d8c6ec9/top1000SmallestConclusions_1to169Steps.txt data/w4-topHints.txt + // w5: + // ./prover9HintsFromTopList CCpqCCCrCstCqCNsNpCps data/1d5f27494b1a2312e223b7f8dd3551abf717590ceef694c08dcbed72/top1000SmallestConclusions_1to55Steps.txt data/w5-topHints.txt + // w6: + // ./prover9HintsFromTopList CCCpqCCCNrNsrtCCtpCsp data/7f473b6ba952b3deadf36cd7f1c4b5286ef32fef64808d14fff70a69/top1000SmallestConclusions_1to95Steps.txt data/w6-topHints.txt + // NOTE: Proof translations from extracted modified systems are not supported. + string inputFile = argv[2]; + string outputFile = argv[3]; + string content; + if (!FctHelper::readFile(inputFile, content)) { + cerr << "Invalid file path" << endl; + return 0; + } + vector dProofs; + for (string line : FctHelper::stringSplitAndSkip(content, "\n", "%", true)) { + string::size_type posC = line.find_first_not_of(' ', line.find(' ', line.find_first_not_of(' ', line.find(' ', line.find_first_not_of(' ', line.find(' ') + 1) + 1) + 1) + 1) + 1); + string::size_type posD = line.find(':', posC + 1); + dProofs.push_back(line.substr(posC, posD - posC)); + } + //#cout << FctHelper::vectorString(dProofs) << endl; + + //#for (const string& dProof : dProofs) + //# DlProofEnumerator::printProofs( { dProof }, DlFormulaStyle::PolishStandard, false, true, 2, true, nullptr, nullptr, true); + //#exit(0); + + DlProofEnumerator::printProofs(dProofs, DlFormulaStyle::PolishStandard, false, true, 1, true, nullptr, &outputFile, true); + string fileString; + if (!FctHelper::readFile(outputFile, fileString)) { + cerr << "Invalid file path" << endl; + return 0; + } + fileString = regex_replace(fileString, regex(" [^\n]+\n"), ""); + fileString = regex_replace(fileString, regex("\\[[0-9]+\\] "), ""); + fileString = regex_replace(fileString, regex(" = [^\n]+\n"), "\n"); + //#cout << fileString << flush; + vector formulas = FctHelper::stringSplit(fileString, "\n"); + stringstream ss; + ss << "formulas(hints).\n\n"; + for (const string& f : formulas) + if (!f.empty()) + ss << prover9Formula(f) << "\n"; + ss << "\nend_of_list.\n"; + if (!FctHelper::writeToFile(outputFile, ss.str())) + cerr << "Failed." << endl; + else + cout << ss.str().length() << " bytes written to \"" << outputFile << "\"." << endl; + return 0; +#endif //### +#if 0 //### ./prover9OutputConverter ; parse proofs from Prover9 output + // NOTE: Requires '#include '. + // e.g. ./prover9OutputConverter w2-A2.out data/w2-A2.txt + if (argc <= 1) { + cerr << "Need 1. path to file with Prover9 output. Optional: 2. output file path" << endl; + return 0; + } + string _outputFile; + string* outputFile = nullptr; + if (argc >= 3) { + _outputFile = argv[2]; + outputFile = &_outputFile; + } + + // 1. Extract proof steps. + map formulas; + map reasons; + map> labels; + set finalIndices; + { + string content; + if (!FctHelper::readFile(argv[1]/*"D:/Dropbox/eclipse/pmGenerator/Release64bitDynamicGcc11.3/w2-proof-l2_1.txt"*/, content)) { + cerr << "Invalid file path" << endl; + return 0; + } + boost::replace_all(content, "\r\n", "\n"); + stringstream ss(content); + string line; + smatch m; + while (getline(ss, line)) + if (regex_search(line, m, regex("[0-9]+ ")) && !m.position()) { + string id_str = line.substr(0, m.length() - 1); + size_t id = strtoull(id_str.c_str(), nullptr, 10); + line = line.substr(FctHelper::digitsNum_uint64(id) + 1); + //#cout << "[" << id << "] " << line << endl; + + string::size_type pos = line.find(" "); + string step = line.substr(0, pos); + string type = line.substr(pos + 2); + //#cout << "[" << id << "] \"" << step << "\" ; \"" << type << "\"" << endl; + bool isPos = step.starts_with("t("); + bool isNeg = step.starts_with("-t("); + bool isEnd = step.starts_with("$F"); + if (!isPos && !isNeg && !isEnd) { + cerr << "[" << id << "] Step \"" << step << "\" starts with neither \"t(\", nor \"-t(\", nor \"$F\"." << endl; + return 0; + } + if (!step.ends_with(".")) { + cerr << "[" << id << "] Step \"" << step << "\" does not end with '.'." << endl; + return 0; + } + if (!type.starts_with("[")) { + cerr << "[" << id << "] Type \"" << type << "\" does not start with '['." << endl; + return 0; + } + if (!type.ends_with("].")) { + cerr << "[" << id << "] Type \"" << type << "\" does not end with \"].\"." << endl; + return 0; + } + string formula; + string reason = type.substr(1, type.length() - 3); + vector labelSeq; + pos = step.find(" # "); + if (pos == string::npos) { + formula = step.substr(0, step.length() - 1); + } else { + formula = step.substr(0, pos); + while (pos != string::npos) { + string::size_type nextPos = step.find(" # ", pos + 1); + if (nextPos == string::npos) + labelSeq.push_back(step.substr(pos + 3, step.length() - pos - 4)); // without ending '.' + else + labelSeq.push_back(step.substr(pos + 3, nextPos - pos - 3)); + pos = nextPos; + } + } + //#cout << "[" << id << "] formula = \"" << formula << "\", reason = \"" << reason << "\", labelSeq = " << FctHelper::vectorStringF(labelSeq, [](const string& s) { return "\"" + s + "\""; }) << endl; + formulas.emplace(id, formula); + reasons.emplace(id, reason); + labels.emplace(id, set(labelSeq.begin(), labelSeq.end())); + if (isEnd) + finalIndices.emplace(id); + } + } + + // 2. Extract assumptions. + unordered_set isModusPonensAssumption; + unordered_set isNecessitationAssumption; + unordered_map isGoalDeclaration; + for (const pair>& p : labels) { + const set& m = p.second; + if (m.count("label(non_clause)")) { + //cout << FctHelper::setString(m) << endl; + if (m.count("label(MP)")) + isModusPonensAssumption.emplace(p.first); + if (m.count("label(NE)")) + isNecessitationAssumption.emplace(p.first); + if (m.count("label(goal)")) { + if (m.size() < 3) { + cerr << "[" << p.first << "] Goal is missing a custom label." << endl; + return 0; + } + string name; + for (set::const_iterator it = m.begin(); it != m.end(); ++it) + if (*it != "label(non_clause)" && *it != "label(goal)") { + const string& s = *it; + if (s.starts_with("label(") && s.ends_with(")")) { + name = s.substr(6, s.length() - 7); + break; + } else if (s.starts_with("answer(") && s.ends_with(")")) { + name = s.substr(7, s.length() - 8); + break; + } + } + if (name.empty()) { + cerr << "[" << p.first << "] Goal is missing a custom label." << endl; + return 0; + } + isGoalDeclaration.emplace(p.first, name); + } + } + } + //#cout << "isModusPonensAssumption = " << FctHelper::setString(set(isModusPonensAssumption.begin(), isModusPonensAssumption.end())) << endl; + //#cout << "isNecessitationAssumption = " << FctHelper::setString(set(isNecessitationAssumption.begin(), isNecessitationAssumption.end())) << endl; + //#cout << "isGoalDeclaration = " << FctHelper::mapString(map(isGoalDeclaration.begin(), isGoalDeclaration.end())) << endl; + + // 3. Obtain connections. + unordered_set isModusPonens; + unordered_set isNecessitation; + unordered_map isAxiom; + unordered_map isGoalNegation; + auto toParamList = [](const string& s) { + vector paramList; + vector args = FctHelper::stringSplit(s, ","); + for (const string& a : args) { + size_t link = 0; + try { + link = strtoull(a.c_str(), nullptr, 10); + } catch (...) { + } + if (link) + paramList.push_back(link); + } + return paramList; + }; + map> refs; + map conclusionsById; + for (map::const_iterator it = reasons.begin(); it != reasons.end(); ++it) { + //#cout << it->first << ":" << it->second << endl; + const string& reason = it->second; + if (reason.starts_with("hyper(")) { + string link_str = reason.substr(6, reason.length() - 7); + vector params = toParamList(link_str); + //#cout << "[" << it->first << ", hyper] " << link_str << " ; " << FctHelper::vectorString(params) << endl; + if (isModusPonens.count(params[0])) { + if (params.size() != 3) { + cerr << "[" << it->first << "] (MP) Hyper param list has length " << params.size() << " != 3." << endl; + return 0; + } + refs.emplace(it->first, array { params[1], params[2] }); + } else if (isNecessitation.count(params[0])) { + if (params.size() != 2) { + cerr << "[" << it->first << "] (NE) Hyper param list has length " << params.size() << " != 2." << endl; + return 0; + } + refs.emplace(it->first, array { params[1], 0 }); + } else { + cerr << "[" << it->first << "] Hyper param list does not start with (MP) (but with " << params[0] << ")." << endl; + return 0; + } + + } else if (reason.starts_with("resolve(")) { + string link_str = reason.substr(8, reason.length() - 9); + vector params = toParamList(link_str); + //cout << "[" << it->first << ", resolve] " << link_str << " ; " << FctHelper::vectorString(params) << endl; + if (params.size() != 2) { + cerr << "[" << it->first << "] Resolve param list has length " << params.size() << " != 2." << endl; + return 0; + } + // One param needs to be in isGoalNegation, the other one should be the conclusion. + if (isGoalNegation.count(params[0])) + conclusionsById.emplace(params[1], isGoalNegation.at(params[0])); + else if (isGoalNegation.count(params[1])) + conclusionsById.emplace(params[0], isGoalNegation.at(params[1])); + else { + cerr << "[" << it->first << "] Resolve param list is missing a goal negation. (isGoalNegation: " << FctHelper::mapString(map(isGoalNegation.begin(), isGoalNegation.end())) << ")" << endl; + return 0; + } + } else if (reason.starts_with("clausify(")) { + string link_str = reason.substr(9, reason.length() - 10); + //#cout << "[" << it->first << ", clausify] " << link_str << endl; + size_t link = strtoull(link_str.c_str(), nullptr, 10); + if (isModusPonensAssumption.count(link)) + isModusPonens.emplace(it->first); + else if (isNecessitationAssumption.count(link)) + isNecessitation.emplace(it->first); + else { + cerr << "[" << it->first << "] " << "Clausified non-(MP)/(NE) rule." << endl; + return 0; + } + } else if (reason.starts_with("deny(")) { + string link_str = reason.substr(5, reason.length() - 6); + //#cout << "[" << it->first << ", deny] " << link_str << endl; + size_t link = strtoull(link_str.c_str(), nullptr, 10); + if (!isGoalDeclaration.count(link)) { + cerr << "[" << it->first << "] Denied non-goal." << endl; + return 0; + } + isGoalNegation.emplace(it->first, isGoalDeclaration.at(link)); + } else if (reason == "assumption") { + if (!isModusPonensAssumption.count(it->first) && !isNecessitationAssumption.count(it->first)) { + const set& s = labels.at(it->first); + if (s.size() > 1) { + cerr << "[" << it->first << "] Axiom resolve has " << s.size() << " != 1 labels. (labels: " << FctHelper::setString(s) << ")" << endl; + return 0; + } + string label = *s.begin(); + if (!label.starts_with("label(") || !label.ends_with(")")) { + cerr << "[" << it->first << "] Axiom resolve has invalid label \"" << label << "\"." << endl; + return 0; + } + label = label.substr(6, label.length() - 7); + //#cout << "[" << it->first << ", assumption] " << it->second << " (axiom: " << label << ")" << endl; + isAxiom.emplace(it->first, label); + } + } + } + + // 4. Obtain axiom order + map> axiomNames; + map axioms; + for (const pair& p : isAxiom) + axiomNames[p.second].emplace(p.first); + size_t num = 1; + for (const pair>& p : axiomNames) { + if (num > 35) { + cerr << "Too many axioms." << endl; + return 0; + } + string ax = num < 10 ? to_string(num) : string { char('a' - 10 + num) }; + for (size_t id : p.second) + axioms.emplace(id, ax); + num++; + } + cout << "Axiom translations: " << FctHelper::mapStringF(axioms, [&](const pair& p) { return "(id:" + to_string(p.first) + ",name:" + isAxiom.at(p.first) + ",translation:" + p.second + ")"; }) << endl; + + // 5. Build proofs. + for (const pair& conclusion : conclusionsById) { + cout << "% " << conclusion.second << " : " << formulas[conclusion.first] << endl; + + set relevantIndices; + set recent; + if (!isAxiom.count(conclusion.first)) + recent.emplace(conclusion.first); + while (!recent.empty()) { + relevantIndices.insert(recent.begin(), recent.end()); + set newCandidates; + for (size_t i : recent) { + const array& a = refs.at(i); + if (!isAxiom.count(a[0])) + newCandidates.emplace(a[0]); + if (a[1] && !isAxiom.count(a[1])) + newCandidates.emplace(a[1]); + } + recent = newCandidates; + } + //#cout << "relevantIndices = " << FctHelper::setString(relevantIndices) << endl; + unordered_map idTranslations; + for (size_t id : relevantIndices) + idTranslations.emplace(id, idTranslations.size()); + //#cout << "idTranslations = " << FctHelper::mapString(map(idTranslations.begin(), idTranslations.end())) << endl; + + vector abstractDProof(relevantIndices.size()); + for (size_t id : relevantIndices) { + size_t i = idTranslations.at(id); + const array& a = refs.at(id); + if (a[1]) // (MP) + abstractDProof[i] = "D" + (isAxiom.count(a[1]) ? axioms.at(a[1]) : "[" + to_string(idTranslations.at(a[1])) + "]") + (isAxiom.count(a[0]) ? axioms.at(a[0]) : "[" + to_string(idTranslations.at(a[0])) + "]"); + else // (NE) + abstractDProof[i] = "N" + (isAxiom.count(a[0]) ? axioms.at(a[0]) : "[" + to_string(idTranslations.at(a[0])) + "]"); + } + //#cout << FctHelper::vectorString(abstractDProof) << endl; + + vector customAxioms; + for (const pair>& p : axiomNames) { + size_t id = *p.second.begin(); + string axStr = formulas.at(id); + if (!axStr.starts_with("t(") || !axStr.ends_with(")")) { + cerr << "[" << id << "] Invalid axiom formula \"" << axStr << "\"." << endl; + return 0; + } + + //Translate formula to polish notation with variable strings encapsulated by '<' and '>', e.g. "t(i(x,i(i(y,i(x,z)),i(i(n(z),i(i(n(u),w),y)),i(u,z)))))" -> "CCCCCCNCCNC" + axStr = axStr.substr(2, axStr.length() - 3); + boost::replace_all(axStr, "i(", "C("); + boost::replace_all(axStr, "n(", "N("); + boost::replace_all(axStr, "nece(", "L("); + boost::replace_all(axStr, "(", "<"); + boost::replace_all(axStr, ")", ">"); + boost::replace_all(axStr, ",", "><"); + axStr = regex_replace(axStr, regex(">+"), ">"); + axStr = regex_replace(axStr, regex("<+"), "<"); + boost::replace_all(axStr, " f; + if (!DlCore::fromPolishNotation(f, axStr)) { + cerr << "[" << id << "] Invalid axiom formula \"" << axStr << "\"." << endl; + return 0; + } + customAxioms.push_back(DRuleParser::AxiomInfo(axioms.at(id), f)); + } + + vector> conclusions; + bool normalPolishNotation = true; //### + //#cout << "abstractDProof = " << FctHelper::vectorString(abstractDProof) << endl; + abstractDProof = DRuleParser::recombineAbstractDProof(abstractDProof, conclusions, &customAxioms, nullptr, nullptr, 2, nullptr, true, -2); + //#cout << "conclusions = " << FctHelper::vectorStringF(conclusions, [](const shared_ptr& f) { return DlCore::toPolishNotation(f); }) << endl; + auto print = [&](ostream& mout) -> string::size_type { + string::size_type bytes = 0; + for (const DRuleParser::AxiomInfo& ax : customAxioms) { + string f = normalPolishNotation ? DlCore::toPolishNotation(ax.refinedAxiom) : DlCore::toPolishNotation_noRename(ax.refinedAxiom); + mout << " " << f << " = " << ax.name << "\n"; + bytes += 9 + f.length(); + } + for (size_t i = 0; i < abstractDProof.size(); i++) { + string f = normalPolishNotation ? DlCore::toPolishNotation(conclusions[i]) : DlCore::toPolishNotation_noRename(conclusions[i]); + const string& p = abstractDProof[i]; + mout << "[" << i << "] " << f << " = " << p << "\n"; + bytes += 7 + FctHelper::digitsNum_uint64(i) + f.length() + p.length(); + } + return bytes; + }; + chrono::time_point startTime = chrono::steady_clock::now(); + if (outputFile) { + filesystem::path file = filesystem::u8path(*outputFile); + while (!filesystem::exists(file) && !FctHelper::ensureDirExists(file.string())) + cerr << "Failed to create file at \"" << file.string() << "\", trying again." << endl; + string::size_type bytes; + { + ofstream fout(file, fstream::out | fstream::binary); + bytes = print(fout); + } + cout << FctHelper::durationStringMs(chrono::duration_cast(chrono::steady_clock::now() - startTime)) << " taken to print and save " << bytes << " bytes to " << file.string() << "." << endl; + } else { + string::size_type bytes = print(cout); + cout << flush; + cout << FctHelper::durationStringMs(chrono::duration_cast(chrono::steady_clock::now() - startTime)) << " taken to print " << bytes << " bytes." << endl; + } + } + return 0; +#endif //### +#if 0 //### ./searchShorterSubproofs ; search shorter proofs for conclusions used in a given proof summary // NOTE: Requires '#include "logic/DlCore.h"' and '#include '. // e.g. ./searchShorterSubproofs data/w3.txt 0 data/tmp.txt CpCqp,CCpCqrCCpqCpr,CCNpNqCqp,Cpp,CCpqCCqrCpr,CCNppp,CpCNpq 0 if (argc <= 1) { @@ -672,75 +1971,61 @@ int main(int argc, char* argv[]) { // argc = 1 + N, argv = { , , cout << FctHelper::durationStringMs(chrono::duration_cast(chrono::steady_clock::now() - startTime)) << " taken to print " << bytes << " bytes." << endl; } return 0; -#endif -#if 0 //### ./DBExtractBySummary ; extract those proofs from a proof database which appear in a given proof summary - // NOTE: Requires '#include "logic/DlCore.h"'. - if (argc <= 3) { - cerr << "Need 1. path to proof database (with conclusions commented in normal Polish notation), 2. path to proof summary, and 3. path for output file." << endl; +#endif //### +#if 0 //### ./uniteLists ; combine comma-separated ordered lists of strings + // e.g. ./uniteLists data/summary-formulas.txt data/summary-formulas1.txt,data/summary-formulas2.txt,data/summary-formulas3.txt + if (argc <= 2) { + cerr << "Need 1. path for output file, and 2. comma-separated list of paths to files with comma-separated elements." << endl; return 0; } - string proofDBFile = argv[1]; - string proofSummaryFile = argv[2]; - string outputFile = argv[3]; - - // 1. Obtain all conclusions used by proof summary (i.e. all which are relevant). - string filterForTheorems = "."; - DlProofEnumerator::recombineProofSummary(proofSummaryFile, true, nullptr, 1, SIZE_MAX, true, false, &filterForTheorems, true, SIZE_MAX, 134217728, false, false, &outputFile, false); - unordered_set conclusions; - vector customAxioms; - { - //vector customAxioms; - vector abstractDProof; - vector requiredIntermediateResults; - DlProofEnumerator::convertProofSummaryToAbstractDProof(outputFile, &customAxioms, &abstractDProof, &requiredIntermediateResults, true, true, false); - //#cout << "abstractDProof = " << FctHelper::vectorString(abstractDProof) << endl; - //#cout << "|abstractDProof| = " << abstractDProof.size() << endl; - //#cout << "requiredIntermediateResults = " << FctHelper::vectorStringF(requiredIntermediateResults, [](const DRuleParser::AxiomInfo& ax) { return DlCore::toPolishNotation(ax.refinedAxiom); }) << endl; - //#cout << "|requiredIntermediateResults| = " << requiredIntermediateResults.size() << endl; - for (const DRuleParser::AxiomInfo& info : requiredIntermediateResults) - conclusions.emplace(DlCore::toPolishNotation(info.refinedAxiom)); - } - cout << "Found " << conclusions.size() << " relevant conclusions." << endl; + string outputFile = argv[1]; + vector inputFiles = FctHelper::stringSplit(argv[2], ","); + set unionOfLists; + for (const string& inputFile : inputFiles) { + string fileString; + if (!FctHelper::readFile(inputFile, fileString)) + throw runtime_error("Failed to read file \"" + inputFile + "\"."); - vector dProofNamesInFile; - vector dProofsInFile = DRuleParser::readFromMmsolitaireFile(proofDBFile, &dProofNamesInFile, true); + // Erase all '\r', '\n', '\t', ' ', and lines starting with '%'. ; NOTE: Much faster than using regular expressions. + bool startOfLine = true; + bool erasingLine = false; + fileString.erase(remove_if(fileString.begin(), fileString.end(), [&](const char c) { + switch (c) { + case '\r': + case '\n': + startOfLine = true; + erasingLine = false; + return true; + case '\t': + case ' ': + startOfLine = false; + return true; + case '%': + if (startOfLine) { + startOfLine = false; + erasingLine = true; + } + return erasingLine; + default: + startOfLine = false; + return erasingLine; + } + }), fileString.end()); - // 2. Copy relevant conclusion's D-proofs into new proof database. - vector relevantIndices; - string result; - for (size_t i = 0; i < dProofNamesInFile.size(); i++) { - string dProof = dProofsInFile[i]; - //vector rawParseData = DRuleParser::parseDProof_raw(dProof, &customAxioms); - //const shared_ptr& conclusion = get<0>(rawParseData.back().second).back(); - //string f_ = DlCore::toPolishNotation(conclusion); - string dProofName = dProofNamesInFile[i]; - string::size_type pos = dProofName.find("; ! "); - if (pos == string::npos) { - cerr << "Invalid DB file" << endl; - return 0; - } - string::size_type posEnd = dProofName.find(' ', pos + 5); - string f = dProofName.substr(pos + 4, posEnd == string::npos ? string::npos : posEnd - pos - 4); - //if (f != f_) { - // cerr << "f: " << f << ", f_: " << f_ << ", for " << dProofNamesInFile[i] << endl; - // return 0; - //} - if (conclusions.count(f)) { - result += DRuleParser::toDBProof(dProof, &customAxioms, posEnd == string::npos ? f : dProofName.substr(pos + 4)) + "\n"; - relevantIndices.push_back(i); - } + vector elements = FctHelper::stringSplit(fileString, ","); + cout << "Found " << elements.size() << " separated element" << (elements.size() == 1 ? "" : "s") << " in \"" << inputFile << "\"." << endl; + unionOfLists.insert(elements.begin(), elements.end()); } - cout << "Copied for " << relevantIndices.size() << " relevant indices: " << FctHelper::vectorString(relevantIndices) << endl; - - if (!FctHelper::writeToFile(outputFile, result)) - cerr << "Failed." << endl; - else - cout << result.length() << " bytes written to \"" << outputFile<< "\"." << endl; - - // e.g. ./DBExtractBySummary data/exs/m-topDB.txt data/m.txt data/exs/m-relevantDB.txt + cout << "In total, found " << unionOfLists.size() << " different element" << (unionOfLists.size() == 1 ? "" : "s") << " in " << inputFiles.size() << " file" << (inputFiles.size() == 1 ? "" : "s") << "." << endl; + if (!unionOfLists.empty()) + cout << "Smallest element: " << *unionOfLists.begin() << "\n (length " << unionOfLists.begin()->length() << ")\nLargest element: " << *unionOfLists.rbegin() << "\n (length " << unionOfLists.rbegin()->length() << ")" << endl; + string result = FctHelper::setString(unionOfLists, { }, { }, ","); + FctHelper::writeToFile(outputFile, result); + cout << result.length() << " bytes written to \"" << outputFile<< "\"." << endl; return 0; -#endif +#endif //### #if 0 //### entropy calculation + // NOTE: Requires '#include '. map m; uint32_t num = 57; ifstream fin("data/0df075acc552c62513b49b6ed674bfcde1c1b018e532c665be229314/dProofs-withConclusions/dProofs" + to_string(num) + ".txt", fstream::in | fstream::binary);