From 7ea0bbe60093c89e82d3f0a6d2c2fd2ed30959c8 Mon Sep 17 00:00:00 2001 From: Ceesjan Luiten Date: Thu, 7 Mar 2024 20:04:20 +0100 Subject: [PATCH] Suggest valid options when the configuration contains unknown keys This changeset is purely to improve the user experience. When the configuration parsing fails with an unknown key we'll test all possible keys and suggest one that looks a lot like it. This way we can be helpful in case someone made a typo or misremembered the precise name. --- FlashMQTests/configtests.cpp | 53 +++++++++++++++++++++++ FlashMQTests/maintests.cpp | 2 + FlashMQTests/maintests.h | 2 + FlashMQTests/testhelpers.cpp | 18 +++++++- FlashMQTests/testhelpers.h | 12 +++++- configfileparser.cpp | 27 ++++++++++-- utils.cpp | 84 ++++++++++++++++++++++++++++++++++++ utils.h | 28 ++++++++++++ 8 files changed, 220 insertions(+), 6 deletions(-) diff --git a/FlashMQTests/configtests.cpp b/FlashMQTests/configtests.cpp index db49f397..71884f08 100644 --- a/FlashMQTests/configtests.cpp +++ b/FlashMQTests/configtests.cpp @@ -117,3 +117,56 @@ void MainTests::test_parsing_numbers() } } } + +void MainTests::testStringDistances() +{ + FMQ_COMPARE(distanceBetweenStrings("", ""), (unsigned int)0); + FMQ_COMPARE(distanceBetweenStrings("dog", ""), (unsigned int)3); + FMQ_COMPARE(distanceBetweenStrings("", "dog"), (unsigned int)3); + FMQ_COMPARE(distanceBetweenStrings("dog", "horse"), (unsigned int)4); + FMQ_COMPARE(distanceBetweenStrings("horse", "dog"), (unsigned int)4); + FMQ_COMPARE(distanceBetweenStrings("industry", "interest"), (unsigned int)6); + FMQ_COMPARE(distanceBetweenStrings("kitten", "sitting"), (unsigned int)3); + FMQ_COMPARE(distanceBetweenStrings("uninformed", "uniformed"), (unsigned int)1); +} + +void MainTests::testConfigSuggestion() +{ + // User made a small typo: 'session' instead of 'sessions' + { + ConfFileTemp config; + config.writeLine("expire_session_after_seconds 180"); + config.closeFile(); + + ConfigFileParser parser(config.getFilePath()); + + try + { + parser.loadFile(false); + FMQ_FAIL("The parser is too liberal"); + } + catch (ConfigFileException &ex) + { + FMQ_COMPARE(ex.what(), "Config key 'expire_session_after_seconds' is not valid (here). Did you mean: expire_sessions_after_seconds ?"); + } + } + + // User entered gibberish. Let's not suggest gibberish back + { + ConfFileTemp config; + config.writeLine("foobarbaz 180"); + config.closeFile(); + + ConfigFileParser parser(config.getFilePath()); + + try + { + parser.loadFile(false); + FMQ_FAIL("The parser is too liberal"); + } + catch (ConfigFileException &ex) + { + FMQ_COMPARE(ex.what(), "Config key 'foobarbaz' is not valid (here)."); + } + } +} diff --git a/FlashMQTests/maintests.cpp b/FlashMQTests/maintests.cpp index e6c1b3ad..ccbff739 100644 --- a/FlashMQTests/maintests.cpp +++ b/FlashMQTests/maintests.cpp @@ -231,6 +231,8 @@ MainTests::MainTests() REGISTER_FUNCTION(testOverrideWillDelayOnSessionDestructionByTakeOver); REGISTER_FUNCTION(testDisabledWills); REGISTER_FUNCTION(testMqtt5DelayedWillsDisabled); + REGISTER_FUNCTION(testStringDistances); + REGISTER_FUNCTION(testConfigSuggestion); REGISTER_FUNCTION(testIncomingTopicAlias); REGISTER_FUNCTION(testOutgoingTopicAlias); REGISTER_FUNCTION(testOutgoingTopicAliasBeyondMax); diff --git a/FlashMQTests/maintests.h b/FlashMQTests/maintests.h index 64947e1d..97b14d4a 100644 --- a/FlashMQTests/maintests.h +++ b/FlashMQTests/maintests.h @@ -121,6 +121,8 @@ class MainTests void testOverrideWillDelayOnSessionDestructionByTakeOver(); void testDisabledWills(); void testMqtt5DelayedWillsDisabled(); + void testStringDistances(); + void testConfigSuggestion(); void testIncomingTopicAlias(); diff --git a/FlashMQTests/testhelpers.cpp b/FlashMQTests/testhelpers.cpp index a5a27f53..0edad193 100644 --- a/FlashMQTests/testhelpers.cpp +++ b/FlashMQTests/testhelpers.cpp @@ -1,6 +1,7 @@ #include "testhelpers.h" #include +#include int assert_count; int assert_fail_count; @@ -16,8 +17,21 @@ bool fmq_assert(bool b, const char *failmsg, const char *actual, const char *exp if (asserts_print) { - std::cout << RED << "FAIL" << COLOR_END << ": '" << failmsg << "', " << actual << " != " << expected << std::endl - << " in " << file << ", line " << line << std::endl; + // There are two types of failmsg: unformatted ones and formatted ones. + // By testing for a newline we can detect formatted ones. + if (strchr(failmsg, '\n') == nullptr) + { + // unformatted + std::cout << RED << "FAIL" << COLOR_END << ": '" << failmsg << "', " << actual << " != " << expected << std::endl + << " in " << file << ", line " << line << std::endl; + } + else + { + // formatted + std::cout << RED << "FAIL" << COLOR_END << " in " << file << ", line " << line << std::endl; + std::cout << failmsg << std::endl; + std::cout << "Comparison: " << actual << " != " << expected << std::endl; + } } } diff --git a/FlashMQTests/testhelpers.h b/FlashMQTests/testhelpers.h index 937b1ce6..120ef192 100644 --- a/FlashMQTests/testhelpers.h +++ b/FlashMQTests/testhelpers.h @@ -61,7 +61,17 @@ inline bool fmq_compare(const char *c1, const char *c2, const char *actual, cons std::string s2(c2); std::ostringstream oss; - oss << s1 << " != " << s2; + + if (s1.length() + s2.length() < 100) + { + // short form + oss << s1 << " != " << s2; + } + else + { + oss << "Actual: " << s1 << std::endl; + oss << "Expected: " << s2; + } return fmq_assert(s1 == s2, oss.str().c_str(), actual, expected, file, line); } diff --git a/configfileparser.cpp b/configfileparser.cpp index e71e5180..15e9afe6 100644 --- a/configfileparser.cpp +++ b/configfileparser.cpp @@ -93,6 +93,15 @@ bool ConfigFileParser::testKeyValidity(const std::string &key, const std::string { std::ostringstream oss; oss << "Config key '" << key << "' is not valid (here)."; + + auto alternative = findCloseStringMatch(validKeys.begin(), validKeys.end(), key); + + if (alternative != validKeys.end()) + { + // The space before the question mark is to make copying using mouse-double-click possible. + oss << " Did you mean: " << *alternative << " ?"; + } + throw ConfigFileException(oss.str()); } @@ -374,6 +383,8 @@ void ConfigFileParser::loadFile(bool test) std::shared_ptr curBridge; Settings tmpSettings; + const std::set blockNames {"listen", "bridge"}; + // Then once we know the config file is valid, process it. for (std::string &line : lines) { @@ -382,19 +393,29 @@ void ConfigFileParser::loadFile(bool test) if (std::regex_match(line, matches, block_regex_start)) { const std::string &key = matches[1].str(); - if (key == "listen") + if (testKeyValidity(key, "listen", blockNames)) { curParseLevel = ConfigParseLevel::Listen; curListener = std::make_shared(); } - else if (key == "bridge") + else if (testKeyValidity(key, "bridge", blockNames)) { curParseLevel = ConfigParseLevel::Bridge; curBridge = std::make_unique(); } else { - throw ConfigFileException(formatString("'%s' is not a valid block.", key.c_str())); + std::ostringstream oss; + oss << "'" << key << "' is not a valid block."; + + auto alt = findCloseStringMatch(blockNames.begin(), blockNames.end(), key); + + if (alt != blockNames.end()) + { + oss << " Did you mean: " << *alt << " ?"; + } + + throw ConfigFileException(oss.str()); } continue; diff --git a/utils.cpp b/utils.cpp index 451dc37c..d5992f9c 100644 --- a/utils.cpp +++ b/utils.cpp @@ -724,6 +724,90 @@ std::string protocolVersionString(ProtocolVersion p) } } +/** + * @brief Returns the edit distance between the two given strings + * + * This function uses the Wagner–Fischer algorithm to calculate the Levenshtein + * distance between two strings: the total number of insertions, swaps, and + * deletions that are needed to transform the one into the other. + */ +unsigned int distanceBetweenStrings(const std::string &stringA, const std::string &stringB) +{ + // The matrix contains the distances between the substrings. + // You can find a description of the algorithm online. + // + // Roughly: + // + // line_a: "dog" + // line_b: "horse" + // + // --> + // | | # | d | o | g + // V --+---+---+---+---+---+---+--- + // # | P + // h | + // o | + // r | Q X Y + // s | + // e | Z + // + // P = [0, 0] = the distance from "" to "" (which is 0) + // Q = [0, 3] = the distance from "" to "hor" (which is 3, all inserts) + // X = [1, 3] = the distance from "d" to "hor" (which is 3, 1 swap and 2 inserts) + // Y = [3, 3] = the distance from "dog" to "hor" (which is 2, both swaps) + // Z = [3, 5] = the distance from "dog" to "horse" (which is 4, two swaps and 2 inserts) + // + // The matrix does not have to be square, the dimensions depends on the inputs + // + // the position within stringA should always be referred to as x + // the position within stringB should always be referred to as y + + using mymatrix = std::vector>; + // +1 because we also need to store the length from the empty strings + int width = stringA.size() + 1; + int height = stringB.size() + 1; + mymatrix distances(width, std::vector(height)); + + // We know that the distance from the substrings of line_a to "" + // is equal to the length of the substring of line_a + for (int x = 0; x < width; x++) + { + distances.at(x).at(0) = x; + } + + // We know that the distance from "" to the substrings of line_b + // is equal to the length of the substring of line_b + for (int y = 0; y < height; y++) + { + distances.at(0).at(y) = y; + } + + // Now all we do is to fill out the rest of the matrix, easy peasy + // note we start at 1 because the top row and left column have already been calculated + for (int x = 1; x < width; x++) + { + for (int y = 1; y < height; y++) + { + if (stringA.at(x - 1) == stringB.at(y - 1)) + { + // the letters in both words are the same: we can travel from the top-left for free to the current state + distances.at(x).at(y) = distances.at(x - 1).at(y - 1); + } + else + { + // let's calculate the different costs and pick the cheapest option + // We use "+1" for all costs since they are all equally likely in our case + int dinstance_with_deletion = distances.at(x).at(y - 1) + 1; + int dinstance_with_insertion = distances.at(x - 1).at(y) + 1; + int dinstance_with_substitution = distances.at(x - 1).at(y - 1) + 1; + distances.at(x).at(y) = std::min({dinstance_with_deletion, dinstance_with_insertion, dinstance_with_substitution}); + } + } + } + + return distances.at(width - 1).at(height - 1); // the last cell contains our answer +} + uint32_t ageFromTimePoint(const std::chrono::time_point &point) { auto duration = std::chrono::steady_clock::now() - point; diff --git a/utils.h b/utils.h index ab1f12e0..4b1c1610 100644 --- a/utils.h +++ b/utils.h @@ -124,6 +124,34 @@ std::string websocketCloseCodeToString(uint16_t code); std::string protocolVersionString(ProtocolVersion p); +unsigned int distanceBetweenStrings(const std::string &stringA, const std::string &stringB); + +template +it findCloseStringMatch(it first, it last, const std::string &s) +{ + it alternative = last; + unsigned int alternative_distance = UINT_MAX; + + for (auto possible_key = first; possible_key != last; ++possible_key) + { + unsigned int distance = distanceBetweenStrings(s, *possible_key); + + // We only want to suggest options that look a bit like the unknown + // one. Experimentally I found 50% of the total length a decent + // cutoff. + // + // The mathemathical formula "distance/length < 0.5" can be + // approximated with integers as "distance*2/length < 1" + if ((distance * 2) / s.length() < 1 && distance < alternative_distance) + { + alternative = possible_key; + alternative_distance = distance; + } + } + + return alternative; +} + uint32_t ageFromTimePoint(const std::chrono::time_point &point); std::chrono::time_point timepointFromAge(const uint32_t age);