Skip to content

Commit

Permalink
Suggest valid options when the configuration contains unknown keys
Browse files Browse the repository at this point in the history
This changeset is purely to improve the user experience. When the
configuration parsing fails with an unknown key we'll test all possible
keys and suggest one that looks a lot like it. This way we can be
helpful in case someone made a typo or misremembered the precise name.
  • Loading branch information
quinox authored and halfgaar committed May 4, 2024
1 parent 195653b commit 7ea0bbe
Show file tree
Hide file tree
Showing 8 changed files with 220 additions and 6 deletions.
53 changes: 53 additions & 0 deletions FlashMQTests/configtests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,56 @@ void MainTests::test_parsing_numbers()
}
}
}

void MainTests::testStringDistances()
{
FMQ_COMPARE(distanceBetweenStrings("", ""), (unsigned int)0);
FMQ_COMPARE(distanceBetweenStrings("dog", ""), (unsigned int)3);
FMQ_COMPARE(distanceBetweenStrings("", "dog"), (unsigned int)3);
FMQ_COMPARE(distanceBetweenStrings("dog", "horse"), (unsigned int)4);
FMQ_COMPARE(distanceBetweenStrings("horse", "dog"), (unsigned int)4);
FMQ_COMPARE(distanceBetweenStrings("industry", "interest"), (unsigned int)6);
FMQ_COMPARE(distanceBetweenStrings("kitten", "sitting"), (unsigned int)3);
FMQ_COMPARE(distanceBetweenStrings("uninformed", "uniformed"), (unsigned int)1);
}

void MainTests::testConfigSuggestion()
{
// User made a small typo: 'session' instead of 'sessions'
{
ConfFileTemp config;
config.writeLine("expire_session_after_seconds 180");
config.closeFile();

ConfigFileParser parser(config.getFilePath());

try
{
parser.loadFile(false);
FMQ_FAIL("The parser is too liberal");
}
catch (ConfigFileException &ex)
{
FMQ_COMPARE(ex.what(), "Config key 'expire_session_after_seconds' is not valid (here). Did you mean: expire_sessions_after_seconds ?");
}
}

// User entered gibberish. Let's not suggest gibberish back
{
ConfFileTemp config;
config.writeLine("foobarbaz 180");
config.closeFile();

ConfigFileParser parser(config.getFilePath());

try
{
parser.loadFile(false);
FMQ_FAIL("The parser is too liberal");
}
catch (ConfigFileException &ex)
{
FMQ_COMPARE(ex.what(), "Config key 'foobarbaz' is not valid (here).");
}
}
}
2 changes: 2 additions & 0 deletions FlashMQTests/maintests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,8 @@ MainTests::MainTests()
REGISTER_FUNCTION(testOverrideWillDelayOnSessionDestructionByTakeOver);
REGISTER_FUNCTION(testDisabledWills);
REGISTER_FUNCTION(testMqtt5DelayedWillsDisabled);
REGISTER_FUNCTION(testStringDistances);
REGISTER_FUNCTION(testConfigSuggestion);
REGISTER_FUNCTION(testIncomingTopicAlias);
REGISTER_FUNCTION(testOutgoingTopicAlias);
REGISTER_FUNCTION(testOutgoingTopicAliasBeyondMax);
Expand Down
2 changes: 2 additions & 0 deletions FlashMQTests/maintests.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@ class MainTests
void testOverrideWillDelayOnSessionDestructionByTakeOver();
void testDisabledWills();
void testMqtt5DelayedWillsDisabled();
void testStringDistances();
void testConfigSuggestion();


void testIncomingTopicAlias();
Expand Down
18 changes: 16 additions & 2 deletions FlashMQTests/testhelpers.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "testhelpers.h"

#include <iostream>
#include <cstring>

int assert_count;
int assert_fail_count;
Expand All @@ -16,8 +17,21 @@ bool fmq_assert(bool b, const char *failmsg, const char *actual, const char *exp

if (asserts_print)
{
std::cout << RED << "FAIL" << COLOR_END << ": '" << failmsg << "', " << actual << " != " << expected << std::endl
<< " in " << file << ", line " << line << std::endl;
// There are two types of failmsg: unformatted ones and formatted ones.
// By testing for a newline we can detect formatted ones.
if (strchr(failmsg, '\n') == nullptr)
{
// unformatted
std::cout << RED << "FAIL" << COLOR_END << ": '" << failmsg << "', " << actual << " != " << expected << std::endl
<< " in " << file << ", line " << line << std::endl;
}
else
{
// formatted
std::cout << RED << "FAIL" << COLOR_END << " in " << file << ", line " << line << std::endl;
std::cout << failmsg << std::endl;
std::cout << "Comparison: " << actual << " != " << expected << std::endl;
}
}
}

Expand Down
12 changes: 11 additions & 1 deletion FlashMQTests/testhelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,17 @@ inline bool fmq_compare(const char *c1, const char *c2, const char *actual, cons
std::string s2(c2);

std::ostringstream oss;
oss << s1 << " != " << s2;

if (s1.length() + s2.length() < 100)
{
// short form
oss << s1 << " != " << s2;
}
else
{
oss << "Actual: " << s1 << std::endl;
oss << "Expected: " << s2;
}

return fmq_assert(s1 == s2, oss.str().c_str(), actual, expected, file, line);
}
Expand Down
27 changes: 24 additions & 3 deletions configfileparser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,15 @@ bool ConfigFileParser::testKeyValidity(const std::string &key, const std::string
{
std::ostringstream oss;
oss << "Config key '" << key << "' is not valid (here).";

auto alternative = findCloseStringMatch(validKeys.begin(), validKeys.end(), key);

if (alternative != validKeys.end())
{
// The space before the question mark is to make copying using mouse-double-click possible.
oss << " Did you mean: " << *alternative << " ?";
}

throw ConfigFileException(oss.str());
}

Expand Down Expand Up @@ -374,6 +383,8 @@ void ConfigFileParser::loadFile(bool test)
std::shared_ptr<BridgeConfig> curBridge;
Settings tmpSettings;

const std::set<std::string> blockNames {"listen", "bridge"};

// Then once we know the config file is valid, process it.
for (std::string &line : lines)
{
Expand All @@ -382,19 +393,29 @@ void ConfigFileParser::loadFile(bool test)
if (std::regex_match(line, matches, block_regex_start))
{
const std::string &key = matches[1].str();
if (key == "listen")
if (testKeyValidity(key, "listen", blockNames))
{
curParseLevel = ConfigParseLevel::Listen;
curListener = std::make_shared<Listener>();
}
else if (key == "bridge")
else if (testKeyValidity(key, "bridge", blockNames))
{
curParseLevel = ConfigParseLevel::Bridge;
curBridge = std::make_unique<BridgeConfig>();
}
else
{
throw ConfigFileException(formatString("'%s' is not a valid block.", key.c_str()));
std::ostringstream oss;
oss << "'" << key << "' is not a valid block.";

auto alt = findCloseStringMatch(blockNames.begin(), blockNames.end(), key);

if (alt != blockNames.end())
{
oss << " Did you mean: " << *alt << " ?";
}

throw ConfigFileException(oss.str());
}

continue;
Expand Down
84 changes: 84 additions & 0 deletions utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -724,6 +724,90 @@ std::string protocolVersionString(ProtocolVersion p)
}
}

/**
* @brief Returns the edit distance between the two given strings
*
* This function uses the Wagner–Fischer algorithm to calculate the Levenshtein
* distance between two strings: the total number of insertions, swaps, and
* deletions that are needed to transform the one into the other.
*/
unsigned int distanceBetweenStrings(const std::string &stringA, const std::string &stringB)
{
// The matrix contains the distances between the substrings.
// You can find a description of the algorithm online.
//
// Roughly:
//
// line_a: "dog"
// line_b: "horse"
//
// -->
// | | # | d | o | g
// V --+---+---+---+---+---+---+---
// # | P
// h |
// o |
// r | Q X Y
// s |
// e | Z
//
// P = [0, 0] = the distance from "" to "" (which is 0)
// Q = [0, 3] = the distance from "" to "hor" (which is 3, all inserts)
// X = [1, 3] = the distance from "d" to "hor" (which is 3, 1 swap and 2 inserts)
// Y = [3, 3] = the distance from "dog" to "hor" (which is 2, both swaps)
// Z = [3, 5] = the distance from "dog" to "horse" (which is 4, two swaps and 2 inserts)
//
// The matrix does not have to be square, the dimensions depends on the inputs
//
// the position within stringA should always be referred to as x
// the position within stringB should always be referred to as y

using mymatrix = std::vector<std::vector<int>>;
// +1 because we also need to store the length from the empty strings
int width = stringA.size() + 1;
int height = stringB.size() + 1;
mymatrix distances(width, std::vector<int>(height));

// We know that the distance from the substrings of line_a to ""
// is equal to the length of the substring of line_a
for (int x = 0; x < width; x++)
{
distances.at(x).at(0) = x;
}

// We know that the distance from "" to the substrings of line_b
// is equal to the length of the substring of line_b
for (int y = 0; y < height; y++)
{
distances.at(0).at(y) = y;
}

// Now all we do is to fill out the rest of the matrix, easy peasy
// note we start at 1 because the top row and left column have already been calculated
for (int x = 1; x < width; x++)
{
for (int y = 1; y < height; y++)
{
if (stringA.at(x - 1) == stringB.at(y - 1))
{
// the letters in both words are the same: we can travel from the top-left for free to the current state
distances.at(x).at(y) = distances.at(x - 1).at(y - 1);
}
else
{
// let's calculate the different costs and pick the cheapest option
// We use "+1" for all costs since they are all equally likely in our case
int dinstance_with_deletion = distances.at(x).at(y - 1) + 1;
int dinstance_with_insertion = distances.at(x - 1).at(y) + 1;
int dinstance_with_substitution = distances.at(x - 1).at(y - 1) + 1;
distances.at(x).at(y) = std::min({dinstance_with_deletion, dinstance_with_insertion, dinstance_with_substitution});
}
}
}

return distances.at(width - 1).at(height - 1); // the last cell contains our answer
}

uint32_t ageFromTimePoint(const std::chrono::time_point<std::chrono::steady_clock> &point)
{
auto duration = std::chrono::steady_clock::now() - point;
Expand Down
28 changes: 28 additions & 0 deletions utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,34 @@ std::string websocketCloseCodeToString(uint16_t code);

std::string protocolVersionString(ProtocolVersion p);

unsigned int distanceBetweenStrings(const std::string &stringA, const std::string &stringB);

template<typename it>
it findCloseStringMatch(it first, it last, const std::string &s)
{
it alternative = last;
unsigned int alternative_distance = UINT_MAX;

for (auto possible_key = first; possible_key != last; ++possible_key)
{
unsigned int distance = distanceBetweenStrings(s, *possible_key);

// We only want to suggest options that look a bit like the unknown
// one. Experimentally I found 50% of the total length a decent
// cutoff.
//
// The mathemathical formula "distance/length < 0.5" can be
// approximated with integers as "distance*2/length < 1"
if ((distance * 2) / s.length() < 1 && distance < alternative_distance)
{
alternative = possible_key;
alternative_distance = distance;
}
}

return alternative;
}

uint32_t ageFromTimePoint(const std::chrono::time_point<std::chrono::steady_clock> &point);
std::chrono::time_point<std::chrono::steady_clock> timepointFromAge(const uint32_t age);

Expand Down

0 comments on commit 7ea0bbe

Please sign in to comment.