Skip to content

Commit

Permalink
Merge pull request hpcc-systems#19187 from jackdelv/HPCC-32735
Browse files Browse the repository at this point in the history
HPCC-32735 Optimize replaceString in jstring.cpp

Reviewed-By: Dan S. Camper <dan.camper@lexisnexisrisk.com>
Reviewed-by: Gavin Halliday <ghalliday@hpccsystems.com>
Merged-by: Gavin Halliday <ghalliday@hpccsystems.com>
  • Loading branch information
ghalliday authored Oct 18, 2024
2 parents 75c24d3 + 0d02f6a commit 78461cb
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 14 deletions.
35 changes: 21 additions & 14 deletions system/jlib/jstring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -937,28 +937,35 @@ StringBuffer & StringBuffer::replace(char oldChar, char newChar)
// Copy source to result, replacing all occurrences of "oldStr" with "newStr"
StringBuffer &replaceString(StringBuffer & result, size_t lenSource, const char *source, size_t lenOldStr, const char* oldStr, size_t lenNewStr, const char* newStr)
{
if (lenSource)
if (lenOldStr && lenSource >= lenOldStr)
{
size_t left = lenSource;
while (left >= lenOldStr)
// Avoid allocating an unnecessarly large buffer and match the source string
result.ensureCapacity(lenSource);

size_t offset = 0;
size_t lastCopied = 0;
size_t maxOffset = lenSource - lenOldStr + 1;
char firstChar = oldStr[0];
while (offset < maxOffset)
{
if (memcmp(source, oldStr, lenOldStr)==0)
if (unlikely(source[offset] == firstChar)
&& unlikely((lenOldStr == 1) || memcmp(source + offset, oldStr, lenOldStr)==0))
{
// If lastCopied matches the offset nothing is appended, but we can avoid a test for offset == lastCopied
result.append(offset - lastCopied, source + lastCopied);
result.append(lenNewStr, newStr);
source += lenOldStr;
left -= lenOldStr;
offset += lenOldStr;
lastCopied = offset;
}
else
{
result.append(*source);
source++;
left--;
}
offset++;
}

// there are no more possible replacements, make sure we keep the end of the original buffer
result.append(left, source);
// Append the remaining characters
result.append(lenSource - lastCopied, source + lastCopied);
}
else
result.append(lenSource, source); // Search string does not fit in source or is empty

return result;
}

Expand Down
54 changes: 54 additions & 0 deletions testing/unittests/jlibtests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -845,6 +845,7 @@ class JlibStringTest : public CppUnit::TestFixture
public:
CPPUNIT_TEST_SUITE(JlibStringTest);
CPPUNIT_TEST(testEncodeCSVColumn);
CPPUNIT_TEST(testReplaceString);
CPPUNIT_TEST_SUITE_END();

protected:
Expand All @@ -860,6 +861,59 @@ void testEncodeCSVColumn()
encodeCSVColumn(encodedCSV, csvCol2);
CPPUNIT_ASSERT_EQUAL_STR(encodedCSV.str(), "\"hello world, \"\"how are you?\"\"\"");
}

void testReplaceString()
{
// Match string at the end of the source
StringBuffer source("aaaaaaaaab");
source.replaceString("aaab", "x");
CPPUNIT_ASSERT_EQUAL_STR("aaaaaax", source.str());

// Single char match at end of source
source.set("aaaaaaaaab");
source.replaceString("b", "x");
CPPUNIT_ASSERT_EQUAL_STR("aaaaaaaaax", source.str());

// Match string at the start of the source
source.set("abababab");
source.replaceString("aba", "xxx");
CPPUNIT_ASSERT_EQUAL_STR("xxxbxxxb", source.str());

// Match not found
source.set("aaaaa");
source.replaceString("bb", "xxx");
CPPUNIT_ASSERT_EQUAL_STR("aaaaa", source.str());

// Replace string is empty
source.set("aabaa");
source.replaceString("aa", "");
CPPUNIT_ASSERT_EQUAL_STR("b", source.str());

// Search string is empty
source.set("aaaaaa");
source.replaceString("", "b");
CPPUNIT_ASSERT_EQUAL_STR("aaaaaa", source.str());

// Source string is empty
source.set("");
source.replaceString("a", "b");
CPPUNIT_ASSERT_EQUAL_STR("", source.str());

// Search string is longer than source string
source.set("a");
source.replaceString("aa", "b");
CPPUNIT_ASSERT_EQUAL_STR("a", source.str());

// Replace every character
source.set("aaaaa");
source.replaceString("a", "b");
CPPUNIT_ASSERT_EQUAL_STR("bbbbb", source.str());

// Replace string is longer than search string
source.set("abbabab");
source.replaceString("ab", "xxx");
CPPUNIT_ASSERT_EQUAL_STR("xxxbxxxxxx", source.str());
}
};

CPPUNIT_TEST_SUITE_REGISTRATION( JlibStringTest );
Expand Down

0 comments on commit 78461cb

Please sign in to comment.