diff --git a/CMakeLists.txt b/CMakeLists.txt index 0f0f509..ca61cd4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,19 +3,33 @@ cmake_minimum_required(VERSION 3.20) set(MAIN_PROJECT OFF) if("${CMAKE_CURRENT_SOURCE_DIR}" STREQUAL "${CMAKE_SOURCE_DIR}") - project(ThunderByteScan) + project(TBS) set(MAIN_PROJECT ON) endif() -option(TBS_TESTS "Thunder Byte Scan with Tests" ${MAIN_PROJECT}) -option(TBS_NO_MT "Disable multithreading in Thunder Byte Scan" OFF) +option(TBS_TESTS "TBS with Tests" ${MAIN_PROJECT}) +option(TBS_MT "Enable multithreading in TBS" ON) +option(TBS_USE_SSE2 "Enable SSE2 in TBS" ON) -add_library(ThunderByteScan INTERFACE) +add_library(TBS INTERFACE) -target_include_directories(ThunderByteScan INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/include) +if(MSVC) + # Set the minimum required version for SSE2 + if(TBS_USE_SSE2) + target_compile_options(TBS INTERFACE /arch:SSE2) + target_compile_definitions(TBS INTERFACE TBS_USE_SSE2) + message(STATUS "SSE2 support enabled.") + endif() +endif() + +if(TBS_MT) + target_compile_definitions(TBS INTERFACE TBS_MT) +endif() + +target_include_directories(TBS INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/include) -if(TBS_NO_MT) - target_compile_definitions(ThunderByteScan INTERFACE TBS_NO_MT) +if(NOT TBS_MT) + target_compile_definitions(TBS INTERFACE TBS_NO_MT) endif() if(TBS_TESTS) diff --git a/CMakeSettings.json b/CMakeSettings.json new file mode 100644 index 0000000..0c5fbf9 --- /dev/null +++ b/CMakeSettings.json @@ -0,0 +1,27 @@ +{ + "configurations": [ + { + "name": "x64-Debug", + "generator": "Ninja", + "configurationType": "Debug", + "inheritEnvironments": [ "msvc_x64_x64" ], + "buildRoot": "${projectDir}\\out\\build\\${name}", + "installRoot": "${projectDir}\\out\\install\\${name}", + "cmakeCommandArgs": "", + "buildCommandArgs": "", + "ctestCommandArgs": "" + }, + { + "name": "x64-Release", + "generator": "Ninja", + "configurationType": "RelWithDebInfo", + "buildRoot": "${projectDir}\\out\\build\\${name}", + 
"installRoot": "${projectDir}\\out\\install\\${name}", + "cmakeCommandArgs": "", + "buildCommandArgs": "", + "ctestCommandArgs": "", + "inheritEnvironments": [ "msvc_x64_x64" ], + "variables": [] + } + ] +} \ No newline at end of file diff --git a/include/TBS.hpp b/include/TBS.hpp new file mode 100644 index 0000000..ab27fa3 --- /dev/null +++ b/include/TBS.hpp @@ -0,0 +1,542 @@ +#pragma once + +#include +#include +#include +#include +#include + +#ifdef TBS_MT +#include +#include +#include +#include +#endif + +#ifdef TBS_USE_SSE2 +#ifndef TBS_IMPL_SSE2 +#define TBS_IMPL_SSE2 +#endif +#endif + +#ifdef TBS_IMPL_SSE2 +#include +#endif + +namespace TBS { + using U64 = unsigned long long; + using U32 = unsigned int; + using UByte = unsigned char; + using UShort = unsigned short; + using ULong = unsigned long; + using UPtr = uintptr_t; + +#ifdef TBS_MT + namespace Thread { + class Pool { + public: + inline Pool(size_t threads = std::thread::hardware_concurrency()) : mbStopped(false) + { + auto workerTask = [this] { + for (;;) + { + std::function task; + { + std::unique_lock lock(mTasksMtx); + mWorkersCondVar.wait(lock, [this] { return mbStopped || !mTasks.empty(); }); + if (mbStopped && mTasks.empty()) + return; + task = std::move(mTasks.front()); + mTasks.pop(); + } + task(); + } + }; + + for (size_t i = 0; i < threads; ++i) + mWorkers.emplace_back(workerTask); + } + + template + inline void enqueue(F&& f, Args&&... 
namespace Memory {

    /**
     * @brief Portable masked comparison of two byte ranges.
     *
     * A bit set in @p wildCardMask means "ignore this bit"; every other bit
     * of @p chunk1 and @p chunk2 must be equal.
     *
     * The buffers are only ever read as bytes. Earlier revisions loaded
     * machine words through casted byte pointers, which is undefined
     * behaviour (misaligned access + aliasing) because callers pass an
     * arbitrary, byte-granular scan cursor as @p chunk1. The inner loop is
     * branch-free over one word worth of bytes so compilers can still fold
     * it into a single wide operation.
     *
     * @param chunk1       First buffer, at least @p len readable bytes.
     * @param chunk2       Second buffer, at least @p len readable bytes.
     * @param len          Number of bytes to compare; 0 compares equal.
     * @param wildCardMask Ignore-mask, at least @p len readable bytes.
     * @return true when all non-ignored bits match.
     */
    inline bool CompareWithMaskWord(const unsigned char* chunk1, const unsigned char* chunk2, size_t len, const unsigned char* wildCardMask) {
        const size_t wordBytes = sizeof(uintptr_t);       // platform word length
        const size_t wholeWordsEnd = len - (len % wordBytes);

        size_t pos = 0;

        // Whole words: accumulate the masked difference, test once per word.
        while (pos < wholeWordsEnd) {
            unsigned diff = 0;
            for (size_t k = 0; k < wordBytes; ++k, ++pos)
                diff |= static_cast<unsigned>(chunk1[pos] ^ chunk2[pos]) & ~static_cast<unsigned>(wildCardMask[pos]);

            if (diff != 0)
                return false;
        }

        // Trailing bytes that do not fill a whole word.
        for (; pos < len; ++pos) {
            const unsigned diff = static_cast<unsigned>(chunk1[pos] ^ chunk2[pos]) & ~static_cast<unsigned>(wildCardMask[pos]);
            if (diff != 0)
                return false;
        }

        return true;
    }

#ifdef TBS_IMPL_SSE2
    namespace SSE2 {
        /// Bitwise NOT of a 128-bit value.
        inline __m128i _mm_not_si128(__m128i value)
        {
            return _mm_xor_si128(value, _mm_set1_epi32(-1));
        }

        /**
         * @brief SSE2 masked comparison, 16 bytes per step.
         *
         * Uses unaligned loads: none of the three pointers is guaranteed to
         * be 16-byte aligned (the scan cursor advances one byte at a time),
         * and the aligned load instruction faults on such addresses.
         * The remainder (< 16 bytes) is delegated to CompareWithMaskWord.
         */
        inline bool CompareWithMask(const unsigned char* chunk1, const unsigned char* chunk2, size_t len, const unsigned char* wildCardMask) {
            const size_t blockCount = len / sizeof(__m128i);

            for (size_t i = 0; i < blockCount; i++) {
                const __m128i ignore = _mm_loadu_si128(reinterpret_cast<const __m128i*>(wildCardMask) + i);
                const __m128i lhs = _mm_loadu_si128(reinterpret_cast<const __m128i*>(chunk1) + i);
                const __m128i rhs = _mm_loadu_si128(reinterpret_cast<const __m128i*>(chunk2) + i);

                // andnot(a, b) == ~a & b : keep only the differing bits that are not ignored.
                const __m128i diff = _mm_andnot_si128(ignore, _mm_xor_si128(lhs, rhs));

                if (_mm_movemask_epi8(_mm_cmpeq_epi8(diff, _mm_setzero_si128())) != 0xFFFF)
                    return false;
            }

            const size_t tailOffset = blockCount * sizeof(__m128i);

            return CompareWithMaskWord(chunk1 + tailOffset, chunk2 + tailOffset, len % sizeof(__m128i), wildCardMask + tailOffset);
        }
    }
#endif

    /**
     * @brief Masked comparison entry point used by the scanner.
     *
     * Dispatches to the SSE2 implementation when TBS_USE_SSE2 is defined,
     * otherwise to the portable implementation.
     */
    inline bool CompareWithMask(const unsigned char* chunk1, const unsigned char* chunk2, size_t len, const unsigned char* wildCardMask)
    {
#ifdef TBS_USE_SSE2
        return SSE2::CompareWithMask(chunk1, chunk2, len, wildCardMask);
#else
        return CompareWithMaskWord(chunk1, chunk2, len, wildCardMask);
#endif
    }
}
this point we are dealing with Half Byte Wildcard Case, "X?" or "?X" + + if (str[0] == '?') + { + // At this point, we are dealing with High Part of Byte wildcarding "?X" + str[0] = '0'; + result.mPattern.emplace_back(UByte(strtoull(str.c_str(), nullptr, 16))); + result.mWildcardMask.emplace_back(UByte(0xF0u)); + continue; + } + + // At this point, we are dealing with Low Part of Byte wildcarding "X?" + str[1] = '0'; + result.mPattern.emplace_back(UByte(strtoull(str.c_str(), nullptr, 16))); + result.mWildcardMask.emplace_back(UByte(0x0Fu)); + } + + return result.mParseSuccess = true; + } + + inline bool Valid(const std::string& pattern) + { + ParseResult res; + + return Parse(pattern, res) && res; + } + + struct Description; + + enum class EScan { + SCAN_ALL, + SCAN_FIRST + }; + + struct Description { + struct Shared { + struct ResultAccesor { + ResultAccesor(Shared& sharedDesc) + : mSharedDesc(sharedDesc) + {} + + operator const Results& () const { + return Results(); + } + + operator Result () const { + if (mSharedDesc.mResult.size() < 1) + return 0; + + return mSharedDesc.mResult[0]; + } + + const Results& Results() const + { + return mSharedDesc.mResult; + } + + Shared& mSharedDesc; + }; + + Shared(EScan scanType) + : mScanType(scanType) + , mResultAccesor(*this) + {} + +#ifdef TBS_MT + std::mutex mMutex; + std::atomic mFinished; +#else + bool mFinished; +#endif + EScan mScanType; + Results mResult; + ResultAccesor mResultAccesor; + }; + + Description(Shared& shared, const std::string& uid, const std::string& pattern, const UByte* searchStart, const UByte* searchEnd, const std::vector>& transformers) + : mShared(shared) + , mUID(uid) + , mPattern(pattern) + , mStart(searchStart) + , mEnd(searchEnd) + , mTransforms(transformers) + { + Parse(mPattern, mParsed); + } + + operator bool() + { + return mParsed; + } + + std::string mPattern; + std::string mUID; + std::vector> mTransforms; + ParseResult mParsed; + Shared& mShared; + const UByte* mStart; + const UByte* 
mEnd; + }; + + inline bool Scan(Description& desc) + { + if (desc.mShared.mFinished) + return desc.mShared.mResult.empty() == false; + + if (!desc.mStart || !desc.mEnd) + return false; + + const size_t patternLen = desc.mParsed.mPattern.size(); + + for (const UByte* i = desc.mStart; i + patternLen - 1 < desc.mEnd && !desc.mShared.mFinished; i++) + { + if (Memory::CompareWithMask(i, desc.mParsed.mPattern.data(), patternLen, desc.mParsed.mWildcardMask.data()) == false) + continue; + + U64 currMatch = (U64)i; + + // At this point, we found a match + + for (auto transform : desc.mTransforms) + currMatch = transform(desc, currMatch); + + // At this point, match is properly user transformed + // lets report it +#ifdef TBS_MT + { + std::lock_guard resultReportLck(desc.mShared.mMutex); +#endif + + if (desc.mShared.mFinished) + break; + + // At this point, we have the lock & we havent finished! + // Lets directly push it + + desc.mShared.mResult.push_back(currMatch); + + if (desc.mShared.mScanType != EScan::SCAN_FIRST) + continue; + + // At this point seems we are searching for a single result + // lets break turn off the searching & break + + desc.mShared.mFinished = true; + return true; +#ifdef TBS_MT + } +#endif + } + + return desc.mShared.mResult.empty() == false; + } + + using SharedDescription = Description::Shared; + using SharedResultAccesor = SharedDescription::ResultAccesor; + } + + namespace Pattern { + struct DescriptionBuilder { + + DescriptionBuilder(std::unordered_map>& sharedDescriptions) + : mSharedDescriptions(sharedDescriptions) + , mScanStart(0) + , mScanEnd(0) + , mScanType(EScan::SCAN_ALL) + {} + + DescriptionBuilder& setPattern(const std::string& pattern) + { + mPattern = pattern; + + if (!mUID.empty()) + return *this; + + return setUID(pattern); + } + + DescriptionBuilder& setUID(const std::string& uid) + { + mUID = uid; + return *this; + } + + template + DescriptionBuilder& setScanStart(T start) + { + mScanStart = (const UByte*)start; + return 
*this; + } + + template + DescriptionBuilder& setScanEnd(T end) + { + mScanEnd = (const UByte*)end; + return *this; + } + + DescriptionBuilder& AddTransformer(const std::function& transformer) + { + mTransformers.emplace_back(transformer); + return *this; + } + + DescriptionBuilder& setScanType(EScan type) + { + mScanType = type; + return *this; + } + + DescriptionBuilder& EnableScanFirst() + { + return setScanType(EScan::SCAN_FIRST); + } + + DescriptionBuilder& EnableScanAll() + { + return setScanType(EScan::SCAN_FIRST); + } + + DescriptionBuilder Clone() + { + return DescriptionBuilder(*this); + } + + Description Build() + { + if (Pattern::Valid(mPattern) == false) + { + static SharedDescription nullSharedDesc(EScan::SCAN_ALL); + static Description nullDescription(nullSharedDesc, "", "", 0, 0, {}); + return nullDescription; + } + + if (mSharedDescriptions.find(mUID) == mSharedDescriptions.end()) + mSharedDescriptions[mUID] = std::make_unique(mScanType); + + return Description(*mSharedDescriptions[mUID], mUID, mPattern, mScanStart, mScanEnd, mTransformers); + } + + private: + std::unordered_map>& mSharedDescriptions; + EScan mScanType; + std::string mPattern; + std::string mUID; + const UByte* mScanStart; + const UByte* mScanEnd; + std::vector> mTransformers; + }; + } + + struct State { + + State() + : State(nullptr, nullptr) + {} + + template + State(T defScanStart = (T)0, K defScanEnd = (K)0) + : mDefaultScanStart((const UByte*)defScanStart) + , mDefaultScanEnd((const UByte*)defScanEnd) + {} + + State& AddPattern(Pattern::Description&& pattern) + { + mDescriptionts.emplace_back(pattern); + return *this; + } + + Pattern::DescriptionBuilder PatternBuilder() + { + return Pattern::DescriptionBuilder(mSharedDescriptions) + .setScanStart(mDefaultScanStart) + .setScanEnd(mDefaultScanEnd); + } + + Pattern::SharedResultAccesor operator[](const std::string& uid) const + { + if (mSharedDescriptions.find(uid) != mSharedDescriptions.end()) + return 
*(mSharedDescriptions.at(uid)); + + static Pattern::SharedDescription nullSharedDesc(Pattern::EScan::SCAN_ALL); + + return nullSharedDesc; + } + + const UByte* mDefaultScanStart; + const UByte* mDefaultScanEnd; + std::unordered_map> mSharedDescriptions; + std::vector mDescriptionts; + }; + + bool Scan(State& state) + { + bool bAllFoundAny = true; + +#ifdef TBS_MT + { + Thread::Pool threadPool; +#endif + for (Pattern::Description& description : state.mDescriptionts) + { +#ifdef TBS_MT + threadPool.enqueue( + [&](Pattern::Description& description) + { +#endif + Pattern::Scan(description); + +#ifdef TBS_MT + }, description); +#endif + } +#ifdef TBS_MT + } +#endif + state.mDescriptionts.clear(); + + for (auto& sharedDescKv : state.mSharedDescriptions) + bAllFoundAny = bAllFoundAny && sharedDescKv.second->mResult.empty() == false; + + return bAllFoundAny; + } +} diff --git a/include/ThunderByteScan.hpp b/include/ThunderByteScan.hpp deleted file mode 100644 index d26eb83..0000000 --- a/include/ThunderByteScan.hpp +++ /dev/null @@ -1,881 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// Multi-threading enabled by default - -#define TBS_MT - -#ifdef TBS_NO_MT -#undef TBS_MT -#endif - -namespace ThunderByteScan { -#ifdef TBS_MT - class ThreadPool { - public: - inline ThreadPool(size_t threads = std::thread::hardware_concurrency()) : stop(false) - { - auto workerTask = [this] { - for (;;) - { - std::function task; - { - std::unique_lock lock(this->queue_mutex); - this->condition.wait(lock, [this] { return this->stop || !this->tasks.empty(); }); - if (this->stop && this->tasks.empty()) - return; - task = std::move(this->tasks.front()); - this->tasks.pop(); - } - task(); - } - }; - - for (size_t i = 0; i < threads; ++i) - workers.emplace_back(workerTask); - } - - template - inline void enqueue(F&& f, Args&&... 
args) { - { - std::unique_lock lock(queue_mutex); - tasks.emplace(std::bind(std::forward(f), std::forward(args)...)); - } - condition.notify_one(); - } - - inline ~ThreadPool() { - { - std::unique_lock lock(queue_mutex); - stop = true; - } - condition.notify_all(); - for (std::thread& worker : workers) - worker.join(); - } - - private: - std::vector workers; - std::queue> tasks; - std::mutex queue_mutex; - std::condition_variable condition; - bool stop; - }; - -#endif - - /** - - @class BatchPatternsScanResults - @brief This class is used to store the results of a batch pattern scan operation. - This class is used to store the results of a batch pattern scan operation. It stores a map of pattern identifiers to a vector of uintptr_t values representing the memory addresses where the pattern was found. - */ - class BatchPatternsScanResults { - private: - std::unordered_map> mResults; - std::unordered_map stoppedMap; - public: - - /** -* @brief Returns a pointer to the atomic bool value stored for a given UID indicating whether the scan for that UID has been stopped or not. -* -* @param uid A string representing the UID for which the atomic bool value is requested. -* @return A pointer to the atomic bool value for the given UID. -*/ - std::atomic_bool* getAtomicBoolStoppedFor(const std::string& uid) - { - if (stoppedMap.find(uid) == stoppedMap.end()) - stoppedMap[uid] = false; - - return &(stoppedMap[uid]); - } - -/** -* @brief Overloads the [] operator to return the first result for a given UID. -* -* @param uid A string representing the UID for which the first result is requested. -* @return The first result found for the given UID. -*/ - uintptr_t operator[] (const std::string& uid) - { - return getFirst(uid); - } - - /** -* @brief Returns the first result of a specified scan. -* -* @param uid The unique identifier for the scan. -* -* @return The uintptr_t address of the first result of the specified scan. 
-* -* This function returns the uintptr_t address of the first result of a scan with -* the specified unique identifier(pattern itself by default). If the scan has not yet been registered, or if -* it has no results, the function returns 0. -*/ - uintptr_t getFirst(const std::string& uid) - { - // Checking to see if uid has even registered a vector - if (mResults.find(uid) == mResults.end()) - return 0; - - // Lets check if that vector has at least more than one results - if (mResults[uid].size() < 1) - return 0; - - // If So Lets Return it - return mResults[uid][0]; - } - - /** -* @brief sets the first result of a specified scan. -* -* @param uid The unique identifier for the scan. -* -* @return void. -* -* This function assign the uintptr_t address of the first result of a scan with -* the specified unique identifier(pattern itself by default). -*/ - void setFirst(const std::string& uid, uintptr_t result) - { - // Checking to see if uid has even registered a vector - if (mResults.find(uid) == mResults.end()) - mResults[uid] = std::vector(); - - // Lets check if that vector has at least more than one results - if (mResults[uid].size() < 1) - mResults[uid].push_back(result); - else - mResults[uid][0] = result; - } - - /** - - Checks if the given uid has any result(s) in the mResults unordered map. - @param uid The unique identifier to check for results. - @param checkResult A boolean parameter indicating whether to check the result value is zero(if so, consider non existing). Default value is false. - @return A boolean value representing whether the given uid has any result(s) in the mResults unordered map. - */ - bool HasResult(const std::string& uid, bool checkResult = false) - { - return getFirst(uid) != 0; - } - - /** - - Returns a reference to a std::vector of uintptr_t values containing the results associated with the given uid. - If the uid has no results registered, an empty vector is created and returned. 
- @param uid A std::string representing the unique identifier to retrieve the results for. - @return A reference to a std::vector of uintptr_t values containing the results associated with the given uid. - */ - std::vector& getResults(const std::string& uid) - { - if (mResults.find(uid) == mResults.end()) - mResults[uid] = std::vector(); - - return (mResults[uid]); - } - }; - - struct PatternTaskInfo { - PatternTaskInfo() - : mpStopped(nullptr) - , mStartFind(0) - , mEndFind(0) - {} - - bool mReportResults = true; - std::vector mMatchMask; - std::vector mIgnoreMask; - std::string mUID; - std::string mPattern; - std::function mResolveCallback; - size_t mPatternSize; - bool mFound = false; - uint64_t mStartFind; - uint64_t mEndFind; - - // it will hold the atomic boolean that will decide if alredy found uid (used in find first) - std::atomic_bool* mpStopped; - }; - - struct PatternDesc { - - PatternDesc(const char* _pattern, std::function resolveCallback) - : PatternDesc(_pattern, _pattern, resolveCallback, 0, 0) // Using the pattern as UID - {} - - PatternDesc(const std::string& _pattern, std::function resolveCallback) - : PatternDesc(_pattern, _pattern, resolveCallback, 0, 0) // Using the pattern as UID - {} - - PatternDesc(const char* _pattern) - : PatternDesc(_pattern, _pattern, 0, 0, 0) // Using the pattern as UID - {} - - PatternDesc(const std::string& _pattern) - : PatternDesc(_pattern, _pattern, 0, 0, 0) // Using the pattern as UID - {} - - PatternDesc(const std::string& _pattern, const std::string& _uid) - : PatternDesc(_pattern, _uid, 0, 0, 0) - {} - - PatternDesc(const std::string& _pattern, const std::string& _uid, std::function resolveCallback) - : PatternDesc(_pattern, _uid, resolveCallback, 0, 0) - {} - - PatternDesc(const std::string& _pattern, const std::string& _uid, std::function resolveCallback, uint64_t startFind, uint64_t endFind) - : mPattern(_pattern) - , mUID(_uid) - , mpStopped(nullptr) - , mResolve(resolveCallback) - , mStartFind(startFind) 
- , mEndFind(endFind) - {} - - std::string mPattern; - std::string mUID; - std::atomic_bool* mpStopped; - std::function mResolve; - uint64_t mStartFind; - uint64_t mEndFind; - }; - - struct PatternDescBuilder { - inline PatternDescBuilder() - : mPattern("") - , mUID("") - , mResolve(0) - , mStartFind(0) - , mEndFind(0) - {} - - inline PatternDescBuilder& setPattern(const std::string& pattern) - { - mPattern = pattern; - - if (mUID.empty()) - mUID = pattern; - - return *this; - } - - inline PatternDescBuilder& setUID(const std::string& uid) - { - mUID = uid; - - return *this; - } - - inline PatternDescBuilder& setResolveCallback(std::function callback) - { - mResolve = callback; - - return *this; - } - - template - inline PatternDescBuilder& setStartFind(T startFind) - { - mStartFind = (uint64_t)startFind; - - return *this; - } - - template - inline PatternDescBuilder& setEndFind(T endFind) - { - mEndFind = (uint64_t)endFind; - - return *this; - } - - inline PatternDesc Build() - { - PatternDesc result(mPattern, mUID, mResolve, mStartFind, mEndFind); - - *this = PatternDescBuilder(); - - return result; - } - - std::string mPattern; - std::string mUID; - std::function mResolve; - uint64_t mStartFind; - uint64_t mEndFind; - }; - - /** - - @class BatchPatternsScanResultFirst - - @brief A class that represents the result of a batch pattern scan where only the first result is stored for each UID. - - This class extends BatchPatternsScanResults and adds additional functionality to store atomic bool values, - - unique_ptr to PatternDesc and check if a specific pattern was found for a given UID. - */ - class BatchPatternsScanResultFirst : public BatchPatternsScanResults { - private: - std::unordered_map> resultExMap; - - public: - - - /** - * @brief Returns a pointer to the PatternDesc object stored for a given UID, describing the pattern who found the result(if any), or nullptr if no such object exists. 
- * - * @param uid A string representing the UID for which the PatternDesc object is requested. - * @return A pointer to the PatternDesc object for the given UID, or nullptr if no such object exists. - */ - PatternDesc* getResultDescEx(const std::string& uid) - { - if (resultExMap.find(uid) == resultExMap.end()) - return nullptr; - - return resultExMap[uid].get(); - } - - /** - * @brief Stores a new PatternDesc object for a given UID Describing the pattern who found the result. - * - * @param _pattern A string representing the pattern used to find the result. - * @param _uid A string representing the UID for which the result was found. - */ - void setResultDescEx(const std::string& _pattern, const std::string& _uid) - { - resultExMap[_uid] = std::make_unique(_pattern, _uid); - } - - /** - * @brief Stores a new PatternDesc object for a given UID Describing the pattern who found the result using information from a PatternTaskInfo where the result was found from. - * - * @param uid A string representing the UID for which the result was found. - * @param from A PatternTaskInfo object containing information about the result found. - */ - void setResultDescExFrom(const std::string& uid, const PatternTaskInfo& from) - { - setResultDescEx(from.mPattern, from.mUID); - } - - /** - * @brief Checks whether the result found for a given UID was found using a specific pattern. - * - * @param uid A string representing the UID for which the result was found. - * @param pattern A string representing the pattern used to find the result. - * @return True if the result for the given UID was found using the given pattern, false otherwise. 
- */ - bool ResultWasFoundByPattern(const std::string& uid, const std::string& pattern) - { - PatternDesc* resultFoundByInfo = getResultDescEx(uid); - - if (resultFoundByInfo == nullptr) - return false; - - return resultFoundByInfo->mPattern == pattern; - } - }; - -#ifdef USE_SSE2 -#include // Include SSE2 intrinsics header - - /** - - @brief Compares two memory regions of a given size using SSE2 instructions and a mask. - @param ptr1 Pointer to the first memory region. - @param ptr2 Pointer to the second memory region. - @param num_bytes Size of the memory regions in bytes. - @param mask Mask array that specifies which bytes should be ignored during comparison. - @return true if the memory regions are equal, false otherwise. - */ - inline bool sse2_memcmp_with_mask(const void* ptr1, const void* ptr2, size_t num_bytes, unsigned char* mask) - { - // Cast input pointers to byte pointers - const unsigned char* s1 = static_cast(ptr1); - const unsigned char* s2 = static_cast(ptr2); - - // Number of bytes to compare in each iteration - const size_t block_size = 16; - - // Loop through input buffers, comparing 16 bytes at a time - size_t i = 0; - - for (; i + block_size <= num_bytes; i += block_size) - { - // Load 16 bytes from input buffers into SSE2 registers - __m128i v1 = _mm_loadu_si128(reinterpret_cast(s1 + i)); - __m128i v2 = _mm_loadu_si128(reinterpret_cast(s2 + i)); - __m128i mask_v = _mm_loadu_si128(reinterpret_cast(mask + i)); - - // Perform mask comparison using SSE2 instructions - __m128i cmp = _mm_cmpeq_epi8(v1, v2); - cmp = _mm_or_si128(cmp, mask_v); - __m128i result = _mm_andnot_si128(cmp, _mm_set1_epi8((char)0xFF)); - - // Check if any byte is non-zero in the result register - if (_mm_movemask_epi8(result) != 0x0) - { - return false; - } - } - - // Compare remaining bytes using regular C++ code - const unsigned char* p1 = s1 + i; - const unsigned char* p2 = s2 + i; - for (; i < num_bytes; i++) - { - if ((mask[i] == 0xFF) || (*p1 == *p2)) - { - p1++; - 
p2++; - continue; - } - else - { - return false; - } - } - - return true; - } -#endif - - - /** - - @brief Compares two memory blocks while taking into account a mask indicating which bytes to compare. - This function compares two memory blocks pointed to by ptr1 and ptr2, each of size num_bytes. - It uses a mask to determine which bytes to compare and which to ignore. The mask is a pointer to - an array of bytes of the same size as num_bytes. If a byte in the mask is non-zero, the corresponding - byte in the memory blocks is compared; otherwise, it is ignored. The function returns true if all - compared bytes match or were ignored according to the mask, false otherwise. - @param ptr1 A pointer to the first memory block to compare. - @param ptr2 A pointer to the second memory block to compare. - @param num_bytes The number of bytes to compare in the memory blocks. - @param mask A pointer to an array of bytes indicating which bytes to compare and which to ignore. - @return A boolean value indicating whether the memory blocks match or not according to the mask. - */ - inline bool fast_memcmp_with_mask(const void* ptr1, const void* ptr2, size_t num_bytes, unsigned char* mask) { - -#ifdef USE_SSE2 - return sse2_memcmp_with_mask(ptr1, ptr2, num_bytes, mask); -#endif - - const unsigned char* p1 = (const unsigned char*)ptr1; - const unsigned char* p2 = (const unsigned char*)ptr2; - size_t i; - for (i = 0; i < num_bytes; i++) { - if ((mask[i] == 0xFF) || (p1[i] == p2[i])) { - continue; - } - else { - return false; - } - } - return true; - } - - /** - @brief Parses a pattern string to generate a rawMask and an ignoreMask vectors. - - @param pattern A string containing a pattern to parse. - @param rawMask A vector of unsigned chars to be filled with the raw values extracted from the pattern. - @param ignoreMask A vector of unsigned chars to be filled with the same values as rawMask but with all bits set to 1 in the positions where there is a value in rawMask. 
- - @return void - */ - inline void ParsePattern(const std::string& pattern, std::vector& rawMask, std::vector& ignoreMask) - { - std::stringstream ss(pattern); - std::string str; ss >> str; - - do - { - if (str.find("?") != std::string::npos) - { - rawMask.push_back(0x00); - ignoreMask.push_back(0xFF); - } - else - { - auto c = (unsigned char)strtoull(str.c_str(), nullptr, 16); - - rawMask.push_back(c); - ignoreMask.push_back(0x00); - } - - if (ss.eof()) - break; - } while (ss >> str); - } - - /** - - @brief Performs a pattern scan on a specific range of memory. - - @param pattern A pointer to a PatternTaskInfo struct that holds information about the pattern to scan. - - @param startAddr The starting address of the memory range to scan. - - @param endAddr The ending address of the memory range to scan. - - @param foundCallback A function to be called when a pattern is found. The function takes a PatternTaskInfo pointer and the address where the pattern was found as arguments and returns a boolean indicating whether to continue scanning or not. - */ - inline void LocalFindPatternInfoTask(PatternTaskInfo* pattern, uintptr_t startAddr, uintptr_t endAddr, std::function foundCallback) - { - bool bAtLeastOneFound = false; - - if (startAddr == 0 || endAddr == 0 || startAddr >= endAddr) - return; - - for (size_t i = startAddr; i <= (endAddr - pattern->mPatternSize) && pattern->mReportResults; i++) - { - if (pattern->mpStopped && (*pattern->mpStopped) == true) - break; - - if (fast_memcmp_with_mask(pattern->mMatchMask.data(), (unsigned char*)i, - (size_t)pattern->mPatternSize, - pattern->mIgnoreMask.data()) == false) - continue; - - pattern->mFound = true; - - if ((pattern->mReportResults = foundCallback(pattern, (uintptr_t)i)) == false) - break; - } - } - - - /** - Searches for multiple patterns within the specified memory range and calls a callback function for each found occurrence. 
- - @param patterns A vector of unique ptrs of PatternInfo representing the information patterns to search for. - @param startAddr The starting address of the memory range to search within. - @param endAddr The ending address of the memory range to search within. - @param foundCallback A callback function that is called for each occurrence of a pattern found. The function should have the signature "bool func(const std::string& patt, uintptr_t addr)", where "patt" is the name of the pattern found(pattern itself usually) and "addr" is the address of the found pattern. - - @return Returns true if every single pattern uid did found a result, false otherwise. - - @remarks The function assumes that the memory addresses being searched are valid and accessible. - - @see fast_memcmp_with_mask, ParsePattern - - */ - inline bool LocalFindPatternBatch(const std::vector& patterns, std::function foundCallback) - { - { -#ifdef TBS_MT - ThreadPool pattsTp; -#endif - - for (PatternTaskInfo* patternInfo : patterns) - { -#ifdef TBS_MT - // Lets Delegate the Pattern Scan - pattsTp.enqueue([&](PatternTaskInfo* patternInfo, std::function foundCallback) { -#endif - LocalFindPatternInfoTask(patternInfo, patternInfo->mStartFind, patternInfo->mEndFind, foundCallback); - -#ifdef TBS_MT - }, patternInfo, foundCallback); -#endif - } - } - - // At this point, all the pattern scan tasks - // finished doing its work! 
- - std::unordered_set uidFoundAtLeastOne; - std::vector foundSorted = patterns; - - std::sort(foundSorted.begin(), foundSorted.end(), [&](PatternTaskInfo* toSort1, PatternTaskInfo* toSort2) { - return toSort1->mFound > toSort2->mFound; - }); - - // sorting foundSorted on the criteria of found first - // so the process of searching the UIDs with alredy a result is simpler - - for (PatternTaskInfo* currPatternTaskInfo : foundSorted) - { - if (uidFoundAtLeastOne.find(currPatternTaskInfo->mUID) != uidFoundAtLeastOne.end()) - { - // At this point, another pattern - // with same UID found a result - // meaning that this Pattern Task Info with - // same UID should be ingnored - - continue; - } - - // At this point, currPatternTaskInfo - // UID was not found yet - - if (currPatternTaskInfo->mFound == false) - { - // At this point, we know that at least 1 or more - // pattern from the batch didnt found its results - // Lets notify to the caller, so caller can re-call - // again with cache and potentially find results else-where - // in another memory range - - return false; - } - - // At this point, this currPatternTaskInfo was indeed found!, - // and its other pattern task friends with same UID wont need - // to find/work/worry anymore, lets add it to the ignore list - - uidFoundAtLeastOne.insert(currPatternTaskInfo->mUID); - } - - // At this point, seems that all Patterns UIDs were - // satisfied with a corresponding result - // Lets report to the caller - - return true; - } - - - - /** - Searches for multiple patterns within the specified memory range and calls a callback function for each found occurrence. - - @param patterns A vector of strings representing the patterns to search for. - @param startAddr The starting address of the memory range to search within. - @param endAddr The ending address of the memory range to search within. - @param foundCallback A callback function that is called for each occurrence of a pattern found. 
The function should have the signature "bool func(const std::string& patt, uintptr_t addr)", where "patt" is the name of the pattern found(pattern itself usually) and "addr" is the address of the found pattern. - - @return Returns true if the search was successful, false otherwise. - - @remarks The function assumes that the memory addresses being searched are valid and accessible. - - @see fast_memcmp_with_mask, ParsePattern - - */ - inline bool LocalFindPatternBatch(const std::vector& patterns, uintptr_t startAddr, uintptr_t endAddr, std::function foundCallback) - { - - if (startAddr == 0 || endAddr == 0 || startAddr >= endAddr) - return false; - - std::vector> allPatternsInfo; - - for (size_t i = 0; i < patterns.size(); i++) - { - allPatternsInfo.emplace_back(std::make_unique()); - PatternTaskInfo& currPatternTaskInf = *allPatternsInfo.back(); - - currPatternTaskInf.mUID = patterns[i].mUID; - currPatternTaskInf.mPattern = patterns[i].mPattern; - currPatternTaskInf.mpStopped = patterns[i].mpStopped; - currPatternTaskInf.mMatchMask = std::vector(); - currPatternTaskInf.mIgnoreMask = std::vector(); - currPatternTaskInf.mStartFind = patterns[i].mStartFind == 0 ? startAddr : patterns[i].mStartFind; - currPatternTaskInf.mEndFind = patterns[i].mEndFind == 0 ? 
endAddr : patterns[i].mEndFind; - currPatternTaskInf.mResolveCallback = patterns[i].mResolve; - - ParsePattern(currPatternTaskInf.mPattern, currPatternTaskInf.mMatchMask, currPatternTaskInf.mIgnoreMask); - - currPatternTaskInf.mPatternSize = currPatternTaskInf.mMatchMask.size(); - } - - std::vector allPatternsInfop; - - for (auto& curr : allPatternsInfo) - allPatternsInfop.push_back(curr.get()); - - bool bFoundAll = LocalFindPatternBatch(allPatternsInfop, foundCallback); - - for (auto& currentPatternInfo : allPatternsInfo) - { - // Erasing, so next scan with similar patterns, - // dont find this garbage patterns as a result - - memset(currentPatternInfo->mMatchMask.data(), 0x0, currentPatternInfo->mPatternSize); - memset(currentPatternInfo->mIgnoreMask.data(), 0x0, currentPatternInfo->mPatternSize); - } - - return bFoundAll; - } - - /** - - Searches for a given pattern in the specified address range. - @param pattern The pattern to search for. - @param startAddr The starting address of the range to search. - @param endAddr The ending address of the range to search. - @param foundCallback The callback function to be called for each found address. The function should return true to continue searching and false to stop. - @return true if the search was completed successfully, false otherwise. - */ - inline bool LocalFindPattern(const std::string& pattern, uintptr_t startAddr, uintptr_t endAddr, std::function foundCallback) - { - std::vector pattDesc{ pattern }; - - return LocalFindPatternBatch(pattDesc, startAddr, endAddr, [&](PatternTaskInfo* pattTaskInf, uintptr_t result) { - - return foundCallback(result); - - }); - } - - /** - - @brief Searches for a given pattern within the specified address range and returns all matches. - @param pattern The pattern to search for. - @param startAddr The start address of the address range to search in. - @param endAddr The end address of the address range to search in. 
- @param results A vector to store the addresses of all matches found. - @return True if the search was successful, false otherwise. - */ - inline bool LocalFindPattern(const std::string& pattern, uintptr_t startAddr, uintptr_t endAddr, std::vector& results) - { - return LocalFindPattern(pattern, startAddr, endAddr, [&](uintptr_t addrFound) { - - results.push_back(addrFound); - - return true; - }); - } - - /** - - Searches for the first occurrence of a pattern in the given memory range. - @param pattern The pattern to search for. - @param startAddr The starting address of the memory range to search in. - @param endAddr The ending address of the memory range to search in. - @param result The reference to the first memory address where the pattern was found. - @return True if the search was successful, false otherwise. - */ - inline bool LocalFindPatternFirst(const std::string& pattern, uintptr_t startAddr, uintptr_t endAddr, uintptr_t& result) - { - result = NULL; - - return LocalFindPattern(pattern, startAddr, endAddr, [&](uintptr_t addrFound) { - - result = addrFound; - - return false; - }); - } - - - /** - - Searches for the first occurrence of a batch of patterns in a specified memory range. - Remarks: if one of the patterns alredy has a result in the BATCHPATTERNSSCANRESULTSFIRST, this function wont touch it, and will ignore searching for that pattern, this way it avoids overriding the alredy there results - this is very useful for the case scenario where you want to search in multiple areas, for multiple patterns, and you are not sure if for example all patterns will be inside a specific memory block - @param patterns A vector of strings containing the patterns to search for. - @param startAddr The starting address of the memory range to search in. - @param endAddr The ending address of the memory range to search in. - @param results A map of pattern strings to uintptr_t values indicating the first occurrence of each pattern found. 
- @return True if all patterns were found. - */ - inline bool LocalFindPatternBatchFirst(const std::vector& _patterns, uintptr_t startAddr, uintptr_t endAddr, BatchPatternsScanResultFirst& results) - { - std::vector patterns = _patterns; // Copy of all patterns - // So we can modify the array - std::unordered_set toRemove; - - for (auto& patternDesc : patterns) - { - patternDesc.mpStopped = results.getAtomicBoolStoppedFor(patternDesc.mUID); - - if (results.HasResult(patternDesc.mUID, true)) // If Result is zero, we will override - toRemove.insert(patternDesc.mUID); - else - results.setFirst(patternDesc.mUID, 0); - } - - // Removing the ones who alredy have a result - // We dont want to overwrite the cache - patterns.erase(std::remove_if(patterns.begin(), patterns.end(), [&](const PatternDesc& pattDesc) { - return toRemove.count(pattDesc.mUID) > 0; - }), patterns.end()); - - return LocalFindPatternBatch(patterns, startAddr, endAddr, [&](PatternTaskInfo* patternTaskInfo, uintptr_t rslt) { - // At this point, this callback was called - // becouse a result for patternTaskInfo was found - - if (results.HasResult(patternTaskInfo->mUID) == true) - { - // Seems we alredy found it, we dont care about another result - // lets simply ignore it - - return false; - } - - // at this point, a result for patternTaskInfo - // was found, and it wasnt saved, lets call the callback to resolve it(if any) and save it - - if (patternTaskInfo->mResolveCallback) - rslt = patternTaskInfo->mResolveCallback(rslt); - - results.setFirst(patternTaskInfo->mUID, rslt); - results.setResultDescExFrom(patternTaskInfo->mUID, (*patternTaskInfo)); - - // Since finding just first ocurrences, - // we dont want to find anymore - return false; - }); - } - - /** - - Finds all occurrences of multiple patterns in a given memory region. - @param patterns The list of patterns to search for. - @param startAddr The start address of the memory region to search in. 
- @param endAddr The end address of the memory region to search in. - @param results A reference to the output parameter that will stores the search results. - @return True if the search was successful, false otherwise. - - */ - inline bool LocalFindPatternBatch(const std::vector& _patterns, uintptr_t startAddr, uintptr_t endAddr, BatchPatternsScanResults& results) - { - std::vector patterns = _patterns; - - for (PatternDesc& pd : patterns) - pd.mpStopped = results.getAtomicBoolStoppedFor(pd.mUID); - - return LocalFindPatternBatch(patterns, startAddr, endAddr, [&](PatternTaskInfo* patternTaskInfo, uintptr_t rslt) { - if (patternTaskInfo->mResolveCallback) - rslt = patternTaskInfo->mResolveCallback(rslt); - - results.getResults(patternTaskInfo->mUID).emplace_back(rslt); - return true; - }); - } - - template - uintptr_t VecStart(const std::vector& vec) - { - if (vec.size() < 1) - return 0x0; - - return (uintptr_t)vec.data(); - } - - template - uintptr_t VecEnd(const std::vector& vec) - { - if (vec.size() < 1) - return 0x0; - - return (uintptr_t)(vec.data() + vec.size()); - } -} diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index bb31c07..e51d536 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -10,6 +10,6 @@ FetchContent_Declare( FetchContent_MakeAvailable(doctest) -add_executable(ThunderByteScanTest main.cpp) +add_executable(TBSTest "TBSTests.cpp") -target_link_libraries(ThunderByteScanTest ThunderByteScan doctest_with_main) \ No newline at end of file +target_link_libraries(TBSTest TBS doctest_with_main) \ No newline at end of file diff --git a/tests/TBSTests.cpp b/tests/TBSTests.cpp new file mode 100644 index 0000000..86ff1f6 --- /dev/null +++ b/tests/TBSTests.cpp @@ -0,0 +1,287 @@ +#include +#include +#include +#include + +#include + +using namespace TBS; + +TEST_CASE("Pattern Parsing") { + Pattern::ParseResult res; + + CHECK(Pattern::Parse("AA ? BB ? CC ? DD ? EE ? 
FF", res)); + CHECK_EQ(res.mWildcardMask.size(), 11); + CHECK_EQ(res.mPattern.size(), 11); + + CHECK(Pattern::Parse("AA ?? BB ? CC ?? DD ? EE ?? FF ??", res)); + CHECK_EQ(res.mWildcardMask.size(), 12); + CHECK_EQ(res.mPattern.size(), 12); + + CHECK_FALSE(Pattern::Parse("AA ??? BB ? CC ?? DD? ? EE ?? FF ??", res)); + CHECK_EQ(res.mWildcardMask.size(), 1 /*Just 1 valid byte*/); + CHECK_EQ(res.mPattern.size(), 1 /*Just 1 valid byte*/); + + CHECK_FALSE(Pattern::Parse("AA ? BB ?CC ? DD ?EE ? FF", res)); + CHECK_EQ(res.mPattern.size(), 3 /*Just 3 valid bytes*/); + CHECK_EQ(res.mWildcardMask.size(), 3 /*Just 3 valid bytes*/); +} + +TEST_CASE("Memory Comparing Masked") +{ + struct MemoryCompareTest { + std::string mPattern; + const UByte* mTestCase; + size_t mMaskExpectedLength; + size_t mPatternExpectedLength; + }; + + MemoryCompareTest testCases[]{ + // Exact match + {"AA BB CC DD EE FF", [] {static UByte testCase1[]{ 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF }; return testCase1; }(), 6, 6}, + + // Wildcard match + {"A? ? ?C ? E? ? F?", [] {static UByte testCase2[]{ 0xAA, 0xDE, 0xCC, 0xAD, 0xEE, 0xBE, 0xFF }; return testCase2; }(), 7, 7}, + + // Pattern longer than sequence + {"AA BB CC DD EE FF", [] {static UByte testCase4[]{ 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF }; return testCase4; }(), 6, 6}, + + // Sequence longer than pattern + {"AA BB CC DD EE FF", [] {static UByte testCase5[]{ 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x11 }; return testCase5; }(), 6, 6}, + + // Multiple wildcard matches + {"A? ? ? ?D ? FF", [] {static UByte testCase6[]{ 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF }; return testCase6; }(), 6, 6}, + + // All wildcards + {"? ? ? ? ? 
?", [] {static UByte testCase7[]{ 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF }; return testCase7; }(), 6, 6}, + + // Single-byte pattern and sequence + {"AA", [] {static UByte testCase8[]{ 0xAA }; return testCase8; }(), 1, 1}, + + // Single-byte wildcarded + {"?A", [] {static UByte testCase8[]{ 0xBA }; return testCase8; }(), 1, 1}, + + // Empty pattern and sequence + {"", [] {static UByte testCase9[1]{ }; return testCase9; }(), 0, 0}, + }; + + for (const auto& testCase : testCases) + { + Pattern::ParseResult res; + + CHECK(Pattern::Parse(testCase.mPattern, res)); + CHECK_EQ(res.mPattern.size(), testCase.mPatternExpectedLength); + CHECK_EQ(res.mWildcardMask.size(), testCase.mMaskExpectedLength); + CHECK(Memory::CompareWithMask(testCase.mTestCase, res.mPattern.data(), res.mPattern.size(), res.mWildcardMask.data())); + } +} + +/* + wildCardMask expected to be 0xFF for byte that are wild carded and 0x0 for non-wildcarded bytes +*/ +inline bool MemoryCompareWithMaskByteByte(const UByte* chunk1, const UByte* chunk2, size_t len, const UByte* wildCardMask) { + for (size_t i = 0; i < len; i++) { + if (wildCardMask[i] != 0xFF && chunk1[i] != chunk2[i]) + return false; + } + + return true; +} + +TEST_CASE("Benchmark MemoryCompares") +{ + // Define the size of the chunk + constexpr size_t chunkSize = 1000; // a thousand bytes + constexpr size_t iterations = 5000; + constexpr double dIterations = double(iterations); + + // Allocate memory for the chunks and the wildcard mask + UByte* chunk1 = new UByte[chunkSize]; + UByte* chunk2 = new UByte[chunkSize]; + UByte* wildCardMask = new UByte[chunkSize]; + + // Fill the chunks and the wildcard mask with random data + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution distribution(0, 255); // Random byte values + for (size_t i = 0; i < chunkSize; ++i) { + chunk1[i] = UByte(distribution(gen)); + chunk2[i] = UByte(distribution(gen)); + wildCardMask[i] = distribution(gen) < 128 ? 
0xFF : 0x00; // 50% chance of being a wildcard + } + + std::chrono::microseconds elapsedMicrosecondsByteByte = std::chrono::milliseconds(0); + std::chrono::microseconds elapsedMicrosecondsPlatformWord = std::chrono::milliseconds(0); + std::chrono::microseconds elapsedMicrosecondsSSE2 = std::chrono::milliseconds(0); + + for (size_t i = 0; i < iterations; i++) + { + // Measure the time taken by MemoryCompareWithMaskByteByte + auto startByteByte = std::chrono::high_resolution_clock::now(); + bool resultByteByte = MemoryCompareWithMaskByteByte(chunk1, chunk2, chunkSize, wildCardMask); + auto endByteByte = std::chrono::high_resolution_clock::now(); + std::chrono::duration elapsedByteByte = endByteByte - startByteByte; + elapsedMicrosecondsByteByte += std::chrono::duration_cast(elapsedByteByte); + } + auto elapsedMicrosecondsByteByteAv = double(elapsedMicrosecondsByteByte.count()) / dIterations; + + for (size_t i = 0; i < iterations; i++) + { + // Measure the time taken by CompareWithMaskWord + auto start = std::chrono::high_resolution_clock::now(); + bool result = Memory::CompareWithMaskWord(chunk1, chunk2, chunkSize, wildCardMask); + auto end = std::chrono::high_resolution_clock::now(); + std::chrono::duration elapsed = end - start; + elapsedMicrosecondsPlatformWord += std::chrono::duration_cast(elapsed); + } + + auto elapsedMicrosecondsPlatformWordAv = double(elapsedMicrosecondsPlatformWord.count()) / dIterations; + + for (size_t i = 0; i < iterations; i++) + { + // Measure the time taken by Memory::SSE2::CompareWithMask + auto start = std::chrono::high_resolution_clock::now(); + bool result = Memory::SSE2::CompareWithMask(chunk1, chunk2, chunkSize, wildCardMask); + auto end = std::chrono::high_resolution_clock::now(); + std::chrono::duration elapsed = end - start; + elapsedMicrosecondsSSE2 += std::chrono::duration_cast(elapsed); + } + + auto elapsedMicrosecondsSSE2Av = double(elapsedMicrosecondsSSE2.count()) / dIterations; + + + std::cout << 
"MemoryCompareWithMaskByteByte() took " << elapsedMicrosecondsByteByteAv << " microseconds." << std::endl; + std::cout << "Memory::CompareWithMaskWord() took " << elapsedMicrosecondsPlatformWordAv << " microseconds." << std::endl; + std::cout << "Memory::SSE2::CompareWithMask() took " << elapsedMicrosecondsSSE2Av << " microseconds." << std::endl; + + // Clean up + delete[] chunk1; + delete[] chunk2; + delete[] wildCardMask; +} + +TEST_CASE("Pattern Scan #1") +{ + UByte testCase[] = { + 0xAA, 0x00, 0xBB, 0x11, 0xCC, 0x22, 0xDD, 0x33, 0xEE, 0x44, 0xFF + }; + + State state(testCase, testCase + sizeof(testCase)); + + state.AddPattern( + state + .PatternBuilder() + .setPattern("AA ? BB ? CC ? DD ? EE ? FF") + .setUID("TestUID") + .AddTransformer([](Pattern::Description& desc, U64 res) -> U64 { + return U64(((*(U32*)(res + 6))) & 0x00FF00FFull); // Just Picking 0xEE and 0xFF + }) + .AddTransformer([](Pattern::Description& desc, U64 res) -> U64 { + // expected to be 0xXXEEXXDD comming from previous transform + CHECK(res == 0x00EE00DDull); + return res | 0xFF00FF00ull; // expected to be 0xFFEEFFDD + }) + .Build() + ); + + CHECK(Scan(state)); + CHECK(state["TestUID"].Results().size() == 1); + CHECK(state["TestUID"] == 0xFFEEFFDD); +} + +/* + This test case demonstrates how the pattern scanning library efficiently searches for specific patterns within data buffers. + + - Setup: Two strings are stored in separate buffers along with additional data. + - Pattern Definition: Two patterns are defined to search for specific byte sequences within the buffers, using a shared identifier. + - Transformers: Functions are applied to matched results to adjust their positions within the buffer. + - Scan Execution: The library scans the buffers for the defined patterns, stopping as soon as a match is found. + - Result Verification: The test verifies that a single match is found and retrieves the matched string from the result. 
+ + This example illustrates the library's simplicity and effectiveness in handling complex pattern scanning tasks. +*/ +TEST_CASE("Pattern Scan #2") +{ + const char string1[] = "Old Version String"; + const char string2[] = "New Version String"; + + UByte buffer1[] = { + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, + 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, + 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, + 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, + 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, + 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30, + 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, + 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, 0x40 + }; + + UByte buffer2[] = { + 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, + 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, + 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, + 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 0x60, + 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, + 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, + 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, + 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 0x80 + }; + + static size_t strOff1 = sizeof(buffer1) - sizeof(string1) - 5; + static size_t strOff2 = sizeof(buffer2) - sizeof(string2) - 12; + static size_t strRelOff1 = 16; + static size_t strRelOff2 = 18; + static size_t pattern1TostrRelOff1 = strRelOff1 - 2; + static size_t pattern2TostrRelOff2 = strRelOff2 - 5; + + strcpy((char*)(buffer1 + strOff1), string1); + strcpy((char*)(buffer2 + strOff2), string2); + *(size_t*)(buffer1 + strRelOff1) = strOff1 - strRelOff1; + *(size_t*)(buffer2 + strRelOff2) = strOff2 - strRelOff2; + + State state; + { + auto builder = state + .PatternBuilder() + .setUID("String") + .EnableScanFirst(); + + state.AddPattern( + builder + .Clone() + .setPattern("? 04 ? ?? 
?7 0?") + .setScanStart(buffer1) + .setScanEnd(buffer1 + sizeof(buffer1)) + .AddTransformer([](auto& desc, U64 res) { + return res + pattern1TostrRelOff1; + }) + .AddTransformer([](auto& desc, U64 res) { + return res + *(size_t*)res; + }) + .Build() + ); + + state.AddPattern( + builder + .Clone() + .setPattern("4? ? ?8 49 ? 4B 4?") + .setScanStart(buffer2) + .setScanEnd(buffer2 + sizeof(buffer2)) + .AddTransformer([](auto& desc, U64 res) { + return res + pattern2TostrRelOff2; + }) + .AddTransformer([](auto& desc, U64 res) { + return res + *(size_t*)res; + }) + .Build() + ); + } + + CHECK(Scan(state)); + CHECK(state["String"].Results().size() == 1); + const char* pStr = (const char*)(U64)state["String"]; + CHECK_FALSE(pStr == nullptr); + std::string str = std::string(pStr); + CHECK((str == std::string(string1) || str == std::string(string2))); + std::cout << "Magic: " << str << std::endl; +} \ No newline at end of file diff --git a/tests/main.cpp b/tests/main.cpp deleted file mode 100644 index baa4db4..0000000 --- a/tests/main.cpp +++ /dev/null @@ -1,140 +0,0 @@ -#include -#include -#include -#include - -namespace TBS = ThunderByteScan; - -TEST_CASE("Batch Patterns Scan Results Test") -{ - TBS::BatchPatternsScanResults batchPattern; - - batchPattern.setFirst("UID_1", 0x10); - - CHECK(batchPattern.HasResult("UID_1")); - CHECK_FALSE(batchPattern.HasResult("INVALID_UID")); - - CHECK(batchPattern.getResults("UID_1").size() == 1); - CHECK(batchPattern.getResults("INVALID_UID").size() == 0); - - CHECK(batchPattern.getFirst("UID_1") == 0x10); -} - -/*Dummy X86 Sample 1*/ -std::vector gSample1{ - 0x55, // push ebp - 0x89, 0xE5, // mov ebp, esp - 0xB8, 0x2A, 0x00, 0x00, 0x00, // mov eax, 42 - 0x83, 0xC0, 0x08, // add eax, 8 - 0x83, 0xE8, 0x05, // sub eax, 5 - 0xBB, 0x0A, 0x00, 0x00, 0x00, // mov ebx, 10 - 0xF7, 0xE3, // mul ebx - 0xF7, 0xF1, // div ecx - 0x5D, // pop ebp - 0xC3 // ret -}; - - -/*Dummy X86 Sample 2*/ -std::vector gSample2{ - 0x55, // push ebp - 0x89, 0xE5, // 
mov ebp, esp - 0x83, 0xC0, 0x08, // add eax, 8 - 0xBB, 0x0A, 0x00, 0x00, 0x00, // mov ebx, 10 - 0x83, 0xE8, 0x05, // sub eax, 5 - 0xB8, 0x2A, 0x00, 0x00, 0x00, // mov eax, 42 - 0xB8, 0x08, 0x00, 0x00, 0x00, // mov eax, 8 - 0xF7, 0xE3, // mul ebx - 0xF7, 0xF1, // div ecx - 0x5D, // pop ebp - 0xC3 // ret -}; - -TEST_CASE("Batch Pattern Search Test") -{ - TBS::BatchPatternsScanResults results; - std::vector testCase = { - {"55 89 ? B8 ? ? ? ? 83 ? ? 83 ? ? BB", "X86PatternTag", [](uint64_t result) { return *(uint32_t*)(result + 4); }}, - {"55 89 ? 83 ? ? BB ? ? ? ? 83 ? ? B8", "X86PatternTag", [](uint64_t result) { return *(uint8_t*)(result + 5); }} - }; - - testCase.push_back(TBS::PatternDescBuilder().setPattern("F7 ? 5D C3").Build()); - - bool foundAll = TBS::LocalFindPatternBatch( - testCase, - TBS::VecStart(gSample1), - TBS::VecEnd(gSample1), - results - ); - - CHECK(foundAll); - - CHECK(results.HasResult("X86PatternTag")); - CHECK(results.HasResult("F7 ? 5D C3")); - CHECK(results["X86PatternTag"] == 42); - - // Now with second sample - - results = TBS::BatchPatternsScanResults(); - - foundAll = TBS::LocalFindPatternBatch( - testCase, - TBS::VecStart(gSample2), - TBS::VecEnd(gSample2), - results - ); - - CHECK(foundAll); - - CHECK(results.HasResult("X86PatternTag")); - CHECK(results.HasResult("F7 ? 5D C3")); - CHECK(results["X86PatternTag"] == 8); -} - -TEST_CASE("Test Find Pattern") -{ - const std::string testCase = "55 89 ? B8 ? ? ? ? 83 ? ? 83 ? ? BB"; - std::vector results; - - bool found = TBS::LocalFindPattern(testCase, - TBS::VecStart(gSample1), - TBS::VecEnd(gSample1), results); - - CHECK(found); - CHECK(results.size() == 1); - CHECK(TBS::VecStart(gSample1) == results[0]); -} - -TEST_CASE("Test Builder") -{ - TBS::BatchPatternsScanResults results; - - bool foundAll = TBS::LocalFindPatternBatch( - { TBS::PatternDescBuilder() - .setPattern("B8 ? ? ? ? 83 ? ? 83 ? ? 
BB") - .setUID("Example") - .setStartFind(gSample1.data() + 5) // Purposefully starting after the pattern beginning - .setEndFind(gSample1.end()._Ptr) - .Build() }, - TBS::VecStart(gSample1), - TBS::VecEnd(gSample1), - results - ); - - CHECK_FALSE(foundAll); - - results = TBS::BatchPatternsScanResults(); - - foundAll = TBS::LocalFindPatternBatch( - { TBS::PatternDescBuilder() - .setPattern("B8 ? ? ? ? 83 ? ? 83 ? ? BB") - .setUID("Example") - .setStartFind(gSample1.data() + 3) // Starting right at the pattern beginning (edge case) - .Build() }, - TBS::VecStart(gSample1), - TBS::VecEnd(gSample1), - results - ); - - CHECK(foundAll); -} \ No newline at end of file