diff --git a/.editorconfig b/.editorconfig index 4d9b62f..797117e 100644 --- a/.editorconfig +++ b/.editorconfig @@ -15,3 +15,9 @@ indent_size = 4 [*.md] trim_trailing_whitespace = false + +[CascLib/**] +trim_trailing_whitespace = false +insert_final_newline = unset +indent_style = unset +end_of_line = unset diff --git a/.gitmodules b/.gitmodules index 578bfec..42b9300 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,6 @@ [submodule "CascLib"] path = CascLib - url = https://github.com/ladislav-zezula/CascLib.git + url = https://github.com/Talv/CascLib.git [submodule "cxxopts"] path = cxxopts url = https://github.com/jarro2783/cxxopts.git diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..b04449e --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,25 @@ +# Change Log + +## [2.0.0] - 2019-07-26 + +* Refactored the entire codebase. +* `CascLib` upgraded to `1.20`: + * Enumerating MNDX storage is now much faster (SC2 & Storm data) + * The library now outputs filepaths with backslashes, instead of the forward slashes it previously used for SC2/Storm. + * Applied [patch](https://github.com/Talv/CascLib/commit/b2646e578b43641a46df5725d951b093a7cefce0) to preserve original case sensitivity of filenames. +* The core functionality remains intact; however, some of the existing options/arguments have been renamed and/or reorganized. Hopefully for the better. + +## [1.4.0] - 2019-07-22 + +* Introduced compatibility with Windows + +## [1.3.0] - 2019-05-26 + +* `CascLib` upgraded to [ef66d7bb46f0bb4dd782d3b68eb7dcc358d52a13](https://github.com/ladislav-zezula/CascLib/commit/ef66d7bb46f0bb4dd782d3b68eb7dcc358d52a13). + +## [1.1.0] - 2019-05-26 + +* Forked [storm-extract#216812d7f91ab2ca72b04f2561c587c754273489](https://github.com/nydus/storm-extract/tree/216812d7f91ab2ca72b04f2561c587c754273489). +* Rebranded to `stormex`. +* Removed NodeJS bindings, and everything related. +* Enhanced CLI app with regex pattern filters on a filelist. 
diff --git a/CascLib b/CascLib index ef66d7b..b2646e5 160000 --- a/CascLib +++ b/CascLib @@ -1 +1 @@ -Subproject commit ef66d7bb46f0bb4dd782d3b68eb7dcc358d52a13 +Subproject commit b2646e578b43641a46df5725d951b093a7cefce0 diff --git a/README.md b/README.md index 68e790e..d148173 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,6 @@ # stormex -Command-line application to list and extract files from the [CASC](https://wowdev.wiki/CASC) (Content -Addressable Storage Container) used in Blizzard games. +Command-line application to enumerate and extract files from [CASC](https://wowdev.wiki/CASC) (Content Addressable Storage Container) used in Blizzard games. Tested on: @@ -40,59 +39,89 @@ MSBuild STORMEXTRACT.sln /p:Configuration=Release ## Usage ``` -Usage: stormex [options] - -This program can list and optionally extract files from a CASC storage container. - - -h, --help Display this help +Command-line application to enumerate and extract files from CASC (Content Addressable Storage Container) used in Blizzard games. + +Regex pattern is expected to follow ECMAScript syntax + +Usage: + stormex [OPTION...] [STORAGE] + + Common options: + -h, --help Print help. + -v, --verbose Verbose output. + -q, --quiet Supresses output entirely. + --version Print version. + + Base options: + -S, --storage [PATH] Path to directory with CASC. + + List options: + -l, --list List files inside CASC. + -d, --details Show details about each file - such as its size. + + Filter options: + -s, --search [SEARCH...] Search for files using a substring. + --smart-case Searches case insensitively if the pattern is + all lowercase. Search case sensitively + otherwise. (default: true) + -i, --in-regex [PATTERN...] Include files matching regex. + -I, --in-iregex [PATTERN...] Include files matching regex case + insensitively. + -e, --ex-regex [PATTERN...] Exclude files matching regex. + -E, --ex-iregex [PATTERN...] Exclude files matching regex case + insensitively. 
+ + Extract options: + -x, --extract-all Extract all files matching search filters. + -X, --extract-file [FILE...] Extract file(s) matching exactly. + -o, --outdir [PATH] Output directory for extracted files. + (default: ./) + -p, --stdout Pipe content of a file(s) to stdout instead + writing it to the filesystem. + -P, --progress Notify about progress during extraction. + -n, --dry-run Simulate extraction process without writing + any data to the filesystem. +``` -Arguments: - Path to game installation folder +### Examples -Options: - General: - -v, --verbose Prints more information - -q, --quiet Prints nothing, nada, zip +#### List content - Common: - -s, --search Restrict results to full paths matching STRING - --ignore-case Case-insensitive pattern - --include Include files matching ECMAScript regex PATTERN - --exclude Exclude files matching ECMAScript regex PATTERN +List based on a search phrase - Extract: - -x, --extract Extract the files found - -o, --out The folder where the files are extracted (extract only) +```sh +stormex '/mnt/s1/BnetGameLib/StarCraft II' -s 'buildid' -l ``` -### Examples - -#### List content of CASC based on regex pattern +List all files with details and sort by filesize. 
```sh -stormex '/mnt/s1/BnetGameLib/StarCraft II' --include '/BuildId.txt$'` +stormex '/mnt/s1/BnetGameLib/StarCraft II' -ld | sort -h ``` -#### Extract content of CASC based on inclusion and exclusion patterns +#### Extract files based on inclusion and exclusion patterns ```sh -stormex '/mnt/s1/BnetGameLib/StarCraft II' --ignore-case -v \ - --include '\/(DocumentInfo|Objects|Regions|Triggers)$' \ - --include '\.(fx|xml|txt|json|galaxy|SC2Style|SC2Hotkeys|SC2Lib|TriggerLib|SC2Interface|SC2Locale|SC2Components|SC2Layout)$' \ - --exclude '(dede|eses|esmx|frfr|itit|kokr|plpl|ptbr|ruru|zhcn|zhtw)\.sc2data' \ - --exclude '(PreloadAssetDB|TextureReductionValues)\.txt$' \ +stormex '/mnt/s1/BnetGameLib/StarCraft II' \ + -I '\/(DocumentInfo|Objects|Regions|Triggers)$' \ + -I '\.(fx|xml|txt|json|galaxy|SC2Style|SC2Hotkeys|SC2Lib|TriggerLib|SC2Interface|SC2Locale|SC2Components|SC2Layout)$' \ + -E '(dede|eses|esmx|frfr|itit|kokr|plpl|ptbr|ruru|zhcn|zhtw)\.sc2data' \ + -E '(PreloadAssetDB|TextureReductionValues)\.txt$' \ -x -o './out' ``` -## Credits +#### Extract to stdout -The library absolutely, unequivocably, could not be possible without -[ladislav-zezula's CascLib](https://github.com/ladislav-zezula/CascLib) -library. Many thanks to [ladislav-zezula](https://github.com/ladislav-zezula). +Extract a specific file to `stdout` and pipe the stream to another program. For example, convert DDS to PNG and display it with ImageMagick. 
-~~Most of the program was canibalized from -[Kanma's CASCExtractor](https://github.com/Kanma/CASCExtractor/) with the -purpose of customizing it for Heroes of the Storm and integration into -[NodeJS](https://www.nodejs.org).~~ +```sh +stormex -S '/mnt/s1/BnetGameLib/StarCraft II' -X 'mods/core.sc2mod/base.sc2data/EditorData/Images/HeroesEditor_Logo.tga' -p | display tga: +``` + +```sh +stormex -S '/mnt/s1/BnetGameLib/StarCraft II' -X 'mods/core.sc2mod/base.sc2data/EditorData/Images/EditorLogo.dds' -p | magick dds: png: | display png: +``` + +## Credits -Most of the program was canibalized from [storm-extract](https://github.com/nydus/storm-extract) with the purpose of reducing code complexity by removing NodeJS stuff. And transitioning it into more streamlined cli app. +* Powered by [CascLib](https://github.com/ladislav-zezula/CascLib) diff --git a/include/storage.hpp b/include/storage.hpp index 738e707..4f47e2b 100644 --- a/include/storage.hpp +++ b/include/storage.hpp @@ -11,9 +11,41 @@ #include "common.hpp" #include "util.hpp" -struct StorageSearchResult +// Based on CASC_FIND_DATA +struct STORAGE_SEARCH_RESULT { - size_t fileSize; + // Full name of the found file. In case when this is CKey/EKey, + // this will be just string representation of the key stored in 'FileKey' + std::string filename; + + // Content key. This is present if the CASC_FEATURE_ROOT_CKEY is present + BYTE CKey[MD5_HASH_SIZE]; + + // Encoded key. This is always present. + BYTE EKey[MD5_HASH_SIZE]; + + // Tag mask. Only valid if the storage supports tags, otherwise 0 + ULONGLONG TagBitMask; + + // File data ID. Only valid if the storage supports file data IDs, otherwise CASC_INVALID_ID + DWORD dwFileDataId; + + // Size of the file, as retrieved from CKey entry or EKey entry + DWORD dwFileSize; + + // Locale flags. Only valid if the storage supports locale flags, otherwise CASC_INVALID_ID + DWORD dwLocaleFlags; + + // Content flags. 
Only valid if the storage supports content flags, otherwise CASC_INVALID_ID + DWORD dwContentFlags; + + // Hints as for which open method is suitable + DWORD bFileAvailable:1; // If true the file is available locally + DWORD bCanOpenByName:1; + DWORD bCanOpenByDataId:1; + DWORD bCanOpenByCKey:1; + DWORD bCanOpenByEKey:1; + CASC_NAME_TYPE NameType; }; /** @@ -44,7 +76,7 @@ class StorageExplorer { // TODO: CascGetStorageInfo - std::vector enumerateFiles(); + bool enumerateFiles(std::vector& searchResults); /** * @brief extract data of given file to location specified under filesystem diff --git a/src/storage.cc b/src/storage.cc index abe1c15..7d8336f 100644 --- a/src/storage.cc +++ b/src/storage.cc @@ -1,4 +1,3 @@ -#include #include "storage.hpp" int StorageExplorer::openStorage(std::string src) @@ -20,23 +19,30 @@ bool StorageExplorer::closeStorage() return CascCloseStorage(m_hStorage); } -std::vector StorageExplorer::enumerateFiles() +bool StorageExplorer::enumerateFiles(std::vector& searchResults) { - std::vector ret; - CASC_FIND_DATA findData; HANDLE handle = CascFindFirstFile(m_hStorage, "*", &findData, NULL); + if (handle == INVALID_HANDLE_VALUE) { + PLOG_FATAL << "CascFindFirstFile E(" << GetLastError() << ")"; + return false; + } + do { - findData.dwFileSize; - findData.FileKey; - findData.szFileName; - ret.push_back(findData.szFileName); + if (!findData.bFileAvailable) continue; + + STORAGE_SEARCH_RESULT *record = new STORAGE_SEARCH_RESULT(); + record->filename = findData.szFileName; + memcpy(record->CKey, findData.CKey, sizeof(record->CKey)); + memcpy(record->EKey, findData.EKey, sizeof(record->EKey)); + record->dwFileSize = findData.dwFileSize; + searchResults.push_back(record); } while (CascFindNextFile(handle, &findData)); CascFindClose(handle); - return ret; + return true; } size_t StorageExplorer::extractFileToPath(const std::string& storedFilename, const std::string& targetFilename) diff --git a/src/stormex.cc b/src/stormex.cc index 5f42dff..596889d 
100644 --- a/src/stormex.cc +++ b/src/stormex.cc @@ -5,11 +5,13 @@ #include #include #include +#include #include "cxxopts.hpp" #include "common.hpp" #include "util.hpp" #include "storage.hpp" +#include "common/Common.h" class StormexContext { public: @@ -31,7 +33,8 @@ class StormexContext { } m_filters; struct { - std::vector extractFiles; + bool doExtractAll; + std::vector xFilenames; std::string outDir; bool stdOut; bool progress; @@ -83,7 +86,7 @@ void parseArguments(int argc, char* argv[]) "\n" "Command-line application to enumerate and extract files from CASC (Content Addressable Storage Container) used in Blizzard games.\n" "\n" - "Expected regex pattern should follow ECMAScript syntax\n"); + "Regex pattern is expected to follow ECMAScript syntax\n"); options .positional_help("[STORAGE]") .show_positional_help(); @@ -91,14 +94,14 @@ void parseArguments(int argc, char* argv[]) options.add_options("Common") ("h,help", "Print help.") ("v,verbose", "Verbose output.", cxxopts::value()) - ("q,quiet", "Supresses output of messages entirely.", cxxopts::value()) + ("q,quiet", "Supresses output entirely.", cxxopts::value()) ("version", "Print version."); options.add_options("Base") - ("S,storage", "Path to directory with CASC.", cxxopts::value(appCtx.m_base.storageSrc), "[PATH]") - ("L,listfile", - "Map filenames from provided newline delimeted (LF or CRLF) textfile, instead of enumerating content of the archive, " - "which is an extensive operation. It combines well when extracting single files, or a small group that matches given substring or regex pattern.", cxxopts::value(appCtx.m_base.listfileSrc), "[FILE]"); + ("S,storage", "Path to directory with CASC.", cxxopts::value(appCtx.m_base.storageSrc), "[PATH]"); + // ("L,listfile", + // "Map filenames from provided newline delimeted (LF or CRLF) textfile, instead of enumerating content of the archive, " + // "which is an extensive operation. 
It combines well when extracting single files, or a small group that matches given substring or regex pattern.", cxxopts::value(appCtx.m_base.listfileSrc), "[FILE]"); options.add_options("List") ("l,list", "List files inside CASC.", cxxopts::value(appCtx.m_list.listFiles)) @@ -115,10 +118,13 @@ void parseArguments(int argc, char* argv[]) ("E,ex-iregex", "Exclude files matching regex case insensitively.", cxxopts::value>(), "[PATTERN...]"); options.add_options("Extract") - ("x,extract", - "Extract file(s) matching exactly. Argument is optional - if ommitted it will extract all files matching search filters.", - cxxopts::value>(appCtx.m_extract.extractFiles), "[FILE...]") - ("O,outdir", "Output directory for extracted files.", cxxopts::value(appCtx.m_extract.outDir)->default_value("./"), "[PATH]") + ("x,extract-all", + "Extract all files matching search filters.", + cxxopts::value(appCtx.m_extract.doExtractAll)) + ("X,extract-file", + "Extract file(s) matching exactly.", + cxxopts::value>(appCtx.m_extract.xFilenames), "[FILE...]") + ("o,outdir", "Output directory for extracted files.", cxxopts::value(appCtx.m_extract.outDir)->default_value("./"), "[PATH]") ("p,stdout", "Pipe content of a file(s) to stdout instead writing it to the filesystem.", cxxopts::value(appCtx.m_extract.stdOut)) ("P,progress", "Notify about progress during extraction.", cxxopts::value(appCtx.m_extract.progress)) ("n,dry-run", "Simulate extraction process without writing any data to the filesystem.", cxxopts::value(appCtx.m_extract.dryRun)); @@ -133,7 +139,7 @@ void parseArguments(int argc, char* argv[]) } if (result.count("version")) { - std::cerr << "stormex v" << stormexVersion << std::endl; + std::cerr << "stormex v" << stormexVersion << " | CascLib v" << CASCLIB_VERSION_STRING << std::endl; exit(0); } @@ -165,7 +171,7 @@ void parseArguments(int argc, char* argv[]) } } -void extractFiles(StorageExplorer& stExplorer, const std::vector& filesToExtract) +void extractFilenames(StorageExplorer& 
stExplorer, const std::vector& filesToExtract) { PLOG_DEBUG << "Preparing to extract " << filesToExtract.size() << " files.."; if (appCtx.m_extract.dryRun) { @@ -193,11 +199,18 @@ void extractFiles(StorageExplorer& stExplorer, const std::vector& f } targetFile += storedFilename; - PLOG_DEBUG << "Extracting file: " << storedFilename; + if (appCtx.m_extract.progress) { + // TODO: display progress + } + + PLOG_INFO << "Extracting file " << storedFilename; size_t fileSize = 0; if (!appCtx.m_extract.dryRun) { + // normalize slashes in the paths received from CASC and force '/' + std::replace(targetFile.begin(), targetFile.end(), '\\', PATH_SEP_CHAR); + fileSize = stExplorer.extractFileToPath(storedFilename, targetFile); - PLOG_DEBUG << "Saved at: " << targetFile << " [" << formatFileSize(fileSize) << "]"; + PLOG_DEBUG << "Written " << formatFileSize(fileSize) << " to " << targetFile; } else { } @@ -205,7 +218,7 @@ void extractFiles(StorageExplorer& stExplorer, const std::vector& f } } -bool searchRegexMulti(const std::string filename, const std::vector& patterns) +bool searchRegexMulti(const std::string& filename, const std::vector& patterns) { for (const auto& current : patterns) { if (regex_search(filename, current)) { @@ -216,22 +229,22 @@ bool searchRegexMulti(const std::string filename, const std::vector& return false; } -std::vector filterFiles(const std::vector& inputList) +std::vector filterFiles(const std::vector& inputList) { - std::vector filteredList; + std::vector filteredList; for (const auto& entry : inputList) { if (appCtx.m_filters.searchPhrase.size()) { bool c = false; for (const auto& needle : appCtx.m_filters.searchPhrase) { - c = findStringIC(entry, needle); + c = findStringIC(entry->filename, needle); if (c) break; } if (!c) continue; } - if (appCtx.m_filters.includePatterns.size() && !searchRegexMulti(entry, appCtx.m_filters.includePatterns)) continue; - if (appCtx.m_filters.excludePatterns.size() && searchRegexMulti(entry, 
appCtx.m_filters.excludePatterns)) continue; + if (appCtx.m_filters.includePatterns.size() && !searchRegexMulti(entry->filename, appCtx.m_filters.includePatterns)) continue; + if (appCtx.m_filters.excludePatterns.size() && searchRegexMulti(entry->filename, appCtx.m_filters.excludePatterns)) continue; filteredList.push_back(entry); } @@ -253,10 +266,13 @@ std::vector readListFile(const std::string& filename) return filelist; } -void listFiles(StorageExplorer& stExplorer) +std::vector enumerateFiles(StorageExplorer& stExplorer) { PLOG_INFO << "Enumerating all files in storage.."; - auto inputList = stExplorer.enumerateFiles(); + std::vector inputList; + if (!stExplorer.enumerateFiles(inputList)) { + return inputList; + } auto filteredList = inputList; if (appCtx.m_filters.searchPhrase.size() || appCtx.m_filters.includePatterns.size() || appCtx.m_filters.excludePatterns.size()) { @@ -264,11 +280,8 @@ void listFiles(StorageExplorer& stExplorer) filteredList = filterFiles(inputList); } - for (const auto& entry : filteredList) { - std::cout << entry << std::endl; - } - - PLOG_DEBUG << "count " << inputList.size() << " : " << filteredList.size(); + PLOG_DEBUG << "list count " << inputList.size() << " : " << filteredList.size(); + return filteredList; } int main(int argc, char* argv[]) @@ -286,11 +299,35 @@ int main(int argc, char* argv[]) PLOG_INFO << "Storage opened " << static_cast(stExplorer.getHandle()); try { + auto fResults = enumerateFiles(stExplorer); + if (appCtx.m_list.listFiles) { - listFiles(stExplorer); + for (const auto& entry : fResults) { + char keyBuff[MD5_STRING_SIZE + 1]; + std::string tmps; + if (appCtx.m_list.showDetails) { + tmps = formatFileSize(entry->dwFileSize); + std::cout << tmps << std::setw(8 - tmps.length()) << " "; + std::cout << StringFromMD5((LPBYTE)entry->CKey, keyBuff) << " "; + std::cout << StringFromMD5((LPBYTE)entry->EKey, keyBuff) << " "; + } + std::cout << entry->filename; + std::cout << std::endl; + } } - else if 
(appCtx.m_extract.extractFiles.size()) { - extractFiles(stExplorer, appCtx.m_extract.extractFiles); + else if (appCtx.m_extract.doExtractAll) { + std::vector fList; + for (const auto& item : fResults) { + fList.push_back(item->filename); + } + extractFilenames(stExplorer, fList); + } + else if (appCtx.m_extract.xFilenames.size()) { + for (auto& item : appCtx.m_extract.xFilenames) { + // force backslashes regardless of the platform - that's the expected output from CASC anyway, and it'll get normalized later + std::replace(item.begin(), item.end(), '/', '\\'); + } + extractFilenames(stExplorer, appCtx.m_extract.xFilenames); } } catch (const std::exception& e) { stExplorer.closeStorage(); diff --git a/src/util.cc b/src/util.cc index 173db32..648e79c 100644 --- a/src/util.cc +++ b/src/util.cc @@ -1,4 +1,6 @@ #include +#include +#include #include #include #include @@ -22,9 +24,6 @@ bool pathExists(const std::string& target) int ensureDirExists(std::string strDestName) { - // normalize slashes in the paths received from CASC and force '/' - std::replace(strDestName.begin(), strDestName.end(), '\\', '/'); - // ensure directory path to the file exists size_t pos = -1; while ((pos = strDestName.find('/', pos + 1)) != std::string::npos) { @@ -52,21 +51,21 @@ template std::string valueToString(T num) { std::ostringstream convert; - convert << num; + convert << std::setprecision(8) << num; return convert.str(); } static double roundOff(double n) { - double d = n * 100.0; + double d = n * 10.0; int i = d + 0.5; - d = (float)i / 100.0; + d = (float)i / 10.0; return d; } std::string formatFileSize(size_t size) { - static const char *SIZES[] = { "B", "KB", "MB", "GB" }; + static const char *SIZES[] = { "B", "K", "M", "G" }; int div = 0; size_t rem = 0; @@ -77,7 +76,9 @@ std::string formatFileSize(size_t size) } double size_d = (float)size + (float)rem / 1024.0; - std::string result = valueToString(roundOff(size_d)) + " " + SIZES[div]; + std::string result = 
valueToString(roundOff(size_d)) + SIZES[div]; + std::replace(result.begin(), result.end(), '.', ','); + return result; }