From 3023774c06bc2c641205f300f0f19d901f9e83c6 Mon Sep 17 00:00:00 2001
From: r-ex <67599507+r-ex@users.noreply.github.com>
Date: Mon, 16 Dec 2024 14:48:11 +0000
Subject: [PATCH] WIP cache builder tool

---
Review notes (text between the three-dash line above and the first
"diff --git" line is not part of the change):

* Angle-bracketed text in this patch (the #include targets, the template
  arguments and the XML of src/RePak.vcxproj) was missing. Template arguments
  are restored from how each value is used. The MurmurHash3 include targets
  are the ones the public-domain MurmurHash3 sources use. The remaining
  include targets are inferred and carry a NOTE(review) comment.
* The src/RePak.vcxproj hunks still lack their XML and their context lines, so
  they will not apply; regenerate them from the project file.
* cachebuilder.cpp: the "failed to open cache file" warning now names the file
  that is actually opened; size_t values are printed with %zu; the starpak
  index advances for skipped files too; the file size, the entry count and
  each entry's offset/size are validated before they are used in arithmetic,
  allocations or reads; entries too large for the hash's int length are
  skipped with a warning in every build type instead of only asserting; a
  starpak path is recorded only after the file passed validation; the entry
  buffer is a reused std::vector instead of an unchecked _aligned_malloc.
* repak.cpp: a failed cache build is reported through the exit code.
* Hunk line counts and the diffstat match the edited files; the blob ids on
  the "index" lines were not recomputed.

 src/RePak.vcxproj                |   7 ++
 src/application/cachebuilder.cpp | 267 +++++++++++++++++++++++++++++++
 src/application/cachebuilder.h   |  11 ++
 src/application/repak.cpp        |  19 ++-
 src/utils/MurmurHash3.cpp        | 171 +++++++++++++++++++++++++
 src/utils/MurmurHash3.h          |  33 +++++
 6 files changed, 506 insertions(+), 2 deletions(-)
 create mode 100644 src/application/cachebuilder.cpp
 create mode 100644 src/application/cachebuilder.h
 create mode 100644 src/utils/MurmurHash3.cpp
 create mode 100644 src/utils/MurmurHash3.h

diff --git a/src/RePak.vcxproj b/src/RePak.vcxproj
index 31b407a..833177d 100644
--- a/src/RePak.vcxproj
+++ b/src/RePak.vcxproj
@@ -11,6 +11,7 @@
+
@@ -148,10 +149,15 @@
+
+ NotUsing
+ NotUsing
+
+
@@ -254,6 +260,7 @@
+
diff --git a/src/application/cachebuilder.cpp b/src/application/cachebuilder.cpp
new file mode 100644
index 0000000..7a8ee3b
--- /dev/null
+++ b/src/application/cachebuilder.cpp
@@ -0,0 +1,267 @@
+// NOTE(review): the include targets in this file were missing from the patch
+// text; the names below are inferred from the symbols used here -- confirm
+// them against the repository before applying.
+#include <pch.h>
+#include <application/cachebuilder.h>
+#include <public/starpak.h>
+#include <utils/MurmurHash3.h>
+
+#include <emmintrin.h>
+
+REPAK_BEGIN_NAMESPACE(CacheBuilder)
+
+// name of the cache file written into the scanned directory
+static constexpr const char* CACHE_FILE_NAME = "repak_starpak_cache.bin";
+
+// seed handed to MurmurHash3_x64_128 for every data entry
+static constexpr uint32_t CACHE_HASH_SEED = 0x165DCA75;
+
+// header written at the very start of the cache file
+#pragma pack(push, 1)
+struct CacheFileHeader_t
+{
+	// sum of (length + 1) over every path added via AddStarpakPathToCache
+	size_t starpakPathBufferSize;
+
+	size_t dataEntryCount;
+	// offset in the cache file at which the CacheDataEntry_t array starts
+	size_t dataEntriesOffset;
+};
+#pragma pack(pop)
+
+// one record per hashed starpak data entry
+struct CacheDataEntry_t
+{
+	// offset and size of the entry's data inside its starpak file
+	size_t dataOffset;
+	size_t dataSize;
+
+	// MurmurHash3_x64_128 digest of the entry's data
+	__m128i hash;
+
+	// offset of the owning starpak's path in the starpak paths buffer
+	size_t starpakPathOffset;
+};
+static_assert(sizeof(CacheDataEntry_t) == 0x30);
+
+// in-memory contents of the cache file while it is being built
+class CCacheFile
+{
+public:
+	// returns offset in starpak paths buffer to this path
+	size_t AddStarpakPathToCache(const std::string& path)
+	{
+		this->cachedStarpaks.push_back(path);
+
+		assert(this->cachedStarpaks.size() > 0 && this->cachedStarpaks.size() <= UINT32_MAX);
+
+		const size_t pathOffset = this->starpakBufSize;
+
+		// add this path to the starpak buffer size
+		// NOTE(review): the + 1 assumes BinaryIO::WriteString emits one extra byte per string -- confirm
+		this->starpakBufSize += path.length() + 1;
+
+		return pathOffset;
+	}
+
+	// builds the file header from the paths and entries collected so far
+	CacheFileHeader_t ConstructHeader() const
+	{
+		CacheFileHeader_t fileHeader = {};
+		fileHeader.starpakPathBufferSize = starpakBufSize;
+		fileHeader.dataEntryCount = cachedDataEntries.size();
+		fileHeader.dataEntriesOffset = IALIGN4(sizeof(CacheFileHeader_t) + starpakBufSize);
+
+		return fileHeader;
+	}
+
+	size_t starpakBufSize = 0;
+
+	// vector of starpak paths relative to game root
+	// (i.e., paks/Win64/(name).starpak)
+	std::vector<std::string> cachedStarpaks;
+
+	std::vector<CacheDataEntry_t> cachedDataEntries;
+};
+
+// returns every entry directly inside directoryPath whose extension is ".starpak"
+std::vector<fs::path> GetStarpakFilesFromDirectory(const fs::path& directoryPath)
+{
+	std::vector<fs::path> paths;
+	for (const auto& it : std::filesystem::directory_iterator(directoryPath))
+	{
+		const fs::path& entryPath = it.path();
+
+		// extension() is empty when there is none, so no has_extension() test is needed
+		if (entryPath.extension() == ".starpak")
+			paths.push_back(entryPath);
+	}
+
+	return paths;
+}
+
+// Hashes the data entries of every starpak file directly inside directoryPath
+// and writes the result to CACHE_FILE_NAME in that same directory. Starpak files
+// that cannot be opened or fail validation are skipped with a warning.
+// Returns false when directoryPath is not an existing directory or the cache
+// file cannot be opened for writing; true otherwise.
+bool BuildCacheFileFromGamePaksDirectory(const fs::path& directoryPath)
+{
+	// ensure that our directory path is both a path and a directory
+	if (!std::filesystem::exists(directoryPath) || !std::filesystem::is_directory(directoryPath))
+		return false;
+
+	// open cache file at the start so we don't get thru the whole process and fail to write at the end
+	const fs::path cacheFilePath = directoryPath / CACHE_FILE_NAME;
+
+	BinaryIO cacheFileStream;
+	cacheFileStream.Open(cacheFilePath.string(), BinaryIO::Mode_e::Write);
+
+	if (!cacheFileStream.IsWritable())
+	{
+		Warning("CacheBuilder: Failed to open cache file '%s' for writing.\n", cacheFilePath.u8string().c_str());
+		return false;
+	}
+
+	CCacheFile cacheFile;
+	const std::vector<fs::path> foundStarpakPaths = GetStarpakFilesFromDirectory(directoryPath);
+
+	// reused for every entry so each one does not pay for its own allocation;
+	// std::vector's heap storage is aligned for the hash's 64-bit block reads
+	std::vector<char> entryData;
+
+	[[maybe_unused]] size_t starpakIndex = 0;
+	for (const fs::path& starpakPath : foundStarpakPaths)
+	{
+		// counted up front so a file skipped below still advances the index
+		++starpakIndex;
+
+#if _DEBUG
+		printf("\n");
+		//TIME_SCOPE(starpakPath.u8string().c_str());
+		Debug("CacheBuilder: Opening StarPak file '%s' (%zu/%zu) for reading.\n", starpakPath.u8string().c_str(), starpakIndex, foundStarpakPaths.size());
+#endif
+
+		BinaryIO starpakStream;
+		starpakStream.Open(starpakPath.string(), BinaryIO::Mode_e::Read);
+
+		if (!starpakStream.IsReadable())
+		{
+			Warning("CacheBuilder: Failed to open StarPak file '%s' for reading.\n", starpakPath.u8string().c_str());
+			continue;
+		}
+
+		const size_t starpakFileSize = fs::file_size(starpakPath);
+
+		// the file must hold at least its header plus the trailing 8-byte entry count
+		if (starpakFileSize < sizeof(PakStreamSetFileHeader_s) + sizeof(uint64_t))
+		{
+			Warning("CacheBuilder: StarPak file '%s' is too small to contain an entry table.\n", starpakPath.u8string().c_str());
+			continue;
+		}
+
+		const PakStreamSetFileHeader_s starpakFileHeader = starpakStream.Read<PakStreamSetFileHeader_s>();
+
+		if (starpakFileHeader.magic != STARPAK_MAGIC)
+		{
+			Warning("CacheBuilder: StarPak file '%s' had invalid file magic; found %X, expected %X.\n", starpakPath.u8string().c_str(), starpakFileHeader.magic, STARPAK_MAGIC);
+			continue;
+		}
+
+		starpakStream.Seek(starpakFileSize - sizeof(uint64_t), std::ios::beg);
+
+		// get the number of data entries in this starpak file
+		const uint64_t starpakEntryCount = starpakStream.Read<uint64_t>();
+
+		// a count whose entry table cannot fit in the file is corrupt; rejecting it
+		// also keeps the size computation and the seek below from wrapping around
+		if (starpakEntryCount > (starpakFileSize - sizeof(uint64_t)) / sizeof(PakStreamSetEntry_s))
+		{
+			Warning("CacheBuilder: StarPak file '%s' has an invalid entry count (%llu).\n", starpakPath.u8string().c_str(), static_cast<unsigned long long>(starpakEntryCount));
+			continue;
+		}
+
+		const size_t starpakEntryHeadersSize = sizeof(PakStreamSetEntry_s) * starpakEntryCount;
+
+		const std::unique_ptr<PakStreamSetEntry_s[]> starpakEntryHeaders = std::make_unique<PakStreamSetEntry_s[]>(starpakEntryCount);
+
+		// go to the start of the entry structs
+		starpakStream.Seek(starpakFileSize - (sizeof(uint64_t) + starpakEntryHeadersSize), std::ios::beg);
+		starpakStream.Read(reinterpret_cast<char*>(starpakEntryHeaders.get()), starpakEntryHeadersSize);
+
+		// the path is recorded only now, so a starpak rejected above is never listed
+		// NOTE(review): stem() drops the ".starpak" extension, while the comment on
+		// CCacheFile::cachedStarpaks shows it being kept -- confirm which is intended
+		const std::string relativeStarpakPath = ("paks/Win64/" / starpakPath.stem()).string();
+		const size_t starpakPathOffset = cacheFile.AddStarpakPathToCache(relativeStarpakPath);
+
+		for (size_t i = 0; i < starpakEntryCount; ++i)
+		{
+			const PakStreamSetEntry_s& entryHeader = starpakEntryHeaders[i];
+
+			if (entryHeader.dataSize <= 0) [[unlikely]] // not possible
+				continue;
+
+			if (entryHeader.offset < 0x1000) [[unlikely]] // also not possible
+				continue;
+
+			const uint64_t entryOffset = static_cast<uint64_t>(entryHeader.offset);
+			const uint64_t entrySize = static_cast<uint64_t>(entryHeader.dataSize);
+
+			// an entry that runs past the end of the file cannot be read in full
+			if (entryOffset > starpakFileSize || entrySize > starpakFileSize - entryOffset) [[unlikely]]
+			{
+				Warning("CacheBuilder: StarPak file '%s' entry %zu lies outside the file; skipping.\n", starpakPath.u8string().c_str(), i);
+				continue;
+			}
+
+			// ideally we don't have entries over 2gb; MurmurHash3_x64_128 takes the
+			// length as an int, so a larger entry would be hashed with a truncated size
+			if (entrySize >= static_cast<uint64_t>(INT32_MAX)) [[unlikely]]
+			{
+				Warning("CacheBuilder: StarPak file '%s' entry %zu is too large to hash; skipping.\n", starpakPath.u8string().c_str(), i);
+				continue;
+			}
+
+			entryData.resize(static_cast<size_t>(entrySize));
+
+			starpakStream.Seek(entryHeader.offset, std::ios::beg);
+			starpakStream.Read(entryData.data(), entryHeader.dataSize);
+
+			CacheDataEntry_t cacheEntry = {};
+			cacheEntry.starpakPathOffset = starpakPathOffset;
+			cacheEntry.dataOffset = entryHeader.offset;
+			cacheEntry.dataSize = entryHeader.dataSize;
+
+			MurmurHash3_x64_128(entryData.data(), static_cast<int>(entrySize), CACHE_HASH_SEED, &cacheEntry.hash);
+
+			cacheFile.cachedDataEntries.push_back(cacheEntry);
+		}
+
+		starpakStream.Close();
+	}
+
+	CacheFileHeader_t cacheHeader = cacheFile.ConstructHeader();
+
+	cacheFileStream.Write(cacheHeader);
+
+	for (const std::string& it : cacheFile.cachedStarpaks)
+	{
+		cacheFileStream.WriteString(it);
+	}
+
+	// the entry array starts at the aligned offset recorded in the header
+	cacheFileStream.Seek(cacheHeader.dataEntriesOffset);
+
+	for (const CacheDataEntry_t& dataEntry : cacheFile.cachedDataEntries)
+	{
+		cacheFileStream.Write(dataEntry);
+	}
+
+	cacheFileStream.Close();
+
+	return true;
+}
+
+
+REPAK_END_NAMESPACE()
\ No newline at end of file
diff --git a/src/application/cachebuilder.h b/src/application/cachebuilder.h
new file mode 100644
index 0000000..ab10295
--- /dev/null
+++ b/src/application/cachebuilder.h
@@ -0,0 +1,11 @@
+#pragma once
+// NOTE(review): both include targets were missing from the patch text; these are
+// inferred from the names this header uses -- confirm against the repository.
+#include <pch.h>
+#include <filesystem>
+
+REPAK_BEGIN_NAMESPACE(CacheBuilder)
+
+bool BuildCacheFileFromGamePaksDirectory(const std::filesystem::path& directoryPath);
+
+REPAK_END_NAMESPACE()
\ No newline at end of file
diff --git a/src/application/repak.cpp b/src/application/repak.cpp
index 44d7619..752fc6f 100644
--- a/src/application/repak.cpp
+++ b/src/application/repak.cpp
@@ -2,6 +2,9 @@
 #include "assets/assets.h"
 #include "logic/pakfile.h"
 
+// NOTE(review): include target inferred (it was missing from the patch text) -- confirm
+#include <application/cachebuilder.h>
+
 const char startupVersion[] = {
 	"RePak - Built "
 	__DATE__
@@ -18,8 +21,20 @@ int main(int argc, char** argv)
 	if (argc < 2)
 		Error("invalid usage\n");
 
-	CPakFile pakFile(8);
-	pakFile.BuildFromMap(argv[1]);
+	const std::string targetPath = argv[1];
+
+	// this should be changed to proper CLI handling and mode selection
+	if (std::filesystem::is_directory(targetPath))
+	{
+		// report a failed cache build through the process exit code
+		if (!CacheBuilder::BuildCacheFileFromGamePaksDirectory(targetPath))
+			return EXIT_FAILURE;
+	}
+	else
+	{
+		CPakFile pakFile(8);
+		pakFile.BuildFromMap(targetPath);
+	}
 
 	return EXIT_SUCCESS;
 }
\ No newline at end of file
diff --git a/src/utils/MurmurHash3.cpp b/src/utils/MurmurHash3.cpp
new file mode 100644
index 0000000..529b2b5
--- /dev/null
+++ b/src/utils/MurmurHash3.cpp
@@ -0,0 +1,171 @@
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+// Note - The x86 and x64 versions do _not_ produce the same results, as the
+// algorithms are optimized for their respective platforms. You can still
+// compile and run any of them on any platform, but your performance with the
+// non-native version will be less than optimal.
+
+#include "MurmurHash3.h"
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+#define FORCE_INLINE __forceinline
+
+#include <stdlib.h>
+
+#define ROTL32(x,y) _rotl(x,y)
+#define ROTL64(x,y) _rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x)
+
+// Other compilers
+
+#else // defined(_MSC_VER)
+
+#define FORCE_INLINE inline __attribute__((always_inline))
+
+inline uint32_t rotl32(uint32_t x, int8_t r)
+{
+	return (x << r) | (x >> (32 - r));
+}
+
+inline uint64_t rotl64(uint64_t x, int8_t r)
+{
+	return (x << r) | (x >> (64 - r));
+}
+
+#define ROTL32(x,y) rotl32(x,y)
+#define ROTL64(x,y) rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x##LLU)
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+// Block read - if your platform needs to do endian-swapping or can only
+// handle aligned reads, do the conversion here
+
+FORCE_INLINE uint32_t getblock32(const uint32_t* p, int i)
+{
+	return p[i];
+}
+
+FORCE_INLINE uint64_t getblock64(const uint64_t* p, int i)
+{
+	return p[i];
+}
+
+//-----------------------------------------------------------------------------
+// Finalization mix - force all bits of a hash block to avalanche
+
+FORCE_INLINE uint32_t fmix32(uint32_t h)
+{
+	h ^= h >> 16;
+	h *= 0x85ebca6b;
+	h ^= h >> 13;
+	h *= 0xc2b2ae35;
+	h ^= h >> 16;
+
+	return h;
+}
+
+//----------
+
+FORCE_INLINE uint64_t fmix64(uint64_t k)
+{
+	k ^= k >> 33;
+	k *= BIG_CONSTANT(0xff51afd7ed558ccd);
+	k ^= k >> 33;
+	k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
+	k ^= k >> 33;
+
+	return k;
+}
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x64_128(const void* key, const int len,
+	const uint32_t seed, void* out)
+{
+	const uint8_t* data = (const uint8_t*)key;
+	const int nblocks = len / 16;
+
+	uint64_t h1 = seed;
+	uint64_t h2 = seed;
+
+	const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
+	const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
+
+	//----------
+	// body
+
+	const uint64_t* blocks = (const uint64_t*)(data);
+
+	for (int i = 0; i < nblocks; i++)
+	{
+		uint64_t k1 = getblock64(blocks, i * 2 + 0);
+		uint64_t k2 = getblock64(blocks, i * 2 + 1);
+
+		k1 *= c1; k1 = ROTL64(k1, 31); k1 *= c2; h1 ^= k1;
+
+		h1 = ROTL64(h1, 27); h1 += h2; h1 = h1 * 5 + 0x52dce729;
+
+		k2 *= c2; k2 = ROTL64(k2, 33); k2 *= c1; h2 ^= k2;
+
+		h2 = ROTL64(h2, 31); h2 += h1; h2 = h2 * 5 + 0x38495ab5;
+	}
+
+	//----------
+	// tail
+
+	const uint8_t* tail = (const uint8_t*)(data + nblocks * 16ull);
+
+	uint64_t k1 = 0;
+	uint64_t k2 = 0;
+
+	switch (len & 15)
+	{
+	case 15: k2 ^= ((uint64_t)tail[14]) << 48;
+	case 14: k2 ^= ((uint64_t)tail[13]) << 40;
+	case 13: k2 ^= ((uint64_t)tail[12]) << 32;
+	case 12: k2 ^= ((uint64_t)tail[11]) << 24;
+	case 11: k2 ^= ((uint64_t)tail[10]) << 16;
+	case 10: k2 ^= ((uint64_t)tail[9]) << 8;
+	case 9: k2 ^= ((uint64_t)tail[8]) << 0;
+		k2 *= c2; k2 = ROTL64(k2, 33); k2 *= c1; h2 ^= k2;
+
+	case 8: k1 ^= ((uint64_t)tail[7]) << 56;
+	case 7: k1 ^= ((uint64_t)tail[6]) << 48;
+	case 6: k1 ^= ((uint64_t)tail[5]) << 40;
+	case 5: k1 ^= ((uint64_t)tail[4]) << 32;
+	case 4: k1 ^= ((uint64_t)tail[3]) << 24;
+	case 3: k1 ^= ((uint64_t)tail[2]) << 16;
+	case 2: k1 ^= ((uint64_t)tail[1]) << 8;
+	case 1: k1 ^= ((uint64_t)tail[0]) << 0;
+		k1 *= c1; k1 = ROTL64(k1, 31); k1 *= c2; h1 ^= k1;
+	};
+
+	//----------
+	// finalization
+
+	h1 ^= len; h2 ^= len;
+
+	h1 += h2;
+	h2 += h1;
+
+	h1 = fmix64(h1);
+	h2 = fmix64(h2);
+
+	h1 += h2;
+	h2 += h1;
+
+	((uint64_t*)out)[0] = h1;
+	((uint64_t*)out)[1] = h2;
+}
diff --git a/src/utils/MurmurHash3.h b/src/utils/MurmurHash3.h
new file mode 100644
index 0000000..719e774
--- /dev/null
+++ b/src/utils/MurmurHash3.h
@@ -0,0 +1,33 @@
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+#ifndef _MURMURHASH3_H_
+#define _MURMURHASH3_H_
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER) && (_MSC_VER < 1600)
+
+typedef unsigned char uint8_t;
+typedef unsigned int uint32_t;
+typedef unsigned __int64 uint64_t;
+
+// Other compilers
+
+#else // defined(_MSC_VER)
+
+#include <stdint.h>
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x64_128(const void* key, int len, uint32_t seed, void* out);
+
+//-----------------------------------------------------------------------------
+
+#endif // _MURMURHASH3_H_
\ No newline at end of file