From 3023774c06bc2c641205f300f0f19d901f9e83c6 Mon Sep 17 00:00:00 2001
From: r-ex <67599507+r-ex@users.noreply.github.com>
Date: Mon, 16 Dec 2024 14:48:11 +0000
Subject: [PATCH] WIP cache builder tool
---
src/RePak.vcxproj | 7 ++
src/application/cachebuilder.cpp | 207 +++++++++++++++++++++++++++++++
src/application/cachebuilder.h | 9 ++
src/application/repak.cpp | 16 ++-
src/utils/MurmurHash3.cpp | 171 +++++++++++++++++++++++++
src/utils/MurmurHash3.h | 33 +++++
6 files changed, 441 insertions(+), 2 deletions(-)
create mode 100644 src/application/cachebuilder.cpp
create mode 100644 src/application/cachebuilder.h
create mode 100644 src/utils/MurmurHash3.cpp
create mode 100644 src/utils/MurmurHash3.h
diff --git a/src/RePak.vcxproj b/src/RePak.vcxproj
index 31b407a..833177d 100644
--- a/src/RePak.vcxproj
+++ b/src/RePak.vcxproj
@@ -11,6 +11,7 @@
+
@@ -148,10 +149,15 @@
+
+ NotUsing
+ NotUsing
+
+
@@ -254,6 +260,7 @@
+
diff --git a/src/application/cachebuilder.cpp b/src/application/cachebuilder.cpp
new file mode 100644
index 0000000..7a8ee3b
--- /dev/null
+++ b/src/application/cachebuilder.cpp
@@ -0,0 +1,207 @@
+#include
+#include
+#include
+#include
+
+#include
+
+REPAK_BEGIN_NAMESPACE(CacheBuilder)
+
+#pragma pack(push, 1)
+struct CacheFileHeader_t // header record; it is the first thing written to the cache file
+{
+ size_t starpakPathBufferSize; // combined byte size of the starpak path strings written after the header
+
+ size_t dataEntryCount; // number of CacheDataEntry_t records in the file
+ size_t dataEntriesOffset; // file offset of the first record: IALIGN4(sizeof(CacheFileHeader_t) + starpakPathBufferSize)
+};
+#pragma pack(pop)
+
+struct CacheDataEntry_t // one record per data entry found in a scanned starpak file
+{
+ size_t dataOffset; // offset of the entry's data inside its starpak file
+ size_t dataSize; // size of the entry's data in bytes
+
+ __m128i hash; // 128-bit MurmurHash3_x64_128 of the entry's data (seed 0x165DCA75)
+
+ size_t starpakPathOffset; // offset of the owning starpak's path, as returned by AddStarpakPathToCache
+};
+static_assert(sizeof(CacheDataEntry_t) == 0x30); // guards the record size against accidental layout changes
+
+// Accumulates the starpak paths and hashed data entries that make up one cache file.
+class CCacheFile
+{
+public:
+    CCacheFile() : starpakBufSize(0) {}
+
+    // returns offset in starpak paths buffer to this path
+    size_t AddStarpakPathToCache(const std::string& path)
+    {
+        this->cachedStarpaks.push_back(path);
+        assert(this->cachedStarpaks.size() <= UINT32_MAX);
+
+        const size_t pathOffset = this->starpakBufSize;
+
+        // add this path to the starpak buffer size (+1 is presumably the string terminator; confirm against WriteString)
+        this->starpakBufSize += path.length() + 1;
+
+        return pathOffset;
+    }
+
+    // Builds the file header for the current contents; data entries start after header + paths, rounded up by IALIGN4.
+    CacheFileHeader_t ConstructHeader() const
+    {
+        CacheFileHeader_t fileHeader = {};
+        fileHeader.starpakPathBufferSize = starpakBufSize;
+        fileHeader.dataEntryCount = cachedDataEntries.size();
+        fileHeader.dataEntriesOffset = IALIGN4(sizeof(CacheFileHeader_t) + starpakBufSize);
+        return fileHeader;
+    }
+
+    size_t starpakBufSize; // combined size in bytes of every path added so far
+
+    // vector of starpak paths relative to game root (i.e., paks/Win64/(name).starpak)
+    std::vector<std::string> cachedStarpaks;
+
+    // one entry per hashed data block, in the order they were found
+    std::vector<CacheDataEntry_t> cachedDataEntries;
+};
+
+// Collects every entry directly inside `directoryPath` whose extension is exactly ".starpak"
+// (no recursion into subdirectories; the comparison is case-sensitive).
+std::vector<fs::path> GetStarpakFilesFromDirectory(const fs::path& directoryPath)
+{
+    std::vector<fs::path> paths;
+    for (const auto& it : std::filesystem::directory_iterator(directoryPath))
+    {
+        const fs::path& entryPath = it.path();
+
+        // extension() is empty when there is none, so a separate has_extension() check is redundant
+        if (entryPath.extension() == ".starpak")
+            paths.push_back(entryPath);
+    }
+
+    return paths;
+}
+
+// Writes "repak_starpak_cache.bin" into directoryPath: header, the found .starpak paths, then one hashed entry per
+// data entry. Returns false if directoryPath is not a directory or the cache file cannot be opened for writing.
+bool BuildCacheFileFromGamePaksDirectory(const fs::path& directoryPath)
+{
+    // ensure that our directory path is both a path and a directory
+    if (!std::filesystem::exists(directoryPath) || !std::filesystem::is_directory(directoryPath))
+        return false;
+
+    // open cache file at the start so we don't get thru the whole process and fail to write at the end
+    const fs::path cacheFilePath = directoryPath / "repak_starpak_cache.bin";
+    BinaryIO cacheFileStream;
+    cacheFileStream.Open(cacheFilePath.string(), BinaryIO::Mode_e::Write);
+    if (!cacheFileStream.IsWritable())
+    {
+        Warning("CacheBuilder: Failed to open cache file '%s' for writing.\n", cacheFilePath.u8string().c_str());
+        return false;
+    }
+
+    const std::unique_ptr<CCacheFile> cacheFile = std::make_unique<CCacheFile>();
+    const std::vector<fs::path> foundStarpakPaths = GetStarpakFilesFromDirectory(directoryPath);
+    std::vector<char> entryData; // scratch buffer reused for every entry; released automatically on any exit path
+
+    size_t starpakIndex = 0;
+    for (const fs::path& starpakPath : foundStarpakPaths)
+    {
+        starpakIndex++; // bumped up front so files skipped below are still counted in the progress output
+#if _DEBUG
+        printf("\n");
+        //TIME_SCOPE(starpakPath.u8string().c_str());
+        Debug("CacheBuilder: Opening StarPak file '%s' (%zu/%zu) for reading.\n", starpakPath.u8string().c_str(), starpakIndex, foundStarpakPaths.size());
+#endif
+        // filename(), not stem(): cached paths keep their extension, i.e. paks/Win64/(name).starpak
+        const std::string relativeStarpakPath = ("paks/Win64/" / starpakPath.filename()).string();
+        const size_t starpakPathOffset = cacheFile->AddStarpakPathToCache(relativeStarpakPath);
+
+        BinaryIO starpakStream;
+        starpakStream.Open(starpakPath.string(), BinaryIO::Mode_e::Read);
+        if (!starpakStream.IsReadable())
+        {
+            Warning("CacheBuilder: Failed to open StarPak file '%s' for reading.\n", starpakPath.u8string().c_str());
+            continue;
+        }
+
+        // a usable file holds at least the file header plus the trailing 8-byte entry count
+        const size_t starpakFileSize = fs::file_size(starpakPath);
+        if (starpakFileSize < sizeof(PakStreamSetFileHeader_s) + 8)
+        {
+            Warning("CacheBuilder: StarPak file '%s' is too small to be valid.\n", starpakPath.u8string().c_str());
+            continue;
+        }
+
+        const PakStreamSetFileHeader_s starpakFileHeader = starpakStream.Read<PakStreamSetFileHeader_s>();
+        if (starpakFileHeader.magic != STARPAK_MAGIC)
+        {
+            Warning("CacheBuilder: StarPak file '%s' had invalid file magic; found %X, expected %X.\n", starpakPath.u8string().c_str(), starpakFileHeader.magic, STARPAK_MAGIC);
+            continue;
+        }
+
+        // get the number of data entries in this starpak file
+        starpakStream.Seek(starpakFileSize - 8, std::ios::beg);
+        const size_t starpakEntryCount = starpakStream.Read<size_t>();
+        // reject counts the file cannot hold; this also keeps the size math below from wrapping
+        if (starpakEntryCount > (starpakFileSize - 8) / sizeof(PakStreamSetEntry_s))
+        {
+            Warning("CacheBuilder: StarPak file '%s' had an invalid entry count.\n", starpakPath.u8string().c_str());
+            continue;
+        }
+
+        const size_t starpakEntryHeadersSize = sizeof(PakStreamSetEntry_s) * starpakEntryCount;
+        const std::unique_ptr<PakStreamSetEntry_s[]> starpakEntryHeaders = std::make_unique<PakStreamSetEntry_s[]>(starpakEntryCount);
+
+        // go to the start of the entry structs
+        starpakStream.Seek(starpakFileSize - (8 + starpakEntryHeadersSize), std::ios::beg);
+        starpakStream.Read(reinterpret_cast<char*>(starpakEntryHeaders.get()), starpakEntryHeadersSize);
+
+        for (size_t i = 0; i < starpakEntryCount; ++i)
+        {
+            const PakStreamSetEntry_s& entryHeader = starpakEntryHeaders[i];
+            if (entryHeader.dataSize <= 0) [[unlikely]] // not possible
+                continue;
+            if (entryHeader.offset < 0x1000) [[unlikely]] // also not possible
+                continue;
+
+            const size_t entryOffset = static_cast<size_t>(entryHeader.offset);
+            const size_t entrySize = static_cast<size_t>(entryHeader.dataSize);
+            // skip entries that run past the end of the file or exceed the hash's `int` length (was a debug-only assert)
+            if (entryOffset > starpakFileSize || entrySize > starpakFileSize - entryOffset || entrySize >= INT32_MAX) [[unlikely]]
+            {
+                Warning("CacheBuilder: Skipping out-of-range entry %zu in StarPak file '%s'.\n", i, starpakPath.u8string().c_str());
+                continue;
+            }
+
+            entryData.resize(entrySize);
+            starpakStream.Seek(entryOffset, std::ios::beg);
+            starpakStream.Read(entryData.data(), entrySize);
+
+            CacheDataEntry_t cacheEntry = {};
+            cacheEntry.starpakPathOffset = starpakPathOffset;
+            cacheEntry.dataOffset = entryOffset;
+            cacheEntry.dataSize = entrySize;
+            MurmurHash3_x64_128(entryData.data(), static_cast<int>(entrySize), 0x165DCA75, &cacheEntry.hash);
+            cacheFile->cachedDataEntries.push_back(cacheEntry);
+        }
+
+        starpakStream.Close();
+    }
+
+    CacheFileHeader_t cacheHeader = cacheFile->ConstructHeader();
+    cacheFileStream.Write(cacheHeader);
+    for (const std::string& it : cacheFile->cachedStarpaks)
+        cacheFileStream.WriteString(it);
+
+    cacheFileStream.Seek(cacheHeader.dataEntriesOffset);
+    for (const CacheDataEntry_t& dataEntry : cacheFile->cachedDataEntries)
+        cacheFileStream.Write(dataEntry);
+
+    cacheFileStream.Close();
+    return true;
+}
+
+
+REPAK_END_NAMESPACE()
\ No newline at end of file
diff --git a/src/application/cachebuilder.h b/src/application/cachebuilder.h
new file mode 100644
index 0000000..ab10295
--- /dev/null
+++ b/src/application/cachebuilder.h
@@ -0,0 +1,9 @@
+#pragma once
+#include
+#include
+
+REPAK_BEGIN_NAMESPACE(CacheBuilder)
+
+bool BuildCacheFileFromGamePaksDirectory(const std::filesystem::path& directoryPath); // builds the starpak cache file inside directoryPath; false if it is not a directory or the file cannot be opened
+
+REPAK_END_NAMESPACE()
\ No newline at end of file
diff --git a/src/application/repak.cpp b/src/application/repak.cpp
index 44d7619..752fc6f 100644
--- a/src/application/repak.cpp
+++ b/src/application/repak.cpp
@@ -2,6 +2,8 @@
#include "assets/assets.h"
#include "logic/pakfile.h"
+#include
+
const char startupVersion[] = {
"RePak - Built "
__DATE__
@@ -18,8 +20,18 @@ int main(int argc, char** argv)
if (argc < 2)
Error("invalid usage\n");
- CPakFile pakFile(8);
- pakFile.BuildFromMap(argv[1]);
+ const std::string targetPath = argv[1];
+
+ // this should be changed to proper CLI handling and mode selection
+ if (std::filesystem::is_directory(targetPath))
+ {
+ CacheBuilder::BuildCacheFileFromGamePaksDirectory(targetPath);
+ }
+ else
+ {
+ CPakFile pakFile(8);
+ pakFile.BuildFromMap(targetPath);
+ }
return EXIT_SUCCESS;
}
\ No newline at end of file
diff --git a/src/utils/MurmurHash3.cpp b/src/utils/MurmurHash3.cpp
new file mode 100644
index 0000000..529b2b5
--- /dev/null
+++ b/src/utils/MurmurHash3.cpp
@@ -0,0 +1,171 @@
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+// Note - The x86 and x64 versions do _not_ produce the same results, as the
+// algorithms are optimized for their respective platforms. You can still
+// compile and run any of them on any platform, but your performance with the
+// non-native version will be less than optimal.
+
+#include "MurmurHash3.h"
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+#define FORCE_INLINE __forceinline
+// <stdlib.h> declares the _rotl / _rotl64 intrinsics used below
+#include <stdlib.h>
+
+#define ROTL32(x,y) _rotl(x,y)
+#define ROTL64(x,y) _rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x)
+
+// Other compilers
+
+#else // defined(_MSC_VER)
+
+#define FORCE_INLINE inline __attribute__((always_inline))
+// Rotates x left by r bits; r must be in 1..31 (r == 0 would shift right by the full width).
+inline uint32_t rotl32(uint32_t x, int8_t r)
+{
+    return (x << r) | (x >> (32 - r));
+}
+// Rotates x left by r bits; r must be in 1..63 (r == 0 would shift right by the full width).
+inline uint64_t rotl64(uint64_t x, int8_t r)
+{
+    return (x << r) | (x >> (64 - r));
+}
+
+#define ROTL32(x,y) rotl32(x,y)
+#define ROTL64(x,y) rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x##LLU)
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+// Block read - if your platform needs to do endian-swapping or can only
+// handle aligned reads, do the conversion here
+
+FORCE_INLINE uint32_t getblock32(const uint32_t* p, int i)
+{
+    return *(p + i); // 32-bit word number i of the input, read as stored (no byte swapping)
+}
+
+FORCE_INLINE uint64_t getblock64(const uint64_t* p, int i)
+{
+    return *(p + i); // 64-bit word number i of the input, read as stored (no byte swapping)
+}
+
+//-----------------------------------------------------------------------------
+// Finalization mix - force all bits of a hash block to avalanche
+
+FORCE_INLINE uint32_t fmix32(uint32_t h)
+{
+    // alternating xor-shift and multiply rounds, written as a chain of named intermediates
+    const uint32_t folded16 = h ^ (h >> 16);
+    const uint32_t scaledA = folded16 * 0x85ebca6b;
+    const uint32_t folded13 = scaledA ^ (scaledA >> 13);
+    const uint32_t scaledB = folded13 * 0xc2b2ae35;
+
+    return scaledB ^ (scaledB >> 16);
+}
+
+//----------
+
+FORCE_INLINE uint64_t fmix64(uint64_t k)
+{
+    // alternating xor-shift (by 33) and multiply rounds, written as a chain of named intermediates
+    const uint64_t folded1 = k ^ (k >> 33);
+    const uint64_t scaled1 = folded1 * BIG_CONSTANT(0xff51afd7ed558ccd);
+    const uint64_t folded2 = scaled1 ^ (scaled1 >> 33);
+    const uint64_t scaled2 = folded2 * BIG_CONSTANT(0xc4ceb9fe1a85ec53);
+
+    return scaled2 ^ (scaled2 >> 33);
+}
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x64_128(const void* key, const int len, // hashes len bytes starting at key
+ const uint32_t seed, void* out) // out receives the 128-bit result as two uint64_t (16 bytes)
+{
+ const uint8_t* data = (const uint8_t*)key;
+ const int nblocks = len / 16;
+ // NOTE(review): a negative len is not rejected here; callers must pass len >= 0
+ uint64_t h1 = seed;
+ uint64_t h2 = seed;
+
+ const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
+ const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
+
+ //----------
+ // body
+ // consumes the input 16 bytes at a time, as two 64-bit words per block
+ const uint64_t* blocks = (const uint64_t*)(data);
+
+ for (int i = 0; i < nblocks; i++)
+ {
+ uint64_t k1 = getblock64(blocks, i * 2 + 0);
+ uint64_t k2 = getblock64(blocks, i * 2 + 1);
+
+ k1 *= c1; k1 = ROTL64(k1, 31); k1 *= c2; h1 ^= k1;
+
+ h1 = ROTL64(h1, 27); h1 += h2; h1 = h1 * 5 + 0x52dce729;
+
+ k2 *= c2; k2 = ROTL64(k2, 33); k2 *= c1; h2 ^= k2;
+
+ h2 = ROTL64(h2, 31); h2 += h1; h2 = h2 * 5 + 0x38495ab5;
+ }
+
+ //----------
+ // tail
+ // the len & 15 bytes left over after the last whole block
+ const uint8_t* tail = (const uint8_t*)(data + nblocks * 16ull);
+
+ uint64_t k1 = 0;
+ uint64_t k2 = 0;
+ // every case deliberately falls through to the ones below it
+ switch (len & 15)
+ {
+ case 15: k2 ^= ((uint64_t)tail[14]) << 48;
+ case 14: k2 ^= ((uint64_t)tail[13]) << 40;
+ case 13: k2 ^= ((uint64_t)tail[12]) << 32;
+ case 12: k2 ^= ((uint64_t)tail[11]) << 24;
+ case 11: k2 ^= ((uint64_t)tail[10]) << 16;
+ case 10: k2 ^= ((uint64_t)tail[9]) << 8;
+ case 9: k2 ^= ((uint64_t)tail[8]) << 0;
+ k2 *= c2; k2 = ROTL64(k2, 33); k2 *= c1; h2 ^= k2;
+
+ case 8: k1 ^= ((uint64_t)tail[7]) << 56;
+ case 7: k1 ^= ((uint64_t)tail[6]) << 48;
+ case 6: k1 ^= ((uint64_t)tail[5]) << 40;
+ case 5: k1 ^= ((uint64_t)tail[4]) << 32;
+ case 4: k1 ^= ((uint64_t)tail[3]) << 24;
+ case 3: k1 ^= ((uint64_t)tail[2]) << 16;
+ case 2: k1 ^= ((uint64_t)tail[1]) << 8;
+ case 1: k1 ^= ((uint64_t)tail[0]) << 0;
+ k1 *= c1; k1 = ROTL64(k1, 31); k1 *= c2; h1 ^= k1;
+ };
+
+ //----------
+ // finalization
+ // fold the length into both halves, then cross-mix them around fmix64
+ h1 ^= len; h2 ^= len;
+
+ h1 += h2;
+ h2 += h1;
+
+ h1 = fmix64(h1);
+ h2 = fmix64(h2);
+
+ h1 += h2;
+ h2 += h1;
+ // store the result: h1 first, then h2
+ ((uint64_t*)out)[0] = h1;
+ ((uint64_t*)out)[1] = h2;
+}
diff --git a/src/utils/MurmurHash3.h b/src/utils/MurmurHash3.h
new file mode 100644
index 0000000..719e774
--- /dev/null
+++ b/src/utils/MurmurHash3.h
@@ -0,0 +1,33 @@
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+#ifndef _MURMURHASH3_H_
+#define _MURMURHASH3_H_
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER) && (_MSC_VER < 1600)
+
+typedef unsigned char uint8_t;
+typedef unsigned int uint32_t;
+typedef unsigned __int64 uint64_t;
+
+// Other compilers
+
+#else // defined(_MSC_VER)
+
+#include <stdint.h>
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+// Hashes `len` bytes at `key` using `seed`; writes the 128-bit result (two uint64_t, 16 bytes) to `out`.
+void MurmurHash3_x64_128(const void* key, int len, uint32_t seed, void* out);
+
+//-----------------------------------------------------------------------------
+
+#endif // _MURMURHASH3_H_
\ No newline at end of file