From c9c86d759f67b68bc337f2d1faf1d806814ec622 Mon Sep 17 00:00:00 2001 From: Petr Benes Date: Thu, 24 Dec 2015 01:25:52 +0100 Subject: [PATCH] initial commit --- .gitignore | 33 + LICENSE.txt | 21 + README.md | 213 ++++++ Scripts/env.bat | 2 + Scripts/test.py | 229 +++++++ Source/PDB.cpp | 910 +++++++++++++++++++++++++ Source/PDB.h | 398 +++++++++++ Source/PDBExtractor.cpp | 567 ++++++++++++++++ Source/PDBExtractor.h | 90 +++ Source/PDBHeaderReconstructor.cpp | 683 +++++++++++++++++++ Source/PDBHeaderReconstructor.h | 275 ++++++++ Source/PDBReconstructorBase.h | 201 ++++++ Source/PDBSymbolSorter.h | 155 +++++ Source/PDBSymbolVisitor.h | 327 +++++++++ Source/PDBSymbolVisitor.inl | 976 +++++++++++++++++++++++++++ Source/PDBSymbolVisitorBase.h | 248 +++++++ Source/UserDataFieldDefinition.h | 120 ++++ Source/UserDataFieldDefinitionBase.h | 104 +++ Source/main.cpp | 14 + Source/pdbex.vcxproj | 181 +++++ Source/pdbex.vcxproj.filters | 65 ++ Source/pdbex.vcxproj.user | 17 + pdbex.sln | 28 + 23 files changed, 5857 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE.txt create mode 100644 README.md create mode 100644 Scripts/env.bat create mode 100644 Scripts/test.py create mode 100644 Source/PDB.cpp create mode 100644 Source/PDB.h create mode 100644 Source/PDBExtractor.cpp create mode 100644 Source/PDBExtractor.h create mode 100644 Source/PDBHeaderReconstructor.cpp create mode 100644 Source/PDBHeaderReconstructor.h create mode 100644 Source/PDBReconstructorBase.h create mode 100644 Source/PDBSymbolSorter.h create mode 100644 Source/PDBSymbolVisitor.h create mode 100644 Source/PDBSymbolVisitor.inl create mode 100644 Source/PDBSymbolVisitorBase.h create mode 100644 Source/UserDataFieldDefinition.h create mode 100644 Source/UserDataFieldDefinitionBase.h create mode 100644 Source/main.cpp create mode 100644 Source/pdbex.vcxproj create mode 100644 Source/pdbex.vcxproj.filters create mode 100644 Source/pdbex.vcxproj.user create mode 100644 pdbex.sln diff --git 
a/.gitignore b/.gitignore new file mode 100644 index 0000000..32c1074 --- /dev/null +++ b/.gitignore @@ -0,0 +1,33 @@ +# Compiled Object files +*.obj +*.slo +*.lo +*.o + +# Compiled Dynamic libraries +*.so +*.dylib + +# Compiled Static libraries +*.lai +*.la +*.a + +# Binaries +*.exe +*.dll +*.lib +*.ilk +*.pdb + +# Project files +Bin/** + +# Visual Studio files +obj/** +ipch +*.psess +*.vspx +*.suo +*.sdf +*.opensdf diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..cbf71f2 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2015 Petr Benes + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..5ffb895 --- /dev/null +++ b/README.md @@ -0,0 +1,213 @@ +# pdbex + +pdbex is a utility for reconstructing structures and unions from the [PDB files][msdn-symbols] into compilable C headers. + +### Why? 
+ +PDB files, among others, contain information about structures and unions. +This information can be very useful - for instance structures and unions from **ntdll.dll** or **ntoskrnl.exe** can be useful for experimenting with Windows internals. +But information in the PDB files is limited only to the symbol name, member name, its type and offset. +Information about nested anonymous structures and unions is lost. +However, with a bit of work, they can be formed back. + +I am not aware of any utility which could make a compilable and offset-accurate C header representation of symbols in the PDB file. +Although there do exist [some][headers-mirt] [public][headers-nirsoft] [servers][headers-moonsoft] which list some of the structures, it is only a limited subset of various symbols of files of various Windows versions. +Not to mention that many of them are not offset-accurate. +The fact that we have [ReactOS][headers-reactos] and [Volatility][headers-volatility] does not help. They will not provide a header file for any given PDB file. + +### Usage + +```c +> pdbex.exe _SID ntdll.pdb + +/* + * PDB file: ntdll.pdb + * Image architecture: x86 + * + * Dumped by pdbex tool v0.1, by wbenny + */ + +typedef struct _SID_IDENTIFIER_AUTHORITY +{ + /* 0x0000 */ unsigned char Value[6]; +} SID_IDENTIFIER_AUTHORITY, *PSID_IDENTIFIER_AUTHORITY; + +typedef struct _SID +{ + /* 0x0000 */ unsigned char Revision; + /* 0x0001 */ unsigned char SubAuthorityCount; + /* 0x0002 */ struct _SID_IDENTIFIER_AUTHORITY IdentifierAuthority; + /* 0x0008 */ unsigned long SubAuthority[1]; +} SID, *PSID; +``` + +This command will dump not only the specified symbol, but also all symbols referenced by it - and in the correct order. 
+If you insist on dumping only the specified symbol, you can disable this feature with the **-j-** option: + +```c +> pdbex.exe _SID ntdll.pdb -j- -k- + +typedef struct _SID +{ + /* 0x0000 */ unsigned char Revision; + /* 0x0001 */ unsigned char SubAuthorityCount; + /* 0x0002 */ struct _SID_IDENTIFIER_AUTHORITY IdentifierAuthority; + /* 0x0008 */ unsigned long SubAuthority[1]; +} SID, *PSID; +``` + +_(The **-k-** switch is responsible for omitting the header.)_ + +You can even control whether definitions of referenced symbols should be inlined with the **-e [n|i|a]** option. + +* n - will not inline anything (unnamed symbols are created separately and named as _TAG_UNNAMED\_###_) +* i - will inline only unnamed structures and unions (default behavior) +* a - will inline everything + +Example of inlining everything: +```c +> pdbex.exe _SID ntdll.pdb -e a -k- + +typedef struct _SID +{ + /* 0x0000 */ unsigned char Revision; + /* 0x0001 */ unsigned char SubAuthorityCount; + struct _SID_IDENTIFIER_AUTHORITY + { + /* 0x0002 */ unsigned char Value[6]; + } IdentifierAuthority; + /* 0x0008 */ unsigned long SubAuthority[1]; +} SID, *PSID; +``` + +Example of not inlining anything: +```c +> pdbex.exe _LARGE_INTEGER ntdll.pdb -e n -k- + +typedef struct _TAG_UNNAMED_1 +{ + /* 0x0000 */ unsigned long LowPart; + /* 0x0004 */ long HighPart; +} TAG_UNNAMED_1, *PTAG_UNNAMED_1; + +typedef union _LARGE_INTEGER +{ + union + { + struct + { + /* 0x0000 */ unsigned long LowPart; + /* 0x0004 */ long HighPart; + }; + /* 0x0000 */ struct _TAG_UNNAMED_1 u; + /* 0x0000 */ __int64 QuadPart; + }; +} LARGE_INTEGER, *PLARGE_INTEGER; + +``` + +Default behavior: +```c +> pdbex.exe _LARGE_INTEGER ntdll.pdb -e i -k- + +typedef union _LARGE_INTEGER +{ + union + { + struct + { + /* 0x0000 */ unsigned long LowPart; + /* 0x0004 */ long HighPart; + }; + struct // _TAG_UNNAMED_1 + { + /* 0x0000 */ unsigned long LowPart; + /* 0x0004 */ long HighPart; + } u; + /* 0x0000 */ __int64 QuadPart; + }; +} LARGE_INTEGER, *PLARGE_INTEGER; + 
+``` + +You can also dump all symbols using **"\*"** as the symbol name to dump: + +``` +> pdbex.exe * ntdll.pdb -o ntdll.h +``` + +This command will dump all structures and unions to the file **ntdll.h**. + + +### Remarks + +* **const**-ness and **volatile**-ness are not reflected in the dumped headers (although this information is preserved in the PDB file). To my knowledge, it is not possible to obtain this information via the **dbghelp** interface (which **pdbex** currently uses), but it is possible through **DIA**. +* Pointers to functions are represented only as **void\*** with an additional comment **/\* function \*/**. +* Produced structures expect **packing alignment to be set to 1 byte**. +* Produced **union**s have one extra **union** nested inside of them (as you may have noticed a few lines above). This is a known cosmetic bug. +* **pdbex** is designed to dump headers from C projects only - C++ classes are not supported. + +### Compilation + +Compile **pdbex** using Visual Studio 2015. A solution file is included. No other dependencies are required. + +### Testing + +There are 2 files in the _Scripts_ folder: + +* env.bat - sets environment variables for Microsoft Visual C++ 2015 +* test.py - testing script + +**test.py** dumps all symbols from the provided PDB file. It also generates a C file which tests whether the offsets of the members of structures and unions match the original offsets in the PDB file. The C file is then compiled using **msbuild** and run. If the resulting program prints a line starting with **[!]**, it is considered an error. In that case, the line also contains information about the struct/union + member + offset that did not match. It prints nothing on success. + +Because **test.py** uses **msbuild** for creating tests, special environment variables must be set. This can be accomplished either by running **test.py** from the developer console or by calling **env.bat**. 
 The **env.bat** file exists only for convenience and does nothing other than run **VsDevCmd.bat** from the default Visual Studio 2015 installation directory. The environment variables are set in the current console process, therefore this script can be called only once. + +### Documentation + +**pdbex -h** should cover it: + +``` +pdbex <symbol> <path> [-o <filename>] [-t <filename>] [-e <type>] + [-u <prefix>] [-s prefix] [-r prefix] [-g suffix] + [-p] [-x] [-m] [-b] [-d] [-i] [-l] + +<symbol> Symbol name to extract or '*' if all symbol should + be extracted. +<path> Path to the PDB file. + -o filename Specifies the output file. (stdout) + -t filename Specifies the output test file. (off) + -e [n,i,a] Specifies expansion of nested structures/unions. (i) + n = none Only top-most type is printed. + i = inline unnamed Unnamed types are nested. + a = inline all All types are nested. + -u prefix Unnamed union prefix (in combination with -d). + -s prefix Unnamed struct prefix (in combination with -d). + -r prefix Prefix for all symbols. + -g suffix Suffix for all symbols. + +Following options can be explicitly turned of by leading '-'. +Example: -p- + -p Create padding members. (T) + -x Show offsets. (T) + -m Create Microsoft typedefs. (T) + -b Allow bitfields in union. (F) + -d Allow unnamed data types. (T) + -i Use types from stdint.h instead of native types. (F) + -j Print definitions of referenced types. (T) + -k Print header. (T) + -n Print declarations. (T) + -l Print definitions. (T) +``` + + +### License + +All the code in this repository is open-source under the MIT license. See the **LICENSE.txt** file in this repository. 
+ + + [msdn-symbols]: + [headers-nirsoft]: + [headers-moonsoft]: + [headers-reactos]: + [headers-mirt]: + [headers-volatility]: diff --git a/Scripts/env.bat b/Scripts/env.bat new file mode 100644 index 0000000..5693532 --- /dev/null +++ b/Scripts/env.bat @@ -0,0 +1,2 @@ +@echo off +call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\Common7\Tools\VsDevCmd.bat" diff --git a/Scripts/test.py b/Scripts/test.py new file mode 100644 index 0000000..8dff82d --- /dev/null +++ b/Scripts/test.py @@ -0,0 +1,229 @@ +import os +import sys +import re +import subprocess + +VCXPROJ_TEMPLATE = ''' + + + + Debug + Win32 + + + + Debug + x64 + + + + + false + .\ + .\Obj\ + + + + + 1Byte + + + + + Application + v140 + + + + + + + + + + + + + + +''' + +MSBUILD_CMD_TEMPLATE = 'msbuild %(file_vcxproj)s /p:configuration=%(configuration)s /p:platform=%(platform)s /p:platformtoolset=%(platformtoolset)s' +PDBEX_CMD_TEMPLATE = '..\\..\\Bin\\x86\\Release\\pdbex.exe "*" "%(file_pdb)s" -o "%(file_h)s" -t "%(file_c)s" -g "%(symbol_suffix)s"' + +OUTPUT_DIRECTORY = 'Output' + +VERBOSITY_LEVEL = 0 # 0, 1, 2 + + +def test_create(file_pdb, file_h, file_c, symbol_suffix): + command = PDBEX_CMD_TEMPLATE % { + 'file_pdb' : file_pdb, + 'file_h' : file_h, + 'file_c' : file_c, + 'symbol_suffix' : symbol_suffix + } + + if VERBOSITY_LEVEL >= 1: + print ' ' + command + + subprocess.call(command) + + +def test_get_platform(file_h): + platform = 'Win32' + + with open(file_h) as f: + line_counter = 0 + for line in f: + line_counter += 1 + + # + # Platform is specified at 3rd line. 
+ # + + if line_counter == 3: + if 'x64' in line: + platform = 'x64' + + break + + return platform + + +def test_compile(file_vcxproj, file_c, file_h, platform): + with open(file_vcxproj, 'w+') as f: + f.write(VCXPROJ_TEMPLATE % { + 'file_c' : file_c, + 'file_h' : file_h + }) + + command = MSBUILD_CMD_TEMPLATE % { + 'file_vcxproj' : file_vcxproj, + 'configuration' : 'debug', + 'platform' : platform, + 'platformtoolset' : 'v140' + } + + if VERBOSITY_LEVEL >= 1: + print ' ' + command + + fnull = open(os.devnull, 'w') + result = subprocess.call(command, stdout=fnull, stderr=fnull) + + if result != 0: + raise Exception('Compilation error') + + +def test_run(file_exe): + p = subprocess.Popen(file_exe, stdout=subprocess.PIPE) + result = p.communicate()[0] + + if '[!]' in result: + raise Exception('Test failed:\n' + result) + + +def process_pdb(file_pdb): + # + # Filename without extension. + # + + try: + os.chdir(OUTPUT_DIRECTORY) + except: + os.mkdir(OUTPUT_DIRECTORY) + os.chdir(OUTPUT_DIRECTORY) + + filename = os.path.splitext(os.path.basename(file_pdb))[0] + + if (re.search('\\\\[0-9A-Z]{33}\\\\', file_pdb)): + # + # We're most likely dumping from symbol path + # (ie. 
S:\Symbols\ntdll.pdb\10DC95804D2C4756947338A43F573BAD2\ntdll.pdb) + # + # Make the filename as 10DC95804D2C4756947338A43F573BAD2_ntdll + # + filename = os.path.splitext(file_pdb)[0] + filename = filename.rsplit('\\', 2)[1] + '_' + filename.rsplit('\\', 2)[2] + + file_c = filename + '.c' + file_h = filename + '.h' + file_exe = filename + '.exe' + file_vcxproj = filename + '.vcxproj' + + symbol_suffix = '_' + + print 'Processing "%s"' % file_pdb + + try: + if VERBOSITY_LEVEL > 0: + print ' Extracting data' + test_create(file_pdb, file_h, file_c, symbol_suffix) + + platform = test_get_platform(file_h) + + if VERBOSITY_LEVEL > 0: + print ' Compiling for platform "%s"' % platform + test_compile(file_vcxproj, file_c, file_h, platform) + + if VERBOSITY_LEVEL > 0: + print ' Testing' + test_run(file_exe) + except Exception as e: + print e + + os.chdir('..') + + +def process_dir(pdb_dir): + for root, directories, filenames in os.walk(pdb_dir): + for filename in filenames: + file_pdb = os.path.join(root, filename) + process_pdb(file_pdb) + + +def main(): + import argparse + parser = argparse.ArgumentParser() + parser.add_argument('pdbs', type=str, nargs='*', help='directory which contains PDB files, or PDB file') + parser.add_argument('-v', '--verbose', action='store_true', help='increase output verbosity') + parser.add_argument('-d', '--debug', action='store_true', help='even more verbosity') + parser.add_argument('-c', '--clean', action='store_true', help='clean all files') + + args = parser.parse_args() + + global VERBOSITY_LEVEL + + if args.verbose: + VERBOSITY_LEVEL = 1 + + if args.debug: + VERBOSITY_LEVEL = 2 + + if args.clean: + import shutil + + try: + shutil.rmtree(OUTPUT_DIRECTORY) + except: + pass + + return + + if not hasattr(args, 'pdbs'): + parser.print_help() + return + + for pdb in args.pdbs: + pdb = os.path.abspath(pdb) + + if os.path.isfile(pdb): + process_pdb(pdb) + elif os.path.isdir(pdb): + process_dir(pdb) + else: + print 'Error: %s is not a directory or 
file' % pdb + + +if __name__ == '__main__': + main() diff --git a/Source/PDB.cpp b/Source/PDB.cpp new file mode 100644 index 0000000..01ff397 --- /dev/null +++ b/Source/PDB.cpp @@ -0,0 +1,910 @@ +#include "PDB.h" + +////////////////////////////////////////////////////////////////////////// +// SymbolModuleBase +// + +class SymbolModuleBase +{ + public: + SymbolModuleBase(); + + BOOL + Open( + IN CONST CHAR* Path + ); + + VOID + Close(); + + BOOL + IsValid() const; + + BOOL + GetSymbolTypeInfo( + IN ULONG TypeId, + IN IMAGEHLP_SYMBOL_TYPE_INFO GetType, + OUT PVOID Info + ) const; + + protected: + HANDLE m_ProcessHandle; + DWORD64 m_ModuleAddress; +}; + +////////////////////////////////////////////////////////////////////////// +// SymbolModuleBase - implementation +// + +SymbolModuleBase::SymbolModuleBase() + : m_ProcessHandle(0) + , m_ModuleAddress(0) +{ + +} + +BOOL +SymbolModuleBase::Open( + IN CONST CHAR* Path + ) +{ + static ULONG_PTR FakeProcessHandleCounter = 2; + static const DWORD64 FakeBaseAddress = 0x10000000; + + HANDLE FileHandle; + DWORD FileSize; + BOOL Result; + + m_ProcessHandle = (HANDLE)FakeProcessHandleCounter++; + + // + // First get the file size. + // + + FileHandle = CreateFileA( + Path, + GENERIC_READ, + FILE_SHARE_READ, + NULL, + OPEN_EXISTING, + 0, + NULL + ); + + if (FileHandle == INVALID_HANDLE_VALUE) + { + return FALSE; + } + + FileSize = GetFileSize(FileHandle, NULL); + CloseHandle(FileHandle); + + // + // Initialize the symbol handler. The passed handle does not need + // to be neccessarily an actual process handle, so we fake it + // in order to distinguish each PDB file. 
+ // + + Result = SymInitialize( + m_ProcessHandle, + NULL, + FALSE + ); + + if (!Result) + { + return FALSE; + } + + m_ModuleAddress = SymLoadModuleEx( + m_ProcessHandle, + NULL, + Path, + NULL, + FakeBaseAddress, + FileSize, + NULL, + 0 + ); + + if (m_ModuleAddress == 0) + { + return FALSE; + } + + return TRUE; +} + +VOID +SymbolModuleBase::Close() +{ + if (IsValid()) + { + SymUnloadModule64(m_ProcessHandle, m_ModuleAddress); + SymCleanup(m_ProcessHandle); + } + + m_ProcessHandle = 0; + m_ModuleAddress = 0; +} + +BOOL +SymbolModuleBase::IsValid() const +{ + return m_ProcessHandle != 0; +} + +BOOL +SymbolModuleBase::GetSymbolTypeInfo( + IN ULONG TypeId, + IN IMAGEHLP_SYMBOL_TYPE_INFO GetType, + OUT PVOID Info + ) const +{ + return SymGetTypeInfo( + m_ProcessHandle, + m_ModuleAddress, + TypeId, + GetType, + Info + ); +} + +////////////////////////////////////////////////////////////////////////// +// SymbolModule +// + +class SymbolModule + : public SymbolModuleBase +{ + public: + SymbolModule(); + + ~SymbolModule(); + + BOOL + Open( + IN CONST CHAR* Path + ); + + BOOL + IsOpened() const; + + CONST CHAR* + GetPath() const; + + VOID + Close(); + + SYMBOL* + GetSymbolByName( + IN CONST CHAR* SymbolName + ); + + SYMBOL* + GetSymbolByTypeId( + IN DWORD TypeId + ); + + CHAR* + GetSymbolNameByTypeId( + IN DWORD TypeId + ); + + BOOL + BuildSymbolMap(); + + const SymbolMap& + GetSymbolMap() const; + + const SymbolNameMap& + GetSymbolNameMap() const; + + private: + static + BOOL + CALLBACK + EnumSymbolsCallbackStaticImpl( + IN PSYMBOL_INFO SymbolInfo, + IN ULONG SymbolSize, + IN PVOID UserContext + ); + + VOID + EnumSymbolsCallbackImpl( + IN PSYMBOL_INFO SymbolInfo, + IN ULONG SymbolSize + ); + + private: + VOID + InitSymbol( + IN SYMBOL* Symbol, + IN DWORD TypeId + ); + + VOID + ProcessSymbolBase( + IN SYMBOL* Symbol + ); + + VOID + ProcessSymbolEnum( + IN SYMBOL* Symbol + ); + + VOID + ProcessSymbolTypedef( + IN SYMBOL* Symbol + ); + + VOID + ProcessSymbolPointer( + IN 
SYMBOL* Symbol + ); + + VOID + ProcessSymbolArray( + IN SYMBOL* Symbol + ); + + VOID + ProcessSymbolFunction( + IN SYMBOL* Symbol + ); + + VOID + ProcessSymbolFunctionArg( + IN SYMBOL* Symbol + ); + + VOID + ProcessSymbolUserDataType( + IN SYMBOL* Symbol + ); + + VOID + DestroySymbol( + IN SYMBOL* Symbol + ); + + private: + std::string m_Path; + SymbolMap m_SymbolMap; + SymbolNameMap m_SymbolNameMap; + SymbolSet m_SymbolSet; +}; + +SymbolModule::SymbolModule() +{ + +} + +SymbolModule::~SymbolModule() +{ + Close(); +} + +BOOL +SymbolModule::Open( + IN CONST CHAR* Path + ) +{ + if (SymbolModuleBase::Open(Path) == FALSE) + { + return FALSE; + } + + return BuildSymbolMap(); +} + +BOOL +SymbolModule::IsOpened() const +{ + return IsValid(); +} + +CONST CHAR* +SymbolModule::GetPath() const +{ + return m_Path.c_str(); +} + +VOID +SymbolModule::Close() +{ + SymbolModuleBase::Close(); + + for (auto&& Symbol : m_SymbolSet) + { + DestroySymbol(Symbol); + delete Symbol; + } + + m_Path.clear(); + m_SymbolMap.clear(); + m_SymbolNameMap.clear(); + m_SymbolSet.clear(); +} + +CHAR* +SymbolModule::GetSymbolNameByTypeId( + IN DWORD TypeId + ) +{ + WCHAR* SymbolName; + + if (GetSymbolTypeInfo(TypeId, TI_GET_SYMNAME, &SymbolName)) + { + CHAR* SymbolNameMb; + size_t SymbolNameLength; + + SymbolNameLength = wcslen(SymbolName) + 1; + SymbolNameMb = (CHAR*)malloc(SymbolNameLength); + wcstombs(SymbolNameMb, SymbolName, SymbolNameLength); + + // + // Result of GetSymbolTypeInfo() call is supposed to be freed by this call. + // + + LocalFree(SymbolName); + + return SymbolNameMb; + } + + return NULL; +} + +SYMBOL* +SymbolModule::GetSymbolByName( + IN CONST CHAR* SymbolName + ) +{ + auto it = m_SymbolNameMap.find(SymbolName); + return it == m_SymbolNameMap.end() ? 
NULL : it->second; +} + +SYMBOL* +SymbolModule::GetSymbolByTypeId( + IN DWORD TypeId + ) +{ + if (m_SymbolMap.find(TypeId) != m_SymbolMap.end()) + { + return m_SymbolMap[TypeId]; + } + + SYMBOL* NewSymbol = new SYMBOL; + m_SymbolMap[TypeId] = NewSymbol; + m_SymbolSet.insert(NewSymbol); + + InitSymbol(NewSymbol, TypeId); + + return NewSymbol; +} + +BOOL +SymbolModule::BuildSymbolMap() +{ + return SymEnumTypes( + m_ProcessHandle, + m_ModuleAddress, + &EnumSymbolsCallbackStaticImpl, + this + ); +} + +const SymbolMap& +SymbolModule::GetSymbolMap() const +{ + return m_SymbolMap; +} + +const SymbolNameMap& +SymbolModule::GetSymbolNameMap() const +{ + return m_SymbolNameMap; +} + +BOOL +CALLBACK +SymbolModule::EnumSymbolsCallbackStaticImpl( + IN PSYMBOL_INFO SymbolInfo, + IN ULONG SymbolSize, + IN PVOID UserContext + ) +{ + if (SymbolInfo != NULL) + { + SymbolModule* Pdb = reinterpret_cast(UserContext); + Pdb->EnumSymbolsCallbackImpl(SymbolInfo, SymbolSize); + } + + return TRUE; +} + +VOID +SymbolModule::EnumSymbolsCallbackImpl( + IN PSYMBOL_INFO SymbolInfo, + IN ULONG SymbolSize + ) +{ + SYMBOL* CreatedSymbol = GetSymbolByTypeId(SymbolInfo->TypeIndex); + m_SymbolNameMap[CreatedSymbol->Name] = CreatedSymbol; +} + +VOID +SymbolModule::InitSymbol( + IN SYMBOL* Symbol, + IN DWORD TypeId + ) +{ + GetSymbolTypeInfo(TypeId, TI_GET_SYMTAG, &Symbol->Tag); + GetSymbolTypeInfo(TypeId, TI_GET_BASETYPE, &Symbol->BaseType); + GetSymbolTypeInfo(TypeId, TI_GET_LENGTH, &Symbol->Size); + + Symbol->Name = GetSymbolNameByTypeId(TypeId); + Symbol->TypeId = TypeId; + + switch (Symbol->Tag) + { + case SymTagUDT: ProcessSymbolUserDataType(Symbol); break; + case SymTagEnum: ProcessSymbolEnum (Symbol); break; + case SymTagFunctionType: ProcessSymbolFunction (Symbol); break; + case SymTagPointerType: ProcessSymbolPointer (Symbol); break; + case SymTagArrayType: ProcessSymbolArray (Symbol); break; + case SymTagBaseType: ProcessSymbolBase (Symbol); break; + case SymTagTypedef: ProcessSymbolTypedef 
(Symbol); break; + case SymTagFunctionArgType: ProcessSymbolFunctionArg (Symbol); break; + default: break; + } +} + +VOID +SymbolModule::ProcessSymbolBase( + IN SYMBOL* Symbol + ) +{ + +} + +VOID +SymbolModule::ProcessSymbolEnum( + IN SYMBOL* Symbol + ) +{ + GetSymbolTypeInfo(Symbol->TypeId, TI_GET_CHILDRENCOUNT, &Symbol->u.Enum.FieldCount); + + TI_FINDCHILDREN_PARAMS* FindChildrenParams = (TI_FINDCHILDREN_PARAMS*)alloca(offsetof(TI_FINDCHILDREN_PARAMS, ChildId[Symbol->u.Enum.FieldCount])); + FindChildrenParams->Start = 0; + FindChildrenParams->Count = Symbol->u.Enum.FieldCount; + GetSymbolTypeInfo(Symbol->TypeId, TI_FINDCHILDREN, FindChildrenParams); + + Symbol->u.Enum.Fields = (SYMBOL_ENUM_FIELD*)calloc(Symbol->u.Enum.FieldCount, sizeof(SYMBOL_ENUM_FIELD)); + + for (DWORD i = 0; i < Symbol->u.Enum.FieldCount; i++) + { + SYMBOL_ENUM_FIELD* EnumValue = &Symbol->u.Enum.Fields[i]; + + EnumValue->Parent = Symbol; + + EnumValue->Name = GetSymbolNameByTypeId(FindChildrenParams->ChildId[i]); + GetSymbolTypeInfo(FindChildrenParams->ChildId[i], TI_GET_VALUE, &EnumValue->Value); + } +} + +VOID +SymbolModule::ProcessSymbolTypedef( + IN SYMBOL* Symbol + ) +{ + DWORD TypedefTypeId; + GetSymbolTypeInfo(Symbol->TypeId, TI_GET_TYPE, &TypedefTypeId); + Symbol->u.Typedef.Type = GetSymbolByTypeId(TypedefTypeId); +} + +VOID +SymbolModule::ProcessSymbolPointer( + IN SYMBOL* Symbol + ) +{ + DWORD PointerTypeId; + GetSymbolTypeInfo(Symbol->TypeId, TI_GET_TYPE, &PointerTypeId); + Symbol->u.Pointer.Type = GetSymbolByTypeId(PointerTypeId); +} + +VOID +SymbolModule::ProcessSymbolArray( + IN SYMBOL* Symbol + ) +{ + DWORD DataTypeId; + GetSymbolTypeInfo(Symbol->TypeId, TI_GET_TYPE, &DataTypeId); + Symbol->u.Array.ElementType = GetSymbolByTypeId(DataTypeId); + + GetSymbolTypeInfo(Symbol->TypeId, TI_GET_COUNT, &Symbol->u.Array.ElementCount); +} + +VOID +SymbolModule::ProcessSymbolFunction( + IN SYMBOL* Symbol + ) +{ + // + // Calling convention. 
+ // + + GetSymbolTypeInfo(Symbol->TypeId, TI_GET_CALLING_CONVENTION, &Symbol->u.Function.CallingConvention); + + // + // Return type. + // + + DWORD ReturnTypeId; + GetSymbolTypeInfo(Symbol->TypeId, TI_GET_TYPE, &ReturnTypeId); + Symbol->u.Function.ReturnType = GetSymbolByTypeId(ReturnTypeId); + + // + // Arguments. + // + + GetSymbolTypeInfo(Symbol->TypeId, TI_GET_CHILDRENCOUNT, &Symbol->u.Function.ArgumentCount); + + Symbol->u.Function.Arguments = (PSYMBOL*)calloc(sizeof(SYMBOL*), Symbol->u.Function.ArgumentCount); + + TI_FINDCHILDREN_PARAMS* FindChildrenParams = (TI_FINDCHILDREN_PARAMS*)alloca(offsetof(TI_FINDCHILDREN_PARAMS, ChildId[Symbol->u.Function.ArgumentCount])); + FindChildrenParams->Start = 0; + FindChildrenParams->Count = Symbol->u.Function.ArgumentCount; + GetSymbolTypeInfo(Symbol->TypeId, TI_FINDCHILDREN, FindChildrenParams); + + for (DWORD i = 0; i < Symbol->u.Function.ArgumentCount; i++) + { + SYMBOL* Argument; + Argument = GetSymbolByTypeId(FindChildrenParams->ChildId[i]); + Symbol->u.Function.Arguments[i] = Argument; + } +} + +VOID +SymbolModule::ProcessSymbolFunctionArg( + IN SYMBOL* Symbol + ) +{ + DWORD ArgumentTypeId; + GetSymbolTypeInfo(Symbol->TypeId, TI_GET_TYPE, &ArgumentTypeId); + Symbol->u.FunctionArg.Type = GetSymbolByTypeId(ArgumentTypeId); +} + +VOID +SymbolModule::ProcessSymbolUserDataType( + IN SYMBOL* Symbol + ) +{ + GetSymbolTypeInfo(Symbol->TypeId, TI_GET_UDTKIND, &Symbol->u.UserData.Kind); + + GetSymbolTypeInfo(Symbol->TypeId, TI_GET_CHILDRENCOUNT, &Symbol->u.UserData.FieldCount); + + Symbol->u.UserData.Fields = (SYMBOL_USERDATA_FIELD*)calloc(sizeof(SYMBOL_USERDATA_FIELD), Symbol->u.UserData.FieldCount + 1); + + TI_FINDCHILDREN_PARAMS* FindChildrenParams = (TI_FINDCHILDREN_PARAMS*)alloca(offsetof(TI_FINDCHILDREN_PARAMS, ChildId[Symbol->u.UserData.FieldCount])); + FindChildrenParams->Start = 0; + FindChildrenParams->Count = Symbol->u.UserData.FieldCount; + GetSymbolTypeInfo(Symbol->TypeId, TI_FINDCHILDREN, FindChildrenParams); 
+ + SYMBOL_USERDATA_FIELD* PreviousMember = NULL; + DWORD PreviousBitPosition = 0; + for (DWORD i = 0; i < Symbol->u.UserData.FieldCount; i++) + { + SYMBOL_USERDATA_FIELD* Member = &Symbol->u.UserData.Fields[i]; + + Member->Parent = Symbol; + + Member->Name = GetSymbolNameByTypeId(FindChildrenParams->ChildId[i]); + + GetSymbolTypeInfo(FindChildrenParams->ChildId[i], TI_GET_OFFSET, &Member->Offset); + GetSymbolTypeInfo(FindChildrenParams->ChildId[i], TI_GET_BITPOSITION, &Member->BitPosition); + + DWORD MemberType; + GetSymbolTypeInfo(FindChildrenParams->ChildId[i], TI_GET_TYPE, &MemberType); + + Member->Type = GetSymbolByTypeId(MemberType); + + if (Member->BitPosition != 0 && PreviousMember != NULL) + { + PreviousMember->Bits = Member->BitPosition - PreviousBitPosition; + PreviousBitPosition = Member->BitPosition; + } + else if (PreviousBitPosition != 0) + { + PreviousMember->Bits = (DWORD)PreviousMember->Type->Size * CHAR_BIT - PreviousBitPosition; + PreviousBitPosition = 0; + } + + PreviousMember = Member; + } + + if (PreviousBitPosition != 0) + { + PreviousMember->Bits = (DWORD)PreviousMember->Type->Size * CHAR_BIT - PreviousMember->BitPosition; + } + + // + // Padding. + // + if (Symbol->u.UserData.Kind == UdtStruct && Symbol->u.UserData.FieldCount > 0) + { + SYMBOL_USERDATA_FIELD* LastUserDataField = &Symbol->u.UserData.Fields[Symbol->u.UserData.FieldCount - 1]; + SYMBOL_USERDATA_FIELD* PaddingUserDataField = &Symbol->u.UserData.Fields[Symbol->u.UserData.FieldCount]; + DWORD PaddingSize = (DWORD)Symbol->Size - (LastUserDataField->Offset + (DWORD)LastUserDataField->Type->Size); + + if (PaddingSize > 0) + { + SYMBOL* PaddingSymbolArrayElement = new SYMBOL; + PaddingSymbolArrayElement->Tag = SymTagBaseType; + PaddingSymbolArrayElement->BaseType = !(PaddingSize % 4) ? btLong : btChar; + PaddingSymbolArrayElement->Size = PaddingSymbolArrayElement->BaseType == btLong ? 
4 : 1; + PaddingSymbolArrayElement->TypeId = 0; + PaddingSymbolArrayElement->Name = NULL; + + + SYMBOL* PaddingSymbolArray = new SYMBOL; + PaddingSymbolArray->Tag = SymTagArrayType; + PaddingSymbolArray->BaseType = btNoType; + PaddingSymbolArray->Size = PaddingSize; + PaddingSymbolArray->TypeId = 0; + PaddingSymbolArray->Name = NULL; + PaddingSymbolArray->u.Array.ElementType = PaddingSymbolArrayElement; + PaddingSymbolArray->u.Array.ElementCount = PaddingSymbolArrayElement->BaseType == btLong ? PaddingSize / 4 : PaddingSize; + + PaddingUserDataField->Name = (CHAR*)malloc(64); + PaddingUserDataField->Type = PaddingSymbolArray; + PaddingUserDataField->Offset = LastUserDataField->Offset + (DWORD)LastUserDataField->Type->Size; + + PaddingUserDataField->Bits = 0; + PaddingUserDataField->BitPosition = 0; + PaddingUserDataField->Parent = Symbol; + + strcpy(PaddingUserDataField->Name, "__PADDING__"); + + Symbol->u.UserData.FieldCount++; + + m_SymbolSet.insert(PaddingSymbolArray); + m_SymbolSet.insert(PaddingSymbolArrayElement); + } + } +} + +void SymbolModule::DestroySymbol( + IN SYMBOL* Symbol + ) +{ + free(Symbol->Name); + + switch (Symbol->Tag) + { + case SymTagUDT: + for (DWORD i = 0; i < Symbol->u.UserData.FieldCount; i++) + { + free(Symbol->u.UserData.Fields[i].Name); + } + + free(Symbol->u.UserData.Fields); + break; + + case SymTagEnum: + for (DWORD i = 0; i < Symbol->u.Enum.FieldCount; i++) + { + free(Symbol->u.Enum.Fields[i].Name); + } + + free(Symbol->u.Enum.Fields); + break; + + case SymTagFunctionType: + free(Symbol->u.Function.Arguments); + break; + } +} + +////////////////////////////////////////////////////////////////////////// +// PDB - implementation +// + +struct BasicTypeMapElement +{ + BasicType BaseType; + ULONG64 Length; + const CHAR* BasicTypeString; + const CHAR* TypeString; +}; + +BasicTypeMapElement BasicTypeMapMSVC[] = { + { btNoType, 0, "btNoType", NULL }, + { btVoid, 0, "btVoid", "void" }, + { btChar, 1, "btChar", "char" }, + { btWChar, 2, 
"btWChar", "wchar_t" }, + { btInt, 1, "btInt", "char" }, + { btInt, 2, "btInt", "short" }, + { btInt, 4, "btInt", "int" }, + { btInt, 8, "btInt", "__int64" }, + { btUInt, 1, "btUInt", "unsigned char" }, + { btUInt, 2, "btUInt", "unsigned short" }, + { btUInt, 4, "btUInt", "unsigned int" }, + { btUInt, 8, "btUInt", "unsigned __int64" }, + { btFloat, 4, "btFloat", "float" }, + { btFloat, 8, "btFloat", "double" }, + { btBCD, 0, "btBCD", "BCD" }, + { btBool, 0, "btBool", "BOOL" }, + { btLong, 4, "btLong", "long" }, + { btULong, 4, "btULong", "unsigned long" }, + { btCurrency, 0, "btCurrency", NULL }, + { btDate, 0, "btDate", "DATE" }, + { btVariant, 0, "btVariant", "VARIANT" }, + { btComplex, 0, "btComplex", NULL }, + { btBit, 0, "btBit", NULL }, + { btBSTR, 0, "btBSTR", "BSTR" }, + { btHresult, 4, "btHresult", "HRESULT" }, + { (BasicType)0, 0, NULL, NULL }, +}; + +BasicTypeMapElement BasicTypeMapStdInt[] = { + { btNoType, 0, "btNoType", NULL }, + { btVoid, 0, "btVoid", "void" }, + { btChar, 1, "btChar", "char" }, + { btWChar, 2, "btWChar", "wchar_t" }, + { btInt, 1, "btInt", "int8_t" }, + { btInt, 2, "btInt", "int16_t" }, + { btInt, 4, "btInt", "int32_t" }, + { btInt, 8, "btInt", "int64_t" }, + { btUInt, 1, "btUInt", "uint8_t" }, + { btUInt, 2, "btUInt", "uint16_t" }, + { btUInt, 4, "btUInt", "uint32_t" }, + { btUInt, 8, "btUInt", "uint64_t" }, + { btFloat, 4, "btFloat", "float" }, + { btFloat, 8, "btFloat", "double" }, + { btBCD, 0, "btBCD", "BCD" }, + { btBool, 0, "btBool", "BOOL" }, + { btLong, 4, "btLong", "int32_t" }, + { btULong, 4, "btULong", "uint32_t" }, + { btCurrency, 0, "btCurrency", NULL }, + { btDate, 0, "btDate", "DATE" }, + { btVariant, 0, "btVariant", "VARIANT" }, + { btComplex, 0, "btComplex", NULL }, + { btBit, 0, "btBit", NULL }, + { btBSTR, 0, "btBSTR", "BSTR" }, + { btHresult, 4, "btHresult", "HRESULT" }, + { (BasicType)0, 0, NULL, NULL }, +}; + +PDB::PDB() +{ + m_Impl = new SymbolModule(); +} + +PDB::PDB( + IN CONST CHAR* Path + ) +{ + m_Impl = 
new SymbolModule();
+  m_Impl->Open(Path);
+}
+
+PDB::~PDB()
+{
+  delete m_Impl;
+}
+
+//
+// The methods below are thin wrappers: each one forwards
+// to the SymbolModule instance held in m_Impl.
+//
+
+BOOL
+PDB::Open(
+  IN CONST CHAR* Path
+  )
+{
+  return m_Impl->Open(Path);
+}
+
+BOOL
+PDB::IsOpened() const
+{
+  return m_Impl->IsOpened();
+}
+
+CONST CHAR*
+PDB::GetPath() const
+{
+  return m_Impl->GetPath();
+}
+
+VOID
+PDB::Close()
+{
+  m_Impl->Close();
+}
+
+CONST SYMBOL*
+PDB::GetSymbolByName(
+  IN CONST CHAR* SymbolName
+  )
+{
+  return m_Impl->GetSymbolByName(SymbolName);
+}
+
+CONST SYMBOL*
+PDB::GetSymbolByTypeId(
+  IN DWORD TypeId
+  )
+{
+  return m_Impl->GetSymbolByTypeId(TypeId);
+}
+
+const SymbolMap&
+PDB::GetSymbolMap() const
+{
+  return m_Impl->GetSymbolMap();
+}
+
+const SymbolNameMap&
+PDB::GetSymbolNameMap() const
+{
+  return m_Impl->GetSymbolNameMap();
+}
+
+//
+// Looks up the C type name for a (BaseType, Size) pair.
+//
+// The table is selected by UseStdInt and scanned linearly until
+// the terminating entry (BasicTypeString == NULL). An entry whose
+// Length is 0 matches any Size.
+//
+// Returns NULL when no entry matches; note that a matching entry
+// may itself carry a NULL TypeString.
+//
+CONST CHAR*
+PDB::GetBasicTypeString(
+  IN BasicType BaseType,
+  IN ULONG64 Size,
+  IN BOOL UseStdInt
+  )
+{
+  BasicTypeMapElement* TypeMap = UseStdInt ? BasicTypeMapStdInt : BasicTypeMapMSVC;
+
+  for (int n = 0; TypeMap[n].BasicTypeString != NULL; n++)
+  {
+    if (TypeMap[n].BaseType == BaseType)
+    {
+      if (TypeMap[n].Length == Size ||
+          TypeMap[n].Length == 0)
+      {
+        return TypeMap[n].TypeString;
+      }
+    }
+  }
+
+  return NULL;
+}
+
+//
+// Convenience overload: takes BaseType and Size from the symbol.
+//
+CONST CHAR*
+PDB::GetBasicTypeString(
+  IN CONST SYMBOL* Symbol,
+  IN BOOL UseStdInt
+  )
+{
+  return GetBasicTypeString(Symbol->BaseType, Symbol->Size, UseStdInt);
+}
+
+//
+// Returns "struct", "class" or "union" for the given kind,
+// or NULL when the kind is outside [UdtStruct, UdtUnion].
+// The kind value is used directly as the array index.
+//
+CONST CHAR*
+PDB::GetUdtKindString(
+  IN UdtKind UdtKind
+  )
+{
+  static CONST CHAR* UdtKindStrings[] = {
+    "struct",
+    "class",
+    "union",
+  };
+
+  if (UdtKind >= UdtStruct && UdtKind <= UdtUnion)
+  {
+    return UdtKindStrings[UdtKind];
+  }
+
+  return NULL;
+}
+
+//
+// Returns non-zero when the symbol name marks a compiler-generated
+// (anonymous) type, i.e. it contains "<unnamed-" or "__unnamed".
+//
+// NOTE(review): this patch text had lost everything between '<' and '>',
+// which left an unterminated string literal here. The two-substring test
+// was rebuilt from the surviving fragments - confirm against upstream.
+//
+BOOL
+PDB::IsUnnamedSymbol(
+  CONST SYMBOL* Symbol
+  )
+{
+  return strstr(Symbol->Name, "<unnamed-") != NULL ||
+         strstr(Symbol->Name, "__unnamed") != NULL;
+}
diff --git a/Source/PDB.h b/Source/PDB.h
new file mode 100644
index 0000000..aa15965
--- /dev/null
+++ b/Source/PDB.h
@@ -0,0 +1,398 @@
+#pragma once
+#define _CRT_SECURE_NO_WARNINGS
+#include
+
+#pragma warning (disable : 4091) // warning
C4091: 'typedef ': ignored on left of '' when no variable is declared +#include +#include + +#include +#include + +typedef struct _SYMBOL SYMBOL, *PSYMBOL; + +// +// Representation of the enum field. +// +// enum XYZ +// { +// // Name Value +// // ----------------- +// XYZ_1 = 2, +// XYZ_2 = 4, +// }; +// +// Note that 'Value' is represented as the VARIANT type. +// +typedef struct _SYMBOL_ENUM_FIELD +{ + // + // Name of the enumeration field. + // + CHAR* Name; + + // + // Assigned value of the enumeration field. + // + VARIANT Value; + + // + // Parent enumeration. + // + SYMBOL* Parent; + +} SYMBOL_ENUM_FIELD, *PSYMBOL_ENUM_FIELD; + +// +// Representation of the struct/class/union field. +// +// struct XYZ +// { +// // Type Name Bits Offset BitPosition +// // ------------------------------------------------- +// int XYZ_1; // 0 0 +// short XYZ_2 : 3; // 4 0 +// short XYZ_3 : 13; // 4 3 +// }; +// +typedef struct _SYMBOL_USERDATA_FIELD +{ + // + // Name of the user data field. + // + CHAR* Name; + + // + // Type of the field. + // + SYMBOL* Type; + + // + // Offset from the start of the structure/class/union. + // + DWORD Offset; + + // + // Amount of bits this field takes. + // If this value is 0, this field takes + // all of the space of the field type (Type->Size bytes). + // + DWORD Bits; + + // + // Which bit this fields starts at (relative to the Offset). + // + DWORD BitPosition; + + // + // Parent User Data symbol. + // + SYMBOL* Parent; + +} SYMBOL_USERDATA_FIELD, *PSYMBOL_USERDATA_FIELD; + +// +// Representation of the enumeration. +// +// Example for FieldCount = 3 +// +// enum XYZ +// { +// XYZ_1, +// XYZ_2, +// XYZ_3, +// }; +// +typedef struct _SYMBOL_ENUM +{ + // + // Count of fields in the enumeration. + // + DWORD FieldCount; + + // + // Pointer to the continuous array of structures of the enumeration fields. + // + SYMBOL_ENUM_FIELD* Fields; + +} SYMBOL_ENUM, *PSYMBOL_ENUM; + +// +// Representation of the typedef statement. 
+// +typedef struct _SYMBOL_TYPEDEF +{ + // + // Underlying type of the type definition. + // + SYMBOL* Type; + +} SYMBOL_TYPEDEF, *PSYMBOL_TYPEDEF; + +// +// Representation of the pointer statement. +// +typedef struct _SYMBOL_POINTER +{ + // + // Underlying type of the pointer definition. + // + SYMBOL* Type; + +} SYMBOL_POINTER, *PSYMBOL_POINTER; + +// +// Representation of the array. +// +typedef struct _SYMBOL_ARRAY +{ + // + // Type of the array element. + // + SYMBOL* ElementType; + + // + // Array size in elements. + // + DWORD ElementCount; + +} SYMBOL_ARRAY, *PSYMBOL_ARRAY; + +// +// Representation of the function. +// +typedef struct _SYMBOL_FUNCTION +{ + // + // Return type of the function. + // + SYMBOL* ReturnType; + + // + // Calling convention of the function. + // + CV_call_e CallingConvention; + + // + // Number of arguments. + // + DWORD ArgumentCount; + + // + // Pointer to the continuous array of pointers to the symbol structure for arguments. + // These symbols are of type SYMBOL_FUNCTIONARG and has tag SymTagFunctionArgType. + // + SYMBOL** Arguments; + +} SYMBOL_FUNCTION, *PSYMBOL_FUNCTION; + +// +// Representation of the function argument type. +// +typedef struct _SYMBOL_FUNCTIONARG +{ + // + // Underlying type of the argument. + // + PSYMBOL Type; + +} SYMBOL_FUNCTIONARG, *PSYMBOL_FUNCTIONARG; + +// +// Representation of the user data type (struct/class/union). +// +typedef struct _SYMBOL_USERDATA +{ + // + // Kind of the user data type. + // It may be either UdtStruct, UdtClass or UdtUnion. + // + UdtKind Kind; + + // + // Number of fields (members) in the user data type. + // + DWORD FieldCount; + + // + // Pointer to the continuous array of structures of the user data fields. + // + SYMBOL_USERDATA_FIELD* Fields; + +} SYMBOL_USERDATA, *PSYMBOL_USERDATA; + +// +// Representation of the debug symbol. +// +struct _SYMBOL +{ + // + // Type of the symbol. + // + enum SymTagEnum Tag; + + // + // Base type. 
+ // Only set if Tag == SymTagBaseType. + // + BasicType BaseType; + + // + // Total size of the type which this symbol represents. + // + ULONG64 Size; + + // + // Internal PDB's ID of the type. + // + DWORD TypeId; + + // + // Name of the type. + // + CHAR* Name; + + union + { + SYMBOL_ENUM Enum; + SYMBOL_TYPEDEF Typedef; + SYMBOL_POINTER Pointer; + SYMBOL_ARRAY Array; + SYMBOL_FUNCTION Function; + SYMBOL_FUNCTIONARG FunctionArg; + SYMBOL_USERDATA UserData; + } u; +}; + +class SymbolModule; + +using SymbolMap = std::unordered_map; +using SymbolNameMap = std::unordered_map; +using SymbolSet = std::unordered_set; + +class PDB +{ + public: + // + // Default constructor. + // + PDB(); + + // + // Instantiates PDB class with particular PDB file. + // + PDB( + IN CONST CHAR* Path + ); + + // + // Destructor. + // + ~PDB(); + + // + // Opens particular PDB file and parses it. + // + // Returns non-zero value on success. + // + BOOL + Open( + IN CONST CHAR* Path + ); + + // + // Returns TRUE if a PDB file is opened. + // + BOOL + IsOpened() const; + + // + // Returns path of the currently opened PDB file. + // + CONST CHAR* + GetPath() const; + + // + // Closes all resources which holds the opened PDB file. + // + VOID + Close(); + + // + // Returns a SYMBOL structure of particular name. + // + // Returns non-NULL value on success. + // + CONST SYMBOL* + GetSymbolByName( + IN CONST CHAR* SymbolName + ); + + // + // Returns a SYMBOL structure of particular Type ID. + // + // Returns non-NULL value on success. + // + CONST SYMBOL* + GetSymbolByTypeId( + IN DWORD TypeId + ); + + // + // Returns collection of all symbols. + // + const SymbolMap& + GetSymbolMap() const; + + // + // Returns collection of all named symbols. + // + const SymbolNameMap& + GetSymbolNameMap() const; + + // + // Returns C-like name of the type of provided symbol. + // The symbol must be BaseType. + // + // Returns non-NULL value on success. 
+    //
+    // No class qualifier here: 'PDB::' on an in-class member
+    // declaration is not valid C++ (MSVC reports it as C4596).
+    //
+    static
+    CONST CHAR*
+    GetBasicTypeString(
+      IN BasicType BaseType,
+      IN ULONG64 Size,
+      IN BOOL UseStdInt = FALSE
+      );
+
+    //
+    // Returns C-like name of the type of provided symbol.
+    // The symbol must be BaseType.
+    //
+    // Returns non-NULL value on success.
+    //
+    static
+    CONST CHAR*
+    GetBasicTypeString(
+      IN CONST SYMBOL* Symbol,
+      IN BOOL UseStdInt = FALSE
+      );
+
+    //
+    // Returns string representing the kind
+    // of provided User Data Type.
+    //
+    static
+    CONST CHAR*
+    GetUdtKindString(
+      IN UdtKind UdtKind
+      );
+
+    //
+    // Returns non-zero value if the name of the symbol
+    // marks it as an unnamed (anonymous) type.
+    //
+    static
+    BOOL
+    IsUnnamedSymbol(
+      CONST SYMBOL* Symbol
+      );
+
+  private:
+    //
+    // Implementation object; all non-static methods forward to it.
+    //
+    SymbolModule* m_Impl;
+};
diff --git a/Source/PDBExtractor.cpp b/Source/PDBExtractor.cpp
new file mode 100644
index 0000000..1521383
--- /dev/null
+++ b/Source/PDBExtractor.cpp
@@ -0,0 +1,567 @@
+#include "PDBExtractor.h"
+#include "PDB.h"
+#include "PDBHeaderReconstructor.h"
+#include "PDBSymbolVisitor.h"
+#include "PDBSymbolSorter.h"
+#include "UserDataFieldDefinition.h"
+
+//
+// NOTE(review): the three header names below were missing from this
+// patch text. They are restored from what this file uses (std::cerr,
+// std::ofstream, std::runtime_error) - confirm against upstream.
+//
+#include <iostream>
+#include <fstream>
+#include <stdexcept>
+
+namespace
+{
+  //
+  // Headers & footers for test file and reconstructed header.
+  //
+
+  //
+  // NOTE(review): the header names inside the first three lines were
+  // missing as well. The generated test calls printf() and offsetof(),
+  // and the -i switch emits stdint.h types, hence these three.
+  // %s receives the name of the reconstructed header file.
+  //
+  static const char TEST_FILE_HEADER[] =
+    "#include <stdio.h>\n"
+    "#include <stddef.h>\n"
+    "#include <stdint.h>\n"
+    "\n"
+    "#include \"%s\"\n"
+    "\n"
+    "int main()\n"
+    "{\n";
+
+  static const char TEST_FILE_FOOTER[] =
+    "\n"
+    "\treturn 0;\n"
+    "}\n"
+    "\n";
+
+  //
+  // %s, %s = PDB path, image architecture.
+  //
+  static const char HEADER_FILE_HEADER[] =
+    "/*\n"
+    " * PDB file: %s\n"
+    " * Image architecture: %s\n"
+    " *\n"
+    " * Dumped by pdbex tool v" PDBEX_VERSION_STRING ", by wbenny\n"
+    " */\n\n";
+
+  //
+  // Error messages.
+  //
+
+  static const char* MESSAGE_INVALID_PARAMETERS =
+    "Invalid parameters";
+
+  static const char* MESSAGE_FILE_NOT_FOUND =
+    "File not found";
+
+  static const char* MESSAGE_SYMBOL_NOT_FOUND =
+    "Symbol not found";
+
+  //
+  // Our exception class.
+  //
+
+  class PDBDumperException
+    : public std::runtime_error
+  {
+    public:
+      PDBDumperException(const char* Message)
+        : std::runtime_error(Message)
+      {
+
+      }
+  };
+}
+
+//
+// Runs the whole extraction: parses the command line, opens the PDB,
+// dumps either all symbols ("*") or the single requested one and
+// finally releases the output streams.
+//
+// Returns ERROR_SUCCESS, or EXIT_FAILURE when a PDBDumperException
+// was raised (its message is printed to stderr).
+//
+int
+PDBExtractor::Run(
+  int argc,
+  char** argv
+  )
+{
+  int Result = ERROR_SUCCESS;
+
+  try
+  {
+    ParseParameters(argc, argv);
+    OpenPDBFile();
+
+    PrintTestHeader();
+
+    if (m_Settings.SymbolName == "*")
+    {
+      DumpAllSymbols();
+    }
+    else
+    {
+      DumpOneSymbol();
+    }
+
+    PrintTestFooter();
+  }
+  catch (const PDBDumperException& e)
+  {
+    std::cerr << e.what() << std::endl;
+    Result = EXIT_FAILURE;
+  }
+
+  //
+  // Reached on success and on PDBDumperException alike.
+  //
+
+  CloseOpenedFiles();
+
+  return Result;
+}
+
+//
+// Prints the command line help to stdout.
+//
+// NOTE(review): the <...> placeholders were missing from this patch
+// text. They are restored from the order in which ParseParameters()
+// reads argv (symbol name first, PDB path second) - confirm the exact
+// wording against upstream.
+//
+void
+PDBExtractor::PrintUsage()
+{
+  printf("Extracts types and structures from PDB (Program database).\n");
+  printf("Version v%s\n", PDBEX_VERSION_STRING);
+  printf("\n");
+  printf("pdbex <symbol> <path> [-o <filename>] [-t <filename>] [-e <type>]\n");
+  printf(" [-u <prefix>] [-s prefix] [-r prefix] [-g suffix]\n");
+  printf(" [-p] [-x] [-m] [-b] [-d] [-i] [-j] [-k] [-n] [-l]\n");
+  printf("\n");
+  printf("<symbol> Symbol name to extract or '*' if all symbols should\n");
+  printf(" be extracted.\n");
+  printf("<path> Path to the PDB file.\n");
+  printf(" -o filename Specifies the output file. (stdout)\n");
+  printf(" -t filename Specifies the output test file. (off)\n");
+  printf(" -e [n,i,a] Specifies expansion of nested structures/unions. (i)\n");
+  printf(" n = none Only top-most type is printed.\n");
+  printf(" i = inline unnamed Unnamed types are nested.\n");
+  printf(" a = inline all All types are nested.\n");
+  printf(" -u prefix Unnamed union prefix (in combination with -d).\n");
+  printf(" -s prefix Unnamed struct prefix (in combination with -d).\n");
+  printf(" -r prefix Prefix for all symbols.\n");
+  printf(" -g suffix Suffix for all symbols.\n");
+  printf("\n");
+  printf("Following options can be explicitly turned off by trailing '-'.\n");
+  printf("Example: -p-\n");
+  printf(" -p Create padding members. (T)\n");
+  printf(" -x Show offsets. (T)\n");
+  printf(" -m Create Microsoft typedefs.
(T)\n");
+  printf(" -b Allow bitfields in union. (F)\n");
+  printf(" -d Allow unnamed data types. (T)\n");
+  printf(" -i Use types from stdint.h instead of native types. (F)\n");
+  printf(" -j Print definitions of referenced types. (T)\n");
+  printf(" -k Print header. (T)\n");
+  printf(" -n Print declarations. (T)\n");
+  printf(" -l Print definitions. (T)\n");
+  printf("\n");
+}
+
+//
+// Parses the command line into m_Settings and creates the
+// reconstructor, visitor and sorter objects.
+//
+// Expected layout: argv[1] = symbol name, argv[2] = PDB path,
+// followed by switches of the form "-X" or "-X-". The switches
+// o, t, e, u, s, r and g consume the following argument as value.
+//
+// Prints usage and exits the process when called without arguments
+// or with a lone -h / --help.
+//
+// Throws PDBDumperException(MESSAGE_INVALID_PARAMETERS) when the
+// PDB path is missing, a switch is malformed or unknown, or a
+// switch value is missing.
+//
+void
+PDBExtractor::ParseParameters(
+  int argc,
+  char** argv
+  )
+{
+  //
+  // Early check for help parameter.
+  //
+
+  if ( argc == 1 ||
+      (argc == 2 && strcmp(argv[1], "-h") == 0) ||
+      (argc == 2 && strcmp(argv[1], "--help") == 0))
+  {
+    PrintUsage();
+
+    //
+    // Kitten died when I wrote this.
+    //
+
+    exit(EXIT_SUCCESS);
+  }
+
+  //
+  // Both positional arguments are mandatory. Without this check
+  // a single non-help argument would assign argv[2] (a null pointer)
+  // to a std::string.
+  //
+
+  if (argc < 3)
+  {
+    throw PDBDumperException(MESSAGE_INVALID_PARAMETERS);
+  }
+
+  int ArgumentPointer = 0;
+
+  m_Settings.SymbolName = argv[++ArgumentPointer];
+  m_Settings.PdbPath = argv[++ArgumentPointer];
+
+  while (++ArgumentPointer < argc)
+  {
+    const char* CurrentArgument = argv[ArgumentPointer];
+    size_t CurrentArgumentLength = strlen(CurrentArgument);
+
+    //
+    // Value of the switch, if there is any argument left.
+    //
+
+    const char* NextArgument = (ArgumentPointer + 1 < argc)
+      ? argv[ArgumentPointer + 1]
+      : nullptr;
+
+    //
+    // Handling of -X- switches.
+    // Only "-X" and "-X-" are accepted.
+    //
+
+    bool ValidSwitch =
+      (CurrentArgumentLength == 2 && CurrentArgument[0] == '-') ||
+      (CurrentArgumentLength == 3 && CurrentArgument[0] == '-' && CurrentArgument[2] == '-');
+
+    if (!ValidSwitch)
+    {
+      throw PDBDumperException(MESSAGE_INVALID_PARAMETERS);
+    }
+
+    bool OffSwitch = CurrentArgumentLength == 3 && CurrentArgument[2] == '-';
+
+    //
+    // Handling of options.
+    //
+
+    switch (CurrentArgument[1])
+    {
+      case 'o':
+        if (!NextArgument)
+        {
+          throw PDBDumperException(MESSAGE_INVALID_PARAMETERS);
+        }
+
+        ++ArgumentPointer;
+        m_Settings.OutputFilename = NextArgument;
+        m_Settings.PdbHeaderReconstructorSettings.OutputFile = new std::ofstream(
+          NextArgument,
+          std::ios::out
+          );
+        break;
+
+      case 't':
+        if (!NextArgument)
+        {
+          throw PDBDumperException(MESSAGE_INVALID_PARAMETERS);
+        }
+
+        ++ArgumentPointer;
+        m_Settings.TestFilename = NextArgument;
+        m_Settings.PdbHeaderReconstructorSettings.TestFile = new std::ofstream(
+          m_Settings.TestFilename,
+          std::ios::out
+          );
+
+        break;
+
+      case 'e':
+        if (!NextArgument)
+        {
+          throw PDBDumperException(MESSAGE_INVALID_PARAMETERS);
+        }
+
+        ++ArgumentPointer;
+
+        //
+        // Only the first character of the value is examined;
+        // anything unknown falls back to InlineUnnamed.
+        //
+
+        switch (NextArgument[0])
+        {
+          case 'n':
+            m_Settings.PdbHeaderReconstructorSettings.MemberStructExpansion =
+              PDBHeaderReconstructor::MemberStructExpansionType::None;
+            break;
+
+          case 'i':
+            m_Settings.PdbHeaderReconstructorSettings.MemberStructExpansion =
+              PDBHeaderReconstructor::MemberStructExpansionType::InlineUnnamed;
+            break;
+
+          case 'a':
+            m_Settings.PdbHeaderReconstructorSettings.MemberStructExpansion =
+              PDBHeaderReconstructor::MemberStructExpansionType::InlineAll;
+            break;
+
+          default:
+            m_Settings.PdbHeaderReconstructorSettings.MemberStructExpansion =
+              PDBHeaderReconstructor::MemberStructExpansionType::InlineUnnamed;
+            break;
+        }
+        break;
+
+      case 'u':
+        if (!NextArgument)
+        {
+          throw PDBDumperException(MESSAGE_INVALID_PARAMETERS);
+        }
+
+        ++ArgumentPointer;
+        m_Settings.PdbHeaderReconstructorSettings.AnonymousUnionPrefix = NextArgument;
+        break;
+
+      case 's':
+        if (!NextArgument)
+        {
+          throw PDBDumperException(MESSAGE_INVALID_PARAMETERS);
+        }
+
+        ++ArgumentPointer;
+        m_Settings.PdbHeaderReconstructorSettings.AnonymousStructPrefix = NextArgument;
+        break;
+
+      case 'r':
+        if (!NextArgument)
+        {
+          throw PDBDumperException(MESSAGE_INVALID_PARAMETERS);
+        }
+
+        ++ArgumentPointer;
+        m_Settings.PdbHeaderReconstructorSettings.SymbolPrefix = NextArgument;
+        break;
+
+      case 'g':
+        if (!NextArgument)
+        {
+          throw PDBDumperException(MESSAGE_INVALID_PARAMETERS);
+        }
+
+        ++ArgumentPointer;
+        m_Settings.PdbHeaderReconstructorSettings.SymbolSuffix = NextArgument;
+        break;
+
+      case 'p':
+        m_Settings.PdbHeaderReconstructorSettings.CreatePaddingMembers = !OffSwitch;
+        break;
+
+      case 'x':
+        m_Settings.PdbHeaderReconstructorSettings.ShowOffsets = !OffSwitch;
+        break;
+
+      case 'm':
+        m_Settings.PdbHeaderReconstructorSettings.MicrosoftTypedefs = !OffSwitch;
+        break;
+
+      case 'b':
+        m_Settings.PdbHeaderReconstructorSettings.AllowBitFieldsInUnion = !OffSwitch;
+        break;
+
+      case 'd':
+        m_Settings.PdbHeaderReconstructorSettings.AllowAnonymousDataTypes = !OffSwitch;
+        break;
+
+      case 'i':
+        m_Settings.UserDataFieldDefinitionSettings.UseStdInt = !OffSwitch;
+        break;
+
+      case 'j':
+        m_Settings.PrintReferencedTypes = !OffSwitch;
+        break;
+
+      case 'k':
+        m_Settings.PrintHeader = !OffSwitch;
+        break;
+
+      case 'n':
+        m_Settings.PrintDeclarations = !OffSwitch;
+        break;
+
+      case 'l':
+        m_Settings.PrintDefinitions = !OffSwitch;
+        break;
+
+      default:
+        throw PDBDumperException(MESSAGE_INVALID_PARAMETERS);
+    }
+  }
+
+  //
+  // NOTE(review): the template arguments of the three make_unique
+  // calls were missing from this patch text. They are restored from
+  // the constructor arguments and the included headers - confirm
+  // against upstream.
+  //
+
+  m_HeaderReconstructor = std::make_unique<PDBHeaderReconstructor>(
+    &m_Settings.PdbHeaderReconstructorSettings
+    );
+
+  m_SymbolVisitor = std::make_unique<PDBSymbolVisitor<UserDataFieldDefinition>>(
+    m_HeaderReconstructor.get(),
+    &m_Settings.UserDataFieldDefinitionSettings
+    );
+
+  m_SymbolSorter = std::make_unique<PDBSymbolSorter>();
+}
+
+//
+// Opens the PDB file given on the command line.
+// Throws PDBDumperException(MESSAGE_FILE_NOT_FOUND) when
+// PDB::Open() returns FALSE.
+//
+void
+PDBExtractor::OpenPDBFile()
+{
+  if (m_PDB.Open(m_Settings.PdbPath.c_str()) == FALSE)
+  {
+    throw PDBDumperException(MESSAGE_FILE_NOT_FOUND);
+  }
+}
+
+//
+// Writes the prologue of the generated test program (only when a
+// test file was requested with -t).
+//
+// NOTE(review): OutputFilename stays nullptr when -o is not given,
+// so "-t" without "-o" passes a null pointer for %s here.
+//
+void
+PDBExtractor::PrintTestHeader()
+{
+  if (m_Settings.PdbHeaderReconstructorSettings.TestFile != nullptr)
+  {
+    static char TEST_FILE_HEADER_FORMATTED[16 * 1024];
+    sprintf_s(
+      TEST_FILE_HEADER_FORMATTED, TEST_FILE_HEADER,
+      m_Settings.OutputFilename
+      );
+
+    (*m_Settings.PdbHeaderReconstructorSettings.TestFile) << TEST_FILE_HEADER_FORMATTED;
+
}
+}
+
+//
+// Writes the epilogue of the generated test program (only when a
+// test file was requested).
+//
+void
+PDBExtractor::PrintTestFooter()
+{
+  if (m_Settings.PdbHeaderReconstructorSettings.TestFile != nullptr)
+  {
+    (*m_Settings.PdbHeaderReconstructorSettings.TestFile) << TEST_FILE_FOOTER;
+  }
+}
+
+//
+// Writes the banner comment (PDB path and image architecture)
+// to the output file, unless it was disabled with -k-.
+//
+void
+PDBExtractor::PrintPDBHeader()
+{
+  if (m_Settings.PrintHeader)
+  {
+    GetArchitecture();
+
+    //
+    // Indexed by the numeric value of m_Architecture.
+    // The table only points at string literals, so it is const.
+    //
+    // NOTE(review): assumes ImageArchitecture enumerates
+    // None, x86, x64 in this order - confirm in PDBSymbolSorter.h.
+    //
+
+    static const char* const ArchitectureStrings[] = {
+      "None",
+      "x86",
+      "x64"
+    };
+
+    static char HEADER_FILE_HEADER_FORMATTED[16 * 1024];
+
+    sprintf_s(
+      HEADER_FILE_HEADER_FORMATTED, HEADER_FILE_HEADER,
+      m_Settings.PdbPath.c_str(),
+      ArchitectureStrings[(int)m_Architecture]
+      );
+
+    (*m_Settings.PdbHeaderReconstructorSettings.OutputFile) << HEADER_FILE_HEADER_FORMATTED;
+  }
+}
+
+//
+// Writes a forward declaration ("struct X;") for every named
+// user data type among the sorted symbols, unless disabled with -n-.
+//
+void
+PDBExtractor::PrintPDBDeclarations()
+{
+  //
+  // Write declarations.
+  //
+
+  if (m_Settings.PrintDeclarations)
+  {
+    for (auto&& e : m_SymbolSorter->GetSortedSymbols())
+    {
+      if (e->Tag == SymTagUDT && !PDB::IsUnnamedSymbol(e))
+      {
+        *m_Settings.PdbHeaderReconstructorSettings.OutputFile
+          << PDB::GetUdtKindString(e->u.UserData.Kind)
+          << " " << m_HeaderReconstructor->GetCorrectedSymbolName(e) << ";"
+          << std::endl;
+      }
+    }
+
+    *m_Settings.PdbHeaderReconstructorSettings.OutputFile << std::endl;
+  }
+}
+
+void
+PDBExtractor::PrintPDBDefinitions()
+{
+  //
+  // Write definitions.
+  //
+
+  if (m_Settings.PrintDefinitions)
+  {
+    for (auto&& e : m_SymbolSorter->GetSortedSymbols())
+    {
+      bool Expand = true;
+
+      //
+      // Do not expand unnamed types, if they will be inlined.
+ // + + if (m_Settings.PdbHeaderReconstructorSettings.MemberStructExpansion == PDBHeaderReconstructor::MemberStructExpansionType::InlineUnnamed && + (e->Tag == SymTagEnum || e->Tag == SymTagUDT) && + PDB::IsUnnamedSymbol(e)) + { + Expand = false; + } + + if (Expand) + { + m_SymbolVisitor->Run(e); + } + } + } +} + +void +PDBExtractor::GetArchitecture() +{ + for (auto&& e : m_PDB.GetSymbolMap()) + { + m_SymbolSorter->Visit(e.second); + + if (m_SymbolSorter->GetImageArchitecture() != ImageArchitecture::None) + { + m_Architecture = m_SymbolSorter->GetImageArchitecture(); + + m_SymbolSorter->Clear(); + break; + } + } +} + +void +PDBExtractor::DumpAllSymbols() +{ + // + // We are going to print all symbols. + // + + PrintPDBHeader(); + + for (auto&& e : m_PDB.GetSymbolMap()) + { + m_SymbolSorter->Visit(e.second); + } + + PrintPDBDeclarations(); + PrintPDBDefinitions(); +} + +void +PDBExtractor::DumpOneSymbol() +{ + const SYMBOL* Symbol = m_PDB.GetSymbolByName(m_Settings.SymbolName.c_str()); + + if (Symbol == nullptr) + { + throw PDBDumperException(MESSAGE_SYMBOL_NOT_FOUND); + } + + PrintPDBHeader(); + + // + // InlineAll supresses PrintReferencedTypes. + // + + if (m_Settings.PrintReferencedTypes && + m_Settings.PdbHeaderReconstructorSettings.MemberStructExpansion != PDBHeaderReconstructor::MemberStructExpansionType::InlineAll) + { + m_SymbolSorter->Visit(Symbol); + + // + // Print header only when PrintReferencedTypes == true. + // + + PrintPDBDefinitions(); + } + else + { + // + // Print only the specified symbol. + // + + m_SymbolVisitor->Run(Symbol); + } +} + +void +PDBExtractor::CloseOpenedFiles() +{ + // + // We want to free the memory only if the filename was specified, + // because OutputFile or TestFile may be std::cout. 
+ // + + if (m_Settings.TestFilename) + { + delete m_Settings.PdbHeaderReconstructorSettings.TestFile; + } + + if (m_Settings.OutputFilename) + { + delete m_Settings.PdbHeaderReconstructorSettings.OutputFile; + } +} \ No newline at end of file diff --git a/Source/PDBExtractor.h b/Source/PDBExtractor.h new file mode 100644 index 0000000..79fbdfc --- /dev/null +++ b/Source/PDBExtractor.h @@ -0,0 +1,90 @@ +#pragma once +#include "PDBSymbolSorter.h" +#include "PDBHeaderReconstructor.h" +#include "PDBSymbolVisitor.h" +#include "UserDataFieldDefinition.h" + +#include +#include + +#define PDBEX_VERSION_MAJOR 0 +#define PDBEX_VERSION_MINOR 1 + +#define PDBEX_VERSION_STRING "0.1" + +class PDBExtractor +{ + public: + struct Settings + { + PDBHeaderReconstructor::Settings PdbHeaderReconstructorSettings; + UserDataFieldDefinition::Settings UserDataFieldDefinitionSettings; + + std::string SymbolName; + std::string PdbPath; + + const char* OutputFilename = nullptr; + const char* TestFilename = nullptr; + + bool PrintReferencedTypes = true; + bool PrintHeader = true; + bool PrintDeclarations = true; + bool PrintDefinitions = true; + }; + + int Run( + int argc, + char** argv + ); + + private: + void + PrintUsage(); + + void + ParseParameters( + int argc, + char** argv + ); + + void + OpenPDBFile(); + + void + PrintTestHeader(); + + void + PrintTestFooter(); + + void + PrintPDBHeader(); + + void + PrintPDBDeclarations(); + + void + PrintPDBDefinitions(); + + void + GetArchitecture(); + + void + DumpAllSymbols(); + + void + DumpOneSymbol(); + + void + CloseOpenedFiles(); + + private: + PDB m_PDB; + Settings m_Settings; + + ImageArchitecture m_Architecture = ImageArchitecture::None; + + std::unique_ptr m_SymbolSorter; + std::unique_ptr m_HeaderReconstructor; + std::unique_ptr> m_SymbolVisitor; +}; + diff --git a/Source/PDBHeaderReconstructor.cpp b/Source/PDBHeaderReconstructor.cpp new file mode 100644 index 0000000..aa882b4 --- /dev/null +++ b/Source/PDBHeaderReconstructor.cpp @@ 
-0,0 +1,683 @@ +#include "PDBHeaderReconstructor.h" + +#pragma once +#include "PDBReconstructorBase.h" + +#include +#include // std::accumulate +#include +#include +#include + +#include + +PDBHeaderReconstructor::PDBHeaderReconstructor( + Settings* VisitorSettings + ) +{ + static Settings DefaultSettings; + + if (VisitorSettings == nullptr) + { + VisitorSettings = &DefaultSettings; + } + + m_Settings = VisitorSettings; +} + +void +PDBHeaderReconstructor::Clear() +{ + assert(m_Depth == 0); + + m_AnonymousDataTypeCounter = 0; + m_PaddingMemberCounter = 0; + + m_UnnamedSymbols.clear(); + m_CorrectedSymbolNames.clear(); + m_VisitedSymbols.clear(); +} + +const std::string& +PDBHeaderReconstructor::GetCorrectedSymbolName( + const SYMBOL* Symbol + ) const +{ + auto CorrectedNameIt = m_CorrectedSymbolNames.find(Symbol); + if (CorrectedNameIt == m_CorrectedSymbolNames.end()) + { + // + // Build corrected name: + // SymbolPrefix + // + "_" (if Microsoft typedefs are enabled) + // + unnamed tag (if symbol does not have name) + // + symbol name (if symbol does have name) + // + // ...and cache the name. 
+ // + + std::string CorrectedName; + + CorrectedName += m_Settings->SymbolPrefix; + + if (PDB::IsUnnamedSymbol(Symbol)) + { + if (m_Settings->MicrosoftTypedefs) + { + CorrectedName += "_"; + } + + m_UnnamedSymbols.insert(Symbol); + + CorrectedName += m_Settings->UnnamedTypePrefix + std::to_string(m_UnnamedSymbols.size()); + } + else + { + CorrectedName += Symbol->Name; + } + + CorrectedName += m_Settings->SymbolSuffix; + + m_CorrectedSymbolNames[Symbol] = CorrectedName; + } + + return m_CorrectedSymbolNames[Symbol]; +} + +bool +PDBHeaderReconstructor::OnEnumType( + const SYMBOL* Symbol + ) +{ + std::string CorrectedName = GetCorrectedSymbolName(Symbol); + + bool Expand = ShouldExpand(Symbol); + + MarkAsVisited(Symbol); + + if (!Expand) + { + Write("enum %s", CorrectedName.c_str()); + } + + return Expand; +} + +void +PDBHeaderReconstructor::OnEnumTypeBegin( + const SYMBOL* Symbol + ) +{ + std::string CorrectedName = GetCorrectedSymbolName(Symbol); + + // + // Handle begin of the typedef. + // + + WriteTypedefBegin(Symbol); + + Write("enum"); + + if (PDB::IsUnnamedSymbol(Symbol) && m_Depth != 0) + { + Write(" //"); + } + + Write(" %s", CorrectedName.c_str()); + Write("\n"); + + WriteIndent(); + Write("{\n"); + + m_Depth += 1; +} + +void +PDBHeaderReconstructor::OnEnumTypeEnd( + const SYMBOL* Symbol + ) +{ + m_Depth -= 1; + + WriteIndent(); + Write("}"); + + // + // Handle end of the typedef. 
+ // + + WriteTypedefEnd(Symbol); + + if (m_Depth == 0) + { + Write(";\n\n"); + } +} + +void +PDBHeaderReconstructor::OnEnumField( + const SYMBOL_ENUM_FIELD* EnumField + ) +{ + WriteIndent(); + Write("%s = ", EnumField->Name); + + WriteVariant(&EnumField->Value); + Write(",\n"); +} + +bool +PDBHeaderReconstructor::OnUserDataType( + const SYMBOL* Symbol + ) +{ + bool Expand = ShouldExpand(Symbol); + + MarkAsVisited(Symbol); + + if (!Expand) + { + std::string CorrectedName = GetCorrectedSymbolName(Symbol); + Write("%s %s", PDB::GetUdtKindString(Symbol->u.UserData.Kind), CorrectedName.c_str()); + + // + // If we're not expanding the type at the root level, + // OnUserDataTypeEnd() won't be called, so print the semicolon here. + // + + if (m_Depth == 0) + { + Write(";\n\n"); + } + } + + return Expand; +} + +void +PDBHeaderReconstructor::OnUserDataTypeBegin( + const SYMBOL* Symbol + ) +{ + // + // Handle begin of the typedef. + // + + WriteTypedefBegin(Symbol); + + Write("%s", PDB::GetUdtKindString(Symbol->u.UserData.Kind)); + + if (PDB::IsUnnamedSymbol(Symbol) && m_Depth != 0) + { + Write(" //"); + } + + std::string CorrectedName = GetCorrectedSymbolName(Symbol); + Write(" %s", CorrectedName.c_str()); + + Write("\n"); + + WriteIndent(); + Write("{\n"); + + m_Depth += 1; +} + +void +PDBHeaderReconstructor::OnUserDataTypeEnd( + const SYMBOL* Symbol + ) +{ + m_Depth -= 1; + + WriteIndent(); + Write("}"); + + // + // Handle end of the typedef. + // + + WriteTypedefEnd(Symbol); + + if (m_Depth == 0) + { + Write(";\n\n"); + } +} + +void +PDBHeaderReconstructor::OnUserDataFieldBegin( + const SYMBOL_USERDATA_FIELD* UserDataField + ) +{ + WriteIndent(); + + // + // Do not show offsets for members which will be expanded. 
+ // + + if (UserDataField->Type->Tag != SymTagUDT || + ShouldExpand(UserDataField->Type) == false) + { + WriteOffset(UserDataField, GetParentOffset()); + } + + AppendToTest(UserDataField); + + // + // Push current offset in case we will be expanding + // some user data field. + // + + m_OffsetStack.push_back(UserDataField->Offset); +} + +void +PDBHeaderReconstructor::OnUserDataFieldEnd( + const SYMBOL_USERDATA_FIELD* UserDataField + ) +{ + // + // Pop offset of the current user data field. + // + + m_OffsetStack.pop_back(); +} + +void +PDBHeaderReconstructor::OnUserDataField( + const SYMBOL_USERDATA_FIELD* UserDataField, + UserDataFieldDefinitionBase* MemberDefinition + ) +{ + Write("%s", MemberDefinition->GetPrintableDefinition().c_str()); + + // + // BitField handling. + // + + if (UserDataField->Bits != 0) + { + Write(" : %i", UserDataField->Bits); + } + + Write(";"); + Write("\n"); +} + +void +PDBHeaderReconstructor::OnAnonymousUserDataTypeBegin( + UdtKind UserDataTypeKind, + const SYMBOL_USERDATA_FIELD* FirstUserDataField + ) +{ + WriteIndent(); + Write("%s\n", PDB::GetUdtKindString(UserDataTypeKind)); + + WriteIndent(); + Write("{\n"); + + m_Depth += 1; +} + +void +PDBHeaderReconstructor::OnAnonymousUserDataTypeEnd( + UdtKind UserDataTypeKind, + const SYMBOL_USERDATA_FIELD* FirstUserDataField, + const SYMBOL_USERDATA_FIELD* LastUserDataField + ) +{ + m_Depth -= 1; + WriteIndent(); + Write("}"); + + WriteUnnamedDataType(UserDataTypeKind); + + Write(";"); + Write("\n"); +} + +void +PDBHeaderReconstructor::OnUserDataFieldBitFieldBegin( + const SYMBOL_USERDATA_FIELD* FirstUserDataFieldBitField + ) +{ + if (m_Settings->AllowBitFieldsInUnion == false) + { + WriteIndent(); + Write("%s /* bitfield */\n", PDB::GetUdtKindString(UdtStruct)); + + WriteIndent(); + Write("{\n"); + + m_Depth += 1; + } +} + +void +PDBHeaderReconstructor::OnUserDataFieldBitFieldEnd( + const SYMBOL_USERDATA_FIELD* FirstUserDataFieldBitField, + const SYMBOL_USERDATA_FIELD* 
LastUserDataFieldBitField
+  )
+{
+  if (m_Settings->AllowBitFieldsInUnion == false)
+  {
+    m_Depth -= 1;
+
+    WriteIndent();
+    Write("}; /* bitfield */\n");
+  }
+}
+
+//
+// Emits a padding member "<type> <prefix><counter>[<count>];"
+// when padding members are enabled. The array suffix is written
+// only for PaddingSize > 1.
+//
+void
+PDBHeaderReconstructor::OnPaddingMember(
+  const SYMBOL_USERDATA_FIELD* UserDataField,
+  BasicType PaddingBasicType,
+  ULONG64 PaddingBasicTypeSize,
+  DWORD PaddingSize
+  )
+{
+  if (m_Settings->CreatePaddingMembers)
+  {
+    WriteIndent();
+
+    //
+    // The padding is placed in front of UserDataField, so its offset
+    // is the field offset minus the total padding size in bytes.
+    //
+
+    WriteOffset(UserDataField, -((int)PaddingSize * (int)PaddingBasicTypeSize));
+
+    Write(
+      "%s %s%u",
+      PDB::GetBasicTypeString(PaddingBasicType, PaddingBasicTypeSize),
+      m_Settings->PaddingMemberPrefix.c_str(),
+      m_PaddingMemberCounter++
+      );
+
+    if (PaddingSize > 1)
+    {
+      Write("[%u]", PaddingSize);
+    }
+
+    Write(";\n");
+  }
+}
+
+//
+// printf-style write into the output file.
+// The text is formatted into a 16 kB stack buffer first.
+//
+void
+PDBHeaderReconstructor::Write(
+  const char* Format,
+  ...
+  )
+{
+  char TempBuffer[16 * 1024];
+
+  va_list ArgPtr;
+  va_start(ArgPtr, Format);
+  vsprintf_s(TempBuffer, Format, ArgPtr);
+  va_end(ArgPtr);
+
+  m_Settings->OutputFile->write(TempBuffer, strlen(TempBuffer));
+}
+
+//
+// Writes one indentation unit per nesting level (m_Depth).
+//
+void
+PDBHeaderReconstructor::WriteIndent()
+{
+  for (DWORD i = 0; i < m_Depth; i++)
+  {
+    Write(" ");
+  }
+}
+
+//
+// Writes the value of an enum field.
+// Signed types are printed in decimal, unsigned types in hex.
+//
+// Each case reads the union member that matches v->vt. Reading
+// unsigned values through the signed members (cVal, iVal) would
+// sign-extend anything with the top bit set, e.g. 0x80 would be
+// printed as 0xffffff80.
+//
+// Variant types not listed here produce no output.
+//
+void
+PDBHeaderReconstructor::WriteVariant(
+  const VARIANT* v
+  )
+{
+  switch (v->vt)
+  {
+    case VT_I1:
+      Write("%d", (INT)v->cVal);
+      break;
+
+    case VT_UI1:
+      Write("0x%x", (UINT)v->bVal);
+      break;
+
+    case VT_I2:
+      Write("%d", (INT)v->iVal);
+      break;
+
+    case VT_UI2:
+      Write("0x%x", (UINT)v->uiVal);
+      break;
+
+    case VT_INT:
+    case VT_I4:
+      Write("%d", (INT)v->lVal);
+      break;
+
+    case VT_UINT:
+    case VT_UI4:
+      Write("0x%x", (UINT)v->ulVal);
+      break;
+
+    default:
+      break;
+  }
+}
+
+void
+PDBHeaderReconstructor::WriteUnnamedDataType(
+  UdtKind Kind
+  )
+{
+  if (m_Settings->AllowAnonymousDataTypes == false)
+  {
+    switch (Kind)
+    {
+      case UdtStruct:
+      case UdtClass:
+        Write(" %s", m_Settings->AnonymousStructPrefix.c_str());
+        break;
+
+      case UdtUnion:
+        Write(" %s", m_Settings->AnonymousUnionPrefix.c_str());
+        break;
+
+      default:
+        assert(0);
+        break;
+    }
+
+    if
(m_AnonymousDataTypeCounter++ > 0) + { + Write("%u", m_AnonymousDataTypeCounter); + } + } +} + +void +PDBHeaderReconstructor::WriteTypedefBegin( + const SYMBOL* Symbol + ) +{ + std::string CorrectedName = GetCorrectedSymbolName(Symbol); + bool UseTypedef = m_Settings->MicrosoftTypedefs && CorrectedName[0] == '_'; + + if (UseTypedef && m_Depth == 0) + { + Write("typedef "); + } +} + +void +PDBHeaderReconstructor::WriteTypedefEnd( + const SYMBOL* Symbol + ) +{ + std::string CorrectedName = GetCorrectedSymbolName(Symbol); + bool UseTypedef = m_Settings->MicrosoftTypedefs && CorrectedName[0] == '_'; + + if (UseTypedef && m_Depth == 0) + { + Write(" %s, *P%s", &CorrectedName[1], &CorrectedName[1]); + } +} + +void +PDBHeaderReconstructor::WriteOffset( + const SYMBOL_USERDATA_FIELD* UserDataField, + int PaddingOffset + ) +{ + if (m_Settings->ShowOffsets) + { + Write("/* 0x%04x */ ", UserDataField->Offset + PaddingOffset); + } +} + +bool +PDBHeaderReconstructor::HasBeenVisited( + const SYMBOL* Symbol + ) const +{ + std::string CorrectedName = GetCorrectedSymbolName(Symbol); + return m_VisitedSymbols.find(CorrectedName) != m_VisitedSymbols.end(); +} + +void +PDBHeaderReconstructor::MarkAsVisited( + const SYMBOL* Symbol + ) +{ + std::string CorrectedName = GetCorrectedSymbolName(Symbol); + m_VisitedSymbols.insert(CorrectedName); +} + +DWORD +PDBHeaderReconstructor::GetParentOffset() const +{ + return std::accumulate(m_OffsetStack.begin(), m_OffsetStack.end(), (DWORD)0); +} + +void +PDBHeaderReconstructor::AppendToTest( + const SYMBOL_USERDATA_FIELD* UserDataField + ) +{ + // + // Test of the current member is produced when: + // - Test file was specified. + // - We're in the root user data type. + // - This field is not a part of the bitfield. + // + + if (m_Settings->TestFile != nullptr && + m_OffsetStack.empty() && + UserDataField->Bits == 0) + { + // + // Line of the test: + // + // printf( + // \"[%%c] 0x%%04x - 0x%04x (%s %s.%s)\\n\", + // offsetof(%s %s, %s) == %u ? 
' ' : '!', + // (unsigned)offsetof(%s %s, %s) + // ); + // + // + // Example: + // + // printf( + // "[%c] 0x%04x - 0x000c (struct _DEVICE_OBJECT.NextDevice)\n", + // offsetof(struct _DEVICE_OBJECT, NextDevice) == 12 ? ' ' : '!', + // (unsigned)offsetof(struct _DEVICE_OBJECT, NextDevice) + // ); + // + static const char TestFormatString[] = + "\t" + "printf(" + "\"[%%c] 0x%%04x - 0x%04x (%s %s.%s)\\n\", " + "offsetof(%s %s, %s) == %u ? ' ' : '!', " + "(unsigned)offsetof(%s %s, %s)" + ");"; + + // + // Delimiter for the structs. + // + // printf("\n"); + // + static const char TestDelimiterString[] = + "\t" + "printf(" + "\"\\n\"" + ");"; + + // + // Holds corrected name of a last symbol + // which test was produced for. + // This is used for delimiting tests + // with extra new line. + // + static std::string LastTestedUserDataType; + + std::string CorrectedSymbolName = GetCorrectedSymbolName(UserDataField->Parent); + + // + // If the current member is part of a different user data type + // than the previous one, delimit the output of the test + // by extra new line. + // + if (CorrectedSymbolName != LastTestedUserDataType) + { + (*m_Settings->TestFile) << TestDelimiterString << std::endl; + } + + LastTestedUserDataType = CorrectedSymbolName; + + // + // Build the line for the test. 
+ // + static char FormattedStringBuffer[4096]; + sprintf_s( + FormattedStringBuffer, + TestFormatString, + UserDataField->Offset, + + PDB::GetUdtKindString(UserDataField->Parent->u.UserData.Kind), + CorrectedSymbolName.c_str(), + UserDataField->Name, + + PDB::GetUdtKindString(UserDataField->Parent->u.UserData.Kind), + CorrectedSymbolName.c_str(), + UserDataField->Name, + UserDataField->Offset, + + PDB::GetUdtKindString(UserDataField->Parent->u.UserData.Kind), + CorrectedSymbolName.c_str(), + UserDataField->Name + ); + + (*m_Settings->TestFile) << FormattedStringBuffer << std::endl; + } +} + +bool +PDBHeaderReconstructor::ShouldExpand( + const SYMBOL* Symbol + ) const +{ + bool Expand = false; + + switch (m_Settings->MemberStructExpansion) + { + default: + case MemberStructExpansionType::None: + Expand = m_Depth == 0; + break; + + case MemberStructExpansionType::InlineUnnamed: + Expand = m_Depth == 0 || PDB::IsUnnamedSymbol(Symbol); + break; + + case MemberStructExpansionType::InlineAll: + Expand = !HasBeenVisited(Symbol); + break; + } + + return Expand && Symbol->Size > 0; +} + diff --git a/Source/PDBHeaderReconstructor.h b/Source/PDBHeaderReconstructor.h new file mode 100644 index 0000000..fadfe4b --- /dev/null +++ b/Source/PDBHeaderReconstructor.h @@ -0,0 +1,275 @@ +#pragma once +#include "PDBReconstructorBase.h" + +#include +#include // std::accumulate +#include +#include +#include + +#include + +class PDBHeaderReconstructor + : public PDBReconstructorBase +{ + public: + enum class MemberStructExpansionType + { + // + // No expansion of nested user data types. + // + None, + + // + // Expand only unnamed user data types. + // + InlineUnnamed, + + // + // Expand all references user data types. 
+ // + InlineAll, + }; + + struct Settings + { + Settings() + { + MemberStructExpansion = MemberStructExpansionType::InlineUnnamed; + OutputFile = &std::cout; + TestFile = nullptr; + PaddingMemberPrefix = "Padding_"; + UnnamedTypePrefix = "TAG_UNNAMED_"; + AnonymousStructPrefix = "s"; // DUMMYSTRUCTNAME (up to 6) + AnonymousUnionPrefix = "u"; // DUMMYUNIONNAME (up to 9) + CreatePaddingMembers = true; + ShowOffsets = true; + MicrosoftTypedefs = true; + AllowBitFieldsInUnion = false; + AllowAnonymousDataTypes = true; + } + + MemberStructExpansionType MemberStructExpansion; + std::ostream* OutputFile; + std::ostream* TestFile; + std::string PaddingMemberPrefix; + std::string UnnamedTypePrefix; + std::string SymbolPrefix; + std::string SymbolSuffix; + std::string AnonymousStructPrefix; + std::string AnonymousUnionPrefix; + bool CreatePaddingMembers : 1; + bool ShowOffsets : 1; + bool MicrosoftTypedefs : 1; + bool AllowBitFieldsInUnion : 1; + bool AllowAnonymousDataTypes : 1; + }; + + PDBHeaderReconstructor( + Settings* VisitorSettings = nullptr + ); + + void + Clear(); + + const std::string& + GetCorrectedSymbolName( + const SYMBOL* Symbol + ) const; + + protected: + bool + OnEnumType( + const SYMBOL* Symbol + ) override; + + void + OnEnumTypeBegin( + const SYMBOL* Symbol + ) override; + + void + OnEnumTypeEnd( + const SYMBOL* Symbol + ) override; + + void + OnEnumField( + const SYMBOL_ENUM_FIELD* EnumField + ) override; + + bool + OnUserDataType( + const SYMBOL* Symbol + ) override; + + void + OnUserDataTypeBegin( + const SYMBOL* Symbol + ) override; + + void + OnUserDataTypeEnd( + const SYMBOL* Symbol + ) override; + + void + OnUserDataFieldBegin( + const SYMBOL_USERDATA_FIELD* UserDataField + ) override; + + void + OnUserDataFieldEnd( + const SYMBOL_USERDATA_FIELD* UserDataField + ) override; + + void + OnUserDataField( + const SYMBOL_USERDATA_FIELD* UserDataField, + UserDataFieldDefinitionBase* MemberDefinition + ) override; + + void + OnAnonymousUserDataTypeBegin( 
+ UdtKind UserDataTypeKind, + const SYMBOL_USERDATA_FIELD* FirstUserDataField + ); + + void + OnAnonymousUserDataTypeEnd( + UdtKind UserDataTypeKind, + const SYMBOL_USERDATA_FIELD* FirstUserDataField, + const SYMBOL_USERDATA_FIELD* LastUserDataField + ) override; + + void + OnUserDataFieldBitFieldBegin( + const SYMBOL_USERDATA_FIELD* FirstUserDataFieldBitField + ) override; + + void + OnUserDataFieldBitFieldEnd( + const SYMBOL_USERDATA_FIELD* FirstUserDataFieldBitField, + const SYMBOL_USERDATA_FIELD* LastUserDataFieldBitField + ) override; + + void + OnPaddingMember( + const SYMBOL_USERDATA_FIELD* UserDataField, + BasicType PaddingBasicType, + ULONG64 PaddingBasicTypeSize, + DWORD PaddingSize + ) override; + + private: + void + Write( + const char* Format, + ... + ); + + void + WriteIndent(); + + void + WriteVariant( + const VARIANT* v + ); + + void + WriteUnnamedDataType( + UdtKind Kind + ); + + void + WriteTypedefBegin( + const SYMBOL* Symbol + ); + + void + WriteTypedefEnd( + const SYMBOL* Symbol + ); + + void + WriteOffset( + const SYMBOL_USERDATA_FIELD* UserDataField, + int PaddingOffset + ); + + bool + HasBeenVisited( + const SYMBOL* Symbol + ) const; + + void + MarkAsVisited( + const SYMBOL* Symbol + ); + + DWORD + GetParentOffset() const; + + void + AppendToTest( + const SYMBOL_USERDATA_FIELD* UserDataField + ); + + bool + ShouldExpand( + const SYMBOL* Symbol + ) const; + + private: + // + // Settings for this visitor. + // + Settings* m_Settings; + + // + // Everytime visitor enters a new member (user data field), + // it pushes the current offset here. + // In case the current member is a new struct (or any other + // user data type) which will be expanded, this property + // helps to find the offset of the parent member. + // + std::vector m_OffsetStack; + + // + // Indentation. + // + DWORD m_Depth = 0; + + // + // Counter for anonymous user data types. + // + DWORD m_AnonymousDataTypeCounter = 0; + + // + // Counter of padding members. 
+ // + DWORD m_PaddingMemberCounter = 0; + + // + // Collection of unnamed symbols. + // + // Unnamed symbols actually have a special name. + // See PDB::IsUnnamedSymbol() for more information. + // + mutable std::set m_UnnamedSymbols; + + // + // Mapping of symbols to their "corrected" names. + // + mutable std::map m_CorrectedSymbolNames; + + // + // Collection of symbol names which has already been visited. + // We save names of the symbols here, because some PDBs + // has multiple definition of the same symbol. + // + // See PDBVisitorSorter::HasBeenVisited() for more information. + // + std::set m_VisitedSymbols; +}; + diff --git a/Source/PDBReconstructorBase.h b/Source/PDBReconstructorBase.h new file mode 100644 index 0000000..029af4d --- /dev/null +++ b/Source/PDBReconstructorBase.h @@ -0,0 +1,201 @@ +#pragma once + +#include "PDB.h" +#include "UserDataFieldDefinitionBase.h" + +class PDBReconstructorBase +{ + public: + // + // Called when reached the 'enum' type. + // If the return value is true, the enum will be expanded. + // + virtual + bool + OnEnumType( + const SYMBOL* Symbol + ) + { + return false; + } + + // + // Called when entering into the 'enum' type + // which will be expanded. + // + virtual + void + OnEnumTypeBegin( + const SYMBOL* Symbol + ) + { + + } + + // + // Called when leaving from the 'enum' type. + // + virtual + void + OnEnumTypeEnd( + const SYMBOL* Symbol + ) + { + + } + + // + // Called for each field of the curent 'enum' type. + // + virtual + void + OnEnumField( + const SYMBOL_ENUM_FIELD* EnumField + ) + { + + } + + // + // Called when reached the user data type (struct/class/union) + // If the return value is true, the user data type will be expanded. + // + virtual + bool + OnUserDataType( + const SYMBOL* Symbol + ) + { + return false; + } + + // + // Called when entering into the user data type (struct/class/union) + // which will be expanded. 
+ // + virtual + void + OnUserDataTypeBegin( + const SYMBOL* Symbol + ) + { + + } + + // + // Called when leaving from the current user data type. + // + virtual + void + OnUserDataTypeEnd( + const SYMBOL* Symbol + ) + { + + } + + // + // Called when entering into the field of the current user data type. + // + virtual + void + OnUserDataFieldBegin( + const SYMBOL_USERDATA_FIELD* UserDataField + ) + { + + } + + // + // Called when leaving from the field of the current user data type. + // + virtual + void + OnUserDataFieldEnd( + const SYMBOL_USERDATA_FIELD* UserDataField + ) + { + + } + + // + // Called for each field in the current user data type. + // + virtual + void + OnUserDataField( + const SYMBOL_USERDATA_FIELD* UserDataField, + UserDataFieldDefinitionBase* MemberDefinition + ) + { + + } + + // + // Called when entering into the nested anonymous user data type (struct/class/union) + // which will be expanded. + // + virtual + void + OnAnonymousUserDataTypeBegin( + UdtKind UserDataTypeKind, + const SYMBOL_USERDATA_FIELD* FirstUserDataField + ) + { + + } + + // + // Called when leaving from the current nested anonymous user data type. + // + virtual + void + OnAnonymousUserDataTypeEnd( + UdtKind UserDataTypeKind, + const SYMBOL_USERDATA_FIELD* FirstUserDataField, + const SYMBOL_USERDATA_FIELD* LastUserDataField + ) + { + + } + + // + // Called when entering the bitfield. + // + virtual + void + OnUserDataFieldBitFieldBegin( + const SYMBOL_USERDATA_FIELD* FirstUserDataFieldBitField + ) + { + + } + + // + // Called when leaving the bitfield. + // + virtual + void + OnUserDataFieldBitFieldEnd( + const SYMBOL_USERDATA_FIELD* FirstUserDataFieldBitField, + const SYMBOL_USERDATA_FIELD* LastUserDataFieldBitField + ) + { + + } + + // + // Called when a padding member should be created. 
+ // + virtual + void + OnPaddingMember( + const SYMBOL_USERDATA_FIELD* UserDataField, + BasicType PaddingBasicType, + ULONG64 PaddingBasicTypeSize, + DWORD PaddingSize + ) + { + + } +}; diff --git a/Source/PDBSymbolSorter.h b/Source/PDBSymbolSorter.h new file mode 100644 index 0000000..3a43879 --- /dev/null +++ b/Source/PDBSymbolSorter.h @@ -0,0 +1,155 @@ +#pragma once +#include "PDB.h" +#include "PDBSymbolVisitorBase.h" + +#include +#include +#include +#include + +enum class ImageArchitecture +{ + None, + x86, + x64, +}; + +class PDBSymbolSorter + : public PDBSymbolVisitorBase +{ + public: + std::vector& + GetSortedSymbols() + { + return m_SortedSymbols; + } + + ImageArchitecture + GetImageArchitecture() const + { + return m_Architecture; + } + + void + Clear() + { + ImageArchitecture m_Architecture = ImageArchitecture::None; + + m_VisitedUserDataTypes.clear(); + m_SortedSymbols.clear(); + } + + protected: + void + VisitEnumType( + const SYMBOL* Symbol + ) override + { + if (HasBeenVisited(Symbol)) return; + + AddSymbol(Symbol); + } + + void + VisitPointerType( + const SYMBOL* Symbol + ) override + { + if (m_Architecture == ImageArchitecture::None) + { + switch (Symbol->Size) + { + case 4: + m_Architecture = ImageArchitecture::x86; + break; + + case 8: + m_Architecture = ImageArchitecture::x64; + break; + + default: + assert(0); + break; + } + } + } + + void + VisitUserDataType( + const SYMBOL* Symbol + ) override + { + if (HasBeenVisited(Symbol)) return; + + PDBSymbolVisitorBase::VisitUserDataType(Symbol); + + AddSymbol(Symbol); + } + + void + VisitUserDataField( + const SYMBOL_USERDATA_FIELD* UserDataField + ) override + { + Visit(UserDataField->Type); + } + + private: + bool + HasBeenVisited( + const SYMBOL* Symbol + ) + { + static DWORD UnnamedCounter = 0; + + // + // In one PDB there can be more than one symbol + // with same name (and different definitions), + // which would result into redefinitions of types + // during the printing. 
+ // + // Problem is solved by taking into account + // and printing only the first definition of the symbol. + // + // Another solution could be appending a suffix (_1, _2, ...) + // to the symbol names, but then it wouldn't reflect the real names. + // So let's just assume all definitions are same + // and/or the first one is the most correct one. + // + // Also, unnamed symbols must be handled as a special case. + // + + std::string Key = Symbol->Name; + if (m_VisitedUserDataTypes.find(Key) != m_VisitedUserDataTypes.end()) + { + return true; + } + else + { + if (PDB::IsUnnamedSymbol(Symbol)) + { + Key += std::to_string(++UnnamedCounter); + } + + m_VisitedUserDataTypes[Key] = Symbol; + return false; + } + } + + void + AddSymbol( + const SYMBOL* Symbol + ) + { + if (std::find(m_SortedSymbols.begin(), m_SortedSymbols.end(), Symbol) == m_SortedSymbols.end()) + { + m_SortedSymbols.push_back(Symbol); + } + } + + ImageArchitecture m_Architecture = ImageArchitecture::None; + + std::map m_VisitedUserDataTypes; + std::vector m_SortedSymbols; +}; + diff --git a/Source/PDBSymbolVisitor.h b/Source/PDBSymbolVisitor.h new file mode 100644 index 0000000..66fc0fe --- /dev/null +++ b/Source/PDBSymbolVisitor.h @@ -0,0 +1,327 @@ +#pragma once +#include "PDB.h" +#include "PDBSymbolVisitorBase.h" +#include "PDBReconstructorBase.h" + +#include +#include + +template < + typename MEMBER_DEFINITION_TYPE +> +class PDBSymbolVisitor + : public PDBSymbolVisitorBase +{ + public: + // + // Public methods. + // + + PDBSymbolVisitor( + PDBReconstructorBase* ReconstructVisitor, + void* MemberDefinitionSettings = nullptr + ); + + void + Run( + const SYMBOL* Symbol + ); + + protected: + // + // Protected methods. 
+ // + + void + Visit( + const SYMBOL* Symbol + ) override; + + void + VisitBaseType( + const SYMBOL* Symbol + ) override; + + void + VisitEnumType( + const SYMBOL* Symbol + ) override; + + void + VisitTypedefType( + const SYMBOL* Symbol + ) override; + + void + VisitPointerType( + const SYMBOL* Symbol + ) override; + + void + VisitArrayType( + const SYMBOL* Symbol + ) override; + + void + VisitFunctionType( + const SYMBOL* Symbol + ) override; + + void + VisitFunctionArgType( + const SYMBOL* Symbol + ) override; + + void + VisitUserDataType( + const SYMBOL* Symbol + ) override; + + void + VisitOtherType( + const SYMBOL* Symbol + ) override; + + void + VisitEnumField( + const SYMBOL_ENUM_FIELD* EnumField + ) override; + + void + VisitUserDataField( + const SYMBOL_USERDATA_FIELD* UserDataField + ) override; + + void + VisitUserDataFieldEnd( + const SYMBOL_USERDATA_FIELD* UserDataField + ) override; + + void + VisitUserDataFieldBitFieldEnd( + const SYMBOL_USERDATA_FIELD* UserDataField + ) override; + + private: + // + // Private data types. + // + + struct AnonymousUserDataType + { + // + // This structure holds information about + // nested anonymous user data types. + // Anonymous user data type (ie. anonymous struct) + // is a type which members are in fact members + // of a parent user data type. + // + // struct Foo + // { + // struct + // { + // int hi; + // int bye; + // }; // <--- no member name! + // }; + // + // Visit http://stackoverflow.com/a/14248127 for more information about differences + // between unnamed and anonymous data types. 
+ // + + AnonymousUserDataType( + UdtKind UserDataTypeKind, + const SYMBOL_USERDATA_FIELD* FirstUserDataField, + const SYMBOL_USERDATA_FIELD* LastUserDataField, + ULONG64 Size = (ULONG64)0, + DWORD MemberCount = (DWORD)0 + ) + { + this->UserDataTypeKind = UserDataTypeKind; + this->FirstUserDataField = FirstUserDataField; + this->LastUserDataField = LastUserDataField; + this->Size = Size; + this->MemberCount = MemberCount; + } + + // + // First member of the anonymous user data type. + // + const SYMBOL_USERDATA_FIELD* FirstUserDataField; + + // + // Last member of the anonymous user data type. + // + const SYMBOL_USERDATA_FIELD* LastUserDataField; + + // + // Size of the anonymous user data type. + // + ULONG64 Size; + + // + // Current count of members in this anonymous user data type. + // + DWORD MemberCount; + + // + // User data type kind. + // + UdtKind UserDataTypeKind; + }; + + struct UserDataFieldContext + { + UserDataFieldContext( + const SYMBOL_USERDATA_FIELD* UserDataField, + BOOL RespectBitFields = TRUE + ) + { + SYMBOL_USERDATA* ParentUserData = &UserDataField->Parent->u.UserData; + DWORD UserDataFieldCount = ParentUserData->FieldCount; + + FirstUserDataField = &ParentUserData->Fields[0]; + EndOfUserDataField = &ParentUserData->Fields[UserDataFieldCount]; + + PreviousUserDataField = &UserDataField[-1]; + CurrentUserDataField = &UserDataField[ 0]; + NextUserDataField = &UserDataField[ 1]; + + if (RespectBitFields) + { + NextUserDataField = GetNextUserDataFieldWithRespectToBitFields(UserDataField); + } + } + + bool + IsFirst() const + { + return PreviousUserDataField < FirstUserDataField; + } + + bool + IsLast() const + { + return NextUserDataField == EndOfUserDataField; + } + + bool + GetNext() + { + PreviousUserDataField = CurrentUserDataField; + CurrentUserDataField = NextUserDataField; + NextUserDataField = GetNextUserDataFieldWithRespectToBitFields(CurrentUserDataField); + + return IsLast() == false; + } + + const SYMBOL_USERDATA_FIELD* 
FirstUserDataField; + const SYMBOL_USERDATA_FIELD* EndOfUserDataField; + + const SYMBOL_USERDATA_FIELD* PreviousUserDataField; + const SYMBOL_USERDATA_FIELD* CurrentUserDataField; + const SYMBOL_USERDATA_FIELD* NextUserDataField; + }; + + using AnonymousUserDataTypeStack = std::stack>; + using ContextStack = std::stack>; + + private: + // + // Private methods. + // + + void + CheckForDataFieldPadding( + const SYMBOL_USERDATA_FIELD* UserDataField + ); + + void + CheckForAnonymousUnion( + const SYMBOL_USERDATA_FIELD* UserDataField + ); + + void + CheckForAnonymousStruct( + const SYMBOL_USERDATA_FIELD* UserDataField + ); + + void + CheckForEndOfAnonymousUserDataType( + const SYMBOL_USERDATA_FIELD* UserDataField + ); + + std::shared_ptr + MemberDefinitionFactory(); + + void + PushAnonymousUserDataType( + std::shared_ptr Item + ); + + void + PopAnonymousUserDataType(); + + private: + // + // Static methods. + // + + static + const SYMBOL_USERDATA_FIELD* + GetNextUserDataFieldWithRespectToBitFields( + const SYMBOL_USERDATA_FIELD* UserDataField + ); + + static + bool + Is64BitBasicType( + const SYMBOL* Symbol + ); + + private: + // + // Class properties. + // + + // + // These two properties are used for padding. + // m_SizeOfPreviousUserDataField holds the size of the previous + // user data field with respect to nested unnamed and anonymous + // user data types. + // + // m_PreviousUserDataField just holds pointer to the previous + // user data field. + // + ULONG64 m_SizeOfPreviousUserDataField = 0; + const SYMBOL_USERDATA_FIELD* m_PreviousUserDataField = nullptr; + + // + // This stack holds information about anonymous user data types. + // More information about anonymous user data types are in documentation + // of the AnonymousUserDataType struct. 
+ // + AnonymousUserDataTypeStack m_AnonymousUserDataTypeStack; + + AnonymousUserDataTypeStack m_AnonymousUnionStack; + AnonymousUserDataTypeStack m_AnonymousStructStack; + + // + // This stack holds instance of a class which will be responsible + // for the formatting of the current member (user data field) - + // - its type, member name, ... + // + ContextStack m_MemberContextStack; + + // + // Settings for this Visit. + // + PDBReconstructorBase* m_ReconstructVisitor; + + // + // Settigs for constructing member definitions. + // + void* m_MemberDefinitionSettings; +}; + +#include "PDBSymbolVisitor.inl" diff --git a/Source/PDBSymbolVisitor.inl b/Source/PDBSymbolVisitor.inl new file mode 100644 index 0000000..806dda2 --- /dev/null +++ b/Source/PDBSymbolVisitor.inl @@ -0,0 +1,976 @@ +#include "PDBSymbolVisitor.h" + +#pragma once +#include "PDB.h" +#include "PDBSymbolVisitorBase.h" +#include "PDBReconstructorBase.h" + +#include +#include + +template < + typename MEMBER_DEFINITION_TYPE +> +PDBSymbolVisitor::PDBSymbolVisitor( + PDBReconstructorBase* ReconstructVisitor, + void* MemberDefinitionSettings = nullptr + ) +{ + m_ReconstructVisitor = ReconstructVisitor; + m_MemberDefinitionSettings = MemberDefinitionSettings; +} + +template < + typename MEMBER_DEFINITION_TYPE +> +void +PDBSymbolVisitor::Run( + const SYMBOL* Symbol + ) +{ + Visit(Symbol); +} + +template < + typename MEMBER_DEFINITION_TYPE +> +void +PDBSymbolVisitor::Visit( + const SYMBOL* Symbol + ) +{ + PDBSymbolVisitorBase::Visit(Symbol); +} + +template < + typename MEMBER_DEFINITION_TYPE +> +void +PDBSymbolVisitor::VisitBaseType( + const SYMBOL* Symbol + ) +{ + // + // BaseType: + // short/int/long/... + // + + m_MemberContextStack.top()->VisitBaseType(Symbol); +} + +template < + typename MEMBER_DEFINITION_TYPE +> +void +PDBSymbolVisitor::VisitEnumType( + const SYMBOL* Symbol + ) +{ + // + // EnumType: + // enum XYZ + // { + // XYZ_1, + // XYZ_2, + // }; + // + + // + // enum XYZ ... 
+ // + + if (m_ReconstructVisitor->OnEnumType(Symbol)) + { + // + // ... + // { + // XYZ_1, + // XYZ_2, + // } + // + + m_ReconstructVisitor->OnEnumTypeBegin(Symbol); + PDBSymbolVisitorBase::VisitEnumType(Symbol); + m_ReconstructVisitor->OnEnumTypeEnd(Symbol); + } +} + +template < + typename MEMBER_DEFINITION_TYPE +> +void +PDBSymbolVisitor::VisitTypedefType( + const SYMBOL* Symbol + ) +{ + +} + +template < + typename MEMBER_DEFINITION_TYPE +> +void +PDBSymbolVisitor::VisitPointerType( + const SYMBOL* Symbol + ) +{ + // + // PointerType: + // short*/int*/long*/... + // + + m_MemberContextStack.top()->VisitPointerTypeBegin(Symbol); + PDBSymbolVisitorBase::VisitPointerType(Symbol); + m_MemberContextStack.top()->VisitPointerTypeEnd(Symbol); +} + +template < + typename MEMBER_DEFINITION_TYPE +> +void +PDBSymbolVisitor::VisitArrayType( + const SYMBOL* Symbol + ) +{ + // + // ArrayType: + // int XYZ[8]; + // + + m_MemberContextStack.top()->VisitArrayTypeBegin(Symbol); + PDBSymbolVisitorBase::VisitArrayType(Symbol); + m_MemberContextStack.top()->VisitArrayTypeEnd(Symbol); + +} + +template < + typename MEMBER_DEFINITION_TYPE +> +void +PDBSymbolVisitor::VisitFunctionType( + const SYMBOL* Symbol + ) +{ + // + // #TODO: + // Currently, show void* instead of functions. + // + + m_MemberContextStack.top()->VisitFunctionTypeBegin(Symbol); + // PDBSymbolVisitorBase::VisitFunctionType(Symbol); + m_MemberContextStack.top()->VisitFunctionTypeEnd(Symbol); +} + +template < + typename MEMBER_DEFINITION_TYPE +> +void +PDBSymbolVisitor::VisitFunctionArgType( + const SYMBOL* Symbol + ) +{ + +} + +template < + typename MEMBER_DEFINITION_TYPE +> +void +PDBSymbolVisitor::VisitUserDataType( + const SYMBOL* Symbol + ) +{ + // + // UserDataType: + // struct XYZ + // { + // int XYZ_1; + // char XYZ_2; + // }; + // + + // + // struct XYZ ... + // + + if (m_ReconstructVisitor->OnUserDataType(Symbol)) + { + // + // ... 
+ // { + // int XYZ_1; + // char XYZ_2; + // } + // + + if (Symbol->Size > 0) + { + // + // Save the current stacks of anonymous UDTs. + // This prevents interferencing of members + // of nested UDTs. + // + // Stacks are restored after visiting of the current UDT. + // + AnonymousUserDataTypeStack AnonymousUDTStackBackup; + AnonymousUserDataTypeStack AnonymousUnionStackBackup; + AnonymousUserDataTypeStack AnonymousStructStackBackup; + m_AnonymousUserDataTypeStack.swap(AnonymousUDTStackBackup); + m_AnonymousUnionStack.swap(AnonymousUnionStackBackup); + m_AnonymousStructStack.swap(AnonymousStructStackBackup); + + { + m_MemberContextStack.push(MemberDefinitionFactory()); + + m_ReconstructVisitor->OnUserDataTypeBegin(Symbol); + PDBSymbolVisitorBase::VisitUserDataType(Symbol); + m_ReconstructVisitor->OnUserDataTypeEnd(Symbol); + + m_MemberContextStack.pop(); + } + + m_AnonymousStructStack.swap(AnonymousStructStackBackup); + m_AnonymousUnionStack.swap(AnonymousUnionStackBackup); + m_AnonymousUserDataTypeStack.swap(AnonymousUDTStackBackup); + } + } +} + +template < + typename MEMBER_DEFINITION_TYPE +> +void +PDBSymbolVisitor::VisitOtherType( + const SYMBOL* Symbol + ) +{ + +} + +template < + typename MEMBER_DEFINITION_TYPE +> +void +PDBSymbolVisitor::VisitEnumField( + const SYMBOL_ENUM_FIELD* EnumField + ) +{ + m_ReconstructVisitor->OnEnumField(EnumField); +} + +template < + typename MEMBER_DEFINITION_TYPE +> +void +PDBSymbolVisitor::VisitUserDataField( + const SYMBOL_USERDATA_FIELD* UserDataField + ) +{ + BOOL IsBitFieldMember = UserDataField->Bits != 0; + BOOL IsFirstBitFieldMember = IsBitFieldMember && UserDataField->BitPosition == 0; + + // + // Push new member context. + // + + m_MemberContextStack.push(MemberDefinitionFactory()); + m_MemberContextStack.top()->SetMemberName(UserDataField->Name); + + if (IsFirstBitFieldMember || !IsBitFieldMember) + { + // + // Handling of inlined user data types. 
+ // + // These checks are performed when the current member + // is not a bitfield member (except the first one). + // + // Note that calling these inside of the bitfield + // would not make sense. + // + + CheckForDataFieldPadding(UserDataField); + CheckForAnonymousUnion(UserDataField); + CheckForAnonymousStruct(UserDataField); + } + + if (IsFirstBitFieldMember) + { + // + // This is the first bitfield member. + // + + m_ReconstructVisitor->OnUserDataFieldBitFieldBegin(UserDataField); + } + + // + // Dump the field. + // + + m_ReconstructVisitor->OnUserDataFieldBegin(UserDataField); + Visit(UserDataField->Type); + m_ReconstructVisitor->OnUserDataField(UserDataField, m_MemberContextStack.top().get()); + m_ReconstructVisitor->OnUserDataFieldEnd(UserDataField); + + m_MemberContextStack.pop(); +} + +template < + typename MEMBER_DEFINITION_TYPE +> +void +PDBSymbolVisitor::VisitUserDataFieldEnd( + const SYMBOL_USERDATA_FIELD* UserDataField + ) +{ + CheckForEndOfAnonymousUserDataType(UserDataField); +} + +template < + typename MEMBER_DEFINITION_TYPE +> +void +PDBSymbolVisitor::VisitUserDataFieldBitFieldEnd( + const SYMBOL_USERDATA_FIELD* UserDataField + ) +{ + m_ReconstructVisitor->OnUserDataFieldBitFieldEnd(UserDataField, UserDataField); + + VisitUserDataFieldEnd(UserDataField); +} + +template < + typename MEMBER_DEFINITION_TYPE +> +void +PDBSymbolVisitor::CheckForDataFieldPadding( + const SYMBOL_USERDATA_FIELD* UserDataField + ) +{ + // + // Members are sometimes not properly aligned. + // Example (original definition): + // struct XYZ + // { + // char XYZ_1; + // int XYZ_2; // This member actually begins at offset 4 (if packing was not applied), + // // resulting in 3 spare bytes before this field. + // }; + // + // This routine creates a "padding" member to fill the empty space, so the final reconstructed + // structure would look like following: + // struct XYZ + // { + // char XYZ_1; + // char Padding_0[3]; // Padding member. 
+ // int XYZ_2; + // }; + // + + // + // Take previous member, sum the size of the field and its offset + // and compare it to the current member offset. + // If the sum is less than the current member offset, there is a spare space + // which will be filled by padding member. + // + + UserDataFieldContext UserDataFieldCtx(UserDataField); + + if (UserDataFieldCtx.IsFirst() == false && + m_PreviousUserDataField->Offset + (DWORD)m_SizeOfPreviousUserDataField < UserDataField->Offset) + { + DWORD Difference = UserDataField->Offset - (m_PreviousUserDataField->Offset + (DWORD)m_SizeOfPreviousUserDataField); + + // + // We can use !(Difference & 3) if we want to be clever. + // + + BOOL DifferenceIsDivisibleBy4 = !(Difference % 4); + + m_ReconstructVisitor->OnPaddingMember( + UserDataField, + DifferenceIsDivisibleBy4 ? btLong : btChar , + DifferenceIsDivisibleBy4 ? 4 : 1 , + DifferenceIsDivisibleBy4 ? Difference / 4 : Difference + ); + } +} + +template < + typename MEMBER_DEFINITION_TYPE +> +void +PDBSymbolVisitor::CheckForAnonymousUnion( + const SYMBOL_USERDATA_FIELD* UserDataField + ) +{ + // + // When some user data type contains anonymous unions, they are not projected + // into the PDB file - they are part of the user data type (ie. struct). + // Anonymous unions can be detected through checking of starting offsets + // of members in the structure - if there exist more than 1 member (DataField) + // which start at the same offset, they are placed inside of the union. + // + + UserDataFieldContext UserDataFieldCtx(UserDataField); + + if (UserDataFieldCtx.IsLast()) + { + // + // If current member is the last member of the current + // user data type, there won't be any anonymous unions. + // + + return; + } + + if (!m_AnonymousUserDataTypeStack.empty() && + m_AnonymousUserDataTypeStack.top()->UserDataTypeKind == UdtUnion) + { + // + // Don't start an anonymous union while we're still inside of one. 
+ // + + return; + } + + // + // Iterate members starting from the current one. + // If any following member which starts at the same offset + // as the current member does exist, then they must be wrapped + // inside of the union. + // + + do + { + if (UserDataFieldCtx.NextUserDataField->Offset == UserDataField->Offset) + { + + // + // Do not try to wrap in the union + // those members, which are out of bounds + // of the anonymous struct we're currently in. + // + // In other words, this prevents creating meaningless unions + // which have only one member - because it detected + // that there exist member, which has the same offset - + // - but the member is already in another struct. + // + + if (m_AnonymousStructStack.empty() || + (!m_AnonymousStructStack.empty() && UserDataFieldCtx.NextUserDataField <= m_AnonymousStructStack.top()->LastUserDataField)) + { + PushAnonymousUserDataType(std::make_shared(UdtUnion, UserDataField, nullptr, UserDataField->Type->Size)); + m_ReconstructVisitor->OnAnonymousUserDataTypeBegin(UdtUnion, UserDataField); + break; + } + } + } while (UserDataFieldCtx.GetNext()); +} + +template < + typename MEMBER_DEFINITION_TYPE +> +void +PDBSymbolVisitor::CheckForAnonymousStruct( + const SYMBOL_USERDATA_FIELD* UserDataField + ) +{ + + // + // Decides whether an anonymous struct has to be opened at UserDataField and, + // if so, pushes it and reports it to the reconstruct visitor. + // + // When a user data type contains anonymous structs, they are not recorded + // in the PDB file as separate types - their members appear directly in the enclosing user data type. + // This dumper creates anonymous structs where it's obvious + // that an anonymous structure is present in the union. + // Consider the following snippet: + // + // 0: kd> dt ntdll!_KTHREAD + // ...
+ // +0x190 StackBase : Ptr32 Void + // +0x194 SuspendApc : _KAPC + // +0x194 SuspendApcFill0 : [1] UChar + // +0x195 ResourceIndex : UChar + // +0x194 SuspendApcFill1 : [3] UChar + // +0x197 QuantumReset : UChar + // +0x194 SuspendApcFill2 : [4] UChar + // +0x198 KernelTime : Uint4B + // +0x194 SuspendApcFill3 : [36] UChar + // +0x1b8 WaitPrcb : Ptr32 _KPRCB + // ... + // + // Note that offset 0x194 is shared among many members, even though those members + // are interleaved with members which start at offsets other than 0x194. + // This is effectively done by structs placed inside unions. The above snippet could be represented + // as: + // + // struct _KTHREAD { + // ... + // /* 0x0190 */ void* StackBase; + // union { + // /* 0x0194 */ struct _KAPC SuspendApc; + // struct { + // /* 0x0194 */ unsigned char SuspendApcFill0[1]; + // /* 0x0195 */ unsigned char ResourceIndex; + // }; + // struct { + // /* 0x0194 */ unsigned char SuspendApcFill1[3]; + // /* 0x0197 */ unsigned char QuantumReset; + // }; + // struct { + // /* 0x0194 */ unsigned char SuspendApcFill2[4]; + // /* 0x0198 */ unsigned long KernelTime; + // }; + // struct { + // /* 0x0194 */ unsigned char SuspendApcFill3[36]; + // /* 0x01b8 */ struct _KPRCB* WaitPrcb; + // }; + // ... + // }; + // + + UserDataFieldContext UserDataFieldCtx(UserDataField); + + if (UserDataFieldCtx.IsLast()) + { + // + // If the current member is the last member of the current + // user data type, there won't be any anonymous structs. + // + + return; + } + + if (!m_AnonymousUserDataTypeStack.empty() && + m_AnonymousUserDataTypeStack.top()->UserDataTypeKind != UdtUnion) + { + // + // Don't start an anonymous struct while we're still inside of one + // (i.e. the innermost open anonymous type is not a union). + // + + return; + } + + if (UserDataFieldCtx.NextUserDataField->Offset <= UserDataField->Offset) + { + // + // If the offset of the next member is less than or equal to the offset + // of the current member, we cannot create a struct here.
+ // + + return; + } + + do + { + + // + // If the offsets of the next member and the current member are equal + // or the offset of the next member is less than the offset + // of the end of the last anonymous user data type, + // we will create an anonymous struct. + // + + if ( + UserDataFieldCtx.NextUserDataField->Offset == UserDataField->Offset || + ( + !m_AnonymousUserDataTypeStack.empty() && + UserDataFieldCtx.NextUserDataField->Offset < m_AnonymousUserDataTypeStack.top()->FirstUserDataField->Offset + m_AnonymousUserDataTypeStack.top()->Size + ) + ) + { + + // + // Guess the last member of this anonymous struct. + // Note that this guess is not required to be correct. + // It only serves as a boundary which stops the creation of anonymous unions. + // + + do + { + bool IsEndOfAnonymousStruct = + UserDataFieldCtx.IsLast() || + UserDataFieldCtx.NextUserDataField->Offset <= UserDataField->Offset; + + if (IsEndOfAnonymousStruct) + { + break; + } + } while (UserDataFieldCtx.GetNext()); + + // + // UserDataFieldCtx.CurrentUserDataField now holds the guessed last member + // of this anonymous struct. + // + + PushAnonymousUserDataType(std::make_shared(UdtStruct, UserDataField, UserDataFieldCtx.CurrentUserDataField)); + m_ReconstructVisitor->OnAnonymousUserDataTypeBegin(UdtStruct, UserDataField); + break; + } + } while (UserDataFieldCtx.GetNext()); +} + +template < + typename MEMBER_DEFINITION_TYPE +> +void +PDBSymbolVisitor::CheckForEndOfAnonymousUserDataType( + const SYMBOL_USERDATA_FIELD* UserDataField + ) +{ + // + // This method is called after each user data field + // and after the last member of the bitfield, + // so this is the best place to refresh + // these two properties. + // + + m_PreviousUserDataField = UserDataField; + m_SizeOfPreviousUserDataField = UserDataField->Type->Size; + + if (m_AnonymousUserDataTypeStack.empty()) + { + // + // No user data type to check.
+ // + + return; + } + + UserDataFieldContext UserDataFieldCtx(UserDataField, FALSE); + + // + // The current member could be nested more than once + // and at this point more anonymous user data types + // could be closed, so the code is wrapped inside of the loop. + // + + AnonymousUserDataType* LastAnonymousUserDataType; + + do + { + LastAnonymousUserDataType = m_AnonymousUserDataTypeStack.top().get(); + LastAnonymousUserDataType->MemberCount += 1; + + bool IsEndOfAnonymousUserDataType = false; + + if (LastAnonymousUserDataType->UserDataTypeKind == UdtUnion) + { + // + // Update the size of the current nested union. + // The size of the union is as big as its biggest member. + // + + LastAnonymousUserDataType->Size = max(LastAnonymousUserDataType->Size, m_SizeOfPreviousUserDataField); + + // + // Determine whether this is the end of the anonymous union. + // + // - UserDataFieldCtx.IsLast() + // - If the current member is the last one in the root structure. + // + // This check covers all opened anonymous user data types before + // the top root structure ends. + // + // - UserDataFieldCtx.NextUserDataField->Offset < UserDataField->Offset + // - If the offset of the next member is less than the offset of the current member. + // + // - (UserDataFieldCtx.NextUserDataField->Offset == UserDataField->Offset + LastAnonymousUserDataType->Size) + // - If the offset of the next member equals the sum of + // * the offset of the current member and + // * the computed size of the current nested union. + // + // - (UserDataFieldCtx.NextUserDataField->Offset == UserDataField->Offset + 8 && Is64BitBasicType(UserDataFieldCtx.NextUserDataField->Type)) + // - If the offset of the next member equals the offset of the current member + 8 and + // the next member is a base type of size 8 (e.g. [u]int64_t). + // This is because of alignment.
+ // + // - (UserDataFieldCtx.NextUserDataField->Offset > UserDataField->Offset && UserDataField->Bits != 0) + // - If the offset of the next member is bigger than the offset of the current member and + // the current member is a part of a bitfield (its Bits value is non-zero). + // NOTE(review): this comment used to say "not a part of the bitfield", but the code tests Bits != 0 - confirm the intent. + // + // - (UserDataFieldCtx.NextUserDataField->Offset > UserDataField->Offset && UserDataField->Offset + UserDataField->Type->Size != UserDataFieldCtx.NextUserDataField->Offset) + // - If the offset of the next member is bigger than the offset of the current member and + // the offset of the end of the current member is not equal to the offset of the next member. + // + + IsEndOfAnonymousUserDataType = + UserDataFieldCtx.IsLast() || + UserDataFieldCtx.NextUserDataField->Offset < UserDataField->Offset || + (UserDataFieldCtx.NextUserDataField->Offset == UserDataField->Offset + LastAnonymousUserDataType->Size) || + (UserDataFieldCtx.NextUserDataField->Offset == UserDataField->Offset + 8 && Is64BitBasicType(UserDataFieldCtx.NextUserDataField->Type)) || + (UserDataFieldCtx.NextUserDataField->Offset > UserDataField->Offset && UserDataField->Bits != 0) || + (UserDataFieldCtx.NextUserDataField->Offset > UserDataField->Offset && UserDataField->Offset + UserDataField->Type->Size != UserDataFieldCtx.NextUserDataField->Offset); + } + else + { + // + // Update the size of the current nested structure/class. + // The total size increases by the size of the previous member. + // Because the previous member could be a non-trivial member (i.e. a union), + // we will use the variable m_SizeOfPreviousUserDataField. + // + + LastAnonymousUserDataType->Size += m_SizeOfPreviousUserDataField; + + // + // Determine whether this is the end of the anonymous struct. + // + // - UserDataFieldCtx.IsLast() + // - If the current member is the last one in the root structure. + // + // This check covers all opened anonymous user data types before + // the top root structure ends.
+ // + // - UserDataFieldCtx.NextUserDataField->Offset <= UserDataField->Offset + // - If the offset of the next member is less than or equal to the offset of the current member. + // + + IsEndOfAnonymousUserDataType = + UserDataFieldCtx.IsLast() || + UserDataFieldCtx.NextUserDataField->Offset <= UserDataField->Offset; + + // + // Special condition for closing anonymous structs + // which are placed inside of the anonymous unions. + // + // This prevents structs from being longer than actually needed. + // + // If the end offset of the parent union (offset of its first member + its size) + // equals the end offset of the current member (its offset + its size), + // we can close this struct. + // Also, this struct must already contain at least 2 members. + // + + AnonymousUserDataType* LastAnonymousUnion = + m_AnonymousUnionStack.empty() + ? nullptr + : m_AnonymousUnionStack.top().get(); + + IsEndOfAnonymousUserDataType = IsEndOfAnonymousUserDataType || ( + LastAnonymousUnion != nullptr && + LastAnonymousUnion->FirstUserDataField->Offset + LastAnonymousUnion->Size == UserDataField->Offset + UserDataField->Type->Size && + LastAnonymousUserDataType->MemberCount >= 2 + ); + } + + if (IsEndOfAnonymousUserDataType) + { + // + // Close the anonymous user data type. + // Its final size becomes the "size of the previous member" for the enclosing type. + // + + m_SizeOfPreviousUserDataField = LastAnonymousUserDataType->Size; + LastAnonymousUserDataType->LastUserDataField = UserDataField; + + m_ReconstructVisitor->OnAnonymousUserDataTypeEnd( + LastAnonymousUserDataType->UserDataTypeKind, + LastAnonymousUserDataType->FirstUserDataField, + LastAnonymousUserDataType->LastUserDataField + ); + + PopAnonymousUserDataType(); + + LastAnonymousUserDataType = nullptr; + } + + if (!m_AnonymousUserDataTypeStack.empty()) + { + if (m_AnonymousUserDataTypeStack.top()->UserDataTypeKind == UdtUnion) + { + // + // If the AnonymousUserDataTypeStack is still not empty + // and an anonymous union is at the top of it, + // we must set the first member of the anonymous union + // as the current member.
+ // + // The reason is that the first member of the union + // is guaranteed to be at the starting offset of the union. + // This might not be true for other members, as they + // can be part of another anonymous struct. + // + // Example: + // + // union { + // int a; /* 0x10 */ + // int b; /* 0x10 */ + // struct { + // int c; /* 0x10 */ + // int d; /* 0x14 */ + // /* + // * This is where we are now. We end the struct here, + // * and the current offset is 0x14, + // * but the union starts at the offset 0x10, so we set + // * the current member to the first member of the unnamed union + // * which is "int a". + // */ + // }; + // }; + + UserDataField = m_AnonymousUserDataTypeStack.top()->FirstUserDataField; + m_PreviousUserDataField = UserDataField; + } + else + { + // + // If a struct or class is at the top of the AnonymousUserDataTypeStack, + // set the current member back to the actual current member + // which has been provided. + // + + UserDataField = UserDataFieldCtx.CurrentUserDataField; + m_PreviousUserDataField = UserDataField; + } + } + } while (LastAnonymousUserDataType == nullptr && !m_AnonymousUserDataTypeStack.empty()); +} + +template < + typename MEMBER_DEFINITION_TYPE +> +std::shared_ptr +PDBSymbolVisitor::MemberDefinitionFactory() +{ + // + // Creates a new member definition and passes m_MemberDefinitionSettings to it. + // + + auto MemberDefinition = std::make_shared(); + MemberDefinition->SetSettings(m_MemberDefinitionSettings); + + return MemberDefinition; +} + +template < + typename MEMBER_DEFINITION_TYPE +> +void +PDBSymbolVisitor::PushAnonymousUserDataType( + std::shared_ptr Item + ) +{ + // + // Every item goes on the combined stack and additionally + // on the stack of its own kind (union or struct). + // + + m_AnonymousUserDataTypeStack.push(Item); + + if (Item->UserDataTypeKind == UdtUnion) + { + m_AnonymousUnionStack.push(Item); + } + else + { + m_AnonymousStructStack.push(Item); + } +} + +template < + typename MEMBER_DEFINITION_TYPE +> +void +PDBSymbolVisitor::PopAnonymousUserDataType() +{ + // + // The kind-specific stack is popped first, while the top + // of the combined stack still tells which kind is being removed. + // + + if (m_AnonymousUserDataTypeStack.top()->UserDataTypeKind == UdtUnion) + { + m_AnonymousUnionStack.pop(); + } + else + { +
m_AnonymousStructStack.pop(); + } + + m_AnonymousUserDataTypeStack.pop(); +} + +template < + typename MEMBER_DEFINITION_TYPE +> +const SYMBOL_USERDATA_FIELD* +PDBSymbolVisitor::GetNextUserDataFieldWithRespectToBitFields( + const SYMBOL_USERDATA_FIELD* UserDataField + ) +{ + // + // Returns the field which follows UserDataField in its parent user data type. + // When UserDataField is a bitfield member, the rest of that bitfield is skipped first. + // The one-past-the-end field pointer is returned when there is no such field. + // + + const SYMBOL_USERDATA* ParentUserData = &UserDataField->Parent->u.UserData; + DWORD UserDataFieldCount = ParentUserData->FieldCount; + + const SYMBOL_USERDATA_FIELD* NextUserDataField = UserDataField; + const SYMBOL_USERDATA_FIELD* EndOfUserDataField = &ParentUserData->Fields[UserDataFieldCount]; + DWORD UserDataFieldOffset = UserDataField->Offset; + + if (UserDataField->Bits == 0) + { + // + // Provided member is not a bitfield. + // Increment to the next user data field. + // + NextUserDataField++; + } + else + { + // + // If the provided member is a part of a bitfield, + // we will try to iterate until the end of that bitfield. + // + // This is achieved by summing all bits of the bitfield members. + // + + DWORD BitSum = 0; + + do + { + if (NextUserDataField->Offset != UserDataFieldOffset) + { + // + // If offsets don't match up before the appropriate sum was reached, + // it means that the UserDataField provided was actually + // already a (non-first) member of some bitfield. + // + + if (NextUserDataField->Bits == 0) + { + // + // If the next user data field is not a part of a bitfield, + // just break here. + // + + break; + } + else + { + // + // If the next user data field is a part of a bitfield, + // it means that a new bitfield starts after the current one. + // Just reset the counter and start counting from scratch. + // + // NOTE(review): UserDataFieldOffset is not updated here, so every later member + // at the new offset enters this branch again and resets BitSum - confirm this is intended. + // + + BitSum = 0; + } + } + + BitSum += NextUserDataField->Bits; + + if (BitSum == NextUserDataField->Type->Size * 8) + { + // + // If the sum now equals the bit size of the data type + // of the bitfield, break the loop.
+ // The NextUserDataField currently points to the last member + // of the bitfield, so we increment it here so it points + // to the first member after the current bitfield. + // + + NextUserDataField++; + break; + } + } while (++NextUserDataField < EndOfUserDataField); + } + + if (NextUserDataField >= EndOfUserDataField) + { + return EndOfUserDataField; + } + + return NextUserDataField; +} + +template < + typename MEMBER_DEFINITION_TYPE +> +bool +PDBSymbolVisitor::Is64BitBasicType( + const SYMBOL* Symbol + ) +{ + // + // True for any base type symbol whose size is 8 bytes. + // + + return (Symbol->Tag == SymTagBaseType && Symbol->Size == 8); +} diff --git a/Source/PDBSymbolVisitorBase.h b/Source/PDBSymbolVisitorBase.h new file mode 100644 index 0000000..f0fefc4 --- /dev/null +++ b/Source/PDBSymbolVisitorBase.h @@ -0,0 +1,248 @@ +#pragma once +#include "PDB.h" + +class PDBSymbolVisitorBase +{ + public: + virtual + ~PDBSymbolVisitorBase() = default; + + virtual + void + Visit( + const SYMBOL* Symbol + ) + { + // + // Dispatch to the handler which matches the tag of the symbol. + // Tags without a dedicated handler go to VisitOtherType. + // + + switch (Symbol->Tag) + { + case SymTagBaseType: + VisitBaseType(Symbol); + break; + + case SymTagEnum: + VisitEnumType(Symbol); + break; + + case SymTagTypedef: + VisitTypedefType(Symbol); + break; + + case SymTagPointerType: + VisitPointerType(Symbol); + break; + + case SymTagArrayType: + VisitArrayType(Symbol); + break; + + case SymTagFunctionType: + VisitFunctionType(Symbol); + break; + + case SymTagFunctionArgType: + VisitFunctionArgType(Symbol); + break; + + case SymTagUDT: + VisitUserDataType(Symbol); + break; + + default: + VisitOtherType(Symbol); + break; + } + } + + protected: + virtual + void + VisitBaseType( + const SYMBOL* Symbol + ) + { + + } + + virtual + void + VisitEnumType( + const SYMBOL* Symbol + ) + { + for (DWORD i = 0; i < Symbol->u.Enum.FieldCount; i++) + { + VisitEnumField(&Symbol->u.Enum.Fields[i]); + } + } + + virtual + void + VisitTypedefType( + const SYMBOL* Symbol + ) + { + Visit(Symbol->u.Typedef.Type); + } + + virtual + void + VisitPointerType( + const SYMBOL* Symbol + ) + { +
Visit(Symbol->u.Pointer.Type); + } + + virtual + void + VisitArrayType( + const SYMBOL* Symbol + ) + { + Visit(Symbol->u.Array.ElementType); + } + + virtual + void + VisitFunctionType( + const SYMBOL* Symbol + ) + { + for (DWORD i = 0; i < Symbol->u.Function.ArgumentCount; i++) + { + Visit(Symbol->u.Function.Arguments[i]); + } + } + + virtual + void + VisitFunctionArgType( + const SYMBOL* Symbol + ) + { + Visit(Symbol->u.FunctionArg.Type); + } + + virtual + void + VisitUserDataType( + const SYMBOL* Symbol + ) + { + // + // Walks all fields of the user data type. + // A plain field is reported through VisitUserDataFieldBegin, + // VisitUserDataField and VisitUserDataFieldEnd. + // Each run of bitfield members is reported through VisitUserDataFieldBitField + // and is always bracketed by VisitUserDataFieldBitFieldBegin + // and VisitUserDataFieldBitFieldEnd. + // + + const SYMBOL_USERDATA_FIELD* UserDataField; + DWORD UserDataFieldCount = Symbol->u.UserData.FieldCount; + DWORD BitSum = 0; + + for (DWORD i = 0; i < UserDataFieldCount; i++) + { + UserDataField = &Symbol->u.UserData.Fields[i]; + + if (UserDataField->Bits == 0) + { + VisitUserDataFieldBegin(UserDataField); + VisitUserDataField(UserDataField); + VisitUserDataFieldEnd(UserDataField); + continue; + } + + VisitUserDataFieldBitFieldBegin(UserDataField); + for (; i < UserDataFieldCount; i++) + { + UserDataField = &Symbol->u.UserData.Fields[i]; + + BitSum += UserDataField->Bits; + + VisitUserDataFieldBitField(UserDataField); + + // + // The bitfield ends when the summed bits fill the whole data type. + // It also has to end when the bits never add up to the full size: + // at the last field, or when the following field is not a bitfield member + // or starts at a different offset. Without these checks the following + // plain members would be reported as bitfield members + // and VisitUserDataFieldBitFieldEnd would never be called. + // + + bool IsEndOfBitField = + BitSum == UserDataField->Type->Size * 8 || + i + 1 == UserDataFieldCount || + Symbol->u.UserData.Fields[i + 1].Bits == 0 || + Symbol->u.UserData.Fields[i + 1].Offset != UserDataField->Offset; + + if (IsEndOfBitField) + { + VisitUserDataFieldBitFieldEnd(UserDataField); + BitSum = 0; + break; + } + } + } + } + + virtual + void + VisitOtherType( + const SYMBOL* Symbol + ) + { + + } + + virtual + void + VisitEnumField( + const SYMBOL_ENUM_FIELD* EnumField + ) + { + + } + + virtual + void + VisitUserDataFieldBegin( + const SYMBOL_USERDATA_FIELD* UserDataField + ) + { + + } + + virtual + void + VisitUserDataFieldEnd( + const SYMBOL_USERDATA_FIELD* UserDataField + ) + { + + } + + virtual + void + VisitUserDataField( + const SYMBOL_USERDATA_FIELD* UserDataField + ) + { + + } + + virtual + void + VisitUserDataFieldBitFieldBegin( + const SYMBOL_USERDATA_FIELD* 
UserDataField + ) + { + + } + + virtual + void + VisitUserDataFieldBitFieldEnd( + const SYMBOL_USERDATA_FIELD* UserDataField + ) + { + + } + + virtual + void + VisitUserDataFieldBitField( + const SYMBOL_USERDATA_FIELD* UserDataField + ) + { + // + // Call VisitUserDataField by default. + // + + VisitUserDataField(UserDataField); + } +}; + diff --git a/Source/UserDataFieldDefinition.h b/Source/UserDataFieldDefinition.h new file mode 100644 index 0000000..da23a38 --- /dev/null +++ b/Source/UserDataFieldDefinition.h @@ -0,0 +1,120 @@ +#pragma once +#include "UserDataFieldDefinitionBase.h" + +#include + +class UserDataFieldDefinition + : public UserDataFieldDefinitionBase +{ + public: + struct Settings + { + bool UseStdInt = false; + }; + + void + VisitBaseType( + const SYMBOL* Symbol + ) override + { + // + // BaseType: + // short/int/long/... + // + // The spelling is chosen by PDB::GetBasicTypeString according to Settings::UseStdInt. + // + + m_TypePrefix += PDB::GetBasicTypeString(Symbol, m_Settings->UseStdInt); + } + + void + VisitPointerTypeEnd( + const SYMBOL* Symbol + ) override + { + m_TypePrefix += "*"; + } + + void + VisitArrayTypeEnd( + const SYMBOL* Symbol + ) override + { + if (Symbol->u.Array.ElementCount == 0) + { + // + // Apparently an array with 0 element count can exist in a PDB. + // But XYZ Name[0] is not compilable. + // This hack "converts" the zero-sized array into a pointer. + // + // Also, the size of the symbol is set to 1 instead of 0, + // otherwise we would end up in an anonymous union. + // + // NOTE(review): this writes through a pointer received as const, + // so the SYMBOL passed in by the caller is modified. + // + + const_cast(Symbol)->Size = 1; + m_TypePrefix += "*"; + } + else + { + m_TypeSuffix += "[" + std::to_string(Symbol->u.Array.ElementCount) + "]"; + } + } + + void + VisitFunctionTypeEnd( + const SYMBOL* Symbol + ) override + { + // + // #TODO: + // Currently, "void" is emitted in place of the function type. + // Presumably a pointer to a function then prints as void* + // once VisitPointerTypeEnd appends the asterisk - confirm against the caller. + // + + m_TypePrefix += "void"; + + m_Comment = " /* function */"; + } + + void + SetMemberName( + CONST CHAR* MemberName + ) + { + m_MemberName = MemberName ? 
MemberName : std::string(); + } + + std::string + GetPrintableDefinition() const override + { + // + // Layout: <type prefix> <member name><type suffix><comment> + // + + return m_TypePrefix + " " + m_MemberName + m_TypeSuffix + m_Comment; + } + + void + SetSettings( + void* MemberDefinitionSettings + ) override + { + // + // A null argument selects a shared, function-local default Settings object. + // + + static Settings DefaultSettings; + + if (MemberDefinitionSettings == nullptr) + { + MemberDefinitionSettings = &DefaultSettings; + } + + m_Settings = static_cast(MemberDefinitionSettings); + } + + virtual + void* + GetSettings() override + { + // + // Return the Settings object stored by SetSettings(), + // not the address of the m_Settings pointer member. + // + + return m_Settings; + } + + private: + std::string m_TypePrefix; // "int*" + std::string m_MemberName; // "XYZ" + std::string m_TypeSuffix; // "[8]" + std::string m_Comment; + + Settings* m_Settings; // Not owned; assigned by SetSettings(). +}; + diff --git a/Source/UserDataFieldDefinitionBase.h b/Source/UserDataFieldDefinitionBase.h new file mode 100644 index 0000000..27b36c1 --- /dev/null +++ b/Source/UserDataFieldDefinitionBase.h @@ -0,0 +1,104 @@ +#pragma once +#include "PDB.h" + +#include + +class UserDataFieldDefinitionBase +{ + public: + // + // Polymorphic base class: the virtual destructor keeps destruction + // through a base class pointer well defined. + // + + virtual + ~UserDataFieldDefinitionBase() = default; + + virtual + void + VisitBaseType( + const SYMBOL* Symbol + ) + { + + } + + virtual + void + VisitPointerTypeBegin( + const SYMBOL* Symbol + ) + { + + } + + virtual + void + VisitPointerTypeEnd( + const SYMBOL* Symbol + ) + { + + } + + virtual + void + VisitArrayTypeBegin( + const SYMBOL* Symbol + ) + { + + } + + virtual + void + VisitArrayTypeEnd( + const SYMBOL* Symbol + ) + { + + } + + virtual + void + VisitFunctionTypeBegin( + const SYMBOL* Symbol + ) + { + + } + + virtual + void + VisitFunctionTypeEnd( + const SYMBOL* Symbol + ) + { + + } + + virtual + void + SetMemberName( + CONST CHAR* MemberName + ) + { + + } + + virtual + std::string + GetPrintableDefinition() const + { + return std::string(); + } + + virtual + void + SetSettings( + void* MemberDefinitionSettings + ) + { + + } + + virtual + void* + GetSettings() + { + return nullptr; + } +}; + diff --git a/Source/main.cpp b/Source/main.cpp new file mode 100644 index 0000000..ab6b9dc --- /dev/null +++ b/Source/main.cpp @@ -0,0 
+1,14 @@ +#include "PDBExtractor.h" + +#pragma comment(lib, "dbghelp.lib") + +// +// Creates the extractor, runs it with the command-line arguments +// and returns the value returned by PDBExtractor::Run. +// +int main_impl(int argc, char** argv) +{ + PDBExtractor Instance; + return Instance.Run(argc, argv); +} + +// +// Process entry point; forwards to main_impl. +// +int main(int argc, char** argv) +{ + return main_impl(argc, argv); +} diff --git a/Source/pdbex.vcxproj b/Source/pdbex.vcxproj new file mode 100644 index 0000000..5deb360 --- /dev/null +++ b/Source/pdbex.vcxproj @@ -0,0 +1,181 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + {06FB1AF7-647C-4BA4-860A-4533763440F9} + Win32Proj + pdbex + 8.1 + + + + Application + true + v140 + MultiByte + + + Application + false + v140 + true + MultiByte + + + Application + true + v140 + MultiByte + + + Application + false + v140 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + true + $(SolutionDir)Bin\$(PlatformShortName)\$(Configuration)\ + $(SolutionDir)Bin\Obj\$(PlatformShortName)\$(Configuration)\$(ProjectName)\ + + + true + $(SolutionDir)Bin\$(PlatformShortName)\$(Configuration)\ + $(SolutionDir)Bin\Obj\$(PlatformShortName)\$(Configuration)\$(ProjectName)\ + + + false + $(SolutionDir)Bin\$(PlatformShortName)\$(Configuration)\ + $(SolutionDir)Bin\Obj\$(PlatformShortName)\$(Configuration)\$(ProjectName)\ + + + false + $(SolutionDir)Bin\$(PlatformShortName)\$(Configuration)\ + $(SolutionDir)Bin\Obj\$(PlatformShortName)\$(Configuration)\$(ProjectName)\ + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(VSInstallDir)\DIA SDK\include + + + Console + true + + + + + + + Level3 + Disabled + _DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(VSInstallDir)\DIA SDK\include + + + Console + true + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(VSInstallDir)\DIA SDK\include + MultiThreaded + + + Console + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(VSInstallDir)\DIA SDK\include + MultiThreaded + + + Console 
+ true + true + true + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/Source/pdbex.vcxproj.filters b/Source/pdbex.vcxproj.filters new file mode 100644 index 0000000..af026ec --- /dev/null +++ b/Source/pdbex.vcxproj.filters @@ -0,0 +1,65 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + + + Source Files + + + \ No newline at end of file diff --git a/Source/pdbex.vcxproj.user b/Source/pdbex.vcxproj.user new file mode 100644 index 0000000..278300f --- /dev/null +++ b/Source/pdbex.vcxproj.user @@ -0,0 +1,17 @@ + + + + + + WindowsLocalDebugger + + + WindowsLocalDebugger + + + WindowsLocalDebugger + + + WindowsLocalDebugger + + \ No newline at end of file diff --git a/pdbex.sln b/pdbex.sln new file mode 100644 index 0000000..b478fa0 --- /dev/null +++ b/pdbex.sln @@ -0,0 +1,28 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 14 +VisualStudioVersion = 14.0.23107.0 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pdbex", "Source\pdbex.vcxproj", "{06FB1AF7-647C-4BA4-860A-4533763440F9}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {06FB1AF7-647C-4BA4-860A-4533763440F9}.Debug|x64.ActiveCfg = Debug|x64 + 
{06FB1AF7-647C-4BA4-860A-4533763440F9}.Debug|x64.Build.0 = Debug|x64 + {06FB1AF7-647C-4BA4-860A-4533763440F9}.Debug|x86.ActiveCfg = Debug|Win32 + {06FB1AF7-647C-4BA4-860A-4533763440F9}.Debug|x86.Build.0 = Debug|Win32 + {06FB1AF7-647C-4BA4-860A-4533763440F9}.Release|x64.ActiveCfg = Release|x64 + {06FB1AF7-647C-4BA4-860A-4533763440F9}.Release|x64.Build.0 = Release|x64 + {06FB1AF7-647C-4BA4-860A-4533763440F9}.Release|x86.ActiveCfg = Release|Win32 + {06FB1AF7-647C-4BA4-860A-4533763440F9}.Release|x86.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal