Skip to content

Commit

Permalink
Merge pull request #9 from happyhavoc/export-ghidra
Browse files Browse the repository at this point in the history
Add script and CI to export ghidra database
  • Loading branch information
roblabla authored Sep 17, 2023
2 parents 98bb6ac + 3a82c68 commit 41287a7
Show file tree
Hide file tree
Showing 7 changed files with 475 additions and 0 deletions.
49 changes: 49 additions & 0 deletions .github/workflows/export-ghidra.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
name: Export Ghidra Database

on:
workflow_dispatch:
schedule:
- cron: '0 2 * * *'

jobs:
export-ghidra:
runs-on: ubuntu-latest

permissions:
contents: write

steps:
- uses: actions/checkout@v4
# The th06-re repo is cloned manually over SSH below instead of through actions/checkout@v4, which is broken for this use case: https://github.com/actions/checkout/issues/1477
- name: Clone th06-re repo
run: |
echo "$GHIDRA_SSH_AUTH" > ssh_key
chmod 0600 ssh_key
GIT_SSH_COMMAND="ssh -i $PWD/ssh_key -o IdentitiesOnly=yes" git clone git@github.com:happyhavoc/th06-re th06-re
rm ssh_key
env:
GHIDRA_SSH_AUTH: ${{ secrets.GHIDRA_SSH_AUTH }}
- name: Install python 3.11
uses: actions/setup-python@v4
with:
python-version: '3.11'
- name: Get ghidra
run: |
curl -L https://github.com/NationalSecurityAgency/ghidra/releases/download/Ghidra_10.3.3_build/ghidra_10.3.3_PUBLIC_20230829.zip -o /tmp/ghidra.zip
unzip -d /tmp /tmp/ghidra.zip
echo /tmp/ghidra_*/support >> $GITHUB_PATH
- name: Export ghidra
run: |
echo "$GHIDRA_SSH_AUTH" > ssh_key
python scripts/export_ghidra_database.py --user-mappings config/ghidra-user-maps.toml --username github-action --ssh-key ssh_key --program th06_102h.exe 'ghidra://roblab.la/Touhou 06' th06-re
rm ssh_key
env: # SSH private key passed to the export script through --ssh-key
GHIDRA_SSH_AUTH: ${{ secrets.GHIDRA_SSH_AUTH }}
- name: Push
run: |
echo "$GHIDRA_SSH_AUTH" > ssh_key
chmod 0600 ssh_key
GIT_SSH_COMMAND="ssh -i $PWD/ssh_key -o IdentitiesOnly=yes" git -C th06-re push origin HEAD
rm ssh_key
env:
GHIDRA_SSH_AUTH: ${{ secrets.GHIDRA_SSH_AUTH }}
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,14 @@ Run the following script:
### Building

On Windows, run `scripts/build.bat` otherwise `./scripts/wineth06 scripts/build.bat`.

## Reverse Engineering

You can find an XML export of our Ghidra reverse engineering database in the
companion repository [th06-re]. That repository is updated nightly by
[`scripts/export_ghidra_database.py`], and its history matches the check-in
history of our team's Ghidra Server.

If you wish to help us in our reverse engineering effort, please contact
@roblabla on Discord so we can give you an account on the Ghidra Server.

[th06-re]: https://github.com/happyhavoc/th06-re
[`scripts/export_ghidra_database.py`]: scripts/export_ghidra_database.py
2 changes: 2 additions & 0 deletions config/ghidra-user-maps.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
mary = "Mary <mary@mary.zone>"
roblabla = "roblabla <unfiltered@roblab.la>"
150 changes: 150 additions & 0 deletions scripts/export_ghidra_database.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
#!/usr/bin/env nix-shell
#!nix-shell -p python311 -i python3

import argparse
from datetime import datetime, timezone
import json
import os
from pathlib import Path
import re
import subprocess
import tempfile
import tomllib
from typing import Optional

SCRIPT_PATH = Path(os.path.realpath(__file__)).parent


def runAnalyze(args, extraArgs):
    """
    Runs Ghidra's analyzeHeadless on the configured ghidra repository.

    The project is always opened with -noanalysis and -readOnly, and scripts
    are looked up in the `ghidra` directory that sits next to this file.

    args: parsed command line arguments. Reads GHIDRA_REPO_NAME, ssh_key,
        program and username.
    extraArgs: list of arguments appended to the command line, typically the
        script to run followed by its parameters.

    Returns the subprocess.CompletedProcess of the run. Raises
    subprocess.CalledProcessError if analyzeHeadless exits with a non-zero
    status.
    """
    commonAnalyzeHeadlessArgs = [
        'analyzeHeadless', args.GHIDRA_REPO_NAME,
        '-noanalysis', '-readOnly',
        '-scriptPath', str(SCRIPT_PATH / 'ghidra'),
    ]
    if args.ssh_key:
        commonAnalyzeHeadlessArgs += ['-keystore', args.ssh_key]

    # TODO: If program is not provided, export all files from server.
    if args.program:
        commonAnalyzeHeadlessArgs += ['-process', args.program]

    commonAnalyzeHeadlessEnv = os.environ.copy()
    # The username is handed to the JVM through the user.name system property.
    # Only override it when a username was actually given: formatting a
    # missing username would otherwise force the literal name "None".
    if args.username:
        commonAnalyzeHeadlessEnv['_JAVA_OPTIONS'] = (
            f'-Duser.name={args.username} ' + commonAnalyzeHeadlessEnv.get('_JAVA_OPTIONS', '')
        )

    return subprocess.run(commonAnalyzeHeadlessArgs + extraArgs, env=commonAnalyzeHeadlessEnv, check=True)


def fetchVersions(args):
    """
    Returns every version of the program being exported, sorted by ascending
    version number.

    The ExportFileVersions.java ghidra script dumps the version history as
    JSON into a temporary file, which is parsed once the script has run.
    """
    with tempfile.NamedTemporaryFile(prefix='versions') as jsonFile:
        runAnalyze(args, ['-preScript', 'ExportFileVersions.java', jsonFile.name])
        history = json.loads(jsonFile.read())
    history.sort(key=lambda entry: entry['version'])
    return history


def stripInfoSourceTimestamp(text: str) -> str:
    """
    Returns text with the TIMESTAMP attribute removed from every INFO_SOURCE
    tag, leaving the other attributes of the tag untouched.
    """
    # Raw strings are required here: in a regular string literal, '\1' is the
    # control character 0x01 rather than a reference to the first group.
    return re.sub(r'(<INFO_SOURCE\b[^>]*?)\s+TIMESTAMP="[^"]*"', r'\1', text)


def exportXmlVersion(args, version: dict):
    """
    Exports one version of the program to XML and commits it to the git repo.

    args: parsed command line arguments. Reads program, GIT_REPO_PATH and
        user_mappings, plus whatever runAnalyze reads.
    version: one entry of the list returned by fetchVersions. Reads the keys
        'version', 'user', 'comment' and 'createTime'.

    Raises subprocess.CalledProcessError if analyzeHeadless or git fail.
    """
    xmlFileInRepo = args.program + '.xml'
    verFileInRepo = args.program + '.version'
    outXml = args.GIT_REPO_PATH / str(xmlFileInRepo)
    outVer = args.GIT_REPO_PATH / str(verFileInRepo)
    if not outXml.exists():
        outXml.touch()
    runAnalyze(args, ['-preScript', 'ExportToXML.java', str(outXml), str(version['version'])])

    # The XML contains the timestamp of when the export was done. This is kinda
    # annoying as it introduces some noise in the diff. Let's patch it out.
    outXml.write_text(stripInfoSourceTimestamp(outXml.read_text()))

    # Add a file, verFileInRepo, which contains the current version number. This
    # is used so the next run of export_ghidra_database.py knows from which
    # version it needs to start to read again.
    outVer.write_text(str(version['version']))

    # Add the files to the git repo.
    subprocess.run(['git', '-C', args.GIT_REPO_PATH, 'add', xmlFileInRepo, verFileInRepo], check=True)

    # Ghidra Create Time is stored as num of milliseconds since UNIX epoch.
    # Python expects number of seconds, so fix it.
    # We also set the timezone to UTC to ensure our commits are reproducible.
    # Turns out the timezone leaks into the commit data, so having a different
    # timezone may end up creating different commit hash.
    createTime = datetime.fromtimestamp(version['createTime'] / 1000., tz=timezone.utc)

    # Git needs both a name and an email to make a commit. To satisfy it, we
    # first try to find the user in the `user_mapping` file, which maps ghidra
    # username to a string in the form 'user <email>', and extract both values
    # from there. If we don't find the user there, we use the ghidra username
    # as-is, and `unknown` as the email.
    if version['user'] in args.user_mappings:
        user = args.user_mappings[version['user']]
        user, email = user.rsplit("<", 1)
        email = email.removesuffix(">").strip()
        user = user.strip()
    else:
        user = version['user']
        email = 'unknown'

    # We use the comment as a git message. In ghidra, the comment may be empty,
    # but git disallows this. In this case, we leave a default commit message.
    # A missing or whitespace-only comment is treated the same way, since it
    # would not produce a usable commit message either.
    commitMsg = version.get('comment') or ''
    if not commitMsg.strip():
        commitMsg = 'Generic reverse engineering progress'

    # Let's commit now.
    # NOTE(review): this dict replaces the whole environment of the git
    # process, so PATH, HOME and friends are not inherited - confirm that this
    # is intended.
    gitEnv = {
        'GIT_COMMITTER_NAME': user,
        'GIT_COMMITTER_EMAIL': email,
        'GIT_COMMITTER_DATE': createTime.isoformat(),
        'GIT_AUTHOR_NAME': user,
        'GIT_AUTHOR_EMAIL': email,
        'GIT_AUTHOR_DATE': createTime.isoformat(),
    }
    subprocess.run(['git', '-C', args.GIT_REPO_PATH, 'commit', '-m', commitMsg], env=gitEnv, check=True)


def parseUserMappings(path: str):
print(path)
try:
res = tomllib.loads(Path(path).read_text())
except Exception as e:
print(e)
raise
return res


def getLatestVersionInRepo(git_repo_path: Path, program: str) -> Optional[int]:
    """
    Returns the version number stored in the `<program>.version` file of the
    git repo, or None when that file does not exist.
    """
    versionMarker = git_repo_path / (program + '.version')
    if not versionMarker.exists():
        return None
    return int(versionMarker.read_text())


def main():
    """
    Command line entry point: exports to the git repo every version of the
    program that is newer than the one recorded in the repo, one commit per
    version, in ascending version order.
    """
    parser = argparse.ArgumentParser(
        description='Export a ghidra database history to git',
    )
    parser.add_argument('GHIDRA_REPO_NAME')
    parser.add_argument('GIT_REPO_PATH', type=Path)
    parser.add_argument('--username', help='Username to use when connecting to the ghidra server.')
    parser.add_argument('--ssh-key', help="""SSH key to use to authenticate to a ghidra server.
                        Note that the ghidra server must have SSH authentication enabled for this to work.
                        To enable SSH auth, add -ssh in the wrapper.parameters of the Ghidra Server's server.conf""")
    parser.add_argument('--user-mappings', type=parseUserMappings, default={}, help='TOML file mapping ghidra usernames to git "user <email>" strings')
    parser.add_argument('--program', help='Program to export')
    args = parser.parse_args()

    # Exporting every program of a repository is not implemented yet (see the
    # TODO in runAnalyze), and the rest of the script builds file names from
    # the program name. Fail early with a clear message instead of crashing
    # later on.
    if not args.program:
        parser.error('--program is required: exporting all programs of a repository is not supported yet')

    versions = fetchVersions(args)

    versionInRepo = getLatestVersionInRepo(args.GIT_REPO_PATH, args.program)

    for version in versions:
        # Skip the versions that a previous run already committed.
        if versionInRepo is not None and version['version'] <= versionInRepo:
            continue
        exportXmlVersion(args, version)


if __name__ == '__main__':
    main()
46 changes: 46 additions & 0 deletions scripts/ghidra/ExportFileVersions.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* LICENSE
*/
// Description
//@author roblabla
//@category exports
//@keybinding
//@menupath Skeleton
//@toolbar Skeleton
import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.stream.JsonWriter;
import ghidra.app.script.GhidraScript;
import ghidra.framework.model.DomainFile;
import ghidra.framework.store.Version;
import java.io.File;
import java.io.FileWriter;

public class ExportFileVersions extends GhidraScript
{
    /**
     * Writes the version history of the current program's domain file to a
     * JSON file chosen through askFile.
     *
     * The output is a JSON array with one object per version, holding the
     * "version", "user", "comment" and "createTime" of that version.
     *
     * @throws Exception if the version history cannot be read or the output
     *                   file cannot be written.
     */
    @Override protected void run() throws Exception
    {
        // We get the DomainFile this way to ensure we get a GhidraFile and not
        // a DomainProxyFile. This is because DomainProxyFile does not contain
        // the VersionHistory that we need.
        DomainFile f = parseDomainFile(currentProgram.getDomainFile().getPathname());
        File outFile = askFile("Output JSON", "");
        Version[] versions = f.getVersionHistory();
        JsonArray arr = new JsonArray(versions.length);
        for (Version ver : versions)
        {
            JsonObject obj = new JsonObject();
            obj.addProperty("version", ver.getVersion());
            obj.addProperty("user", ver.getUser());
            obj.addProperty("comment", ver.getComment());
            obj.addProperty("createTime", ver.getCreateTime());
            arr.add(obj);
        }
        // Always write UTF-8 rather than the platform default charset, so the
        // consumer of the file does not depend on this machine's locale.
        // try-with-resources flushes and closes the writer even if the
        // serialization throws.
        try (FileWriter outFileWriter = new FileWriter(outFile, java.nio.charset.StandardCharsets.UTF_8))
        {
            new Gson().toJson(arr, outFileWriter);
        }
    }
}
37 changes: 37 additions & 0 deletions scripts/ghidra/ExportToXML.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* LICENSE
*/
// Description
//@author roblabla
//@category exports
//@keybinding
//@menupath Skeleton
//@toolbar Skeleton
import ghidra.app.script.GhidraScript;
import ghidra.app.util.exporter.XmlExporter;
import ghidra.framework.model.DomainFile;
import ghidra.framework.model.DomainObject;
import ghidra.program.model.mem.Memory;
import java.io.File;

public class ExportToXML extends GhidraScript
{
@Override protected void run() throws Exception
{
XmlExporter exporter = new XmlExporter();

File outFile = askFile("Output XML", "");
int outVer = askInt("File Version to Export", "");

// We get the DomainFile this way to ensure we get a GhidraFile and not
// a DomainProxyFile. This is because DomainProxyFile does not handle
// getting anything but the latest version of a file.
DomainFile f = parseDomainFile(currentProgram.getDomainFile().getPathname());

DomainObject obj = f.getReadOnlyDomainObject(this, outVer, monitor);
Memory mem = getCurrentProgram().getMemory();

exporter.export(outFile, obj, mem, monitor);
obj.release(this);
}
}
Loading

0 comments on commit 41287a7

Please sign in to comment.