-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #9 from happyhavoc/export-ghidra
Add script and CI to export ghidra database
- Loading branch information
Showing
7 changed files
with
475 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
# Nightly (and manually triggerable) job that exports the Ghidra server
# database history into the th06-re git repository and pushes the result.
name: Export Ghidra Database

on:
  workflow_dispatch:
  schedule:
    - cron: '0 2 * * *'

jobs:
  export-ghidra:
    runs-on: ubuntu-latest

    permissions:
      contents: write

    steps:
      # Check out this repository; the export step runs scripts/ and reads
      # config/ from it.
      - uses: actions/checkout@v4
      # The th06-re repo is cloned manually over SSH rather than with
      # actions/checkout@v4, because the action is broken for this use:
      # https://github.com/actions/checkout/issues/1477
      - name: Clone th06-re repo
        run: |
          echo "$GHIDRA_SSH_AUTH" > ssh_key
          chmod 0600 ssh_key
          GIT_SSH_COMMAND="ssh -i $PWD/ssh_key -o IdentitiesOnly=yes" git clone git@github.com:happyhavoc/th06-re th06-re
          rm ssh_key
        env:
          GHIDRA_SSH_AUTH: ${{ secrets.GHIDRA_SSH_AUTH }}
      - name: Install python 3.11
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'
      - name: Get ghidra
        run: |
          curl -L https://github.com/NationalSecurityAgency/ghidra/releases/download/Ghidra_10.3.3_build/ghidra_10.3.3_PUBLIC_20230829.zip -o /tmp/ghidra.zip
          unzip -d /tmp /tmp/ghidra.zip
          echo /tmp/ghidra_*/support >> $GITHUB_PATH
      - name: Export ghidra
        run: |
          echo "$GHIDRA_SSH_AUTH" > ssh_key
          # Keep the private key unreadable by other users, as in the
          # clone and push steps.
          chmod 0600 ssh_key
          python scripts/export_ghidra_database.py --user-mappings config/ghidra-user-maps.toml --username github-action --ssh-key ssh_key --program th06_102h.exe 'ghidra://roblab.la/Touhou 06' th06-re
          rm ssh_key
        env:
          GHIDRA_SSH_AUTH: ${{ secrets.GHIDRA_SSH_AUTH }}
      - name: Push
        run: |
          echo "$GHIDRA_SSH_AUTH" > ssh_key
          chmod 0600 ssh_key
          GIT_SSH_COMMAND="ssh -i $PWD/ssh_key -o IdentitiesOnly=yes" git -C th06-re push origin HEAD
          rm ssh_key
        env:
          GHIDRA_SSH_AUTH: ${{ secrets.GHIDRA_SSH_AUTH }}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
mary = "Mary <mary@mary.zone>" | ||
roblabla = "roblabla <unfiltered@roblab.la>" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,150 @@ | ||
#!/usr/bin/env nix-shell | ||
#!nix-shell -p python311 -i python3 | ||
|
||
import argparse | ||
from datetime import datetime, timezone | ||
import json | ||
import os | ||
from pathlib import Path | ||
import re | ||
import subprocess | ||
import tempfile | ||
import tomllib | ||
from typing import Optional | ||
|
||
SCRIPT_PATH = Path(os.path.realpath(__file__)).parent | ||
|
||
|
||
def runAnalyze(args, extraArgs):
    """
    Runs Ghidra's `analyzeHeadless` against the configured repository.

    The command is always invoked with `-noanalysis` and `-readOnly`, with
    the script path pointing at the `ghidra` directory next to this script.
    `extraArgs` (a list) is appended to the command line as-is.

    Returns the CompletedProcess; raises subprocess.CalledProcessError if
    the command exits with a non-zero status.
    """
    cmd = [
        'analyzeHeadless', args.GHIDRA_REPO_NAME,
        '-noanalysis', '-readOnly',
        '-scriptPath', str(SCRIPT_PATH / 'ghidra'),
    ]
    if args.ssh_key:
        cmd.extend(('-keystore', args.ssh_key))

    # TODO: If program is not provided, export all files from server.
    if args.program:
        cmd.extend(('-process', args.program))

    # The username is passed through the JVM's user.name property, prepended
    # to whatever _JAVA_OPTIONS the caller already had set.
    inheritedJavaOptions = os.environ.get('_JAVA_OPTIONS', '')
    env = dict(os.environ)
    env['_JAVA_OPTIONS'] = f'-Duser.name={args.username} ' + inheritedJavaOptions

    return subprocess.run(cmd + extraArgs, env=env, check=True)
|
||
|
||
def fetchVersions(args):
    """
    Fetches all the versions of the program being exported.

    Runs the ExportFileVersions.java pre-script, handing it the path of a
    temporary file, then parses that file as JSON. The resulting entries
    are returned sorted by ascending 'version'.
    """
    with tempfile.NamedTemporaryFile(prefix='versions') as jsonOut:
        runAnalyze(args, ['-preScript', 'ExportFileVersions.java', jsonOut.name])
        rawJson = jsonOut.read()

    history = json.loads(rawJson)
    history.sort(key=lambda entry: entry['version'])
    return history
|
||
|
||
def exportXmlVersion(args, version: dict):
    """
    Exports one version of `args.program` to XML and commits it to git.

    `version` is one entry produced by fetchVersions(); the keys read here
    are 'version', 'user', 'createTime' and 'comment'.

    Two files are written in `args.GIT_REPO_PATH` and committed:
    `<program>.xml` (the export, with its TIMESTAMP attribute removed) and
    `<program>.version` (the exported version number).

    Raises subprocess.CalledProcessError if Ghidra or git fails.
    """
    xmlFileInRepo = args.program + '.xml'
    verFileInRepo = args.program + '.version'
    outXml = args.GIT_REPO_PATH / str(xmlFileInRepo)
    outVer = args.GIT_REPO_PATH / str(verFileInRepo)
    if not outXml.exists():
        outXml.touch()
    runAnalyze(args, ['-preScript', 'ExportToXML.java', str(outXml), str(version['version'])])

    # The XML contains the timestamp of when the export was done. This is kinda
    # annoying as it introduces some noise in the diff. Let's patch it out.
    # The replacement must be a raw string: in a plain string '\1' is an octal
    # escape (the control character 0x01), not a backreference. We keep the
    # attributes before (group 1) and after (group 3) the timestamp.
    text = outXml.read_text()
    text = re.sub(r'<INFO_SOURCE (.*) TIMESTAMP="(.*)" (.*)/>', r'<INFO_SOURCE \1 \3/>', text)
    outXml.write_text(text)

    # Add a file, verFileInRepo, which contains the current version number. This
    # is used so the next run of export_ghidra_database.py knows from which
    # version it needs to start to read again.
    outVer.write_text(str(version['version']))

    # Add the files to the git repo.
    subprocess.run(['git', '-C', args.GIT_REPO_PATH, 'add', xmlFileInRepo, verFileInRepo], check=True)

    # Ghidra Create Time is stored as num of milliseconds since UNIX epoch.
    # Python expects number of seconds, so fix it.
    # We also set the timezone to UTC to ensure our commits are reproducible.
    # Turns out the timezone leaks into the commit data, so having a different
    # timezone may end up creating different commit hash.
    createTime = datetime.fromtimestamp(version['createTime'] / 1000., tz=timezone.utc)

    # Git needs both a name and an email to make a commit. To satisfy it, we
    # first try to find the user in the `user_mapping` file, which maps ghidra
    # username to a string in the form 'user <email>', and extract both values
    # from there. If we don't find the user there, we use the ghidra username
    # as-is, and `unknown` as the email.
    if version['user'] in args.user_mappings:
        user = args.user_mappings[version['user']]
        user, email = user.rsplit("<", 1)
        email = email.removesuffix(">").strip()
        user = user.strip()
    else:
        user = version['user']
        email = 'unknown'

    # We use the comment as a git message. In ghidra, the comment may be empty,
    # but git disallows this. In this case, we leave a default commit message.
    # A missing, null or whitespace-only comment is treated as empty too.
    commitMsg = version.get('comment') or ''
    if not commitMsg.strip():
        commitMsg = 'Generic reverse engineering progress'

    # Let's commit now.
    # NOTE(review): this dict replaces the whole environment of the git
    # process, so variables such as PATH and HOME are not inherited.
    # Confirm this is intended.
    commitDate = createTime.isoformat()
    gitEnv = {
        'GIT_COMMITTER_NAME': user,
        'GIT_COMMITTER_EMAIL': email,
        'GIT_COMMITTER_DATE': commitDate,
        'GIT_AUTHOR_NAME': user,
        'GIT_AUTHOR_EMAIL': email,
        'GIT_AUTHOR_DATE': commitDate,
    }
    subprocess.run(['git', '-C', args.GIT_REPO_PATH, 'commit', '-m', commitMsg], env=gitEnv, check=True)
|
||
|
||
def parseUserMappings(path: str): | ||
print(path) | ||
try: | ||
res = tomllib.loads(Path(path).read_text()) | ||
except Exception as e: | ||
print(e) | ||
raise | ||
return res | ||
|
||
|
||
def getLatestVersionInRepo(git_repo_path: Path, program: str) -> Optional[int]:
    """
    Returns the version number stored in `<program>.version` inside
    `git_repo_path`, or None when that file does not exist.
    """
    versionMarker = git_repo_path / (program + '.version')
    if not versionMarker.exists():
        return None
    return int(versionMarker.read_text())
|
||
|
||
def main():
    """
    Command-line entry point: exports every Ghidra version of a program
    that is newer than the one already recorded in the git repository.

    Versions are exported in ascending order, one git commit per version.
    """
    parser = argparse.ArgumentParser(
        description='Export a ghidra database history to git',
    )
    parser.add_argument('GHIDRA_REPO_NAME')
    parser.add_argument('GIT_REPO_PATH', type=Path)
    parser.add_argument('--username', help='Username to use when connecting to the ghidra server.')
    parser.add_argument('--ssh-key', help="""SSH key to use to authenticate to a ghidra server.
        Note that the ghidra server must have SSH authentication enabled for this to work.
        To enable SSH auth, add -ssh in the wrapper.parameters of the Ghidra Server's server.conf""")
    # The mapping file is parsed with tomllib, so it is TOML, not JSON.
    parser.add_argument('--user-mappings', type=parseUserMappings, default={}, help='TOML mapping of ghidra username to git "user <email>" format')
    parser.add_argument('--program', help='Program to export')
    args = parser.parse_args()

    # Exporting every file of a repository is not implemented yet. Without a
    # program name the output file names cannot be built, so fail early with
    # a usage error instead of crashing after the Ghidra run.
    if not args.program:
        parser.error('--program is required: exporting all files of a repository is not supported yet')

    versions = fetchVersions(args)

    versionInRepo = getLatestVersionInRepo(args.GIT_REPO_PATH, args.program)

    for version in versions:
        # Skip versions that were already exported by a previous run.
        if versionInRepo is not None and version['version'] <= versionInRepo:
            continue
        exportXmlVersion(args, version)


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
/* | ||
* LICENSE | ||
*/ | ||
// Description | ||
//@author roblabla | ||
//@category exports | ||
//@keybinding | ||
//@menupath Skeleton | ||
//@toolbar Skeleton | ||
import com.google.gson.Gson; | ||
import com.google.gson.JsonArray; | ||
import com.google.gson.JsonElement; | ||
import com.google.gson.JsonObject; | ||
import com.google.gson.stream.JsonWriter; | ||
import ghidra.app.script.GhidraScript; | ||
import ghidra.framework.model.DomainFile; | ||
import ghidra.framework.store.Version; | ||
import java.io.File; | ||
import java.io.FileWriter; | ||
|
||
public class ExportFileVersions extends GhidraScript | ||
{ | ||
@Override protected void run() throws Exception | ||
{ | ||
// We get the DomainFile this way to ensure we get a GhidraFile and not | ||
// a DomainProxyFile. This is because DomainProxyFile does not contain | ||
// the VersionHistory that we need. | ||
DomainFile f = parseDomainFile(currentProgram.getDomainFile().getPathname()); | ||
File outFile = askFile("Output JSON", ""); | ||
Version versions[] = f.getVersionHistory(); | ||
JsonArray arr = new JsonArray(versions.length); | ||
for (Version ver : versions) | ||
{ | ||
JsonObject obj = new JsonObject(); | ||
obj.addProperty("version", ver.getVersion()); | ||
obj.addProperty("user", ver.getUser()); | ||
obj.addProperty("comment", ver.getComment()); | ||
obj.addProperty("createTime", ver.getCreateTime()); | ||
arr.add(obj); | ||
} | ||
FileWriter outFileWriter = new FileWriter(outFile); | ||
new Gson().toJson(arr, outFileWriter); | ||
outFileWriter.flush(); | ||
outFileWriter.close(); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
/* | ||
* LICENSE | ||
*/ | ||
// Description | ||
//@author roblabla | ||
//@category exports | ||
//@keybinding | ||
//@menupath Skeleton | ||
//@toolbar Skeleton | ||
import ghidra.app.script.GhidraScript; | ||
import ghidra.app.util.exporter.XmlExporter; | ||
import ghidra.framework.model.DomainFile; | ||
import ghidra.framework.model.DomainObject; | ||
import ghidra.program.model.mem.Memory; | ||
import java.io.File; | ||
|
||
public class ExportToXML extends GhidraScript | ||
{ | ||
@Override protected void run() throws Exception | ||
{ | ||
XmlExporter exporter = new XmlExporter(); | ||
|
||
File outFile = askFile("Output XML", ""); | ||
int outVer = askInt("File Version to Export", ""); | ||
|
||
// We get the DomainFile this way to ensure we get a GhidraFile and not | ||
// a DomainProxyFile. This is because DomainProxyFile does not handle | ||
// getting anything but the latest version of a file. | ||
DomainFile f = parseDomainFile(currentProgram.getDomainFile().getPathname()); | ||
|
||
DomainObject obj = f.getReadOnlyDomainObject(this, outVer, monitor); | ||
Memory mem = getCurrentProgram().getMemory(); | ||
|
||
exporter.export(outFile, obj, mem, monitor); | ||
obj.release(this); | ||
} | ||
} |
Oops, something went wrong.