-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #34 from tmr232/scan-codebase
Create script for scanning full codebase
- Loading branch information
Showing
13 changed files
with
173 additions
and
55 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
/** | ||
* This script allows running the CFG builders on all the functions of a given | ||
* code base. | ||
* This can be useful in finding CFG generation bugs. | ||
* | ||
* @module | ||
*/ | ||
import { parseArgs } from "node:util"; | ||
import { | ||
functionNodeTypes, | ||
type Language, | ||
newCFGBuilder, | ||
supportedLanguages, | ||
} from "../src/control-flow/cfg"; | ||
import { Glob } from "bun"; | ||
import type Parser from "web-tree-sitter"; | ||
import { initializeParser } from "../src/parser-loader/bun.ts"; | ||
import * as path from "node:path"; | ||
|
||
/** Pairs a file extension (written without the leading dot) with a language. */
type FileType = { ext: string; language: Language };

// Extension/language pairs, kept as compact tuples and expanded below.
const extensionTable: [ext: string, language: Language][] = [
  ["c", "C"],
  ["cpp", "C++"],
  ["h", "C++"],
  ["hh", "C++"],
  ["hpp", "C++"],
  ["cc", "C++"],
  ["py", "Python"],
  ["go", "Go"],
];

/**
 * Every file extension the scan picks up, together with the language used to
 * process files carrying that extension.
 */
const fileTypes: FileType[] = extensionTable.map(([ext, language]) => ({
  ext,
  language,
}));
|
||
/**
 * One parser per supported language, keyed by language name.
 *
 * The parsers are created one after another at module load (top-level await),
 * taking the `parser` field of each `initializeParser` result.
 */
const parsers: { [language in Language]: Parser } = Object.fromEntries(
  await (async () => {
    // Left untyped on purpose so the result of `Object.fromEntries` stays
    // assignable to the mapped type above.
    const entries = [];
    for (const lang of supportedLanguages) {
      const { parser } = await initializeParser(lang);
      entries.push([lang, parser]);
    }
    return entries;
  })(),
);
|
||
/** Lookup from a dotted extension (e.g. `.py`) to the language it maps to. */
const extToLanguage: Map<string, Language> = new Map();
for (const { ext, language } of fileTypes) {
  extToLanguage.set(`.${ext}`, language);
}
|
||
/**
 * Resolves the language of a file from its extension.
 *
 * The extension is lower-cased before the lookup.
 *
 * @param filename - Path or name of the file.
 * @returns The language registered for the file's extension.
 * @throws Error if the extension is not registered in `extToLanguage`.
 */
function getLanguage(filename: string): Language {
  const extension = path.extname(filename).toLowerCase();
  const match = extToLanguage.get(extension);
  if (match) {
    return match;
  }
  throw new Error(`Unsupported extension ${extension}`);
}
|
||
/**
 * Scans `root` for files whose extension appears in `fileTypes`.
 *
 * @param root - Directory handed to the glob scan.
 * @returns The iterator produced by `Glob.scanSync` for a `**` pattern
 *   covering every registered extension.
 */
function iterSourceFiles(root: string): IterableIterator<string> {
  const extensions = fileTypes.map((fileType) => fileType.ext);
  const pattern = `**/*.{${extensions.join(",")}}`;
  return new Glob(pattern).scanSync(root);
}
|
||
/**
 * Parses `code` and yields every syntax node whose type is listed in
 * `functionNodeTypes` for the given language.
 *
 * Nodes are visited in pre-order (a node is yielded before its children), and
 * the walk keeps descending into yielded nodes, so nested functions are
 * reported as well.
 *
 * The parsed tree and its cursor are released as soon as iteration ends,
 * whether it runs to completion, is abandoned early, or throws. Yielded nodes
 * must therefore only be used while the iteration is still in progress.
 *
 * @param code - Source text to parse.
 * @param language - Language selecting both the parser and the node types.
 * @returns An iterator over the matching syntax nodes.
 */
function* iterFunctions(
  code: string,
  language: Language,
): IterableIterator<Parser.SyntaxNode> {
  const tree = parsers[language].parse(code);
  const cursor = tree.walk();
  const wantedTypes = functionNodeTypes[language];

  function* visitNode(): IterableIterator<Parser.SyntaxNode> {
    if (wantedTypes.includes(cursor.nodeType)) {
      yield cursor.currentNode;
    }

    if (cursor.gotoFirstChild()) {
      do {
        yield* visitNode();
      } while (cursor.gotoNextSibling());
      // Climb back up so the caller's sibling loop resumes from this node.
      cursor.gotoParent();
    }
  }

  try {
    yield* visitNode();
  } finally {
    // Trees and cursors are not reclaimed automatically; without this, every
    // scanned file would leave its tree allocated.
    cursor.delete();
    tree.delete();
  }
}
|
||
/**
 * Entry point: builds a CFG for every function found under the scan root.
 *
 * The root comes from the `--root` option and defaults to the current
 * directory. For each function the file path, start position and CFG node
 * count (`cfg.graph.order`) are logged.
 *
 * A failure while building one function's CFG is reported on stderr together
 * with the file path and start position, and the scan then continues with the
 * next function. This way a single run surfaces every failing function rather
 * than only the first. The process exit code is set to 1 if any failure
 * occurred.
 */
async function main() {
  const { values } = parseArgs({
    args: Bun.argv,
    options: {
      root: {
        type: "string",
      },
    },
    strict: true,
    // Non-option entries in `Bun.argv` must be accepted under strict mode.
    allowPositionals: true,
  });

  const root = values.root ?? ".";

  for (const filename of iterSourceFiles(root)) {
    const filepath = path.join(root, filename);
    const code = await Bun.file(filepath).text();
    const language = getLanguage(filename);
    for (const func of iterFunctions(code, language)) {
      try {
        const builder = newCFGBuilder(language, {});
        const cfg = builder.buildCFG(func);
        console.log(filepath, func.startPosition, cfg.graph.order);
      } catch (error: unknown) {
        // Keep scanning: one broken function should not hide the others.
        console.error(filepath, func.startPosition, error);
        process.exitCode = 1;
      }
    }
  }
}

await main();
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
import type { Language } from "../control-flow/cfg.ts"; | ||
import Parser from "web-tree-sitter"; | ||
import { wasmMapping } from "./wasmMapping.ts"; | ||
|
||
/**
 * Creates a parser configured for the given language.
 *
 * @param language - Language whose grammar is looked up in `wasmMapping`.
 * @returns The configured parser together with the loaded grammar object.
 */
export async function initializeParser(
  language: Language,
): Promise<{ parser: Parser; language: Parser.Language }> {
  await Parser.init();

  const grammar = await Parser.Language.load(wasmMapping[language]);
  const parser = new Parser();
  parser.setLanguage(grammar);
  return { parser, language: grammar };
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
import type { Language } from "../control-flow/cfg.ts"; | ||
import Parser from "web-tree-sitter"; | ||
import treeSitterCore from "../../parsers/tree-sitter.wasm?url"; | ||
import { wasmMapping } from "./wasmMapping.ts"; | ||
|
||
/**
 * Creates a parser configured for the given language.
 *
 * The parser runtime is initialised with a `locateFile` hook that always
 * answers with the imported `tree-sitter.wasm` URL.
 *
 * @param language - Language whose grammar is looked up in `wasmMapping`.
 * @returns The configured parser.
 */
export async function initializeParser(language: Language) {
  const locateFile = (_scriptName: string, _scriptDirectory: string) =>
    treeSitterCore;
  await Parser.init({ locateFile });

  const grammar = await Parser.Language.load(wasmMapping[language]);
  const parser = new Parser();
  parser.setLanguage(grammar);
  return parser;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
import type { Language } from "../control-flow/cfg.ts"; | ||
import treeSitterC from "../../parsers/tree-sitter-c.wasm?url"; | ||
import treeSitterGo from "../../parsers/tree-sitter-go.wasm?url"; | ||
import treeSitterPython from "../../parsers/tree-sitter-python.wasm?url"; | ||
import treeSitterCpp from "../../parsers/tree-sitter-cpp.wasm?url"; | ||
|
||
// ADD-LANGUAGES-HERE
/** Grammar `.wasm` URL for each supported language. */
export const wasmMapping: Record<Language, string> = {
  C: treeSitterC,
  Go: treeSitterGo,
  Python: treeSitterPython,
  "C++": treeSitterCpp,
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.