Skip to content

Commit

Permalink
Merge pull request #34 from tmr232/scan-codebase
Browse files Browse the repository at this point in the history
Create script for scanning full codebase
  • Loading branch information
tmr232 authored Dec 1, 2024
2 parents cd35cfa + f8cee8f commit 1aeddef
Show file tree
Hide file tree
Showing 13 changed files with 173 additions and 55 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Check [Keep a Changelog](http://keepachangelog.com/) for recommendations on how
- The `generate-parsers.ts` script has been updated to support copying existing `.wasm` files from tree-sitter grammar packages
- Initial support for C++
- A basic [typedoc](https://typedoc.org/) configuration was added, to help in rendering docs
- A utility script for running CFG builders on a complete codebase (`scan-codebase.ts`)

### Changed

Expand Down
2 changes: 1 addition & 1 deletion docs/AddNewLanguage.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ Search for `ADD-LANGUAGES-HERE` in the code, and add the language in all the rel
Those will include:

- Language & builder definitions in `src/control-flow/cfg.ts`
- Mapping languages to `.wasm` files in `src/components/utils.ts`
- Mapping languages to `.wasm` files in `src/parser-loader/wasmMapping.ts`
- Mapping VSCode's `languageId` to our language definitions in `src/vscode/extension.ts`
- Adding test-collectors and tests in `src/test/commentTestCollector.ts`
- Adding the language in the demo's UI in `src/components/Demo.svelte`
Expand Down
1 change: 1 addition & 0 deletions scripts/generate-parsers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
* parsers in to the `./parsers` directory.
*
* To add a new parser, add its package name to the `parsersToBuild` array.
* @module
*/
import { $ } from "bun";
import * as fs from "node:fs";
Expand Down
110 changes: 110 additions & 0 deletions scripts/scan-codebase.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
/**
* This script allows running the CFG builders on all the functions of a given
* code base.
* This can be useful in finding CFG generation bugs.
*
* @module
*/
import { parseArgs } from "node:util";
import {
functionNodeTypes,
type Language,
newCFGBuilder,
supportedLanguages,
} from "../src/control-flow/cfg";
import { Glob } from "bun";
import type Parser from "web-tree-sitter";
import { initializeParser } from "../src/parser-loader/bun.ts";
import * as path from "node:path";

type FileType = { ext: string; language: Language };
const fileTypes: FileType[] = [
{ ext: "c", language: "C" },
{ ext: "cpp", language: "C++" },
{ ext: "h", language: "C++" },
{ ext: "hh", language: "C++" },
{ ext: "hpp", language: "C++" },
{ ext: "cc", language: "C++" },
{ ext: "py", language: "Python" },
{ ext: "go", language: "Go" },
];

const parsers: { [language in Language]: Parser } = Object.fromEntries(
await (async () => {
const parsers = [];
for (const language of supportedLanguages) {
parsers.push([language, (await initializeParser(language)).parser]);
}
return parsers;
})(),
);

const extToLanguage: Map<string, Language> = new Map(
fileTypes.map(({ ext, language }) => [`.${ext}`, language]),
);

function getLanguage(filename: string): Language {
const ext = path.extname(filename).toLowerCase();
const language = extToLanguage.get(ext);
if (!language) {
throw new Error(`Unsupported extension ${ext}`);
}
return language;
}

function iterSourceFiles(root: string): IterableIterator<string> {
const sourceGlob = new Glob(
`**/*.{${fileTypes.map(({ ext }) => ext).join(",")}}`,
);
return sourceGlob.scanSync(root);
}

function* iterFunctions(
code: string,
language: Language,
): IterableIterator<Parser.SyntaxNode> {
const tree = parsers[language].parse(code);

const cursor = tree.walk();
function* visitNode(): IterableIterator<Parser.SyntaxNode> {
if (functionNodeTypes[language].includes(cursor.nodeType)) {
yield cursor.currentNode;
}

if (cursor.gotoFirstChild()) {
do {
yield* visitNode();
} while (cursor.gotoNextSibling());
cursor.gotoParent();
}
}
yield* visitNode();
}

async function main() {
const { values } = parseArgs({
args: Bun.argv,
options: {
root: {
type: "string",
},
},
strict: true,
allowPositionals: true,
});

const root = values.root ?? ".";

for (const filename of iterSourceFiles(root)) {
const filepath = path.join(root, filename);
const code = await Bun.file(filepath).text();
const language = getLanguage(filename);
for (const func of iterFunctions(code, language)) {
const builder = newCFGBuilder(language, {});
const cfg = builder.buildCFG(func);
console.log(filepath, func.startPosition, cfg.graph.order);
}
}
}

await main();
35 changes: 5 additions & 30 deletions src/components/utils.ts
Original file line number Diff line number Diff line change
@@ -1,43 +1,18 @@
import Parser from "web-tree-sitter";

import treeSitterGo from "../../parsers/tree-sitter-go.wasm?url";
import treeSitterC from "../../parsers/tree-sitter-c.wasm?url";
import treeSitterPython from "../../parsers/tree-sitter-python.wasm?url";
import treeSitterCore from "../../parsers/tree-sitter.wasm?url";
import treeSitterCpp from "../../parsers/tree-sitter-cpp.wasm?url";
import type Parser from "web-tree-sitter";
import {
newCFGBuilder,
functionNodeTypes,
type Language,
newCFGBuilder,
supportedLanguages,
functionNodeTypes,
} from "../control-flow/cfg";
import type { TestFuncRecord } from "../test/commentTestUtils";
import type { TestFunction } from "../test/commentTestTypes";
import { requirementTests } from "../test/commentTestHandlers";
import { simplifyCFG, trimFor } from "../control-flow/graph-ops";
import { type CFG, mergeNodeAttrs } from "../control-flow/cfg-defs";
import { graphToDot } from "../control-flow/render";
import { Graphviz, type Format } from "@hpcc-js/wasm-graphviz";

// ADD-LANGUAGES-HERE
const wasmMapping: { [language in Language]: string } = {
C: treeSitterC,
Go: treeSitterGo,
Python: treeSitterPython,
"C++": treeSitterCpp,
};

async function initializeParser(language: Language) {
await Parser.init({
locateFile(_scriptName: string, _scriptDirectory: string) {
return treeSitterCore;
},
});
const parserLanguage = await Parser.Language.load(wasmMapping[language]);
const parser = new Parser();
parser.setLanguage(parserLanguage);
return parser;
}
import { type Format, Graphviz } from "@hpcc-js/wasm-graphviz";
import { initializeParser } from "../parser-loader/vite.ts";

export type Parsers = { [language in Language]: Parser };

Expand Down
14 changes: 14 additions & 0 deletions src/parser-loader/bun.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import type { Language } from "../control-flow/cfg.ts";
import Parser from "web-tree-sitter";
import { wasmMapping } from "./wasmMapping.ts";

export async function initializeParser(
language: Language,
): Promise<{ parser: Parser; language: Parser.Language }> {
await Parser.init();

const parserLanguage = await Parser.Language.load(wasmMapping[language]);
const parser = new Parser();
parser.setLanguage(parserLanguage);
return { parser, language: parserLanguage };
}
16 changes: 16 additions & 0 deletions src/parser-loader/vite.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import type { Language } from "../control-flow/cfg.ts";
import Parser from "web-tree-sitter";
import treeSitterCore from "../../parsers/tree-sitter.wasm?url";
import { wasmMapping } from "./wasmMapping.ts";

export async function initializeParser(language: Language) {
await Parser.init({
locateFile(_scriptName: string, _scriptDirectory: string) {
return treeSitterCore;
},
});
const parserLanguage = await Parser.Language.load(wasmMapping[language]);
const parser = new Parser();
parser.setLanguage(parserLanguage);
return parser;
}
13 changes: 13 additions & 0 deletions src/parser-loader/wasmMapping.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import type { Language } from "../control-flow/cfg.ts";
import treeSitterC from "../../parsers/tree-sitter-c.wasm?url";
import treeSitterGo from "../../parsers/tree-sitter-go.wasm?url";
import treeSitterPython from "../../parsers/tree-sitter-python.wasm?url";
import treeSitterCpp from "../../parsers/tree-sitter-cpp.wasm?url";

// ADD-LANGUAGES-HERE
export const wasmMapping: { [language in Language]: string } = {
C: treeSitterC,
Go: treeSitterGo,
Python: treeSitterPython,
"C++": treeSitterCpp,
};
6 changes: 3 additions & 3 deletions src/test/collect-c.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import type Parser from "web-tree-sitter";
import treeSitterC from "../../parsers/tree-sitter-c.wasm?url";
import { parseComment } from "./commentTestUtils";
import type { TestFunction } from "./commentTestTypes";
import { initializeParser } from "./parser-init";

const { parser, language } = await initializeParser(treeSitterC);
import { initializeParser } from "../parser-loader/bun.ts";

const { parser, language } = await initializeParser("C");

export function getTestFuncs(code: string): Generator<TestFunction> {
const tree = parser.parse(code);
Expand Down
6 changes: 3 additions & 3 deletions src/test/collect-cpp.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import type Parser from "web-tree-sitter";
import treeSitterCpp from "../../parsers/tree-sitter-cpp.wasm?url";
import { parseComment } from "./commentTestUtils";
import type { TestFunction } from "./commentTestTypes";
import { initializeParser } from "./parser-init";

const { parser, language } = await initializeParser(treeSitterCpp);
import { initializeParser } from "../parser-loader/bun.ts";

const { parser, language } = await initializeParser("C++");

export function getTestFuncs(code: string): Generator<TestFunction> {
const tree = parser.parse(code);
Expand Down
6 changes: 3 additions & 3 deletions src/test/collect-go.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import type Parser from "web-tree-sitter";
import treeSitterGo from "../../parsers/tree-sitter-go.wasm?url";
import { parseComment } from "./commentTestUtils";
import type { TestFunction } from "./commentTestTypes";
import { initializeParser } from "./parser-init";

const { parser } = await initializeParser(treeSitterGo);
import { initializeParser } from "../parser-loader/bun.ts";

const { parser } = await initializeParser("Go");

export function getTestFuncs(code: string): Generator<TestFunction> {
const tree = parser.parse(code);
Expand Down
6 changes: 3 additions & 3 deletions src/test/collect-python.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import type Parser from "web-tree-sitter";
import treeSitterPython from "../../parsers/tree-sitter-python.wasm?url";
import { parseComment } from "./commentTestUtils";
import type { TestFunction } from "./commentTestTypes";
import { initializeParser } from "./parser-init";

const { parser, language } = await initializeParser(treeSitterPython);
import { initializeParser } from "../parser-loader/bun.ts";

const { parser, language } = await initializeParser("Python");

export function getTestFuncs(code: string): Generator<TestFunction> {
const tree = parser.parse(code);
Expand Down
12 changes: 0 additions & 12 deletions src/test/parser-init.ts

This file was deleted.

0 comments on commit 1aeddef

Please sign in to comment.