-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #31 from tmr232/cpp-support
C++ Support
- Loading branch information
Showing
45 changed files
with
8,647 additions
and
4,039 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
{ | ||
"$schema": "https://biomejs.dev/schemas/1.9.4/schema.json", | ||
"vcs": { | ||
"enabled": false, | ||
"clientKind": "git", | ||
"useIgnoreFile": false, | ||
}, | ||
"files": { | ||
"ignoreUnknown": false, | ||
"ignore": ["./dist", "*.svelte"], | ||
}, | ||
"formatter": { | ||
"enabled": true, | ||
"indentStyle": "space", | ||
}, | ||
"organizeImports": { | ||
"enabled": true, | ||
}, | ||
"linter": { | ||
"enabled": true, | ||
"rules": { | ||
"recommended": true, | ||
"style": { | ||
"noParameterAssign": "off", | ||
}, | ||
}, | ||
}, | ||
"javascript": { | ||
"formatter": { | ||
"quoteStyle": "double", | ||
}, | ||
}, | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
--- | ||
title: Adding a New Language | ||
group: Documents | ||
category: Guides | ||
--- | ||
|
||
# Adding a New Language | ||
|
||
## Add the Relevant Parser | ||
|
||
We're using [tree-sitter] to parse code into ASTs. | ||
Each language requires its own parser. | ||
Find yours in [tree-sitter's list of parsers][tree-sitter parsers]. | ||
|
||
Once you find the parser, you need to install it: | ||
|
||
```shell | ||
bun add --dev tree-sitter-<language> | ||
``` | ||
|
||
After installing it, add it to `./scripts/generate-parsers.ts` | ||
and run `bun generate-parsers` to try and generate the `.wasm` parser file from it. | ||
|
||
If the package contains a pre-built `.wasm` file, this will work. | ||
If it fails, follow the [tree-sitter instructions for generating .wasm language files][build-wasm] to set up Emscripten, | ||
and run `bun generate-parsers` again. | ||
|
||
Once the command completes successfully, your new parser should be inside `./parsers`. | ||
|
||
## Generating the CFG | ||
|
||
Each CFG-builder resides in its own file inside `./src/control-flow`. | ||
Name yours `cfg-<language>.ts`. | ||
|
||
Your builder is expected to expose a `createCFGBuilder(options: BuilderOptions): CFGBuilder` function. | ||
A naive implementation to get started with would look something like this: | ||
|
||
```typescript | ||
import type Parser from "web-tree-sitter"; | ||
import type { BasicBlock, BuilderOptions, CFGBuilder } from "./cfg-defs"; | ||
import { | ||
type Context, | ||
GenericCFGBuilder, | ||
type StatementHandlers, | ||
} from "./generic-cfg-builder.ts"; | ||
|
||
export function createCFGBuilder(options: BuilderOptions): CFGBuilder { | ||
return new GenericCFGBuilder(statementHandlers, options); | ||
} | ||
|
||
const statementHandlers: StatementHandlers = { | ||
named: {}, | ||
default: defaultProcessStatement, | ||
}; | ||
|
||
function defaultProcessStatement( | ||
syntax: Parser.SyntaxNode, | ||
ctx: Context, | ||
): BasicBlock { | ||
const newNode = ctx.builder.addNode( | ||
"STATEMENT", | ||
syntax.text, | ||
syntax.startIndex, | ||
); | ||
ctx.link.syntaxToNode(syntax, newNode); | ||
return { entry: newNode, exit: newNode }; | ||
} | ||
``` | ||
|
||
Once you have your initial builder file, there's quite a lot of wiring to do, | ||
to register the language in all the relevant places. | ||
Search for `ADD-LANGUAGES-HERE` in the code, and add the language in all the relevant places. | ||
Those will include: | ||
|
||
- Language & builder definitions in `src/control-flow/cfg.ts` | ||
- Mapping languages to `.wasm` files in `src/components/utils.ts` | ||
- Mapping VSCode's `languageId` to our language definitions in `src/vscode/extension.ts` | ||
- Adding test-collectors and tests in `src/test/commentTestCollector.ts` | ||
- Adding the language in the demo's UI in `src/components/Demo.svelte` | ||
|
||
### Implementing the Builder | ||
|
||
Once all the wiring is in place, it's time to actually generate the CFG. | ||
It is highly recommended that you read the other CFG implementations for reference. | ||
|
||
While you're working, the [tree-sitter playground] will prove highly valuable in understanding the AST | ||
and creating queries. | ||
|
||
[tree-sitter]: https://tree-sitter.github.io/tree-sitter/ | ||
[tree-sitter parsers]: https://github.com/tree-sitter/tree-sitter/wiki/List-of-parsers | ||
[tree-sitter playground]: https://tree-sitter.github.io/tree-sitter/playground | ||
[build-wasm]: https://github.com/tree-sitter/tree-sitter/blob/master/lib/binding_web/README.md#generate-wasm-language-files |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
--- | ||
title: Running & Writing Tests | ||
group: Documents | ||
category: Guides | ||
--- | ||
|
||
# Comment Tests | ||
|
||
The comment-tests framework allows us to define CFG generation tests in the source-code that we test on. | ||
This makes test-writing easier, as we don't need to include code as strings in our tests. | ||
|
||
## Running Tests | ||
|
||
Use `bun test` to run all the tests. | ||
|
||
### Visualizing Failures | ||
|
||
If you have failing tests, you might want to visualize them. | ||
To do that, collect the test results as they get updated: | ||
|
||
```shell | ||
bun web-tests --watch | ||
``` | ||
|
||
And run the web server to visualize them: | ||
|
||
```shell | ||
bun web | ||
``` | ||
|
||
## Test Types | ||
|
||
The currently available test types are: | ||
|
||
1. `nodes`: asserts the expected node-count in the CFG | ||
2. `exits`: asserts the expected exit-node count in the CFG | ||
3. `reaches`: asserts reachability between node pairs | ||
4. `render`: asserts that the CFG for the code renders successfully | ||
|
||
Additionally, code-segmentation and snapshot-tests are added automatically for the code used in comment-tests. | ||
|
||
## Writing Tests | ||
|
||
1. Write your code in a new function in the matching file under `src/test/commentTestSamples` | ||
2. Add a comment right above the function, declaring the relevant tests. | ||
The comment format is JSON, but without the curly braces. | ||
|
||
## Adding Languages | ||
|
||
When we add a new language, we need to add a test-collector for that language. | ||
A test collector exports a `getTestFuncs(code: string): Generator<TestFunction>` function. | ||
To do that, we need to parse the code, and extract all functions and comments inside it. | ||
It's best to look at one of the `collect-<language>.ts` files to see how this is done. | ||
|
||
Once we have a collector, we add it in `src/test/commentTestCollector.ts` and map file-extensions to use with it. | ||
Then, we add a test file under `src/test/commentTestSamples`. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
{ | ||
"$schema": "./node_modules/oxlint/configuration_schema.json" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,58 @@ | ||
/** | ||
* The `generate-parsers` script copies or builds the relevant tree-sitter | ||
* parsers into the `./parsers` directory. | ||
* | ||
* To add a new parser, add its package name to the `parsersToBuild` array. | ||
*/ | ||
import { $ } from "bun"; | ||
import * as fs from "node:fs"; | ||
import { fileURLToPath } from "node:url"; | ||
|
||
const treeSitter = Bun.file("./node_modules/web-tree-sitter/tree-sitter.wasm"); | ||
await Bun.write("./parsers/tree-sitter.wasm", treeSitter); | ||
/** | ||
* The parsers to include | ||
*/ | ||
const parsersToBuild = [ | ||
"tree-sitter-go", | ||
"tree-sitter-c", | ||
"tree-sitter-python", | ||
"tree-sitter-cpp", | ||
]; | ||
|
||
const parsers = ["tree-sitter-go", "tree-sitter-c", "tree-sitter-python"]; | ||
function locatePrebuiltWasm(packageName: string): string { | ||
return fileURLToPath( | ||
import.meta.resolve(`${packageName}/${packageName}.wasm`), | ||
); | ||
} | ||
|
||
function hasPrebuiltWasm(packageName: string): boolean { | ||
try { | ||
locatePrebuiltWasm(packageName); | ||
} catch { | ||
return false; | ||
} | ||
return true; | ||
} | ||
|
||
for (const name of parsersToBuild) { | ||
const targetWasmPath = `./parsers/${name}.wasm`; | ||
if (await Bun.file(targetWasmPath).exists()) { | ||
console.log(`${name}: .wasm found, skipping copy.`); | ||
} else if (hasPrebuiltWasm(name)) { | ||
console.log(`${name}: copying .wasm`); | ||
fs.copyFileSync(locatePrebuiltWasm(name), targetWasmPath); | ||
} else { | ||
console.log(`${name}: building .wasm`); | ||
await $`bun x --bun tree-sitter build --wasm -o ${targetWasmPath} ./node_modules/${name}/`; | ||
} | ||
|
||
await $`git add ${targetWasmPath}`; | ||
} | ||
|
||
for (const name of parsers) { | ||
await $`bun x --bun tree-sitter build --wasm -o ./parsers/${name}.wasm ./node_modules/${name}/`; | ||
const treeSitterPath = "./parsers/tree-sitter.wasm"; | ||
if (!(await Bun.file(treeSitterPath).exists())) { | ||
const treeSitter = Bun.file( | ||
"./node_modules/web-tree-sitter/tree-sitter.wasm", | ||
); | ||
await Bun.write(treeSitterPath, treeSitter); | ||
await $`git add ${treeSitterPath}`; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.