diff --git a/packages/cursorless-engine/src/languages/LanguageDefinition.ts b/packages/cursorless-engine/src/languages/LanguageDefinition.ts index c358b86121..f6eeeff61a 100644 --- a/packages/cursorless-engine/src/languages/LanguageDefinition.ts +++ b/packages/cursorless-engine/src/languages/LanguageDefinition.ts @@ -4,7 +4,7 @@ import { SimpleScopeType, showError, } from "@cursorless/common"; -import { dirname, join } from "path"; +import { basename, dirname, join } from "path"; import { TreeSitterScopeHandler } from "../processTargets/modifiers/scopeHandlers"; import { TreeSitterTextFragmentScopeHandler } from "../processTargets/modifiers/scopeHandlers/TreeSitterScopeHandler/TreeSitterTextFragmentScopeHandler"; import { ScopeHandler } from "../processTargets/modifiers/scopeHandlers/scopeHandler.types"; @@ -12,6 +12,7 @@ import { ide } from "../singletons/ide.singleton"; import { TreeSitter } from "../typings/TreeSitter"; import { matchAll } from "../util/regex"; import { TreeSitterQuery } from "./TreeSitterQuery"; +import { validateQueryCaptures } from "./TreeSitterQuery/validateQueryCaptures"; import { TEXT_FRAGMENT_CAPTURE_NAME } from "./captureNames"; /** @@ -105,6 +106,8 @@ async function readQueryFileAndImports( [languageQueryPath]: null, }; + const doValidation = ide().runMode !== "production"; + // Keep reading imports until we've read all the imports. Every time we // encounter an import in a query file, we add it to the map with a value // of null, so that it will be read on the next iteration @@ -114,6 +117,8 @@ async function readQueryFileAndImports( continue; } + const fileName = basename(queryPath); + let rawQuery = await fileSystem.readBundledFile(queryPath); if (rawQuery == null) { @@ -137,6 +142,10 @@ async function readQueryFileAndImports( rawQuery = ""; } + if (doValidation) { + validateQueryCaptures(fileName, rawQuery); + } + rawQueryStrings[queryPath] = rawQuery; matchAll( rawQuery, @@ -150,18 +159,9 @@ async function readQueryFileAndImports( /^[^\S\r\n]*;;?[^\S\r\n]*(?:import|include)[^\S\r\n]+['"]?([\w|/.]+)['"]?[^\S\r\n]*$/gm, (match) => { const relativeImportPath = match[1]; - const canonicalSyntax = `;; import ${relativeImportPath}`; - - if (match[0] !== canonicalSyntax) { - showError( - ide().messages, - "LanguageDefinition.readQueryFileAndImports.malformedImport", - `Malformed import statement in ${queryPath}: "${match[0]}". Import statements must be of the form "${canonicalSyntax}"`, - ); - - if (ide().runMode === "test") { - throw new Error("Invalid import statement"); - } + + if (doValidation) { + validateImportSyntax(fileName, relativeImportPath, match[0]); } const importQueryPath = join(dirname(queryPath), relativeImportPath); @@ -174,3 +174,23 @@ async function readQueryFileAndImports( return Object.values(rawQueryStrings).join("\n"); } + +function validateImportSyntax( + file: string, + relativeImportPath: string, + actual: string, +) { + const canonicalSyntax = `;; import ${relativeImportPath}`; + + if (actual !== canonicalSyntax) { + showError( + ide().messages, + "LanguageDefinition.readQueryFileAndImports.malformedImport", + `Malformed import statement in ${file}: "${actual}". Import statements must be of the form "${canonicalSyntax}"`, + ); + + if (ide().runMode === "test") { + throw new Error("Invalid import statement"); + } + } +} diff --git a/packages/cursorless-engine/src/languages/TreeSitterQuery/validateQueryCaptures.test.ts b/packages/cursorless-engine/src/languages/TreeSitterQuery/validateQueryCaptures.test.ts new file mode 100644 index 0000000000..b3a803e95a --- /dev/null +++ b/packages/cursorless-engine/src/languages/TreeSitterQuery/validateQueryCaptures.test.ts @@ -0,0 +1,110 @@ +import { FakeIDE } from "@cursorless/common"; +import assert from "assert"; +import { injectIde } from "../../singletons/ide.singleton"; +import { validateQueryCaptures } from "./validateQueryCaptures"; + +const testCases: { name: string; isOk: boolean; content: string }[] = [ + { + name: "Scope captures", + isOk: true, + content: "(if_statement) @statement @ifStatement @comment", + }, + { + name: "Relationships", + isOk: true, + content: "(if_statement) @statement.domain @statement.interior @_.removal", + }, + { + name: "Position captures", + isOk: true, + content: + "(if_statement) @statement.startOf @statement.leading.startOf @statement.trailing.endOf", + }, + { + name: "Range captures", + isOk: true, + content: + "(if_statement) @statement.start @statement.start.endOf @statement.removal.start @statement.interior.start.endOf", + }, + { + name: "Dummy capture", + isOk: true, + content: "(if_statement) @_foo", + }, + { + name: "No range dummy relationships", + isOk: false, + content: "(if_statement) @_foo.start @_foo.startOf", + }, + { + name: "Text fragment", + isOk: true, + content: "(comment) @textFragment", + }, + { + name: "Iteration", + isOk: true, + content: "(document) @statement.iteration @statement.iteration.domain", + }, + { + name: "Unknown capture in comment", + isOk: true, + content: ";; (if_statement) @unknown", + }, + { + name: "Unknown capture", + isOk: false, + content: "(if_statement) @unknown", + }, + { + name: "Unknown relationship", + isOk: false, + content: "(if_statement) @statement.unknown", + }, + { + name: "Single @", + isOk: false, + content: "(if_statement) @", + }, + { + name: "Single wildcard", + isOk: false, + content: "(if_statement) @_", + }, + { + name: "Wildcard start", + isOk: false, + content: "(if_statement) @_.start", + }, + { + name: "Leading start", + isOk: false, + content: "(if_statement) @statement.leading.start", + }, + { + name: "Text fragment removal", + isOk: false, + content: "(comment) @textFragment.removal", + }, +]; + +suite("validateQueryCaptures", function () { + suiteSetup(() => { + injectIde(new FakeIDE()); + }); + + for (const testCase of testCases) { + const name = [testCase.isOk ? "OK" : "Error", testCase.name].join(": "); + + test(name, () => { + const runTest = () => + validateQueryCaptures(testCase.name, testCase.content); + + if (testCase.isOk) { + assert.doesNotThrow(runTest); + } else { + assert.throws(runTest); + } + }); + } +}); diff --git a/packages/cursorless-engine/src/languages/TreeSitterQuery/validateQueryCaptures.ts b/packages/cursorless-engine/src/languages/TreeSitterQuery/validateQueryCaptures.ts new file mode 100644 index 0000000000..ce3e91b58a --- /dev/null +++ b/packages/cursorless-engine/src/languages/TreeSitterQuery/validateQueryCaptures.ts @@ -0,0 +1,110 @@ +import { showError, simpleScopeTypeTypes } from "@cursorless/common"; +import { ide } from "../../singletons/ide.singleton"; + +const wildcard = "_"; +const textFragment = "textFragment"; +const captureNames = [wildcard, ...simpleScopeTypeTypes]; + +const positionRelationships = ["prefix", "leading", "trailing"]; +const positionSuffixes = ["startOf", "endOf"]; + +const rangeRelationships = [ + "domain", + "removal", + "interior", + "iteration", + "iteration.domain", +]; +const rangeSuffixes = [ + "start", + "end", + "start.startOf", + "start.endOf", + "end.startOf", + "end.endOf", +]; + +const allowedCaptures = new Set(); + +allowedCaptures.add(textFragment); + +for (const suffix of rangeSuffixes) { + allowedCaptures.add(`${textFragment}.${suffix}`); +} + +for (const captureName of captureNames) { + // Wildcard is not allowed by itself without a relationship + if (captureName !== wildcard) { + // eg: statement + allowedCaptures.add(captureName); + + // eg: statement.start | statement.start.endOf + for (const suffix of rangeSuffixes) { + allowedCaptures.add(`${captureName}.${suffix}`); + } + } + + for (const relationship of positionRelationships) { + // eg: statement.leading + allowedCaptures.add(`${captureName}.${relationship}`); + + for (const suffix of positionSuffixes) { + // eg: statement.leading.endOf + allowedCaptures.add(`${captureName}.${relationship}.${suffix}`); + } + } + + for (const relationship of rangeRelationships) { + // eg: statement.domain + allowedCaptures.add(`${captureName}.${relationship}`); + + for (const suffix of rangeSuffixes) { + // eg: statement.domain.start | statement.domain.start.endOf + allowedCaptures.add(`${captureName}.${relationship}.${suffix}`); + } + } +} + +// Not a comment. ie line is not starting with `;;` +// Capture starts with `@` and is followed by words and/or dots +const capturePattern = new RegExp(`^(?!;;).*@([\\w.]*)`, "gm"); + +export function validateQueryCaptures(file: string, rawQuery: string): void { + const matches = rawQuery.matchAll(capturePattern); + + const errors: string[] = []; + + for (const match of matches) { + const captureName = match[1]; + + if ( + captureName.length > 1 && + !captureName.includes(".") && + captureName.startsWith("_") + ) { + // Allow @_foo dummy captures to use for referring to in query predicates + continue; + } + + if (!allowedCaptures.has(captureName)) { + const lineNumber = match.input!.slice(0, match.index!).split("\n").length; + errors.push(`${file}(${lineNumber}) invalid capture '@${captureName}'.`); + } + } + + if (errors.length === 0) { + return; + } + + const message = errors.join("\n"); + + showError( + ide().messages, + "validateQueryCaptures.invalidCaptureName", + message, + ); + + if (ide().runMode === "test") { + throw new Error(message); + } +} diff --git a/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/languages/scm/clearEveryEntry4.yml b/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/languages/scm/clearEveryEntry4.yml index 1b93695f54..ae6c0b318c 100644 --- a/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/languages/scm/clearEveryEntry4.yml +++ b/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/languages/scm/clearEveryEntry4.yml @@ -13,7 +13,7 @@ initialState: documentContents: |- ( (_ - (_) @dummy + (_) @_dummy (capture) @name @_.domain.end ) @_.domain.start ) diff --git a/queries/go.scm b/queries/go.scm index 09bd1707db..45cc515747 100644 --- a/queries/go.scm +++ b/queries/go.scm @@ -246,8 +246,8 @@ ( (if_statement consequence: (block) @branch.end.endOf - ) @_if @branch.start.startOf - (#not-parent-type? @_if if_statement) + ) @branch.start.startOf + (#not-parent-type? @branch.start.startOf if_statement) (#insertion-delimiter! @branch.start.startOf " ") ) @@ -271,6 +271,6 @@ ;; iteration scope is always the outermost if statement ( - (if_statement) @_if @branch.iteration - (#not-parent-type? @_if if_statement) + (if_statement) @branch.iteration + (#not-parent-type? @branch.iteration if_statement) ) diff --git a/queries/javascript.core.scm b/queries/javascript.core.scm index 4bb1837695..5e853f2aee 100644 --- a/queries/javascript.core.scm +++ b/queries/javascript.core.scm @@ -23,11 +23,11 @@ (export_statement (_ name: (_) @name - ) @dummy + ) @_dummy ;; We have a special case for this one. Note we don't need to list the other ;; special cases from above because they can't be exported - (#not-type? @dummy variable_declarator) + (#not-type? @_dummy variable_declarator) ) @_.domain ;; Special cases for `(let | const | var) foo = ...;` because the full statement @@ -266,8 +266,8 @@ . value: (_)? @value ) @_.domain - ) @dummy - (#has-multiple-children-of-type? @dummy variable_declarator) + ) @_dummy + (#has-multiple-children-of-type? @_dummy variable_declarator) ) (expression_statement @@ -696,9 +696,9 @@ (_) @argumentOrParameter . (_)? @_.trailing.startOf - ) @dummy + ) @_dummy (#not-type? @argumentOrParameter "comment") - (#single-or-multi-line-delimiter! @argumentOrParameter @dummy ", " ",\n") + (#single-or-multi-line-delimiter! @argumentOrParameter @_dummy ", " ",\n") ) ;;!! foo("bar") @@ -710,9 +710,9 @@ (_) @argumentOrParameter . (_)? @_.trailing.startOf - ) @dummy + ) @_dummy (#not-type? @argumentOrParameter "comment") - (#single-or-multi-line-delimiter! @argumentOrParameter @dummy ", " ",\n") + (#single-or-multi-line-delimiter! @argumentOrParameter @_dummy ", " ",\n") ) (_ diff --git a/queries/javascript.fieldAccess.scm b/queries/javascript.fieldAccess.scm index 5a0f33fc6b..355f2f0756 100644 --- a/queries/javascript.fieldAccess.scm +++ b/queries/javascript.fieldAccess.scm @@ -12,10 +12,10 @@ ( (member_expression object: (call_expression - function: (_) @dummy + function: (_) @_dummy ) @private.fieldAccess ) - (#not-type? @dummy member_expression) + (#not-type? @_dummy member_expression) ) ;;!! foo[0].bar @@ -23,10 +23,10 @@ ( (member_expression object: (subscript_expression - object: (_) @dummy + object: (_) @_dummy ) @private.fieldAccess ) - (#not-type? @dummy member_expression) + (#not-type? @_dummy member_expression) ) ;;!! foo.bar @@ -38,8 +38,8 @@ (optional_chain) ] @private.fieldAccess.start property: (_) @private.fieldAccess.end - ) @dummy - (#not-parent-type? @dummy call_expression subscript_expression) + ) @_dummy + (#not-parent-type? @_dummy call_expression subscript_expression) ) ;;!! foo.bar() diff --git a/queries/python.fieldAccess.scm b/queries/python.fieldAccess.scm index 56284aa6f9..583952ba0f 100644 --- a/queries/python.fieldAccess.scm +++ b/queries/python.fieldAccess.scm @@ -12,10 +12,10 @@ ( (attribute object: (call - function: (_) @dummy + function: (_) @_dummy ) @private.fieldAccess ) - (#not-type? @dummy attribute) + (#not-type? @_dummy attribute) ) ;;!! foo[0].bar @@ -23,10 +23,10 @@ ( (attribute object: (subscript - value: (_) @dummy + value: (_) @_dummy ) @private.fieldAccess ) - (#not-type? @dummy attribute) + (#not-type? @_dummy attribute) ) ;;!! foo.bar @@ -35,8 +35,8 @@ (attribute "." @private.fieldAccess.start attribute: (_) @private.fieldAccess.end - ) @dummy - (#not-parent-type? @dummy call subscript) + ) @_dummy + (#not-parent-type? @_dummy call subscript) ) ;;!! foo.bar() diff --git a/queries/scm.name.scm b/queries/scm.name.scm index 1926a67a76..247b0ad971 100644 --- a/queries/scm.name.scm +++ b/queries/scm.name.scm @@ -3,14 +3,14 @@ ;;! --------------- ( (_ - _ @dummy + _ @_dummy . (capture) @name.start (capture)? @name.end . ) @_.domain (#not-type? @_.domain parameters) - (#not-type? @dummy capture) + (#not-type? @_dummy capture) (#not-parent-type? @_.domain field_definition) ) @@ -20,14 +20,14 @@ ( (field_definition (_ - _ @dummy + _ @_dummy . (capture) @name.start (capture)? @name.end . ) ) @_.domain - (#not-type? @dummy capture) + (#not-type? @_dummy capture) ) ;;!! (aaa) @bbb @ccc @@ -35,9 +35,9 @@ ( (_ (capture) @name - ) @dummy - (#not-type? @dummy parameters) - (#has-multiple-children-of-type? @dummy capture) + ) @_dummy + (#not-type? @_dummy parameters) + (#has-multiple-children-of-type? @_dummy capture) ) ;;!! (aaa) @bbb @ccc @@ -45,11 +45,11 @@ ;;! --------------- <~ iteration domain ( (_ - _ @dummy + _ @_dummy . (capture) @name.iteration.start ) @name.iteration.end.endOf @name.iteration.domain - (#not-type? @dummy capture) + (#not-type? @_dummy capture) (#not-type? @name.iteration.start parameters) (#not-parent-type? @name.iteration.domain field_definition) ) @@ -63,21 +63,21 @@ ;; Note that we can't use wildcard node due to ;; https://github.com/tree-sitter/tree-sitter/issues/2483 (named_node - _ @dummy + _ @_dummy . (capture) @name.iteration.start ) (anonymous_node - _ @dummy + _ @_dummy . (capture) @name.iteration.start ) (list - _ @dummy + _ @_dummy . (capture) @name.iteration.start ) ] ) @name.iteration.end.endOf @name.iteration.domain - (#not-type? @dummy capture) + (#not-type? @_dummy capture) ) diff --git a/queries/typescript.core.scm b/queries/typescript.core.scm index b4ca34752e..f8dd45a6f3 100644 --- a/queries/typescript.core.scm +++ b/queries/typescript.core.scm @@ -152,8 +152,8 @@ (_) @type ) @type.removal ) @_.domain - ) @dummy - (#has-multiple-children-of-type? @dummy variable_declarator) + ) @_dummy + (#has-multiple-children-of-type? @_dummy variable_declarator) ) ;;!! function ccc(aaa: string, bbb?: string) {} @@ -194,8 +194,8 @@ ;;! ^^^^^^ ^^^^^^ (type_arguments (_) @type - (#not-parent-type? @dummy type_assertion) -) @dummy + (#not-parent-type? @_dummy type_assertion) +) @_dummy ;;!! function foo() {} ;;! ^