diff --git a/src/pattern/Simplifier.ts b/src/pattern/Simplifier.ts deleted file mode 100644 index cb77039..0000000 --- a/src/pattern/Simplifier.ts +++ /dev/null @@ -1,60 +0,0 @@ -import type { LiteralNode, Node, WildcardNode } from './Nodes'; -import { SyntaxKind } from './Nodes'; - -export type SimpleNode = LiteralNode | WildcardNode; - -// Returns a list of patterns using simple constructs that match the same set of strings -// as the original pattern. -export function simplify(nodes: Node[]) { - const result: SimpleNode[][] = [[]]; - for (const node of nodes) { - if (node.kind === SyntaxKind.Optional) { - // Given the pattern [p1, p2, p3, ..., pm] where p1, p2, p3, etc., are - // wildcard / literal nodes and pm is an optional node, we can - // simplify it to two patterns (one where the optional node exists, - // and one where it does not): - // [p1, p2, p3, ..., pm-1], [p1, p2, p3, ..., pm] - // - // If there are multiple optional nodes, the idea is the same; we - // "fork" the pattern whenever we find an optional node and - // continue doing so until all nodes have been handled. - // - // N.B.: This technique results in the number of simplified patterns - // growing exponentially, as having two possibilities at each - // optional node results in 2^n patterns where n is the number of - // optional nodes. However, in practice, this is unlikely to be an - // issue. - const mark = result.length; - for (let i = 0; i < mark; i++) { - const pre = result[i]; - result.push([...pre, node.childNode]); - } - } else { - for (const nodes of result) nodes.push(node); - } - } - - return result.map(mergeLiteralNodeRuns); -} - -function mergeLiteralNodeRuns(nodes: SimpleNode[]) { - const merged: SimpleNode[] = []; - let i = 0; - while (i < nodes.length) { - const node = nodes[i++]; - if (node.kind !== SyntaxKind.Literal) { - merged.push(node); - continue; - } - - // Find all literal nodes right after the current one, and merge their content. - const chars = [...node.chars]; - while (i < nodes.length && nodes[i].kind === SyntaxKind.Literal) { - chars.push(...(nodes[i++] as LiteralNode).chars); - } - - merged.push({ kind: SyntaxKind.Literal, chars }); - } - - return merged; -} diff --git a/src/pattern/Util.ts b/src/pattern/Util.ts index 4b1203c..7b3f7b4 100644 --- a/src/pattern/Util.ts +++ b/src/pattern/Util.ts @@ -1,6 +1,5 @@ -import type { LiteralNode, Node, ParsedPattern } from './Nodes'; +import type { Node, ParsedPattern } from './Nodes'; import { SyntaxKind } from './Nodes'; -import type { SimpleNode } from './Simplifier'; export function potentiallyMatchesEmptyString(pattern: ParsedPattern) { return pattern.nodes.every((node) => node.kind === SyntaxKind.Optional); @@ -36,38 +35,3 @@ export function getRegExpStringForNode(node: Node): string { return `.`; } } - -export function computePatternMatchLength(nodes: SimpleNode[]) { - return nodes.reduce((total, node) => total + (node.kind === SyntaxKind.Wildcard ? 1 : node.chars.length), 0); -} - -export function groupByNodeType(nodes: SimpleNode[]) { - let i = 0; - const groups: NodeGroup[] = []; - while (i < nodes.length) { - const node = nodes[i]; - if (node.kind === SyntaxKind.Literal) { - const literals: LiteralNode[] = []; - while (i < nodes.length && nodes[i].kind === SyntaxKind.Literal) literals.push(nodes[i++] as LiteralNode); - groups.push({ literals, isLiteralGroup: true }); - } else { - const mark = i; - while (i < nodes.length && nodes[i].kind === SyntaxKind.Wildcard) i++; - groups.push({ wildcardCount: i - mark, isLiteralGroup: false }); - } - } - - return groups; -} - -export type NodeGroup = LiteralGroup | WildcardGroup; - -export interface LiteralGroup { - isLiteralGroup: true; - literals: LiteralNode[]; -} - -export interface WildcardGroup { - isLiteralGroup: false; - wildcardCount: number; -} diff --git a/test/pattern/Simplifier.test.ts b/test/pattern/Simplifier.test.ts deleted file mode 100644 index 0727a81..0000000 --- a/test/pattern/Simplifier.test.ts +++ /dev/null @@ -1,78 +0,0 @@ -import type { Node } from '../../src/pattern/Nodes'; -import { SyntaxKind } from '../../src/pattern/Nodes'; -import { simplify } from '../../src/pattern/Simplifier'; - -describe('simplify()', () => { - it('should leave patterns without optional nodes as-is, disregarding literal node merging', () => { - const nodes: Node[] = [ - { kind: SyntaxKind.Literal, chars: [1, 2, 3, 4] }, - { kind: SyntaxKind.Wildcard }, - { kind: SyntaxKind.Literal, chars: [2, 3, 4, 5] }, - ]; - expect(simplify(nodes)).toStrictEqual([nodes]); - }); - - describe('optional node expansion', () => { - it('should create two variations of the pattern whenever an optional node is seen (simple version, only 1 optional node)', () => { - const node0: Node = { kind: SyntaxKind.Literal, chars: [1, 2, 3, 4] }; - const childNode: Node = { kind: SyntaxKind.Wildcard }; - const node1: Node = { kind: SyntaxKind.Optional, childNode }; - const node2: Node = { kind: SyntaxKind.Wildcard }; - expect(simplify([node0, node1, node2])).toBePermutationOf([ - [node0, node2], - [node0, childNode, node2], - ]); - }); - - it('should create two variations of the pattern whenever an optional node is seen (more than 1 optional node present)', () => { - const node0: Node = { kind: SyntaxKind.Literal, chars: [1, 2, 3, 4] }; - const childNode0: Node = { kind: SyntaxKind.Wildcard }; - const node1: Node = { kind: SyntaxKind.Optional, childNode: childNode0 }; - const node2: Node = { kind: SyntaxKind.Wildcard }; - const childNode1: Node = { kind: SyntaxKind.Wildcard }; - const node3: Node = { kind: SyntaxKind.Optional, childNode: childNode1 }; - const node4: Node = { kind: SyntaxKind.Literal, chars: [2, 3, 4, 5] }; - expect(simplify([node0, node1, node2, node3, node4])).toBePermutationOf([ - [node0, node2, node4], - [node0, childNode0, node2, node4], - [node0, childNode0, node2, childNode1, node4], - [node0, node2, childNode1, node4], - ]); - }); - }); - - describe('literal node merging', () => { - it('should merge the nodes of runs of literal nodes', () => { - const nodes: Node[] = [ - { kind: SyntaxKind.Literal, chars: [1, 2, 3, 4] }, - { kind: SyntaxKind.Literal, chars: [2, 3, 4] }, - { kind: SyntaxKind.Literal, chars: [3, 4] }, - - { kind: SyntaxKind.Wildcard }, - - { kind: SyntaxKind.Literal, chars: [2, 3] }, - { kind: SyntaxKind.Literal, chars: [4, 5] }, - ]; - expect(simplify(nodes)).toStrictEqual([ - [ - { kind: SyntaxKind.Literal, chars: [1, 2, 3, 4, 2, 3, 4, 3, 4] }, - { kind: SyntaxKind.Wildcard }, - { kind: SyntaxKind.Literal, chars: [2, 3, 4, 5] }, - ], - ]); - }); - - it('should merge the nodes of runs of literal nodes in variations of patterns', () => { - const nodes: Node[] = [ - { kind: SyntaxKind.Literal, chars: [1, 2, 3, 4] }, - { kind: SyntaxKind.Optional, childNode: { kind: SyntaxKind.Literal, chars: [2, 3, 4] } }, - { kind: SyntaxKind.Literal, chars: [4, 5] }, - { kind: SyntaxKind.Wildcard }, - ]; - expect(simplify(nodes)).toBePermutationOf([ - [{ kind: SyntaxKind.Literal, chars: [1, 2, 3, 4, 4, 5] }, { kind: SyntaxKind.Wildcard }], - [{ kind: SyntaxKind.Literal, chars: [1, 2, 3, 4, 2, 3, 4, 4, 5] }, { kind: SyntaxKind.Wildcard }], - ]); - }); - }); -}); diff --git a/test/pattern/Util.test.ts b/test/pattern/Util.test.ts index 71e0fc8..8990a8c 100644 --- a/test/pattern/Util.test.ts +++ b/test/pattern/Util.test.ts @@ -1,13 +1,6 @@ import type { LiteralNode, OptionalNode } from '../../src/pattern/Nodes'; import { SyntaxKind } from '../../src/pattern/Nodes'; -import type { SimpleNode } from '../../src/pattern/Simplifier'; -import { - compilePatternToRegExp, - computePatternMatchLength, - getRegExpStringForNode, - groupByNodeType, - potentiallyMatchesEmptyString, -} from '../../src/pattern/Util'; +import { compilePatternToRegExp, getRegExpStringForNode, potentiallyMatchesEmptyString } from '../../src/pattern/Util'; import { CharacterIterator } from '../../src/util/CharacterIterator'; function toLiteralNode(str: string): LiteralNode { @@ -123,63 +116,3 @@ describe('getRegExpStringForNode()', () => { }); }); }); - -describe('computePatternMatchLength()', () => { - it('should return 0 if given an empty array', () => { - expect(computePatternMatchLength([])).toBe(0); - }); - - it('should return the total number of chars in literal nodes plus 1 for each wildcard node', () => { - const nodes: SimpleNode[] = [ - { kind: SyntaxKind.Literal, chars: [0, 0] }, - { kind: SyntaxKind.Wildcard }, - { kind: SyntaxKind.Literal, chars: [0, 0, 0] }, - ]; - expect(computePatternMatchLength(nodes)).toBe(6); - }); -}); - -describe('groupByNodeType()', () => { - it('should return an empty array if nodes=[]', () => { - expect(groupByNodeType([])).toStrictEqual([]); - }); - - it('should return one literal group if there are only literal nodes in the input', () => { - const nodes: SimpleNode[] = [ - { kind: SyntaxKind.Literal, chars: [1, 2, 3] }, - { kind: SyntaxKind.Literal, chars: [2, 3, 4] }, - ]; - expect(groupByNodeType(nodes)).toStrictEqual([{ isLiteralGroup: true, literals: nodes }]); - }); - - it('should return one wildcard group if there are only wildcards in the input', () => { - const nodes: SimpleNode[] = [ - { kind: SyntaxKind.Wildcard }, - { kind: SyntaxKind.Wildcard }, - { kind: SyntaxKind.Wildcard }, - ]; - expect(groupByNodeType(nodes)).toStrictEqual([{ isLiteralGroup: false, wildcardCount: 3 }]); - }); - - it('should group literals and wildcards together', () => { - const literal0: LiteralNode = { kind: SyntaxKind.Literal, chars: [1, 2, 3] }; - const literal1: LiteralNode = { kind: SyntaxKind.Literal, chars: [2, 3, 4] }; - const literal2: LiteralNode = { kind: SyntaxKind.Literal, chars: [3, 4, 5] }; - const nodes: SimpleNode[] = [ - { kind: SyntaxKind.Wildcard }, - literal0, - literal1, - { kind: SyntaxKind.Wildcard }, - literal2, - { kind: SyntaxKind.Wildcard }, - { kind: SyntaxKind.Wildcard }, - ]; - expect(groupByNodeType(nodes)).toStrictEqual([ - { isLiteralGroup: false, wildcardCount: 1 }, - { isLiteralGroup: true, literals: [literal0, literal1] }, - { isLiteralGroup: false, wildcardCount: 1 }, - { isLiteralGroup: true, literals: [literal2] }, - { isLiteralGroup: false, wildcardCount: 2 }, - ]); - }); -});