From f4ccac69558b0a25b1195b5cd6aa0a677c8966c4 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Fri, 11 Oct 2024 13:34:56 -0500 Subject: [PATCH 01/21] Updated the regex --- .github/workflows/ci.yml | 2 +- parser/converter1.js | 121 +++++++++++++++++++++++++ parser/main.js | 4 +- spec_tests/javascriptTests.json | 6 +- validator/event/specialTags.json | 148 +++++++++++++++++-------------- 5 files changed, 212 insertions(+), 69 deletions(-) create mode 100644 parser/converter1.js diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5229fb2a..17b23239 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -43,7 +43,7 @@ jobs: - name: Download dependencies run: npm ci - name: Test & publish code coverage - uses: paambaati/codeclimate-action@v8.0.0 + uses: paambaati/codeclimate-action@v9.0.0 env: CC_TEST_REPORTER_ID: ${{ secrets.CC_TEST_REPORTER_ID }} with: diff --git a/parser/converter1.js b/parser/converter1.js new file mode 100644 index 00000000..51c71bbf --- /dev/null +++ b/parser/converter1.js @@ -0,0 +1,121 @@ +import { IssueError } from '../common/issues/issues' +import { getTagSlashIndices } from '../utils/hedStrings' +import { SchemaValueTag } from '../validator/schema/types' + +/** + * Converter from a tag specification to a schema-based tag object. + */ +export default class TagConverter { + /** + * Constructor. + * + * @param {TagSpec} tagSpec - The tag specification to convert. + * @param {Schemas} hedSchemas - The HED schema collection. + */ + constructor(tagSpec, hedSchemas) { + this.tagSpec = tagSpec + this.tagString = tagSpec.tag + this.tagLevels = this.tagString.split('/') + this.tagSlashes = getTagSlashIndices(this.tagString) + this.hedSchemas = hedSchemas + this.tagMapping = hedSchemas.getSchema(tagSpec.library).entries.tags + this.schemaTag = undefined + this.remainder = '' + } + + /** + * Retrieve the SchemaTag object for a tag specification. + * + * @returns {[SchemaTag, string]} The schema's corresponding tag object and the remainder of the tag string. + */ + convert() { + let parentTag = undefined + + for (let i = 0; i < this.tagLevels.length; i++) { + if (parentTag?.valueTag) return this._finalizeSchemaTag(parentTag.valueTag, i) + + const childTag = this._validateAndGetChildTag(parentTag, i) + if (!childTag) return this._finalizeSchemaTag(parentTag, i) + + parentTag = childTag + } + + return this._finalizeSchemaTag(parentTag, this.tagLevels.length) + } + + /** + * Validates the child tag against schema rules and retrieves the corresponding schema entry. + * + * @param {SchemaTag} parentTag - The parent schema tag. + * @param {number} levelIndex - The index level of the tag. + * @returns {SchemaTag} The child schema tag, if valid. + */ + _validateAndGetChildTag(parentTag, levelIndex) { + const childTag = this._getSchemaTag(levelIndex) + + if (this.schemaTag instanceof SchemaValueTag) { + this._throwIssue('internalConsistencyError', 'Child tag is a value tag which should have been handled earlier.') + } + + if (!childTag) { + if (levelIndex === 0) this._throwIssue('invalidTag', { tag: this.tagString }) + if (parentTag && !parentTag.hasAttributeName('extensionAllowed')) { + this._throwIssue('invalidExtension', { tag: this.tagLevels[levelIndex], parentTag: parentTag.longName }) + } + return undefined + } + + if (parentTag && (!childTag.parent || childTag.parent !== parentTag)) { + this._throwIssue('invalidParentNode', { tag: this.tagLevels[levelIndex], parentTag: childTag.longName }) + } + + return childTag + } + + /** + * Retrieves a schema tag from the tag mapping. + * + * @param {number} levelIndex - The index of the tag level. + * @param {boolean} [trimLeft=false] - Whether to trim the left side of the string. + * @returns {SchemaTag|undefined} The corresponding schema tag, if found. + */ + _getSchemaTag(levelIndex, trimLeft = false) { + let tagLevel = this.tagLevels[levelIndex].toLowerCase() + if (trimLeft) tagLevel = tagLevel.trimLeft() + + if (!tagLevel || tagLevel !== tagLevel.trim()) { + this._throwIssue('invalidTag', { tag: this.tagString }) + } + + return this.tagMapping.getEntry(tagLevel) + } + + /** + * Sets the final schema tag and calculates the remainder string. + * + * @param {SchemaTag} schemaTag - The schema tag to set. + * @param {number} splitIndex - The index to split the remainder. + * @returns {[SchemaTag, string]} The schema tag and remainder. + */ + _finalizeSchemaTag(schemaTag, splitIndex) { + if (this.schemaTag) return [this.schemaTag, this.remainder] + + this.schemaTag = schemaTag + this.remainder = this.tagLevels.slice(splitIndex).join('/') + if (schemaTag?.hasAttributeName('requireChild') && !this.remainder) { + this._throwIssue('childRequired', { tag: this.tagString }) + } + + return [this.schemaTag, this.remainder] + } + + /** + * Generates and throws an issue error. + * + * @param {string} errorType - The type of error. + * @param {object} details - Additional details for the error. + */ + _throwIssue(errorType, details) { + IssueError.generateAndThrow(errorType, details) + } +} diff --git a/parser/main.js b/parser/main.js index ff250d4b..161a0383 100644 --- a/parser/main.js +++ b/parser/main.js @@ -2,7 +2,7 @@ import { mergeParsingIssues } from '../utils/hedData' import { generateIssue } from '../common/issues/issues' import ParsedHedString from './parsedHedString' -import splitHedString from './splitHedString' +import HedStringSplitter from './splitHedString' import { getCharacterCount, stringIsEmpty } from '../utils/string' const openingGroupCharacter = '(' @@ -154,7 +154,7 @@ export const parseHedString = function (hedString, hedSchemas) { fullStringIssues.syntax = [] return [null, fullStringIssues] } - const [parsedTags, splitIssues] = splitHedString(hedString, hedSchemas) + const [parsedTags, splitIssues] = new HedStringSplitter(hedString, hedSchemas) const parsingIssues = Object.assign(fullStringIssues, splitIssues) if (parsedTags === null) { return [null, parsingIssues] diff --git a/spec_tests/javascriptTests.json b/spec_tests/javascriptTests.json index 1272aec9..ffbe300c 100644 --- a/spec_tests/javascriptTests.json +++ b/spec_tests/javascriptTests.json @@ -9,7 +9,7 @@ "definitions": ["(Definition/Acc/#, (Acceleration/#, Red))", "(Definition/MyColor, (Label/Pie))"], "tests": { "string_tests": { - "fails": ["Item/Bl\b"], + "fails": ["Item/Bl\b", "Item/ABC\u009e"], "passes": ["Red, Blue, Description/Red", "Description/This is a \u00ca\u00b0 good character"] }, "sidecar_tests": { @@ -37,6 +37,10 @@ [ ["onset", "duration", "HED"], [4.5, 0, "Item/Bl\b"] + ], + [ + ["onset", "duration", "HED"], + [4.5, 0, "Item/{abc}"] ] ], "passes": [ diff --git a/validator/event/specialTags.json b/validator/event/specialTags.json index 7190340a..3f2b52f0 100644 --- a/validator/event/specialTags.json +++ b/validator/event/specialTags.json @@ -1,121 +1,139 @@ { - "Definition": { - "child": true, - "requireChild": true, + "Def": { + "allowValue": true, + "allowTwoLevelValue": true, + "requireValue": true, + "tagGroup": false, + "topLevelTagGroup": false, + "maxNumberSubgroups": -1, + "minNumberSubgroups": -1, + "ERROR_CODE": "DEF_INVALID", + "forbiddenSubgroupTags": [], + "defTagRequired": false, + "otherAllowedTags": [] + }, + "Def-expand": { + "allowValue": true, + "allowTwoLevelValue": true, + "requireValue": true, "tagGroup": true, - "topLevelTagGroup": true, + "topLevelTagGroup": false, "maxNumberSubgroups": 1, "minNumberSubgroups": 0, - "ERROR_CODE": "DEFINITION_INVALID", - "subgroupTagsNotAllowed": [ + "ERROR_CODE": "DEF_EXPAND_INVALID", + "forbiddenSubgroupTags": [ "Def", "Def-expand", - "Event-context", "Definition", - "Onset", + "Delay", + "Duration", + "Event-context", "Inset", "Offset", - "Delay", - "Duration" + "Onset" ], "defTagRequired": false, "otherAllowedTags": [] }, - "Def": { - "child": true, - "tagGroup": false, - "topLevelTagGroup": false, - "maxNumberSubgroups": null, - "minNumberSubgroups": null, - "ERROR_CODE": "DEF_INVALID", - "subgroupTagsNotAllowed": [], - "defTagRequired": false, - "otherAllowedTags": null - }, - "Def-expand": { - "child": true, + "Definition": { + "allowValue": true, + "allowTwoLevelValue": true, + "requireValue": true, "tagGroup": true, - "topLevelTagGroup": false, + "topLevelTagGroup": true, "maxNumberSubgroups": 1, "minNumberSubgroups": 0, - "ERROR_CODE": "DEF_EXPAND_INVALID", - "subgroupTagsNotAllowed": [ + "ERROR_CODE": "DEFINITION_INVALID", + "forbiddenSubgroupTags": [ "Def", "Def-expand", - "Event-context", "Definition", - "Onset", + "Delay", + "Duration", + "Event-context", "Inset", "Offset", - "Delay", - "Duration" + "Onset" ], "defTagRequired": false, "otherAllowedTags": [] }, - "Onset": { - "child": false, + "Delay": { + "allowValue": true, + "allowTwoLevelValue": false, + "requireValue": true, "tagGroup": true, "topLevelTagGroup": true, "maxNumberSubgroups": 1, - "minNumberSubgroups": 0, + "minNumberSubgroups": 1, "ERROR_CODE": "TEMPORAL_TAG_ERROR", - "subgroupTagsNotAllowed": ["Event-context", "Definition", "Onset", "Inset", "Offset", "Delay", "Duration"], - "defTagRequired": true, + "forbiddenSubgroupTags": ["Definition", "Delay", "Duration", "Event-context", "Inset", "Offset", "Onset"], + "defTagRequired": false, + "otherAllowedTags": ["Duration"] + }, + "Duration": { + "allowValue": true, + "allowTwoLevelValue": false, + "requireValue": true, + "tagGroup": true, + "topLevelTagGroup": true, + "maxNumberSubgroups": 1, + "minNumberSubgroups": 1, + "ERROR_CODE": "TEMPORAL_TAG_ERROR", + "forbiddenSubgroupTags": ["Definition", "Delay", "Duration", "Event-context", "Inset", "Offset", "Onset"], + "defTagRequired": false, + "otherAllowedTags": ["Delay"] + }, + "Event-context": { + "allowValue": false, + "allowTwoLevelValue": false, + "requireValue": false, + "tagGroup": true, + "topLevelTagGroup": true, + "maxNumberSubgroups": null, + "minNumberSubgroups": 0, + "ERROR_CODE": "TAG_GROUP_ERROR", + "forbiddenSubgroupTags": ["Event-context", "Definition", "Onset", "Inset", "Offset", "Delay", "Duration"], + "defTagRequired": false, "otherAllowedTags": [] }, "Inset": { - "child": false, + "allowValue": false, + "allowTwoLevelValue": false, + "requireValue": false, "tagGroup": true, "topLevelTagGroup": true, "maxNumberSubgroups": 1, "minNumberSubgroups": 0, "ERROR_CODE": "TEMPORAL_TAG_ERROR", - "subgroupTagsNotAllowed": ["Event-context", "Definition", "Onset", "Inset", "Offset", "Delay", "Duration"], + "forbiddenSubgroupTags": ["Definition", "Delay", "Duration", "Event-context", "Inset", "Offset", "Onset"], "defTagRequired": true, "otherAllowedTags": [] }, "Offset": { - "child": false, + "allowValue": false, + "allowTwoLevelValue": false, + "requireValue": false, "tagGroup": true, "topLevelTagGroup": true, "maxNumberSubgroups": 0, "minNumberSubgroups": 0, "ERROR_CODE": "TEMPORAL_TAG_ERROR", - "subgroupTagsNotAllowed": [], + "forbiddenSubgroupTags": [], "defTagRequired": true, "otherAllowedTags": [] }, - "Delay": { - "child": true, + "Onset": { + "allowValue": false, + "allowTwoLevelValue": false, + "requireValue": false, "tagGroup": true, "topLevelTagGroup": true, "maxNumberSubgroups": 1, - "minNumberSubgroups": 1, - "ERROR_CODE": "TEMPORAL_TAG_ERROR", - "subgroupTagsNotAllowed": ["Event-context", "Definition", "Onset", "Inset", "Offset", "Delay", "Duration"], - "defTagRequired": false, - "otherAllowedTags": ["Duration"] - }, - "Duration": { - "child": true, - "topLevelTagGroup": true, - "maxNumberSubgroups": 1, - "minNumberSubgroups": 1, - "ERROR_CODE": "TEMPORAL_TAG_ERROR", - "subgroupTagsNotAllowed": ["Event-context", "Definition", "Onset", "Inset", "Offset", "Delay", "Duration"], - "defTagRequired": false, - "otherAllowedTags": ["Delay"] - }, - "Event-context": { - "child": false, - "tagGroup": true, - "topLevelTagGroup": true, - "maxNumberSubgroups": null, "minNumberSubgroups": 0, - "ERROR_CODE": "TAG_GROUP_ERROR", - "subgroupTagsNotAllowed": ["Event-context", "Definition", "Onset", "Inset", "Offset", "Delay", "Duration"], - "defTagRequired": false, + "ERROR_CODE": "TEMPORAL_TAG_ERROR", + "forbiddenSubgroupTags": ["Definition", "Delay", "Duration", "Event-context", "Inset", "Offset", "Onset"], + "defTagRequired": true, "otherAllowedTags": [] } } From 51279431f4f4221c6b76df3758a902e27631dd49 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Wed, 16 Oct 2024 11:10:31 -0500 Subject: [PATCH 02/21] Updating to start tokenizer --- parser/tokenizerNew.js | 377 ++++++++++++++++++++++++++++++++++ spec_tests/temp1.spec.js | 140 +++++++++++++ tests/temp.spec.js | 83 ++++++++ tests/temp.tockenizer.spec.js | 115 +++++++++++ 4 files changed, 715 insertions(+) create mode 100644 parser/tokenizerNew.js create mode 100644 spec_tests/temp1.spec.js create mode 100644 tests/temp.spec.js create mode 100644 tests/temp.tockenizer.spec.js diff --git a/parser/tokenizerNew.js b/parser/tokenizerNew.js new file mode 100644 index 00000000..8af2ecbc --- /dev/null +++ b/parser/tokenizerNew.js @@ -0,0 +1,377 @@ +import { replaceTagNameWithPound } from '../utils/hedStrings' +import { unicodeName } from 'unicode-name' +import { generateIssue } from '../common/issues/issues' + +const CHARACTERS = { + BLANK: ' ', + OPENING_GROUP: '(', + CLOSING_GROUP: ')', + OPENING_COLUMN: '{', + CLOSING_COLUMN: '}', + COMMA: ',', + COLON: ':', + SLASH: '/', +} + +const invalidCharacters = new Set(['[', ']', '~', '"']) +// Add control codes to invalidCharacters +for (let i = 0x00; i <= 0x1f; i++) { + invalidCharacters.add(String.fromCodePoint(i)) +} +for (let i = 0x7f; i <= 0x9f; i++) { + invalidCharacters.add(String.fromCodePoint(i)) +} + +const invalidCharactersOutsideOfValues = new Set([':']) + +/** + * A specification for a tokenized substring. + */ +export class SubstringSpec { + /** + * The starting and ending bounds of the substring. + * @type {number[]} + */ + bounds + + constructor(start, end) { + this.bounds = [start, end] + } +} + +/** + * A specification for a tokenized tag. + */ +export class TagSpec extends SubstringSpec { + /** + * The tag this spec represents. + * @type {string} + */ + tag + /** + * The schema prefix for this tag, if any. + * @type {string} + */ + library + + constructor(tag, start, end, librarySchema) { + super(start, end) + + this.tag = tag.trim() + this.library = librarySchema + } +} + +/** + * A specification for a tokenized tag group. + */ +export class GroupSpec extends SubstringSpec { + /** + * The child group specifications. + * @type {GroupSpec[]} + */ + children + + constructor(start, end) { + super(start, end) + + this.children = [] + } +} + +/** + * A specification for a tokenized column splice template. + */ +export class ColumnSpliceSpec extends SubstringSpec { + /** + * The column name this spec refers to. + * @type {string} + */ + columnName + + constructor(name, start, end) { + super(start, end) + + this.columnName = name.trim() + } +} + +class TokenizerState { + constructor() { + this.currentToken = '' // Characters in the token currently being parsed + this.groupDepth = 0 + this.startingIndex = 0 // Starting index of this token + this.resetIndexFlag = false + this.slashFound = false + this.librarySchema = '' + this.columnSpliceIndex = -1 //Index of { if this token is column splice + this.currentGroupStack = [[]] + this.parenthesesStack = [new GroupSpec(0)] + this.ignoringCharacters = false + this.closingGroup = false + // this.closingColumn = false + } +} + +/** + * Class for tokenizing HED strings. + */ +export class HedStringTokenizerNew { + constructor(hedString) { + this.hedString = hedString + this.syntaxIssues = [] + this.state = new TokenizerState() + } + + /** + * Split the HED string into delimiters and tags. + * + * @returns {[TagSpec[], GroupSpec, Object]} The tag specifications, group bounds, and any issues found. + */ + tokenize() { + this.initializeTokenizer() + + for (let i = 0; i < this.hedString.length; i++) { + const character = this.hedString.charAt(i) + this.tokenizeCharacter(i, character) + if (this.state.resetIndexFlag) { + this.state.resetIndexFlag = false + this.state.startingIndex = i + 1 + this.state.currentToken = '' + } + } + this.pushTag(this.hedString.length - 1) + + if (this.state.columnSpliceIndex >= 0) { + this.pushIssue('unclosedCurlyBrace', this.state.columnSpliceIndex) + } + + this.unwindGroupStack() + + const tagSpecs = this.state.currentGroupStack.pop() + const groupSpecs = this.state.parenthesesStack.pop() + const issues = { + syntax: this.syntaxIssues, + conversion: [], + } + return [tagSpecs, groupSpecs, issues] + } + + initializeTokenizer() { + this.syntaxIssues = [] + this.state = new TokenizerState() + } + + tokenizeCharacter(i, character) { + if (this.state.ignoringCharacters) { + this.handleIgnoringCharacters(i, character) + } else { + this.handleCharacter(i, character) + } + } + + handleIgnoringCharacters(i, character) { + const characterHandler = { + [CHARACTERS.CLOSING_GROUP]: () => { + this.clearToken() + this.handleClosingGroup(i) + }, + [CHARACTERS.COMMA]: () => { + this.clearToken() + this.handleClosingGroup(i) + }, + }[character] + + if (characterHandler) { + characterHandler() + } + } + + handleCharacter(i, character) { + const characterHandler = { + [CHARACTERS.OPENING_GROUP]: () => this.handleOpeningGroup(i), + [CHARACTERS.CLOSING_GROUP]: () => { + this.pushTag(i) + this.handleClosingGroup(i) + }, + [CHARACTERS.OPENING_COLUMN]: () => this.handleOpeningColumn(i), + [CHARACTERS.CLOSING_COLUMN]: () => { + this.pushTag(i) + this.handleClosingColumn(i) + }, + [CHARACTERS.COMMA]: () => { + this.pushTag(i) + //this.state.closingColumn = false + }, + [CHARACTERS.COLON]: () => this.handleColon(character), + [CHARACTERS.SLASH]: () => this.handleSlash(i), + }[character] // Selects the character handler based on the value of character + + if (characterHandler) { + characterHandler() + } else if (invalidCharacters.has(character)) { + this.pushInvalidCharacterIssue(character, i) + } else { + this.handleRegularCharacter(character) + } + } + + handleOpeningGroup(i) { + this.state.currentGroupStack.push([]) + this.state.parenthesesStack.push(new GroupSpec(i)) + this.state.resetIndexFlag = true + this.state.groupDepth++ + } + + handleClosingGroup(i) { + // If the group depth is <= 0, it means there's no corresponding opening group. + if (this.state.groupDepth <= 0) { + this.pushIssue('unopenedParenthesis', i) + return + } + // Close the group by updating its bounds and moving it to the parent group. + this.closeGroup(i) + } + + handleOpeningColumn(i) { + // We're already in the middle of a token -- can't have an opening brace + if (this.state.currentToken.trim().length > 0) { + this.pushInvalidCharacterIssue(CHARACTERS.OPENING_COLUMN, i) + this.state.ignoringCharacters = true + return + } + if (this.state.columnSpliceIndex >= 0) { + this.pushIssue('nestedCurlyBrace', i) + } + this.state.columnSpliceIndex = i + } + + handleClosingColumn(i) { + // If a column splice is not in progress push an issue indicating an unopened curly brace. + if (this.state.columnSpliceIndex < 0) { + this.pushIssue('unopenedCurlyBrace', i) + return + } + // Ensure that column slice is not empty + if (this.state.currentToken == '') { + this.pushIssue('emptyCurlyBrace', i) + return + } + + // Close the column by updating its bounds and moving it to the parent group, push a column splice on the stack. + this.state.currentGroupStack[this.state.groupDepth].push( + new ColumnSpliceSpec(this.state.currentToken.trim(), this.state.startingIndex, i), + ) + this.state.columnSpliceIndex = -1 + this.clearToken() + this.state.closingColumn = true // Used to indicate that + } + + handleColon(character) { + if (!this.state.slashFound && !this.state.librarySchema) { + this.state.librarySchema = this.state.currentToken + this.state.resetIndexFlag = true + } else { + this.state.currentToken += character + } + } + + handleSlash(i) { + if (!this.state.currentToken || this.state.slashFound) { + // Leading slash is error -- ignore rest of the token + this.pushIssue('extraSlash', i) + this.state.ignoringCharacters = true + } else { + this.state.slashFound = true + this.state.currentToken += CHARACTERS.SLASH + } + } + + handleRegularCharacter(character) { + // if (character != CHARACTERS.BLANK && this.state.closingColumn) { + // this.pushIssue('unparsedCurlyBraces', i) + // } + if (!this.state.ignoringCharacters) { + this.state.currentToken += character + this.state.resetIndexFlag = this.state.currentToken === '' + } + } + + unwindGroupStack() { + while (this.state.groupDepth > 0) { + this.pushIssue( + 'unclosedParenthesis', + this.state.parenthesesStack[this.state.parenthesesStack.length - 1].bounds[0], + ) + this.closeGroup(this.hedString.length) + } + } + + pushTag(i) { + // Called when a token has been parsed + + // if (!this.state.currentToken && isEndOfString) { // If empty token at end of string just return. + // return + // } + // If we're in the process of closing a group, reset the closingGroup flag (allows for empty groups) + if (this.state.closingGroup) { + // Empty groups are allowed. + this.state.closingGroup = false + } else if (this.state.slashFound) { + //Trailing token slash is an error + this.pushIssue('extraSlash', i) + } else if (!this.state.currentToken) { + // Column spec has already been called. + this.pushIssue('emptyTagFound', i) + } else if (this.state.columnSpliceIndex < 0) { + // Not a column splice so goes on group stack as a TagSpec + this.checkValueTagForInvalidCharacters() + this.state.currentGroupStack[this.state.groupDepth].push( + new TagSpec(this.state.currentToken.trim(), this.state.startingIndex, i, this.state.librarySchema), + ) + } + // Clear the current token and reset flags for the next iteration. + this.clearToken() + } + + clearToken() { + this.state.ignoringCharacters = false + this.state.resetIndexFlag = true + this.state.slashFound = false + this.state.librarySchema = '' + this.state.closingColumn = false + } + + closeGroup(i) { + const groupSpec = this.state.parenthesesStack.pop() + groupSpec.bounds[1] = i + 1 + this.state.parenthesesStack[this.state.groupDepth - 1].children.push(groupSpec) + this.state.currentGroupStack[this.state.groupDepth - 1].push(this.state.currentGroupStack.pop()) + this.state.groupDepth-- + //this.closingColumn = false + } + + checkValueTagForInvalidCharacters() { + const formToCheck = replaceTagNameWithPound(this.state.currentToken) + for (let i = 0; i < formToCheck.length; i++) { + const character = formToCheck.charAt(i) + if (invalidCharactersOutsideOfValues.has(character)) { + this.pushInvalidCharacterIssue(character, this.state.startingIndex + i) + } + } + } + + pushIssue(issueCode, index) { + this.syntaxIssues.push(generateIssue(issueCode, { index, string: this.hedString })) + } + + pushInvalidCharacterIssue(character, index) { + this.syntaxIssues.push( + generateIssue('invalidCharacter', { + character: unicodeName(character), + index, + string: this.hedString, + }), + ) + } +} diff --git a/spec_tests/temp1.spec.js b/spec_tests/temp1.spec.js new file mode 100644 index 00000000..f6a1de4b --- /dev/null +++ b/spec_tests/temp1.spec.js @@ -0,0 +1,140 @@ +import chai from 'chai' +const assert = chai.assert +import { beforeAll, describe, afterAll } from '@jest/globals' +import { parseHedString } from '../parser/main' +import { ParsedHedGroup } from '../parser/parsedHedGroup' +import { ParsedHedTag } from '../parser/parsedHedTag' +import { ParsedHedString } from '../parser/parsedHedString' +import { SpecialTagValidator } from '../validator/event/special' + +import * as hed from '../validator/event' +import { BidsIssue } from '../bids/types/issues' +import { buildSchemas } from '../validator/schema/init' +import { SchemaSpec, SchemasSpec } from '../common/schema/types' +import path from 'path' +import { BidsSidecar, BidsTsvFile } from '../bids' +const fs = require('fs') + +const displayLog = process.env.DISPLAY_LOG === 'true' + +const skippedErrors = { + VERSION_DEPRECATED: 'Not handling in the spec tests', + ELEMENT_DEPRECATED: 'Not handling in this round. This is a warning', + STYLE_WARNING: 'Not handling style warnings at this time', + 'invalid-character-name-value-class-deprecated': 'We will let this pass regardless of schema version.', +} +const readFileSync = fs.readFileSync +const test_file_name = 'javascript_tests.json' +//const test_file_name = 'temp3.json'; + +function comboListToStrings(items) { + const comboItems = [] + if (items === undefined || items.length === 0) { + return comboItems + } + for (const item of items) { + const nextItem = [JSON.stringify(item.sidecar), tsvToString(item.events)] + comboItems.push(nextItem) + } + return comboItems +} + +function getMergedSidecar(side1, side2) { + return Object.assign({}, JSON.parse(side1), side2) +} + +function loadTestData() { + const testFile = path.join(__dirname, test_file_name) + return JSON.parse(readFileSync(testFile, 'utf8')) +} + +const testInfo = loadTestData() + +function stringifyList(items) { + const stringItems = [] + if (items === undefined || items.length === 0) { + return stringItems + } + for (const item of items) { + stringItems.push(JSON.stringify(item)) + } + return stringItems +} + +function tsvListToStrings(eventList) { + const eventStrings = [] + if (eventList === undefined || eventList.length === 0) { + return eventStrings + } + for (const item of eventList) { + eventStrings.push(tsvToString(item)) + } + return eventStrings +} + +function tsvToString(events) { + return events.map((row) => row.join('\t')).join('\n') +} + +function getTagInfo(tag) { + const name = tag._schemaTag.name + const remainder = tag._remainder + const longName = tag.longName + const canonicalTagName = tag.canonicalTagName + return `name:${name} remainder:${remainder} longName:${longName} canonical:${canonicalTagName}` +} + +describe('HED validation using JSON tests', () => { + const schemaMap = new Map([ + ['8.2.0', undefined], + ['8.3.0', undefined], + ]) + + const badLog = [] + let totalTests = 0 + let wrongErrors = 0 + let unexpectedErrors = 0 + + beforeAll(async () => { + const spec2 = new SchemaSpec('', '8.2.0', '', path.join(__dirname, '../tests/data/HED8.2.0.xml')) + const specs2 = new SchemasSpec().addSchemaSpec(spec2) + const schemas2 = await buildSchemas(specs2) + const spec3 = new SchemaSpec('', '8.3.0', '', path.join(__dirname, '../tests/data/HED8.3.0.xml')) + const specs3 = new SchemasSpec().addSchemaSpec(spec3) + const schemas3 = await buildSchemas(specs3) + schemaMap.set('8.2.0', schemas2) + schemaMap.set('8.3.0', schemas3) + }) + + afterAll(() => { + const outBad = path.join(__dirname, 'tempLog.txt') + const summary = `Total tests:${totalTests} Wrong error codes:${wrongErrors} Unexpected errors:${unexpectedErrors}\n` + if (displayLog) { + fs.writeFileSync(outBad, summary + badLog.join('\n'), 'utf8') + } + }) + + test('It should indicate that something is a special tag', () => { + const special = new SpecialTagValidator() + const schema3 = schemaMap.get('8.3.0') + expect(schema3).toBeDefined() + const testString = 'Onset, Offset/Apple, (Onset, Inset, (Offset, Def/Apple), Def, (Red,(Blue)))' + const [parseTest, issuesTest] = parseHedString(testString, schema3) + + const issueList = special.checkTags(parseTest.tags) + + const output = [] + for (const item of parseTest.tags) { + output.push(getTagInfo(item)) + } + + for (const item of parseTest.tagGroups) { + for (const group of item.subGroupArrayIterator()) { + console.log(group) + } + } + console.log(output.join('\n')) + + console.log('help') + }) +}) diff --git a/tests/temp.spec.js b/tests/temp.spec.js new file mode 100644 index 00000000..6e7751c0 --- /dev/null +++ b/tests/temp.spec.js @@ -0,0 +1,83 @@ +import chai from 'chai' + +const assert = chai.assert +import { beforeAll, describe, it } from '@jest/globals' + +import { generateIssue } from '../common/issues/issues' +import { Schemas, SchemaSpec, SchemasSpec } from '../common/schema/types' +import { recursiveMap } from '../utils/array' +import { parseHedString } from '../parser/parser' +import { ParsedHedTag } from '../parser/parsedHedTag' +import HedStringSplitter from '../parser/splitter' +import { buildSchemas } from '../validator/schema/init' +import ColumnSplicer from '../parser/columnSplicer' +import ParsedHedGroup from '../parser/parsedHedGroup' +import { HedStringTokenizer } from '../parser/tokenizer' +import { HedStringTokenizerNew } from '../parser/tokenizerNew' + +describe('HED string parsing', () => { + it('should include each group as its own single element', () => { + const hedString = 'y,' + const tok = new HedStringTokenizerNew(hedString) + const [tagSpecs, groupBounds, tokenizingIssues] = tok.tokenize() + assert.isEmpty(Object.values(tokenizingIssues).flat(), 'Parsing issues occurred') + //const hedString = 'Action/Move/My-flex,(Relation/Spatial-relation/Left-side-of,Action/Move/My-bend,Upper-extremity/My-elbow),Position/X-position/70 m,Position/Y-position/23 m' + // const [result, issues] = splitHedString(hedString, nullSchema) + // assert.isEmpty(Object.values(issues).flat(), 'Parsing issues occurred') + }) + + // const tokenizeTester = function(testStrings, testFunction) { + // for (const [testStringKey, testString] of Object.entries(testStrings)) { + // const testResult = testFunction(testStringKey, testString) + // } + // } + // + // const tokenizeTesterBad = function(testStrings, issueCodes, testFunction) { + // for (const [testStringKey, testString] of Object.entries(testStrings)) { + // const testResult = testFunction(testStringKey, issueCode.testStringKey, testString) + // } + // } + // + // it('should tokenize valid strings', () => { + // const testStrings = { + // oneBrace: 'x,{y}', + // braceParentheses1: '(({yz}))', + // leadingBlank: ' {x},z,', + // } + // tokenizeTester(testStrings, (key, string) => { + // const tok = new HedStringTokenizer(string) + // const [tagSpecs, groupBounds, issues] = tok.tokenize() + // assert.isEmpty(Object.values(issues).flat(), `${key}: ${issues}`) + // const tok1 = new HedStringTokenizerOld(string) + // const [tagSpec1, groupBounds1, issues1] = tok1.tokenize() + // assert.isEmpty(Object.values(issues1).flat(), `${key}: ${issues1}`) + // }) + // }) + // + // it('should tokenize invalid strings', () => { + // const testStrings = { + // // oneBrace: 'x,{y}', + // // braceParentheses1: '(({yz}))', + // // leadingBlank: ' {x},z' + // //onlyComma: ' ,', + // doubleTrailingComma: 'x,,', + // } + // + // const expectedIssues = { + // onlyComma: 'emptyTagFound', + // doubleTrailingComma: 'emptyTagFound', + // } + // + // for (const [testStringKey, testString] of Object.entries(testStrings)) { + // const tok = new HedStringTokenizer(testString) + // const [tagSpecs, groupBounds, issues] = tok.tokenize() + // const issuesFlat = Object.values(issues).flat() + // const expectedIssue = expectedIssues[testStringKey] || '' + // assert.equal(issuesFlat['code'], expectedIssue, `Expected ${expectedIssue} for "${testString}"`) + // } + // // assert.isEmpty(Object.values(issues).flat(), `${key}: ${issues}`) + // // const tok1 = new HedStringTokenizerOld(string) + // // const [tagSpec1, groupBounds1, issues1] = tok1.tokenize() + // // assert.isEmpty(Object.values(issues1).flat(), `${key}: ${issues1}`) + // }) +}) diff --git a/tests/temp.tockenizer.spec.js b/tests/temp.tockenizer.spec.js new file mode 100644 index 00000000..4c1562d7 --- /dev/null +++ b/tests/temp.tockenizer.spec.js @@ -0,0 +1,115 @@ +import chai from 'chai' +const assert = chai.assert +import { beforeAll, describe, afterAll } from '@jest/globals' + +import * as hed from '../validator/event' +import { BidsHedIssue } from '../bids/types/issues' +import path from 'path' +import { HedStringTokenizer } from '../parser/tokenizer' +import { generateIssue, IssueError } from '../common/issues/issues' +const fs = require('fs') + +const displayLog = process.env.DISPLAY_LOG === 'true' + +const skippedErrors = {} +const readFileSync = fs.readFileSync +// const test_file_name = 'javascriptTests.json' +const test_file_name = 'temp.json' + +function loadTestData() { + const testFile = path.join(__dirname, test_file_name) + return JSON.parse(readFileSync(testFile, 'utf8')) +} + +const testInfo = loadTestData() + +describe('HED tokenizer validation using JSON tests', () => { + const badLog = [] + let totalTests = 0 + let wrongErrors = 0 + let unexpectedErrors = 0 + + beforeAll(async () => {}) + + afterAll(() => { + const outBad = path.join(__dirname, 'runLog.txt') + const summary = `Total tests:${totalTests} Wrong error codes:${wrongErrors} Unexpected errors:${unexpectedErrors}\n` + if (displayLog) { + fs.writeFileSync(outBad, summary + badLog.join('\n'), 'utf8') + } + }) + + describe.each(testInfo)( + '$hedCode $code $name : $description', + ({ hedCode, code, name, description, warning, fails }) => { + let itemLog + let hasWarning + + const assertErrors = function (hedCode, code, expectError, iLog, header, issues) { + const log = [header] + totalTests += 1 + + const errors = Object.values(issues).flat() + if (errors.length > 0) { + log.push(`---has errors [${errorString}]`) + } + const expectedError = code + const wrongError = `---expected ${eCode} ${altErrorString} but got errors [${errorString}]` + const hasErrors = `---expected no errors but got errors [${errorString}]` + if (expectError && !expectedErrors.some((substring) => errorString.includes(substring))) { + log.push(wrongError) + iLog.push(log.join('\n')) + wrongErrors += 1 + assert(errorString.includes(eCode), `${header}---expected ${eCode} and got errors [${errorString}]`) + } else if (!expectError && errorString.length > 0) { + log.push(hasErrors) + iLog.push(log.join('\n')) + unexpectedErrors += 1 + assert(errorString.length === 0, `${header}---expected no errors but got errors [${errorString}]`) + } + } + + const stringTokenizer = function (eHedCode, eCode, eName, tokenizer, expectError, iLog) { + const status = expectError ? 'Expect fail' : 'Expect pass' + const header = `\n[${eHedCode} ${eName}](${status})\tSTRING: "${tokenizer.hedString}"` + const [tagSpecs, groupBounds, tokenizingIssues] = tokenizer.tokenize() + assertErrors(eHedCode, eCode, expectError, iLog, header, tokenizingIssues) + } + + /** + * Convert an Error into an Issue. + * + * @param {Error} issueError A thrown error. + * @returns {Issue} A HED issue. + */ + // const convertIssue = function (issueError) { + // if (issueError instanceof IssueError) { + // return issueError.issue + // } else { + // return generateIssue('internalError', { message: issueError.message }) + // } + // } + + beforeAll(async () => { + itemLog = [] + hasWarning = warning + }) + + afterAll(() => { + badLog.push(itemLog.join('\n')) + }) + + // if (testInfo.passes.length > 0) { + // test.each(testInfo.passes)('Valid string: %s', (str) => { + // stringValidator(error_code, alt_codes, name, str, hedSchema, defs, false, itemLog) + // }) + // } + + if (testInfo.fails.length > 0) { + test.each(testInfo.fails)('NewTokenizer: Invalid string: %s ', (str) => { + stringTokenizer(hedCode, code, name, new HedStringTokenizer(str), true, itemLog) + }) + } + }, + ) +}) From 01ecd6a7355a0e4a362be501085ee3e441943a95 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Wed, 16 Oct 2024 16:47:31 -0500 Subject: [PATCH 03/21] Updated the test of tokenizer --- spec_tests/temp1.spec.js | 140 --------------------------------- tests/data/tokenizerTests.json | 74 +++++++++++++++++ tests/temp.tockenizer.spec.js | 75 ++++++++++-------- 3 files changed, 115 insertions(+), 174 deletions(-) delete mode 100644 spec_tests/temp1.spec.js create mode 100644 tests/data/tokenizerTests.json diff --git a/spec_tests/temp1.spec.js b/spec_tests/temp1.spec.js deleted file mode 100644 index f6a1de4b..00000000 --- a/spec_tests/temp1.spec.js +++ /dev/null @@ -1,140 +0,0 @@ -import chai from 'chai' -const assert = chai.assert -import { beforeAll, describe, afterAll } from '@jest/globals' -import { parseHedString } from '../parser/main' -import { ParsedHedGroup } from '../parser/parsedHedGroup' -import { ParsedHedTag } from '../parser/parsedHedTag' -import { ParsedHedString } from '../parser/parsedHedString' -import { SpecialTagValidator } from '../validator/event/special' - -import * as hed from '../validator/event' -import { BidsIssue } from '../bids/types/issues' -import { buildSchemas } from '../validator/schema/init' -import { SchemaSpec, SchemasSpec } from '../common/schema/types' -import path from 'path' -import { BidsSidecar, BidsTsvFile } from '../bids' -const fs = require('fs') - -const displayLog = process.env.DISPLAY_LOG === 'true' - -const skippedErrors = { - VERSION_DEPRECATED: 'Not handling in the spec tests', - ELEMENT_DEPRECATED: 'Not handling in this round. This is a warning', - STYLE_WARNING: 'Not handling style warnings at this time', - 'invalid-character-name-value-class-deprecated': 'We will let this pass regardless of schema version.', -} -const readFileSync = fs.readFileSync -const test_file_name = 'javascript_tests.json' -//const test_file_name = 'temp3.json'; - -function comboListToStrings(items) { - const comboItems = [] - if (items === undefined || items.length === 0) { - return comboItems - } - for (const item of items) { - const nextItem = [JSON.stringify(item.sidecar), tsvToString(item.events)] - comboItems.push(nextItem) - } - return comboItems -} - -function getMergedSidecar(side1, side2) { - return Object.assign({}, JSON.parse(side1), side2) -} - -function loadTestData() { - const testFile = path.join(__dirname, test_file_name) - return JSON.parse(readFileSync(testFile, 'utf8')) -} - -const testInfo = loadTestData() - -function stringifyList(items) { - const stringItems = [] - if (items === undefined || items.length === 0) { - return stringItems - } - for (const item of items) { - stringItems.push(JSON.stringify(item)) - } - return stringItems -} - -function tsvListToStrings(eventList) { - const eventStrings = [] - if (eventList === undefined || eventList.length === 0) { - return eventStrings - } - for (const item of eventList) { - eventStrings.push(tsvToString(item)) - } - return eventStrings -} - -function tsvToString(events) { - return events.map((row) => row.join('\t')).join('\n') -} - -function getTagInfo(tag) { - const name = tag._schemaTag.name - const remainder = tag._remainder - const longName = tag.longName - const canonicalTagName = tag.canonicalTagName - return `name:${name} remainder:${remainder} longName:${longName} canonical:${canonicalTagName}` -} - -describe('HED validation using JSON tests', () => { - const schemaMap = new Map([ - ['8.2.0', undefined], - ['8.3.0', undefined], - ]) - - const badLog = [] - let totalTests = 0 - let wrongErrors = 0 - let unexpectedErrors = 0 - - beforeAll(async () => { - const spec2 = new SchemaSpec('', '8.2.0', '', path.join(__dirname, '../tests/data/HED8.2.0.xml')) - const specs2 = new SchemasSpec().addSchemaSpec(spec2) - const schemas2 = await buildSchemas(specs2) - const spec3 = new SchemaSpec('', '8.3.0', '', path.join(__dirname, '../tests/data/HED8.3.0.xml')) - const specs3 = new SchemasSpec().addSchemaSpec(spec3) - const schemas3 = await buildSchemas(specs3) - schemaMap.set('8.2.0', schemas2) - schemaMap.set('8.3.0', schemas3) - }) - - afterAll(() => { - const outBad = path.join(__dirname, 'tempLog.txt') - const summary = `Total tests:${totalTests} Wrong error codes:${wrongErrors} Unexpected errors:${unexpectedErrors}\n` - if (displayLog) { - fs.writeFileSync(outBad, summary + badLog.join('\n'), 'utf8') - } - }) - - test('It should indicate that something is a special tag', () => { - const special = new SpecialTagValidator() - const schema3 = schemaMap.get('8.3.0') - expect(schema3).toBeDefined() - const testString = 'Onset, Offset/Apple, (Onset, Inset, (Offset, Def/Apple), Def, (Red,(Blue)))' - const [parseTest, issuesTest] = parseHedString(testString, schema3) - - const issueList = special.checkTags(parseTest.tags) - - const output = [] - for (const item of parseTest.tags) { - output.push(getTagInfo(item)) - } - - for (const item of parseTest.tagGroups) { - for (const group of item.subGroupArrayIterator()) { - console.log(group) - } - } - console.log(output.join('\n')) - - console.log('help') - }) -}) diff --git a/tests/data/tokenizerTests.json b/tests/data/tokenizerTests.json new file mode 100644 index 00000000..40ac1963 --- /dev/null +++ b/tests/data/tokenizerTests.json @@ -0,0 +1,74 @@ +[ + { + "hedCode": "TAG_EMPTY", + "code": "emptyTagFound", + "name": "empty-tag-in-various-places", + "description": "Empty tags in various places (empty groups are allowed).", + "warning": false, + "tests": { + "fails": [ + { + "name": "end-in-comma", + "string": "x,y,", + "issueCount": 1, + "hedCode": "TAG_EMPTY", + "code": "emptyTagFound", + "explanation": "Can't end in a comma" + }, + { + "name": "double-in-comma", + "string": "x,,y,", + "issueCount": 1, + "hedCode": "TAG_EMPTY", + "code": "emptyTagFound", + "explanation": "Can't have double commas" + }, + { + "name": "leading-comma", + "string": ",x,y", + "issueCount": 1, + "hedCode": "TAG_EMPTY", + "code": "emptyTagFound", + "explanation": "Can't leading comma" + } + ], + "passes": [] + } + }, + { + "hedCode": "TAG_INVALID", + "code": "emptyTagFound", + "name": "empty-tag-in-various-places", + "description": "Empty tags in various places (empty groups are allowed).", + "warning": false, + "tests": { + "fails": [ + { + "name": "end-in-comma", + "string": "x,y,", + "issueCount": 1, + "hedCode": "TAG_EMPTY", + "code": "emptyTagFound", + "explanation": "Can't end in a comma" + }, + { + "name": "double-in-comma", + "string": "x,,y,", + "issueCount": 1, + "hedCode": "TAG_EMPTY", + "code": "emptyTagFound", + "explanation": "Can't have double commas" + } + ], + "passes": [] + } + }, + { + "name": "leading-comma", + "string": " x, y ", + "issueCount": 0, + "hedCode": "TAG_EMPTY", + "code": "", + "explanation": "Can have extra blanks" + } +] diff --git a/tests/temp.tockenizer.spec.js b/tests/temp.tockenizer.spec.js index 4c1562d7..1cf1bcce 100644 --- a/tests/temp.tockenizer.spec.js +++ b/tests/temp.tockenizer.spec.js @@ -6,6 +6,7 @@ import * as hed from '../validator/event' import { BidsHedIssue } from '../bids/types/issues' import path from 'path' import { HedStringTokenizer } from '../parser/tokenizer' +import { HedStringTokenizerNew } from '../parser/tokenizerNew' import { generateIssue, IssueError } from '../common/issues/issues' const fs = require('fs') @@ -14,7 +15,7 @@ const displayLog = process.env.DISPLAY_LOG === 'true' const skippedErrors = {} const readFileSync = fs.readFileSync // const test_file_name = 'javascriptTests.json' -const test_file_name = 'temp.json' +const test_file_name = './data/tokenizerTests.json' function loadTestData() { const testFile = path.join(__dirname, test_file_name) @@ -22,6 +23,7 @@ function loadTestData() { } const testInfo = loadTestData() +console.log(testInfo) describe('HED tokenizer validation using JSON tests', () => { const badLog = [] @@ -39,57 +41,58 @@ describe('HED tokenizer validation using JSON tests', () => { } }) + test('dummy test', () => { + const x = 1 + expect(x).toBeDefined() + }) + describe.each(testInfo)( '$hedCode $code $name : $description', - ({ hedCode, code, name, description, warning, fails }) => { + ({ hedCode, code, name, description, warning, tests }) => { let itemLog let hasWarning - const assertErrors = function (hedCode, code, expectError, iLog, header, issues) { + const assertErrors = function (eHedCode, eCode, expectError, iLog, header, issues) { const log = [header] totalTests += 1 - const errors = Object.values(issues).flat() + let errors = [] + if (issues.length > 0) { + errors = issues.map((dict) => dict.hedCode) // list of hedCodes in the issues + } + const errorString = errors.join(',') if (errors.length > 0) { log.push(`---has errors [${errorString}]`) } - const expectedError = code - const wrongError = `---expected ${eCode} ${altErrorString} but got errors [${errorString}]` + const expectedError = eCode + const wrongError = `---expected ${eHedCode} but got errors [${errorString}]` const hasErrors = `---expected no errors but got errors [${errorString}]` - if (expectError && !expectedErrors.some((substring) => errorString.includes(substring))) { + if (expectError && !errors.includes(eHedCode)) { log.push(wrongError) iLog.push(log.join('\n')) wrongErrors += 1 - assert(errorString.includes(eCode), `${header}---expected ${eCode} and got errors [${errorString}]`) - } else if (!expectError && errorString.length > 0) { + assert.strictEqual( + errors.includes(eHedCode), + true, + `${header}---expected ${eHedCode} and got errors [${errorString}]`, + ) + } else if (!expectError && errors.length > 0) { log.push(hasErrors) iLog.push(log.join('\n')) unexpectedErrors += 1 - assert(errorString.length === 0, `${header}---expected no errors but got errors [${errorString}]`) + assert(errors.length === 0, `${header}---expected no errors but got errors [${errorString}]`) } } const stringTokenizer = function (eHedCode, eCode, eName, tokenizer, expectError, iLog) { const status = expectError ? 'Expect fail' : 'Expect pass' - const header = `\n[${eHedCode} ${eName}](${status})\tSTRING: "${tokenizer.hedString}"` + const tokType = tokenizer instanceof HedStringTokenizer ? 'Original-tokenizer' : 'New tokenizer' + const header = `\n[${eHedCode} ${eName} ${tokType}](${status})\tSTRING: "${tokenizer.hedString}"` const [tagSpecs, groupBounds, tokenizingIssues] = tokenizer.tokenize() - assertErrors(eHedCode, eCode, expectError, iLog, header, tokenizingIssues) + const issues = Object.values(tokenizingIssues).flat() + assertErrors(eHedCode, eCode, expectError, iLog, header, issues) } - /** - * Convert an Error into an Issue. - * - * @param {Error} issueError A thrown error. - * @returns {Issue} A HED issue. - */ - // const convertIssue = function (issueError) { - // if (issueError instanceof IssueError) { - // return issueError.issue - // } else { - // return generateIssue('internalError', { message: issueError.message }) - // } - // } - beforeAll(async () => { itemLog = [] hasWarning = warning @@ -99,15 +102,19 @@ describe('HED tokenizer validation using JSON tests', () => { badLog.push(itemLog.join('\n')) }) - // if (testInfo.passes.length > 0) { - // test.each(testInfo.passes)('Valid string: %s', (str) => { - // stringValidator(error_code, alt_codes, name, str, hedSchema, defs, false, itemLog) - // }) - // } + test('dummy test', () => { + const y = 1 + expect(y).toBeDefined() + }) + + if (tests.fails && tests.fails.length > 0) { + test.each(tests.fails)('NewTokenizer: Invalid string: %s ', (ex) => { + //console.log(ex) + stringTokenizer(ex.hedCode, ex.code, ex.name, new HedStringTokenizerNew(ex.string), true, itemLog) + }) - if (testInfo.fails.length > 0) { - test.each(testInfo.fails)('NewTokenizer: Invalid string: %s ', (str) => { - stringTokenizer(hedCode, code, name, new HedStringTokenizer(str), true, itemLog) + test.each(tests.fails)('Original tokenizer: Invalid string: %s ', (ex) => { + stringTokenizer(ex.hedCode, ex.code, ex.name, new HedStringTokenizer(ex.string), true, itemLog) }) } }, From 7b72f7877b1c27200c2d13e3d4665b439fdd983d Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Thu, 17 Oct 2024 06:25:22 -0500 Subject: [PATCH 04/21] First pass at testing tokenizer --- common/issues/data.js | 10 +++ tests/data/tokenizerTests.json | 100 +++++++++++++++++++------ tests/temp.tockenizer.spec.js | 125 ++++++++++++++------------------ tests/tockenizerPassing.spec.js | 119 ++++++++++++++++++++++++++++++ tests/tokenizerTestData.js | 17 +++++ 5 files changed, 279 insertions(+), 92 deletions(-) create mode 100644 tests/tockenizerPassing.spec.js create mode 100644 tests/tokenizerTestData.js diff --git a/common/issues/data.js b/common/issues/data.js index 2ef06151..67b59b3c 100644 --- a/common/issues/data.js +++ b/common/issues/data.js @@ -43,6 +43,16 @@ export default { level: 'error', message: stringTemplate`Invalid tag - "${'tag'}".`, }, + extraSlash: { + hedCode: 'TAG_INVALID', + level: 'error', + message: stringTemplate`Tag extra slash at index ${'index'} of string "${'string'}".`, + }, + extraBlank: { + hedCode: 'TAG_INVALID', + level: 'error', + message: stringTemplate`Tag extra blank at index ${'index'} of string "${'string'}".`, + }, extraCommaOrInvalid: { hedCode: 'TAG_INVALID', level: 'error', diff --git a/tests/data/tokenizerTests.json b/tests/data/tokenizerTests.json index 40ac1963..53c4d950 100644 --- a/tests/data/tokenizerTests.json +++ b/tests/data/tokenizerTests.json @@ -1,7 +1,5 @@ [ { - "hedCode": "TAG_EMPTY", - "code": "emptyTagFound", "name": "empty-tag-in-various-places", "description": "Empty tags in various places (empty groups are allowed).", "warning": false, @@ -36,39 +34,97 @@ } }, { - "hedCode": "TAG_INVALID", - "code": "emptyTagFound", - "name": "empty-tag-in-various-places", - "description": "Empty tags in various places (empty groups are allowed).", + "name": "extra-slash-in-various-places", + "description": "Tags can't have leading or trailing, or extra slashes", "warning": false, "tests": { "fails": [ { - "name": "end-in-comma", - "string": "x,y,", + "name": "leading-slash", + "string": "/x", "issueCount": 1, - "hedCode": "TAG_EMPTY", - "code": "emptyTagFound", - "explanation": "Can't end in a comma" + "hedCode": "TAG_INVALID", + "code": "extraSlash", + "explanation": "Can't have a leading slash" }, { - "name": "double-in-comma", - "string": "x,,y,", + "name": "double-slash", + "string": "x//y", "issueCount": 1, - "hedCode": "TAG_EMPTY", - "code": "emptyTagFound", - "explanation": "Can't have double commas" + "hedCode": "TAG_INVALID", + "code": "extraSlash", + "explanation": "Can't have double slash" + }, + { + "name": "triple-slash", + "string": "x///y", + "issueCount": 1, + "hedCode": "TAG_INVALID", + "code": "extraSlash", + "explanation": "Can't have double slash" + }, + { + "name": "trailing-slash", + "string": "x/y/", + "issueCount": 1, + "hedCode": "TAG_INVALID", + "code": "extraSlash", + "explanation": "Can't have ending slash" + }, + { + "name": "value-slash", + "string": "x /y", + "issueCount": 1, + "hedCode": "TAG_INVALID", + "code": "extraSlash", + "explanation": "Can't have ending slash" } ], "passes": [] } }, { - "name": "leading-comma", - "string": " x, y ", - "issueCount": 0, - "hedCode": "TAG_EMPTY", - "code": "", - "explanation": "Can have extra blanks" + "hedCode": "SIDECAR_BRACES_INVALID", + "code": "unopenedCurlyBrace", + "name": "unopened-curly-brace", + "description": "Tags can't have leading or trailing, or extra slashes", + "warning": false, + "tests": { + "fails": [ + { + "name": "leading-close-brace", + "string": "}x", + "issueCount": 1, + "hedCode": "TAG_INVALID", + "code": "extraSlash", + "explanation": "Can't have a leading slash" + }, + { + "name": "double-slash", + "string": "x//y", + "issueCount": 1, + "hedCode": "TAG_INVALID", + "code": "extraSlash", + "explanation": "Can't have double slash" + }, + { + "name": "triple-slash", + "string": "x///y", + "issueCount": 1, + "hedCode": "TAG_INVALID", + "code": "extraSlash", + "explanation": "Can't have double slash" + }, + { + "name": "trailing-slash", + "string": "x/y/", + "issueCount": 1, + "hedCode": "TAG_INVALID", + "code": "extraSlash", + "explanation": "Can't have ending slash" + } + ], + "passes": [] + } } ] diff --git a/tests/temp.tockenizer.spec.js b/tests/temp.tockenizer.spec.js index 1cf1bcce..ede77d6f 100644 --- a/tests/temp.tockenizer.spec.js +++ b/tests/temp.tockenizer.spec.js @@ -41,82 +41,67 @@ describe('HED tokenizer validation using JSON tests', () => { } }) - test('dummy test', () => { - const x = 1 - expect(x).toBeDefined() - }) + describe.each(testInfo)('$name : $description', ({ tests }) => { + let itemLog - describe.each(testInfo)( - '$hedCode $code $name : $description', - ({ hedCode, code, name, description, warning, tests }) => { - let itemLog - let hasWarning - - const assertErrors = function (eHedCode, eCode, expectError, iLog, header, issues) { - const log = [header] - totalTests += 1 - - let errors = [] - if (issues.length > 0) { - errors = issues.map((dict) => dict.hedCode) // list of hedCodes in the issues - } - const errorString = errors.join(',') - if (errors.length > 0) { - log.push(`---has errors [${errorString}]`) - } - const expectedError = eCode - const wrongError = `---expected ${eHedCode} but got errors [${errorString}]` - const hasErrors = `---expected no errors but got errors [${errorString}]` - if (expectError && !errors.includes(eHedCode)) { - log.push(wrongError) - iLog.push(log.join('\n')) - wrongErrors += 1 - assert.strictEqual( - errors.includes(eHedCode), - true, - `${header}---expected ${eHedCode} and got errors [${errorString}]`, - ) - } else if (!expectError && errors.length > 0) { - log.push(hasErrors) - iLog.push(log.join('\n')) - unexpectedErrors += 1 - assert(errors.length === 0, `${header}---expected no errors but got errors [${errorString}]`) - } - } + const assertErrors = function (eHedCode, eCode, expectError, iLog, header, issues) { + const log = [header] + totalTests += 1 - const stringTokenizer = function (eHedCode, eCode, eName, tokenizer, expectError, iLog) { - const status = expectError ? 'Expect fail' : 'Expect pass' - const tokType = tokenizer instanceof HedStringTokenizer ? 'Original-tokenizer' : 'New tokenizer' - const header = `\n[${eHedCode} ${eName} ${tokType}](${status})\tSTRING: "${tokenizer.hedString}"` - const [tagSpecs, groupBounds, tokenizingIssues] = tokenizer.tokenize() - const issues = Object.values(tokenizingIssues).flat() - assertErrors(eHedCode, eCode, expectError, iLog, header, issues) + let errors = [] + if (issues.length > 0) { + errors = issues.map((dict) => dict.hedCode) // list of hedCodes in the issues + } + const errorString = errors.join(',') + if (errors.length > 0) { + log.push(`---has errors [${errorString}]`) + } + const expectedError = eCode + const wrongError = `---expected ${eHedCode} but got errors [${errorString}]` + const hasErrors = `---expected no errors but got errors [${errorString}]` + if (expectError && !errors.includes(eHedCode)) { + log.push(wrongError) + iLog.push(log.join('\n')) + wrongErrors += 1 + assert.strictEqual( + errors.includes(eHedCode), + true, + `${header}---expected ${eHedCode} and got errors [${errorString}]`, + ) + } else if (!expectError && errors.length > 0) { + log.push(hasErrors) + iLog.push(log.join('\n')) + unexpectedErrors += 1 + assert(errors.length === 0, `${header}---expected no errors but got errors [${errorString}]`) } + } - beforeAll(async () => { - itemLog = [] - hasWarning = warning - }) + const stringTokenizer = function (eHedCode, eCode, eName, tokenizer, expectError, iLog) { + const status = expectError ? 'Expect fail' : 'Expect pass' + const tokType = tokenizer instanceof HedStringTokenizer ? 'Original-tokenizer' : 'New tokenizer' + const header = `\n[${eHedCode} ${eName} ${tokType}](${status})\tSTRING: "${tokenizer.hedString}"` + const [tagSpecs, groupBounds, tokenizingIssues] = tokenizer.tokenize() + const issues = Object.values(tokenizingIssues).flat() + assertErrors(eHedCode, eCode, expectError, iLog, header, issues) + } - afterAll(() => { - badLog.push(itemLog.join('\n')) - }) + beforeAll(async () => { + itemLog = [] + }) - test('dummy test', () => { - const y = 1 - expect(y).toBeDefined() - }) + afterAll(() => { + badLog.push(itemLog.join('\n')) + }) - if (tests.fails && tests.fails.length > 0) { - test.each(tests.fails)('NewTokenizer: Invalid string: %s ', (ex) => { - //console.log(ex) - stringTokenizer(ex.hedCode, ex.code, ex.name, new HedStringTokenizerNew(ex.string), true, itemLog) - }) + if (tests.fails && tests.fails.length > 0) { + test.each(tests.fails)('NewTokenizer: Invalid string: %s ', (ex) => { + //console.log(ex) + stringTokenizer(ex.hedCode, ex.code, ex.name, new HedStringTokenizerNew(ex.string), true, itemLog) + }) - test.each(tests.fails)('Original tokenizer: Invalid string: %s ', (ex) => { - stringTokenizer(ex.hedCode, ex.code, ex.name, new HedStringTokenizer(ex.string), true, itemLog) - }) - } - }, - ) + test.each(tests.fails && tests.fails.length > 0)('Original tokenizer: Invalid string: %s ', (ex) => { + stringTokenizer(ex.hedCode, ex.code, ex.name, new HedStringTokenizer(ex.string), true, itemLog) + }) + } + }) }) diff --git a/tests/tockenizerPassing.spec.js b/tests/tockenizerPassing.spec.js new file mode 100644 index 00000000..c40485fb --- /dev/null +++ b/tests/tockenizerPassing.spec.js @@ -0,0 +1,119 @@ +import chai from 'chai' +const assert = chai.assert +import { beforeAll, describe, afterAll } from '@jest/globals' + +import * as hed from '../validator/event' +import { BidsHedIssue } from '../bids/types/issues' +import path from 'path' +import { HedStringTokenizer } from '../parser/tokenizer' +import { HedStringTokenizerNew } from '../parser/tokenizerNew' +import { generateIssue, IssueError } from '../common/issues/issues' +import passingTests from './tokenizerTestData' +const fs = require('fs') + +const displayLog = process.env.DISPLAY_LOG === 'true' + +const skippedErrors = {} +const readFileSync = fs.readFileSync +// const test_file_name = 'javascriptTests.json' + +describe('HED tokenizer validation - validData', () => { + const badLog = [] + let totalTests = 0 + let wrongErrors = 0 + let unexpectedErrors = 0 + + beforeAll(async () => {}) + + afterAll(() => { + const outBad = path.join(__dirname, 'runLog.txt') + const summary = `Total tests:${totalTests} Wrong error codes:${wrongErrors} Unexpected errors:${unexpectedErrors}\n` + if (displayLog) { + fs.writeFileSync(outBad, summary + badLog.join('\n'), 'utf8') + } + }) + + describe.each(testInfo)('$name : $description', ({ tests }) => { + let itemLog + + const checkForErrors = function (iLog, header, issues) { + const log = [header] + totalTests += 1 + + let errors = [] + if (issues.length > 0) { + errors = issues.map((dict) => dict.hedCode) // list of hedCodes in the issues + } + const errorString = errors.join(',') + if (errors.length > 0) { + log.push(`---has errors [${errorString}]`) + } + const expectedError = eCode + const wrongError = `---expected ${eHedCode} but got errors [${errorString}]` + const hasErrors = `---expected no errors but got errors [${errorString}]` + if (expectError && !errors.includes(eHedCode)) { + log.push(wrongError) + iLog.push(log.join('\n')) + wrongErrors += 1 + assert.strictEqual( + errors.includes(eHedCode), + true, + `${header}---expected ${eHedCode} and got errors [${errorString}]`, + ) + } else if (!expectError && errors.length > 0) { + log.push(hasErrors) + iLog.push(log.join('\n')) + unexpectedErrors += 1 + assert(errors.length === 0, `${header}---expected no errors but got errors [${errorString}]`) + } + } + + const stringTokenizer = function (eName, tokenizer, iLog) { + const status = 'Expect pass' + const tokType = tokenizer instanceof HedStringTokenizer ? 'Original-tokenizer' : 'New tokenizer' + const header = `\n[${tokType}](${status})\tSTRING: "${tokenizer.hedString}"` + const [tagSpecs, groupBounds, tokenizingIssues] = tokenizer.tokenize() + const issues = Object.values(tokenizingIssues).flat() + assertErrors(eHedCode, eCode, expectError, iLog, header, issues) + } + + const stringTokenizer = function (eHedCode, eCode, eName, tokenizer, expectError, iLog) { + const status = expectError ? 'Expect fail' : 'Expect pass' + const tokType = tokenizer instanceof HedStringTokenizer ? 'Original-tokenizer' : 'New tokenizer' + const header = `\n[${eHedCode} ${eName} ${tokType}](${status})\tSTRING: "${tokenizer.hedString}"` + const [tagSpecs, groupBounds, tokenizingIssues] = tokenizer.tokenize() + const issues = Object.values(tokenizingIssues).flat() + assertErrors(eHedCode, eCode, expectError, iLog, header, issues) + } + + beforeAll(async () => { + itemLog = [] + }) + + afterAll(() => { + badLog.push(itemLog.join('\n')) + }) + + if (tests && tests.length > 0) { + test.each(tests)('NewTokenizer: Invalid string: %s ', (ex) => { + //console.log(ex) + stringTokenizer(ex.name, new HedStringTokenizerNew(ex.string), true, itemLog) + }) + + test.each(tests.fails)('Original tokenizer: Invalid string: %s ', (ex) => { + stringTokenizer(ex.hedCode, ex.code, ex.name, new HedStringTokenizer(ex.string), true, itemLog) + }) + } + + if (tests.fails && tests.fails.length > 0) { + test.each(tests.fails)('NewTokenizer: Invalid string: %s ', (ex) => { + //console.log(ex) + stringTokenizer(ex.hedCode, ex.code, ex.name, new HedStringTokenizerNew(ex.string), true, itemLog) + }) + + test.each(tests.fails)('Original tokenizer: Invalid string: %s ', (ex) => { + stringTokenizer(ex.hedCode, ex.code, ex.name, new HedStringTokenizer(ex.string), true, itemLog) + }) + } + }) +}) diff --git a/tests/tokenizerTestData.js b/tests/tokenizerTestData.js new file mode 100644 index 00000000..223be116 --- /dev/null +++ b/tests/tokenizerTestData.js @@ -0,0 +1,17 @@ +export const passingTests = [ + { + name: 'valid-strings-simple', + description: 'Simple tags and groups', + warning: false, + tests: [ + { + name: 'internal-blank', + string: 'x y', + issueCount: 1, + hedCode: 'TAG_EMPTY', + code: 'emptyTagFound', + explanation: 'Cannot end in a comma', + }, + ], + }, +] From 0758bfe02066812c5f4e89d3cdb49eb0408b24fa Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Thu, 17 Oct 2024 17:16:45 -0500 Subject: [PATCH 05/21] Updated the tokenizer --- parser/tokenizer.js | 8 +- parser/tokenizerNew.js | 33 ++++- tests/data/tokenizerTests.json | 130 ------------------ tests/temp.spec.js | 2 +- ...r.spec.js => tockenizerErrorTests.spec.js} | 24 ++-- tests/tockenizerPassing.spec.js | 119 ---------------- tests/tokenizerErrorData.js | 130 ++++++++++++++++++ tests/tokenizerPassingData.js | 73 ++++++++++ tests/tokenizerPassingTests.spec.js | 91 ++++++++++++ tests/tokenizerTestData.js | 17 --- 10 files changed, 333 insertions(+), 294 deletions(-) delete mode 100644 tests/data/tokenizerTests.json rename tests/{temp.tockenizer.spec.js => tockenizerErrorTests.spec.js} (83%) delete mode 100644 tests/tockenizerPassing.spec.js create mode 100644 tests/tokenizerErrorData.js create mode 100644 tests/tokenizerPassingData.js create mode 100644 tests/tokenizerPassingTests.spec.js delete mode 100644 tests/tokenizerTestData.js diff --git a/parser/tokenizer.js b/parser/tokenizer.js index b308a9d9..4639e6a6 100644 --- a/parser/tokenizer.js +++ b/parser/tokenizer.js @@ -71,10 +71,10 @@ export class GroupSpec extends SubstringSpec { */ children - constructor(start, end) { + constructor(start, end, children) { super(start, end) - this.children = [] + this.children = children } } @@ -177,7 +177,7 @@ export class HedStringTokenizer { this.librarySchema = '' this.columnSpliceIndex = -1 this.currentGroupStack = [[]] - this.parenthesesStack = [new GroupSpec(0, this.hedString.length)] + this.parenthesesStack = [new GroupSpec(0, this.hedString.length, [])] this.ignoringCharacters = false this.closingGroup = false } @@ -218,7 +218,7 @@ export class HedStringTokenizer { openingGroupCharacter(i) { this.currentGroupStack.push([]) - this.parenthesesStack.push(new GroupSpec(i)) + this.parenthesesStack.push(new GroupSpec(i, undefined, [])) this.resetStartingIndex = true this.groupDepth++ } diff --git a/parser/tokenizerNew.js b/parser/tokenizerNew.js index 8af2ecbc..7aa2dda4 100644 --- a/parser/tokenizerNew.js +++ b/parser/tokenizerNew.js @@ -13,6 +13,18 @@ const CHARACTERS = { SLASH: '/', } +function getTrimmedBounds(originalString) { + const start = originalString.search(/\S/) + const end = originalString.search(/\S\s*$/) + + if (start === -1) { + // The string contains only whitespace + return null + } + + return [start, end + 1] +} + const invalidCharacters = new Set(['[', ']', '~', '"']) // Add control codes to invalidCharacters for (let i = 0x00; i <= 0x1f; i++) { @@ -72,10 +84,10 @@ export class GroupSpec extends SubstringSpec { */ children - constructor(start, end) { + constructor(start, end, children) { super(start, end) - this.children = [] + this.children = children } } @@ -106,7 +118,7 @@ class TokenizerState { this.librarySchema = '' this.columnSpliceIndex = -1 //Index of { if this token is column splice this.currentGroupStack = [[]] - this.parenthesesStack = [new GroupSpec(0)] + this.parenthesesStack = [] this.ignoringCharacters = false this.closingGroup = false // this.closingColumn = false @@ -120,7 +132,7 @@ export class HedStringTokenizerNew { constructor(hedString) { this.hedString = hedString this.syntaxIssues = [] - this.state = new TokenizerState() + this.state = null } /** @@ -140,7 +152,7 @@ export class HedStringTokenizerNew { this.state.currentToken = '' } } - this.pushTag(this.hedString.length - 1) + //this.pushTag(this.hedString.length - 1) if (this.state.columnSpliceIndex >= 0) { this.pushIssue('unclosedCurlyBrace', this.state.columnSpliceIndex) @@ -160,6 +172,7 @@ export class HedStringTokenizerNew { initializeTokenizer() { this.syntaxIssues = [] this.state = new TokenizerState() + this.state.parenthesesStack = [new GroupSpec(0, this.hedString.length, [])] } tokenizeCharacter(i, character) { @@ -218,7 +231,7 @@ export class HedStringTokenizerNew { handleOpeningGroup(i) { this.state.currentGroupStack.push([]) - this.state.parenthesesStack.push(new GroupSpec(i)) + this.state.parenthesesStack.push(new GroupSpec(i, undefined, [])) this.state.resetIndexFlag = true this.state.groupDepth++ } @@ -326,8 +339,14 @@ export class HedStringTokenizerNew { } else if (this.state.columnSpliceIndex < 0) { // Not a column splice so goes on group stack as a TagSpec this.checkValueTagForInvalidCharacters() + let bounds = getTrimmedBounds(this.state.currentToken) this.state.currentGroupStack[this.state.groupDepth].push( - new TagSpec(this.state.currentToken.trim(), this.state.startingIndex, i, this.state.librarySchema), + new TagSpec( + this.state.currentToken.trim(), + this.state.startingIndex + bounds[0], + this.state.startingIndex + bounds[1], + this.state.librarySchema, + ), ) } // Clear the current token and reset flags for the next iteration. diff --git a/tests/data/tokenizerTests.json b/tests/data/tokenizerTests.json deleted file mode 100644 index 53c4d950..00000000 --- a/tests/data/tokenizerTests.json +++ /dev/null @@ -1,130 +0,0 @@ -[ - { - "name": "empty-tag-in-various-places", - "description": "Empty tags in various places (empty groups are allowed).", - "warning": false, - "tests": { - "fails": [ - { - "name": "end-in-comma", - "string": "x,y,", - "issueCount": 1, - "hedCode": "TAG_EMPTY", - "code": "emptyTagFound", - "explanation": "Can't end in a comma" - }, - { - "name": "double-in-comma", - "string": "x,,y,", - "issueCount": 1, - "hedCode": "TAG_EMPTY", - "code": "emptyTagFound", - "explanation": "Can't have double commas" - }, - { - "name": "leading-comma", - "string": ",x,y", - "issueCount": 1, - "hedCode": "TAG_EMPTY", - "code": "emptyTagFound", - "explanation": "Can't leading comma" - } - ], - "passes": [] - } - }, - { - "name": "extra-slash-in-various-places", - "description": "Tags can't have leading or trailing, or extra slashes", - "warning": false, - "tests": { - "fails": [ - { - "name": "leading-slash", - "string": "/x", - "issueCount": 1, - "hedCode": "TAG_INVALID", - "code": "extraSlash", - "explanation": "Can't have a leading slash" - }, - { - "name": "double-slash", - "string": "x//y", - "issueCount": 1, - "hedCode": "TAG_INVALID", - "code": "extraSlash", - "explanation": "Can't have double slash" - }, - { - "name": "triple-slash", - "string": "x///y", - "issueCount": 1, - "hedCode": "TAG_INVALID", - "code": "extraSlash", - "explanation": "Can't have double slash" - }, - { - "name": "trailing-slash", - "string": "x/y/", - "issueCount": 1, - "hedCode": "TAG_INVALID", - "code": "extraSlash", - "explanation": "Can't have ending slash" - }, - { - "name": "value-slash", - "string": "x /y", - "issueCount": 1, - "hedCode": "TAG_INVALID", - "code": "extraSlash", - "explanation": "Can't have ending slash" - } - ], - "passes": [] - } - }, - { - "hedCode": "SIDECAR_BRACES_INVALID", - "code": "unopenedCurlyBrace", - "name": "unopened-curly-brace", - "description": "Tags can't have leading or trailing, or extra slashes", - "warning": false, - "tests": { - "fails": [ - { - "name": "leading-close-brace", - "string": "}x", - "issueCount": 1, - "hedCode": "TAG_INVALID", - "code": "extraSlash", - "explanation": "Can't have a leading slash" - }, - { - "name": "double-slash", - "string": "x//y", - "issueCount": 1, - "hedCode": "TAG_INVALID", - "code": "extraSlash", - "explanation": "Can't have double slash" - }, - { - "name": "triple-slash", - "string": "x///y", - "issueCount": 1, - "hedCode": "TAG_INVALID", - "code": "extraSlash", - "explanation": "Can't have double slash" - }, - { - "name": "trailing-slash", - "string": "x/y/", - "issueCount": 1, - "hedCode": "TAG_INVALID", - "code": "extraSlash", - "explanation": "Can't have ending slash" - } - ], - "passes": [] - } - } -] diff --git a/tests/temp.spec.js b/tests/temp.spec.js index 6e7751c0..8e886f9d 100644 --- a/tests/temp.spec.js +++ b/tests/temp.spec.js @@ -17,7 +17,7 @@ import { HedStringTokenizerNew } from '../parser/tokenizerNew' describe('HED string parsing', () => { it('should include each group as its own single element', () => { - const hedString = 'y,' + const hedString = '(xy)' const tok = new HedStringTokenizerNew(hedString) const [tagSpecs, groupBounds, tokenizingIssues] = tok.tokenize() assert.isEmpty(Object.values(tokenizingIssues).flat(), 'Parsing issues occurred') diff --git a/tests/temp.tockenizer.spec.js b/tests/tockenizerErrorTests.spec.js similarity index 83% rename from tests/temp.tockenizer.spec.js rename to tests/tockenizerErrorTests.spec.js index ede77d6f..dce34391 100644 --- a/tests/temp.tockenizer.spec.js +++ b/tests/tockenizerErrorTests.spec.js @@ -8,22 +8,14 @@ import path from 'path' import { HedStringTokenizer } from '../parser/tokenizer' import { HedStringTokenizerNew } from '../parser/tokenizerNew' import { generateIssue, IssueError } from '../common/issues/issues' -const fs = require('fs') - +import { errorTests } from './tokenizerErrorData' const displayLog = process.env.DISPLAY_LOG === 'true' +const fs = require('fs') const skippedErrors = {} -const readFileSync = fs.readFileSync -// const test_file_name = 'javascriptTests.json' -const test_file_name = './data/tokenizerTests.json' - -function loadTestData() { - const testFile = path.join(__dirname, test_file_name) - return JSON.parse(readFileSync(testFile, 'utf8')) -} -const testInfo = loadTestData() -console.log(testInfo) +//const testInfo = loadTestData() +console.log(errorTests) describe('HED tokenizer validation using JSON tests', () => { const badLog = [] @@ -41,7 +33,7 @@ describe('HED tokenizer validation using JSON tests', () => { } }) - describe.each(testInfo)('$name : $description', ({ tests }) => { + describe.each(errorTests)('$name : $description', ({ tests }) => { let itemLog const assertErrors = function (eHedCode, eCode, expectError, iLog, header, issues) { @@ -93,13 +85,13 @@ describe('HED tokenizer validation using JSON tests', () => { badLog.push(itemLog.join('\n')) }) - if (tests.fails && tests.fails.length > 0) { - test.each(tests.fails)('NewTokenizer: Invalid string: %s ', (ex) => { + if (tests && tests.length > 0) { + test.each(tests)('NewTokenizer: Invalid string: %s ', (ex) => { //console.log(ex) stringTokenizer(ex.hedCode, ex.code, ex.name, new HedStringTokenizerNew(ex.string), true, itemLog) }) - test.each(tests.fails && tests.fails.length > 0)('Original tokenizer: Invalid string: %s ', (ex) => { + test.each(tests)('Original tokenizer: Invalid string: %s ', (ex) => { stringTokenizer(ex.hedCode, ex.code, ex.name, new HedStringTokenizer(ex.string), true, itemLog) }) } diff --git a/tests/tockenizerPassing.spec.js b/tests/tockenizerPassing.spec.js deleted file mode 100644 index c40485fb..00000000 --- a/tests/tockenizerPassing.spec.js +++ /dev/null @@ -1,119 +0,0 @@ -import chai from 'chai' -const assert = chai.assert -import { beforeAll, describe, afterAll } from '@jest/globals' - -import * as hed from '../validator/event' -import { BidsHedIssue } from '../bids/types/issues' -import path from 'path' -import { HedStringTokenizer } from '../parser/tokenizer' -import { HedStringTokenizerNew } from '../parser/tokenizerNew' -import { generateIssue, IssueError } from '../common/issues/issues' -import passingTests from './tokenizerTestData' -const fs = require('fs') - -const displayLog = process.env.DISPLAY_LOG === 'true' - -const skippedErrors = {} -const readFileSync = fs.readFileSync -// const test_file_name = 'javascriptTests.json' - -describe('HED tokenizer validation - validData', () => { - const badLog = [] - let totalTests = 0 - let wrongErrors = 0 - let unexpectedErrors = 0 - - beforeAll(async () => {}) - - afterAll(() => { - const outBad = path.join(__dirname, 'runLog.txt') - const summary = `Total tests:${totalTests} Wrong error codes:${wrongErrors} Unexpected errors:${unexpectedErrors}\n` - if (displayLog) { - fs.writeFileSync(outBad, summary + badLog.join('\n'), 'utf8') - } - }) - - describe.each(testInfo)('$name : $description', ({ tests }) => { - let itemLog - - const checkForErrors = function (iLog, header, issues) { - const log = [header] - totalTests += 1 - - let errors = [] - if (issues.length > 0) { - errors = issues.map((dict) => dict.hedCode) // list of hedCodes in the issues - } - const errorString = errors.join(',') - if (errors.length > 0) { - log.push(`---has errors [${errorString}]`) - } - const expectedError = eCode - const wrongError = `---expected ${eHedCode} but got errors [${errorString}]` - const hasErrors = `---expected no errors but got errors [${errorString}]` - if (expectError && !errors.includes(eHedCode)) { - log.push(wrongError) - iLog.push(log.join('\n')) - wrongErrors += 1 - assert.strictEqual( - errors.includes(eHedCode), - true, - `${header}---expected ${eHedCode} and got errors [${errorString}]`, - ) - } else if (!expectError && errors.length > 0) { - log.push(hasErrors) - iLog.push(log.join('\n')) - unexpectedErrors += 1 - assert(errors.length === 0, `${header}---expected no errors but got errors [${errorString}]`) - } - } - - const stringTokenizer = function (eName, tokenizer, iLog) { - const status = 'Expect pass' - const tokType = tokenizer instanceof HedStringTokenizer ? 'Original-tokenizer' : 'New tokenizer' - const header = `\n[${tokType}](${status})\tSTRING: "${tokenizer.hedString}"` - const [tagSpecs, groupBounds, tokenizingIssues] = tokenizer.tokenize() - const issues = Object.values(tokenizingIssues).flat() - assertErrors(eHedCode, eCode, expectError, iLog, header, issues) - } - - const stringTokenizer = function (eHedCode, eCode, eName, tokenizer, expectError, iLog) { - const status = expectError ? 'Expect fail' : 'Expect pass' - const tokType = tokenizer instanceof HedStringTokenizer ? 'Original-tokenizer' : 'New tokenizer' - const header = `\n[${eHedCode} ${eName} ${tokType}](${status})\tSTRING: "${tokenizer.hedString}"` - const [tagSpecs, groupBounds, tokenizingIssues] = tokenizer.tokenize() - const issues = Object.values(tokenizingIssues).flat() - assertErrors(eHedCode, eCode, expectError, iLog, header, issues) - } - - beforeAll(async () => { - itemLog = [] - }) - - afterAll(() => { - badLog.push(itemLog.join('\n')) - }) - - if (tests && tests.length > 0) { - test.each(tests)('NewTokenizer: Invalid string: %s ', (ex) => { - //console.log(ex) - stringTokenizer(ex.name, new HedStringTokenizerNew(ex.string), true, itemLog) - }) - - test.each(tests.fails)('Original tokenizer: Invalid string: %s ', (ex) => { - stringTokenizer(ex.hedCode, ex.code, ex.name, new HedStringTokenizer(ex.string), true, itemLog) - }) - } - - if (tests.fails && tests.fails.length > 0) { - test.each(tests.fails)('NewTokenizer: Invalid string: %s ', (ex) => { - //console.log(ex) - stringTokenizer(ex.hedCode, ex.code, ex.name, new HedStringTokenizerNew(ex.string), true, itemLog) - }) - - test.each(tests.fails)('Original tokenizer: Invalid string: %s ', (ex) => { - stringTokenizer(ex.hedCode, ex.code, ex.name, new HedStringTokenizer(ex.string), true, itemLog) - }) - } - }) -}) diff --git a/tests/tokenizerErrorData.js b/tests/tokenizerErrorData.js new file mode 100644 index 00000000..9d403005 --- /dev/null +++ b/tests/tokenizerErrorData.js @@ -0,0 +1,130 @@ +export const errorTests = [ + { + name: 'empty-tag-in-various-places', + description: 'Empty tags in various places (empty groups are allowed).', + tests: [ + { + name: 'end-in-comma', + string: 'x,y,', + issueCount: 1, + hedCode: 'TAG_EMPTY', + code: 'emptyTagFound', + warning: false, + explanation: 'Cannot end in a comma', + }, + { + name: 'double-in-comma', + string: 'x,,y,', + issueCount: 1, + hedCode: 'TAG_EMPTY', + code: 'emptyTagFound', + warning: false, + explanation: 'Cannot have double commas', + }, + { + name: 'leading-comma', + string: ',x,y', + issueCount: 1, + hedCode: 'TAG_EMPTY', + code: 'emptyTagFound', + warning: false, + explanation: 'Cannot have a leading comma', + }, + ], + }, + { + name: 'extra-slash-in-various-places', + description: 'Tags cannot have leading or trailing, or extra slashes', + tests: [ + { + name: 'leading-slash', + string: '/x', + issueCount: 1, + hedCode: 'TAG_INVALID', + code: 'extraSlash', + warning: false, + explanation: 'Cannot have a leading slash', + }, + { + name: 'double-slash', + string: 'x//y', + issueCount: 1, + hedCode: 'TAG_INVALID', + code: 'extraSlash', + warning: false, + explanation: 'Cannot have double slash', + }, + { + name: 'triple-slash', + string: 'x///y', + issueCount: 1, + hedCode: 'TAG_INVALID', + code: 'extraSlash', + warning: false, + explanation: 'Cannot have double slash', + }, + { + name: 'trailing-slash', + string: 'x/y/', + issueCount: 1, + hedCode: 'TAG_INVALID', + code: 'extraSlash', + warning: false, + explanation: 'Cannot have ending slash', + }, + { + name: 'value-slash', + string: 'x /y', + issueCount: 1, + hedCode: 'TAG_INVALID', + code: 'extraSlash', + warning: false, + explanation: 'Cannot have ending slash', + }, + ], + }, + { + hedCode: 'SIDECAR_BRACES_INVALID', + code: 'unopenedCurlyBrace', + name: 'unopened-curly-brace', + description: 'Tags cannot have leading or trailing, or extra slashes', + tests: [ + { + name: 'leading-close-brace', + string: '}x', + issueCount: 1, + hedCode: 'SIDECAR_BRACES_INVALID', + code: 'extraSlash', + warning: false, + explanation: 'Cannot have a leading slash', + }, + { + name: 'double-slash', + string: 'x//y', + issueCount: 1, + hedCode: 'TAG_INVALID', + code: 'extraSlash', + warning: false, + explanation: 'Cannot have double slash', + }, + { + name: 'triple-slash', + string: 'x///y', + issueCount: 1, + hedCode: 'TAG_INVALID', + code: 'extraSlash', + warning: false, + explanation: 'Cannot have double slash', + }, + { + name: 'trailing-slash', + string: 'x/y/', + issueCount: 1, + hedCode: 'TAG_INVALID', + code: 'extraSlash', + warning: false, + explanation: 'Cannot have ending slash', + }, + ], + }, +] diff --git a/tests/tokenizerPassingData.js b/tests/tokenizerPassingData.js new file mode 100644 index 00000000..ec67fff8 --- /dev/null +++ b/tests/tokenizerPassingData.js @@ -0,0 +1,73 @@ +import { TagSpec, GroupSpec, ColumnSpliceSpec } from '../parser/tokenizerNew' + +export const passingTests = [ + { + name: 'valid-single-tags', + description: 'Single tags with no groups.', + warning: false, + tests: [ + // { + // name: 'simple-tag-no-blanks', + // string: 'xy', + // explanation: 'Should have bounds 0, 2', + // tagSpecs: [new TagSpec("xy", 0, 2, "")], + // groupSpec: new GroupSpec(0, undefined) + // }, + // { + // name: 'internal-blank', + // string: 'x y', + // explanation: 'Can have internal blank', + // tagSpecs: [new TagSpec("x y", 0, 3, "")], + // groupSpec: new GroupSpec(0, undefined) + // }, + // { + // name: 'extra-blanks-simple', + // string: ' xy ', + // explanation: 'Can have extra blanks', + // tagSpecs: [new TagSpec("xy", 1, 3, "")], + // groupSpec: new GroupSpec(0, undefined) + // } + ], + }, + // { + // name: 'valid-tags-no-groups', + // description: 'multiple tags with no groups.', + // warning: false, + // tests: [ + // { + // name: 'multiple-tags', + // string: 'xy,zy,wy', + // explanation: 'Can have extra blanks', + // tagSpecs: [new TagSpec("xy", 0, 2, ""), + // new TagSpec("zy", 3, 5, ""), + // new TagSpec("wy", 6, 8, "") + // ], + // groupSpec: new GroupSpec(0, undefined) + // }, + // { + // name: 'multiple-tags-with-blanks', + // string: ' xy, zy , wy ', + // explanation: 'Can have extra blanks', + // tagSpecs: [new TagSpec("xy", 1, 3, ""), + // new TagSpec("zy", 6, 8, ""), + // new TagSpec("wy", 11, 13, "") + // ], + // groupSpec: new GroupSpec(0, undefined,[]) + // }, + // ] + // }, + { + name: 'un-nested-groups', + description: 'Groups with no nesting', + warning: false, + tests: [ + { + name: 'single-non-empty-group-no-blanks', + string: '(xy)', + explanation: 'Single group', + tagSpecs: [[new TagSpec('xy', 1, 3, '')]], + groupSpec: new GroupSpec(0, 4, [new GroupSpec(0, 4, [])]), + }, + ], + }, +] diff --git a/tests/tokenizerPassingTests.spec.js b/tests/tokenizerPassingTests.spec.js new file mode 100644 index 00000000..6fb9e561 --- /dev/null +++ b/tests/tokenizerPassingTests.spec.js @@ -0,0 +1,91 @@ +import chai from 'chai' +const assert = chai.assert +import { beforeAll, describe, afterAll } from '@jest/globals' + +import * as hed from '../validator/event' +import { BidsHedIssue } from '../bids/types/issues' +import path from 'path' +import { HedStringTokenizer } from '../parser/tokenizer' +import { TagSpec, GroupSpec, ColumnSpliceSpec, HedStringTokenizerNew } from '../parser/tokenizerNew' +import { generateIssue, IssueError } from '../common/issues/issues' +import { passingTests } from './tokenizerPassingData' +const fs = require('fs') + +const displayLog = process.env.DISPLAY_LOG === 'true' + +const skippedErrors = {} + +describe('HED tokenizer validation - validData', () => { + const badLog = [] + let totalTests = 0 + let unexpectedErrors = 0 + + beforeAll(async () => {}) + + afterAll(() => { + const outBad = path.join(__dirname, 'runLog.txt') + const summary = `Total tests:${totalTests} Unexpected errors:${unexpectedErrors}\n` + if (displayLog) { + fs.writeFileSync(outBad, summary + badLog.join('\n'), 'utf8') + } + }) + + describe.each(passingTests)('$name : $description', ({ tests }) => { + let itemLog + + const assertErrors = function (header, issues, iLog) { + iLog.push(`${header}\n`) + totalTests += 1 + + let errors = [] + if (issues.length > 0) { + errors = issues.map((dict) => dict.hedCode) // list of hedCodes in the issues + } + const errorString = errors.join(',') + if (errors.length > 0) { + iLog.push(`---expected no errors but got errors [${errorString}]\n`) + unexpectedErrors += 1 + assert(errors.length === 0, `${header}---expected no errors but got errors [${errorString}]`) + } + } + + const stringTokenizer = function (eName, tokenizer, tSpecs, gSpec, explanation, iLog) { + const status = 'Expect pass' + const tokType = tokenizer instanceof HedStringTokenizer ? 'Original-tokenizer' : 'New tokenizer' + const header = `\n[${tokType}](${status})\tSTRING: "${tokenizer.hedString}"` + const [tagSpecs, groupSpec, tokenizingIssues] = tokenizer.tokenize() + // Test for no errors + const issues = Object.values(tokenizingIssues).flat() + assertErrors(header, issues, iLog) + assert.sameDeepMembers(tagSpecs, tSpecs, explanation) + assert.deepEqual(groupSpec, gSpec, explanation) + //assert.sameDeepMembers(groupSpec, gSpec, explanation) + } + + beforeAll(async () => { + itemLog = [] + }) + + afterAll(() => { + badLog.push(itemLog.join('\n')) + }) + + if (tests && tests.length > 0) { + test.each(tests)('NewTokenizer: Invalid string: %s ', (ex) => { + //console.log(ex) + stringTokenizer( + ex.name, + new HedStringTokenizerNew(ex.string), + ex.tagSpecs, + ex.groupSpec, + ex.explanation, + itemLog, + ) + }) + + test.each(tests)('Original tokenizer: Invalid string: %s ', (ex) => { + stringTokenizer(ex.name, new HedStringTokenizer(ex.string), ex.tagSpecs, ex.groupSpec, ex.explanation, itemLog) + }) + } + }) +}) diff --git a/tests/tokenizerTestData.js b/tests/tokenizerTestData.js deleted file mode 100644 index 223be116..00000000 --- a/tests/tokenizerTestData.js +++ /dev/null @@ -1,17 +0,0 @@ -export const passingTests = [ - { - name: 'valid-strings-simple', - description: 'Simple tags and groups', - warning: false, - tests: [ - { - name: 'internal-blank', - string: 'x y', - issueCount: 1, - hedCode: 'TAG_EMPTY', - code: 'emptyTagFound', - explanation: 'Cannot end in a comma', - }, - ], - }, -] From 906bdbc03cd52f8e04cfc8234583077f42c1e901 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Fri, 18 Oct 2024 18:07:05 -0500 Subject: [PATCH 06/21] More experiments with the tokenizer --- parser/tokenizer.js | 415 +++++++++++++++------------- parser/tokenizerNew.js | 396 -------------------------- parser/tokenizerOld.js | 385 ++++++++++++++++++++++++++ tests/stringParser.spec.js | 20 +- tests/temp.spec.js | 10 +- tests/tockenizerErrorTests.spec.js | 9 +- tests/tokenizerPassingData.js | 134 +++++---- tests/tokenizerPassingTests.spec.js | 21 +- 8 files changed, 723 insertions(+), 667 deletions(-) delete mode 100644 parser/tokenizerNew.js create mode 100644 parser/tokenizerOld.js diff --git a/parser/tokenizer.js b/parser/tokenizer.js index 4639e6a6..6c0eb199 100644 --- a/parser/tokenizer.js +++ b/parser/tokenizer.js @@ -1,28 +1,41 @@ +import { replaceTagNameWithPound } from '../utils/hedStrings' import { unicodeName } from 'unicode-name' - import { generateIssue } from '../common/issues/issues' -import { stringIsEmpty } from '../utils/string' -import { replaceTagNameWithPound } from '../utils/hedStrings' -const openingGroupCharacter = '(' -const closingGroupCharacter = ')' -const openingColumnCharacter = '{' -const closingColumnCharacter = '}' -const commaCharacter = ',' -const colonCharacter = ':' -const slashCharacter = '/' +const CHARACTERS = { + BLANK: ' ', + OPENING_GROUP: '(', + CLOSING_GROUP: ')', + OPENING_COLUMN: '{', + CLOSING_COLUMN: '}', + COMMA: ',', + COLON: ':', + SLASH: '/', +} + +function getTrimmedBounds(originalString) { + const start = originalString.search(/\S/) + const end = originalString.search(/\S\s*$/) + + if (start === -1) { + // The string contains only whitespace + return null + } + + return [start, end + 1] +} const invalidCharacters = new Set(['[', ']', '~', '"']) -const invalidCharactersOutsideOfValues = new Set([':']) -// C0 control codes +// Add control codes to invalidCharacters for (let i = 0x00; i <= 0x1f; i++) { invalidCharacters.add(String.fromCodePoint(i)) } -// DEL and C1 control codes for (let i = 0x7f; i <= 0x9f; i++) { invalidCharacters.add(String.fromCodePoint(i)) } +const invalidCharactersOutsideOfValues = new Set([':']) + /** * A specification for a tokenized substring. */ @@ -95,41 +108,32 @@ export class ColumnSpliceSpec extends SubstringSpec { } } +class TokenizerState { + constructor() { + this.currentToken = '' // Characters in the token currently being parsed + this.groupDepth = 0 + this.startingIndex = 0 // Starting index of this token + this.resetIndexFlag = false + this.slashFound = false + this.commaFound = false // A comma is hanging there -- if there is nothing else coming, it is a problem + this.librarySchema = '' + this.columnSpliceIndex = -1 //Index of { if this token is column splice + this.currentGroupStack = [[]] + this.parenthesesStack = [] + this.ignoringCharacters = false // If we encounter error in a token, we want to just skip until we can recover. + this.closingGroup = false + // this.closingColumn = false + } +} + /** * Class for tokenizing HED strings. */ export class HedStringTokenizer { - /** - * The HED string being parsed. - * @type {string} - */ - hedString - - syntaxIssues - - /** - * The current substring being parsed. - * @type {string} - */ - currentTag - - /** - * Whether we are currently closing a group. - * @type {boolean} - */ - closingGroup - - groupDepth - startingIndex - resetStartingIndex - slashFound - librarySchema - currentGroupStack - parenthesesStack - ignoringCharacters - constructor(hedString) { this.hedString = hedString + this.syntaxIssues = [] + this.state = null } /** @@ -143,22 +147,22 @@ export class HedStringTokenizer { for (let i = 0; i < this.hedString.length; i++) { const character = this.hedString.charAt(i) this.tokenizeCharacter(i, character) - if (this.resetStartingIndex) { - this.resetStartingIndex = false - this.startingIndex = i + 1 - this.currentTag = '' + if (this.state.resetIndexFlag) { + this.state.resetIndexFlag = false + this.state.startingIndex = i + 1 + this.state.currentToken = '' } } this.pushTag(this.hedString.length, true) - if (this.columnSpliceIndex >= 0) { - this._pushSyntaxIssue('unclosedCurlyBrace', this.columnSpliceIndex) + if (this.state.columnSpliceIndex >= 0) { + this.pushIssue('unclosedCurlyBrace', this.state.columnSpliceIndex) } this.unwindGroupStack() - const tagSpecs = this.currentGroupStack.pop() - const groupSpecs = this.parenthesesStack.pop() + const tagSpecs = this.state.currentGroupStack.pop() + const groupSpecs = this.state.parenthesesStack.pop() const issues = { syntax: this.syntaxIssues, conversion: [], @@ -168,216 +172,237 @@ export class HedStringTokenizer { initializeTokenizer() { this.syntaxIssues = [] - - this.currentTag = '' - this.groupDepth = 0 - this.startingIndex = 0 - this.resetStartingIndex = false - this.slashFound = false - this.librarySchema = '' - this.columnSpliceIndex = -1 - this.currentGroupStack = [[]] - this.parenthesesStack = [new GroupSpec(0, this.hedString.length, [])] - this.ignoringCharacters = false - this.closingGroup = false + this.state = new TokenizerState() + this.state.parenthesesStack = [new GroupSpec(0, this.hedString.length, [])] } tokenizeCharacter(i, character) { - let dispatchTable - if (this.ignoringCharacters) { - dispatchTable = { - [closingGroupCharacter]: (i /* character */) => { - this.clearTag() - this.closingGroupCharacter(i) - }, - [commaCharacter]: (/*i, character */) => this.clearTag(), - } + if (this.state.ignoringCharacters) { + this.handleIgnoringCharacters(i, character) } else { - dispatchTable = { - [openingGroupCharacter]: (i /* character */) => this.openingGroupCharacter(i), - [closingGroupCharacter]: (i /* character */) => { - this.pushTag(i, false) - this.closingGroupCharacter(i) - }, - [openingColumnCharacter]: (i /* character */) => this.openingColumnCharacter(i), - [closingColumnCharacter]: (i /* character */) => this.closingColumnCharacter(i), - [commaCharacter]: (i /* character */) => this.pushTag(i, false), - [colonCharacter]: (i, character) => this.colonCharacter(character), - [slashCharacter]: (i, character) => this.slashCharacter(character), - } + this.handleCharacter(i, character) } - const characterHandler = dispatchTable[character] + } + + handleIgnoringCharacters(i, character) { + // We have encountered a parsing error on this token and want to ignore until the next token. + const characterHandler = { + [CHARACTERS.CLOSING_GROUP]: () => { + this.clearToken() + this.handleClosingGroup(i) + }, + [CHARACTERS.COMMA]: () => { + this.clearToken() + this.handleClosingGroup(i) + }, + }[character] + if (characterHandler) { - characterHandler(i, character) + characterHandler() + } + } + + handleCharacter(i, character) { + const characterHandler = { + [CHARACTERS.OPENING_GROUP]: () => this.handleOpeningGroup(i), + [CHARACTERS.CLOSING_GROUP]: () => { + this.pushTag(i, false) + this.handleClosingGroup(i) + }, + [CHARACTERS.OPENING_COLUMN]: () => this.handleOpeningColumn(i), + [CHARACTERS.CLOSING_COLUMN]: () => { + this.pushTag(i) + this.handleClosingColumn(i) + }, + [CHARACTERS.COMMA]: () => { + this.state.commaFound = true + this.pushTag(i, false) + this.state.closingColumn = false + }, + [CHARACTERS.COLON]: () => this.handleColon(character), + [CHARACTERS.SLASH]: () => this.handleSlash(i), + }[character] // Selects the character handler based on the value of character + + if (characterHandler) { + characterHandler() } else if (invalidCharacters.has(character)) { - this._pushInvalidCharacterIssue(character, i) + this.pushInvalidCharacterIssue(character, i) } else { - this.otherCharacter(character) + this.handleRegularCharacter(character) } } - openingGroupCharacter(i) { - this.currentGroupStack.push([]) - this.parenthesesStack.push(new GroupSpec(i, undefined, [])) - this.resetStartingIndex = true - this.groupDepth++ + handleOpeningGroup(i) { + this.state.currentGroupStack.push([]) + this.state.parenthesesStack.push(new GroupSpec(i, undefined, [])) + this.state.resetIndexFlag = true + this.state.commaFound = false + this.state.groupDepth++ } - closingGroupCharacter(i) { - this.closingGroup = true - if (this.groupDepth <= 0) { - this._pushSyntaxIssue('unopenedParenthesis', i) + handleClosingGroup(i) { + this.state.closingGroup = true + // If the group depth is <= 0, it means there's no corresponding opening group. + if (this.state.groupDepth <= 0) { + this.pushIssue('unopenedParenthesis', i) return } + // Close the group by updating its bounds and moving it to the parent group. this.closeGroup(i) + this.commaFound = false } - openingColumnCharacter(i) { - if (this.currentTag.length > 0) { - this._pushInvalidCharacterIssue(openingColumnCharacter, i) - this.ignoringCharacters = true + handleOpeningColumn(i) { + // We're already in the middle of a token -- can't have an opening brace + if (this.state.currentToken.trim().length > 0) { + this.pushInvalidCharacterIssue(CHARACTERS.OPENING_COLUMN, i) + this.state.ignoringCharacters = true return } - if (this.columnSpliceIndex >= 0) { - this._pushSyntaxIssue('nestedCurlyBrace', i) + if (this.state.columnSpliceIndex >= 0) { + this.pushIssue('nestedCurlyBrace', i) } - this.columnSpliceIndex = i + this.state.columnSpliceIndex = i + this.state.commaFound = false } - closingColumnCharacter(i) { - this.closingGroup = true - if (this.columnSpliceIndex < 0) { - this._pushSyntaxIssue('unopenedCurlyBrace', i) + handleClosingColumn(i) { + // If a column splice is not in progress push an issue indicating an unopened curly brace. + if (this.state.columnSpliceIndex < 0) { + this.pushIssue('unopenedCurlyBrace', i) return } - if (!stringIsEmpty(this.currentTag)) { - this.currentGroupStack[this.groupDepth].push(new ColumnSpliceSpec(this.currentTag.trim(), this.startingIndex, i)) - } else { - this.syntaxIssues.push( - generateIssue('emptyCurlyBrace', { - string: this.hedString, - }), - ) + // Ensure that column slice is not empty + if (!this.state.currentToken) { + this.pushIssue('emptyCurlyBrace', i) + return } - this.columnSpliceIndex = -1 - this.resetStartingIndex = true - this.slashFound = false + + // Close the column by updating its bounds and moving it to the parent group, push a column splice on the stack. + this.state.currentGroupStack[this.state.groupDepth].push( + new ColumnSpliceSpec(this.state.currentToken.trim(), this.state.startingIndex, i), + ) + this.state.columnSpliceIndex = -1 + this.clearToken() + this.state.closingColumn = true // Used to indicate that + this.state.commaFound = false } - colonCharacter(character) { - if (!this.slashFound && !this.librarySchema) { - this.librarySchema = this.currentTag - this.resetStartingIndex = true + handleColon(character) { + if (!this.state.slashFound && !this.state.librarySchema) { + this.state.librarySchema = this.state.currentToken + this.state.resetIndexFlag = true } else { - this.currentTag += character + this.state.currentToken += character + this.state.slashFound = false } } - slashCharacter(character) { - this.slashFound = true - this.currentTag += character + handleSlash(i) { + if (!this.state.currentToken.trim() || this.state.slashFound) { + // Leading slash is error -- ignore rest of the token + this.pushIssue('extraSlash', i) + this.state.ignoringCharacters = true + } else { + this.state.slashFound = true + this.state.currentToken += CHARACTERS.SLASH + } } - otherCharacter(character) { - if (this.ignoringCharacters) { - return + handleRegularCharacter(character) { + // if (character != CHARACTERS.BLANK && this.state.closingColumn) { + // this.pushIssue('unparsedCurlyBraces', i) + // } + if (!this.state.ignoringCharacters) { + this.state.currentToken += character + this.state.slashFound = false + this.state.resetIndexFlag = this.state.currentToken === '' } - this.currentTag += character - this.resetStartingIndex = stringIsEmpty(this.currentTag) } unwindGroupStack() { - // groupDepth is decremented in closeGroup. - // eslint-disable-next-line no-unmodified-loop-condition - while (this.groupDepth > 0) { - this._pushSyntaxIssue('unclosedParenthesis', this.parenthesesStack[this.parenthesesStack.length - 1].bounds[0]) + while (this.state.groupDepth > 0) { + this.pushIssue( + 'unclosedParenthesis', + this.state.parenthesesStack[this.state.parenthesesStack.length - 1].bounds[0], + ) this.closeGroup(this.hedString.length) } } - /** - * Push a tag to the current group. - * - * @param {number} i The current index. - * @param {boolean} isEndOfString Whether we are at the end of the string. - */ pushTag(i, isEndOfString) { - if (stringIsEmpty(this.currentTag) && isEndOfString) { + // Called when a token has been parsed + const token = this.state.currentToken.trim() + if (!token && isEndOfString) { + // If empty token at end of string just return. + if (this.state.commaFound) { + this.pushIssue('emptyTagFound', i) + } return - } else if (this.closingGroup) { - this.closingGroup = false - } else if (stringIsEmpty(this.currentTag)) { - this.syntaxIssues.push(generateIssue('emptyTagFound', { index: i })) - } else if (this.columnSpliceIndex < 0) { - this._checkValueTagForInvalidCharacters() - this.currentGroupStack[this.groupDepth].push( - new TagSpec(this.currentTag.trim(), this.startingIndex, i, this.librarySchema), + } + // If we're in the process of closing a group, reset the closingGroup flag (allows for empty groups) + if (this.state.closingGroup) { + // Empty groups are allowed. + this.state.closingGroup = false + } else if (this.state.slashFound) { + //Trailing token slash is an error + this.pushIssue('extraSlash', i) + } else if (!token) { + // Column spec has already been called. + this.pushIssue('emptyTagFound', i) + } else if (this.state.columnSpliceIndex < 0) { + // Not a column splice so goes on group stack as a TagSpec + this.checkValueTagForInvalidCharacters() + const bounds = getTrimmedBounds(this.state.currentToken) + this.state.currentGroupStack[this.state.groupDepth].push( + new TagSpec( + this.state.currentToken.trim(), + this.state.startingIndex + bounds[0], + this.state.startingIndex + bounds[1], + this.state.librarySchema, + ), ) } - this.resetStartingIndex = true - this.slashFound = false - this.librarySchema = '' + // Clear the current token and reset flags for the next iteration. + this.clearToken() } - clearTag() { - this.ignoringCharacters = false - this.resetStartingIndex = true - this.slashFound = false - this.librarySchema = '' + clearToken() { + this.state.ignoringCharacters = false + this.state.resetIndexFlag = true + this.state.slashFound = false + this.state.librarySchema = '' + this.state.closingColumn = false } closeGroup(i) { - const groupSpec = this.parenthesesStack.pop() + const groupSpec = this.state.parenthesesStack.pop() groupSpec.bounds[1] = i + 1 - this.parenthesesStack[this.groupDepth - 1].children.push(groupSpec) - this.currentGroupStack[this.groupDepth - 1].push(this.currentGroupStack.pop()) - this.groupDepth-- + this.state.parenthesesStack[this.state.groupDepth - 1].children.push(groupSpec) + this.state.currentGroupStack[this.state.groupDepth - 1].push(this.state.currentGroupStack.pop()) + this.state.groupDepth-- + //this.closingColumn = false } - /** - * Check an individual tag for invalid characters. - * - * @private - */ - _checkValueTagForInvalidCharacters() { - const formToCheck = replaceTagNameWithPound(this.currentTag) + checkValueTagForInvalidCharacters() { + const formToCheck = replaceTagNameWithPound(this.state.currentToken) for (let i = 0; i < formToCheck.length; i++) { const character = formToCheck.charAt(i) - if (!invalidCharactersOutsideOfValues.has(character)) { - continue + if (invalidCharactersOutsideOfValues.has(character)) { + this.pushInvalidCharacterIssue(character, this.state.startingIndex + i) } - this._pushInvalidCharacterIssue(character, this.startingIndex + i) } } - /** - * Push an issue to the syntax issue list. - * - * @param {string} issueCode The internal code of the issue to be pushed. - * @param {number} index The location of the issue. - * @private - */ - _pushSyntaxIssue(issueCode, index) { - this.syntaxIssues.push( - generateIssue(issueCode, { - index: index, - string: this.hedString, - }), - ) + pushIssue(issueCode, index) { + this.syntaxIssues.push(generateIssue(issueCode, { index, string: this.hedString })) } - /** - * Push an invalid character issue to the syntax issue list. - * - * @param {string} character The illegal character to be reported. - * @param {number} index The location of the character. - * @private - */ - _pushInvalidCharacterIssue(character, index) { + pushInvalidCharacterIssue(character, index) { this.syntaxIssues.push( generateIssue('invalidCharacter', { character: unicodeName(character), - index: index, + index, string: this.hedString, }), ) diff --git a/parser/tokenizerNew.js b/parser/tokenizerNew.js deleted file mode 100644 index 7aa2dda4..00000000 --- a/parser/tokenizerNew.js +++ /dev/null @@ -1,396 +0,0 @@ -import { replaceTagNameWithPound } from '../utils/hedStrings' -import { unicodeName } from 'unicode-name' -import { generateIssue } from '../common/issues/issues' - -const CHARACTERS = { - BLANK: ' ', - OPENING_GROUP: '(', - CLOSING_GROUP: ')', - OPENING_COLUMN: '{', - CLOSING_COLUMN: '}', - COMMA: ',', - COLON: ':', - SLASH: '/', -} - -function getTrimmedBounds(originalString) { - const start = originalString.search(/\S/) - const end = originalString.search(/\S\s*$/) - - if (start === -1) { - // The string contains only whitespace - return null - } - - return [start, end + 1] -} - -const invalidCharacters = new Set(['[', ']', '~', '"']) -// Add control codes to invalidCharacters -for (let i = 0x00; i <= 0x1f; i++) { - invalidCharacters.add(String.fromCodePoint(i)) -} -for (let i = 0x7f; i <= 0x9f; i++) { - invalidCharacters.add(String.fromCodePoint(i)) -} - -const invalidCharactersOutsideOfValues = new Set([':']) - -/** - * A specification for a tokenized substring. - */ -export class SubstringSpec { - /** - * The starting and ending bounds of the substring. - * @type {number[]} - */ - bounds - - constructor(start, end) { - this.bounds = [start, end] - } -} - -/** - * A specification for a tokenized tag. - */ -export class TagSpec extends SubstringSpec { - /** - * The tag this spec represents. - * @type {string} - */ - tag - /** - * The schema prefix for this tag, if any. - * @type {string} - */ - library - - constructor(tag, start, end, librarySchema) { - super(start, end) - - this.tag = tag.trim() - this.library = librarySchema - } -} - -/** - * A specification for a tokenized tag group. - */ -export class GroupSpec extends SubstringSpec { - /** - * The child group specifications. - * @type {GroupSpec[]} - */ - children - - constructor(start, end, children) { - super(start, end) - - this.children = children - } -} - -/** - * A specification for a tokenized column splice template. - */ -export class ColumnSpliceSpec extends SubstringSpec { - /** - * The column name this spec refers to. - * @type {string} - */ - columnName - - constructor(name, start, end) { - super(start, end) - - this.columnName = name.trim() - } -} - -class TokenizerState { - constructor() { - this.currentToken = '' // Characters in the token currently being parsed - this.groupDepth = 0 - this.startingIndex = 0 // Starting index of this token - this.resetIndexFlag = false - this.slashFound = false - this.librarySchema = '' - this.columnSpliceIndex = -1 //Index of { if this token is column splice - this.currentGroupStack = [[]] - this.parenthesesStack = [] - this.ignoringCharacters = false - this.closingGroup = false - // this.closingColumn = false - } -} - -/** - * Class for tokenizing HED strings. - */ -export class HedStringTokenizerNew { - constructor(hedString) { - this.hedString = hedString - this.syntaxIssues = [] - this.state = null - } - - /** - * Split the HED string into delimiters and tags. - * - * @returns {[TagSpec[], GroupSpec, Object]} The tag specifications, group bounds, and any issues found. - */ - tokenize() { - this.initializeTokenizer() - - for (let i = 0; i < this.hedString.length; i++) { - const character = this.hedString.charAt(i) - this.tokenizeCharacter(i, character) - if (this.state.resetIndexFlag) { - this.state.resetIndexFlag = false - this.state.startingIndex = i + 1 - this.state.currentToken = '' - } - } - //this.pushTag(this.hedString.length - 1) - - if (this.state.columnSpliceIndex >= 0) { - this.pushIssue('unclosedCurlyBrace', this.state.columnSpliceIndex) - } - - this.unwindGroupStack() - - const tagSpecs = this.state.currentGroupStack.pop() - const groupSpecs = this.state.parenthesesStack.pop() - const issues = { - syntax: this.syntaxIssues, - conversion: [], - } - return [tagSpecs, groupSpecs, issues] - } - - initializeTokenizer() { - this.syntaxIssues = [] - this.state = new TokenizerState() - this.state.parenthesesStack = [new GroupSpec(0, this.hedString.length, [])] - } - - tokenizeCharacter(i, character) { - if (this.state.ignoringCharacters) { - this.handleIgnoringCharacters(i, character) - } else { - this.handleCharacter(i, character) - } - } - - handleIgnoringCharacters(i, character) { - const characterHandler = { - [CHARACTERS.CLOSING_GROUP]: () => { - this.clearToken() - this.handleClosingGroup(i) - }, - [CHARACTERS.COMMA]: () => { - this.clearToken() - this.handleClosingGroup(i) - }, - }[character] - - if (characterHandler) { - characterHandler() - } - } - - handleCharacter(i, character) { - const characterHandler = { - [CHARACTERS.OPENING_GROUP]: () => this.handleOpeningGroup(i), - [CHARACTERS.CLOSING_GROUP]: () => { - this.pushTag(i) - this.handleClosingGroup(i) - }, - [CHARACTERS.OPENING_COLUMN]: () => this.handleOpeningColumn(i), - [CHARACTERS.CLOSING_COLUMN]: () => { - this.pushTag(i) - this.handleClosingColumn(i) - }, - [CHARACTERS.COMMA]: () => { - this.pushTag(i) - //this.state.closingColumn = false - }, - [CHARACTERS.COLON]: () => this.handleColon(character), - [CHARACTERS.SLASH]: () => this.handleSlash(i), - }[character] // Selects the character handler based on the value of character - - if (characterHandler) { - characterHandler() - } else if (invalidCharacters.has(character)) { - this.pushInvalidCharacterIssue(character, i) - } else { - this.handleRegularCharacter(character) - } - } - - handleOpeningGroup(i) { - this.state.currentGroupStack.push([]) - this.state.parenthesesStack.push(new GroupSpec(i, undefined, [])) - this.state.resetIndexFlag = true - this.state.groupDepth++ - } - - handleClosingGroup(i) { - // If the group depth is <= 0, it means there's no corresponding opening group. - if (this.state.groupDepth <= 0) { - this.pushIssue('unopenedParenthesis', i) - return - } - // Close the group by updating its bounds and moving it to the parent group. - this.closeGroup(i) - } - - handleOpeningColumn(i) { - // We're already in the middle of a token -- can't have an opening brace - if (this.state.currentToken.trim().length > 0) { - this.pushInvalidCharacterIssue(CHARACTERS.OPENING_COLUMN, i) - this.state.ignoringCharacters = true - return - } - if (this.state.columnSpliceIndex >= 0) { - this.pushIssue('nestedCurlyBrace', i) - } - this.state.columnSpliceIndex = i - } - - handleClosingColumn(i) { - // If a column splice is not in progress push an issue indicating an unopened curly brace. - if (this.state.columnSpliceIndex < 0) { - this.pushIssue('unopenedCurlyBrace', i) - return - } - // Ensure that column slice is not empty - if (this.state.currentToken == '') { - this.pushIssue('emptyCurlyBrace', i) - return - } - - // Close the column by updating its bounds and moving it to the parent group, push a column splice on the stack. - this.state.currentGroupStack[this.state.groupDepth].push( - new ColumnSpliceSpec(this.state.currentToken.trim(), this.state.startingIndex, i), - ) - this.state.columnSpliceIndex = -1 - this.clearToken() - this.state.closingColumn = true // Used to indicate that - } - - handleColon(character) { - if (!this.state.slashFound && !this.state.librarySchema) { - this.state.librarySchema = this.state.currentToken - this.state.resetIndexFlag = true - } else { - this.state.currentToken += character - } - } - - handleSlash(i) { - if (!this.state.currentToken || this.state.slashFound) { - // Leading slash is error -- ignore rest of the token - this.pushIssue('extraSlash', i) - this.state.ignoringCharacters = true - } else { - this.state.slashFound = true - this.state.currentToken += CHARACTERS.SLASH - } - } - - handleRegularCharacter(character) { - // if (character != CHARACTERS.BLANK && this.state.closingColumn) { - // this.pushIssue('unparsedCurlyBraces', i) - // } - if (!this.state.ignoringCharacters) { - this.state.currentToken += character - this.state.resetIndexFlag = this.state.currentToken === '' - } - } - - unwindGroupStack() { - while (this.state.groupDepth > 0) { - this.pushIssue( - 'unclosedParenthesis', - this.state.parenthesesStack[this.state.parenthesesStack.length - 1].bounds[0], - ) - this.closeGroup(this.hedString.length) - } - } - - pushTag(i) { - // Called when a token has been parsed - - // if (!this.state.currentToken && isEndOfString) { // If empty token at end of string just return. - // return - // } - // If we're in the process of closing a group, reset the closingGroup flag (allows for empty groups) - if (this.state.closingGroup) { - // Empty groups are allowed. - this.state.closingGroup = false - } else if (this.state.slashFound) { - //Trailing token slash is an error - this.pushIssue('extraSlash', i) - } else if (!this.state.currentToken) { - // Column spec has already been called. - this.pushIssue('emptyTagFound', i) - } else if (this.state.columnSpliceIndex < 0) { - // Not a column splice so goes on group stack as a TagSpec - this.checkValueTagForInvalidCharacters() - let bounds = getTrimmedBounds(this.state.currentToken) - this.state.currentGroupStack[this.state.groupDepth].push( - new TagSpec( - this.state.currentToken.trim(), - this.state.startingIndex + bounds[0], - this.state.startingIndex + bounds[1], - this.state.librarySchema, - ), - ) - } - // Clear the current token and reset flags for the next iteration. - this.clearToken() - } - - clearToken() { - this.state.ignoringCharacters = false - this.state.resetIndexFlag = true - this.state.slashFound = false - this.state.librarySchema = '' - this.state.closingColumn = false - } - - closeGroup(i) { - const groupSpec = this.state.parenthesesStack.pop() - groupSpec.bounds[1] = i + 1 - this.state.parenthesesStack[this.state.groupDepth - 1].children.push(groupSpec) - this.state.currentGroupStack[this.state.groupDepth - 1].push(this.state.currentGroupStack.pop()) - this.state.groupDepth-- - //this.closingColumn = false - } - - checkValueTagForInvalidCharacters() { - const formToCheck = replaceTagNameWithPound(this.state.currentToken) - for (let i = 0; i < formToCheck.length; i++) { - const character = formToCheck.charAt(i) - if (invalidCharactersOutsideOfValues.has(character)) { - this.pushInvalidCharacterIssue(character, this.state.startingIndex + i) - } - } - } - - pushIssue(issueCode, index) { - this.syntaxIssues.push(generateIssue(issueCode, { index, string: this.hedString })) - } - - pushInvalidCharacterIssue(character, index) { - this.syntaxIssues.push( - generateIssue('invalidCharacter', { - character: unicodeName(character), - index, - string: this.hedString, - }), - ) - } -} diff --git a/parser/tokenizerOld.js b/parser/tokenizerOld.js new file mode 100644 index 00000000..54c34f79 --- /dev/null +++ b/parser/tokenizerOld.js @@ -0,0 +1,385 @@ +import { unicodeName } from 'unicode-name' + +import { generateIssue } from '../common/issues/issues' +import { stringIsEmpty } from '../utils/string' +import { replaceTagNameWithPound } from '../utils/hedStrings' + +const openingGroupCharacter = '(' +const closingGroupCharacter = ')' +const openingColumnCharacter = '{' +const closingColumnCharacter = '}' +const commaCharacter = ',' +const colonCharacter = ':' +const slashCharacter = '/' + +const invalidCharacters = new Set(['[', ']', '~', '"']) +const invalidCharactersOutsideOfValues = new Set([':']) +// C0 control codes +for (let i = 0x00; i <= 0x1f; i++) { + invalidCharacters.add(String.fromCodePoint(i)) +} +// DEL and C1 control codes +for (let i = 0x7f; i <= 0x9f; i++) { + invalidCharacters.add(String.fromCodePoint(i)) +} + +/** + * A specification for a tokenized substring. + */ +class SubstringSpec { + /** + * The starting and ending bounds of the substring. + * @type {number[]} + */ + bounds + + constructor(start, end) { + this.bounds = [start, end] + } +} + +/** + * A specification for a tokenized tag. + */ +class TagSpec extends SubstringSpec { + /** + * The tag this spec represents. + * @type {string} + */ + tag + /** + * The schema prefix for this tag, if any. + * @type {string} + */ + library + + constructor(tag, start, end, librarySchema) { + super(start, end) + + this.tag = tag.trim() + this.library = librarySchema + } +} + +/** + * A specification for a tokenized tag group. + */ +class GroupSpec extends SubstringSpec { + /** + * The child group specifications. + * @type {GroupSpec[]} + */ + children + + constructor(start, end, children) { + super(start, end) + + this.children = children + } +} + +/** + * A specification for a tokenized column splice template. + */ +class ColumnSpliceSpec extends SubstringSpec { + /** + * The column name this spec refers to. + * @type {string} + */ + columnName + + constructor(name, start, end) { + super(start, end) + + this.columnName = name.trim() + } +} + +/** + * Class for tokenizing HED strings. + */ +export class HedStringTokenizerOld { + /** + * The HED string being parsed. + * @type {string} + */ + hedString + + syntaxIssues + + /** + * The current substring being parsed. + * @type {string} + */ + currentTag + + /** + * Whether we are currently closing a group. + * @type {boolean} + */ + closingGroup + + groupDepth + startingIndex + resetStartingIndex + slashFound + librarySchema + currentGroupStack + parenthesesStack + ignoringCharacters + + constructor(hedString) { + this.hedString = hedString + } + + /** + * Split the HED string into delimiters and tags. + * + * @returns {[TagSpec[], GroupSpec, Object]} The tag specifications, group bounds, and any issues found. + */ + tokenize() { + this.initializeTokenizer() + + for (let i = 0; i < this.hedString.length; i++) { + const character = this.hedString.charAt(i) + this.tokenizeCharacter(i, character) + if (this.resetStartingIndex) { + this.resetStartingIndex = false + this.startingIndex = i + 1 + this.currentTag = '' + } + } + this.pushTag(this.hedString.length, true) + + if (this.columnSpliceIndex >= 0) { + this._pushSyntaxIssue('unclosedCurlyBrace', this.columnSpliceIndex) + } + + this.unwindGroupStack() + + const tagSpecs = this.currentGroupStack.pop() + const groupSpecs = this.parenthesesStack.pop() + const issues = { + syntax: this.syntaxIssues, + conversion: [], + } + return [tagSpecs, groupSpecs, issues] + } + + initializeTokenizer() { + this.syntaxIssues = [] + + this.currentTag = '' + this.groupDepth = 0 + this.startingIndex = 0 + this.resetStartingIndex = false + this.slashFound = false + this.librarySchema = '' + this.columnSpliceIndex = -1 + this.currentGroupStack = [[]] + this.parenthesesStack = [new GroupSpec(0, this.hedString.length, [])] + this.ignoringCharacters = false + this.closingGroup = false + } + + tokenizeCharacter(i, character) { + let dispatchTable + if (this.ignoringCharacters) { + dispatchTable = { + [closingGroupCharacter]: (i /* character */) => { + this.clearTag() + this.closingGroupCharacter(i) + }, + [commaCharacter]: (/*i, character */) => this.clearTag(), + } + } else { + dispatchTable = { + [openingGroupCharacter]: (i /* character */) => this.openingGroupCharacter(i), + [closingGroupCharacter]: (i /* character */) => { + this.pushTag(i, false) + this.closingGroupCharacter(i) + }, + [openingColumnCharacter]: (i /* character */) => this.openingColumnCharacter(i), + [closingColumnCharacter]: (i /* character */) => this.closingColumnCharacter(i), + [commaCharacter]: (i /* character */) => this.pushTag(i, false), + [colonCharacter]: (i, character) => this.colonCharacter(character), + [slashCharacter]: (i, character) => this.slashCharacter(character), + } + } + const characterHandler = dispatchTable[character] + if (characterHandler) { + characterHandler(i, character) + } else if (invalidCharacters.has(character)) { + this._pushInvalidCharacterIssue(character, i) + } else { + this.otherCharacter(character) + } + } + + openingGroupCharacter(i) { + this.currentGroupStack.push([]) + this.parenthesesStack.push(new GroupSpec(i, undefined, [])) + this.resetStartingIndex = true + this.groupDepth++ + } + + closingGroupCharacter(i) { + this.closingGroup = true + if (this.groupDepth <= 0) { + this._pushSyntaxIssue('unopenedParenthesis', i) + return + } + this.closeGroup(i) + } + + openingColumnCharacter(i) { + if (this.currentTag.length > 0) { + this._pushInvalidCharacterIssue(openingColumnCharacter, i) + this.ignoringCharacters = true + return + } + if (this.columnSpliceIndex >= 0) { + this._pushSyntaxIssue('nestedCurlyBrace', i) + } + this.columnSpliceIndex = i + } + + closingColumnCharacter(i) { + this.closingGroup = true + if (this.columnSpliceIndex < 0) { + this._pushSyntaxIssue('unopenedCurlyBrace', i) + return + } + if (!stringIsEmpty(this.currentTag)) { + this.currentGroupStack[this.groupDepth].push(new ColumnSpliceSpec(this.currentTag.trim(), this.startingIndex, i)) + } else { + this.syntaxIssues.push( + generateIssue('emptyCurlyBrace', { + string: this.hedString, + }), + ) + } + this.columnSpliceIndex = -1 + this.resetStartingIndex = true + this.slashFound = false + } + + colonCharacter(character) { + if (!this.slashFound && !this.librarySchema) { + this.librarySchema = this.currentTag + this.resetStartingIndex = true + } else { + this.currentTag += character + } + } + + slashCharacter(character) { + this.slashFound = true + this.currentTag += character + } + + otherCharacter(character) { + if (this.ignoringCharacters) { + return + } + this.currentTag += character + this.resetStartingIndex = stringIsEmpty(this.currentTag) + } + + unwindGroupStack() { + // groupDepth is decremented in closeGroup. + // eslint-disable-next-line no-unmodified-loop-condition + while (this.groupDepth > 0) { + this._pushSyntaxIssue('unclosedParenthesis', this.parenthesesStack[this.parenthesesStack.length - 1].bounds[0]) + this.closeGroup(this.hedString.length) + } + } + + /** + * Push a tag to the current group. + * + * @param {number} i The current index. + * @param {boolean} isEndOfString Whether we are at the end of the string. + */ + pushTag(i, isEndOfString) { + if (stringIsEmpty(this.currentTag) && isEndOfString) { + return + } else if (this.closingGroup) { + this.closingGroup = false + } else if (stringIsEmpty(this.currentTag)) { + this.syntaxIssues.push(generateIssue('emptyTagFound', { index: i })) + } else if (this.columnSpliceIndex < 0) { + this._checkValueTagForInvalidCharacters() + this.currentGroupStack[this.groupDepth].push( + new TagSpec(this.currentTag.trim(), this.startingIndex, i, this.librarySchema), + ) + } + this.resetStartingIndex = true + this.slashFound = false + this.librarySchema = '' + } + + clearTag() { + this.ignoringCharacters = false + this.resetStartingIndex = true + this.slashFound = false + this.librarySchema = '' + } + + closeGroup(i) { + const groupSpec = this.parenthesesStack.pop() + groupSpec.bounds[1] = i + 1 + this.parenthesesStack[this.groupDepth - 1].children.push(groupSpec) + this.currentGroupStack[this.groupDepth - 1].push(this.currentGroupStack.pop()) + this.groupDepth-- + } + + /** + * Check an individual tag for invalid characters. + * + * @private + */ + _checkValueTagForInvalidCharacters() { + const formToCheck = replaceTagNameWithPound(this.currentTag) + for (let i = 0; i < formToCheck.length; i++) { + const character = formToCheck.charAt(i) + if (!invalidCharactersOutsideOfValues.has(character)) { + continue + } + this._pushInvalidCharacterIssue(character, this.startingIndex + i) + } + } + + /** + * Push an issue to the syntax issue list. + * + * @param {string} issueCode The internal code of the issue to be pushed. + * @param {number} index The location of the issue. + * @private + */ + _pushSyntaxIssue(issueCode, index) { + this.syntaxIssues.push( + generateIssue(issueCode, { + index: index, + string: this.hedString, + }), + ) + } + + /** + * Push an invalid character issue to the syntax issue list. + * + * @param {string} character The illegal character to be reported. + * @param {number} index The location of the character. + * @private + */ + _pushInvalidCharacterIssue(character, index) { + this.syntaxIssues.push( + generateIssue('invalidCharacter', { + character: unicodeName(character), + index: index, + string: this.hedString, + }), + ) + } +} diff --git a/tests/stringParser.spec.js b/tests/stringParser.spec.js index c9335456..e0547563 100644 --- a/tests/stringParser.spec.js +++ b/tests/stringParser.spec.js @@ -228,25 +228,25 @@ describe('HED string parsing', () => { describe('Parsed HED strings', () => { it('must have the correct number of tags, top-level tags, and groups', () => { const hedString = - '/Action/Move/Flex,(Relation/Spatial-relation/Left-side-of,/Action/Move/Bend,/Upper-extremity/Elbow),/Position/X-position/70 px,/Position/Y-position/23 px' + 'Action/Move/Flex,(Relation/Spatial-relation/Left-side-of,Action/Move/Bend,Upper-extremity/Elbow),Position/X-position/70 px,Position/Y-position/23 px' const [parsedString, issues] = parseHedString(hedString, nullSchema) assert.isEmpty(Object.values(issues).flat(), 'Parsing issues occurred') assert.sameDeepMembers(parsedString.tags.map(originalMap), [ - '/Action/Move/Flex', + 'Action/Move/Flex', 'Relation/Spatial-relation/Left-side-of', - '/Action/Move/Bend', - '/Upper-extremity/Elbow', - '/Position/X-position/70 px', - '/Position/Y-position/23 px', + 'Action/Move/Bend', + 'Upper-extremity/Elbow', + 'Position/X-position/70 px', + 'Position/Y-position/23 px', ]) assert.sameDeepMembers(parsedString.topLevelTags.map(originalMap), [ - '/Action/Move/Flex', - '/Position/X-position/70 px', - '/Position/Y-position/23 px', + 'Action/Move/Flex', + 'Position/X-position/70 px', + 'Position/Y-position/23 px', ]) assert.sameDeepMembers( parsedString.tagGroups.map((group) => group.tags.map(originalMap)), - [['Relation/Spatial-relation/Left-side-of', '/Action/Move/Bend', '/Upper-extremity/Elbow']], + [['Relation/Spatial-relation/Left-side-of', 'Action/Move/Bend', 'Upper-extremity/Elbow']], ) }) diff --git a/tests/temp.spec.js b/tests/temp.spec.js index 8e886f9d..37a058cf 100644 --- a/tests/temp.spec.js +++ b/tests/temp.spec.js @@ -13,12 +13,16 @@ import { buildSchemas } from '../validator/schema/init' import ColumnSplicer from '../parser/columnSplicer' import ParsedHedGroup from '../parser/parsedHedGroup' import { HedStringTokenizer } from '../parser/tokenizer' -import { HedStringTokenizerNew } from '../parser/tokenizerNew' +import { HedStringTokenizerOld } from '../parser/tokenizerOld' describe('HED string parsing', () => { it('should include each group as its own single element', () => { - const hedString = '(xy)' - const tok = new HedStringTokenizerNew(hedString) + //const hedString = "Action/Move/Flex,(Relation/Spatial-relation/Left-side-of,Action/Move/Bend,Upper-extremity/Elbow),Position/X-position/70 px,Position/Y-position/23 px" + //const hedString = 'x/y w/z' + const hedString = '(r,z)' + //const hedString = 'r,' + //const hedString = 'r,y' + const tok = new HedStringTokenizer(hedString) const [tagSpecs, groupBounds, tokenizingIssues] = tok.tokenize() assert.isEmpty(Object.values(tokenizingIssues).flat(), 'Parsing issues occurred') //const hedString = 'Action/Move/My-flex,(Relation/Spatial-relation/Left-side-of,Action/Move/My-bend,Upper-extremity/My-elbow),Position/X-position/70 m,Position/Y-position/23 m' diff --git a/tests/tockenizerErrorTests.spec.js b/tests/tockenizerErrorTests.spec.js index dce34391..c304e171 100644 --- a/tests/tockenizerErrorTests.spec.js +++ b/tests/tockenizerErrorTests.spec.js @@ -6,7 +6,8 @@ import * as hed from '../validator/event' import { BidsHedIssue } from '../bids/types/issues' import path from 'path' import { HedStringTokenizer } from '../parser/tokenizer' -import { HedStringTokenizerNew } from '../parser/tokenizerNew' +import { HedStringTokenizerOld } from '../parser/tokenizerOld' +//import { HedStringTokenizerNew } from '../parser/tokenizerNew' import { generateIssue, IssueError } from '../common/issues/issues' import { errorTests } from './tokenizerErrorData' const displayLog = process.env.DISPLAY_LOG === 'true' @@ -70,7 +71,7 @@ describe('HED tokenizer validation using JSON tests', () => { const stringTokenizer = function (eHedCode, eCode, eName, tokenizer, expectError, iLog) { const status = expectError ? 'Expect fail' : 'Expect pass' - const tokType = tokenizer instanceof HedStringTokenizer ? 'Original-tokenizer' : 'New tokenizer' + const tokType = tokenizer instanceof HedStringTokenizer ? 'New tokenizer' : 'Original tokenizer' const header = `\n[${eHedCode} ${eName} ${tokType}](${status})\tSTRING: "${tokenizer.hedString}"` const [tagSpecs, groupBounds, tokenizingIssues] = tokenizer.tokenize() const issues = Object.values(tokenizingIssues).flat() @@ -88,11 +89,11 @@ describe('HED tokenizer validation using JSON tests', () => { if (tests && tests.length > 0) { test.each(tests)('NewTokenizer: Invalid string: %s ', (ex) => { //console.log(ex) - stringTokenizer(ex.hedCode, ex.code, ex.name, new HedStringTokenizerNew(ex.string), true, itemLog) + stringTokenizer(ex.hedCode, ex.code, ex.name, new HedStringTokenizer(ex.string), true, itemLog) }) test.each(tests)('Original tokenizer: Invalid string: %s ', (ex) => { - stringTokenizer(ex.hedCode, ex.code, ex.name, new HedStringTokenizer(ex.string), true, itemLog) + stringTokenizer(ex.hedCode, ex.code, ex.name, new HedStringTokenizerOld(ex.string), true, itemLog) }) } }) diff --git a/tests/tokenizerPassingData.js b/tests/tokenizerPassingData.js index ec67fff8..d2dc2363 100644 --- a/tests/tokenizerPassingData.js +++ b/tests/tokenizerPassingData.js @@ -1,4 +1,5 @@ -import { TagSpec, GroupSpec, ColumnSpliceSpec } from '../parser/tokenizerNew' +//import { TagSpec, GroupSpec, ColumnSpliceSpec } from '../parser/tokenizerNew' +import { TagSpec, GroupSpec, ColumnSpliceSpec } from '../parser/tokenizer' export const passingTests = [ { @@ -6,56 +7,57 @@ export const passingTests = [ description: 'Single tags with no groups.', warning: false, tests: [ - // { - // name: 'simple-tag-no-blanks', - // string: 'xy', - // explanation: 'Should have bounds 0, 2', - // tagSpecs: [new TagSpec("xy", 0, 2, "")], - // groupSpec: new GroupSpec(0, undefined) - // }, - // { - // name: 'internal-blank', - // string: 'x y', - // explanation: 'Can have internal blank', - // tagSpecs: [new TagSpec("x y", 0, 3, "")], - // groupSpec: new GroupSpec(0, undefined) - // }, - // { - // name: 'extra-blanks-simple', - // string: ' xy ', - // explanation: 'Can have extra blanks', - // tagSpecs: [new TagSpec("xy", 1, 3, "")], - // groupSpec: new GroupSpec(0, undefined) - // } + { + name: 'simple-tag-no-blanks', + string: 'xy', + explanation: 'Should have bounds 0, 2', + tagSpecs: [new TagSpec('xy', 0, 2, '')], + groupSpec: new GroupSpec(0, 2, []), + }, + { + name: 'internal-blank', + string: 'x y', + explanation: 'Can have internal blank', + tagSpecs: [new TagSpec('x y', 0, 3, '')], + groupSpec: new GroupSpec(0, 3, []), + }, + { + name: 'extra-blanks-simple', + string: ' xy ', + explanation: 'Can have extra blanks', + tagSpecs: [new TagSpec('xy', 1, 3, '')], + groupSpec: new GroupSpec(0, 5, []), + }, + { + name: 'tag-with-slashes', + string: 'x/y/z', + explanation: 'Can have multiple slashes', + tagSpecs: [new TagSpec('x/y/z', 0, 5, '')], + groupSpec: new GroupSpec(0, 5, []), + }, + ], + }, + { + name: 'valid-tags-no-groups', + description: 'multiple tags with no groups.', + warning: false, + tests: [ + { + name: 'multiple-tags', + string: 'xy,zy,wy', + explanation: 'Multiple tags with no blanks', + tagSpecs: [new TagSpec('xy', 0, 2, ''), new TagSpec('zy', 3, 5, ''), new TagSpec('wy', 6, 8, '')], + groupSpec: new GroupSpec(0, 8, []), + }, + { + name: 'multiple-tags-with-blanks', + string: ' xy, zy , wy ', + explanation: 'Can have extra blanks', + tagSpecs: [new TagSpec('xy', 1, 3, ''), new TagSpec('zy', 6, 8, ''), new TagSpec('wy', 11, 13, '')], + groupSpec: new GroupSpec(0, 15, []), + }, ], }, - // { - // name: 'valid-tags-no-groups', - // description: 'multiple tags with no groups.', - // warning: false, - // tests: [ - // { - // name: 'multiple-tags', - // string: 'xy,zy,wy', - // explanation: 'Can have extra blanks', - // tagSpecs: [new TagSpec("xy", 0, 2, ""), - // new TagSpec("zy", 3, 5, ""), - // new TagSpec("wy", 6, 8, "") - // ], - // groupSpec: new GroupSpec(0, undefined) - // }, - // { - // name: 'multiple-tags-with-blanks', - // string: ' xy, zy , wy ', - // explanation: 'Can have extra blanks', - // tagSpecs: [new TagSpec("xy", 1, 3, ""), - // new TagSpec("zy", 6, 8, ""), - // new TagSpec("wy", 11, 13, "") - // ], - // groupSpec: new GroupSpec(0, undefined,[]) - // }, - // ] - // }, { name: 'un-nested-groups', description: 'Groups with no nesting', @@ -68,6 +70,40 @@ export const passingTests = [ tagSpecs: [[new TagSpec('xy', 1, 3, '')]], groupSpec: new GroupSpec(0, 4, [new GroupSpec(0, 4, [])]), }, + { + name: 'tag-after-group', + string: '(x), p', + explanation: 'A tag after a group.', + tagSpecs: [[new TagSpec('x', 1, 2, '')], new TagSpec('p', 5, 6, '')], + groupSpec: new GroupSpec(0, 6, [new GroupSpec(0, 3, [])]), + }, + { + name: 'multiple-tags-in-group', + string: '(x,y)', + explanation: 'Multiple tags in one group.', + tagSpecs: [[new TagSpec('x', 1, 2, '')], new TagSpec('y', 3, 4, '')], + groupSpec: new GroupSpec(0, 5, [new GroupSpec(0, 5, [])]), + }, + // { + // name: 'multiple-unnested-groups', + // string: 'q, (xy), (zw, uv), p', + // explanation: 'Multiple unnested tag groups and tags.', + // tagSpecs: [new TagSpec('q', 0, 1, ''), + // [new TagSpec('xy', 4, 6, '')], + // [new TagSpec('zw', 10, 12, ''), + // new TagSpec('uv', 14, 16, '')], + // new TagSpec('p', 19, 20, '')], + // groupSpec: new GroupSpec(0, 20, + // [new GroupSpec(3, 7, []), + // new GroupSpec(9, 17, [])]) + // }, + { + name: 'tag-after-group', + string: 'x/y,(r,v)', + explanation: 'A tag after a group.', + tagSpecs: [[new TagSpec('x', 1, 2, '')], new TagSpec('p', 5, 6, '')], + groupSpec: new GroupSpec(0, 6, [new GroupSpec(0, 3, [])]), + }, ], }, ] diff --git a/tests/tokenizerPassingTests.spec.js b/tests/tokenizerPassingTests.spec.js index 6fb9e561..bc6c4c3f 100644 --- a/tests/tokenizerPassingTests.spec.js +++ b/tests/tokenizerPassingTests.spec.js @@ -5,8 +5,10 @@ import { beforeAll, describe, afterAll } from '@jest/globals' import * as hed from '../validator/event' import { BidsHedIssue } from '../bids/types/issues' import path from 'path' -import { HedStringTokenizer } from '../parser/tokenizer' -import { TagSpec, GroupSpec, ColumnSpliceSpec, HedStringTokenizerNew } from '../parser/tokenizerNew' +//import { HedStringTokenizer } from '../parser/tokenizer' +import { HedStringTokenizerOld } from '../parser/tokenizerOld' +import { TagSpec, GroupSpec, ColumnSpliceSpec, HedStringTokenizer } from '../parser/tokenizer' +//import { TagSpec, GroupSpec, ColumnSpliceSpec, HedStringTokenizer } from '../parser/tokenizerNew' import { generateIssue, IssueError } from '../common/issues/issues' import { passingTests } from './tokenizerPassingData' const fs = require('fs') @@ -51,7 +53,7 @@ describe('HED tokenizer validation - validData', () => { const stringTokenizer = function (eName, tokenizer, tSpecs, gSpec, explanation, iLog) { const status = 'Expect pass' - const tokType = tokenizer instanceof HedStringTokenizer ? 'Original-tokenizer' : 'New tokenizer' + const tokType = tokenizer instanceof HedStringTokenizer ? 'Tokenizer' : 'Original tokenizer' const header = `\n[${tokType}](${status})\tSTRING: "${tokenizer.hedString}"` const [tagSpecs, groupSpec, tokenizingIssues] = tokenizer.tokenize() // Test for no errors @@ -71,21 +73,20 @@ describe('HED tokenizer validation - validData', () => { }) if (tests && tests.length > 0) { - test.each(tests)('NewTokenizer: Invalid string: %s ', (ex) => { - //console.log(ex) + test.each(tests)('Tokenizer: Invalid string: %s ', (ex) => { + stringTokenizer(ex.name, new HedStringTokenizer(ex.string), ex.tagSpecs, ex.groupSpec, ex.explanation, itemLog) + }) + + test.each(tests)('Original tokenizer: Invalid string: %s ', (ex) => { stringTokenizer( ex.name, - new HedStringTokenizerNew(ex.string), + new HedStringTokenizerOld(ex.string), ex.tagSpecs, ex.groupSpec, ex.explanation, itemLog, ) }) - - test.each(tests)('Original tokenizer: Invalid string: %s ', (ex) => { - stringTokenizer(ex.name, new HedStringTokenizer(ex.string), ex.tagSpecs, ex.groupSpec, ex.explanation, itemLog) - }) } }) }) From 2d8a51f4e60c35bcf794c657c92711b05b794697 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Sat, 19 Oct 2024 11:28:11 -0500 Subject: [PATCH 07/21] First pass at rewritten tokenizer --- parser/tokenizerNew.js | 351 +++++++++++++++++++++++++++++++++++++++++ tests/temp.spec.js | 10 +- 2 files changed, 358 insertions(+), 3 deletions(-) create mode 100644 parser/tokenizerNew.js diff --git a/parser/tokenizerNew.js b/parser/tokenizerNew.js new file mode 100644 index 00000000..d7824dc1 --- /dev/null +++ b/parser/tokenizerNew.js @@ -0,0 +1,351 @@ +import { replaceTagNameWithPound } from '../utils/hedStrings' +import { unicodeName } from 'unicode-name' +import { generateIssue } from '../common/issues/issues' + +const CHARACTERS = { + BLANK: ' ', + OPENING_GROUP: '(', + CLOSING_GROUP: ')', + OPENING_COLUMN: '{', + CLOSING_COLUMN: '}', + COMMA: ',', + COLON: ':', + SLASH: '/', +} + +function getTrimmedBounds(originalString) { + const start = originalString.search(/\S/) + const end = originalString.search(/\S\s*$/) + + if (start === -1) { + // The string contains only whitespace + return null + } + + return [start, end + 1] +} + +const invalidCharacters = new Set(['[', ']', '~', '"']) +// Add control codes to invalidCharacters +for (let i = 0x00; i <= 0x1f; i++) { + invalidCharacters.add(String.fromCodePoint(i)) +} +for (let i = 0x7f; i <= 0x9f; i++) { + invalidCharacters.add(String.fromCodePoint(i)) +} + +const invalidCharactersOutsideOfValues = new Set([':']) + +/** + * A specification for a tokenized substring. + */ +export class SubstringSpec { + /** + * The starting and ending bounds of the substring. + * @type {number[]} + */ + bounds + + constructor(start, end) { + this.bounds = [start, end] + } +} + +/** + * A specification for a tokenized tag. + */ +export class TagSpec extends SubstringSpec { + /** + * The tag this spec represents. + * @type {string} + */ + tag + /** + * The schema prefix for this tag, if any. + * @type {string} + */ + library + + constructor(tag, start, end, librarySchema) { + super(start, end) + + this.tag = tag.trim() + this.library = librarySchema + } +} + +/** + * A specification for a tokenized tag group. + */ +export class GroupSpec extends SubstringSpec { + /** + * The child group specifications. + * @type {GroupSpec[]} + */ + children + + constructor(start, end, children) { + super(start, end) + + this.children = children + } +} + +/** + * A specification for a tokenized column splice template. + */ +export class ColumnSpliceSpec extends SubstringSpec { + /** + * The column name this spec refers to. + * @type {string} + */ + columnName + + constructor(name, start, end) { + super(start, end) + + this.columnName = name.trim() + } +} + +class TokenizerState { + constructor() { + this.currentToken = '' // Characters in the token currently being parsed + this.groupDepth = 0 + this.startingIndex = 0 // Starting index of this token + this.lastDelimiter = [undefined, -1] // Type and position of the last delimiter + this.librarySchema = '' + this.columnSpliceIndex = -1 //Index of the last { if this token is column splice + this.currentGroupStack = [[]] + this.parenthesesStack = [] + } +} + +/** + * Class for tokenizing HED strings. + */ +export class HedStringTokenizerNew { + constructor(hedString) { + this.hedString = hedString + this.issues = [] + this.state = null + } + + /** + * Split the HED string into delimiters and tags. + * + * @returns {[TagSpec[], GroupSpec, Object]} The tag specifications, group bounds, and any issues found. + */ + tokenize() { + this.initializeTokenizer() + + for (let i = 0; i < this.hedString.length; i++) { + const character = this.hedString.charAt(i) + this.handleCharacter(i, character) + //this.tokenizeCharacter(i, character) + if (this.issues.length > 0) { + return [null, null, this.issues] + } + } + this.finalizeTokenizer() + if (this.issues.length > 0) { + return [null, null, this.issues] + } else { + return [this.state.currentGroupStack.pop(), this.state.parenthesesStack.pop(), []] + } + } + + resetToken(i) { + this.state.startingIndex = i + 1 + this.state.currentToken = '' + this.state.librarySchema = '' + } + + finalizeTokenizer() { + if (this.state.lastDelimiter[0] === CHARACTERS.OPENING_COLUMN) { + // Extra opening brace + this.pushIssue('unclosedCurlyBrace', this.state.lastDelimiter[1]) + } else if (this.state.lastDelimiter[0] === CHARACTERS.OPENING_GROUP) { + // Extra opening parenthesis + this.pushIssue('unclosedParentheses', this.state.lastDelimiter[1]) + } else if ( + this.state.lastDelimiter[0] === CHARACTERS.COMMA && + this.hedString.slice(this.state.lastDelimiter[1] + 1).trim().length === 0 + ) { + this.pushIssue('emptyTagFound', this.state.lastDelimiter[1]) // Extra comma + } else { + this.unwindGroupStack() + } + } + + initializeTokenizer() { + this.issues = [] + this.state = new TokenizerState() + this.state.parenthesesStack = [new GroupSpec(0, this.hedString.length, [])] + } + + handleCharacter(i, character) { + const characterHandler = { + [CHARACTERS.OPENING_GROUP]: () => this.handleOpeningGroup(i), + [CHARACTERS.CLOSING_GROUP]: () => this.handleClosingGroup(i), + [CHARACTERS.OPENING_COLUMN]: () => this.handleOpeningColumn(i), + [CHARACTERS.CLOSING_COLUMN]: () => this.handleClosingColumn(i), + [CHARACTERS.COMMA]: () => this.handleComma(i), + [CHARACTERS.COLON]: () => this.handleColon(character), + }[character] // Selects the character handler based on the value of character + + if (characterHandler) { + characterHandler() + } else if (invalidCharacters.has(character)) { + this.pushInvalidCharacterIssue(character, i) + } else { + this.state.currentToken += character + } + } + + handleComma(i) { + if ( + this.state.lastDelimiter[0] === CHARACTERS.COMMA && + this.hedString.slice(this.state.lastDelimiter[1] + 1, i).trim().length === 0 + ) { + this.pushIssue('emptyTagFound', this.state.lastDelimiter[1]) // Check for empty group between commas + } else if ( + this.state.currentToken.trim().length === 0 && + [CHARACTERS.CLOSING_GROUP, CHARACTERS.CLOSING_COLUMN].includes(this.state.lastDelimiter[0]) + ) { + this.resetToken(i) + } else { + this.pushTag(i) + this.state.lastDelimiter = [CHARACTERS.COMMA, i] + } + } + + handleOpeningGroup(i) { + if (this.state.lastDelimiter[0] === CHARACTERS.OPENING_COLUMN) { + this.pushIssue('unclosedCurlyBrace', this.state.lastDelimiter[1]) + } else { + this.state.currentGroupStack.push([]) + this.state.parenthesesStack.push(new GroupSpec(i, undefined, [])) + this.resetToken(i) + this.state.groupDepth++ + this.state.lastDelimiter = [CHARACTERS.OPENING_GROUP, i] + } + } + + handleClosingGroup(i) { + if (this.state.currentToken.trim().length > 0) { + // only push a tag if it has length > 0. Empty groups are allowed. + this.pushTag(i) + } + if (this.state.groupDepth <= 0) { + // If the group depth is <= 0, it means there's no corresponding opening group. + this.pushIssue('unopenedParenthesis', i) + } else if (this.state.lastDelimiter[0] === CHARACTERS.OPENING_COLUMN) { + this.pushIssue('unclosedCurlyBrace', this.state.lastDelimiter[1]) + } else { + // Close the group by updating its bounds and moving it to the parent group. + this.closeGroup(i) + this.state.lastDelimiter = [CHARACTERS.CLOSING_GROUP, i] + } + } + + handleOpeningColumn(i) { + if (this.state.currentToken.trim().length > 0) { + // In the middle of a token -- can't have an opening brace + this.pushInvalidCharacterIssue(CHARACTERS.OPENING_COLUMN, i) + } else if (this.state.lastDelimiter[0] === CHARACTERS.OPENING_COLUMN) { + // + this.pushIssue('nestedCurlyBrace', i) + } else { + this.state.lastDelimiter = [CHARACTERS.OPENING_COLUMN, i] + } + } + + handleClosingColumn(i) { + if (this.state.lastDelimiter !== CHARACTERS.OPENING_COLUMN) { + // Column splice not in progress + this.pushIssue('unopenedCurlyBrace', i) + } else if (!this.state.currentToken.trim()) { + // Ensure that column slice is not empty + this.pushIssue('emptyCurlyBrace', i) + } else { + // Close column by updating bounds and moving it to the parent group, push a column splice on the stack. + this.state.currentGroupStack[this.state.groupDepth].push( + new ColumnSpliceSpec(this.state.currentToken.trim(), this.state.startingIndex, i), + ) + this.resetToken(i) + this.state.lastDelimiter = [CHARACTERS.CLOSING_COLUMN, i] + } + } + + handleColon(i) { + if (!this.state.librarySchema) { + // If a colon has not been seen -- its a library. Ignore other colons. + this.state.librarySchema = this.state.currentToken + this.resetToken(i) + } else { + this.state.currentToken += CHARACTERS.COLON + } + } + + unwindGroupStack() { + while (this.state.groupDepth > 0) { + this.pushIssue( + 'unclosedParenthesis', + this.state.parenthesesStack[this.state.parenthesesStack.length - 1].bounds[0], + ) + this.closeGroup(this.hedString.length) + } + } + + pushTag(i) { + // Called when a token has been parsed + const token = this.state.currentToken.trim() + if (!token) { + // Empty tokens cannot be pushed + this.pushIssue('emptyTagFound', i) + } else if (token.startsWith(CHARACTERS.SLASH) || token.endsWith(CHARACTERS.SLASH)) { + this.pushIssue('extraSlash', this.state.startingIndex) + } else { + const bounds = getTrimmedBounds(this.state.currentToken) + this.state.currentGroupStack[this.state.groupDepth].push( + new TagSpec( + this.state.currentToken.trim(), + this.state.startingIndex + bounds[0], + this.state.startingIndex + bounds[1], + this.state.librarySchema, + ), + ) + this.resetToken(i) + } + } + + closeGroup(i) { + const groupSpec = this.state.parenthesesStack.pop() + groupSpec.bounds[1] = i + 1 + this.state.parenthesesStack[this.state.groupDepth - 1].children.push(groupSpec) + this.state.currentGroupStack[this.state.groupDepth - 1].push(this.state.currentGroupStack.pop()) + this.state.groupDepth-- + this.resetToken(i) + } + + checkValueTagForInvalidCharacters() { + const formToCheck = replaceTagNameWithPound(this.state.currentToken) + for (let i = 0; i < formToCheck.length; i++) { + const character = formToCheck.charAt(i) + if (invalidCharactersOutsideOfValues.has(character)) { + this.pushInvalidCharacterIssue(character, this.state.startingIndex + i) + } + } + } + + pushIssue(issueCode, index) { + this.issues.push(generateIssue(issueCode, { index, string: this.hedString })) + } + + pushInvalidCharacterIssue(character, index) { + this.issues.push( + generateIssue('invalidCharacter', { character: unicodeName(character), index, string: this.hedString }), + ) + } +} diff --git a/tests/temp.spec.js b/tests/temp.spec.js index 37a058cf..74596729 100644 --- a/tests/temp.spec.js +++ b/tests/temp.spec.js @@ -14,15 +14,19 @@ import ColumnSplicer from '../parser/columnSplicer' import ParsedHedGroup from '../parser/parsedHedGroup' import { HedStringTokenizer } from '../parser/tokenizer' import { HedStringTokenizerOld } from '../parser/tokenizerOld' +import { HedStringTokenizerNew } from '../parser/tokenizerNew' describe('HED string parsing', () => { it('should include each group as its own single element', () => { - //const hedString = "Action/Move/Flex,(Relation/Spatial-relation/Left-side-of,Action/Move/Bend,Upper-extremity/Elbow),Position/X-position/70 px,Position/Y-position/23 px" + const hedString = + 'Action/Move/Flex,(Relation/Spatial-relation/Left-side-of,Action/Move/Bend,Upper-extremity/Elbow),Position/X-position/70 px,Position/Y-position/23 px' //const hedString = 'x/y w/z' - const hedString = '(r,z)' + //const hedString = '(r,z)' //const hedString = 'r,' //const hedString = 'r,y' - const tok = new HedStringTokenizer(hedString) + //const hedString = 'r' + //const hedString = '(r),p' + const tok = new HedStringTokenizerNew(hedString) const [tagSpecs, groupBounds, tokenizingIssues] = tok.tokenize() assert.isEmpty(Object.values(tokenizingIssues).flat(), 'Parsing issues occurred') //const hedString = 'Action/Move/My-flex,(Relation/Spatial-relation/Left-side-of,Action/Move/My-bend,Upper-extremity/My-elbow),Position/X-position/70 m,Position/Y-position/23 m' From bd7372405555109620dbdb3845fa25f6cfeadc1f Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Sat, 19 Oct 2024 13:29:21 -0500 Subject: [PATCH 08/21] Working on slash handling --- parser/tokenizer.js | 283 ++++++++---------- .../{tokenizerNew.js => tokenizerFirstTry.js} | 273 ++++++++++------- .../{tokenizerOld.js => tokenizerOriginal.js} | 2 +- tests/temp.spec.js | 13 +- tests/tockenizerErrorTests.spec.js | 4 +- 5 files changed, 295 insertions(+), 280 deletions(-) rename parser/{tokenizerNew.js => tokenizerFirstTry.js} (50%) rename parser/{tokenizerOld.js => tokenizerOriginal.js} (99%) diff --git a/parser/tokenizer.js b/parser/tokenizer.js index 6c0eb199..0664382b 100644 --- a/parser/tokenizer.js +++ b/parser/tokenizer.js @@ -113,16 +113,11 @@ class TokenizerState { this.currentToken = '' // Characters in the token currently being parsed this.groupDepth = 0 this.startingIndex = 0 // Starting index of this token - this.resetIndexFlag = false - this.slashFound = false - this.commaFound = false // A comma is hanging there -- if there is nothing else coming, it is a problem + this.lastDelimiter = [undefined, -1] // Type and position of the last delimiter this.librarySchema = '' - this.columnSpliceIndex = -1 //Index of { if this token is column splice + this.lastSlash = -1 // Position of the last slash in current token this.currentGroupStack = [[]] this.parenthesesStack = [] - this.ignoringCharacters = false // If we encounter error in a token, we want to just skip until we can recover. - this.closingGroup = false - // this.closingColumn = false } } @@ -132,7 +127,7 @@ class TokenizerState { export class HedStringTokenizer { constructor(hedString) { this.hedString = hedString - this.syntaxIssues = [] + this.issues = [] this.state = null } @@ -146,80 +141,58 @@ export class HedStringTokenizer { for (let i = 0; i < this.hedString.length; i++) { const character = this.hedString.charAt(i) - this.tokenizeCharacter(i, character) - if (this.state.resetIndexFlag) { - this.state.resetIndexFlag = false - this.state.startingIndex = i + 1 - this.state.currentToken = '' + this.handleCharacter(i, character) + //this.tokenizeCharacter(i, character) + if (this.issues.length > 0) { + return [null, null, this.issues] } } - this.pushTag(this.hedString.length, true) - - if (this.state.columnSpliceIndex >= 0) { - this.pushIssue('unclosedCurlyBrace', this.state.columnSpliceIndex) - } - - this.unwindGroupStack() - - const tagSpecs = this.state.currentGroupStack.pop() - const groupSpecs = this.state.parenthesesStack.pop() - const issues = { - syntax: this.syntaxIssues, - conversion: [], + this.finalizeTokenizer() + if (this.issues.length > 0) { + return [null, null, this.issues] + } else { + return [this.state.currentGroupStack.pop(), this.state.parenthesesStack.pop(), []] } - return [tagSpecs, groupSpecs, issues] } - initializeTokenizer() { - this.syntaxIssues = [] - this.state = new TokenizerState() - this.state.parenthesesStack = [new GroupSpec(0, this.hedString.length, [])] + resetToken(i) { + this.state.startingIndex = i + 1 + this.state.currentToken = '' + this.state.librarySchema = '' + this.state.lastSlash = '-1' } - tokenizeCharacter(i, character) { - if (this.state.ignoringCharacters) { - this.handleIgnoringCharacters(i, character) + finalizeTokenizer() { + if (this.state.lastDelimiter[0] === CHARACTERS.OPENING_COLUMN) { + // Extra opening brace + this.pushIssue('unclosedCurlyBrace', this.state.lastDelimiter[1]) + } else if (this.state.lastDelimiter[0] === CHARACTERS.OPENING_GROUP) { + // Extra opening parenthesis + this.pushIssue('unclosedParentheses', this.state.lastDelimiter[1]) + } else if ( + this.state.lastDelimiter[0] === CHARACTERS.COMMA && + this.hedString.slice(this.state.lastDelimiter[1] + 1).trim().length === 0 + ) { + this.pushIssue('emptyTagFound', this.state.lastDelimiter[1]) // Extra comma } else { - this.handleCharacter(i, character) + this.unwindGroupStack() } } - handleIgnoringCharacters(i, character) { - // We have encountered a parsing error on this token and want to ignore until the next token. - const characterHandler = { - [CHARACTERS.CLOSING_GROUP]: () => { - this.clearToken() - this.handleClosingGroup(i) - }, - [CHARACTERS.COMMA]: () => { - this.clearToken() - this.handleClosingGroup(i) - }, - }[character] - - if (characterHandler) { - characterHandler() - } + initializeTokenizer() { + this.issues = [] + this.state = new TokenizerState() + this.state.parenthesesStack = [new GroupSpec(0, this.hedString.length, [])] } handleCharacter(i, character) { const characterHandler = { [CHARACTERS.OPENING_GROUP]: () => this.handleOpeningGroup(i), - [CHARACTERS.CLOSING_GROUP]: () => { - this.pushTag(i, false) - this.handleClosingGroup(i) - }, + [CHARACTERS.CLOSING_GROUP]: () => this.handleClosingGroup(i), [CHARACTERS.OPENING_COLUMN]: () => this.handleOpeningColumn(i), - [CHARACTERS.CLOSING_COLUMN]: () => { - this.pushTag(i) - this.handleClosingColumn(i) - }, - [CHARACTERS.COMMA]: () => { - this.state.commaFound = true - this.pushTag(i, false) - this.state.closingColumn = false - }, - [CHARACTERS.COLON]: () => this.handleColon(character), + [CHARACTERS.CLOSING_COLUMN]: () => this.handleClosingColumn(i), + [CHARACTERS.COMMA]: () => this.handleComma(i), + [CHARACTERS.COLON]: () => this.handleColon(i), [CHARACTERS.SLASH]: () => this.handleSlash(i), }[character] // Selects the character handler based on the value of character @@ -228,95 +201,102 @@ export class HedStringTokenizer { } else if (invalidCharacters.has(character)) { this.pushInvalidCharacterIssue(character, i) } else { - this.handleRegularCharacter(character) + this.state.currentToken += character + } + } + + handleComma(i) { + if ( + this.state.lastDelimiter[0] === CHARACTERS.COMMA && + this.hedString.slice(this.state.lastDelimiter[1] + 1, i).trim().length === 0 + ) { + this.pushIssue('emptyTagFound', this.state.lastDelimiter[1]) // Check for empty group between commas + } else if ( + this.state.currentToken.trim().length === 0 && + [CHARACTERS.CLOSING_GROUP, CHARACTERS.CLOSING_COLUMN].includes(this.state.lastDelimiter[0]) + ) { + this.resetToken(i) + } else { + this.pushTag(i) + this.state.lastDelimiter = [CHARACTERS.COMMA, i] + } + } + + handleSlash(i) { + const afterLastSlash = this.state.lastSlash === -1 ? 0 : this.state.lastSlash + 1 + if (this.hedString.slice(afterLastSlash, i).trim().length === 0) { + this.pushIssue('extraSlash', i) + } else { + this.state.currentToken += CHARACTERS.SLASH + this.state.lastSlash = i } } handleOpeningGroup(i) { - this.state.currentGroupStack.push([]) - this.state.parenthesesStack.push(new GroupSpec(i, undefined, [])) - this.state.resetIndexFlag = true - this.state.commaFound = false - this.state.groupDepth++ + if (this.state.lastDelimiter[0] === CHARACTERS.OPENING_COLUMN) { + this.pushIssue('unclosedCurlyBrace', this.state.lastDelimiter[1]) + } else { + this.state.currentGroupStack.push([]) + this.state.parenthesesStack.push(new GroupSpec(i, undefined, [])) + this.resetToken(i) + this.state.groupDepth++ + this.state.lastDelimiter = [CHARACTERS.OPENING_GROUP, i] + } } handleClosingGroup(i) { - this.state.closingGroup = true - // If the group depth is <= 0, it means there's no corresponding opening group. + if (this.state.currentToken.trim().length > 0) { + // only push a tag if it has length > 0. Empty groups are allowed. + this.pushTag(i) + } if (this.state.groupDepth <= 0) { + // If the group depth is <= 0, it means there's no corresponding opening group. this.pushIssue('unopenedParenthesis', i) - return + } else if (this.state.lastDelimiter[0] === CHARACTERS.OPENING_COLUMN) { + this.pushIssue('unclosedCurlyBrace', this.state.lastDelimiter[1]) + } else { + // Close the group by updating its bounds and moving it to the parent group. + this.closeGroup(i) + this.state.lastDelimiter = [CHARACTERS.CLOSING_GROUP, i] } - // Close the group by updating its bounds and moving it to the parent group. - this.closeGroup(i) - this.commaFound = false } handleOpeningColumn(i) { - // We're already in the middle of a token -- can't have an opening brace if (this.state.currentToken.trim().length > 0) { + // In the middle of a token -- can't have an opening brace this.pushInvalidCharacterIssue(CHARACTERS.OPENING_COLUMN, i) - this.state.ignoringCharacters = true - return - } - if (this.state.columnSpliceIndex >= 0) { + } else if (this.state.lastDelimiter[0] === CHARACTERS.OPENING_COLUMN) { + // this.pushIssue('nestedCurlyBrace', i) + } else { + this.state.lastDelimiter = [CHARACTERS.OPENING_COLUMN, i] } - this.state.columnSpliceIndex = i - this.state.commaFound = false } handleClosingColumn(i) { - // If a column splice is not in progress push an issue indicating an unopened curly brace. - if (this.state.columnSpliceIndex < 0) { + if (this.state.lastDelimiter !== CHARACTERS.OPENING_COLUMN) { + // Column splice not in progress this.pushIssue('unopenedCurlyBrace', i) - return - } - // Ensure that column slice is not empty - if (!this.state.currentToken) { + } else if (!this.state.currentToken.trim()) { + // Ensure that column slice is not empty this.pushIssue('emptyCurlyBrace', i) - return - } - - // Close the column by updating its bounds and moving it to the parent group, push a column splice on the stack. - this.state.currentGroupStack[this.state.groupDepth].push( - new ColumnSpliceSpec(this.state.currentToken.trim(), this.state.startingIndex, i), - ) - this.state.columnSpliceIndex = -1 - this.clearToken() - this.state.closingColumn = true // Used to indicate that - this.state.commaFound = false - } - - handleColon(character) { - if (!this.state.slashFound && !this.state.librarySchema) { - this.state.librarySchema = this.state.currentToken - this.state.resetIndexFlag = true } else { - this.state.currentToken += character - this.state.slashFound = false + // Close column by updating bounds and moving it to the parent group, push a column splice on the stack. + this.state.currentGroupStack[this.state.groupDepth].push( + new ColumnSpliceSpec(this.state.currentToken.trim(), this.state.startingIndex, i), + ) + this.resetToken(i) + this.state.lastDelimiter = [CHARACTERS.CLOSING_COLUMN, i] } } - handleSlash(i) { - if (!this.state.currentToken.trim() || this.state.slashFound) { - // Leading slash is error -- ignore rest of the token - this.pushIssue('extraSlash', i) - this.state.ignoringCharacters = true + handleColon(i) { + if (!this.state.librarySchema) { + // If colon has not been seen, it is a library. Ignore other colons. + this.state.librarySchema = this.state.currentToken + this.resetToken(i) } else { - this.state.slashFound = true - this.state.currentToken += CHARACTERS.SLASH - } - } - - handleRegularCharacter(character) { - // if (character != CHARACTERS.BLANK && this.state.closingColumn) { - // this.pushIssue('unparsedCurlyBraces', i) - // } - if (!this.state.ignoringCharacters) { - this.state.currentToken += character - this.state.slashFound = false - this.state.resetIndexFlag = this.state.currentToken === '' + this.state.currentToken += CHARACTERS.COLON } } @@ -330,29 +310,15 @@ export class HedStringTokenizer { } } - pushTag(i, isEndOfString) { + pushTag(i) { // Called when a token has been parsed const token = this.state.currentToken.trim() - if (!token && isEndOfString) { - // If empty token at end of string just return. - if (this.state.commaFound) { - this.pushIssue('emptyTagFound', i) - } - return - } - // If we're in the process of closing a group, reset the closingGroup flag (allows for empty groups) - if (this.state.closingGroup) { - // Empty groups are allowed. - this.state.closingGroup = false - } else if (this.state.slashFound) { - //Trailing token slash is an error - this.pushIssue('extraSlash', i) - } else if (!token) { - // Column spec has already been called. + if (!token) { + // Empty tokens cannot be pushed this.pushIssue('emptyTagFound', i) - } else if (this.state.columnSpliceIndex < 0) { - // Not a column splice so goes on group stack as a TagSpec - this.checkValueTagForInvalidCharacters() + } else if (token.startsWith(CHARACTERS.SLASH) || token.endsWith(CHARACTERS.SLASH)) { + this.pushIssue('extraSlash', this.state.startingIndex) + } else { const bounds = getTrimmedBounds(this.state.currentToken) this.state.currentGroupStack[this.state.groupDepth].push( new TagSpec( @@ -362,17 +328,8 @@ export class HedStringTokenizer { this.state.librarySchema, ), ) + this.resetToken(i) } - // Clear the current token and reset flags for the next iteration. - this.clearToken() - } - - clearToken() { - this.state.ignoringCharacters = false - this.state.resetIndexFlag = true - this.state.slashFound = false - this.state.librarySchema = '' - this.state.closingColumn = false } closeGroup(i) { @@ -381,7 +338,7 @@ export class HedStringTokenizer { this.state.parenthesesStack[this.state.groupDepth - 1].children.push(groupSpec) this.state.currentGroupStack[this.state.groupDepth - 1].push(this.state.currentGroupStack.pop()) this.state.groupDepth-- - //this.closingColumn = false + this.resetToken(i) } checkValueTagForInvalidCharacters() { @@ -395,16 +352,12 @@ export class HedStringTokenizer { } pushIssue(issueCode, index) { - this.syntaxIssues.push(generateIssue(issueCode, { index, string: this.hedString })) + this.issues.push(generateIssue(issueCode, { index, string: this.hedString })) } pushInvalidCharacterIssue(character, index) { - this.syntaxIssues.push( - generateIssue('invalidCharacter', { - character: unicodeName(character), - index, - string: this.hedString, - }), + this.issues.push( + generateIssue('invalidCharacter', { character: unicodeName(character), index, string: this.hedString }), ) } } diff --git a/parser/tokenizerNew.js b/parser/tokenizerFirstTry.js similarity index 50% rename from parser/tokenizerNew.js rename to parser/tokenizerFirstTry.js index d7824dc1..6c0eb199 100644 --- a/parser/tokenizerNew.js +++ b/parser/tokenizerFirstTry.js @@ -113,21 +113,26 @@ class TokenizerState { this.currentToken = '' // Characters in the token currently being parsed this.groupDepth = 0 this.startingIndex = 0 // Starting index of this token - this.lastDelimiter = [undefined, -1] // Type and position of the last delimiter + this.resetIndexFlag = false + this.slashFound = false + this.commaFound = false // A comma is hanging there -- if there is nothing else coming, it is a problem this.librarySchema = '' - this.columnSpliceIndex = -1 //Index of the last { if this token is column splice + this.columnSpliceIndex = -1 //Index of { if this token is column splice this.currentGroupStack = [[]] this.parenthesesStack = [] + this.ignoringCharacters = false // If we encounter error in a token, we want to just skip until we can recover. + this.closingGroup = false + // this.closingColumn = false } } /** * Class for tokenizing HED strings. */ -export class HedStringTokenizerNew { +export class HedStringTokenizer { constructor(hedString) { this.hedString = hedString - this.issues = [] + this.syntaxIssues = [] this.state = null } @@ -141,57 +146,81 @@ export class HedStringTokenizerNew { for (let i = 0; i < this.hedString.length; i++) { const character = this.hedString.charAt(i) - this.handleCharacter(i, character) - //this.tokenizeCharacter(i, character) - if (this.issues.length > 0) { - return [null, null, this.issues] + this.tokenizeCharacter(i, character) + if (this.state.resetIndexFlag) { + this.state.resetIndexFlag = false + this.state.startingIndex = i + 1 + this.state.currentToken = '' } } - this.finalizeTokenizer() - if (this.issues.length > 0) { - return [null, null, this.issues] - } else { - return [this.state.currentGroupStack.pop(), this.state.parenthesesStack.pop(), []] + this.pushTag(this.hedString.length, true) + + if (this.state.columnSpliceIndex >= 0) { + this.pushIssue('unclosedCurlyBrace', this.state.columnSpliceIndex) } - } - resetToken(i) { - this.state.startingIndex = i + 1 - this.state.currentToken = '' - this.state.librarySchema = '' - } + this.unwindGroupStack() - finalizeTokenizer() { - if (this.state.lastDelimiter[0] === CHARACTERS.OPENING_COLUMN) { - // Extra opening brace - this.pushIssue('unclosedCurlyBrace', this.state.lastDelimiter[1]) - } else if (this.state.lastDelimiter[0] === CHARACTERS.OPENING_GROUP) { - // Extra opening parenthesis - this.pushIssue('unclosedParentheses', this.state.lastDelimiter[1]) - } else if ( - this.state.lastDelimiter[0] === CHARACTERS.COMMA && - this.hedString.slice(this.state.lastDelimiter[1] + 1).trim().length === 0 - ) { - this.pushIssue('emptyTagFound', this.state.lastDelimiter[1]) // Extra comma - } else { - this.unwindGroupStack() + const tagSpecs = this.state.currentGroupStack.pop() + const groupSpecs = this.state.parenthesesStack.pop() + const issues = { + syntax: this.syntaxIssues, + conversion: [], } + return [tagSpecs, groupSpecs, issues] } initializeTokenizer() { - this.issues = [] + this.syntaxIssues = [] this.state = new TokenizerState() this.state.parenthesesStack = [new GroupSpec(0, this.hedString.length, [])] } + tokenizeCharacter(i, character) { + if (this.state.ignoringCharacters) { + this.handleIgnoringCharacters(i, character) + } else { + this.handleCharacter(i, character) + } + } + + handleIgnoringCharacters(i, character) { + // We have encountered a parsing error on this token and want to ignore until the next token. + const characterHandler = { + [CHARACTERS.CLOSING_GROUP]: () => { + this.clearToken() + this.handleClosingGroup(i) + }, + [CHARACTERS.COMMA]: () => { + this.clearToken() + this.handleClosingGroup(i) + }, + }[character] + + if (characterHandler) { + characterHandler() + } + } + handleCharacter(i, character) { const characterHandler = { [CHARACTERS.OPENING_GROUP]: () => this.handleOpeningGroup(i), - [CHARACTERS.CLOSING_GROUP]: () => this.handleClosingGroup(i), + [CHARACTERS.CLOSING_GROUP]: () => { + this.pushTag(i, false) + this.handleClosingGroup(i) + }, [CHARACTERS.OPENING_COLUMN]: () => this.handleOpeningColumn(i), - [CHARACTERS.CLOSING_COLUMN]: () => this.handleClosingColumn(i), - [CHARACTERS.COMMA]: () => this.handleComma(i), + [CHARACTERS.CLOSING_COLUMN]: () => { + this.pushTag(i) + this.handleClosingColumn(i) + }, + [CHARACTERS.COMMA]: () => { + this.state.commaFound = true + this.pushTag(i, false) + this.state.closingColumn = false + }, [CHARACTERS.COLON]: () => this.handleColon(character), + [CHARACTERS.SLASH]: () => this.handleSlash(i), }[character] // Selects the character handler based on the value of character if (characterHandler) { @@ -199,92 +228,95 @@ export class HedStringTokenizerNew { } else if (invalidCharacters.has(character)) { this.pushInvalidCharacterIssue(character, i) } else { - this.state.currentToken += character - } - } - - handleComma(i) { - if ( - this.state.lastDelimiter[0] === CHARACTERS.COMMA && - this.hedString.slice(this.state.lastDelimiter[1] + 1, i).trim().length === 0 - ) { - this.pushIssue('emptyTagFound', this.state.lastDelimiter[1]) // Check for empty group between commas - } else if ( - this.state.currentToken.trim().length === 0 && - [CHARACTERS.CLOSING_GROUP, CHARACTERS.CLOSING_COLUMN].includes(this.state.lastDelimiter[0]) - ) { - this.resetToken(i) - } else { - this.pushTag(i) - this.state.lastDelimiter = [CHARACTERS.COMMA, i] + this.handleRegularCharacter(character) } } handleOpeningGroup(i) { - if (this.state.lastDelimiter[0] === CHARACTERS.OPENING_COLUMN) { - this.pushIssue('unclosedCurlyBrace', this.state.lastDelimiter[1]) - } else { - this.state.currentGroupStack.push([]) - this.state.parenthesesStack.push(new GroupSpec(i, undefined, [])) - this.resetToken(i) - this.state.groupDepth++ - this.state.lastDelimiter = [CHARACTERS.OPENING_GROUP, i] - } + this.state.currentGroupStack.push([]) + this.state.parenthesesStack.push(new GroupSpec(i, undefined, [])) + this.state.resetIndexFlag = true + this.state.commaFound = false + this.state.groupDepth++ } handleClosingGroup(i) { - if (this.state.currentToken.trim().length > 0) { - // only push a tag if it has length > 0. Empty groups are allowed. - this.pushTag(i) - } + this.state.closingGroup = true + // If the group depth is <= 0, it means there's no corresponding opening group. if (this.state.groupDepth <= 0) { - // If the group depth is <= 0, it means there's no corresponding opening group. this.pushIssue('unopenedParenthesis', i) - } else if (this.state.lastDelimiter[0] === CHARACTERS.OPENING_COLUMN) { - this.pushIssue('unclosedCurlyBrace', this.state.lastDelimiter[1]) - } else { - // Close the group by updating its bounds and moving it to the parent group. - this.closeGroup(i) - this.state.lastDelimiter = [CHARACTERS.CLOSING_GROUP, i] + return } + // Close the group by updating its bounds and moving it to the parent group. + this.closeGroup(i) + this.commaFound = false } handleOpeningColumn(i) { + // We're already in the middle of a token -- can't have an opening brace if (this.state.currentToken.trim().length > 0) { - // In the middle of a token -- can't have an opening brace this.pushInvalidCharacterIssue(CHARACTERS.OPENING_COLUMN, i) - } else if (this.state.lastDelimiter[0] === CHARACTERS.OPENING_COLUMN) { - // + this.state.ignoringCharacters = true + return + } + if (this.state.columnSpliceIndex >= 0) { this.pushIssue('nestedCurlyBrace', i) - } else { - this.state.lastDelimiter = [CHARACTERS.OPENING_COLUMN, i] } + this.state.columnSpliceIndex = i + this.state.commaFound = false } handleClosingColumn(i) { - if (this.state.lastDelimiter !== CHARACTERS.OPENING_COLUMN) { - // Column splice not in progress + // If a column splice is not in progress push an issue indicating an unopened curly brace. + if (this.state.columnSpliceIndex < 0) { this.pushIssue('unopenedCurlyBrace', i) - } else if (!this.state.currentToken.trim()) { - // Ensure that column slice is not empty + return + } + // Ensure that column slice is not empty + if (!this.state.currentToken) { this.pushIssue('emptyCurlyBrace', i) - } else { - // Close column by updating bounds and moving it to the parent group, push a column splice on the stack. - this.state.currentGroupStack[this.state.groupDepth].push( - new ColumnSpliceSpec(this.state.currentToken.trim(), this.state.startingIndex, i), - ) - this.resetToken(i) - this.state.lastDelimiter = [CHARACTERS.CLOSING_COLUMN, i] + return } + + // Close the column by updating its bounds and moving it to the parent group, push a column splice on the stack. + this.state.currentGroupStack[this.state.groupDepth].push( + new ColumnSpliceSpec(this.state.currentToken.trim(), this.state.startingIndex, i), + ) + this.state.columnSpliceIndex = -1 + this.clearToken() + this.state.closingColumn = true // Used to indicate that + this.state.commaFound = false } - handleColon(i) { - if (!this.state.librarySchema) { - // If a colon has not been seen -- its a library. Ignore other colons. + handleColon(character) { + if (!this.state.slashFound && !this.state.librarySchema) { this.state.librarySchema = this.state.currentToken - this.resetToken(i) + this.state.resetIndexFlag = true + } else { + this.state.currentToken += character + this.state.slashFound = false + } + } + + handleSlash(i) { + if (!this.state.currentToken.trim() || this.state.slashFound) { + // Leading slash is error -- ignore rest of the token + this.pushIssue('extraSlash', i) + this.state.ignoringCharacters = true } else { - this.state.currentToken += CHARACTERS.COLON + this.state.slashFound = true + this.state.currentToken += CHARACTERS.SLASH + } + } + + handleRegularCharacter(character) { + // if (character != CHARACTERS.BLANK && this.state.closingColumn) { + // this.pushIssue('unparsedCurlyBraces', i) + // } + if (!this.state.ignoringCharacters) { + this.state.currentToken += character + this.state.slashFound = false + this.state.resetIndexFlag = this.state.currentToken === '' } } @@ -298,15 +330,29 @@ export class HedStringTokenizerNew { } } - pushTag(i) { + pushTag(i, isEndOfString) { // Called when a token has been parsed const token = this.state.currentToken.trim() - if (!token) { - // Empty tokens cannot be pushed + if (!token && isEndOfString) { + // If empty token at end of string just return. + if (this.state.commaFound) { + this.pushIssue('emptyTagFound', i) + } + return + } + // If we're in the process of closing a group, reset the closingGroup flag (allows for empty groups) + if (this.state.closingGroup) { + // Empty groups are allowed. + this.state.closingGroup = false + } else if (this.state.slashFound) { + //Trailing token slash is an error + this.pushIssue('extraSlash', i) + } else if (!token) { + // Column spec has already been called. this.pushIssue('emptyTagFound', i) - } else if (token.startsWith(CHARACTERS.SLASH) || token.endsWith(CHARACTERS.SLASH)) { - this.pushIssue('extraSlash', this.state.startingIndex) - } else { + } else if (this.state.columnSpliceIndex < 0) { + // Not a column splice so goes on group stack as a TagSpec + this.checkValueTagForInvalidCharacters() const bounds = getTrimmedBounds(this.state.currentToken) this.state.currentGroupStack[this.state.groupDepth].push( new TagSpec( @@ -316,8 +362,17 @@ export class HedStringTokenizerNew { this.state.librarySchema, ), ) - this.resetToken(i) } + // Clear the current token and reset flags for the next iteration. + this.clearToken() + } + + clearToken() { + this.state.ignoringCharacters = false + this.state.resetIndexFlag = true + this.state.slashFound = false + this.state.librarySchema = '' + this.state.closingColumn = false } closeGroup(i) { @@ -326,7 +381,7 @@ export class HedStringTokenizerNew { this.state.parenthesesStack[this.state.groupDepth - 1].children.push(groupSpec) this.state.currentGroupStack[this.state.groupDepth - 1].push(this.state.currentGroupStack.pop()) this.state.groupDepth-- - this.resetToken(i) + //this.closingColumn = false } checkValueTagForInvalidCharacters() { @@ -340,12 +395,16 @@ export class HedStringTokenizerNew { } pushIssue(issueCode, index) { - this.issues.push(generateIssue(issueCode, { index, string: this.hedString })) + this.syntaxIssues.push(generateIssue(issueCode, { index, string: this.hedString })) } pushInvalidCharacterIssue(character, index) { - this.issues.push( - generateIssue('invalidCharacter', { character: unicodeName(character), index, string: this.hedString }), + this.syntaxIssues.push( + generateIssue('invalidCharacter', { + character: unicodeName(character), + index, + string: this.hedString, + }), ) } } diff --git a/parser/tokenizerOld.js b/parser/tokenizerOriginal.js similarity index 99% rename from parser/tokenizerOld.js rename to parser/tokenizerOriginal.js index 54c34f79..068f29c9 100644 --- a/parser/tokenizerOld.js +++ b/parser/tokenizerOriginal.js @@ -98,7 +98,7 @@ class ColumnSpliceSpec extends SubstringSpec { /** * Class for tokenizing HED strings. */ -export class HedStringTokenizerOld { +export class HedStringTokenizerOriginal { /** * The HED string being parsed. * @type {string} diff --git a/tests/temp.spec.js b/tests/temp.spec.js index 74596729..9041e7ad 100644 --- a/tests/temp.spec.js +++ b/tests/temp.spec.js @@ -13,20 +13,23 @@ import { buildSchemas } from '../validator/schema/init' import ColumnSplicer from '../parser/columnSplicer' import ParsedHedGroup from '../parser/parsedHedGroup' import { HedStringTokenizer } from '../parser/tokenizer' -import { HedStringTokenizerOld } from '../parser/tokenizerOld' -import { HedStringTokenizerNew } from '../parser/tokenizerNew' +import { HedStringTokenizerOriginal } from '../parser/tokenizerOriginal' +import { HedStringTokenizerFirstTry } from '../parser/tokenizerFirstTry' describe('HED string parsing', () => { it('should include each group as its own single element', () => { - const hedString = - 'Action/Move/Flex,(Relation/Spatial-relation/Left-side-of,Action/Move/Bend,Upper-extremity/Elbow),Position/X-position/70 px,Position/Y-position/23 px' + //const hedString = + 'Action/Move/Flex,(Relation/Spatial-relation/Left-side-of,Action/Move/Bend,Upper-extremity/Elbow),Position/X-position/70 px,Position/Y-position/23 px' //const hedString = 'x/y w/z' //const hedString = '(r,z)' //const hedString = 'r,' //const hedString = 'r,y' //const hedString = 'r' //const hedString = '(r),p' - const tok = new HedStringTokenizerNew(hedString) + //const hedString = '/x' + //const hedString = 'x//y' + const hedString = 'x/' + const tok = new HedStringTokenizer(hedString) const [tagSpecs, groupBounds, tokenizingIssues] = tok.tokenize() assert.isEmpty(Object.values(tokenizingIssues).flat(), 'Parsing issues occurred') //const hedString = 'Action/Move/My-flex,(Relation/Spatial-relation/Left-side-of,Action/Move/My-bend,Upper-extremity/My-elbow),Position/X-position/70 m,Position/Y-position/23 m' diff --git a/tests/tockenizerErrorTests.spec.js b/tests/tockenizerErrorTests.spec.js index c304e171..dff65450 100644 --- a/tests/tockenizerErrorTests.spec.js +++ b/tests/tockenizerErrorTests.spec.js @@ -6,7 +6,7 @@ import * as hed from '../validator/event' import { BidsHedIssue } from '../bids/types/issues' import path from 'path' import { HedStringTokenizer } from '../parser/tokenizer' -import { HedStringTokenizerOld } from '../parser/tokenizerOld' +import { HedStringTokenizerOriginal } from '../parser/tokenizerOriginal' //import { HedStringTokenizerNew } from '../parser/tokenizerNew' import { generateIssue, IssueError } from '../common/issues/issues' import { errorTests } from './tokenizerErrorData' @@ -93,7 +93,7 @@ describe('HED tokenizer validation using JSON tests', () => { }) test.each(tests)('Original tokenizer: Invalid string: %s ', (ex) => { - stringTokenizer(ex.hedCode, ex.code, ex.name, new HedStringTokenizerOld(ex.string), true, itemLog) + stringTokenizer(ex.hedCode, ex.code, ex.name, new HedStringTokenizerOriginal(ex.string), true, itemLog) }) } }) From a918907d35bbbd43d131c02ef43d48e0e5063278 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Sat, 19 Oct 2024 14:04:40 -0500 Subject: [PATCH 09/21] Updated the tokenizer to handle empty tags --- common/issues/data.js | 4 ++-- parser/tokenizer.js | 12 ++++++++++-- tests/temp.spec.js | 4 ++-- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/common/issues/data.js b/common/issues/data.js index 67b59b3c..53a73239 100644 --- a/common/issues/data.js +++ b/common/issues/data.js @@ -46,12 +46,12 @@ export default { extraSlash: { hedCode: 'TAG_INVALID', level: 'error', - message: stringTemplate`Tag extra slash at index ${'index'} of string "${'string'}".`, + message: stringTemplate`Tag has extra slash at index ${'index'} of string "${'string'}".`, }, extraBlank: { hedCode: 'TAG_INVALID', level: 'error', - message: stringTemplate`Tag extra blank at index ${'index'} of string "${'string'}".`, + message: stringTemplate`Tag has extra blank at index ${'index'} of string "${'string'}".`, }, extraCommaOrInvalid: { hedCode: 'TAG_INVALID', diff --git a/parser/tokenizer.js b/parser/tokenizer.js index 0664382b..77062686 100644 --- a/parser/tokenizer.js +++ b/parser/tokenizer.js @@ -174,6 +174,8 @@ export class HedStringTokenizer { this.hedString.slice(this.state.lastDelimiter[1] + 1).trim().length === 0 ) { this.pushIssue('emptyTagFound', this.state.lastDelimiter[1]) // Extra comma + } else if (this.state.lastSlash >= 0 && this.hedString.slice(this.state.lastSlash + 1).trim().length === 0) { + this.pushIssue('extraSlash', this.state.lastSlash) // Extra slash } else { this.unwindGroupStack() } @@ -223,9 +225,15 @@ export class HedStringTokenizer { } handleSlash(i) { - const afterLastSlash = this.state.lastSlash === -1 ? 0 : this.state.lastSlash + 1 - if (this.hedString.slice(afterLastSlash, i).trim().length === 0) { + if (this.hedString.slice(0, i).trim().length === 0) { + // Slash at beginning of tag. this.pushIssue('extraSlash', i) + } else if (this.state.lastSlash >= 0 && this.hedString.slice(this.state.lastSlash + 1, i).trim().length === 0) { + this.pushIssue('extraSlash', i) + } else if (i > 0 && this.hedString.charAt(i - 1) === CHARACTERS.BLANK) { + this.pushIssue('extraBlank', i - 1) + } else if (i < this.hedString.length - 1 && this.hedString.charAt(i + 1) === CHARACTERS.BLANK) { + this.pushIssue('extraBlank', i + 1) } else { this.state.currentToken += CHARACTERS.SLASH this.state.lastSlash = i diff --git a/tests/temp.spec.js b/tests/temp.spec.js index 9041e7ad..83c9cc33 100644 --- a/tests/temp.spec.js +++ b/tests/temp.spec.js @@ -27,8 +27,8 @@ describe('HED string parsing', () => { //const hedString = 'r' //const hedString = '(r),p' //const hedString = '/x' - //const hedString = 'x//y' - const hedString = 'x/' + const hedString = 'x/ /y' + //const hedString = 'x/' const tok = new HedStringTokenizer(hedString) const [tagSpecs, groupBounds, tokenizingIssues] = tok.tokenize() assert.isEmpty(Object.values(tokenizingIssues).flat(), 'Parsing issues occurred') From f9b659da41d890f257d18cd689bec882f12af252 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Sat, 19 Oct 2024 16:40:49 -0500 Subject: [PATCH 10/21] Basic tests are running --- parser/tokenizer.js | 25 +++++++++------ parser/tokenizerFirstTry.js | 2 +- tests/temp.spec.js | 3 +- tests/tockenizerErrorTests.spec.js | 5 ++- tests/tokenizerErrorData.js | 49 ++++++++++++++++------------- tests/tokenizerPassingData.js | 31 +++++++++--------- tests/tokenizerPassingTests.spec.js | 8 ++--- 7 files changed, 68 insertions(+), 55 deletions(-) diff --git a/parser/tokenizer.js b/parser/tokenizer.js index 77062686..596475b5 100644 --- a/parser/tokenizer.js +++ b/parser/tokenizer.js @@ -138,7 +138,11 @@ export class HedStringTokenizer { */ tokenize() { this.initializeTokenizer() - + // Empty strings cannot be tokenized + if (this.hedString.trim().length === 0) { + this.pushIssue('emptyTagFound', 0) + return [null, null, this.issues] + } for (let i = 0; i < this.hedString.length; i++) { const character = this.hedString.charAt(i) this.handleCharacter(i, character) @@ -177,6 +181,9 @@ export class HedStringTokenizer { } else if (this.state.lastSlash >= 0 && this.hedString.slice(this.state.lastSlash + 1).trim().length === 0) { this.pushIssue('extraSlash', this.state.lastSlash) // Extra slash } else { + if (this.state.currentToken.trim().length > 0) { + this.pushTag(this.hedString.length) + } this.unwindGroupStack() } } @@ -229,11 +236,15 @@ export class HedStringTokenizer { // Slash at beginning of tag. this.pushIssue('extraSlash', i) } else if (this.state.lastSlash >= 0 && this.hedString.slice(this.state.lastSlash + 1, i).trim().length === 0) { - this.pushIssue('extraSlash', i) + this.pushIssue('extraSlash', i) // Slashes with only blanks between } else if (i > 0 && this.hedString.charAt(i - 1) === CHARACTERS.BLANK) { + // Blank before slash this.pushIssue('extraBlank', i - 1) } else if (i < this.hedString.length - 1 && this.hedString.charAt(i + 1) === CHARACTERS.BLANK) { + //Blank after this.pushIssue('extraBlank', i + 1) + } else if (this.hedString.slice(i).trim().length === 0) { + this.pushIssue('extraSlash', this.state.startingIndex) } else { this.state.currentToken += CHARACTERS.SLASH this.state.lastSlash = i @@ -319,13 +330,9 @@ export class HedStringTokenizer { } pushTag(i) { - // Called when a token has been parsed - const token = this.state.currentToken.trim() - if (!token) { - // Empty tokens cannot be pushed + if (this.state.currentToken.trim().length == 0) { this.pushIssue('emptyTagFound', i) - } else if (token.startsWith(CHARACTERS.SLASH) || token.endsWith(CHARACTERS.SLASH)) { - this.pushIssue('extraSlash', this.state.startingIndex) + return } else { const bounds = getTrimmedBounds(this.state.currentToken) this.state.currentGroupStack[this.state.groupDepth].push( @@ -346,7 +353,7 @@ export class HedStringTokenizer { this.state.parenthesesStack[this.state.groupDepth - 1].children.push(groupSpec) this.state.currentGroupStack[this.state.groupDepth - 1].push(this.state.currentGroupStack.pop()) this.state.groupDepth-- - this.resetToken(i) + //this.resetToken(i) } checkValueTagForInvalidCharacters() { diff --git a/parser/tokenizerFirstTry.js b/parser/tokenizerFirstTry.js index 6c0eb199..4e7a0c84 100644 --- a/parser/tokenizerFirstTry.js +++ b/parser/tokenizerFirstTry.js @@ -129,7 +129,7 @@ class TokenizerState { /** * Class for tokenizing HED strings. */ -export class HedStringTokenizer { +export class HedStringTokenizerFirstTry { constructor(hedString) { this.hedString = hedString this.syntaxIssues = [] diff --git a/tests/temp.spec.js b/tests/temp.spec.js index 83c9cc33..79d8b291 100644 --- a/tests/temp.spec.js +++ b/tests/temp.spec.js @@ -27,8 +27,9 @@ describe('HED string parsing', () => { //const hedString = 'r' //const hedString = '(r),p' //const hedString = '/x' - const hedString = 'x/ /y' + //const hedString = 'x/ /y' //const hedString = 'x/' + const hedString = 'x' const tok = new HedStringTokenizer(hedString) const [tagSpecs, groupBounds, tokenizingIssues] = tok.tokenize() assert.isEmpty(Object.values(tokenizingIssues).flat(), 'Parsing issues occurred') diff --git a/tests/tockenizerErrorTests.spec.js b/tests/tockenizerErrorTests.spec.js index dff65450..6df52c0e 100644 --- a/tests/tockenizerErrorTests.spec.js +++ b/tests/tockenizerErrorTests.spec.js @@ -87,12 +87,11 @@ describe('HED tokenizer validation using JSON tests', () => { }) if (tests && tests.length > 0) { - test.each(tests)('NewTokenizer: Invalid string: %s ', (ex) => { - //console.log(ex) + test.each(tests)('Tokenizer: %s ', (ex) => { stringTokenizer(ex.hedCode, ex.code, ex.name, new HedStringTokenizer(ex.string), true, itemLog) }) - test.each(tests)('Original tokenizer: Invalid string: %s ', (ex) => { + test.each(tests)('Original tokenizer: %s ', (ex) => { stringTokenizer(ex.hedCode, ex.code, ex.name, new HedStringTokenizerOriginal(ex.string), true, itemLog) }) } diff --git a/tests/tokenizerErrorData.js b/tests/tokenizerErrorData.js index 9d403005..17114f3f 100644 --- a/tests/tokenizerErrorData.js +++ b/tests/tokenizerErrorData.js @@ -77,17 +77,15 @@ export const errorTests = [ string: 'x /y', issueCount: 1, hedCode: 'TAG_INVALID', - code: 'extraSlash', + code: 'extraBlank', warning: false, - explanation: 'Cannot have ending slash', + explanation: 'Cannot extra blanks before or after slashes', }, ], }, { - hedCode: 'SIDECAR_BRACES_INVALID', - code: 'unopenedCurlyBrace', - name: 'unopened-curly-brace', - description: 'Tags cannot have leading or trailing, or extra slashes', + name: 'improper-curly-braces', + description: 'Curly braces cannot have commas or parentheses or other curly braces', tests: [ { name: 'leading-close-brace', @@ -99,31 +97,40 @@ export const errorTests = [ explanation: 'Cannot have a leading slash', }, { - name: 'double-slash', - string: 'x//y', + name: 'parenthesis-after-open-brace', + string: 'x, {y(z)}', issueCount: 1, - hedCode: 'TAG_INVALID', - code: 'extraSlash', + hedCode: 'SIDECAR_BRACES_INVALID', + code: 'unclosedCurlyBrace', warning: false, - explanation: 'Cannot have double slash', + explanation: 'Cannot parentheses inside curly braces', }, { - name: 'triple-slash', - string: 'x///y', + name: 'comma-inside-curly-brace', + string: 'x, {y,z}', issueCount: 1, - hedCode: 'TAG_INVALID', - code: 'extraSlash', + hedCode: 'SIDECAR_BRACES_INVALID', + code: 'unclosedCurlyBrace', warning: false, - explanation: 'Cannot have double slash', + explanation: 'Cannot have a comma inside curly brace', }, { - name: 'trailing-slash', - string: 'x/y/', + name: 'unclosed-curly-brace', + string: 'x, {y, z', issueCount: 1, - hedCode: 'TAG_INVALID', - code: 'extraSlash', + hedCode: 'SIDECAR_BRACES_INVALID', + code: 'unclosedCurlyBrace', warning: false, - explanation: 'Cannot have ending slash', + explanation: 'Open curly braces must be matched with closing curly braces', + }, + { + name: 'nested-curly-brace', + string: '{x}, {{y, z}}', + issueCount: 1, + hedCode: 'SIDECAR_BRACES_INVALID', + code: 'nestedCurlyBrace', + warning: false, + explanation: 'Curly braces cannot be nested', }, ], }, diff --git a/tests/tokenizerPassingData.js b/tests/tokenizerPassingData.js index d2dc2363..770449f1 100644 --- a/tests/tokenizerPassingData.js +++ b/tests/tokenizerPassingData.js @@ -81,28 +81,27 @@ export const passingTests = [ name: 'multiple-tags-in-group', string: '(x,y)', explanation: 'Multiple tags in one group.', - tagSpecs: [[new TagSpec('x', 1, 2, '')], new TagSpec('y', 3, 4, '')], + tagSpecs: [[new TagSpec('x', 1, 2, ''), new TagSpec('y', 3, 4, '')]], groupSpec: new GroupSpec(0, 5, [new GroupSpec(0, 5, [])]), }, - // { - // name: 'multiple-unnested-groups', - // string: 'q, (xy), (zw, uv), p', - // explanation: 'Multiple unnested tag groups and tags.', - // tagSpecs: [new TagSpec('q', 0, 1, ''), - // [new TagSpec('xy', 4, 6, '')], - // [new TagSpec('zw', 10, 12, ''), - // new TagSpec('uv', 14, 16, '')], - // new TagSpec('p', 19, 20, '')], - // groupSpec: new GroupSpec(0, 20, - // [new GroupSpec(3, 7, []), - // new GroupSpec(9, 17, [])]) - // }, + { + name: 'multiple-unnested-groups', + string: 'q, (xy), (zw, uv), p', + explanation: 'Multiple unnested tag groups and tags.', + tagSpecs: [ + new TagSpec('q', 0, 1, ''), + [new TagSpec('xy', 4, 6, '')], + [new TagSpec('zw', 10, 12, ''), new TagSpec('uv', 14, 16, '')], + new TagSpec('p', 19, 20, ''), + ], + groupSpec: new GroupSpec(0, 20, [new GroupSpec(3, 7, []), new GroupSpec(9, 17, [])]), + }, { name: 'tag-after-group', string: 'x/y,(r,v)', explanation: 'A tag after a group.', - tagSpecs: [[new TagSpec('x', 1, 2, '')], new TagSpec('p', 5, 6, '')], - groupSpec: new GroupSpec(0, 6, [new GroupSpec(0, 3, [])]), + tagSpecs: [new TagSpec('x/y', 0, 3, ''), [new TagSpec('r', 5, 6, ''), new TagSpec('v', 7, 8, '')]], + groupSpec: new GroupSpec(0, 9, [new GroupSpec(4, 9, [])]), }, ], }, diff --git a/tests/tokenizerPassingTests.spec.js b/tests/tokenizerPassingTests.spec.js index bc6c4c3f..4c9e95db 100644 --- a/tests/tokenizerPassingTests.spec.js +++ b/tests/tokenizerPassingTests.spec.js @@ -6,7 +6,7 @@ import * as hed from '../validator/event' import { BidsHedIssue } from '../bids/types/issues' import path from 'path' //import { HedStringTokenizer } from '../parser/tokenizer' -import { HedStringTokenizerOld } from '../parser/tokenizerOld' +import { HedStringTokenizerOriginal } from '../parser/tokenizerOriginal' import { TagSpec, GroupSpec, ColumnSpliceSpec, HedStringTokenizer } from '../parser/tokenizer' //import { TagSpec, GroupSpec, ColumnSpliceSpec, HedStringTokenizer } from '../parser/tokenizerNew' import { generateIssue, IssueError } from '../common/issues/issues' @@ -73,14 +73,14 @@ describe('HED tokenizer validation - validData', () => { }) if (tests && tests.length > 0) { - test.each(tests)('Tokenizer: Invalid string: %s ', (ex) => { + test.each(tests)('Tokenizer: %s ', (ex) => { stringTokenizer(ex.name, new HedStringTokenizer(ex.string), ex.tagSpecs, ex.groupSpec, ex.explanation, itemLog) }) - test.each(tests)('Original tokenizer: Invalid string: %s ', (ex) => { + test.each(tests)('Original tokenizer: %s ', (ex) => { stringTokenizer( ex.name, - new HedStringTokenizerOld(ex.string), + new HedStringTokenizerOriginal(ex.string), ex.tagSpecs, ex.groupSpec, ex.explanation, From 1d3307a7bc837fcc0e9dc47b529d8f8e46b83561 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Sun, 20 Oct 2024 14:42:01 -0500 Subject: [PATCH 11/21] Initial implementation of the new tokenizer --- common/issues/data.js | 5 + parser/tokenizer.js | 30 +- parser/tokenizerFirstTry.js | 410 ------------------------- tests/event.spec.js | 558 +++------------------------------- tests/event2G.spec.js | 530 ++++++++++++++++++++++++++++++++ tests/stringParser.spec.js | 24 +- tests/temp.spec.js | 4 +- tests/tokenizerPassingData.js | 138 ++++++++- 8 files changed, 743 insertions(+), 956 deletions(-) delete mode 100644 parser/tokenizerFirstTry.js create mode 100644 tests/event2G.spec.js diff --git a/common/issues/data.js b/common/issues/data.js index 53a73239..2534ffbf 100644 --- a/common/issues/data.js +++ b/common/issues/data.js @@ -58,6 +58,11 @@ export default { level: 'error', message: stringTemplate`Either "${'previousTag'}" contains a comma when it should not or "${'tag'}" is not a valid tag.`, }, + invalidTagPrefix: { + hedCode: 'TAG_NAMESPACE_PREFIX_INVALID', + level: 'error', + message: stringTemplate`Either tag prefix at index ${'index'} contains non-alphabetic characters or does not have an associated schema.`, + }, multipleUniqueTags: { hedCode: 'TAG_NOT_UNIQUE', level: 'error', diff --git a/parser/tokenizer.js b/parser/tokenizer.js index 596475b5..3884b945 100644 --- a/parser/tokenizer.js +++ b/parser/tokenizer.js @@ -141,21 +141,21 @@ export class HedStringTokenizer { // Empty strings cannot be tokenized if (this.hedString.trim().length === 0) { this.pushIssue('emptyTagFound', 0) - return [null, null, this.issues] + return [null, null, { syntax: this.issues }] } for (let i = 0; i < this.hedString.length; i++) { const character = this.hedString.charAt(i) this.handleCharacter(i, character) //this.tokenizeCharacter(i, character) if (this.issues.length > 0) { - return [null, null, this.issues] + return [null, null, { syntax: this.issues }] } } this.finalizeTokenizer() if (this.issues.length > 0) { - return [null, null, this.issues] + return [null, null, { syntax: this.issues }] } else { - return [this.state.currentGroupStack.pop(), this.state.parenthesesStack.pop(), []] + return [this.state.currentGroupStack.pop(), this.state.parenthesesStack.pop(), { syntax: [] }] } } @@ -216,10 +216,14 @@ export class HedStringTokenizer { handleComma(i) { if ( + // Empty token this.state.lastDelimiter[0] === CHARACTERS.COMMA && this.hedString.slice(this.state.lastDelimiter[1] + 1, i).trim().length === 0 ) { this.pushIssue('emptyTagFound', this.state.lastDelimiter[1]) // Check for empty group between commas + } else if (this.state.lastDelimiter[0] === CHARACTERS.OPENING_COLUMN) { + // Unclosed curly brace + this.pushIssue('unclosedCurlyBrace', this.state.lastDelimiter[1]) } else if ( this.state.currentToken.trim().length === 0 && [CHARACTERS.CLOSING_GROUP, CHARACTERS.CLOSING_COLUMN].includes(this.state.lastDelimiter[0]) @@ -293,16 +297,16 @@ export class HedStringTokenizer { } handleClosingColumn(i) { - if (this.state.lastDelimiter !== CHARACTERS.OPENING_COLUMN) { + if (this.state.lastDelimiter[0] !== CHARACTERS.OPENING_COLUMN) { // Column splice not in progress this.pushIssue('unopenedCurlyBrace', i) } else if (!this.state.currentToken.trim()) { - // Ensure that column slice is not empty + // Column slice cannot be empty this.pushIssue('emptyCurlyBrace', i) } else { // Close column by updating bounds and moving it to the parent group, push a column splice on the stack. this.state.currentGroupStack[this.state.groupDepth].push( - new ColumnSpliceSpec(this.state.currentToken.trim(), this.state.startingIndex, i), + new ColumnSpliceSpec(this.state.currentToken.trim(), this.state.lastDelimiter[1], i), ) this.resetToken(i) this.state.lastDelimiter = [CHARACTERS.CLOSING_COLUMN, i] @@ -310,12 +314,15 @@ export class HedStringTokenizer { } handleColon(i) { - if (!this.state.librarySchema) { + if (this.state.librarySchema || this.state.currentToken.trim().includes(CHARACTERS.BLANK)) { // If colon has not been seen, it is a library. Ignore other colons. - this.state.librarySchema = this.state.currentToken - this.resetToken(i) - } else { this.state.currentToken += CHARACTERS.COLON + } else if (/[^A-Za-z]/.test(this.state.currentToken.trim())) { + this.pushIssue('invalidTagPrefix', i) + } else { + const lib = this.state.currentToken + this.resetToken(i) + this.state.librarySchema = lib } } @@ -332,7 +339,6 @@ export class HedStringTokenizer { pushTag(i) { if (this.state.currentToken.trim().length == 0) { this.pushIssue('emptyTagFound', i) - return } else { const bounds = getTrimmedBounds(this.state.currentToken) this.state.currentGroupStack[this.state.groupDepth].push( diff --git a/parser/tokenizerFirstTry.js b/parser/tokenizerFirstTry.js deleted file mode 100644 index 4e7a0c84..00000000 --- a/parser/tokenizerFirstTry.js +++ /dev/null @@ -1,410 +0,0 @@ -import { replaceTagNameWithPound } from '../utils/hedStrings' -import { unicodeName } from 'unicode-name' -import { generateIssue } from '../common/issues/issues' - -const CHARACTERS = { - BLANK: ' ', - OPENING_GROUP: '(', - CLOSING_GROUP: ')', - OPENING_COLUMN: '{', - CLOSING_COLUMN: '}', - COMMA: ',', - COLON: ':', - SLASH: '/', -} - -function getTrimmedBounds(originalString) { - const start = originalString.search(/\S/) - const end = originalString.search(/\S\s*$/) - - if (start === -1) { - // The string contains only whitespace - return null - } - - return [start, end + 1] -} - -const invalidCharacters = new Set(['[', ']', '~', '"']) -// Add control codes to invalidCharacters -for (let i = 0x00; i <= 0x1f; i++) { - invalidCharacters.add(String.fromCodePoint(i)) -} -for (let i = 0x7f; i <= 0x9f; i++) { - invalidCharacters.add(String.fromCodePoint(i)) -} - -const invalidCharactersOutsideOfValues = new Set([':']) - -/** - * A specification for a tokenized substring. - */ -export class SubstringSpec { - /** - * The starting and ending bounds of the substring. - * @type {number[]} - */ - bounds - - constructor(start, end) { - this.bounds = [start, end] - } -} - -/** - * A specification for a tokenized tag. - */ -export class TagSpec extends SubstringSpec { - /** - * The tag this spec represents. - * @type {string} - */ - tag - /** - * The schema prefix for this tag, if any. - * @type {string} - */ - library - - constructor(tag, start, end, librarySchema) { - super(start, end) - - this.tag = tag.trim() - this.library = librarySchema - } -} - -/** - * A specification for a tokenized tag group. - */ -export class GroupSpec extends SubstringSpec { - /** - * The child group specifications. - * @type {GroupSpec[]} - */ - children - - constructor(start, end, children) { - super(start, end) - - this.children = children - } -} - -/** - * A specification for a tokenized column splice template. - */ -export class ColumnSpliceSpec extends SubstringSpec { - /** - * The column name this spec refers to. - * @type {string} - */ - columnName - - constructor(name, start, end) { - super(start, end) - - this.columnName = name.trim() - } -} - -class TokenizerState { - constructor() { - this.currentToken = '' // Characters in the token currently being parsed - this.groupDepth = 0 - this.startingIndex = 0 // Starting index of this token - this.resetIndexFlag = false - this.slashFound = false - this.commaFound = false // A comma is hanging there -- if there is nothing else coming, it is a problem - this.librarySchema = '' - this.columnSpliceIndex = -1 //Index of { if this token is column splice - this.currentGroupStack = [[]] - this.parenthesesStack = [] - this.ignoringCharacters = false // If we encounter error in a token, we want to just skip until we can recover. - this.closingGroup = false - // this.closingColumn = false - } -} - -/** - * Class for tokenizing HED strings. - */ -export class HedStringTokenizerFirstTry { - constructor(hedString) { - this.hedString = hedString - this.syntaxIssues = [] - this.state = null - } - - /** - * Split the HED string into delimiters and tags. - * - * @returns {[TagSpec[], GroupSpec, Object]} The tag specifications, group bounds, and any issues found. - */ - tokenize() { - this.initializeTokenizer() - - for (let i = 0; i < this.hedString.length; i++) { - const character = this.hedString.charAt(i) - this.tokenizeCharacter(i, character) - if (this.state.resetIndexFlag) { - this.state.resetIndexFlag = false - this.state.startingIndex = i + 1 - this.state.currentToken = '' - } - } - this.pushTag(this.hedString.length, true) - - if (this.state.columnSpliceIndex >= 0) { - this.pushIssue('unclosedCurlyBrace', this.state.columnSpliceIndex) - } - - this.unwindGroupStack() - - const tagSpecs = this.state.currentGroupStack.pop() - const groupSpecs = this.state.parenthesesStack.pop() - const issues = { - syntax: this.syntaxIssues, - conversion: [], - } - return [tagSpecs, groupSpecs, issues] - } - - initializeTokenizer() { - this.syntaxIssues = [] - this.state = new TokenizerState() - this.state.parenthesesStack = [new GroupSpec(0, this.hedString.length, [])] - } - - tokenizeCharacter(i, character) { - if (this.state.ignoringCharacters) { - this.handleIgnoringCharacters(i, character) - } else { - this.handleCharacter(i, character) - } - } - - handleIgnoringCharacters(i, character) { - // We have encountered a parsing error on this token and want to ignore until the next token. - const characterHandler = { - [CHARACTERS.CLOSING_GROUP]: () => { - this.clearToken() - this.handleClosingGroup(i) - }, - [CHARACTERS.COMMA]: () => { - this.clearToken() - this.handleClosingGroup(i) - }, - }[character] - - if (characterHandler) { - characterHandler() - } - } - - handleCharacter(i, character) { - const characterHandler = { - [CHARACTERS.OPENING_GROUP]: () => this.handleOpeningGroup(i), - [CHARACTERS.CLOSING_GROUP]: () => { - this.pushTag(i, false) - this.handleClosingGroup(i) - }, - [CHARACTERS.OPENING_COLUMN]: () => this.handleOpeningColumn(i), - [CHARACTERS.CLOSING_COLUMN]: () => { - this.pushTag(i) - this.handleClosingColumn(i) - }, - [CHARACTERS.COMMA]: () => { - this.state.commaFound = true - this.pushTag(i, false) - this.state.closingColumn = false - }, - [CHARACTERS.COLON]: () => this.handleColon(character), - [CHARACTERS.SLASH]: () => this.handleSlash(i), - }[character] // Selects the character handler based on the value of character - - if (characterHandler) { - characterHandler() - } else if (invalidCharacters.has(character)) { - this.pushInvalidCharacterIssue(character, i) - } else { - this.handleRegularCharacter(character) - } - } - - handleOpeningGroup(i) { - this.state.currentGroupStack.push([]) - this.state.parenthesesStack.push(new GroupSpec(i, undefined, [])) - this.state.resetIndexFlag = true - this.state.commaFound = false - this.state.groupDepth++ - } - - handleClosingGroup(i) { - this.state.closingGroup = true - // If the group depth is <= 0, it means there's no corresponding opening group. - if (this.state.groupDepth <= 0) { - this.pushIssue('unopenedParenthesis', i) - return - } - // Close the group by updating its bounds and moving it to the parent group. - this.closeGroup(i) - this.commaFound = false - } - - handleOpeningColumn(i) { - // We're already in the middle of a token -- can't have an opening brace - if (this.state.currentToken.trim().length > 0) { - this.pushInvalidCharacterIssue(CHARACTERS.OPENING_COLUMN, i) - this.state.ignoringCharacters = true - return - } - if (this.state.columnSpliceIndex >= 0) { - this.pushIssue('nestedCurlyBrace', i) - } - this.state.columnSpliceIndex = i - this.state.commaFound = false - } - - handleClosingColumn(i) { - // If a column splice is not in progress push an issue indicating an unopened curly brace. - if (this.state.columnSpliceIndex < 0) { - this.pushIssue('unopenedCurlyBrace', i) - return - } - // Ensure that column slice is not empty - if (!this.state.currentToken) { - this.pushIssue('emptyCurlyBrace', i) - return - } - - // Close the column by updating its bounds and moving it to the parent group, push a column splice on the stack. - this.state.currentGroupStack[this.state.groupDepth].push( - new ColumnSpliceSpec(this.state.currentToken.trim(), this.state.startingIndex, i), - ) - this.state.columnSpliceIndex = -1 - this.clearToken() - this.state.closingColumn = true // Used to indicate that - this.state.commaFound = false - } - - handleColon(character) { - if (!this.state.slashFound && !this.state.librarySchema) { - this.state.librarySchema = this.state.currentToken - this.state.resetIndexFlag = true - } else { - this.state.currentToken += character - this.state.slashFound = false - } - } - - handleSlash(i) { - if (!this.state.currentToken.trim() || this.state.slashFound) { - // Leading slash is error -- ignore rest of the token - this.pushIssue('extraSlash', i) - this.state.ignoringCharacters = true - } else { - this.state.slashFound = true - this.state.currentToken += CHARACTERS.SLASH - } - } - - handleRegularCharacter(character) { - // if (character != CHARACTERS.BLANK && this.state.closingColumn) { - // this.pushIssue('unparsedCurlyBraces', i) - // } - if (!this.state.ignoringCharacters) { - this.state.currentToken += character - this.state.slashFound = false - this.state.resetIndexFlag = this.state.currentToken === '' - } - } - - unwindGroupStack() { - while (this.state.groupDepth > 0) { - this.pushIssue( - 'unclosedParenthesis', - this.state.parenthesesStack[this.state.parenthesesStack.length - 1].bounds[0], - ) - this.closeGroup(this.hedString.length) - } - } - - pushTag(i, isEndOfString) { - // Called when a token has been parsed - const token = this.state.currentToken.trim() - if (!token && isEndOfString) { - // If empty token at end of string just return. - if (this.state.commaFound) { - this.pushIssue('emptyTagFound', i) - } - return - } - // If we're in the process of closing a group, reset the closingGroup flag (allows for empty groups) - if (this.state.closingGroup) { - // Empty groups are allowed. - this.state.closingGroup = false - } else if (this.state.slashFound) { - //Trailing token slash is an error - this.pushIssue('extraSlash', i) - } else if (!token) { - // Column spec has already been called. - this.pushIssue('emptyTagFound', i) - } else if (this.state.columnSpliceIndex < 0) { - // Not a column splice so goes on group stack as a TagSpec - this.checkValueTagForInvalidCharacters() - const bounds = getTrimmedBounds(this.state.currentToken) - this.state.currentGroupStack[this.state.groupDepth].push( - new TagSpec( - this.state.currentToken.trim(), - this.state.startingIndex + bounds[0], - this.state.startingIndex + bounds[1], - this.state.librarySchema, - ), - ) - } - // Clear the current token and reset flags for the next iteration. - this.clearToken() - } - - clearToken() { - this.state.ignoringCharacters = false - this.state.resetIndexFlag = true - this.state.slashFound = false - this.state.librarySchema = '' - this.state.closingColumn = false - } - - closeGroup(i) { - const groupSpec = this.state.parenthesesStack.pop() - groupSpec.bounds[1] = i + 1 - this.state.parenthesesStack[this.state.groupDepth - 1].children.push(groupSpec) - this.state.currentGroupStack[this.state.groupDepth - 1].push(this.state.currentGroupStack.pop()) - this.state.groupDepth-- - //this.closingColumn = false - } - - checkValueTagForInvalidCharacters() { - const formToCheck = replaceTagNameWithPound(this.state.currentToken) - for (let i = 0; i < formToCheck.length; i++) { - const character = formToCheck.charAt(i) - if (invalidCharactersOutsideOfValues.has(character)) { - this.pushInvalidCharacterIssue(character, this.state.startingIndex + i) - } - } - } - - pushIssue(issueCode, index) { - this.syntaxIssues.push(generateIssue(issueCode, { index, string: this.hedString })) - } - - pushInvalidCharacterIssue(character, index) { - this.syntaxIssues.push( - generateIssue('invalidCharacter', { - character: unicodeName(character), - index, - string: this.hedString, - }), - ) - } -} diff --git a/tests/event.spec.js b/tests/event.spec.js index dfc22f2e..88e22882 100644 --- a/tests/event.spec.js +++ b/tests/event.spec.js @@ -64,22 +64,19 @@ describe('HED string and event validation', () => { it('should not have mismatched parentheses', () => { const testStrings = { extraOpening: - '/Action/Reach/To touch,((/Attribute/Object side/Left,/Participant/Effect/Body part/Arm),/Attribute/Location/Screen/Top/70 px,/Attribute/Location/Screen/Left/23 px', + 'Action/Reach/To touch,((Attribute/Object side/Left,Participant/Effect/Body part/Arm),Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px', // The extra comma is needed to avoid a comma error. extraClosing: - '/Action/Reach/To touch,(/Attribute/Object side/Left,/Participant/Effect/Body part/Arm),),/Attribute/Location/Screen/Top/70 px,/Attribute/Location/Screen/Left/23 px', + 'Action/Reach/To touch,(Attribute/Object side/Left,Participant/Effect/Body part/Arm),),Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px', wrongOrder: - '/Action/Reach/To touch,((/Attribute/Object side/Left),/Participant/Effect/Body part/Arm),/Attribute/Location/Screen/Top/70 px),(/Attribute/Location/Screen/Left/23 px', + 'Action/Reach/To touch,((Attribute/Object side/Left),Participant/Effect/Body part/Arm),Attribute/Location/Screen/Top/70 px),(Attribute/Location/Screen/Left/23 px', valid: - '/Action/Reach/To touch,(/Attribute/Object side/Left,/Participant/Effect/Body part/Arm),/Attribute/Location/Screen/Top/70 px,/Attribute/Location/Screen/Left/23 px', + 'Action/Reach/To touch,(Attribute/Object side/Left,Participant/Effect/Body part/Arm),Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px', } const expectedIssues = { extraOpening: [generateIssue('parentheses', { opening: 2, closing: 1 })], extraClosing: [generateIssue('parentheses', { opening: 1, closing: 2 })], - wrongOrder: [ - generateIssue('unopenedParenthesis', { index: 125, string: testStrings.wrongOrder }), - generateIssue('unclosedParenthesis', { index: 127, string: testStrings.wrongOrder }), - ], + wrongOrder: [generateIssue('unopenedParenthesis', { index: 121, string: testStrings.wrongOrder })], valid: [], } // No-op function as this check is done during the parsing stage. @@ -90,31 +87,31 @@ describe('HED string and event validation', () => { it('should not have malformed delimiters', () => { const testStrings = { missingOpeningComma: - '/Action/Reach/To touch(/Attribute/Object side/Left,/Participant/Effect/Body part/Arm),/Attribute/Location/Screen/Top/70 px,/Attribute/Location/Screen/Left/23 px', + 'Action/Reach/To touch(Attribute/Object side/Left,Participant/Effect/Body part/Arm),Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px', missingClosingComma: - '/Action/Reach/To touch,(/Attribute/Object side/Left,/Participant/Effect/Body part/Arm)/Attribute/Location/Screen/Top/70 px,/Attribute/Location/Screen/Left/23 px', + 'Action/Reach/To touch,(Attribute/Object side/Left,Participant/Effect/Body part/Arm)Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px', extraOpeningComma: - ',/Action/Reach/To touch,(/Attribute/Object side/Left,/Participant/Effect/Body part/Arm),/Attribute/Location/Screen/Top/70 px,/Attribute/Location/Screen/Left/23 px', + ',Action/Reach/To touch,(Attribute/Object side/Left,Participant/Effect/Body part/Arm),Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px', extraClosingComma: - '/Action/Reach/To touch,(/Attribute/Object side/Left,/Participant/Effect/Body part/Arm),/Attribute/Location/Screen/Top/70 px,/Attribute/Location/Screen/Left/23 px,', + 'Action/Reach/To touch,(Attribute/Object side/Left,Participant/Effect/Body part/Arm),/Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px,', multipleExtraOpeningDelimiter: - ',,/Action/Reach/To touch,(/Attribute/Object side/Left,/Participant/Effect/Body part/Arm),/Attribute/Location/Screen/Top/70 px,/Attribute/Location/Screen/Left/23 px', + ',,Action/Reach/To touch,(Attribute/Object side/Left,Participant/Effect/Body part/Arm),Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px', multipleExtraClosingDelimiter: - '/Action/Reach/To touch,(/Attribute/Object side/Left,/Participant/Effect/Body part/Arm),/Attribute/Location/Screen/Top/70 px,/Attribute/Location/Screen/Left/23 px,,', + 'Action/Reach/To touch,(Attribute/Object side/Left,Participant/Effect/Body part/Arm),/Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px,,', multipleExtraMiddleDelimiter: - '/Action/Reach/To touch,,(/Attribute/Object side/Left,/Participant/Effect/Body part/Arm),/Attribute/Location/Screen/Top/70 px,,/Attribute/Location/Screen/Left/23 px', + 'Action/Reach/To touch,,(Attribute/Object side/Left,Participant/Effect/Body part/Arm),Attribute/Location/Screen/Top/70 px,,Attribute/Location/Screen/Left/23 px', valid: - '/Action/Reach/To touch,(/Attribute/Object side/Left,/Participant/Effect/Body part/Arm),/Attribute/Location/Screen/Top/70 px,/Attribute/Location/Screen/Left/23 px', + 'Action/Reach/To touch,(Attribute/Object side/Left,Participant/Effect/Body part/Arm),Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px', validDoubleOpeningParentheses: - '/Action/Reach/To touch,((/Attribute/Object side/Left,/Participant/Effect/Body part/Arm),/Attribute/Location/Screen/Top/70 px,/Attribute/Location/Screen/Left/23 px),Event/Duration/3 ms', + 'Action/Reach/To touch,((Attribute/Object side/Left,Participant/Effect/Body part/Arm),Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px),Event/Duration/3 ms', validDoubleClosingParentheses: - '/Action/Reach/To touch,(/Attribute/Object side/Left,/Participant/Effect/Body part/Arm,(/Attribute/Location/Screen/Top/70 px,/Attribute/Location/Screen/Left/23 px)),Event/Duration/3 ms', + 'Action/Reach/To touch,(Attribute/Object side/Left,Participant/Effect/Body part/Arm,(Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px)),Event/Duration/3 ms', } const expectedIssues = { - missingOpeningComma: [generateIssue('commaMissing', { tag: '/Action/Reach/To touch(' })], + missingOpeningComma: [generateIssue('commaMissing', { tag: 'Action/Reach/To touch(' })], missingClosingComma: [ generateIssue('commaMissing', { - tag: '/Participant/Effect/Body part/Arm)', + tag: 'Participant/Effect/Body part/Arm)', }), ], extraOpeningComma: [ @@ -158,12 +155,12 @@ describe('HED string and event validation', () => { multipleExtraMiddleDelimiter: [ generateIssue('extraDelimiter', { character: ',', - index: 23, + index: 22, string: testStrings.multipleExtraMiddleDelimiter, }), generateIssue('extraDelimiter', { character: ',', - index: 125, + index: 121, string: testStrings.multipleExtraMiddleDelimiter, }), ], @@ -178,68 +175,68 @@ describe('HED string and event validation', () => { it('should not have invalid characters', () => { const testStrings = { - openingBrace: '/Attribute/Object side/Left,/Participant/Effect{/Body part/Arm', - closingBrace: '/Attribute/Object side/Left,/Participant/Effect}/Body part/Arm', - openingBracket: '/Attribute/Object side/Left,/Participant/Effect[/Body part/Arm', - closingBracket: '/Attribute/Object side/Left,/Participant/Effect]/Body part/Arm', - tilde: '/Attribute/Object side/Left,/Participant/Effect~/Body part/Arm', - doubleQuote: '/Attribute/Object side/Left,/Participant/Effect"/Body part/Arm', - null: '/Attribute/Object side/Left,/Participant/Effect/Body part/Arm\0', - tab: '/Attribute/Object side/Left,/Participant/Effect/Body part/Arm\t', + openingBrace: 'Attribute/Object side/Left,Participant/Effect{Body part/Arm', + closingBrace: 'Attribute/Object side/Left,Participant/Effect}/Body part/Arm', + openingBracket: 'Attribute/Object side/Left,Participant/Effect[Body part/Arm', + closingBracket: 'Attribute/Object side/Left,Participant/Effect]Body part/Arm', + tilde: 'Attribute/Object side/Left,Participant/Effect~/Body part/Arm', + doubleQuote: 'Attribute/Object side/Left,Participant/Effect"/Body part/Arm', + null: 'Attribute/Object side/Left,Participant/Effect/Body part/Arm\0', + tab: 'Attribute/Object side/Left,Participant/Effect/Body part/Arm\t', } const expectedIssues = { openingBrace: [ generateIssue('invalidCharacter', { character: 'LEFT CURLY BRACKET', - index: 47, + index: 45, string: testStrings.openingBrace, }), ], closingBrace: [ generateIssue('unopenedCurlyBrace', { - index: 47, + index: 45, string: testStrings.closingBrace, }), ], openingBracket: [ generateIssue('invalidCharacter', { character: 'LEFT SQUARE BRACKET', - index: 47, + index: 45, string: testStrings.openingBracket, }), ], closingBracket: [ generateIssue('invalidCharacter', { character: 'RIGHT SQUARE BRACKET', - index: 47, + index: 45, string: testStrings.closingBracket, }), ], tilde: [ generateIssue('invalidCharacter', { character: 'TILDE', - index: 47, + index: 45, string: testStrings.tilde, }), ], doubleQuote: [ generateIssue('invalidCharacter', { character: 'QUOTATION MARK', - index: 47, + index: 45, string: testStrings.doubleQuote, }), ], null: [ generateIssue('invalidCharacter', { character: 'NULL', - index: 61, + index: 59, string: testStrings.null, }), ], tab: [ generateIssue('invalidCharacter', { character: 'CHARACTER TABULATION', - index: 61, + index: 59, string: testStrings.tab, }), ], @@ -351,491 +348,6 @@ describe('HED string and event validation', () => { }) }) - describe('HED-2G validation', () => { - describe('Later HED-2G schemas', () => { - const hedSchemaFile = 'tests/data/HED7.1.1.xml' - let hedSchemas - - beforeAll(async () => { - const spec1 = new SchemaSpec('', '7.1.1', '', hedSchemaFile) - const specs = new SchemasSpec().addSchemaSpec(spec1) - hedSchemas = await buildSchemas(specs) - }) - - /** - * HED 2 semantic validation base function. - * - * This base function uses the HED 2-specific {@link Hed2Validator} validator class. - * - * @param {Object} testStrings A mapping of test strings. - * @param {Object} expectedIssues The expected issues for each test string. - * @param {function(HedValidator): void} testFunction A test-specific function that executes the required validation check. - * @param {Object?} testOptions Any needed custom options for the validator. - */ - const validatorSemanticBase = function (testStrings, expectedIssues, testFunction, testOptions = {}) { - validatorBase(hedSchemas, Hed2Validator, testStrings, expectedIssues, testFunction, testOptions) - } - - describe('Full HED Strings', () => { - const validatorSemantic = validatorSemanticBase - - // TODO: Rewrite as HED 3 test - it.skip('should not validate strings with extensions that are valid node names', () => { - const testStrings = { - // Event/Duration/20 cm is an obviously invalid tag that should not be caught due to the first error. - red: 'Attribute/Red, Event/Duration/20 cm', - redAndBlue: 'Attribute/Red, Attribute/Blue, Event/Duration/20 cm', - } - const expectedIssues = { - red: [ - generateIssue('invalidParentNode', { - tag: 'Red', - parentTag: 'Attribute/Visual/Color/Red', - }), - ], - redAndBlue: [ - generateIssue('invalidParentNode', { - tag: 'Red', - parentTag: 'Attribute/Visual/Color/Red', - }), - generateIssue('invalidParentNode', { - tag: 'Blue', - parentTag: 'Attribute/Visual/Color/Blue', - }), - ], - } - // This is a no-op function since this is checked during string parsing. - return validatorSemantic( - testStrings, - expectedIssues, - // eslint-disable-next-line no-unused-vars - (validator) => {}, - ) - }) - }) - - describe('Individual HED Tags', () => { - /** - * HED 2 individual tag semantic validation base function. - * - * @param {Object} testStrings A mapping of test strings. - * @param {Object} expectedIssues The expected issues for each test string. - * @param {function(HedValidator, ParsedHedTag, ParsedHedTag): void} testFunction A test-specific function that executes the required validation check. - * @param {Object?} testOptions Any needed custom options for the validator. - */ - const validatorSemantic = function (testStrings, expectedIssues, testFunction, testOptions) { - return validatorSemanticBase( - testStrings, - expectedIssues, - (validator) => { - let previousTag = new ParsedHedTag('', '', [0, 0], validator.hedSchemas) - for (const tag of validator.parsedString.tags) { - testFunction(validator, tag, previousTag) - previousTag = tag - } - }, - testOptions, - ) - } - - it('should exist in the schema or be an allowed extension', () => { - const testStrings = { - takesValue: 'Event/Duration/3 ms', - full: 'Attribute/Object side/Left', - extensionAllowed: 'Item/Object/Person/Driver', - leafExtension: 'Event/Category/Initial context/Something', - nonExtensionAllowed: 'Event/Nonsense', - illegalComma: 'Event/Label/This is a label,This/Is/A/Tag', - placeholder: 'Item/Object/#', - } - const expectedIssues = { - takesValue: [], - full: [], - extensionAllowed: [generateIssue('extension', { tag: testStrings.extensionAllowed })], - leafExtension: [generateIssue('invalidTag', { tag: testStrings.leafExtension })], - nonExtensionAllowed: [ - generateIssue('invalidTag', { - tag: testStrings.nonExtensionAllowed, - }), - ], - illegalComma: [ - generateIssue('extraCommaOrInvalid', { - previousTag: 'Event/Label/This is a label', - tag: 'This/Is/A/Tag', - }), - ], - placeholder: [ - generateIssue('invalidTag', { - tag: testStrings.placeholder, - }), - ], - } - return validatorSemantic( - testStrings, - expectedIssues, - (validator, tag, previousTag) => { - validator.checkIfTagIsValid(tag, previousTag) - }, - { checkForWarnings: true }, - ) - }) - - it('should have a child when required', () => { - const testStrings = { - hasChild: 'Event/Category/Experimental stimulus', - missingChild: 'Event/Category', - } - const expectedIssues = { - hasChild: [], - missingChild: [generateIssue('childRequired', { tag: testStrings.missingChild })], - } - return validatorSemantic( - testStrings, - expectedIssues, - // eslint-disable-next-line no-unused-vars - (validator, tag, previousTag) => { - validator.checkIfTagRequiresChild(tag) - }, - { checkForWarnings: true }, - ) - }) - - it('should have a proper unit when required', () => { - const testStrings = { - correctUnit: 'Event/Duration/3 ms', - correctUnitScientific: 'Event/Duration/3.5e1 ms', - correctSingularUnit: 'Event/Duration/1 millisecond', - correctPluralUnit: 'Event/Duration/3 milliseconds', - correctNoPluralUnit: 'Attribute/Temporal rate/3 hertz', - correctPrefixUnit: 'Participant/Effect/Cognitive/Reward/$19.69', - correctNonSymbolCapitalizedUnit: 'Event/Duration/3 MilliSeconds', - correctSymbolCapitalizedUnit: 'Attribute/Temporal rate/3 kHz', - missingRequiredUnit: 'Event/Duration/3', - incorrectUnit: 'Event/Duration/3 cm', - incorrectNonNumericValue: 'Event/Duration/A ms', - incorrectPluralUnit: 'Attribute/Temporal rate/3 hertzs', - incorrectSymbolCapitalizedUnit: 'Attribute/Temporal rate/3 hz', - incorrectSymbolCapitalizedUnitModifier: 'Attribute/Temporal rate/3 KHz', - incorrectNonSIUnitModifier: 'Event/Duration/1 millihour', - incorrectNonSIUnitSymbolModifier: 'Attribute/Path/Velocity/100 Mkph', - notRequiredNumber: 'Attribute/Visual/Color/Red/0.5', - notRequiredScientific: 'Attribute/Visual/Color/Red/5e-1', - properTime: 'Item/2D shape/Clock face/08:30', - invalidTime: 'Item/2D shape/Clock face/54:54', - } - const legalTimeUnits = ['s', 'second', 'day', 'minute', 'hour'] - const legalFrequencyUnits = ['Hz', 'hertz'] - const legalSpeedUnits = ['m-per-s', 'kph', 'mph'] - const expectedIssues = { - correctUnit: [], - correctUnitScientific: [], - correctSingularUnit: [], - correctPluralUnit: [], - correctNoPluralUnit: [], - correctPrefixUnit: [], - correctNonSymbolCapitalizedUnit: [], - correctSymbolCapitalizedUnit: [], - missingRequiredUnit: [ - generateIssue('unitClassDefaultUsed', { - defaultUnit: 's', - tag: testStrings.missingRequiredUnit, - }), - ], - incorrectUnit: [ - generateIssue('unitClassInvalidUnit', { - tag: testStrings.incorrectUnit, - unitClassUnits: legalTimeUnits.sort().join(','), - }), - ], - incorrectNonNumericValue: [ - generateIssue('invalidValue', { - tag: testStrings.incorrectNonNumericValue, - }), - ], - incorrectPluralUnit: [ - generateIssue('unitClassInvalidUnit', { - tag: testStrings.incorrectPluralUnit, - unitClassUnits: legalFrequencyUnits.sort().join(','), - }), - ], - incorrectSymbolCapitalizedUnit: [ - generateIssue('unitClassInvalidUnit', { - tag: testStrings.incorrectSymbolCapitalizedUnit, - unitClassUnits: legalFrequencyUnits.sort().join(','), - }), - ], - incorrectSymbolCapitalizedUnitModifier: [ - generateIssue('unitClassInvalidUnit', { - tag: testStrings.incorrectSymbolCapitalizedUnitModifier, - unitClassUnits: legalFrequencyUnits.sort().join(','), - }), - ], - incorrectNonSIUnitModifier: [ - generateIssue('unitClassInvalidUnit', { - tag: testStrings.incorrectNonSIUnitModifier, - unitClassUnits: legalTimeUnits.sort().join(','), - }), - ], - incorrectNonSIUnitSymbolModifier: [ - generateIssue('unitClassInvalidUnit', { - tag: testStrings.incorrectNonSIUnitSymbolModifier, - unitClassUnits: legalSpeedUnits.sort().join(','), - }), - ], - notRequiredNumber: [], - notRequiredScientific: [], - properTime: [], - invalidTime: [ - generateIssue('invalidValue', { - tag: testStrings.invalidTime, - }), - ], - } - return validatorSemantic( - testStrings, - expectedIssues, - // eslint-disable-next-line no-unused-vars - (validator, tag, previousTag) => { - validator.checkIfTagUnitClassUnitsAreValid(tag) - }, - { checkForWarnings: true }, - ) - }) - }) - - describe('HED Tag Levels', () => { - /** - * HED 2 Tag level semantic validation base function. - * - * @param {Object} testStrings A mapping of test strings. - * @param {Object} expectedIssues The expected issues for each test string. - * @param {function(HedValidator, ParsedHedSubstring[]): void} testFunction A test-specific function that executes the required validation check. - * @param {Object?} testOptions Any needed custom options for the validator. - */ - const validatorSemantic = function (testStrings, expectedIssues, testFunction, testOptions = {}) { - return validatorSemanticBase( - testStrings, - expectedIssues, - (validator) => { - for (const tagGroup of validator.parsedString.tagGroups) { - for (const subGroup of tagGroup.subGroupArrayIterator()) { - testFunction(validator, subGroup) - } - } - testFunction(validator, validator.parsedString.parseTree) - }, - testOptions, - ) - } - - it('should not have multiple copies of a unique tag', () => { - const testStrings = { - legal: - 'Event/Description/Rail vehicles,Item/Object/Vehicle/Train,(Item/Object/Vehicle/Train,Event/Category/Experimental stimulus)', - multipleDesc: - 'Event/Description/Rail vehicles,Event/Description/Locomotive-pulled or multiple units,Item/Object/Vehicle/Train,(Item/Object/Vehicle/Train,Event/Category/Experimental stimulus)', - } - const expectedIssues = { - legal: [], - multipleDesc: [generateIssue('multipleUniqueTags', { tag: 'event/description' })], - } - return validatorSemantic(testStrings, expectedIssues, (validator, tagLevel) => { - validator.checkForMultipleUniqueTags(tagLevel) - }) - }) - }) - - describe('Top-level Tags', () => { - const validatorSemantic = validatorSemanticBase - - it('should include all required tags', () => { - const testStrings = { - complete: - 'Event/Label/Bus,Event/Category/Experimental stimulus,Event/Description/Shown a picture of a bus,Item/Object/Vehicle/Bus', - missingLabel: - 'Event/Category/Experimental stimulus,Event/Description/Shown a picture of a bus,Item/Object/Vehicle/Bus', - missingCategory: 'Event/Label/Bus,Event/Description/Shown a picture of a bus,Item/Object/Vehicle/Bus', - missingDescription: 'Event/Label/Bus,Event/Category/Experimental stimulus,Item/Object/Vehicle/Bus', - missingAllRequired: 'Item/Object/Vehicle/Bus', - } - const expectedIssues = { - complete: [], - missingLabel: [ - generateIssue('requiredPrefixMissing', { - tagPrefix: 'event/label', - }), - ], - missingCategory: [ - generateIssue('requiredPrefixMissing', { - tagPrefix: 'event/category', - }), - ], - missingDescription: [ - generateIssue('requiredPrefixMissing', { - tagPrefix: 'event/description', - }), - ], - missingAllRequired: [ - generateIssue('requiredPrefixMissing', { - tagPrefix: 'event/label', - }), - generateIssue('requiredPrefixMissing', { - tagPrefix: 'event/category', - }), - generateIssue('requiredPrefixMissing', { - tagPrefix: 'event/description', - }), - ], - } - return validatorSemantic( - testStrings, - expectedIssues, - (validator) => { - validator.checkForRequiredTags() - }, - { checkForWarnings: true }, - ) - }) - }) - }) - - describe('Pre-v7.1.0 HED schemas', () => { - const hedSchemaFile = 'tests/data/HED7.0.4.xml' - let hedSchemas - - beforeAll(async () => { - const spec2 = new SchemaSpec('', '7.0.4', '', hedSchemaFile) - const specs = new SchemasSpec().addSchemaSpec(spec2) - hedSchemas = await buildSchemas(specs) - }) - - /** - * HED 2 semantic validation base function. - * - * This base function uses the HED 2-specific {@link Hed2Validator} validator class. - * - * @param {Object} testStrings A mapping of test strings. - * @param {Object} expectedIssues The expected issues for each test string. - * @param {function(HedValidator): void} testFunction A test-specific function that executes the required validation check. - * @param {Object?} testOptions Any needed custom options for the validator. - */ - const validatorSemanticBase = function (testStrings, expectedIssues, testFunction, testOptions = {}) { - validatorBase(hedSchemas, Hed2Validator, testStrings, expectedIssues, testFunction, testOptions) - } - - describe('Individual HED Tags', () => { - /** - * HED 2 individual tag semantic validation base function. - * - * @param {Object} testStrings A mapping of test strings. - * @param {Object} expectedIssues The expected issues for each test string. - * @param {function(HedValidator, ParsedHedTag, ParsedHedTag): void} testFunction A test-specific function that executes the required validation check. - * @param {Object?} testOptions Any needed custom options for the validator. - */ - const validatorSemantic = function (testStrings, expectedIssues, testFunction, testOptions) { - return validatorSemanticBase( - testStrings, - expectedIssues, - (validator) => { - let previousTag = new ParsedHedTag('', '', [0, 0], validator.hedSchemas) - for (const tag of validator.parsedString.tags) { - testFunction(validator, tag, previousTag) - previousTag = tag - } - }, - testOptions, - ) - } - - it('should have a proper unit when required', () => { - const testStrings = { - correctUnit: 'Event/Duration/3 ms', - correctUnitWord: 'Event/Duration/3 milliseconds', - correctUnitScientific: 'Event/Duration/3.5e1 ms', - missingRequiredUnit: 'Event/Duration/3', - incorrectUnit: 'Event/Duration/3 cm', - incorrectNonNumericValue: 'Event/Duration/A ms', - incorrectUnitWord: 'Event/Duration/3 nanoseconds', - incorrectModifier: 'Event/Duration/3 ns', - notRequiredNumber: 'Attribute/Visual/Color/Red/0.5', - notRequiredScientific: 'Attribute/Visual/Color/Red/5e-1', - properTime: 'Item/2D shape/Clock face/08:30', - invalidTime: 'Item/2D shape/Clock face/54:54', - } - const legalTimeUnits = [ - 's', - 'second', - 'seconds', - 'centiseconds', - 'centisecond', - 'cs', - 'hour:min', - 'day', - 'days', - 'ms', - 'milliseconds', - 'millisecond', - 'minute', - 'minutes', - 'hour', - 'hours', - ] - const expectedIssues = { - correctUnit: [], - correctUnitWord: [], - correctUnitScientific: [], - missingRequiredUnit: [ - generateIssue('unitClassDefaultUsed', { - defaultUnit: 's', - tag: testStrings.missingRequiredUnit, - }), - ], - incorrectUnit: [ - generateIssue('unitClassInvalidUnit', { - tag: testStrings.incorrectUnit, - unitClassUnits: legalTimeUnits.sort().join(','), - }), - ], - incorrectNonNumericValue: [ - generateIssue('invalidValue', { - tag: testStrings.incorrectNonNumericValue, - }), - ], - incorrectUnitWord: [ - generateIssue('unitClassInvalidUnit', { - tag: testStrings.incorrectUnitWord, - unitClassUnits: legalTimeUnits.sort().join(','), - }), - ], - incorrectModifier: [ - generateIssue('unitClassInvalidUnit', { - tag: testStrings.incorrectModifier, - unitClassUnits: legalTimeUnits.sort().join(','), - }), - ], - notRequiredNumber: [], - notRequiredScientific: [], - properTime: [], - invalidTime: [ - generateIssue('invalidValue', { - tag: testStrings.invalidTime, - }), - ], - } - return validatorSemantic( - testStrings, - expectedIssues, - // eslint-disable-next-line no-unused-vars - (validator, tag, previousTag) => { - validator.checkIfTagUnitClassUnitsAreValid(tag) - }, - { checkForWarnings: true }, - ) - }) - }) - }) - }) - describe('HED-3G validation', () => { const hedSchemaFile = 'tests/data/HED8.2.0.xml' let hedSchemas diff --git a/tests/event2G.spec.js b/tests/event2G.spec.js new file mode 100644 index 00000000..0e714b74 --- /dev/null +++ b/tests/event2G.spec.js @@ -0,0 +1,530 @@ +import chai from 'chai' +const assert = chai.assert +import { beforeAll, describe, it } from '@jest/globals' + +import * as hed from '../validator/event' +import { buildSchemas } from '../validator/schema/init' +import { parseHedString } from '../parser/parser' +import { ParsedHedTag } from '../parser/parsedHedTag' +import { HedValidator, Hed2Validator, Hed3Validator } from '../validator/event' +import { generateIssue } from '../common/issues/issues' +import { Schemas, SchemaSpec, SchemasSpec } from '../common/schema/types' + +describe('HED string and event validation', () => { + /** + * Validation base function. + * + * @param {Schemas} hedSchemas The HED schema collection used for testing. + * @param {typeof HedValidator} ValidatorClass A subclass of {@link HedValidator} to use for validation. + * @param {Object} testStrings A mapping of test strings. + * @param {Object} expectedIssues The expected issues for each test string. + * @param {function(HedValidator): void} testFunction A test-specific function that executes the required validation check. + * @param {Object?} testOptions Any needed custom options for the validator. + */ + const validatorBase = function ( + hedSchemas, + ValidatorClass, + testStrings, + expectedIssues, + testFunction, + testOptions = {}, + ) { + for (const [testStringKey, testString] of Object.entries(testStrings)) { + assert.property(expectedIssues, testStringKey, testStringKey + ' is not in expectedIssues') + const [parsedTestString, parsingIssues] = parseHedString(testString, hedSchemas) + const validator = new ValidatorClass(parsedTestString, hedSchemas, testOptions) + const flattenedParsingIssues = Object.values(parsingIssues).flat() + if (flattenedParsingIssues.length === 0) { + testFunction(validator) + } + const issues = [].concat(flattenedParsingIssues, validator.issues) + assert.sameDeepMembers(issues, expectedIssues[testStringKey], testString) + } + } + + describe('HED-2G validation', () => { + describe('Later HED-2G schemas', () => { + const hedSchemaFile = 'tests/data/HED7.1.1.xml' + let hedSchemas + + beforeAll(async () => { + const spec1 = new SchemaSpec('', '7.1.1', '', hedSchemaFile) + const specs = new SchemasSpec().addSchemaSpec(spec1) + hedSchemas = await buildSchemas(specs) + }) + + /** + * HED 2 semantic validation base function. + * + * This base function uses the HED 2-specific {@link Hed2Validator} validator class. + * + * @param {Object} testStrings A mapping of test strings. + * @param {Object} expectedIssues The expected issues for each test string. + * @param {function(HedValidator): void} testFunction A test-specific function that executes the required validation check. + * @param {Object?} testOptions Any needed custom options for the validator. + */ + const validatorSemanticBase = function (testStrings, expectedIssues, testFunction, testOptions = {}) { + validatorBase(hedSchemas, Hed2Validator, testStrings, expectedIssues, testFunction, testOptions) + } + + describe('Full HED Strings', () => { + const validatorSemantic = validatorSemanticBase + + // TODO: Rewrite as HED 3 test + it.skip('should not validate strings with extensions that are valid node names', () => { + const testStrings = { + // Event/Duration/20 cm is an obviously invalid tag that should not be caught due to the first error. + red: 'Attribute/Red, Event/Duration/20 cm', + redAndBlue: 'Attribute/Red, Attribute/Blue, Event/Duration/20 cm', + } + const expectedIssues = { + red: [ + generateIssue('invalidParentNode', { + tag: 'Red', + parentTag: 'Attribute/Visual/Color/Red', + }), + ], + redAndBlue: [ + generateIssue('invalidParentNode', { + tag: 'Red', + parentTag: 'Attribute/Visual/Color/Red', + }), + generateIssue('invalidParentNode', { + tag: 'Blue', + parentTag: 'Attribute/Visual/Color/Blue', + }), + ], + } + // This is a no-op function since this is checked during string parsing. + return validatorSemantic( + testStrings, + expectedIssues, + // eslint-disable-next-line no-unused-vars + (validator) => {}, + ) + }) + }) + + describe('Individual HED Tags', () => { + /** + * HED 2 individual tag semantic validation base function. + * + * @param {Object} testStrings A mapping of test strings. + * @param {Object} expectedIssues The expected issues for each test string. + * @param {function(HedValidator, ParsedHedTag, ParsedHedTag): void} testFunction A test-specific function that executes the required validation check. + * @param {Object?} testOptions Any needed custom options for the validator. + */ + const validatorSemantic = function (testStrings, expectedIssues, testFunction, testOptions) { + return validatorSemanticBase( + testStrings, + expectedIssues, + (validator) => { + let previousTag = new ParsedHedTag('', '', [0, 0], validator.hedSchemas) + for (const tag of validator.parsedString.tags) { + testFunction(validator, tag, previousTag) + previousTag = tag + } + }, + testOptions, + ) + } + //TODO: Rewrite for HED-3 + it('should exist in the schema or be an allowed extension', () => { + const testStrings = { + takesValue: 'Event/Duration/3 ms', + full: 'Attribute/Object side/Left', + extensionAllowed: 'Item/Object/Person/Driver', + leafExtension: 'Event/Category/Initial context/Something', + nonExtensionAllowed: 'Event/Nonsense', + illegalComma: 'Event/Label/This is a label,This/Is/A/Tag', + placeholder: 'Item/Object/#', + } + const expectedIssues = { + takesValue: [], + full: [], + extensionAllowed: [generateIssue('extension', { tag: testStrings.extensionAllowed })], + leafExtension: [generateIssue('invalidTag', { tag: testStrings.leafExtension })], + nonExtensionAllowed: [ + generateIssue('invalidTag', { + tag: testStrings.nonExtensionAllowed, + }), + ], + illegalComma: [ + generateIssue('extraCommaOrInvalid', { + previousTag: 'Event/Label/This is a label', + tag: 'This/Is/A/Tag', + }), + ], + placeholder: [ + generateIssue('invalidTag', { + tag: testStrings.placeholder, + }), + ], + } + return validatorSemantic( + testStrings, + expectedIssues, + (validator, tag, previousTag) => { + validator.checkIfTagIsValid(tag, previousTag) + }, + { checkForWarnings: true }, + ) + }) + + it('should have a child when required', () => { + const testStrings = { + hasChild: 'Event/Category/Experimental stimulus', + missingChild: 'Event/Category', + } + const expectedIssues = { + hasChild: [], + missingChild: [generateIssue('childRequired', { tag: testStrings.missingChild })], + } + return validatorSemantic( + testStrings, + expectedIssues, + // eslint-disable-next-line no-unused-vars + (validator, tag, previousTag) => { + validator.checkIfTagRequiresChild(tag) + }, + { checkForWarnings: true }, + ) + }) + + it('should have a proper unit when required', () => { + const testStrings = { + correctUnit: 'Event/Duration/3 ms', + correctUnitScientific: 'Event/Duration/3.5e1 ms', + correctSingularUnit: 'Event/Duration/1 millisecond', + correctPluralUnit: 'Event/Duration/3 milliseconds', + correctNoPluralUnit: 'Attribute/Temporal rate/3 hertz', + correctPrefixUnit: 'Participant/Effect/Cognitive/Reward/$19.69', + correctNonSymbolCapitalizedUnit: 'Event/Duration/3 MilliSeconds', + correctSymbolCapitalizedUnit: 'Attribute/Temporal rate/3 kHz', + missingRequiredUnit: 'Event/Duration/3', + incorrectUnit: 'Event/Duration/3 cm', + incorrectNonNumericValue: 'Event/Duration/A ms', + incorrectPluralUnit: 'Attribute/Temporal rate/3 hertzs', + incorrectSymbolCapitalizedUnit: 'Attribute/Temporal rate/3 hz', + incorrectSymbolCapitalizedUnitModifier: 'Attribute/Temporal rate/3 KHz', + incorrectNonSIUnitModifier: 'Event/Duration/1 millihour', + incorrectNonSIUnitSymbolModifier: 'Attribute/Path/Velocity/100 Mkph', + notRequiredNumber: 'Attribute/Visual/Color/Red/0.5', + notRequiredScientific: 'Attribute/Visual/Color/Red/5e-1', + properTime: 'Item/2D shape/Clock face/08:30', + invalidTime: 'Item/2D shape/Clock face/54:54', + } + const legalTimeUnits = ['s', 'second', 'day', 'minute', 'hour'] + const legalFrequencyUnits = ['Hz', 'hertz'] + const legalSpeedUnits = ['m-per-s', 'kph', 'mph'] + const expectedIssues = { + correctUnit: [], + correctUnitScientific: [], + correctSingularUnit: [], + correctPluralUnit: [], + correctNoPluralUnit: [], + correctPrefixUnit: [], + correctNonSymbolCapitalizedUnit: [], + correctSymbolCapitalizedUnit: [], + missingRequiredUnit: [ + generateIssue('unitClassDefaultUsed', { + defaultUnit: 's', + tag: testStrings.missingRequiredUnit, + }), + ], + incorrectUnit: [ + generateIssue('unitClassInvalidUnit', { + tag: testStrings.incorrectUnit, + unitClassUnits: legalTimeUnits.sort().join(','), + }), + ], + incorrectNonNumericValue: [ + generateIssue('invalidValue', { + tag: testStrings.incorrectNonNumericValue, + }), + ], + incorrectPluralUnit: [ + generateIssue('unitClassInvalidUnit', { + tag: testStrings.incorrectPluralUnit, + unitClassUnits: legalFrequencyUnits.sort().join(','), + }), + ], + incorrectSymbolCapitalizedUnit: [ + generateIssue('unitClassInvalidUnit', { + tag: testStrings.incorrectSymbolCapitalizedUnit, + unitClassUnits: legalFrequencyUnits.sort().join(','), + }), + ], + incorrectSymbolCapitalizedUnitModifier: [ + generateIssue('unitClassInvalidUnit', { + tag: testStrings.incorrectSymbolCapitalizedUnitModifier, + unitClassUnits: legalFrequencyUnits.sort().join(','), + }), + ], + incorrectNonSIUnitModifier: [ + generateIssue('unitClassInvalidUnit', { + tag: testStrings.incorrectNonSIUnitModifier, + unitClassUnits: legalTimeUnits.sort().join(','), + }), + ], + incorrectNonSIUnitSymbolModifier: [ + generateIssue('unitClassInvalidUnit', { + tag: testStrings.incorrectNonSIUnitSymbolModifier, + unitClassUnits: legalSpeedUnits.sort().join(','), + }), + ], + notRequiredNumber: [], + notRequiredScientific: [], + properTime: [], + invalidTime: [ + generateIssue('invalidValue', { + tag: testStrings.invalidTime, + }), + ], + } + return validatorSemantic( + testStrings, + expectedIssues, + // eslint-disable-next-line no-unused-vars + (validator, tag, previousTag) => { + validator.checkIfTagUnitClassUnitsAreValid(tag) + }, + { checkForWarnings: true }, + ) + }) + }) + + //TODO: Replace with HED-3 + describe('HED Tag Levels', () => { + /** + * HED 2 Tag level semantic validation base function. + * + * @param {Object} testStrings A mapping of test strings. + * @param {Object} expectedIssues The expected issues for each test string. + * @param {function(HedValidator, ParsedHedSubstring[]): void} testFunction A test-specific function that executes the required validation check. + * @param {Object?} testOptions Any needed custom options for the validator. + */ + const validatorSemantic = function (testStrings, expectedIssues, testFunction, testOptions = {}) { + return validatorSemanticBase( + testStrings, + expectedIssues, + (validator) => { + for (const tagGroup of validator.parsedString.tagGroups) { + for (const subGroup of tagGroup.subGroupArrayIterator()) { + testFunction(validator, subGroup) + } + } + testFunction(validator, validator.parsedString.parseTree) + }, + testOptions, + ) + } + + it('should not have multiple copies of a unique tag', () => { + const testStrings = { + legal: + 'Event/Description/Rail vehicles,Item/Object/Vehicle/Train,(Item/Object/Vehicle/Train,Event/Category/Experimental stimulus)', + multipleDesc: + 'Event/Description/Rail vehicles,Event/Description/Locomotive-pulled or multiple units,Item/Object/Vehicle/Train,(Item/Object/Vehicle/Train,Event/Category/Experimental stimulus)', + } + const expectedIssues = { + legal: [], + multipleDesc: [generateIssue('multipleUniqueTags', { tag: 'event/description' })], + } + return validatorSemantic(testStrings, expectedIssues, (validator, tagLevel) => { + validator.checkForMultipleUniqueTags(tagLevel) + }) + }) + }) + + describe('Top-level Tags', () => { + const validatorSemantic = validatorSemanticBase + + it('should include all required tags', () => { + const testStrings = { + complete: + 'Event/Label/Bus,Event/Category/Experimental stimulus,Event/Description/Shown a picture of a bus,Item/Object/Vehicle/Bus', + missingLabel: + 'Event/Category/Experimental stimulus,Event/Description/Shown a picture of a bus,Item/Object/Vehicle/Bus', + missingCategory: 'Event/Label/Bus,Event/Description/Shown a picture of a bus,Item/Object/Vehicle/Bus', + missingDescription: 'Event/Label/Bus,Event/Category/Experimental stimulus,Item/Object/Vehicle/Bus', + missingAllRequired: 'Item/Object/Vehicle/Bus', + } + const expectedIssues = { + complete: [], + missingLabel: [ + generateIssue('requiredPrefixMissing', { + tagPrefix: 'event/label', + }), + ], + missingCategory: [ + generateIssue('requiredPrefixMissing', { + tagPrefix: 'event/category', + }), + ], + missingDescription: [ + generateIssue('requiredPrefixMissing', { + tagPrefix: 'event/description', + }), + ], + missingAllRequired: [ + generateIssue('requiredPrefixMissing', { + tagPrefix: 'event/label', + }), + generateIssue('requiredPrefixMissing', { + tagPrefix: 'event/category', + }), + generateIssue('requiredPrefixMissing', { + tagPrefix: 'event/description', + }), + ], + } + return validatorSemantic( + testStrings, + expectedIssues, + (validator) => { + validator.checkForRequiredTags() + }, + { checkForWarnings: true }, + ) + }) + }) + }) + + describe('Pre-v7.1.0 HED schemas', () => { + const hedSchemaFile = 'tests/data/HED7.0.4.xml' + let hedSchemas + + beforeAll(async () => { + const spec2 = new SchemaSpec('', '7.0.4', '', hedSchemaFile) + const specs = new SchemasSpec().addSchemaSpec(spec2) + hedSchemas = await buildSchemas(specs) + }) + + /** + * HED 2 semantic validation base function. + * + * This base function uses the HED 2-specific {@link Hed2Validator} validator class. + * + * @param {Object} testStrings A mapping of test strings. + * @param {Object} expectedIssues The expected issues for each test string. + * @param {function(HedValidator): void} testFunction A test-specific function that executes the required validation check. + * @param {Object?} testOptions Any needed custom options for the validator. + */ + const validatorSemanticBase = function (testStrings, expectedIssues, testFunction, testOptions = {}) { + validatorBase(hedSchemas, Hed2Validator, testStrings, expectedIssues, testFunction, testOptions) + } + + describe('Individual HED Tags', () => { + /** + * HED 2 individual tag semantic validation base function. + * + * @param {Object} testStrings A mapping of test strings. + * @param {Object} expectedIssues The expected issues for each test string. + * @param {function(HedValidator, ParsedHedTag, ParsedHedTag): void} testFunction A test-specific function that executes the required validation check. + * @param {Object?} testOptions Any needed custom options for the validator. + */ + const validatorSemantic = function (testStrings, expectedIssues, testFunction, testOptions) { + return validatorSemanticBase( + testStrings, + expectedIssues, + (validator) => { + let previousTag = new ParsedHedTag('', '', [0, 0], validator.hedSchemas) + for (const tag of validator.parsedString.tags) { + testFunction(validator, tag, previousTag) + previousTag = tag + } + }, + testOptions, + ) + } + + it('should have a proper unit when required', () => { + const testStrings = { + correctUnit: 'Event/Duration/3 ms', + correctUnitWord: 'Event/Duration/3 milliseconds', + correctUnitScientific: 'Event/Duration/3.5e1 ms', + missingRequiredUnit: 'Event/Duration/3', + incorrectUnit: 'Event/Duration/3 cm', + incorrectNonNumericValue: 'Event/Duration/A ms', + incorrectUnitWord: 'Event/Duration/3 nanoseconds', + incorrectModifier: 'Event/Duration/3 ns', + notRequiredNumber: 'Attribute/Visual/Color/Red/0.5', + notRequiredScientific: 'Attribute/Visual/Color/Red/5e-1', + properTime: 'Item/2D shape/Clock face/08:30', + invalidTime: 'Item/2D shape/Clock face/54:54', + } + const legalTimeUnits = [ + 's', + 'second', + 'seconds', + 'centiseconds', + 'centisecond', + 'cs', + 'hour:min', + 'day', + 'days', + 'ms', + 'milliseconds', + 'millisecond', + 'minute', + 'minutes', + 'hour', + 'hours', + ] + const expectedIssues = { + correctUnit: [], + correctUnitWord: [], + correctUnitScientific: [], + missingRequiredUnit: [ + generateIssue('unitClassDefaultUsed', { + defaultUnit: 's', + tag: testStrings.missingRequiredUnit, + }), + ], + incorrectUnit: [ + generateIssue('unitClassInvalidUnit', { + tag: testStrings.incorrectUnit, + unitClassUnits: legalTimeUnits.sort().join(','), + }), + ], + incorrectNonNumericValue: [ + generateIssue('invalidValue', { + tag: testStrings.incorrectNonNumericValue, + }), + ], + incorrectUnitWord: [ + generateIssue('unitClassInvalidUnit', { + tag: testStrings.incorrectUnitWord, + unitClassUnits: legalTimeUnits.sort().join(','), + }), + ], + incorrectModifier: [ + generateIssue('unitClassInvalidUnit', { + tag: testStrings.incorrectModifier, + unitClassUnits: legalTimeUnits.sort().join(','), + }), + ], + notRequiredNumber: [], + notRequiredScientific: [], + properTime: [], + invalidTime: [ + generateIssue('invalidValue', { + tag: testStrings.invalidTime, + }), + ], + } + return validatorSemantic( + testStrings, + expectedIssues, + // eslint-disable-next-line no-unused-vars + (validator, tag, previousTag) => { + validator.checkIfTagUnitClassUnitsAreValid(tag) + }, + { checkForWarnings: true }, + ) + }) + }) + }) + }) +}) diff --git a/tests/stringParser.spec.js b/tests/stringParser.spec.js index e0547563..979c14e9 100644 --- a/tests/stringParser.spec.js +++ b/tests/stringParser.spec.js @@ -81,7 +81,6 @@ describe('HED string parsing', () => { } const expectedIssues = { openingSquare: { - conversion: [], syntax: [ generateIssue('invalidCharacter', { character: 'LEFT SQUARE BRACKET', @@ -91,7 +90,6 @@ describe('HED string parsing', () => { ], }, closingSquare: { - conversion: [], syntax: [ generateIssue('invalidCharacter', { character: 'RIGHT SQUARE BRACKET', @@ -101,7 +99,6 @@ describe('HED string parsing', () => { ], }, tilde: { - conversion: [], syntax: [ generateIssue('invalidCharacter', { character: 'TILDE', @@ -168,17 +165,28 @@ describe('HED string parsing', () => { it('should not include blanks', () => { const testStrings = { - trailingBlank: '/Item/Object/Man-made-object/Vehicle/Car, /Action/Perform/Operate,', + okay: 'Item/Object/Man-made-object/Vehicle/Car, Action/Perform/Operate', + internalBlank: 'Item /Object', } const expectedList = [ - new ParsedHedTag('/Item/Object/Man-made-object/Vehicle/Car', [0, 40]), - new ParsedHedTag('/Action/Perform/Operate', [42, 65]), + new ParsedHedTag('Item/Object/Man-made-object/Vehicle/Car', [0, 39]), + new ParsedHedTag('Action/Perform/Operate', [41, 63]), ] const expectedResults = { - trailingBlank: expectedList, + okay: expectedList, + internalBlank: [], } const expectedIssues = { - trailingBlank: {}, + okay: {}, + internalBlank: { + syntax: [ + generateIssue('invalidCharacter', { + character: 'RIGHT SQUARE BRACKET', + index: 56, + string: testStrings.closingSquare, + }), + ], + }, } validatorWithIssues(testStrings, expectedResults, expectedIssues, (string) => { return splitHedString(string, nullSchema) diff --git a/tests/temp.spec.js b/tests/temp.spec.js index 79d8b291..0634043a 100644 --- a/tests/temp.spec.js +++ b/tests/temp.spec.js @@ -19,7 +19,7 @@ import { HedStringTokenizerFirstTry } from '../parser/tokenizerFirstTry' describe('HED string parsing', () => { it('should include each group as its own single element', () => { //const hedString = - 'Action/Move/Flex,(Relation/Spatial-relation/Left-side-of,Action/Move/Bend,Upper-extremity/Elbow),Position/X-position/70 px,Position/Y-position/23 px' + //'Action/Move/Flex,(Relation/Spatial-relation/Left-side-of,Action/Move/Bend,Upper-extremity/Elbow),Position/X-position/70 px,Position/Y-position/23 px' //const hedString = 'x/y w/z' //const hedString = '(r,z)' //const hedString = 'r,' @@ -29,7 +29,7 @@ describe('HED string parsing', () => { //const hedString = '/x' //const hedString = 'x/ /y' //const hedString = 'x/' - const hedString = 'x' + const hedString = 'x:z' const tok = new HedStringTokenizer(hedString) const [tagSpecs, groupBounds, tokenizingIssues] = tok.tokenize() assert.isEmpty(Object.values(tokenizingIssues).flat(), 'Parsing issues occurred') diff --git a/tests/tokenizerPassingData.js b/tests/tokenizerPassingData.js index 770449f1..bf22e1a5 100644 --- a/tests/tokenizerPassingData.js +++ b/tests/tokenizerPassingData.js @@ -35,10 +35,45 @@ export const passingTests = [ tagSpecs: [new TagSpec('x/y/z', 0, 5, '')], groupSpec: new GroupSpec(0, 5, []), }, + { + name: 'tag-in-column-spec', + string: '{xy}', + explanation: 'Single column spec', + tagSpecs: [new ColumnSpliceSpec('xy', 0, 3, '')], + groupSpec: new GroupSpec(0, 4, []), + }, + { + name: 'tag-in-column-spec-multiple-blanks', + string: ' { xy } ', + explanation: 'Single column spec with multiple blanks', + tagSpecs: [new ColumnSpliceSpec('xy', 2, 8, '')], + groupSpec: new GroupSpec(0, 10, []), + }, + { + name: 'tag-with-colons-no-blanks', + string: 'xy:wz', + explanation: 'Tag with a single colon and no blanks', + tagSpecs: [new TagSpec('wz', 3, 5, 'xy')], + groupSpec: new GroupSpec(0, 5, []), + }, + { + name: 'tag-with-multiple-colons', + string: 'xy:wz x:y', + explanation: 'Tag with one colon marking library and another as part of a value', + tagSpecs: [new TagSpec('wz x:y', 3, 9, 'xy')], + groupSpec: new GroupSpec(0, 9, []), + }, + { + name: 'tags-with-one-value column', + string: 'xy x:y', + explanation: 'Tag with one colon as part of a value', + tagSpecs: [new TagSpec('xy x:y', 0, 6, '')], + groupSpec: new GroupSpec(0, 6, []), + }, ], }, { - name: 'valid-tags-no-groups', + name: 'multiple-tags-no-groups', description: 'multiple tags with no groups.', warning: false, tests: [ @@ -56,6 +91,13 @@ export const passingTests = [ tagSpecs: [new TagSpec('xy', 1, 3, ''), new TagSpec('zy', 6, 8, ''), new TagSpec('wy', 11, 13, '')], groupSpec: new GroupSpec(0, 15, []), }, + { + name: 'multiple-tags-with-blanks', + string: ' xy, zy , wy ', + explanation: 'Can have extra blanks', + tagSpecs: [new TagSpec('xy', 1, 3, ''), new TagSpec('zy', 6, 8, ''), new TagSpec('wy', 11, 13, '')], + groupSpec: new GroupSpec(0, 15, []), + }, ], }, { @@ -105,4 +147,98 @@ export const passingTests = [ }, ], }, + { + name: 'Complex nested groups', + description: 'Groups with complex nesting', + warning: false, + tests: [ + { + name: 'Single-multi-nested-group', + string: '(((xy)))', + explanation: 'Single group with deep nesting', + tagSpecs: [[[[new TagSpec('xy', 3, 5, '')]]]], + groupSpec: new GroupSpec(0, 8, [new GroupSpec(0, 8, [new GroupSpec(1, 7, [new GroupSpec(2, 6, [])])])]), + }, + { + name: 'Nested-group-with-splice', + string: '((({xy})))', + explanation: 'A tag after a group.', + tagSpecs: [[[[new ColumnSpliceSpec('xy', 3, 6)]]]], + groupSpec: new GroupSpec(0, 10, [new GroupSpec(0, 10, [new GroupSpec(1, 9, [new GroupSpec(2, 8, [])])])]), + }, + { + name: 'multiple-tags-in-group', + string: '(x,y)', + explanation: 'Multiple tags in one group.', + tagSpecs: [[new TagSpec('x', 1, 2, ''), new TagSpec('y', 3, 4, '')]], + groupSpec: new GroupSpec(0, 5, [new GroupSpec(0, 5, [])]), + }, + { + name: 'multiple-unnested-groups', + string: 'q, (xy), (zw, uv), p', + explanation: 'Multiple unnested tag groups and tags.', + tagSpecs: [ + new TagSpec('q', 0, 1, ''), + [new TagSpec('xy', 4, 6, '')], + [new TagSpec('zw', 10, 12, ''), new TagSpec('uv', 14, 16, '')], + new TagSpec('p', 19, 20, ''), + ], + groupSpec: new GroupSpec(0, 20, [new GroupSpec(3, 7, []), new GroupSpec(9, 17, [])]), + }, + { + name: 'tag-after-group', + string: 'x/y,(r,v)', + explanation: 'A tag after a group.', + tagSpecs: [new TagSpec('x/y', 0, 3, ''), [new TagSpec('r', 5, 6, ''), new TagSpec('v', 7, 8, '')]], + groupSpec: new GroupSpec(0, 9, [new GroupSpec(4, 9, [])]), + }, + ], + }, + // { + // name: 'Complex nested groups', + // description: 'Groups with complex nesting', + // warning: false, + // tests: [ + // { + // name: 'Single-nested-group', + // string: '(xy)', + // explanation: 'Single group', + // tagSpecs: [[new TagSpec('xy', 1, 3, '')]], + // groupSpec: new GroupSpec(0, 4, [new GroupSpec(0, 4, [])]), + // }, + // { + // name: 'tag-after-group', + // string: '(x), p', + // explanation: 'A tag after a group.', + // tagSpecs: [[new TagSpec('x', 1, 2, '')], new TagSpec('p', 5, 6, '')], + // groupSpec: new GroupSpec(0, 6, [new GroupSpec(0, 3, [])]), + // }, + // { + // name: 'multiple-tags-in-group', + // string: '(x,y)', + // explanation: 'Multiple tags in one group.', + // tagSpecs: [[new TagSpec('x', 1, 2, ''), new TagSpec('y', 3, 4, '')]], + // groupSpec: new GroupSpec(0, 5, [new GroupSpec(0, 5, [])]), + // }, + // { + // name: 'multiple-unnested-groups', + // string: 'q, (xy), (zw, uv), p', + // explanation: 'Multiple unnested tag groups and tags.', + // tagSpecs: [ + // new TagSpec('q', 0, 1, ''), + // [new TagSpec('xy', 4, 6, '')], + // [new TagSpec('zw', 10, 12, ''), new TagSpec('uv', 14, 16, '')], + // new TagSpec('p', 19, 20, ''), + // ], + // groupSpec: new GroupSpec(0, 20, [new GroupSpec(3, 7, []), new GroupSpec(9, 17, [])]), + // }, + // { + // name: 'tag-after-group', + // string: 'x/y,(r,v)', + // explanation: 'A tag after a group.', + // tagSpecs: [new TagSpec('x/y', 0, 3, ''), [new TagSpec('r', 5, 6, ''), new TagSpec('v', 7, 8, '')]], + // groupSpec: new GroupSpec(0, 9, [new GroupSpec(4, 9, [])]), + // }, + // ], + // } ] From c4274073f9bae737b37b9bafe71ef6d8577937b7 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Mon, 21 Oct 2024 10:29:47 -0500 Subject: [PATCH 12/21] stringParser tests now past with new tokenizer --- parser/tokenizer.js | 41 +++++++---- tests/bids.spec.data.js | 50 ++++++------- tests/bids.spec.js | 14 +--- tests/stringParser.spec.js | 32 +++----- tests/temp.spec.js | 6 +- tests/tokenizerPassingData.js | 133 ++++++++++++++++------------------ 6 files changed, 136 insertions(+), 140 deletions(-) diff --git a/parser/tokenizer.js b/parser/tokenizer.js index 3884b945..58e20c21 100644 --- a/parser/tokenizer.js +++ b/parser/tokenizer.js @@ -215,24 +215,30 @@ export class HedStringTokenizer { } handleComma(i) { - if ( - // Empty token - this.state.lastDelimiter[0] === CHARACTERS.COMMA && - this.hedString.slice(this.state.lastDelimiter[1] + 1, i).trim().length === 0 - ) { + if (this.state.lastDelimiter[0] === undefined && this.hedString.slice(0, i).length === 0) { + // Start of string empty + this.pushIssue('emptyTagFound', i) + return + } + const trimmed = this.hedString.slice(this.state.lastDelimiter[1] + 1, i).trim() + if (this.state.lastDelimiter[0] === CHARACTERS.COMMA && trimmed.length === 0) { + // empty token after a previous comma this.pushIssue('emptyTagFound', this.state.lastDelimiter[1]) // Check for empty group between commas } else if (this.state.lastDelimiter[0] === CHARACTERS.OPENING_COLUMN) { // Unclosed curly brace this.pushIssue('unclosedCurlyBrace', this.state.lastDelimiter[1]) - } else if ( - this.state.currentToken.trim().length === 0 && - [CHARACTERS.CLOSING_GROUP, CHARACTERS.CLOSING_COLUMN].includes(this.state.lastDelimiter[0]) + } + if ( + [CHARACTERS.CLOSING_GROUP, CHARACTERS.CLOSING_COLUMN].includes(this.state.lastDelimiter[0]) && + trimmed.length > 0 ) { - this.resetToken(i) - } else { + this.pushIssue('invalidTag', i, trimmed) + } else if (trimmed.length > 0) { this.pushTag(i) - this.state.lastDelimiter = [CHARACTERS.COMMA, i] + } else { + this.resetToken(i) } + this.state.lastDelimiter = [CHARACTERS.COMMA, i] } handleSlash(i) { @@ -268,8 +274,7 @@ export class HedStringTokenizer { } handleClosingGroup(i) { - if (this.state.currentToken.trim().length > 0) { - // only push a tag if it has length > 0. Empty groups are allowed. + if ([CHARACTERS.OPENING_GROUP, CHARACTERS.COMMA].includes(this.state.lastDelimiter[0])) { this.pushTag(i) } if (this.state.groupDepth <= 0) { @@ -320,7 +325,7 @@ export class HedStringTokenizer { } else if (/[^A-Za-z]/.test(this.state.currentToken.trim())) { this.pushIssue('invalidTagPrefix', i) } else { - const lib = this.state.currentToken + const lib = this.state.currentToken.trimStart() this.resetToken(i) this.state.librarySchema = lib } @@ -356,6 +361,10 @@ export class HedStringTokenizer { closeGroup(i) { const groupSpec = this.state.parenthesesStack.pop() groupSpec.bounds[1] = i + 1 + if (this.hedString.slice(groupSpec.bounds[0] + 1, i).trim().length === 0) { + //The group is empty + this.pushIssue('emptyTagFound', i) + } this.state.parenthesesStack[this.state.groupDepth - 1].children.push(groupSpec) this.state.currentGroupStack[this.state.groupDepth - 1].push(this.state.currentGroupStack.pop()) this.state.groupDepth-- @@ -376,6 +385,10 @@ export class HedStringTokenizer { this.issues.push(generateIssue(issueCode, { index, string: this.hedString })) } + pushInvalidTag(issueCode, index, tag) { + this.issues.push(generateIssue(issueCode, { index, tag: tag, string: this.hedString })) + } + pushInvalidCharacterIssue(character, index) { this.issues.push( generateIssue('invalidCharacter', { character: unicodeName(character), index, string: this.hedString }), diff --git a/tests/bids.spec.data.js b/tests/bids.spec.data.js index dd9183f0..6209599d 100644 --- a/tests/bids.spec.data.js +++ b/tests/bids.spec.data.js @@ -90,7 +90,7 @@ const sidecars = [ }, { multiple_value_tags: { - HED: 'Duration/# s, RGB-blue/#', + HED: 'Label/#, Description/#', }, }, { @@ -706,39 +706,39 @@ const tsvFiles = [ const datasetDescriptions = [ // Good datasetDescription.json files [ - { Name: 'OnlyBase', BIDSVersion: '1.7.0', HEDVersion: '8.1.0' }, - { Name: 'BaseAndTest', BIDSVersion: '1.7.0', HEDVersion: ['8.1.0', 'ts:testlib_1.0.2'] }, - { Name: 'OnlyTestAsLib', BIDSVersion: '1.7.0', HEDVersion: ['ts:testlib_1.0.2'] }, - { Name: 'BaseAndTwoTests', BIDSVersion: '1.7.0', HEDVersion: ['8.1.0', 'ts:testlib_1.0.2', 'bg:testlib_1.0.2'] }, - { Name: 'TwoTests', BIDSVersion: '1.7.0', HEDVersion: ['ts:testlib_1.0.2', 'bg:testlib_1.0.2'] }, - { Name: 'OnlyScoreAsBase', BIDSVersion: '1.7.0', HEDVersion: 'score_1.0.0' }, - { Name: 'OnlyScoreAsLib', BIDSVersion: '1.7.0', HEDVersion: 'sc:score_1.0.0' }, - { Name: 'OnlyTestAsBase', BIDSVersion: '1.7.0', HEDVersion: 'testlib_1.0.2' }, - { Name: 'GoodLazyPartneredSchemas', BIDSVersion: '1.7.0', HEDVersion: ['testlib_2.0.0', 'testlib_3.0.0'] }, + { Name: 'OnlyBase', BIDSVersion: '1.10.0', HEDVersion: '8.3.0' }, + { Name: 'BaseAndTest', BIDSVersion: '1.10.0', HEDVersion: ['8.3.0', 'ts:testlib_1.0.2'] }, + { Name: 'OnlyTestAsLib', BIDSVersion: '1.10.0', HEDVersion: ['ts:testlib_1.0.2'] }, + { Name: 'BaseAndTwoTests', BIDSVersion: '1.10.0', HEDVersion: ['8.3.0', 'ts:testlib_1.0.2', 'bg:testlib_1.0.2'] }, + { Name: 'TwoTests', BIDSVersion: '1.10.0', HEDVersion: ['ts:testlib_1.0.2', 'bg:testlib_1.0.2'] }, + { Name: 'OnlyScoreAsBase', BIDSVersion: '1.10.0', HEDVersion: 'score_1.0.0' }, + { Name: 'OnlyScoreAsLib', BIDSVersion: '1.10.0', HEDVersion: 'sc:score_1.0.0' }, + { Name: 'OnlyTestAsBase', BIDSVersion: '1.10.0', HEDVersion: 'testlib_1.0.2' }, + { Name: 'GoodLazyPartneredSchemas', BIDSVersion: '1.10.0', HEDVersion: ['testlib_2.0.0', 'testlib_3.0.0'] }, { Name: 'GoodLazyPartneredSchemasWithStandard', - BIDSVersion: '1.7.0', + BIDSVersion: '1.10.0', HEDVersion: ['testlib_2.0.0', 'testlib_3.0.0', '8.2.0'], }, ], // Bad datasetDescription.json files [ - { Name: 'NonExistentLibrary', BIDSVersion: '1.7.0', HEDVersion: ['8.1.0', 'ts:badlib_1.0.2'] }, - { Name: 'LeadingColon', BIDSVersion: '1.7.0', HEDVersion: [':testlib_1.0.2', '8.1.0'] }, - { Name: 'BadNickName', BIDSVersion: '1.7.0', HEDVersion: ['8.1.0', 't-s:testlib_1.0.2'] }, - { Name: 'MultipleColons1', BIDSVersion: '1.7.0', HEDVersion: ['8.1.0', 'ts::testlib_1.0.2'] }, - { Name: 'MultipleColons2', BIDSVersion: '1.7.0', HEDVersion: ['8.1.0', ':ts:testlib_1.0.2'] }, - { Name: 'NoLibraryName', BIDSVersion: '1.7.0', HEDVersion: ['8.1.0', 'ts:_1.0.2'] }, - { Name: 'BadVersion1', BIDSVersion: '1.7.0', HEDVersion: ['8.1.0', 'ts:testlib1.0.2'] }, - { Name: 'BadVersion2', BIDSVersion: '1.7.0', HEDVersion: ['8.1.0', 'ts:testlib_1.a.2'] }, - { Name: 'BadRemote1', BIDSVersion: '1.7.0', HEDVersion: ['8.1.0', 'ts:testlib_1.800.2'] }, - { Name: 'BadRemote2', BIDSVersion: '1.7.0', HEDVersion: '8.828.0' }, - { Name: 'NoHedVersion', BIDSVersion: '1.7.0' }, - { Name: 'BadLazyPartneredSchema1', BIDSVersion: '1.7.0', HEDVersion: ['testlib_2.0.0', 'testlib_2.1.0'] }, - { Name: 'BadLazyPartneredSchema2', BIDSVersion: '1.7.0', HEDVersion: ['testlib_2.1.0', 'testlib_3.0.0'] }, + { Name: 'NonExistentLibrary', BIDSVersion: '1.10.0', HEDVersion: ['8.3.0', 'ts:badlib_1.0.2'] }, + { Name: 'LeadingColon', BIDSVersion: '1.10.0', HEDVersion: [':testlib_1.0.2', '8.3.0'] }, + { Name: 'BadNickName', BIDSVersion: '1.10.0', HEDVersion: ['8.3.0', 't-s:testlib_1.0.2'] }, + { Name: 'MultipleColons1', BIDSVersion: '1.10.0', HEDVersion: ['8.3.0', 'ts::testlib_1.0.2'] }, + { Name: 'MultipleColons2', BIDSVersion: '1.10.0', HEDVersion: ['8.3.0', ':ts:testlib_1.0.2'] }, + { Name: 'NoLibraryName', BIDSVersion: '1.10.0', HEDVersion: ['8.3.0', 'ts:_1.0.2'] }, + { Name: 'BadVersion1', BIDSVersion: '1.10.0', HEDVersion: ['8.3.0', 'ts:testlib1.0.2'] }, + { Name: 'BadVersion2', BIDSVersion: '1.10.0', HEDVersion: ['8.3.0', 'ts:testlib_1.a.2'] }, + { Name: 'BadRemote1', BIDSVersion: '1.10.0', HEDVersion: ['8.3.0', 'ts:testlib_1.800.2'] }, + { Name: 'BadRemote2', BIDSVersion: '1.10.0', HEDVersion: '8.828.0' }, + { Name: 'NoHedVersion', BIDSVersion: '1.10.0' }, + { Name: 'BadLazyPartneredSchema1', BIDSVersion: '1.10.0', HEDVersion: ['testlib_2.0.0', 'testlib_2.1.0'] }, + { Name: 'BadLazyPartneredSchema2', BIDSVersion: '1.10.0', HEDVersion: ['testlib_2.1.0', 'testlib_3.0.0'] }, { Name: 'LazyPartneredSchemasWithWrongStandard', - BIDSVersion: '1.7.0', + BIDSVersion: '1.10.0', HEDVersion: ['testlib_2.0.0', 'testlib_3.0.0', '8.1.0'], }, ], diff --git a/tests/bids.spec.js b/tests/bids.spec.js index f62b3de9..cff7aec0 100644 --- a/tests/bids.spec.js +++ b/tests/bids.spec.js @@ -16,16 +16,10 @@ describe('BIDS datasets', () => { * @type {SchemasSpec} */ let specs - /** - * @type {SchemasSpec} - */ - let specs2 beforeAll(() => { - const spec1 = new SchemaSpec('', '8.0.0') + const spec1 = new SchemaSpec('', '8.3.0') specs = new SchemasSpec().addSchemaSpec(spec1) - const spec2 = new SchemaSpec('', '7.2.0') - specs2 = new SchemasSpec().addSchemaSpec(spec2) }) /** @@ -121,11 +115,11 @@ describe('BIDS datasets', () => { placeholderDatasets[4].file, ), BidsHedIssue.fromHedIssue( - generateIssue('invalidPlaceholder', { tag: 'Duration/# s', sidecarKey: 'multiple_value_tags' }), + generateIssue('invalidPlaceholder', { tag: 'Label/#', sidecarKey: 'multiple_value_tags' }), placeholderDatasets[5].file, ), BidsHedIssue.fromHedIssue( - generateIssue('invalidPlaceholder', { tag: 'RGB-blue/#', sidecarKey: 'multiple_value_tags' }), + generateIssue('invalidPlaceholder', { tag: 'Description/#', sidecarKey: 'multiple_value_tags' }), placeholderDatasets[5].file, ), BidsHedIssue.fromHedIssue( @@ -158,7 +152,7 @@ describe('BIDS datasets', () => { const maglevError = generateIssue('invalidTag', { tag: 'Maglev' }) const maglevWarning = generateIssue('extension', { tag: 'Train/Maglev' }) const expectedIssues = { - all_good: [], + //all_good: [], all_bad: [ BidsHedIssue.fromHedIssue(cloneDeep(speedIssue), badDatasets[0].file, { tsvLine: 2 }), BidsHedIssue.fromHedIssue(cloneDeep(maglevWarning), badDatasets[1].file, { tsvLine: 2 }), diff --git a/tests/stringParser.spec.js b/tests/stringParser.spec.js index 979c14e9..0a18d024 100644 --- a/tests/stringParser.spec.js +++ b/tests/stringParser.spec.js @@ -143,30 +143,30 @@ describe('HED string parsing', () => { it('should include each group as its own single element', () => { const hedString = - '/Action/Move/Flex,(Relation/Spatial-relation/Left-side-of,/Action/Move/Bend,/Upper-extremity/Elbow),/Position/X-position/70 px,/Position/Y-position/23 px' + 'Action/Move/Flex,(Relation/Spatial-relation/Left-side-of,Action/Move/Bend,Upper-extremity/Elbow),Position/X-position/70 px,Position/Y-position/23 px' const [result, issues] = splitHedString(hedString, nullSchema) assert.isEmpty(Object.values(issues).flat(), 'Parsing issues occurred') assert.deepStrictEqual(result, [ - new ParsedHedTag('/Action/Move/Flex', [0, 17]), + new ParsedHedTag('Action/Move/Flex', [0, 16]), new ParsedHedGroup( [ - new ParsedHedTag('Relation/Spatial-relation/Left-side-of', [19, 57]), - new ParsedHedTag('/Action/Move/Bend', [58, 75]), - new ParsedHedTag('/Upper-extremity/Elbow', [76, 98]), + new ParsedHedTag('Relation/Spatial-relation/Left-side-of', [18, 56]), + new ParsedHedTag('Action/Move/Bend', [57, 73]), + new ParsedHedTag('Upper-extremity/Elbow', [74, 95]), ], nullSchema, hedString, - [18, 99], + [17, 96], ), - new ParsedHedTag('/Position/X-position/70 px', [100, 126]), - new ParsedHedTag('/Position/Y-position/23 px', [127, 153]), + new ParsedHedTag('Position/X-position/70 px', [97, 122]), + new ParsedHedTag('Position/Y-position/23 px', [123, 148]), ]) }) it('should not include blanks', () => { const testStrings = { okay: 'Item/Object/Man-made-object/Vehicle/Car, Action/Perform/Operate', - internalBlank: 'Item /Object', + internalBlank: 'Item Object', } const expectedList = [ new ParsedHedTag('Item/Object/Man-made-object/Vehicle/Car', [0, 39]), @@ -174,19 +174,11 @@ describe('HED string parsing', () => { ] const expectedResults = { okay: expectedList, - internalBlank: [], + internalBlank: [new ParsedHedTag('Item Object', [0, 11])], } const expectedIssues = { okay: {}, - internalBlank: { - syntax: [ - generateIssue('invalidCharacter', { - character: 'RIGHT SQUARE BRACKET', - index: 56, - string: testStrings.closingSquare, - }), - ], - }, + internalBlank: {}, } validatorWithIssues(testStrings, expectedResults, expectedIssues, (string) => { return splitHedString(string, nullSchema) @@ -260,7 +252,7 @@ describe('HED string parsing', () => { it('must include properly formatted tags', () => { const hedString = - '/Action/Move/Flex,(Relation/Spatial-relation/Left-side-of,/Action/Move/Bend,/Upper-extremity/Elbow),/Position/X-position/70 px,/Position/Y-position/23 px' + 'Action/Move/Flex,(Relation/Spatial-relation/Left-side-of,Action/Move/Bend,/Upper-extremity/Elbow),Position/X-position/70 px,Position/Y-position/23 px' const formattedHedString = 'action/move/flex,(relation/spatial-relation/left-side-of,action/move/bend,upper-extremity/elbow),position/x-position/70 px,position/y-position/23 px' const [parsedString, issues] = parseHedString(hedString, nullSchema) diff --git a/tests/temp.spec.js b/tests/temp.spec.js index 0634043a..20d80cf6 100644 --- a/tests/temp.spec.js +++ b/tests/temp.spec.js @@ -14,7 +14,6 @@ import ColumnSplicer from '../parser/columnSplicer' import ParsedHedGroup from '../parser/parsedHedGroup' import { HedStringTokenizer } from '../parser/tokenizer' import { HedStringTokenizerOriginal } from '../parser/tokenizerOriginal' -import { HedStringTokenizerFirstTry } from '../parser/tokenizerFirstTry' describe('HED string parsing', () => { it('should include each group as its own single element', () => { @@ -29,7 +28,10 @@ describe('HED string parsing', () => { //const hedString = '/x' //const hedString = 'x/ /y' //const hedString = 'x/' - const hedString = 'x:z' + //const hedString = '((x))' + //const hedString = '((xy))' + //const hedString = '((xy), ( h:p, ((q, r ))), g), h,' + const hedString = '((xy), g), h' const tok = new HedStringTokenizer(hedString) const [tagSpecs, groupBounds, tokenizingIssues] = tok.tokenize() assert.isEmpty(Object.values(tokenizingIssues).flat(), 'Parsing issues occurred') diff --git a/tests/tokenizerPassingData.js b/tests/tokenizerPassingData.js index bf22e1a5..7bb7cf21 100644 --- a/tests/tokenizerPassingData.js +++ b/tests/tokenizerPassingData.js @@ -148,8 +148,8 @@ export const passingTests = [ ], }, { - name: 'Complex nested groups', - description: 'Groups with complex nesting', + name: 'Nested groups', + description: 'Nested groups with complex nesting', warning: false, tests: [ { @@ -160,85 +160,80 @@ export const passingTests = [ groupSpec: new GroupSpec(0, 8, [new GroupSpec(0, 8, [new GroupSpec(1, 7, [new GroupSpec(2, 6, [])])])]), }, { - name: 'Nested-group-with-splice', + name: 'Single-nested-group-with-extra-tag', + string: '((xy)), g', + explanation: 'Nested group with trailing tag', + tagSpecs: [[[new TagSpec('xy', 2, 4, '')]], new TagSpec('g', 8, 9, '')], + groupSpec: new GroupSpec(0, 9, [new GroupSpec(0, 6, [new GroupSpec(1, 5, [])])]), + }, + { + name: 'Single-nested-group-with-splice', string: '((({xy})))', - explanation: 'A tag after a group.', + explanation: 'A single nested group with a column splice.', tagSpecs: [[[[new ColumnSpliceSpec('xy', 3, 6)]]]], groupSpec: new GroupSpec(0, 10, [new GroupSpec(0, 10, [new GroupSpec(1, 9, [new GroupSpec(2, 8, [])])])]), }, { - name: 'multiple-tags-in-group', - string: '(x,y)', - explanation: 'Multiple tags in one group.', - tagSpecs: [[new TagSpec('x', 1, 2, ''), new TagSpec('y', 3, 4, '')]], - groupSpec: new GroupSpec(0, 5, [new GroupSpec(0, 5, [])]), + name: 'Complex-nested-group-1', + string: '((xy), ( h:p, ((q, r ))))', + explanation: 'Single group', + tagSpecs: [ + [ + [new TagSpec('xy', 2, 4, '')], + [new TagSpec('p', 11, 12, 'h'), [[new TagSpec('q', 16, 17, ''), new TagSpec('r', 19, 20, '')]]], + ], + ], + groupSpec: new GroupSpec(0, 25, [ + new GroupSpec(0, 25, [ + new GroupSpec(1, 5, []), + new GroupSpec(7, 24, [new GroupSpec(14, 23, [new GroupSpec(15, 22, [])])]), + ]), + ]), }, { - name: 'multiple-unnested-groups', - string: 'q, (xy), (zw, uv), p', - explanation: 'Multiple unnested tag groups and tags.', + name: 'Complex-nested-group-2', + string: '((xy), g), h', + explanation: 'Nested groups with tags', + tagSpecs: [[[new TagSpec('xy', 2, 4, '')], new TagSpec('g', 7, 8, '')], new TagSpec('h', 11, 12, '')], + groupSpec: new GroupSpec(0, 12, [new GroupSpec(0, 9, [new GroupSpec(1, 5, [])])]), + }, + { + name: 'Complex-nested-group-3', + string: '((xy), ( h:p, ((q, r ))), g)', + explanation: 'Single group', tagSpecs: [ - new TagSpec('q', 0, 1, ''), - [new TagSpec('xy', 4, 6, '')], - [new TagSpec('zw', 10, 12, ''), new TagSpec('uv', 14, 16, '')], - new TagSpec('p', 19, 20, ''), + [ + [new TagSpec('xy', 2, 4, '')], + [new TagSpec('p', 11, 12, 'h'), [[new TagSpec('q', 16, 17, ''), new TagSpec('r', 19, 20, '')]]], + new TagSpec('g', 26, 27, ''), + ], ], - groupSpec: new GroupSpec(0, 20, [new GroupSpec(3, 7, []), new GroupSpec(9, 17, [])]), + groupSpec: new GroupSpec(0, 28, [ + new GroupSpec(0, 28, [ + new GroupSpec(1, 5, []), + new GroupSpec(7, 24, [new GroupSpec(14, 23, [new GroupSpec(15, 22, [])])]), + ]), + ]), }, { - name: 'tag-after-group', - string: 'x/y,(r,v)', - explanation: 'A tag after a group.', - tagSpecs: [new TagSpec('x/y', 0, 3, ''), [new TagSpec('r', 5, 6, ''), new TagSpec('v', 7, 8, '')]], - groupSpec: new GroupSpec(0, 9, [new GroupSpec(4, 9, [])]), + name: 'Complex-nested-group-4', + string: '((xy), ( h:p, ((q, r ))), g), h', + explanation: 'Single group', + tagSpecs: [ + [ + [new TagSpec('xy', 2, 4, '')], + [new TagSpec('p', 11, 12, 'h'), [[new TagSpec('q', 16, 17, ''), new TagSpec('r', 19, 20, '')]]], + new TagSpec('g', 26, 27, ''), + ], + new TagSpec('h', 30, 31, ''), + ], + groupSpec: new GroupSpec(0, 31, [ + new GroupSpec(0, 28, [ + new GroupSpec(1, 5, []), + new GroupSpec(7, 24, [new GroupSpec(14, 23, [new GroupSpec(15, 22, [])])]), + ]), + ]), }, ], }, - // { - // name: 'Complex nested groups', - // description: 'Groups with complex nesting', - // warning: false, - // tests: [ - // { - // name: 'Single-nested-group', - // string: '(xy)', - // explanation: 'Single group', - // tagSpecs: [[new TagSpec('xy', 1, 3, '')]], - // groupSpec: new GroupSpec(0, 4, [new GroupSpec(0, 4, [])]), - // }, - // { - // name: 'tag-after-group', - // string: '(x), p', - // explanation: 'A tag after a group.', - // tagSpecs: [[new TagSpec('x', 1, 2, '')], new TagSpec('p', 5, 6, '')], - // groupSpec: new GroupSpec(0, 6, [new GroupSpec(0, 3, [])]), - // }, - // { - // name: 'multiple-tags-in-group', - // string: '(x,y)', - // explanation: 'Multiple tags in one group.', - // tagSpecs: [[new TagSpec('x', 1, 2, ''), new TagSpec('y', 3, 4, '')]], - // groupSpec: new GroupSpec(0, 5, [new GroupSpec(0, 5, [])]), - // }, - // { - // name: 'multiple-unnested-groups', - // string: 'q, (xy), (zw, uv), p', - // explanation: 'Multiple unnested tag groups and tags.', - // tagSpecs: [ - // new TagSpec('q', 0, 1, ''), - // [new TagSpec('xy', 4, 6, '')], - // [new TagSpec('zw', 10, 12, ''), new TagSpec('uv', 14, 16, '')], - // new TagSpec('p', 19, 20, ''), - // ], - // groupSpec: new GroupSpec(0, 20, [new GroupSpec(3, 7, []), new GroupSpec(9, 17, [])]), - // }, - // { - // name: 'tag-after-group', - // string: 'x/y,(r,v)', - // explanation: 'A tag after a group.', - // tagSpecs: [new TagSpec('x/y', 0, 3, ''), [new TagSpec('r', 5, 6, ''), new TagSpec('v', 7, 8, '')]], - // groupSpec: new GroupSpec(0, 9, [new GroupSpec(4, 9, [])]), - // }, - // ], - // } ] From c397a129434ef7a8fbf0d569a835d2b25cdb76c0 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Mon, 21 Oct 2024 18:30:52 -0500 Subject: [PATCH 13/21] Worked on the other tests --- tests/bids.spec.data.js | 4 +- tests/bids.spec.js | 11 +- tests/event2G.spec.js | 2 +- tests/temp.spec.js | 197 +++++++++++++++++----------- tests/tokenizerPassingTests.spec.js | 58 ++++++++ 5 files changed, 184 insertions(+), 88 deletions(-) diff --git a/tests/bids.spec.data.js b/tests/bids.spec.data.js index 6209599d..e7603ec0 100644 --- a/tests/bids.spec.data.js +++ b/tests/bids.spec.data.js @@ -340,7 +340,7 @@ const sidecars = [ event_code: { HED: { face: '(Red, Blue), (Green, (Yellow)), {HED}', - ball: '{response_time}, (Def/Acc/3.5 m-per-s^2)', + ball: '(Def/Acc/3.5 m-per-s^2)', dog: 'Orange, {event_type}', }, }, @@ -352,7 +352,7 @@ const sidecars = [ }, event_type: { HED: { - banana: 'Blue, {response_time}', + banana: 'Blue, {event_code}', apple: 'Green', }, }, diff --git a/tests/bids.spec.js b/tests/bids.spec.js index cff7aec0..281e6ff4 100644 --- a/tests/bids.spec.js +++ b/tests/bids.spec.js @@ -152,7 +152,7 @@ describe('BIDS datasets', () => { const maglevError = generateIssue('invalidTag', { tag: 'Maglev' }) const maglevWarning = generateIssue('extension', { tag: 'Train/Maglev' }) const expectedIssues = { - //all_good: [], + all_good: [], all_bad: [ BidsHedIssue.fromHedIssue(cloneDeep(speedIssue), badDatasets[0].file, { tsvLine: 2 }), BidsHedIssue.fromHedIssue(cloneDeep(maglevWarning), badDatasets[1].file, { tsvLine: 2 }), @@ -577,18 +577,11 @@ describe('BIDS datasets', () => { ), BidsHedIssue.fromHedIssue( generateIssue('recursiveCurlyBracesWithKey', { - column: 'response_time', + column: 'event_code', referrer: 'event_type', }), standaloneSidecars[7].file, ), - BidsHedIssue.fromHedIssue( - generateIssue('recursiveCurlyBracesWithKey', { - column: 'response_time', - referrer: 'event_code', - }), - standaloneSidecars[7].file, - ), BidsHedIssue.fromHedIssue( generateIssue('recursiveCurlyBracesWithKey', { column: 'response_time', diff --git a/tests/event2G.spec.js b/tests/event2G.spec.js index 0e714b74..fb462dee 100644 --- a/tests/event2G.spec.js +++ b/tests/event2G.spec.js @@ -42,7 +42,7 @@ describe('HED string and event validation', () => { } } - describe('HED-2G validation', () => { + describe.skip('HED-2G validation', () => { describe('Later HED-2G schemas', () => { const hedSchemaFile = 'tests/data/HED7.1.1.xml' let hedSchemas diff --git a/tests/temp.spec.js b/tests/temp.spec.js index 20d80cf6..34f9296b 100644 --- a/tests/temp.spec.js +++ b/tests/temp.spec.js @@ -14,84 +14,129 @@ import ColumnSplicer from '../parser/columnSplicer' import ParsedHedGroup from '../parser/parsedHedGroup' import { HedStringTokenizer } from '../parser/tokenizer' import { HedStringTokenizerOriginal } from '../parser/tokenizerOriginal' +import { BidsEventFile } from '../bids' +import { BidsSidecar } from '../bids/types/json' +import path from 'path' describe('HED string parsing', () => { - it('should include each group as its own single element', () => { - //const hedString = - //'Action/Move/Flex,(Relation/Spatial-relation/Left-side-of,Action/Move/Bend,Upper-extremity/Elbow),Position/X-position/70 px,Position/Y-position/23 px' - //const hedString = 'x/y w/z' - //const hedString = '(r,z)' - //const hedString = 'r,' - //const hedString = 'r,y' - //const hedString = 'r' - //const hedString = '(r),p' - //const hedString = '/x' - //const hedString = 'x/ /y' - //const hedString = 'x/' - //const hedString = '((x))' - //const hedString = '((xy))' - //const hedString = '((xy), ( h:p, ((q, r ))), g), h,' - const hedString = '((xy), g), h' - const tok = new HedStringTokenizer(hedString) - const [tagSpecs, groupBounds, tokenizingIssues] = tok.tokenize() - assert.isEmpty(Object.values(tokenizingIssues).flat(), 'Parsing issues occurred') - //const hedString = 'Action/Move/My-flex,(Relation/Spatial-relation/Left-side-of,Action/Move/My-bend,Upper-extremity/My-elbow),Position/X-position/70 m,Position/Y-position/23 m' - // const [result, issues] = splitHedString(hedString, nullSchema) - // assert.isEmpty(Object.values(issues).flat(), 'Parsing issues occurred') - }) + const schemaMap = new Map([ + ['8.2.0', undefined], + ['8.3.0', undefined], + ]) - // const tokenizeTester = function(testStrings, testFunction) { - // for (const [testStringKey, testString] of Object.entries(testStrings)) { - // const testResult = testFunction(testStringKey, testString) - // } - // } - // - // const tokenizeTesterBad = function(testStrings, issueCodes, testFunction) { - // for (const [testStringKey, testString] of Object.entries(testStrings)) { - // const testResult = testFunction(testStringKey, issueCode.testStringKey, testString) - // } - // } - // - // it('should tokenize valid strings', () => { - // const testStrings = { - // oneBrace: 'x,{y}', - // braceParentheses1: '(({yz}))', - // leadingBlank: ' {x},z,', - // } - // tokenizeTester(testStrings, (key, string) => { - // const tok = new HedStringTokenizer(string) - // const [tagSpecs, groupBounds, issues] = tok.tokenize() - // assert.isEmpty(Object.values(issues).flat(), `${key}: ${issues}`) - // const tok1 = new HedStringTokenizerOld(string) - // const [tagSpec1, groupBounds1, issues1] = tok1.tokenize() - // assert.isEmpty(Object.values(issues1).flat(), `${key}: ${issues1}`) - // }) - // }) - // - // it('should tokenize invalid strings', () => { - // const testStrings = { - // // oneBrace: 'x,{y}', - // // braceParentheses1: '(({yz}))', - // // leadingBlank: ' {x},z' - // //onlyComma: ' ,', - // doubleTrailingComma: 'x,,', - // } - // - // const expectedIssues = { - // onlyComma: 'emptyTagFound', - // doubleTrailingComma: 'emptyTagFound', - // } - // - // for (const [testStringKey, testString] of Object.entries(testStrings)) { - // const tok = new HedStringTokenizer(testString) - // const [tagSpecs, groupBounds, issues] = tok.tokenize() - // const issuesFlat = Object.values(issues).flat() - // const expectedIssue = expectedIssues[testStringKey] || '' - // assert.equal(issuesFlat['code'], expectedIssue, `Expected ${expectedIssue} for "${testString}"`) - // } - // // assert.isEmpty(Object.values(issues).flat(), `${key}: ${issues}`) - // // const tok1 = new HedStringTokenizerOld(string) - // // const [tagSpec1, groupBounds1, issues1] = tok1.tokenize() - // // assert.isEmpty(Object.values(issues1).flat(), `${key}: ${issues1}`) + beforeAll(async () => { + const spec2 = new SchemaSpec('', '8.2.0', '', path.join(__dirname, '../tests/data/HED8.2.0.xml')) + const specs2 = new SchemasSpec().addSchemaSpec(spec2) + const schemas2 = await buildSchemas(specs2) + const spec3 = new SchemaSpec('', '8.3.0', '', path.join(__dirname, '../tests/data/HED8.3.0.xml')) + const specs3 = new SchemasSpec().addSchemaSpec(spec3) + const schemas3 = await buildSchemas(specs3) + schemaMap.set('8.2.0', schemas2) + schemaMap.set('8.3.0', schemas3) + }) + // it('should include each group as its own single element', () => { + //const hedString = + //'Action/Move/Flex,(Relation/Spatial-relation/Left-side-of,Action/Move/Bend,Upper-extremity/Elbow),Position/X-position/70 px,Position/Y-position/23 px' + //const hedString = 'x/y w/z' + //const hedString = '(r,z)' + //const hedString = 'r,' + //const hedString = 'r,y' + //const hedString = 'r' + //const hedString = '(r),p' + //const hedString = '/x' + //const hedString = 'x/ /y' + //const hedString = 'x/' + //const hedString = '((x))' + //const hedString = '((xy))' + //const hedString = '((xy), ( h:p, ((q, r ))), g), h,' + // const hedString = '((xy), g), h' + // const tok = new HedStringTokenizer(hedString) + // const [tagSpecs, groupBounds, tokenizingIssues] = tok.tokenize() + // assert.isEmpty(Object.values(tokenizingIssues).flat(), 'Parsing issues occurred') + //const hedString = 'Action/Move/My-flex,(Relation/Spatial-relation/Left-side-of,Action/Move/My-bend,Upper-extremity/My-elbow),Position/X-position/70 m,Position/Y-position/23 m' + // const [result, issues] = splitHedString(hedString, nullSchema) + // assert.isEmpty(Object.values(issues).flat(), 'Parsing issues occurred') // }) + + it('should validate a sidecar', () => { + const nameE = '/sub03/su03_task-test_run-1_events.tsv' + const nameJ = '/sub03/su03_task-test_run-1_events.json' + const eventString = 'onset\tduration\n' + '7\t4.0' + const schema = schemaMap.get('8.3.0') + const sidecarObject = { + event_code: { + HED: { + face: '(Red, Blue), (Green, (Yellow))', + ball: '{response_time}, Black', + }, + }, + response_time: { + Description: 'Has description with HED', + HED: 'Label/#, {event_code}', + }, + } + const bidsSidecar = new BidsSidecar('thisOne', sidecarObject, { relativePath: nameJ, path: nameJ }) + assert(bidsSidecar instanceof BidsSidecar) + + const sidecarIssues = bidsSidecar.validate(schema) + assert.isEmpty(Object.values(sidecarIssues).flat(), 'Parsing issues occurred') + //const bidsEvents = new BidsEventFile("thatOne", [], bidsSidecar, eventString, {relativePath: nameE, path: nameE}) + //assert.InstanceOf(bidsEvents, BidsEventFile) + }) }) + +// const tokenizeTester = function(testStrings, testFunction) { +// for (const [testStringKey, testString] of Object.entries(testStrings)) { +// const testResult = testFunction(testStringKey, testString) +// } +// } +// +// const tokenizeTesterBad = function(testStrings, issueCodes, testFunction) { +// for (const [testStringKey, testString] of Object.entries(testStrings)) { +// const testResult = testFunction(testStringKey, issueCode.testStringKey, testString) +// } +// } +// +// it('should tokenize valid strings', () => { +// const testStrings = { +// oneBrace: 'x,{y}', +// braceParentheses1: '(({yz}))', +// leadingBlank: ' {x},z,', +// } +// tokenizeTester(testStrings, (key, string) => { +// const tok = new HedStringTokenizer(string) +// const [tagSpecs, groupBounds, issues] = tok.tokenize() +// assert.isEmpty(Object.values(issues).flat(), `${key}: ${issues}`) +// const tok1 = new HedStringTokenizerOld(string) +// const [tagSpec1, groupBounds1, issues1] = tok1.tokenize() +// assert.isEmpty(Object.values(issues1).flat(), `${key}: ${issues1}`) +// }) +// }) +// +// it('should tokenize invalid strings', () => { +// const testStrings = { +// // oneBrace: 'x,{y}', +// // braceParentheses1: '(({yz}))', +// // leadingBlank: ' {x},z' +// //onlyComma: ' ,', +// doubleTrailingComma: 'x,,', +// } +// +// const expectedIssues = { +// onlyComma: 'emptyTagFound', +// doubleTrailingComma: 'emptyTagFound', +// } +// +// for (const [testStringKey, testString] of Object.entries(testStrings)) { +// const tok = new HedStringTokenizer(testString) +// const [tagSpecs, groupBounds, issues] = tok.tokenize() +// const issuesFlat = Object.values(issues).flat() +// const expectedIssue = expectedIssues[testStringKey] || '' +// assert.equal(issuesFlat['code'], expectedIssue, `Expected ${expectedIssue} for "${testString}"`) +// } +// // assert.isEmpty(Object.values(issues).flat(), `${key}: ${issues}`) +// // const tok1 = new HedStringTokenizerOld(string) +// // const [tagSpec1, groupBounds1, issues1] = tok1.tokenize() +// // assert.isEmpty(Object.values(issues1).flat(), `${key}: ${issues1}`) +// }) +// }) diff --git a/tests/tokenizerPassingTests.spec.js b/tests/tokenizerPassingTests.spec.js index 4c9e95db..5e62a908 100644 --- a/tests/tokenizerPassingTests.spec.js +++ b/tests/tokenizerPassingTests.spec.js @@ -89,4 +89,62 @@ describe('HED tokenizer validation - validData', () => { }) } }) + + describe.each(passingTests)('$name : $description', ({ tests }) => { + let itemLog + + const assertErrors = function (header, issues, iLog) { + iLog.push(`${header}\n`) + totalTests += 1 + + let errors = [] + if (issues.length > 0) { + errors = issues.map((dict) => dict.hedCode) // list of hedCodes in the issues + } + const errorString = errors.join(',') + if (errors.length > 0) { + iLog.push(`---expected no errors but got errors [${errorString}]\n`) + unexpectedErrors += 1 + assert(errors.length === 0, `${header}---expected no errors but got errors [${errorString}]`) + } + } + + const stringTokenizer = function (eName, tokenizer, tSpecs, gSpec, explanation, iLog) { + const status = 'Expect pass' + const tokType = tokenizer instanceof HedStringTokenizer ? 'Tokenizer' : 'Original tokenizer' + const header = `\n[${tokType}](${status})\tSTRING: "${tokenizer.hedString}"` + const [tagSpecs, groupSpec, tokenizingIssues] = tokenizer.tokenize() + // Test for no errors + const issues = Object.values(tokenizingIssues).flat() + assertErrors(header, issues, iLog) + assert.sameDeepMembers(tagSpecs, tSpecs, explanation) + assert.deepEqual(groupSpec, gSpec, explanation) + //assert.sameDeepMembers(groupSpec, gSpec, explanation) + } + + beforeAll(async () => { + itemLog = [] + }) + + afterAll(() => { + badLog.push(itemLog.join('\n')) + }) + + if (tests && tests.length > 0) { + test.each(tests)('Tokenizer: %s ', (ex) => { + stringTokenizer(ex.name, new HedStringTokenizer(ex.string), ex.tagSpecs, ex.groupSpec, ex.explanation, itemLog) + }) + + test.each(tests)('Original tokenizer: %s ', (ex) => { + stringTokenizer( + ex.name, + new HedStringTokenizerOriginal(ex.string), + ex.tagSpecs, + ex.groupSpec, + ex.explanation, + itemLog, + ) + }) + } + }) }) From 6ef9400e82b6a1d5a82b2c13707c6ed34b607b5c Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Mon, 21 Oct 2024 19:07:17 -0500 Subject: [PATCH 14/21] Separated the tests on the original tokenizer temporarily --- tests/tockenizerErrorTests.spec.js | 79 +++++++++- tests/tokenizerPassingTests.spec.js | 235 ++++++++++++++-------------- 2 files changed, 192 insertions(+), 122 deletions(-) diff --git a/tests/tockenizerErrorTests.spec.js b/tests/tockenizerErrorTests.spec.js index 6df52c0e..b84ac6c2 100644 --- a/tests/tockenizerErrorTests.spec.js +++ b/tests/tockenizerErrorTests.spec.js @@ -1,14 +1,9 @@ import chai from 'chai' const assert = chai.assert import { beforeAll, describe, afterAll } from '@jest/globals' - -import * as hed from '../validator/event' -import { BidsHedIssue } from '../bids/types/issues' import path from 'path' import { HedStringTokenizer } from '../parser/tokenizer' import { HedStringTokenizerOriginal } from '../parser/tokenizerOriginal' -//import { HedStringTokenizerNew } from '../parser/tokenizerNew' -import { generateIssue, IssueError } from '../common/issues/issues' import { errorTests } from './tokenizerErrorData' const displayLog = process.env.DISPLAY_LOG === 'true' const fs = require('fs') @@ -18,7 +13,7 @@ const skippedErrors = {} //const testInfo = loadTestData() console.log(errorTests) -describe('HED tokenizer validation using JSON tests', () => { +describe('Tokenizer validation using JSON tests', () => { const badLog = [] let totalTests = 0 let wrongErrors = 0 @@ -90,7 +85,79 @@ describe('HED tokenizer validation using JSON tests', () => { test.each(tests)('Tokenizer: %s ', (ex) => { stringTokenizer(ex.hedCode, ex.code, ex.name, new HedStringTokenizer(ex.string), true, itemLog) }) + } + }) +}) + +describe('Original tokenizer validation using JSON tests', () => { + const badLog = [] + let totalTests = 0 + let wrongErrors = 0 + let unexpectedErrors = 0 + + beforeAll(async () => {}) + + afterAll(() => { + const outBad = path.join(__dirname, 'runLogOriginal.txt') + const summary = `Total tests:${totalTests} Wrong error codes:${wrongErrors} Unexpected errors:${unexpectedErrors}\n` + if (displayLog) { + fs.writeFileSync(outBad, summary + badLog.join('\n'), 'utf8') + } + }) + describe.each(errorTests)('$name : $description', ({ tests }) => { + let itemLog + + const assertErrors = function (eHedCode, eCode, expectError, iLog, header, issues) { + const log = [header] + totalTests += 1 + + let errors = [] + if (issues.length > 0) { + errors = issues.map((dict) => dict.hedCode) // list of hedCodes in the issues + } + const errorString = errors.join(',') + if (errors.length > 0) { + log.push(`---has errors [${errorString}]`) + } + const expectedError = eCode + const wrongError = `---expected ${eHedCode} but got errors [${errorString}]` + const hasErrors = `---expected no errors but got errors [${errorString}]` + if (expectError && !errors.includes(eHedCode)) { + log.push(wrongError) + iLog.push(log.join('\n')) + wrongErrors += 1 + assert.strictEqual( + errors.includes(eHedCode), + true, + `${header}---expected ${eHedCode} and got errors [${errorString}]`, + ) + } else if (!expectError && errors.length > 0) { + log.push(hasErrors) + iLog.push(log.join('\n')) + unexpectedErrors += 1 + assert(errors.length === 0, `${header}---expected no errors but got errors [${errorString}]`) + } + } + + const stringTokenizer = function (eHedCode, eCode, eName, tokenizer, expectError, iLog) { + const status = expectError ? 'Expect fail' : 'Expect pass' + const tokType = tokenizer instanceof HedStringTokenizer ? 'New tokenizer' : 'Original tokenizer' + const header = `\n[${eHedCode} ${eName} ${tokType}](${status})\tSTRING: "${tokenizer.hedString}"` + const [tagSpecs, groupBounds, tokenizingIssues] = tokenizer.tokenize() + const issues = Object.values(tokenizingIssues).flat() + assertErrors(eHedCode, eCode, expectError, iLog, header, issues) + } + + beforeAll(async () => { + itemLog = [] + }) + + afterAll(() => { + badLog.push(itemLog.join('\n')) + }) + + if (tests && tests.length > 0) { test.each(tests)('Original tokenizer: %s ', (ex) => { stringTokenizer(ex.hedCode, ex.code, ex.name, new HedStringTokenizerOriginal(ex.string), true, itemLog) }) diff --git a/tests/tokenizerPassingTests.spec.js b/tests/tokenizerPassingTests.spec.js index 5e62a908..c91ac41b 100644 --- a/tests/tokenizerPassingTests.spec.js +++ b/tests/tokenizerPassingTests.spec.js @@ -1,15 +1,9 @@ import chai from 'chai' const assert = chai.assert import { beforeAll, describe, afterAll } from '@jest/globals' - -import * as hed from '../validator/event' -import { BidsHedIssue } from '../bids/types/issues' import path from 'path' -//import { HedStringTokenizer } from '../parser/tokenizer' import { HedStringTokenizerOriginal } from '../parser/tokenizerOriginal' -import { TagSpec, GroupSpec, ColumnSpliceSpec, HedStringTokenizer } from '../parser/tokenizer' -//import { TagSpec, GroupSpec, ColumnSpliceSpec, HedStringTokenizer } from '../parser/tokenizerNew' -import { generateIssue, IssueError } from '../common/issues/issues' +import { HedStringTokenizer } from '../parser/tokenizer' import { passingTests } from './tokenizerPassingData' const fs = require('fs') @@ -17,134 +11,143 @@ const displayLog = process.env.DISPLAY_LOG === 'true' const skippedErrors = {} -describe('HED tokenizer validation - validData', () => { - const badLog = [] - let totalTests = 0 - let unexpectedErrors = 0 - - beforeAll(async () => {}) - - afterAll(() => { - const outBad = path.join(__dirname, 'runLog.txt') - const summary = `Total tests:${totalTests} Unexpected errors:${unexpectedErrors}\n` - if (displayLog) { - fs.writeFileSync(outBad, summary + badLog.join('\n'), 'utf8') - } - }) +describe('HED tokenizer validation', () => { + describe('Tokenizer validation - validData', () => { + const badLog = [] + let totalTests = 0 + let unexpectedErrors = 0 - describe.each(passingTests)('$name : $description', ({ tests }) => { - let itemLog + beforeAll(async () => {}) - const assertErrors = function (header, issues, iLog) { - iLog.push(`${header}\n`) - totalTests += 1 - - let errors = [] - if (issues.length > 0) { - errors = issues.map((dict) => dict.hedCode) // list of hedCodes in the issues - } - const errorString = errors.join(',') - if (errors.length > 0) { - iLog.push(`---expected no errors but got errors [${errorString}]\n`) - unexpectedErrors += 1 - assert(errors.length === 0, `${header}---expected no errors but got errors [${errorString}]`) + afterAll(() => { + const outBad = path.join(__dirname, 'runLog.txt') + const summary = `Total tests:${totalTests} Unexpected errors:${unexpectedErrors}\n` + if (displayLog) { + fs.writeFileSync(outBad, summary + badLog.join('\n'), 'utf8') } - } - - const stringTokenizer = function (eName, tokenizer, tSpecs, gSpec, explanation, iLog) { - const status = 'Expect pass' - const tokType = tokenizer instanceof HedStringTokenizer ? 'Tokenizer' : 'Original tokenizer' - const header = `\n[${tokType}](${status})\tSTRING: "${tokenizer.hedString}"` - const [tagSpecs, groupSpec, tokenizingIssues] = tokenizer.tokenize() - // Test for no errors - const issues = Object.values(tokenizingIssues).flat() - assertErrors(header, issues, iLog) - assert.sameDeepMembers(tagSpecs, tSpecs, explanation) - assert.deepEqual(groupSpec, gSpec, explanation) - //assert.sameDeepMembers(groupSpec, gSpec, explanation) - } - - beforeAll(async () => { - itemLog = [] }) - afterAll(() => { - badLog.push(itemLog.join('\n')) - }) + describe.each(passingTests)('$name : $description', ({ tests }) => { + let itemLog + const assertErrors = function (header, issues, iLog) { + iLog.push(`${header}\n`) + totalTests += 1 + + let errors = [] + if (issues.length > 0) { + errors = issues.map((dict) => dict.hedCode) // list of hedCodes in the issues + } + const errorString = errors.join(',') + if (errors.length > 0) { + iLog.push(`---expected no errors but got errors [${errorString}]\n`) + unexpectedErrors += 1 + assert(errors.length === 0, `${header}---expected no errors but got errors [${errorString}]`) + } + } - if (tests && tests.length > 0) { - test.each(tests)('Tokenizer: %s ', (ex) => { - stringTokenizer(ex.name, new HedStringTokenizer(ex.string), ex.tagSpecs, ex.groupSpec, ex.explanation, itemLog) + const stringTokenizer = function (eName, tokenizer, tSpecs, gSpec, explanation, iLog) { + const status = 'Expect pass' + const tokType = tokenizer instanceof HedStringTokenizer ? 'Tokenizer' : 'Original tokenizer' + const header = `\n[${tokType}](${status})\tSTRING: "${tokenizer.hedString}"` + const [tagSpecs, groupSpec, tokenizingIssues] = tokenizer.tokenize() + // Test for no errors + const issues = Object.values(tokenizingIssues).flat() + assertErrors(header, issues, iLog) + assert.sameDeepMembers(tagSpecs, tSpecs, explanation) + assert.deepEqual(groupSpec, gSpec, explanation) + //assert.sameDeepMembers(groupSpec, gSpec, explanation) + } + + beforeAll(async () => { + itemLog = [] }) - test.each(tests)('Original tokenizer: %s ', (ex) => { - stringTokenizer( - ex.name, - new HedStringTokenizerOriginal(ex.string), - ex.tagSpecs, - ex.groupSpec, - ex.explanation, - itemLog, - ) + afterAll(() => { + badLog.push(itemLog.join('\n')) }) - } + + if (tests && tests.length > 0) { + test.each(tests)('Tokenizer: %s ', (ex) => { + stringTokenizer( + ex.name, + new HedStringTokenizer(ex.string), + ex.tagSpecs, + ex.groupSpec, + ex.explanation, + itemLog, + ) + }) + } + }) }) - describe.each(passingTests)('$name : $description', ({ tests }) => { - let itemLog + describe('Original tokenizer validation - validData', () => { + const badLog = [] + let totalTests = 0 + let unexpectedErrors = 0 - const assertErrors = function (header, issues, iLog) { - iLog.push(`${header}\n`) - totalTests += 1 + beforeAll(async () => {}) - let errors = [] - if (issues.length > 0) { - errors = issues.map((dict) => dict.hedCode) // list of hedCodes in the issues - } - const errorString = errors.join(',') - if (errors.length > 0) { - iLog.push(`---expected no errors but got errors [${errorString}]\n`) - unexpectedErrors += 1 - assert(errors.length === 0, `${header}---expected no errors but got errors [${errorString}]`) + afterAll(() => { + const outBad = path.join(__dirname, 'runLogOriginal.txt') + const summary = `Total tests:${totalTests} Unexpected errors:${unexpectedErrors}\n` + if (displayLog) { + fs.writeFileSync(outBad, summary + badLog.join('\n'), 'utf8') } - } - - const stringTokenizer = function (eName, tokenizer, tSpecs, gSpec, explanation, iLog) { - const status = 'Expect pass' - const tokType = tokenizer instanceof HedStringTokenizer ? 'Tokenizer' : 'Original tokenizer' - const header = `\n[${tokType}](${status})\tSTRING: "${tokenizer.hedString}"` - const [tagSpecs, groupSpec, tokenizingIssues] = tokenizer.tokenize() - // Test for no errors - const issues = Object.values(tokenizingIssues).flat() - assertErrors(header, issues, iLog) - assert.sameDeepMembers(tagSpecs, tSpecs, explanation) - assert.deepEqual(groupSpec, gSpec, explanation) - //assert.sameDeepMembers(groupSpec, gSpec, explanation) - } - - beforeAll(async () => { - itemLog = [] }) - afterAll(() => { - badLog.push(itemLog.join('\n')) - }) + describe.each(passingTests)('$name : $description', ({ tests }) => { + let itemLog + + const assertErrors = function (header, issues, iLog) { + iLog.push(`${header}\n`) + totalTests += 1 + + let errors = [] + if (issues.length > 0) { + errors = issues.map((dict) => dict.hedCode) // list of hedCodes in the issues + } + const errorString = errors.join(',') + if (errors.length > 0) { + iLog.push(`---expected no errors but got errors [${errorString}]\n`) + unexpectedErrors += 1 + assert(errors.length === 0, `${header}---expected no errors but got errors [${errorString}]`) + } + } + + const stringTokenizer = function (eName, tokenizer, tSpecs, gSpec, explanation, iLog) { + const status = 'Expect pass' + const tokType = tokenizer instanceof HedStringTokenizer ? 'Tokenizer' : 'Original tokenizer' + const header = `\n[${tokType}](${status})\tSTRING: "${tokenizer.hedString}"` + const [tagSpecs, groupSpec, tokenizingIssues] = tokenizer.tokenize() + // Test for no errors + const issues = Object.values(tokenizingIssues).flat() + assertErrors(header, issues, iLog) + assert.sameDeepMembers(tagSpecs, tSpecs, explanation) + assert.deepEqual(groupSpec, gSpec, explanation) + //assert.sameDeepMembers(groupSpec, gSpec, explanation) + } - if (tests && tests.length > 0) { - test.each(tests)('Tokenizer: %s ', (ex) => { - stringTokenizer(ex.name, new HedStringTokenizer(ex.string), ex.tagSpecs, ex.groupSpec, ex.explanation, itemLog) + beforeAll(async () => { + itemLog = [] }) - test.each(tests)('Original tokenizer: %s ', (ex) => { - stringTokenizer( - ex.name, - new HedStringTokenizerOriginal(ex.string), - ex.tagSpecs, - ex.groupSpec, - ex.explanation, - itemLog, - ) + afterAll(() => { + badLog.push(itemLog.join('\n')) }) - } + + if (tests && tests.length > 0) { + test.each(tests)('Original tokenizer: %s ', (ex) => { + stringTokenizer( + ex.name, + new HedStringTokenizerOriginal(ex.string), + ex.tagSpecs, + ex.groupSpec, + ex.explanation, + itemLog, + ) + }) + } + }) }) }) From 62f5a2b80752b97e2b696554ba57e0a8971c758e Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Tue, 22 Oct 2024 06:20:48 -0500 Subject: [PATCH 15/21] Corrected the error message on curly brace recursion --- tests/bids.spec.js | 1 + tests/temp.spec.js | 28 +++++++++++++++------------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/tests/bids.spec.js b/tests/bids.spec.js index 281e6ff4..7ae2ccee 100644 --- a/tests/bids.spec.js +++ b/tests/bids.spec.js @@ -612,6 +612,7 @@ describe('BIDS datasets', () => { ), BidsHedIssue.fromHedIssue( generateIssue('emptyCurlyBrace', { + index: 1, string: standaloneSidecars[9].hedData.get('event_code4').ball, }), standaloneSidecars[9].file, diff --git a/tests/temp.spec.js b/tests/temp.spec.js index 34f9296b..0a8beafb 100644 --- a/tests/temp.spec.js +++ b/tests/temp.spec.js @@ -16,7 +16,9 @@ import { HedStringTokenizer } from '../parser/tokenizer' import { HedStringTokenizerOriginal } from '../parser/tokenizerOriginal' import { BidsEventFile } from '../bids' import { BidsSidecar } from '../bids/types/json' +import { BidsDataset } from '../bids/types/dataset' import path from 'path' +import validate from '../bids/validate' describe('HED string parsing', () => { const schemaMap = new Map([ @@ -61,27 +63,27 @@ describe('HED string parsing', () => { it('should validate a sidecar', () => { const nameE = '/sub03/su03_task-test_run-1_events.tsv' const nameJ = '/sub03/su03_task-test_run-1_events.json' - const eventString = 'onset\tduration\n' + '7\t4.0' + const eventString = 'onset\tduration\n' + '7\tsomething' const schema = schemaMap.get('8.3.0') const sidecarObject = { - event_code: { - HED: { - face: '(Red, Blue), (Green, (Yellow))', - ball: '{response_time}, Black', - }, - }, - response_time: { - Description: 'Has description with HED', - HED: 'Label/#, {event_code}', + valid_definition: { + HED: { definition: '(Definition/ValidDefinition, (Square))' }, }, } + const bidsSidecar = new BidsSidecar('thisOne', sidecarObject, { relativePath: nameJ, path: nameJ }) assert(bidsSidecar instanceof BidsSidecar) - + // [sidecars[2][0], 'onset\tduration\n' + '7\tsomething'] const sidecarIssues = bidsSidecar.validate(schema) assert.isEmpty(Object.values(sidecarIssues).flat(), 'Parsing issues occurred') - //const bidsEvents = new BidsEventFile("thatOne", [], bidsSidecar, eventString, {relativePath: nameE, path: nameE}) - //assert.InstanceOf(bidsEvents, BidsEventFile) + const bidsEvents = new BidsEventFile('thatOne', [], sidecarObject, eventString, { + relativePath: nameE, + path: nameE, + }) + assert(bidsEvents instanceof BidsEventFile) + const bidsDataset = new BidsDataset([bidsEvents], [bidsSidecar]) + assert(bidsDataset instanceof BidsDataset) + //const issues = validateBidsDataSet() }) }) From cf03a72318b43b1097c5e0edaddc029660e6044f Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Tue, 22 Oct 2024 06:24:08 -0500 Subject: [PATCH 16/21] Removed temporary test --- .gitignore | 1 + tests/temp.spec.js | 144 --------------------------------------------- 2 files changed, 1 insertion(+), 144 deletions(-) delete mode 100644 tests/temp.spec.js diff --git a/.gitignore b/.gitignore index 55d6a413..0a1f28ec 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ dist/ spec_tests/*.txt spec_tests/temp*.json spec_tests/temp.spec.js +tests/temp.spec.js # Unit test / coverage reports htmlcov/ diff --git a/tests/temp.spec.js b/tests/temp.spec.js deleted file mode 100644 index 0a8beafb..00000000 --- a/tests/temp.spec.js +++ /dev/null @@ -1,144 +0,0 @@ -import chai from 'chai' - -const assert = chai.assert -import { beforeAll, describe, it } from '@jest/globals' - -import { generateIssue } from '../common/issues/issues' -import { Schemas, SchemaSpec, SchemasSpec } from '../common/schema/types' -import { recursiveMap } from '../utils/array' -import { parseHedString } from '../parser/parser' -import { ParsedHedTag } from '../parser/parsedHedTag' -import HedStringSplitter from '../parser/splitter' -import { buildSchemas } from '../validator/schema/init' -import ColumnSplicer from '../parser/columnSplicer' -import ParsedHedGroup from '../parser/parsedHedGroup' -import { HedStringTokenizer } from '../parser/tokenizer' -import { HedStringTokenizerOriginal } from '../parser/tokenizerOriginal' -import { BidsEventFile } from '../bids' -import { BidsSidecar } from '../bids/types/json' -import { BidsDataset } from '../bids/types/dataset' -import path from 'path' -import validate from '../bids/validate' - -describe('HED string parsing', () => { - const schemaMap = new Map([ - ['8.2.0', undefined], - ['8.3.0', undefined], - ]) - - beforeAll(async () => { - const spec2 = new SchemaSpec('', '8.2.0', '', path.join(__dirname, '../tests/data/HED8.2.0.xml')) - const specs2 = new SchemasSpec().addSchemaSpec(spec2) - const schemas2 = await buildSchemas(specs2) - const spec3 = new SchemaSpec('', '8.3.0', '', path.join(__dirname, '../tests/data/HED8.3.0.xml')) - const specs3 = new SchemasSpec().addSchemaSpec(spec3) - const schemas3 = await buildSchemas(specs3) - schemaMap.set('8.2.0', schemas2) - schemaMap.set('8.3.0', schemas3) - }) - // it('should include each group as its own single element', () => { - //const hedString = - //'Action/Move/Flex,(Relation/Spatial-relation/Left-side-of,Action/Move/Bend,Upper-extremity/Elbow),Position/X-position/70 px,Position/Y-position/23 px' - //const hedString = 'x/y w/z' - //const hedString = '(r,z)' - //const hedString = 'r,' - //const hedString = 'r,y' - //const hedString = 'r' - //const hedString = '(r),p' - //const hedString = '/x' - //const hedString = 'x/ /y' - //const hedString = 'x/' - //const hedString = '((x))' - //const hedString = '((xy))' - //const hedString = '((xy), ( h:p, ((q, r ))), g), h,' - // const hedString = '((xy), g), h' - // const tok = new HedStringTokenizer(hedString) - // const [tagSpecs, groupBounds, tokenizingIssues] = tok.tokenize() - // assert.isEmpty(Object.values(tokenizingIssues).flat(), 'Parsing issues occurred') - //const hedString = 'Action/Move/My-flex,(Relation/Spatial-relation/Left-side-of,Action/Move/My-bend,Upper-extremity/My-elbow),Position/X-position/70 m,Position/Y-position/23 m' - // const [result, issues] = splitHedString(hedString, nullSchema) - // assert.isEmpty(Object.values(issues).flat(), 'Parsing issues occurred') - // }) - - it('should validate a sidecar', () => { - const nameE = '/sub03/su03_task-test_run-1_events.tsv' - const nameJ = '/sub03/su03_task-test_run-1_events.json' - const eventString = 'onset\tduration\n' + '7\tsomething' - const schema = schemaMap.get('8.3.0') - const sidecarObject = { - valid_definition: { - HED: { definition: '(Definition/ValidDefinition, (Square))' }, - }, - } - - const bidsSidecar = new BidsSidecar('thisOne', sidecarObject, { relativePath: nameJ, path: nameJ }) - assert(bidsSidecar instanceof BidsSidecar) - // [sidecars[2][0], 'onset\tduration\n' + '7\tsomething'] - const sidecarIssues = bidsSidecar.validate(schema) - assert.isEmpty(Object.values(sidecarIssues).flat(), 'Parsing issues occurred') - const bidsEvents = new BidsEventFile('thatOne', [], sidecarObject, eventString, { - relativePath: nameE, - path: nameE, - }) - assert(bidsEvents instanceof BidsEventFile) - const bidsDataset = new BidsDataset([bidsEvents], [bidsSidecar]) - assert(bidsDataset instanceof BidsDataset) - //const issues = validateBidsDataSet() - }) -}) - -// const tokenizeTester = function(testStrings, testFunction) { -// for (const [testStringKey, testString] of Object.entries(testStrings)) { -// const testResult = testFunction(testStringKey, testString) -// } -// } -// -// const tokenizeTesterBad = function(testStrings, issueCodes, testFunction) { -// for (const [testStringKey, testString] of Object.entries(testStrings)) { -// const testResult = testFunction(testStringKey, issueCode.testStringKey, testString) -// } -// } -// -// it('should tokenize valid strings', () => { -// const testStrings = { -// oneBrace: 'x,{y}', -// braceParentheses1: '(({yz}))', -// leadingBlank: ' {x},z,', -// } -// tokenizeTester(testStrings, (key, string) => { -// const tok = new HedStringTokenizer(string) -// const [tagSpecs, groupBounds, issues] = tok.tokenize() -// assert.isEmpty(Object.values(issues).flat(), `${key}: ${issues}`) -// const tok1 = new HedStringTokenizerOld(string) -// const [tagSpec1, groupBounds1, issues1] = tok1.tokenize() -// assert.isEmpty(Object.values(issues1).flat(), `${key}: ${issues1}`) -// }) -// }) -// -// it('should tokenize invalid strings', () => { -// const testStrings = { -// // oneBrace: 'x,{y}', -// // braceParentheses1: '(({yz}))', -// // leadingBlank: ' {x},z' -// //onlyComma: ' ,', -// doubleTrailingComma: 'x,,', -// } -// -// const expectedIssues = { -// onlyComma: 'emptyTagFound', -// doubleTrailingComma: 'emptyTagFound', -// } -// -// for (const [testStringKey, testString] of Object.entries(testStrings)) { -// const tok = new HedStringTokenizer(testString) -// const [tagSpecs, groupBounds, issues] = tok.tokenize() -// const issuesFlat = Object.values(issues).flat() -// const expectedIssue = expectedIssues[testStringKey] || '' -// assert.equal(issuesFlat['code'], expectedIssue, `Expected ${expectedIssue} for "${testString}"`) -// } -// // assert.isEmpty(Object.values(issues).flat(), `${key}: ${issues}`) -// // const tok1 = new HedStringTokenizerOld(string) -// // const [tagSpec1, groupBounds1, issues1] = tok1.tokenize() -// // assert.isEmpty(Object.values(issues1).flat(), `${key}: ${issues1}`) -// }) -// }) From cd53a2c16b543229bfa1d2a0e0b0fb15710cc6af Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Tue, 22 Oct 2024 06:28:20 -0500 Subject: [PATCH 17/21] Skipped tests on old tokenizer --- tests/tockenizerErrorTests.spec.js | 2 +- tests/tokenizerPassingTests.spec.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/tockenizerErrorTests.spec.js b/tests/tockenizerErrorTests.spec.js index b84ac6c2..5370f71b 100644 --- a/tests/tockenizerErrorTests.spec.js +++ b/tests/tockenizerErrorTests.spec.js @@ -89,7 +89,7 @@ describe('Tokenizer validation using JSON tests', () => { }) }) -describe('Original tokenizer validation using JSON tests', () => { +describe.skip('Original tokenizer validation using JSON tests', () => { const badLog = [] let totalTests = 0 let wrongErrors = 0 diff --git a/tests/tokenizerPassingTests.spec.js b/tests/tokenizerPassingTests.spec.js index c91ac41b..2115df42 100644 --- a/tests/tokenizerPassingTests.spec.js +++ b/tests/tokenizerPassingTests.spec.js @@ -81,7 +81,7 @@ describe('HED tokenizer validation', () => { }) }) - describe('Original tokenizer validation - validData', () => { + describe.skip('Original tokenizer validation - validData', () => { const badLog = [] let totalTests = 0 let unexpectedErrors = 0 From f6034df52c9a43bfbe9eaca6edef3e0c8953d2e7 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Wed, 23 Oct 2024 17:16:26 -0500 Subject: [PATCH 18/21] bidsTests just started --- bids/validator/bidsHedTsvValidator.js | 33 +++--- tests/bids.spec.js | 17 ++- tests/bidsErrorData.js | 30 +++++ tests/bidsErrorTests.spec.js | 132 +++++++++++++++++++++ tests/bidsTestData.js | 73 ++++++++++++ tests/bidsTests.spec.js | 162 ++++++++++++++++++++++++++ tests/runLog.txt | 7 ++ tests/tockenizerErrorTests.spec.js | 2 +- tests/tokenizerPassingTests.spec.js | 2 +- 9 files changed, 442 insertions(+), 16 deletions(-) create mode 100644 tests/bidsErrorData.js create mode 100644 tests/bidsErrorTests.spec.js create mode 100644 tests/bidsTestData.js create mode 100644 tests/bidsTests.spec.js create mode 100644 tests/runLog.txt diff --git a/bids/validator/bidsHedTsvValidator.js b/bids/validator/bidsHedTsvValidator.js index 231ea14b..74bf9da2 100644 --- a/bids/validator/bidsHedTsvValidator.js +++ b/bids/validator/bidsHedTsvValidator.js @@ -82,6 +82,10 @@ export class BidsHedTsvValidator { * @private */ _validateHedColumn() { + if (this.tsvFile.hedColumnHedStrings.length === 0) { + // no HED column strings to validate + return [] + } return this.tsvFile.hedColumnHedStrings.flatMap((hedString, rowIndexMinusTwo) => this._validateHedColumnString(hedString, rowIndexMinusTwo + 2), ) @@ -230,13 +234,14 @@ export class BidsHedTsvParser { */ _parseHedRows(tsvHedRows) { const hedStrings = [] - - tsvHedRows.forEach((row, index) => { - const hedString = this._parseHedRow(row, index + 2) - if (hedString !== null) { - hedStrings.push(hedString) - } - }) + if (tsvHedRows.size > 0) { + tsvHedRows.forEach((row, index) => { + const hedString = this._parseHedRow(row, index + 2) + if (hedString !== null) { + hedStrings.push(hedString) + } + }) + } return hedStrings } @@ -248,13 +253,15 @@ export class BidsHedTsvParser { * @private */ _mergeEventRows(rowStrings) { - const groupedTsvRows = groupBy(rowStrings, (rowString) => rowString.onset) - const sortedOnsetTimes = Array.from(groupedTsvRows.keys()).sort((a, b) => a - b) const eventStrings = [] - for (const onset of sortedOnsetTimes) { - const onsetRows = groupedTsvRows.get(onset) - const onsetEventString = new BidsTsvEvent(this.tsvFile, onsetRows) - eventStrings.push(onsetEventString) + if (rowStrings.length > 0) { + const groupedTsvRows = groupBy(rowStrings, (rowString) => rowString.onset) + const sortedOnsetTimes = Array.from(groupedTsvRows.keys()).sort((a, b) => a - b) + for (const onset of sortedOnsetTimes) { + const onsetRows = groupedTsvRows.get(onset) + const onsetEventString = new BidsTsvEvent(this.tsvFile, onsetRows) + eventStrings.push(onsetEventString) + } } return eventStrings } diff --git a/tests/bids.spec.js b/tests/bids.spec.js index 7ae2ccee..589cfa83 100644 --- a/tests/bids.spec.js +++ b/tests/bids.spec.js @@ -176,7 +176,15 @@ describe('BIDS datasets', () => { all_bad: new BidsDataset(badDatasets, []), } const expectedIssues = { - all_good: [], + all_good: [ + BidsHedIssue.fromHedIssue( + generateIssue('duplicateTag', { + tag: 'Boat', + }), + goodDatasets[0].file, + { tsvLine: 5 }, + ), + ], all_bad: [ // BidsHedIssue.fromHedIssue(generateIssue('invalidTag', { tag: 'Confused' }), badDatasets[0].file), BidsHedIssue.fromHedIssue(generateIssue('invalidTag', { tag: 'Confused' }), badDatasets[0].file), @@ -220,6 +228,13 @@ describe('BIDS datasets', () => { badDatasets[3].file, { tsvLine: 2 }, ), + BidsHedIssue.fromHedIssue( + generateIssue('invalidTopLevelTagGroupTag', { + tag: 'Duration/ferry s', + }), + badDatasets[3].file, + { tsvLine: 2 }, + ), BidsHedIssue.fromHedIssue( generateIssue('sidecarKeyMissing', { key: 'purple', diff --git a/tests/bidsErrorData.js b/tests/bidsErrorData.js new file mode 100644 index 00000000..90b0ffa7 --- /dev/null +++ b/tests/bidsErrorData.js @@ -0,0 +1,30 @@ +import { BidsHedIssue } from '../bids' +import { generateIssue } from '../common/issues/issues' + +export const errorBidsTests = [ + { + name: 'invalid-bids-datasets', + description: 'Who knows', + warning: false, + tests: [ + { + name: 'valid-sidecar-bad-tag-tsv', + explanation: 'Valid-sidecar, but invalid tsv', + schemaVersion: '8.3.0', + sidecar: { + event_code: { + HED: { + face: '(Red, Blue), (Green, (Yellow))', + }, + }, + }, + sidecarValid: true, + sidecarErrors: [], + eventsString: 'onset\tduration\tHED\n' + '7\t4\tBaloney', + eventsValid: false, + eventsErrors: [BidsHedIssue.fromHedIssue(generateIssue('invalidTag', {}), 'valid-sidecar-invalid-tsv.tsv')], + comboValid: false, + }, + ], + }, +] diff --git a/tests/bidsErrorTests.spec.js b/tests/bidsErrorTests.spec.js new file mode 100644 index 00000000..03527246 --- /dev/null +++ b/tests/bidsErrorTests.spec.js @@ -0,0 +1,132 @@ +import chai from 'chai' +const assert = chai.assert +import { beforeAll, describe, afterAll } from '@jest/globals' +import path from 'path' +import { BidsHedIssue } from '../bids/types/issues' +import { buildSchemas } from '../validator/schema/init' +import { SchemaSpec, SchemasSpec } from '../common/schema/types' +import { BidsDataset, BidsEventFile, BidsHedTsvValidator, BidsSidecar, BidsTsvFile } from '../bids' +import { generateIssue, IssueError } from '../common/issues/issues' + +import { HedStringTokenizerOriginal } from '../parser/tokenizerOriginal' +import { HedStringTokenizer } from '../parser/tokenizer' +import { passingBidsTests } from './bidsPassingData' +import { BidsHedTsvParser } from '../bids/validator/bidsHedTsvValidator' +import parseTSV from '../bids/tsvParser' +const fs = require('fs') + +const displayLog = process.env.DISPLAY_LOG === 'true' + +const skippedErrors = {} + +describe('HED tokenizer validation', () => { + const schemaMap = new Map([ + ['8.2.0', undefined], + ['8.3.0', undefined], + ]) + + const badLog = [] + let totalTests = 0 + let wrongErrors = 0 + let unexpectedErrors = 0 + + beforeAll(async () => { + const spec2 = new SchemaSpec('', '8.2.0', '', path.join(__dirname, '../tests/data/HED8.2.0.xml')) + const specs2 = new SchemasSpec().addSchemaSpec(spec2) + const schemas2 = await buildSchemas(specs2) + const spec3 = new SchemaSpec('', '8.3.0', '', path.join(__dirname, '../tests/data/HED8.3.0.xml')) + const specs3 = new SchemasSpec().addSchemaSpec(spec3) + const schemas3 = await buildSchemas(specs3) + schemaMap.set('8.2.0', schemas2) + schemaMap.set('8.3.0', schemas3) + }) + + afterAll(() => { + const outBad = path.join(__dirname, 'runLog.txt') + const summary = `Total tests:${totalTests} Unexpected errors:${unexpectedErrors}\n` + if (displayLog) { + fs.writeFileSync(outBad, summary + badLog.join('\n'), 'utf8') + } + }) + + describe('BIDS validation - validData', () => { + const badLog = [] + let totalTests = 0 + let unexpectedErrors = 0 + + beforeAll(async () => {}) + + afterAll(() => { + const outBad = path.join(__dirname, 'runLog.txt') + const summary = `Total tests:${totalTests} Unexpected errors:${unexpectedErrors}\n` + if (displayLog) { + fs.writeFileSync(outBad, summary + badLog.join('\n'), 'utf8') + } + }) + + describe.each(passingBidsTests)('$name : $description', ({ tests }) => { + let itemLog + + const assertErrors = function (header, issues, iLog) { + iLog.push(`${header}\n`) + totalTests += 1 + + let errors = [] + if (issues.length > 0) { + errors = issues.map((item) => item.hedIssue.hedCode) // list of hedCodes in the issues + } + const errorString = errors.join(',') + if (errors.length > 0) { + iLog.push(`---expected no errors but got errors [${errorString}]\n`) + unexpectedErrors += 1 + assert(errors.length === 0, `${header}---expected no errors but got errors [${errorString}]`) + } + } + + const validate = function (test, iLog) { + // Make sure that the schema is available + const header = `\n[${test.name} (Expect pass)]` + const thisSchema = schemaMap.get(test.schemaVersion) + assert(thisSchema !== undefined, `${test.schemaVersion} is not available in test ${test.name}`) + + // Validate the sidecar by itself + const sidecarName = test.name + '.json' + const bidsSidecar = new BidsSidecar('thisOne', test.sidecar, { relativePath: sidecarName, path: sidecarName }) + assert(bidsSidecar instanceof BidsSidecar, 'Test') + const sidecarIssues = bidsSidecar.validate(thisSchema) + assertErrors(header + ':Validating just the sidecar', sidecarIssues, iLog) + + // Parse the events file + const eventName = test.name + '.tsv' + const parsedTsv = parseTSV(test.eventsString) + assert(parsedTsv instanceof Map, `${eventName} cannot be parsed`) + + // Validate the events file by itself + const bidsTsv = new BidsTsvFile(test.name, parsedTsv, { relativePath: 'eventName' }, [], {}) + const validator = new BidsHedTsvValidator(bidsTsv, thisSchema) + validator.validate() + assertErrors(header + ':Parsing events alone', validator.issues, iLog) + + // Validate the events file with the sidecar + const bidsTsvSide = new BidsTsvFile(test.name, parsedTsv, { relativePath: 'eventName' }, [], bidsSidecar) + const validatorSide = new BidsHedTsvValidator(bidsTsvSide, thisSchema) + validatorSide.validate() + assertErrors(header + ':Parsing events with ', validatorSide.issues, iLog) + } + + beforeAll(async () => { + itemLog = [] + }) + + afterAll(() => { + badLog.push(itemLog.join('\n')) + }) + + if (tests && tests.length > 0) { + test.each(tests)('BIDS: %s ', (test) => { + validate(test, itemLog) + }) + } + }) + }) +}) diff --git a/tests/bidsTestData.js b/tests/bidsTestData.js new file mode 100644 index 00000000..71222435 --- /dev/null +++ b/tests/bidsTestData.js @@ -0,0 +1,73 @@ +import { BidsHedIssue } from '../bids' +import { generateIssue } from '../common/issues/issues' + +export const bidsTestData = [ + // { + // name: 'valid-bids-datasets', + // description: 'Who knows', + // tests: [ + // { + // name: 'no-hed-at-all', + // explanation: 'Neither the sidecar or tsv has HED', + // schemaVersion: '8.3.0', + // sidecar: { + // duration: { + // description: 'Duration of the event in seconds.', + // }, + // }, + // eventsString: 'onset\tduration\n' + '7\t4', + // sidecarOnlyErrors: [], + // eventsOnlyErrors: [], + // comboErrors: [] + // }, + // { + // name: 'only-header-in-tsv', + // explanation: 'TSV only has header and some extra white space', + // schemaVersion: '8.3.0', + // sidecar: { + // duration: { + // description: 'Duration of the event in seconds.', + // }, + // }, + // eventsString: 'onset\tduration\n', + // sidecarOnlyErrors: [], + // eventsOnlyErrors: [], + // comboErrors: [] + // }, + // ] + // }, + { + name: 'invalid-bids-datasets', + description: 'Who knows this', + tests: [ + { + name: 'valid-sidecar-bad-tag-tsv', + explanation: 'Valid-sidecar, but invalid tsv', + schemaVersion: '8.3.0', + sidecar: { + event_code: { + HED: { + face: '(Red, Blue), (Green, (Yellow))', + }, + }, + }, + eventsString: 'onset\tduration\tHED\n' + '7\t4\tBaloney', + sidecarOnlyErrors: [], + eventsOnlyErrors: [ + BidsHedIssue.fromHedIssue( + generateIssue('invalidTag', { tag: 'Baloney' }), + { relativePath: 'valid-sidecar-bad-tag-tsv.tsv' }, + { tsvLine: 2 }, + ), + ], + comboErrors: [ + BidsHedIssue.fromHedIssue( + generateIssue('invalidTag', { tag: 'Baloney' }), + { relativePath: 'valid-sidecar-bad-tag-tsv.tsv' }, + { tsvLine: 2 }, + ), + ], + }, + ], + }, +] diff --git a/tests/bidsTests.spec.js b/tests/bidsTests.spec.js new file mode 100644 index 00000000..b8dbf071 --- /dev/null +++ b/tests/bidsTests.spec.js @@ -0,0 +1,162 @@ +import chai from 'chai' +const assert = chai.assert +const difference = require('lodash/difference') +import { beforeAll, describe, afterAll } from '@jest/globals' +import path from 'path' +import { BidsHedIssue } from '../bids/types/issues' +import { buildSchemas } from '../validator/schema/init' +import { SchemaSpec, SchemasSpec } from '../common/schema/types' +import { BidsDataset, BidsEventFile, BidsHedTsvValidator, BidsSidecar, BidsTsvFile } from '../bids' +import { generateIssue, IssueError } from '../common/issues/issues' + +import { HedStringTokenizerOriginal } from '../parser/tokenizerOriginal' +import { HedStringTokenizer } from '../parser/tokenizer' +import { bidsTestData } from './bidsTestData' +import { BidsHedTsvParser } from '../bids/validator/bidsHedTsvValidator' +import parseTSV from '../bids/tsvParser' +const fs = require('fs') + +//const displayLog = process.env.DISPLAY_LOG === 'true' +const displayLog = true +const skippedErrors = {} + +// Return an array of hedCode values extracted from an issues list. +function extractHedCodes(issues) { + const errors = [] + for (const issue of issues) { + if (issue instanceof BidsHedIssue) { + errors.push(`${issue.hedIssue.hedCode}`) + } else { + errors.push(`${issue.hedCode}`) + } + } + return errors +} + +describe('BIDS validation', () => { + const schemaMap = new Map([ + ['8.2.0', undefined], + ['8.3.0', undefined], + ]) + + const badLog = [] + let totalTests + let wrongErrors + let missingErrors + + beforeAll(async () => { + const spec2 = new SchemaSpec('', '8.2.0', '', path.join(__dirname, '../tests/data/HED8.2.0.xml')) + const specs2 = new SchemasSpec().addSchemaSpec(spec2) + const schemas2 = await buildSchemas(specs2) + const spec3 = new SchemaSpec('', '8.3.0', '', path.join(__dirname, '../tests/data/HED8.3.0.xml')) + const specs3 = new SchemasSpec().addSchemaSpec(spec3) + const schemas3 = await buildSchemas(specs3) + schemaMap.set('8.2.0', schemas2) + schemaMap.set('8.3.0', schemas3) + totalTests = 0 + wrongErrors = 0 + missingErrors = 0 + }) + + afterAll(() => { + const outBad = path.join(__dirname, 'runLog.txt') + const summary = `Total tests:${totalTests} Wrong errors:${wrongErrors} MissingErrors:${missingErrors}\n` + if (displayLog) { + fs.writeFileSync(outBad, summary + badLog.join('\n'), 'utf8') + } + }) + + describe.each(bidsTestData)('$name : $description', ({ tests }) => { + let itemLog + + const assertErrors = function (test, type, expectedErrors, issues, iLog) { + const status = expectedErrors.length > 0 ? 'Expect fail' : 'Expect pass' + const header = `[${test.name}:${type}:](${status})` + const log = [header] + totalTests += 1 + + const errors = extractHedCodes(issues) + const errorString = errors.join(',') + if (errors.length > 0) { + log.push(`---has errors [${errorString}]`) + } + if (expectedErrors.length === 0 && errorString.length > 0) { + const hasErrors = `---expected no errors but got errors [${errorString}]` + log.push(hasErrors) + iLog.push(log.join('\n')) + wrongErrors += 1 + assert(errorString.length === 0, `${header}${hasErrors}]`) + return + } + const expectedErrorCodes = extractHedCodes(expectedErrors) + const wrong = difference(errors, expectedErrorCodes) + const missing = difference(expectedErrors, errors) + let errorMessage = '' + if (wrong.length > 0) { + errorMessage = `---received unexpected errors ${wrong.join(',')}\n` + wrongErrors += 1 + } + if (missing.length > 0) { + errorMessage = errorMessage + `---did not receive expected errors ${missing.join(',')}` + missingErrors += 1 + } + if (errorMessage.length > 0) { + log.push(errorMessage) + iLog.push(log.join('\n')) + } + assert.sameDeepMembers(issues, expectedErrors, header) + } + + const validate = function (test, iLog) { + // Make sure that the schema is available + const header = `\n[${test.name} (Expect pass)]` + iLog.push(header) + const thisSchema = schemaMap.get(test.schemaVersion) + assert(thisSchema !== undefined, `${test.schemaVersion} is not available in test ${test.name}`) + + // Validate the sidecar by itself + const sidecarName = test.name + '.json' + const bidsSidecar = new BidsSidecar('thisOne', test.sidecar, { relativePath: sidecarName, path: sidecarName }) + assert(bidsSidecar instanceof BidsSidecar, 'Test') + const sidecarIssues = bidsSidecar.validate(thisSchema) + assertErrors(test, 'Sidecar only', test.sidecarOnlyErrors, sidecarIssues, iLog) + + // Parse the events file + const eventName = test.name + '.tsv' + const parsedTsv = parseTSV(test.eventsString) + assert(parsedTsv instanceof Map, `${eventName} cannot be parsed`) + + // Validate the events file by itself + // const bidsTsv = new BidsTsvFile(test.name, parsedTsv, { relativePath: eventName }, [], {}) + // const validator = new BidsHedTsvValidator(bidsTsv, thisSchema) + // validator.validate() + // assertErrors(test, 'Events only', test.eventsOnlyErrors, validator.issues, iLog) + + // Validate the events file with the sidecar + const bidsTsvSide = new BidsTsvFile( + test.name, + parsedTsv, + { relativePath: eventName, path: eventName }, + [], + bidsSidecar, + ) + const validatorWithSide = new BidsHedTsvValidator(bidsTsvSide, thisSchema) + validatorWithSide.validate() + assertErrors(test, 'Events+side', test.comboErrors, validatorWithSide.issues, iLog) + } + + beforeAll(async () => { + itemLog = [] + }) + + afterAll(() => { + badLog.push(itemLog.join('\n')) + }) + + if (tests && tests.length > 0) { + test.each(tests)('$name $explanation ', (test) => { + validate(test, itemLog) + }) + } + }) +}) diff --git a/tests/runLog.txt b/tests/runLog.txt new file mode 100644 index 00000000..8aee2242 --- /dev/null +++ b/tests/runLog.txt @@ -0,0 +1,7 @@ +Total tests:5 Wrong errors:0 MissingErrors:0 + +[no-hed-at-all (Expect pass)] + +[only-header-in-tsv (Expect pass)] + +[valid-sidecar-bad-tag-tsv (Expect pass)] \ No newline at end of file diff --git a/tests/tockenizerErrorTests.spec.js b/tests/tockenizerErrorTests.spec.js index 5370f71b..b84ac6c2 100644 --- a/tests/tockenizerErrorTests.spec.js +++ b/tests/tockenizerErrorTests.spec.js @@ -89,7 +89,7 @@ describe('Tokenizer validation using JSON tests', () => { }) }) -describe.skip('Original tokenizer validation using JSON tests', () => { +describe('Original tokenizer validation using JSON tests', () => { const badLog = [] let totalTests = 0 let wrongErrors = 0 diff --git a/tests/tokenizerPassingTests.spec.js b/tests/tokenizerPassingTests.spec.js index 2115df42..c91ac41b 100644 --- a/tests/tokenizerPassingTests.spec.js +++ b/tests/tokenizerPassingTests.spec.js @@ -81,7 +81,7 @@ describe('HED tokenizer validation', () => { }) }) - describe.skip('Original tokenizer validation - validData', () => { + describe('Original tokenizer validation - validData', () => { const badLog = [] let totalTests = 0 let unexpectedErrors = 0 From 3473342ee377a0a0ae25c985b98a8342d64b500a Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Thu, 24 Oct 2024 07:09:25 -0500 Subject: [PATCH 19/21] Updated bids tests to include a valid and invalid case --- tests/bidsTestData.js | 114 ++++++++++++++++++++++++++-------------- tests/bidsTests.spec.js | 10 ++-- tests/runLog.txt | 24 +++++++-- 3 files changed, 101 insertions(+), 47 deletions(-) diff --git a/tests/bidsTestData.js b/tests/bidsTestData.js index 71222435..792f1b09 100644 --- a/tests/bidsTestData.js +++ b/tests/bidsTestData.js @@ -2,47 +2,57 @@ import { BidsHedIssue } from '../bids' import { generateIssue } from '../common/issues/issues' export const bidsTestData = [ - // { - // name: 'valid-bids-datasets', - // description: 'Who knows', - // tests: [ - // { - // name: 'no-hed-at-all', - // explanation: 'Neither the sidecar or tsv has HED', - // schemaVersion: '8.3.0', - // sidecar: { - // duration: { - // description: 'Duration of the event in seconds.', - // }, - // }, - // eventsString: 'onset\tduration\n' + '7\t4', - // sidecarOnlyErrors: [], - // eventsOnlyErrors: [], - // comboErrors: [] - // }, - // { - // name: 'only-header-in-tsv', - // explanation: 'TSV only has header and some extra white space', - // schemaVersion: '8.3.0', - // sidecar: { - // duration: { - // description: 'Duration of the event in seconds.', - // }, - // }, - // eventsString: 'onset\tduration\n', - // sidecarOnlyErrors: [], - // eventsOnlyErrors: [], - // comboErrors: [] - // }, - // ] - // }, { - name: 'invalid-bids-datasets', - description: 'Who knows this', + name: 'valid-bids-datasets-with-limited-hed', + description: 'HED or data is missing in various places', + tests: [ + { + name: 'no-hed-at-all-but-both-tsv-json-non-empty', + explanation: 'Neither the sidecar or tsv has HED but neither non-empty', + schemaVersion: '8.3.0', + sidecar: { + duration: { + description: 'Duration of the event in seconds.', + }, + }, + eventsString: 'onset\tduration\n' + '7\t4', + sidecarOnlyErrors: [], + eventsOnlyErrors: [], + comboErrors: [], + }, + { + name: 'only-header-in-tsv-with-return', + explanation: 'TSV only has header and trailing return and white space', + schemaVersion: '8.3.0', + sidecar: { + duration: { + description: 'Duration of the event in seconds.', + }, + }, + eventsString: 'onset\tduration\n ', + sidecarOnlyErrors: [], + eventsOnlyErrors: [], + comboErrors: [], + }, + { + name: 'empty-json-empty-tsv', + explanation: 'Both sidecar and tsv are empty except for white space', + schemaVersion: '8.3.0', + sidecar: {}, + eventsString: '\n \n', + sidecarOnlyErrors: [], + eventsOnlyErrors: [], + comboErrors: [], + }, + ], + }, + { + name: 'valid-json-invalid-tsv', + description: 'JSON is valid but tsv is invalid', tests: [ { name: 'valid-sidecar-bad-tag-tsv', - explanation: 'Valid-sidecar, but invalid tsv', + explanation: 'Unrelated sidecar is valid but HED column tag is invalid', schemaVersion: '8.3.0', sidecar: { event_code: { @@ -63,7 +73,35 @@ export const bidsTestData = [ comboErrors: [ BidsHedIssue.fromHedIssue( generateIssue('invalidTag', { tag: 'Baloney' }), - { relativePath: 'valid-sidecar-bad-tag-tsv.tsv' }, + { path: 'valid-sidecar-bad-tag-tsv.tsv', relativePath: 'valid-sidecar-bad-tag-tsv.tsv' }, + { tsvLine: 2 }, + ), + ], + }, + { + name: 'valid-sidecar-tsv-curly-brace', + explanation: 'The sidecar is valid, but tsv HED column has braces}', + schemaVersion: '8.3.0', + sidecar: { + event_code: { + HED: { + face: '(Red, Blue), (Green, (Yellow))', + }, + }, + }, + eventsString: 'onset\tduration\tevent_code\tHED\n' + '7\t4\tface\tRed,{blue}', + sidecarOnlyErrors: [], + eventsOnlyErrors: [ + BidsHedIssue.fromHedIssue( + generateIssue('curlyBracesInHedColumn', { column: '{blue}' }), + { relativePath: 'valid-sidecar-tsv-curly-brace.tsv' }, + { tsvLine: 2 }, + ), + ], + comboErrors: [ + BidsHedIssue.fromHedIssue( + generateIssue('curlyBracesInHedColumn', { column: '{blue}' }), + { path: 'valid-sidecar-tsv-curly-brace.tsv', relativePath: 'valid-sidecar-tsv-curly-brace.tsv' }, { tsvLine: 2 }, ), ], diff --git a/tests/bidsTests.spec.js b/tests/bidsTests.spec.js index b8dbf071..3b30537a 100644 --- a/tests/bidsTests.spec.js +++ b/tests/bidsTests.spec.js @@ -127,10 +127,10 @@ describe('BIDS validation', () => { assert(parsedTsv instanceof Map, `${eventName} cannot be parsed`) // Validate the events file by itself - // const bidsTsv = new BidsTsvFile(test.name, parsedTsv, { relativePath: eventName }, [], {}) - // const validator = new BidsHedTsvValidator(bidsTsv, thisSchema) - // validator.validate() - // assertErrors(test, 'Events only', test.eventsOnlyErrors, validator.issues, iLog) + const bidsTsv = new BidsTsvFile(test.name, parsedTsv, { relativePath: eventName }, [], {}) + const validator = new BidsHedTsvValidator(bidsTsv, thisSchema) + validator.validate() + assertErrors(test, 'Events only', test.eventsOnlyErrors, validator.issues, iLog) // Validate the events file with the sidecar const bidsTsvSide = new BidsTsvFile( @@ -138,7 +138,7 @@ describe('BIDS validation', () => { parsedTsv, { relativePath: eventName, path: eventName }, [], - bidsSidecar, + test.sidecar, ) const validatorWithSide = new BidsHedTsvValidator(bidsTsvSide, thisSchema) validatorWithSide.validate() diff --git a/tests/runLog.txt b/tests/runLog.txt index 8aee2242..4555c989 100644 --- a/tests/runLog.txt +++ b/tests/runLog.txt @@ -1,7 +1,23 @@ -Total tests:5 Wrong errors:0 MissingErrors:0 +Total tests:15 Wrong errors:0 MissingErrors:4 -[no-hed-at-all (Expect pass)] +[no-hed-at-all-but-both-tsv-json-non-empty (Expect pass)] -[only-header-in-tsv (Expect pass)] +[only-header-in-tsv-with-return (Expect pass)] -[valid-sidecar-bad-tag-tsv (Expect pass)] \ No newline at end of file +[empty-json-empty-tsv (Expect pass)] + +[valid-sidecar-bad-tag-tsv (Expect pass)] +[valid-sidecar-bad-tag-tsv:Events only:](Expect fail) +---has errors [TAG_INVALID] +---did not receive expected errors [object Object] +[valid-sidecar-bad-tag-tsv:Events+side:](Expect fail) +---has errors [TAG_INVALID] +---did not receive expected errors [object Object] + +[valid-sidecar-tsv-curly-brace (Expect pass)] +[valid-sidecar-tsv-curly-brace:Events only:](Expect fail) +---has errors [CHARACTER_INVALID] +---did not receive expected errors [object Object] +[valid-sidecar-tsv-curly-brace:Events+side:](Expect fail) +---has errors [CHARACTER_INVALID] +---did not receive expected errors [object Object] \ No newline at end of file From 1c2f68627f7b3494e2883ee39e4cbd4585c3d3d2 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Fri, 25 Oct 2024 11:57:21 -0500 Subject: [PATCH 20/21] Continued reorganizing the bids spec tests --- tests/bids.spec.data.js | 10 +- tests/bids.spec.js | 225 ++++++++++++++++----------------- tests/bidsErrorData.js | 30 ----- tests/bidsErrorTests.spec.js | 132 ------------------- tests/bidsTestData.js | 111 ---------------- tests/bidsTests.data.js | 239 +++++++++++++++++++++++++++++++++++ tests/bidsTests.spec.js | 91 ++++++++----- tests/runLog.txt | 41 +++--- 8 files changed, 431 insertions(+), 448 deletions(-) delete mode 100644 tests/bidsErrorData.js delete mode 100644 tests/bidsErrorTests.spec.js delete mode 100644 tests/bidsTestData.js create mode 100644 tests/bidsTests.data.js diff --git a/tests/bids.spec.data.js b/tests/bids.spec.data.js index e7603ec0..a55f7b68 100644 --- a/tests/bids.spec.data.js +++ b/tests/bids.spec.data.js @@ -546,11 +546,11 @@ const tsvFiles = [ ], // sub03 - Valid combined sidecar/TSV data [ - [sidecars[2][0], 'onset\tduration\n' + '7\tsomething'], - [sidecars[0][0], 'onset\tduration\tcolor\n' + '7\tsomething\tred'], - [sidecars[0][1], 'onset\tduration\tspeed\n' + '7\tsomething\t60'], - [sidecars[2][0], hedColumnOnlyHeader + '7\tsomething\tLaptop-computer'], - [sidecars[0][0], 'onset\tduration\tcolor\tHED\n' + '7\tsomething\tgreen\tLaptop-computer'], + [sidecars[2][0], 'onset\tduration\n' + '7\t4'], + [sidecars[0][0], 'onset\tduration\tcolor\n' + '7\t4\tred'], + [sidecars[0][1], 'onset\tduration\tspeed\n' + '7\t4\t60'], + [sidecars[2][0], hedColumnOnlyHeader + '7\t4\tLaptop-computer'], + [sidecars[0][0], 'onset\tduration\tcolor\tHED\n' + '7\t4\tgreen\tLaptop-computer'], [ Object.assign({}, sidecars[0][0], sidecars[0][1]), 'onset\tduration\tcolor\tvehicle\tspeed\n' + '7\tsomething\tblue\ttrain\t150', diff --git a/tests/bids.spec.js b/tests/bids.spec.js index 589cfa83..33e8c867 100644 --- a/tests/bids.spec.js +++ b/tests/bids.spec.js @@ -60,133 +60,124 @@ describe('BIDS datasets', () => { }), ) } - - describe('Sidecar-only datasets', () => { - it('should validate non-placeholder HED strings in BIDS sidecars', () => { - const goodDatasets = bidsSidecars[0] - const testDatasets = { - single: new BidsDataset([], [bidsSidecars[0][0]]), - all_good: new BidsDataset([], goodDatasets), - warning_and_good: new BidsDataset([], goodDatasets.concat([bidsSidecars[1][0]])), - error_and_good: new BidsDataset([], goodDatasets.concat([bidsSidecars[1][1]])), - } - const expectedIssues = { - single: [], - all_good: [], - warning_and_good: [ - BidsHedIssue.fromHedIssue( - generateIssue('extension', { tag: 'Train/Maglev', sidecarKey: 'transport' }), - bidsSidecars[1][0].file, - ), - ], - error_and_good: [ - BidsHedIssue.fromHedIssue(generateIssue('invalidTag', { tag: 'Confused' }), bidsSidecars[1][1].file), - ], - } - validator(testDatasets, expectedIssues, specs) - }, 10000) - - it('should validate placeholders in BIDS sidecars', () => { - const placeholderDatasets = bidsSidecars[2] - const testDatasets = { - placeholders: new BidsDataset([], placeholderDatasets), - } - const expectedIssues = { - placeholders: [ - BidsHedIssue.fromHedIssue( - generateIssue('invalidPlaceholderInDefinition', { - definition: 'InvalidDefinitionGroup', - sidecarKey: 'invalid_definition_group', - }), - placeholderDatasets[2].file, - ), - BidsHedIssue.fromHedIssue( - generateIssue('invalidPlaceholderInDefinition', { - definition: 'InvalidDefinitionTag', - sidecarKey: 'invalid_definition_tag', - }), - placeholderDatasets[3].file, - ), - BidsHedIssue.fromHedIssue( - generateIssue('invalidPlaceholderInDefinition', { - definition: 'MultiplePlaceholdersInGroupDefinition', - sidecarKey: 'multiple_placeholders_in_group', - }), - placeholderDatasets[4].file, - ), - BidsHedIssue.fromHedIssue( - generateIssue('invalidPlaceholder', { tag: 'Label/#', sidecarKey: 'multiple_value_tags' }), - placeholderDatasets[5].file, - ), - BidsHedIssue.fromHedIssue( - generateIssue('invalidPlaceholder', { tag: 'Description/#', sidecarKey: 'multiple_value_tags' }), - placeholderDatasets[5].file, - ), - BidsHedIssue.fromHedIssue( - generateIssue('missingPlaceholder', { string: 'Sad', sidecarKey: 'no_value_tags' }), - placeholderDatasets[6].file, - ), - BidsHedIssue.fromHedIssue( - generateIssue('invalidPlaceholder', { tag: 'RGB-green/#', sidecarKey: 'value_in_categorical' }), - placeholderDatasets[7].file, - ), - ], - } - return validator(testDatasets, expectedIssues, specs) - }, 10000) - }) - - describe('TSV-only datasets', () => { - it('should validate HED strings in BIDS event files', () => { - const goodDatasets = bidsTsvFiles[0] - const badDatasets = bidsTsvFiles[1] - const testDatasets = { - all_good: new BidsDataset(goodDatasets, []), - all_bad: new BidsDataset(badDatasets, []), - } - const legalSpeedUnits = ['m-per-s', 'kph', 'mph'] - const speedIssue = generateIssue('unitClassInvalidUnit', { - tag: 'Speed/300 miles', - unitClassUnits: legalSpeedUnits.sort().join(','), - }) - const maglevError = generateIssue('invalidTag', { tag: 'Maglev' }) - const maglevWarning = generateIssue('extension', { tag: 'Train/Maglev' }) - const expectedIssues = { - all_good: [], - all_bad: [ - BidsHedIssue.fromHedIssue(cloneDeep(speedIssue), badDatasets[0].file, { tsvLine: 2 }), - BidsHedIssue.fromHedIssue(cloneDeep(maglevWarning), badDatasets[1].file, { tsvLine: 2 }), - BidsHedIssue.fromHedIssue(cloneDeep(speedIssue), badDatasets[2].file, { tsvLine: 3 }), - BidsHedIssue.fromHedIssue(cloneDeep(maglevError), badDatasets[3].file, { tsvLine: 2 }), - BidsHedIssue.fromHedIssue(cloneDeep(speedIssue), badDatasets[3].file, { tsvLine: 3 }), - BidsHedIssue.fromHedIssue(cloneDeep(maglevWarning), badDatasets[4].file, { tsvLine: 2 }), - BidsHedIssue.fromHedIssue(cloneDeep(speedIssue), badDatasets[4].file, { tsvLine: 3 }), - ], - } - return validator(testDatasets, expectedIssues, specs) - }, 10000) - }) + // + // describe('Sidecar-only datasets', () => { + // it('should validate non-placeholder HED strings in BIDS sidecars', () => { + // const goodDatasets = bidsSidecars[0] + // const testDatasets = { + // single: new BidsDataset([], [bidsSidecars[0][0]]), + // all_good: new BidsDataset([], goodDatasets), + // warning_and_good: new BidsDataset([], goodDatasets.concat([bidsSidecars[1][0]])), + // error_and_good: new BidsDataset([], goodDatasets.concat([bidsSidecars[1][1]])), + // } + // const expectedIssues = { + // single: [], + // all_good: [], + // warning_and_good: [ + // BidsHedIssue.fromHedIssue( + // generateIssue('extension', { tag: 'Train/Maglev', sidecarKey: 'transport' }), + // bidsSidecars[1][0].file, + // ), + // ], + // error_and_good: [ + // BidsHedIssue.fromHedIssue(generateIssue('invalidTag', { tag: 'Confused' }), bidsSidecars[1][1].file), + // ], + // } + // validator(testDatasets, expectedIssues, specs) + // }, 10000) + // + // it('should validate placeholders in BIDS sidecars', () => { + // const placeholderDatasets = bidsSidecars[2] + // const testDatasets = { + // placeholders: new BidsDataset([], placeholderDatasets), + // } + // const expectedIssues = { + // placeholders: [ + // BidsHedIssue.fromHedIssue( + // generateIssue('invalidPlaceholderInDefinition', { + // definition: 'InvalidDefinitionGroup', + // sidecarKey: 'invalid_definition_group', + // }), + // placeholderDatasets[2].file, + // ), + // BidsHedIssue.fromHedIssue( + // generateIssue('invalidPlaceholderInDefinition', { + // definition: 'InvalidDefinitionTag', + // sidecarKey: 'invalid_definition_tag', + // }), + // placeholderDatasets[3].file, + // ), + // BidsHedIssue.fromHedIssue( + // generateIssue('invalidPlaceholderInDefinition', { + // definition: 'MultiplePlaceholdersInGroupDefinition', + // sidecarKey: 'multiple_placeholders_in_group', + // }), + // placeholderDatasets[4].file, + // ), + // BidsHedIssue.fromHedIssue( + // generateIssue('invalidPlaceholder', { tag: 'Label/#', sidecarKey: 'multiple_value_tags' }), + // placeholderDatasets[5].file, + // ), + // BidsHedIssue.fromHedIssue( + // generateIssue('invalidPlaceholder', { tag: 'Description/#', sidecarKey: 'multiple_value_tags' }), + // placeholderDatasets[5].file, + // ), + // BidsHedIssue.fromHedIssue( + // generateIssue('missingPlaceholder', { string: 'Sad', sidecarKey: 'no_value_tags' }), + // placeholderDatasets[6].file, + // ), + // BidsHedIssue.fromHedIssue( + // generateIssue('invalidPlaceholder', { tag: 'RGB-green/#', sidecarKey: 'value_in_categorical' }), + // placeholderDatasets[7].file, + // ), + // ], + // } + // return validator(testDatasets, expectedIssues, specs) + // }, 10000) + // }) + // + // describe('TSV-only datasets', () => { + // it('should validate HED strings in BIDS event files', () => { + // const goodDatasets = bidsTsvFiles[0] + // const badDatasets = bidsTsvFiles[1] + // const testDatasets = { + // all_good: new BidsDataset(goodDatasets, []), + // all_bad: new BidsDataset(badDatasets, []), + // } + // const legalSpeedUnits = ['m-per-s', 'kph', 'mph'] + // const speedIssue = generateIssue('unitClassInvalidUnit', { + // tag: 'Speed/300 miles', + // unitClassUnits: legalSpeedUnits.sort().join(','), + // }) + // const maglevError = generateIssue('invalidTag', { tag: 'Maglev' }) + // const maglevWarning = generateIssue('extension', { tag: 'Train/Maglev' }) + // const expectedIssues = { + // all_good: [], + // all_bad: [ + // BidsHedIssue.fromHedIssue(cloneDeep(speedIssue), badDatasets[0].file, { tsvLine: 2 }), + // BidsHedIssue.fromHedIssue(cloneDeep(maglevWarning), badDatasets[1].file, { tsvLine: 2 }), + // BidsHedIssue.fromHedIssue(cloneDeep(speedIssue), badDatasets[2].file, { tsvLine: 3 }), + // BidsHedIssue.fromHedIssue(cloneDeep(maglevError), badDatasets[3].file, { tsvLine: 2 }), + // BidsHedIssue.fromHedIssue(cloneDeep(speedIssue), badDatasets[3].file, { tsvLine: 3 }), + // BidsHedIssue.fromHedIssue(cloneDeep(maglevWarning), badDatasets[4].file, { tsvLine: 2 }), + // BidsHedIssue.fromHedIssue(cloneDeep(speedIssue), badDatasets[4].file, { tsvLine: 3 }), + // ], + // } + // return validator(testDatasets, expectedIssues, specs) + // }, 10000) + // }) describe('Combined datasets', () => { it('should validate BIDS event files combined with JSON sidecar data', () => { const goodDatasets = bidsTsvFiles[2] const badDatasets = bidsTsvFiles[3] const testDatasets = { - all_good: new BidsDataset(goodDatasets, []), + /* all_good: new BidsDataset(goodDatasets, []),*/ all_bad: new BidsDataset(badDatasets, []), } const expectedIssues = { - all_good: [ - BidsHedIssue.fromHedIssue( - generateIssue('duplicateTag', { - tag: 'Boat', - }), - goodDatasets[0].file, - { tsvLine: 5 }, - ), - ], + all_good: [], all_bad: [ - // BidsHedIssue.fromHedIssue(generateIssue('invalidTag', { tag: 'Confused' }), badDatasets[0].file), BidsHedIssue.fromHedIssue(generateIssue('invalidTag', { tag: 'Confused' }), badDatasets[0].file), // TODO: Catch warning in sidecar validation /* BidsHedIssue.fromHedIssue( diff --git a/tests/bidsErrorData.js b/tests/bidsErrorData.js deleted file mode 100644 index 90b0ffa7..00000000 --- a/tests/bidsErrorData.js +++ /dev/null @@ -1,30 +0,0 @@ -import { BidsHedIssue } from '../bids' -import { generateIssue } from '../common/issues/issues' - -export const errorBidsTests = [ - { - name: 'invalid-bids-datasets', - description: 'Who knows', - warning: false, - tests: [ - { - name: 'valid-sidecar-bad-tag-tsv', - explanation: 'Valid-sidecar, but invalid tsv', - schemaVersion: '8.3.0', - sidecar: { - event_code: { - HED: { - face: '(Red, Blue), (Green, (Yellow))', - }, - }, - }, - sidecarValid: true, - sidecarErrors: [], - eventsString: 'onset\tduration\tHED\n' + '7\t4\tBaloney', - eventsValid: false, - eventsErrors: [BidsHedIssue.fromHedIssue(generateIssue('invalidTag', {}), 'valid-sidecar-invalid-tsv.tsv')], - comboValid: false, - }, - ], - }, -] diff --git a/tests/bidsErrorTests.spec.js b/tests/bidsErrorTests.spec.js deleted file mode 100644 index 03527246..00000000 --- a/tests/bidsErrorTests.spec.js +++ /dev/null @@ -1,132 +0,0 @@ -import chai from 'chai' -const assert = chai.assert -import { beforeAll, describe, afterAll } from '@jest/globals' -import path from 'path' -import { BidsHedIssue } from '../bids/types/issues' -import { buildSchemas } from '../validator/schema/init' -import { SchemaSpec, SchemasSpec } from '../common/schema/types' -import { BidsDataset, BidsEventFile, BidsHedTsvValidator, BidsSidecar, BidsTsvFile } from '../bids' -import { generateIssue, IssueError } from '../common/issues/issues' - -import { HedStringTokenizerOriginal } from '../parser/tokenizerOriginal' -import { HedStringTokenizer } from '../parser/tokenizer' -import { passingBidsTests } from './bidsPassingData' -import { BidsHedTsvParser } from '../bids/validator/bidsHedTsvValidator' -import parseTSV from '../bids/tsvParser' -const fs = require('fs') - -const displayLog = process.env.DISPLAY_LOG === 'true' - -const skippedErrors = {} - -describe('HED tokenizer validation', () => { - const schemaMap = new Map([ - ['8.2.0', undefined], - ['8.3.0', undefined], - ]) - - const badLog = [] - let totalTests = 0 - let wrongErrors = 0 - let unexpectedErrors = 0 - - beforeAll(async () => { - const spec2 = new SchemaSpec('', '8.2.0', '', path.join(__dirname, '../tests/data/HED8.2.0.xml')) - const specs2 = new SchemasSpec().addSchemaSpec(spec2) - const schemas2 = await buildSchemas(specs2) - const spec3 = new SchemaSpec('', '8.3.0', '', path.join(__dirname, '../tests/data/HED8.3.0.xml')) - const specs3 = new SchemasSpec().addSchemaSpec(spec3) - const schemas3 = await buildSchemas(specs3) - schemaMap.set('8.2.0', schemas2) - schemaMap.set('8.3.0', schemas3) - }) - - afterAll(() => { - const outBad = path.join(__dirname, 'runLog.txt') - const summary = `Total tests:${totalTests} Unexpected errors:${unexpectedErrors}\n` - if (displayLog) { - fs.writeFileSync(outBad, summary + badLog.join('\n'), 'utf8') - } - }) - - describe('BIDS validation - validData', () => { - const badLog = [] - let totalTests = 0 - let unexpectedErrors = 0 - - beforeAll(async () => {}) - - afterAll(() => { - const outBad = path.join(__dirname, 'runLog.txt') - const summary = `Total tests:${totalTests} Unexpected errors:${unexpectedErrors}\n` - if (displayLog) { - fs.writeFileSync(outBad, summary + badLog.join('\n'), 'utf8') - } - }) - - describe.each(passingBidsTests)('$name : $description', ({ tests }) => { - let itemLog - - const assertErrors = function (header, issues, iLog) { - iLog.push(`${header}\n`) - totalTests += 1 - - let errors = [] - if (issues.length > 0) { - errors = issues.map((item) => item.hedIssue.hedCode) // list of hedCodes in the issues - } - const errorString = errors.join(',') - if (errors.length > 0) { - iLog.push(`---expected no errors but got errors [${errorString}]\n`) - unexpectedErrors += 1 - assert(errors.length === 0, `${header}---expected no errors but got errors [${errorString}]`) - } - } - - const validate = function (test, iLog) { - // Make sure that the schema is available - const header = `\n[${test.name} (Expect pass)]` - const thisSchema = schemaMap.get(test.schemaVersion) - assert(thisSchema !== undefined, `${test.schemaVersion} is not available in test ${test.name}`) - - // Validate the sidecar by itself - const sidecarName = test.name + '.json' - const bidsSidecar = new BidsSidecar('thisOne', test.sidecar, { relativePath: sidecarName, path: sidecarName }) - assert(bidsSidecar instanceof BidsSidecar, 'Test') - const sidecarIssues = bidsSidecar.validate(thisSchema) - assertErrors(header + ':Validating just the sidecar', sidecarIssues, iLog) - - // Parse the events file - const eventName = test.name + '.tsv' - const parsedTsv = parseTSV(test.eventsString) - assert(parsedTsv instanceof Map, `${eventName} cannot be parsed`) - - // Validate the events file by itself - const bidsTsv = new BidsTsvFile(test.name, parsedTsv, { relativePath: 'eventName' }, [], {}) - const validator = new BidsHedTsvValidator(bidsTsv, thisSchema) - validator.validate() - assertErrors(header + ':Parsing events alone', validator.issues, iLog) - - // Validate the events file with the sidecar - const bidsTsvSide = new BidsTsvFile(test.name, parsedTsv, { relativePath: 'eventName' }, [], bidsSidecar) - const validatorSide = new BidsHedTsvValidator(bidsTsvSide, thisSchema) - validatorSide.validate() - assertErrors(header + ':Parsing events with ', validatorSide.issues, iLog) - } - - beforeAll(async () => { - itemLog = [] - }) - - afterAll(() => { - badLog.push(itemLog.join('\n')) - }) - - if (tests && tests.length > 0) { - test.each(tests)('BIDS: %s ', (test) => { - validate(test, itemLog) - }) - } - }) - }) -}) diff --git a/tests/bidsTestData.js b/tests/bidsTestData.js deleted file mode 100644 index 792f1b09..00000000 --- a/tests/bidsTestData.js +++ /dev/null @@ -1,111 +0,0 @@ -import { BidsHedIssue } from '../bids' -import { generateIssue } from '../common/issues/issues' - -export const bidsTestData = [ - { - name: 'valid-bids-datasets-with-limited-hed', - description: 'HED or data is missing in various places', - tests: [ - { - name: 'no-hed-at-all-but-both-tsv-json-non-empty', - explanation: 'Neither the sidecar or tsv has HED but neither non-empty', - schemaVersion: '8.3.0', - sidecar: { - duration: { - description: 'Duration of the event in seconds.', - }, - }, - eventsString: 'onset\tduration\n' + '7\t4', - sidecarOnlyErrors: [], - eventsOnlyErrors: [], - comboErrors: [], - }, - { - name: 'only-header-in-tsv-with-return', - explanation: 'TSV only has header and trailing return and white space', - schemaVersion: '8.3.0', - sidecar: { - duration: { - description: 'Duration of the event in seconds.', - }, - }, - eventsString: 'onset\tduration\n ', - sidecarOnlyErrors: [], - eventsOnlyErrors: [], - comboErrors: [], - }, - { - name: 'empty-json-empty-tsv', - explanation: 'Both sidecar and tsv are empty except for white space', - schemaVersion: '8.3.0', - sidecar: {}, - eventsString: '\n \n', - sidecarOnlyErrors: [], - eventsOnlyErrors: [], - comboErrors: [], - }, - ], - }, - { - name: 'valid-json-invalid-tsv', - description: 'JSON is valid but tsv is invalid', - tests: [ - { - name: 'valid-sidecar-bad-tag-tsv', - explanation: 'Unrelated sidecar is valid but HED column tag is invalid', - schemaVersion: '8.3.0', - sidecar: { - event_code: { - HED: { - face: '(Red, Blue), (Green, (Yellow))', - }, - }, - }, - eventsString: 'onset\tduration\tHED\n' + '7\t4\tBaloney', - sidecarOnlyErrors: [], - eventsOnlyErrors: [ - BidsHedIssue.fromHedIssue( - generateIssue('invalidTag', { tag: 'Baloney' }), - { relativePath: 'valid-sidecar-bad-tag-tsv.tsv' }, - { tsvLine: 2 }, - ), - ], - comboErrors: [ - BidsHedIssue.fromHedIssue( - generateIssue('invalidTag', { tag: 'Baloney' }), - { path: 'valid-sidecar-bad-tag-tsv.tsv', relativePath: 'valid-sidecar-bad-tag-tsv.tsv' }, - { tsvLine: 2 }, - ), - ], - }, - { - name: 'valid-sidecar-tsv-curly-brace', - explanation: 'The sidecar is valid, but tsv HED column has braces}', - schemaVersion: '8.3.0', - sidecar: { - event_code: { - HED: { - face: '(Red, Blue), (Green, (Yellow))', - }, - }, - }, - eventsString: 'onset\tduration\tevent_code\tHED\n' + '7\t4\tface\tRed,{blue}', - sidecarOnlyErrors: [], - eventsOnlyErrors: [ - BidsHedIssue.fromHedIssue( - generateIssue('curlyBracesInHedColumn', { column: '{blue}' }), - { relativePath: 'valid-sidecar-tsv-curly-brace.tsv' }, - { tsvLine: 2 }, - ), - ], - comboErrors: [ - BidsHedIssue.fromHedIssue( - generateIssue('curlyBracesInHedColumn', { column: '{blue}' }), - { path: 'valid-sidecar-tsv-curly-brace.tsv', relativePath: 'valid-sidecar-tsv-curly-brace.tsv' }, - { tsvLine: 2 }, - ), - ], - }, - ], - }, -] diff --git a/tests/bidsTests.data.js b/tests/bidsTests.data.js new file mode 100644 index 00000000..65f41e90 --- /dev/null +++ b/tests/bidsTests.data.js @@ -0,0 +1,239 @@ +import { BidsHedIssue } from '../bids' +import { generateIssue } from '../common/issues/issues' + +export const bidsTestData = [ + /* { + name: 'valid-bids-datasets-with-limited-hed', + description: 'HED or data is missing in various places', + tests: [ + { + testname: 'no-hed-at-all-but-both-tsv-json-non-empty', + explanation: 'Neither the sidecar or tsv has HED but neither non-empty', + schemaVersion: '8.3.0', + sidecar: { + duration: { + description: 'Duration of the event in seconds.', + }, + }, + eventsString: 'onset\tduration\n' + '7\t4', + sidecarOnlyErrors: [], + eventsOnlyErrors: [], + comboErrors: [], + }, + { + testname: 'only-header-in-tsv-with-return', + explanation: 'TSV only has header and trailing return and white space', + schemaVersion: '8.3.0', + sidecar: { + duration: { + description: 'Duration of the event in seconds.', + }, + }, + eventsString: 'onset\tduration\n ', + sidecarOnlyErrors: [], + eventsOnlyErrors: [], + comboErrors: [], + }, + { + testname: 'empty-json-empty-tsv', + explanation: 'Both sidecar and tsv are empty except for white space', + schemaVersion: '8.3.0', + sidecar: {}, + eventsString: '\n \n', + sidecarOnlyErrors: [], + eventsOnlyErrors: [], + comboErrors: [], + }, + ], + }, + { + name: 'valid-json-invalid-tsv', + description: 'JSON is valid but tsv is invalid', + tests: [ + { + testname: 'valid-sidecar-bad-tag-tsv', + explanation: 'Unrelated sidecar is valid but HED column tag is invalid', + schemaVersion: '8.3.0', + sidecar: { + event_code: { + HED: { + face: '(Red, Blue), (Green, (Yellow))', + }, + }, + }, + eventsString: 'onset\tduration\tHED\n' + '7\t4\tBaloney', + sidecarOnlyErrors: [], + eventsOnlyErrors: [ + BidsHedIssue.fromHedIssue( + generateIssue('invalidTag', { tag: 'Baloney' }), + { relativePath: 'valid-sidecar-bad-tag-tsv.tsv' }, + { tsvLine: 2 }, + ), + ], + comboErrors: [ + BidsHedIssue.fromHedIssue( + generateIssue('invalidTag', { tag: 'Baloney' }), + { path: 'valid-sidecar-bad-tag-tsv.tsv', relativePath: 'valid-sidecar-bad-tag-tsv.tsv' }, + { tsvLine: 2 }, + ), + ], + }, + { + testname: 'valid-sidecar-tsv-curly-brace', + explanation: 'The sidecar is valid, but tsv HED column has braces}', + schemaVersion: '8.3.0', + sidecar: { + event_code: { + HED: { + face: '(Red, Blue), (Green, (Yellow))', + }, + }, + }, + eventsString: 'onset\tduration\tevent_code\tHED\n' + '7\t4\tface\tRed,{blue}', + sidecarOnlyErrors: [], + eventsOnlyErrors: [ + BidsHedIssue.fromHedIssue( + generateIssue('curlyBracesInHedColumn', { column: '{blue}' }), + { relativePath: 'valid-sidecar-tsv-curly-brace.tsv' }, + { tsvLine: 2 }, + ), + ], + comboErrors: [ + BidsHedIssue.fromHedIssue( + generateIssue('curlyBracesInHedColumn', { column: '{blue}' }), + { path: 'valid-sidecar-tsv-curly-brace.tsv', relativePath: 'valid-sidecar-tsv-curly-brace.tsv' }, + { tsvLine: 2 }, + ), + ], + }, + ], + }, + { + name: 'duplicate-tag-test', + description: 'Duplicate tags can appear in isolation or in combiantion', + tests: [ + { + testname: 'first-level-duplicate-json-tsv', + explanation: 'Each is okay but when combined, duplicate tag', + schemaVersion: '8.3.0', + sidecar: { + vehicle: { + HED: { + car: 'Car', + train: 'Train', + boat: 'Boat', + }, + }, + speed: { + HED: 'Speed/# mph', + }, + transport: { + HED: { + car: 'Car', + train: 'Train', + boat: 'Boat', + maglev: 'Vehicle', + }, + } + }, + eventsString: 'onset\tduration\tvehicle\ttransport\tspeed\n' + '19\t6\tboat\tboat\t5\n', + sidecarOnlyErrors: [], + eventsOnlyErrors: [], + comboErrors: [ + BidsHedIssue.fromHedIssue( + generateIssue('duplicateTag', { tag: 'Boat' }), + { path: 'first-level-duplicate-json-tsv.tsv', relativePath: 'first-level-duplicate-json-tsv.tsv' }, + { tsvLine: 2 }, + ), + ], + }, + ], + },*/ + + { + name: 'curly-brace-tests', + description: 'Curly braces tested in various places', + tests: [ + { + testname: 'valid-curly-brace-in-sidecar-with-simple-splice', + explanation: 'Valid curly brace in sidecar and valid value is spliced in', + schemaVersion: '8.3.0', + sidecar: { + event_code: { + HED: { + face: '(Red, Blue), (Green, (Yellow))', + ball: '{ball_type}, Black', + }, + }, + ball_type: { + Description: 'Has description with HED', + HED: 'Label/#', + }, + }, + eventsString: 'onset\tduration\tevent_code\tball_type\n' + '19\t6\tball\tbig-one\n', + sidecarOnlyErrors: [], + eventsOnlyErrors: [], + comboErrors: [], + }, + { + testname: 'valid-curly-brace-in-sidecar-with-n/a-splice', + explanation: 'Valid curly brace in sidecar and but tsv splice entry is n/a', + schemaVersion: '8.3.0', + sidecar: { + event_code: { + HED: { + face: '(Red, Blue), (Green, (Yellow))', + ball: '{ball_type}, Black', + }, + }, + ball_type: { + Description: 'Has description with HED', + HED: 'Label/#', + }, + }, + eventsString: 'onset\tduration\tevent_code\tball_type\n' + '19\t6\tball\tn/a\n', + sidecarOnlyErrors: [], + eventsOnlyErrors: [], + comboErrors: [], + }, + { + testname: 'valid-curly-brace-in-sidecar-with-HED-column-splice', + explanation: 'Valid curly brace in sidecar with HED column splice', + schemaVersion: '8.3.0', + sidecar: { + event_code: { + HED: { + face: '(Red, Blue), (Green, (Yellow))', + ball: '{ball_type}, Black, ({HED})', + }, + }, + ball_type: { + Description: 'Has description with HED', + HED: 'Label/#', + }, + }, + eventsString: 'onset\tduration\tevent_code\tball_type\tHED\n' + '19\t6\tball\tn/a\tPurple\n', + sidecarOnlyErrors: [], + eventsOnlyErrors: [], + comboErrors: [], + }, + { + testname: 'invalid-curly-brace-column-slice-has-no hed', + explanation: 'A column name is used in a splice but does not have HED', + schemaVersion: '8.3.0', + sidecar: { + event_code: { + HED: { + face: '(Red, Blue), (Green, (Yellow))', + ball: '{ball_type}, Black', + }, + }, + }, + eventsString: 'onset\tduration\tevent_code\tball_type\n' + '19\t6\tball\tn/a\tPurple\n', + sidecarOnlyErrors: [], + eventsOnlyErrors: [], + comboErrors: [], + }, + ], + }, +] diff --git a/tests/bidsTests.spec.js b/tests/bidsTests.spec.js index 3b30537a..0bd2e461 100644 --- a/tests/bidsTests.spec.js +++ b/tests/bidsTests.spec.js @@ -11,14 +11,35 @@ import { generateIssue, IssueError } from '../common/issues/issues' import { HedStringTokenizerOriginal } from '../parser/tokenizerOriginal' import { HedStringTokenizer } from '../parser/tokenizer' -import { bidsTestData } from './bidsTestData' +import { bidsTestData } from './bidsTests.data' import { BidsHedTsvParser } from '../bids/validator/bidsHedTsvValidator' import parseTSV from '../bids/tsvParser' const fs = require('fs') //const displayLog = process.env.DISPLAY_LOG === 'true' const displayLog = true -const skippedErrors = {} +const skippedErrors = new Map() + +// Ability to select individual tests to run +const runAll = true +let onlyRun = new Map() +if (!runAll) { + onlyRun = new Map([['curly-brace-tests', ['invalid-curly-brace-column-slice-has-no hed']]]) +} + +function shouldRun(name, testname) { + if (onlyRun.size === 0) return true + if (onlyRun.get(name) === undefined) return false + + const cases = onlyRun.get(name) + if (cases.length === 0) return true + + if (cases.includes(testname)) { + return true + } else { + return false + } +} // Return an array of hedCode values extracted from an issues list. function extractHedCodes(issues) { @@ -66,13 +87,13 @@ describe('BIDS validation', () => { } }) - describe.each(bidsTestData)('$name : $description', ({ tests }) => { + describe.each(bidsTestData)('$name : $description', ({ name, description, tests }) => { let itemLog const assertErrors = function (test, type, expectedErrors, issues, iLog) { const status = expectedErrors.length > 0 ? 'Expect fail' : 'Expect pass' - const header = `[${test.name}:${type}:](${status})` - const log = [header] + const header = `[${name}:${test.testname}][${type}](${status})` + const log = [] totalTests += 1 const errors = extractHedCodes(issues) @@ -83,51 +104,57 @@ describe('BIDS validation', () => { if (expectedErrors.length === 0 && errorString.length > 0) { const hasErrors = `---expected no errors but got errors [${errorString}]` log.push(hasErrors) - iLog.push(log.join('\n')) + log.push(`Received issues: ${JSON.stringify(issues)}`) + iLog.push(header + '\n' + log.join('\n')) wrongErrors += 1 assert(errorString.length === 0, `${header}${hasErrors}]`) - return - } - const expectedErrorCodes = extractHedCodes(expectedErrors) - const wrong = difference(errors, expectedErrorCodes) - const missing = difference(expectedErrors, errors) - let errorMessage = '' - if (wrong.length > 0) { - errorMessage = `---received unexpected errors ${wrong.join(',')}\n` - wrongErrors += 1 - } - if (missing.length > 0) { - errorMessage = errorMessage + `---did not receive expected errors ${missing.join(',')}` - missingErrors += 1 - } - if (errorMessage.length > 0) { - log.push(errorMessage) - iLog.push(log.join('\n')) + } else { + const expectedErrorCodes = extractHedCodes(expectedErrors) + const wrong = difference(errors, expectedErrorCodes) + const missing = difference(expectedErrorCodes, errors) + let errorMessage = '' + if (wrong.length > 0) { + errorMessage = `---received unexpected errors ${wrong.join(',')}\n` + wrongErrors += 1 + } + if (missing.length > 0) { + errorMessage = errorMessage + `---did not receive expected errors ${missing.join(',')}` + missingErrors += 1 + } + + if (errorMessage.length > 0) { + log.push(errorMessage) + log.push(`Expected issues:\n${JSON.stringify(expectedErrors)}`) + log.push(`Received issues:\n${JSON.stringify(issues)}`) + iLog.push(header + '\n' + log.join('\n')) + } else { + iLog.push(header) + } + assert.sameDeepMembers(issues, expectedErrors, header) } - assert.sameDeepMembers(issues, expectedErrors, header) } const validate = function (test, iLog) { // Make sure that the schema is available - const header = `\n[${test.name} (Expect pass)]` + const header = `[${test.testname} (Expect pass)]` iLog.push(header) const thisSchema = schemaMap.get(test.schemaVersion) assert(thisSchema !== undefined, `${test.schemaVersion} is not available in test ${test.name}`) // Validate the sidecar by itself - const sidecarName = test.name + '.json' + const sidecarName = test.testname + '.json' const bidsSidecar = new BidsSidecar('thisOne', test.sidecar, { relativePath: sidecarName, path: sidecarName }) assert(bidsSidecar instanceof BidsSidecar, 'Test') const sidecarIssues = bidsSidecar.validate(thisSchema) assertErrors(test, 'Sidecar only', test.sidecarOnlyErrors, sidecarIssues, iLog) // Parse the events file - const eventName = test.name + '.tsv' + const eventName = test.testname + '.tsv' const parsedTsv = parseTSV(test.eventsString) assert(parsedTsv instanceof Map, `${eventName} cannot be parsed`) // Validate the events file by itself - const bidsTsv = new BidsTsvFile(test.name, parsedTsv, { relativePath: eventName }, [], {}) + const bidsTsv = new BidsTsvFile(test.testname, parsedTsv, { relativePath: eventName }, [], {}) const validator = new BidsHedTsvValidator(bidsTsv, thisSchema) validator.validate() assertErrors(test, 'Events only', test.eventsOnlyErrors, validator.issues, iLog) @@ -154,8 +181,12 @@ describe('BIDS validation', () => { }) if (tests && tests.length > 0) { - test.each(tests)('$name $explanation ', (test) => { - validate(test, itemLog) + test.each(tests)('$testname: $explanation ', (test) => { + if (shouldRun(name, test.testname)) { + validate(test, itemLog) + } else { + itemLog.push(`----Skipping ${name}: ${test.testname}`) + } }) } }) diff --git a/tests/runLog.txt b/tests/runLog.txt index 4555c989..a0cce50f 100644 --- a/tests/runLog.txt +++ b/tests/runLog.txt @@ -1,23 +1,18 @@ -Total tests:15 Wrong errors:0 MissingErrors:4 - -[no-hed-at-all-but-both-tsv-json-non-empty (Expect pass)] - -[only-header-in-tsv-with-return (Expect pass)] - -[empty-json-empty-tsv (Expect pass)] - -[valid-sidecar-bad-tag-tsv (Expect pass)] -[valid-sidecar-bad-tag-tsv:Events only:](Expect fail) ----has errors [TAG_INVALID] ----did not receive expected errors [object Object] -[valid-sidecar-bad-tag-tsv:Events+side:](Expect fail) ----has errors [TAG_INVALID] ----did not receive expected errors [object Object] - -[valid-sidecar-tsv-curly-brace (Expect pass)] -[valid-sidecar-tsv-curly-brace:Events only:](Expect fail) ----has errors [CHARACTER_INVALID] ----did not receive expected errors [object Object] -[valid-sidecar-tsv-curly-brace:Events+side:](Expect fail) ----has errors [CHARACTER_INVALID] ----did not receive expected errors [object Object] \ No newline at end of file +Total tests:10 Wrong errors:1 MissingErrors:0 +[valid-curly-brace-in-sidecar-with-simple-splice (Expect pass)] +[curly-brace-tests:valid-curly-brace-in-sidecar-with-simple-splice][Sidecar only](Expect pass) +[curly-brace-tests:valid-curly-brace-in-sidecar-with-simple-splice][Events only](Expect pass) +[curly-brace-tests:valid-curly-brace-in-sidecar-with-simple-splice][Events+side](Expect pass) +[valid-curly-brace-in-sidecar-with-n/a-splice (Expect pass)] +[curly-brace-tests:valid-curly-brace-in-sidecar-with-n/a-splice][Sidecar only](Expect pass) +[curly-brace-tests:valid-curly-brace-in-sidecar-with-n/a-splice][Events only](Expect pass) +[curly-brace-tests:valid-curly-brace-in-sidecar-with-n/a-splice][Events+side](Expect pass) +[valid-curly-brace-in-sidecar-with-HED-column-splice (Expect pass)] +[curly-brace-tests:valid-curly-brace-in-sidecar-with-HED-column-splice][Sidecar only](Expect pass) +[curly-brace-tests:valid-curly-brace-in-sidecar-with-HED-column-splice][Events only](Expect pass) +[curly-brace-tests:valid-curly-brace-in-sidecar-with-HED-column-splice][Events+side](Expect pass) +[invalid-curly-brace-column-slice-has-no hed (Expect pass)] +[curly-brace-tests:invalid-curly-brace-column-slice-has-no hed][Sidecar only](Expect pass) +---has errors [SIDECAR_BRACES_INVALID] +---expected no errors but got errors [SIDECAR_BRACES_INVALID] +[{"code":104,"file":{"relativePath":"invalid-curly-brace-column-slice-has-no hed.json","path":"invalid-curly-brace-column-slice-has-no hed.json"},"evidence":"ERROR: [SIDECAR_BRACES_INVALID] Column name \"ball_type\", used in curly braces, is not mapped to a defined column. (For more information on this HED error, see https://hed-specification.readthedocs.io/en/latest/Appendix_B.html#sidecar-braces-invalid.)","hedIssue":{"internalCode":"undefinedCurlyBraces","code":"undefinedCurlyBraces","hedCode":"SIDECAR_BRACES_INVALID","level":"error","message":"ERROR: [SIDECAR_BRACES_INVALID] Column name \"ball_type\", used in curly braces, is not mapped to a defined column. (For more information on this HED error, see https://hed-specification.readthedocs.io/en/latest/Appendix_B.html#sidecar-braces-invalid.)","parameters":{"column":"ball_type"}}}] \ No newline at end of file From 31e322914d6a4f97d30b9457d87cb9bf4a69157f Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Fri, 25 Oct 2024 12:09:34 -0500 Subject: [PATCH 21/21] Updated the tests --- tests/bidsTests.data.js | 82 ++++++++++++++++++++++------------------- tests/bidsTests.spec.js | 5 +-- tests/runLog.txt | 33 ++++++++++++++++- 3 files changed, 76 insertions(+), 44 deletions(-) diff --git a/tests/bidsTests.data.js b/tests/bidsTests.data.js index 65f41e90..c8c3d4e4 100644 --- a/tests/bidsTests.data.js +++ b/tests/bidsTests.data.js @@ -2,7 +2,7 @@ import { BidsHedIssue } from '../bids' import { generateIssue } from '../common/issues/issues' export const bidsTestData = [ - /* { + { name: 'valid-bids-datasets-with-limited-hed', description: 'HED or data is missing in various places', tests: [ @@ -109,46 +109,46 @@ export const bidsTestData = [ ], }, { - name: 'duplicate-tag-test', - description: 'Duplicate tags can appear in isolation or in combiantion', - tests: [ - { - testname: 'first-level-duplicate-json-tsv', - explanation: 'Each is okay but when combined, duplicate tag', - schemaVersion: '8.3.0', - sidecar: { - vehicle: { - HED: { - car: 'Car', - train: 'Train', - boat: 'Boat', - }, + name: 'duplicate-tag-test', + description: 'Duplicate tags can appear in isolation or in combiantion', + tests: [ + { + testname: 'first-level-duplicate-json-tsv', + explanation: 'Each is okay but when combined, duplicate tag', + schemaVersion: '8.3.0', + sidecar: { + vehicle: { + HED: { + car: 'Car', + train: 'Train', + boat: 'Boat', }, - speed: { - HED: 'Speed/# mph', + }, + speed: { + HED: 'Speed/# mph', + }, + transport: { + HED: { + car: 'Car', + train: 'Train', + boat: 'Boat', + maglev: 'Vehicle', }, - transport: { - HED: { - car: 'Car', - train: 'Train', - boat: 'Boat', - maglev: 'Vehicle', - }, - } }, - eventsString: 'onset\tduration\tvehicle\ttransport\tspeed\n' + '19\t6\tboat\tboat\t5\n', - sidecarOnlyErrors: [], - eventsOnlyErrors: [], - comboErrors: [ - BidsHedIssue.fromHedIssue( - generateIssue('duplicateTag', { tag: 'Boat' }), - { path: 'first-level-duplicate-json-tsv.tsv', relativePath: 'first-level-duplicate-json-tsv.tsv' }, - { tsvLine: 2 }, - ), - ], }, - ], - },*/ + eventsString: 'onset\tduration\tvehicle\ttransport\tspeed\n' + '19\t6\tboat\tboat\t5\n', + sidecarOnlyErrors: [], + eventsOnlyErrors: [], + comboErrors: [ + BidsHedIssue.fromHedIssue( + generateIssue('duplicateTag', { tag: 'Boat' }), + { path: 'first-level-duplicate-json-tsv.tsv', relativePath: 'first-level-duplicate-json-tsv.tsv' }, + { tsvLine: 2 }, + ), + ], + }, + ], + }, { name: 'curly-brace-tests', @@ -232,7 +232,13 @@ export const bidsTestData = [ eventsString: 'onset\tduration\tevent_code\tball_type\n' + '19\t6\tball\tn/a\tPurple\n', sidecarOnlyErrors: [], eventsOnlyErrors: [], - comboErrors: [], + comboErrors: [ + BidsHedIssue.fromHedIssue( + generateIssue('invalidTag', { tag: 'Baloney' }), + { relativePath: 'valid-sidecar-bad-tag-tsv.tsv' }, + { column: 'ball_type' }, + ), + ], }, ], }, diff --git a/tests/bidsTests.spec.js b/tests/bidsTests.spec.js index 0bd2e461..c9cb4b96 100644 --- a/tests/bidsTests.spec.js +++ b/tests/bidsTests.spec.js @@ -9,16 +9,13 @@ import { SchemaSpec, SchemasSpec } from '../common/schema/types' import { BidsDataset, BidsEventFile, BidsHedTsvValidator, BidsSidecar, BidsTsvFile } from '../bids' import { generateIssue, IssueError } from '../common/issues/issues' -import { HedStringTokenizerOriginal } from '../parser/tokenizerOriginal' -import { HedStringTokenizer } from '../parser/tokenizer' import { bidsTestData } from './bidsTests.data' -import { BidsHedTsvParser } from '../bids/validator/bidsHedTsvValidator' import parseTSV from '../bids/tsvParser' const fs = require('fs') //const displayLog = process.env.DISPLAY_LOG === 'true' const displayLog = true -const skippedErrors = new Map() +const skippedTests = new Map() // Ability to select individual tests to run const runAll = true diff --git a/tests/runLog.txt b/tests/runLog.txt index a0cce50f..2b7c1ad7 100644 --- a/tests/runLog.txt +++ b/tests/runLog.txt @@ -1,4 +1,33 @@ -Total tests:10 Wrong errors:1 MissingErrors:0 +Total tests:28 Wrong errors:1 MissingErrors:1 +[no-hed-at-all-but-both-tsv-json-non-empty (Expect pass)] +[valid-bids-datasets-with-limited-hed:no-hed-at-all-but-both-tsv-json-non-empty][Sidecar only](Expect pass) +[valid-bids-datasets-with-limited-hed:no-hed-at-all-but-both-tsv-json-non-empty][Events only](Expect pass) +[valid-bids-datasets-with-limited-hed:no-hed-at-all-but-both-tsv-json-non-empty][Events+side](Expect pass) +[only-header-in-tsv-with-return (Expect pass)] +[valid-bids-datasets-with-limited-hed:only-header-in-tsv-with-return][Sidecar only](Expect pass) +[valid-bids-datasets-with-limited-hed:only-header-in-tsv-with-return][Events only](Expect pass) +[valid-bids-datasets-with-limited-hed:only-header-in-tsv-with-return][Events+side](Expect pass) +[empty-json-empty-tsv (Expect pass)] +[valid-bids-datasets-with-limited-hed:empty-json-empty-tsv][Sidecar only](Expect pass) +[valid-bids-datasets-with-limited-hed:empty-json-empty-tsv][Events only](Expect pass) +[valid-bids-datasets-with-limited-hed:empty-json-empty-tsv][Events+side](Expect pass) +[valid-sidecar-bad-tag-tsv (Expect pass)] +[valid-json-invalid-tsv:valid-sidecar-bad-tag-tsv][Sidecar only](Expect pass) +[valid-json-invalid-tsv:valid-sidecar-bad-tag-tsv][Events only](Expect fail) +[valid-json-invalid-tsv:valid-sidecar-bad-tag-tsv][Events+side](Expect fail) +[valid-sidecar-tsv-curly-brace (Expect pass)] +[valid-json-invalid-tsv:valid-sidecar-tsv-curly-brace][Sidecar only](Expect pass) +[valid-json-invalid-tsv:valid-sidecar-tsv-curly-brace][Events only](Expect fail) +[valid-json-invalid-tsv:valid-sidecar-tsv-curly-brace][Events+side](Expect fail) +[first-level-duplicate-json-tsv (Expect pass)] +[duplicate-tag-test:first-level-duplicate-json-tsv][Sidecar only](Expect pass) +[duplicate-tag-test:first-level-duplicate-json-tsv][Events only](Expect pass) +[duplicate-tag-test:first-level-duplicate-json-tsv][Events+side](Expect fail) +---did not receive expected errors TAG_EXPRESSION_REPEATED +Expected issues: +[{"code":104,"file":{"path":"first-level-duplicate-json-tsv.tsv","relativePath":"first-level-duplicate-json-tsv.tsv"},"evidence":"ERROR: [TAG_EXPRESSION_REPEATED] Duplicate tag - \"Boat\". TSV line: 2. (For more information on this HED error, see https://hed-specification.readthedocs.io/en/latest/Appendix_B.html#tag-expression-repeated.)","hedIssue":{"internalCode":"duplicateTag","code":"duplicateTag","hedCode":"TAG_EXPRESSION_REPEATED","level":"error","message":"ERROR: [TAG_EXPRESSION_REPEATED] Duplicate tag - \"Boat\". TSV line: 2. (For more information on this HED error, see https://hed-specification.readthedocs.io/en/latest/Appendix_B.html#tag-expression-repeated.)","parameters":{"tag":"Boat","tsvLine":"2"}}}] +Received issues: +[] [valid-curly-brace-in-sidecar-with-simple-splice (Expect pass)] [curly-brace-tests:valid-curly-brace-in-sidecar-with-simple-splice][Sidecar only](Expect pass) [curly-brace-tests:valid-curly-brace-in-sidecar-with-simple-splice][Events only](Expect pass) @@ -15,4 +44,4 @@ Total tests:10 Wrong errors:1 MissingErrors:0 [curly-brace-tests:invalid-curly-brace-column-slice-has-no hed][Sidecar only](Expect pass) ---has errors [SIDECAR_BRACES_INVALID] ---expected no errors but got errors [SIDECAR_BRACES_INVALID] -[{"code":104,"file":{"relativePath":"invalid-curly-brace-column-slice-has-no hed.json","path":"invalid-curly-brace-column-slice-has-no hed.json"},"evidence":"ERROR: [SIDECAR_BRACES_INVALID] Column name \"ball_type\", used in curly braces, is not mapped to a defined column. (For more information on this HED error, see https://hed-specification.readthedocs.io/en/latest/Appendix_B.html#sidecar-braces-invalid.)","hedIssue":{"internalCode":"undefinedCurlyBraces","code":"undefinedCurlyBraces","hedCode":"SIDECAR_BRACES_INVALID","level":"error","message":"ERROR: [SIDECAR_BRACES_INVALID] Column name \"ball_type\", used in curly braces, is not mapped to a defined column. (For more information on this HED error, see https://hed-specification.readthedocs.io/en/latest/Appendix_B.html#sidecar-braces-invalid.)","parameters":{"column":"ball_type"}}}] \ No newline at end of file +Received issues: [{"code":104,"file":{"relativePath":"invalid-curly-brace-column-slice-has-no hed.json","path":"invalid-curly-brace-column-slice-has-no hed.json"},"evidence":"ERROR: [SIDECAR_BRACES_INVALID] Column name \"ball_type\", used in curly braces, is not mapped to a defined column. (For more information on this HED error, see https://hed-specification.readthedocs.io/en/latest/Appendix_B.html#sidecar-braces-invalid.)","hedIssue":{"internalCode":"undefinedCurlyBraces","code":"undefinedCurlyBraces","hedCode":"SIDECAR_BRACES_INVALID","level":"error","message":"ERROR: [SIDECAR_BRACES_INVALID] Column name \"ball_type\", used in curly braces, is not mapped to a defined column. (For more information on this HED error, see https://hed-specification.readthedocs.io/en/latest/Appendix_B.html#sidecar-braces-invalid.)","parameters":{"column":"ball_type"}}}] \ No newline at end of file