Skip to content

Commit

Permalink
Merge pull request #228 from VisLab/update-tokenizer
Browse files Browse the repository at this point in the history
Refactored special and added more tests
  • Loading branch information
VisLab authored Nov 30, 2024
2 parents eadd081 + 829f892 commit 3ef09a9
Show file tree
Hide file tree
Showing 23 changed files with 882 additions and 492 deletions.
2 changes: 1 addition & 1 deletion bids/types/json.js
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ export class BidsSidecarKey {
}

_parseValueString(hedSchemas) {
const [parsedString, parsingIssues] = parseHedString(this.valueString, hedSchemas)
const [parsedString, parsingIssues] = parseHedString(this.valueString, hedSchemas, false)
const flatIssues = Object.values(parsingIssues).flat()
this.parsedValueString = parsedString
return flatIssues
Expand Down
2 changes: 1 addition & 1 deletion bids/validator/tsvValidator.js
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ export class BidsHedTsvValidator {
definitionsAllowed: 'no',
}

const [parsedString, parsingIssues] = parseHedString(hedString, this.hedSchemas)
const [parsedString, parsingIssues] = parseHedString(hedString, this.hedSchemas, true)
issues.push(
...BidsHedIssue.fromHedIssues(Object.values(parsingIssues).flat(), this.tsvFile.file, { tsvLine: rowIndex }),
)
Expand Down
12 changes: 11 additions & 1 deletion common/issues/data.js
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,11 @@ export default {
level: 'error',
message: stringTemplate`Descendant tag required - "${'tag'}".`,
},
valueRequired: {
hedCode: 'TAG_REQUIRES_CHILD',
level: 'error',
message: stringTemplate`Tag "${'tag'}" requires a value.`,
},
childForbidden: {
hedCode: 'TAG_INVALID',
level: 'error',
Expand Down Expand Up @@ -124,11 +129,16 @@ export default {
level: 'error',
message: stringTemplate`Illegal nested definition in tag group for definition "${'definition'}".`,
},
missingDefinition: {
missingDefinitionForDef: {
hedCode: 'DEF_INVALID',
level: 'error',
message: stringTemplate`Def tag found for definition name "${'definition'}" does not correspond to an existing definition.`,
},
missingDefinitionForDefExpand: {
hedCode: 'DEF_EXPAND_INVALID',
level: 'error',
message: stringTemplate`Def-expand tag found for definition name "${'definition'}" does not correspond to an existing definition.`,
},
duplicateDefinition: {
hedCode: 'DEFINITION_INVALID',
level: 'error',
Expand Down
18 changes: 9 additions & 9 deletions data/json/specialTags.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"Def": {
"name": "Def",
"allowExtension": false,
"noExtension": true,
"allowValue": true,
"allowTwoLevelValue": true,
"requireValue": true,
Expand All @@ -19,7 +19,7 @@
},
"Def-expand": {
"name": "Def-expand",
"allowExtension": false,
"noExtension": true,
"allowValue": true,
"allowTwoLevelValue": true,
"requireValue": true,
Expand All @@ -37,7 +37,7 @@
},
"Definition": {
"name": "Definition",
"allowExtension": false,
"noExtension": true,
"allowValue": true,
"allowTwoLevelValue": true,
"requireValue": true,
Expand All @@ -55,7 +55,7 @@
},
"Delay": {
"name": "Delay",
"allowExtension": false,
"noExtension": true,
"allowValue": true,
"allowTwoLevelValue": false,
"requireValue": true,
Expand All @@ -73,7 +73,7 @@
},
"Duration": {
"name": "Duration",
"allowExtension": false,
"noExtension": true,
"allowValue": true,
"allowTwoLevelValue": false,
"requireValue": true,
Expand All @@ -91,7 +91,7 @@
},
"Event-context": {
"name": "Event-context",
"allowExtension": false,
"noExtension": true,
"allowValue": false,
"allowTwoLevelValue": false,
"requireValue": false,
Expand All @@ -109,7 +109,7 @@
},
"Inset": {
"name": "Inset",
"allowExtension": false,
"noExtension": true,
"allowValue": false,
"allowTwoLevelValue": false,
"requireValue": false,
Expand All @@ -127,7 +127,7 @@
},
"Offset": {
"name": "Offset",
"allowExtension": false,
"noExtension": true,
"allowValue": false,
"allowTwoLevelValue": false,
"requireValue": false,
Expand All @@ -145,7 +145,7 @@
},
"Onset": {
"name": "Onset",
"allowExtension": false,
"noExtension": true,
"allowValue": false,
"allowTwoLevelValue": false,
"requireValue": false,
Expand Down
16 changes: 8 additions & 8 deletions parser/parsedHedGroup.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import { getTagName } from '../utils/hedStrings'
import ParsedHedSubstring from './parsedHedSubstring'
import ParsedHedTag from './parsedHedTag'
import ParsedHedColumnSplice from './parsedHedColumnSplice'
import { SpecialChecker } from './special'

/**
* A parsed HED tag group.
Expand Down Expand Up @@ -272,28 +271,29 @@ export default class ParsedHedGroup extends ParsedHedSubstring {
})
}

get specialTagList() {
/* get specialTagList() {
return this._memoize('specialTagList', () => {
const special = new SpecialChecker()
return this.allTags.filter((obj) => special.specialNames.includes(obj.schemaTag.name))
})
}
}*/

get hasForbiddenSubgroupTags() {
/* get hasForbiddenSubgroupTags() {
return this._memoize('hasForbiddenSubgroupTags', () => {
return this.allTags.some((obj) => new SpecialChecker().hasForbiddenSubgroupTags.includes(obj.schemaTag.name))
})
}
/**
}*/

/* /!**
* A list of all column splices at all levels
*
* @returns {ParsedHedColumnSplice[]} All column splices found at all levels.
*/
*!/
get allColumnSplices() {
return this._memoize('allColumnSplices', () => {
return Array.from(this.columnSpliceIterator())
})
}
}*/

/**
* Determine the name of this group's definition.
Expand Down
6 changes: 0 additions & 6 deletions parser/parsedHedString.js
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,6 @@ export class ParsedHedString {
*/
context

/**
* A list of the special tags that are in the string
* @type {ParsedHedTag[]}
*/
specialTags

/**
* Constructor.
* @param {string} hedString The original HED string.
Expand Down
78 changes: 44 additions & 34 deletions parser/parsedHedTag.js
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
import { IssueError } from '../common/issues/issues'
import { getTagLevels } from '../utils/hedStrings'
import ParsedHedSubstring from './parsedHedSubstring'
import { SchemaValueTag } from '../schema/entries'
import TagConverter from './tagConverter'
import { SpecialChecker } from './special'

const allowedRegEx = /^[^{}\,]*$/

//TODO This is temporary until special tag handling is available.
const threeLevelTags = ['Definition', 'Def', 'Def-expand']
/**
* A parsed HED tag.
*/
Expand Down Expand Up @@ -60,9 +58,9 @@ export default class ParsedHedTag extends ParsedHedSubstring {
_value

/**
* If definition
* For a tag that allows a two-level value (e.g. a definition), this is the part of the value after the slash, if any.
*
* @type {Array}
* @type {string}
* @private
*/
_splitValue
Expand Down Expand Up @@ -121,22 +119,30 @@ export default class ParsedHedTag extends ParsedHedSubstring {
}

/**
* Handle the remainder portion
* Handle the remainder portion for value tag (converter handles others)
*
* @param {SchemaTag} schemaTag - the part of the tag that is in the schema
* @param {string} remainder - the leftover part
* @throws {IssueError} If parsing the remainder section fails.
*/
_handleRemainder(schemaTag, remainder) {
if (remainder === '' || !(schemaTag instanceof SchemaValueTag)) {
this._extension = remainder
if (!(schemaTag instanceof SchemaValueTag)) {
return
}
if (threeLevelTags.includes(this.schemaTag.name)) {
this._handleSpecial(remainder)
return
// Check that there is a value if required
const special = SpecialChecker.getInstance()
if (
(schemaTag.hasAttributeName('requireChild') || special.requireValueTags.includes(schemaTag.name)) &&
remainder === ''
) {
IssueError.generateAndThrow('valueRequired', { tag: this.originalTag })
}
this._splitValue = null
// Check if this could have a two-level value
const [value, rest] = this._getSplitValue(remainder, special)
this._splitValue = rest

const [actualUnit, actualUnitString, actualValueString] = this._separateUnits(schemaTag, remainder)
// Resolve the units and check
const [actualUnit, actualUnitString, actualValueString] = this._separateUnits(schemaTag, value)
this._units = actualUnit
this._value = actualValueString

Expand All @@ -162,18 +168,18 @@ export default class ParsedHedTag extends ParsedHedSubstring {
return [actualUnit, actualUnitString, actualValueString]
}

// TODO: Fix this
/**
* Handle special -- handles special three-level tags
* Handle special three-level tags
* @param {string} remainder - the remainder of the tag string after schema tag
* @param {SpecialChecker} special - the special checker for checking the special tag properties
*/
_handleSpecial(remainder) {
const splitValue = remainder.split('/', 2)
const entryManager = this.schema.entries.valueClasses
if (entryManager.getEntry('nameClass').validateValue(splitValue[0])) {
this._splitValue = splitValue
} else {
IssueError.generateAndThrow('invalidValue', { tag: this.originalTag })
_getSplitValue(remainder, special) {
if (!special.allowTwoLevelValueTags.includes(this.schemaTag.name)) {
return [remainder, null]
}
const split = remainder.split('/', 2)
const rest = split.length > 1 ? split[1] : null
return [split[0], rest]
}

/**
Expand Down Expand Up @@ -247,27 +253,29 @@ export default class ParsedHedTag extends ParsedHedSubstring {
}
}

/**
/*
/!**
* The trailing portion of {@link canonicalTag}.
*
* @returns {string} The "name" portion of the canonical tag.
*/
*!/
get canonicalTagName() {
return this._memoize('canonicalTagName', () => {
return ParsedHedTag.getTagName(this.canonicalTag)
})
}
}*/

/**
/*
/!**
* The trailing portion of {@link formattedTag}.
*
* @returns {string} The "name" portion of the formatted tag.
*/
*!/
get formattedTagName() {
return this._memoize('formattedTagName', () => {
return ParsedHedTag.getTagName(this.formattedTag)
})
}
}*/

/**
* The trailing portion of {@link originalTag}.
Expand Down Expand Up @@ -317,16 +325,17 @@ export default class ParsedHedTag extends ParsedHedSubstring {
})
}

/**
/*
/!**
* The parent portion of {@link originalTag}.
*
* @returns {string} The "parent" portion of the original tag.
*/
*!/
get parentOriginalTag() {
return this._memoize('parentOriginalTag', () => {
return ParsedHedTag.getParentTag(this.originalTag)
})
}
}*/

/**
* Iterate through a tag's ancestor tag strings.
Expand Down Expand Up @@ -363,11 +372,12 @@ export default class ParsedHedTag extends ParsedHedSubstring {
return false
}

/**
/*
/!**
* Check if any level of this HED tag allows extensions.
*
* @returns {boolean} Whether any level of this HED tag allows extensions.
*/
*!/
get allowsExtensions() {
return this._memoize('allowsExtensions', () => {
if (this.originalTagName === '#') {
Expand All @@ -381,7 +391,7 @@ export default class ParsedHedTag extends ParsedHedSubstring {
this.schema?.tagHasAttribute(tagSubstring, extensionAllowedAttribute),
)
})
}
}*/

/**
* Determine if this HED tag is equivalent to another HED tag.
Expand Down
2 changes: 1 addition & 1 deletion parser/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ class HedStringParser {
return [null, parsingIssues]
}
const parsedString = new ParsedHedString(this.hedString, parsedTags)
const checkIssues = new SpecialChecker().checkHedString(parsedString, fullCheck)
const checkIssues = SpecialChecker.getInstance().checkHedString(parsedString, fullCheck)
mergeParsingIssues(parsingIssues, { syntaxIssues: checkIssues })
if (checkIssues.length > 0) {
return [null, parsingIssues]
Expand Down
Loading

0 comments on commit 3ef09a9

Please sign in to comment.