Skip to content

Commit

Permalink
First rewrite of hed tag parser
Browse files Browse the repository at this point in the history
  • Loading branch information
VisLab committed Nov 9, 2024
1 parent bbf27de commit 47eba7b
Show file tree
Hide file tree
Showing 13 changed files with 888 additions and 63 deletions.
2 changes: 1 addition & 1 deletion common/issues/data.js
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ export default {
invalidExtension: {
hedCode: 'TAG_EXTENSION_INVALID',
level: 'error',
message: stringTemplate`"${'tag'}" appears as an extension of "${'parentTag'}", which does not allow tag extensions.`,
message: stringTemplate`"${'tag'}" appears as an extension of "${'parentTag'}", which does not allow this tag extension.`,
},
emptyTagFound: {
hedCode: 'TAG_EMPTY',
Expand Down
70 changes: 68 additions & 2 deletions parser/parsedHedTag.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ import { getParentTag, getTagLevels, getTagName } from '../utils/hedStrings'
import ParsedHedSubstring from './parsedHedSubstring'
import { SchemaValueTag } from '../schema/entries'
import TagConverter from './tagConverter'
import { Schema } from '../schema/containers'
import { getRegExp } from './tempRegex'

import RegexClass from '../schema/regExps'

/**
* A parsed HED tag.
Expand Down Expand Up @@ -39,6 +41,30 @@ export default class ParsedHedTag extends ParsedHedSubstring {
*/
_remainder

/**
* The extension if any
*
* @type {string}
* @private
*/
_extension

/**
* The value if any
*
* @type {string}
* @private
*/
_value

/**
* The units if any
*
* @type {string}
* @private
*/
_units

/**
* Constructor.
*
Expand All @@ -49,7 +75,8 @@ export default class ParsedHedTag extends ParsedHedSubstring {
*/
constructor(tagSpec, hedSchemas, hedString) {
super(tagSpec.tag, tagSpec.bounds) // Sets originalTag and originalBounds
this._convertTag(hedSchemas, hedString, tagSpec) // Sets various parameters
this._convertTag(hedSchemas, hedString, tagSpec) // Sets various forms of the tag.
this._handleRemainder()
//this._checkTagAttributes() // Checks various aspects like requireChild or extensionAllowed.
//this.formattedTag = this._formatTag()
//this.formattedTag = this.canonicalTag.toLowerCase()
Expand Down Expand Up @@ -86,6 +113,45 @@ export default class ParsedHedTag extends ParsedHedSubstring {
this.formattedTag = this.canonicalTag.toLowerCase()
}

/**
* Handle the remainder portion
*
* @throws {IssueError} If parsing the remainder section fails.
*/
_handleRemainder() {
if (this._remainder === '') {
return
}
// if (this.allowsExtensions) {
// this._handleExtension()
// } else if (this.takesValue) { // Its a value tag
// return
// } else {
// //IssueError.generateAndThrow('invalidTag', {tag: this.originalTag})
// }
}

/**
* Handle potential extensions

Check failure on line 135 in parser/parsedHedTag.js

View workflow job for this annotation

GitHub Actions / Check for spelling errors

potenial ==> potential
*
* @throws {IssueError} If parsing the remainder section fails.
*/
_handleExtension() {
this._extension = this._remainder
const testReg = getRegExp('nameClass')
if (!testReg.test(this._extension)) {
IssueError.generateAndThrow('invalidExtension', { tag: this.originalTag })
}
}

// _handleExtension() {
// this._extension = this._remainder
// const testit = RegexClass.testRegex('nameClass', this._extension)
// if (!RegexClass.testRegex('nameClass', this._extension)) {
// IssueError.generateAndThrow('invalidExtension', {tag: this.originalTag})
// }
// }

/**
* Nicely format this tag.
*
Expand Down
56 changes: 39 additions & 17 deletions parser/tagConverter.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { IssueError } from '../common/issues/issues'
import { getTagSlashIndices } from '../utils/hedStrings'
import { SchemaValueTag } from '../schema/entries'

import { getRegExp } from './tempRegex'
/**
* Converter from a tag specification to a schema-based tag object.
*/
Expand Down Expand Up @@ -56,6 +56,7 @@ export default class TagConverter {
constructor(tagSpec, hedSchemas) {
this.hedSchemas = hedSchemas
this.tagMapping = hedSchemas.getSchema(tagSpec.library).entries.tags

this.tagSpec = tagSpec
this.tagString = tagSpec.tag
this.tagLevels = this.tagString.split('/')
Expand All @@ -67,6 +68,7 @@ export default class TagConverter {
* Retrieve the {@link SchemaTag} object for a tag specification.
*
* @returns {[SchemaTag, string]} The schema's corresponding tag object and the remainder of the tag string.
* @throws {IssueError} If tag conversion fails.
*/
convert() {
let parentTag = undefined
Expand All @@ -86,45 +88,50 @@ export default class TagConverter {
}

_validateChildTag(parentTag, tagLevelIndex) {
if (this.schemaTag instanceof SchemaValueTag) {
IssueError.generateAndThrow('internalConsistencyError', {
message: 'Child tag is a value tag which should have been handled earlier.',
})
}

const childTag = this._getSchemaTag(tagLevelIndex)
if (childTag === undefined) {
// This is an extended tag
if (tagLevelIndex === 0) {
IssueError.generateAndThrow('invalidTag', { tag: this.tagString })
}
if (parentTag !== undefined && !parentTag.hasAttributeName('extensionAllowed')) {
IssueError.generateAndThrow('invalidExtension', {
tag: this.tagLevels[tagLevelIndex],
parentTag: parentTag.longName,
parentTag: this.tagLevels.slice(0, tagLevelIndex).join('/'),
})
}
this._checkExtensions(tagLevelIndex)
return childTag
}

if (tagLevelIndex > 0 && (childTag.parent === undefined || childTag.parent !== parentTag)) {
IssueError.generateAndThrow('invalidParentNode', {
tag: this.tagLevels[tagLevelIndex],
parentTag: childTag.longName,
parentTag: this.tagLevels.slice(0, tagLevelIndex).join('/'),
})
}

return childTag
}

_checkExtensions(tagLevelIndex) {
// A non-tag has been detected --- from here on must be non-tags.
this._checkNameClass(tagLevelIndex) // This is an extension
for (let index = tagLevelIndex + 1; index < this.tagLevels.length; index++) {
const child = this._getSchemaTag(index)
if (child !== undefined) {
// A schema tag showed up after a non-schema tag
IssueError.generateAndThrow('invalidParentNode', {
tag: this.tagLevels[index],
parentTag: this.tagLevels.slice(0, index).join('/'),
})
}
this._checkNameClass(index)
}
}

_getSchemaTag(tagLevelIndex) {
let tagLevel = this.tagLevels[tagLevelIndex].toLowerCase()
// TODO: These two checks should probably be removed as the tokenizer handles this.
// if (tagLevelIndex === 0) {
// tagLevel = tagLevel.trimLeft()
// }
// if (tagLevel === '' || tagLevel !== tagLevel.trim()) {
// IssueError.generateAndThrow('invalidTag', { tag: this.tagString })
// }
const tagLevel = this.tagLevels[tagLevelIndex].toLowerCase()
return this.tagMapping.getEntry(tagLevel)
}

Expand All @@ -138,4 +145,19 @@ export default class TagConverter {
IssueError.generateAndThrow('childRequired', { tag: this.tagString })
}
}

_checkNameClass(index) {
// Check whether the tagLevel is a valid name class
// TODO: this test should be in the schema and the RegExp only created once.
const valueClasses = this.hedSchemas.getSchema(this.tagSpec.library).entries.valueClasses
const myRex = valueClasses._definitions.get('nameClass')?._charClassRegex
const my = new RegExp(myRex)
if (!my.test(this.tagLevels[index])) {
// An extension is not name class
IssueError.generateAndThrow('invalidExtension', {
tag: this.tagLevels[index],
parentTag: this.tagLevels.slice(0, index).join('/'),
})
}
}
}
25 changes: 25 additions & 0 deletions parser/tempRegex.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import regexData from '../data/json/class_regex.json'

/**
 * Build the regular expression matching strings that consist solely of the
 * characters allowed for a named character class.
 *
 * The character names listed under `class_chars[name]` in the class-regex data are
 * each looked up in `char_regex`; the resulting fragments are joined with `|` and
 * wrapped in an anchored, repeating group.
 *
 * @param {string} name The name of the character class (a key of `class_chars`).
 * @returns {RegExp} A regular expression of the form `^(?:a|b|...)+$`.
 * @throws {Error} If the class name is unknown, the class lists no characters,
 *   or one of its character names has no entry in `char_regex`.
 */
export function getRegExp(name) {
  // Only own properties count: a plain truthiness test would also accept members
  // inherited from Object.prototype (e.g. 'constructor' or 'toString').
  const ownValue = (table, key) => (Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined)

  const charNames = ownValue(regexData.class_chars, name)
  if (!charNames) {
    throw new Error(`Invalid class name: ${name}`)
  }
  if (!Array.isArray(charNames) || charNames.length === 0) {
    throw new Error(`No character definitions for class: ${name}`)
  }

  // Join the individual character regex patterns
  const pattern = charNames
    .map((charName) => {
      const fragment = ownValue(regexData.char_regex, charName)
      if (!fragment) {
        throw new Error(`Invalid character name: ${charName}`)
      }
      return fragment
    })
    .join('|')

  return new RegExp(`^(?:${pattern})+$`)
}
4 changes: 2 additions & 2 deletions schema/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -220,9 +220,9 @@ export default class SchemaParser {
for (const [name, valueAttributes] of valueAttributeDefinitions) {
const booleanAttributes = booleanAttributeDefinitions.get(name)
//valueClasses.set(name, new SchemaValueClass(name, booleanAttributes, valueAttributes))
const charClassRegex = this._getValueClassChars(name)
const charRegex = this._getValueClassChars(name)
const wordRegex = new RegExp(classRegex.class_words[name] ?? '^.+$')
valueClasses.set(name, new SchemaValueClass(name, booleanAttributes, valueAttributes, charClassRegex, wordRegex))
valueClasses.set(name, new SchemaValueClass(name, booleanAttributes, valueAttributes, charRegex, wordRegex))
}
this.valueClasses = new SchemaEntryManager(valueClasses)
}
Expand Down
21 changes: 21 additions & 0 deletions schema/regExps.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import classRegex from '../data/json/class_regex.json'

/**
 * Static helpers for building and applying the character-class regular expressions
 * defined in the class-regex JSON data.
 */
export class RegexClass {
  /**
   * Build the regular expression for the characters allowed in a value class.
   *
   * @param {string} name The name of the value class (a key of `class_chars`).
   * @returns {RegExp} `^(?:a|b|...)+$` built from the class's character patterns, or
   *   `^.+$` (any non-empty line or string) when the class lists no characters.
   */
  static getValueClassChars(name) {
    const charClasses = classRegex.class_chars[name]
    if (!Array.isArray(charClasses) || charClasses.length === 0) {
      return new RegExp('^.+$') // Any non-empty line or string.
    }
    const alternatives = charClasses.map((charClass) => classRegex.char_regex[charClass]).join('|')
    return new RegExp(`^(?:${alternatives})+$`)
  }

  /**
   * Test a value against the character pattern of a value class.
   *
   * @param {string} name The name of the value class.
   * @param {string} value The string to test.
   * @returns {boolean} Whether the value matches the class's pattern.
   */
  static testRegex(name, value) {
    return RegexClass.getValueClassChars(name).test(value)
  }
}

// parser/parsedHedTag.js imports this class as the module's default export,
// so provide one alongside the named export.
export default RegexClass
Loading

0 comments on commit 47eba7b

Please sign in to comment.