Skip to content

Commit

Permalink
Merge pull request #218 from VisLab/update-tokenizer
Browse files Browse the repository at this point in the history
Started on the parsedHedTag tests
  • Loading branch information
VisLab authored Nov 7, 2024
2 parents 2788f74 + d4bffc2 commit fb23337
Show file tree
Hide file tree
Showing 6 changed files with 252 additions and 4 deletions.
57 changes: 57 additions & 0 deletions data/json/class_regex.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
{
"char_regex": {
"alphanumeric": "[A-Za-z0-9]",
"ampersand": "&",
"ascii": "[\\x00-\\x7F]",
"asterisk": "\\*",
"at-sign": "@",
"backslash": "\\\\",
"blank": " ",
"caret": "\\^",
"colon": ":",
"comma": ",",
"date-time": "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(?:\\.\\d+)?(?:Z|[+-]\\d{2}:\\d{2})?",
"dollar": "\\$",
"digits": "[0-9]",
"double-quote": "\"",
"equals": "=",
"exclamation": "!",
"greater-than": ">",
"hyphen": "-",
"left-paren": "\\(",
"less-than": "<",
"letters": "[A-Za-z]",
"lowercase": "[a-z]",
"name": "[\\w\\-\\u0080-\\uFFFF]",
"newline": "\\n",
"nonascii": "[\\u0080-\\uFFFF]",
"number-sign": "#",
"numeric": "[0-9.\\-+^Ee]",
"percent-sign": "%",
"period": "\\.",
"plus": "\\+",
"printable": "[\\x20-\\x7E]",
"question-mark": "\\?",
"right-paren": "\\)",
"semicolon": ";",
"single-quote": "'",
"forward-slash": "/",
"tab": "\\t",
"text": "[^\\x00-\\x1F\\x7F,{}]",
"tilde": "~",
"underscore": "_",
"uppercase": "[A-Z]",
"vertical-bar": "\\|"
},
"class_chars": {
"dateTimeClass": [],
"nameClass": ["alphanumeric", "underscore", "hyphen", "nonascii"],
"numericClass": [],
"textClass": ["text"],
"testClass": ["newline", "tab", "nonascii"]
},
"class_words": {
"dateTimeClass": "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(?:\\.\\d+)?(?:Z|[+-]\\d{2}:\\d{2})?$",
"numericClass": "^[+-]?(\\d+(\\.\\d*)?|\\.\\d+)([eE][+-]?\\d+)?$"
}
}
2 changes: 2 additions & 0 deletions parser/tagConverter.js
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ export default class TagConverter {
this.tagString = tagSpec.tag
this.tagLevels = this.tagString.split('/')
this.tagSlashes = getTagSlashIndices(this.tagString)
this.remainder = undefined
}

/**
Expand Down Expand Up @@ -117,6 +118,7 @@ export default class TagConverter {

_getSchemaTag(tagLevelIndex) {
let tagLevel = this.tagLevels[tagLevelIndex].toLowerCase()
// TODO: These two checks should probably be removed as the tokenizer handles this.
if (tagLevelIndex === 0) {
tagLevel = tagLevel.trimLeft()
}
Expand Down
67 changes: 64 additions & 3 deletions schema/entries.js
Original file line number Diff line number Diff line change
Expand Up @@ -662,8 +662,43 @@ export class SchemaUnitModifier extends SchemaEntryWithAttributes {
* SchemaValueClass class
*/
export class SchemaValueClass extends SchemaEntryWithAttributes {
  /**
   * The character class-based regular expression.
   * @type {RegExp}
   * @private
   */
  _charClassRegex

  /**
   * The "word form"-based regular expression.
   * @type {RegExp}
   * @private
   */
  _wordRegex

  /**
   * Constructor.
   *
   * @param {string} name The name of this value class.
   * @param {Set<SchemaAttribute>} booleanAttributes The boolean attributes for this value class.
   * @param {Map<SchemaAttribute, *>} valueAttributes The value attributes for this value class.
   * @param {RegExp} charClassRegex The character class-based regular expression for this value class.
   * @param {RegExp} wordRegex The "word form"-based regular expression for this value class.
   */
  constructor(name, booleanAttributes, valueAttributes, charClassRegex, wordRegex) {
    super(name, booleanAttributes, valueAttributes)
    this._charClassRegex = charClassRegex
    this._wordRegex = wordRegex
  }

  /**
   * Determine if a value is valid according to this value class.
   *
   * The value must match both the "word form" expression and the character
   * class expression; the word form is tested first and short-circuits.
   *
   * @param {string} value A HED value.
   * @returns {boolean} Whether the value conforms to this value class.
   */
  validateValue(value) {
    return this._wordRegex.test(value) && this._charClassRegex.test(value)
  }
}

Expand All @@ -683,6 +718,14 @@ export class SchemaTag extends SchemaEntryWithAttributes {
* @private
*/
_unitClasses

/**
* This tag's value-classes
* @type {SchemaValueClass[]}
* @private
*/
_valueClasses

/**
* This tag's value-taking child.
* @type {SchemaValueTag}
Expand All @@ -697,11 +740,13 @@ export class SchemaTag extends SchemaEntryWithAttributes {
* @param {Set<SchemaAttribute>} booleanAttributes The boolean attributes for this tag.
* @param {Map<SchemaAttribute, *>} valueAttributes The value attributes for this tag.
* @param {SchemaUnitClass[]} unitClasses The unit classes for this tag.
* @param {SchemaValueClass[]} valueClasses The value classes for this tag.
* @constructor
*/
constructor(name, booleanAttributes, valueAttributes, unitClasses) {
constructor(name, booleanAttributes, valueAttributes, unitClasses, valueClasses) {
super(name, booleanAttributes, valueAttributes)
this._unitClasses = unitClasses ?? []
this._valueClasses = valueClasses ?? []
}

/**
Expand All @@ -717,7 +762,23 @@ export class SchemaTag extends SchemaEntryWithAttributes {
* @returns {boolean}
*/
get hasUnitClasses() {
return this.unitClasses.length !== 0
return this._unitClasses.length !== 0
}

/**
* This tag's value classes.
* @type {SchemaValueClass[]}
*/
get valueClasses() {
return this._valueClasses.slice()
}

/**
* Whether this tag has any value classes.
* @returns {boolean}
*/
get hasValueClasses() {
return this._valueClasses.length !== 0
}

/**
Expand Down
19 changes: 18 additions & 1 deletion schema/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ import { IssueError } from '../common/issues/issues'

const specialTags = require('../data/json/specialTags.json')

import classRegex from '../data/json/class_regex.json'

/**
 * Lower-case a string.
 *
 * @param {string} text The string to convert.
 * @returns {string} The lower-cased string.
 */
const lc = (text) => {
  return text.toLowerCase()
}

export default class SchemaParser {
Expand Down Expand Up @@ -90,6 +92,7 @@ export default class SchemaParser {
this.parseAttributes()
this.parseUnitModifiers()
this.parseUnitClasses()
this.parseValueClasses()
this.parseTags()
}

Expand Down Expand Up @@ -200,12 +203,26 @@ export default class SchemaParser {
this._addCustomAttributes()
}

_getValueClassChars(name) {
let classChars
if (Array.isArray(classRegex.class_chars[name]) && classRegex.class_chars[name].length > 0) {
classChars =
'^(?:' + classRegex.class_chars[name].map((charClass) => classRegex.char_regex[charClass]).join('|') + ')+$'
} else {
classChars = '^.+$' // Any non-empty line or string.
}
return new RegExp(classChars)
}

parseValueClasses() {
const valueClasses = new Map()
const [booleanAttributeDefinitions, valueAttributeDefinitions] = this._parseDefinitions('valueClass')
for (const [name, valueAttributes] of valueAttributeDefinitions) {
const booleanAttributes = booleanAttributeDefinitions.get(name)
valueClasses.set(name, new SchemaValueClass(name, booleanAttributes, valueAttributes))
//valueClasses.set(name, new SchemaValueClass(name, booleanAttributes, valueAttributes))
const charClassRegex = this._getValueClassChars(name)
const wordRegex = new RegExp(classRegex.class_words[name] ?? '^.+$')
valueClasses.set(name, new SchemaValueClass(name, booleanAttributes, valueAttributes, charClassRegex, wordRegex))
}
this.valueClasses = new SchemaEntryManager(valueClasses)
}
Expand Down
66 changes: 66 additions & 0 deletions tests/parsedHedTagTests.spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import chai from 'chai'
const assert = chai.assert
import { beforeAll, describe, afterAll } from '@jest/globals'

import ParsedHedTag from '../parser/parsedHedTag'
import { shouldRun } from './testUtilities'
import { parsedHedTagTests } from './testData/parsedHedTagTests.data'
import { SchemaSpec, SchemasSpec } from '../schema/specs'
import path from 'path'
import { buildSchemas } from '../schema/init'
import { SchemaTag } from '../schema/entries'

// Ability to select individual tests to run
// Tests listed in skipMap are skipped; when runAll is false only tests listed in runMap are run.
// NOTE(review): the exact selection semantics live in shouldRun (testUtilities) — confirm there.
const skipMap = new Map()
const runAll = true
// Start with a genuinely empty map: new Map([[]]) would insert an undefined -> undefined entry.
const runMap = new Map()

describe('TagSpec converter tests using JSON tests', () => {
  // Loaded schemas keyed by version string; populated in beforeAll below.
  const schemaMap = new Map([
    ['8.2.0', undefined],
    ['8.3.0', undefined],
  ])

  beforeAll(async () => {
    const spec2 = new SchemaSpec('', '8.2.0', '', path.join(__dirname, '../tests/data/HED8.2.0.xml'))
    const specs2 = new SchemasSpec().addSchemaSpec(spec2)
    const schemas2 = await buildSchemas(specs2)
    const spec3 = new SchemaSpec('', '8.3.0', '', path.join(__dirname, '../tests/data/HED8.3.0.xml'))
    const specs3 = new SchemasSpec().addSchemaSpec(spec3)
    const schemas3 = await buildSchemas(specs3)
    schemaMap.set('8.2.0', schemas2)
    schemaMap.set('8.3.0', schemas3)
  })

  afterAll(() => {})

  describe.each(parsedHedTagTests)('$name : $description', ({ name, tests }) => {
    /**
     * Build a ParsedHedTag from one test case and compare its formatted,
     * short and long forms against the expected strings.
     *
     * @param {Object} test A single test case from the test data.
     */
    const hedTagTest = function (test) {
      const status = test.errors.length > 0 ? 'Expect fail' : 'Expect pass'
      const header = `\n[${test.testname}](${status}): ${test.explanation}`

      const thisSchema = schemaMap.get(test.schemaVersion)
      // Interpolate the header and use the test's "testname" field so a failure identifies the case.
      assert.isDefined(thisSchema, `${header}: ${test.schemaVersion} is not available in test ${test.testname}`)

      const tag = new ParsedHedTag(test.tagSpec, thisSchema)

      assert.strictEqual(tag.formattedTag, test.formattedTag)
      assert.strictEqual(tag.format(false), test.tagShort)
      assert.strictEqual(tag.format(true), test.tagLong)
    }

    beforeAll(async () => {})

    afterAll(() => {})

    if (tests && tests.length > 0) {
      test.each(tests)('$testname: $explanation for "$string"', (test) => {
        if (shouldRun(name, test.testname, runAll, runMap, skipMap)) {
          hedTagTest(test)
        } else {
          console.log(`----Skipping ${name}: ${test.testname}`)
        }
      })
    }
  })
})
45 changes: 45 additions & 0 deletions tests/testData/parsedHedTagTests.data.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import { generateIssue } from '../../common/issues/issues'
import { ColumnSpliceSpec, GroupSpec, TagSpec } from '../../parser/tokenizer'

/**
 * Test cases for ParsedHedTag construction.
 *
 * Each group has a name, a description and a list of tests. Each test gives
 * the schema version to use, the TagSpec to parse, and the expected long,
 * short and formatted forms of the resulting tag.
 */
export const parsedHedTagTests = [
  {
    name: 'valid-tags',
    description: 'Valid placeholders in various places',
    warning: false,
    tests: [
      {
        testname: 'valid-tag-one-level',
        explanation: '"Item" is a top-level-tag.',
        schemaVersion: '8.3.0',
        fullString: 'Item',
        tagSpec: new TagSpec('Item', 0, 5, ''),
        tagLong: 'Item',
        tagShort: 'Item',
        formattedTag: 'item',
        errors: [],
      },
      // This case was previously listed twice verbatim; one copy is kept.
      {
        testname: 'valid-tag-with-blanks',
        explanation: '" Item " has surrounding blanks.',
        schemaVersion: '8.3.0',
        fullString: ' Item ',
        tagSpec: new TagSpec('Item', 1, 6, ''),
        tagLong: 'Item',
        tagShort: 'Item',
        formattedTag: 'item',
        errors: [],
      },
    ],
  },
]

0 comments on commit fb23337

Please sign in to comment.