From d4bffc2e9b966ebb2624ac5d770c8a034549fa1b Mon Sep 17 00:00:00 2001
From: Kay Robbins <1189050+VisLab@users.noreply.github.com>
Date: Thu, 7 Nov 2024 17:46:30 -0600
Subject: [PATCH] Started on the parsedHedTag tests

---
Review notes (the commit message ends at the three-dash line above, so this
section is commentary and does not become part of the commit):

 * data/json/class_regex.json: "backslash", "left-paren", "right-paren" and
   "vertical-bar" are now escaped regex fragments, and "right-paren" matches
   ")" rather than "(". _getValueClassChars() joins the fragments with "|"
   inside "^(?:...)+$", where a bare "(", "|" or lone "\" either throws a
   SyntaxError or changes the meaning of the generated pattern.
 * schema/entries.js: the blank line that separated the SchemaValueClass
   constructor JSDoc from the constructor now sits between the two field
   declarations instead.
 * schema/parser.js: _getValueClassChars() gained a JSDoc block, and the
   commented-out valueClasses.set() call was replaced by a comment.
 * tests/parsedHedTagTests.spec.js: the schema-lookup failure message now
   interpolates the "header" variable and reads test.testname (the test data
   defines no "name" key per test); the test title uses $fullString, which
   the data defines, instead of $string, which it does not.
 * tests/testData/parsedHedTagTests.data.js: dropped the third case, an
   exact copy of "valid-tag-with-blanks".
 * The "index" lines below still carry the blob ids of the original
   submission.

 data/json/class_regex.json               | 57 ++++++++++++++++++++
 parser/tagConverter.js                   |  2 +
 schema/entries.js                        | 67 ++++++++++++++++++++++--
 schema/parser.js                         | 26 ++++++++-
 tests/parsedHedTagTests.spec.js          | 66 +++++++++++++++++++++++
 tests/testData/parsedHedTagTests.data.js | 34 ++++++++++++
 6 files changed, 248 insertions(+), 4 deletions(-)
 create mode 100644 data/json/class_regex.json
 create mode 100644 tests/parsedHedTagTests.spec.js
 create mode 100644 tests/testData/parsedHedTagTests.data.js

diff --git a/data/json/class_regex.json b/data/json/class_regex.json
new file mode 100644
index 00000000..8e827088
--- /dev/null
+++ b/data/json/class_regex.json
@@ -0,0 +1,57 @@
+{
+  "char_regex": {
+    "alphanumeric": "[A-Za-z0-9]",
+    "ampersand": "&",
+    "ascii": "[\\x00-\\x7F]",
+    "asterisk": "\\*",
+    "at-sign": "@",
+    "backslash": "\\\\",
+    "blank": " ",
+    "caret": "\\^",
+    "colon": ":",
+    "comma": ",",
+    "date-time": "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(?:\\.\\d+)?(?:Z|[+-]\\d{2}:\\d{2})?",
+    "dollar": "\\$",
+    "digits": "[0-9]",
+    "double-quote": "\"",
+    "equals": "=",
+    "exclamation": "!",
+    "greater-than": ">",
+    "hyphen": "-",
+    "left-paren": "\\(",
+    "less-than": "<",
+    "letters": "[A-Za-z]",
+    "lowercase": "[a-z]",
+    "name": "[\\w\\-\\u0080-\\uFFFF]",
+    "newline": "\\n",
+    "nonascii": "[\\u0080-\\uFFFF]",
+    "number-sign": "#",
+    "numeric": "[0-9.\\-+^Ee]",
+    "percent-sign": "%",
+    "period": "\\.",
+    "plus": "\\+",
+    "printable": "[\\x20-\\x7E]",
+    "question-mark": "\\?",
+    "right-paren": "\\)",
+    "semicolon": ";",
+    "single-quote": "'",
+    "forward-slash": "/",
+    "tab": "\\t",
+    "text": "[^\\x00-\\x1F\\x7F,{}]",
+    "tilde": "~",
+    "underscore": "_",
+    "uppercase": "[A-Z]",
+    "vertical-bar": "\\|"
+  },
+  "class_chars": {
+    "dateTimeClass": [],
+    "nameClass": ["alphanumeric", "underscore", "hyphen", "nonascii"],
+    "numericClass": [],
+    "textClass": ["text"],
+    "testClass": ["newline", "tab", "nonascii"]
+  },
+  "class_words": {
+    "dateTimeClass": "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(?:\\.\\d+)?(?:Z|[+-]\\d{2}:\\d{2})?$",
+    "numericClass": "^[+-]?(\\d+(\\.\\d*)?|\\.\\d+)([eE][+-]?\\d+)?$"
+  }
+}
diff --git a/parser/tagConverter.js b/parser/tagConverter.js
index 485d49d2..3adc9d4e 100644
--- a/parser/tagConverter.js
+++ b/parser/tagConverter.js
@@ -60,6 +60,7 @@ export default class TagConverter {
     this.tagString = tagSpec.tag
     this.tagLevels = this.tagString.split('/')
     this.tagSlashes = getTagSlashIndices(this.tagString)
+    this.remainder = undefined
   }
 
   /**
@@ -117,6 +118,7 @@ export default class TagConverter {
 
   _getSchemaTag(tagLevelIndex) {
     let tagLevel = this.tagLevels[tagLevelIndex].toLowerCase()
+    // TODO: These two checks should probably be removed as the tokenizer handles this.
     if (tagLevelIndex === 0) {
       tagLevel = tagLevel.trimLeft()
     }
diff --git a/schema/entries.js b/schema/entries.js
index 9292f14f..1bf4c886 100644
--- a/schema/entries.js
+++ b/schema/entries.js
@@ -662,8 +662,43 @@ export class SchemaUnitModifier extends SchemaEntryWithAttributes {
  * SchemaValueClass class
  */
 export class SchemaValueClass extends SchemaEntryWithAttributes {
-  constructor(name, booleanAttributes, valueAttributes) {
+  /**
+   * The character class-based regular expression.
+   * @type {RegExp}
+   * @private
+   */
+  _charClassRegex
+
+  /**
+   * The "word form"-based regular expression.
+   * @type {RegExp}
+   * @private
+   */
+  _wordRegex
+
+  /**
+   * Constructor.
+   *
+   * @param {string} name The name of this value class.
+   * @param {Set} booleanAttributes The boolean attributes for this value class.
+   * @param {Map} valueAttributes The value attributes for this value class.
+   * @param {RegExp} charClassRegex The character class-based regular expression for this value class.
+   * @param {RegExp} wordRegex The "word form"-based regular expression for this value class.
+   */
+  constructor(name, booleanAttributes, valueAttributes, charClassRegex, wordRegex) {
     super(name, booleanAttributes, valueAttributes)
+    this._charClassRegex = charClassRegex
+    this._wordRegex = wordRegex
+  }
+
+  /**
+   * Determine if a value is valid according to this value class.
+   *
+   * @param {string} value A HED value.
+   * @returns {boolean} Whether the value conforms to this value class.
+   */
+  validateValue(value) {
+    return this._wordRegex.test(value) && this._charClassRegex.test(value)
   }
 }
 
@@ -683,6 +718,14 @@ export class SchemaTag extends SchemaEntryWithAttributes {
    * @private
    */
   _unitClasses
+
+  /**
+   * This tag's value-classes
+   * @type {SchemaValueClass[]}
+   * @private
+   */
+  _valueClasses
+
   /**
    * This tag's value-taking child.
    * @type {SchemaValueTag}
@@ -697,11 +740,13 @@ export class SchemaTag extends SchemaEntryWithAttributes {
    * @param {Set} booleanAttributes The boolean attributes for this tag.
    * @param {Map} valueAttributes The value attributes for this tag.
    * @param {SchemaUnitClass[]} unitClasses The unit classes for this tag.
+   * @param {SchemaValueClass[]} valueClasses The value classes for this tag.
    * @constructor
    */
-  constructor(name, booleanAttributes, valueAttributes, unitClasses) {
+  constructor(name, booleanAttributes, valueAttributes, unitClasses, valueClasses) {
     super(name, booleanAttributes, valueAttributes)
     this._unitClasses = unitClasses ?? []
+    this._valueClasses = valueClasses ?? []
   }
 
   /**
@@ -717,7 +762,23 @@ export class SchemaTag extends SchemaEntryWithAttributes {
    * @returns {boolean}
    */
   get hasUnitClasses() {
-    return this.unitClasses.length !== 0
+    return this._unitClasses.length !== 0
+  }
+
+  /**
+   * This tag's value classes.
+   * @type {SchemaValueClass[]}
+   */
+  get valueClasses() {
+    return this._valueClasses.slice()
+  }
+
+  /**
+   * Whether this tag has any value classes.
+   * @returns {boolean}
+   */
+  get hasValueClasses() {
+    return this._valueClasses.length !== 0
   }
 
   /**
diff --git a/schema/parser.js b/schema/parser.js
index 62fe10bd..42e24559 100644
--- a/schema/parser.js
+++ b/schema/parser.js
@@ -26,6 +26,8 @@ import { IssueError } from '../common/issues/issues'
 
 const specialTags = require('../data/json/specialTags.json')
 
+import classRegex from '../data/json/class_regex.json'
+
 const lc = (str) => str.toLowerCase()
 
 export default class SchemaParser {
@@ -90,6 +92,7 @@ export default class SchemaParser {
     this.parseAttributes()
     this.parseUnitModifiers()
     this.parseUnitClasses()
+    this.parseValueClasses()
     this.parseTags()
   }
 
@@ -200,12 +203,33 @@ export default class SchemaParser {
     this._addCustomAttributes()
   }
 
+  /**
+   * Build the character-class regular expression for a value class.
+   *
+   * @param {string} name The name of the value class.
+   * @returns {RegExp} A regex matching one or more allowed characters, or any non-empty single-line string if the class lists none.
+   * @private
+   */
+  _getValueClassChars(name) {
+    let classChars
+    if (Array.isArray(classRegex.class_chars[name]) && classRegex.class_chars[name].length > 0) {
+      classChars =
+        '^(?:' + classRegex.class_chars[name].map((charClass) => classRegex.char_regex[charClass]).join('|') + ')+$'
+    } else {
+      classChars = '^.+$' // Any non-empty line or string.
+    }
+    return new RegExp(classChars)
+  }
+
   parseValueClasses() {
     const valueClasses = new Map()
     const [booleanAttributeDefinitions, valueAttributeDefinitions] = this._parseDefinitions('valueClass')
     for (const [name, valueAttributes] of valueAttributeDefinitions) {
       const booleanAttributes = booleanAttributeDefinitions.get(name)
-      valueClasses.set(name, new SchemaValueClass(name, booleanAttributes, valueAttributes))
+      // Pair the character-class regex with the whole-word regex (if any) from class_regex.json.
+      const charClassRegex = this._getValueClassChars(name)
+      const wordRegex = new RegExp(classRegex.class_words[name] ?? '^.+$')
+      valueClasses.set(name, new SchemaValueClass(name, booleanAttributes, valueAttributes, charClassRegex, wordRegex))
     }
     this.valueClasses = new SchemaEntryManager(valueClasses)
   }
diff --git a/tests/parsedHedTagTests.spec.js b/tests/parsedHedTagTests.spec.js
new file mode 100644
index 00000000..d8fa28aa
--- /dev/null
+++ b/tests/parsedHedTagTests.spec.js
@@ -0,0 +1,66 @@
+import chai from 'chai'
+const assert = chai.assert
+import { beforeAll, describe, afterAll } from '@jest/globals'
+
+import ParsedHedTag from '../parser/parsedHedTag'
+import { shouldRun } from './testUtilities'
+import { parsedHedTagTests } from './testData/parsedHedTagTests.data'
+import { SchemaSpec, SchemasSpec } from '../schema/specs'
+import path from 'path'
+import { buildSchemas } from '../schema/init'
+import { SchemaTag } from '../schema/entries'
+
+// Ability to select individual tests to run
+const skipMap = new Map()
+const runAll = true
+const runMap = new Map([[]])
+
+describe('TagSpec converter tests using JSON tests', () => {
+  const schemaMap = new Map([
+    ['8.2.0', undefined],
+    ['8.3.0', undefined],
+  ])
+
+  beforeAll(async () => {
+    const spec2 = new SchemaSpec('', '8.2.0', '', path.join(__dirname, '../tests/data/HED8.2.0.xml'))
+    const specs2 = new SchemasSpec().addSchemaSpec(spec2)
+    const schemas2 = await buildSchemas(specs2)
+    const spec3 = new SchemaSpec('', '8.3.0', '', path.join(__dirname, '../tests/data/HED8.3.0.xml'))
+    const specs3 = new SchemasSpec().addSchemaSpec(spec3)
+    const schemas3 = await buildSchemas(specs3)
+    schemaMap.set('8.2.0', schemas2)
+    schemaMap.set('8.3.0', schemas3)
+  })
+
+  afterAll(() => {})
+
+  describe.each(parsedHedTagTests)('$name : $description', ({ name, tests }) => {
+    const hedTagTest = function (test) {
+      const status = test.errors.length > 0 ? 'Expect fail' : 'Expect pass'
+      const header = `\n[${test.testname}](${status}): ${test.explanation}`
+
+      const thisSchema = schemaMap.get(test.schemaVersion)
+      assert.isDefined(thisSchema, `${header}: ${test.schemaVersion} is not available in test ${test.testname}`)
+
+      const tag = new ParsedHedTag(test.tagSpec, thisSchema)
+
+      assert.strictEqual(tag.formattedTag, test.formattedTag)
+      assert.strictEqual(tag.format(false), test.tagShort)
+      assert.strictEqual(tag.format(true), test.tagLong)
+    }
+
+    beforeAll(async () => {})
+
+    afterAll(() => {})
+
+    if (tests && tests.length > 0) {
+      test.each(tests)('$testname: $explanation for "$fullString"', (test) => {
+        if (shouldRun(name, test.testname, runAll, runMap, skipMap)) {
+          hedTagTest(test)
+        } else {
+          console.log(`----Skipping ${name}: ${test.testname}`)
+        }
+      })
+    }
+  })
+})
diff --git a/tests/testData/parsedHedTagTests.data.js b/tests/testData/parsedHedTagTests.data.js
new file mode 100644
index 00000000..fd270f87
--- /dev/null
+++ b/tests/testData/parsedHedTagTests.data.js
@@ -0,0 +1,34 @@
+import { generateIssue } from '../../common/issues/issues'
+import { ColumnSpliceSpec, GroupSpec, TagSpec } from '../../parser/tokenizer'
+
+export const parsedHedTagTests = [
+  {
+    name: 'valid-tags',
+    description: 'Valid placeholders in various places',
+    warning: false,
+    tests: [
+      {
+        testname: 'valid-tag-one-level',
+        explanation: '"Item" is a top-level-tag.',
+        schemaVersion: '8.3.0',
+        fullString: 'Item',
+        tagSpec: new TagSpec('Item', 0, 5, ''),
+        tagLong: 'Item',
+        tagShort: 'Item',
+        formattedTag: 'item',
+        errors: [],
+      },
+      {
+        testname: 'valid-tag-with-blanks',
+        explanation: '" Item " has surrounding blanks.',
+        schemaVersion: '8.3.0',
+        fullString: ' Item ',
+        tagSpec: new TagSpec('Item', 1, 6, ''),
+        tagLong: 'Item',
+        tagShort: 'Item',
+        formattedTag: 'item',
+        errors: [],
+      },
+    ],
+  },
+]