Commit

Must Support (#86)

* WIP add content profile grammar and begin listener

* Add data to content profile specifications (except path)

* Add content profile listener and add config file field for content profile

* Remove extraneous comment

* Add handling for invalid content profile file in config

* Add support for Value in chain, fix styling, add extra error handling

* Add null checks for configuration

* Clear currentDef and currentRule when appropriate

When the parser exits the CP definition or a CP rule definition, clear the corresponding member property. Otherwise, old rules can get applied to new CPs!

* Add support for inherited attributes in content profiles

* Address comments based on review feedback

* Content Profile paths must use the effectiveIdentifier

After discussion with Mark, we determined that CPs should always use the "effective identifiers" in the paths. For example, if field Foo is constrained to SonOfFoo, then the CP path should use SonOfFoo (see the example after this list).

In addition, fixed an issue where the ChoiceValue options were being iterated as identifiers when they are really Values (that have identifiers).

* Minor tweak for clarity (based on code review)

* Update error numbers

* Upgrade shr-models to 5.8.0

Take in the new models for ContentProfiles.

* v5.7.0
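
To illustrate the effective-identifier rule, here is a hypothetical content profile file (element names invented; the file shape is inferred from the Grammar/Namespace/MS keywords defined by the new lexer below). If Bar declares a field Foo that is constrained to SonOfFoo, the rule path must name SonOfFoo:

Grammar: ContentProfile 1.0
Namespace: shr.demo

Bar:
  SonOfFoo MS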
Dylan Mahalingam authored and cmoesel committed Mar 27, 2019
1 parent fced66d commit a371487
Showing 10 changed files with 2,065 additions and 7 deletions.
216 changes: 216 additions & 0 deletions lib/contentProfileListener.js
@@ -0,0 +1,216 @@
const bunyan = require('bunyan');
const {FileStream, CommonTokenStream} = require('antlr4/index');
const {ParseTreeWalker} = require('antlr4/tree');
const {SHRContentProfileLexer} = require('./parsers/SHRContentProfileLexer');
const {SHRContentProfileParser} = require('./parsers/SHRContentProfileParser');
const {SHRContentProfileParserListener} = require('./parsers/SHRContentProfileParserListener');
const {SHRErrorListener} = require('./errorListener.js');
const {Specifications, Version, ContentProfile, ContentProfileRule, Identifier, IdentifiableValue, ChoiceValue, PrimitiveIdentifier, PRIMITIVES} = require('shr-models');

var rootLogger = bunyan.createLogger({name: 'shr-text-import'});
var logger = rootLogger;
function setLogger(bunyanLogger) {
  rootLogger = logger = bunyanLogger;
}

class ContentProfileImporter extends SHRContentProfileParserListener {
  constructor(specifications = new Specifications()) {
    super();
    // The specifications it collects
    this._specs = specifications;
    // The currently active namespace
    this._currentNs = '';
    // The currently active grammar version
    this._currentGrammarVersion = '';
    // The currently active content profile definition
    this._currentDef = null;
    // The currently active content profile rule
    this._currentRule = null;
  }

  get specifications() { return this._specs; }

  importFile(file) {
    // Setup a child logger to associate logs with the current file
    const lastLogger = logger;
    logger = rootLogger.child({ file: file });
    logger.debug('Start importing content profile file');
    try {
      const errListener = new SHRErrorListener(logger);
      const chars = new FileStream(file);
      const lexer = new SHRContentProfileLexer(chars);
      lexer.removeErrorListeners();
      lexer.addErrorListener(errListener);
      const tokens = new CommonTokenStream(lexer);
      const parser = new SHRContentProfileParser(tokens);
      parser.removeErrorListeners();
      parser.addErrorListener(errListener);
      parser.buildParseTrees = true;
      const tree = parser.doc();
      const walker = new ParseTreeWalker();
      walker.walk(this, tree);
    } finally {
      logger.debug('Done importing content profile file');
      // Restore the module-level logger (assigning to `this.logger` would leave the child logger active)
      logger = lastLogger;
    }
  }

  enterDoc(ctx) {
    // set grammar version
    const version = ctx.docHeader().version();
    const major = parseInt(version.WHOLE_NUMBER()[0], 10);
    const minor = parseInt(version.WHOLE_NUMBER()[1], 10);
    this._currentGrammarVersion = new Version(major, minor);

    logger.debug({version: this._currentGrammarVersion.toString()}, 'Entered content profile file');
  }

  exitDoc(ctx) {
    // clear current namespace, current content, current rule, and grammar version
    logger.debug('Exiting content profile file');
    this._currentNs = null;
    this._currentDef = null;
    this._currentRule = null;
    this._currentGrammarVersion = null;
  }

  enterNamespaceHeader(ctx) {
    // set current namespace
    this._currentNs = ctx.namespace().getText();
  }

  enterContentHeader(ctx) {
    // set current content
    const name = ctx.simpleName().getText();
    const identifier = new Identifier(this._currentNs, name);
    this._currentDef = new ContentProfile(identifier);
    this._currentDef.grammarVersion = this._currentGrammarVersion;
  }

  enterCpRule(ctx) {
    // find identifier for each data element in path,
    // then create and set current rule with that path

    const pathStr = ctx.simpleOrPathName().getText();
    const names = pathStr.split('.');
    let path = [];
    let currentElement = this._specs.dataElements.findByIdentifier(this._currentDef.identifier);

    if (currentElement) {
      for (const name of names) {
        let parentElements = this.getRecursiveBasedOns(currentElement.identifier);
        if (PRIMITIVES.includes(name)) {
          path.push(new PrimitiveIdentifier(name));
          break;
        } else if (name === 'Value' || name === '_Value') {
          path.push(new Identifier('', '_Value'));

          // find value from self or most recent ancestor
          let value = null;
          for (const id of parentElements) {
            const el = this._specs.dataElements.findByIdentifier(id);
            value = el ? el.value : null;

            if (value) {
              break;
            }
          }

          if (value && !(value instanceof ChoiceValue)) {
            currentElement = this._specs.dataElements.findByIdentifier(value.effectiveIdentifier);
          } else {
            break; // Exit loop to reach error condition below
          }
        } else {
          // Collect fields from current elements and all parents
          let fields = new Map();
          for (const id of parentElements) {
            const el = this._specs.dataElements.findByIdentifier(id);
            [el.value, ...el.fields].forEach(field => {
              const key = (field && field.identifier) ? field.identifier : 'value';
              if (field && !fields.has(key)) fields.set(key, field);
            });
          }

          let value = Array.from(fields.values()).find(field => {
            if (field instanceof IdentifiableValue) {
              // match name on effectiveIdentifier since CP requires author to use constrained type name
              return field.effectiveIdentifier.name === name;
            } else if (field instanceof ChoiceValue) {
              // match name on one of the choice option's effectiveIdentifier
              return field.aggregateOptions.some(o => o.effectiveIdentifier && o.effectiveIdentifier.name === name);
            }

            return false;
          });

          let element;
          if (value && value.effectiveIdentifier) {
            element = this._specs.dataElements.findByIdentifier(value.effectiveIdentifier);
          }

          if (element) {
            path.push(element.identifier);
            currentElement = element;
          } else {
            break; // Exit loop to reach error condition below
          }
        }
      }
    } else {
      logger.error(
        'Definition not found for data element in content profile path: %s. ERROR_CODE:11035',
        this._currentDef.identifier.fqn
      );
    }

    if (path.length === names.length) {
      this._currentRule = new ContentProfileRule(path);
    } else {
      // TODO: We may be able to help the author by suggesting fixes when the problem is that they referred to the old identifier instead of the effectiveIdentifier.
      // This will require some rework of the above code to detect and remember this situation.
      logger.error('Path not found for %s: %s. ERROR_CODE:11036', this._currentDef.identifier.fqn, pathStr);
    }
  }
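
  // Hypothetical walk-through (all names invented) of how enterCpRule
  // resolves the rule "SonOfFoo.Quantity MS" on a profile for element Bar,
  // where Bar (or one of its BasedOns) declares a field Foo that has been
  // constrained to SonOfFoo:
  //   1. names = ['SonOfFoo', 'Quantity']; currentElement = Bar's definition.
  //   2. 'SonOfFoo' is neither a primitive nor Value, so the fields of Bar
  //      and its parents are collected; the field matches on its
  //      effectiveIdentifier (SonOfFoo), not its declared identifier (Foo).
  //   3. currentElement becomes SonOfFoo's definition, and 'Quantity' is
  //      resolved against it the same way.
  //   4. path.length === names.length, so a ContentProfileRule is created.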

  enterFlag(ctx) {
    if (this._currentRule) {
      this._currentRule.mustSupport = (ctx.KW_MUST_SUPPORT() != null);
      this._currentDef.addRule(this._currentRule);
    }
  }

  exitCpRule(ctx) {
    this._currentRule = null;
  }

  exitContentDef(ctx) {
    this._specs.contentProfiles.add(this._currentDef);
    this._currentDef = null;
  }

  // NOTE: This function "borrowed" from shr-expand
  getRecursiveBasedOns(identifier, alreadyProcessed = []) {
    // If it's primitive or we've already processed this one, don't go further (avoid circular dependencies)
    if (identifier.isPrimitive || alreadyProcessed.some(id => id.equals(identifier))) {
      return alreadyProcessed;
    }

    // We haven't processed it, so look it up
    const element = this._specs.dataElements.findByIdentifier(identifier);
    if (typeof element === 'undefined') {
      logger.error('Cannot resolve element definition for %s. ERROR_CODE:13023', identifier.fqn);
      return alreadyProcessed;
    }
    // Add it to the already processed list (again, to avoid circular dependencies)
    alreadyProcessed.push(identifier);
    // Now recursively get the BasedOns for each of the BasedOns
    for (const basedOn of element.basedOn) {
      alreadyProcessed = this.getRecursiveBasedOns(basedOn, alreadyProcessed);
    }

    return alreadyProcessed;
  }
}

module.exports = {ContentProfileImporter, setLogger};
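
For context, a minimal usage sketch of the new importer (the pre-populated specs object and the file path are hypothetical; importFile and the specifications getter are defined above):

const {ContentProfileImporter} = require('./contentProfileListener');

// specs must already contain the data element definitions that the profile
// paths refer to, since enterCpRule resolves each path segment against
// specs.dataElements.
const importer = new ContentProfileImporter(specs);
importer.importFile('path/to/cp-ms.txt'); // hypothetical path
const contentProfiles = importer.specifications.contentProfiles;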
13 changes: 13 additions & 0 deletions lib/import.js
@@ -6,6 +6,7 @@ const {Preprocessor, VERSION, GRAMMAR_VERSION} = require('./preprocessor');
const {DataElementImporter} = require('./dataElementListener');
const {ValueSetImporter} = require('./valueSetListener');
const {MappingImporter} = require('./mappingListener');
const {ContentProfileImporter} = require('./contentProfileListener');
const {CimcoreImporter} = require('./cimcore/cimcoreImport');

var logger = bunyan.createLogger({name: 'shr-text-import'});
@@ -15,6 +16,7 @@ function setLogger(bunyanLogger) {
  require('./dataElementListener').setLogger(logger);
  require('./valueSetListener').setLogger(logger);
  require('./mappingListener').setLogger(logger);
  require('./contentProfileListener').setLogger(logger);
  require('./cimcore/cimcoreImport').setLogger(logger);
}

@@ -37,6 +39,17 @@ function importFromFilePath(filePath, configuration=[], specifications = new Specifications()) {
  for (const file of filesByType.map) {
    mappingImporter.importFile(file);
  }
  const contentProfileImporter = new ContentProfileImporter(specifications);
  let contentProfileFound = false;
  for (const file of filesByType.contentProfile) {
    if (configuration && (path.basename(file) === configuration.contentProfile)) {
      contentProfileFound = true;
      contentProfileImporter.importFile(file);
    }
  }
  if (configuration && configuration.contentProfile && !contentProfileFound) {
    logger.error('Could not find content profile file: %s. ERROR_CODE:11037', configuration.contentProfile);
  }
  return specifications;
}
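
The new block only imports a content profile file whose basename matches the configuration's contentProfile field; otherwise it logs ERROR_CODE:11037. A minimal sketch of such a config file (contentProfile is the field read above; the other field is hypothetical):

{
  "projectName": "Example Project",
  "contentProfile": "cp-ms.txt"
}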

131 changes: 131 additions & 0 deletions lib/parsers/SHRContentProfileLexer.js
@@ -0,0 +1,131 @@
// Generated from SHRContentProfileLexer.g4 by ANTLR 4.5
// jshint ignore: start
var antlr4 = require('antlr4/index');


var serializedATN = ["\3\u0430\ud6d1\u8206\uad2d\u4417\uaef1\u8d80\uaadd",
"\2\23\u00c2\b\1\4\2\t\2\4\3\t\3\4\4\t\4\4\5\t\5\4\6\t\6\4\7\t\7\4\b",
"\t\b\4\t\t\t\4\n\t\n\4\13\t\13\4\f\t\f\4\r\t\r\4\16\t\16\4\17\t\17\4",
"\20\t\20\4\21\t\21\4\22\t\22\3\2\3\2\3\2\3\2\3\2\3\2\3\2\3\2\3\2\3\3",
"\3\3\3\3\3\3\3\3\3\3\3\3\3\3\3\3\3\3\3\3\3\3\3\3\3\3\3\3\3\4\3\4\3\4",
"\3\4\3\4\3\4\3\4\3\4\3\4\3\4\3\4\3\5\3\5\3\5\3\6\3\6\3\7\3\7\3\b\6\b",
"Q\n\b\r\b\16\bR\3\t\3\t\7\tW\n\t\f\t\16\tZ\13\t\3\n\3\n\7\n^\n\n\f\n",
"\16\na\13\n\3\13\3\13\7\13e\n\13\f\13\16\13h\13\13\3\f\3\f\7\fl\n\f",
"\f\f\16\fo\13\f\3\f\3\f\3\f\7\ft\n\f\f\f\16\fw\13\f\6\fy\n\f\r\f\16",
"\fz\3\r\3\r\7\r\177\n\r\f\r\16\r\u0082\13\r\3\r\3\r\3\r\7\r\u0087\n",
"\r\f\r\16\r\u008a\13\r\7\r\u008c\n\r\f\r\16\r\u008f\13\r\3\r\3\r\3\r",
"\7\r\u0094\n\r\f\r\16\r\u0097\13\r\3\16\3\16\7\16\u009b\n\16\f\16\16",
"\16\u009e\13\16\3\16\3\16\3\17\3\17\3\17\3\17\3\20\3\20\3\20\3\20\3",
"\21\3\21\3\21\3\21\7\21\u00ae\n\21\f\21\16\21\u00b1\13\21\3\21\3\21",
"\3\21\3\21\3\21\3\22\3\22\3\22\3\22\7\22\u00bc\n\22\f\22\16\22\u00bf",
"\13\22\3\22\3\22\3\u00af\2\23\3\3\5\4\7\5\t\6\13\7\r\b\17\t\21\n\23",
"\13\25\f\27\r\31\16\33\17\35\20\37\21!\22#\23\3\2\f\3\2\62;\3\2C\\\5",
"\2\62;C\\aa\7\2//\62;C\\aac|\3\2c|\6\2//\62;C\\c|\5\2//\62;C|\4\2$$",
"^^\5\2\13\13\17\17\"\"\4\2\f\f\17\17\u00cf\2\3\3\2\2\2\2\5\3\2\2\2\2",
"\7\3\2\2\2\2\t\3\2\2\2\2\13\3\2\2\2\2\r\3\2\2\2\2\17\3\2\2\2\2\21\3",
"\2\2\2\2\23\3\2\2\2\2\25\3\2\2\2\2\27\3\2\2\2\2\31\3\2\2\2\2\33\3\2",
"\2\2\2\35\3\2\2\2\2\37\3\2\2\2\2!\3\2\2\2\2#\3\2\2\2\3%\3\2\2\2\5.\3",
"\2\2\2\7=\3\2\2\2\tH\3\2\2\2\13K\3\2\2\2\rM\3\2\2\2\17P\3\2\2\2\21T",
"\3\2\2\2\23[\3\2\2\2\25b\3\2\2\2\27i\3\2\2\2\31|\3\2\2\2\33\u0098\3",
"\2\2\2\35\u00a1\3\2\2\2\37\u00a5\3\2\2\2!\u00a9\3\2\2\2#\u00b7\3\2\2",
"\2%&\7I\2\2&\'\7t\2\2\'(\7c\2\2()\7o\2\2)*\7o\2\2*+\7c\2\2+,\7t\2\2",
",-\7<\2\2-\4\3\2\2\2./\7E\2\2/\60\7q\2\2\60\61\7p\2\2\61\62\7v\2\2\62",
"\63\7g\2\2\63\64\7p\2\2\64\65\7v\2\2\65\66\7R\2\2\66\67\7t\2\2\678\7",
"q\2\289\7h\2\29:\7k\2\2:;\7n\2\2;<\7g\2\2<\6\3\2\2\2=>\7P\2\2>?\7c\2",
"\2?@\7o\2\2@A\7g\2\2AB\7u\2\2BC\7r\2\2CD\7c\2\2DE\7e\2\2EF\7g\2\2FG",
"\7<\2\2G\b\3\2\2\2HI\7O\2\2IJ\7U\2\2J\n\3\2\2\2KL\7\60\2\2L\f\3\2\2",
"\2MN\7<\2\2N\16\3\2\2\2OQ\t\2\2\2PO\3\2\2\2QR\3\2\2\2RP\3\2\2\2RS\3",
"\2\2\2S\20\3\2\2\2TX\t\3\2\2UW\t\4\2\2VU\3\2\2\2WZ\3\2\2\2XV\3\2\2\2",
"XY\3\2\2\2Y\22\3\2\2\2ZX\3\2\2\2[_\t\3\2\2\\^\t\5\2\2]\\\3\2\2\2^a\3",
"\2\2\2_]\3\2\2\2_`\3\2\2\2`\24\3\2\2\2a_\3\2\2\2bf\t\6\2\2ce\t\7\2\2",
"dc\3\2\2\2eh\3\2\2\2fd\3\2\2\2fg\3\2\2\2g\26\3\2\2\2hf\3\2\2\2im\t\6",
"\2\2jl\t\7\2\2kj\3\2\2\2lo\3\2\2\2mk\3\2\2\2mn\3\2\2\2nx\3\2\2\2om\3",
"\2\2\2pq\7\60\2\2qu\t\6\2\2rt\t\b\2\2sr\3\2\2\2tw\3\2\2\2us\3\2\2\2",
"uv\3\2\2\2vy\3\2\2\2wu\3\2\2\2xp\3\2\2\2yz\3\2\2\2zx\3\2\2\2z{\3\2\2",
"\2{\30\3\2\2\2|\u0080\t\6\2\2}\177\t\7\2\2~}\3\2\2\2\177\u0082\3\2\2",
"\2\u0080~\3\2\2\2\u0080\u0081\3\2\2\2\u0081\u008d\3\2\2\2\u0082\u0080",
"\3\2\2\2\u0083\u0084\7\60\2\2\u0084\u0088\t\6\2\2\u0085\u0087\t\b\2",
"\2\u0086\u0085\3\2\2\2\u0087\u008a\3\2\2\2\u0088\u0086\3\2\2\2\u0088",
"\u0089\3\2\2\2\u0089\u008c\3\2\2\2\u008a\u0088\3\2\2\2\u008b\u0083\3",
"\2\2\2\u008c\u008f\3\2\2\2\u008d\u008b\3\2\2\2\u008d\u008e\3\2\2\2\u008e",
"\u0090\3\2\2\2\u008f\u008d\3\2\2\2\u0090\u0091\7\60\2\2\u0091\u0095",
"\t\3\2\2\u0092\u0094\t\b\2\2\u0093\u0092\3\2\2\2\u0094\u0097\3\2\2\2",
"\u0095\u0093\3\2\2\2\u0095\u0096\3\2\2\2\u0096\32\3\2\2\2\u0097\u0095",
"\3\2\2\2\u0098\u009c\7$\2\2\u0099\u009b\n\t\2\2\u009a\u0099\3\2\2\2",
"\u009b\u009e\3\2\2\2\u009c\u009a\3\2\2\2\u009c\u009d\3\2\2\2\u009d\u009f",
"\3\2\2\2\u009e\u009c\3\2\2\2\u009f\u00a0\7$\2\2\u00a0\34\3\2\2\2\u00a1",
"\u00a2\t\n\2\2\u00a2\u00a3\3\2\2\2\u00a3\u00a4\b\17\2\2\u00a4\36\3\2",
"\2\2\u00a5\u00a6\7\f\2\2\u00a6\u00a7\3\2\2\2\u00a7\u00a8\b\20\2\2\u00a8",
" \3\2\2\2\u00a9\u00aa\7\61\2\2\u00aa\u00ab\7,\2\2\u00ab\u00af\3\2\2",
"\2\u00ac\u00ae\13\2\2\2\u00ad\u00ac\3\2\2\2\u00ae\u00b1\3\2\2\2\u00af",
"\u00b0\3\2\2\2\u00af\u00ad\3\2\2\2\u00b0\u00b2\3\2\2\2\u00b1\u00af\3",
"\2\2\2\u00b2\u00b3\7,\2\2\u00b3\u00b4\7\61\2\2\u00b4\u00b5\3\2\2\2\u00b5",
"\u00b6\b\21\3\2\u00b6\"\3\2\2\2\u00b7\u00b8\7\61\2\2\u00b8\u00b9\7\61",
"\2\2\u00b9\u00bd\3\2\2\2\u00ba\u00bc\n\13\2\2\u00bb\u00ba\3\2\2\2\u00bc",
"\u00bf\3\2\2\2\u00bd\u00bb\3\2\2\2\u00bd\u00be\3\2\2\2\u00be\u00c0\3",
"\2\2\2\u00bf\u00bd\3\2\2\2\u00c0\u00c1\b\22\3\2\u00c1$\3\2\2\2\21\2",
"RX_fmuz\u0080\u0088\u008d\u0095\u009c\u00af\u00bd\4\2\3\2\b\2\2"].join("");


var atn = new antlr4.atn.ATNDeserializer().deserialize(serializedATN);

var decisionsToDFA = atn.decisionToState.map( function(ds, index) { return new antlr4.dfa.DFA(ds, index); });

function SHRContentProfileLexer(input) {
  antlr4.Lexer.call(this, input);
  this._interp = new antlr4.atn.LexerATNSimulator(this, atn, decisionsToDFA, new antlr4.PredictionContextCache());
  return this;
}

SHRContentProfileLexer.prototype = Object.create(antlr4.Lexer.prototype);
SHRContentProfileLexer.prototype.constructor = SHRContentProfileLexer;

SHRContentProfileLexer.EOF = antlr4.Token.EOF;
SHRContentProfileLexer.KW_GRAMMAR = 1;
SHRContentProfileLexer.KW_G_CONTENT_PROFILE = 2;
SHRContentProfileLexer.KW_NAMESPACE = 3;
SHRContentProfileLexer.KW_MUST_SUPPORT = 4;
SHRContentProfileLexer.DOT = 5;
SHRContentProfileLexer.COLON = 6;
SHRContentProfileLexer.WHOLE_NUMBER = 7;
SHRContentProfileLexer.ALL_CAPS = 8;
SHRContentProfileLexer.UPPER_WORD = 9;
SHRContentProfileLexer.LOWER_WORD = 10;
SHRContentProfileLexer.DOT_SEPARATED_LW = 11;
SHRContentProfileLexer.DOT_SEPARATED_UW = 12;
SHRContentProfileLexer.STRING = 13;
SHRContentProfileLexer.WS = 14;
SHRContentProfileLexer.NEWLINE = 15;
SHRContentProfileLexer.COMMENT = 16;
SHRContentProfileLexer.LINE_COMMENT = 17;


SHRContentProfileLexer.modeNames = [ "DEFAULT_MODE" ];

SHRContentProfileLexer.literalNames = [ 'null', "'Grammar:'", "'ContentProfile'",
"'Namespace:'", "'MS'", "'.'", "':'",
'null', 'null', 'null', 'null',
'null', 'null', 'null', 'null',
"'\n'" ];

SHRContentProfileLexer.symbolicNames = [ 'null', "KW_GRAMMAR", "KW_G_CONTENT_PROFILE",
"KW_NAMESPACE", "KW_MUST_SUPPORT",
"DOT", "COLON", "WHOLE_NUMBER",
"ALL_CAPS", "UPPER_WORD", "LOWER_WORD",
"DOT_SEPARATED_LW", "DOT_SEPARATED_UW",
"STRING", "WS", "NEWLINE", "COMMENT",
"LINE_COMMENT" ];

SHRContentProfileLexer.ruleNames = [ "KW_GRAMMAR", "KW_G_CONTENT_PROFILE",
"KW_NAMESPACE", "KW_MUST_SUPPORT",
"DOT", "COLON", "WHOLE_NUMBER", "ALL_CAPS",
"UPPER_WORD", "LOWER_WORD", "DOT_SEPARATED_LW",
"DOT_SEPARATED_UW", "STRING", "WS",
"NEWLINE", "COMMENT", "LINE_COMMENT" ];

SHRContentProfileLexer.grammarFileName = "SHRContentProfileLexer.g4";



exports.SHRContentProfileLexer = SHRContentProfileLexer;
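
As a quick smoke test, the generated lexer can be driven directly with the antlr4 JavaScript runtime (a sketch; the input line is hypothetical):

const antlr4 = require('antlr4/index');
const {SHRContentProfileLexer} = require('./SHRContentProfileLexer');

// Tokenize a single header line and print each token's symbolic name
const input = new antlr4.InputStream('Grammar: ContentProfile 1.0\n');
const lexer = new SHRContentProfileLexer(input);
for (const token of lexer.getAllTokens()) {
  console.log(SHRContentProfileLexer.symbolicNames[token.type], JSON.stringify(token.text));
}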

