Skip to content

Commit

Permalink
Merge pull request #86 from FHIR/molec-conseq
Browse files Browse the repository at this point in the history
Implement molec-conseq API
  • Loading branch information
rhdolin authored May 16, 2024
2 parents 054ab29 + 9562ece commit 979d932
Show file tree
Hide file tree
Showing 7 changed files with 643 additions and 3 deletions.
95 changes: 95 additions & 0 deletions app/api_spec.yml
Original file line number Diff line number Diff line change
Expand Up @@ -708,6 +708,101 @@ paths:
- "somatic"
description: Enables an App to limit results to those that are 'germline' or 'somatic'. Default is to include variants irrespective of genomic source class.

# Molecular-consequence lookup. A single GET operation is declared; requests are
# routed to the Python handler named by `operationId` below.
# NOTE(review): the handler added in this same commit (app/endpoints.py) aborts
# with HTTP 400 when both 'variants' and 'ranges' are supplied, and also when
# neither is supplied. The description below instead says other parameter
# combinations are accepted and that the no-input case "returns no
# consequences" - confirm which behavior is intended and align the two.
/subject-operations/phenotype-operations/$find-subject-molecular-consequences:
get:
description: |-
Retrieves molecular consequences of a DNA variant.
Generally, the operation will be called with 'subject' and 'ranges' parameters, which will return molecular consequences for patient variants in supplied ranges. Another common pattern is to call the operation with 'subject', 'ranges', and 'featureConsequences' parameters, which will limit the return to only those molecular consequences having supplied features. For other combinations of input parameters, the operation returns molecular consequences satisfying all parameters.
Where no variants, ranges, or featureConsequences are provided, the operation returns no consequences.
summary: "Find Subject Molecular Consequences"
operationId: "app.endpoints.find_subject_molecular_consequences"
tags:
- "Subject Phenotype Operations"
# Only the success response is declared; its schema is an unconstrained object.
responses:
'200':
description: "Returns a FHIR Parameters resource, containing each molecular consequence identified, along with those variants from which the implication is derived."
content:
application/json:
schema:
type: object
# Query parameters. 'subject' is the only required one. Every array parameter
# except 'testDateRange' uses style=form with explode=false, i.e. one
# comma-separated query value per parameter.
parameters:
- name: subject
in: query
schema:
format: PatientID
type: string
example: 'TCGA-DD-A1EH'
required: true
description: The subject of interest.
- name: variants
in: query
description: List of variants being sought. Must be in HGVS or SPDI format.
schema:
type: array
items:
type: string
example:
- 'NM_178349.2:c.118G>T'
style: "form"
explode: false
- name: ranges
in: query
description: List of regions to be searched for variants. Must be in zero-based RefSeq:Integer-range format.
schema:
type: array
items:
type: string
# Accepts "<accession>.<version>:<start>-<end>", where the accession is
# 'N' + one letter + '_' + 4-10 digits, the version is 1-2 digits and both
# coordinates are 1-10 digits; surrounding whitespace is tolerated.
pattern: '^\s*[Nn][A-Za-z]_\d{4,10}(\.)(\d{1,2}):\d{1,10}-\d{1,10}\s*$'
style: "form"
explode: false
- name: featureConsequences
in: query
description: List of consequences sought. Must be in token or codesystem|code format. (These will generally be coded with Sequence Ontology codes under SO:0001537)
schema:
type: array
items:
type: string
style: "form"
explode: false
- name: testIdentifiers
in: query
description: Supply a list of test identifiers. Only results originating from one of these tests will be returned.
schema:
type: array
items:
type: string
style: "form"
explode: false
# Unlike the other array parameters this one uses explode=true, so each bound
# is sent as its own repeated 'testDateRange=' query parameter.
- name: testDateRange
in: query
schema:
type: array
items:
type: string
# A comparison prefix (eq, ne, lt, gt, ge, le, sa, eb) immediately followed by
# a YYYY-MM-DD date with a year in 1800-2099. The alternation enforces valid
# day counts per month, including 29 February only in leap years.
pattern: '^\s*(eq|ne|lt|gt|ge|le|sa|eb)(((18|19|20)[0-9]{2}-(0?[13578]|1[02])-(0?[1-9]|[12][0-9]|3[01]))|((18|19|20)[0-9]{2}-(0?[469]|11)-(0?[1-9]|[12][0-9]|30))|((18|19|20)[0-9]{2}-(0?[2])-(0?[1-9]|1[0-9]|2[0-8]))|((((18|19|20)(04|08|[2468][048]|[13579][26]))|2000)-(0?[2])-29))\s*$'
description: Supply a date range. Only results generated during this range will be returned.
style: "form"
explode: true
- name: specimenIdentifiers
in: query
description: Supply a list of specimen identifiers. Only results derived from one of these specimens will be returned.
schema:
type: array
items:
type: string
style: "form"
explode: false
# Optional single-valued filter restricted to the two enum values below.
- name: genomicSourceClass
in: query
schema:
type: string
enum:
- "germline"
- "somatic"
description: Enables an App to limit results to those that are 'germline' or 'somatic'. Default is to include variants irrespective of genomic source class.

/subject-operations/metadata-operations/$find-study-metadata:
get:
description: |-
Expand Down
133 changes: 133 additions & 0 deletions app/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -689,6 +689,60 @@ def create_dx_implication_profile(implication, subject, vids):
return resource


def create_molecular_consequence_profile(molecular_consequence, subject, vids):
    """Build a FHIR Observation describing one molecular consequence record.

    Args:
        molecular_consequence: Mapping for a single consequence record. ``_id``
            is required. ``cHGVS``, ``transcriptRefSeq``, ``pHGVS`` and
            ``impact`` are optional text values. ``featureConsequence`` is an
            optional iterable of mappings that each carry ``system``, ``code``
            and ``display``.
        subject: Patient identifier used for the ``subject`` reference.
        vids: Identifiers of the variant observations this consequence is
            derived from; each becomes an ``Observation/dv-<vid>`` reference.

    Returns:
        An ``OrderedDict`` shaped as an Observation that declares the
        genomics-reporting ``molecular-consequence`` profile.

    Raises:
        KeyError: If ``_id`` is missing, or a feature-consequence entry lacks
            ``system``, ``code`` or ``display``.
    """
    loinc_system = "http://loinc.org"
    tbd_system = "http://hl7.org/fhir/uv/genomics-reporting/CodeSystem/tbd-codes-cs"

    resource = OrderedDict()
    resource["resourceType"] = "Observation"
    resource["id"] = "dv-" + str(molecular_consequence['_id'])
    resource["meta"] = {"profile": [
        "http://hl7.org/fhir/uv/genomics-reporting/StructureDefinition/molecular-consequence"]}
    resource["status"] = "final"
    resource["category"] = [{"coding": [
        {"system": "http://terminology.hl7.org/CodeSystem/observation-category", "code": "laboratory"},
        {"system": "http://terminology.hl7.org/CodeSystem/v2-0074", "code": "GE"}]}]
    resource["code"] = {"coding": [{"system": tbd_system, "code": "molecular-consequences"}]}
    resource["subject"] = {"reference": f"Patient/{subject}"}

    # derivedFrom is omitted entirely when there are no source variants.
    if vids:
        resource["derivedFrom"] = [{"reference": f"Observation/dv-{vid}"} for vid in vids]

    resource["component"] = []

    # Optional free-text components, emitted in this fixed order:
    # (record key, LOINC code, display).
    text_components = (
        ('cHGVS', "48004-6", "DNA change (c.HGVS)"),
        ('transcriptRefSeq', "51958-7", "Reference Transcript"),
        ('pHGVS', "48005-3", "Protein (Amino Acid) Change - pHGVS"),
    )
    for key, loinc_code, display in text_components:
        if key in molecular_consequence:
            resource["component"].append({
                "code": {"coding": [{"system": loinc_system, "code": loinc_code, "display": display}]},
                "valueCodeableConcept": {"text": f"{molecular_consequence[key]}"}})

    # A record without feature consequences yields no such components instead
    # of failing with KeyError.
    for feature_consequence in molecular_consequence.get("featureConsequence") or []:
        resource["component"].append({
            "code": {"coding": [{"system": tbd_system,
                                 "code": "feature-consequence",
                                 "display": "Feature Consequence"}]},
            "valueCodeableConcept": {"coding": [{"system": f"{feature_consequence['system']}",
                                                 "code": f"{feature_consequence['code']}",
                                                 "display": f"{feature_consequence['display']}"}]}})

    if 'impact' in molecular_consequence:
        resource["component"].append({
            "code": {"coding": [{"system": tbd_system,
                                 "code": "functional-effect",
                                 "display": "Functional Effect"}]},
            "valueCodeableConcept": {"text": f"{molecular_consequence['impact']}"}})

    return resource


def create_tx_implication_profile_civic(implication, subject, vids):
resource = OrderedDict()
resource["resourceType"] = "Observation"
Expand Down Expand Up @@ -1818,3 +1872,82 @@ def query_transcript(transcript):
abort(400, "Unable to provide information on this transcript at this time")

return results


def query_molecular_consequences_by_variants(normalized_variant_list, feature_consequence_list, query):
    """Find variants that have molecular consequences, optionally filtered by feature.

    Runs an aggregation on ``variants_db`` that matches ``query`` plus the
    requested SPDIs, then joins each variant with its ``MolecConseq`` records
    through ``$lookup``. Variants without any joined record are dropped.

    Args:
        normalized_variant_list: Iterable of mappings; the values under the
            ``GRCh37`` and ``GRCh38`` keys (when present) are collected as the
            SPDIs to search for.
        feature_consequence_list: Normalized feature-consequence filters. Each
            entry has ``isSystem`` and ``feature_consequence``, plus ``system``
            when ``isSystem`` is true. Empty or ``None`` disables the filter.
        query: Base match document for the variants collection. It is mutated:
            an ``SPDI`` ``$in`` clause is added to it.

    Returns:
        A list of variant documents, each with a non-empty
        ``molecularConsequenceMatches`` list. Any error raised while running
        the aggregation is printed and an empty list is returned instead.
    """
    variant_list = []
    for item in normalized_variant_list:
        for build in ("GRCh37", "GRCh38"):
            if build in item:
                variant_list.append(item[build])

    pipeline_part = [{'$match': {'$expr': {'$and': [{'$or': [{'$eq': ['$variantID', '$$myvariant_id']}]}]}}},
                     {'$addFields': {}}]

    if feature_consequence_list:
        or_query = []
        for feature_consequence in feature_consequence_list:
            if feature_consequence['isSystem']:
                or_query.append({'$and': [
                    {'featureConsequence.code': {'$eq': feature_consequence['feature_consequence']}},
                    {'featureConsequence.system': {'$eq': feature_consequence['system']}}]})
            else:
                # NOTE(review): only '*' is escaped, so any other regex
                # metacharacter in the caller-supplied text is interpreted by
                # the database - consider escaping the whole value.
                contains = ".*" + str(feature_consequence['feature_consequence']).replace('*', r'\*') + ".*"
                or_query.append({'$or': [
                    {'featureConsequence.code': {'$regex': contains}},
                    {'featureConsequence.display': {'$regex': contains}}
                ]})

        # First keep records with at least one matching feature consequence,
        # then unwind and re-filter so only the matching entries survive.
        pipeline_part.append({'$match': {'$or': or_query}})
        pipeline_part.append({"$unwind": "$featureConsequence"})
        pipeline_part.append({'$match': {'$or': or_query}})
        # Reassemble one document per original record. $group requires an
        # _id expression; the unwound documents still carry the record's _id,
        # and downstream code reads '_id' from each match.
        pipeline_part.append({"$group": {
            "_id": "$_id",
            "patientID": {"$first": "$$ROOT.patientID"},
            "variantID": {"$first": "$$ROOT.variantID"},
            "transcriptRefSeq": {"$first": "$$ROOT.transcriptRefSeq"},
            "MANE": {"$first": "$$ROOT.MANE"},
            "source": {"$first": "$$ROOT.source"},
            "cHGVS": {"$first": "$$ROOT.cHGVS"},
            "pHGVS": {"$first": "$$ROOT.pHGVS"},
            "featureConsequence": {"$push": "$$ROOT.featureConsequence"},
            "impact": {"$first": "$$ROOT.impact"}
        }})

    query['SPDI'] = {'$in': variant_list}

    query_string = [{'$match': query},
                    {'$lookup': {'from': 'MolecConseq', 'let': {'myvariant_id': '$_id'}, 'pipeline': pipeline_part,
                                 'as': 'molecularConsequenceMatches'}},
                    {'$addFields': {}},
                    {'$match': {'molecularConsequenceMatches': {'$exists': True, '$not': {'$size': 0}}}}]

    try:
        results = list(variants_db.aggregate(query_string))
    except Exception as e:
        # Deliberate best-effort: a failed lookup is reported as "no results".
        print(f"DEBUG: Error {e} under query_molecular_consequences_by_variants query={query}")
        results = []

    return results
99 changes: 99 additions & 0 deletions app/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -1160,6 +1160,105 @@ def find_subject_dx_implications(
return jsonify(result)


def find_subject_molecular_consequences(
        subject, variants=None, ranges=None, featureConsequence=None, testIdentifiers=None, testDateRange=None,
        specimenIdentifiers=None, genomicSourceClass=None, featureConsequences=None):
    """Handle the $find-subject-molecular-consequences operation.

    Args:
        subject: Patient identifier (required).
        variants: Variants to look up; mutually exclusive with ``ranges``.
        ranges: Regions to search for the subject's variants; mutually
            exclusive with ``variants``.
        featureConsequence: Legacy spelling of the feature-consequence filter,
            kept so existing callers keep working.
        testIdentifiers: Restrict results to these test identifiers.
        testDateRange: Date comparisons applied to ``testDate``.
        specimenIdentifiers: Restrict results to these specimen identifiers.
        genomicSourceClass: Restrict results to this genomic source class.
        featureConsequences: Feature-consequence filter under the name the API
            specification declares for the query parameter. Without a handler
            argument of this exact name the filter supplied by clients never
            reached this function.

    Returns:
        A JSON response holding a FHIR ``Parameters`` resource. For every
        matching variant a list of ``consequence`` parameters is appended,
        followed by the corresponding ``variant`` parameter. The ``parameter``
        key is omitted when nothing matched.

    Aborts:
        400 when both or neither of ``variants`` and ``ranges`` are supplied;
        422 when a supplied variant could not be lifted over to a genome build
        that is present for the subject.
    """
    # Parameters
    subject = subject.strip()
    common.validate_subject(subject)

    if variants and ranges:
        abort(400, "You cannot supply both 'variants' and 'ranges'.")

    if not variants and not ranges:
        abort(400, "You must supply either 'variants' or 'ranges'.")

    # Accept the filter under either spelling.
    requested_consequences = list(featureConsequences or []) + list(featureConsequence or [])
    normalized_feature_consequence_list = list(map(common.get_feature_consequence, requested_consequences))

    # Query
    query = {}

    # date query
    if testDateRange:
        testDateRange = list(map(common.get_date, testDateRange))
        query["testDate"] = {}

        for date_range in testDateRange:
            query["testDate"][date_range['OPERATOR']] = date_range['DATE']

    # Subject Query
    query["patientID"] = {"$eq": subject}

    # testIdentifiers query
    if testIdentifiers:
        testIdentifiers = list(map(str.strip, testIdentifiers))
        query["testID"] = {"$in": testIdentifiers}

    # specimenIdentifiers query
    if specimenIdentifiers:
        specimenIdentifiers = list(map(str.strip, specimenIdentifiers))
        query["specimenID"] = {"$in": specimenIdentifiers}

    # Genomic Source Class Query
    if genomicSourceClass:
        genomicSourceClass = genomicSourceClass.strip().lower()
        query["genomicSourceClass"] = {"$eq": genomicSourceClass}

    normalized_variants = []
    if ranges:
        # Resolve the ranges to the subject's variants that fall inside them.
        ranges = list(map(common.get_range, ranges))
        common.get_lift_over_range(ranges)
        variants = common.get_variants(ranges, query)
        if not variants:
            return jsonify({"resourceType": "Parameters"})
        normalized_variants = [{variant["BUILD"]: variant["SPDI"]} for variant in variants]

    if variants and not ranges:
        normalized_variants = list(map(common.get_variant, variants))

    # Result Object
    result = OrderedDict()
    result["resourceType"] = "Parameters"
    result["parameter"] = []

    if normalized_variants:
        if not ranges:
            # Only caller-supplied variants need the lift-over check; variants
            # found through ranges come straight from stored data.
            genomics_build_presence = common.get_genomics_build_presence(query)

            for normalizedVariant in normalized_variants:
                if not normalizedVariant["GRCh37"] and genomics_build_presence["GRCh37"]:
                    abort(422, f'Failed LiftOver. Variant: {normalizedVariant["variant"]}')
                elif not normalizedVariant["GRCh38"] and genomics_build_presence["GRCh38"]:
                    abort(422, f'Failed LiftOver. Variant: {normalizedVariant["variant"]}')

        query_results = common.query_molecular_consequences_by_variants(
            normalized_variants, normalized_feature_consequence_list, query)

        for res in query_results:
            if res["molecularConsequenceMatches"]:
                # Consequences go into the list created for THIS variant.
                # Indexing position 0 instead would pile every later variant's
                # consequences onto the first one and leave empty lists behind.
                # NOTE(review): a bare list inside Parameters.parameter does
                # not look like a valid FHIR Parameters entry; the shape is
                # kept here so existing clients are unaffected - confirm.
                consequence_params = []
                result["parameter"].append(consequence_params)
                for molecular_consequence in res["molecularConsequenceMatches"]:
                    parameter = OrderedDict()
                    parameter["name"] = "consequence"
                    parameter["resource"] = common.create_molecular_consequence_profile(
                        molecular_consequence, subject, [str(res['_id'])])
                    consequence_params.append(parameter)

                ref_seq = common.get_ref_seq_by_chrom_and_build(res['genomicBuild'], res['CHROM'])
                resource = common.create_fhir_variant_resource(res, ref_seq, subject)
                result["parameter"].append({
                    "name": "variant",
                    "resource": resource
                })

    if not result["parameter"]:
        result.pop("parameter")

    return jsonify(result)


def find_study_metadata(
subject, testIdentifiers=None, testDateRange=None,
specimenIdentifiers=None, ranges=None):
Expand Down
Loading

0 comments on commit 979d932

Please sign in to comment.