Skip to content

Commit

Permalink
add reference record class
Browse files Browse the repository at this point in the history
  • Loading branch information
apriltuesday committed Aug 21, 2024
1 parent 0107f0c commit a7870e6
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 23 deletions.
4 changes: 2 additions & 2 deletions cmat/clinvar_xml_io/clinvar_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import re
from datetime import date

from cmat.clinvar_xml_io.clinvar_record import ClinVarRecord
from cmat.clinvar_xml_io.clinvar_reference_record import ClinVarReferenceRecord
from cmat.clinvar_xml_io.xml_parsing import iterate_rcv_from_xml, parse_header_attributes

logger = logging.getLogger(__name__)
Expand All @@ -20,7 +20,7 @@ def __init__(self, clinvar_xml):

def __iter__(self):
for rcv in iterate_rcv_from_xml(self.clinvar_xml):
yield ClinVarRecord(rcv, self.xsd_version)
yield ClinVarReferenceRecord(rcv, self.xsd_version)

def get_xsd_version(self):
# For format, see https://github.com/ncbi/clinvar/blob/master/FTPSiteXsdChanges.md
Expand Down
30 changes: 12 additions & 18 deletions cmat/clinvar_xml_io/clinvar_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from functools import cached_property
from xml.dom import minidom

from cmat.clinvar_xml_io.clinical_classification import ClinicalClassification, MultipleClinicalClassificationsError
from cmat.clinvar_xml_io.clinical_classification import MultipleClinicalClassificationsError
from cmat.clinvar_xml_io.clinvar_measure import ClinVarRecordMeasure
from cmat.clinvar_xml_io.clinvar_trait import ClinVarTrait
from cmat.clinvar_xml_io.xml_parsing import find_elements, find_optional_unique_element, \
Expand All @@ -15,10 +15,10 @@


class ClinVarRecord:
"""Instances of this class hold data on individual ClinVar records. See also:
* /data-exploration/clinvar-variant-types/README.md for the in-depth explanation of ClinVar data model;
* Issue https://github.com/EBIvariation/eva-opentargets/issues/127 for the most recent discussions on changing
support of different ClinVar record types."""
"""
Base class for both reference and submitted records in ClinVar. See also:
/data-exploration/clinvar-variant-types/README.md for the in-depth explanation of ClinVar data model
"""

# Some allele origin terms in ClinVar are essentially conveying lack of information and are thus not useful.
NONSPECIFIC_ALLELE_ORIGINS = {'unknown', 'not provided', 'not applicable', 'tested-inconclusive', 'not-reported'}
Expand Down Expand Up @@ -62,13 +62,15 @@ def accession(self):

@property
def last_updated_date(self):
"""This tracks the latest update date, counting even minor technical updates."""
return self.record_xml.attrib['DateLastUpdated']
"""This tracks the latest update date, counting even minor technical updates.
Appears differently in reference and submitted records."""
raise NotImplementedError

@property
def created_date(self):
"""This tracks the date the record was first made public on ClinVar."""
return self.record_xml.attrib['DateCreated']
"""This tracks the date the record was first made public on ClinVar.
Appears differently in reference and submitted records."""
raise NotImplementedError

@property
def mode_of_inheritance(self):
Expand Down Expand Up @@ -111,15 +113,7 @@ def valid_allele_origins(self):
@cached_property
def clinical_classifications(self):
"""List of clinical classifications (Germline, Somatic, or Oncogenicity)"""
clinical_classifications = []
if self.xsd_version < 2:
# V1 only ever has a single clinical classification / clinical significance
clinical_classifications.append(
ClinicalClassification(find_mandatory_unique_element(self.record_xml, './ClinicalSignificance'), self))
else:
for clin_class in find_elements(self.record_xml, './Classifications/*'):
clinical_classifications.append(ClinicalClassification(clin_class, self))
return clinical_classifications
raise NotImplementedError

# The following properties are maintained for backwards compatibility, but are only present for a ClinVarRecord
# if there is exactly one ClinicalClassification for the record.
Expand Down
41 changes: 41 additions & 0 deletions cmat/clinvar_xml_io/clinvar_reference_record.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import logging
from functools import cached_property

from cmat.clinvar_xml_io.clinical_classification import ClinicalClassification

from cmat.clinvar_xml_io.clinvar_record import ClinVarRecord
from cmat.clinvar_xml_io.xml_parsing import find_mandatory_unique_element, find_elements

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


class ClinVarReferenceRecord(ClinVarRecord):
    """Reference records (RCVs) summarise information from submitted records (SCVs) and include additional annotations
    and cross-references supplied by ClinVar."""

    def __init__(self, record_xml, xsd_version):
        """Delegate construction to the ClinVarRecord base class with the same arguments."""
        super().__init__(record_xml, xsd_version)

    def __str__(self):
        """Return a short description of this record that includes its accession."""
        return f'ClinVarReferenceRecord object with accession {self.accession}'

    @property
    def last_updated_date(self):
        """Value of the 'DateLastUpdated' attribute on the record's XML element."""
        attributes = self.record_xml.attrib
        return attributes['DateLastUpdated']

    @property
    def created_date(self):
        """Value of the 'DateCreated' attribute on the record's XML element."""
        attributes = self.record_xml.attrib
        return attributes['DateCreated']

    @cached_property
    def clinical_classifications(self):
        """List of ClinicalClassification objects built from the record XML.

        For XSD versions below 2 the list holds exactly one entry, built from the mandatory
        './ClinicalSignificance' element; otherwise it holds one entry per child element of
        './Classifications'.
        """
        if self.xsd_version < 2:
            # V1 only ever has a single clinical classification / clinical significance
            significance_xml = find_mandatory_unique_element(self.record_xml, './ClinicalSignificance')
            return [ClinicalClassification(significance_xml, self)]

        return [
            ClinicalClassification(classification_xml, self)
            for classification_xml in find_elements(self.record_xml, './Classifications/*')
        ]
2 changes: 0 additions & 2 deletions cmat/clinvar_xml_io/clinvar_submitted_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,10 @@ def submission_date(self):

@property
def last_updated_date(self):
"""Overrides parent definition, in SCV this date is in the accession element"""
return find_mandatory_unique_element(self.record_xml, './ClinVarAccession').attrib['DateUpdated']

@property
def created_date(self):
"""Overrides parent definition, in SCV this date is in the accession element"""
return find_mandatory_unique_element(self.record_xml, './ClinVarAccession').attrib['DateCreated']

@property
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3087,4 +3087,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
}
}
2 changes: 2 additions & 0 deletions tests/clinvar_xml_io/test_clinvar_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ def test_multiple_clinical_classifications_record():


class TestClinvarRecord:
"""Tests base class as well as reference record"""

@classmethod
def setup_class(cls):
input_file = os.path.join(resources_dir, 'clinvar_dataset_v2.xml.gz')
Expand Down

0 comments on commit a7870e6

Please sign in to comment.