Skip to content

Commit

Permalink
implement a more meaningful v1/v2 migration test
Browse files Browse the repository at this point in the history
  • Loading branch information
apriltuesday committed May 24, 2024
1 parent 6feecc8 commit 0d51f7a
Show file tree
Hide file tree
Showing 10 changed files with 20 additions and 15 deletions.
Binary file not shown.
2 changes: 1 addition & 1 deletion tests/clinvar_xml_io/test_clinvar_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@


def test_dataset_write():
input_file = os.path.join(resources_dir, 'test_clinvar_dataset.xml.gz')
input_file = os.path.join(resources_dir, 'clinvar_dataset_v2.xml.gz')
output_file = os.path.join(resources_dir, 'test_output.xml.gz')

input_dataset = ClinVarDataset(input_file)
Expand Down
15 changes: 15 additions & 0 deletions tests/clinvar_xml_io/test_clinvar_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,21 @@
resources_dir = os.path.join(os.path.dirname(__file__), 'resources')


def test_clinvar_record_v1_v2_migration():
    """Compare the first record read from the v1 and v2 fixture datasets.

    Both fixtures are meant to contain the same RCV, so apart from the
    reported XSD version the compared fields must agree.
    """
    def first_record(file_name):
        # Only the leading record of each dataset takes part in the comparison.
        return next(iter(ClinVarDataset(os.path.join(resources_dir, file_name))))

    v1_record = first_record('clinvar_dataset_v1.xml.gz')
    v2_record = first_record('clinvar_dataset_v2.xml.gz')

    # Each fixture must be recognised as its own schema version.
    assert v1_record.xsd_version == 1.6
    assert v2_record.xsd_version == 2.0

    # Fields that should be unaffected by the schema version.
    assert v1_record.accession == v2_record.accession
    assert v1_record.score == v2_record.score
    # Collections are compared as sets, so ordering differences are ignored.
    assert set(v1_record.valid_clinical_significances) == set(v2_record.valid_clinical_significances)
    assert set(v1_record.valid_allele_origins) == set(v2_record.valid_allele_origins)


def test_multiple_clinical_classifications_record():
# input dataset with only one record
input_file = os.path.join(resources_dir, 'multiple_classifications.xml.gz')
Expand Down
2 changes: 1 addition & 1 deletion tests/clinvar_xml_io/test_xml_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@


def test_parse_header_attributes():
input_file = os.path.join(resources_dir, 'test_clinvar_dataset.xml.gz')
input_file = os.path.join(resources_dir, 'clinvar_dataset_v2.xml.gz')
header_attr = parse_header_attributes(input_file)
assert header_attr['Dated'] == '2023-02-22'
assert header_attr['xsi:noNamespaceSchemaLocation'] == 'https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/RCV/ClinVar_RCV_2.0.xsd'
Binary file not shown.
Binary file not shown.
16 changes: 3 additions & 13 deletions tests/output_generation/test_annotated_clinvar.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@ def test_string_to_set():
assert string_to_set('{}') == set()


def run_generate_annotated_xml(input_file, expected_output_file):
def test_generate_annotated_xml():
input_file = os.path.join(resources_dir, 'test_annotation_input.xml.gz')
efo_mapping_file = os.path.join(resources_dir, 'string_to_ontology_mappings.tsv')
gene_mapping_file = os.path.join(resources_dir, 'snp2gene_extract.tsv')
output_file = os.path.join(resources_dir, 'test_output.xml.gz')
expected_output_file = os.path.join(resources_dir, 'expected_annotation_output.xml.gz')

generate_annotated_clinvar_xml(input_file, efo_mapping_file, gene_mapping_file, output_file)

Expand All @@ -31,15 +33,3 @@ def run_generate_annotated_xml(input_file, expected_output_file):

if os.path.exists(output_file):
os.remove(output_file)


def test_generate_annotated_xml_v1():
    """Delegate to run_generate_annotated_xml with the v1 fixture files."""
    run_generate_annotated_xml(
        input_file=os.path.join(resources_dir, 'test_annotation_input_v1.xml.gz'),
        expected_output_file=os.path.join(resources_dir, 'expected_annotation_output_v1.xml.gz'),
    )


def test_generate_annotated_xml_v2():
    """Delegate to run_generate_annotated_xml with the v2 fixture files."""
    run_generate_annotated_xml(
        input_file=os.path.join(resources_dir, 'test_annotation_input_v2.xml.gz'),
        expected_output_file=os.path.join(resources_dir, 'expected_annotation_output_v2.xml.gz'),
    )

0 comments on commit 0d51f7a

Please sign in to comment.