diff --git a/tests/clinvar_xml_io/resources/clinvar_dataset_v1.xml.gz b/tests/clinvar_xml_io/resources/clinvar_dataset_v1.xml.gz new file mode 100644 index 00000000..ebfddebe Binary files /dev/null and b/tests/clinvar_xml_io/resources/clinvar_dataset_v1.xml.gz differ diff --git a/tests/clinvar_xml_io/resources/test_clinvar_dataset.xml.gz b/tests/clinvar_xml_io/resources/clinvar_dataset_v2.xml.gz similarity index 100% rename from tests/clinvar_xml_io/resources/test_clinvar_dataset.xml.gz rename to tests/clinvar_xml_io/resources/clinvar_dataset_v2.xml.gz diff --git a/tests/clinvar_xml_io/test_clinvar_dataset.py b/tests/clinvar_xml_io/test_clinvar_dataset.py index 4e9c82e5..de6d7ff7 100644 --- a/tests/clinvar_xml_io/test_clinvar_dataset.py +++ b/tests/clinvar_xml_io/test_clinvar_dataset.py @@ -7,7 +7,7 @@ def test_dataset_write(): - input_file = os.path.join(resources_dir, 'test_clinvar_dataset.xml.gz') + input_file = os.path.join(resources_dir, 'clinvar_dataset_v2.xml.gz') output_file = os.path.join(resources_dir, 'test_output.xml.gz') input_dataset = ClinVarDataset(input_file) diff --git a/tests/clinvar_xml_io/test_clinvar_record.py b/tests/clinvar_xml_io/test_clinvar_record.py index 0db57f90..f6000c55 100644 --- a/tests/clinvar_xml_io/test_clinvar_record.py +++ b/tests/clinvar_xml_io/test_clinvar_record.py @@ -8,6 +8,21 @@ resources_dir = os.path.join(os.path.dirname(__file__), 'resources') +def test_clinvar_record_v1_v2_migration(): + # Same RCV in both versions + v1_input_file = os.path.join(resources_dir, 'clinvar_dataset_v1.xml.gz') + v2_input_file = os.path.join(resources_dir, 'clinvar_dataset_v2.xml.gz') + record_v1 = next(iter(ClinVarDataset(v1_input_file))) + record_v2 = next(iter(ClinVarDataset(v2_input_file))) + assert record_v1.xsd_version == 1.6 + assert record_v2.xsd_version == 2.0 + + assert record_v1.accession == record_v2.accession + assert record_v1.score == record_v2.score + assert set(record_v1.valid_clinical_significances) == set(record_v2.valid_clinical_significances) + assert set(record_v1.valid_allele_origins) == set(record_v2.valid_allele_origins) + + def test_multiple_clinical_classifications_record(): # input dataset with only one record input_file = os.path.join(resources_dir, 'multiple_classifications.xml.gz') diff --git a/tests/clinvar_xml_io/test_xml_parsing.py b/tests/clinvar_xml_io/test_xml_parsing.py index 6ac96677..28a2231a 100644 --- a/tests/clinvar_xml_io/test_xml_parsing.py +++ b/tests/clinvar_xml_io/test_xml_parsing.py @@ -7,7 +7,7 @@ def test_parse_header_attributes(): - input_file = os.path.join(resources_dir, 'test_clinvar_dataset.xml.gz') + input_file = os.path.join(resources_dir, 'clinvar_dataset_v2.xml.gz') header_attr = parse_header_attributes(input_file) assert header_attr['Dated'] == '2023-02-22' assert header_attr['xsi:noNamespaceSchemaLocation'] == 'https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/RCV/ClinVar_RCV_2.0.xsd' diff --git a/tests/output_generation/resources/expected_annotation_output_v2.xml.gz b/tests/output_generation/resources/expected_annotation_output.xml.gz similarity index 100% rename from tests/output_generation/resources/expected_annotation_output_v2.xml.gz rename to tests/output_generation/resources/expected_annotation_output.xml.gz diff --git a/tests/output_generation/resources/expected_annotation_output_v1.xml.gz b/tests/output_generation/resources/expected_annotation_output_v1.xml.gz deleted file mode 100644 index fd2b56cd..00000000 Binary files a/tests/output_generation/resources/expected_annotation_output_v1.xml.gz and /dev/null differ diff --git a/tests/output_generation/resources/test_annotation_input_v2.xml.gz b/tests/output_generation/resources/test_annotation_input.xml.gz similarity index 100% rename from tests/output_generation/resources/test_annotation_input_v2.xml.gz rename to tests/output_generation/resources/test_annotation_input.xml.gz diff --git a/tests/output_generation/resources/test_annotation_input_v1.xml.gz b/tests/output_generation/resources/test_annotation_input_v1.xml.gz deleted file mode 100644 index 1dc10fb9..00000000 Binary files a/tests/output_generation/resources/test_annotation_input_v1.xml.gz and /dev/null differ diff --git a/tests/output_generation/test_annotated_clinvar.py b/tests/output_generation/test_annotated_clinvar.py index 14d2a40e..7abb05ff 100644 --- a/tests/output_generation/test_annotated_clinvar.py +++ b/tests/output_generation/test_annotated_clinvar.py @@ -13,10 +13,12 @@ def test_string_to_set(): assert string_to_set('{}') == set() -def run_generate_annotated_xml(input_file, expected_output_file): +def test_generate_annotated_xml(): + input_file = os.path.join(resources_dir, 'test_annotation_input.xml.gz') efo_mapping_file = os.path.join(resources_dir, 'string_to_ontology_mappings.tsv') gene_mapping_file = os.path.join(resources_dir, 'snp2gene_extract.tsv') output_file = os.path.join(resources_dir, 'test_output.xml.gz') + expected_output_file = os.path.join(resources_dir, 'expected_annotation_output.xml.gz') generate_annotated_clinvar_xml(input_file, efo_mapping_file, gene_mapping_file, output_file) @@ -31,15 +33,3 @@ def run_generate_annotated_xml(input_file, expected_output_file): if os.path.exists(output_file): os.remove(output_file) - - -def test_generate_annotated_xml_v1(): - input_file = os.path.join(resources_dir, 'test_annotation_input_v1.xml.gz') - expected_output_file = os.path.join(resources_dir, 'expected_annotation_output_v1.xml.gz') - run_generate_annotated_xml(input_file, expected_output_file) - - -def test_generate_annotated_xml_v2(): - input_file = os.path.join(resources_dir, 'test_annotation_input_v2.xml.gz') - expected_output_file = os.path.join(resources_dir, 'expected_annotation_output_v2.xml.gz') - run_generate_annotated_xml(input_file, expected_output_file)