From 9a044ed0d27f3cb0f442d71ebffc1b9141424a39 Mon Sep 17 00:00:00 2001 From: April Shen Date: Tue, 9 Jul 2024 15:20:44 +0100 Subject: [PATCH] fix more tests --- bin/xlsx2json.py | 6 +- eva_sub_cli/metadata_utils.py | 3 +- .../EVA_Submission_test_with_asm_report.json | 97 ++++++++----------- tests/resources/sample_checker/metadata.json | 26 +---- tests/test_xlsx2json.py | 7 ++ 5 files changed, 55 insertions(+), 84 deletions(-) diff --git a/bin/xlsx2json.py b/bin/xlsx2json.py index 250d10a..767adaa 100644 --- a/bin/xlsx2json.py +++ b/bin/xlsx2json.py @@ -241,7 +241,7 @@ def get_sample_json_data(self): json_key = self.xlsx_conf[WORKSHEETS_KEY_NAME][SAMPLE] sample_json = {json_key: []} for row in self.get_rows(): - row.pop('row_num') + row_num = row.pop('row_num') json_value = {self.translate_header(SAMPLE, k): v for k, v in row.items() if v is not None} bio_sample_acc = self.xlsx_conf[SAMPLE][OPTIONAL_HEADERS_KEY_NAME][SAMPLE_ACCESSION_KEY] @@ -262,12 +262,12 @@ def get_sample_json_data(self): if sample_name not in json_value: self.add_error(f'If BioSample Accession is not provided, the {SAMPLE} worksheet should have ' f'{SAMPLE_NAME_KEY} populated', - sheet=SAMPLE, column=SAMPLE_NAME_KEY) + sheet=SAMPLE, row=row_num, column=SAMPLE_NAME_KEY) return None if scientific_name not in json_value: self.add_error(f'If BioSample Accession is not provided, the {SAMPLE} worksheet should have ' f'{SCIENTIFIC_NAME_KEY} populated', - sheet=SAMPLE, column=SCIENTIFIC_NAME_KEY) + sheet=SAMPLE, row=row_num, column=SCIENTIFIC_NAME_KEY) return None biosample_obj = self.get_biosample_object(json_value) diff --git a/eva_sub_cli/metadata_utils.py b/eva_sub_cli/metadata_utils.py index 77a8a7c..ce967ae 100644 --- a/eva_sub_cli/metadata_utils.py +++ b/eva_sub_cli/metadata_utils.py @@ -10,7 +10,8 @@ def get_samples_per_analysis(metadata): """Returns mapping of analysis alias to sample names, based on metadata.""" samples_per_analysis = defaultdict(list) for sample_info in metadata.get('sample', []): - samples_per_analysis[sample_info.get('analysisAlias')].append(sample_info.get('sampleInVCF')) + for analysis_alias in sample_info.get('analysisAlias', []): + samples_per_analysis[analysis_alias].append(sample_info.get('sampleInVCF')) return { analysis_alias: set(samples) for analysis_alias, samples in samples_per_analysis.items() diff --git a/tests/resources/EVA_Submission_test_with_asm_report.json b/tests/resources/EVA_Submission_test_with_asm_report.json index 892a0c2..5c3ede3 100644 --- a/tests/resources/EVA_Submission_test_with_asm_report.json +++ b/tests/resources/EVA_Submission_test_with_asm_report.json @@ -64,90 +64,73 @@ ], "sample": [ { - "analysisAlias": "VD1", - "sampleInVCF": "sample1", - "bioSampleAccession": "SAME00001" - }, - { - "analysisAlias": "VD2", - "sampleInVCF": "sample1", - "bioSampleAccession": "SAME00001" - }, - { - "analysisAlias": "VD3", + "analysisAlias": [ + "VD1", + "VD2", + "VD3" + ], "sampleInVCF": "sample1", "bioSampleAccession": "SAME00001" }, { - "analysisAlias": "VD1", - "sampleInVCF": "sample2", - "bioSampleAccession": "SAME00002" - }, - { - "analysisAlias": "VD2", + "analysisAlias": [ + "VD1", + "VD2", + "VD3" + ], "sampleInVCF": "sample2", "bioSampleAccession": "SAME00002" }, { - "analysisAlias": "VD3", - "sampleInVCF": "sample2", - "bioSampleAccession": "SAME00002" - }, - { - "analysisAlias": "VD3", + "analysisAlias": [ + "VD3" + ], "sampleInVCF": "sample3", "bioSampleAccession": "SAME00003" }, { - "analysisAlias": "VD4", + "analysisAlias": [ + "VD4", + "VD5" + ], "sampleInVCF": "sample4", "bioSampleObject": { "name": "Lm_17_S8", "characteristics": { - "bioSampleName": "Lm_17_S8", "title": [ - "Bastet normal sample" + { + "text": "Bastet normal sample" + } ], "description": [ - "Test Description" + { + "text": "Test Description" + } ], "taxId": [ - 9447 + { + "text": "9447" + } ], "scientificName": [ - "Lemur catta" + { + "text": "Lemur catta" + } ], - "sex": "Female", - "tissueType": "skin", - "species": [ - "Lemur catta" - ] - } - } - }, - { - "analysisAlias": "VD5", - "sampleInVCF": "sample4", - "bioSampleObject": { - "name": "Lm_17_S8", - "characteristics": { - "bioSampleName": "Lm_17_S8", - "title": [ - "Bastet normal sample" + "sex": [ + { + "text": "Female" + } ], - "description": [ - "Test Description" - ], - "taxId": [ - 9447 - ], - "scientificName": [ - "Lemur catta" + "tissueType": [ + { + "text": "skin" + } ], - "sex": "Female", - "tissueType": "skin", "species": [ - "Lemur catta" + { + "text": "Lemur catta" + } ] } } diff --git a/tests/resources/sample_checker/metadata.json b/tests/resources/sample_checker/metadata.json index 8854cde..20a9d0b 100644 --- a/tests/resources/sample_checker/metadata.json +++ b/tests/resources/sample_checker/metadata.json @@ -23,37 +23,17 @@ }, "sample": [ { - "analysisAlias": "VD1", + "analysisAlias": ["VD1", "VD2", "VD3"], "sampleInVCF": "sample1", "BioSampleAccession": "SAME00001" }, { - "analysisAlias": "VD1", + "analysisAlias": ["VD1", "VD2", "VD3"], "sampleInVCF": "sample2", "BioSampleAccession": "SAME00002" }, { - "analysisAlias": "VD2", - "sampleInVCF": "sample1", - "BioSampleAccession": "SAME00001" - }, - { - "analysisAlias": "VD2", - "sampleInVCF": "sample2", - "BioSampleAccession": "SAME00002" - }, - { - "analysisAlias": "VD3", - "sampleInVCF": "sample1", - "BioSampleAccession": "SAME00001" - }, - { - "analysisAlias": "VD3", - "sampleInVCF": "sample2", - "BioSampleAccession": "SAME00002" - }, - { - "analysisAlias": "VD3", + "analysisAlias": ["VD3"], "sampleInVCF": "sample3", "BioSampleAccession": "SAME00003" } diff --git a/tests/test_xlsx2json.py b/tests/test_xlsx2json.py index 22dd80f..84a6c63 100644 --- a/tests/test_xlsx2json.py +++ b/tests/test_xlsx2json.py @@ -29,7 +29,14 @@ def test_conversion_2_json(self) -> None: xls_filename = os.path.join(self.resource_dir, 'EVA_Submission_test.xlsx') self.parser = XlsxParser(xls_filename, self.conf_filename) output_json = os.path.join(self.resource_dir, 'EVA_Submission_test_output.json') + errors_yaml = os.path.join(self.resource_dir, 'EVA_Submission_test_errors.yml') self.parser.json(output_json) + self.parser.save_errors(errors_yaml) + + # confirm no errors + with open(errors_yaml) as open_file: + errors_data = yaml.safe_load(open_file) + assert errors_data == [] with open(output_json) as open_file: json_data = json.load(open_file)