Skip to content

Commit

Permalink
fix more tests
Browse files: browse the repository at this point in the history
  • Loading branch information
apriltuesday committed Jul 9, 2024
1 parent 2930ffa commit 9a044ed
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 84 deletions.
6 changes: 3 additions & 3 deletions bin/xlsx2json.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ def get_sample_json_data(self):
json_key = self.xlsx_conf[WORKSHEETS_KEY_NAME][SAMPLE]
sample_json = {json_key: []}
for row in self.get_rows():
row.pop('row_num')
row_num = row.pop('row_num')
json_value = {self.translate_header(SAMPLE, k): v for k, v in row.items() if v is not None}
bio_sample_acc = self.xlsx_conf[SAMPLE][OPTIONAL_HEADERS_KEY_NAME][SAMPLE_ACCESSION_KEY]

Expand All @@ -262,12 +262,12 @@ def get_sample_json_data(self):
if sample_name not in json_value:
self.add_error(f'If BioSample Accession is not provided, the {SAMPLE} worksheet should have '
f'{SAMPLE_NAME_KEY} populated',
sheet=SAMPLE, column=SAMPLE_NAME_KEY)
sheet=SAMPLE, row=row_num, column=SAMPLE_NAME_KEY)
return None
if scientific_name not in json_value:
self.add_error(f'If BioSample Accession is not provided, the {SAMPLE} worksheet should have '
f'{SCIENTIFIC_NAME_KEY} populated',
sheet=SAMPLE, column=SCIENTIFIC_NAME_KEY)
sheet=SAMPLE, row=row_num, column=SCIENTIFIC_NAME_KEY)
return None

biosample_obj = self.get_biosample_object(json_value)
Expand Down
3 changes: 2 additions & 1 deletion eva_sub_cli/metadata_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ def get_samples_per_analysis(metadata):
"""Returns mapping of analysis alias to sample names, based on metadata."""
samples_per_analysis = defaultdict(list)
for sample_info in metadata.get('sample', []):
samples_per_analysis[sample_info.get('analysisAlias')].append(sample_info.get('sampleInVCF'))
for analysis_alias in sample_info.get('analysisAlias', []):
samples_per_analysis[analysis_alias].append(sample_info.get('sampleInVCF'))
return {
analysis_alias: set(samples)
for analysis_alias, samples in samples_per_analysis.items()
Expand Down
97 changes: 40 additions & 57 deletions tests/resources/EVA_Submission_test_with_asm_report.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,90 +64,73 @@
],
"sample": [
{
"analysisAlias": "VD1",
"sampleInVCF": "sample1",
"bioSampleAccession": "SAME00001"
},
{
"analysisAlias": "VD2",
"sampleInVCF": "sample1",
"bioSampleAccession": "SAME00001"
},
{
"analysisAlias": "VD3",
"analysisAlias": [
"VD1",
"VD2",
"VD3"
],
"sampleInVCF": "sample1",
"bioSampleAccession": "SAME00001"
},
{
"analysisAlias": "VD1",
"sampleInVCF": "sample2",
"bioSampleAccession": "SAME00002"
},
{
"analysisAlias": "VD2",
"analysisAlias": [
"VD1",
"VD2",
"VD3"
],
"sampleInVCF": "sample2",
"bioSampleAccession": "SAME00002"
},
{
"analysisAlias": "VD3",
"sampleInVCF": "sample2",
"bioSampleAccession": "SAME00002"
},
{
"analysisAlias": "VD3",
"analysisAlias": [
"VD3"
],
"sampleInVCF": "sample3",
"bioSampleAccession": "SAME00003"
},
{
"analysisAlias": "VD4",
"analysisAlias": [
"VD4",
"VD5"
],
"sampleInVCF": "sample4",
"bioSampleObject": {
"name": "Lm_17_S8",
"characteristics": {
"bioSampleName": "Lm_17_S8",
"title": [
"Bastet normal sample"
{
"text": "Bastet normal sample"
}
],
"description": [
"Test Description"
{
"text": "Test Description"
}
],
"taxId": [
9447
{
"text": "9447"
}
],
"scientificName": [
"Lemur catta"
{
"text": "Lemur catta"
}
],
"sex": "Female",
"tissueType": "skin",
"species": [
"Lemur catta"
]
}
}
},
{
"analysisAlias": "VD5",
"sampleInVCF": "sample4",
"bioSampleObject": {
"name": "Lm_17_S8",
"characteristics": {
"bioSampleName": "Lm_17_S8",
"title": [
"Bastet normal sample"
"sex": [
{
"text": "Female"
}
],
"description": [
"Test Description"
],
"taxId": [
9447
],
"scientificName": [
"Lemur catta"
"tissueType": [
{
"text": "skin"
}
],
"sex": "Female",
"tissueType": "skin",
"species": [
"Lemur catta"
{
"text": "Lemur catta"
}
]
}
}
Expand Down
26 changes: 3 additions & 23 deletions tests/resources/sample_checker/metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,37 +23,17 @@
},
"sample": [
{
"analysisAlias": "VD1",
"analysisAlias": ["VD1", "VD2", "VD3"],
"sampleInVCF": "sample1",
"BioSampleAccession": "SAME00001"
},
{
"analysisAlias": "VD1",
"analysisAlias": ["VD1", "VD2", "VD3"],
"sampleInVCF": "sample2",
"BioSampleAccession": "SAME00002"
},
{
"analysisAlias": "VD2",
"sampleInVCF": "sample1",
"BioSampleAccession": "SAME00001"
},
{
"analysisAlias": "VD2",
"sampleInVCF": "sample2",
"BioSampleAccession": "SAME00002"
},
{
"analysisAlias": "VD3",
"sampleInVCF": "sample1",
"BioSampleAccession": "SAME00001"
},
{
"analysisAlias": "VD3",
"sampleInVCF": "sample2",
"BioSampleAccession": "SAME00002"
},
{
"analysisAlias": "VD3",
"analysisAlias": ["VD3"],
"sampleInVCF": "sample3",
"BioSampleAccession": "SAME00003"
}
Expand Down
7 changes: 7 additions & 0 deletions tests/test_xlsx2json.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,14 @@ def test_conversion_2_json(self) -> None:
xls_filename = os.path.join(self.resource_dir, 'EVA_Submission_test.xlsx')
self.parser = XlsxParser(xls_filename, self.conf_filename)
output_json = os.path.join(self.resource_dir, 'EVA_Submission_test_output.json')
errors_yaml = os.path.join(self.resource_dir, 'EVA_Submission_test_errors.yml')
self.parser.json(output_json)
self.parser.save_errors(errors_yaml)

# confirm no errors
with open(errors_yaml) as open_file:
errors_data = yaml.safe_load(open_file)
assert errors_data == []

with open(output_json) as open_file:
json_data = json.load(open_file)
Expand Down

0 comments on commit 9a044ed

Please sign in to comment.