diff --git a/cli/reporter.py b/cli/reporter.py
index 70fe5f7..b49026c 100755
--- a/cli/reporter.py
+++ b/cli/reporter.py
@@ -267,6 +267,12 @@ def convert_metadata_validation_results(self):
new_description = f'In sheet "{sheet}", row "{row}", column "{column}" is not populated'
else:
new_description = error["description"].replace(sheet_json, sheet)
+ if column is None:
+ # No spreadsheet column could be resolved for this attribute (most likely bioSampleObject), so skip the error
+ continue
+ if 'schema' in new_description:
+ # This error is specific to the JSON schema, so it is not reported as a spreadsheet error
+ continue
self.results['metadata_check']['spreadsheet_errors'].append({
'sheet': sheet, 'row': row, 'column': column,
'description': new_description
@@ -290,8 +296,7 @@ def _convert_metadata_sheet(self, json_attribute, xls2json_conf):
def _convert_metadata_row(self, sheet, json_row, xls2json_conf):
if json_row is None:
- # This is for Sheet that can only have a single entry (Project)
- json_row = 0
+ return ''
if 'header_row' in xls2json_conf[sheet]:
return int(json_row) + xls2json_conf[sheet]['header_row']
else:
@@ -299,7 +304,7 @@ def _convert_metadata_row(self, sheet, json_row, xls2json_conf):
def _convert_metadata_attribute(self, sheet, json_attribute, xls2json_conf):
if json_attribute is None:
- return None
+ return ''
attributes_dict = {}
attributes_dict.update(xls2json_conf[sheet].get('required', {}))
attributes_dict.update(xls2json_conf[sheet].get('optional', {}))
diff --git a/tests/resources/validation_reports/expected_report.html b/tests/resources/validation_reports/expected_report.html
index 4e7b357..d069a3e 100644
--- a/tests/resources/validation_reports/expected_report.html
+++ b/tests/resources/validation_reports/expected_report.html
@@ -17,4 +17,4 @@
th { background-color: lightgrey; }
.fail { background-color: #FFB6C1; }
.pass { background-color: #90EE90; }
- .error-list { display: none; }
Metadata validation results
Ensures that required fields are present and values are formatted correctly. For requirements, please refer to the
EVA website.
❌ Metadata validation check
Full report:
Sheet | Row | Column | Description |
---|
Files | 2 | None | Sheet "Files" is missing |
Project | 2 | Project Title | In sheet "Project", column "Project Title" is not populated |
Project | 2 | Description | In sheet "Project", column "Description" is not populated |
Project | 2 | Tax ID | In sheet "Project", column "Tax ID" is not populated |
Project | 2 | Center | In sheet "Project", column "Center" is not populated |
Analysis | 2 | Analysis Title | In sheet "Analysis", row "2", column "Analysis Title" is not populated |
Analysis | 2 | Description | In sheet "Analysis", row "2", column "Description" is not populated |
Analysis | 2 | Experiment Type | In sheet "Analysis", row "2", column "Experiment Type" is not populated |
Analysis | 2 | Reference | In sheet "Analysis", row "2", column "Reference" is not populated |
Sample | 3 | Sample Accession | In sheet "Sample", row "3", column "Sample Accession" is not populated |
Sample | 3 | None | should have required property 'bioSampleObject' |
Sample | 3 | None | should match exactly one schema in oneOf |
VCF validation results
Checks whether each file is compliant with the
VCF specification. Also checks whether the variants' reference alleles match against the reference assembly.
input_fail.vcf
❌ Assembly check: 26/36 (72.22%)
First 10 errors per category are below. Full report: /path/to/assembly_failed/report
Category | Error |
---|
mismatch error | Chromosome 1, position 35549, reference allele 'G' does not match the reference sequence, expected 'c' |
mismatch error | Chromosome 1, position 35595, reference allele 'G' does not match the reference sequence, expected 'a' |
mismatch error | Chromosome 1, position 35618, reference allele 'G' does not match the reference sequence, expected 'c' |
mismatch error | Chromosome 1, position 35626, reference allele 'A' does not match the reference sequence, expected 'g' |
mismatch error | Chromosome 1, position 35639, reference allele 'T' does not match the reference sequence, expected 'c' |
mismatch error | Chromosome 1, position 35643, reference allele 'T' does not match the reference sequence, expected 'g' |
mismatch error | Chromosome 1, position 35717, reference allele 'T' does not match the reference sequence, expected 'g' |
mismatch error | Chromosome 1, position 35819, reference allele 'T' does not match the reference sequence, expected 'a' |
mismatch error | Chromosome 1, position 35822, reference allele 'T' does not match the reference sequence, expected 'c' |
❌ VCF check: 1 critical errors, 1 non-critical errors, 0 warnings
First 10 errors per category are below. Full report: /path/to/vcf_failed/report
Category | Error |
---|
critical error | Line 4: Error in meta-data section. |
non-critical error | Sample #11, field AD does not match the meta specification Number=R (expected 2 value(s)). AD=.. |
input_passed.vcf
✔ Assembly check: 247/247 (100.0%)
✔ VCF check: 0 critical errors, 0 non-critical errors, 0 warnings
Sample name concordance check
Checks whether information in the metadata is concordant with that contained in the VCF files, in particular sample names.
❌ AA: Sample names concordance check
First 10 errors per category are below. Full report: /path/to/sample/report
Category | Error |
---|
Samples described in the metadata but not in the VCF files | Sample1 |
Samples in the VCF files but not described in the metadata | 1Sample |
\ No newline at end of file
+ .error-list { display: none; }Metadata validation results
Ensures that required fields are present and values are formatted correctly. For requirements, please refer to the
EVA website.
❌ Metadata validation check
Full report:
Sheet | Row | Column | Description |
---|
Files | | | Sheet "Files" is missing |
Project | | Project Title | In sheet "Project", column "Project Title" is not populated |
Project | | Description | In sheet "Project", column "Description" is not populated |
Project | | Tax ID | In sheet "Project", column "Tax ID" is not populated |
Project | | Center | In sheet "Project", column "Center" is not populated |
Analysis | 2 | Analysis Title | In sheet "Analysis", row "2", column "Analysis Title" is not populated |
Analysis | 2 | Description | In sheet "Analysis", row "2", column "Description" is not populated |
Analysis | 2 | Experiment Type | In sheet "Analysis", row "2", column "Experiment Type" is not populated |
Analysis | 2 | Reference | In sheet "Analysis", row "2", column "Reference" is not populated |
Sample | 3 | Sample Accession | In sheet "Sample", row "3", column "Sample Accession" is not populated |
VCF validation results
Checks whether each file is compliant with the
VCF specification. Also checks whether the variants' reference alleles match against the reference assembly.
input_fail.vcf
❌ Assembly check: 26/36 (72.22%)
First 10 errors per category are below. Full report: /path/to/assembly_failed/report
Category | Error |
---|
mismatch error | Chromosome 1, position 35549, reference allele 'G' does not match the reference sequence, expected 'c' |
mismatch error | Chromosome 1, position 35595, reference allele 'G' does not match the reference sequence, expected 'a' |
mismatch error | Chromosome 1, position 35618, reference allele 'G' does not match the reference sequence, expected 'c' |
mismatch error | Chromosome 1, position 35626, reference allele 'A' does not match the reference sequence, expected 'g' |
mismatch error | Chromosome 1, position 35639, reference allele 'T' does not match the reference sequence, expected 'c' |
mismatch error | Chromosome 1, position 35643, reference allele 'T' does not match the reference sequence, expected 'g' |
mismatch error | Chromosome 1, position 35717, reference allele 'T' does not match the reference sequence, expected 'g' |
mismatch error | Chromosome 1, position 35819, reference allele 'T' does not match the reference sequence, expected 'a' |
mismatch error | Chromosome 1, position 35822, reference allele 'T' does not match the reference sequence, expected 'c' |
❌ VCF check: 1 critical errors, 1 non-critical errors, 0 warnings
First 10 errors per category are below. Full report: /path/to/vcf_failed/report
Category | Error |
---|
critical error | Line 4: Error in meta-data section. |
non-critical error | Sample #11, field AD does not match the meta specification Number=R (expected 2 value(s)). AD=.. |
input_passed.vcf
✔ Assembly check: 247/247 (100.0%)
✔ VCF check: 0 critical errors, 0 non-critical errors, 0 warnings
Sample name concordance check
Checks whether information in the metadata is concordant with that contained in the VCF files, in particular sample names.
❌ AA: Sample names concordance check
First 10 errors per category are below. Full report: /path/to/sample/report
Category | Error |
---|
Samples described in the metadata but not in the VCF files | Sample1 |
Samples in the VCF files but not described in the metadata | 1Sample |
\ No newline at end of file
diff --git a/tests/test_report.py b/tests/test_report.py
index e589144..9ca2b2a 100644
--- a/tests/test_report.py
+++ b/tests/test_report.py
@@ -78,9 +78,7 @@
{'property': '/analysis/0.description', 'description': "should have required property 'description'"},
{'property': '/analysis/0.experimentType', 'description': "should have required property 'experimentType'"},
{'property': '/analysis/0.referenceGenome', 'description': "should have required property 'referenceGenome'"},
- {'property': '/sample/0.bioSampleAccession', 'description': "should have required property 'bioSampleAccession'"},
- {'property': '/sample/0.bioSampleObject', 'description': "should have required property 'bioSampleObject'"},
- {'property': '/sample/0', 'description': 'should match exactly one schema in oneOf'}
+ {'property': '/sample/0.bioSampleAccession', 'description': "should have required property 'bioSampleAccession'"}
],
'report_path': '/path/to/metadata/report'
}
@@ -98,5 +96,7 @@ def test_generate_html_report(self):
reporter.convert_metadata_validation_results()
validation_results = reporter.results
report = generate_html_report(validation_results, datetime.datetime(2023, 8, 31, 12, 34, 56), "My cool project")
+ with open('report.html', 'w') as open_html:
+ assert open_html.write(report)
with open(self.expected_report) as open_html:
assert report == open_html.read()
diff --git a/tests/test_reporter.py b/tests/test_reporter.py
index a481109..09e3da9 100644
--- a/tests/test_reporter.py
+++ b/tests/test_reporter.py
@@ -47,8 +47,8 @@ def test__collect_validation_workflow_results(self):
{'property': '/sample/0', 'description': 'should match exactly one schema in oneOf'}
],
'spreadsheet_errors': [
- {'sheet': 'Files', 'row': 2, 'column': None, 'description': 'Sheet "Files" is missing'},
- {'sheet': 'Project', 'row': 2, 'column': 'Project Title',
+ {'sheet': 'Files', 'row': '', 'column': '', 'description': 'Sheet "Files" is missing'},
+ {'sheet': 'Project', 'row': '', 'column': 'Project Title',
'description': 'In sheet "Project", column "Project Title" is not populated'},
{'sheet': 'Analysis', 'row': 2, 'column': 'Description',
'description': 'In sheet "Analysis", row "2", column "Description" is not populated'},
@@ -56,10 +56,6 @@ def test__collect_validation_workflow_results(self):
'description': 'In sheet "Analysis", row "2", column "Reference" is not populated'},
{'sheet': 'Sample', 'row': 3, 'column': 'Sample Accession',
'description': 'In sheet "Sample", row "3", column "Sample Accession" is not populated'},
- {'sheet': 'Sample', 'row': 3, 'column': None,
- 'description': "should have required property 'bioSampleObject'"},
- {'sheet': 'Sample', 'row': 3, 'column': None,
- 'description': 'should match exactly one schema in oneOf'}
]
}
}