diff --git a/cli/reporter.py b/cli/reporter.py index 70fe5f7..b49026c 100755 --- a/cli/reporter.py +++ b/cli/reporter.py @@ -267,6 +267,12 @@ def convert_metadata_validation_results(self): new_description = f'In sheet "{sheet}", row "{row}", column "{column}" is not populated' else: new_description = error["description"].replace(sheet_json, sheet) + if column is None: + # We do not know this attribute. It's most likely about bioSampleObject + continue + if 'schema' in new_description: + # This is an error specific to json schema + continue self.results['metadata_check']['spreadsheet_errors'].append({ 'sheet': sheet, 'row': row, 'column': column, 'description': new_description @@ -290,8 +296,7 @@ def _convert_metadata_sheet(self, json_attribute, xls2json_conf): def _convert_metadata_row(self, sheet, json_row, xls2json_conf): if json_row is None: - # This is for Sheet that can only have a single entry (Project) - json_row = 0 + return '' if 'header_row' in xls2json_conf[sheet]: return int(json_row) + xls2json_conf[sheet]['header_row'] else: @@ -299,7 +304,7 @@ def _convert_metadata_row(self, sheet, json_row, xls2json_conf): def _convert_metadata_attribute(self, sheet, json_attribute, xls2json_conf): if json_attribute is None: - return None + return '' attributes_dict = {} attributes_dict.update(xls2json_conf[sheet].get('required', {})) attributes_dict.update(xls2json_conf[sheet].get('optional', {})) diff --git a/tests/resources/validation_reports/expected_report.html b/tests/resources/validation_reports/expected_report.html index 4e7b357..d069a3e 100644 --- a/tests/resources/validation_reports/expected_report.html +++ b/tests/resources/validation_reports/expected_report.html @@ -17,4 +17,4 @@ th { background-color: lightgrey; } .fail { background-color: #FFB6C1; } .pass { background-color: #90EE90; } - .error-list { display: none; }

Validation Report: My cool project

Generated at 2023-08-31 12:34:56

Metadata validation results

Ensures that required fields are present and values are formatted correctly. For requirements, please refer to the EVA website.
❌ Metadata validation check
Full report:
SheetRowColumnDescription
Files2NoneSheet "Files" is missing
Project2Project TitleIn sheet "Project", column "Project Title" is not populated
Project2DescriptionIn sheet "Project", column "Description" is not populated
Project2Tax IDIn sheet "Project", column "Tax ID" is not populated
Project2CenterIn sheet "Project", column "Center" is not populated
Analysis2Analysis TitleIn sheet "Analysis", row "2", column "Analysis Title" is not populated
Analysis2DescriptionIn sheet "Analysis", row "2", column "Description" is not populated
Analysis2Experiment TypeIn sheet "Analysis", row "2", column "Experiment Type" is not populated
Analysis2ReferenceIn sheet "Analysis", row "2", column "Reference" is not populated
Sample3Sample AccessionIn sheet "Sample", row "3", column "Sample Accession" is not populated
Sample3Noneshould have required property 'bioSampleObject'
Sample3Noneshould match exactly one schema in oneOf

VCF validation results

Checks whether each file is compliant with the VCF specification. Also checks whether the variants' reference alleles match against the reference assembly.

input_fail.vcf

❌ Assembly check: 26/36 (72.22%)
First 10 errors per category are below. Full report: /path/to/assembly_failed/report
CategoryError
mismatch errorChromosome 1, position 35549, reference allele 'G' does not match the reference sequence, expected 'c'
mismatch errorChromosome 1, position 35595, reference allele 'G' does not match the reference sequence, expected 'a'
mismatch errorChromosome 1, position 35618, reference allele 'G' does not match the reference sequence, expected 'c'
mismatch errorChromosome 1, position 35626, reference allele 'A' does not match the reference sequence, expected 'g'
mismatch errorChromosome 1, position 35639, reference allele 'T' does not match the reference sequence, expected 'c'
mismatch errorChromosome 1, position 35643, reference allele 'T' does not match the reference sequence, expected 'g'
mismatch errorChromosome 1, position 35717, reference allele 'T' does not match the reference sequence, expected 'g'
mismatch errorChromosome 1, position 35819, reference allele 'T' does not match the reference sequence, expected 'a'
mismatch errorChromosome 1, position 35822, reference allele 'T' does not match the reference sequence, expected 'c'
❌ VCF check: 1 critical errors, 1 non-critical errors, 0 warnings
First 10 errors per category are below. Full report: /path/to/vcf_failed/report
CategoryError
critical errorLine 4: Error in meta-data section.
non-critical errorSample #11, field AD does not match the meta specification Number=R (expected 2 value(s)). AD=..

input_passed.vcf

✔ Assembly check: 247/247 (100.0%)
✔ VCF check: 0 critical errors, 0 non-critical errors, 0 warnings

Sample name concordance check

Checks whether information in the metadata is concordant with that contained in the VCF files, in particular sample names.
❌ AA: Sample names concordance check
First 10 errors per category are below. Full report: /path/to/sample/report
CategoryError
Samples described in the metadata but not in the VCF filesSample1
Samples in the VCF files but not described in the metadata1Sample
\ No newline at end of file + .error-list { display: none; }

Validation Report: My cool project

Generated at 2023-08-31 12:34:56

Metadata validation results

Ensures that required fields are present and values are formatted correctly. For requirements, please refer to the EVA website.
❌ Metadata validation check
Full report:
SheetRowColumnDescription
FilesSheet "Files" is missing
ProjectProject TitleIn sheet "Project", column "Project Title" is not populated
ProjectDescriptionIn sheet "Project", column "Description" is not populated
ProjectTax IDIn sheet "Project", column "Tax ID" is not populated
ProjectCenterIn sheet "Project", column "Center" is not populated
Analysis2Analysis TitleIn sheet "Analysis", row "2", column "Analysis Title" is not populated
Analysis2DescriptionIn sheet "Analysis", row "2", column "Description" is not populated
Analysis2Experiment TypeIn sheet "Analysis", row "2", column "Experiment Type" is not populated
Analysis2ReferenceIn sheet "Analysis", row "2", column "Reference" is not populated
Sample3Sample AccessionIn sheet "Sample", row "3", column "Sample Accession" is not populated

VCF validation results

Checks whether each file is compliant with the VCF specification. Also checks whether the variants' reference alleles match against the reference assembly.

input_fail.vcf

❌ Assembly check: 26/36 (72.22%)
First 10 errors per category are below. Full report: /path/to/assembly_failed/report
CategoryError
mismatch errorChromosome 1, position 35549, reference allele 'G' does not match the reference sequence, expected 'c'
mismatch errorChromosome 1, position 35595, reference allele 'G' does not match the reference sequence, expected 'a'
mismatch errorChromosome 1, position 35618, reference allele 'G' does not match the reference sequence, expected 'c'
mismatch errorChromosome 1, position 35626, reference allele 'A' does not match the reference sequence, expected 'g'
mismatch errorChromosome 1, position 35639, reference allele 'T' does not match the reference sequence, expected 'c'
mismatch errorChromosome 1, position 35643, reference allele 'T' does not match the reference sequence, expected 'g'
mismatch errorChromosome 1, position 35717, reference allele 'T' does not match the reference sequence, expected 'g'
mismatch errorChromosome 1, position 35819, reference allele 'T' does not match the reference sequence, expected 'a'
mismatch errorChromosome 1, position 35822, reference allele 'T' does not match the reference sequence, expected 'c'
❌ VCF check: 1 critical errors, 1 non-critical errors, 0 warnings
First 10 errors per category are below. Full report: /path/to/vcf_failed/report
CategoryError
critical errorLine 4: Error in meta-data section.
non-critical errorSample #11, field AD does not match the meta specification Number=R (expected 2 value(s)). AD=..

input_passed.vcf

✔ Assembly check: 247/247 (100.0%)
✔ VCF check: 0 critical errors, 0 non-critical errors, 0 warnings

Sample name concordance check

Checks whether information in the metadata is concordant with that contained in the VCF files, in particular sample names.
❌ AA: Sample names concordance check
First 10 errors per category are below. Full report: /path/to/sample/report
CategoryError
Samples described in the metadata but not in the VCF filesSample1
Samples in the VCF files but not described in the metadata1Sample
\ No newline at end of file diff --git a/tests/test_report.py b/tests/test_report.py index e589144..9ca2b2a 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -78,9 +78,7 @@ {'property': '/analysis/0.description', 'description': "should have required property 'description'"}, {'property': '/analysis/0.experimentType', 'description': "should have required property 'experimentType'"}, {'property': '/analysis/0.referenceGenome', 'description': "should have required property 'referenceGenome'"}, - {'property': '/sample/0.bioSampleAccession', 'description': "should have required property 'bioSampleAccession'"}, - {'property': '/sample/0.bioSampleObject', 'description': "should have required property 'bioSampleObject'"}, - {'property': '/sample/0', 'description': 'should match exactly one schema in oneOf'} + {'property': '/sample/0.bioSampleAccession', 'description': "should have required property 'bioSampleAccession'"} ], 'report_path': '/path/to/metadata/report' } @@ -98,5 +96,7 @@ def test_generate_html_report(self): reporter.convert_metadata_validation_results() validation_results = reporter.results report = generate_html_report(validation_results, datetime.datetime(2023, 8, 31, 12, 34, 56), "My cool project") + with open('report.html', 'w') as open_html: + assert open_html.write(report) with open(self.expected_report) as open_html: assert report == open_html.read() diff --git a/tests/test_reporter.py b/tests/test_reporter.py index a481109..09e3da9 100644 --- a/tests/test_reporter.py +++ b/tests/test_reporter.py @@ -47,8 +47,8 @@ def test__collect_validation_workflow_results(self): {'property': '/sample/0', 'description': 'should match exactly one schema in oneOf'} ], 'spreadsheet_errors': [ - {'sheet': 'Files', 'row': 2, 'column': None, 'description': 'Sheet "Files" is missing'}, - {'sheet': 'Project', 'row': 2, 'column': 'Project Title', + {'sheet': 'Files', 'row': '', 'column': '', 'description': 'Sheet "Files" is missing'}, + {'sheet': 'Project', 'row': '', 'column': 'Project Title', 'description': 'In sheet "Project", column "Project Title" is not populated'}, {'sheet': 'Analysis', 'row': 2, 'column': 'Description', 'description': 'In sheet "Analysis", row "2", column "Description" is not populated'}, @@ -56,10 +56,6 @@ def test__collect_validation_workflow_results(self): 'description': 'In sheet "Analysis", row "2", column "Reference" is not populated'}, {'sheet': 'Sample', 'row': 3, 'column': 'Sample Accession', 'description': 'In sheet "Sample", row "3", column "Sample Accession" is not populated'}, - {'sheet': 'Sample', 'row': 3, 'column': None, - 'description': "should have required property 'bioSampleObject'"}, - {'sheet': 'Sample', 'row': 3, 'column': None, - 'description': 'should match exactly one schema in oneOf'} ] } }