Skip to content

Commit

Permalink
report metadata errors for json directly when no spreadsheet used
Browse files Browse the repository at this point in the history
  • Loading branch information
nitin-ebi committed Jul 23, 2024
1 parent 5ffe5ee commit 6892adf
Show file tree
Hide file tree
Showing 7 changed files with 292 additions and 29 deletions.
22 changes: 21 additions & 1 deletion eva_sub_cli/jinja_templates/metadata_validation.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@
{% macro metadata_validation_report(validation_results) -%}
{% set results = validation_results.get('metadata_check', {}) %}
{% set spreadsheet_errors = results.get('spreadsheet_errors', []) %}
{% if spreadsheet_errors %}
{% set json_errors = results.get('json_errors', []) %}

{% set has_errors = spreadsheet_errors or json_errors %}
{% if has_errors %}
{% set expand_icon = "▶" %}
{% set icon = "❌" %}
{% set row_class = "report-section fail collapsible" %}
Expand All @@ -29,4 +32,21 @@
</table>
</div>
{% endif %}

{% if json_errors %}
<div class="error-list">
<div class="error-description"><strong>Full report:</strong> {{ results.get('json_report_path', '') }}</div>
<table>
<tr>
<th>Json Property</th><th>Error Description</th>
</tr>
{% for error in json_errors %}
<tr>
<td><strong>{{ error.get('property') }}</strong></td>
<td> {{ error.get('description') }}</td>
</tr>
{% endfor %}
</table>
</div>
{% endif %}
{%- endmacro %}
5 changes: 3 additions & 2 deletions eva_sub_cli/validators/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,8 +361,9 @@ def _collect_metadata_results(self):
self._load_spreadsheet_conversion_errors()
self._parse_biovalidator_validation_results()
self._parse_semantic_metadata_results()
self._convert_biovalidator_validation_to_spreadsheet()
self._write_spreadsheet_validation_results()
if self.metadata_xlsx:
self._convert_biovalidator_validation_to_spreadsheet()
self._write_spreadsheet_validation_results()
self._collect_md5sum_to_metadata()

def _load_spreadsheet_conversion_errors(self):
Expand Down

Large diffs are not rendered by default.

5 changes: 0 additions & 5 deletions tests/test_docker_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,11 +133,6 @@ def test_validate(self):
'md5': '96a80c9368cc3c37095c86fbe6044fb2'}
]

# Check metadata errors
with open(os.path.join(self.validator.output_dir, 'other_validations', 'metadata_validation.txt')) as open_file:
metadata_val_lines = {l.strip() for l in open_file.readlines()}
assert 'must match pattern "^PRJ(EB|NA)\\d+$"' in metadata_val_lines

def test_validate_from_excel(self):
self.validator_from_excel.validate()
self.assertTrue(os.path.isfile(self.validator_from_excel._sample_check_yaml))
194 changes: 175 additions & 19 deletions tests/test_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,159 @@
import eva_sub_cli
from eva_sub_cli.report import generate_html_report

validation_results = {
validation_results_xlsx = {
"assembly_check": {
"input_passed.vcf": {
"report_path": "/path/to/assembly_passed/report",
"error_list": [],
"match": 247,
"mismatch_list": [],
"nb_error": 0,
"nb_mismatch": 0,
"total": 247,
},
"input_fail.vcf": {
"report_path": "/path/to/assembly_failed/report",
"error_list": ["The assembly checking could not be completed: Contig 'chr23' not found in assembly report"],
"match": 26,
"mismatch_list": [
"Chromosome 1, position 35549, reference allele 'G' does not match the reference sequence, expected 'c'",
"Chromosome 1, position 35595, reference allele 'G' does not match the reference sequence, expected 'a'",
"Chromosome 1, position 35618, reference allele 'G' does not match the reference sequence, expected 'c'",
"Chromosome 1, position 35626, reference allele 'A' does not match the reference sequence, expected 'g'",
"Chromosome 1, position 35639, reference allele 'T' does not match the reference sequence, expected 'c'",
"Chromosome 1, position 35643, reference allele 'T' does not match the reference sequence, expected 'g'",
"Chromosome 1, position 35717, reference allele 'T' does not match the reference sequence, expected 'g'",
"Chromosome 1, position 35819, reference allele 'T' does not match the reference sequence, expected 'a'",
"Chromosome 1, position 35822, reference allele 'T' does not match the reference sequence, expected 'c'",
],
"nb_error": 1,
"nb_mismatch": 10,
"total": 36,
},
},
"vcf_check": {
"input_passed.vcf": {
'report_path': '/path/to/vcf_passed/report',
"error_count": 0,
"error_list": [],
"valid": True,
"warning_count": 0,
},
"input_fail.vcf": {
'report_path': '/path/to/vcf_failed/report',
"critical_count": 1,
"critical_list": ["Line 4: Error in meta-data section."],
"error_count": 1,
"error_list": ["Sample #11, field AD does not match the meta specification Number=R (expected 2 value(s)). AD=.."],
"valid": False,
"warning_count": 0,
},
},
"sample_check": {
'report_path': '/path/to/sample/report',
'overall_differences': True,
'results_per_analysis': {
'Analysis A': {
'difference': True,
'more_metadata_submitted_files': [' SampleA1', 'SampleA2 ','SampleA3', 'SampleA4', 'SampleA5', 'SampleA6', 'SampleA7','SampleA8', 'SampleA9', 'SampleA10'],
'more_per_submitted_files_metadata': {},
'more_submitted_files_metadata': ['A1Sample ', ' A2Sample', 'A3Sample', 'A4Sample', 'A5Sample', 'A6Sample', 'A7Sample', 'A8Sample', 'A9Sample', 'A10Sample']
},
'Analysis B': {
'difference': False,
'more_metadata_submitted_files': [],
'more_per_submitted_files_metadata': {},
'more_submitted_files_metadata': []
},
'Analysis C': {
'difference': True,
'more_metadata_submitted_files': ['SampleC1 ', ' SampleC2', 'SampleC3', 'SampleC4'],
'more_per_submitted_files_metadata': {},
'more_submitted_files_metadata': ['C1Sample ', ' C2Sample', 'C3Sample', 'C4Sample']
}
}
},
# NB: the FASTA entries below are not consistent with the number of analyses in this report; they are included to
# demonstrate every possible output of this check.
"fasta_check": {
'not_all_insdc.fa': {
'report_path': '/path/to/not_all_insdc_check.yml',
'all_insdc': False,
'sequences': [
{'sequence_name': '1', 'sequence_md5': 'hsjvchdhdo3ate83jdfd76rp2', 'insdc': True},
{'sequence_name': '2', 'sequence_md5': 'hjfdoijsfc47hfg0gh9qwjrve', 'insdc': False}
],
'metadata_assembly_compatible': True,
'possible_assemblies': {'GCA_1'},
'assembly_in_metadata': 'GCA_1',
'associated_analyses': ['Analysis A']
},
'metadata_asm_not_found.fa': {
'report_path': '/path/to/metadata_asm_not_found.yml',
'all_insdc': True,
'sequences': [
{'sequence_name': '1', 'sequence_md5': 'hsjvchdhdo3ate83jdfd76rp2', 'insdc': True},
{'sequence_name': '2', 'sequence_md5': 'hjfdoijsfc47hfg0gh9qwjrve', 'insdc': True}
],
'possible_assemblies': {'GCA_1'}
},
'metadata_asm_not_match.fa': {
'report_path': '/path/to/metadata_asm_not_match.yml',
'all_insdc': True,
'sequences': [
{'sequence_name': '1', 'sequence_md5': 'hsjvchdhdo3ate83jdfd76rp2', 'insdc': True},
{'sequence_name': '2', 'sequence_md5': 'hjfdoijsfc47hfg0gh9qwjrve', 'insdc': True}
],
'metadata_assembly_compatible': False,
'possible_assemblies': {'GCA_1'},
'assembly_in_metadata': 'GCA_2',
'associated_analyses': ['Analysis B']
},
'metadata_asm_match.fa': {
'report_path': '/path/to/metadata_asm_match.yml',
'all_insdc': True,
'sequences': [
{'sequence_name': '1', 'sequence_md5': 'hsjvchdhdo3ate83jdfd76rp2', 'insdc': True},
{'sequence_name': '2', 'sequence_md5': 'hjfdoijsfc47hfg0gh9qwjrve', 'insdc': True}
],
'metadata_assembly_compatible': True,
'possible_assemblies': {'GCA_1'},
'assembly_in_metadata': 'GCA_1',
'associated_analyses': ['Analysis A']
},
'metadata_error.fa': {
'report_path': '/path/to/metadata_error.yml',
'all_insdc': True,
'sequences': [
{'sequence_name': '1', 'sequence_md5': 'hsjvchdhdo3ate83jdfd76rp2', 'insdc': True},
{'sequence_name': '2', 'sequence_md5': 'hjfdoijsfc47hfg0gh9qwjrve', 'insdc': True}
],
'metadata_assembly_compatible': True,
'possible_assemblies': {'GCA_1'},
'assembly_in_metadata': 'GCA_1',
'associated_analyses': ['Analysis C'],
'connection_error': '500 Server Error: Internal Server Error for url: https://www.ebi.ac.uk/eva/webservices/contig-alias/v1/chromosomes/md5checksum/hjfdoijsfc47hfg0gh9qwjrve'
}
},
'metadata_check': {
'spreadsheet_errors': [
{'sheet': 'Files', 'row': '', 'column': '', 'description': 'Sheet "Files" is missing'},
{'sheet': 'Project', 'row': '', 'column': 'Project Title', 'description': 'In sheet "Project", column "Project Title" is not populated'},
{'sheet': 'Project', 'row': '', 'column': 'Description', 'description': 'In sheet "Project", column "Description" is not populated'},
{'sheet': 'Project', 'row': '', 'column': 'Tax ID', 'description': 'In sheet "Project", column "Tax ID" is not populated'},
{'sheet': 'Project', 'row': '', 'column': 'Center', 'description': 'In sheet "Project", column "Center" is not populated'},
{'sheet': 'Analysis', 'row': 2, 'column': 'Analysis Title', 'description': 'In sheet "Analysis", row "2", column "Analysis Title" is not populated'},
{'sheet': 'Analysis', 'row': 2, 'column': 'Description', 'description': 'In sheet "Analysis", row "2", column "Description" is not populated'},
{'sheet': 'Analysis', 'row': 2, 'column': 'Experiment Type', 'description': 'In sheet "Analysis", row "2", column "Experiment Type" is not populated'},
{'sheet': 'Analysis', 'row': 2, 'column': 'Reference', 'description': 'In sheet "Analysis", row "2", column "Reference" is not populated'},
{'sheet': 'Sample', 'row': 3, 'column': 'Sample Accession', 'description': 'In sheet "Sample", row "3", column "Sample Accession" is not populated'}
],
'spreadsheet_report_path': '/path/to/metadata/metadata_spreadsheet_validation.txt',
}
}

validation_results_json = {
"assembly_check": {
"input_passed.vcf": {
"report_path": "/path/to/assembly_passed/report",
Expand Down Expand Up @@ -155,27 +307,15 @@
{'property': '/sample/0.bioSampleObject', 'description': "should have required property 'bioSampleObject'"},
{'property': '/sample/0', 'description': 'should match exactly one schema in oneOf'}
],
'json_report_path': '/path/to/metadata/report',
'spreadsheet_errors': [
{'sheet': 'Files', 'row': '', 'column': '', 'description': 'Sheet "Files" is missing'},
{'sheet': 'Project', 'row': '', 'column': 'Project Title', 'description': 'In sheet "Project", column "Project Title" is not populated'},
{'sheet': 'Project', 'row': '', 'column': 'Description', 'description': 'In sheet "Project", column "Description" is not populated'},
{'sheet': 'Project', 'row': '', 'column': 'Tax ID', 'description': 'In sheet "Project", column "Tax ID" is not populated'},
{'sheet': 'Project', 'row': '', 'column': 'Center', 'description': 'In sheet "Project", column "Center" is not populated'},
{'sheet': 'Analysis', 'row': 2, 'column': 'Analysis Title', 'description': 'In sheet "Analysis", row "2", column "Analysis Title" is not populated'},
{'sheet': 'Analysis', 'row': 2, 'column': 'Description', 'description': 'In sheet "Analysis", row "2", column "Description" is not populated'},
{'sheet': 'Analysis', 'row': 2, 'column': 'Experiment Type', 'description': 'In sheet "Analysis", row "2", column "Experiment Type" is not populated'},
{'sheet': 'Analysis', 'row': 2, 'column': 'Reference', 'description': 'In sheet "Analysis", row "2", column "Reference" is not populated'},
{'sheet': 'Sample', 'row': 3, 'column': 'Sample Accession', 'description': 'In sheet "Sample", row "3", column "Sample Accession" is not populated'}
],
'spreadsheet_report_path': '/path/to/metadata/metadata_spreadsheet_validation.txt',
'json_report_path': '/path/to/json/metadata/report'
}
}


class TestReport(TestCase):
resource_dir = os.path.join(os.path.dirname(__file__), 'resources')
expected_report = os.path.join(resource_dir, 'validation_reports', 'expected_report.html')
expected_report_metadata_xlsx = os.path.join(resource_dir, 'validation_reports', 'expected_report_metadata_xlsx.html')
expected_report_metadata_json = os.path.join(resource_dir, 'validation_reports', 'expected_report_metadata_json.html')
test_project_name = "My cool project"
test_validation_date = datetime.datetime(2023, 8, 31, 12, 34, 56)
test_submission_dir = "/test/submission/dir"
Expand All @@ -184,13 +324,29 @@ class TestReport(TestCase):
test_vcf_fasta_analysis_mapping.append({'vcf_file': 'input_pass.vcf', 'fasta_file': 'input_pass.fa', 'analysis': 'B'})
test_vcf_fasta_analysis_mapping.append({'vcf_file': 'input_test.vcf', 'fasta_file': 'input_test.fa', 'analysis': 'could not be linked'})

def test_generate_html_report(self):
report = generate_html_report(validation_results, self.test_validation_date, self.test_submission_dir,
def test_generate_html_report_metadata_xlsx(self):
    """Render the HTML report from the spreadsheet (xlsx) fixture and compare it with the stored expected report.

    The rendered report is also written to ``report.html`` in the current working directory. The file is
    removed only after the comparison succeeds, so a failing run leaves it behind for inspection.
    """
    report = generate_html_report(validation_results_xlsx, self.test_validation_date, self.test_submission_dir,
                                  self.test_vcf_fasta_analysis_mapping, self.test_project_name)
    # Explicit UTF-8: the metadata template emits non-ASCII icons (e.g. "❌"), which the platform default
    # encoding cannot always represent.
    with open('report.html', 'w', encoding='utf-8') as open_file:
        open_file.write(report)

    # NOTE(review): assumes the expected report resource is stored as UTF-8 - confirm.
    with open(self.expected_report_metadata_xlsx, encoding='utf-8') as open_html:
        expected_report_text = open_html.read()
    # Inject the version in the expected report
    expected_report_text = expected_report_text.replace('cligeneratedversion', eva_sub_cli.__version__)
    assert report == expected_report_text

    # Remove output file if assert passes
    if os.path.exists('report.html'):
        os.remove('report.html')

def test_generate_html_report_metadata_json(self):
report = generate_html_report(validation_results_json, self.test_validation_date, self.test_submission_dir,
self.test_vcf_fasta_analysis_mapping, self.test_project_name)
with open('report.html', 'w') as open_file:
open_file.write(report)

with open(self.expected_report) as open_html:
with open(self.expected_report_metadata_json) as open_html:
expected_report_text = open_html.read()
# Inject the version in the expected report
expected_report_text = expected_report_text.replace('cligeneratedversion', eva_sub_cli.__version__)
Expand Down
Loading

0 comments on commit 6892adf

Please sign in to comment.