diff --git a/eva_sub_cli/jinja_templates/metadata_validation.html b/eva_sub_cli/jinja_templates/metadata_validation.html
index 0c5e960..05d1328 100644
--- a/eva_sub_cli/jinja_templates/metadata_validation.html
+++ b/eva_sub_cli/jinja_templates/metadata_validation.html
@@ -2,7 +2,10 @@
{% macro metadata_validation_report(validation_results) -%}
{% set results = validation_results.get('metadata_check', {}) %}
{% set spreadsheet_errors = results.get('spreadsheet_errors', []) %}
- {% if spreadsheet_errors %}
+ {% set json_errors = results.get('json_errors', []) %}
+
+ {% set has_errors = spreadsheet_errors or json_errors %}
+ {% if has_errors %}
{% set expand_icon = "▶" %}
{% set icon = "❌" %}
{% set row_class = "report-section fail collapsible" %}
@@ -29,4 +32,21 @@
{% endif %}
+
+ {% if json_errors %}
+
+
Full report: {{ results.get('json_report_path', '') }}
+
+
+ JSON Property | Error Description |
+
+ {% for error in json_errors %}
+
+ {{ error.get('property') }} |
+ {{ error.get('description') }} |
+
+ {% endfor %}
+
+
+ {% endif %}
{%- endmacro %}
\ No newline at end of file
diff --git a/eva_sub_cli/validators/validator.py b/eva_sub_cli/validators/validator.py
index bd201d5..246c1ba 100755
--- a/eva_sub_cli/validators/validator.py
+++ b/eva_sub_cli/validators/validator.py
@@ -361,8 +361,9 @@ def _collect_metadata_results(self):
self._load_spreadsheet_conversion_errors()
self._parse_biovalidator_validation_results()
self._parse_semantic_metadata_results()
- self._convert_biovalidator_validation_to_spreadsheet()
- self._write_spreadsheet_validation_results()
+ if self.metadata_xlsx:
+ self._convert_biovalidator_validation_to_spreadsheet()
+ self._write_spreadsheet_validation_results()
self._collect_file_info_to_metadata()
def _load_spreadsheet_conversion_errors(self):
diff --git a/tests/resources/validation_reports/expected_report_metadata_json.html b/tests/resources/validation_reports/expected_report_metadata_json.html
new file mode 100644
index 0000000..6ef2da7
--- /dev/null
+++ b/tests/resources/validation_reports/expected_report_metadata_json.html
@@ -0,0 +1,22 @@
+Validation ReportProject Summary
General details about the project
Project Title: My cool project
Validation Date: 2023-08-31 12:34:56
Submission Directory: /test/submission/dir
▶ Files mapping
VCF File | Fasta File | Analysis |
---|
input_fail.vcf | input_fail.fa | A |
input_pass.vcf | input_pass.fa | B |
input_test.vcf | input_test.fa | could not be linked |
Metadata validation results
Ensures that required fields are present and values are formatted correctly. For requirements, please refer to the
EVA website.
▶ ❌ Metadata validation check
Full report: /path/to/json/metadata/report
JSON Property | Error Description |
---|
.files | should have required property 'files' |
/project.title | should have required property 'title' |
/project.description | should have required property 'description' |
/project.taxId | should have required property 'taxId' |
/project.centre | should have required property 'centre' |
/analysis/0.analysisTitle | should have required property 'analysisTitle' |
/analysis/0.description | should have required property 'description' |
/analysis/0.experimentType | should have required property 'experimentType' |
/analysis/0.referenceGenome | should have required property 'referenceGenome' |
/sample/0.bioSampleAccession | should have required property 'bioSampleAccession' |
/sample/0.bioSampleObject | should have required property 'bioSampleObject' |
/sample/0 | should match exactly one schema in oneOf |
VCF validation results
Checks whether each file is compliant with the
VCF specification. Also checks whether the variants' reference alleles match against the reference assembly.
input_fail.vcf
▶ ❌ Assembly check: 26/36 (72.22%)
First 10 errors per category are below. Full report: /path/to/assembly_failed/report
Category | Error |
---|
Parsing Error | The assembly checking could not be completed: Contig 'chr23' not found in assembly report |
mismatch error | Chromosome 1, position 35549, reference allele 'G' does not match the reference sequence, expected 'c' |
mismatch error | Chromosome 1, position 35595, reference allele 'G' does not match the reference sequence, expected 'a' |
mismatch error | Chromosome 1, position 35618, reference allele 'G' does not match the reference sequence, expected 'c' |
mismatch error | Chromosome 1, position 35626, reference allele 'A' does not match the reference sequence, expected 'g' |
mismatch error | Chromosome 1, position 35639, reference allele 'T' does not match the reference sequence, expected 'c' |
mismatch error | Chromosome 1, position 35643, reference allele 'T' does not match the reference sequence, expected 'g' |
mismatch error | Chromosome 1, position 35717, reference allele 'T' does not match the reference sequence, expected 'g' |
mismatch error | Chromosome 1, position 35819, reference allele 'T' does not match the reference sequence, expected 'a' |
mismatch error | Chromosome 1, position 35822, reference allele 'T' does not match the reference sequence, expected 'c' |
▶ ❌ VCF check: 1 critical errors, 1 non-critical errors
First 10 errors per category are below. Full report: /path/to/vcf_failed/report
Category | Error |
---|
critical error | Line 4: Error in meta-data section. |
non-critical error | Sample #11, field AD does not match the meta specification Number=R (expected 2 value(s)). AD=.. |
input_passed.vcf
✔ Assembly check: 247/247 (100.0%)
✔ VCF check: 0 critical errors, 0 non-critical errors
Sample name concordance check
Checks whether information in the metadata is concordant with that contained in the VCF files, in particular sample names.
▶ ❌ Analysis A: Sample names in metadata do not match with those in VCF files
Category | First 5 Errors For Category | Link To View All Errors |
---|
Samples described in the metadata but not in the VCF files | SampleA1, SampleA2 , SampleA3, SampleA4, SampleA5 | Show All Errors For Category |
Samples in the VCF files but not described in the metadata | A1Sample , A2Sample, A3Sample, A4Sample, A5Sample | Show All Errors For Category |
All Errors For Category - Samples in the VCF files but not described in the metadata:
- A1Sample•
- •A2Sample
- A3Sample
- A4Sample
- A5Sample
- A6Sample
- A7Sample
- A8Sample
- A9Sample
- A10Sample
Hide ✔ Analysis B: Sample names in metadata match with those in VCF files
▶ ❌ Analysis C: Sample names in metadata do not match with those in VCF files
Category | First 5 Errors For Category | Link To View All Errors |
---|
Samples described in the metadata but not in the VCF files | SampleC1 , SampleC2, SampleC3, SampleC4 | Show All Errors For Category |
Samples in the VCF files but not described in the metadata | C1Sample , C2Sample, C3Sample, C4Sample | Show All Errors For Category |
All Errors For Category - Samples in the VCF files but not described in the metadata:
- C1Sample•
- •C2Sample
- C3Sample
- C4Sample
HideReference genome INSDC check
Checks that the reference sequences in the FASTA file used to call the variants are accessioned in INSDC. Also checks if the reference assembly accession in the metadata matches the one determined from the FASTA file.
metadata_asm_match.fa
✔ All sequences are INSDC accessioned
✔ Analysis A: Assembly accession in metadata is compatible
metadata_asm_not_found.fa
✔ All sequences are INSDC accessioned
▶ ❌ No assembly accession found in metadata
Full report: /path/to/metadata_asm_not_found.yml
Category | Accessions |
---|
Assembly accession found in metadata | Not found |
Assembly accession(s) compatible with FASTA | GCA_1 |
metadata_asm_not_match.fa
✔ All sequences are INSDC accessioned
▶ ❌ Analysis B: Assembly accession in metadata is not compatible
Full report: /path/to/metadata_asm_not_match.yml
Category | Accessions |
---|
Assembly accession found in metadata | GCA_2 |
Assembly accession(s) compatible with FASTA | GCA_1 |
metadata_error.fa
Warning: The following results may be incomplete due to problems with external services. Please try again later for complete results.
Error message: 500 Server Error: Internal Server Error for url: https://www.ebi.ac.uk/eva/webservices/contig-alias/v1/chromosomes/md5checksum/hjfdoijsfc47hfg0gh9qwjrve
✔ All sequences are INSDC accessioned
✔ Analysis C: Assembly accession in metadata is compatible
not_all_insdc.fa
▶ ❌ Some sequences are not INSDC accessioned
First 10 sequences not in INSDC. Full report: /path/to/not_all_insdc_check.yml
Sequence name | Refget md5 |
---|
2 | hjfdoijsfc47hfg0gh9qwjrve |
✔ Analysis A: Assembly accession in metadata is compatible
\ No newline at end of file
diff --git a/tests/resources/validation_reports/expected_report.html b/tests/resources/validation_reports/expected_report_metadata_xlsx.html
similarity index 100%
rename from tests/resources/validation_reports/expected_report.html
rename to tests/resources/validation_reports/expected_report_metadata_xlsx.html
diff --git a/tests/test_report.py b/tests/test_report.py
index 4b55ee1..db2f525 100644
--- a/tests/test_report.py
+++ b/tests/test_report.py
@@ -5,7 +5,159 @@
import eva_sub_cli
from eva_sub_cli.report import generate_html_report
-validation_results = {
+# Validation results fixture passed to generate_html_report() by test_generate_html_report_metadata_xlsx.
+# Its 'metadata_check' entry only carries spreadsheet errors and the spreadsheet report path (no 'json_errors').
+validation_results_xlsx = {
+ "assembly_check": {
+ "input_passed.vcf": {
+ "report_path": "/path/to/assembly_passed/report",
+ "error_list": [],
+ "match": 247,
+ "mismatch_list": [],
+ "nb_error": 0,
+ "nb_mismatch": 0,
+ "total": 247,
+ },
+ "input_fail.vcf": {
+ "report_path": "/path/to/assembly_failed/report",
+ "error_list": ["The assembly checking could not be completed: Contig 'chr23' not found in assembly report"],
+ "match": 26,
+ "mismatch_list": [
+ "Chromosome 1, position 35549, reference allele 'G' does not match the reference sequence, expected 'c'",
+ "Chromosome 1, position 35595, reference allele 'G' does not match the reference sequence, expected 'a'",
+ "Chromosome 1, position 35618, reference allele 'G' does not match the reference sequence, expected 'c'",
+ "Chromosome 1, position 35626, reference allele 'A' does not match the reference sequence, expected 'g'",
+ "Chromosome 1, position 35639, reference allele 'T' does not match the reference sequence, expected 'c'",
+ "Chromosome 1, position 35643, reference allele 'T' does not match the reference sequence, expected 'g'",
+ "Chromosome 1, position 35717, reference allele 'T' does not match the reference sequence, expected 'g'",
+ "Chromosome 1, position 35819, reference allele 'T' does not match the reference sequence, expected 'a'",
+ "Chromosome 1, position 35822, reference allele 'T' does not match the reference sequence, expected 'c'",
+ ],
+ "nb_error": 1,
+ # NOTE(review): nb_mismatch is 10 while mismatch_list above holds 9 entries - presumably intentional; confirm.
+ "nb_mismatch": 10,
+ "total": 36,
+ },
+ },
+ "vcf_check": {
+ "input_passed.vcf": {
+ 'report_path': '/path/to/vcf_passed/report',
+ "error_count": 0,
+ "error_list": [],
+ "valid": True,
+ "warning_count": 0,
+ },
+ "input_fail.vcf": {
+ 'report_path': '/path/to/vcf_failed/report',
+ "critical_count": 1,
+ "critical_list": ["Line 4: Error in meta-data section."],
+ "error_count": 1,
+ "error_list": ["Sample #11, field AD does not match the meta specification Number=R (expected 2 value(s)). AD=.."],
+ "valid": False,
+ "warning_count": 0,
+ },
+ },
+ "sample_check": {
+ 'report_path': '/path/to/sample/report',
+ 'overall_differences': True,
+ 'results_per_analysis': {
+ 'Analysis A': {
+ 'difference': True,
+ 'more_metadata_submitted_files': [' SampleA1', 'SampleA2 ','SampleA3', 'SampleA4', 'SampleA5', 'SampleA6', 'SampleA7','SampleA8', 'SampleA9', 'SampleA10'],
+ 'more_per_submitted_files_metadata': {},
+ 'more_submitted_files_metadata': ['A1Sample ', ' A2Sample', 'A3Sample', 'A4Sample', 'A5Sample', 'A6Sample', 'A7Sample', 'A8Sample', 'A9Sample', 'A10Sample']
+ },
+ 'Analysis B': {
+ 'difference': False,
+ 'more_metadata_submitted_files': [],
+ 'more_per_submitted_files_metadata': {},
+ 'more_submitted_files_metadata': []
+ },
+ 'Analysis C': {
+ 'difference': True,
+ 'more_metadata_submitted_files': ['SampleC1 ', ' SampleC2', 'SampleC3', 'SampleC4'],
+ 'more_per_submitted_files_metadata': {},
+ 'more_submitted_files_metadata': ['C1Sample ', ' C2Sample', 'C3Sample', 'C4Sample']
+ }
+ }
+ },
+ # NB. obviously this doesn't make sense for the number of analyses in this report, but demonstrates the possible
+ # outputs for this check.
+ "fasta_check": {
+ 'not_all_insdc.fa': {
+ 'report_path': '/path/to/not_all_insdc_check.yml',
+ 'all_insdc': False,
+ 'sequences': [
+ {'sequence_name': '1', 'sequence_md5': 'hsjvchdhdo3ate83jdfd76rp2', 'insdc': True},
+ {'sequence_name': '2', 'sequence_md5': 'hjfdoijsfc47hfg0gh9qwjrve', 'insdc': False}
+ ],
+ 'metadata_assembly_compatible': True,
+ 'possible_assemblies': {'GCA_1'},
+ 'assembly_in_metadata': 'GCA_1',
+ 'associated_analyses': ['Analysis A']
+ },
+ 'metadata_asm_not_found.fa': {
+ 'report_path': '/path/to/metadata_asm_not_found.yml',
+ 'all_insdc': True,
+ 'sequences': [
+ {'sequence_name': '1', 'sequence_md5': 'hsjvchdhdo3ate83jdfd76rp2', 'insdc': True},
+ {'sequence_name': '2', 'sequence_md5': 'hjfdoijsfc47hfg0gh9qwjrve', 'insdc': True}
+ ],
+ 'possible_assemblies': {'GCA_1'}
+ },
+ 'metadata_asm_not_match.fa': {
+ 'report_path': '/path/to/metadata_asm_not_match.yml',
+ 'all_insdc': True,
+ 'sequences': [
+ {'sequence_name': '1', 'sequence_md5': 'hsjvchdhdo3ate83jdfd76rp2', 'insdc': True},
+ {'sequence_name': '2', 'sequence_md5': 'hjfdoijsfc47hfg0gh9qwjrve', 'insdc': True}
+ ],
+ 'metadata_assembly_compatible': False,
+ 'possible_assemblies': {'GCA_1'},
+ 'assembly_in_metadata': 'GCA_2',
+ 'associated_analyses': ['Analysis B']
+ },
+ 'metadata_asm_match.fa': {
+ 'report_path': '/path/to/metadata_asm_match.yml',
+ 'all_insdc': True,
+ 'sequences': [
+ {'sequence_name': '1', 'sequence_md5': 'hsjvchdhdo3ate83jdfd76rp2', 'insdc': True},
+ {'sequence_name': '2', 'sequence_md5': 'hjfdoijsfc47hfg0gh9qwjrve', 'insdc': True}
+ ],
+ 'metadata_assembly_compatible': True,
+ 'possible_assemblies': {'GCA_1'},
+ 'assembly_in_metadata': 'GCA_1',
+ 'associated_analyses': ['Analysis A']
+ },
+ 'metadata_error.fa': {
+ 'report_path': '/path/to/metadata_error.yml',
+ 'all_insdc': True,
+ 'sequences': [
+ {'sequence_name': '1', 'sequence_md5': 'hsjvchdhdo3ate83jdfd76rp2', 'insdc': True},
+ {'sequence_name': '2', 'sequence_md5': 'hjfdoijsfc47hfg0gh9qwjrve', 'insdc': True}
+ ],
+ 'metadata_assembly_compatible': True,
+ 'possible_assemblies': {'GCA_1'},
+ 'assembly_in_metadata': 'GCA_1',
+ 'associated_analyses': ['Analysis C'],
+ 'connection_error': '500 Server Error: Internal Server Error for url: https://www.ebi.ac.uk/eva/webservices/contig-alias/v1/chromosomes/md5checksum/hjfdoijsfc47hfg0gh9qwjrve'
+ }
+ },
+ 'metadata_check': {
+ 'spreadsheet_errors': [
+ {'sheet': 'Files', 'row': '', 'column': '', 'description': 'Sheet "Files" is missing'},
+ {'sheet': 'Project', 'row': '', 'column': 'Project Title', 'description': 'In sheet "Project", column "Project Title" is not populated'},
+ {'sheet': 'Project', 'row': '', 'column': 'Description', 'description': 'In sheet "Project", column "Description" is not populated'},
+ {'sheet': 'Project', 'row': '', 'column': 'Tax ID', 'description': 'In sheet "Project", column "Tax ID" is not populated'},
+ {'sheet': 'Project', 'row': '', 'column': 'Center', 'description': 'In sheet "Project", column "Center" is not populated'},
+ {'sheet': 'Analysis', 'row': 2, 'column': 'Analysis Title', 'description': 'In sheet "Analysis", row "2", column "Analysis Title" is not populated'},
+ {'sheet': 'Analysis', 'row': 2, 'column': 'Description', 'description': 'In sheet "Analysis", row "2", column "Description" is not populated'},
+ {'sheet': 'Analysis', 'row': 2, 'column': 'Experiment Type', 'description': 'In sheet "Analysis", row "2", column "Experiment Type" is not populated'},
+ {'sheet': 'Analysis', 'row': 2, 'column': 'Reference', 'description': 'In sheet "Analysis", row "2", column "Reference" is not populated'},
+ {'sheet': 'Sample', 'row': 3, 'column': 'Sample Accession', 'description': 'In sheet "Sample", row "3", column "Sample Accession" is not populated'}
+ ],
+ 'spreadsheet_report_path': '/path/to/metadata/metadata_spreadsheet_validation.txt',
+ }
+}
+
+validation_results_json = {
"assembly_check": {
"input_passed.vcf": {
"report_path": "/path/to/assembly_passed/report",
@@ -155,27 +307,15 @@
{'property': '/sample/0.bioSampleObject', 'description': "should have required property 'bioSampleObject'"},
{'property': '/sample/0', 'description': 'should match exactly one schema in oneOf'}
],
- 'json_report_path': '/path/to/metadata/report',
- 'spreadsheet_errors': [
- {'sheet': 'Files', 'row': '', 'column': '', 'description': 'Sheet "Files" is missing'},
- {'sheet': 'Project', 'row': '', 'column': 'Project Title', 'description': 'In sheet "Project", column "Project Title" is not populated'},
- {'sheet': 'Project', 'row': '', 'column': 'Description', 'description': 'In sheet "Project", column "Description" is not populated'},
- {'sheet': 'Project', 'row': '', 'column': 'Tax ID', 'description': 'In sheet "Project", column "Tax ID" is not populated'},
- {'sheet': 'Project', 'row': '', 'column': 'Center', 'description': 'In sheet "Project", column "Center" is not populated'},
- {'sheet': 'Analysis', 'row': 2, 'column': 'Analysis Title', 'description': 'In sheet "Analysis", row "2", column "Analysis Title" is not populated'},
- {'sheet': 'Analysis', 'row': 2, 'column': 'Description', 'description': 'In sheet "Analysis", row "2", column "Description" is not populated'},
- {'sheet': 'Analysis', 'row': 2, 'column': 'Experiment Type', 'description': 'In sheet "Analysis", row "2", column "Experiment Type" is not populated'},
- {'sheet': 'Analysis', 'row': 2, 'column': 'Reference', 'description': 'In sheet "Analysis", row "2", column "Reference" is not populated'},
- {'sheet': 'Sample', 'row': 3, 'column': 'Sample Accession', 'description': 'In sheet "Sample", row "3", column "Sample Accession" is not populated'}
- ],
- 'spreadsheet_report_path': '/path/to/metadata/metadata_spreadsheet_validation.txt',
+ 'json_report_path': '/path/to/json/metadata/report'
}
}
class TestReport(TestCase):
resource_dir = os.path.join(os.path.dirname(__file__), 'resources')
- expected_report = os.path.join(resource_dir, 'validation_reports', 'expected_report.html')
+ expected_report_metadata_xlsx = os.path.join(resource_dir, 'validation_reports', 'expected_report_metadata_xlsx.html')
+ expected_report_metadata_json = os.path.join(resource_dir, 'validation_reports', 'expected_report_metadata_json.html')
test_project_name = "My cool project"
test_validation_date = datetime.datetime(2023, 8, 31, 12, 34, 56)
test_submission_dir = "/test/submission/dir"
@@ -184,18 +324,34 @@ class TestReport(TestCase):
test_vcf_fasta_analysis_mapping.append({'vcf_file': 'input_pass.vcf', 'fasta_file': 'input_pass.fa', 'analysis': 'B'})
test_vcf_fasta_analysis_mapping.append({'vcf_file': 'input_test.vcf', 'fasta_file': 'input_test.fa', 'analysis': 'could not be linked'})
- def test_generate_html_report(self):
- report = generate_html_report(validation_results, self.test_validation_date, self.test_submission_dir,
+ def test_generate_html_report_metadata_xlsx(self):
+ """Render the report from validation_results_xlsx and compare it with the expected xlsx HTML report.
+
+ The rendered report is also written to 'metadata_xlsx_report.html' in the current working directory.
+ That file is only removed after the comparison succeeds, so a failing run leaves it behind.
+ """
+ report = generate_html_report(validation_results_xlsx, self.test_validation_date, self.test_submission_dir,
+ self.test_vcf_fasta_analysis_mapping, self.test_project_name)
+ # The report can contain non-ASCII symbols (e.g. the "▶" and "❌" icons set by the template), so the
+ # encoding is pinned to UTF-8 instead of relying on the platform default.
+ with open('metadata_xlsx_report.html', 'w', encoding='utf-8') as open_file:
+ open_file.write(report)
+
+ with open(self.expected_report_metadata_xlsx, encoding='utf-8') as open_html:
+ expected_report_text = open_html.read()
+ # Inject the version in the expected report
+ expected_report_text = expected_report_text.replace('cligeneratedversion', eva_sub_cli.__version__)
+ assert report == expected_report_text
+
+ # Remove output file if assert passes
+ if os.path.exists('metadata_xlsx_report.html'):
+ os.remove('metadata_xlsx_report.html')
+
+ def test_generate_html_report_metadata_json(self):
+ """Render the report from validation_results_json and compare it with the expected JSON HTML report.
+
+ The rendered report is also written to 'metadata_json_report.html' in the current working directory.
+ That file is only removed after the comparison succeeds, so a failing run leaves it behind.
+ """
+ report = generate_html_report(validation_results_json, self.test_validation_date, self.test_submission_dir,
self.test_vcf_fasta_analysis_mapping, self.test_project_name)
- with open('report.html', 'w') as open_file:
+ # The report contains non-ASCII symbols (e.g. "▶", "❌", "✔"), so the encoding is pinned to UTF-8
+ # instead of relying on the platform default.
+ with open('metadata_json_report.html', 'w', encoding='utf-8') as open_file:
open_file.write(report)
- with open(self.expected_report) as open_html:
+ with open(self.expected_report_metadata_json, encoding='utf-8') as open_html:
expected_report_text = open_html.read()
# Inject the version in the expected report
expected_report_text = expected_report_text.replace('cligeneratedversion', eva_sub_cli.__version__)
assert report == expected_report_text
# Remove output file if assert passes
- if os.path.exists('report.html'):
- os.remove('report.html')
+ if os.path.exists('metadata_json_report.html'):
+ os.remove('metadata_json_report.html')
diff --git a/tests/test_validator.py b/tests/test_validator.py
index f01b4e1..9a15f04 100644
--- a/tests/test_validator.py
+++ b/tests/test_validator.py
@@ -12,6 +12,7 @@ class TestValidator(TestCase):
assembly_reports = os.path.join(resource_dir, 'assembly_reports')
output_dir = os.path.join(resource_dir, 'validation_reports')
mapping_file = os.path.join(output_dir, 'vcf_files_mapping.csv')
+ metadata_xlsx_file = os.path.join(output_dir, 'EVA_Submission_test.xlsx')
def setUp(self) -> None:
# create vcf mapping file
@@ -20,7 +21,8 @@ def setUp(self) -> None:
[os.path.join(self.vcf_files, 'input_passed.vcf')],
[os.path.join(self.fasta_files, 'input_passed.fa')],
[os.path.join(self.assembly_reports, 'input_passed.txt')])
- self.validator = Validator(self.mapping_file, self.output_dir)
+ self.validator = Validator(self.mapping_file, self.output_dir, metadata_xlsx=self.metadata_xlsx_file)
+ self.validator_json = Validator(self.mapping_file, self.output_dir)
def tearDown(self) -> None:
files_from_tests = [
@@ -32,7 +34,7 @@ def tearDown(self) -> None:
if os.path.exists(f):
os.remove(f)
- def test__collect_validation_workflow_results(self):
+ def test__collect_validation_workflow_results_with_metadata_xlsx(self):
expected_results = {
'vcf_check': {
'input_passed.vcf': {'valid': True, 'error_list': [], 'error_count': 0, 'warning_count': 0, 'critical_count': 0, 'critical_list': []}
@@ -116,6 +118,73 @@ def test__collect_validation_workflow_results(self):
assert self.validator.results == expected_results
+ def test__collect_validation_workflow_results_with_metadata_json(self):
+ """Collect workflow results with self.validator_json (built without metadata_xlsx) and compare them.
+
+ Report paths are deleted from the collected results before the comparison, so only the
+ parsed content is checked against expected_results.
+ """
+ expected_results = {
+ 'vcf_check': {
+ 'input_passed.vcf': {'valid': True, 'error_list': [], 'error_count': 0, 'warning_count': 0,
+ 'critical_count': 0, 'critical_list': []}
+ },
+ 'assembly_check': {
+ 'input_passed.vcf': {'error_list': [], 'mismatch_list': [], 'nb_mismatch': 0, 'nb_error': 0,
+ 'match': 247, 'total': 247}
+ },
+ 'sample_check': {
+ 'overall_differences': False,
+ 'results_per_analysis': {
+ 'AA': {
+ 'difference': False,
+ 'more_metadata_submitted_files': [],
+ 'more_per_submitted_files_metadata': {},
+ 'more_submitted_files_metadata': []
+ }
+ }
+ },
+ 'fasta_check': {
+ 'input_passed.fa': {'all_insdc': False, 'sequences': [
+ {'sequence_name': 1, 'insdc': True, 'sequence_md5': '6681ac2f62509cfc220d78751b8dc524'},
+ {'sequence_name': 2, 'insdc': False, 'sequence_md5': 'd2b3f22704d944f92a6bc45b6603ea2d'}
+ ]},
+ },
+ 'metadata_check': {
+ 'json_errors': [
+ {'property': '/files', 'description': "should have required property 'files'"},
+ {'property': '/project/title', 'description': "should have required property 'title'"},
+ {'property': '/project/taxId', 'description': "must have required property 'taxId'"},
+ {'property': '/project/holdDate', 'description': 'must match format "date"'},
+ {'property': '/analysis/0/description',
+ 'description': "should have required property 'description'"},
+ {'property': '/analysis/0/referenceGenome',
+ 'description': "should have required property 'referenceGenome'"},
+ {'property': '/sample/0/bioSampleAccession',
+ 'description': "should have required property 'bioSampleAccession'"},
+ {'property': '/sample/0/bioSampleObject',
+ 'description': "should have required property 'bioSampleObject'"},
+ {'property': '/sample/0', 'description': 'should match exactly one schema in oneOf'},
+ {'property': '/project/childProjects/1', 'description': 'PRJEBNA does not exist or is private'},
+ {'property': '/sample/2/bioSampleObject/characteristics/taxId',
+ 'description': '1234 is not a valid taxonomy code'},
+ {'property': '/sample/analysisAlias', 'description': 'alias1 present in Analysis not in Samples'},
+ {'property': '/sample/analysisAlias',
+ 'description': 'alias_1,alias_2 present in Samples not in Analysis'},
+ ],
+ # NOTE(review): spreadsheet_errors is still expected although validator_json has no metadata_xlsx -
+ # presumably because the spreadsheet conversion errors are loaded unconditionally; confirm.
+ 'spreadsheet_errors': [
+ {'sheet': 'Project', 'row': '', 'column': 'Tax ID',
+ 'description': 'Worksheet Project is missing required header Tax ID'}
+ ]
+ }
+ }
+
+ self.validator_json._collect_validation_workflow_results()
+ # Drop report paths from comparison (test will fail if missing)
+ del self.validator_json.results['metadata_check']['json_report_path']
+ del self.validator_json.results['sample_check']['report_path']
+ for file in self.validator_json.results['vcf_check'].values():
+ del file['report_path']
+ for file in self.validator_json.results['assembly_check'].values():
+ del file['report_path']
+
+ assert self.validator_json.results == expected_results
+
def test_create_report(self):
self.validator._collect_validation_workflow_results()
report_path = self.validator.create_reports()