From 4c37d99ee7efaf2c47f70b7f29bddb319c98ae47 Mon Sep 17 00:00:00 2001
From: nitin-ebi <79518737+nitin-ebi@users.noreply.github.com>
Date: Fri, 15 Nov 2024 14:40:08 +0000
Subject: [PATCH] EVA-3695 Save validation results in a separate yaml file
 (#67)

* save validation results in a separate yaml file
---
 eva_sub_cli/validators/validator.py | 68 ++++++++++++++++++++---------
 requirements.txt                    |  2 +-
 2 files changed, 49 insertions(+), 21 deletions(-)

diff --git a/eva_sub_cli/validators/validator.py b/eva_sub_cli/validators/validator.py
index de1606d..15c85b6 100755
--- a/eva_sub_cli/validators/validator.py
+++ b/eva_sub_cli/validators/validator.py
@@ -20,7 +20,8 @@
     parse_vcf_check_report, parse_metadata_property
 
 VALIDATION_OUTPUT_DIR = "validation_output"
-VALIDATION_RESULTS = 'validation_results'
+VALIDATION_RESULTS_KEY = 'validation_results'
+VALIDATION_RESULTS_FILE = 'validation_results.yaml'
 READY_FOR_SUBMISSION_TO_EVA = 'ready_for_submission_to_eva'
 
 logger = logging_config.get_logger(__name__)
@@ -34,6 +35,7 @@ def __init__(self, mapping_file, submission_dir, project_title=None, metadata_js
         # If the submission_config is not set it will also be written to the VALIDATION_OUTPUT_DIR
         self.submission_dir = submission_dir
         self.output_dir = os.path.join(submission_dir, VALIDATION_OUTPUT_DIR)
+        self.validation_result_file = os.path.join(submission_dir, VALIDATION_RESULTS_FILE)
         self.mapping_file = mapping_file
         vcf_files, fasta_files = self._find_vcf_and_fasta_files()
         self.vcf_files = vcf_files
@@ -90,10 +92,11 @@ def validate(self):
         self._validate()
         self.clean_up_output_dir()
         self._collect_validation_workflow_results()
+        self._assess_validation_results()
+        self._save_validation_results()
 
     def report(self):
         self.create_reports()
-        self.update_config_with_validation_result()
 
     def _validate(self):
         raise NotImplementedError
@@ -147,26 +150,11 @@ def check_if_file_missing(self):
                     missing_files_list.append(row['report'])
         return files_missing, missing_files_list
 
-    def update_config_with_validation_result(self):
-        self.sub_config.set(VALIDATION_RESULTS, value=self.results)
-        self.sub_config.set(READY_FOR_SUBMISSION_TO_EVA, value=self.verify_ready_for_submission_to_eva())
-
     def verify_ready_for_submission_to_eva(self):
-        """
-        Assess if the validation results are meeting expectations
-        It assumes all validation have been parsed already.
-        """
+        """ Checks if all the validations have passed """
        return all((
-            self.results.get('vcf_check', {}).get('critical_count', 1) == 0,
-            self.results.get('assembly_check', {}).get('nb_mismatch', 1) == 0,
-            self.results.get('assembly_check', {}).get('nb_error', 1) == 0,
-            all((
-                fa_file_check.get('all_insdc', False) is True
-                for fa_file, fa_file_check in self.results.get('fasta_check', {}).items()
-            )),
-            self.results.get('sample_check', {}).get('overall_differences', True) is False,
-            len(self.results.get('metadata_check', {}).get('spreadsheet_errors', [])) == 0,
-            len(self.results.get('metadata_check', {}).get('json_errors', [])) == 0,
+            all((value.get('pass', False) is True for key, value in self.results.items() if
+                 key in ['vcf_check', 'assembly_check', 'fasta_check', 'sample_check', 'metadata_check'])),
             any((
                 self.results['shallow_validation']['requested'] is False,
                 self.results['shallow_validation'].get('required', True) is False
@@ -183,6 +171,46 @@ def _collect_validation_workflow_results(self):
         self._load_fasta_check_results()
         self._collect_metadata_results()
 
+    def _assess_validation_results(self):
+        """
+        Assess whether the validation results meet expectations and mark each check as "pass: True" or "pass: False".
+        It assumes all validation results have been parsed already.
+        """
+        # vcf_check result
+        vcf_check_result = all((vcf_check.get('critical_count', 1) == 0
+                                for vcf_name, vcf_check in self.results.get('vcf_check', {}).items()))
+        self.results['vcf_check']['pass'] = vcf_check_result
+
+        # assembly_check result
+        asm_nb_mismatch_result = all((asm_check.get('nb_mismatch', 1) == 0
+                                      for vcf_name, asm_check in self.results.get('assembly_check', {}).items()))
+        asm_nb_error_result = all((asm_check.get('nb_error', 1) == 0
+                                   for vcf_name, asm_check in self.results.get('assembly_check', {}).items()))
+        self.results['assembly_check']['pass'] = asm_nb_mismatch_result and asm_nb_error_result
+
+        # fasta_check result
+        fasta_check_result = all((fa_file_check.get('all_insdc', False) is True
+                                  for fa_file, fa_file_check in self.results.get('fasta_check', {}).items()))
+        self.results['fasta_check']['pass'] = fasta_check_result
+
+        # sample_check result
+        self.results['sample_check']['pass'] = self.results.get('sample_check', {}).get('overall_differences',
+                                                                                        True) is False
+
+        # metadata_check result
+        metadata_xlsx_result = len(self.results.get('metadata_check', {}).get('spreadsheet_errors', []) or []) == 0
+        metadata_json_result = len(self.results.get('metadata_check', {}).get('json_errors', []) or []) == 0
+        self.results['metadata_check']['pass'] = metadata_xlsx_result and metadata_json_result
+
+        # update the config based on the validation results
+        self.sub_config.set(READY_FOR_SUBMISSION_TO_EVA, value=self.verify_ready_for_submission_to_eva())
+
+    def _save_validation_results(self):
+        with open(self.validation_result_file, 'w') as val_res_file:
+            yaml.safe_dump(self.results, val_res_file)
+
+        self.debug(f"Saved validation results in {self.validation_result_file}")
+
     @lru_cache
     def _vcf_check_log(self, vcf_name):
         return resolve_single_file_path(
diff --git a/requirements.txt b/requirements.txt
index 1c20ce8..01e6e0c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-ebi_eva_common_pyutils==0.6.10
+ebi_eva_common_pyutils==0.6.11
 jinja2
 jsonschema
 openpyxl