Skip to content

Commit

Permalink
EVA-3695 Save validation results in a separate yaml file (#67)
Browse files Browse the repository at this point in the history
* save validation results in a separate yaml file
  • Loading branch information
nitin-ebi authored Nov 15, 2024
1 parent ed6b737 commit 4c37d99
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 21 deletions.
68 changes: 48 additions & 20 deletions eva_sub_cli/validators/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
parse_vcf_check_report, parse_metadata_property

VALIDATION_OUTPUT_DIR = "validation_output"
VALIDATION_RESULTS = 'validation_results'
VALIDATION_RESULTS_KEY = 'validation_results'
VALIDATION_RESULTS_FILE = 'validation_results.yaml'
READY_FOR_SUBMISSION_TO_EVA = 'ready_for_submission_to_eva'

logger = logging_config.get_logger(__name__)
Expand All @@ -34,6 +35,7 @@ def __init__(self, mapping_file, submission_dir, project_title=None, metadata_js
# If the submission_config is not set it will also be written to the VALIDATION_OUTPUT_DIR
self.submission_dir = submission_dir
self.output_dir = os.path.join(submission_dir, VALIDATION_OUTPUT_DIR)
self.validation_result_file = os.path.join(submission_dir, VALIDATION_RESULTS_FILE)
self.mapping_file = mapping_file
vcf_files, fasta_files = self._find_vcf_and_fasta_files()
self.vcf_files = vcf_files
Expand Down Expand Up @@ -90,10 +92,11 @@ def validate(self):
self._validate()
self.clean_up_output_dir()
self._collect_validation_workflow_results()
self._assess_validation_results()
self._save_validation_results()

def report(self):
    """
    Produce the human-readable validation reports, then record the validation
    outcome in the submission config via update_config_with_validation_result.
    """
    # NOTE(review): create_reports is defined elsewhere in this class — presumably
    # renders the parsed results; confirm against the full file.
    self.create_reports()
    self.update_config_with_validation_result()

def _validate(self):
raise NotImplementedError
Expand Down Expand Up @@ -147,26 +150,11 @@ def check_if_file_missing(self):
missing_files_list.append(row['report'])
return files_missing, missing_files_list

def update_config_with_validation_result(self):
    """
    Persist the parsed validation results in the submission config, together
    with the overall readiness flag computed by verify_ready_for_submission_to_eva.
    """
    self.sub_config.set(VALIDATION_RESULTS, value=self.results)
    # Compute readiness after storing the raw results, preserving the original order.
    is_ready = self.verify_ready_for_submission_to_eva()
    self.sub_config.set(READY_FOR_SUBMISSION_TO_EVA, value=is_ready)

def verify_ready_for_submission_to_eva(self):
"""
Assess if the validation results are meeting expectations
It assumes all validation have been parsed already.
"""
""" Checks if all the validation are passed """
return all((
self.results.get('vcf_check', {}).get('critical_count', 1) == 0,
self.results.get('assembly_check', {}).get('nb_mismatch', 1) == 0,
self.results.get('assembly_check', {}).get('nb_error', 1) == 0,
all((
fa_file_check.get('all_insdc', False) is True
for fa_file, fa_file_check in self.results.get('fasta_check', {}).items()
)),
self.results.get('sample_check', {}).get('overall_differences', True) is False,
len(self.results.get('metadata_check', {}).get('spreadsheet_errors', [])) == 0,
len(self.results.get('metadata_check', {}).get('json_errors', [])) == 0,
all((value.get('pass', False) is True for key, value in self.results.items() if
key in ['vcf_check', 'assembly_check', 'fasta_check', 'sample_check', 'metadata_check'])),
any((
self.results['shallow_validation']['requested'] is False,
self.results['shallow_validation'].get('required', True) is False
Expand All @@ -183,6 +171,46 @@ def _collect_validation_workflow_results(self):
self._load_fasta_check_results()
self._collect_metadata_results()

def _assess_validation_results(self):
    """
    Assess whether each validation category met expectations and record the
    verdict under that category's 'pass' key (True/False).
    Assumes all validation outputs have already been parsed into self.results.

    Uses setdefault so a missing category yields an empty dict instead of a
    KeyError on the 'pass' assignment (the original mixed .get() reads with
    direct-index writes, which raised KeyError when a category was absent).
    """
    # vcf_check passes when no VCF file reported any critical error
    vcf_checks = self.results.setdefault('vcf_check', {})
    vcf_checks['pass'] = all(
        per_vcf.get('critical_count', 1) == 0
        for vcf_name, per_vcf in vcf_checks.items()
    )

    # assembly_check passes when every VCF has zero mismatches and zero errors
    # (single pass over the per-VCF results instead of two separate scans)
    asm_checks = self.results.setdefault('assembly_check', {})
    asm_checks['pass'] = all(
        per_vcf.get('nb_mismatch', 1) == 0 and per_vcf.get('nb_error', 1) == 0
        for vcf_name, per_vcf in asm_checks.items()
    )

    # fasta_check passes when every FASTA file is confirmed all-INSDC
    fasta_checks = self.results.setdefault('fasta_check', {})
    fasta_checks['pass'] = all(
        per_fasta.get('all_insdc', False) is True
        for fasta_file, per_fasta in fasta_checks.items()
    )

    # sample_check passes only when no overall differences were detected
    sample_check = self.results.setdefault('sample_check', {})
    sample_check['pass'] = sample_check.get('overall_differences', True) is False

    # metadata_check passes when both the spreadsheet and the JSON checks
    # reported no errors ("or []" guards against an explicit None value)
    metadata_check = self.results.setdefault('metadata_check', {})
    xlsx_ok = len(metadata_check.get('spreadsheet_errors', []) or []) == 0
    json_ok = len(metadata_check.get('json_errors', []) or []) == 0
    metadata_check['pass'] = xlsx_ok and json_ok

    # update config based on the validation results
    self.sub_config.set(READY_FOR_SUBMISSION_TO_EVA, value=self.verify_ready_for_submission_to_eva())

def _save_validation_results(self):
    """
    Serialise the collected validation results to the YAML file located in the
    submission directory (path computed in __init__ as self.validation_result_file).
    """
    with open(self.validation_result_file, 'w') as results_handle:
        yaml.safe_dump(self.results, results_handle)
    # NOTE(review): self.debug presumably comes from a logging mixin — confirm.
    self.debug(f"saved validation result in {self.validation_result_file}")

@lru_cache
def _vcf_check_log(self, vcf_name):
return resolve_single_file_path(
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
ebi_eva_common_pyutils==0.6.10
ebi_eva_common_pyutils==0.6.11
jinja2
jsonschema
openpyxl
Expand Down

0 comments on commit 4c37d99

Please sign in to comment.