EVA-3695 Save validation results in a separate yaml file #67

Merged (5 commits, Nov 15, 2024)
eva_sub_cli/validators/validator.py: 48 additions & 20 deletions
```diff
@@ -20,7 +20,8 @@
     parse_vcf_check_report, parse_metadata_property
 
 VALIDATION_OUTPUT_DIR = "validation_output"
-VALIDATION_RESULTS = 'validation_results'
+VALIDATION_RESULTS_KEY = 'validation_results'
+VALIDATION_RESULTS_FILE = 'validation_results.yaml'
 READY_FOR_SUBMISSION_TO_EVA = 'ready_for_submission_to_eva'
 
 logger = logging_config.get_logger(__name__)
```
```diff
@@ -34,6 +35,7 @@ def __init__(self, mapping_file, submission_dir, project_title=None, metadata_js
         # If the submission_config is not set it will also be written to the VALIDATION_OUTPUT_DIR
         self.submission_dir = submission_dir
         self.output_dir = os.path.join(submission_dir, VALIDATION_OUTPUT_DIR)
+        self.validation_result_file = os.path.join(submission_dir, VALIDATION_RESULTS_FILE)
         self.mapping_file = mapping_file
         vcf_files, fasta_files = self._find_vcf_and_fasta_files()
         self.vcf_files = vcf_files
```
```diff
@@ -90,10 +92,11 @@ def validate(self):
         self._validate()
         self.clean_up_output_dir()
         self._collect_validation_workflow_results()
+        self._assess_validation_results()
+        self._save_validation_results()
 
     def report(self):
         self.create_reports()
-        self.update_config_with_validation_result()
 
     def _validate(self):
         raise NotImplementedError
```
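The net effect on the caller-facing flow: `validate()` now parses, assesses, and persists results in one pass, so `report()` no longer updates the submission config itself. A hypothetical usage sketch (the subclass name and arguments are illustrative, not taken from the PR; concrete validator subclasses supply `_validate()`):

```python
# Hypothetical usage sketch; SomeValidator stands in for a concrete subclass.
validator = SomeValidator(mapping_file='mapping.csv', submission_dir='my_submission')
validator.validate()  # runs checks, sets per-check 'pass' flags, writes validation_results.yaml
validator.report()    # now only renders the reports
```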
Expand Down Expand Up @@ -147,26 +150,11 @@ def check_if_file_missing(self):
missing_files_list.append(row['report'])
return files_missing, missing_files_list

def update_config_with_validation_result(self):
self.sub_config.set(VALIDATION_RESULTS, value=self.results)
self.sub_config.set(READY_FOR_SUBMISSION_TO_EVA, value=self.verify_ready_for_submission_to_eva())

def verify_ready_for_submission_to_eva(self):
"""
Assess if the validation results are meeting expectations
It assumes all validation have been parsed already.
"""
""" Checks if all the validation are passed """
return all((
self.results.get('vcf_check', {}).get('critical_count', 1) == 0,
self.results.get('assembly_check', {}).get('nb_mismatch', 1) == 0,
self.results.get('assembly_check', {}).get('nb_error', 1) == 0,
all((
fa_file_check.get('all_insdc', False) is True
for fa_file, fa_file_check in self.results.get('fasta_check', {}).items()
)),
self.results.get('sample_check', {}).get('overall_differences', True) is False,
len(self.results.get('metadata_check', {}).get('spreadsheet_errors', [])) == 0,
len(self.results.get('metadata_check', {}).get('json_errors', [])) == 0,
all((value.get('pass', False) is True for key, value in self.results.items() if
key in ['vcf_check', 'assembly_check', 'fasta_check', 'sample_check', 'metadata_check'])),
any((
self.results['shallow_validation']['requested'] is False,
self.results['shallow_validation'].get('required', True) is False
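The refactored readiness check no longer re-derives each condition; it just aggregates the per-check `pass` flags set by `_assess_validation_results()` (added in the next hunk). A minimal standalone sketch of that aggregation, using a hand-written results dict with illustrative values in place of real validator output:

```python
# Standalone sketch of the new aggregation; the dict is illustrative data only.
results = {
    'vcf_check': {'pass': True},
    'assembly_check': {'pass': True},
    'fasta_check': {'pass': True},
    'sample_check': {'pass': True},
    'metadata_check': {'pass': False},           # one failing category
    'shallow_validation': {'requested': False},  # full validation was run
}

ready = all(
    value.get('pass', False) is True
    for key, value in results.items()
    if key in ['vcf_check', 'assembly_check', 'fasta_check', 'sample_check', 'metadata_check']
) and any((
    results['shallow_validation']['requested'] is False,
    results['shallow_validation'].get('required', True) is False,
))
print(ready)  # False: metadata_check did not pass
```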
```diff
@@ -183,6 +171,46 @@ def _collect_validation_workflow_results(self):
         self._load_fasta_check_results()
         self._collect_metadata_results()
 
+    def _assess_validation_results(self):
+        """
+        Assess if the validation results are meeting expectations and marks them as "PASS: true" or "PASS: false"
+        It assumes all validation have been parsed already.
+        """
+        # vcf_check result
+        vcf_check_result = all((vcf_check.get('critical_count', 1) == 0
+                                for vcf_name, vcf_check in self.results.get('vcf_check', {}).items()))
+        self.results['vcf_check']['pass'] = vcf_check_result
+
+        # assembly_check result
+        asm_nb_mismatch_result = all((asm_check.get('nb_mismatch', 1) == 0
+                                      for vcf_name, asm_check in self.results.get('assembly_check', {}).items()))
+        asm_nb_error_result = all((asm_check.get('nb_error', 1) == 0
+                                   for vcf_name, asm_check in self.results.get('assembly_check', {}).items()))
+        self.results['assembly_check']['pass'] = asm_nb_mismatch_result and asm_nb_error_result
+
+        # fasta_check result
+        fasta_check_result = all((fa_file_check.get('all_insdc', False) is True
+                                  for fa_file, fa_file_check in self.results.get('fasta_check', {}).items()))
+        self.results['fasta_check']['pass'] = fasta_check_result
+
+        # sample check result
+        self.results['sample_check']['pass'] = self.results.get('sample_check', {}).get('overall_differences',
+                                                                                        True) is False
+
+        # metadata check result
+        metadata_xlsx_result = len(self.results.get('metadata_check', {}).get('spreadsheet_errors', []) or []) == 0
+        metadata_json_result = len(self.results.get('metadata_check', {}).get('json_errors', []) or []) == 0
+        self.results['metadata_check']['pass'] = metadata_xlsx_result and metadata_json_result
+
+        # update config based on the validation results
+        self.sub_config.set(READY_FOR_SUBMISSION_TO_EVA, value=self.verify_ready_for_submission_to_eva())
+
+    def _save_validation_results(self):
+        with open(self.validation_result_file, 'w') as val_res_file:
+            yaml.safe_dump(self.results, val_res_file)
+
+        self.debug(f"saved validation result in {self.validation_result_file}")
+
     @lru_cache
     def _vcf_check_log(self, vcf_name):
         return resolve_single_file_path(
```
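Since `_save_validation_results()` simply `yaml.safe_dump`s `self.results`, the file it writes can be read back with `yaml.safe_load`. A minimal sketch, assuming a hypothetical submission directory named `my_submission`; the per-check `pass` keys mirror those set in `_assess_validation_results()` above:

```python
# Sketch: reading back validation_results.yaml written by _save_validation_results().
# 'my_submission' is a hypothetical directory name, not taken from the PR.
import os
import yaml

with open(os.path.join('my_submission', 'validation_results.yaml')) as f:
    results = yaml.safe_load(f)

for check in ('vcf_check', 'assembly_check', 'fasta_check', 'sample_check', 'metadata_check'):
    flag = results.get(check, {}).get('pass', False)
    print(f"{check}: {'PASS' if flag else 'FAIL'}")
```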
requirements.txt: 1 addition & 1 deletion
```diff
@@ -1,4 +1,4 @@
-ebi_eva_common_pyutils==0.6.10
+ebi_eva_common_pyutils==0.6.11
 jinja2
 jsonschema
 openpyxl
```