Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

EVA-3695 Save validation results in a separate yaml file #67

Merged
merged 5 commits into from
Nov 15, 2024
Merged
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 46 additions & 16 deletions eva_sub_cli/validators/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
parse_vcf_check_report, parse_metadata_property

VALIDATION_OUTPUT_DIR = "validation_output"
VALIDATION_RESULTS = 'validation_results'
VALIDATION_RESULTS_KEY = 'validation_results'
VALIDATION_RESULTS_FILE = 'validation_results.yaml'
READY_FOR_SUBMISSION_TO_EVA = 'ready_for_submission_to_eva'

logger = logging_config.get_logger(__name__)
Expand All @@ -34,6 +35,7 @@ def __init__(self, mapping_file, submission_dir, project_title=None, metadata_js
# If the submission_config is not set it will also be written to the VALIDATION_OUTPUT_DIR
self.submission_dir = submission_dir
self.output_dir = os.path.join(submission_dir, VALIDATION_OUTPUT_DIR)
self.validation_result_file = os.path.join(submission_dir, VALIDATION_RESULTS_FILE)
self.mapping_file = mapping_file
vcf_files, fasta_files = self._find_vcf_and_fasta_files()
self.vcf_files = vcf_files
Expand Down Expand Up @@ -90,6 +92,8 @@ def validate(self):
self._validate()
self.clean_up_output_dir()
self._collect_validation_workflow_results()
self._assess_validation_results()
self._save_validation_results()

def report(self):
self.create_reports()
Expand Down Expand Up @@ -148,25 +152,14 @@ def check_if_file_missing(self):
return files_missing, missing_files_list

def update_config_with_validation_result(self):
    """Store the parsed validation results, plus the overall readiness flag, in the submission config."""
    ready_flag = self.verify_ready_for_submission_to_eva()
    self.sub_config.set(VALIDATION_RESULTS_KEY, value=self.results)
    self.sub_config.set(READY_FOR_SUBMISSION_TO_EVA, value=ready_flag)

def verify_ready_for_submission_to_eva(self):
"""
Assess if the validation results are meeting expectations
It assumes all validation have been parsed already.
"""
""" Checks if all the validation are passed """
return all((
self.results.get('vcf_check', {}).get('critical_count', 1) == 0,
self.results.get('assembly_check', {}).get('nb_mismatch', 1) == 0,
self.results.get('assembly_check', {}).get('nb_error', 1) == 0,
all((
fa_file_check.get('all_insdc', False) is True
for fa_file, fa_file_check in self.results.get('fasta_check', {}).items()
)),
self.results.get('sample_check', {}).get('overall_differences', True) is False,
len(self.results.get('metadata_check', {}).get('spreadsheet_errors', [])) == 0,
len(self.results.get('metadata_check', {}).get('json_errors', [])) == 0,
all((value.get('PASS', False) is True for key, value in self.results.items() if
key in ['vcf_check', 'assembly_check', 'fasta_check', 'sample_check', 'metadata_check'])),
any((
self.results['shallow_validation']['requested'] is False,
self.results['shallow_validation'].get('required', True) is False
Expand All @@ -183,6 +176,43 @@ def _collect_validation_workflow_results(self):
self._load_fasta_check_results()
self._collect_metadata_results()

def _assess_validation_results(self):
"""
Assess if the validation results are meeting expectations and marks them as "PASS: true" or "PASS: false"
It assumes all validation have been parsed already.
"""
# vcf_check result
vcf_check_result = all((vcf_check.get('critical_count', 1) == 0
for vcf_name, vcf_check in self.results.get('vcf_check', {}).items()))
self.results['vcf_check']['PASS'] = vcf_check_result
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

personal preference I guess but I would prefer lower case pass key.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done


# assembly_check result
asm_nb_mismatch_result = all((asm_check.get('nb_mismatch', 1) == 0
for vcf_name, asm_check in self.results.get('assembly_check', {}).items()))
asm_nb_error_result = all((asm_check.get('nb_error', 1) == 0
for vcf_name, asm_check in self.results.get('assembly_check', {}).items()))
self.results['assembly_check']['PASS'] = asm_nb_mismatch_result and asm_nb_error_result

# fasta_check result
fasta_check_result = all((fa_file_check.get('all_insdc', False) is True
for fa_file, fa_file_check in self.results.get('fasta_check', {}).items()))
self.results['fasta_check']['PASS'] = fasta_check_result

# sample check result
self.results['sample_check']['PASS'] = self.results.get('sample_check', {}).get('overall_differences',
True) is False

# metadata check result
metadata_xlsx_result = len(self.results.get('metadata_check', {}).get('spreadsheet_errors', [])) == 0
metadata_json_result = len(self.results.get('metadata_check', {}).get('json_errors', [])) == 0
self.results['metadata_check']['PASS'] = metadata_xlsx_result and metadata_json_result

def _save_validation_results(self):
    """Write the collected validation results to the YAML results file in the submission directory."""
    serialised_results = yaml.safe_dump(self.results)
    with open(self.validation_result_file, 'w') as result_file:
        result_file.write(serialised_results)

    # self.debug presumably comes from a logging mixin on this class — not visible here, confirm
    self.debug(f"saved validation result in {self.validation_result_file}")

@lru_cache
def _vcf_check_log(self, vcf_name):
return resolve_single_file_path(
Expand Down
Loading