From 42aa50361f0ffd3b145144965e4b18726c9e69f1 Mon Sep 17 00:00:00 2001 From: tcezard Date: Tue, 24 Oct 2023 14:19:22 +0100 Subject: [PATCH] address review comments --- bin/samples_checker.py | 13 ++++++++++--- eva_sub_cli/reporter.py | 32 +++++++++++++------------------- 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/bin/samples_checker.py b/bin/samples_checker.py index 59aa1b1..80c542d 100644 --- a/bin/samples_checker.py +++ b/bin/samples_checker.py @@ -101,7 +101,14 @@ def read_metadata_json(json_file): ) -def resolve_vcf_file_location(vcf_files, files_per_analysis): +def associate_vcf_path_with_analysis(vcf_files, files_per_analysis): + """ + Match the files names associated with analysis provided in the metadata with the file path given on the command + line. + :param vcf_files the list of full path to the vcf files + :param files_per_analysis: dictionary of the analysis and their associated VCF file names + :returns dictionary of analysis and their associated vcf file path + """ result_files_per_analysis = dict() for analysis in files_per_analysis: result_files_per_analysis[analysis] = [] @@ -137,8 +144,8 @@ def check_sample_name_concordance(metadata_json, vcf_files, output_yaml): found in the VCF files """ samples_per_analysis, files_per_analysis = read_metadata_json(metadata_json) - files_per_analysis = resolve_vcf_file_location(vcf_files, files_per_analysis) - overall_differences, results_per_analysis_alias = compare_all_analysis(samples_per_analysis, files_per_analysis) + file_path_per_analysis = associate_vcf_path_with_analysis(vcf_files, files_per_analysis) + overall_differences, results_per_analysis_alias = compare_all_analysis(samples_per_analysis, file_path_per_analysis) write_result_yaml(output_yaml, overall_differences, results_per_analysis_alias) diff --git a/eva_sub_cli/reporter.py b/eva_sub_cli/reporter.py index d4d5479..c693d05 100755 --- a/eva_sub_cli/reporter.py +++ b/eva_sub_cli/reporter.py @@ -156,22 +156,19 @@ def _collect_validation_workflow_results(self, ): self._write_spreadsheet_validation_results() @lru_cache - def _vcf_check_log(self, vcf_file): - vcf_name = os.path.basename(vcf_file) + def _vcf_check_log(self, vcf_name): return resolve_single_file_path( os.path.join(self.output_dir, 'vcf_format', vcf_name + '.vcf_format.log') ) @lru_cache - def _vcf_check_text_report(self, vcf_file): - vcf_name = os.path.basename(vcf_file) + def _vcf_check_text_report(self, vcf_name): return resolve_single_file_path( os.path.join(self.output_dir, 'vcf_format', vcf_name + '.*.txt') ) @lru_cache - def _vcf_check_db_report(self, vcf_file): - vcf_name = os.path.basename(vcf_file) + def _vcf_check_db_report(self, vcf_name): return resolve_single_file_path( os.path.join(self.output_dir, 'vcf_format', vcf_name + '.*.db') ) @@ -182,9 +179,9 @@ def _collect_vcf_check_results(self,): for vcf_file in self.vcf_files: vcf_name = os.path.basename(vcf_file) - vcf_check_log = self._vcf_check_log(vcf_file) - vcf_check_text_report = self._vcf_check_text_report(vcf_file) - vcf_check_db_report = self._vcf_check_db_report(vcf_file) + vcf_check_log = self._vcf_check_log(vcf_name) + vcf_check_text_report = self._vcf_check_text_report(vcf_name) + vcf_check_db_report = self._vcf_check_db_report(vcf_name) if vcf_check_log and vcf_check_text_report and vcf_check_db_report: valid, warning_count, error_count, critical_count, error_list, critical_list = self.parse_vcf_check_report(vcf_check_text_report) @@ -201,22 +198,19 @@ def _collect_vcf_check_results(self,): } @lru_cache - def _assembly_check_log(self, vcf_file): - vcf_name = os.path.basename(vcf_file) + def _assembly_check_log(self, vcf_name): return resolve_single_file_path( os.path.join(self.output_dir, 'assembly_check', vcf_name + '.assembly_check.log') ) @lru_cache - def _assembly_check_valid_vcf(self, vcf_file): - vcf_name = os.path.basename(vcf_file) + def _assembly_check_valid_vcf(self, vcf_name): return resolve_single_file_path( os.path.join(self.output_dir, 'assembly_check', vcf_name + '.valid_assembly_report*') ) @lru_cache - def _assembly_check_text_report(self, vcf_file): - vcf_name = os.path.basename(vcf_file) - return resolve_single_file_path( + def _assembly_check_text_report(self, vcf_name): + return resolve_single_file_path( os.path.join(self.output_dir, 'assembly_check', vcf_name + '*text_assembly_report*') ) @@ -226,9 +220,9 @@ def _collect_assembly_check_results(self): for vcf_file in self.vcf_files: vcf_name = os.path.basename(vcf_file) - assembly_check_log = self._assembly_check_log(vcf_file) - assembly_check_valid_vcf = self._assembly_check_valid_vcf(vcf_file) - assembly_check_text_report = self._assembly_check_text_report(vcf_file) + assembly_check_log = self._assembly_check_log(vcf_name) + assembly_check_valid_vcf = self._assembly_check_valid_vcf(vcf_name) + assembly_check_text_report = self._assembly_check_text_report(vcf_name) if assembly_check_log and assembly_check_valid_vcf and assembly_check_text_report: error_list_from_log, nb_error_from_log, match, total = \