From 42aa50361f0ffd3b145144965e4b18726c9e69f1 Mon Sep 17 00:00:00 2001
From: tcezard <tcezard@ebi.ac.uk>
Date: Tue, 24 Oct 2023 14:19:22 +0100
Subject: [PATCH] address review comments

---
 bin/samples_checker.py  | 13 ++++++++++---
 eva_sub_cli/reporter.py | 32 +++++++++++++-------------------
 2 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/bin/samples_checker.py b/bin/samples_checker.py
index 59aa1b1..80c542d 100644
--- a/bin/samples_checker.py
+++ b/bin/samples_checker.py
@@ -101,7 +101,14 @@ def read_metadata_json(json_file):
         )
 
 
-def resolve_vcf_file_location(vcf_files, files_per_analysis):
+def associate_vcf_path_with_analysis(vcf_files, files_per_analysis):
+    """
+    Match the file names associated with each analysis in the metadata with the file paths given on the command
+    line.
+    :param vcf_files: the list of full paths to the VCF files
+    :param files_per_analysis: dictionary of analyses and their associated VCF file names
+    :returns: dictionary of analyses and their associated VCF file paths
+    """
     result_files_per_analysis = dict()
     for analysis in files_per_analysis:
         result_files_per_analysis[analysis] = []
@@ -137,8 +144,8 @@ def check_sample_name_concordance(metadata_json, vcf_files, output_yaml):
     found in the VCF files
     """
     samples_per_analysis, files_per_analysis = read_metadata_json(metadata_json)
-    files_per_analysis = resolve_vcf_file_location(vcf_files, files_per_analysis)
-    overall_differences, results_per_analysis_alias = compare_all_analysis(samples_per_analysis, files_per_analysis)
+    file_path_per_analysis = associate_vcf_path_with_analysis(vcf_files, files_per_analysis)
+    overall_differences, results_per_analysis_alias = compare_all_analysis(samples_per_analysis, file_path_per_analysis)
     write_result_yaml(output_yaml, overall_differences, results_per_analysis_alias)
 
 
diff --git a/eva_sub_cli/reporter.py b/eva_sub_cli/reporter.py
index d4d5479..c693d05 100755
--- a/eva_sub_cli/reporter.py
+++ b/eva_sub_cli/reporter.py
@@ -156,22 +156,19 @@ def _collect_validation_workflow_results(self, ):
         self._write_spreadsheet_validation_results()
 
     @lru_cache
-    def _vcf_check_log(self, vcf_file):
-        vcf_name = os.path.basename(vcf_file)
+    def _vcf_check_log(self, vcf_name):
         return resolve_single_file_path(
             os.path.join(self.output_dir, 'vcf_format', vcf_name + '.vcf_format.log')
         )
 
     @lru_cache
-    def _vcf_check_text_report(self, vcf_file):
-        vcf_name = os.path.basename(vcf_file)
+    def _vcf_check_text_report(self, vcf_name):
         return resolve_single_file_path(
             os.path.join(self.output_dir, 'vcf_format', vcf_name + '.*.txt')
         )
 
     @lru_cache
-    def _vcf_check_db_report(self, vcf_file):
-        vcf_name = os.path.basename(vcf_file)
+    def _vcf_check_db_report(self, vcf_name):
         return resolve_single_file_path(
             os.path.join(self.output_dir, 'vcf_format', vcf_name + '.*.db')
         )
@@ -182,9 +179,9 @@ def _collect_vcf_check_results(self,):
         for vcf_file in self.vcf_files:
             vcf_name = os.path.basename(vcf_file)
 
-            vcf_check_log = self._vcf_check_log(vcf_file)
-            vcf_check_text_report = self._vcf_check_text_report(vcf_file)
-            vcf_check_db_report = self._vcf_check_db_report(vcf_file)
+            vcf_check_log = self._vcf_check_log(vcf_name)
+            vcf_check_text_report = self._vcf_check_text_report(vcf_name)
+            vcf_check_db_report = self._vcf_check_db_report(vcf_name)
 
             if vcf_check_log and vcf_check_text_report and vcf_check_db_report:
                 valid, warning_count, error_count, critical_count, error_list, critical_list = self.parse_vcf_check_report(vcf_check_text_report)
@@ -201,22 +198,19 @@ def _collect_vcf_check_results(self,):
             }
 
     @lru_cache
-    def _assembly_check_log(self, vcf_file):
-        vcf_name = os.path.basename(vcf_file)
+    def _assembly_check_log(self, vcf_name):
         return resolve_single_file_path(
             os.path.join(self.output_dir, 'assembly_check', vcf_name + '.assembly_check.log')
         )
     @lru_cache
-    def _assembly_check_valid_vcf(self, vcf_file):
-        vcf_name = os.path.basename(vcf_file)
+    def _assembly_check_valid_vcf(self, vcf_name):
         return resolve_single_file_path(
             os.path.join(self.output_dir, 'assembly_check', vcf_name + '.valid_assembly_report*')
         )
 
     @lru_cache
-    def _assembly_check_text_report(self, vcf_file):
-        vcf_name = os.path.basename(vcf_file)
-        return  resolve_single_file_path(
+    def _assembly_check_text_report(self, vcf_name):
+        return resolve_single_file_path(
             os.path.join(self.output_dir, 'assembly_check', vcf_name + '*text_assembly_report*')
         )
 
@@ -226,9 +220,9 @@ def _collect_assembly_check_results(self):
         for vcf_file in self.vcf_files:
             vcf_name = os.path.basename(vcf_file)
 
-            assembly_check_log = self._assembly_check_log(vcf_file)
-            assembly_check_valid_vcf = self._assembly_check_valid_vcf(vcf_file)
-            assembly_check_text_report = self._assembly_check_text_report(vcf_file)
+            assembly_check_log = self._assembly_check_log(vcf_name)
+            assembly_check_valid_vcf = self._assembly_check_valid_vcf(vcf_name)
+            assembly_check_text_report = self._assembly_check_text_report(vcf_name)
 
             if assembly_check_log and assembly_check_valid_vcf and assembly_check_text_report:
                 error_list_from_log, nb_error_from_log, match, total = \