EBIvariation · tcezard · Jan 15, 2024 · Jan 8, 2024 · Jan 8, 2024 · Jan 8, 2024
diff --git a/eva_submission/eload_ingestion.py b/eva_submission/eload_ingestion.py
@@ -642,11 +642,14 @@ def run_nextflow(self, workflow_name, params, resume, tasks=all_tasks):
                 task for task in tasks
                 if self.eload_cfg.query(self.config_section, workflow_name, 'nextflow_dir', task) == self.nextflow_complete_value
             ]
-            if completed_tasks:
-                self.info(f'Task(s) {", ".join(completed_tasks)} already completed, skipping.')
+            for task in completed_tasks:
+                self.info(f'Task {task} already completed, skipping.')
             # Remove completed tasks
             for task in completed_tasks:
                 tasks.remove(task)
+            if not tasks:
+                self.info(f'No more to perform: Skip nextflow run.')
+                return
             # Retrieve the work directory for the remaining tasks
             work_dirs = [
                 self.eload_cfg.query(self.config_section, workflow_name, 'nextflow_dir', task)

diff --git a/eva_submission/nextflow/accession_and_load.nf b/eva_submission/nextflow/accession_and_load.nf
@@ -122,7 +122,7 @@ workflow {
                 .splitCsv(header:true)
                 .map{row -> tuple(file(row.vcf_file).name, file(row.vcf_file), row.assembly_accession, row.aggregation, file(row.fasta), file(row.report))}
                 .combine(normalise_vcf.out.vcf_tuples, by:0)     // Join based on the vcf_filename
-                .map {tuple(it[0], it[7], it[2], it[3], it[4], it[5])}   // vcf_filename, normalised vcf, assembly_accession, aggregation, fasta, report
+                .map {tuple(it[0], it[6], it[2], it[3], it[4], it[5])}   // vcf_filename, normalised vcf, assembly_accession, aggregation, fasta, report
             accession_vcf(normalised_vcfs_ch)
             sort_and_compress_vcf(accession_vcf.out.accession_done)
             csi_vcfs = sort_and_compress_vcf.out.compressed_vcf

diff --git a/eva_submission/xlsx/xlsx_validation.py b/eva_submission/xlsx/xlsx_validation.py
@@ -140,7 +140,7 @@ def check_biosamples_accessions(self):
                 sample_accession = row.get('Sample Accession').strip()
                 try:
                     sample_data = self.communicator.follows_link('samples', join_url=sample_accession)
-                    self._validate_existing_bioSample(sample_data, row.get('row_num'), sample_accession)
+                    self._validate_existing_biosample(sample_data, row.get('row_num'), sample_accession)
                 except ValueError:
                     self.error_list.append(
                         f'In Sample, row {row.get("row_num")} BioSamples accession {sample_accession} '
@@ -208,7 +208,7 @@ def _check_date(self, date):
                str(date).lower() in not_provided_check_list
 
     def check_date(self, row, key, required=True):
-        if required and key not in row:
+        if required and not row.get(key):
             self.error_list.append(f'In row {row.get("row_num")}, {key} is required and missing')
             return
         if key in row and self._check_date(row[key]):
@@ -226,18 +226,18 @@ def _check_date_str_format(self, d):
     def _validate_existing_biosample(self, sample_data, row_num, accession):
         """This function only check if the existing sample has the expected fields present"""
         found_collection_date=False
-        for key in  ['collection_date', 'collection date']:
+        for key in ['collection_date', 'collection date']:
             if key in sample_data['characteristics'] and \
                     self._check_date(sample_data['characteristics'][key][0]['text']):
                 found_collection_date = True
         if not found_collection_date:
             self.error_list.append(
-                f'In row {row_num}, samples accession {accession} does not have a valid collection date')
+                f'In row {row_num}, existing sample accession {accession} does not have a valid collection date')
         found_geo_loc = False
         for key in ['geographic location (country and/or sea)']:
             if key in sample_data['characteristics'] and \
                     self._check_date(sample_data['characteristics'][key][0]['text']):
                 found_geo_loc = True
         if not found_geo_loc:
             self.error_list.append(
-                f'In row {row_num}, samples accession {accession} does not have a valid geographic location')
+                f'In row {row_num}, existing sample accession {accession} does not have a valid geographic location')
diff --git a/setup.py b/setup.py
@@ -10,7 +10,8 @@
 
 setup(
     name='eva_submission',
-    packages=['eva_submission', 'eva_submission.ENA_submission', 'eva_submission.xlsx', 'eva_submission.steps'],
+    packages=['eva_submission', 'eva_submission.ENA_submission', 'eva_submission.xlsx', 'eva_submission.steps',
+              'eva_submission.biosample_submission'],
     package_data={'eva_submission': ['nextflow/*', 'etc/*', 'VERSION']},
     version=version,
     license='Apache',