Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix v1.15 #193

Merged
merged 4 commits into from
Jan 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions eva_submission/eload_ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -642,11 +642,14 @@ def run_nextflow(self, workflow_name, params, resume, tasks=all_tasks):
task for task in tasks
if self.eload_cfg.query(self.config_section, workflow_name, 'nextflow_dir', task) == self.nextflow_complete_value
]
if completed_tasks:
self.info(f'Task(s) {", ".join(completed_tasks)} already completed, skipping.')
for task in completed_tasks:
self.info(f'Task {task} already completed, skipping.')
# Remove completed tasks
for task in completed_tasks:
tasks.remove(task)
if not tasks:
self.info(f'No more to perform: Skip nextflow run.')
return
# Retrieve the work directory for the remaining tasks
work_dirs = [
self.eload_cfg.query(self.config_section, workflow_name, 'nextflow_dir', task)
Expand Down
2 changes: 1 addition & 1 deletion eva_submission/nextflow/accession_and_load.nf
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ workflow {
.splitCsv(header:true)
.map{row -> tuple(file(row.vcf_file).name, file(row.vcf_file), row.assembly_accession, row.aggregation, file(row.fasta), file(row.report))}
.combine(normalise_vcf.out.vcf_tuples, by:0) // Join based on the vcf_filename
.map {tuple(it[0], it[7], it[2], it[3], it[4], it[5])} // vcf_filename, normalised vcf, assembly_accession, aggregation, fasta, report
.map {tuple(it[0], it[6], it[2], it[3], it[4], it[5])} // vcf_filename, normalised vcf, assembly_accession, aggregation, fasta, report
accession_vcf(normalised_vcfs_ch)
sort_and_compress_vcf(accession_vcf.out.accession_done)
csi_vcfs = sort_and_compress_vcf.out.compressed_vcf
Expand Down
10 changes: 5 additions & 5 deletions eva_submission/xlsx/xlsx_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def check_biosamples_accessions(self):
sample_accession = row.get('Sample Accession').strip()
try:
sample_data = self.communicator.follows_link('samples', join_url=sample_accession)
self._validate_existing_bioSample(sample_data, row.get('row_num'), sample_accession)
self._validate_existing_biosample(sample_data, row.get('row_num'), sample_accession)
except ValueError:
self.error_list.append(
f'In Sample, row {row.get("row_num")} BioSamples accession {sample_accession} '
Expand Down Expand Up @@ -208,7 +208,7 @@ def _check_date(self, date):
str(date).lower() in not_provided_check_list

def check_date(self, row, key, required=True):
if required and key not in row:
if required and not row.get(key):
self.error_list.append(f'In row {row.get("row_num")}, {key} is required and missing')
return
if key in row and self._check_date(row[key]):
Expand All @@ -226,18 +226,18 @@ def _check_date_str_format(self, d):
def _validate_existing_biosample(self, sample_data, row_num, accession):
"""This function only check if the existing sample has the expected fields present"""
found_collection_date=False
for key in ['collection_date', 'collection date']:
for key in ['collection_date', 'collection date']:
if key in sample_data['characteristics'] and \
self._check_date(sample_data['characteristics'][key][0]['text']):
found_collection_date = True
if not found_collection_date:
self.error_list.append(
f'In row {row_num}, samples accession {accession} does not have a valid collection date')
f'In row {row_num}, existing sample accession {accession} does not have a valid collection date')
found_geo_loc = False
for key in ['geographic location (country and/or sea)']:
if key in sample_data['characteristics'] and \
self._check_date(sample_data['characteristics'][key][0]['text']):
found_geo_loc = True
if not found_geo_loc:
self.error_list.append(
f'In row {row_num}, samples accession {accession} does not have a valid geographic location')
f'In row {row_num}, existing sample accession {accession} does not have a valid geographic location')
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@

setup(
name='eva_submission',
packages=['eva_submission', 'eva_submission.ENA_submission', 'eva_submission.xlsx', 'eva_submission.steps'],
packages=['eva_submission', 'eva_submission.ENA_submission', 'eva_submission.xlsx', 'eva_submission.steps',
'eva_submission.biosample_submission'],
package_data={'eva_submission': ['nextflow/*', 'etc/*', 'VERSION']},
version=version,
license='Apache',
Expand Down
Loading