diff --git a/eva_submission/eload_backlog.py b/eva_submission/eload_backlog.py index eca54d4..19a2304 100644 --- a/eva_submission/eload_backlog.py +++ b/eva_submission/eload_backlog.py @@ -124,7 +124,7 @@ def find_file_on_ena(self, fn, analysis): if not os.path.exists(full_path): try: self.info(f'Retrieve {basename} in {analysis} from ENA ftp') - url = f'ftp://ftp.sra.ebi.ac.uk/vol1/{analysis[:6]}/{analysis}/{basename}' + url = f'http://ftp.sra.ebi.ac.uk/vol1/{analysis[:6]}/{analysis}/{basename}' download_file(url, full_path) except urllib.error.URLError: self.error(f'Could not access {url} on ENA: most likely does not exist') diff --git a/eva_submission/eload_ingestion.py b/eva_submission/eload_ingestion.py index 3967e76..06db869 100644 --- a/eva_submission/eload_ingestion.py +++ b/eva_submission/eload_ingestion.py @@ -420,8 +420,8 @@ def _get_target_assembly(self): If present use that, and insert into supported assembly table. 3. If the project's taxonomy is not in Ensembl, check ENA for the taxonomies associated with the project's assemblies, called the alternate taxonomies. - 4. If there is exactly one alternate taxonomy, check Ensembl (main then rapid release) for its supported - assembly. If present use that, and insert into supported assembly table. + 4. If there is exactly one alternate taxonomy, check EVAPRO first then Ensembl (main and rapid release) for its + supported assembly. If present use that, inserting into supported assembly table if necessary. 5. If we still do not have a target assembly (i.e. project taxonomy is not supported by Ensembl AND alternate taxonomy cannot be determined or is not supported by Ensembl), and the project has exactly one assembly, then choose that one and insert into the supported assembly table. @@ -441,6 +441,9 @@ def _get_target_assembly(self): target_assembly = list(self.assembly_accessions)[0] add_to_supported_assemblies(self.metadata_connection_handle, source_of_assembly='EVA', target_assembly=target_assembly, taxonomy_id=self.taxonomy) + else: + self.warning(f'Could not determine target assembly from EVAPRO, Ensembl, or submitted assemblies: ' + f'{", ".join(self.assembly_accessions)}') return target_assembly def _get_alt_tax_id(self): diff --git a/requirements.txt b/requirements.txt index 812aa02..dd1feef 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ cached-property cerberus -ebi-eva-common-pyutils==0.5.8.dev0 +ebi-eva-common-pyutils==0.5.8.dev1 eva-vcf-merge>=0.0.6 humanize lxml