From 083da7411da6c13c6a1c8c7d5bb4156b5833b76d Mon Sep 17 00:00:00 2001 From: Timothee Cezard Date: Mon, 22 Apr 2024 14:16:59 +0100 Subject: [PATCH] Provide NCBI API keys when possible (#203) * Provide NCBI API keys when possible * Fix requirements.txt --- bin/insert_new_assembly.py | 3 ++- eva_submission/eload_ingestion.py | 1 + eva_submission/eload_utils.py | 2 +- eva_submission/vep_utils.py | 4 ++-- eva_submission/xlsx/xlsx_validation.py | 2 +- requirements.txt | 2 +- 6 files changed, 8 insertions(+), 6 deletions(-) diff --git a/bin/insert_new_assembly.py b/bin/insert_new_assembly.py index 55804bf..e5bd945 100755 --- a/bin/insert_new_assembly.py +++ b/bin/insert_new_assembly.py @@ -53,7 +53,8 @@ def main(): insert_new_assembly_and_taxonomy( metadata_connection_handle=conn, assembly_accession=assembly_accession, - taxonomy_id=taxon_id + taxonomy_id=taxon_id, + ncbi_api_key=cfg['eutils_api_key'] ) elif taxon_id: ensure_taxonomy_is_in_evapro(conn, taxon_id) diff --git a/eva_submission/eload_ingestion.py b/eva_submission/eload_ingestion.py index 9369046..72fd38e 100644 --- a/eva_submission/eload_ingestion.py +++ b/eva_submission/eload_ingestion.py @@ -206,6 +206,7 @@ def check_variant_db(self): metadata_connection_handle=conn, assembly_accession=assembly, taxonomy_id=self.taxonomy, + ncbi_api_key=cfg['eutils_api_key'] ) for db_info in assembly_to_db_name.values(): diff --git a/eva_submission/eload_utils.py b/eva_submission/eload_utils.py index e55d906..8083010 100644 --- a/eva_submission/eload_utils.py +++ b/eva_submission/eload_utils.py @@ -54,7 +54,7 @@ def resolve_accession_from_text(reference_text): if NCBIAssembly.is_assembly_accession_format(reference_text): return [reference_text] # Search for a reference genome that resolve this text - accession = retrieve_genbank_assembly_accessions_from_ncbi(reference_text) + accession = retrieve_genbank_assembly_accessions_from_ncbi(reference_text, api_key=cfg['eutils_api_key']) if accession: return accession diff --git a/eva_submission/vep_utils.py b/eva_submission/vep_utils.py index a2bb775..5b46f33 100644 --- a/eva_submission/vep_utils.py +++ b/eva_submission/vep_utils.py @@ -139,7 +139,7 @@ def get_species_and_assembly(assembly_acc): Returns None if the taxonomy is not known. """ # We first need to search for the species associated with the assembly - assembly_dicts = get_ncbi_assembly_dicts_from_term(assembly_acc) + assembly_dicts = get_ncbi_assembly_dicts_from_term(assembly_acc, api_key=cfg['eutils_api_key']) taxid_and_assembly_name = set([ (assembly_dict.get('taxid'), assembly_dict.get('assemblyname')) for assembly_dict in assembly_dicts @@ -253,7 +253,7 @@ def recursive_nlst(ftp, root, pattern): @retry(tries=4, delay=2, backoff=1.2, jitter=(1, 3), logger=logger) def download_and_extract_vep_cache(ftp, vep_cache_file, taxonomy_id): - scientific_name = retrieve_species_scientific_name_from_tax_id_ncbi(taxonomy_id) + scientific_name = retrieve_species_scientific_name_from_tax_id_ncbi(taxonomy_id, api_key=cfg['eutils_api_key']) species_name = scientific_name.replace(' ', '_').lower() tmp_dir = tempfile.TemporaryDirectory() diff --git a/eva_submission/xlsx/xlsx_validation.py b/eva_submission/xlsx/xlsx_validation.py index 2dc9dc2..4e2ae34 100644 --- a/eva_submission/xlsx/xlsx_validation.py +++ b/eva_submission/xlsx/xlsx_validation.py @@ -107,7 +107,7 @@ def check_reference_genome(self): """Check if the references can be retrieved""" references = set([row['Reference'] for row in self.metadata['Analysis'] if row['Reference']]) for reference in references: - accessions = retrieve_genbank_assembly_accessions_from_ncbi(reference) + accessions = retrieve_genbank_assembly_accessions_from_ncbi(reference, api_key=cfg['eutils_api_key']) if len(accessions) == 0: self.error_list.append(f'In Analysis, Reference {reference} did not resolve to any accession') elif len(accessions) > 1: diff --git a/requirements.txt b/requirements.txt index 29dee7e..20f8e11 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ cached-property cerberus -ebi-eva-common-pyutils[eva-internal]==0.6.4 +ebi-eva-common-pyutils[eva-internal]==0.6.6 eva-vcf-merge==0.0.8 humanize lxml