diff --git a/eva_submission/eload_ingestion.py b/eva_submission/eload_ingestion.py index d4252e7..d9cf34f 100644 --- a/eva_submission/eload_ingestion.py +++ b/eva_submission/eload_ingestion.py @@ -399,7 +399,9 @@ def _insert_new_supported_asm_from_ensembl(self, tax_id: int = None): tax_id = tax_id or self.taxonomy target_assembly = None try: - target_assembly = get_supported_asm_from_ensembl(tax_id) + target_assembly = get_supported_asm_from_ensembl(tax_id, + private_config_xml_file=cfg['maven']['settings_file'], + profile=cfg['maven']['environment']) except requests.exceptions.HTTPError as ex: # Ensembl throws HTTP 400 Error if it cannot resolve a tax ID if ex.errno == 400: diff --git a/eva_submission/eload_preparation.py b/eva_submission/eload_preparation.py index 2a4d3ce..5362289 100644 --- a/eva_submission/eload_preparation.py +++ b/eva_submission/eload_preparation.py @@ -2,6 +2,7 @@ import os import shutil import requests +from ebi_eva_common_pyutils.metadata_utils import get_metadata_connection_handle, ensure_taxonomy_is_in_evapro from retry import retry @@ -151,6 +152,8 @@ def detect_metadata_attributes(self): taxonomy_id = eva_metadata.project.get('Tax ID') if taxonomy_id and (isinstance(taxonomy_id, int) or taxonomy_id.isdigit()): self.eload_cfg.set('submission', 'taxonomy_id', value=int(taxonomy_id)) + with get_metadata_connection_handle(cfg['maven']['environment'], cfg['maven']['settings_file']) as pg_conn: + ensure_taxonomy_is_in_evapro(pg_conn, taxonomy_id) scientific_name = get_scientific_name_from_taxonomy(taxonomy_id, private_config_xml_file=cfg['maven']['settings_file'], profile=cfg['maven']['environment']) diff --git a/eva_submission/vep_utils.py b/eva_submission/vep_utils.py index 3bd357e..bb0ddd7 100644 --- a/eva_submission/vep_utils.py +++ b/eva_submission/vep_utils.py @@ -9,8 +9,8 @@ import pymongo import requests -from ebi_eva_common_pyutils.ncbi_utils import get_ncbi_assembly_dicts_from_term, \ - retrieve_species_scientific_name_from_tax_id_ncbi +from ebi_eva_common_pyutils.ncbi_utils import get_ncbi_assembly_dicts_from_term +from ebi_eva_common_pyutils.taxonomy.taxonomy import get_normalized_scientific_name from retry import retry from ebi_eva_common_pyutils.config import cfg @@ -254,9 +254,9 @@ def recursive_nlst(ftp, root, pattern): @retry(tries=4, delay=2, backoff=1.2, jitter=(1, 3), logger=logger) def download_and_extract_vep_cache(ftp, vep_cache_file, taxonomy_id): - scientific_name = retrieve_species_scientific_name_from_tax_id_ncbi(taxonomy_id) - species_name = scientific_name.replace(' ', '_').lower() - + species_name = get_normalized_scientific_name(taxonomy_id, + private_config_xml_file=cfg['maven']['settings_file'], + profile=cfg['maven']['environment']) tmp_dir = tempfile.TemporaryDirectory() destination = os.path.join(tmp_dir.name, f'{species_name}.tar.gz') with open(destination, 'wb+') as dest: diff --git a/tests/test_eload_preparation.py b/tests/test_eload_preparation.py index d6b3e9f..9afcdda 100644 --- a/tests/test_eload_preparation.py +++ b/tests/test_eload_preparation.py @@ -82,7 +82,9 @@ def test_detect_metadata_attributes(self): self.eload.eload_cfg.set('submission', 'metadata_spreadsheet', value=metadata) cfg.content['maven']['settings_file'] = None cfg.content['maven']['environment'] = None - with patch('eva_submission.eload_preparation.get_scientific_name_from_taxonomy') as m_sci_name: + with patch('eva_submission.eload_preparation.get_scientific_name_from_taxonomy') as m_sci_name, \ + patch('eva_submission.eload_preparation.get_metadata_connection_handle', autospec=True), \ + patch('eva_submission.eload_preparation.ensure_taxonomy_is_in_evapro'): m_sci_name.return_value = 'Homo sapiens' self.eload.detect_metadata_attributes() diff --git a/tests/test_vep_utils.py b/tests/test_vep_utils.py index db9b442..adbb2d8 100644 --- a/tests/test_vep_utils.py +++ b/tests/test_vep_utils.py @@ -62,11 +62,13 @@ def fake_dir(path, callback): ) def test_get_vep_versions_from_ensembl(self): - vep_version, cache_version = get_vep_and_vep_cache_version_from_ensembl('GCA_000827895.1') - self.assertEqual(vep_version, 110) - self.assertEqual(cache_version, 57) - assert os.path.exists(os.path.join(cfg['vep_cache_path'], 'thelohanellus_kitauei')) - assert os.listdir(os.path.join(cfg['vep_cache_path'], 'thelohanellus_kitauei')) == ['57_ASM82789v1'] + with patch('eva_submission.vep_utils.get_normalized_scientific_name') as m_get_scf_name: + m_get_scf_name.return_value = 'thelohanellus_kitauei' + vep_version, cache_version = get_vep_and_vep_cache_version_from_ensembl('GCA_000827895.1') + self.assertEqual(vep_version, 110) + self.assertEqual(cache_version, 57) + assert os.path.exists(os.path.join(cfg['vep_cache_path'], 'thelohanellus_kitauei')) + assert os.listdir(os.path.join(cfg['vep_cache_path'], 'thelohanellus_kitauei')) == ['57_ASM82789v1'] def test_get_vep_versions_from_ensembl_not_found(self): vep_version, cache_version = get_vep_and_vep_cache_version_from_ensembl('GCA_015220235.1') @@ -120,7 +122,7 @@ def test_get_vep_versions(self): get_vep_and_vep_cache_version('fake_mongo', 'fake_db', 'fake_assembly') def test_download_and_extract_vep_cache(self): - with patch('eva_submission.vep_utils.retrieve_species_scientific_name_from_tax_id_ncbi') as m_get_scf_name: + with patch('eva_submission.vep_utils.get_normalized_scientific_name') as m_get_scf_name: m_get_scf_name.return_value = 'whatever_species_name' download_and_extract_vep_cache( get_ftp_connection('ftp.ensembl.org'),