Skip to content

Commit

Permalink
update methods
Browse files: browse the repository at this point in the history
  • Loading branch information
nitin-ebi committed Sep 20, 2023
1 parent 97ea791 commit 8943db7
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 13 deletions.
4 changes: 3 additions & 1 deletion eva_submission/eload_ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,9 @@ def _insert_new_supported_asm_from_ensembl(self, tax_id: int = None):
tax_id = tax_id or self.taxonomy
target_assembly = None
try:
target_assembly = get_supported_asm_from_ensembl(tax_id)
target_assembly = get_supported_asm_from_ensembl(tax_id,
private_config_xml_file=cfg['maven']['settings_file'],
profile=cfg['maven']['environment'])
except requests.exceptions.HTTPError as ex:
# Ensembl throws HTTP 400 Error if it cannot resolve a tax ID
if ex.errno == 400:
Expand Down
3 changes: 3 additions & 0 deletions eva_submission/eload_preparation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
import shutil
import requests
from ebi_eva_common_pyutils.metadata_utils import get_metadata_connection_handle, ensure_taxonomy_is_in_evapro

from retry import retry

Expand Down Expand Up @@ -151,6 +152,8 @@ def detect_metadata_attributes(self):
taxonomy_id = eva_metadata.project.get('Tax ID')
if taxonomy_id and (isinstance(taxonomy_id, int) or taxonomy_id.isdigit()):
self.eload_cfg.set('submission', 'taxonomy_id', value=int(taxonomy_id))
with get_metadata_connection_handle(cfg['maven']['environment'], cfg['maven']['settings_file']) as pg_conn:
ensure_taxonomy_is_in_evapro(pg_conn, taxonomy_id)
scientific_name = get_scientific_name_from_taxonomy(taxonomy_id,
private_config_xml_file=cfg['maven']['settings_file'],
profile=cfg['maven']['environment'])
Expand Down
10 changes: 5 additions & 5 deletions eva_submission/vep_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@

import pymongo
import requests
from ebi_eva_common_pyutils.ncbi_utils import get_ncbi_assembly_dicts_from_term, \
retrieve_species_scientific_name_from_tax_id_ncbi
from ebi_eva_common_pyutils.ncbi_utils import get_ncbi_assembly_dicts_from_term
from ebi_eva_common_pyutils.taxonomy.taxonomy import get_normalized_scientific_name
from retry import retry

from ebi_eva_common_pyutils.config import cfg
Expand Down Expand Up @@ -254,9 +254,9 @@ def recursive_nlst(ftp, root, pattern):

@retry(tries=4, delay=2, backoff=1.2, jitter=(1, 3), logger=logger)
def download_and_extract_vep_cache(ftp, vep_cache_file, taxonomy_id):
scientific_name = retrieve_species_scientific_name_from_tax_id_ncbi(taxonomy_id)
species_name = scientific_name.replace(' ', '_').lower()

species_name = get_normalized_scientific_name(taxonomy_id,
private_config_xml_file=cfg['maven']['settings_file'],
profile=cfg['maven']['environment'])
tmp_dir = tempfile.TemporaryDirectory()
destination = os.path.join(tmp_dir.name, f'{species_name}.tar.gz')
with open(destination, 'wb+') as dest:
Expand Down
4 changes: 3 additions & 1 deletion tests/test_eload_preparation.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,9 @@ def test_detect_metadata_attributes(self):
self.eload.eload_cfg.set('submission', 'metadata_spreadsheet', value=metadata)
cfg.content['maven']['settings_file'] = None
cfg.content['maven']['environment'] = None
with patch('eva_submission.eload_preparation.get_scientific_name_from_taxonomy') as m_sci_name:
with patch('eva_submission.eload_preparation.get_scientific_name_from_taxonomy') as m_sci_name, \
patch('eva_submission.eload_preparation.get_metadata_connection_handle', autospec=True), \
patch('eva_submission.eload_preparation.ensure_taxonomy_is_in_evapro'):
m_sci_name.return_value = 'Homo sapiens'
self.eload.detect_metadata_attributes()

Expand Down
14 changes: 8 additions & 6 deletions tests/test_vep_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,13 @@ def fake_dir(path, callback):
)

def test_get_vep_versions_from_ensembl(self):
vep_version, cache_version = get_vep_and_vep_cache_version_from_ensembl('GCA_000827895.1')
self.assertEqual(vep_version, 110)
self.assertEqual(cache_version, 57)
assert os.path.exists(os.path.join(cfg['vep_cache_path'], 'thelohanellus_kitauei'))
assert os.listdir(os.path.join(cfg['vep_cache_path'], 'thelohanellus_kitauei')) == ['57_ASM82789v1']
with patch('eva_submission.vep_utils.get_normalized_scientific_name') as m_get_scf_name:
m_get_scf_name.return_value = 'thelohanellus_kitauei'
vep_version, cache_version = get_vep_and_vep_cache_version_from_ensembl('GCA_000827895.1')
self.assertEqual(vep_version, 110)
self.assertEqual(cache_version, 57)
assert os.path.exists(os.path.join(cfg['vep_cache_path'], 'thelohanellus_kitauei'))
assert os.listdir(os.path.join(cfg['vep_cache_path'], 'thelohanellus_kitauei')) == ['57_ASM82789v1']

def test_get_vep_versions_from_ensembl_not_found(self):
vep_version, cache_version = get_vep_and_vep_cache_version_from_ensembl('GCA_015220235.1')
Expand Down Expand Up @@ -120,7 +122,7 @@ def test_get_vep_versions(self):
get_vep_and_vep_cache_version('fake_mongo', 'fake_db', 'fake_assembly')

def test_download_and_extract_vep_cache(self):
with patch('eva_submission.vep_utils.retrieve_species_scientific_name_from_tax_id_ncbi') as m_get_scf_name:
with patch('eva_submission.vep_utils.get_normalized_scientific_name') as m_get_scf_name:
m_get_scf_name.return_value = 'whatever_species_name'
download_and_extract_vep_cache(
get_ftp_connection('ftp.ensembl.org'),
Expand Down

0 comments on commit 8943db7

Please sign in to comment.