Skip to content

Commit

Permalink
only use project assembly as target if there's exactly one
Browse files Browse the repository at this point in the history
  • Loading branch information
apriltuesday committed Sep 15, 2023
1 parent 5747290 commit 2844d9c
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 11 deletions.
11 changes: 7 additions & 4 deletions eva_submission/eload_ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,8 +423,9 @@ def _get_target_assembly(self):
4. If there is exactly one alternate taxonomy, check Ensembl (main then rapid release) for its supported
assembly. If present use that, and insert into supported assembly table.
5. If we still do not have a target assembly (i.e. project taxonomy is not supported by Ensembl AND alternate
taxonomy cannot be determined or is not supported by Ensembl), then choose one of the project's
assemblies and insert into the supported assembly table.
taxonomy cannot be determined or is not supported by Ensembl), and the project has exactly one assembly,
then choose that one and insert into the supported assembly table.
6. Otherwise return None (which will skip clustering)
"""
if self.taxonomy == 9606:
self.info('No remapping or clustering for human studies')
Expand All @@ -435,8 +436,10 @@ def _get_target_assembly(self):
target_assembly = self._get_supported_assembly_from_evapro(alt_tax_id) or \
self._insert_new_supported_asm_from_ensembl(alt_tax_id)
if target_assembly is None:
# TODO any particular one?
target_assembly = list(self.assembly_accessions)[0]
if len(self.assembly_accessions) == 1:
target_assembly = list(self.assembly_accessions)[0]
add_to_supported_assemblies(self.metadata_connection_handle, source_of_assembly='EVA',
target_assembly=target_assembly, taxonomy_id=self.taxonomy)
return target_assembly

def _get_alt_tax_id(self):
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cached-property
cerberus
ebi-eva-common-pyutils>=0.5.6
ebi-eva-common-pyutils==0.5.8.dev0
eva-vcf-merge>=0.0.6
humanize
lxml
Expand Down
18 changes: 12 additions & 6 deletions tests/test_eload_ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,13 @@ def default_db_results_for_metadata_load():
[(391,)] # Check the assembly_set_id in update_assembly_set_in_analysis
]


def default_db_results_for_target_assembly():
    """Return mocked DB query results holding a single assembly accession.

    The return value is a list of query results, in the same shape as the
    other ``default_db_results_for_*`` fixtures: each query result is a list
    of rows and each row is a tuple of column values. Here there is one query
    result with one row whose only column is ``'GCA_999'``.

    NOTE(review): presumably consumed by the target-assembly lookup, judging
    by the function name -- confirm against the caller.
    """
    # The trailing comma matters: ('GCA_999') is just a parenthesised string,
    # so indexing the "row" would yield single characters. ('GCA_999',) is a
    # real one-column row, consistent with rows such as (391,) used elsewhere.
    return [
        [('GCA_999',)]
    ]


def default_db_results_for_accession():
browsable_files = [(1, 'ERA', 'filename_1', 'PRJ', 123), (2, 'ERA', 'filename_1', 'PRJ', 123)]
return [
Expand All @@ -44,7 +47,6 @@ def default_db_results_for_clustering():
def default_db_results_for_ingestion():
return (
default_db_results_for_metadata_load()
+ default_db_results_for_target_assembly()
+ default_db_results_for_accession()
+ default_db_results_for_clustering()
+ default_db_results_for_variant_load()
Expand Down Expand Up @@ -207,7 +209,7 @@ def test_ingest_accession(self):
m_get_vep_versions.return_value = (100, 100)
m_get_species.return_value = 'homo_sapiens'
m_post.return_value.text = self.get_mock_result_for_ena_date()
m_get_results.side_effect = default_db_results_for_target_assembly() + default_db_results_for_accession()
m_get_results.side_effect = default_db_results_for_accession()
self.eload.ingest(
instance_id=1,
tasks=['accession']
Expand Down Expand Up @@ -469,7 +471,6 @@ def test_resume_when_step_fails(self):
m_get_species.return_value = 'homo_sapiens'
m_post.return_value.text = self.get_mock_result_for_ena_date()
m_get_results.side_effect = default_db_results_for_metadata_load() \
+ default_db_results_for_target_assembly()\
+ default_db_results_for_ingestion()

m_run_command.side_effect = [
Expand Down Expand Up @@ -535,9 +536,10 @@ def test_resume_with_tasks(self):
m_get_vep_versions.return_value = (100, 100)
m_get_species.return_value = 'homo_sapiens'
m_post.return_value.text = self.get_mock_result_for_ena_date()
m_get_results.side_effect = (default_db_results_for_target_assembly()
+ default_db_results_for_variant_load() + default_db_results_for_target_assembly()
+ default_db_results_for_accession() + default_db_results_for_variant_load()
m_get_results.side_effect = (
default_db_results_for_variant_load()
+ default_db_results_for_accession()
+ default_db_results_for_variant_load()
)

m_run_command.side_effect = [
Expand Down Expand Up @@ -568,3 +570,7 @@ def test_resume_with_tasks(self):
'nextflow_dir')
assert new_accession_nextflow_dir == self.eload.nextflow_complete_value
assert not os.path.exists(accession_nextflow_dir)

def test_get_target_assembly(self):
    # Placeholder: the body is only `...`, so this test makes no assertions
    # and currently passes trivially.
    # TODO mock only the db, run everything else
    ...

0 comments on commit 2844d9c

Please sign in to comment.