Skip to content

Commit

Permalink
Changed genome retrieval to query the "genome" data type first and then fetch the sequences by genome ID, instead of querying "genome_sequence" directly
Browse files Browse the repository at this point in the history
  • Loading branch information
VinzentRisch committed Sep 13, 2024
1 parent a68b249 commit 3b6f8d9
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 21 deletions.
39 changes: 29 additions & 10 deletions rescript/bv_brc.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

def get_bv_brc_metadata(
ids_metadata: Union[qiime2.NumericMetadataColumn,
qiime2.CategoricalMetadataColumn] = None,
qiime2.CategoricalMetadataColumn] = None,
data_type: str = None,
rql_query: str = None,
data_field: str = None,
Expand Down Expand Up @@ -67,29 +67,48 @@ def get_bv_brc_genomes(
# Parameter validation
rql_query = parameter_validation(rql_query=rql_query,
ids=ids,
data_type="genome_sequence",
data_type="genome",
data_field=data_field,
metadata=ids_metadata
)

# Get requests response for genome sequences
sequences = download_data(data_type="genome_sequence",
query=rql_query,
accept="application/json",
)
response_genomes = download_data(data_type="genome",
query=rql_query,
accept="application/json",
select=["genome_id", "taxon_id"]
)

# Convert sequences in JSON to FASTA file
genomes = create_genome_fasta(genome_sequences=sequences)
# Get genome sequences and create FASTA files
genomes = get_genome_sequences(response_genomes=response_genomes)

# Get taxonomy for sequences
taxonomy = get_taxonomy(response_sequences=sequences,
taxonomy = get_taxonomy(response_sequences=response_genomes,
ranks=ranks,
rank_propagation=rank_propagation,
accession_name="accession")

return genomes, taxonomy


def get_genome_sequences(response_genomes):
    """Download the sequences of the given genomes and build FASTA files.

    Parameters
    ----------
    response_genomes : list of dict
        Records of a "genome" query. Every record must provide a
        'genome_id' key; other keys are ignored here.

    Returns
    -------
    The value returned by ``create_genome_fasta`` for the downloaded
    "genome_sequence" records.

    Raises
    ------
    KeyError
        If a record has no 'genome_id' key.
    """
    # Deduplicate the ids and sort them so that the query string is the
    # same on every run; joining a plain set of strings yields an order
    # that changes with Python's per-process hash randomization.
    genome_ids = sorted(
        {str(entry['genome_id']) for entry in response_genomes}
    )

    # NOTE(review): an empty response produces "in(genome_id,())";
    # confirm that download_data copes with that query.
    genome_sequences = download_data(
        data_type="genome_sequence",
        query=f"in(genome_id,({','.join(genome_ids)}))",
        accept="application/json",
        select=["accession", "description", "genome_name",
                "genome_id", "sequence"]
    )

    # Create FASTA files from the downloaded sequence records
    return create_genome_fasta(genome_sequences=genome_sequences)


def get_bv_brc_genome_features(
ids_metadata: Union[qiime2.NumericMetadataColumn,
qiime2.CategoricalMetadataColumn] = None,
Expand Down
40 changes: 29 additions & 11 deletions rescript/tests/test_bv_brc.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
get_bv_brc_genome_features, parameter_validation, \
error_handling, download_data, create_genome_fasta, \
create_taxonomy_entry, get_loci, read_tsv_data_with_dtypes, process_loci, \
get_sequences, get_taxonomy, create_taxonomy
get_sequences, get_taxonomy, create_taxonomy, get_genome_sequences


class TestParameterValidation(TestPluginBase):
Expand Down Expand Up @@ -343,20 +343,12 @@ class TestGetBvBrcGenomes(TestPluginBase):
package = 'rescript.tests'

@patch('rescript.bv_brc.get_taxonomy')
@patch('rescript.bv_brc.create_genome_fasta')
@patch('rescript.bv_brc.get_genome_sequences')
@patch('rescript.bv_brc.download_data')
@patch('rescript.bv_brc.parameter_validation')
def test_get_bv_brc_genomes(self, mock_parameter_validation,
mock_download_data, mock_create_genome_fasta,
mock_download_data, mock_get_genome_sequences,
mock_get_taxonomy):
# Mocked return values for the external functions
mock_parameter_validation.return_value = "mocked_rql_query"
mock_download_data.return_value = [
{'id': 'genome1', 'sequence': 'ATGC'},
{'id': 'genome2', 'sequence': 'GCTA'}]
mock_create_genome_fasta.return_value = MagicMock(
name='GenomeSequencesDirectoryFormat')
mock_get_taxonomy.return_value = MagicMock(name='TSVTaxonomyFormat')

# Call the function
get_bv_brc_genomes(
Expand Down Expand Up @@ -764,3 +756,29 @@ def test_get_sequences(self, mock_open, mock_download_data):

# Check if the correct sequences were written to the proteins file
mock_open().write.assert_any_call('>feature1\nMKV\n')


class TestGetGenomeSequences(TestPluginBase):
    """Unit tests for ``get_genome_sequences`` with all I/O patched out."""
    package = 'rescript.tests'

    @patch('rescript.bv_brc.download_data')
    @patch('rescript.bv_brc.create_genome_fasta')
    def test_get_genome_sequences(self, mock_create_genome_fasta,
                                  mock_download_data):
        # Sample response_genomes to be used as input; the repeated id
        # checks that every genome is requested only once
        response_genomes = [
            {'genome_id': '12345'},
            {'genome_id': '67890'},
            {'genome_id': '12345'}
        ]

        # Call the function
        result = get_genome_sequences(response_genomes)

        # Assert that download_data was called with the correct arguments
        mock_download_data.assert_called_once_with(
            data_type="genome_sequence",
            query=unittest.mock.ANY,
            accept="application/json",
            select=["accession", "description", "genome_name", "genome_id",
                    "sequence"]
        )

        # The order of the ids inside the query is not part of the
        # contract, so compare the requested ids order-independently
        query = mock_download_data.call_args[1]["query"]
        prefix, suffix = "in(genome_id,(", "))"
        self.assertTrue(query.startswith(prefix), query)
        self.assertTrue(query.endswith(suffix), query)
        requested_ids = query[len(prefix):-len(suffix)].split(",")
        self.assertCountEqual(requested_ids, ['12345', '67890'])

        # The downloaded records must be handed to create_genome_fasta
        # and its result must be returned unchanged
        mock_create_genome_fasta.assert_called_once_with(
            genome_sequences=mock_download_data.return_value
        )
        self.assertIs(result, mock_create_genome_fasta.return_value)

0 comments on commit 3b6f8d9

Please sign in to comment.