Skip to content

Commit

Permalink
allow unifying ranks (#145)
Browse files Browse the repository at this point in the history
* allow unifying ranks

* check if osqp is there

* check if osqp is there
  • Loading branch information
cdiener authored Oct 29, 2023
1 parent a467432 commit 2858dd7
Show file tree
Hide file tree
Showing 5 changed files with 111 additions and 13 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pythonpackage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ jobs:
run: |
python -m pip install --upgrade pip cryptography
pip install wheel numpy Cython
pip install biom-format
pip install biom-format highspy
- name: Install MICOM
run: pip install -e .
- name: install CPLEX
Expand Down
2 changes: 2 additions & 0 deletions micom/community.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from micom.optcom import optcom, solve
from micom.problems import cooperative_tradeoff, knockout_taxa
from micom.qiime_formats import load_qiime_model_db
from micom.taxonomy import unify_rank_prefixes
from rich.progress import track
from tempfile import TemporaryDirectory

Expand Down Expand Up @@ -209,6 +210,7 @@ def __init__(
if r in taxonomy.columns and r in manifest.columns
]
manifest = manifest[keep_cols + ["file"]]
taxonomy = unify_rank_prefixes(taxonomy, manifest)
merged = pd.merge(taxonomy, manifest, on=keep_cols)

self.__db_metrics = pd.Series(
Expand Down
79 changes: 67 additions & 12 deletions micom/taxonomy.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,42 @@
"""Helpers to convert external data to a MICOM taxonomy."""

from micom.community import _ranks
from micom.qiime_formats import (
load_qiime_feature_table,
load_qiime_taxonomy,
)
import pandas as pd


# Taxonomic rank names recognized by the helpers in this module, listed from
# the broadest (kingdom) to the most specific (strain).
RANKS = ["kingdom", "phylum", "class", "order", "family", "genus", "species", "strain"]


def build_from_qiime(
abundance,
abundance: pd.DataFrame,
taxonomy: pd.Series,
collapse_on="genus"
collapse_on: str = "genus",
trim_rank_prefix: bool = False,
) -> pd.DataFrame:
"""Build the specification for the community models."""
taxa = taxonomy.str.replace("[\\w_]+__|\\[|\\]", "", regex=True)
if trim_rank_prefix:
taxa = taxonomy.str.replace("[\\w_]+__|\\[|\\]", "", regex=True)
taxa = taxa.str.split(";\\s*", expand=True).replace("", None)
taxa.columns = _ranks[0 : taxa.shape[1]]
taxa.columns = RANKS[0 : taxa.shape[1]]
taxa["taxid"] = taxonomy.index
taxa.index == taxa.taxid

if isinstance(collapse_on, str):
collapse_on = [collapse_on]

ranks = [
r
for r in collapse_on
if r in taxa.columns
]
ranks = [r for r in collapse_on if r in taxa.columns]
taxa["mapping_ranks"] = taxa[ranks].apply(
lambda s: "|".join(s.astype("str")), axis=1
)

abundance = (
abundance.collapse(
lambda id_, x: taxa.loc[id_, "mapping_ranks"],
axis="observation", norm=False
axis="observation",
norm=False,
)
.to_dataframe(dense=True)
.T
Expand All @@ -48,7 +49,7 @@ def build_from_qiime(
abundance = pd.merge(
abundance[abundance.abundance > 0.0],
taxa[ranks + ["mapping_ranks"]].drop_duplicates(),
on="mapping_ranks"
on="mapping_ranks",
)
abundance["id"] = abundance["mapping_ranks"].replace(
r"[^A-Za-z0-9_]+", "_", regex=True
Expand Down Expand Up @@ -84,3 +85,57 @@ def qiime_to_micom(feature_table, taxonomy, collapse_on="genus"):
taxonomy = load_qiime_taxonomy(taxonomy)

return build_from_qiime(table, taxonomy, collapse_on)


def rank_prefixes(manifest: pd.DataFrame) -> pd.Series:
    """Get the used prefixes for taxonomic ranks.

    The prefix of a rank is read from the first non-missing entry of the
    corresponding column. A rank whose column is empty or contains only
    missing values is reported as having no prefix.

    Arguments
    ---------
    manifest : pandas.DataFrame
        A model database manifest.

    Returns
    -------
    pandas.Series
        The detected prefix (for instance "g__") for each taxonomic rank in
        the manifest, indexed by column name. Ranks without a prefix are NaN.
    """
    ranks = [c for c in manifest.columns if isinstance(c, str) and c.lower() in RANKS]
    prefixes = {}
    for rank in ranks:
        # Skip missing entries so a leading NaN does not hide an existing prefix.
        names = manifest[rank].dropna()
        if names.empty:
            # Nothing to inspect (empty table or all-missing column).
            prefixes[rank] = float("nan")
            continue
        # Cast to str so non-string columns do not break the `.str` accessor.
        prefixes[rank] = names.astype(str).str.extract(r"^([a-z]__)").iloc[0, 0]

    # Explicit dtype keeps the result well-defined when no rank column exists.
    return pd.Series(prefixes, dtype=object)


def unify_rank_prefixes(taxonomy: pd.DataFrame, manifest: pd.DataFrame) -> pd.DataFrame:
    """Handle taxonomic rank prefixes in the taxonomy or database manifest.

    Each rank that is present in both tables is compared individually. When
    the prefix convention differs, any existing prefix is stripped from the
    taxonomy names and the prefix used by the database (if any) is prepended.
    Ranks that only exist in one of the tables are left untouched since there
    is no database convention to align them with.

    Arguments
    ---------
    taxonomy : pandas.DataFrame
        A taxonomy table.
    manifest : pandas.DataFrame
        A database manifest.

    Returns
    -------
    pandas.DataFrame
        The taxonomy with adjusted taxa names consistent with the database.
        The input object itself is returned if nothing had to be changed,
        otherwise a modified copy.
    """
    tax_prefixes = rank_prefixes(taxonomy)
    db_prefixes = rank_prefixes(manifest)
    shared = [r for r in tax_prefixes.index if r in db_prefixes.index]

    # NaN never compares equal to itself, so "no prefix on both sides" has to
    # be treated as a match explicitly.
    mismatched = [
        r
        for r in shared
        if not (
            (pd.isna(tax_prefixes[r]) and pd.isna(db_prefixes[r]))
            or tax_prefixes[r] == db_prefixes[r]
        )
    ]
    if not mismatched:
        return taxonomy

    taxonomy = taxonomy.copy()
    for r in mismatched:
        # Strip first so an existing prefix is replaced rather than doubled.
        bare = taxonomy[r].str.replace(r"^[a-z]__", "", regex=True)
        prefix = db_prefixes[r]
        taxonomy[r] = bare if pd.isna(prefix) else prefix + bare

    return taxonomy
6 changes: 6 additions & 0 deletions tests/test_osqp.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"""

from .fixtures import community
import cobra.util.solver as su
import micom.data as md
from micom.workflows import (
build,
Expand All @@ -17,6 +18,11 @@
import pytest
from pytest import approx

# Module-wide marker: skip every test in this file when the "osqp" solver is
# not among the solvers detected by cobra.
pytestmark = pytest.mark.skipif(
"osqp" not in su.solvers,
reason="OSQP not functional here"
)

medium = load_qiime_medium(md.test_medium)
db = md.test_db

Expand Down
35 changes: 35 additions & 0 deletions tests/test_taxonomy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""Test helper for taxonomy handling."""

from micom.data import test_taxonomy
import micom.taxonomy as mt


# Shared fixtures for the tests below.
# `no_prefix` is presumably a taxonomy table whose names carry no rank
# prefixes (the tests assert that no prefix is detected in it) -- confirm
# against micom.data.test_taxonomy.
no_prefix = test_taxonomy()
# `with_prefix` is a copy in which the genus and species names get the
# "g__" and "s__" prefixes prepended.
with_prefix = no_prefix.copy()
with_prefix["genus"] = "g__" + with_prefix["genus"]
with_prefix["species"] = "s__" + with_prefix["species"]


def test_get_prefixes():
    """Prefix detection reports nothing for bare names and "s__" for species."""
    detected_bare = mt.rank_prefixes(no_prefix)
    assert detected_bare.isna().all()

    detected_prefixed = mt.rank_prefixes(with_prefix)
    assert detected_prefixed["species"] == "s__"


def test_unify_all_good():
    """Tables that already share a convention keep their species names."""
    for table in (no_prefix, with_prefix):
        unified = mt.unify_rank_prefixes(table, table)
        assert (unified.species == table.species).all()


def test_remove_prefix():
    """Prefixes are dropped when the database does not use them."""
    unified = mt.unify_rank_prefixes(with_prefix, no_prefix)
    for rank in ("genus", "species"):
        assert (unified[rank] == no_prefix[rank]).all()


def test_add_prefix():
    """Prefixes are added when the database uses them."""
    unified = mt.unify_rank_prefixes(no_prefix, with_prefix)
    for rank in ("genus", "species"):
        assert (unified[rank] == with_prefix[rank]).all()

0 comments on commit 2858dd7

Please sign in to comment.