From 2858dd753ffbdcfe5c1a41598d810c6874feac4d Mon Sep 17 00:00:00 2001 From: Christian Diener Date: Sun, 29 Oct 2023 12:19:12 -0700 Subject: [PATCH] allow unifying ranks (#145) * allow unifying ranks * check if osqp is there * check if osqp is there --- .github/workflows/pythonpackage.yml | 2 +- micom/community.py | 2 + micom/taxonomy.py | 79 ++++++++++++++++++++++++----- tests/test_osqp.py | 6 +++ tests/test_taxonomy.py | 35 +++++++++++++ 5 files changed, 111 insertions(+), 13 deletions(-) create mode 100644 tests/test_taxonomy.py diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index 712c659f..15414c24 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -30,7 +30,7 @@ jobs: run: | python -m pip install --upgrade pip cryptography pip install wheel numpy Cython - pip install biom-format + pip install biom-format highspy - name: Install MICOM run: pip install -e . - name: install CPLEX diff --git a/micom/community.py b/micom/community.py index 9f133446..0f16d40d 100755 --- a/micom/community.py +++ b/micom/community.py @@ -21,6 +21,7 @@ from micom.optcom import optcom, solve from micom.problems import cooperative_tradeoff, knockout_taxa from micom.qiime_formats import load_qiime_model_db +from micom.taxonomy import unify_rank_prefixes from rich.progress import track from tempfile import TemporaryDirectory @@ -209,6 +210,7 @@ def __init__( if r in taxonomy.columns and r in manifest.columns ] manifest = manifest[keep_cols + ["file"]] + taxonomy = unify_rank_prefixes(taxonomy, manifest) merged = pd.merge(taxonomy, manifest, on=keep_cols) self.__db_metrics = pd.Series( diff --git a/micom/taxonomy.py b/micom/taxonomy.py index c79be5d4..2099ee12 100644 --- a/micom/taxonomy.py +++ b/micom/taxonomy.py @@ -1,6 +1,5 @@ """Helpers to convert external data to a MICOM taxonomy.""" -from micom.community import _ranks from micom.qiime_formats import ( load_qiime_feature_table, load_qiime_taxonomy, @@ -8,26 +7,27 @@ import pandas as pd +RANKS = ["kingdom", "phylum", "class", "order", "family", "genus", "species", "strain"] + + def build_from_qiime( - abundance, + abundance: pd.DataFrame, taxonomy: pd.Series, - collapse_on="genus" + collapse_on: str = "genus", + trim_rank_prefix: bool = False, ) -> pd.DataFrame: """Build the specification for the community models.""" - taxa = taxonomy.str.replace("[\\w_]+__|\\[|\\]", "", regex=True) + if trim_rank_prefix: + taxa = taxonomy.str.replace("[\\w_]+__|\\[|\\]", "", regex=True) taxa = taxa.str.split(";\\s*", expand=True).replace("", None) - taxa.columns = _ranks[0 : taxa.shape[1]] + taxa.columns = RANKS[0 : taxa.shape[1]] taxa["taxid"] = taxonomy.index taxa.index == taxa.taxid if isinstance(collapse_on, str): collapse_on = [collapse_on] - ranks = [ - r - for r in collapse_on - if r in taxa.columns - ] + ranks = [r for r in collapse_on if r in taxa.columns] taxa["mapping_ranks"] = taxa[ranks].apply( lambda s: "|".join(s.astype("str")), axis=1 ) @@ -35,7 +35,8 @@ def build_from_qiime( abundance = ( abundance.collapse( lambda id_, x: taxa.loc[id_, "mapping_ranks"], - axis="observation", norm=False + axis="observation", + norm=False, ) .to_dataframe(dense=True) .T @@ -48,7 +49,7 @@ def build_from_qiime( abundance = pd.merge( abundance[abundance.abundance > 0.0], taxa[ranks + ["mapping_ranks"]].drop_duplicates(), - on="mapping_ranks" + on="mapping_ranks", ) abundance["id"] = abundance["mapping_ranks"].replace( r"[^A-Za-z0-9_]+", "_", regex=True @@ -84,3 +85,57 @@ def qiime_to_micom(feature_table, taxonomy, collapse_on="genus"): taxonomy = load_qiime_taxonomy(taxonomy) return build_from_qiime(table, taxonomy, collapse_on) + + +def rank_prefixes(manifest: pd.DataFrame) -> pd.Series: + """Get the used prefixes for taxonomic ranks. + + Arguments + --------- + manifest : pandas.DataFrame + A model database manifest. + + Returns + ------- + pandas.Series + The detected prefix for each taxonomic rank in the manifest. + """ + ranks = [c for c in manifest.columns if c.lower() in RANKS] + prefixes = pd.Series( + {r: manifest[r].str.extract(r"^([a-z]__)").iloc[0, 0] for r in ranks} + ) + + return prefixes + + +def unify_rank_prefixes(taxonomy: pd.DataFrame, manifest: pd.DataFrame) -> pd.DataFrame: + """Handle taxonomic rank prefixes in the taxonomy or database manifest. + + Arguments + --------- + taxonomy : pandas.DataFrame + A taxonomy table. + manifest : pandas.DataFrame + A database manifest. + + Returns + ------- + tuple of pandas.DataFrame + The taxonomy with adjusted taxa names consistent with the database. + """ + tax_prefixes = rank_prefixes(taxonomy) + db_prefixes = rank_prefixes(manifest) + ranks = tax_prefixes.index[tax_prefixes.index.isin(db_prefixes.index)] + if all(tax_prefixes[ranks] == db_prefixes[ranks]): + return taxonomy + + taxonomy = taxonomy.copy() + ranks = [c for c in taxonomy.columns if c.lower() in RANKS] + if db_prefixes.isna().all(): + for r in ranks: + taxonomy[r] = taxonomy[r].str.replace(r"^[a-z]__", "", regex=True) + else: + for r in ranks: + taxonomy[r] = db_prefixes[r] + taxonomy[r] + + return taxonomy diff --git a/tests/test_osqp.py b/tests/test_osqp.py index 408dfb20..d27c43be 100644 --- a/tests/test_osqp.py +++ b/tests/test_osqp.py @@ -4,6 +4,7 @@ """ from .fixtures import community +import cobra.util.solver as su import micom.data as md from micom.workflows import ( build, @@ -17,6 +18,11 @@ import pytest from pytest import approx +pytestmark = pytest.mark.skipif( + "osqp" not in su.solvers, + reason="OSQP not functional here" +) + medium = load_qiime_medium(md.test_medium) db = md.test_db diff --git a/tests/test_taxonomy.py b/tests/test_taxonomy.py new file mode 100644 index 00000000..fb51a6fc --- /dev/null +++ b/tests/test_taxonomy.py @@ -0,0 +1,35 @@ +"""Test helper for taxonomy handling.""" + +from micom.data import test_taxonomy +import micom.taxonomy as mt + + +no_prefix = test_taxonomy() +with_prefix = no_prefix.copy() +with_prefix["genus"] = "g__" + with_prefix["genus"] +with_prefix["species"] = "s__" + with_prefix["species"] + + +def test_get_prefixes(): + assert mt.rank_prefixes(no_prefix).isna().all() + assert mt.rank_prefixes(with_prefix)["species"] == "s__" + + +def test_unify_all_good(): + tax = mt.unify_rank_prefixes(no_prefix, no_prefix) + assert all(tax.species == no_prefix.species) + + tax = mt.unify_rank_prefixes(with_prefix, with_prefix) + assert all(tax.species == with_prefix.species) + + +def test_remove_prefix(): + tax = mt.unify_rank_prefixes(with_prefix, no_prefix) + assert all(tax.genus == no_prefix.genus) + assert all(tax.species == no_prefix.species) + + +def test_add_prefix(): + tax = mt.unify_rank_prefixes(no_prefix, with_prefix) + assert all(tax.genus == with_prefix.genus) + assert all(tax.species == with_prefix.species) \ No newline at end of file