diff --git a/.gitignore b/.gitignore index fe2ff70e..c3b28032 100644 --- a/.gitignore +++ b/.gitignore @@ -130,3 +130,6 @@ dmypy.json # Pyre type checker .pyre/ + +# IDEs +.vscode diff --git a/pydeseq2/_formulaic.py b/pydeseq2/_formulaic.py deleted file mode 100644 index af21f242..00000000 --- a/pydeseq2/_formulaic.py +++ /dev/null @@ -1,278 +0,0 @@ -"""Helpers to interact with Formulaic Formulas, taken from pertpy -https://github.com/scverse/pertpy/blob/main/pertpy/tools/_differential_gene_expression/_formulaic.py - -Some helpful definitions for working with formulaic formulas -(e.g. `~ 0 + C(donor):treatment + np.log1p(continuous)`): - * A *term* refers to an expression in the formula, separated by - `+`, e.g. `C(donor):treatment`, or `np.log1p(continuous)`. - * A *variable* refers to a column of the data frame passed to formulaic, e.g. `donor`. - * A *factor* is the specification of how a certain variable is represented in the - design matrix, e.g. treatment coding with base level "A" and reduced rank. -""" # noqa - -from collections import defaultdict -from collections.abc import Mapping -from collections.abc import Sequence -from dataclasses import dataclass -from typing import Any - -from formulaic import FactorValues -from formulaic import ModelSpec -from formulaic.materializers import PandasMaterializer -from formulaic.materializers.types import EvaluatedFactor -from formulaic.parser.types import Factor -from interface_meta import override - - -@dataclass -class FactorMetadata: - """Store (relevant) metadata for a factor of a formula.""" - - name: str - """The unambiguous factor name as specified in the formula. - E.g. `donor`, or `C(donor, contr.treatment(base="A"))`""" - - reduced_rank: bool - """Whether a column will be dropped because it is redundant""" - - custom_encoder: bool - """Whether or not a custom encoder (e.g. `C(...)`) was used.""" - - categories: Sequence[str] - """The unique categories in this factor (after applying `drop_rows`)""" - - kind: Factor.Kind - """Type of the factor""" - - drop_field: str | None = None - """The category that is dropped. - - Note that - * this may also be populated if `reduced_rank = False` - * this is only populated when no encoder was used - (e.g. `~ donor` but NOT `~ C(donor)`. - """ - - column_names: Sequence[str] | None = None - """The column names for this factor included in the design matrix. - - This may be the same as `categories` if the default encoder is used, or - categories without the base level if a custom encoder (e.g. `C(...)`) is used. - """ - - colname_format: str | None = None - """A formattable string that can be used to generate the column name in the - design matrix, e.g. `{name}[T.{field}]`""" - - @property - def base(self) -> str | None: - """ - The base category for this categorical factor. - - This is derived from `drop_field` (for default encoding) or by comparing the - column names inthe design matrix with all categories (for custom encoding, - e.g. `C(...)`). - """ - if not self.reduced_rank: - return None - else: - if self.custom_encoder: - assert (self.categories is not None) and (self.column_names is not None) - tmp_base = set(self.categories) - set(self.column_names) - assert len(tmp_base) == 1 - return tmp_base.pop() - else: - assert self.drop_field is not None - return self.drop_field - - -def get_factor_storage_and_materializer() -> ( - tuple[dict[str, list[FactorMetadata]], dict[str, set[str]], type] -): - """Keep track of categorical factors used in a model specification. - - Generates a custom materializer that reports back metadata upon materialization of - the model matrix. - - Returns - ------- - factor_storage: dict[str, list[FactorMetadata]] - A dictionary storing metadata for each factor processed by the custom - materializer. - - variable_to_factors: dict[str, set[str]] - A dictionary mapping variables to factor names, which works similarly to - ``model_spec.variable_terms`` but maps to factors rather than terms. - - CustomPandasMaterializer: type - A materializer class tied to the specific instance of `factor_storage`. - """ - # There can be multiple FactorMetadata entries per sample, for instance when - # formulaic generates an interaction term, it generates the factor with both full - # rank and reduced rank. - factor_storage: dict[str, list[FactorMetadata]] = defaultdict(list) - variable_to_factors: dict[str, set[str]] = defaultdict(set) - - class CustomPandasMaterializer(PandasMaterializer): - """An extension of the PandasMaterializer. - - Records all categorical variables and their (base) categories. - """ - - REGISTER_NAME = "custom_pandas" - REGISTER_INPUTS = ("pandas.core.frame.DataFrame",) - REGISTER_OUTPUTS = ("pandas", "numpy", "sparse") - - def __init__( - self, - data: Any, - context: Mapping[str, Any] | None = None, - record_factor_metadata: bool = False, - **params: Any, - ): - """Initialize the Materializer. - - Parameters - ---------- - data: Any - Passed to PandasMaterializer. - - context: Mapping[str, Any], optional - Passed to PandasMaterializer. - - record_factor_metadata: bool - Flag that tells whether this particular instance of the custom - materializer class is supposed to record factor metadata. Only the - instance that is used for building the design matrix should record the - metadata. All other instances (e.g. used to generate contrast vectors) - should not record metadata to not overwrite the specifications from the - design matrix. (Default: False). - - **params: - Passed to PandasMaterializer - """ - self.factor_metadata_storage = ( - factor_storage if record_factor_metadata else None - ) - self.variable_to_factors = ( - variable_to_factors if record_factor_metadata else None - ) - # temporary pointer to metadata of factor that is currently evaluated - self._current_factor: FactorMetadata | None = None - super().__init__(data, context, **params) - - @override - def _encode_evaled_factor( - self, - factor: EvaluatedFactor, - spec: ModelSpec, - drop_rows: Sequence[int], - reduced_rank: bool = False, - ) -> dict[str, Any]: - """Call this function just before the factor is evaluated. - - Records some metadata, before we call the original function. - """ - assert ( - self._current_factor is None - ), "_current_factor should always be None when we start recording metadata" - if self.factor_metadata_storage is not None: - # Don't store if the factor is cached - # (then we should already have recorded it) - if ( - factor.expr in self.encoded_cache - or (factor.expr, reduced_rank) in self.encoded_cache - ): - assert ( - factor.expr in self.factor_metadata_storage - ), "Factor should be there since it's cached" - else: - assert self.variable_to_factors is not None - for var in factor.variables: - self.variable_to_factors[var].add(factor.expr) - self._current_factor = FactorMetadata( - name=factor.expr, - reduced_rank=reduced_rank, - categories=tuple( - sorted( - factor.values.drop( - index=factor.values.index[drop_rows] - ).unique() - ) - ), - custom_encoder=factor.metadata.encoder is not None, - kind=factor.metadata.kind, - ) - return super()._encode_evaled_factor(factor, spec, drop_rows, reduced_rank) - - @override - def _flatten_encoded_evaled_factor( - self, name: str, values: FactorValues[dict] - ) -> dict[str, Any]: - """ - Call this function at the end, before the design matrix gets materialized. - - Here we have access to additional metadata, such as `drop_field`. - """ - if self._current_factor is not None: - assert self._current_factor.name == name - assert self.factor_metadata_storage is not None - self._current_factor.drop_field = ( - values.__formulaic_metadata__.drop_field - ) - self._current_factor.column_names = ( - values.__formulaic_metadata__.column_names - ) - self._current_factor.colname_format = ( - values.__formulaic_metadata__.format - ) - self.factor_metadata_storage[name].append(self._current_factor) - self._current_factor = None - - return super()._flatten_encoded_evaled_factor(name, values) - - return factor_storage, variable_to_factors, CustomPandasMaterializer - - -class AmbiguousAttributeError(ValueError): - pass - - -def resolve_ambiguous(objs: Sequence[Any], attr: str) -> Any: - """Check consistency of an attribute across objects. - - Given a list of objects, return an attribute if it is the same between all - object. Otherwise, raise an error. - - Parameters - ---------- - objs: Sequence[Any] - A list of objects. - - attr: str - The attribute to check. - - Returns - ------- - Any - The attribute, if it is the same for all objects. - - Raises - ------ - AmbiguousAttributeError - If the collection is empty. - """ - if not objs: - raise AmbiguousAttributeError("Collection is empty") - - first_obj_attr = getattr(objs[0], attr) - - # Check if the attribute is the same for all objects - for obj in objs[1:]: - if getattr(obj, attr) != first_obj_attr: - raise AmbiguousAttributeError( - f"Ambiguous attribute '{attr}': values differ between objects" - ) - - # If attribute is the same for all objects, return it - return first_obj_attr diff --git a/pydeseq2/dds.py b/pydeseq2/dds.py index d16fbe3b..13e0cd2b 100644 --- a/pydeseq2/dds.py +++ b/pydeseq2/dds.py @@ -1,7 +1,6 @@ import sys import time import warnings -from itertools import chain from typing import List from typing import Literal from typing import Optional @@ -11,16 +10,12 @@ import anndata as ad # type: ignore import numpy as np import pandas as pd +from formulaic_contrasts import FormulaicContrasts from scipy.optimize import minimize from scipy.special import polygamma # type: ignore from scipy.stats import f # type: ignore from scipy.stats import trim_mean # type: ignore -from pydeseq2._formulaic import Factor - -# TODO this is from pertpy, if we keep it we shoud acknoledge it or import it directly -from pydeseq2._formulaic import get_factor_storage_and_materializer -from pydeseq2._formulaic import resolve_ambiguous from pydeseq2.default_inference import DefaultInference from pydeseq2.inference import Inference from pydeseq2.preprocessing import deseq2_norm_fit @@ -254,8 +249,6 @@ def __init__( self.fit_type = fit_type self.design = design - self.factor_storage = None - self.variable_to_factors = None if continuous_factors is not None: warnings.warn( @@ -298,12 +291,8 @@ def __init__( if isinstance(self.design, str): # Keep track of the categorical factors used in the model specification, # including variable and factor names, by generating a custom materializer. - self.factor_storage, self.variable_to_factors, materializer_class = ( - get_factor_storage_and_materializer() - ) - self.obsm["design_matrix"] = materializer_class( - self.obs, record_factor_metadata=True - ).get_model_matrix(self.design) + self.formulaic_contrasts = FormulaicContrasts(self.obs, self.design) + self.obsm["design_matrix"] = self.formulaic_contrasts.design_matrix else: self.obsm["design_matrix"] = self.design @@ -343,7 +332,7 @@ def __init__( def variables(self): """Get the names of the variables used in the model definition.""" try: - return self.obsm["design_matrix"].model_spec.variables_by_source["data"] + return self.formulaic_contrasts.variables except AttributeError: raise ValueError( """Retrieving variables is only possible if the model was initialized @@ -571,20 +560,11 @@ def cond(self, **kwargs): ndarray A contrast vector that aligns to the columns of the design matrix. """ - cond_dict = kwargs - if not set(cond_dict.keys()).issubset(self.variables): - raise ValueError( - """You specified a variable that is not part of the model. Available - variables: """ - + ",".join(self.variables) - ) - for var in self.variables: - if var in cond_dict: - self._check_category(var, cond_dict[var]) - else: - cond_dict[var] = self._get_default_value(var) - df = pd.DataFrame([kwargs]) - return self.obsm["design_matrix"].model_spec.get_model_matrix(df).iloc[0] + return self.formulaic_contrasts.cond(**kwargs) + + def contrast(self, *args, **kwargs): + """Get a contrast for a simple pairwise comparison.""" + return self.formulaic_contrasts.contrast(*args, **kwargs) def fit_size_factors( self, @@ -1543,34 +1523,3 @@ def _check_full_rank_design(self): UserWarning, stacklevel=2, ) - - ### Methods below are taken and adapted from pertpy's LinearModelBase ### - def _check_category(self, var, value): - factor_metadata = self._get_factor_metadata_for_variable(var) - tmp_categories = resolve_ambiguous(factor_metadata, "categories") - if ( - resolve_ambiguous(factor_metadata, "kind") == Factor.Kind.CATEGORICAL - and value not in tmp_categories - ): - raise ValueError( - f"""You specified a non-existant category for {var}. - Possible categories: {', '.join(tmp_categories)}""" - ) - - def _get_factor_metadata_for_variable(self, var): - factors = self.variable_to_factors[var] - return list(chain.from_iterable(self.factor_storage[f] for f in factors)) - - def _get_default_value(self, var): - factor_metadata = self._get_factor_metadata_for_variable(var) - if resolve_ambiguous(factor_metadata, "kind") == Factor.Kind.CATEGORICAL: - try: - tmp_base = resolve_ambiguous(factor_metadata, "base") - except ValueError as e: - raise ValueError( - f"""Could not automatically resolve base category for variable {var}. - Please specify it explicity in `model.cond`.""" - ) from e - return tmp_base if tmp_base is not None else "\0" - else: - return 0 diff --git a/pydeseq2/ds.py b/pydeseq2/ds.py index 5a379b43..ebb9111e 100644 --- a/pydeseq2/ds.py +++ b/pydeseq2/ds.py @@ -586,34 +586,6 @@ def _build_contrast_vector(self) -> None: factor = self.contrast[0] alternative = self.contrast[1] ref = self.contrast[2] - self.contrast_vector = self._contrast( + self.contrast_vector = self.dds.contrast( column=factor, baseline=ref, group_to_compare=alternative ) - - # Everything below is copied from pertpy. TODO : get a MWE, then clean up - def _contrast(self, column: str, baseline: str, group_to_compare: str) -> np.ndarray: - """Build a simple contrast for pairwise comparisons. - - This is equivalent to - - ``` - model.cond( = baseline) - model.cond( = group_to_compare) - ``` - - Parameters - ---------- - column: str - The column to contrast. - baseline: str - The baseline group. - group_to_compare: str - The group to compare to the baseline. - - Returns - ------- - np.ndarray - The contrast vector. - """ - return self.dds.cond(**{column: baseline}) - self.dds.cond( - **{column: group_to_compare} - ) diff --git a/setup.py b/setup.py index 8a25ccb1..1a8a5e8a 100644 --- a/setup.py +++ b/setup.py @@ -34,6 +34,7 @@ "pandas>=1.4.0", "scikit-learn>=1.1.0", "scipy>=1.11.0", + "formulaic-contrasts>=0.2.0", "matplotlib>=3.6.2", # not sure why sphinx_gallery does not work without it ], # external packages as dependencies extras_require={ diff --git a/tests/test_formulaic.py b/tests/test_formulaic.py deleted file mode 100644 index a6205ae7..00000000 --- a/tests/test_formulaic.py +++ /dev/null @@ -1,367 +0,0 @@ -""" -Copied from pertpy -https://github.com/scverse/pertpy/tests/tools/_differential_gene_expression/ -""" - -import anndata as ad -import numpy as np -import pandas as pd -import pytest -import scipy.sparse as sp -from formulaic.parser.types import Factor - -from pydeseq2._formulaic import AmbiguousAttributeError -from pydeseq2._formulaic import FactorMetadata -from pydeseq2._formulaic import get_factor_storage_and_materializer -from pydeseq2._formulaic import resolve_ambiguous -from pydeseq2.utils import load_example_data - - -@pytest.fixture -def test_counts(): - return load_example_data( - modality="raw_counts", - dataset="synthetic", - debug=False, - ) - - -@pytest.fixture -def test_metadata(): - return load_example_data( - modality="metadata", - dataset="synthetic", - debug=False, - ) - - -@pytest.fixture -def test_adata(test_counts, test_metadata): - return ad.AnnData(X=test_counts, obs=test_metadata) - - -@pytest.fixture(params=[np.array, sp.csr_matrix, sp.csc_matrix]) -def test_adata_minimal(request): - matrix_format = request.param - n_obs = 80 - n_donors = n_obs // 4 - rng = np.random.default_rng(9) # make tests deterministic - obs = pd.DataFrame( - { - "condition": ["A", "B"] * (n_obs // 2), - "donor": sum(([f"D{i}"] * n_donors for i in range(n_obs // n_donors)), []), - "other": (["X"] * (n_obs // 4)) + (["Y"] * ((3 * n_obs) // 4)), - "pairing": sum(([str(i), str(i)] for i in range(n_obs // 2)), []), - "continuous": [rng.uniform(0, 1) * 4000 for _ in range(n_obs)], - }, - ) - var = pd.DataFrame(index=["gene1", "gene2"]) - group1 = rng.negative_binomial(20, 0.1, n_obs // 2) # large mean - group2 = rng.negative_binomial(5, 0.5, n_obs // 2) # small mean - - condition_data = np.empty((n_obs,), dtype=group1.dtype) - condition_data[0::2] = group1 - condition_data[1::2] = group2 - - donor_data = np.empty((n_obs,), dtype=group1.dtype) - donor_data[0:n_donors] = group2[:n_donors] - donor_data[n_donors : (2 * n_donors)] = group1[n_donors:] - - donor_data[(2 * n_donors) : (3 * n_donors)] = group2[:n_donors] - donor_data[(3 * n_donors) :] = group1[n_donors:] - - X = matrix_format(np.vstack([condition_data, donor_data]).T) - - return ad.AnnData(X=X, obs=obs, var=var) - - -# Ignore anndata ImplicitModificationWarning -@pytest.mark.filterwarnings("ignore::UserWarning") -@pytest.mark.parametrize( - "formula,reorder_categorical,expected_factor_metadata", - [ - [ - "~ donor", - None, - {"donor": {"reduced_rank": True, "custom_encoder": False, "base": "D0"}}, - ], - [ - "~ donor", - {"donor": ["D2", "D1", "D0", "D3"]}, - {"donor": {"reduced_rank": True, "custom_encoder": False, "base": "D2"}}, - ], - [ - "~ C(donor)", - None, - {"C(donor)": {"reduced_rank": True, "custom_encoder": True, "base": "D0"}}, - ], - [ - "~ C(donor, contr.treatment(base='D2'))", - None, - { - "C(donor, contr.treatment(base='D2'))": { - "reduced_rank": True, - "custom_encoder": True, - "base": "D2", - } - }, - ], - [ - "~ C(donor, contr.sum)", - None, - { - "C(donor, contr.sum)": { - "reduced_rank": True, - "custom_encoder": True, - "base": "D3", - } - }, - ], - [ - "~ C(donor, contr.sum)", - {"donor": ["D1", "D0", "D3", "D2"]}, - { - "C(donor, contr.sum)": { - "reduced_rank": True, - "custom_encoder": True, - "base": "D2", - } - }, - ], - [ - "~ condition", - None, - {"condition": {"reduced_rank": True, "custom_encoder": False, "base": "A"}}, - ], - [ - "~ C(condition)", - None, - { - "C(condition)": { - "reduced_rank": True, - "custom_encoder": True, - "base": "A", - } - }, - ], - [ - "~ C(condition, contr.treatment(base='B'))", - None, - { - "C(condition, contr.treatment(base='B'))": { - "reduced_rank": True, - "custom_encoder": True, - "base": "B", - } - }, - ], - [ - "~ C(condition, contr.sum)", - None, - { - "C(condition, contr.sum)": { - "reduced_rank": True, - "custom_encoder": True, - "base": "B", - } - }, - ], - [ - "~ 0 + condition", - None, - { - "condition": { - "reduced_rank": False, - "custom_encoder": False, - "base": None, - } - }, - ], - [ - "~ condition + donor", - None, - { - "condition": { - "reduced_rank": True, - "custom_encoder": False, - "base": "A", - }, - "donor": {"reduced_rank": True, "custom_encoder": False, "base": "D0"}, - }, - ], - [ - "~ 0 + condition + donor", - None, - { - "condition": { - "reduced_rank": False, - "custom_encoder": False, - "base": None, - }, - "donor": {"reduced_rank": True, "custom_encoder": False, "base": "D0"}, - }, - ], - [ - "~ condition * donor", - None, - { - "condition": { - "reduced_rank": True, - "custom_encoder": False, - "base": "A", - }, - "donor": {"reduced_rank": True, "custom_encoder": False, "base": "D0"}, - }, - ], - [ - "~ condition * C(donor, contr.treatment(base='D2'))", - None, - { - "condition": { - "reduced_rank": True, - "custom_encoder": False, - "base": "A", - }, - "C(donor, contr.treatment(base='D2'))": { - "reduced_rank": True, - "custom_encoder": True, - "base": "D2", - }, - }, - ], - [ - "~ condition + C(condition) + C(condition, contr.treatment(base='B'))", - None, - { - "condition": { - "reduced_rank": True, - "custom_encoder": False, - "base": "A", - }, - "C(condition)": { - "reduced_rank": True, - "custom_encoder": True, - "base": "A", - }, - "C(condition, contr.treatment(base='B'))": { - "reduced_rank": True, - "custom_encoder": True, - "base": "B", - }, - }, - ], - [ - "~ condition + continuous + np.log(continuous)", - None, - { - "condition": { - "reduced_rank": True, - "custom_encoder": False, - "base": "A", - "kind": Factor.Kind.CATEGORICAL, - }, - "continuous": { - "reduced_rank": False, - "custom_encoder": False, - "base": None, - "kind": Factor.Kind.NUMERICAL, - }, - "np.log(continuous)": { - "reduced_rank": False, - "custom_encoder": False, - "base": None, - "kind": Factor.Kind.NUMERICAL, - }, - }, - ], - [ - "~ condition * donor + continuous", - None, - { - "condition": { - "reduced_rank": True, - "custom_encoder": False, - "base": "A", - }, - "donor": {"reduced_rank": True, "custom_encoder": False, "base": "D0"}, - "continuous": { - "reduced_rank": False, - "custom_encoder": False, - "base": None, - "kind": Factor.Kind.NUMERICAL, - }, - }, - ], - [ - "~ condition:donor", - None, - { - "condition": { - "reduced_rank": True, - "custom_encoder": False, - "base": "A", - }, - "donor": { - "custom_encoder": False, - "drop_field": "D0", - }, # `reduced_rank` and `base` will be ambigous here because Formulaic - # generates both version of the factor internally - }, - ], - ], -) -def test_custom_materializer( - test_adata_minimal, formula, reorder_categorical, expected_factor_metadata -): - """Test that the custom materializer correctly stores the baseline category. - - Parameters - ---------- - test_adata_minimal - adata fixture - formula - Formula to test - reorder_categorical - Create a pandas categorical for a given column with a certain order of categories - expected_factor_metadata - dict with expected values for each factor - """ - if reorder_categorical is not None: - for col, order in reorder_categorical.items(): - test_adata_minimal.obs[col] = pd.Categorical( - test_adata_minimal.obs[col], categories=order - ) - factor_storage, _, materializer = get_factor_storage_and_materializer() - materializer(test_adata_minimal.obs, record_factor_metadata=True).get_model_matrix( - formula - ) - for factor, expected_metadata in expected_factor_metadata.items(): - actual_metadata = factor_storage[factor] - for k in expected_metadata: - assert resolve_ambiguous(actual_metadata, k) == expected_metadata[k] - - -# Ignore anndata ImplicitModificationWarning -@pytest.mark.filterwarnings("ignore::UserWarning") -def test_resolve_ambiguous(): - obj1 = FactorMetadata("F1", True, True, ["A", "B"], Factor.Kind.CATEGORICAL) - obj2 = FactorMetadata("F2", True, False, ["A", "B"], Factor.Kind.CATEGORICAL) - obj3 = FactorMetadata("F3", True, False, None, Factor.Kind.NUMERICAL) - - with pytest.raises(ValueError): - resolve_ambiguous([], "foo") - - with pytest.raises(AttributeError): - resolve_ambiguous([obj1, obj2], "doesntexist") - - with pytest.raises(AmbiguousAttributeError): - assert resolve_ambiguous([obj1, obj2], "name") - - assert resolve_ambiguous([obj1, obj2, obj3], "reduced_rank") is True - assert resolve_ambiguous([obj1, obj2], "categories") == ["A", "B"] - - with pytest.raises(AmbiguousAttributeError): - assert resolve_ambiguous([obj1, obj2, obj3], "categories") - - with pytest.raises(AmbiguousAttributeError): - assert resolve_ambiguous([obj1, obj3], "kind") diff --git a/tests/test_pydeseq2.py b/tests/test_pydeseq2.py index 4fd8809e..2279fc8f 100644 --- a/tests/test_pydeseq2.py +++ b/tests/test_pydeseq2.py @@ -119,7 +119,7 @@ def test_deseq_independent_filtering_parametric_fit(counts_df, metadata, tol=0.0 ) dds.deseq2() - ds = DeseqStats(dds, contrast=["condition", "A", "B"]) + ds = DeseqStats(dds, contrast=["condition", "B", "A"]) ds.summary() # Check results @@ -146,7 +146,7 @@ def test_deseq_independent_filtering_mean_fit(counts_df, metadata, tol=0.02): ) dds.deseq2() - ds = DeseqStats(dds, contrast=["condition", "A", "B"]) + ds = DeseqStats(dds, contrast=["condition", "B", "A"]) ds.summary() # Check results @@ -178,7 +178,7 @@ def test_deseq_without_independent_filtering_parametric_fit( ) dds.deseq2() - ds = DeseqStats(dds, contrast=["condition", "A", "B"], independent_filter=False) + ds = DeseqStats(dds, contrast=["condition", "B", "A"], independent_filter=False) ds.summary() # Check results @@ -207,7 +207,7 @@ def test_alt_hypothesis(alt_hypothesis, counts_df, metadata, tol=0.02): ds = DeseqStats( dds, - contrast=["condition", "A", "B"], + contrast=["condition", "B", "A"], lfc_null=-0.5 if alt_hypothesis == "less" else 0.5, alt_hypothesis=alt_hypothesis, ) @@ -255,7 +255,7 @@ def test_deseq_no_refit_cooks(counts_df, metadata, tol=0.02): ) dds.deseq2() - ds = DeseqStats(dds, contrast=["condition", "A", "B"]) + ds = DeseqStats(dds, contrast=["condition", "B", "A"]) ds.summary() # Check results @@ -401,7 +401,7 @@ def test_multifactor_deseq(counts_df, metadata, with_outliers, tol=0.04): dds = DeseqDataSet(counts=counts_df, metadata=metadata, design="~group + condition") dds.deseq2() - res = DeseqStats(dds, contrast=["condition", "A", "B"]) + res = DeseqStats(dds, contrast=["condition", "B", "A"]) res.summary() res_df = res.results_df @@ -595,7 +595,7 @@ def test_wide_deseq( ) dds.deseq2() - ds = DeseqStats(dds, contrast=["condition", "A", "B"]) + ds = DeseqStats(dds, contrast=["condition", "B", "A"]) ds.summary() # Check results @@ -663,7 +663,7 @@ def test_anndata_init(counts_df, metadata, tol=0.02): dds = DeseqDataSet(adata=adata, design="~condition") dds.deseq2() - ds = DeseqStats(dds, contrast=["condition", "A", "B"]) + ds = DeseqStats(dds, contrast=["condition", "B", "A"]) ds.summary() # Check results