diff --git a/.gitignore b/.gitignore
index fe2ff70e..c3b28032 100644
--- a/.gitignore
+++ b/.gitignore
@@ -130,3 +130,6 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
+
+# IDEs
+.vscode
diff --git a/pydeseq2/_formulaic.py b/pydeseq2/_formulaic.py
deleted file mode 100644
index af21f242..00000000
--- a/pydeseq2/_formulaic.py
+++ /dev/null
@@ -1,278 +0,0 @@
-"""Helpers to interact with Formulaic Formulas, taken from pertpy
-https://github.com/scverse/pertpy/blob/main/pertpy/tools/_differential_gene_expression/_formulaic.py
-
-Some helpful definitions for working with formulaic formulas
-(e.g. `~ 0 + C(donor):treatment + np.log1p(continuous)`):
- * A *term* refers to an expression in the formula, separated by
-   `+`, e.g. `C(donor):treatment`, or `np.log1p(continuous)`.
- * A *variable* refers to a column of the data frame passed to formulaic, e.g. `donor`.
- * A *factor* is the specification of how a certain variable is represented in the
-   design matrix, e.g. treatment coding with base level "A" and reduced rank.
-"""  # noqa
-
-from collections import defaultdict
-from collections.abc import Mapping
-from collections.abc import Sequence
-from dataclasses import dataclass
-from typing import Any
-
-from formulaic import FactorValues
-from formulaic import ModelSpec
-from formulaic.materializers import PandasMaterializer
-from formulaic.materializers.types import EvaluatedFactor
-from formulaic.parser.types import Factor
-from interface_meta import override
-
-
-@dataclass
-class FactorMetadata:
-    """Store (relevant) metadata for a factor of a formula."""
-
-    name: str
-    """The unambiguous factor name as specified in the formula.
-    E.g. `donor`, or `C(donor, contr.treatment(base="A"))`"""
-
-    reduced_rank: bool
-    """Whether a column will be dropped because it is redundant"""
-
-    custom_encoder: bool
-    """Whether or not a custom encoder (e.g. `C(...)`) was used."""
-
-    categories: Sequence[str]
-    """The unique categories in this factor (after applying `drop_rows`)"""
-
-    kind: Factor.Kind
-    """Type of the factor"""
-
-    drop_field: str | None = None
-    """The category that is dropped.
-
-    Note that
-      * this may also be populated if `reduced_rank = False`
-      * this is only populated when no encoder was used
-        (e.g. `~ donor` but NOT `~ C(donor)`.
-    """
-
-    column_names: Sequence[str] | None = None
-    """The column names for this factor included in the design matrix.
-
-    This may be the same as `categories` if the default encoder is used, or
-    categories without the base level if a custom encoder (e.g. `C(...)`) is used.
-    """
-
-    colname_format: str | None = None
-    """A formattable string that can be used to generate the column name in the
-    design matrix, e.g. `{name}[T.{field}]`"""
-
-    @property
-    def base(self) -> str | None:
-        """
-        The base category for this categorical factor.
-
-        This is derived from `drop_field` (for default encoding) or by comparing the
-        column names inthe design matrix with all categories (for custom encoding,
-        e.g. `C(...)`).
-        """
-        if not self.reduced_rank:
-            return None
-        else:
-            if self.custom_encoder:
-                assert (self.categories is not None) and (self.column_names is not None)
-                tmp_base = set(self.categories) - set(self.column_names)
-                assert len(tmp_base) == 1
-                return tmp_base.pop()
-            else:
-                assert self.drop_field is not None
-                return self.drop_field
-
-
-def get_factor_storage_and_materializer() -> (
-    tuple[dict[str, list[FactorMetadata]], dict[str, set[str]], type]
-):
-    """Keep track of categorical factors used in a model specification.
-
-    Generates a custom materializer that reports back metadata upon materialization of
-    the model matrix.
-
-    Returns
-    -------
-    factor_storage: dict[str, list[FactorMetadata]]
-        A dictionary storing metadata for each factor processed by the custom
-        materializer.
-
-    variable_to_factors: dict[str, set[str]]
-        A dictionary mapping variables to factor names, which works similarly to
-        ``model_spec.variable_terms`` but maps to factors rather than terms.
-
-    CustomPandasMaterializer: type
-        A materializer class tied to the specific instance of `factor_storage`.
-    """
-    # There can be multiple FactorMetadata entries per sample, for instance when
-    # formulaic generates an interaction term, it generates the factor with both full
-    # rank and reduced rank.
-    factor_storage: dict[str, list[FactorMetadata]] = defaultdict(list)
-    variable_to_factors: dict[str, set[str]] = defaultdict(set)
-
-    class CustomPandasMaterializer(PandasMaterializer):
-        """An extension of the PandasMaterializer.
-
-        Records all categorical variables and their (base) categories.
-        """
-
-        REGISTER_NAME = "custom_pandas"
-        REGISTER_INPUTS = ("pandas.core.frame.DataFrame",)
-        REGISTER_OUTPUTS = ("pandas", "numpy", "sparse")
-
-        def __init__(
-            self,
-            data: Any,
-            context: Mapping[str, Any] | None = None,
-            record_factor_metadata: bool = False,
-            **params: Any,
-        ):
-            """Initialize the Materializer.
-
-            Parameters
-            ----------
-            data: Any
-                Passed to PandasMaterializer.
-
-            context: Mapping[str, Any], optional
-                Passed to PandasMaterializer.
-
-            record_factor_metadata: bool
-                Flag that tells whether this particular instance of the custom
-                materializer class is supposed to record factor metadata. Only the
-                instance that is used for building the design matrix should record the
-                metadata. All other instances (e.g. used to generate contrast vectors)
-                should not record metadata to not overwrite the specifications from the
-                design matrix. (Default: False).
-
-            **params:
-                Passed to PandasMaterializer
-            """
-            self.factor_metadata_storage = (
-                factor_storage if record_factor_metadata else None
-            )
-            self.variable_to_factors = (
-                variable_to_factors if record_factor_metadata else None
-            )
-            # temporary pointer to metadata of factor that is currently evaluated
-            self._current_factor: FactorMetadata | None = None
-            super().__init__(data, context, **params)
-
-        @override
-        def _encode_evaled_factor(
-            self,
-            factor: EvaluatedFactor,
-            spec: ModelSpec,
-            drop_rows: Sequence[int],
-            reduced_rank: bool = False,
-        ) -> dict[str, Any]:
-            """Call this function just before the factor is evaluated.
-
-            Records some metadata, before we call the original function.
-            """
-            assert (
-                self._current_factor is None
-            ), "_current_factor should always be None when we start recording metadata"
-            if self.factor_metadata_storage is not None:
-                # Don't store if the factor is cached
-                # (then we should already have recorded it)
-                if (
-                    factor.expr in self.encoded_cache
-                    or (factor.expr, reduced_rank) in self.encoded_cache
-                ):
-                    assert (
-                        factor.expr in self.factor_metadata_storage
-                    ), "Factor should be there since it's cached"
-                else:
-                    assert self.variable_to_factors is not None
-                    for var in factor.variables:
-                        self.variable_to_factors[var].add(factor.expr)
-                    self._current_factor = FactorMetadata(
-                        name=factor.expr,
-                        reduced_rank=reduced_rank,
-                        categories=tuple(
-                            sorted(
-                                factor.values.drop(
-                                    index=factor.values.index[drop_rows]
-                                ).unique()
-                            )
-                        ),
-                        custom_encoder=factor.metadata.encoder is not None,
-                        kind=factor.metadata.kind,
-                    )
-            return super()._encode_evaled_factor(factor, spec, drop_rows, reduced_rank)
-
-        @override
-        def _flatten_encoded_evaled_factor(
-            self, name: str, values: FactorValues[dict]
-        ) -> dict[str, Any]:
-            """
-            Call this function at the end, before the design matrix gets materialized.
-
-            Here we have access to additional metadata, such as `drop_field`.
-            """
-            if self._current_factor is not None:
-                assert self._current_factor.name == name
-                assert self.factor_metadata_storage is not None
-                self._current_factor.drop_field = (
-                    values.__formulaic_metadata__.drop_field
-                )
-                self._current_factor.column_names = (
-                    values.__formulaic_metadata__.column_names
-                )
-                self._current_factor.colname_format = (
-                    values.__formulaic_metadata__.format
-                )
-                self.factor_metadata_storage[name].append(self._current_factor)
-                self._current_factor = None
-
-            return super()._flatten_encoded_evaled_factor(name, values)
-
-    return factor_storage, variable_to_factors, CustomPandasMaterializer
-
-
-class AmbiguousAttributeError(ValueError):
-    pass
-
-
-def resolve_ambiguous(objs: Sequence[Any], attr: str) -> Any:
-    """Check consistency of an attribute across objects.
-
-    Given a list of objects, return an attribute if it is the same between all
-    object. Otherwise, raise an error.
-
-    Parameters
-    ----------
-    objs: Sequence[Any]
-        A list of objects.
-
-    attr: str
-        The attribute to check.
-
-    Returns
-    -------
-    Any
-        The attribute, if it is the same for all objects.
-
-    Raises
-    ------
-    AmbiguousAttributeError
-        If the collection is empty.
-    """
-    if not objs:
-        raise AmbiguousAttributeError("Collection is empty")
-
-    first_obj_attr = getattr(objs[0], attr)
-
-    # Check if the attribute is the same for all objects
-    for obj in objs[1:]:
-        if getattr(obj, attr) != first_obj_attr:
-            raise AmbiguousAttributeError(
-                f"Ambiguous attribute '{attr}': values differ between objects"
-            )
-
-    # If attribute is the same for all objects, return it
-    return first_obj_attr
diff --git a/pydeseq2/dds.py b/pydeseq2/dds.py
index d16fbe3b..13e0cd2b 100644
--- a/pydeseq2/dds.py
+++ b/pydeseq2/dds.py
@@ -1,7 +1,6 @@
 import sys
 import time
 import warnings
-from itertools import chain
 from typing import List
 from typing import Literal
 from typing import Optional
@@ -11,16 +10,12 @@
 import anndata as ad  # type: ignore
 import numpy as np
 import pandas as pd
+from formulaic_contrasts import FormulaicContrasts
 from scipy.optimize import minimize
 from scipy.special import polygamma  # type: ignore
 from scipy.stats import f  # type: ignore
 from scipy.stats import trim_mean  # type: ignore
 
-from pydeseq2._formulaic import Factor
-
-# TODO this is from pertpy, if we keep it we shoud acknoledge it or import it directly
-from pydeseq2._formulaic import get_factor_storage_and_materializer
-from pydeseq2._formulaic import resolve_ambiguous
 from pydeseq2.default_inference import DefaultInference
 from pydeseq2.inference import Inference
 from pydeseq2.preprocessing import deseq2_norm_fit
@@ -254,8 +249,6 @@ def __init__(
 
         self.fit_type = fit_type
         self.design = design
-        self.factor_storage = None
-        self.variable_to_factors = None
 
         if continuous_factors is not None:
             warnings.warn(
@@ -298,12 +291,8 @@ def __init__(
         if isinstance(self.design, str):
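-            # Keep track of the categorical factors used in the model specification,
-            # including variable and factor names, by generating a custom materializer.
+            # Build the design matrix through FormulaicContrasts and keep the object:
+            # the `variables`, `cond` and `contrast` accessors all delegate to it.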
-            self.factor_storage, self.variable_to_factors, materializer_class = (
-                get_factor_storage_and_materializer()
-            )
-            self.obsm["design_matrix"] = materializer_class(
-                self.obs, record_factor_metadata=True
-            ).get_model_matrix(self.design)
+            self.formulaic_contrasts = FormulaicContrasts(self.obs, self.design)
+            self.obsm["design_matrix"] = self.formulaic_contrasts.design_matrix
         else:
             self.obsm["design_matrix"] = self.design
 
@@ -343,7 +332,7 @@ def __init__(
     def variables(self):
         """Get the names of the variables used in the model definition."""
         try:
-            return self.obsm["design_matrix"].model_spec.variables_by_source["data"]
+            return self.formulaic_contrasts.variables
         except AttributeError:
             raise ValueError(
                 """Retrieving variables is only possible if the model was initialized
@@ -571,20 +560,11 @@ def cond(self, **kwargs):
         ndarray
             A contrast vector that aligns to the columns of the design matrix.
         """
-        cond_dict = kwargs
-        if not set(cond_dict.keys()).issubset(self.variables):
-            raise ValueError(
-                """You specified a variable that is not part of the model. Available
-                variables: """
-                + ",".join(self.variables)
-            )
-        for var in self.variables:
-            if var in cond_dict:
-                self._check_category(var, cond_dict[var])
-            else:
-                cond_dict[var] = self._get_default_value(var)
-        df = pd.DataFrame([kwargs])
-        return self.obsm["design_matrix"].model_spec.get_model_matrix(df).iloc[0]
+        return self.formulaic_contrasts.cond(**kwargs)
+
+    def contrast(self, *args, **kwargs):
+        """Return a pairwise contrast vector; see ``FormulaicContrasts.contrast``."""
+        return self.formulaic_contrasts.contrast(*args, **kwargs)
 
     def fit_size_factors(
         self,
@@ -1543,34 +1523,3 @@ def _check_full_rank_design(self):
                 UserWarning,
                 stacklevel=2,
             )
-
-    ### Methods below are taken and adapted from pertpy's LinearModelBase ###
-    def _check_category(self, var, value):
-        factor_metadata = self._get_factor_metadata_for_variable(var)
-        tmp_categories = resolve_ambiguous(factor_metadata, "categories")
-        if (
-            resolve_ambiguous(factor_metadata, "kind") == Factor.Kind.CATEGORICAL
-            and value not in tmp_categories
-        ):
-            raise ValueError(
-                f"""You specified a non-existant category for {var}.
-                Possible categories: {', '.join(tmp_categories)}"""
-            )
-
-    def _get_factor_metadata_for_variable(self, var):
-        factors = self.variable_to_factors[var]
-        return list(chain.from_iterable(self.factor_storage[f] for f in factors))
-
-    def _get_default_value(self, var):
-        factor_metadata = self._get_factor_metadata_for_variable(var)
-        if resolve_ambiguous(factor_metadata, "kind") == Factor.Kind.CATEGORICAL:
-            try:
-                tmp_base = resolve_ambiguous(factor_metadata, "base")
-            except ValueError as e:
-                raise ValueError(
-                    f"""Could not automatically resolve base category for variable {var}.
-                    Please specify it explicity in `model.cond`."""
-                ) from e
-            return tmp_base if tmp_base is not None else "\0"
-        else:
-            return 0
diff --git a/pydeseq2/ds.py b/pydeseq2/ds.py
index 5a379b43..ebb9111e 100644
--- a/pydeseq2/ds.py
+++ b/pydeseq2/ds.py
@@ -586,34 +586,6 @@ def _build_contrast_vector(self) -> None:
         factor = self.contrast[0]
         alternative = self.contrast[1]
         ref = self.contrast[2]
-        self.contrast_vector = self._contrast(
+        self.contrast_vector = self.dds.contrast(
             column=factor, baseline=ref, group_to_compare=alternative
         )
-
-    # Everything below is copied from pertpy. TODO : get a MWE, then clean up
-    def _contrast(self, column: str, baseline: str, group_to_compare: str) -> np.ndarray:
-        """Build a simple contrast for pairwise comparisons.
-
-        This is equivalent to
-
-        ```
-        model.cond(<column> = baseline) - model.cond(<column> = group_to_compare)
-        ```
-
-        Parameters
-        ----------
-        column: str
-            The column to contrast.
-        baseline: str
-            The baseline group.
-        group_to_compare: str
-            The group to compare to the baseline.
-
-        Returns
-        -------
-        np.ndarray
-            The contrast vector.
-        """
-        return self.dds.cond(**{column: baseline}) - self.dds.cond(
-            **{column: group_to_compare}
-        )
diff --git a/setup.py b/setup.py
index 8a25ccb1..1a8a5e8a 100644
--- a/setup.py
+++ b/setup.py
@@ -34,6 +34,7 @@
         "pandas>=1.4.0",
         "scikit-learn>=1.1.0",
         "scipy>=1.11.0",
+        "formulaic-contrasts>=0.2.0",
         "matplotlib>=3.6.2",  # not sure why sphinx_gallery does not work without it
     ],  # external packages as dependencies
     extras_require={
diff --git a/tests/test_formulaic.py b/tests/test_formulaic.py
deleted file mode 100644
index a6205ae7..00000000
--- a/tests/test_formulaic.py
+++ /dev/null
@@ -1,367 +0,0 @@
-"""
-Copied from pertpy
-https://github.com/scverse/pertpy/tests/tools/_differential_gene_expression/
-"""
-
-import anndata as ad
-import numpy as np
-import pandas as pd
-import pytest
-import scipy.sparse as sp
-from formulaic.parser.types import Factor
-
-from pydeseq2._formulaic import AmbiguousAttributeError
-from pydeseq2._formulaic import FactorMetadata
-from pydeseq2._formulaic import get_factor_storage_and_materializer
-from pydeseq2._formulaic import resolve_ambiguous
-from pydeseq2.utils import load_example_data
-
-
-@pytest.fixture
-def test_counts():
-    return load_example_data(
-        modality="raw_counts",
-        dataset="synthetic",
-        debug=False,
-    )
-
-
-@pytest.fixture
-def test_metadata():
-    return load_example_data(
-        modality="metadata",
-        dataset="synthetic",
-        debug=False,
-    )
-
-
-@pytest.fixture
-def test_adata(test_counts, test_metadata):
-    return ad.AnnData(X=test_counts, obs=test_metadata)
-
-
-@pytest.fixture(params=[np.array, sp.csr_matrix, sp.csc_matrix])
-def test_adata_minimal(request):
-    matrix_format = request.param
-    n_obs = 80
-    n_donors = n_obs // 4
-    rng = np.random.default_rng(9)  # make tests deterministic
-    obs = pd.DataFrame(
-        {
-            "condition": ["A", "B"] * (n_obs // 2),
-            "donor": sum(([f"D{i}"] * n_donors for i in range(n_obs // n_donors)), []),
-            "other": (["X"] * (n_obs // 4)) + (["Y"] * ((3 * n_obs) // 4)),
-            "pairing": sum(([str(i), str(i)] for i in range(n_obs // 2)), []),
-            "continuous": [rng.uniform(0, 1) * 4000 for _ in range(n_obs)],
-        },
-    )
-    var = pd.DataFrame(index=["gene1", "gene2"])
-    group1 = rng.negative_binomial(20, 0.1, n_obs // 2)  # large mean
-    group2 = rng.negative_binomial(5, 0.5, n_obs // 2)  # small mean
-
-    condition_data = np.empty((n_obs,), dtype=group1.dtype)
-    condition_data[0::2] = group1
-    condition_data[1::2] = group2
-
-    donor_data = np.empty((n_obs,), dtype=group1.dtype)
-    donor_data[0:n_donors] = group2[:n_donors]
-    donor_data[n_donors : (2 * n_donors)] = group1[n_donors:]
-
-    donor_data[(2 * n_donors) : (3 * n_donors)] = group2[:n_donors]
-    donor_data[(3 * n_donors) :] = group1[n_donors:]
-
-    X = matrix_format(np.vstack([condition_data, donor_data]).T)
-
-    return ad.AnnData(X=X, obs=obs, var=var)
-
-
-# Ignore anndata ImplicitModificationWarning
-@pytest.mark.filterwarnings("ignore::UserWarning")
-@pytest.mark.parametrize(
-    "formula,reorder_categorical,expected_factor_metadata",
-    [
-        [
-            "~ donor",
-            None,
-            {"donor": {"reduced_rank": True, "custom_encoder": False, "base": "D0"}},
-        ],
-        [
-            "~ donor",
-            {"donor": ["D2", "D1", "D0", "D3"]},
-            {"donor": {"reduced_rank": True, "custom_encoder": False, "base": "D2"}},
-        ],
-        [
-            "~ C(donor)",
-            None,
-            {"C(donor)": {"reduced_rank": True, "custom_encoder": True, "base": "D0"}},
-        ],
-        [
-            "~ C(donor, contr.treatment(base='D2'))",
-            None,
-            {
-                "C(donor, contr.treatment(base='D2'))": {
-                    "reduced_rank": True,
-                    "custom_encoder": True,
-                    "base": "D2",
-                }
-            },
-        ],
-        [
-            "~ C(donor, contr.sum)",
-            None,
-            {
-                "C(donor, contr.sum)": {
-                    "reduced_rank": True,
-                    "custom_encoder": True,
-                    "base": "D3",
-                }
-            },
-        ],
-        [
-            "~ C(donor, contr.sum)",
-            {"donor": ["D1", "D0", "D3", "D2"]},
-            {
-                "C(donor, contr.sum)": {
-                    "reduced_rank": True,
-                    "custom_encoder": True,
-                    "base": "D2",
-                }
-            },
-        ],
-        [
-            "~ condition",
-            None,
-            {"condition": {"reduced_rank": True, "custom_encoder": False, "base": "A"}},
-        ],
-        [
-            "~ C(condition)",
-            None,
-            {
-                "C(condition)": {
-                    "reduced_rank": True,
-                    "custom_encoder": True,
-                    "base": "A",
-                }
-            },
-        ],
-        [
-            "~ C(condition, contr.treatment(base='B'))",
-            None,
-            {
-                "C(condition, contr.treatment(base='B'))": {
-                    "reduced_rank": True,
-                    "custom_encoder": True,
-                    "base": "B",
-                }
-            },
-        ],
-        [
-            "~ C(condition, contr.sum)",
-            None,
-            {
-                "C(condition, contr.sum)": {
-                    "reduced_rank": True,
-                    "custom_encoder": True,
-                    "base": "B",
-                }
-            },
-        ],
-        [
-            "~ 0 + condition",
-            None,
-            {
-                "condition": {
-                    "reduced_rank": False,
-                    "custom_encoder": False,
-                    "base": None,
-                }
-            },
-        ],
-        [
-            "~ condition + donor",
-            None,
-            {
-                "condition": {
-                    "reduced_rank": True,
-                    "custom_encoder": False,
-                    "base": "A",
-                },
-                "donor": {"reduced_rank": True, "custom_encoder": False, "base": "D0"},
-            },
-        ],
-        [
-            "~ 0 + condition + donor",
-            None,
-            {
-                "condition": {
-                    "reduced_rank": False,
-                    "custom_encoder": False,
-                    "base": None,
-                },
-                "donor": {"reduced_rank": True, "custom_encoder": False, "base": "D0"},
-            },
-        ],
-        [
-            "~ condition * donor",
-            None,
-            {
-                "condition": {
-                    "reduced_rank": True,
-                    "custom_encoder": False,
-                    "base": "A",
-                },
-                "donor": {"reduced_rank": True, "custom_encoder": False, "base": "D0"},
-            },
-        ],
-        [
-            "~ condition * C(donor, contr.treatment(base='D2'))",
-            None,
-            {
-                "condition": {
-                    "reduced_rank": True,
-                    "custom_encoder": False,
-                    "base": "A",
-                },
-                "C(donor, contr.treatment(base='D2'))": {
-                    "reduced_rank": True,
-                    "custom_encoder": True,
-                    "base": "D2",
-                },
-            },
-        ],
-        [
-            "~ condition + C(condition) + C(condition, contr.treatment(base='B'))",
-            None,
-            {
-                "condition": {
-                    "reduced_rank": True,
-                    "custom_encoder": False,
-                    "base": "A",
-                },
-                "C(condition)": {
-                    "reduced_rank": True,
-                    "custom_encoder": True,
-                    "base": "A",
-                },
-                "C(condition, contr.treatment(base='B'))": {
-                    "reduced_rank": True,
-                    "custom_encoder": True,
-                    "base": "B",
-                },
-            },
-        ],
-        [
-            "~ condition + continuous + np.log(continuous)",
-            None,
-            {
-                "condition": {
-                    "reduced_rank": True,
-                    "custom_encoder": False,
-                    "base": "A",
-                    "kind": Factor.Kind.CATEGORICAL,
-                },
-                "continuous": {
-                    "reduced_rank": False,
-                    "custom_encoder": False,
-                    "base": None,
-                    "kind": Factor.Kind.NUMERICAL,
-                },
-                "np.log(continuous)": {
-                    "reduced_rank": False,
-                    "custom_encoder": False,
-                    "base": None,
-                    "kind": Factor.Kind.NUMERICAL,
-                },
-            },
-        ],
-        [
-            "~ condition * donor + continuous",
-            None,
-            {
-                "condition": {
-                    "reduced_rank": True,
-                    "custom_encoder": False,
-                    "base": "A",
-                },
-                "donor": {"reduced_rank": True, "custom_encoder": False, "base": "D0"},
-                "continuous": {
-                    "reduced_rank": False,
-                    "custom_encoder": False,
-                    "base": None,
-                    "kind": Factor.Kind.NUMERICAL,
-                },
-            },
-        ],
-        [
-            "~ condition:donor",
-            None,
-            {
-                "condition": {
-                    "reduced_rank": True,
-                    "custom_encoder": False,
-                    "base": "A",
-                },
-                "donor": {
-                    "custom_encoder": False,
-                    "drop_field": "D0",
-                },  # `reduced_rank` and `base` will be ambigous here because Formulaic
-                # generates both version of the factor internally
-            },
-        ],
-    ],
-)
-def test_custom_materializer(
-    test_adata_minimal, formula, reorder_categorical, expected_factor_metadata
-):
-    """Test that the custom materializer correctly stores the baseline category.
-
-    Parameters
-    ----------
-    test_adata_minimal
-        adata fixture
-    formula
-        Formula to test
-    reorder_categorical
-        Create a pandas categorical for a given column with a certain order of categories
-    expected_factor_metadata
-        dict with expected values for each factor
-    """
-    if reorder_categorical is not None:
-        for col, order in reorder_categorical.items():
-            test_adata_minimal.obs[col] = pd.Categorical(
-                test_adata_minimal.obs[col], categories=order
-            )
-    factor_storage, _, materializer = get_factor_storage_and_materializer()
-    materializer(test_adata_minimal.obs, record_factor_metadata=True).get_model_matrix(
-        formula
-    )
-    for factor, expected_metadata in expected_factor_metadata.items():
-        actual_metadata = factor_storage[factor]
-        for k in expected_metadata:
-            assert resolve_ambiguous(actual_metadata, k) == expected_metadata[k]
-
-
-# Ignore anndata ImplicitModificationWarning
-@pytest.mark.filterwarnings("ignore::UserWarning")
-def test_resolve_ambiguous():
-    obj1 = FactorMetadata("F1", True, True, ["A", "B"], Factor.Kind.CATEGORICAL)
-    obj2 = FactorMetadata("F2", True, False, ["A", "B"], Factor.Kind.CATEGORICAL)
-    obj3 = FactorMetadata("F3", True, False, None, Factor.Kind.NUMERICAL)
-
-    with pytest.raises(ValueError):
-        resolve_ambiguous([], "foo")
-
-    with pytest.raises(AttributeError):
-        resolve_ambiguous([obj1, obj2], "doesntexist")
-
-    with pytest.raises(AmbiguousAttributeError):
-        assert resolve_ambiguous([obj1, obj2], "name")
-
-    assert resolve_ambiguous([obj1, obj2, obj3], "reduced_rank") is True
-    assert resolve_ambiguous([obj1, obj2], "categories") == ["A", "B"]
-
-    with pytest.raises(AmbiguousAttributeError):
-        assert resolve_ambiguous([obj1, obj2, obj3], "categories")
-
-    with pytest.raises(AmbiguousAttributeError):
-        assert resolve_ambiguous([obj1, obj3], "kind")
diff --git a/tests/test_pydeseq2.py b/tests/test_pydeseq2.py
index 4fd8809e..2279fc8f 100644
--- a/tests/test_pydeseq2.py
+++ b/tests/test_pydeseq2.py
@@ -119,7 +119,7 @@ def test_deseq_independent_filtering_parametric_fit(counts_df, metadata, tol=0.0
     )
     dds.deseq2()
 
-    ds = DeseqStats(dds, contrast=["condition", "A", "B"])
+    ds = DeseqStats(dds, contrast=["condition", "B", "A"])
     ds.summary()
 
     # Check results
@@ -146,7 +146,7 @@ def test_deseq_independent_filtering_mean_fit(counts_df, metadata, tol=0.02):
     )
     dds.deseq2()
 
-    ds = DeseqStats(dds, contrast=["condition", "A", "B"])
+    ds = DeseqStats(dds, contrast=["condition", "B", "A"])
     ds.summary()
 
     # Check results
@@ -178,7 +178,7 @@ def test_deseq_without_independent_filtering_parametric_fit(
     )
     dds.deseq2()
 
-    ds = DeseqStats(dds, contrast=["condition", "A", "B"], independent_filter=False)
+    ds = DeseqStats(dds, contrast=["condition", "B", "A"], independent_filter=False)
     ds.summary()
 
     # Check results
@@ -207,7 +207,7 @@ def test_alt_hypothesis(alt_hypothesis, counts_df, metadata, tol=0.02):
 
     ds = DeseqStats(
         dds,
-        contrast=["condition", "A", "B"],
+        contrast=["condition", "B", "A"],
         lfc_null=-0.5 if alt_hypothesis == "less" else 0.5,
         alt_hypothesis=alt_hypothesis,
     )
@@ -255,7 +255,7 @@ def test_deseq_no_refit_cooks(counts_df, metadata, tol=0.02):
     )
     dds.deseq2()
 
-    ds = DeseqStats(dds, contrast=["condition", "A", "B"])
+    ds = DeseqStats(dds, contrast=["condition", "B", "A"])
     ds.summary()
 
     # Check results
@@ -401,7 +401,7 @@ def test_multifactor_deseq(counts_df, metadata, with_outliers, tol=0.04):
     dds = DeseqDataSet(counts=counts_df, metadata=metadata, design="~group + condition")
     dds.deseq2()
 
-    res = DeseqStats(dds, contrast=["condition", "A", "B"])
+    res = DeseqStats(dds, contrast=["condition", "B", "A"])
     res.summary()
     res_df = res.results_df
 
@@ -595,7 +595,7 @@ def test_wide_deseq(
     )
     dds.deseq2()
 
-    ds = DeseqStats(dds, contrast=["condition", "A", "B"])
+    ds = DeseqStats(dds, contrast=["condition", "B", "A"])
     ds.summary()
 
     # Check results
@@ -663,7 +663,7 @@ def test_anndata_init(counts_df, metadata, tol=0.02):
     dds = DeseqDataSet(adata=adata, design="~condition")
     dds.deseq2()
 
-    ds = DeseqStats(dds, contrast=["condition", "A", "B"])
+    ds = DeseqStats(dds, contrast=["condition", "B", "A"])
     ds.summary()
 
     # Check results