Skip to content

Commit

Permalink
Fix order issue with categorical variables (#73)
Browse files Browse the repository at this point in the history
* Fix order issue with categorical variables

* update changelog
  • Loading branch information
tomicapretto authored Apr 14, 2022
1 parent e6c1b84 commit 5a0d8f2
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 5 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@

### Deprecation

### v0.3.3

### Maintenance and fixes

- Fixed a bug in `CategoricalBox`: when `data` is an ordered categorical and `levels` is `None`, the levels now follow the category order defined in `data` instead of ignoring it (#73)

### v0.3.2

### Maintenance and fixes
Expand Down
5 changes: 4 additions & 1 deletion formulae/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ class CategoricalBox:
"""

def __init__(self, data, contrast, levels):
# If 'data' is ordered and no explicit levels have been passed, use order in 'data'.
if hasattr(data.dtype, "ordered") and data.dtype.ordered and levels is None:
levels = data.dtype.categories.tolist()
self.data = data
self.contrast = contrast
self.levels = levels
Expand Down Expand Up @@ -198,7 +201,7 @@ def __init__(self, omit=None):
def _omit_index(self, levels):
"""Returns a number between 0 and len(levels) - 1"""
if self.omit is None:
# By default, omit the lats level.
# By default, omit the last level.
return len(levels) - 1
else:
return levels.index(self.omit)
Expand Down
17 changes: 14 additions & 3 deletions formulae/tests/test_design_matrices.py
Original file line number Diff line number Diff line change
Expand Up @@ -1133,7 +1133,18 @@ def test_extra_namespace(data):
assert df["myfunc(x3)"].equals(np.log(df["x3"]))


def test_categorical_series():
data = pd.DataFrame({"x": list("abc") * 10})
data["x"] = pd.Categorical(data["x"], list("abc"), ordered=True)
def test_categorical_ordered_series():
# Test it works
data = pd.DataFrame({"x": list("abcd") * 10})
data["x"] = pd.Categorical(data["x"], list("bcda"), ordered=True)
design_matrices("S(x)", data)

# Test it works and it respects original order
levels = design_matrices("x", data).common.terms["x"].levels
assert levels == list("cda")

levels = design_matrices("T(x)", data).common.terms["T(x)"].levels
assert levels == list("cda")

levels = design_matrices("S(x)", data).common.terms["S(x)"].levels
assert levels == list("bcd")
2 changes: 1 addition & 1 deletion formulae/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.3.2"
__version__ = "0.3.3"

0 comments on commit 5a0d8f2

Please sign in to comment.