From 1cde0c10f8985ad6b6da468edeba5b3263f89a18 Mon Sep 17 00:00:00 2001 From: Doug Branton Date: Tue, 24 Sep 2024 13:46:00 -0700 Subject: [PATCH 1/2] better support '.' characters outside of nested context --- src/nested_pandas/nestedframe/core.py | 31 ++++++++++++++----- .../nestedframe/test_nestedframe.py | 11 +++++++ 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/src/nested_pandas/nestedframe/core.py b/src/nested_pandas/nestedframe/core.py index 3f28a5e..cc33c9e 100644 --- a/src/nested_pandas/nestedframe/core.py +++ b/src/nested_pandas/nestedframe/core.py @@ -38,7 +38,7 @@ def all_columns(self) -> dict: """returns a dictionary of columns for each base/nested dataframe""" all_columns = {"base": self.columns} for column in self.columns: - if isinstance(self[column].dtype, NestedDtype): + if isinstance(self.dtypes[column], NestedDtype): nest_cols = self[column].nest.fields all_columns[column] = nest_cols return all_columns @@ -48,16 +48,18 @@ def nested_columns(self) -> list: """retrieves the base column names for all nested dataframes""" nest_cols = [] for column in self.columns: - if isinstance(self[column].dtype, NestedDtype): + if isinstance(self.dtypes[column], NestedDtype): nest_cols.append(column) return nest_cols def _is_known_hierarchical_column(self, colname) -> bool: """Determine whether a string is a known hierarchical column name""" if "." in colname: - left, right = colname.split(".") - if left in self.nested_columns: - return right in self.all_columns[left] + base_name = colname.split(".")[0] + if base_name in self.nested_columns: + # TODO: only handles one level of nesting for now + nested_name = ".".join(colname.split(".")[1:]) + return nested_name in self.all_columns[base_name] return False return False @@ -68,12 +70,25 @@ def _is_known_column(self, colname) -> bool: def __getitem__(self, item): """Adds custom __getitem__ behavior for nested columns""" + if isinstance(item, str): + # Pre-empt the nested check if the item is a base column + if item in self.columns: + return super().__getitem__(item) + # If a nested column name is passed, return a flat series for that column + # flat series is chosen over list series for utility + # e.g. native ability to do something like ndf["nested.a"] + 3 + elif self._is_known_hierarchical_column(item): + # TODO: only handles one level of nesting for now + nested = item.split(".")[0] + col = ".".join(item.split(".")[1:]) + return self[nested].nest.get_flat_series(col) + # If a nested column name is passed, return a flat series for that column # flat series is chosen over list series for utility # e.g. native ability to do something like ndf["nested.a"] + 3 - if isinstance(item, str) and self._is_known_hierarchical_column(item): - nested, col = item.split(".") - return self[nested].nest.get_flat_series(col) + # elif isinstance(item, str) and self._is_known_hierarchical_column(item): + # nested, col = item.split(".") + # return self[nested].nest.get_flat_series(col) # Otherwise, do __getitem__ as normal else: return super().__getitem__(item) diff --git a/tests/nested_pandas/nestedframe/test_nestedframe.py b/tests/nested_pandas/nestedframe/test_nestedframe.py index 95421b3..b29881d 100644 --- a/tests/nested_pandas/nestedframe/test_nestedframe.py +++ b/tests/nested_pandas/nestedframe/test_nestedframe.py @@ -136,6 +136,17 @@ def test_set_new_nested_col(): ) +def test_get_dot_names(): + """Test the ability to still work with column names with '.' characters outside of nesting""" + nf = NestedFrame.from_flat( + NestedFrame({"a": [1, 2, 3, 4], ".b.": [1, 1, 3, 3], "R.A.": [3, None, 6, 5]}, index=[1, 1, 2, 2]), + base_columns=[".b."], + ) + + assert len(nf[".b."]) == 2 + assert len(nf["nested.R.A."]) == 4 + + def test_add_nested_with_flat_df(): """Test that add_nested correctly adds a nested column to the base df""" From e8f805c0244e2e5efb03eb057f1b605d14d1aeff Mon Sep 17 00:00:00 2001 From: Doug Branton Date: Tue, 24 Sep 2024 13:54:13 -0700 Subject: [PATCH 2/2] remove commented lines --- src/nested_pandas/nestedframe/core.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/nested_pandas/nestedframe/core.py b/src/nested_pandas/nestedframe/core.py index cc33c9e..c244c0a 100644 --- a/src/nested_pandas/nestedframe/core.py +++ b/src/nested_pandas/nestedframe/core.py @@ -82,14 +82,6 @@ def __getitem__(self, item): nested = item.split(".")[0] col = ".".join(item.split(".")[1:]) return self[nested].nest.get_flat_series(col) - - # If a nested column name is passed, return a flat series for that column - # flat series is chosen over list series for utility - # e.g. native ability to do something like ndf["nested.a"] + 3 - # elif isinstance(item, str) and self._is_known_hierarchical_column(item): - # nested, col = item.split(".") - # return self[nested].nest.get_flat_series(col) - # Otherwise, do __getitem__ as normal else: return super().__getitem__(item)