From a65ec41dd2fa4bdcfc1a38033102907e4741387a Mon Sep 17 00:00:00 2001 From: Nikita Almakov Date: Wed, 4 Sep 2024 21:54:10 +0300 Subject: [PATCH] improved CSE (common subexpression elimination) for `c.group_by` mode --- docs/CHANGELOG.md | 4 ++++ src/convtools/__init__.py | 2 +- src/convtools/_aggregations.py | 10 +++++----- tests/test_group_by.py | 17 +++++++++++++++++ 4 files changed, 27 insertions(+), 6 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 52e7a36..5e68893 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.14.2 (2024-09-04) + +- improved CSE (common subexpression elimination) for `c.group_by` mode + ## 1.14.1 (2024-09-01) - python 3.13t (free threading) support diff --git a/src/convtools/__init__.py b/src/convtools/__init__.py index 7c9571a..412fac6 100644 --- a/src/convtools/__init__.py +++ b/src/convtools/__init__.py @@ -4,4 +4,4 @@ from ._dt import DateGrid, DateTimeGrid -__version__ = "1.14.1" +__version__ = "1.14.2" diff --git a/src/convtools/_aggregations.py b/src/convtools/_aggregations.py index f6055fc..5a40220 100644 --- a/src/convtools/_aggregations.py +++ b/src/convtools/_aggregations.py @@ -160,17 +160,17 @@ def fuzzy_merge_aggregate_cmp(x, y): def no_side_effects_test(x): - if isinstance(x, AstCall): - x = x.func - if isinstance(x, AstAttribute): - x = x.value - elif ( + if ( isinstance(x, AstCompare) and isinstance(x.ops[0], AstIs) and isinstance(x.comparators[0], AstName) and x.comparators[0].id == "_none" ): x = x.left + elif isinstance(x, AstCall): + x = x.func + if isinstance(x, AstAttribute): + x = x.value return isinstance(x, AstName) and x.id.startswith("agg_data_") diff --git a/tests/test_group_by.py b/tests/test_group_by.py index 1706dd4..a742e2f 100644 --- a/tests/test_group_by.py +++ b/tests/test_group_by.py @@ -1015,6 +1015,23 @@ def test_group_by_reducers_reuse(): }, ] + converter = ( + c.group_by(c.item("x")) + .aggregate( + { + "x": c.item("x"), + "y": c.ReduceFuncs.Sum(c.item("obj", "y")), + "z": c.ReduceFuncs.Sum(c.item("obj", "z")), + } + ) + .gen_converter(debug=False) + ) + code_str = format_code(get_code_str(converter)) + assert ( + code_str.count("row_['obj']") == 1 + or code_str.count('row_["obj"]') == 1 + ) + def test_aggregate_reducers_reuse(): converter = c.aggregate(