From f4ad2b72fc7b3242bef936e78c71a34305847d71 Mon Sep 17 00:00:00 2001 From: jianfengmao Date: Tue, 28 Nov 2023 18:34:57 -0700 Subject: [PATCH 1/3] Add the optional dependencies parameters --- py/server/deephaven/table.py | 30 ++++++++++++++++++----- py/server/tests/test_partitioned_table.py | 9 +++++++ 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/py/server/deephaven/table.py b/py/server/deephaven/table.py index e46348667b9..211cc12e72a 100644 --- a/py/server/deephaven/table.py +++ b/py/server/deephaven/table.py @@ -2663,13 +2663,18 @@ def constituent_tables(self) -> List[Table]: """Returns all the current constituent tables.""" return list(map(Table, self.j_partitioned_table.constituents())) - def transform(self, func: Callable[[Table], Table]) -> PartitionedTable: + def transform(self, func: Callable[[Table], Table], + dependencies: Optional[Sequence[Union[Table, PartitionedTable]]] = None) -> PartitionedTable: """Apply the provided function to all constituent Tables and produce a new PartitionedTable with the results as its constituents, with the same data for all other columns in the underlying partitioned Table. Note that if the Table underlying this PartitionedTable changes, a corresponding change will propagate to the result. Args: func (Callable[[Table], Table]): a function which takes a Table as input and returns a new Table + dependencies (Optional[Sequence[Union[Table, PartitionedTable]]]): additional dependencies that must be + satisfied before applying the provided transform function to added or modified constituents during + update processing, use this when the transform function uses additional Table or Partitioned Table + inputs besides the constituents of this PartitionedTable. Defaults to None. Returns: a PartitionedTable @@ -2679,13 +2684,18 @@ def transform(self, func: Callable[[Table], Table]) -> PartitionedTable: """ try: j_operator = j_unary_operator(func, dtypes.from_jtype(Table.j_object_type.jclass)) - with auto_locking_ctx(self): - j_pt = self.j_partitioned_table.transform(j_operator) + dependencies = to_sequence(dependencies, wrapped=True) + j_dependencies = [d.j_table for d in dependencies if isinstance(d, Table)] + j_dependencies.extend([d.table for d in dependencies if isinstance(d, PartitionedTable)]) + with auto_locking_ctx(self, *dependencies): + j_pt = self.j_partitioned_table.transform(j_operator, j_dependencies) return PartitionedTable(j_partitioned_table=j_pt) except Exception as e: raise DHError(e, "failed to transform the PartitionedTable.") from e - def partitioned_transform(self, other: PartitionedTable, func: Callable[[Table, Table], Table]) -> PartitionedTable: + def partitioned_transform(self, other: PartitionedTable, func: Callable[[Table, Table], Table], + dependencies: Optional[Sequence[Union[Table, PartitionedTable]]] = None) -> \ + PartitionedTable: """Join the underlying partitioned Tables from this PartitionedTable and other on the key columns, then apply the provided function to all pairs of constituent Tables with the same keys in order to produce a new PartitionedTable with the results as its constituents, with the same data for all other columns in the @@ -2698,6 +2708,10 @@ def partitioned_transform(self, other: PartitionedTable, func: Callable[[Table, other (PartitionedTable): the other Partitioned table whose constituent tables will be passed in as the 2nd argument to the provided function func (Callable[[Table, Table], Table]): a function which takes two Tables as input and returns a new Table + dependencies (Optional[Sequence[Union[Table, PartitionedTable]]]): additional dependencies that must be + satisfied before applying the provided transform function to added, modified, or newly-matched + constituents during update processing, use this when the transform function uses additional Table or + Partitioned Table inputs besides the constituents of this PartitionedTable. Defaults to None. Returns: a PartitionedTable @@ -2707,8 +2721,12 @@ def partitioned_transform(self, other: PartitionedTable, func: Callable[[Table, """ try: j_operator = j_binary_operator(func, dtypes.from_jtype(Table.j_object_type.jclass)) - with auto_locking_ctx(self, other): - j_pt = self.j_partitioned_table.partitionedTransform(other.j_partitioned_table, j_operator) + dependencies = to_sequence(dependencies, wrapped=True) + j_dependencies = [d.j_table for d in dependencies if isinstance(d, Table)] + j_dependencies.extend([d.table.j_table for d in dependencies if isinstance(d, PartitionedTable)]) + with auto_locking_ctx(self, other, *dependencies): + j_pt = self.j_partitioned_table.partitionedTransform(other.j_partitioned_table, j_operator, + j_dependencies) return PartitionedTable(j_partitioned_table=j_pt) except Exception as e: raise DHError(e, "failed to transform the PartitionedTable with another PartitionedTable.") from e diff --git a/py/server/tests/test_partitioned_table.py b/py/server/tests/test_partitioned_table.py index 5e525a792ca..524792d784c 100644 --- a/py/server/tests/test_partitioned_table.py +++ b/py/server/tests/test_partitioned_table.py @@ -11,6 +11,7 @@ from deephaven.filters import Filter from deephaven import read_csv, DHError, new_table, update_graph, time_table, empty_table +from deephaven.update_graph import shared_lock from tests.testbase import BaseTestCase from deephaven.execution_context import get_exec_ctx @@ -128,6 +129,10 @@ def test_transform(self): pt = self.partitioned_table.transform(Transformer) self.assertIn("f", [col.name for col in pt.constituent_table_columns]) + with shared_lock(self.test_table): + pt = self.partitioned_table.transform(Transformer, dependencies=[self.test_table]) + self.assertIn("f", [col.name for col in pt.constituent_table_columns]) + with self.assertRaises(DHError) as cm: pt = self.partitioned_table.transform(lambda t, t1: t.join(t1)) self.assertRegex(str(cm.exception), r"missing .* argument") @@ -141,6 +146,10 @@ def test_partitioned_transform(self): pt = self.partitioned_table.partitioned_transform(other_pt, PartitionedTransformer()) self.assertIn("f", [col.name for col in pt.constituent_table_columns]) + with shared_lock(other_pt): + pt = self.partitioned_table.partitioned_transform(other_pt, PartitionedTransformer(), dependencies=[other_pt]) + self.assertIn("f", [col.name for col in pt.constituent_table_columns]) + def test_partition_agg(self): with update_graph.shared_lock(self.test_update_graph): test_table = time_table("PT00:00:00.001").update(["X=i", "Y=i%13", "Z=X*Y"]) From 7909f0c10b9757e9541e4127f17fae1c30e32e3c Mon Sep 17 00:00:00 2001 From: Jianfeng Mao <4297243+jmao-denver@users.noreply.github.com> Date: Tue, 28 Nov 2023 18:52:23 -0700 Subject: [PATCH 2/3] Apply suggestions from code review Co-authored-by: Ryan Caudy --- py/server/deephaven/table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py/server/deephaven/table.py b/py/server/deephaven/table.py index 211cc12e72a..c06ec466d2d 100644 --- a/py/server/deephaven/table.py +++ b/py/server/deephaven/table.py @@ -2686,7 +2686,7 @@ def transform(self, func: Callable[[Table], Table], j_operator = j_unary_operator(func, dtypes.from_jtype(Table.j_object_type.jclass)) dependencies = to_sequence(dependencies, wrapped=True) j_dependencies = [d.j_table for d in dependencies if isinstance(d, Table)] - j_dependencies.extend([d.table for d in dependencies if isinstance(d, PartitionedTable)]) + j_dependencies.extend([d.table.j_table for d in dependencies if isinstance(d, PartitionedTable)]) with auto_locking_ctx(self, *dependencies): j_pt = self.j_partitioned_table.transform(j_operator, j_dependencies) return PartitionedTable(j_partitioned_table=j_pt) From a170d553258d4f6ff54277a691d8ead91733e4e6 Mon Sep 17 00:00:00 2001 From: jianfengmao Date: Wed, 29 Nov 2023 10:26:40 -0700 Subject: [PATCH 3/3] Respond to review comments --- py/server/deephaven/table.py | 16 ++++++++-------- py/server/tests/test_partitioned_table.py | 19 +++++++++++++------ 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/py/server/deephaven/table.py b/py/server/deephaven/table.py index c06ec466d2d..89fa8df9c19 100644 --- a/py/server/deephaven/table.py +++ b/py/server/deephaven/table.py @@ -2673,8 +2673,8 @@ def transform(self, func: Callable[[Table], Table], func (Callable[[Table], Table]): a function which takes a Table as input and returns a new Table dependencies (Optional[Sequence[Union[Table, PartitionedTable]]]): additional dependencies that must be satisfied before applying the provided transform function to added or modified constituents during - update processing, use this when the transform function uses additional Table or Partitioned Table - inputs besides the constituents of this PartitionedTable. Defaults to None. + update processing. If the transform function uses any other refreshing Table or refreshing Partitioned + Table, they must be included in this argument. Defaults to None. Returns: a PartitionedTable @@ -2685,8 +2685,8 @@ def transform(self, func: Callable[[Table], Table], try: j_operator = j_unary_operator(func, dtypes.from_jtype(Table.j_object_type.jclass)) dependencies = to_sequence(dependencies, wrapped=True) - j_dependencies = [d.j_table for d in dependencies if isinstance(d, Table)] - j_dependencies.extend([d.table.j_table for d in dependencies if isinstance(d, PartitionedTable)]) + j_dependencies = [d.j_table for d in dependencies if isinstance(d, Table) and d.is_refreshing] + j_dependencies.extend([d.table.j_table for d in dependencies if isinstance(d, PartitionedTable) and d.is_refreshing]) with auto_locking_ctx(self, *dependencies): j_pt = self.j_partitioned_table.transform(j_operator, j_dependencies) return PartitionedTable(j_partitioned_table=j_pt) @@ -2710,8 +2710,8 @@ def partitioned_transform(self, other: PartitionedTable, func: Callable[[Table, func (Callable[[Table, Table], Table]): a function which takes two Tables as input and returns a new Table dependencies (Optional[Sequence[Union[Table, PartitionedTable]]]): additional dependencies that must be satisfied before applying the provided transform function to added, modified, or newly-matched - constituents during update processing, use this when the transform function uses additional Table or - Partitioned Table inputs besides the constituents of this PartitionedTable. Defaults to None. + constituents during update processing. If the transform function uses any other refreshing Table or + refreshing Partitioned Table, they must be included in this argument. Defaults to None. Returns: a PartitionedTable @@ -2722,8 +2722,8 @@ def partitioned_transform(self, other: PartitionedTable, func: Callable[[Table, try: j_operator = j_binary_operator(func, dtypes.from_jtype(Table.j_object_type.jclass)) dependencies = to_sequence(dependencies, wrapped=True) - j_dependencies = [d.j_table for d in dependencies if isinstance(d, Table)] - j_dependencies.extend([d.table.j_table for d in dependencies if isinstance(d, PartitionedTable)]) + j_dependencies = [d.j_table for d in dependencies if isinstance(d, Table) and d.is_refreshing] + j_dependencies.extend([d.table.j_table for d in dependencies if isinstance(d, PartitionedTable) and d.is_refreshing]) with auto_locking_ctx(self, other, *dependencies): j_pt = self.j_partitioned_table.partitionedTransform(other.j_partitioned_table, j_operator, j_dependencies) diff --git a/py/server/tests/test_partitioned_table.py b/py/server/tests/test_partitioned_table.py index 524792d784c..c54558b176a 100644 --- a/py/server/tests/test_partitioned_table.py +++ b/py/server/tests/test_partitioned_table.py @@ -129,9 +129,12 @@ def test_transform(self): pt = self.partitioned_table.transform(Transformer) self.assertIn("f", [col.name for col in pt.constituent_table_columns]) - with shared_lock(self.test_table): - pt = self.partitioned_table.transform(Transformer, dependencies=[self.test_table]) - self.assertIn("f", [col.name for col in pt.constituent_table_columns]) + ticking_t = time_table("PT00:00:01") + pt = self.partitioned_table.transform(Transformer, dependencies=[ticking_t]) + self.assertIn("f", [col.name for col in pt.constituent_table_columns]) + + pt = self.partitioned_table.transform(Transformer, dependencies=[self.test_table]) + self.assertIn("f", [col.name for col in pt.constituent_table_columns]) with self.assertRaises(DHError) as cm: pt = self.partitioned_table.transform(lambda t, t1: t.join(t1)) @@ -146,9 +149,13 @@ def test_partitioned_transform(self): pt = self.partitioned_table.partitioned_transform(other_pt, PartitionedTransformer()) self.assertIn("f", [col.name for col in pt.constituent_table_columns]) - with shared_lock(other_pt): - pt = self.partitioned_table.partitioned_transform(other_pt, PartitionedTransformer(), dependencies=[other_pt]) - self.assertIn("f", [col.name for col in pt.constituent_table_columns]) + ticking_pt = time_table("PT00:00:01").update(["X= i % 10", "Y = String.valueOf(i)"]).partition_by("X") + pt = self.partitioned_table.partitioned_transform(other_pt, PartitionedTransformer(), + dependencies=[ticking_pt]) + self.assertIn("f", [col.name for col in pt.constituent_table_columns]) + + pt = self.partitioned_table.partitioned_transform(other_pt, PartitionedTransformer(), dependencies=[other_pt]) + self.assertIn("f", [col.name for col in pt.constituent_table_columns]) def test_partition_agg(self): with update_graph.shared_lock(self.test_update_graph):