PennyLaneAI · DSGuala · Jun 21, 2024 · May 13, 2024 · May 16, 2024 · May 16, 2024
diff --git a/doc/releases/changelog-dev.md b/doc/releases/changelog-dev.md
@@ -194,6 +194,10 @@
 * `QuantumScript` properties are only calculated when needed, instead of on initialization. This decreases the classical overhead by >20%.
   `par_info`, `obs_sharing_wires`, and `obs_sharing_wires_id` are now public attributes.
   [(#5696)](https://github.com/PennyLaneAI/pennylane/pull/5696)
+
+* The `qml.data` module now supports PyTree data types as dataset attributes.
+  [(#5732)](https://github.com/PennyLaneAI/pennylane/pull/5732)
+
 
 * `qml.ops.Conditional` now inherits from `qml.ops.SymbolicOp`, thus it inherits several useful common functionalities. Other properties such as adjoint and diagonalizing gates have been added using the `base` properties.
   [(##5772)](https://github.com/PennyLaneAI/pennylane/pull/5772)
@@ -383,6 +387,7 @@ Guillermo Alonso-Linaje,
 Utkarsh Azad,
 Lillian M. A. Frederiksen,
 Gabriel Bottrill,
+Jack Brown,
 Astral Cai,
 Ahmed Darwish,
 Isaac De Vlugt,

diff --git a/pennylane/data/__init__.py b/pennylane/data/__init__.py
@@ -211,6 +211,7 @@ class QuantumOscillator(qml.data.Dataset, data_name="quantum_oscillator", identi
     DatasetSparseArray,
     DatasetString,
     DatasetTuple,
+    DatasetPyTree,
 )
 from .base import DatasetNotWriteableError
 from .base.attribute import AttributeInfo, DatasetAttribute, attribute
@@ -225,6 +226,7 @@ class QuantumOscillator(qml.data.Dataset, data_name="quantum_oscillator", identi
     "DatasetAttribute",
     "DatasetNotWriteableError",
     "DatasetArray",
+    "DatasetPyTree",
     "DatasetScalar",
     "DatasetString",
     "DatasetList",

diff --git a/pennylane/data/attributes/__init__.py b/pennylane/data/attributes/__init__.py
@@ -24,6 +24,7 @@
 from .sparse_array import DatasetSparseArray
 from .string import DatasetString
 from .tuple import DatasetTuple
+from .pytree import DatasetPyTree
 
 __all__ = (
     "DatasetArray",
@@ -32,6 +33,7 @@
     "DatasetDict",
     "DatasetList",
     "DatasetOperator",
+    "DatasetPyTree",
     "DatasetSparseArray",
     "DatasetMolecule",
     "DatasetNone",

diff --git a/pennylane/data/attributes/operator/operator.py b/pennylane/data/attributes/operator/operator.py
@@ -52,7 +52,8 @@ class DatasetOperator(Generic[Op], DatasetAttribute[HDF5Group, Op, Op]):
 
     @classmethod
     @lru_cache(1)
-    def consumes_types(cls) -> FrozenSet[Type[Operator]]:
+    def supported_ops(cls) -> FrozenSet[Type[Operator]]:
+        """Set of supported operators."""
         return frozenset(
             (
                 # pennylane/operation/Tensor
@@ -214,7 +215,7 @@ def _ops_to_hdf5(
             op_key = f"op_{i}"
             if isinstance(op, (qml.ops.Prod, qml.ops.SProd, qml.ops.Sum)):
                 op = op.simplify()
-            if type(op) not in self.consumes_types():
+            if type(op) not in self.supported_ops():
                 raise TypeError(
                     f"Serialization of operator type '{type(op).__name__}' is not supported."
                 )
@@ -254,6 +255,7 @@ def _hdf5_to_ops(self, bind: HDF5Group) -> List[Operator]:
         wires_bind = bind["op_wire_labels"]
         op_class_names = [] if names_bind.shape == (0,) else names_bind.asstr()
         op_wire_labels = [] if wires_bind.shape == (0,) else wires_bind.asstr()
+
         with qml.QueuingManager.stop_recording():
             for i, op_class_name in enumerate(op_class_names):
                 op_key = f"op_{i}"
@@ -293,4 +295,4 @@ def _hdf5_to_ops(self, bind: HDF5Group) -> List[Operator]:
     @lru_cache(1)
     def _supported_ops_dict(cls) -> Dict[str, Type[Operator]]:
         """Returns a dict mapping ``Operator`` subclass names to the class."""
-        return {op.__name__: op for op in cls.consumes_types()}
+        return {op.__name__: op for op in cls.supported_ops()}
diff --git a/pennylane/data/attributes/pytree.py b/pennylane/data/attributes/pytree.py
@@ -0,0 +1,57 @@
+# Copyright 2018-2024 Xanadu Quantum Technologies Inc.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Contains DatasetAttribute definition for PyTree types."""
+
+
+from typing import TypeVar
+
+import numpy as np
+
+from pennylane.data.attributes import DatasetArray, DatasetList
+from pennylane.data.base.attribute import DatasetAttribute
+from pennylane.data.base.hdf5 import HDF5Group
+from pennylane.data.base.mapper import AttributeTypeMapper
+from pennylane.pytrees import flatten, serialization, unflatten
+
+T = TypeVar("T")
+
+
+class DatasetPyTree(DatasetAttribute[HDF5Group, T, T]):
+    """Attribute type for an object that can be converted to
+    a Pytree. This is the default serialization method for
+    all PennyLane Pytrees, including subclasses of ``Operator``.
+    """
+
+    type_id = "pytree"
+
+    def hdf5_to_value(self, bind: HDF5Group) -> T:
+        return unflatten(
+            AttributeTypeMapper(bind)["leaves"].get_value(),
+            serialization.pytree_structure_load(bind["treedef"][()].tobytes()),
+        )
+
+    def value_to_hdf5(self, bind_parent: HDF5Group, key: str, value: T) -> HDF5Group:
+        bind = bind_parent.create_group(key)
+        leaves, treedef = flatten(value)
+
+        bind["treedef"] = np.void(serialization.pytree_structure_dump(treedef, decode=False))
+
+        try:
+            # Attempt to store leaves as an array, which will be more efficient
+            # but will fail if the leaves are not homogeneous
+            DatasetArray(leaves, parent_and_key=(bind, "leaves"))
+        except (ValueError, TypeError):
+            DatasetList(leaves, parent_and_key=(bind, "leaves"))
+
+        return bind
diff --git a/pennylane/data/base/attribute.py b/pennylane/data/base/attribute.py
@@ -38,6 +38,7 @@
 from pennylane.data.base import hdf5
 from pennylane.data.base.hdf5 import HDF5, HDF5Any, HDF5Group
 from pennylane.data.base.typing_util import UNSET, get_type, get_type_str
+from pennylane.pytrees import is_pytree
 
 T = TypeVar("T")
 
@@ -492,5 +493,7 @@ def match_obj_type(
         ret = DatasetAttribute.registry["list"]
     elif issubclass(type_, Mapping):
         ret = DatasetAttribute.registry["dict"]
+    elif is_pytree(type_):
+        ret = DatasetAttribute.registry["pytree"]
 
     return ret
diff --git a/pennylane/measurements/shots.py b/pennylane/measurements/shots.py
@@ -11,8 +11,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """This module contains the Shots class to hold shot-related information."""
+from collections.abc import Sequence
+
 # pylint:disable=inconsistent-return-statements
-from typing import NamedTuple, Sequence, Tuple
+from typing import NamedTuple
 
 
 class ShotCopies(NamedTuple):
@@ -39,7 +41,7 @@ def valid_int(s):
 
 def valid_tuple(s):
     """Returns True if s is a tuple of the form (shots, copies)."""
-    return isinstance(s, tuple) and len(s) == 2 and valid_int(s[0]) and valid_int(s[1])
+    return isinstance(s, Sequence) and len(s) == 2 and valid_int(s[0]) and valid_int(s[1])
 
 
 class Shots:
@@ -136,7 +138,7 @@ class Shots:
     total_shots: int = None
     """The total number of shots to be executed."""
 
-    shot_vector: Tuple[ShotCopies] = None
+    shot_vector: tuple[ShotCopies] = None
     """The tuple of :class:`~ShotCopies` to be executed. Each element is of the form ``(shots, copies)``."""
 
     _SHOT_ERROR = ValueError(
@@ -167,7 +169,7 @@ def __init__(self, shots=None):
         elif isinstance(shots, Sequence):
             if not all(valid_int(s) or valid_tuple(s) for s in shots):
                 raise self._SHOT_ERROR
-            self.__all_tuple_init__([s if isinstance(s, tuple) else (s, 1) for s in shots])
+            self.__all_tuple_init__([s if isinstance(s, Sequence) else (s, 1) for s in shots])
         elif isinstance(shots, self.__class__):
             return  # self already _is_ shots as defined by __new__
         else:
@@ -211,7 +213,7 @@ def __iter__(self):
             for _ in range(shot_copy.copies):
                 yield shot_copy.shots
 
-    def __all_tuple_init__(self, shots: Sequence[Tuple]):
+    def __all_tuple_init__(self, shots: Sequence[tuple]):
         res = []
         total_shots = 0
         current_shots, current_copies = shots[0]

diff --git a/pennylane/pytrees/__init__.py b/pennylane/pytrees/__init__.py
@@ -0,0 +1,27 @@
+# Copyright 2018-2024 Xanadu Quantum Technologies Inc.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+An internal module for working with pytrees.
+"""
+
+from .pytrees import PyTreeStructure, flatten, is_pytree, leaf, register_pytree, unflatten
+
+__all__ = [
+    "PyTreeStructure",
+    "flatten",
+    "is_pytree",
+    "leaf",
+    "register_pytree",
+    "unflatten",
+]