Remove the shots kwarg from the gradients module (#4448)
* remove shots kwarg from source code

* update interfaces and missing spots; fix gradient tests

* changelog; remove old stuff

* move changelog entry to Breaking Changes

* Apply suggestions from code review

Co-authored-by: David Wierichs <david.wierichs@xanadu.ai>

* make args positional; lint whitespace

---------

Co-authored-by: David Wierichs <david.wierichs@xanadu.ai>
timmysilv and dwierichs authored Aug 11, 2023
1 parent f48eb5f commit 5495875
Showing 22 changed files with 331 additions and 586 deletions.
3 changes: 3 additions & 0 deletions doc/releases/changelog-dev.md
@@ -211,6 +211,9 @@ array([False, False])
* The Pauli-X-term in `transmon_drive` has been removed in accordance with [1904.06560](https://arxiv.org/abs/1904.06560)
[(#4418)](https://github.com/PennyLaneAI/pennylane/pull/4418/)

* The gradients module no longer needs shot information passed to it explicitly, as the shots are on the tapes.
[(#4448)](https://github.com/PennyLaneAI/pennylane/pull/4448)

<h3>Deprecations 👋</h3>

* ``qml.qchem.jordan_wigner`` is deprecated, use ``qml.jordan_wigner`` instead.
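The entry above records the breaking change. As a rough sketch of the new calling convention (it mirrors the updated `finite_diff` docstring further down in this diff; the device, wire count, and step size `h` are illustrative choices):

```python
import pennylane as qml
from pennylane import numpy as np

# A device with a shot vector; the shots end up on the tape that the QNode builds.
dev = qml.device("default.qubit", wires=2, shots=(10, 100, 1000))

@qml.qnode(dev)
def circuit(params):
    qml.RX(params[0], wires=0)
    qml.RY(params[1], wires=0)
    qml.RX(params[2], wires=0)
    return qml.expval(qml.PauliZ(0))

params = np.array([0.1, 0.2, 0.3], requires_grad=True)

# Previously: qml.gradients.finite_diff(circuit, shots=(10, 100, 1000), h=1e-1)(params)
# Now the shots kwarg is gone and the transform reads the shots from the tape.
grads = qml.gradients.finite_diff(circuit, h=1e-1)(params)
```

The return keeps the shot-vector structure: the outermost tuple has one entry per shot copy.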
26 changes: 7 additions & 19 deletions pennylane/gradients/finite_difference.py
@@ -23,7 +23,7 @@
from scipy.special import factorial

import pennylane as qml
from pennylane.measurements import ProbabilityMP, Shots
from pennylane.measurements import ProbabilityMP

from .general_shift_rules import generate_shifted_tapes
from .gradient_transform import (
@@ -156,7 +156,7 @@ def finite_diff_coeffs(n, approx_order, strategy):
return coeffs_and_shifts


def _processing_fn(results, shots: Shots = Shots(None), single_shot_batch_fn=None):
def _processing_fn(results, shots, single_shot_batch_fn):
if not shots.has_partitioned_shots:
return single_shot_batch_fn(results)
grads_tuple = []
@@ -177,7 +177,6 @@ def finite_diff(
strategy="forward",
f0=None,
validate_params=True,
shots=None,
):
r"""Transform a QNode to compute the finite-difference gradient of all gate parameters with respect to its inputs.
@@ -204,10 +203,6 @@
the ``Operation.grad_method`` attribute and the circuit structure will be analyzed
to determine if the trainable parameters support the finite-difference method.
If ``False``, the finite-difference method will be applied to all parameters.
shots (None, int, list[int], list[~pennylane.measurements.ShotCopies]): The device shots that will
be used to execute the tapes outputted by this transform. Note that this argument doesn't
influence the shots used for tape execution, but provides information about the shots.
Returns:
function or tuple[list[QuantumTape], function]:
@@ -303,8 +298,7 @@ def finite_diff(
((array(-0.38751724), array(-0.18884792), array(-0.38355709)),
(array(0.69916868), array(0.34072432), array(0.69202366)))
Devices that have a shot vector defined can also be used for execution, provided
the ``shots`` argument was passed to the transform:
This gradient transform is compatible with devices that use shot vectors for execution.
>>> shots = (10, 100, 1000)
>>> dev = qml.device("default.qubit", wires=2, shots=shots)
@@ -315,7 +309,7 @@
... qml.RX(params[2], wires=0)
... return qml.expval(qml.PauliZ(0)), qml.var(qml.PauliZ(0))
>>> params = np.array([0.1, 0.2, 0.3], requires_grad=True)
>>> qml.gradients.finite_diff(circuit, shots=shots, h=10e-2)(params)
>>> qml.gradients.finite_diff(circuit, h=10e-2)(params)
(((array(-2.), array(-2.), array(0.)), (array(3.6), array(3.6), array(0.))),
((array(1.), array(0.4), array(1.)),
(array(-1.62), array(-0.624), array(-1.62))),
@@ -334,11 +328,9 @@
strategy=strategy,
f0=f0,
validate_params=validate_params,
shots=shots,
)
shots = Shots(shots)
if argnum is None and not tape.trainable_params:
return _no_trainable_grad(tape, shots)
return _no_trainable_grad(tape)

if validate_params:
diff_methods = gradient_analysis_and_validation(
@@ -348,7 +340,7 @@
diff_methods = ["F" for i in tape.trainable_params]

if all(g == "0" for g in diff_methods):
return _all_zero_grad(tape, shots)
return _all_zero_grad(tape)

gradient_tapes = []
shapes = []
@@ -473,7 +465,7 @@ def _single_shot_batch_result(results):
return tuple(tuple(elem) for elem in grads_reorder)

processing_fn = functools.partial(
_processing_fn, shots=shots, single_shot_batch_fn=_single_shot_batch_result
_processing_fn, shots=tape.shots, single_shot_batch_fn=_single_shot_batch_result
)

return gradient_tapes, processing_fn
@@ -490,7 +482,6 @@ def _finite_diff_legacy(
strategy="forward",
f0=None,
validate_params=True,
shots=None,
):
r"""Transform a QNode to compute the finite-difference gradient of all gate
parameters with respect to its inputs.
@@ -518,9 +509,6 @@
the ``Operation.grad_method`` attribute and the circuit structure will be analyzed
to determine if the trainable parameters support the finite-difference method.
If ``False``, the finite-difference method will be applied to all parameters.
shots (None, int, list[int]): The device shots that will be used to execute the tapes outputted by this
transform. Note that this argument doesn't influence the shots used for tape execution, but provides
information to the transform about the shots.
Returns:
function or tuple[list[QuantumTape], function]:
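`_processing_fn` above now branches on the `Shots` object carried by the tape rather than on a user-supplied value. A small sketch of the attributes it relies on (the shot values are arbitrary):

```python
import pennylane as qml
from pennylane.measurements import Shots

tape = qml.tape.QuantumScript(
    [qml.RX(0.3, wires=0)],
    [qml.expval(qml.PauliZ(0))],
    shots=(10, 100, 1000),  # shot vector recorded directly on the tape
)

# tape.shots is a Shots instance, so processing functions can branch on it
# without being handed a separate shots argument.
assert isinstance(tape.shots, Shots)
print(tape.shots.has_partitioned_shots)  # True for a shot vector
print(tape.shots.num_copies)             # 3 -> one gradient entry per copy
```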
4 changes: 1 addition & 3 deletions pennylane/gradients/general_shift_rules.py
@@ -414,9 +414,7 @@ def _copy_and_shift_params(tape, indices, shifts, multipliers, cast=False):
prep = all_ops[: len(tape._prep)]
ops = all_ops[len(tape._prep) : len(tape.operations)]
meas = all_ops[len(tape.operations) :]
shifted_tape = QuantumScript(ops=ops, measurements=meas, prep=prep, shots=tape.shots)

return shifted_tape
return QuantumScript(ops=ops, measurements=meas, prep=prep, shots=tape.shots)


def generate_shifted_tapes(tape, index, shifts, multipliers=None, broadcast=False):
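The refactor above is cosmetic — the shifted tape already carried `shots=tape.shots` — but it is exactly this propagation that lets the gradient transforms drop their explicit shots argument: every tape produced by `generate_shifted_tapes` keeps the original tape's shots. A rough sketch (the parameter value and shift angles are arbitrary):

```python
import numpy as np
import pennylane as qml
from pennylane.gradients.general_shift_rules import generate_shifted_tapes

tape = qml.tape.QuantumScript(
    [qml.RX(0.3, wires=0)],
    [qml.expval(qml.PauliZ(0))],
    shots=100,
)
tape.trainable_params = [0]

# Two shifted copies of the tape; each keeps shots=100.
shifted = generate_shifted_tapes(tape, 0, [np.pi / 2, -np.pi / 2])
print([t.shots.total_shots for t in shifted])  # expected: [100, 100]
```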
19 changes: 9 additions & 10 deletions pennylane/gradients/gradient_transform.py
@@ -26,7 +26,6 @@
VarianceMP,
VnEntropyMP,
ProbabilityMP,
Shots,
)

SUPPORTED_GRADIENT_KWARGS = [
@@ -251,7 +250,7 @@ def choose_grad_methods(diff_methods, argnum):
return {idx: diff_methods[idx] for idx in argnum}


def _all_zero_grad(tape, shots=Shots(None)):
def _all_zero_grad(tape):
"""Auxiliary function to return zeros for the all-zero gradient case."""
list_zeros = []

@@ -266,10 +265,10 @@ def _all_zero_grad(tape, shots=Shots(None)):

list_zeros.append(sub_list_zeros)

if shots.has_partitioned_shots:
if tape.shots.has_partitioned_shots:
if len(tape.measurements) == 1:
return [], lambda _: tuple(list_zeros[0] for _ in range(shots.num_copies))
return [], lambda _: tuple(tuple(list_zeros) for _ in range(shots.num_copies))
return [], lambda _: tuple(list_zeros[0] for _ in range(tape.shots.num_copies))
return [], lambda _: tuple(tuple(list_zeros) for _ in range(tape.shots.num_copies))

if len(tape.measurements) == 1:
return [], lambda _: list_zeros[0]
@@ -284,16 +283,16 @@
)


def _no_trainable_grad(tape, shots=Shots(None)):
def _no_trainable_grad(tape):
"""Auxiliary function that returns correctly formatted gradients when there
are no trainable parameters."""
warnings.warn(_no_trainable_grad_warning)
if shots.has_partitioned_shots:
if tape.shots.has_partitioned_shots:
if len(tape.measurements) == 1:
return [], lambda _: tuple(qml.math.zeros([0]) for _ in range(shots.num_copies))
return [], lambda _: tuple(qml.math.zeros([0]) for _ in range(tape.shots.num_copies))
return [], lambda _: tuple(
tuple(qml.math.zeros([0]) for _ in range(len(tape.measurements)))
for _ in range(shots.num_copies)
for _ in range(tape.shots.num_copies)
)

if len(tape.measurements) == 1:
@@ -629,7 +628,7 @@ def jacobian_wrapper(

if qml.active_return():
num_measurements = len(qnode.tape.measurements)
has_partitioned_shots = Shots(tkwargs.get("shots", None)).has_partitioned_shots
has_partitioned_shots = qnode.tape.shots.has_partitioned_shots
return _contract_qjac_with_cjac(qjac, cjac, num_measurements, has_partitioned_shots)

return _contract_qjac_with_cjac_legacy(qjac, cjac)
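Both helpers above now infer the shot structure from the tape itself. A quick sketch of the no-trainable-parameters path; importing the private helper directly is purely for illustration:

```python
import pennylane as qml
from pennylane.gradients.gradient_transform import _no_trainable_grad

tape = qml.tape.QuantumScript(
    [qml.RX(0.3, wires=0)],
    [qml.expval(qml.PauliZ(0))],
    shots=(10, 100, 1000),
)
tape.trainable_params = []  # nothing to differentiate

# No shots argument: the helper branches on tape.shots.has_partitioned_shots
# and returns one empty gradient per shot copy (it also emits a warning).
batch, post_processing = _no_trainable_grad(tape)
print(batch)                  # [] -- no tapes to execute
print(post_processing(None))  # a 3-tuple of empty arrays, one per shot copy
```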
9 changes: 2 additions & 7 deletions pennylane/gradients/hadamard_gradient.py
@@ -35,7 +35,6 @@
def _hadamard_grad(
tape,
argnum=None,
shots=None,
aux_wire=None,
device_wires=None,
):
@@ -46,9 +45,6 @@ def _hadamard_grad(
argnum (int or list[int] or None): Trainable tape parameter indices to differentiate
with respect to. If not provided, the derivatives with respect to all
trainable parameters are returned.
shots (None, int, list[int]): The device shots that will be used to execute the tapes outputted by this
transform. Note that this argument doesn't influence the shots used for tape execution, but provides
information about the shots.
aux_wire (pennylane.wires.Wires): Auxiliary wire to be used for the Hadamard tests. If ``None`` (the default),
a suitable wire is inferred from the wires used in the original circuit and ``device_wires``.
device_wires (pennylane.wires.Wires): Wires of the device that are going to be used for the
@@ -182,15 +178,14 @@ def _hadamard_grad(
assert_active_return(transform_name)
assert_no_state_returns(tape.measurements, transform_name)
assert_no_variance(tape.measurements, transform_name)
shots = qml.measurements.Shots(shots)

if argnum is None and not tape.trainable_params:
return _no_trainable_grad(tape, shots)
return _no_trainable_grad(tape)

diff_methods = gradient_analysis_and_validation(tape, "analytic", grad_fn=hadamard_grad)

if all(g == "0" for g in diff_methods):
return _all_zero_grad(tape, shots)
return _all_zero_grad(tape)

method_map = choose_grad_methods(diff_methods, argnum)

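As with `finite_diff`, the Hadamard-test transform now takes its shot information from the tape. A minimal sketch of the new call (device, wire count, and shot number are illustrative; one device wire is left free to serve as the auxiliary wire):

```python
import pennylane as qml
from pennylane import numpy as np

dev = qml.device("default.qubit", wires=2, shots=1000)  # wire 1 stays free for the Hadamard test

@qml.qnode(dev)
def circuit(params):
    qml.RX(params[0], wires=0)
    qml.RY(params[1], wires=0)
    return qml.expval(qml.PauliZ(0))

params = np.array([0.1, 0.2], requires_grad=True)

# No shots kwarg; the auxiliary-wire tapes inherit the shots on the QNode's tape.
grad = qml.gradients.hadamard_grad(circuit)(params)
```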
19 changes: 7 additions & 12 deletions pennylane/gradients/jvp.py
@@ -18,7 +18,7 @@
import numpy as np

import pennylane as qml
from pennylane.measurements import ProbabilityMP, Shots
from pennylane.measurements import ProbabilityMP


def compute_jvp_single(tangent, jac):
@@ -236,7 +236,7 @@ def compute_jvp_multi(tangent, jac):
return tuple(compute_jvp_single(tangent, j) for j in jac)


def jvp(tape, tangent, gradient_fn, shots=None, gradient_kwargs=None):
def jvp(tape, tangent, gradient_fn, gradient_kwargs=None):
r"""Generate the gradient tapes and processing function required to compute
the Jacobian vector product of a tape. This function only works with the new return type system on.
@@ -246,8 +246,6 @@ def jvp(tape, tangent, gradient_fn, shots=None, gradient_kwargs=None):
matching the number of trainable parameters.
gradient_fn (callable): the gradient transform to use to differentiate
the tape
shots (None, int, list[int]): The device shots that will be used to
execute the tapes outputted by this
gradient_kwargs (dict): dictionary of keyword arguments to pass when
determining the gradients of tapes
@@ -319,25 +317,24 @@ def func(_): # pylint: disable=unused-argument
pass

gradient_kwargs = gradient_kwargs or {}
shots = tape.shots if shots is None else Shots(shots)
gradient_tapes, fn = gradient_fn(tape, shots=shots, **gradient_kwargs)
gradient_tapes, fn = gradient_fn(tape, **gradient_kwargs)

def processing_fn(results):
# postprocess results to compute the Jacobian
jac = fn(results)
_jvp_fn = compute_jvp_multi if multi_m else compute_jvp_single

# Jacobian without shot vectors
if not shots.has_partitioned_shots:
if not tape.shots.has_partitioned_shots:
return _jvp_fn(tangent, jac)

# The jacobian is calculated for shot vectors
return tuple(_jvp_fn(tangent, jac[i]) for i in range(shots.num_copies))
return tuple(_jvp_fn(tangent, jac[i]) for i in range(tape.shots.num_copies))

return gradient_tapes, processing_fn


def batch_jvp(tapes, tangents, gradient_fn, shots=None, reduction="append", gradient_kwargs=None):
def batch_jvp(tapes, tangents, gradient_fn, reduction="append", gradient_kwargs=None):
r"""Generate the gradient tapes and processing function required to compute
the Jacobian vector products of a batch of tapes.
@@ -348,8 +345,6 @@ def batch_jvp(tapes, tangents, gradient_fn, shots=None, reduction="append", grad
matching the output shape of the corresponding tape.
gradient_fn (callable): the gradient transform to use to differentiate
the tapes
shots (None, int, list[int]): The device shots that will be used to
execute the tapes outputted by this
reduction (str): Determines how the Jacobian-vector products are returned.
If ``append``, then the output of the function will be of the form
``List[tensor_like]``, with each element corresponding to the JVP of each
@@ -416,7 +411,7 @@ def batch_jvp(tapes, tangents, gradient_fn, shots=None, reduction="append", grad

# Loop through the tapes and dys vector
for tape, tangent in zip(tapes, tangents):
g_tapes, fn = jvp(tape, tangent, gradient_fn, shots, gradient_kwargs)
g_tapes, fn = jvp(tape, tangent, gradient_fn, gradient_kwargs)

reshape_info.append(len(g_tapes))
processing_fns.append(fn)
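The JVP helpers follow the same pattern: `jvp` and `batch_jvp` no longer thread a shots value through to the gradient transform, so a caller supplies only the tape, the tangent, and the transform. A rough sketch assuming analytic (`shots=None`) execution; running the generated tapes with `qml.execute` is an illustrative choice:

```python
import numpy as np
import pennylane as qml
from pennylane.gradients import param_shift
from pennylane.gradients.jvp import jvp

tape = qml.tape.QuantumScript(
    [qml.RX(0.4, wires=0), qml.RY(0.6, wires=0)],
    [qml.expval(qml.PauliZ(0))],
    shots=None,  # analytic; a shot vector here would be picked up from tape.shots
)
tape.trainable_params = [0, 1]
tangent = [np.array(1.0), np.array(1.0)]  # one entry per trainable parameter

# No shots argument: the processing function checks tape.shots internally.
jvp_tapes, post_processing = jvp(tape, tangent, param_shift)

dev = qml.device("default.qubit", wires=1)
results = qml.execute(jvp_tapes, dev, gradient_fn=None)
print(post_processing(results))  # tangent-weighted sum of the two partial derivatives
```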
