Remove the shots kwarg from the gradients module (#4448)
* remove shots kwarg from source code

* update interfaces and missing spots; fix gradient tests

* changelog; remove old stuff

* move changelog entry to Breaking Changes

* Apply suggestions from code review

Co-authored-by: David Wierichs <david.wierichs@xanadu.ai>

* make args positional; lint whitespace

---------

Co-authored-by: David Wierichs <david.wierichs@xanadu.ai>
timmysilv and dwierichs authored Aug 11, 2023
1 parent f48eb5f commit 5495875
Showing 22 changed files with 331 additions and 586 deletions.
3 changes: 3 additions & 0 deletions doc/releases/changelog-dev.md
@@ -211,6 +211,9 @@ array([False, False])
* The Pauli-X-term in `transmon_drive` has been removed in accordance with [1904.06560](https://arxiv.org/abs/1904.06560)
[(#4418)](https://github.com/PennyLaneAI/pennylane/pull/4418/)

* The gradients module no longer needs shot information passed to it explicitly, as the shots are on the tapes.
[(#4448)](https://github.com/PennyLaneAI/pennylane/pull/4448)

<h3>Deprecations 👋</h3>

* ``qml.qchem.jordan_wigner`` is deprecated, use ``qml.jordan_wigner`` instead.
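The entry above records the breaking change. As a rough sketch of the new calling convention (it mirrors the updated `finite_diff` docstring further down in this diff; the device, wire count, and step size `h` are illustrative choices):

```python
import pennylane as qml
from pennylane import numpy as np

# A device with a shot vector; the shots end up on the tape that the QNode builds.
dev = qml.device("default.qubit", wires=2, shots=(10, 100, 1000))

@qml.qnode(dev)
def circuit(params):
    qml.RX(params[0], wires=0)
    qml.RY(params[1], wires=0)
    qml.RX(params[2], wires=0)
    return qml.expval(qml.PauliZ(0))

params = np.array([0.1, 0.2, 0.3], requires_grad=True)

# Previously: qml.gradients.finite_diff(circuit, shots=(10, 100, 1000), h=1e-1)(params)
# Now the shots kwarg is gone and the transform reads the shots from the tape.
grads = qml.gradients.finite_diff(circuit, h=1e-1)(params)
```

The return keeps the shot-vector structure: the outermost tuple has one entry per shot copy.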
26 changes: 7 additions & 19 deletions pennylane/gradients/finite_difference.py
@@ -23,7 +23,7 @@
from scipy.special import factorial

import pennylane as qml
from pennylane.measurements import ProbabilityMP, Shots
from pennylane.measurements import ProbabilityMP

from .general_shift_rules import generate_shifted_tapes
from .gradient_transform import (
@@ -156,7 +156,7 @@ def finite_diff_coeffs(n, approx_order, strategy):
return coeffs_and_shifts


def _processing_fn(results, shots: Shots = Shots(None), single_shot_batch_fn=None):
def _processing_fn(results, shots, single_shot_batch_fn):
if not shots.has_partitioned_shots:
return single_shot_batch_fn(results)
grads_tuple = []
@@ -177,7 +177,6 @@ def finite_diff(
strategy="forward",
f0=None,
validate_params=True,
shots=None,
):
r"""Transform a QNode to compute the finite-difference gradient of all gate parameters with respect to its inputs.
@@ -204,10 +203,6 @@
the ``Operation.grad_method`` attribute and the circuit structure will be analyzed
to determine if the trainable parameters support the finite-difference method.
If ``False``, the finite-difference method will be applied to all parameters.
shots (None, int, list[int], list[~pennylane.measurements.ShotCopies]): The device shots that will
be used to execute the tapes outputted by this transform. Note that this argument doesn't
influence the shots used for tape execution, but provides information about the shots.
Returns:
function or tuple[list[QuantumTape], function]:
@@ -303,8 +298,7 @@ def finite_diff(
((array(-0.38751724), array(-0.18884792), array(-0.38355709)),
(array(0.69916868), array(0.34072432), array(0.69202366)))
Devices that have a shot vector defined can also be used for execution, provided
the ``shots`` argument was passed to the transform:
This gradient transform is compatible with devices that use shot vectors for execution.
>>> shots = (10, 100, 1000)
>>> dev = qml.device("default.qubit", wires=2, shots=shots)
@@ -315,7 +309,7 @@
... qml.RX(params[2], wires=0)
... return qml.expval(qml.PauliZ(0)), qml.var(qml.PauliZ(0))
>>> params = np.array([0.1, 0.2, 0.3], requires_grad=True)
>>> qml.gradients.finite_diff(circuit, shots=shots, h=10e-2)(params)
>>> qml.gradients.finite_diff(circuit, h=10e-2)(params)
(((array(-2.), array(-2.), array(0.)), (array(3.6), array(3.6), array(0.))),
((array(1.), array(0.4), array(1.)),
(array(-1.62), array(-0.624), array(-1.62))),
@@ -334,11 +328,9 @@
strategy=strategy,
f0=f0,
validate_params=validate_params,
shots=shots,
)
shots = Shots(shots)
if argnum is None and not tape.trainable_params:
return _no_trainable_grad(tape, shots)
return _no_trainable_grad(tape)

if validate_params:
diff_methods = gradient_analysis_and_validation(
@@ -348,7 +340,7 @@
diff_methods = ["F" for i in tape.trainable_params]

if all(g == "0" for g in diff_methods):
return _all_zero_grad(tape, shots)
return _all_zero_grad(tape)

gradient_tapes = []
shapes = []
@@ -473,7 +465,7 @@ def _single_shot_batch_result(results):
return tuple(tuple(elem) for elem in grads_reorder)

processing_fn = functools.partial(
_processing_fn, shots=shots, single_shot_batch_fn=_single_shot_batch_result
_processing_fn, shots=tape.shots, single_shot_batch_fn=_single_shot_batch_result
)

return gradient_tapes, processing_fn
@@ -490,7 +482,6 @@ def _finite_diff_legacy(
strategy="forward",
f0=None,
validate_params=True,
shots=None,
):
r"""Transform a QNode to compute the finite-difference gradient of all gate
parameters with respect to its inputs.
@@ -518,9 +509,6 @@
the ``Operation.grad_method`` attribute and the circuit structure will be analyzed
to determine if the trainable parameters support the finite-difference method.
If ``False``, the finite-difference method will be applied to all parameters.
shots (None, int, list[int]): The device shots that will be used to execute the tapes outputted by this
transform. Note that this argument doesn't influence the shots used for tape execution, but provides
information to the transform about the shots.
Returns:
function or tuple[list[QuantumTape], function]:
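`_processing_fn` above now branches on the `Shots` object carried by the tape rather than on a user-supplied value. A small sketch of the attributes it relies on (the shot values are arbitrary):

```python
import pennylane as qml
from pennylane.measurements import Shots

tape = qml.tape.QuantumScript(
    [qml.RX(0.3, wires=0)],
    [qml.expval(qml.PauliZ(0))],
    shots=(10, 100, 1000),  # shot vector recorded directly on the tape
)

# tape.shots is a Shots instance, so processing functions can branch on it
# without being handed a separate shots argument.
assert isinstance(tape.shots, Shots)
print(tape.shots.has_partitioned_shots)  # True for a shot vector
print(tape.shots.num_copies)             # 3 -> one gradient entry per copy
```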
4 changes: 1 addition & 3 deletions pennylane/gradients/general_shift_rules.py
@@ -414,9 +414,7 @@ def _copy_and_shift_params(tape, indices, shifts, multipliers, cast=False):
prep = all_ops[: len(tape._prep)]
ops = all_ops[len(tape._prep) : len(tape.operations)]
meas = all_ops[len(tape.operations) :]
shifted_tape = QuantumScript(ops=ops, measurements=meas, prep=prep, shots=tape.shots)

return shifted_tape
return QuantumScript(ops=ops, measurements=meas, prep=prep, shots=tape.shots)


def generate_shifted_tapes(tape, index, shifts, multipliers=None, broadcast=False):
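The refactor above is cosmetic — the shifted tape already carried `shots=tape.shots` — but it is exactly this propagation that lets the gradient transforms drop their explicit shots argument: every tape produced by `generate_shifted_tapes` keeps the original tape's shots. A rough sketch (the parameter value and shift angles are arbitrary):

```python
import numpy as np
import pennylane as qml
from pennylane.gradients.general_shift_rules import generate_shifted_tapes

tape = qml.tape.QuantumScript(
    [qml.RX(0.3, wires=0)],
    [qml.expval(qml.PauliZ(0))],
    shots=100,
)
tape.trainable_params = [0]

# Two shifted copies of the tape; each keeps shots=100.
shifted = generate_shifted_tapes(tape, 0, [np.pi / 2, -np.pi / 2])
print([t.shots.total_shots for t in shifted])  # expected: [100, 100]
```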
19 changes: 9 additions & 10 deletions pennylane/gradients/gradient_transform.py
@@ -26,7 +26,6 @@
VarianceMP,
VnEntropyMP,
ProbabilityMP,
Shots,
)

SUPPORTED_GRADIENT_KWARGS = [
@@ -251,7 +250,7 @@ def choose_grad_methods(diff_methods, argnum):
return {idx: diff_methods[idx] for idx in argnum}


def _all_zero_grad(tape, shots=Shots(None)):
def _all_zero_grad(tape):
"""Auxiliary function to return zeros for the all-zero gradient case."""
list_zeros = []

@@ -266,10 +265,10 @@ def _all_zero_grad(tape, shots=Shots(None)):

list_zeros.append(sub_list_zeros)

if shots.has_partitioned_shots:
if tape.shots.has_partitioned_shots:
if len(tape.measurements) == 1:
return [], lambda _: tuple(list_zeros[0] for _ in range(shots.num_copies))
return [], lambda _: tuple(tuple(list_zeros) for _ in range(shots.num_copies))
return [], lambda _: tuple(list_zeros[0] for _ in range(tape.shots.num_copies))
return [], lambda _: tuple(tuple(list_zeros) for _ in range(tape.shots.num_copies))

if len(tape.measurements) == 1:
return [], lambda _: list_zeros[0]
@@ -284,16 +283,16 @@
)


def _no_trainable_grad(tape, shots=Shots(None)):
def _no_trainable_grad(tape):
"""Auxiliary function that returns correctly formatted gradients when there
are no trainable parameters."""
warnings.warn(_no_trainable_grad_warning)
if shots.has_partitioned_shots:
if tape.shots.has_partitioned_shots:
if len(tape.measurements) == 1:
return [], lambda _: tuple(qml.math.zeros([0]) for _ in range(shots.num_copies))
return [], lambda _: tuple(qml.math.zeros([0]) for _ in range(tape.shots.num_copies))
return [], lambda _: tuple(
tuple(qml.math.zeros([0]) for _ in range(len(tape.measurements)))
for _ in range(shots.num_copies)
for _ in range(tape.shots.num_copies)
)

if len(tape.measurements) == 1:
@@ -629,7 +628,7 @@ def jacobian_wrapper(

if qml.active_return():
num_measurements = len(qnode.tape.measurements)
has_partitioned_shots = Shots(tkwargs.get("shots", None)).has_partitioned_shots
has_partitioned_shots = qnode.tape.shots.has_partitioned_shots
return _contract_qjac_with_cjac(qjac, cjac, num_measurements, has_partitioned_shots)

return _contract_qjac_with_cjac_legacy(qjac, cjac)
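Both helpers above now infer the shot structure from the tape itself. A quick sketch of the no-trainable-parameters path; importing the private helper directly is purely for illustration:

```python
import pennylane as qml
from pennylane.gradients.gradient_transform import _no_trainable_grad

tape = qml.tape.QuantumScript(
    [qml.RX(0.3, wires=0)],
    [qml.expval(qml.PauliZ(0))],
    shots=(10, 100, 1000),
)
tape.trainable_params = []  # nothing to differentiate

# No shots argument: the helper branches on tape.shots.has_partitioned_shots
# and returns one empty gradient per shot copy (it also emits a warning).
batch, post_processing = _no_trainable_grad(tape)
print(batch)                  # [] -- no tapes to execute
print(post_processing(None))  # a 3-tuple of empty arrays, one per shot copy
```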
9 changes: 2 additions & 7 deletions pennylane/gradients/hadamard_gradient.py
@@ -35,7 +35,6 @@
def _hadamard_grad(
tape,
argnum=None,
shots=None,
aux_wire=None,
device_wires=None,
):
@@ -46,9 +45,6 @@ def _hadamard_grad(
argnum (int or list[int] or None): Trainable tape parameter indices to differentiate
with respect to. If not provided, the derivatives with respect to all
trainable parameters are returned.
shots (None, int, list[int]): The device shots that will be used to execute the tapes outputted by this
transform. Note that this argument doesn't influence the shots used for tape execution, but provides
information about the shots.
aux_wire (pennylane.wires.Wires): Auxiliary wire to be used for the Hadamard tests. If ``None`` (the default),
a suitable wire is inferred from the wires used in the original circuit and ``device_wires``.
device_wires (pennylane.wires.Wires): Wires of the device that are going to be used for the
@@ -182,15 +178,14 @@ def _hadamard_grad(
assert_active_return(transform_name)
assert_no_state_returns(tape.measurements, transform_name)
assert_no_variance(tape.measurements, transform_name)
shots = qml.measurements.Shots(shots)

if argnum is None and not tape.trainable_params:
return _no_trainable_grad(tape, shots)
return _no_trainable_grad(tape)

diff_methods = gradient_analysis_and_validation(tape, "analytic", grad_fn=hadamard_grad)

if all(g == "0" for g in diff_methods):
return _all_zero_grad(tape, shots)
return _all_zero_grad(tape)

method_map = choose_grad_methods(diff_methods, argnum)

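As with `finite_diff`, the Hadamard-test transform now takes its shot information from the tape. A minimal sketch of the new call (device, wire count, and shot number are illustrative; one device wire is left free to serve as the auxiliary wire):

```python
import pennylane as qml
from pennylane import numpy as np

dev = qml.device("default.qubit", wires=2, shots=1000)  # wire 1 stays free for the Hadamard test

@qml.qnode(dev)
def circuit(params):
    qml.RX(params[0], wires=0)
    qml.RY(params[1], wires=0)
    return qml.expval(qml.PauliZ(0))

params = np.array([0.1, 0.2], requires_grad=True)

# No shots kwarg; the auxiliary-wire tapes inherit the shots on the QNode's tape.
grad = qml.gradients.hadamard_grad(circuit)(params)
```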
19 changes: 7 additions & 12 deletions pennylane/gradients/jvp.py
@@ -18,7 +18,7 @@
import numpy as np

import pennylane as qml
from pennylane.measurements import ProbabilityMP, Shots
from pennylane.measurements import ProbabilityMP


def compute_jvp_single(tangent, jac):
@@ -236,7 +236,7 @@ def compute_jvp_multi(tangent, jac):
return tuple(compute_jvp_single(tangent, j) for j in jac)


def jvp(tape, tangent, gradient_fn, shots=None, gradient_kwargs=None):
def jvp(tape, tangent, gradient_fn, gradient_kwargs=None):
r"""Generate the gradient tapes and processing function required to compute
the Jacobian vector product of a tape. This function only works with the new return type system on.
@@ -246,8 +246,6 @@ def jvp(tape, tangent, gradient_fn, shots=None, gradient_kwargs=None):
matching the number of trainable parameters.
gradient_fn (callable): the gradient transform to use to differentiate
the tape
shots (None, int, list[int]): The device shots that will be used to
execute the tapes outputted by this
gradient_kwargs (dict): dictionary of keyword arguments to pass when
determining the gradients of tapes
@@ -319,25 +317,24 @@ def func(_): # pylint: disable=unused-argument
pass

gradient_kwargs = gradient_kwargs or {}
shots = tape.shots if shots is None else Shots(shots)
gradient_tapes, fn = gradient_fn(tape, shots=shots, **gradient_kwargs)
gradient_tapes, fn = gradient_fn(tape, **gradient_kwargs)

def processing_fn(results):
# postprocess results to compute the Jacobian
jac = fn(results)
_jvp_fn = compute_jvp_multi if multi_m else compute_jvp_single

# Jacobian without shot vectors
if not shots.has_partitioned_shots:
if not tape.shots.has_partitioned_shots:
return _jvp_fn(tangent, jac)

# The jacobian is calculated for shot vectors
return tuple(_jvp_fn(tangent, jac[i]) for i in range(shots.num_copies))
return tuple(_jvp_fn(tangent, jac[i]) for i in range(tape.shots.num_copies))

return gradient_tapes, processing_fn


def batch_jvp(tapes, tangents, gradient_fn, shots=None, reduction="append", gradient_kwargs=None):
def batch_jvp(tapes, tangents, gradient_fn, reduction="append", gradient_kwargs=None):
r"""Generate the gradient tapes and processing function required to compute
the Jacobian vector products of a batch of tapes.
@@ -348,8 +345,6 @@ def batch_jvp(tapes, tangents, gradient_fn, shots=None, reduction="append", grad
matching the output shape of the corresponding tape.
gradient_fn (callable): the gradient transform to use to differentiate
the tapes
shots (None, int, list[int]): The device shots that will be used to
execute the tapes outputted by this
reduction (str): Determines how the Jacobian-vector products are returned.
If ``append``, then the output of the function will be of the form
``List[tensor_like]``, with each element corresponding to the JVP of each
@@ -416,7 +411,7 @@ def batch_jvp(tapes, tangents, gradient_fn, shots=None, reduction="append", grad

# Loop through the tapes and dys vector
for tape, tangent in zip(tapes, tangents):
g_tapes, fn = jvp(tape, tangent, gradient_fn, shots, gradient_kwargs)
g_tapes, fn = jvp(tape, tangent, gradient_fn, gradient_kwargs)

reshape_info.append(len(g_tapes))
processing_fns.append(fn)
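The JVP helpers follow the same pattern: `jvp` and `batch_jvp` no longer thread a shots value through to the gradient transform, so a caller supplies only the tape, the tangent, and the transform. A rough sketch assuming analytic (`shots=None`) execution; running the generated tapes with `qml.execute` is an illustrative choice:

```python
import numpy as np
import pennylane as qml
from pennylane.gradients import param_shift
from pennylane.gradients.jvp import jvp

tape = qml.tape.QuantumScript(
    [qml.RX(0.4, wires=0), qml.RY(0.6, wires=0)],
    [qml.expval(qml.PauliZ(0))],
    shots=None,  # analytic; a shot vector here would be picked up from tape.shots
)
tape.trainable_params = [0, 1]
tangent = [np.array(1.0), np.array(1.0)]  # one entry per trainable parameter

# No shots argument: the processing function checks tape.shots internally.
jvp_tapes, post_processing = jvp(tape, tangent, param_shift)

dev = qml.device("default.qubit", wires=1)
results = qml.execute(jvp_tapes, dev, gradient_fn=None)
print(post_processing(results))  # tangent-weighted sum of the two partial derivatives
```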
