google-deepmind · copybara-service · Sep 15, 2024 · Sep 15, 2024
diff --git a/optax/_src/float64_test.py b/optax/_src/float64_test.py
@@ -16,12 +16,9 @@
 
 from absl.testing import absltest
 from absl.testing import parameterized
-
 import chex
 import jax
-from jax import config
 import jax.numpy as jnp
-
 from optax._src import alias
 from optax._src import base
 from optax._src import clipping
@@ -40,10 +37,16 @@
  ('scale_by_stddev', transform.scale_by_stddev, {}),
  ('adam', transform.scale_by_adam, {}),
  ('scale', transform.scale, dict(step_size=3.0)),
- ('add_decayed_weights', transform.add_decayed_weights,
- dict(weight_decay=0.1)),
- ('scale_by_schedule', transform.scale_by_schedule,
- dict(step_size_fn=lambda x: x * 0.1)),
+ (
+ 'add_decayed_weights',
+ transform.add_decayed_weights,
+ dict(weight_decay=0.1),
+ ),
+ (
+ 'scale_by_schedule',
+ transform.scale_by_schedule,
+ dict(step_size_fn=lambda x: x * 0.1),
+ ),
  ('scale_by_trust_ratio', transform.scale_by_trust_ratio, {}),
  ('add_noise', transform.add_noise, dict(eta=1.0, gamma=0.1, seed=42)),
  ('apply_every_k', transform.apply_every, {}),
@@ -69,25 +72,32 @@ def _assert_dtype_equals(self, tree1, tree2):
  @chex.all_variants
  @parameterized.named_parameters(ALL_MODULES)
  def test_mixed_dtype_input_outputs(self, transform_constr, transform_kwargs):
+ """Checks that state and params keep their dtypes for f32/f64 inputs."""
+ # Restore the previous x64 setting even if an assertion below fails.
+ x64_was_enabled = jax.config.jax_enable_x64
+ self.addCleanup(jax.config.update, 'jax_enable_x64', x64_was_enabled)
+ jax.config.update('jax_enable_x64', True)
  initial_params = (
- jnp.array([1., 2.], dtype=jnp.float32),
- jnp.array([3., 4.], dtype=jnp.float64))
+ jnp.array([1.0, 2.0], dtype=jnp.float32),
+ jnp.array([3.0, 4.0], dtype=jnp.float64),
+ )
  updates = (
- jnp.array([10., 21.], dtype=jnp.float32),
- jnp.array([33., 42.], dtype=jnp.float64))
+ jnp.array([10.0, 21.0], dtype=jnp.float32),
+ jnp.array([33.0, 42.0], dtype=jnp.float64),
+ )
  scaler = transform_constr(**transform_kwargs)
  init_fn = self.variant(scaler.init)
  update_fn = self.variant(scaler.update)
 
  initial_state = init_fn(initial_params)
  updates, new_state = update_fn(
- updates, initial_state, params=initial_params)
+ updates, initial_state, params=initial_params
+ )
  new_params = update.apply_updates(initial_params, updates)
 
  self._assert_dtype_equals(initial_state, new_state)
  self._assert_dtype_equals(initial_params, new_params)
 
 
  if __name__ == '__main__':
- config.update('jax_enable_x64', True)
  absltest.main()
diff --git a/optax/_src/numerics.py b/optax/_src/numerics.py
@@ -111,6 +111,7 @@ def safe_increment(count: chex.Numeric) -> chex.Numeric:
  counter stays at ``max_val``.
 
  Examples:
+ >>> import jax.numpy as jnp
  >>> import optax
  >>> optax.safe_increment(jnp.asarray(1, dtype=jnp.int32))
  Array(2, dtype=int32)

diff --git a/optax/_src/numerics_test.py b/optax/_src/numerics_test.py
@@ -44,19 +44,24 @@ def _invalid_ord_axis_inputs(ord_axis_keepdims):
 class NumericsTest(chex.TestCase):
 
  @chex.all_variants
- @parameterized.product(
- str_dtype=[
-  "bfloat16",
-  "float16",
-  "float32",
-  "int8",
-  "int16",
-  "int32",
- ]
- )
- def test_safe_increment(self, str_dtype):
+ @parameterized.parameters(*(
+ "bfloat16",
+ "float16",
+ "float32",
+ "float64",
+ "int8",
+ "int16",
+ "int32",
+ "int64",
+ ))
+ def test_safe_increment(self, dtype):
  """Tests that safe_increment works for all dtypes."""
- dtype = jnp.dtype(str_dtype)
+ x64_was_enabled = jax.config.jax_enable_x64
+ # Restore the x64 setting afterwards, even if an assertion fails.
+ self.addCleanup(jax.config.update, "jax_enable_x64", x64_was_enabled)
+ if dtype in ["float64", "int64"]:
+ jax.config.update("jax_enable_x64", True)
+ dtype = jnp.dtype(dtype)
  inc_fn = self.variant(numerics.safe_increment)
 
  with self.subTest("Increments correctly"):

diff --git a/optax/contrib/_acprop.py b/optax/contrib/_acprop.py
@@ -67,7 +67,7 @@ def update_fn(updates, state, params=None):
  prediction_error = jtu.tree_map(lambda g, m: g - m, updates, state.mu)
  nu = otu.tree_update_moment_per_elem_norm(prediction_error, state.nu, b2, 2)
  nu = jtu.tree_map(lambda v: v + eps_root, nu)
- count_inc = numerics.safe_int32_increment(state.count)
+ count_inc = numerics.safe_increment(state.count)
 
  # On initial step, avoid division by zero and force nu_hat to be 1.
  initial = state.count == 0

diff --git a/optax/schedules/_inject.py b/optax/schedules/_inject.py
@@ -197,7 +197,7 @@ def update_fn(updates, state, params=None, **extra_args):
  ).update(updates, state.inner_state, params, **extra_args)
 
  return updates, InjectStatefulHyperparamsState(
- count=numerics.safe_int32_increment(state.count),
+ count=numerics.safe_increment(state.count),
  hyperparams=hparams,
  hyperparams_states=hyperparams_states,
  inner_state=inner_state,
@@ -270,7 +270,7 @@ def update(
  **extra_args,
  ) -> WrappedScheduleState:
  del extra_args
- new_count = numerics.safe_int32_increment(state.count)
+ new_count = numerics.safe_increment(state.count)
  return WrappedScheduleState(count=new_count)
 
  def __call__(

diff --git a/optax/transforms/_accumulation.py b/optax/transforms/_accumulation.py
@@ -345,9 +345,9 @@ def _do_update(updates, state, params):
 
  emit = state.mini_step == (k_steps - 1)
  new_state = MultiStepsState(
- mini_step=numerics.safe_int32_increment(state.mini_step) % k_steps,
+ mini_step=numerics.safe_increment(state.mini_step) % k_steps,
  gradient_step=emit
- * numerics.safe_int32_increment(state.gradient_step)
+ * numerics.safe_increment(state.gradient_step)
  + (1 - emit) * state.gradient_step,
  inner_opt_state=jtu.tree_map(
  lambda st, nst: jnp.where(emit, nst, st),

diff --git a/optax/transforms/_adding.py b/optax/transforms/_adding.py
@@ -92,7 +92,7 @@ def init_fn(params):
 
  def update_fn(updates, state, params=None): # pylint: disable=missing-docstring
  del params
- count_inc = numerics.safe_int32_increment(state.count)
+ count_inc = numerics.safe_increment(state.count)
  standard_deviation = jnp.sqrt(eta / count_inc**gamma)
 
  rng_key, sample_key = jax.random.split(state.rng_key)

diff --git a/optax/transforms/_conditionality.py b/optax/transforms/_conditionality.py
@@ -102,7 +102,7 @@ def reject_update(_):
  should_transform_fn(state.step, **condition_kwargs),
  do_update, reject_update, operand=None)
  return updates, ConditionallyTransformState(
- new_inner_state, numerics.safe_int32_increment(state.step))
+ new_inner_state, numerics.safe_increment(state.step))
 
  return base.GradientTransformationExtraArgs(init_fn, update_fn)
 
@@ -165,7 +165,7 @@ def reject_update(_):
  do_update, reject_update, operand=None)
 
  return updates, ConditionallyMaskState(
- step=numerics.safe_int32_increment(state.step),
+ step=numerics.safe_increment(state.step),
  inner_state=new_inner_state,
  )
 
@@ -230,7 +230,7 @@ def update(updates, state, params=None, **extra_args):
  jnp.array([jnp.all(jnp.isfinite(p)) for p in flat_updates]))
  notfinite_count = jnp.where(
  isfinite, jnp.zeros([], jnp.int32),
- numerics.safe_int32_increment(state.notfinite_count))
+ numerics.safe_increment(state.notfinite_count))
 
  def do_update(_):
  return inner.update(updates, inner_state, params, **extra_args)
@@ -247,7 +247,7 @@ def reject_update(_):
  last_finite=isfinite,
  total_notfinite=jnp.where(
  isfinite, state.total_notfinite,
- numerics.safe_int32_increment(state.total_notfinite)),
+ numerics.safe_increment(state.total_notfinite)),
  inner_state=new_inner_state)
 
  return base.GradientTransformationExtraArgs(init=init, update=update)