diff --git a/dfdx-core/src/tensor_ops/prodigy/cpu_kernel.rs b/dfdx-core/src/tensor_ops/prodigy/cpu_kernel.rs index 6cc2d6be..f6789b38 100644 --- a/dfdx-core/src/tensor_ops/prodigy/cpu_kernel.rs +++ b/dfdx-core/src/tensor_ops/prodigy/cpu_kernel.rs @@ -152,7 +152,13 @@ impl ProdigyKernel for Cpu ) -> Result<(), Error> { let mut d_denom_: E = E::zero(); let [beta1, beta2] = cfg.betas.map(E::from_f64).map(Option::unwrap); - let beta3 = E::from_f64(cfg.beta3.unwrap_or_else(|| cfg.betas[1].sqrt())).unwrap(); + let beta3 = E::from_f64(cfg.beta3.unwrap_or_else(|| { + #[cfg(feature = "no-std")] + use num_traits::Float; + + cfg.betas[1].sqrt() + })) + .unwrap(); let bias_correction = if cfg.use_bias_correction { // note: in here the first k = 1, whereas on the reference python code it's 0 diff --git a/dfdx/src/nn/optim/prodigy.rs b/dfdx/src/nn/optim/prodigy.rs index dfe80f0c..5e80947c 100644 --- a/dfdx/src/nn/optim/prodigy.rs +++ b/dfdx/src/nn/optim/prodigy.rs @@ -166,12 +166,27 @@ mod tests { .zip(expected_updates) { let prediction = m.forward_mut(x.trace(grads)); + + #[cfg(feature = "test-f64")] + assert_close_to_literal!(prediction, ey, 7e-5); + #[cfg(not(feature = "test-f64"))] assert_close_to_literal!(prediction, ey); + let loss = crate::losses::mse_loss(prediction, dev.tensor(y)); grads = loss.backward(); + + #[cfg(feature = "test-f64")] + assert_close_to_literal!(grads.get(&m.weight), eg, 3e-5); + #[cfg(not(feature = "test-f64"))] assert_close_to_literal!(grads.get(&m.weight), eg); + opt.update(&mut m, &grads).expect(""); + + #[cfg(feature = "test-f64")] + assert_close_to_literal!(m.weight, eu, 5e-4); + #[cfg(not(feature = "test-f64"))] assert_close_to_literal!(m.weight, eu); + m.zero_grads(&mut grads); } }