Commit

rename apply_gradients
avik-pal committed May 13, 2024
1 parent eb8918d commit 35c7648
Showing 7 changed files with 34 additions and 17 deletions.
1 change: 1 addition & 0 deletions docs/src/api/Lux/contrib.md
@@ -35,6 +35,7 @@ basic building blocks which can be seamlessly composed to create complex training
 Lux.Experimental.TrainState
 Lux.Experimental.compute_gradients
 Lux.Experimental.apply_gradients
+Lux.Experimental.apply_gradients!
 ```
 
 ## Parameter Freezing
2 changes: 1 addition & 1 deletion examples/HyperNet/main.jl
@@ -102,7 +102,7 @@ function train()
             y = y |> dev
             (gs, _, _, train_state) = Lux.Experimental.compute_gradients(
                 AutoZygote(), loss, (data_idx, x, y), train_state)
-            train_state = Lux.Experimental.apply_gradients(train_state, gs, true)
+            train_state = Lux.Experimental.apply_gradients!(train_state, gs)
         end
         ttime = time() - stime

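The example scripts touched by this commit (HyperNet, PolynomialFitting, SimpleChains, SimpleRNN) all receive the same one-line change. A minimal before/after sketch of the call-site migration, assuming `train_state` and the gradients `gs` come from `Lux.Experimental.compute_gradients` as in the hunk above:

```julia
# Before this commit: in-place behaviour was requested via a trailing Bool flag.
train_state = Lux.Experimental.apply_gradients(train_state, gs, true)

# After this commit: the in-place update is a separate bang (!) function.
train_state = Lux.Experimental.apply_gradients!(train_state, gs)
```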
2 changes: 1 addition & 1 deletion examples/PolynomialFitting/main.jl
@@ -79,7 +79,7 @@ function main(tstate::Lux.Experimental.TrainState, vjp, data, epochs)
         if epoch % 50 == 1 || epoch == epochs
             @printf "Epoch: %3d \t Loss: %.5g\n" epoch loss
         end
-        tstate = Lux.Training.apply_gradients(tstate, grads, true)
+        tstate = Lux.Training.apply_gradients!(tstate, grads)
     end
     return tstate
 end
2 changes: 1 addition & 1 deletion examples/SimpleChains/main.jl
@@ -82,7 +82,7 @@ function train(model; rng=Xoshiro(0), kwargs...)
         for (x, y) in train_dataloader
             (gs, _, _, train_state) = Lux.Experimental.compute_gradients(
                 AutoZygote(), loss, (x, y), train_state)
-            train_state = Lux.Experimental.apply_gradients(train_state, gs, true)
+            train_state = Lux.Experimental.apply_gradients!(train_state, gs)
         end
         ttime = time() - stime
2 changes: 1 addition & 1 deletion examples/SimpleRNN/main.jl
@@ -157,7 +157,7 @@ function main(model_type)
 
             gs, loss, _, train_state = Lux.Experimental.compute_gradients(
                 AutoZygote(), compute_loss, (x, y), train_state)
-            train_state = Lux.Experimental.apply_gradients(train_state, gs, true)
+            train_state = Lux.Experimental.apply_gradients!(train_state, gs)
 
             @printf "Epoch [%3d]: Loss %4.5f\n" epoch loss
         end
23 changes: 11 additions & 12 deletions ext/LuxOptimisersExt.jl
@@ -36,18 +36,17 @@ function Lux.Experimental.TrainState(
     return Lux.Experimental.TrainState(nothing, nothing, model, ps, st, st_opt, 0)
 end
 
-function Lux.Experimental.apply_gradients(
-        ts::Lux.Experimental.TrainState, grads, update_inplace=false)
-    if update_inplace
-        optimizer_state, ps = Optimisers.update(ts.optimizer_state, ts.parameters, grads)
-        return Lux.Experimental.TrainState(ts.cache, ts.objective_function, ts.model,
-            ps, ts.states, optimizer_state, ts.step + 1)
-    else
-        Optimisers.update!(ts.optimizer_state, ts.parameters, grads)
-        return Lux.Experimental.TrainState(
-            ts.cache, ts.objective_function, ts.model, ts.parameters,
-            ts.states, ts.optimizer_state, ts.step + 1)
-    end
+function Lux.Experimental.apply_gradients(ts::Lux.Experimental.TrainState, grads)
+    optimizer_state, ps = Optimisers.update(ts.optimizer_state, ts.parameters, grads)
+    return Lux.Experimental.TrainState(ts.cache, ts.objective_function, ts.model,
+        ps, ts.states, optimizer_state, ts.step + 1)
+end
+
+function Lux.Experimental.apply_gradients!(ts::Lux.Experimental.TrainState, grads)
+    Optimisers.update!(ts.optimizer_state, ts.parameters, grads)
+    return Lux.Experimental.TrainState(
+        ts.cache, ts.objective_function, ts.model, ts.parameters,
+        ts.states, ts.optimizer_state, ts.step + 1)
 end
 
 # DistributedUtils
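With the rename, each method of the extension above maps onto one Optimisers.jl entry point: `apply_gradients` calls `Optimisers.update` and leaves the existing parameters untouched, while `apply_gradients!` calls `Optimisers.update!` and mutates the parameter and optimizer-state arrays. A minimal sketch of that underlying distinction at the Optimisers.jl level, using a hypothetical parameter NamedTuple that is not taken from this commit:

```julia
using Optimisers

ps = (; weight=randn(Float32, 2, 2))   # hypothetical parameters
gs = (; weight=ones(Float32, 2, 2))    # hypothetical gradients
opt_state = Optimisers.setup(Adam(0.01f0), ps)

# Out-of-place (what `apply_gradients` now wraps): returns fresh state and parameters.
opt_state_new, ps_new = Optimisers.update(opt_state, ps, gs)

# In-place (what `apply_gradients!` now wraps): mutates the arrays behind `opt_state` and `ps`,
# avoiding the allocation of a new parameter tree on every step.
Optimisers.update!(opt_state, ps, gs)
```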
19 changes: 18 additions & 1 deletion src/contrib/training.jl
@@ -37,7 +37,7 @@ function Base.show(io::IO, ts::TrainState)
 end
 
 """
-    apply_gradients(ts::TrainState, grads, update_inplace::Bool=false)
+    apply_gradients(ts::TrainState, grads)
 
 Update the parameters stored in `ts` using the gradients `grads`.
@@ -53,6 +53,23 @@ Updated [`TrainState`](@ref) object.
 """
 function apply_gradients end
 
+"""
+    apply_gradients!(ts::TrainState, grads)
+
+Update the parameters stored in `ts` using the gradients `grads`. This is an inplace version
+of [`apply_gradients`](@ref).
+
+## Arguments
+
+  - `ts`: [`TrainState`](@ref) object.
+  - `grads`: Gradients of the loss function wrt `ts.params`.
+
+## Returns
+
+Updated [`TrainState`](@ref) object.
+"""
+function apply_gradients! end
+
 """
     compute_gradients(ad::ADTypes.AbstractADType, objective_function::Function, data,
         ts::TrainState)
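Combined with the `compute_gradients` docstring above, one training step with the renamed API reads roughly as below. This is a sketch only; `loss_function`, `data`, and the construction of `train_state` are assumed to follow the example scripts earlier in this commit.

```julia
# Assumes: using Lux, ADTypes, Zygote, Optimisers, plus a `train_state` built as in the examples.
function train_step!(train_state, loss_function, data)
    gs, loss, stats, train_state = Lux.Experimental.compute_gradients(
        AutoZygote(), loss_function, data, train_state)
    train_state = Lux.Experimental.apply_gradients!(train_state, gs)  # in-place parameter update
    return train_state, loss
end
```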
