From c9f883a31ff2ae787bfb9c0cb3ea39c2285c29cf Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Fri, 13 Sep 2024 23:19:56 -0400 Subject: [PATCH 1/7] fix: update to Lux & Boltz 1.0 --- Project.toml | 10 +++--- README.md | 5 +-- docs/src/examples/neural_gde.md | 10 +++--- src/DiffEqFlux.jl | 2 +- src/ffjord.jl | 8 ++--- src/neural_de.jl | 56 ++++++++++++++++----------------- 6 files changed, 46 insertions(+), 45 deletions(-) diff --git a/Project.toml b/Project.toml index 72a79b18c..f8f6a376d 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "DiffEqFlux" uuid = "aae7a2af-3d4f-5e19-a356-7da93b79d9d0" authors = ["Chris Rackauckas "] -version = "3.6.0" +version = "4.0.0" [deps] ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" @@ -29,7 +29,7 @@ DiffEqFluxDataInterpolationsExt = "DataInterpolations" ADTypes = "1.5" Aqua = "0.8.7" BenchmarkTools = "1.5.0" -Boltz = "0.4.2" +Boltz = "1" ChainRulesCore = "1" ComponentArrays = "0.15.17" ConcreteStructs = "0.2" @@ -46,10 +46,10 @@ ForwardDiff = "0.10" Hwloc = "3" InteractiveUtils = "<0.0.1, 1" LinearAlgebra = "1.10" -Lux = "0.5.65" +Lux = "1" LuxCUDA = "0.3.2" -LuxCore = "0.1" -LuxLib = "0.3.50" +LuxCore = "1" +LuxLib = "1.2" MLDatasets = "0.7.14" NNlib = "0.9.22" OneHotArrays = "0.2.5" diff --git a/README.md b/README.md index 0824a4a52..708612e47 100644 --- a/README.md +++ b/README.md @@ -63,17 +63,18 @@ explore various ways to integrate the two methodologies: ## Breaking Changes -### v4 (upcoming) +### v4 - `TensorLayer` has been removed, use `Boltz.Layers.TensorProductLayer` instead. - Basis functions in DiffEqFlux have been removed in favor of `Boltz.Basis` module. - `SplineLayer` has been removed, use `Boltz.Layers.SplineLayer` instead. - `NeuralHamiltonianDE` has been removed, use `NeuralODE` with `Layers.HamiltonianNN` instead. - `HamiltonianNN` has been removed in favor of `Layers.HamiltonianNN`. + - `Lux` and `Boltz` are updated to v1. ### v3 - - Flux dependency is dropped. If a non Lux `AbstractExplicitLayer` is passed we try to automatically convert it to a Lux model with `FromFluxAdaptor()(model)`. + - Flux dependency is dropped. If a non Lux `AbstractLuxLayer` is passed we try to automatically convert it to a Lux model with `FromFluxAdaptor()(model)`. - `Flux` is no longer re-exported from `DiffEqFlux`. Instead we reexport `Lux`. - `NeuralDAE` now allows an optional `du0` as input. - `TensorLayer` is now a Lux Neural Network. 
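For readers coming from v3, the renamings listed above map onto the new `Boltz` APIs roughly as follows. This is a minimal sketch based on the doc examples updated later in this patch series (`docs/src/examples/tensor_layer.md` and `docs/src/examples/hamiltonian_nn.md`), not a complete migration guide:

```julia
using DiffEqFlux, ComponentArrays, OrdinaryDiffEq, Random

# v3 `TensorLayer([LegendreBasis(10), LegendreBasis(10)], 1)` is now spelled via Boltz:
tensor_layer = Layers.TensorProductLayer([Basis.Legendre(10), Basis.Legendre(10)], 1)

# v3 `NeuralHamiltonianDE(HamiltonianNN(mlp), tspan, Tsit5())` becomes a plain `NeuralODE`
# wrapping `Layers.HamiltonianNN`:
hnn = Layers.HamiltonianNN{true}(Layers.MLP(2, (64, 1)); autodiff = AutoZygote())
model = NeuralODE(hnn, (0.0f0, 1.0f0), Tsit5(); save_everystep = false, save_start = true)

ps, st = Lux.setup(Xoshiro(0), hnn)
ps = ComponentArray(ps)
sol = first(model(rand(Float32, 2), ps, st))  # solve the Hamiltonian ODE with the new API
```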
diff --git a/docs/src/examples/neural_gde.md b/docs/src/examples/neural_gde.md index 11643b7c8..e50c70245 100644 --- a/docs/src/examples/neural_gde.md +++ b/docs/src/examples/neural_gde.md @@ -14,7 +14,7 @@ using GraphNeuralNetworks, DifferentialEquations using DiffEqFlux: NeuralODE using GraphNeuralNetworks.GNNGraphs: normalized_adjacency using Lux, NNlib, Optimisers, Zygote, Random, ComponentArrays -using Lux: AbstractExplicitLayer, glorot_normal, zeros32 +using Lux: AbstractLuxLayer, glorot_normal, zeros32 import Lux: initialparameters, initialstates using SciMLSensitivity using Statistics: mean @@ -46,7 +46,7 @@ nout = length(classes) epochs = 20 # Define the graph neural network -struct ExplicitGCNConv{F1, F2, F3, F4} <: AbstractExplicitLayer +struct ExplicitGCNConv{F1, F2, F3, F4} <: AbstractLuxLayer in_chs::Int out_chs::Int activation::F1 @@ -152,7 +152,7 @@ using GraphNeuralNetworks, DifferentialEquations using DiffEqFlux: NeuralODE using GraphNeuralNetworks.GNNGraphs: normalized_adjacency using Lux, NNlib, Optimisers, Zygote, Random, ComponentArrays -using Lux: AbstractExplicitLayer, glorot_normal, zeros32 +using Lux: AbstractLuxLayer, glorot_normal, zeros32 import Lux: initialparameters, initialstates using SciMLSensitivity using Statistics: mean @@ -207,10 +207,10 @@ epochs = 20 ## Define the Graph Neural Network -Here, we define a type of graph neural networks called `GCNConv`. We use the name `ExplicitGCNConv` to avoid naming conflicts with `GraphNeuralNetworks`. For more information on defining a layer with `Lux`, please consult to the [doc](http://lux.csail.mit.edu/dev/introduction/overview/#AbstractExplicitLayer-API). +Here, we define a type of graph neural networks called `GCNConv`. We use the name `ExplicitGCNConv` to avoid naming conflicts with `GraphNeuralNetworks`. For more information on defining a layer with `Lux`, please consult to the [doc](http://lux.csail.mit.edu/dev/introduction/overview/#AbstractLuxLayer-API). ```julia -struct ExplicitGCNConv{F1, F2, F3} <: AbstractExplicitLayer +struct ExplicitGCNConv{F1, F2, F3} <: AbstractLuxLayer Ã::AbstractMatrix # nomalized_adjacency matrix in_chs::Int out_chs::Int diff --git a/src/DiffEqFlux.jl b/src/DiffEqFlux.jl index f21e657a7..60ef7fb0c 100644 --- a/src/DiffEqFlux.jl +++ b/src/DiffEqFlux.jl @@ -6,7 +6,7 @@ using ConcreteStructs: @concrete using Distributions: Distributions, ContinuousMultivariateDistribution, Distribution, logpdf using LinearAlgebra: LinearAlgebra, Diagonal, det, tr, mul! using Lux: Lux, Chain, Dense, StatefulLuxLayer, FromFluxAdaptor -using LuxCore: LuxCore, AbstractExplicitLayer, AbstractExplicitContainerLayer +using LuxCore: LuxCore, AbstractLuxLayer, AbstractLuxContainerLayer, AbstractLuxWrapperLayer using LuxLib: batched_matmul using Random: Random, AbstractRNG, randn! using Reexport: @reexport diff --git a/src/ffjord.jl b/src/ffjord.jl index 8d89ea755..c8fe77b25 100644 --- a/src/ffjord.jl +++ b/src/ffjord.jl @@ -1,4 +1,4 @@ -abstract type CNFLayer <: LuxCore.AbstractExplicitContainerLayer{(:model,)} end +abstract type CNFLayer <: AbstractLuxWrapperLayer{:model} end """ FFJORD(model, tspan, input_dims, args...; ad = nothing, basedist = nothing, kwargs...) @@ -21,7 +21,7 @@ for new values of x. Arguments: - - `model`: A `Flux.Chain` or `Lux.AbstractExplicitLayer` neural network that defines the + - `model`: A `Flux.Chain` or `Lux.AbstractLuxLayer` neural network that defines the dynamics of the model. - `basedist`: Distribution of the base variable. Set to the unit normal by default. 
- `input_dims`: Input Dimensions of the model. @@ -49,7 +49,7 @@ Information Processing Systems, pp. 6572-6583. 2018. preprint arXiv:1810.01367 (2018). """ @concrete struct FFJORD <: CNFLayer - model <: AbstractExplicitLayer + model <: AbstractLuxLayer basedist <: Union{Nothing, Distribution} ad input_dims @@ -65,7 +65,7 @@ end function FFJORD( model, tspan, input_dims, args...; ad = nothing, basedist = nothing, kwargs...) - !(model isa AbstractExplicitLayer) && (model = FromFluxAdaptor()(model)) + !(model isa AbstractLuxLayer) && (model = FromFluxAdaptor()(model)) return FFJORD(model, basedist, ad, input_dims, tspan, args, kwargs) end diff --git a/src/neural_de.jl b/src/neural_de.jl index fcdfdab0b..cb69e960e 100644 --- a/src/neural_de.jl +++ b/src/neural_de.jl @@ -1,5 +1,5 @@ -abstract type NeuralDELayer <: AbstractExplicitContainerLayer{(:model,)} end -abstract type NeuralSDELayer <: AbstractExplicitContainerLayer{(:drift, :diffusion)} end +abstract type NeuralDELayer <: AbstractLuxWrapperLayer{:model} end +abstract type NeuralSDELayer <: AbstractLuxContainerLayer{(:drift, :diffusion)} end basic_tgrad(u, p, t) = zero(u) basic_dde_tgrad(u, h, p, t) = zero(u) @@ -15,7 +15,7 @@ derivatives of the loss backwards in time. Arguments: - - `model`: A `Flux.Chain` or `Lux.AbstractExplicitLayer` neural network that defines the + - `model`: A `Flux.Chain` or `Lux.AbstractLuxLayer` neural network that defines the ̇x. - `tspan`: The timespan to be solved on. - `alg`: The algorithm used to solve the ODE. Defaults to `nothing`, i.e. the @@ -33,14 +33,14 @@ References: [1] Pontryagin, Lev Semenovich. Mathematical theory of optimal processes. CRC press, 1987. """ @concrete struct NeuralODE <: NeuralDELayer - model <: AbstractExplicitLayer + model <: AbstractLuxLayer tspan args kwargs end function NeuralODE(model, tspan, args...; kwargs...) - !(model isa AbstractExplicitLayer) && (model = FromFluxAdaptor()(model)) + !(model isa AbstractLuxLayer) && (model = FromFluxAdaptor()(model)) return NeuralODE(model, tspan, args, kwargs) end @@ -65,9 +65,9 @@ Constructs a neural stochastic differential equation (neural SDE) with diagonal Arguments: - - `drift`: A `Flux.Chain` or `Lux.AbstractExplicitLayer` neural network that defines the + - `drift`: A `Flux.Chain` or `Lux.AbstractLuxLayer` neural network that defines the drift function. - - `diffusion`: A `Flux.Chain` or `Lux.AbstractExplicitLayer` neural network that defines + - `diffusion`: A `Flux.Chain` or `Lux.AbstractLuxLayer` neural network that defines the diffusion function. Should output a vector of the same size as the input. - `tspan`: The timespan to be solved on. - `alg`: The algorithm used to solve the ODE. Defaults to `nothing`, i.e. the @@ -78,16 +78,16 @@ Arguments: documentation for more details. """ @concrete struct NeuralDSDE <: NeuralSDELayer - drift <: AbstractExplicitLayer - diffusion <: AbstractExplicitLayer + drift <: AbstractLuxLayer + diffusion <: AbstractLuxLayer tspan args kwargs end function NeuralDSDE(drift, diffusion, tspan, args...; kwargs...) - !(drift isa AbstractExplicitLayer) && (drift = FromFluxAdaptor()(drift)) - !(diffusion isa AbstractExplicitLayer) && (diffusion = FromFluxAdaptor()(diffusion)) + !(drift isa AbstractLuxLayer) && (drift = FromFluxAdaptor()(drift)) + !(diffusion isa AbstractLuxLayer) && (diffusion = FromFluxAdaptor()(diffusion)) return NeuralDSDE(drift, diffusion, tspan, args, kwargs) end @@ -113,9 +113,9 @@ Constructs a neural stochastic differential equation (neural SDE). 
Arguments: - - `drift`: A `Flux.Chain` or `Lux.AbstractExplicitLayer` neural network that defines the + - `drift`: A `Flux.Chain` or `Lux.AbstractLuxLayer` neural network that defines the drift function. - - `diffusion`: A `Flux.Chain` or `Lux.AbstractExplicitLayer` neural network that defines + - `diffusion`: A `Flux.Chain` or `Lux.AbstractLuxLayer` neural network that defines the diffusion function. Should output a matrix that is `nbrown x size(x, 1)`. - `tspan`: The timespan to be solved on. - `nbrown`: The number of Brownian processes. @@ -127,8 +127,8 @@ Arguments: documentation for more details. """ @concrete struct NeuralSDE <: NeuralSDELayer - drift <: AbstractExplicitLayer - diffusion <: AbstractExplicitLayer + drift <: AbstractLuxLayer + diffusion <: AbstractLuxLayer tspan nbrown::Int args @@ -136,8 +136,8 @@ Arguments: end function NeuralSDE(drift, diffusion, tspan, nbrown, args...; kwargs...) - !(drift isa AbstractExplicitLayer) && (drift = FromFluxAdaptor()(drift)) - !(diffusion isa AbstractExplicitLayer) && (diffusion = FromFluxAdaptor()(diffusion)) + !(drift isa AbstractLuxLayer) && (drift = FromFluxAdaptor()(drift)) + !(diffusion isa AbstractLuxLayer) && (diffusion = FromFluxAdaptor()(diffusion)) return NeuralSDE(drift, diffusion, tspan, nbrown, args, kwargs) end @@ -165,7 +165,7 @@ Constructs a neural delay differential equation (neural DDE) with constant delay Arguments: - - `model`: A `Flux.Chain` or `Lux.AbstractExplicitLayer` neural network that defines the + - `model`: A `Flux.Chain` or `Lux.AbstractLuxLayer` neural network that defines the derivative function. Should take an input of size `[x; x(t - lag_1); ...; x(t - lag_n)]` and produce and output shaped like `x`. - `tspan`: The timespan to be solved on. @@ -182,7 +182,7 @@ Arguments: documentation for more details. """ @concrete struct NeuralCDDE <: NeuralDELayer - model <: AbstractExplicitLayer + model <: AbstractLuxLayer tspan hist lags @@ -191,7 +191,7 @@ Arguments: end function NeuralCDDE(model, tspan, hist, lags, args...; kwargs...) - !(model isa AbstractExplicitLayer) && (model = FromFluxAdaptor()(model)) + !(model isa AbstractLuxLayer) && (model = FromFluxAdaptor()(model)) return NeuralCDDE(model, tspan, hist, lags, args, kwargs) end @@ -218,7 +218,7 @@ Constructs a neural differential-algebraic equation (neural DAE). Arguments: - - `model`: A `Flux.Chain` or `Lux.AbstractExplicitLayer` neural network that defines the + - `model`: A `Flux.Chain` or `Lux.AbstractLuxLayer` neural network that defines the derivative function. Should take an input of size `x` and produce the residual of `f(dx,x,t)` for only the differential variables. - `constraints_model`: A function `constraints_model(u,p,t)` for the fixed @@ -233,7 +233,7 @@ Arguments: documentation for more details. """ @concrete struct NeuralDAE <: NeuralDELayer - model <: AbstractExplicitLayer + model <: AbstractLuxLayer constraints_model tspan args @@ -243,7 +243,7 @@ end function NeuralDAE( model, constraints_model, tspan, args...; differential_vars = nothing, kwargs...) - !(model isa AbstractExplicitLayer) && (model = FromFluxAdaptor()(model)) + !(model isa AbstractLuxLayer) && (model = FromFluxAdaptor()(model)) return NeuralDAE(model, constraints_model, tspan, args, differential_vars, kwargs) end @@ -288,7 +288,7 @@ constraint equations. 
Arguments: - - `model`: A `Flux.Chain` or `Lux.AbstractExplicitLayer` neural network that defines the + - `model`: A `Flux.Chain` or `Lux.AbstractLuxLayer` neural network that defines the ̇`f(u,p,t)` - `constraints_model`: A function `constraints_model(u,p,t)` for the fixed constraints to impose on the algebraic equations. @@ -308,7 +308,7 @@ Arguments: documentation for more details. """ @concrete struct NeuralODEMM <: NeuralDELayer - model <: AbstractExplicitLayer + model <: AbstractLuxLayer constraints_model tspan mass_matrix @@ -317,7 +317,7 @@ Arguments: end function NeuralODEMM(model, constraints_model, tspan, mass_matrix, args...; kwargs...) - !(model isa AbstractExplicitLayer) && (model = FromFluxAdaptor()(model)) + !(model isa AbstractLuxLayer) && (model = FromFluxAdaptor()(model)) return NeuralODEMM(model, constraints_model, tspan, mass_matrix, args, kwargs) end @@ -376,10 +376,10 @@ end Constructs a Dimension Mover Layer. We can have Lux's conventional order `(data, channel, batch)` by using it as the last layer -of `AbstractExplicitLayer` to swap the batch-index and the time-index of the Neural DE's +of `AbstractLuxLayer` to swap the batch-index and the time-index of the Neural DE's output considering that each time point is a channel. """ -@concrete struct DimMover <: AbstractExplicitLayer +@concrete struct DimMover <: AbstractLuxLayer from to end From 2a240c0e456678ba6e811e426c9979dafeb9fcee Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Fri, 13 Sep 2024 23:26:30 -0400 Subject: [PATCH 2/7] fix!: remove deprecated functions --- docs/src/examples/hamiltonian_nn.md | 7 ++-- docs/src/examples/tensor_layer.md | 4 +- src/DiffEqFlux.jl | 3 +- src/deprecated.jl | 47 --------------------- test/hamiltonian_nn_tests.jl | 62 ---------------------------- test/spline_layer_tests.jl | 63 ----------------------------- test/tensor_product_tests.jl | 56 ------------------------- 7 files changed, 7 insertions(+), 235 deletions(-) delete mode 100644 src/deprecated.jl delete mode 100644 test/hamiltonian_nn_tests.jl delete mode 100644 test/spline_layer_tests.jl delete mode 100644 test/tensor_product_tests.jl diff --git a/docs/src/examples/hamiltonian_nn.md b/docs/src/examples/hamiltonian_nn.md index dc359b0c0..ddfa11b7a 100644 --- a/docs/src/examples/hamiltonian_nn.md +++ b/docs/src/examples/hamiltonian_nn.md @@ -57,7 +57,7 @@ res = Optimization.solve(opt_prob, opt, dataloader; callback) ps_trained = res.u -model = NeuralHamiltonianDE( +model = NeuralODE( hnn, (0.0f0, 1.0f0), Tsit5(); save_everystep = false, save_start = true, saveat = t) pred = Array(first(model(data[:, 1], ps_trained, st))) @@ -127,10 +127,11 @@ ps_trained = res.u ### Solving the ODE using trained HNN -In order to visualize the learned trajectories, we need to solve the ODE. We will use the `NeuralHamiltonianDE` layer, which is essentially a wrapper over `HamiltonianNN` layer, and solves the ODE. +In order to visualize the learned trajectories, we need to solve the ODE. We will use the +`NeuralODE` layer with `HamiltonianNN` layer, and solves the ODE. 
```@example hamiltonian -model = NeuralHamiltonianDE( +model = NeuralODE( hnn, (0.0f0, 1.0f0), Tsit5(); save_everystep = false, save_start = true, saveat = t) pred = Array(first(model(data[:, 1], ps_trained, st))) diff --git a/docs/src/examples/tensor_layer.md b/docs/src/examples/tensor_layer.md index ed717cb98..4bb7e3cae 100644 --- a/docs/src/examples/tensor_layer.md +++ b/docs/src/examples/tensor_layer.md @@ -33,8 +33,8 @@ Now, we create a TensorLayer that will be able to perform 10th order expansions a Legendre Basis: ```@example tensor -A = [LegendreBasis(10), LegendreBasis(10)] -nn = TensorLayer(A, 1) +A = [Basis.Legendre(10), Basis.Legendre(10)] +nn = Layers.TensorProductLayer(A, 1) ps, st = Lux.setup(Xoshiro(0), nn) ps = ComponentArray(ps) nn = StatefulLuxLayer{true}(nn, nothing, st) diff --git a/src/DiffEqFlux.jl b/src/DiffEqFlux.jl index 60ef7fb0c..4bb99c9e7 100644 --- a/src/DiffEqFlux.jl +++ b/src/DiffEqFlux.jl @@ -26,6 +26,7 @@ const CRC = ChainRulesCore @reexport using ADTypes, Lux, Boltz fixed_state_type(_) = true +# TODO: Update the signature fixed_state_type(::Layers.HamiltonianNN{FST}) where {FST} = FST include("ffjord.jl") @@ -34,8 +35,6 @@ include("neural_de.jl") include("collocation.jl") include("multiple_shooting.jl") -include("deprecated.jl") - export NeuralODE, NeuralDSDE, NeuralSDE, NeuralCDDE, NeuralDAE, AugmentedNDELayer, NeuralODEMM export FFJORD, FFJORDDistribution diff --git a/src/deprecated.jl b/src/deprecated.jl deleted file mode 100644 index 485cb5aad..000000000 --- a/src/deprecated.jl +++ /dev/null @@ -1,47 +0,0 @@ -# Tensor Layer -Base.@deprecate TensorProductBasisFunction(f, n) Basis.GeneralBasisFunction{:none}(f, n, 1) - -for B in (:Chebyshev, :Sin, :Cos, :Fourier, :Legendre, :Polynomial) - Bold = Symbol(B, :Basis) - @eval Base.@deprecate $(Bold)(n) Basis.$(B)(n) -end - -Base.@deprecate TensorLayer(model, out_dim::Int, init_p::F = randn) where {F <: Function} Boltz.Layers.TensorProductLayer( - model, out_dim; init_weight = init_p) - -# Spline Layer -function SplineLayer(tspan, tstep, spline_basis; init_saved_points::F = nothing) where {F} - Base.depwarn( - "SplineLayer is deprecated and will be removed in the next major release. Refer to \ - Boltz.jl `Layers.SplineLayer` for the newer version.", - :SplineLayer) - - init_saved_points_corrected = if init_saved_points === nothing - nothing - else - let init_saved_points = init_saved_points - (rng, _, grid_min, grid_max, grid_step) -> begin - return init_saved_points(rng, (grid_min, grid_max), grid_step) - end - end - end - - return Layers.SplineLayer((), first(tspan), last(tspan), tstep, spline_basis; - init_saved_points = init_saved_points_corrected) -end - -export SplineLayer - -# Hamiltonian Neural Network -Base.@deprecate HamiltonianNN(model; ad = AutoZygote()) Layers.HamiltonianNN{true}( - model; autodiff = ad) - -function NeuralHamiltonianDE(model, tspan, args...; ad = AutoForwardDiff(), kwargs...) - Base.depwarn( - "NeuralHamiltonianDE is deprecated, use `NeuralODE` with `Layers.HamiltonianNN` instead.", - :NeuralHamiltonianDE) - hnn = model isa Layers.HamiltonianNN ? 
model : HamiltonianNN(model; ad) - return NeuralODE(hnn, tspan, args, kwargs) -end - -export NeuralHamiltonianDE diff --git a/test/hamiltonian_nn_tests.jl b/test/hamiltonian_nn_tests.jl deleted file mode 100644 index 03aa8da32..000000000 --- a/test/hamiltonian_nn_tests.jl +++ /dev/null @@ -1,62 +0,0 @@ -@testitem "Hamiltonian NN" tags=[:advancedneuralde] begin - using Zygote, OrdinaryDiffEq, ForwardDiff, Optimisers, Random, ComponentArrays, - Statistics - - # Checks for Shapes and Non-Zero Gradients - u0 = rand(Float32, 6, 1) - - for ad in (AutoForwardDiff(), AutoZygote()) - hnn = HamiltonianNN(Chain(Dense(6 => 12, relu), Dense(12 => 1)); ad) - ps, st = Lux.setup(Xoshiro(0), hnn) - ps = ps |> ComponentArray - - @test size(first(hnn(u0, ps, st))) == (6, 1) - - @test !iszero(ForwardDiff.gradient(ps -> sum(first(hnn(u0, ps, st))), ps)) - @test !iszero(only(Zygote.gradient(ps -> sum(first(hnn(u0, ps, st))), ps))) - end - - # Test Convergence on a toy problem - t = range(0.0f0, 1.0f0; length = 64) - π_32 = Float32(π) - q_t = reshape(sin.(2π_32 * t), 1, :) - p_t = reshape(cos.(2π_32 * t), 1, :) - dqdt = 2π_32 .* p_t - dpdt = -2π_32 .* q_t - - data = vcat(q_t, p_t) - target = vcat(dqdt, dpdt) - - hnn = HamiltonianNN(Chain(Dense(2 => 16, relu), Dense(16 => 1)); ad = AutoForwardDiff()) - ps, st = Lux.setup(Xoshiro(0), hnn) - ps = ps |> ComponentArray - - opt = Optimisers.Adam(0.01) - st_opt = Optimisers.setup(opt, ps) - loss(data, target, ps) = mean(abs2, first(hnn(data, ps, st)) .- target) - - initial_loss = loss(data, target, ps) - - for epoch in 1:100 - global ps, st_opt - gs = last(Zygote.gradient(loss, data, target, ps)) - st_opt, ps = Optimisers.update!(st_opt, ps, gs) - end - - final_loss = loss(data, target, ps) - - @test initial_loss > 5 * final_loss - - # Test output and gradient of NeuralHamiltonianDE Layer - tspan = (0.0f0, 1.0f0) - - model = NeuralHamiltonianDE( - hnn, tspan, Tsit5(); save_everystep = false, save_start = true, - saveat = range(tspan[1], tspan[2]; length = 10)) - sol = Array(first(model(data[:, 1], ps, st))) - @test size(sol) == (2, 10) - - gs = only(Zygote.gradient(ps -> sum(Array(first(model(data[:, 1], ps, st)))), ps)) - - @test !iszero(gs) -end diff --git a/test/spline_layer_tests.jl b/test/spline_layer_tests.jl deleted file mode 100644 index cd1b8c9f5..000000000 --- a/test/spline_layer_tests.jl +++ /dev/null @@ -1,63 +0,0 @@ -@testitem "SplineLayer" tags=[:basicneuralde] begin - using ComponentArrays, Zygote, DataInterpolations, Optimization, - OptimizationOptimisers, LinearAlgebra, Random - - function run_test(f, layer, atol) - ps, st = Lux.setup(Xoshiro(0), layer) - ps = ComponentArray(ps) - model = StatefulLuxLayer{true}(layer, ps, st) - - data_train_vals = rand(500) - data_train_fn = f.(data_train_vals) - - function loss_function(θ) - data_pred = [model(x, θ) for x in data_train_vals] - loss = sum(abs.(data_pred .- data_train_fn)) / length(data_train_fn) - return loss - end - - function callback(p, l) - @info "[SplineLayer] Loss: $l" - return false - end - - optfunc = Optimization.OptimizationFunction( - (x, p) -> loss_function(x), Optimization.AutoZygote()) - optprob = Optimization.OptimizationProblem(optfunc, ps) - res = Optimization.solve( - optprob, OptimizationOptimisers.Adam(0.1); callback, maxiters = 100) - - optprob = Optimization.OptimizationProblem(optfunc, res.minimizer) - res = Optimization.solve( - optprob, OptimizationOptimisers.Adam(0.1); callback, maxiters = 100) - opt = res.minimizer - - data_validate_vals = rand(100) - data_validate_fn = 
f.(data_validate_vals) - - data_validate_pred = [model(x, opt) for x in data_validate_vals] - - output = sum(abs.(data_validate_pred .- data_validate_fn)) / - length(data_validate_fn) - return output < atol - end - - ##test 01: affine function, Linear Interpolation - a, b = rand(2) - layer = SplineLayer((0.0, 1.0), 0.01, LinearInterpolation) - @test run_test(x -> a * x + b, layer, 0.1) - - ##test 02: non-linear function, Quadratic Interpolation - a, b, c = rand(3) - layer = SplineLayer((0.0, 1.0), 0.01, QuadraticInterpolation) - @test run_test(x -> a * x^2 + b * x + x, layer, 0.1) - - ##test 03: non-linear function, Quadratic Spline - a, b, c = rand(3) - layer = SplineLayer((0.0, 1.0), 0.1, QuadraticSpline) - @test run_test(x -> a * sin(b * x + c), layer, 0.1) - - ##test 04: non-linear function, Cubic Spline - layer = SplineLayer((0.0, 1.0), 0.1, CubicSpline) - @test run_test(x -> exp(x) * x^2, layer, 0.1) -end diff --git a/test/tensor_product_tests.jl b/test/tensor_product_tests.jl deleted file mode 100644 index d813a6be2..000000000 --- a/test/tensor_product_tests.jl +++ /dev/null @@ -1,56 +0,0 @@ -@testitem "TensorProductLayer" tags=[:basicneuralde] begin - using Zygote, Optimization, OptimizationOptimJL, OptimizationOptimisers, - LinearAlgebra, Random, ComponentArrays - - function run_test(f, layer, atol, N) - ps, st = Lux.setup(Xoshiro(0), layer) - ps = ComponentArray(ps) - model = StatefulLuxLayer{true}(layer, ps, st) - - data_train_vals = [rand(N) for k in 1:500] - data_train_fn = f.(data_train_vals) - - function loss_function(p) - data_pred = [model(x, p) for x in data_train_vals] - loss = sum(norm.(data_pred .- data_train_fn)) / length(data_train_fn) - return loss - end - - function cb(p, l) - @info "[TensorProductLayer] Loss: $l" - return false - end - - optfunc = Optimization.OptimizationFunction( - (x, p) -> loss_function(x), Optimization.AutoZygote()) - optprob = Optimization.OptimizationProblem(optfunc, ps) - res = Optimization.solve( - optprob, OptimizationOptimisers.Adam(0.1); callback = cb, maxiters = 100) - optprob = Optimization.OptimizationProblem(optfunc, res.minimizer) - res = Optimization.solve( - optprob, OptimizationOptimisers.Adam(0.01); callback = cb, maxiters = 100) - optprob = Optimization.OptimizationProblem(optfunc, res.minimizer) - res = Optimization.solve(optprob, BFGS(); callback = cb, maxiters = 200) - opt = res.minimizer - - data_validate_vals = [rand(N) for k in 1:100] - data_validate_fn = f.(data_validate_vals) - - data_validate_pred = [model(x, opt) for x in data_validate_vals] - - return sum(norm.(data_validate_pred .- data_validate_fn)) / - length(data_validate_fn) < atol - end - - ##test 01: affine function, Chebyshev and Polynomial basis - A = rand(2, 2) - b = rand(2) - layer = TensorLayer([ChebyshevBasis(10), PolynomialBasis(10)], 2) - @test run_test(x -> A * x + b, layer, 0.05, 2) - - ##test 02: non-linear function, Chebyshev and Legendre basis - A = rand(2, 2) - b = rand(2) - layer = TensorLayer([ChebyshevBasis(7), FourierBasis(7)], 2) - @test run_test(x -> A * x * norm(x) + b * sin(norm(x)), layer, 0.10, 2) -end From 405925c606d913150339b4aa3ac27e88d34815e4 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Fri, 13 Sep 2024 23:35:40 -0400 Subject: [PATCH 3/7] fix: update the Hamiltonian NN property --- Project.toml | 2 ++ src/DiffEqFlux.jl | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index f8f6a376d..1b6ee765b 100644 --- a/Project.toml +++ b/Project.toml @@ -18,6 +18,7 @@ Reexport = 
"189a3867-3050-52da-a836-e630ba90ab69" SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462" SciMLSensitivity = "1ed8b502-d754-442c-8d5d-10ac956f44a1" Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46" +Static = "aedffcd0-7271-4cad-89d0-dc628f76c6d3" [weakdeps] DataInterpolations = "82cc6244-b520-54b8-b5a6-8a565e85f1d0" @@ -65,6 +66,7 @@ Reexport = "0.2, 1" SciMLBase = "2" SciMLSensitivity = "7" Setfield = "1.1.1" +Static = "1.1.1" Statistics = "1.10" StochasticDiffEq = "6.68.0" Test = "1.10" diff --git a/src/DiffEqFlux.jl b/src/DiffEqFlux.jl index 4bb99c9e7..dc1b9d2a2 100644 --- a/src/DiffEqFlux.jl +++ b/src/DiffEqFlux.jl @@ -20,14 +20,15 @@ using SciMLSensitivity: SciMLSensitivity, AdjointLSS, BacksolveAdjoint, EnzymeVJ SteadyStateAdjoint, TrackerAdjoint, TrackerVJP, ZygoteAdjoint, ZygoteVJP using Setfield: @set! +using Static: True, False const CRC = ChainRulesCore @reexport using ADTypes, Lux, Boltz fixed_state_type(_) = true -# TODO: Update the signature -fixed_state_type(::Layers.HamiltonianNN{FST}) where {FST} = FST +fixed_state_type(::Layers.HamiltonianNN{True}) = true +fixed_state_type(::Layers.HamiltonianNN{False}) = false include("ffjord.jl") include("neural_de.jl") From 42fe746f412813b1f0bbdcbac1bc99e6c2bbe5fb Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Mon, 16 Sep 2024 12:57:05 -0400 Subject: [PATCH 4/7] chore: update compat entries --- .buildkite/pipeline.yml | 3 ++- Project.toml | 4 +--- docs/Project.toml | 6 +++--- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 837686c5a..81069a001 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -18,7 +18,7 @@ steps: RETESTITEMS_NWORKERS: 1 # These tests require quite a lot of GPU memory GROUP: CUDA DATADEPS_ALWAYS_ACCEPT: 'true' - JULIA_PKG_SERVER: "" # it often struggles with our large artifacts + JULIA_PKG_SERVER: "" - label: "Documentation" plugins: @@ -42,5 +42,6 @@ steps: DATADEPS_ALWAYS_ACCEPT: true JULIA_DEBUG: "Documenter" SECRET_DOCUMENTER_KEY: 
"AdqcYtp4x3U5j1ELurHIoOwURqXcOan+qmihqVjsjhoGUzS/snTyZNQ5fxaJr8Yawm9CyyGvh+Q5O98St1LJ9S+pi9C5TFSbPWnNp/vXabMmeUEVLHVYHUeR2wgMCciSnM/oLw5sNAEj3hrWFjLslEGKQSptUCTWuU5WRizhQONDxeA3tz9biZUYvKanP8GjsHUkD3te15n1t6o78T1+EJxb1znrBSd9aK1Y4UaVjBEfVtLtTD8Z6VP1L4SVXVipxrDdzwzbzUDaTpvjo3z3e9qx2u6Xn5qa/os6JY81jRa5ZTWFkev73DYhoFmordSI85grOPwNpvrNWqOAs5kTDg==;U2FsdGVkX1+TXM0w98SRH5lY0Dw+nmRJ1xtJmffK+GVWHdMjhiIxQoYGGoP065hgl1VOf+oLzqWWFoYIfcz5i/VKD5F7O3EhfLdcmG2Y15u4sxr/hLKKMedSCueiusNd4N9EGGmJ4LLY0I1K6vA8Pa2eRwE+yDE+wfSpqpTC3yxMo40Xk7wra6ZwAybGpSHzOItV+2QGHttr/WLntRbx7GD8HDBC/LGNhtmFzFhAo/2CiQ0qgMDHBxyVqZlAdCWdu9xGmD9FC2+HDGv6QyNge1Ajmg6TNd6tuRhAP6VfDidEtaohcGl2TxbuUcd1OSrSbwmqxcw0IhVriRN8FgB8pKYmol7J71za9ljViyzgAjhQFvute/PYB2nw9MB8yCoNu6X0hqoLdmSxbzerpeYh0yRdi6SedESBJV3PgL7uahnyHbhC4dudFPavobeP9nU0okzKXG7fYfR1aiqgeeed4WFi48u+pciWv6Uo8J4lbBTUXu4xI+yWCpt01LNXyTmsIPYUvqbE8U0DkJNsNie9lw4of3UYkKhtVkLBoQg8++uc6i0w70+/sKZDp2OA5Y1jMFrRQaWUHyaRfpX3pXqvghAfVLEybiSWnpE0JiAnBsDcI3zajc4Cp+lui9G0+E8Lc+NbXOMbjiYHejjN46/03sIHNu0YPlU5p7o2xrGpa3cw6o3yHhBE7yVTcBc7A0AFPGZQxTLOEw1lYf7+B6J5AEpDhxR9+gUhmL+S+2kUw+nxsMxdD0Tunfeg4CIoeB9Tl7uIBrZDQ23uVRrcEyE6t3zf7skBcW3wlrHpAY61CxuGuMolcTl0JaeYFTJPYzOgPa+nD/vKaICsrRDkaSUUHcGufbTgqdJLjIkh+M9a7+DPKpfoT7H4gp4VrocqClFmmPoZZAIKjxXAEnEHWILBw0a9cOar1DKfJVoyN1vQIdVeux90a50Ao62m3sHoYiXY3DeutHkAmfWWDl/5zcU2h1T0XWHmRnjjmAW0fZPL+E38rKXYQECeHMDFEYYfbGyZMJx9T9pwfvxTM4Yzd4nB0qspOXMdeGvnVbzqlnaGJpxs/M5zyxILMQzq979bwSI0TPFRqLojhNezOhZHZaJdFoWq6UqW1kFDyzNIIRCQak0kGuhpCeFqqxiFFtC1M1vskpZ5UfqtCQSgILU+XbJWAxZOqrAxy2T7+h9JMS/jLPW+tyjCJx/bhqSGF9fBt6Q1R4ZL1MjaBSocnMj/5H7IZI2TdH6ulTyigZv7OEMQZRSyTrZgBNPLAiMHVoZVLkZF2NZse/4tHG/7i0Lio+m2z8WsjSQa04LPjtnlCSpYrug8EoGN4NruaRBDBIlTV9w6Rvz4YYB4iDht14ifF6XJVl/uo4jWKHAJC6Bc5IwFD80A/jYmx1vbLwvwVgYGCOW80WSFUGSnBSwVDLsLufXWt9ct8Kql3ICl2/iLO1ZZofELOgddV8yds1vrBdhn8jP1QCrTqtS0ITgLOabDEJMAma7St5R+Oa7kAj2zlVh/A0WXolGD04ReHnuiNN7S6C/ePpSTP/fMSa5bCrmQkw93fEgHdNOpQq9DwYNa9wEijtdEJsw5Kl+B73SNIhG+X7h1sN1DsCao2v1EPtxzaXw51kfJHzhtdCKnKXYap8Lk+twZ6KKH3QZaSsq2LSL7E7da3ZXwo4oRMjV5OTkWaklGKZmzJaMrUnpbJMQUfb4tyNDBJ/52arcTLOn9b72d+927qUfKNCwm+Ma5tUJZ9PkOUxObbXgguXvgVVBL5Li0kfcCyYQC8HcNu1ZkmP7FbJzYo4m/e8v+jASNorC+49BvDE8WlSw+6dJQoP29S0u2OyZ4CSLYvqVDz5WIiLYR9OH2Dk7dB6n69jgngYkEtTF+1TWwQOR0d+6xungqU86W+4JvPkBx4YwVHmnw+iFRNqJd/OTmBVVDYpEkC6N38SuCRAuZwjcVfl6ERm0C7FEJOlmXqs6UeUuPNxuCE7yKcD4J0JGhVjSxN4c3dbV0aipIt9/ob+I2rXA3TUVOU7G+svsboOo1bHlUfoL1HQcasUHwst6ScsrlzJtSLActVb8QMh2iOw5zlxHGyq/MqU+tcquLZc3ctYZwzXatMjEqNqP4nHF1HHkYavwrhFr4U6lbnPZ6ZlUCdrXKDsn7BrRnn2MQ4My7k/Cau6174Zln9RRB4LRs45P3oDUug3Xc8/erekvf8L1HRFsiHzv+8ssvO/dfOEkD1hUTacNejaWt6HXCPC3zXhnwsk/lw3TLXSuxWO3hDpxBuAy8gRmCVZq4HPLAod/lEIpXQY3Rij8mmU28tCnhXrjxTTCe1ci9F4dy2IigN/1YeA5k+6gVpNmV9NvSDlPKN5vkGymFXokBiHkBKOd/uIkNORPYbjEGq17mO4CTfbJ8uDKneibC22VOaqB5Xk3/Xp4zz4TVCCr4xznVui7OOeuNaUptrypsMRtWfYiJ52s8TYiOsQ4SyKmXdCy51k16wGT9/ZrPpEktYFsC6g4SdY04MvBaowPYsudy9uOyUnZFuxPX+SLusRfLHlgkZU2SxrPMlHbOFpFVHKgCiKXLBoDWL3Bkn/9SUfoFYWf65KEfTeKJc7FCxsH/r6ngLMlJidn/fxI0D5FyVh0FVJnpdhGgv8jXo28nVCRLTNwRl5TzUjdz4EIvSxugmFR38qbpdkYeOlCdggE8safaxy+xXpYX0diec3x9MYvV7cWMtOCmTwKkevlPUTL+UuQiWKNf7fOzmGG67wNKfFtwmMxCbQ20nT2eAT6TjZ9eowW/E7s8i3cpCycJjeshuz7Y1G5ECLjSKe2UjCEGhBqRR8T/cBObQBErrilsRgjdO6w4UvBy4FkuzVpvB2cKhMt3HocBLNMlhUHaIgQfh1C6PMRT5S10+e659+qvF0GZOCJ7Y6b5Z9X9XipJEJo8ISC5uR7Z/WmEqgVtTdfezJvnGoibun2FjFriTP1W+HqHcGTpgwfWrVwHuGXYreckkzZaP//Q5uHajxj4AYQhEmjcQ6jcRNAWxOYLKZWqy7d2sDw79wdvKT6kfKg3wiKB/Arl1Mv33b1FkCfr+MRu1nzWeuAG72su6L91T0CVymOQQCZpA2ubPYrIH3vyKmRLvFIgwIwrWRZtsY8dvznOGmHvlEgUw+C2Ln0loc6fDDQ4Jk20LSXzRtsCeJ+PUahaDT9aW4xgWXHTAjGXjaZUak+5rs3ck7ZH3
vbvVtbfg3/PKiCKWKXtt/2ZIZTxgdQVlGhZcz/LVzjIPOSTfKgcyz+pVXFq+hC2fubtsgTTm6DkXzP5pFdens05f27nXdr5FYNoYSJcU7w58gHTZOIez7oiMdkG+FObg9u/cpz923hOeBOmYaoQ9JePCQzQkWyPwHVD1IFsGBKtVIwahdKnpmjxubdr2ehdYbC7SsMD772YdPKOx07ipmz56pGBVqG7nUqYDxX2Mr5hvrHKbnLmYQykBSfazMUmK5/c+dNngn7QU8kN0fxFDmDzaUeQcZwbBNVlfp51Gsc+LvYoyXMUhBu53wQgNmq+ZJCSO4V+XBIgJAKIOSlVDq0GXe1VtyfNC4XcU0ey84K0/mD3RmtNGc2YYNp6OPqwzJAIexQcSr8pehN53fqGuGrRX3EqPzxNZwM7W+VzzpT+Ky2jpLl2YrQyuROIAMV09P8HoDxBorSHAQXkijs2ByIAGbQqJhwtbcovSPIMqvHPeKKMuFfNzKnmCkUysklNxQynM2clwKwbOchghZOBNH2sQ4atfhHjdo66dXtzSmngyPujZcwinq1b1VUbG1n9BuusgdUrhpt/28MJRYLt4tJFVBqYGu98Ewa1oX+7xqCmhEe2us43fg7EYBpwLBAVDNsohVO69upLR+Yy2C1lhqJSSbO+JLKg70/7onpMI8JcCtiNYOMFYMix9ynkpBf8gN+cM/VgL4cldHYwbaAJXgnD7PxdmDIy7r8oZnGOHE//a3iDyB+Xqy0t9c41OYYn6PkB32BqRHFUvbzU+6kaDpQD/gk0EBTb51SLmy3IBBLKpKw1R0CVfS2wY5XX7vYYpgAMQzsoZpL3Ep0NpcRqtutcec0o0VXkd3B9wXJhDG+en0MaY9vc6V4g+nT8Z2jZw0A6lXnbDxlQN/CmvvrcsexHGGIj6vjpQs/oSyvOYaD1gVTWdQgcPhCYZGVH5O/llnKfxsRFVU3g6XvL4ND0oQ7S98eHRhz+8TqOx9Se47vEEC1O3bNDf6Rnnm+aB9vD0GKQ6iAETWI74yF5HrCpZY2XIDK8OgOQJoiWpDWaDxfNjK8nWMSjV8bnEdIzLP08p8fLWP/+JPeJkUB91mxmi5mhMjKFpKo/lXtvM2E8zmzkjo/VyLGYaij7EgF1XNIWRC5LWlsrrPiqVfQlmftDzjaG+jCx/47NLws=" + JULIA_PKG_SERVER: "" if: build.message !~ /\[skip docs\]/ && !build.pull_request.draft timeout_in_minutes: 1000 diff --git a/Project.toml b/Project.toml index 1b6ee765b..aee97ae05 100644 --- a/Project.toml +++ b/Project.toml @@ -51,7 +51,6 @@ Lux = "1" LuxCUDA = "0.3.2" LuxCore = "1" LuxLib = "1.2" -MLDatasets = "0.7.14" NNlib = "0.9.22" OneHotArrays = "0.2.5" Optimisers = "0.3" @@ -89,7 +88,6 @@ ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" Hwloc = "0e44f5e4-bd66-52a0-8798-143a42290a1d" InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" LuxCUDA = "d0bbae9a-e099-4d5b-a835-1c6931763bda" -MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458" NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" OneHotArrays = "0b1bfda6-eb8a-41d2-88d8-f5af5cad476f" Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2" @@ -107,4 +105,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [targets] -test = ["Aqua", "BenchmarkTools", "ComponentArrays", "DataInterpolations", "DelayDiffEq", "DiffEqCallbacks", "Distances", "Distributed", "DistributionsAD", "ExplicitImports", "ForwardDiff", "Flux", "Hwloc", "InteractiveUtils", "LuxCUDA", "MLDatasets", "NNlib", "OneHotArrays", "Optimisers", "Optimization", "OptimizationOptimJL", "OptimizationOptimisers", "OrdinaryDiffEq", "Printf", "Random", "ReTestItems", "Reexport", "Statistics", "StochasticDiffEq", "Test", "Zygote"] +test = ["Aqua", "BenchmarkTools", "ComponentArrays", "DataInterpolations", "DelayDiffEq", "DiffEqCallbacks", "Distances", "Distributed", "DistributionsAD", "ExplicitImports", "ForwardDiff", "Flux", "Hwloc", "InteractiveUtils", "LuxCUDA", "NNlib", "OneHotArrays", "Optimisers", "Optimization", "OptimizationOptimJL", "OptimizationOptimisers", "OrdinaryDiffEq", "Printf", "Random", "ReTestItems", "Reexport", "Statistics", "StochasticDiffEq", "Test", "Zygote"] diff --git a/docs/Project.toml b/docs/Project.toml index 87d3a86d0..09aa6590e 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -42,7 +42,7 @@ CUDA = "5" ComponentArrays = "0.15" DataDeps = "0.7" DataFrames = "1" -DiffEqFlux = "3" +DiffEqFlux = "4" Distances = "0.10.7" Distributions = "0.25.78" Documenter = "1" @@ -50,9 +50,9 @@ Flux = "0.14" ForwardDiff = "0.10" IterTools = "1" LinearAlgebra = "1" -Lux = "0.5.5" +Lux = "1" LuxCUDA = "0.3" -MLDatasets = "0.7" +MLDatasets = 
"0.7.18" MLUtils = "0.4" NNlib = "0.9" OneHotArrays = "0.2" From 99c337075fe885e70dd200561395ced93d0c1e15 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Mon, 16 Sep 2024 19:55:27 -0400 Subject: [PATCH 5/7] fix: missing `Layers` --- docs/src/examples/hamiltonian_nn.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/src/examples/hamiltonian_nn.md b/docs/src/examples/hamiltonian_nn.md index ddfa11b7a..dfe0ad211 100644 --- a/docs/src/examples/hamiltonian_nn.md +++ b/docs/src/examples/hamiltonian_nn.md @@ -33,7 +33,7 @@ dataloader = ncycle( for i in 1:(size(data, 2) ÷ B)), NEPOCHS) -hnn = HamiltonianNN(Chain(Dense(2 => 64, relu), Dense(64 => 1)); ad = AutoZygote()) +hnn = Layers.HamiltonianNN(Chain(Dense(2 => 64, relu), Dense(64 => 1)); ad = AutoZygote()) ps, st = Lux.setup(Xoshiro(0), hnn) ps_c = ps |> ComponentArray @@ -95,12 +95,12 @@ dataloader = ncycle( NEPOCHS) ``` -### Training the HamiltonianNN +### Training the -We parameterize the HamiltonianNN with a small MultiLayered Perceptron. HNNs are trained by optimizing the gradients of the Neural Network. Zygote currently doesn't support nesting itself, so we will be using ForwardDiff in the training loop to compute the gradients of the HNN Layer for Optimization. +We parameterize the with a small MultiLayered Perceptron. HNNs are trained by optimizing the gradients of the Neural Network. Zygote currently doesn't support nesting itself, so we will be using ForwardDiff in the training loop to compute the gradients of the HNN Layer for Optimization. ```@example hamiltonian -hnn = HamiltonianNN(Chain(Dense(2 => 64, relu), Dense(64 => 1)); ad = AutoZygote()) +hnn = Layers.HamiltonianNN(Chain(Dense(2 => 64, relu), Dense(64 => 1)); ad = AutoZygote()) ps, st = Lux.setup(Xoshiro(0), hnn) ps_c = ps |> ComponentArray From 7a5f854c16d7ddf9fad82f8bbdca562ad4361087 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Mon, 16 Sep 2024 20:29:16 -0400 Subject: [PATCH 6/7] fix: accidental deletion --- docs/src/examples/hamiltonian_nn.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/src/examples/hamiltonian_nn.md b/docs/src/examples/hamiltonian_nn.md index dfe0ad211..9c1716bad 100644 --- a/docs/src/examples/hamiltonian_nn.md +++ b/docs/src/examples/hamiltonian_nn.md @@ -33,7 +33,7 @@ dataloader = ncycle( for i in 1:(size(data, 2) ÷ B)), NEPOCHS) -hnn = Layers.HamiltonianNN(Chain(Dense(2 => 64, relu), Dense(64 => 1)); ad = AutoZygote()) +hnn = Layers.HamiltonianNN{true}(Layers.MLP(2, (64, 1)); autodiff = AutoZygote()) ps, st = Lux.setup(Xoshiro(0), hnn) ps_c = ps |> ComponentArray @@ -95,12 +95,12 @@ dataloader = ncycle( NEPOCHS) ``` -### Training the +### Training the HamiltonianNN We parameterize the with a small MultiLayered Perceptron. HNNs are trained by optimizing the gradients of the Neural Network. Zygote currently doesn't support nesting itself, so we will be using ForwardDiff in the training loop to compute the gradients of the HNN Layer for Optimization. 
```@example hamiltonian -hnn = Layers.HamiltonianNN(Chain(Dense(2 => 64, relu), Dense(64 => 1)); ad = AutoZygote()) +hnn = Layers.HamiltonianNN{true}(Layers.MLP(2, (64, 1)); autodiff = AutoZygote()) ps, st = Lux.setup(Xoshiro(0), hnn) ps_c = ps |> ComponentArray From 3586821b0107f09544e061f8c96c7f55538e5736 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Tue, 17 Sep 2024 10:40:29 -0400 Subject: [PATCH 7/7] ci: run tests with coverage enabled --- .github/workflows/CI.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 839ed6a4a..935e8c8de 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -42,8 +42,6 @@ jobs: ${{ runner.os }}- - uses: julia-actions/julia-buildpkg@v1 - uses: julia-actions/julia-runtest@v1 - with: - coverage: false env: GROUP: ${{ matrix.group }} - uses: julia-actions/julia-processcoverage@v1