diff --git a/docs/Project.toml b/docs/Project.toml
index 47219cb75..296f45e14 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -10,6 +10,7 @@ Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
 MLDataUtils = "cc2ba9b6-d476-5e6d-8eaf-a92d5412d41d"
@@ -18,6 +19,7 @@ Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2"
 Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba"
 OptimizationFlux = "253f991c-a7b2-45f8-8852-8b9a9df78a86"
 OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e"
+OptimizationOptimisers = "42dfb2eb-d2b4-4451-abcd-913932933ac1"
 OptimizationPolyalgorithms = "500b13db-7e66-49ce-bda4-eed966be6282"
 OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
@@ -40,6 +42,7 @@ Distributions = "0.25.78"
 Documenter = "0.27"
 Flux = "0.13.7"
 ForwardDiff = "0.10"
+IterTools = "1"
 Lux = "0.4.34"
 MLDataUtils = "0.5"
 MLDatasets = "0.7"
@@ -47,6 +50,7 @@ Optimisers = "0.2"
 Optimization = "3.9"
 OptimizationFlux = "0.1"
 OptimizationOptimJL = "0.1"
+OptimizationOptimisers = "0.1"
 OptimizationPolyalgorithms = "0.1"
 OrdinaryDiffEq = "6.31"
 Plots = "1.36"
diff --git a/docs/src/examples/hamiltonian_nn.md b/docs/src/examples/hamiltonian_nn.md
index 67fb1abc8..d49a6a370 100644
--- a/docs/src/examples/hamiltonian_nn.md
+++ b/docs/src/examples/hamiltonian_nn.md
@@ -9,7 +9,8 @@ m\ddot x + kx = 0
 
 Now we make some simplifying assumptions, and assign ``m = 1`` and ``k = 1``. Analytically solving this equation, we get ``x = sin(t)``. Hence, ``q = sin(t)``, and ``p = cos(t)``. Using these solutions, we generate our dataset and fit the `NeuralHamiltonianDE` to learn the dynamics of this system.
 
 ```@example hamiltonian_cp
-using Lux, DiffEqFlux, OrdinaryDiffEq, Statistics, Plots, Zygote, ForwardDiff, Optimisers, Random, ComponentArrays
+using Lux, DiffEqFlux, OrdinaryDiffEq, Statistics, Plots, Zygote, ForwardDiff, Random,
+    ComponentArrays, Optimization, OptimizationOptimisers, IterTools
 t = range(0.0f0, 1.0f0, length=1024)
 π_32 = Float32(π)
@@ -21,34 +22,34 @@ dpdt = -2π_32 .* q_t
 data = vcat(q_t, p_t)
 target = vcat(dqdt, dpdt)
 
 B = 256
-dataloader = ((selectdim(data, 2, ((i-1)*B+1):(min(i * B, size(data, 2)))))
-              for i in 1:(size(data, 2)÷B))
+NEPOCHS = 500
+dataloader = ncycle(((selectdim(data, 2, ((i - 1) * B + 1):(min(i * B, size(data, 2)))),
+                      selectdim(target, 2, ((i - 1) * B + 1):(min(i * B, size(data, 2)))))
+                     for i in 1:(size(data, 2) ÷ B)), NEPOCHS)
 
 hnn = HamiltonianNN(Lux.Chain(Lux.Dense(2, 64, relu), Lux.Dense(64, 1)))
 ps, st = Lux.setup(Random.default_rng(), hnn)
 ps_c = ps |> ComponentArray
 opt = ADAM(0.01f0)
-st_opt = Optimisers.setup(opt, ps_c)
-
-for epoch in 1:500
-    for (x, y) in dataloader
-        # Forward Mode over Reverse Mode for Training
-        gs = ForwardDiff.gradient(ps_c -> mean(abs2, first(hnn(data, ps_c, st)) .- target), ps_c)
-        st_opt, ps_c = Optimisers.update!(st_opt, ps_c, gs)
-    end
-    if epoch % 100 == 1
-        println("Loss at epoch $epoch: $(mean(abs2, first(hnn(data, ps_c, st)) .- target))")
-    end
+
+function loss_function(ps, data, target)
+    pred, st_ = hnn(data, ps, st)
+    return mean(abs2, pred .- target), pred
 end
 
-model = NeuralHamiltonianDE(
-    hnn, (0.0f0, 1.0f0),
-    Tsit5(), save_everystep = false,
-    save_start = true, saveat = t
-)
+opt_func = OptimizationFunction((ps, _, data, target) -> loss_function(ps, data, target),
+                                Optimization.AutoForwardDiff())
+opt_prob = OptimizationProblem(opt_func, ps_c)
+
+res = Optimization.solve(opt_prob, opt, dataloader)
+
+ps_trained = res.u
 
-pred = Array(first(model(data[:, 1], ps_c, st)))
+model = NeuralHamiltonianDE(hnn, (0.0f0, 1.0f0), Tsit5(), save_everystep=false,
+                            save_start=true, saveat=t)
+
+pred = Array(first(model(data[:, 1], ps_trained, st)))
 plot(data[1, :], data[2, :], lw=4, label="Original")
 plot!(pred[1, :], pred[2, :], lw=4, label="Predicted")
 xlabel!("Position (q)")
@@ -73,8 +74,11 @@ dpdt = -2π_32 .* q_t
 data = cat(q_t, p_t, dims = 1)
 target = cat(dqdt, dpdt, dims = 1)
 
-dataloader = ((selectdim(data, 2, ((i-1)*B+1):(min(i * B, size(data, 2)))))
-              for i in 1:(size(data, 2)÷B))
+B = 256
+NEPOCHS = 500
+dataloader = ncycle(((selectdim(data, 2, ((i - 1) * B + 1):(min(i * B, size(data, 2)))),
+                      selectdim(target, 2, ((i - 1) * B + 1):(min(i * B, size(data, 2)))))
+                     for i in 1:(size(data, 2) ÷ B)), NEPOCHS)
 ```
 
 ### Training the HamiltonianNN
@@ -87,18 +91,24 @@ ps, st = Lux.setup(Random.default_rng(), hnn)
 ps_c = ps |> ComponentArray
 opt = ADAM(0.01f0)
-st_opt = Optimisers.setup(opt, ps_c)
-
-for epoch in 1:500
-    for (x, y) in dataloader
-        # Forward Mode over Reverse Mode for Training
-        gs = ForwardDiff.gradient(ps_c -> mean(abs2, first(hnn(data, ps_c, st)) .- target), ps_c)
-        st_opt, ps_c = Optimisers.update!(st_opt, ps_c, gs)
-    end
-    if epoch % 100 == 1
-        println("Loss at epoch $epoch: $(mean(abs2, first(hnn(data, ps_c, st)) .- target))")
-    end
+
+function loss_function(ps, data, target)
+    pred, st_ = hnn(data, ps, st)
+    return mean(abs2, pred .- target), pred
 end
+
+function callback(ps, loss, pred)
+    println("Loss: ", loss)
+    return false
+end
+
+opt_func = OptimizationFunction((ps, _, data, target) -> loss_function(ps, data, target),
+                                Optimization.AutoForwardDiff())
+opt_prob = OptimizationProblem(opt_func, ps_c)
+
+res = solve(opt_prob, opt, dataloader; callback)
+
+ps_trained = res.u
 ```
 
 ### Solving the ODE using trained HNN
@@ -106,13 +116,10 @@ end
 In order to visualize the learned trajectories, we need to solve the ODE. We will use the `NeuralHamiltonianDE` layer, which is essentially a wrapper over `HamiltonianNN` layer, and solves the ODE.
 
 ```@example hamiltonian
-model = NeuralHamiltonianDE(
-    hnn, (0.0f0, 1.0f0),
-    Tsit5(), save_everystep = false,
-    save_start = true, saveat = t
-)
+model = NeuralHamiltonianDE(hnn, (0.0f0, 1.0f0), Tsit5(), save_everystep=false,
+                            save_start=true, saveat=t)
 
-pred = Array(first(model(data[:, 1], ps_c, st)))
+pred = Array(first(model(data[:, 1], ps_trained, st)))
 plot(data[1, :], data[2, :], lw=4, label="Original")
 plot!(pred[1, :], pred[2, :], lw=4, label="Predicted")
 xlabel!("Position (q)")
@@ -124,11 +131,15 @@ ylabel!("Momentum (p)")
 ## Expected Output
 
 ```julia
-Loss at epoch 1: 16.079542
-Loss at epoch 101: 0.017007854
-Loss at epoch 201: 0.0118254945
-Loss at epoch 301: 0.009933148
-Loss at epoch 401: 0.009158727
+Loss: 19.865715
+Loss: 18.196068
+Loss: 19.179213
+Loss: 19.58956
+⋮
+Loss: 0.02267044
+Loss: 0.019175647
+Loss: 0.02218909
+Loss: 0.018870523
 ```
 
 ## References
diff --git a/test/hamiltonian_nn.jl b/test/hamiltonian_nn.jl
index 4c35c0813..0164efb4f 100644
--- a/test/hamiltonian_nn.jl
+++ b/test/hamiltonian_nn.jl
@@ -1,4 +1,4 @@
-using DiffEqFlux, Zygote, OrdinaryDiffEq, ForwardDiff, Test, Optimisers, Random, Lux, ComponentArrays
+using DiffEqFlux, Zygote, OrdinaryDiffEq, ForwardDiff, Test, Optimisers, Random, Lux, ComponentArrays, Statistics
 
 # Checks for Shapes and Non-Zero Gradients
 u0 = rand(Float32, 6, 1)
@@ -41,6 +41,7 @@ loss(data, target, ps) = mean(abs2, first(hnn(data, ps, st)) .- target)
 initial_loss = loss(data, target, ps)
 
 for epoch in 1:100
+    global ps, st_opt
     # Forward Mode over Reverse Mode for Training
     gs = ForwardDiff.gradient(ps -> loss(data, target, ps), ps)
     st_opt, ps = Optimisers.update!(st_opt, ps, gs)
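
Reviewer note: the substantive change in the docs diff above is replacing the hand-rolled Optimisers.jl epoch loop with Optimization.jl's `solve` driven by a minibatch iterator built with `IterTools.ncycle`. The sketch below shows the same wiring on a throwaway least-squares problem so the batch-splatting convention is easy to see in isolation; `A`, `b`, `toy_loss`, and the batch construction are invented for illustration and are not part of this PR, and it assumes the same package versions added in docs/Project.toml.

```julia
# Minimal sketch of the ncycle + Optimization.solve minibatch pattern used above.
using Optimization, OptimizationOptimisers, IterTools

A = randn(Float32, 16, 4)          # toy regression data (invented for this sketch)
b = randn(Float32, 16)
u0 = zeros(Float32, 4)             # parameters to be fitted

B = 8                              # minibatch size
NEPOCHS = 100
# One (A-batch, b-batch) tuple per minibatch, cycled NEPOCHS times,
# mirroring the `ncycle` dataloader in the docs change.
batches = [(A[i:(i + B - 1), :], b[i:(i + B - 1)]) for i in 1:B:size(A, 1)]
dataloader = ncycle(batches, NEPOCHS)

# When `solve` is handed a data iterator, each element is splatted into the
# objective after the parameter vector and the (unused) hyperparameter slot.
toy_loss(u, _, A_batch, b_batch) = sum(abs2, A_batch * u .- b_batch)

opt_func = OptimizationFunction(toy_loss, Optimization.AutoForwardDiff())
opt_prob = OptimizationProblem(opt_func, u0)

res = Optimization.solve(opt_prob, ADAM(0.05f0), dataloader)
res.u   # fitted parameters, analogous to `ps_trained` in the docs example
```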