diff --git a/Project.toml b/Project.toml
index fe72351..e950ba6 100644
--- a/Project.toml
+++ b/Project.toml
@@ -24,6 +24,7 @@ SciMLBase = "2"
 julia = "1.10"
 
 [extras]
+DifferentialEquations = "0c46a032-eb83-5123-abaf-570d42b7fbaa"
 Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
 NLopt = "76087f3c-5699-56af-9a33-bf431cd00edd"
 NeuralPDE = "315f7962-48a3-4962-8226-d0f33b1235f0"
@@ -35,4 +36,4 @@ SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["SafeTestsets", "Test", "Lux", "Optimization", "OptimizationOptimJL", "OptimizationOptimisers", "NLopt", "Random", "NeuralPDE"]
+test = ["SafeTestsets", "Test", "Lux", "Optimization", "OptimizationOptimJL", "OptimizationOptimisers", "NLopt", "Random", "NeuralPDE", "DifferentialEquations"]
diff --git a/src/NeuralLyapunov.jl b/src/NeuralLyapunov.jl
index 78171b2..b6e41b7 100644
--- a/src/NeuralLyapunov.jl
+++ b/src/NeuralLyapunov.jl
@@ -13,6 +13,7 @@ include("decrease_conditions.jl")
 include("decrease_conditions_RoA_aware.jl")
 include("NeuralLyapunovPDESystem.jl")
 include("local_Lyapunov.jl")
+include("policy_search.jl")
 
 export NeuralLyapunovPDESystem, NumericalNeuralLyapunovFunctions
 export local_Lyapunov
@@ -20,6 +21,7 @@ export NeuralLyapunovSpecification, NeuralLyapunovStructure, UnstructuredNeuralL
        NonnegativeNeuralLyapunov, PositiveSemiDefiniteStructure,
        LyapunovMinimizationCondition, StrictlyPositiveDefinite, PositiveSemiDefinite,
        DontCheckNonnegativity, LyapunovDecreaseCondition, AsymptoticDecrease,
-       ExponentialDecrease, DontCheckDecrease, RoAAwareDecreaseCondition, make_RoA_aware
+       ExponentialDecrease, DontCheckDecrease, RoAAwareDecreaseCondition, make_RoA_aware,
+       add_policy_search, get_policy
 
 end
diff --git a/src/NeuralLyapunovPDESystem.jl b/src/NeuralLyapunovPDESystem.jl
index 7096e90..51391e8 100644
--- a/src/NeuralLyapunovPDESystem.jl
+++ b/src/NeuralLyapunovPDESystem.jl
@@ -35,7 +35,8 @@ function NeuralLyapunovPDESystem(
         fixed_point = zeros(length(lb)),
         p = SciMLBase.NullParameters(),
         state_syms = [],
-        parameter_syms = []
+        parameter_syms = [],
+        policy_search::Bool = false
 )::Tuple{PDESystem, Function}
     ########################## Define state symbols ###########################
     state_dim = length(lb)
@@ -78,7 +79,8 @@ function NeuralLyapunovPDESystem(
         fixed_point,
         state,
         params,
-        defaults
+        defaults,
+        policy_search
     )
 end
@@ -119,7 +121,8 @@ function NeuralLyapunovPDESystem(
         fixed_point = fixed_point,
         p = p,
         state_syms = SciMLBase.variable_symbols(dynamics.sys),
-        parameter_syms = p_syms
+        parameter_syms = p_syms,
+        policy_search = false
     )
 end
@@ -179,7 +182,8 @@ function NeuralLyapunovPDESystem(
         fixed_point,
         state,
         Num.(parameters(dynamics)),
-        ModelingToolkit.get_defaults(dynamics)
+        ModelingToolkit.get_defaults(dynamics),
+        false
     )
 end
@@ -191,12 +195,14 @@ function _NeuralLyapunovPDESystem(
         fixed_point,
         state,
         params,
-        defaults
+        defaults,
+        policy_search::Bool
 )::Tuple{PDESystem, Function}
     ########################## Unpack specifications ##########################
     structure = spec.structure
     minimzation_condition = spec.minimzation_condition
     decrease_condition = spec.decrease_condition
+    f_call = structure.f_call
 
     ############################# Define domains ##############################
     state_dim = length(lb)
@@ -245,6 +251,10 @@ function _NeuralLyapunovPDESystem(
         push!(bcs, V_sym(fixed_point) ~ 0.0)
     end
 
+    if policy_search
+        append!(bcs, f_call(dynamics, φ, fixed_point, params, 0.0) .~ zeros(state_dim))
+    end
+
     if isempty(eqs) && isempty(bcs)
         error("No training conditions specified.")
     end
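For reference, the only change `policy_search = true` makes to the generated `PDESystem` is the boundary-condition block appended above: one equation per state dimension requiring the learned closed-loop vector field to vanish at `fixed_point`. Writing `u` for the `control_structure` and `φ` for the symbolic network (names as in `policy_search.jl` below), the appended conditions read, schematically,

    dynamics(fixed_point, u(φ(fixed_point)[(V_dim + 1):end]), params, 0.0) ~ 0   # componentwise

so training simultaneously pushes the policy to make `fixed_point` an actual equilibrium of the controlled system.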
diff --git a/src/conditions_specification.jl b/src/conditions_specification.jl
index e0044e7..010e7b8 100644
--- a/src/conditions_specification.jl
+++ b/src/conditions_specification.jl
@@ -5,18 +5,23 @@ Specifies the structure of the neural Lyapunov function and its derivative.
 
 Allows the user to define the Lyapunov in terms of the neural network to structurally
 enforce Lyapunov conditions.
 
-`network_dim` is the dimension of the output of the neural network.
 `V(phi::Function, state, fixed_point)` takes in the neural network, the state, and the
 fixed point, and outputs the value of the Lyapunov function at `state`.
 `V̇(phi::Function, J_phi::Function, f::Function, state, params, t, fixed_point)` takes in
 the neural network, jacobian of the neural network, dynamics, state, parameters and time
 (for calling the dynamics, when relevant), and fixed point, and outputs the time derivative
 of the Lyapunov function at `state`.
+`f_call(dynamics::Function, phi::Function, state, p, t)` takes in the dynamics, the neural
+network, the state, the parameters of the dynamics, and time, and outputs the derivative of
+the state; this is useful for making closed-loop dynamics which depend on the neural
+network, such as in the policy search case.
+`network_dim` is the dimension of the output of the neural network.
 """
 struct NeuralLyapunovStructure
     V::Function
     ∇V::Function
     V̇::Function
+    f_call::Function
     network_dim::Integer
 end
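To make the `f_call` contract concrete, here is a minimal sketch (the names `f`, `net`, and `V_dim` below are illustrative, not code from this PR). The unmodified structures simply forward to the open-loop dynamics, while `add_policy_search` builds the control input from the trailing network outputs:

    # Open-loop form stored by the unmodified structures:
    f_call_default = (f, net, state, p, t) -> f(state, p, t)

    # Closed-loop form built by add_policy_search, here with V_dim = 1 and
    # control_structure = identity, so the control input is net(state)[2:end]:
    f_call_policy = (f, net, state, p, t) -> f(state, net(state)[2:end], p, t)

    # Hypothetical f(x, u, p, t) dynamics and a stand-in network with outputs [V; u]:
    f(x, u, p, t) = [x[2], -p[1]^2 * sin(x[1]) + u[1]]
    net(x) = [x[1]^2 + x[2]^2, -x[1] - x[2]]

    f_call_policy(f, net, [0.1, 0.0], [1.0], 0.0)   # == f([0.1, 0.0], [-0.1], [1.0], 0.0)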
+""" +function add_policy_search( + lyapunov_structure::NeuralLyapunovStructure, + new_dims::Integer; + control_structure::Function = identity +)::NeuralLyapunovStructure + let V = lyapunov_structure.V, ∇V = lyapunov_structure.∇V, V̇ = lyapunov_structure.V̇, + V_dim = lyapunov_structure.network_dim, nd = new_dims, u = control_structure + + NeuralLyapunovStructure( + function (net, state, fixed_point) + if length(size(state)) == 1 + if V_dim == 1 + V(st -> net(st)[1], state, fixed_point) + else + V(st -> net(st)[1:V_dim], state, fixed_point) + end + else + V(st -> net(st)[1:V_dim, :], state, fixed_point) + end + end, + function (net, J_net, state, fixed_point) + ∇V(st -> net(st)[1:V_dim], st -> J_net(st)[1:V_dim, :], state, fixed_point) + end, + function (net, J_net, f, state, params, t, fixed_point) + V̇(st -> net(st)[1:V_dim], st -> J_net(st)[1:V_dim, :], + (st, p, t) -> f(st, u(net(st)[(V_dim + 1):end]), p, t), state, params, + t, fixed_point) + end, + (f, net, state, p, t) -> f(state, u(net(state)[(V_dim + 1):end]), p, t), + V_dim + nd + ) + end +end + +function get_policy( + phi, + θ, + network_func::Function, + dim_u::Integer; + u_func::Function = identity +) + function policy(state::AbstractVector) + u_func(network_func(phi, θ, state)[(end - dim_u + 1):end]) + end + + policy(state::AbstractMatrix) = mapslices(policy, state, dims = [1]) + + return policy +end diff --git a/src/structure_specification.jl b/src/structure_specification.jl index 20c23db..b1f6af5 100644 --- a/src/structure_specification.jl +++ b/src/structure_specification.jl @@ -3,6 +3,9 @@ Creates a `NeuralLyapunovStructure` where the Lyapunov function is the neural network evaluated at the state. This does not structurally enforce any Lyapunov conditions. + +Dynamics are assumed to be in `f(state, p, t)` form, as in an `ODEFunction`. For +`f(state, input, p, t)`, consider using `add_policy_search`. """ function UnstructuredNeuralLyapunov()::NeuralLyapunovStructure NeuralLyapunovStructure( @@ -10,6 +13,7 @@ function UnstructuredNeuralLyapunov()::NeuralLyapunovStructure (net, grad_net, state, fixed_point) -> grad_net(state), (net, grad_net, f, state, params, t, fixed_point) -> grad_net(state) ⋅ f(state, params, t), + (f, net, state, p, t) -> f(state, p, t), 1 ) end @@ -33,6 +37,9 @@ Lyapunov loss function. defaults to `ForwardDiff.gradient`. The neural network output has dimension `network_dim`. + +Dynamics are assumed to be in `f(state, p, t)` form, as in an `ODEFunction`. For +`f(state, input, p, t)`, consider using `add_policy_search`. """ function NonnegativeNeuralLyapunov( network_dim::Integer; @@ -50,6 +57,7 @@ function NonnegativeNeuralLyapunov( (net, J_net, f, state, params, t, fixed_point) -> 2 * dot( net(state), J_net(state), f(state, params, t)), + (f, net, state, p, t) -> f(state, p, t), network_dim ) else @@ -68,6 +76,7 @@ function NonnegativeNeuralLyapunov( J_net(state), f(state, params, t) ) + δ * grad_pos_def(state, fixed_point) ⋅ f(state, params, t), + (f, net, state, p, t) -> f(state, p, t), network_dim ) end @@ -94,6 +103,9 @@ gradient of `non_neg(net, state, fixed_point)` with respect to `state` at `state defaults to `ForwardDiff.gradient`. The neural network output has dimension `network_dim`. + +Dynamics are assumed to be in `f(state, p, t)` form, as in an `ODEFunction`. For +`f(state, input, p, t)`, consider using `add_policy_search`. 
""" function PositiveSemiDefiniteStructure( network_dim::Integer; @@ -133,6 +145,7 @@ function PositiveSemiDefiniteStructure( (f(state, params, t) ⋅ grad_non_neg( net, J_net, state, fixed_point)), + (f, net, state, p, t) -> f(state, p, t), network_dim ) end diff --git a/test/inverted_pendulum.jl b/test/inverted_pendulum.jl new file mode 100644 index 0000000..f35a87e --- /dev/null +++ b/test/inverted_pendulum.jl @@ -0,0 +1,246 @@ +using LinearAlgebra +using NeuralPDE, Lux, ModelingToolkit +using Optimization, OptimizationOptimisers, OptimizationOptimJL, NLopt +using NeuralLyapunov +using Random +using Test + +Random.seed!(200) + +println("Inverted Pendulum - Policy Search") + +######################### Define dynamics and domain ########################## + +function controlled_pendulum_dynamics(x, u, p, t) + θ, ω = x + ζ, ω_0 = p + τ = u[] + return [ω + -2ζ * ω - ω_0^2 * sin(θ) + τ] +end + +lb = [0.0, -10.0]; +ub = [2π, 10.0]; +upright_equilibrium = [π, 0.0] +p = [0.5, 1.0] +state_syms = [:θ, :ω] +parameter_syms = [:ζ, :ω_0] + +####################### Specify neural Lyapunov problem ####################### + +# Define neural network discretization +# We use an input layer that is periodic with period 2π with respect to θ +dim_state = length(lb) +dim_hidden = 15 +dim_phi = 2 +dim_u = 1 +dim_output = dim_phi + dim_u +chain = [Lux.Chain( + Lux.WrappedFunction(x -> vcat( + transpose(sin.(x[1, :])), + transpose(cos.(x[1, :])), + transpose(x[2, :]) + )), + Dense(3, dim_hidden, tanh), + Dense(dim_hidden, dim_hidden, tanh), + Dense(dim_hidden, 1, use_bias = false) + ) for _ in 1:dim_output] + +# Define neural network discretization +strategy = GridTraining(0.1) +discretization = PhysicsInformedNN(chain, strategy) + +# Define neural Lyapunov structure +structure = PositiveSemiDefiniteStructure( + dim_phi; + pos_def = function (state, fixed_point) + θ, ω = state + θ_eq, ω_eq = fixed_point + log(1.0 + (sin(θ) - sin(θ_eq))^2 + (cos(θ) - cos(θ_eq))^2 + (ω - ω_eq)^2) + end +) +structure = add_policy_search( + structure, + dim_u +) +minimization_condition = DontCheckNonnegativity(check_fixed_point = false) + +# Define Lyapunov decrease condition +decrease_condition = AsymptoticDecrease(strict = true) + +# Construct neural Lyapunov specification +spec = NeuralLyapunovSpecification( + structure, + minimization_condition, + decrease_condition +) + +############################# Construct PDESystem ############################# + +pde_system, network_func = NeuralLyapunovPDESystem( + controlled_pendulum_dynamics, + lb, + ub, + spec; + fixed_point = upright_equilibrium, + p = p, + state_syms = state_syms, + parameter_syms = parameter_syms, + policy_search = true +) + +######################## Construct OptimizationProblem ######################## + +sym_prob = symbolic_discretize(pde_system, discretization) +prob = discretize(pde_system, discretization) + +########################## Solve OptimizationProblem ########################## + +res = Optimization.solve(prob, OptimizationOptimisers.Adam(); maxiters = 300) +prob = Optimization.remake(prob, u0 = res.u) +res = Optimization.solve(prob, BFGS(); maxiters = 300) + +###################### Get numerical numerical functions ###################### + +V_func, V̇_func, ∇V_func = NumericalNeuralLyapunovFunctions( + discretization.phi, + res.u, + network_func, + structure, + controlled_pendulum_dynamics, + upright_equilibrium; + p = p +) + +u = get_policy(discretization.phi, res.u, network_func, dim_u) + +################################## Simulate 
diff --git a/test/inverted_pendulum.jl b/test/inverted_pendulum.jl
new file mode 100644
index 0000000..f35a87e
--- /dev/null
+++ b/test/inverted_pendulum.jl
@@ -0,0 +1,246 @@
+using LinearAlgebra
+using NeuralPDE, Lux, ModelingToolkit
+using Optimization, OptimizationOptimisers, OptimizationOptimJL, NLopt
+using NeuralLyapunov
+using Random
+using Test
+
+Random.seed!(200)
+
+println("Inverted Pendulum - Policy Search")
+
+######################### Define dynamics and domain ##########################
+
+function controlled_pendulum_dynamics(x, u, p, t)
+    θ, ω = x
+    ζ, ω_0 = p
+    τ = u[]
+    return [ω
+            -2ζ * ω - ω_0^2 * sin(θ) + τ]
+end
+
+lb = [0.0, -10.0];
+ub = [2π, 10.0];
+upright_equilibrium = [π, 0.0]
+p = [0.5, 1.0]
+state_syms = [:θ, :ω]
+parameter_syms = [:ζ, :ω_0]
+
+####################### Specify neural Lyapunov problem #######################
+
+# Define neural network
+# We use an input layer that is periodic with period 2π with respect to θ
+dim_state = length(lb)
+dim_hidden = 15
+dim_phi = 2
+dim_u = 1
+dim_output = dim_phi + dim_u
+chain = [Lux.Chain(
+             Lux.WrappedFunction(x -> vcat(
+                 transpose(sin.(x[1, :])),
+                 transpose(cos.(x[1, :])),
+                 transpose(x[2, :])
+             )),
+             Dense(3, dim_hidden, tanh),
+             Dense(dim_hidden, dim_hidden, tanh),
+             Dense(dim_hidden, 1, use_bias = false)
+         ) for _ in 1:dim_output]
+
+# Define neural network discretization
+strategy = GridTraining(0.1)
+discretization = PhysicsInformedNN(chain, strategy)
+
+# Define neural Lyapunov structure
+structure = PositiveSemiDefiniteStructure(
+    dim_phi;
+    pos_def = function (state, fixed_point)
+        θ, ω = state
+        θ_eq, ω_eq = fixed_point
+        log(1.0 + (sin(θ) - sin(θ_eq))^2 + (cos(θ) - cos(θ_eq))^2 + (ω - ω_eq)^2)
+    end
+)
+structure = add_policy_search(
+    structure,
+    dim_u
+)
+minimization_condition = DontCheckNonnegativity(check_fixed_point = false)
+
+# Define Lyapunov decrease condition
+decrease_condition = AsymptoticDecrease(strict = true)
+
+# Construct neural Lyapunov specification
+spec = NeuralLyapunovSpecification(
+    structure,
+    minimization_condition,
+    decrease_condition
+)
+
+############################# Construct PDESystem #############################
+
+pde_system, network_func = NeuralLyapunovPDESystem(
+    controlled_pendulum_dynamics,
+    lb,
+    ub,
+    spec;
+    fixed_point = upright_equilibrium,
+    p = p,
+    state_syms = state_syms,
+    parameter_syms = parameter_syms,
+    policy_search = true
+)
+
+######################## Construct OptimizationProblem ########################
+
+sym_prob = symbolic_discretize(pde_system, discretization)
+prob = discretize(pde_system, discretization)
+
+########################## Solve OptimizationProblem ##########################
+
+res = Optimization.solve(prob, OptimizationOptimisers.Adam(); maxiters = 300)
+prob = Optimization.remake(prob, u0 = res.u)
+res = Optimization.solve(prob, BFGS(); maxiters = 300)
+
+########################### Get numerical functions ###########################
+
+V_func, V̇_func, ∇V_func = NumericalNeuralLyapunovFunctions(
+    discretization.phi,
+    res.u,
+    network_func,
+    structure,
+    controlled_pendulum_dynamics,
+    upright_equilibrium;
+    p = p
+)
+
+u = get_policy(discretization.phi, res.u, network_func, dim_u)
+
+############################## Evaluate on a grid #############################
+
+xs = (-2π):0.02:(2π)
+ys = lb[2]:0.02:ub[2]
+states = Iterators.map(collect, Iterators.product(xs, ys))
+V_predict = vec(V_func(hcat(states...)))
+dVdt_predict = vec(V̇_func(hcat(states...)))
+
+#################################### Tests ####################################
+
+# Network structure should enforce positive definiteness
+@test V_func(upright_equilibrium) == 0.0
+@test min(V_func(upright_equilibrium), minimum(V_predict)) ≥ 0.0
+
+# Network structure should enforce periodicity in θ
+x0 = (ub .- lb) .* rand(2, 100) .+ lb
+@test all(isapprox.(V_func(x0), V_func(x0 .+ [2π, 0.0]); rtol = 1e-3))
+
+# Training should result in a fixed point at the upright equilibrium
+@test all(isapprox.(
+    controlled_pendulum_dynamics(upright_equilibrium, u(upright_equilibrium), p, 0.0),
+    0.0; atol = 1e-8))
+@test V̇_func(upright_equilibrium) == 0.0
+
+# V̇ should be negative almost everywhere
+@test sum(dVdt_predict .> 0) / length(dVdt_predict) < 1e-3
+
+################################## Simulate ###################################
+
+using DifferentialEquations
+
+closed_loop_dynamics = ODEFunction(
+    (x, p, t) -> controlled_pendulum_dynamics(x, u(x), p, t);
+    sys = SciMLBase.SymbolCache(state_syms, parameter_syms)
+)
+
+# Starting still at bottom
+downward_equilibrium = zeros(2)
+ode_prob = ODEProblem(closed_loop_dynamics, downward_equilibrium, [0.0, 35.0], p)
+sol = solve(ode_prob, Tsit5())
+# plot(sol)
+
+# Should make it to the top
+θ_end, ω_end = sol.u[end]
+x_end, y_end = sin(θ_end), -cos(θ_end)
+@test all(isapprox.([x_end, y_end, ω_end], [0.0, 1.0, 0.0]; atol = 1e-3))
+
+# Starting at a random point
+x0 = lb .+ rand(2) .* (ub .- lb)
+ode_prob = ODEProblem(closed_loop_dynamics, x0, [0.0, 20.0], p)
+sol = solve(ode_prob, Tsit5())
+# plot(sol)
+
+# Should make it to the top
+θ_end, ω_end = sol.u[end]
+x_end, y_end = sin(θ_end), -cos(θ_end)
+@test all(isapprox.([x_end, y_end, ω_end], [0.0, 1.0, 0.0]; atol = 1e-3))
+
+#=
+# Print statistics
+println("V(π, 0) = ", V_func(upright_equilibrium))
+println(
+    "f([π, 0], u([π, 0])) = ",
+    controlled_pendulum_dynamics(upright_equilibrium, u(upright_equilibrium), p, 0.0)
+)
+println(
+    "V ∋ [",
+    min(V_func(upright_equilibrium),
+        minimum(V_predict)),
+    ", ",
+    maximum(V_predict),
+    "]"
+)
+println(
+    "V̇ ∋ [",
+    minimum(dVdt_predict),
+    ", ",
+    max(V̇_func(upright_equilibrium), maximum(dVdt_predict)),
+    "]"
+)
+
+# Plot results
+using Plots
+
+p1 = plot(
+    xs / pi,
+    ys,
+    V_predict,
+    linetype = :contourf,
+    title = "V",
+    xlabel = "θ/π",
+    ylabel = "ω",
+    c = :bone_1
+);
+p1 = scatter!([-2 * pi, 0, 2 * pi] / pi, [0, 0, 0],
+    label = "Downward Equilibria", color = :green, markershape = :+);
+p1 = scatter!(
+    [-pi, pi] / pi, [0, 0], label = "Upward Equilibria", color = :red, markershape = :x);
+p2 = plot(
+    xs / pi,
+    ys,
+    dVdt_predict,
+    linetype = :contourf,
+    title = "dV/dt",
+    xlabel = "θ/π",
+    ylabel = "ω",
+    c = :binary
+);
+p2 = scatter!([-2 * pi, 0, 2 * pi] / pi, [0, 0, 0],
+    label = "Downward Equilibria", color = :green, markershape = :+);
+p2 = scatter!([-pi, pi] / pi, [0, 0], label = "Upward Equilibria",
+    color = :red, markershape = :x, legend = false);
+p3 = plot(
+    xs / pi,
+    ys,
+    dVdt_predict .< 0,
+    linetype = :contourf,
+    title = "dV/dt < 0",
+    xlabel = "θ/π",
+    ylabel = "ω",
+    colorbar = false,
+    linewidth = 0
+);
+p3 = scatter!([-2 * pi, 0, 2 * pi] / pi, [0, 0, 0],
+    label = "Downward Equilibria", color = :green, markershape = :+);
Equilibria", + color = :red, markershape = :x, legend = false); +plot(p1, p2, p3) +=# diff --git a/test/runtests.jl b/test/runtests.jl index ce449d9..fae3962 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -10,4 +10,7 @@ using SafeTestsets @time @safetestset "Region of attraction estimation" begin include("roa_estimation.jl") end + @time @safetestset "Policy search - inverted pendulum" begin + include("inverted_pendulum.jl") + end end