From a1135d2d915bee994906ef7aeb905917fd37c250 Mon Sep 17 00:00:00 2001 From: Nicholas Klugman <13633349+nicholaskl97@users.noreply.github.com> Date: Mon, 10 Jun 2024 16:51:27 -0400 Subject: [PATCH] Added policy search demo to docs --- docs/Project.toml | 1 + docs/make.jl | 3 +- docs/src/demos/policy_search.md | 426 ++++++++++++++++++++++++++++ docs/src/demos/roa_estimation.md | 6 +- test/inverted_pendulum_ODESystem.jl | 10 +- 5 files changed, 437 insertions(+), 9 deletions(-) create mode 100644 docs/src/demos/policy_search.md diff --git a/docs/Project.toml b/docs/Project.toml index 34a5ac0..57f966a 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,4 +1,5 @@ [deps] +DifferentialEquations = "0c46a032-eb83-5123-abaf-570d42b7fbaa" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" Lux = "b2108857-7c20-44ae-9111-449ecde12c47" ModelingToolkit = "961ee093-0014-501f-94e3-6117800e7a78" diff --git a/docs/make.jl b/docs/make.jl index ca3812d..cf1e395 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -28,7 +28,8 @@ makedocs(; ], "Demonstrations" => [ "demos/damped_SHO.md", - "demos/roa_estimation.md" + "demos/roa_estimation.md", + "demos/policy_search.md" ] ] ) diff --git a/docs/src/demos/policy_search.md b/docs/src/demos/policy_search.md new file mode 100644 index 0000000..ef2aea3 --- /dev/null +++ b/docs/src/demos/policy_search.md @@ -0,0 +1,426 @@ +# Policy search on the driven inverted pendulum + +In this demonstration, we'll search for a neural network policy to stabilize the upright equilibrium of the inverted pendulum. + +The governing differential equation for the driven pendulum is: +```math +\frac{d^2 \theta}{dt^2} + 2 \zeta \omega_0 \frac{d \theta}{dt} + \omega_0^2 \sin(\theta) = \tau, +``` +where ``\theta`` is the counterclockwise angle from the downward equilibrium, ``\zeta`` and ``\omega_0`` are system parameters, and ``\tau`` is our control input—the torque. + +We'll jointly train a neural controller ``\tau = u \left( \theta, \frac{d\theta}{dt} \right)`` and neural Lyapunov function ``V`` which will certify the stability of the closed-loop system. 
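+
+It can help to keep the equivalent first-order form of these dynamics in mind. Writing ``\omega`` for ``\frac{d\theta}{dt}`` (the same shorthand used later in this demo) and substituting the controller ``\tau = u(\theta, \omega)``, the closed-loop dynamics are:
+```math
+\frac{d}{dt} \begin{bmatrix} \theta \\ \omega \end{bmatrix} = \begin{bmatrix} \omega \\ u(\theta, \omega) - 2 \zeta \omega_0 \omega - \omega_0^2 \sin(\theta) \end{bmatrix}.
+```
+Since ``\theta`` is measured from the downward position, the upright equilibrium we want to stabilize is ``(\theta, \omega) = (\pi, 0)``.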
+
+## Copy-Pastable Code
+
+```julia
+using NeuralPDE, Lux, ModelingToolkit, NeuralLyapunov
+import Optimization, OptimizationOptimisers, OptimizationOptimJL
+using Random
+
+Random.seed!(200)
+
+######################### Define dynamics and domain ##########################
+
+@parameters ζ ω_0
+defaults = Dict([ζ => 0.5, ω_0 => 1.0])
+
+@variables t θ(t) τ(t) [input = true]
+Dt = Differential(t)
+DDt = Dt^2
+
+eqs = [DDt(θ) + 2ζ * ω_0 * Dt(θ) + ω_0^2 * sin(θ) ~ τ]
+
+@named driven_pendulum = ODESystem(
+    eqs,
+    t,
+    [θ, τ],
+    [ζ, ω_0];
+    defaults = defaults
+)
+
+bounds = [
+    θ ∈ (0, 2π),
+    Dt(θ) ∈ (-10.0, 10.0)
+]
+
+upright_equilibrium = [π, 0.0]
+
+####################### Specify neural Lyapunov problem #######################
+
+# Define neural network discretization
+# We use an input layer that is periodic with period 2π with respect to θ
+dim_state = length(bounds)
+dim_hidden = 20
+dim_phi = 3
+dim_u = 1
+dim_output = dim_phi + dim_u
+chain = [Lux.Chain(
+             PeriodicEmbedding([1], [2π]),
+             Dense(3, dim_hidden, tanh),
+             Dense(dim_hidden, dim_hidden, tanh),
+             Dense(dim_hidden, 1)
+         ) for _ in 1:dim_output]
+
+# Define neural network discretization
+strategy = QuasiRandomTraining(1_250)
+discretization = PhysicsInformedNN(chain, strategy)
+
+# Define neural Lyapunov structure
+structure = PositiveSemiDefiniteStructure(
+    dim_phi;
+    pos_def = function (state, fixed_point)
+        θ, ω = state
+        θ_eq, ω_eq = fixed_point
+        log(1.0 + (sin(θ) - sin(θ_eq))^2 + (cos(θ) - cos(θ_eq))^2 + (ω - ω_eq)^2)
+    end
+)
+structure = add_policy_search(
+    structure,
+    dim_u
+)
+minimization_condition = DontCheckNonnegativity(check_fixed_point = false)
+
+# Define Lyapunov decrease condition
+decrease_condition = AsymptoticDecrease(strict = true)
+
+# Construct neural Lyapunov specification
+spec = NeuralLyapunovSpecification(
+    structure,
+    minimization_condition,
+    decrease_condition
+)
+
+############################# Construct PDESystem #############################
+
+@named pde_system = NeuralLyapunovPDESystem(
+    driven_pendulum,
+    bounds,
+    spec;
+    fixed_point = upright_equilibrium
+)
+
+######################## Construct OptimizationProblem ########################
+
+prob = discretize(pde_system, discretization)
+
+########################## Solve OptimizationProblem ##########################
+
+res = Optimization.solve(prob, OptimizationOptimisers.Adam(); maxiters = 400)
+prob = Optimization.remake(prob, u0 = res.u)
+res = Optimization.solve(prob, OptimizationOptimJL.BFGS(); maxiters = 300)
+
+########################### Get numerical functions ###########################
+
+net = discretization.phi
+_θ = res.u.depvar
+
+(open_loop_pendulum_dynamics, _), state_order, p_order = ModelingToolkit.generate_control_function(
+    driven_pendulum; simplify = true)
+p = [defaults[param] for param in p_order]
+
+V_func, V̇_func = get_numerical_lyapunov_function(
+    net,
+    _θ,
+    structure,
+    open_loop_pendulum_dynamics,
+    upright_equilibrium;
+    p = p
+)
+
+u = get_policy(net, _θ, dim_output, dim_u)
+```
+
+## Detailed description
+
+```@setup policy_search
+using Random
+
+Random.seed!(200)
+```
+
+In this example, we'll set up the dynamics using a ModelingToolkit `ODESystem`.
+Since the torque ``\tau`` is our control input, we use the `[input = true]` flag for it.
+
+Since the angle ``\theta`` is periodic with period ``2\pi``, our box domain will be one period in ``\theta`` and an interval in ``\frac{d\theta}{dt}``.
+
+```@example policy_search
+using ModelingToolkit
+
+@parameters ζ ω_0
+defaults = Dict([ζ => 0.5, ω_0 => 1.0])
+
+@variables t θ(t) τ(t) [input = true]
+Dt = Differential(t)
+DDt = Dt^2
+
+eqs = [DDt(θ) + 2ζ * ω_0 * Dt(θ) + ω_0^2 * sin(θ) ~ τ]
+
+bounds = [
+    θ ∈ (0, 2π),
+    Dt(θ) ∈ (-10.0, 10.0)
+]
+
+upright_equilibrium = [π, 0.0]
+
+@named driven_pendulum = ODESystem(
+    eqs,
+    t,
+    [θ, τ],
+    [ζ, ω_0];
+    defaults = defaults
+)
+```
+
+We'll use an architecture that's ``2\pi``-periodic in ``\theta`` so that we can train on just one period of ``\theta`` and don't need to add any periodic boundary conditions.
+To achieve that, we use `Lux.PeriodicEmbedding([1], [2pi])`, which enforces `2pi`-periodicity in input number `1`.
+Additionally, we include output dimensions for both the neural Lyapunov function and the neural controller.
+
+Other than that, setting up the neural network with Lux and the NeuralPDE training strategy is no different from any other physics-informed neural network problem.
+For more on that aspect, see the [NeuralPDE documentation](https://docs.sciml.ai/NeuralPDE/stable/).
+
+```@example policy_search
+using Lux
+
+# Define neural network discretization
+# We use an input layer that is periodic with period 2π with respect to θ
+dim_state = length(bounds)
+dim_hidden = 20
+dim_phi = 3
+dim_u = 1
+dim_output = dim_phi + dim_u
+chain = [Lux.Chain(
+             PeriodicEmbedding([1], [2π]),
+             Dense(3, dim_hidden, tanh),
+             Dense(dim_hidden, dim_hidden, tanh),
+             Dense(dim_hidden, 1)
+         ) for _ in 1:dim_output]
+```
+
+```@example policy_search
+using NeuralPDE
+
+# Define neural network discretization
+strategy = QuasiRandomTraining(1250)
+discretization = PhysicsInformedNN(chain, strategy)
+```
+
+We now define our Lyapunov candidate structure along with the form of the Lyapunov conditions we'll be using.
+
+The default Lyapunov candidate from [`PositiveSemiDefiniteStructure`](@ref) is:
+```math
+V(x) = \left( 1 + \lVert \phi(x) \rVert^2 \right) \log \left( 1 + \lVert x \rVert^2 \right),
+```
+which structurally enforces positive definiteness.
+We'll modify the second factor to be ``2\pi``-periodic in ``\theta``:
+
+```@example policy_search
+using NeuralLyapunov
+
+# Define neural Lyapunov structure
+structure = PositiveSemiDefiniteStructure(
+    dim_phi;
+    pos_def = function (state, fixed_point)
+        θ, ω = state
+        θ_eq, ω_eq = fixed_point
+        log(1.0 + (sin(θ) - sin(θ_eq))^2 + (cos(θ) - cos(θ_eq))^2 + (ω - ω_eq)^2)
+    end
+)
+```
+
+In addition to representing the neural Lyapunov function, our neural network must also represent the controller.
+For this, we use the [`add_policy_search`](@ref) function, which tells NeuralLyapunov to expect dynamics with a control input and to treat the last `dim_u` dimensions of the neural network as the output of our controller.
+
+```@example policy_search
+structure = add_policy_search(
+    structure,
+    dim_u
+)
+```
+
+Since our Lyapunov candidate structurally enforces positive definiteness, we use [`DontCheckNonnegativity`](@ref).
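+
+Roughly speaking (this is the textbook Lyapunov condition for asymptotic stability, not the exact loss NeuralLyapunov constructs internally), we are asking the training to find ``V`` and ``u`` such that
+```math
+V(x) > 0 \quad \text{and} \quad \dot{V}(x) = \nabla V(x) \cdot f(x, u(x)) < 0 \quad \text{for all } x \ne x_{eq},
+```
+where ``x = (\theta, \omega)``, ``x_{eq}`` is the upright equilibrium, and ``f`` denotes the open-loop pendulum dynamics. The first condition holds by construction of our candidate (which is why we don't need to check nonnegativity), and the strict decrease condition is what we specify next with [`AsymptoticDecrease`](@ref) before assembling everything into a specification.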
+ +```@example policy_search +minimization_condition = DontCheckNonnegativity(check_fixed_point = false) + +# Define Lyapunov decrease condition +decrease_condition = AsymptoticDecrease(strict = true) + +# Construct neural Lyapunov specification +spec = NeuralLyapunovSpecification( + structure, + minimization_condition, + decrease_condition +) + +# Construct PDESystem +@named pde_system = NeuralLyapunovPDESystem( + driven_pendulum, + bounds, + spec; + fixed_point = upright_equilibrium +) +``` + +Now, we solve the PDESystem using NeuralPDE the same way we would any PINN problem. + +```@example policy_search +prob = discretize(pde_system, discretization) + +import Optimization, OptimizationOptimisers, OptimizationOptimJL + +res = Optimization.solve(prob, OptimizationOptimisers.Adam(); maxiters = 400) +prob = Optimization.remake(prob, u0 = res.u) +res = Optimization.solve(prob, OptimizationOptimJL.BFGS(); maxiters = 300) + +net = discretization.phi +_θ = res.u.depvar +``` + +We can use the result of the optimization problem to build the Lyapunov candidate as a Julia function, as well as extract our controller, using the [`get_policy`](@ref) function. + +```@example policy_search +(open_loop_pendulum_dynamics, _), state_order, p_order = ModelingToolkit.generate_control_function(driven_pendulum; simplify = true) +p = [defaults[param] for param in p_order] + +V_func, V̇_func = get_numerical_lyapunov_function( + net, + _θ, + structure, + open_loop_pendulum_dynamics, + upright_equilibrium; + p = p +) + +u = get_policy(net, _θ, dim_output, dim_u) +``` + +Now, let's evaluate our controller. +First, we'll get the usual summary statistics on the Lyapunov function and plot ``V``, ``\dot{V}``, and the violations of the decrease condition. + +```@example policy_search +lb = [0.0, -10.0]; +ub = [2π, 10.0]; +xs = (-2π):0.1:(2π) +ys = lb[2]:0.1:ub[2] +states = Iterators.map(collect, Iterators.product(xs, ys)) +V_samples = vec(V_func(hcat(states...))) +V̇_samples = vec(V̇_func(hcat(states...))) + +# Print statistics +println("V(π, 0) = ", V_func(upright_equilibrium)) +println( + "f([π, 0], u([π, 0])) = ", + open_loop_pendulum_dynamics(upright_equilibrium, u(upright_equilibrium), p, 0.0) +) +println( + "V ∋ [", + min(V_func(upright_equilibrium), + minimum(V_samples)), + ", ", + maximum(V_samples), + "]" +) +println( + "V̇ ∋ [", + minimum(V̇_samples), + ", ", + max(V̇_func(upright_equilibrium), maximum(V̇_samples)), + "]" +) +``` + +```@example policy_search +using Plots + +p1 = plot( + xs / pi, + ys, + V_samples, + linetype = + :contourf, + title = "V", + xlabel = "θ/π", + ylabel = "ω", + c = :bone_1 +); +p1 = scatter!([-2 * pi, 0, 2 * pi] / pi, [0, 0, 0], + label = "Downward Equilibria", color = :red, markershape = :x); +p1 = scatter!( + [-pi, pi] / pi, [0, 0], label = "Upward Equilibria", color = :green, markershape = :+); +p2 = plot( + xs / pi, + ys, + V̇_samples, + linetype = :contourf, + title = "dV/dt", + xlabel = "θ/π", + ylabel = "ω", + c = :binary +); +p2 = scatter!([-2 * pi, 0, 2 * pi] / pi, [0, 0, 0], + label = "Downward Equilibria", color = :red, markershape = :x); +p2 = scatter!([-pi, pi] / pi, [0, 0], label = "Upward Equilibria", color = :green, + markershape = :+, legend = false); +p3 = plot( + xs / pi, + ys, + V̇_samples .< 0, + linetype = :contourf, + title = "dV/dt < 0", + xlabel = "θ/π", + ylabel = "ω", + colorbar = false, + linewidth = 0 +); +p3 = scatter!([-2 * pi, 0, 2 * pi] / pi, [0, 0, 0], + label = "Downward Equilibria", color = :green, markershape = :+); +p3 = scatter!([-pi, pi] / 
pi, [0, 0], label = "Upward Equilibria", + color = :red, markershape = :x, legend = false); +plot(p1, p2, p3) +``` + +Now, let's simulate the closed-loop dynamics to verify that the controller can get our system to the upward equilibrium. + +First, we'll start at the downward equilibrium: + +```@example policy_search +state_order = map(st -> SymbolicUtils.iscall(st) ? operation(st) : st, state_order) +state_syms = Symbol.(state_order) + +closed_loop_dynamics = ODEFunction( + (x, p, t) -> open_loop_pendulum_dynamics(x, u(x), p, t); + sys = SciMLBase.SymbolCache(state_syms, Symbol.(p_order)) +) + +using DifferentialEquations + +# Starting still at bottom ... +downward_equilibrium = zeros(2) +ode_prob = ODEProblem(closed_loop_dynamics, downward_equilibrium, [0.0, 120.0], p) +sol = solve(ode_prob, Tsit5()) +plot(sol) +``` + +```@example policy_search +# ...the system should make it to the top +θ_end, ω_end = sol.u[end] +x_end, y_end = sin(θ_end), -cos(θ_end) +[x_end, y_end, ω_end] # Should be approximately [0.0, 1.0, 0.0] +``` + +Then, we'll start at a random state: + +```@example policy_search +# Starting at a random point ... +x0 = lb .+ rand(2) .* (ub .- lb) +ode_prob = ODEProblem(closed_loop_dynamics, x0, [0.0, 150.0], p) +sol = solve(ode_prob, Tsit5()) +plot(sol) +``` + +```@example policy_search +# ...the system should make it to the top +θ_end, ω_end = sol.u[end] +x_end, y_end = sin(θ_end), -cos(θ_end) +[x_end, y_end, ω_end] # Should be approximately [0.0, 1.0, 0.0] +``` diff --git a/docs/src/demos/roa_estimation.md b/docs/src/demos/roa_estimation.md index 5b8a82b..4d8b35d 100644 --- a/docs/src/demos/roa_estimation.md +++ b/docs/src/demos/roa_estimation.md @@ -100,14 +100,14 @@ RoA = (first(RoA_states), last(RoA_states)) ## Detailed description -In this example, we set up the dynamics as a Julia function and don't bother specifying the symbols for the variables (so ``x`` will be called the default `state1` in the PDESystem). - ```@setup RoA using Random Random.seed!(200) ``` +In this example, we set up the dynamics as a Julia function and don't bother specifying the symbols for the variables (so ``x`` will be called the default `state1` in the PDESystem). 
+ ```@example RoA f(x, p, t) = -x .+ x .^ 3 lb = [-2.0]; @@ -184,7 +184,7 @@ Now, we solve the PDESystem using NeuralPDE the same way we would any PINN probl ```@example RoA prob = discretize(pde_system, discretization) -using Optimization, OptimizationOptimisers, OptimizationOptimJL +import Optimization, OptimizationOptimisers, OptimizationOptimJL res = Optimization.solve(prob, OptimizationOptimisers.Adam(); maxiters = 300) prob = Optimization.remake(prob, u0 = res.u) diff --git a/test/inverted_pendulum_ODESystem.jl b/test/inverted_pendulum_ODESystem.jl index 9b09860..dd4967d 100644 --- a/test/inverted_pendulum_ODESystem.jl +++ b/test/inverted_pendulum_ODESystem.jl @@ -31,11 +31,7 @@ bounds = [ Dt(θ) ∈ (-10.0, 10.0) ] -(open_loop_pendulum_dynamics, _), state_order, p_order = ModelingToolkit.generate_control_function( - driven_pendulum; simplify = true) - upright_equilibrium = [π, 0.0] -p = [defaults[param] for param in p_order] ####################### Specify neural Lyapunov problem ####################### @@ -54,7 +50,7 @@ chain = [Lux.Chain( ) for _ in 1:dim_output] # Define neural network discretization -strategy = QuasiRandomTraining(1_250)# GridTraining(0.1) +strategy = QuasiRandomTraining(1_250) discretization = PhysicsInformedNN(chain, strategy) # Define neural Lyapunov structure @@ -107,6 +103,10 @@ res = Optimization.solve(prob, OptimizationOptimJL.BFGS(); maxiters = 300) net = discretization.phi _θ = res.u.depvar +(open_loop_pendulum_dynamics, _), state_order, p_order = ModelingToolkit.generate_control_function( + driven_pendulum; simplify = true) +p = [defaults[param] for param in p_order] + V_func, V̇_func = get_numerical_lyapunov_function( net, _θ,