Switch outer optimization to Optimization.jl
New interface via Optimization.jl, replaces old Optim.jl one.
Update description in README.
ElOceanografo committed Jun 5, 2024
1 parent 9140366 commit f2f1d90
Showing 3 changed files with 82 additions and 57 deletions.
32 changes: 19 additions & 13 deletions README.md
@@ -101,25 +101,31 @@ logpdf(Normal(0, 1.5), 1.0)
```

The point of doing all this was to find an optimal set of parameters `v` for
your data. This package defines a method for `Optim.optimize` that
works directly with a `MarginalLogDensity` object, making optimization easy. Just pass
it your marginalized function, a vector of starting values for `v`, and your `data` (which
can be omitted if your function doesn't use it).
your data. This package includes an interface to Optimization.jl that
works directly with a `MarginalLogDensity` object, making optimization easy. The simplest
way is to construct an `OptimizationProblem` directly from the `MarginalLogDensity` and
`solve` it:

```julia
using Optim
fit = optimize(marginal_logdensity, initial_v, data)
using Optimization, OptimizationOptimJL

opt_problem = OptimizationProblem(marginal_logdensity, v0)
opt_solution = solve(opt_problem, NelderMead())
```

Options for `optimize` can be passed as subsequent arguments; refer to the
Optim.jl docs for details. One of particular interest is the choice of optimizer:
the default is Nelder-Mead, but you can also use gradient-based and second-order methods:
If you want more control over options, for instance setting an AD backend, you can
construct an `OptimizationFunction` explicitly:

```julia
optimize(marginal_logdensity, initial_v, data, LBFGS())
optimize(marginal_logdensity, initial_v, data, Newton())
opt_function = OptimizationFunction(marginal_logdensity, AutoFiniteDiff())
opt_problem = OptimizationProblem(opt_function, v0)
opt_solution = solve(opt_problem, LBFGS())
```
(Note that these outer optimizations only work with `autodiff=:finite` for now.)

Note that at present we can't differentiate through the Laplace approximation, so outer
optimizations like this need to either use a gradient-free solver (like `NelderMead()`),
or a finite-difference backend (like `AutoFiniteDiff()`). This is on the list of planned
improvements.
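
Whichever solver you use, `solve` returns a standard SciML solution object. A minimal sketch of inspecting it, reusing `opt_solution` from the examples above (the field names are the generic SciML ones, not anything specific to this package):

```julia
v_hat = opt_solution.u         # optimal values of the non-marginalized parameters `v`
nll = opt_solution.objective   # minimized objective, i.e. -marginal_logdensity(v_hat)
opt_solution.retcode           # solver return code; check that it indicates success
```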

A more realistic application to a mixed-effects regression can be found in this
[example script](https://github.com/ElOceanografo/MarginalLogDensities.jl/blob/master/test/example.jl).
[example script](https://github.com/ElOceanografo/MarginalLogDensities.jl/blob/master/examples/example.jl).
16 changes: 12 additions & 4 deletions src/MarginalLogDensities.jl
@@ -221,7 +221,7 @@ function Base.show(io::IO, mld::MarginalLogDensity)
write(io, str)
end

function (mld::MarginalLogDensity)(v::AbstractVector{T}, data; verbose=false) where T
function (mld::MarginalLogDensity)(v::AbstractVector{T}, data=mld.data; verbose=false) where T
return _marginalize(mld, v, data, mld.method, verbose)
end
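
(An illustrative aside rather than part of the patch: with `data` now defaulting to `mld.data`, the two calls below should be equivalent, using the `marginal_logdensity`, `v0`, and `data` names from the README example.)

```julia
marginal_logdensity(v0)        # falls back to the stored mld.data
marginal_logdensity(v0, data)  # passing data explicitly still works as before
```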

@@ -333,8 +333,16 @@ function _marginalize(mld, v, data, method::Cubature, verbose)
return log(integral)
end

# function Optim.optimize(mld::MarginalLogDensity, init_v, data=(), args...; kwargs...)
# return optimize(v -> -mld(v, data), init_v, args...; kwargs...)
# end

function Optimization.OptimizationFunction(mld::MarginalLogDensity,
args...; kwargs...)
return OptimizationFunction((w, p) -> -mld(w, p), args...; kwargs...)
end

function Optimization.OptimizationProblem(mld::MarginalLogDensity, v0, p=mld.data;
kwargs...)
f = OptimizationFunction(mld)
return OptimizationProblem(f, v0, p)
end

end # module
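
Taken together, these two constructors give two equivalent ways to set up the outer fit. A sketch using the names from the README example (the choice of AD backend and solvers here is just an illustration):

```julia
# Shortcut: the problem constructor builds the OptimizationFunction itself
# and uses mld.data as the parameter object `p`.
prob_simple = OptimizationProblem(marginal_logdensity, v0)
sol_simple = solve(prob_simple, NelderMead())

# Explicit: pick an AD backend (finite differences, per the README note above).
f = OptimizationFunction(marginal_logdensity, AutoFiniteDiff())
prob_explicit = OptimizationProblem(f, v0, data)
sol_explicit = solve(prob_explicit, LBFGS())

# In both cases the objective is -mld(v, p), so minimizing it maximizes the marginal log density.
```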
91 changes: 51 additions & 40 deletions test/runtests.jl
@@ -114,46 +114,6 @@ end
@test logpdf_cubature2 >= mld_cubature2.logdensity(x, ())
end

@testset "Parameters" begin
ncategories = 8
categories = 1:ncategories
μ0 = 5.0
σ0 = 5.0
aa = rand(Normal(μ0, σ0), ncategories)
b = 4.5
σ = 0.5
category = repeat(categories, inner=200)
n = length(category)
x = rand(Uniform(-1, 1), n)
μ = [aa[category[i]] + b * x[i] for i in 1:n]
y = rand.(Normal.(μ, σ))

function loglik(θ::Vector{T}, p) where T
μ0 = θ[1]
σ0 = exp(θ[2])
aa = θ[3:10]
b = θ[11]
σ = exp(θ[12])
μ = [aa[p.category[i]] + b * p.x[i] for i in 1:p.n]
return loglikelihood(Normal(μ0, σ0), aa) + sum(logpdf.(Normal.(μ, σ), p.y))
end

θtrue = [μ0; log(σ0); aa; b; log(σ)]
p = (; category, x, y, n)
nθ = length(θtrue)

θ0 = ones(length(θtrue))
θmarg = θ0[[1, 2, 11, 12]]
mld_laplace = MarginalLogDensity(loglik, θ0, collect(3:10), p, LaplaceApprox())
mld_cubature = MarginalLogDensity(loglik, θ0, collect(3:10), p,
Cubature(lower=fill(-5.0, 8), upper=fill(5, 8)))

# opt_laplace = optimize(θ -> -mld_laplace(θ, p), ones(4))
# opt_cubature = optimize(θ -> -mld_cubature(θ, p), ones(4))
# println(opt_laplace.minimizer)
# println(opt_cubature.minimizer)
# @test all(opt_laplace.minimizer .≈ opt_cubature.minimizer)
end

@testset "AD types" begin
adtypes = [
@@ -207,4 +167,55 @@ end
@test ! issparse(cached_hessian(mldd))
@test mlds(v, p) ≈ mldd(v, p)
@test all(Matrix(cached_hessian(mlds)) .≈ cached_hessian(mldd))
end

@testset "Outer Optimization" begin
ncategories = 8
categories = 1:ncategories
μ0 = 5.0
σ0 = 5.0
aa = rand(Normal(μ0, σ0), ncategories)
b = 4.5
σ = 0.5
category = repeat(categories, inner=200)
n = length(category)
x = rand(Uniform(-1, 1), n)
μ = [aa[category[i]] + b * x[i] for i in 1:n]
y = rand.(Normal.(μ, σ))

function loglik(θ::Vector{T}, p) where T
μ0 = θ[1]
σ0 = exp(θ[2])
aa = θ[3:10]
b = θ[11]
σ = exp(θ[12])
μ = [aa[p.category[i]] + b * p.x[i] for i in 1:p.n]
return loglikelihood(Normal(μ0, σ0), aa) + sum(logpdf.(Normal.(μ, σ), p.y))
end

θtrue = [μ0; log(σ0); aa; b; log(σ)]
p = (; category, x, y, n)
nθ = length(θtrue)

θ0 = ones(length(θtrue))
θmarg = θ0[[1, 2, 11, 12]]
mld_laplace = MarginalLogDensity(loglik, θ0, collect(3:10), p, LaplaceApprox())
# mld_cubature = MarginalLogDensity(loglik, θ0, collect(3:10), p,
# Cubature(lower=fill(-5.0, 8), upper=fill(5, 8)))

opt_func = OptimizationFunction(mld_laplace, AutoFiniteDiff())
v0 = ones(length(θmarg))
opt_prob1 = OptimizationProblem(opt_func, v0, p)
opt_prob2 = OptimizationProblem(mld_laplace, v0)
opt_sol1 = solve(opt_prob1, NelderMead())
opt_sol2 = solve(opt_prob2, NelderMead())
@test all(isapprox.(opt_sol1.u, opt_sol2.u))

opt_sol1_1 = solve(opt_prob1, LBFGS())
@test all(isapprox.(opt_sol1.u, opt_sol1_1.u, atol=0.01))

# opt_prob3 = OptimizationProblem(mld_cubature, v0)
# opt_sol3 = solve(opt_prob3, NelderMead())
# println(maximum(abs.(opt_sol1.u .- opt_sol3.u)))
# @test all(isapprox.(opt_sol1.u, opt_sol3.u, atol=0.01))
end
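
As a follow-on sketch (not part of the committed test), the fitted fixed effects can be unpacked from `opt_sol1`, since `θmarg = θ0[[1, 2, 11, 12]]` corresponds to `[μ0, log(σ0), b, log(σ)]`; the `_hat` names are purely illustrative:

```julia
μ0_hat, logσ0_hat, b_hat, logσ_hat = opt_sol1.u
σ0_hat = exp(logσ0_hat)  # between-category standard deviation, back on its natural scale
σ_hat = exp(logσ_hat)    # residual standard deviation
@show μ0_hat σ0_hat b_hat σ_hat
```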

2 comments on commit f2f1d90

@ElOceanografo
Owner Author


@JuliaRegistrator


Registration pull request created: JuliaRegistries/General/108287

Tip: Release Notes

Did you know you can add release notes too? Just add markdown-formatted text underneath the comment after the text
"Release notes:" and it will be added to the registry PR; if TagBot is installed, it will also be added to the
release that TagBot creates. For example:

@JuliaRegistrator register

Release notes:

## Breaking changes

- blah

To add them here just re-invoke and the PR will be updated.

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the GitHub interface, or via:

git tag -a v0.3.0 -m "<description of version>" f2f1d90764019c06a10e4c45d1f783af2f4371fc
git push origin v0.3.0
