
Merge pull request #57 from arvigj/adam
Add ADAM optimizer
arvigj authored Dec 8, 2023
2 parents e9bb718 + 49edbec commit fbfb31d
Showing 5 changed files with 211 additions and 0 deletions.
83 changes: 83 additions & 0 deletions non-linear-solver-spec.json
@@ -15,6 +15,8 @@
"LBFGS",
"LBFGSB",
"Newton",
"ADAM",
"StochasticADAM",
"StochasticGradientDescent",
"box_constraints",
"advanced"
@@ -29,6 +31,8 @@
"Newton",
"DenseNewton",
"GradientDescent",
"ADAM",
"StochasticADAM",
"StochasticGradientDescent",
"L-BFGS",
"BFGS",
@@ -166,6 +170,85 @@
"type": "bool",
"doc": "Use PSD as fallback using second order solvers (i.e., Newton's method)."
},
{
"pointer": "/ADAM",
"default": null,
"type": "object",
"optional": [
"alpha",
"beta_1",
"beta_2",
"epsilon"
],
"doc": "Options for ADAM."
},
{
"pointer": "/ADAM/alpha",
"default": 0.001,
"type": "float",
"doc": "Parameter alpha for ADAM."
},
{
"pointer": "/ADAM/beta_1",
"default": 0.9,
"type": "float",
"doc": "Parameter beta_1 for ADAM."
},
{
"pointer": "/ADAM/beta_2",
"default": 0.999,
"type": "float",
"doc": "Parameter beta_2 for ADAM."
},
{
"pointer": "/ADAM/epsilon",
"default": 1e-8,
"type": "float",
"doc": "Parameter epsilon for ADAM."
},
{
"pointer": "/StochasticADAM",
"default": null,
"type": "object",
"optional": [
"alpha",
"beta_1",
"beta_2",
"epsilon",
"erase_component_probability"
],
"doc": "Options for ADAM."
},
{
"pointer": "/StochasticADAM/alpha",
"default": 0.001,
"type": "float",
"doc": "Parameter alpha for ADAM."
},
{
"pointer": "/StochasticADAM/beta_1",
"default": 0.9,
"type": "float",
"doc": "Parameter beta_1 for ADAM."
},
{
"pointer": "/StochasticADAM/beta_2",
"default": 0.999,
"type": "float",
"doc": "Parameter beta_2 for ADAM."
},
{
"pointer": "/StochasticADAM/epsilon",
"default": 1e-8,
"type": "float",
"doc": "Parameter epsilon for ADAM."
},
{
"pointer": "/StochasticADAM/erase_component_probability",
"default": 0.3,
"type": "float",
"doc": "Probability of erasing a component on the gradient for ADAM."
},
{
"pointer": "/StochasticGradientDescent",
"default": null,
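As a quick illustration of how these new entries are consumed, here is a minimal sketch of a configuration enabling ADAM with its documented defaults. It uses nlohmann::json (the json type used throughout polysolve); the enclosing "solver" key and the parse call are assumptions for illustration, not part of this diff.

#include <nlohmann/json.hpp>

// Hypothetical configuration: the "ADAM" block and its field names come
// from the spec above; the surrounding "solver" key is assumed.
const nlohmann::json solver_params = nlohmann::json::parse(R"({
    "solver": "ADAM",
    "ADAM": {
        "alpha": 0.001,
        "beta_1": 0.9,
        "beta_2": 0.999,
        "epsilon": 1e-8
    }
})");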
14 changes: 14 additions & 0 deletions src/polysolve/nonlinear/Solver.cpp
@@ -5,6 +5,7 @@

#include "descent_strategies/BFGS.hpp"
#include "descent_strategies/Newton.hpp"
#include "descent_strategies/ADAM.hpp"
#include "descent_strategies/GradientDescent.hpp"
#include "descent_strategies/LBFGS.hpp"

@@ -85,6 +86,17 @@ namespace polysolve::nonlinear
solver->add_strategy(std::make_unique<LBFGS>(
solver_params, characteristic_length, logger));
}
else if (solver_name == "ADAM" || solver_name == "adam")
{
solver->add_strategy(std::make_unique<ADAM>(
solver_params, false, characteristic_length, logger));
}
else if (solver_name == "StochasticADAM" || solver_name == "stochastic_adam")
{
solver->add_strategy(std::make_unique<ADAM>(
solver_params, true, characteristic_length, logger));
}
else if (solver_name == "StochasticGradientDescent" || solver_name == "stochastic_gradient_descent")
{
solver->add_strategy(std::make_unique<GradientDescent>(
@@ -109,6 +121,8 @@ namespace polysolve::nonlinear
return {"BFGS",
"DenseNewton",
"Newton",
"ADAM",
"StochasticADAM",
"GradientDescent",
"StochasticGradientDescent",
"L-BFGS"};
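The factory accepts both CamelCase and snake_case spellings. A matching sketch for the stochastic variant, which additionally reads erase_component_probability; again, the enclosing "solver" key is an assumption used only for illustration.

#include <nlohmann/json.hpp>

// Selecting the stochastic variant by either accepted spelling
// ("StochasticADAM" or "stochastic_adam").
const nlohmann::json stochastic_params = nlohmann::json::parse(R"({
    "solver": "stochastic_adam",
    "StochasticADAM": {
        "alpha": 0.001,
        "beta_1": 0.9,
        "beta_2": 0.999,
        "epsilon": 1e-8,
        "erase_component_probability": 0.3
    }
})");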
67 changes: 67 additions & 0 deletions src/polysolve/nonlinear/descent_strategies/ADAM.cpp
@@ -0,0 +1,67 @@
// Adam optimizer from Kingma & Ba, "Adam: A Method for Stochastic Optimization" (ICLR 2015)

#include "ADAM.hpp"

namespace polysolve::nonlinear
{

ADAM::ADAM(const json &solver_params,
const bool is_stochastic,
const double characteristic_length,
spdlog::logger &logger)
: Superclass(solver_params, characteristic_length, logger), is_stochastic_(is_stochastic)
{
std::string param_name = is_stochastic ? "StochasticADAM" : "ADAM";
alpha_ = solver_params[param_name]["alpha"];
beta_1_ = solver_params[param_name]["beta_1"];
beta_2_ = solver_params[param_name]["beta_2"];
epsilon_ = solver_params[param_name]["epsilon"];
if (is_stochastic)
erase_component_probability_ = solver_params["StochasticADAM"]["erase_component_probability"];
}

void ADAM::reset(const int ndof)
{
Superclass::reset(ndof);
m_prev_ = Eigen::VectorXd::Zero(ndof);
v_prev_ = Eigen::VectorXd::Zero(ndof);
t_ = 0;
}

bool ADAM::compute_update_direction(
Problem &objFunc,
const TVector &x,
const TVector &grad,
TVector &direction)
{
if (m_prev_.size() == 0)
m_prev_ = Eigen::VectorXd::Zero(x.size());
if (v_prev_.size() == 0)
v_prev_ = Eigen::VectorXd::Zero(x.size());

TVector grad_modified = grad;

if (is_stochastic_)
{
// VectorXd::Random is uniform in [-1, 1]; shift and scale to a uniform
// [0, 1] mask, then zero each gradient component with the given probability.
Eigen::VectorXd mask = (Eigen::VectorXd::Random(direction.size()).array() + 1.) / 2.;
for (int i = 0; i < direction.size(); ++i)
    grad_modified(i) *= (mask(i) < erase_component_probability_) ? 0. : 1.;
}

// Exponential moving averages of the gradient and its square.
TVector m = (beta_1_ * m_prev_) + ((1 - beta_1_) * grad_modified);
TVector v = beta_2_ * v_prev_;
for (int i = 0; i < v.size(); ++i)
    v(i) += (1 - beta_2_) * grad_modified(i) * grad_modified(i);

// Store the uncorrected moments for the next iteration.
m_prev_ = m;
v_prev_ = v;

// Advance the step count before the bias correction: with t_ = 0 the
// denominators 1 - beta^0 below would be zero.
++t_;

// Bias-corrected moment estimates.
m = m.array() / (1 - std::pow(beta_1_, t_));
v = v.array() / (1 - std::pow(beta_2_, t_));

// Descent direction; epsilon keeps the denominator away from zero.
direction = -alpha_ * m;
for (int i = 0; i < v.size(); ++i)
    direction(i) /= std::sqrt(v(i)) + epsilon_;

return true;
}
} // namespace polysolve::nonlinear
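For reference, the update implemented above is the standard Adam recurrence from Kingma & Ba, with gradient g_t, written here with the sign flipped because the code returns a descent direction:

\begin{aligned}
m_t &= \beta_1 m_{t-1} + (1 - \beta_1)\, g_t, &
v_t &= \beta_2 v_{t-1} + (1 - \beta_2)\, g_t^2, \\
\hat{m}_t &= \frac{m_t}{1 - \beta_1^t}, &
\hat{v}_t &= \frac{v_t}{1 - \beta_2^t}, \\
d_t &= -\alpha\, \frac{\hat{m}_t}{\sqrt{\hat{v}_t} + \epsilon}.
\end{aligned}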
45 changes: 45 additions & 0 deletions src/polysolve/nonlinear/descent_strategies/ADAM.hpp
@@ -0,0 +1,45 @@
#pragma once

#include "DescentStrategy.hpp"
#include <polysolve/Utils.hpp>

#include <polysolve/linear/Solver.hpp>

namespace polysolve::nonlinear
{
class ADAM : public DescentStrategy
{
public:
using Superclass = DescentStrategy;

ADAM(const json &solver_params,
const bool is_stochastic,
const double characteristic_length,
spdlog::logger &logger);

std::string name() const override { return is_stochastic_ ? "StochasticADAM" : "ADAM"; }

void reset(const int ndof) override;

virtual bool compute_update_direction(
Problem &objFunc,
const TVector &x,
const TVector &grad,
TVector &direction) override;

bool is_direction_descent() override { return false; }

private:
    TVector m_prev_; // first-moment (mean) estimate from the previous step
    TVector v_prev_; // second-moment (uncentered variance) estimate

    double beta_1_, beta_2_; // exponential decay rates of the two moments
    double alpha_;           // step size

    int t_ = 0;      // step counter used for bias correction
    double epsilon_; // numerical-stability constant

    bool is_stochastic_;
    double erase_component_probability_ = 0;
};
} // namespace polysolve::nonlinear
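Exercising the class above requires polysolve's Problem interface, so as a self-contained sanity check here is a standalone re-implementation of the same update minimizing a toy quadratic. Only Eigen is assumed; nothing below is polysolve API.

#include <Eigen/Dense>
#include <cmath>
#include <iostream>

// Standalone sketch of the Adam update from ADAM.cpp, minimizing
// f(x) = 0.5 * ||x||^2, whose gradient is simply x.
int main()
{
    const double alpha = 0.001, beta_1 = 0.9, beta_2 = 0.999, epsilon = 1e-8;

    Eigen::VectorXd x = Eigen::VectorXd::Ones(4);
    Eigen::VectorXd m = Eigen::VectorXd::Zero(x.size());
    Eigen::VectorXd v = Eigen::VectorXd::Zero(x.size());

    for (int t = 1; t <= 5000; ++t)
    {
        const Eigen::VectorXd grad = x; // gradient of the toy objective

        // Moment estimates, as in compute_update_direction.
        m = beta_1 * m + (1 - beta_1) * grad;
        v = beta_2 * v + (1 - beta_2) * grad.cwiseProduct(grad);

        // Bias correction; t starts at 1 so the denominators are nonzero.
        const Eigen::VectorXd m_hat = m / (1 - std::pow(beta_1, t));
        const Eigen::VectorXd v_hat = v / (1 - std::pow(beta_2, t));

        // Step along the (negative) Adam direction.
        x.array() -= alpha * m_hat.array() / (v_hat.array().sqrt() + epsilon);
    }

    std::cout << "final |x| = " << x.norm() << "\n"; // should be near zero
    return 0;
}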
2 changes: 2 additions & 0 deletions src/polysolve/nonlinear/descent_strategies/CMakeLists.txt
@@ -6,6 +6,8 @@ set(SOURCES
BFGS.hpp
GradientDescent.cpp
GradientDescent.hpp
ADAM.cpp
ADAM.hpp
Newton.hpp
Newton.cpp
)
