#ifndef STAN_OPTIMIZATION_BFGS_HPP
#define STAN_OPTIMIZATION_BFGS_HPP

#include <stan/math/prim/mat.hpp>
#include <stan/model/log_prob_grad.hpp>
#include <stan/model/log_prob_propto.hpp>
#include <stan/optimization/bfgs_linesearch.hpp>
#include <stan/optimization/bfgs_update.hpp>
#include <boost/math/special_functions/fpclassify.hpp>
#include <algorithm>
#include <cmath>
#include <limits>
#include <stdexcept>
#include <string>
#include <vector>

namespace stan {
  namespace optimization {
    // Termination codes returned by BFGSMinimizer::step().
    typedef enum {
      TERM_SUCCESS = 0,
      TERM_ABSX = 10,
      TERM_ABSF = 20,
      TERM_RELF = 21,
      TERM_ABSGRAD = 30,
      TERM_RELGRAD = 31,
      TERM_MAXIT = 40,
      TERM_LSFAIL = -1
    } TerminationCondition;
    // Convergence tolerances for BFGSMinimizer.  The relative tolerances
    // (tolRelF, tolRelGrad) are scaled by machine epsilon inside step().
    template <typename Scalar = double>
    class ConvergenceOptions {
     public:
      ConvergenceOptions() {
        maxIts = 10000;
        fScale = 1.0;
        tolAbsX = 1e-8;
        tolAbsF = 1e-12;
        tolAbsGrad = 1e-8;
        tolRelF = 1e+4;
        tolRelGrad = 1e+3;
      }
      size_t maxIts;
      Scalar tolAbsX;
      Scalar tolAbsF;
      Scalar tolRelF;
      Scalar fScale;
      Scalar tolAbsGrad;
      Scalar tolRelGrad;
    };
    // Wolfe line-search parameters: sufficient-decrease constant c1,
    // curvature constant c2, initial step size alpha0, and the smallest
    // step size minAlpha the search may try before giving up.
    template <typename Scalar = double>
    class LSOptions {
     public:
      LSOptions() {
        c1 = 1e-4;
        c2 = 0.9;
        alpha0 = 1e-3;
        minAlpha = 1e-12;
      }
      Scalar c1;
      Scalar c2;
      Scalar alpha0;
      Scalar minAlpha;
    };
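    // Both option structs are exposed as public members (_ls_opts and
    // _conv_opts) of BFGSMinimizer below, so callers can tune them after
    // construction.  A hypothetical example (the variable name `minimizer`
    // and the chosen values are illustrative, not from the original source):
    //
    //   minimizer._conv_opts.tolAbsGrad = 1e-10;  // tighter gradient bound
    //   minimizer._ls_opts.c2 = 0.99;             // weaker curvature check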
    template <typename FunctorType, typename QNUpdateType,
              typename Scalar = double, int DimAtCompile = Eigen::Dynamic>
    class BFGSMinimizer {
     public:
      typedef Eigen::Matrix<Scalar, DimAtCompile, 1> VectorT;
      typedef Eigen::Matrix<Scalar, DimAtCompile, DimAtCompile> HessianT;

     protected:
      FunctorType &_func;
      VectorT _gk, _gk_1, _xk_1, _xk, _pk, _pk_1;
      Scalar _fk, _fk_1, _alpha, _alpha0, _alphak_1;
      size_t _itNum;
      std::string _note;

     public:
      LSOptions<Scalar> _ls_opts;
      ConvergenceOptions<Scalar> _conv_opts;
      QNUpdateType _qn;

      explicit BFGSMinimizer(FunctorType &f) : _func(f) {}

      QNUpdateType &get_qnupdate() { return _qn; }
      const QNUpdateType &get_qnupdate() const { return _qn; }
      const Scalar &curr_f() const { return _fk; }
      const VectorT &curr_x() const { return _xk; }
      const VectorT &curr_g() const { return _gk; }
      const VectorT &curr_p() const { return _pk; }

      const Scalar &prev_f() const { return _fk_1; }
      const VectorT &prev_x() const { return _xk_1; }
      const VectorT &prev_g() const { return _gk_1; }
      const VectorT &prev_p() const { return _pk_1; }
      Scalar prev_step_size() const { return _pk_1.norm() * _alphak_1; }
      // Scale-invariant gradient measure: directional derivative along the
      // search direction, relative to the magnitude of the objective.
      inline Scalar rel_grad_norm() const {
        return -_pk.dot(_gk) / std::max(std::fabs(_fk), _conv_opts.fScale);
      }

      // Relative decrease in the objective over the last step.
      inline Scalar rel_obj_decrease() const {
        return std::fabs(_fk_1 - _fk)
               / std::max(std::fabs(_fk_1),
                          std::max(std::fabs(_fk), _conv_opts.fScale));
      }
      const Scalar &alpha0() const { return _alpha0; }
      const Scalar &alpha() const { return _alpha; }
      const size_t iter_num() const { return _itNum; }

      const std::string &note() const { return _note; }
      // Map a termination code from step() to a human-readable message.
      std::string get_code_string(int retCode) {
        switch (retCode) {
          case TERM_SUCCESS:
            return std::string("Successful step completed");
          case TERM_ABSF:
            return std::string("Convergence detected: absolute change "
                               "in objective function was below tolerance");
          case TERM_RELF:
            return std::string("Convergence detected: relative change "
                               "in objective function was below tolerance");
          case TERM_ABSGRAD:
            return std::string("Convergence detected: "
                               "gradient norm is below tolerance");
          case TERM_RELGRAD:
            return std::string("Convergence detected: relative "
                               "gradient magnitude is below tolerance");
          case TERM_ABSX:
            return std::string("Convergence detected: "
                               "absolute parameter change was below tolerance");
          case TERM_MAXIT:
            return std::string("Maximum number of iterations hit, "
                               "may not be at an optima");
          case TERM_LSFAIL:
            return std::string("Line search failed to achieve a sufficient "
                               "decrease, no more progress can be made");
          default:
            return std::string("Unknown termination code");
        }
      }
      // Evaluate the objective and gradient at x0 and set up the first
      // (steepest-descent) search direction.
      void initialize(const VectorT &x0) {
        int ret;
        _xk = x0;
        ret = _func(_xk, _fk, _gk);
        if (ret) {
          throw std::runtime_error("Error evaluating initial BFGS point.");
        }
        _pk = -_gk;
        _itNum = 0;
        _note = "";
      }
      // Take a single quasi-Newton step, returning one of the termination
      // codes above (TERM_SUCCESS means further progress can be made).
      int step() {
        Scalar gradNorm, stepNorm;
        VectorT sk, yk;
        int retCode(0);
        int resetB(0);

        _itNum++;
        _note = "";
        if (_itNum == 1)
          resetB = 1;

        while (true) {
          if (resetB) {
            // Reset the Hessian approximation: fall back to steepest descent
            _pk.noalias() = -_gk;
          }

          // Get an initial guess for the step size (alpha)
          if (_itNum > 1 && resetB != 2) {
            // Use cubic interpolation based on the previous step's values
            _alpha0 = _alpha
              = std::min(1.0,
                         1.01 * CubicInterp(_gk_1.dot(_pk_1), _alphak_1,
                                            _fk - _fk_1, _gk.dot(_pk_1),
                                            _ls_opts.minAlpha, 1.0));
          } else {
            // On the first step (or after a reset) use the default step size
            _alpha0 = _alpha = _ls_opts.alpha0;
          }

          // Perform the line search.  If successful, the results are in the
          // variables: _xk_1, _fk_1 and _gk_1.
          retCode = WolfeLineSearch(_func, _alpha, _xk_1, _fk_1, _gk_1,
                                    _pk, _xk, _fk, _gk,
                                    _ls_opts.c1, _ls_opts.c2,
                                    _ls_opts.minAlpha);
          if (retCode) {
            // Line search failed...
            if (resetB) {
              // The Hessian was already reset; nothing left to try
              retCode = TERM_LSFAIL;
              return retCode;
            } else {
              // Try resetting the Hessian approximation
              resetB = 2;
              _note += "LS failed, Hessian reset";
              continue;
            }
          } else {
            break;
          }
        }

        // Swap things so that k is the most recent iterate
        std::swap(_fk, _fk_1);
        _xk.swap(_xk_1);
        _gk.swap(_gk_1);
        _pk.swap(_pk_1);

        sk.noalias() = _xk - _xk_1;
        yk.noalias() = _gk - _gk_1;

        gradNorm = _gk.norm();
        stepNorm = sk.norm();

        // Update the quasi-Newton approximation
        if (resetB) {
          // If the approximation was reset, scale it automatically and
          // adjust the recorded step size accordingly
          Scalar B0fact = _qn.update(yk, sk, true);
          _pk_1 /= B0fact;
          _alphak_1 = _alpha * B0fact;
        } else {
          _qn.update(yk, sk);
          _alphak_1 = _alpha;
        }
        // Compute the search direction for the next step
        _qn.search_direction(_pk, _gk);

        // Check for convergence
        if (std::fabs(_fk_1 - _fk) < _conv_opts.tolAbsF) {
          // Absolute improvement in the objective was too small
          retCode = TERM_ABSF;
        } else if (gradNorm < _conv_opts.tolAbsGrad) {
          // Gradient norm was below threshold
          retCode = TERM_ABSGRAD;
        } else if (stepNorm < _conv_opts.tolAbsX) {
          // Change in x was too small
          retCode = TERM_ABSX;
        } else if (_itNum >= _conv_opts.maxIts) {
          // Maximum number of iterations hit
          retCode = TERM_MAXIT;
        } else if (rel_obj_decrease() < _conv_opts.tolRelF
                   * std::numeric_limits<Scalar>::epsilon()) {
          // Relative improvement in the objective was too small
          retCode = TERM_RELF;
        } else if (rel_grad_norm() < _conv_opts.tolRelGrad
                   * std::numeric_limits<Scalar>::epsilon()) {
          // Relative gradient norm was below threshold
          retCode = TERM_RELGRAD;
        } else {
          // Step was successful; more progress to be made
          retCode = TERM_SUCCESS;
        }

        return retCode;
      }
      // Iterate from x0 until a termination condition triggers; on return,
      // x0 holds the final iterate.
      int minimize(VectorT &x0) {
        int retcode;
        initialize(x0);
        while (!(retcode = step()))
          continue;
        x0 = _xk;
        return retcode;
      }
    };
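    // Usage sketch (not part of the original header): minimizing the convex
    // quadratic f(x) = 0.5 * ||x - 1||^2.  QuadraticFunctor and the variable
    // names are hypothetical; BFGSUpdate_HInv is the dense quasi-Newton
    // updater from stan/optimization/bfgs_update.hpp.  The functor protocol
    // (return 0 on success, nonzero on failure) matches ModelAdaptor below.
    //
    //   struct QuadraticFunctor {
    //     int operator()(const Eigen::VectorXd &x, double &f,
    //                    Eigen::VectorXd &g) {
    //       Eigen::VectorXd d = x - Eigen::VectorXd::Ones(x.size());
    //       f = 0.5 * d.squaredNorm();  // objective value
    //       g = d;                      // its gradient
    //       return 0;                   // 0 signals a successful evaluation
    //     }
    //   };
    //
    //   QuadraticFunctor qf;
    //   BFGSMinimizer<QuadraticFunctor, BFGSUpdate_HInv<> > mini(qf);
    //   Eigen::VectorXd x = Eigen::VectorXd::Zero(5);
    //   int ret = mini.minimize(x);  // x converges to all ones
    //   std::string msg = mini.get_code_string(ret);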
    // Adapts a Stan model to the functor interface that BFGSMinimizer
    // expects: the *negative* log probability is minimized, and the integer
    // return code flags evaluation failures.
    template <typename M>
    class ModelAdaptor {
     private:
      M &_model;
      std::vector<int> _params_i;
      std::ostream *_msgs;
      std::vector<double> _x, _g;
      size_t _fevals;

     public:
      ModelAdaptor(M &model, const std::vector<int> &params_i,
                   std::ostream *msgs)
        : _model(model), _params_i(params_i), _msgs(msgs), _fevals(0) {}

      size_t fevals() const { return _fevals; }

      int operator()(const Eigen::Matrix<double, Eigen::Dynamic, 1> &x,
                     double &f) {
        using Eigen::Matrix;
        using Eigen::Dynamic;
        using stan::math::index_type;
        using stan::model::log_prob_propto;
        typedef typename index_type<Matrix<double, Dynamic, 1> >::type idx_t;

        _x.resize(x.size());
        for (idx_t i = 0; i < x.size(); i++)
          _x[i] = x[i];

        try {
          f = -log_prob_propto<false>(_model, _x, _params_i, _msgs);
        } catch (const std::exception &e) {
          if (_msgs)
            (*_msgs) << e.what() << std::endl;
          return 1;  // exception during evaluation
        }

        if (boost::math::isfinite(f))
          return 0;  // success

        if (_msgs)
          *_msgs << "Error evaluating model log probability: "
                    "Non-finite function evaluation." << std::endl;
        return 2;  // non-finite objective
      }
      int operator()(const Eigen::Matrix<double, Eigen::Dynamic, 1> &x,
                     double &f,
                     Eigen::Matrix<double, Eigen::Dynamic, 1> &g) {
        using Eigen::Matrix;
        using Eigen::Dynamic;
        using stan::math::index_type;
        using stan::model::log_prob_grad;
        typedef typename index_type<Matrix<double, Dynamic, 1> >::type idx_t;

        _x.resize(x.size());
        for (idx_t i = 0; i < x.size(); i++)
          _x[i] = x[i];

        _fevals++;

        try {
          f = -log_prob_grad<true, false>(_model, _x, _params_i, _g, _msgs);
        } catch (const std::exception &e) {
          if (_msgs)
            (*_msgs) << e.what() << std::endl;
          return 1;  // exception during evaluation
        }

        // Negate the gradient (we minimize -log_prob) and check finiteness
        g.resize(_g.size());
        for (size_t i = 0; i < _g.size(); i++) {
          if (!boost::math::isfinite(_g[i])) {
            if (_msgs)
              *_msgs << "Error evaluating model log probability: "
                        "Non-finite gradient." << std::endl;
            return 3;  // non-finite gradient
          }
          g[i] = -_g[i];
        }

        if (boost::math::isfinite(f))
          return 0;  // success

        if (_msgs)
          *_msgs << "Error evaluating model log probability: "
                 << "Non-finite function evaluation." << std::endl;
        return 2;  // non-finite objective
      }

      // Gradient-only evaluation; the function value is discarded.
      int df(const Eigen::Matrix<double, Eigen::Dynamic, 1> &x,
             Eigen::Matrix<double, Eigen::Dynamic, 1> &g) {
        double f;
        return (*this)(x, f, g);
      }
    };
    template <typename M, typename QNUpdateType, typename Scalar = double,
              int DimAtCompile = Eigen::Dynamic>
    class BFGSLineSearch : public BFGSMinimizer<ModelAdaptor<M>, QNUpdateType,
                                                Scalar, DimAtCompile> {
     private:
      ModelAdaptor<M> _adaptor;

     public:
      typedef BFGSMinimizer<ModelAdaptor<M>, QNUpdateType,
                            Scalar, DimAtCompile> BFGSBase;
      typedef typename BFGSBase::VectorT vector_t;
      typedef typename stan::math::index_type<vector_t>::type idx_t;

      // Note: the base class stores a reference to _adaptor, which is
      // constructed after the base; it is not used until initialize().
      BFGSLineSearch(M &model,
                     const std::vector<double> &params_r,
                     const std::vector<int> &params_i,
                     std::ostream *msgs = 0)
        : BFGSBase(_adaptor),
          _adaptor(model, params_i, msgs) {
        initialize(params_r);
      }

      void initialize(const std::vector<double> &params_r) {
        Eigen::Matrix<double, Eigen::Dynamic, 1> x;
        x.resize(params_r.size());
        for (size_t i = 0; i < params_r.size(); i++)
          x[i] = params_r[i];
        BFGSBase::initialize(x);
      }

      size_t grad_evals() { return _adaptor.fevals(); }
      double logp() { return -(this->curr_f()); }
      double grad_norm() { return this->curr_g().norm(); }

      // Gradient of the *log probability* (sign flipped back from the
      // minimized objective).
      void grad(std::vector<double> &g) {
        const vector_t &cg(this->curr_g());
        g.resize(cg.size());
        for (idx_t i = 0; i < cg.size(); i++)
          g[i] = -cg[i];
      }

      void params_r(std::vector<double> &x) {
        const vector_t &cx(this->curr_x());
        x.resize(cx.size());
        for (idx_t i = 0; i < cx.size(); i++)
          x[i] = cx[i];
      }
    };
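    // Usage sketch (not part of the original header): optimizing a compiled
    // Stan model.  `MyModel` and `my_model` are hypothetical stand-ins for a
    // generated model class and instance; BFGSUpdate_HInv is the dense
    // updater from stan/optimization/bfgs_update.hpp.
    //
    //   std::vector<double> params_r(my_model.num_params_r(), 0.0);
    //   std::vector<int> params_i;
    //   BFGSLineSearch<MyModel, BFGSUpdate_HInv<> >
    //       bfgs(my_model, params_r, params_i, &std::cout);
    //   int ret = 0;
    //   while ((ret = bfgs.step()) == 0)
    //     continue;                // iterate until a termination code fires
    //   bfgs.params_r(params_r);   // recover the mode
    //   double lp = bfgs.logp();   // log probability at the mode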
  }  // namespace optimization
}  // namespace stan

#endif