Point Cloud Library (PCL)  1.15.1-dev
bfgs.h
1 #pragma once
2 #include <pcl/pcl_macros.h>
3 
4 #if defined __GNUC__
5 #pragma GCC system_header
6 #endif
7 
8 #include <unsupported/Eigen/Polynomials> // for PolynomialSolver, PolynomialSolverBase
9 
10 #include <cassert> // for assert
11 
12 namespace Eigen {
13 template <typename _Scalar>
14 class PolynomialSolver<_Scalar, 2> : public PolynomialSolverBase<_Scalar, 2> {
15 public:
16  using PS_Base = PolynomialSolverBase<_Scalar, 2>;
17  EIGEN_POLYNOMIAL_SOLVER_BASE_INHERITED_TYPES(PS_Base)
18 
19 public:
20  virtual ~PolynomialSolver() = default;
21 
22  template <typename OtherPolynomial>
23  inline PolynomialSolver(const OtherPolynomial& poly, bool& hasRealRoot)
24  {
25  compute(poly, hasRealRoot);
26  }
27 
28  /** Computes the complex roots of a new polynomial. */
29  template <typename OtherPolynomial>
30  void
31  compute(const OtherPolynomial& poly, bool& hasRealRoot)
32  {
33  constexpr Scalar ZERO(0);
34  Scalar a2(2 * poly[2]);
35  assert(ZERO != poly[poly.size() - 1]);
36  Scalar discriminant((poly[1] * poly[1]) - (4 * poly[0] * poly[2]));
37  if (ZERO < discriminant) {
38  Scalar discriminant_root(std::sqrt(discriminant));
39  m_roots[0] = (-poly[1] - discriminant_root) / (a2);
40  m_roots[1] = (-poly[1] + discriminant_root) / (a2);
41  hasRealRoot = true;
42  }
43  else {
44  if (ZERO == discriminant) {
45  m_roots.resize(1);
46  m_roots[0] = -poly[1] / a2;
47  hasRealRoot = true;
48  }
49  else {
50  Scalar discriminant_root(std::sqrt(-discriminant));
51  m_roots[0] = RootType(-poly[1] / a2, -discriminant_root / a2);
52  m_roots[1] = RootType(-poly[1] / a2, discriminant_root / a2);
53  hasRealRoot = false;
54  }
55  }
56  }
57 
58  template <typename OtherPolynomial>
59  void
60  compute(const OtherPolynomial& poly)
61  {
62  bool hasRealRoot;
63  compute(poly, hasRealRoot);
64  }
65 
66 protected:
67  using PS_Base::m_roots;
68 };
69 } // namespace Eigen
70 
71 namespace BFGSSpace {
72 enum Status {
74  NotStarted = -2,
75  Running = -1,
76  Success = 0,
77  NoProgress = 1
78 };
79 }
80 
81 template <typename _Scalar, int NX = Eigen::Dynamic>
83  using Scalar = _Scalar;
84  enum { InputsAtCompileTime = NX };
85  using VectorType = Eigen::Matrix<Scalar, InputsAtCompileTime, 1>;
86 
87  const int m_inputs;
88 
91 
92  virtual ~BFGSDummyFunctor() = default;
93  int
94  inputs() const
95  {
96  return m_inputs;
97  }
98 
99  virtual double
100  operator()(const VectorType& x) = 0;
101  virtual void
102  df(const VectorType& x, VectorType& df) = 0;
103  virtual void
104  fdf(const VectorType& x, Scalar& f, VectorType& df) = 0;
105  virtual BFGSSpace::Status
106  checkGradient(const VectorType& /*g*/)
107  {
108  return BFGSSpace::NotStarted;
109  };
110 };
111 
112 /**
113  * BFGS stands for Broyden–Fletcher–Goldfarb–Shanno (BFGS) method for solving
114  * unconstrained nonlinear optimization problems.
115  * For further details please visit: https://en.wikipedia.org/wiki/BFGS_method
116  * The method provided here is almost similar to the one provided by GSL.
117  * It reproduces Fletcher's original algorithm in Practical Methods of Optimization
118  * algorithms : 2.6.2 and 2.6.4 and uses the same politics in GSL with cubic
119  * interpolation whenever it is possible else falls to quadratic interpolation for
120  * alpha parameter.
121  */
122 template <typename FunctorType>
123 class BFGS {
124 public:
125  using Scalar = typename FunctorType::Scalar;
126  using FVectorType = typename FunctorType::VectorType;
127 
128  BFGS(FunctorType& _functor) : pnorm(0), g0norm(0), iter(-1), functor(_functor) {}
129 
130  using Index = Eigen::DenseIndex;
131 
132  struct Parameters {
134  : max_iters(400)
135  , bracket_iters(100)
136  , section_iters(100)
137  , rho(0.01)
138  , sigma(0.01)
139  , tau1(9)
140  , tau2(0.05)
141  , tau3(0.5)
142  , step_size(1)
143  , order(3)
144  {}
145  Index max_iters; // maximum number of function evaluation
155  };
156 
158  minimize(FVectorType& x);
164  testGradient();
165  void
167  {
169  }
170 
174 
175 private:
176  BFGS&
177  operator=(const BFGS&);
179  lineSearch(Scalar rho,
180  Scalar sigma,
181  Scalar tau1,
182  Scalar tau2,
183  Scalar tau3,
184  int order,
185  Scalar alpha1,
186  Scalar& alpha_new);
187  Scalar
188  interpolate(Scalar a,
189  Scalar fa,
190  Scalar fpa,
191  Scalar b,
192  Scalar fb,
193  Scalar fpb,
194  Scalar xmin,
195  Scalar xmax,
196  int order);
197  void
198  checkExtremum(const Eigen::Matrix<Scalar, 4, 1>& coefficients,
199  Scalar x,
200  Scalar& xmin,
201  Scalar& fmin);
202  void
203  moveTo(Scalar alpha);
204  Scalar
205  slope();
206  Scalar
207  applyF(Scalar alpha);
208  Scalar
209  applyDF(Scalar alpha);
210  void
211  applyFDF(Scalar alpha, Scalar& f, Scalar& df);
212  void
213  updatePosition(Scalar alpha, FVectorType& x, Scalar& f, FVectorType& g);
214  void
215  changeDirection();
216 
217  Scalar delta_f, fp0;
218  FVectorType x0, dx0, dg0, g0, dx, p;
219  Scalar pnorm, g0norm;
220 
221  Scalar f_alpha;
222  Scalar df_alpha;
223  FVectorType x_alpha;
224  FVectorType g_alpha;
225 
226  // cache "keys"
227  Scalar f_cache_key;
228  Scalar df_cache_key;
229  Scalar x_cache_key;
230  Scalar g_cache_key;
231 
232  Index iter;
233  FunctorType& functor;
234 };
235 
236 template <typename FunctorType>
237 void
238 BFGS<FunctorType>::checkExtremum(const Eigen::Matrix<Scalar, 4, 1>& coefficients,
239  Scalar x,
240  Scalar& xmin,
241  Scalar& fmin)
242 {
243  Scalar y = Eigen::poly_eval(coefficients, x);
244  if (y < fmin) {
245  xmin = x;
246  fmin = y;
247  }
248 }
249 
250 template <typename FunctorType>
251 void
252 BFGS<FunctorType>::moveTo(Scalar alpha)
253 {
254  x_alpha = x0 + alpha * p;
255  x_cache_key = alpha;
256 }
257 
258 template <typename FunctorType>
261 {
262  return (g_alpha.dot(p));
263 }
264 
265 template <typename FunctorType>
267 BFGS<FunctorType>::applyF(Scalar alpha)
268 {
269  if (alpha == f_cache_key)
270  return f_alpha;
271  moveTo(alpha);
272  f_alpha = functor(x_alpha);
273  f_cache_key = alpha;
274  return (f_alpha);
275 }
276 
277 template <typename FunctorType>
279 BFGS<FunctorType>::applyDF(Scalar alpha)
280 {
281  if (alpha == df_cache_key)
282  return df_alpha;
283  moveTo(alpha);
284  if (alpha != g_cache_key) {
285  functor.df(x_alpha, g_alpha);
286  g_cache_key = alpha;
287  }
288  df_alpha = slope();
289  df_cache_key = alpha;
290  return (df_alpha);
291 }
292 
293 template <typename FunctorType>
294 void
295 BFGS<FunctorType>::applyFDF(Scalar alpha, Scalar& f, Scalar& df)
296 {
297  if (alpha == f_cache_key && alpha == df_cache_key) {
298  f = f_alpha;
299  df = df_alpha;
300  return;
301  }
302 
303  if (alpha == f_cache_key || alpha == df_cache_key) {
304  f = applyF(alpha);
305  df = applyDF(alpha);
306  return;
307  }
308 
309  moveTo(alpha);
310  functor.fdf(x_alpha, f_alpha, g_alpha);
311  f_cache_key = alpha;
312  g_cache_key = alpha;
313  df_alpha = slope();
314  df_cache_key = alpha;
315  f = f_alpha;
316  df = df_alpha;
317 }
318 
319 template <typename FunctorType>
320 void
322  FVectorType& x,
323  Scalar& f,
324  FVectorType& g)
325 {
326  {
327  Scalar f_alpha, df_alpha;
328  applyFDF(alpha, f_alpha, df_alpha);
329  };
330 
331  f = f_alpha;
332  x = x_alpha;
333  g = g_alpha;
334 }
335 
336 template <typename FunctorType>
337 void
339 {
340  x_alpha = x0;
341  x_cache_key = 0.0;
342  f_cache_key = 0.0;
343  g_alpha = g0;
344  g_cache_key = 0.0;
345  df_alpha = slope();
346  df_cache_key = 0.0;
347 }
348 
349 template <typename FunctorType>
352 {
353  BFGSSpace::Status status = minimizeInit(x);
354  do {
355  status = minimizeOneStep(x);
356  ++iter;
357  } while (status == BFGSSpace::Success && iter < parameters.max_iters);
358  return status;
359 }
360 
361 template <typename FunctorType>
364 {
365  iter = 0;
366  delta_f = 0;
367  dx.setZero();
368  functor.fdf(x, f, gradient);
369  x0 = x;
370  g0 = gradient;
371  g0norm = g0.norm();
372  p = gradient * -1 / g0norm;
373  pnorm = p.norm();
374  fp0 = -g0norm;
375 
376  {
377  x_alpha = x0;
378  x_cache_key = 0;
379 
380  f_alpha = f;
381  f_cache_key = 0;
382 
383  g_alpha = g0;
384  g_cache_key = 0;
385 
386  df_alpha = slope();
387  df_cache_key = 0;
388  }
389 
390  return BFGSSpace::NotStarted;
391 }
392 
393 template <typename FunctorType>
396 {
397  Scalar alpha = 0.0, alpha1;
398  Scalar f0 = f;
399  if (pnorm == 0.0 || g0norm == 0.0 || fp0 == 0) {
400  dx.setZero();
401  return BFGSSpace::NoProgress;
402  }
403 
404  if (delta_f < 0) {
405  Scalar del =
406  std::max(-delta_f, 10 * std::numeric_limits<Scalar>::epsilon() * std::abs(f0));
407  alpha1 = std::min(1.0, 2.0 * del / (-fp0));
408  }
409  else
410  alpha1 = std::abs(parameters.step_size);
411 
412  BFGSSpace::Status status = lineSearch(parameters.rho,
413  parameters.sigma,
414  parameters.tau1,
415  parameters.tau2,
416  parameters.tau3,
417  parameters.order,
418  alpha1,
419  alpha);
420 
421  if (status != BFGSSpace::Success)
422  return status;
423 
424  updatePosition(alpha, x, f, gradient);
425 
426  delta_f = f - f0;
427 
428  /* Choose a new direction for the next step */
429  {
430  /* This is the BFGS update: */
431  /* p' = g1 - A dx - B dg */
432  /* A = - (1+ dg.dg/dx.dg) B + dg.g/dx.dg */
433  /* B = dx.g/dx.dg */
434 
435  Scalar dxg, dgg, dxdg, dgnorm, A, B;
436 
437  /* dx0 = x - x0 */
438  dx0 = x - x0;
439  dx = dx0; /* keep a copy */
440 
441  /* dg0 = g - g0 */
442  dg0 = gradient - g0;
443  dxg = dx0.dot(gradient);
444  dgg = dg0.dot(gradient);
445  dxdg = dx0.dot(dg0);
446  dgnorm = dg0.norm();
447 
448  if (dxdg != 0) {
449  B = dxg / dxdg;
450  A = -(1.0 + dgnorm * dgnorm / dxdg) * B + dgg / dxdg;
451  }
452  else {
453  B = 0;
454  A = 0;
455  }
456 
457  p = -A * dx0;
458  p += gradient;
459  p += -B * dg0;
460  }
461 
462  g0 = gradient;
463  x0 = x;
464  g0norm = g0.norm();
465  pnorm = p.norm();
466 
467  Scalar dir = ((p.dot(gradient)) > 0) ? -1.0 : 1.0;
468  p *= dir / pnorm;
469  pnorm = p.norm();
470  fp0 = p.dot(g0);
471 
472  changeDirection();
473  return BFGSSpace::Success;
474 }
475 
476 template <typename FunctorType>
477 typename BFGSSpace::Status
479 {
480  return functor.checkGradient(gradient);
481 }
482 
483 template <typename FunctorType>
486  Scalar fa,
487  Scalar fpa,
488  Scalar b,
489  Scalar fb,
490  Scalar fpb,
491  Scalar xmin,
492  Scalar xmax,
493  int order)
494 {
495  /* Map [a,b] to [0,1] */
496  Scalar y, alpha, ymin, ymax, fmin;
497 
498  ymin = (xmin - a) / (b - a);
499  ymax = (xmax - a) / (b - a);
500 
501  // Ensure ymin <= ymax
502  if (ymin > ymax) {
503  Scalar tmp = ymin;
504  ymin = ymax;
505  ymax = tmp;
506  };
507 
508  if (order > 2 && !(fpb != fpa) && fpb != std::numeric_limits<Scalar>::infinity()) {
509  fpa = fpa * (b - a);
510  fpb = fpb * (b - a);
511 
512  Scalar eta = 3 * (fb - fa) - 2 * fpa - fpb;
513  Scalar xi = fpa + fpb - 2 * (fb - fa);
514  Scalar c0 = fa, c1 = fpa, c2 = eta, c3 = xi;
515  Scalar y0, y1;
516  Eigen::Matrix<Scalar, 4, 1> coefficients;
517  coefficients << c0, c1, c2, c3;
518 
519  y = ymin;
520  // Evaluate the cubic polyinomial at ymin;
521  fmin = Eigen::poly_eval(coefficients, ymin);
522  checkExtremum(coefficients, ymax, y, fmin);
523  {
524  // Solve quadratic polynomial for the derivate
525  Eigen::Matrix<Scalar, 3, 1> coefficients2;
526  coefficients2 << c1, 2 * c2, 3 * c3;
527  bool real_roots;
528  Eigen::PolynomialSolver<Scalar, 2> solver(coefficients2, real_roots);
529  if (real_roots) {
530  if ((solver.roots()).size() == 2) /* found 2 roots */
531  {
532  y0 = std::real(solver.roots()[0]);
533  y1 = std::real(solver.roots()[1]);
534  if (y0 > y1) {
535  Scalar tmp(y0);
536  y0 = y1;
537  y1 = tmp;
538  }
539  if (y0 > ymin && y0 < ymax)
540  checkExtremum(coefficients, y0, y, fmin);
541  if (y1 > ymin && y1 < ymax)
542  checkExtremum(coefficients, y1, y, fmin);
543  }
544  else if ((solver.roots()).size() == 1) /* found 1 root */
545  {
546  y0 = std::real(solver.roots()[0]);
547  if (y0 > ymin && y0 < ymax)
548  checkExtremum(coefficients, y0, y, fmin);
549  }
550  }
551  }
552  }
553  else {
554  fpa = fpa * (b - a);
555  Scalar fl = fa + ymin * (fpa + ymin * (fb - fa - fpa));
556  Scalar fh = fa + ymax * (fpa + ymax * (fb - fa - fpa));
557  Scalar c = 2 * (fb - fa - fpa); /* curvature */
558  y = ymin;
559  fmin = fl;
560 
561  if (fh < fmin) {
562  y = ymax;
563  fmin = fh;
564  }
565 
566  if (c > a) /* positive curvature required for a minimum */
567  {
568  Scalar z = -fpa / c; /* location of minimum */
569  if (z > ymin && z < ymax) {
570  Scalar f = fa + z * (fpa + z * (fb - fa - fpa));
571  if (f < fmin) {
572  y = z;
573  fmin = f;
574  };
575  }
576  }
577  }
578 
579  alpha = a + y * (b - a);
580  return alpha;
581 }
582 
583 template <typename FunctorType>
586  Scalar sigma,
587  Scalar tau1,
588  Scalar tau2,
589  Scalar tau3,
590  int order,
591  Scalar alpha1,
592  Scalar& alpha_new)
593 {
594  Scalar f0, fp0, falpha, falpha_prev, fpalpha, fpalpha_prev, delta, alpha_next;
595  Scalar alpha = alpha1, alpha_prev = 0.0;
596  Scalar a, b, fa, fb, fpa, fpb;
597  Index i = 0;
598 
599  applyFDF(0.0, f0, fp0);
600 
601  falpha_prev = f0;
602  fpalpha_prev = fp0;
603 
604  /* Avoid uninitialized variables morning */
605  a = 0.0;
606  b = alpha;
607  fa = f0;
608  fb = 0.0;
609  fpa = fp0;
610  fpb = 0.0;
611 
612  /* Begin bracketing */
613 
614  while (i++ < parameters.bracket_iters) {
615  falpha = applyF(alpha);
616 
617  if (falpha > f0 + alpha * rho * fp0 || falpha >= falpha_prev) {
618  a = alpha_prev;
619  fa = falpha_prev;
620  fpa = fpalpha_prev;
621  b = alpha;
622  fb = falpha;
623  fpb = std::numeric_limits<Scalar>::quiet_NaN();
624  break;
625  }
626 
627  fpalpha = applyDF(alpha);
628 
629  /* Fletcher's sigma test */
630  if (std::abs(fpalpha) <= -sigma * fp0) {
631  alpha_new = alpha;
632  return BFGSSpace::Success;
633  }
634 
635  if (fpalpha >= 0) {
636  a = alpha;
637  fa = falpha;
638  fpa = fpalpha;
639  b = alpha_prev;
640  fb = falpha_prev;
641  fpb = fpalpha_prev;
642  break; /* goto sectioning */
643  }
644 
645  delta = alpha - alpha_prev;
646 
647  {
648  Scalar lower = alpha + delta;
649  Scalar upper = alpha + tau1 * delta;
650 
651  alpha_next = interpolate(alpha_prev,
652  falpha_prev,
653  fpalpha_prev,
654  alpha,
655  falpha,
656  fpalpha,
657  lower,
658  upper,
659  order);
660  }
661 
662  alpha_prev = alpha;
663  falpha_prev = falpha;
664  fpalpha_prev = fpalpha;
665  alpha = alpha_next;
666  }
667  /* Sectioning of bracket [a,b] */
668  while (i++ < parameters.section_iters) {
669  delta = b - a;
670 
671  {
672  Scalar lower = a + tau2 * delta;
673  Scalar upper = b - tau3 * delta;
674 
675  alpha = interpolate(a, fa, fpa, b, fb, fpb, lower, upper, order);
676  }
677  falpha = applyF(alpha);
678  if ((a - alpha) * fpa <= std::numeric_limits<Scalar>::epsilon()) {
679  /* roundoff prevents progress */
680  return BFGSSpace::NoProgress;
681  };
682 
683  if (falpha > f0 + rho * alpha * fp0 || falpha >= fa) {
684  /* a_next = a; */
685  b = alpha;
686  fb = falpha;
687  fpb = std::numeric_limits<Scalar>::quiet_NaN();
688  }
689  else {
690  fpalpha = applyDF(alpha);
691 
692  if (std::abs(fpalpha) <= -sigma * fp0) {
693  alpha_new = alpha;
694  return BFGSSpace::Success; /* terminate */
695  }
696 
697  if (((b - a) >= 0 && fpalpha >= 0) || ((b - a) <= 0 && fpalpha <= 0)) {
698  b = a;
699  fb = fa;
700  fpb = fpa;
701  a = alpha;
702  fa = falpha;
703  fpa = fpalpha;
704  }
705  else {
706  a = alpha;
707  fa = falpha;
708  fpa = fpalpha;
709  }
710  }
711  }
712  return BFGSSpace::Success;
713 }
BFGS stands for Broyden–Fletcher–Goldfarb–Shanno (BFGS) method for solving unconstrained nonlinear op...
Definition: bfgs.h:123
Scalar f
Definition: bfgs.h:172
BFGSSpace::Status minimize(FVectorType &x)
Definition: bfgs.h:351
BFGSSpace::Status testGradient()
Definition: bfgs.h:478
typename FunctorType::Scalar Scalar
Definition: bfgs.h:125
Eigen::DenseIndex Index
Definition: bfgs.h:130
BFGSSpace::Status minimizeInit(FVectorType &x)
Definition: bfgs.h:363
BFGSSpace::Status minimizeOneStep(FVectorType &x)
Definition: bfgs.h:395
FVectorType gradient
Definition: bfgs.h:173
Parameters parameters
Definition: bfgs.h:171
typename FunctorType::VectorType FVectorType
Definition: bfgs.h:126
void resetParameters(void)
Definition: bfgs.h:166
BFGS(FunctorType &_functor)
Definition: bfgs.h:128
void compute(const OtherPolynomial &poly)
Definition: bfgs.h:60
void compute(const OtherPolynomial &poly, bool &hasRealRoot)
Computes the complex roots of a new polynomial.
Definition: bfgs.h:31
PolynomialSolverBase< _Scalar, 2 > PS_Base
Definition: bfgs.h:16
PolynomialSolver(const OtherPolynomial &poly, bool &hasRealRoot)
Definition: bfgs.h:23
@ B
Definition: norms.h:54
Definition: bfgs.h:71
Status
Definition: bfgs.h:72
@ NoProgress
Definition: bfgs.h:77
@ NotStarted
Definition: bfgs.h:74
@ Running
Definition: bfgs.h:75
@ Success
Definition: bfgs.h:76
@ NegativeGradientEpsilon
Definition: bfgs.h:73
Definition: bfgs.h:12
Defines all the PCL and non-PCL macros used.
Index max_iters
Definition: bfgs.h:145
Scalar sigma
Definition: bfgs.h:149
Index section_iters
Definition: bfgs.h:147
Scalar tau2
Definition: bfgs.h:151
Scalar rho
Definition: bfgs.h:148
Scalar step_size
Definition: bfgs.h:153
Index bracket_iters
Definition: bfgs.h:146
Scalar tau1
Definition: bfgs.h:150
Scalar tau3
Definition: bfgs.h:152
Index order
Definition: bfgs.h:154
int inputs() const
Definition: bfgs.h:94
virtual void fdf(const VectorType &x, Scalar &f, VectorType &df)=0
BFGSDummyFunctor()
Definition: bfgs.h:89
virtual ~BFGSDummyFunctor()=default
@ InputsAtCompileTime
Definition: bfgs.h:84
const int m_inputs
Definition: bfgs.h:87
virtual BFGSSpace::Status checkGradient(const VectorType &)
Definition: bfgs.h:106
_Scalar Scalar
Definition: bfgs.h:83
BFGSDummyFunctor(int inputs)
Definition: bfgs.h:90
virtual void df(const VectorType &x, VectorType &df)=0
Eigen::Matrix< Scalar, InputsAtCompileTime, 1 > VectorType
Definition: bfgs.h:85
virtual double operator()(const VectorType &x)=0