Point Cloud Library (PCL)  1.13.0-dev
bfgs.h
1 #pragma once
2 #include <pcl/pcl_macros.h>
3 
4 #if defined __GNUC__
5 #pragma GCC system_header
6 #endif
7 
8 #include <unsupported/Eigen/Polynomials> // for PolynomialSolver, PolynomialSolverBase
9 
10 namespace Eigen {
11 template <typename _Scalar>
12 class PolynomialSolver<_Scalar, 2> : public PolynomialSolverBase<_Scalar, 2> {
13 public:
14  using PS_Base = PolynomialSolverBase<_Scalar, 2>;
15  EIGEN_POLYNOMIAL_SOLVER_BASE_INHERITED_TYPES(PS_Base)
16 
17 public:
18  virtual ~PolynomialSolver() = default;
19 
20  template <typename OtherPolynomial>
21  inline PolynomialSolver(const OtherPolynomial& poly, bool& hasRealRoot)
22  {
23  compute(poly, hasRealRoot);
24  }
25 
26  /** Computes the complex roots of a new polynomial. */
27  template <typename OtherPolynomial>
28  void
29  compute(const OtherPolynomial& poly, bool& hasRealRoot)
30  {
31  constexpr Scalar ZERO(0);
32  Scalar a2(2 * poly[2]);
33  assert(ZERO != poly[poly.size() - 1]);
34  Scalar discriminant((poly[1] * poly[1]) - (4 * poly[0] * poly[2]));
35  if (ZERO < discriminant) {
36  Scalar discriminant_root(std::sqrt(discriminant));
37  m_roots[0] = (-poly[1] - discriminant_root) / (a2);
38  m_roots[1] = (-poly[1] + discriminant_root) / (a2);
39  hasRealRoot = true;
40  }
41  else {
42  if (ZERO == discriminant) {
43  m_roots.resize(1);
44  m_roots[0] = -poly[1] / a2;
45  hasRealRoot = true;
46  }
47  else {
48  Scalar discriminant_root(std::sqrt(-discriminant));
49  m_roots[0] = RootType(-poly[1] / a2, -discriminant_root / a2);
50  m_roots[1] = RootType(-poly[1] / a2, discriminant_root / a2);
51  hasRealRoot = false;
52  }
53  }
54  }
55 
56  template <typename OtherPolynomial>
57  void
58  compute(const OtherPolynomial& poly)
59  {
60  bool hasRealRoot;
61  compute(poly, hasRealRoot);
62  }
63 
64 protected:
65  using PS_Base::m_roots;
66 };
67 } // namespace Eigen
68 
69 namespace BFGSSpace {
70 enum Status {
72  NotStarted = -2,
73  Running = -1,
74  Success = 0,
75  NoProgress = 1
76 };
77 }
78 
79 template <typename _Scalar, int NX = Eigen::Dynamic>
81  using Scalar = _Scalar;
82  enum { InputsAtCompileTime = NX };
83  using VectorType = Eigen::Matrix<Scalar, InputsAtCompileTime, 1>;
84 
85  const int m_inputs;
86 
89 
90  virtual ~BFGSDummyFunctor() = default;
91  int
92  inputs() const
93  {
94  return m_inputs;
95  }
96 
97  virtual double
98  operator()(const VectorType& x) = 0;
99  virtual void
100  df(const VectorType& x, VectorType& df) = 0;
101  virtual void
102  fdf(const VectorType& x, Scalar& f, VectorType& df) = 0;
103  virtual BFGSSpace::Status
104  checkGradient(const VectorType& /*g*/)
105  {
106  return BFGSSpace::NotStarted;
107  };
108 };
109 
110 /**
111  * BFGS stands for Broyden–Fletcher–Goldfarb–Shanno (BFGS) method for solving
112  * unconstrained nonlinear optimization problems.
113  * For further details please visit: https://en.wikipedia.org/wiki/BFGS_method
114  * The method provided here is almost similar to the one provided by GSL.
115  * It reproduces Fletcher's original algorithm in Practical Methods of Optimization
116  * algorithms : 2.6.2 and 2.6.4 and uses the same politics in GSL with cubic
117  * interpolation whenever it is possible else falls to quadratic interpolation for
118  * alpha parameter.
119  */
120 template <typename FunctorType>
121 class BFGS {
122 public:
123  using Scalar = typename FunctorType::Scalar;
124  using FVectorType = typename FunctorType::VectorType;
125 
126  BFGS(FunctorType& _functor) : pnorm(0), g0norm(0), iter(-1), functor(_functor) {}
127 
128  using Index = Eigen::DenseIndex;
129 
130  struct Parameters {
132  : max_iters(400)
133  , bracket_iters(100)
134  , section_iters(100)
135  , rho(0.01)
136  , sigma(0.01)
137  , tau1(9)
138  , tau2(0.05)
139  , tau3(0.5)
140  , step_size(1)
141  , order(3)
142  {}
143  Index max_iters; // maximum number of function evaluation
153  };
154 
156  minimize(FVectorType& x);
162  testGradient();
163  void
165  {
167  }
168 
172 
173 private:
174  BFGS&
175  operator=(const BFGS&);
177  lineSearch(Scalar rho,
178  Scalar sigma,
179  Scalar tau1,
180  Scalar tau2,
181  Scalar tau3,
182  int order,
183  Scalar alpha1,
184  Scalar& alpha_new);
185  Scalar
186  interpolate(Scalar a,
187  Scalar fa,
188  Scalar fpa,
189  Scalar b,
190  Scalar fb,
191  Scalar fpb,
192  Scalar xmin,
193  Scalar xmax,
194  int order);
195  void
196  checkExtremum(const Eigen::Matrix<Scalar, 4, 1>& coefficients,
197  Scalar x,
198  Scalar& xmin,
199  Scalar& fmin);
200  void
201  moveTo(Scalar alpha);
202  Scalar
203  slope();
204  Scalar
205  applyF(Scalar alpha);
206  Scalar
207  applyDF(Scalar alpha);
208  void
209  applyFDF(Scalar alpha, Scalar& f, Scalar& df);
210  void
211  updatePosition(Scalar alpha, FVectorType& x, Scalar& f, FVectorType& g);
212  void
213  changeDirection();
214 
215  Scalar delta_f, fp0;
216  FVectorType x0, dx0, dg0, g0, dx, p;
217  Scalar pnorm, g0norm;
218 
219  Scalar f_alpha;
220  Scalar df_alpha;
221  FVectorType x_alpha;
222  FVectorType g_alpha;
223 
224  // cache "keys"
225  Scalar f_cache_key;
226  Scalar df_cache_key;
227  Scalar x_cache_key;
228  Scalar g_cache_key;
229 
230  Index iter;
231  FunctorType& functor;
232 };
233 
234 template <typename FunctorType>
235 void
236 BFGS<FunctorType>::checkExtremum(const Eigen::Matrix<Scalar, 4, 1>& coefficients,
237  Scalar x,
238  Scalar& xmin,
239  Scalar& fmin)
240 {
241  Scalar y = Eigen::poly_eval(coefficients, x);
242  if (y < fmin) {
243  xmin = x;
244  fmin = y;
245  }
246 }
247 
248 template <typename FunctorType>
249 void
250 BFGS<FunctorType>::moveTo(Scalar alpha)
251 {
252  x_alpha = x0 + alpha * p;
253  x_cache_key = alpha;
254 }
255 
256 template <typename FunctorType>
259 {
260  return (g_alpha.dot(p));
261 }
262 
263 template <typename FunctorType>
265 BFGS<FunctorType>::applyF(Scalar alpha)
266 {
267  if (alpha == f_cache_key)
268  return f_alpha;
269  moveTo(alpha);
270  f_alpha = functor(x_alpha);
271  f_cache_key = alpha;
272  return (f_alpha);
273 }
274 
275 template <typename FunctorType>
277 BFGS<FunctorType>::applyDF(Scalar alpha)
278 {
279  if (alpha == df_cache_key)
280  return df_alpha;
281  moveTo(alpha);
282  if (alpha != g_cache_key) {
283  functor.df(x_alpha, g_alpha);
284  g_cache_key = alpha;
285  }
286  df_alpha = slope();
287  df_cache_key = alpha;
288  return (df_alpha);
289 }
290 
291 template <typename FunctorType>
292 void
293 BFGS<FunctorType>::applyFDF(Scalar alpha, Scalar& f, Scalar& df)
294 {
295  if (alpha == f_cache_key && alpha == df_cache_key) {
296  f = f_alpha;
297  df = df_alpha;
298  return;
299  }
300 
301  if (alpha == f_cache_key || alpha == df_cache_key) {
302  f = applyF(alpha);
303  df = applyDF(alpha);
304  return;
305  }
306 
307  moveTo(alpha);
308  functor.fdf(x_alpha, f_alpha, g_alpha);
309  f_cache_key = alpha;
310  g_cache_key = alpha;
311  df_alpha = slope();
312  df_cache_key = alpha;
313  f = f_alpha;
314  df = df_alpha;
315 }
316 
317 template <typename FunctorType>
318 void
320  FVectorType& x,
321  Scalar& f,
322  FVectorType& g)
323 {
324  {
325  Scalar f_alpha, df_alpha;
326  applyFDF(alpha, f_alpha, df_alpha);
327  };
328 
329  f = f_alpha;
330  x = x_alpha;
331  g = g_alpha;
332 }
333 
334 template <typename FunctorType>
335 void
337 {
338  x_alpha = x0;
339  x_cache_key = 0.0;
340  f_cache_key = 0.0;
341  g_alpha = g0;
342  g_cache_key = 0.0;
343  df_alpha = slope();
344  df_cache_key = 0.0;
345 }
346 
347 template <typename FunctorType>
350 {
351  BFGSSpace::Status status = minimizeInit(x);
352  do {
353  status = minimizeOneStep(x);
354  ++iter;
355  } while (status == BFGSSpace::Success && iter < parameters.max_iters);
356  return status;
357 }
358 
359 template <typename FunctorType>
362 {
363  iter = 0;
364  delta_f = 0;
365  dx.setZero();
366  functor.fdf(x, f, gradient);
367  x0 = x;
368  g0 = gradient;
369  g0norm = g0.norm();
370  p = gradient * -1 / g0norm;
371  pnorm = p.norm();
372  fp0 = -g0norm;
373 
374  {
375  x_alpha = x0;
376  x_cache_key = 0;
377 
378  f_alpha = f;
379  f_cache_key = 0;
380 
381  g_alpha = g0;
382  g_cache_key = 0;
383 
384  df_alpha = slope();
385  df_cache_key = 0;
386  }
387 
388  return BFGSSpace::NotStarted;
389 }
390 
391 template <typename FunctorType>
394 {
395  Scalar alpha = 0.0, alpha1;
396  Scalar f0 = f;
397  if (pnorm == 0.0 || g0norm == 0.0 || fp0 == 0) {
398  dx.setZero();
399  return BFGSSpace::NoProgress;
400  }
401 
402  if (delta_f < 0) {
403  Scalar del =
404  std::max(-delta_f, 10 * std::numeric_limits<Scalar>::epsilon() * std::abs(f0));
405  alpha1 = std::min(1.0, 2.0 * del / (-fp0));
406  }
407  else
408  alpha1 = std::abs(parameters.step_size);
409 
410  BFGSSpace::Status status = lineSearch(parameters.rho,
411  parameters.sigma,
412  parameters.tau1,
413  parameters.tau2,
414  parameters.tau3,
415  parameters.order,
416  alpha1,
417  alpha);
418 
419  if (status != BFGSSpace::Success)
420  return status;
421 
422  updatePosition(alpha, x, f, gradient);
423 
424  delta_f = f - f0;
425 
426  /* Choose a new direction for the next step */
427  {
428  /* This is the BFGS update: */
429  /* p' = g1 - A dx - B dg */
430  /* A = - (1+ dg.dg/dx.dg) B + dg.g/dx.dg */
431  /* B = dx.g/dx.dg */
432 
433  Scalar dxg, dgg, dxdg, dgnorm, A, B;
434 
435  /* dx0 = x - x0 */
436  dx0 = x - x0;
437  dx = dx0; /* keep a copy */
438 
439  /* dg0 = g - g0 */
440  dg0 = gradient - g0;
441  dxg = dx0.dot(gradient);
442  dgg = dg0.dot(gradient);
443  dxdg = dx0.dot(dg0);
444  dgnorm = dg0.norm();
445 
446  if (dxdg != 0) {
447  B = dxg / dxdg;
448  A = -(1.0 + dgnorm * dgnorm / dxdg) * B + dgg / dxdg;
449  }
450  else {
451  B = 0;
452  A = 0;
453  }
454 
455  p = -A * dx0;
456  p += gradient;
457  p += -B * dg0;
458  }
459 
460  g0 = gradient;
461  x0 = x;
462  g0norm = g0.norm();
463  pnorm = p.norm();
464 
465  Scalar dir = ((p.dot(gradient)) > 0) ? -1.0 : 1.0;
466  p *= dir / pnorm;
467  pnorm = p.norm();
468  fp0 = p.dot(g0);
469 
470  changeDirection();
471  return BFGSSpace::Success;
472 }
473 
474 template <typename FunctorType>
475 typename BFGSSpace::Status
477 {
478  return functor.checkGradient(gradient);
479 }
480 
481 template <typename FunctorType>
484  Scalar fa,
485  Scalar fpa,
486  Scalar b,
487  Scalar fb,
488  Scalar fpb,
489  Scalar xmin,
490  Scalar xmax,
491  int order)
492 {
493  /* Map [a,b] to [0,1] */
494  Scalar y, alpha, ymin, ymax, fmin;
495 
496  ymin = (xmin - a) / (b - a);
497  ymax = (xmax - a) / (b - a);
498 
499  // Ensure ymin <= ymax
500  if (ymin > ymax) {
501  Scalar tmp = ymin;
502  ymin = ymax;
503  ymax = tmp;
504  };
505 
506  if (order > 2 && !(fpb != fpa) && fpb != std::numeric_limits<Scalar>::infinity()) {
507  fpa = fpa * (b - a);
508  fpb = fpb * (b - a);
509 
510  Scalar eta = 3 * (fb - fa) - 2 * fpa - fpb;
511  Scalar xi = fpa + fpb - 2 * (fb - fa);
512  Scalar c0 = fa, c1 = fpa, c2 = eta, c3 = xi;
513  Scalar y0, y1;
514  Eigen::Matrix<Scalar, 4, 1> coefficients;
515  coefficients << c0, c1, c2, c3;
516 
517  y = ymin;
518  // Evaluate the cubic polyinomial at ymin;
519  fmin = Eigen::poly_eval(coefficients, ymin);
520  checkExtremum(coefficients, ymax, y, fmin);
521  {
522  // Solve quadratic polynomial for the derivate
523  Eigen::Matrix<Scalar, 3, 1> coefficients2;
524  coefficients2 << c1, 2 * c2, 3 * c3;
525  bool real_roots;
526  Eigen::PolynomialSolver<Scalar, 2> solver(coefficients2, real_roots);
527  if (real_roots) {
528  if ((solver.roots()).size() == 2) /* found 2 roots */
529  {
530  y0 = std::real(solver.roots()[0]);
531  y1 = std::real(solver.roots()[1]);
532  if (y0 > y1) {
533  Scalar tmp(y0);
534  y0 = y1;
535  y1 = tmp;
536  }
537  if (y0 > ymin && y0 < ymax)
538  checkExtremum(coefficients, y0, y, fmin);
539  if (y1 > ymin && y1 < ymax)
540  checkExtremum(coefficients, y1, y, fmin);
541  }
542  else if ((solver.roots()).size() == 1) /* found 1 root */
543  {
544  y0 = std::real(solver.roots()[0]);
545  if (y0 > ymin && y0 < ymax)
546  checkExtremum(coefficients, y0, y, fmin);
547  }
548  }
549  }
550  }
551  else {
552  fpa = fpa * (b - a);
553  Scalar fl = fa + ymin * (fpa + ymin * (fb - fa - fpa));
554  Scalar fh = fa + ymax * (fpa + ymax * (fb - fa - fpa));
555  Scalar c = 2 * (fb - fa - fpa); /* curvature */
556  y = ymin;
557  fmin = fl;
558 
559  if (fh < fmin) {
560  y = ymax;
561  fmin = fh;
562  }
563 
564  if (c > a) /* positive curvature required for a minimum */
565  {
566  Scalar z = -fpa / c; /* location of minimum */
567  if (z > ymin && z < ymax) {
568  Scalar f = fa + z * (fpa + z * (fb - fa - fpa));
569  if (f < fmin) {
570  y = z;
571  fmin = f;
572  };
573  }
574  }
575  }
576 
577  alpha = a + y * (b - a);
578  return alpha;
579 }
580 
581 template <typename FunctorType>
584  Scalar sigma,
585  Scalar tau1,
586  Scalar tau2,
587  Scalar tau3,
588  int order,
589  Scalar alpha1,
590  Scalar& alpha_new)
591 {
592  Scalar f0, fp0, falpha, falpha_prev, fpalpha, fpalpha_prev, delta, alpha_next;
593  Scalar alpha = alpha1, alpha_prev = 0.0;
594  Scalar a, b, fa, fb, fpa, fpb;
595  Index i = 0;
596 
597  applyFDF(0.0, f0, fp0);
598 
599  falpha_prev = f0;
600  fpalpha_prev = fp0;
601 
602  /* Avoid uninitialized variables morning */
603  a = 0.0;
604  b = alpha;
605  fa = f0;
606  fb = 0.0;
607  fpa = fp0;
608  fpb = 0.0;
609 
610  /* Begin bracketing */
611 
612  while (i++ < parameters.bracket_iters) {
613  falpha = applyF(alpha);
614 
615  if (falpha > f0 + alpha * rho * fp0 || falpha >= falpha_prev) {
616  a = alpha_prev;
617  fa = falpha_prev;
618  fpa = fpalpha_prev;
619  b = alpha;
620  fb = falpha;
621  fpb = std::numeric_limits<Scalar>::quiet_NaN();
622  break;
623  }
624 
625  fpalpha = applyDF(alpha);
626 
627  /* Fletcher's sigma test */
628  if (std::abs(fpalpha) <= -sigma * fp0) {
629  alpha_new = alpha;
630  return BFGSSpace::Success;
631  }
632 
633  if (fpalpha >= 0) {
634  a = alpha;
635  fa = falpha;
636  fpa = fpalpha;
637  b = alpha_prev;
638  fb = falpha_prev;
639  fpb = fpalpha_prev;
640  break; /* goto sectioning */
641  }
642 
643  delta = alpha - alpha_prev;
644 
645  {
646  Scalar lower = alpha + delta;
647  Scalar upper = alpha + tau1 * delta;
648 
649  alpha_next = interpolate(alpha_prev,
650  falpha_prev,
651  fpalpha_prev,
652  alpha,
653  falpha,
654  fpalpha,
655  lower,
656  upper,
657  order);
658  }
659 
660  alpha_prev = alpha;
661  falpha_prev = falpha;
662  fpalpha_prev = fpalpha;
663  alpha = alpha_next;
664  }
665  /* Sectioning of bracket [a,b] */
666  while (i++ < parameters.section_iters) {
667  delta = b - a;
668 
669  {
670  Scalar lower = a + tau2 * delta;
671  Scalar upper = b - tau3 * delta;
672 
673  alpha = interpolate(a, fa, fpa, b, fb, fpb, lower, upper, order);
674  }
675  falpha = applyF(alpha);
676  if ((a - alpha) * fpa <= std::numeric_limits<Scalar>::epsilon()) {
677  /* roundoff prevents progress */
678  return BFGSSpace::NoProgress;
679  };
680 
681  if (falpha > f0 + rho * alpha * fp0 || falpha >= fa) {
682  /* a_next = a; */
683  b = alpha;
684  fb = falpha;
685  fpb = std::numeric_limits<Scalar>::quiet_NaN();
686  }
687  else {
688  fpalpha = applyDF(alpha);
689 
690  if (std::abs(fpalpha) <= -sigma * fp0) {
691  alpha_new = alpha;
692  return BFGSSpace::Success; /* terminate */
693  }
694 
695  if (((b - a) >= 0 && fpalpha >= 0) || ((b - a) <= 0 && fpalpha <= 0)) {
696  b = a;
697  fb = fa;
698  fpb = fpa;
699  a = alpha;
700  fa = falpha;
701  fpa = fpalpha;
702  }
703  else {
704  a = alpha;
705  fa = falpha;
706  fpa = fpalpha;
707  }
708  }
709  }
710  return BFGSSpace::Success;
711 }
BFGS stands for Broyden–Fletcher–Goldfarb–Shanno (BFGS) method for solving unconstrained nonlinear op...
Definition: bfgs.h:121
Scalar f
Definition: bfgs.h:170
BFGSSpace::Status minimize(FVectorType &x)
Definition: bfgs.h:349
BFGSSpace::Status testGradient()
Definition: bfgs.h:476
typename FunctorType::Scalar Scalar
Definition: bfgs.h:123
Eigen::DenseIndex Index
Definition: bfgs.h:128
BFGSSpace::Status minimizeInit(FVectorType &x)
Definition: bfgs.h:361
BFGSSpace::Status minimizeOneStep(FVectorType &x)
Definition: bfgs.h:393
FVectorType gradient
Definition: bfgs.h:171
Parameters parameters
Definition: bfgs.h:169
typename FunctorType::VectorType FVectorType
Definition: bfgs.h:124
void resetParameters(void)
Definition: bfgs.h:164
BFGS(FunctorType &_functor)
Definition: bfgs.h:126
void compute(const OtherPolynomial &poly)
Definition: bfgs.h:58
void compute(const OtherPolynomial &poly, bool &hasRealRoot)
Computes the complex roots of a new polynomial.
Definition: bfgs.h:29
PolynomialSolverBase< _Scalar, 2 > PS_Base
Definition: bfgs.h:14
PolynomialSolver(const OtherPolynomial &poly, bool &hasRealRoot)
Definition: bfgs.h:21
@ B
Definition: norms.h:54
Definition: bfgs.h:69
Status
Definition: bfgs.h:70
@ NoProgress
Definition: bfgs.h:75
@ NotStarted
Definition: bfgs.h:72
@ Running
Definition: bfgs.h:73
@ Success
Definition: bfgs.h:74
@ NegativeGradientEpsilon
Definition: bfgs.h:71
Definition: bfgs.h:10
Defines all the PCL and non-PCL macros used.
Index max_iters
Definition: bfgs.h:143
Scalar sigma
Definition: bfgs.h:147
Index section_iters
Definition: bfgs.h:145
Scalar tau2
Definition: bfgs.h:149
Scalar rho
Definition: bfgs.h:146
Scalar step_size
Definition: bfgs.h:151
Index bracket_iters
Definition: bfgs.h:144
Scalar tau1
Definition: bfgs.h:148
Scalar tau3
Definition: bfgs.h:150
Index order
Definition: bfgs.h:152
int inputs() const
Definition: bfgs.h:92
virtual void fdf(const VectorType &x, Scalar &f, VectorType &df)=0
BFGSDummyFunctor()
Definition: bfgs.h:87
virtual ~BFGSDummyFunctor()=default
@ InputsAtCompileTime
Definition: bfgs.h:82
const int m_inputs
Definition: bfgs.h:85
virtual BFGSSpace::Status checkGradient(const VectorType &)
Definition: bfgs.h:104
_Scalar Scalar
Definition: bfgs.h:81
BFGSDummyFunctor(int inputs)
Definition: bfgs.h:88
virtual void df(const VectorType &x, VectorType &df)=0
Eigen::Matrix< Scalar, InputsAtCompileTime, 1 > VectorType
Definition: bfgs.h:83
virtual double operator()(const VectorType &x)=0