documentation/gpu_2features_2include_2pcl_2gpu_2features_2device_2eigen_8hpp_source.html

 /*

 * Software License Agreement (BSD License)

 *

 *  Copyright (c) 2011, Willow Garage, Inc.

 *  All rights reserved.

 *

 *  Redistribution and use in source and binary forms, with or without

 *  modification, are permitted provided that the following conditions

 *  are met:

 *

 *   * Redistributions of source code must retain the above copyright

 *     notice, this list of conditions and the following disclaimer.

 *   * Redistributions in binary form must reproduce the above

 *     copyright notice, this list of conditions and the following

 *     disclaimer in the documentation and/or other materials provided

 *     with the distribution.

 *   * Neither the name of Willow Garage, Inc. nor the names of its

 *     contributors may be used to endorse or promote products derived

 *     from this software without specific prior written permission.

 *

 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

 *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

 *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS

 *  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE

 *  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,

 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,

 *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;

 *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER

 *  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT

 *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN

 *  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE

 *  POSSIBILITY OF SUCH DAMAGE.

 *

 *  Author: Anatoly Baskeheev, Itseez Ltd, (myname.mysurname@mycompany.com)

 */


 // This file is part of Eigen, a lightweight C++ template library

 // for linear algebra.

 //

 // Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>

 //

 // Eigen is free software; you can redistribute it and/or

 // modify it under the terms of the GNU Lesser General Public

 // License as published by the Free Software Foundation; either

 // version 3 of the License, or (at your option) any later version.

 //

 // Alternatively, you can redistribute it and/or

 // modify it under the terms of the GNU General Public License as

 // published by the Free Software Foundation; either version 2 of

 // the License, or (at your option) any later version.

 //

 // Eigen is distributed in the hope that it will be useful, but WITHOUT ANY

 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS

 // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the

 // GNU General Public License for more details.

 //

 // You should have received a copy of the GNU Lesser General Public

 // License and a copy of the GNU General Public License along with

 // Eigen. If not, see <http://www.gnu.org/licenses/>.


 // The computeRoots function included in this is based on materials

 // covered by the following copyright and license:

 //

 // Geometric Tools, LLC

 // Copyright (c) 1998-2010

 // Distributed under the Boost Software License, Version 1.0.

 //

 // Permission is hereby granted, free of charge, to any person or organization

 // obtaining a copy of the software and accompanying documentation covered by

 // this license (the "Software") to use, reproduce, display, distribute,

 // execute, and transmit the Software, and to prepare derivative works of the

 // Software, and to permit third-parties to whom the Software is furnished to

 // do so, all subject to the following:

 //

 // The copyright notices in the Software and this entire statement, including

 // the above license grant, this restriction and the following disclaimer,

 // must be included in all copies of the Software, in whole or in part, and

 // all derivative works of the Software, unless such copies or derivative

 // works are solely in the form of machine-executable object code generated by

 // a source language processor.

 //

 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

 // FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT

 // SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE

 // FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,

 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER

 // DEALINGS IN THE SOFTWARE.


 #ifndef PCL_GPU_FEATURES_EIGEN_HPP_

 #define PCL_GPU_FEATURES_EIGEN_HPP_


 #include <limits>


 #include <pcl/gpu/utils/device/algorithm.hpp>

 #include <pcl/gpu/utils/device/vector_math.hpp>


 namespace pcl

 {

     namespace device

     {

         /** \brief Computes the real roots of the quadratic x^2 - b*x + c = 0.
           *
           * The first component of the result is always forced to zero; the two quadratic
           * roots are written to the remaining components with roots.y <= roots.z.
           *
           * \param[in]  b     coefficient of the linear term (sign convention as in the equation above)
           * \param[in]  c     constant term
           * \param[out] roots roots.x = 0, roots.y = (b - sqrt(d)) / 2, roots.z = (b + sqrt(d)) / 2,
           *                   where d = max(b*b - 4*c, 0)
           */
         __device__ __forceinline__ void computeRoots2(const float& b, const float& c, float3& roots)
         {
             roots.x = 0.f;

             // A negative discriminant would mean there are no real roots. That is not
             // expected for the callers of this routine, so it is clamped to zero.
             const float discriminant = b * b - 4.f * c;
             const float sqrt_disc = sqrtf(discriminant < 0.f ? 0.f : discriminant);

             roots.z = 0.5f * (b + sqrt_disc);
             roots.y = 0.5f * (b - sqrt_disc);
         }


         /** \brief Computes the roots of the cubic x^3 - c2*x^2 + c1*x - c0 = 0 in closed form.
           *
           * The roots are returned sorted in increasing order (roots.x <= roots.y <= roots.z).
           * If c0 is (numerically) zero, or if the smallest root comes out non-positive, the
           * result is taken from computeRoots2() instead, which sets roots.x to 0.
           *
           * The trigonometric terms use the fast __cosf / __sinf intrinsics.
           *
           * \param[in]  c0    constant coefficient
           * \param[in]  c1    linear coefficient
           * \param[in]  c2    quadratic coefficient
           * \param[out] roots the three roots
           */
         __device__ __forceinline__ void computeRoots3(float c0, float c1, float c2, float3& roots)
         {
             // A vanishing constant term means one root is 0, so only a quadratic is left.
             if (std::abs(c0) < std::numeric_limits<float>::epsilon())
             {
                 computeRoots2 (c2, c1, roots);
                 return;
             }

             const float one_third  = 1.f/3.f;
             const float sqrt_three = sqrtf(3.f);

             // Parameters used to classify the roots and to evaluate them in closed form.
             const float shift = c2 * one_third;

             // Clamped to be non-positive so that the square root below stays real.
             const float p_raw = (c1 - c2*shift)*one_third;
             const float p = p_raw > 0.f ? 0.f : p_raw;

             const float half_b = 0.5f * (c0 + shift * (2.f * shift * shift - c1));

             // Clamped to be non-positive for the same reason as p.
             const float q_raw = half_b * half_b + p * p * p;
             const float q = q_raw > 0.f ? 0.f : q_raw;

             // Evaluate the three roots through the trigonometric form.
             const float rho       = sqrtf(-p);
             const float theta     = std::atan2 (sqrtf (-q), half_b)*one_third;
             const float cos_theta = __cosf (theta);
             const float sin_theta = __sinf (theta);

             roots.x = shift + 2.f * rho * cos_theta;
             roots.y = shift - rho * (cos_theta + sqrt_three * sin_theta);
             roots.z = shift - rho * (cos_theta - sqrt_three * sin_theta);

             // Three compare-and-swap steps bring the roots into increasing order.
             if (roots.x >= roots.y)
                 swap(roots.x, roots.y);

             if (roots.y >= roots.z)
             {
                 swap(roots.y, roots.z);

                 if (roots.x >= roots.y)
                     swap(roots.x, roots.y);
             }

             // An eigenvalue of a symmetric positive semi-definite matrix cannot be negative:
             // fall back to the quadratic solution, which pins the smallest root to 0.
             if (roots.x <= 0)
                 computeRoots2 (c2, c1, roots);
         }


         /** \brief Closed-form eigen decomposition of a symmetric 3x3 matrix for device code.
           *
           * The matrix is accessed through a packed buffer of six floats laid out as
           * [m00, m01, m02, m11, m12, m22] (see the mXY() accessors below).
           */
         struct Eigen33
         {
         public:
             /** \brief Minimal fixed-size matrix stored as an array of float3 rows. */
             template<int Rows>
             struct MiniMat
             {
                 float3 data[Rows];
                 __device__ __host__ __forceinline__ float3& operator[](int i) { return data[i]; }
                 __device__ __host__ __forceinline__ const float3& operator[](int i) const { return data[i]; }
             };
             using Mat33 = MiniMat<3>;
             using Mat43 = MiniMat<4>;

             /** \brief Returns a unit-length vector orthogonal to \a src.
               * \param[in] src input vector (not required to be normalized)
               */
             static __forceinline__ __device__ float3 unitOrthogonal (const float3& src)
             {
                 float3 perp;
                 /* Let us compute the crossed product of *this with a vector
                 * that is not too close to being colinear to *this.
                 */

                 /* unless the x and y coords are both close to zero, we can
                 * simply take ( -y, x, 0 ) and normalize it.
                 */
                 if(!isMuchSmallerThan(src.x, src.z) || !isMuchSmallerThan(src.y, src.z))
                 {
                     float invnm = rsqrtf(src.x*src.x + src.y*src.y);
                     perp.x = -src.y * invnm;
                     perp.y =  src.x * invnm;
                     perp.z = 0.0f;
                 }
                 /* if both x and y are close to zero, then the vector is close
                 * to the z-axis, so it's far from colinear to the x-axis for instance.
                 * So we take the crossed product with (1,0,0) and normalize it.
                 */
                 else
                 {
                     float invnm = rsqrtf(src.z * src.z + src.y * src.y);
                     perp.x = 0.0f;
                     perp.y = -src.z * invnm;
                     perp.z =  src.y * invnm;
                 }

                 return perp;
             }

             /** \brief Wraps the packed symmetric matrix; the buffer is not copied.
               * \param[in] mat_pkg_arg pointer to the six packed matrix entries
               */
             __device__ __forceinline__ Eigen33(volatile float* mat_pkg_arg) : mat_pkg(mat_pkg_arg) {}

             /** \brief Computes eigenvalues and eigenvectors of the wrapped matrix.
               *
               * Side effect: the packed matrix is divided in place by its largest absolute entry
               * and is NOT scaled back afterwards; only the eigenvalues are rescaled.
               *
               * \param[out] tmp     scratch matrix, overwritten
               * \param[out] vec_tmp scratch matrix, overwritten
               * \param[out] evecs   eigenvectors, evecs[i] belongs to the i-th component of \a evals
               * \param[out] evals   eigenvalues in increasing order (x <= y <= z)
               */
             __device__ __forceinline__ void compute(Mat33& tmp, Mat33& vec_tmp, Mat33& evecs, float3& evals)
             {
                 // Scale the matrix by its largest absolute entry so that all entries end up
                 // in [-1,1].  The scaling is skipped only when that entry is (almost) zero.
                 float max01 = fmaxf( std::abs(mat_pkg[0]), std::abs(mat_pkg[1]) );
                 float max23 = fmaxf( std::abs(mat_pkg[2]), std::abs(mat_pkg[3]) );
                 float max45 = fmaxf( std::abs(mat_pkg[4]), std::abs(mat_pkg[5]) );
                 float m0123 = fmaxf( max01, max23);
                 float scale = fmaxf( max45, m0123);

                 if (scale <= std::numeric_limits<float>::min())
                     scale = 1.f;

                 mat_pkg[0] /= scale;
                 mat_pkg[1] /= scale;
                 mat_pkg[2] /= scale;
                 mat_pkg[3] /= scale;
                 mat_pkg[4] /= scale;
                 mat_pkg[5] /= scale;

                 // The characteristic equation is x^3 - c2*x^2 + c1*x - c0 = 0.  The
                 // eigenvalues are the roots to this equation, all guaranteed to be
                 // real-valued, because the matrix is symmetric.
                 float c0 = m00() * m11() * m22()
                     + 2.f * m01() * m02() * m12()
                     - m00() * m12() * m12()
                     - m11() * m02() * m02()
                     - m22() * m01() * m01();
                 float c1 = m00() * m11() -
                     m01() * m01() +
                     m00() * m22() -
                     m02() * m02() +
                     m11() * m22() -
                     m12() * m12();
                 float c2 = m00() + m11() + m22();

                 computeRoots3(c0, c1, c2, evals);

                 if(evals.z - evals.x <= std::numeric_limits<float>::epsilon())
                 {
                     // all three eigenvalues equal: any orthonormal basis is valid
                     evecs[0] = make_float3(1.f, 0.f, 0.f);
                     evecs[1] = make_float3(0.f, 1.f, 0.f);
                     evecs[2] = make_float3(0.f, 0.f, 1.f);
                 }
                 else if (evals.y - evals.x <= std::numeric_limits<float>::epsilon() )
                 {
                     // first and second equal: only the eigenvector of evals.z is well defined,
                     // the other two are completed to an orthonormal basis
                     eigenvectorFor(evals.z, tmp, vec_tmp, evecs[2]);

                     evecs[1] = unitOrthogonal(evecs[2]);
                     evecs[0] = cross(evecs[1], evecs[2]);
                 }
                 else if (evals.z - evals.y <= std::numeric_limits<float>::epsilon() )
                 {
                     // second and third equal: only the eigenvector of evals.x is well defined,
                     // the other two are completed to an orthonormal basis
                     eigenvectorFor(evals.x, tmp, vec_tmp, evecs[0]);

                     evecs[1] = unitOrthogonal( evecs[0] );
                     evecs[2] = cross(evecs[0], evecs[1]);
                 }
                 else
                 {
                     // Three distinct eigenvalues.  mmax[i] is the squared length of the cross
                     // product that evecs[i] was derived from; min_el / max_el track the indices
                     // of the smallest / largest of these values.
                     float mmax[3];

                     unsigned int min_el = 2;
                     unsigned int max_el = 2;

                     mmax[2] = eigenvectorFor(evals.z, tmp, vec_tmp, evecs[2]);

                     mmax[1] = eigenvectorFor(evals.y, tmp, vec_tmp, evecs[1]);
                     min_el = mmax[1] <= mmax[min_el] ? 1 : min_el;
                     max_el = mmax[1]  > mmax[max_el] ? 1 : max_el;

                     // The comparison must use the length selected for eigenvector 0 (mmax[0]),
                     // exactly as the evals.y stage above does for eigenvector 1.
                     mmax[0] = eigenvectorFor(evals.x, tmp, vec_tmp, evecs[0]);
                     min_el = mmax[0] <= mmax[min_el] ? 0 : min_el;
                     max_el = mmax[0]  > mmax[max_el] ? 0 : max_el;

                     // Keep the eigenvector at max_el and rebuild the other two from cross
                     // products so that the resulting basis is orthonormal.
                     unsigned mid_el = 3 - min_el - max_el;
                     evecs[min_el] = normalized( cross( evecs[(min_el+1) % 3], evecs[(min_el+2) % 3] ) );
                     evecs[mid_el] = normalized( cross( evecs[(mid_el+1) % 3], evecs[(mid_el+2) % 3] ) );
                 }
                 // Rescale the eigenvalues back to the original size (the matrix stays scaled).
                 evals *= scale;
             }
         private:
             // Packed symmetric matrix: [m00, m01, m02, m11, m12, m22].
             volatile float* mat_pkg;

             __device__  __forceinline__ float m00() const { return mat_pkg[0]; }
             __device__  __forceinline__ float m01() const { return mat_pkg[1]; }
             __device__  __forceinline__ float m02() const { return mat_pkg[2]; }
             __device__  __forceinline__ float m10() const { return mat_pkg[1]; }
             __device__  __forceinline__ float m11() const { return mat_pkg[3]; }
             __device__  __forceinline__ float m12() const { return mat_pkg[4]; }
             __device__  __forceinline__ float m20() const { return mat_pkg[2]; }
             __device__  __forceinline__ float m21() const { return mat_pkg[4]; }
             __device__  __forceinline__ float m22() const { return mat_pkg[5]; }

             __device__  __forceinline__ float3 row0() const { return make_float3( m00(), m01(), m02() ); }
             __device__  __forceinline__ float3 row1() const { return make_float3( m10(), m11(), m12() ); }
             __device__  __forceinline__ float3 row2() const { return make_float3( m20(), m21(), m22() ); }

             /** \brief Derives a unit eigenvector for the eigenvalue \a eval.
               *
               * Subtracts \a eval from the diagonal, forms the three pairwise cross products of
               * the resulting rows and normalizes the longest one (ties resolved in the order
               * rows 0x1, rows 0x2, rows 1x2).
               *
               * \param[in]  eval    eigenvalue to subtract from the diagonal
               * \param[out] tmp     receives the rows of the shifted matrix
               * \param[out] vec_tmp receives the three cross products
               * \param[out] evec    receives the normalized longest cross product
               * \return squared length of the cross product that was selected
               */
             __device__ __forceinline__ float eigenvectorFor(float eval, Mat33& tmp, Mat33& vec_tmp, float3& evec) const
             {
                 tmp[0] = row0();  tmp[1] = row1();  tmp[2] = row2();
                 tmp[0].x -= eval; tmp[1].y -= eval; tmp[2].z -= eval;

                 vec_tmp[0] = cross(tmp[0], tmp[1]);
                 vec_tmp[1] = cross(tmp[0], tmp[2]);
                 vec_tmp[2] = cross(tmp[1], tmp[2]);

                 float len1 = dot(vec_tmp[0], vec_tmp[0]);
                 float len2 = dot(vec_tmp[1], vec_tmp[1]);
                 float len3 = dot(vec_tmp[2], vec_tmp[2]);

                 if (len1 >= len2 && len1 >= len3)
                 {
                     evec = vec_tmp[0] * rsqrtf (len1);
                     return len1;
                 }
                 if (len2 >= len1 && len2 >= len3)
                 {
                     evec = vec_tmp[1] * rsqrtf (len2);
                     return len2;
                 }
                 evec = vec_tmp[2] * rsqrtf (len3);
                 return len3;
             }

             /** \brief Returns true when |x| is negligible compared to |y| (relative to float epsilon). */
             __device__  __forceinline__ static bool isMuchSmallerThan (float x, float y)
             {
                 // copied from <eigen>/include/Eigen/src/Core/NumTraits.h
                 constexpr float prec_sqr = std::numeric_limits<float>::epsilon() * std::numeric_limits<float>::epsilon();
                 return x * x <= prec_sqr * y * y;
             }

         };

     }

 }


 #endif  /* PCL_GPU_FEATURES_EIGEN_HPP_ */

pcl::device::normalized
__device__ __forceinline__ float3 normalized(const float3 &v)
Definition: utils.hpp:101

pcl::device::dot
__device__ __forceinline__ float dot(const float3 &v1, const float3 &v2)
Definition: utils.hpp:59

pcl::device::computeRoots3
__device__ __forceinline__ void computeRoots3(float c0, float c1, float c2, float3 &roots)
Definition: eigen.hpp:115

pcl::device::computeRoots2
__device__ __forceinline__ void computeRoots2(const float &b, const float &c, float3 &roots)
Definition: eigen.hpp:102

pcl::device::swap
__device__ __host__ __forceinline__ void swap(T &a, T &b)
Definition: utils.hpp:53

pcl::device::cross
__device__ __host__ __forceinline__ float3 cross(const float3 &v1, const float3 &v2)
Definition: utils.hpp:107

pcl
Definition: convolution.h:46

pcl::device::Eigen33::MiniMat
Definition: eigen.hpp:168

pcl::device::Eigen33::MiniMat::data
float3 data[Rows]
Definition: eigen.hpp:169

pcl::device::Eigen33::MiniMat::operator[]
__device__ __host__ __forceinline__ const float3 & operator[](int i) const
Definition: eigen.hpp:171

pcl::device::Eigen33::MiniMat::operator[]
__device__ __host__ __forceinline__ float3 & operator[](int i)
Definition: eigen.hpp:170

pcl::device::Eigen33
Definition: eigen.hpp:164

pcl::device::Eigen33::compute
__device__ __forceinline__ void compute(Mat33 &tmp, Mat33 &vec_tmp, Mat33 &evecs, float3 &evals)
Definition: eigen.hpp:210

pcl::device::Eigen33::unitOrthogonal
static __forceinline__ __device__ float3 unitOrthogonal(const float3 &src)
Definition: eigen.hpp:177

pcl::device::Eigen33::Eigen33
__device__ __forceinline__ Eigen33(volatile float *mat_pkg_arg)
Definition: eigen.hpp:209