90 #ifndef PCL_GPU_FEATURES_EIGEN_HPP_
91 #define PCL_GPU_FEATURES_EIGEN_HPP_
95 #include <pcl/gpu/utils/device/algorithm.hpp>
96 #include <pcl/gpu/utils/device/vector_math.hpp>
102 __device__ __forceinline__
void computeRoots2(
const float& b,
const float& c, float3& roots)
105 float d = b * b - 4.f * c;
111 roots.z = 0.5f * (b + sd);
112 roots.y = 0.5f * (b - sd);
115 __device__ __forceinline__
void computeRoots3(
float c0,
float c1,
float c2, float3& roots)
117 if ( std::abs(c0) < std::numeric_limits<float>::epsilon())
123 const float s_inv3 = 1.f/3.f;
124 const float s_sqrt3 = sqrtf(3.f);
127 float c2_over_3 = c2 * s_inv3;
128 float a_over_3 = (c1 - c2*c2_over_3)*s_inv3;
132 float half_b = 0.5f * (c0 + c2_over_3 * (2.f * c2_over_3 * c2_over_3 - c1));
134 float q = half_b * half_b + a_over_3 * a_over_3 * a_over_3;
139 float rho = sqrtf(-a_over_3);
140 float theta = std::atan2 (sqrtf (-q), half_b)*s_inv3;
141 float cos_theta = __cosf (theta);
142 float sin_theta = __sinf (theta);
143 roots.x = c2_over_3 + 2.f * rho * cos_theta;
144 roots.y = c2_over_3 - rho * (cos_theta + s_sqrt3 * sin_theta);
145 roots.z = c2_over_3 - rho * (cos_theta - s_sqrt3 * sin_theta);
148 if (roots.x >= roots.y)
149 swap(roots.x, roots.y);
151 if (roots.y >= roots.z)
153 swap(roots.y, roots.z);
155 if (roots.x >= roots.y)
156 swap (roots.x, roots.y);
170 __device__ __host__ __forceinline__ float3&
operator[](
int i) {
return data[i]; }
171 __device__ __host__ __forceinline__
const float3&
operator[](
int i)
const {
return data[i]; }
187 if(!isMuchSmallerThan(src.x, src.z) || !isMuchSmallerThan(src.y, src.z))
189 float invnm = rsqrtf(src.x*src.x + src.y*src.y);
190 perp.x = -src.y * invnm;
191 perp.y = src.x * invnm;
200 float invnm = rsqrtf(src.z * src.z + src.y * src.y);
202 perp.y = -src.z * invnm;
203 perp.z = src.y * invnm;
209 __device__ __forceinline__
Eigen33(
volatile float* mat_pkg_arg) : mat_pkg(mat_pkg_arg) {}
215 float max01 = fmaxf( std::abs(mat_pkg[0]), std::abs(mat_pkg[1]) );
216 float max23 = fmaxf( std::abs(mat_pkg[2]), std::abs(mat_pkg[3]) );
217 float max45 = fmaxf( std::abs(mat_pkg[4]), std::abs(mat_pkg[5]) );
218 float m0123 = fmaxf( max01, max23);
219 float scale = fmaxf( max45, m0123);
221 if (scale <= std::numeric_limits<float>::min())
234 float c0 = m00() * m11() * m22()
235 + 2.f * m01() * m02() * m12()
236 - m00() * m12() * m12()
237 - m11() * m02() * m02()
238 - m22() * m01() * m01();
239 float c1 = m00() * m11() -
245 float c2 = m00() + m11() + m22();
249 if(evals.z - evals.x <= std::numeric_limits<float>::epsilon())
251 evecs[0] = make_float3(1.f, 0.f, 0.f);
252 evecs[1] = make_float3(0.f, 1.f, 0.f);
253 evecs[2] = make_float3(0.f, 0.f, 1.f);
255 else if (evals.y - evals.x <= std::numeric_limits<float>::epsilon() )
258 tmp[0] = row0(); tmp[1] = row1(); tmp[2] = row2();
259 tmp[0].x -= evals.z; tmp[1].y -= evals.z; tmp[2].z -= evals.z;
261 vec_tmp[0] =
cross(tmp[0], tmp[1]);
262 vec_tmp[1] =
cross(tmp[0], tmp[2]);
263 vec_tmp[2] =
cross(tmp[1], tmp[2]);
265 float len1 =
dot (vec_tmp[0], vec_tmp[0]);
266 float len2 =
dot (vec_tmp[1], vec_tmp[1]);
267 float len3 =
dot (vec_tmp[2], vec_tmp[2]);
269 if (len1 >= len2 && len1 >= len3)
271 evecs[2] = vec_tmp[0] * rsqrtf (len1);
273 else if (len2 >= len1 && len2 >= len3)
275 evecs[2] = vec_tmp[1] * rsqrtf (len2);
279 evecs[2] = vec_tmp[2] * rsqrtf (len3);
283 evecs[0] =
cross(evecs[1], evecs[2]);
285 else if (evals.z - evals.y <= std::numeric_limits<float>::epsilon() )
288 tmp[0] = row0(); tmp[1] = row1(); tmp[2] = row2();
289 tmp[0].x -= evals.x; tmp[1].y -= evals.x; tmp[2].z -= evals.x;
291 vec_tmp[0] =
cross(tmp[0], tmp[1]);
292 vec_tmp[1] =
cross(tmp[0], tmp[2]);
293 vec_tmp[2] =
cross(tmp[1], tmp[2]);
295 float len1 =
dot(vec_tmp[0], vec_tmp[0]);
296 float len2 =
dot(vec_tmp[1], vec_tmp[1]);
297 float len3 =
dot(vec_tmp[2], vec_tmp[2]);
299 if (len1 >= len2 && len1 >= len3)
301 evecs[0] = vec_tmp[0] * rsqrtf(len1);
303 else if (len2 >= len1 && len2 >= len3)
305 evecs[0] = vec_tmp[1] * rsqrtf(len2);
309 evecs[0] = vec_tmp[2] * rsqrtf(len3);
313 evecs[2] =
cross(evecs[0], evecs[1]);
318 tmp[0] = row0(); tmp[1] = row1(); tmp[2] = row2();
319 tmp[0].x -= evals.z; tmp[1].y -= evals.z; tmp[2].z -= evals.z;
321 vec_tmp[0] =
cross(tmp[0], tmp[1]);
322 vec_tmp[1] =
cross(tmp[0], tmp[2]);
323 vec_tmp[2] =
cross(tmp[1], tmp[2]);
325 float len1 =
dot(vec_tmp[0], vec_tmp[0]);
326 float len2 =
dot(vec_tmp[1], vec_tmp[1]);
327 float len3 =
dot(vec_tmp[2], vec_tmp[2]);
331 unsigned int min_el = 2;
332 unsigned int max_el = 2;
333 if (len1 >= len2 && len1 >= len3)
336 evecs[2] = vec_tmp[0] * rsqrtf (len1);
338 else if (len2 >= len1 && len2 >= len3)
341 evecs[2] = vec_tmp[1] * rsqrtf (len2);
346 evecs[2] = vec_tmp[2] * rsqrtf (len3);
349 tmp[0] = row0(); tmp[1] = row1(); tmp[2] = row2();
350 tmp[0].x -= evals.y; tmp[1].y -= evals.y; tmp[2].z -= evals.y;
352 vec_tmp[0] =
cross(tmp[0], tmp[1]);
353 vec_tmp[1] =
cross(tmp[0], tmp[2]);
354 vec_tmp[2] =
cross(tmp[1], tmp[2]);
356 len1 =
dot(vec_tmp[0], vec_tmp[0]);
357 len2 =
dot(vec_tmp[1], vec_tmp[1]);
358 len3 =
dot(vec_tmp[2], vec_tmp[2]);
360 if (len1 >= len2 && len1 >= len3)
363 evecs[1] = vec_tmp[0] * rsqrtf (len1);
364 min_el = len1 <= mmax[min_el] ? 1 : min_el;
365 max_el = len1 > mmax[max_el] ? 1 : max_el;
367 else if (len2 >= len1 && len2 >= len3)
370 evecs[1] = vec_tmp[1] * rsqrtf (len2);
371 min_el = len2 <= mmax[min_el] ? 1 : min_el;
372 max_el = len2 > mmax[max_el] ? 1 : max_el;
377 evecs[1] = vec_tmp[2] * rsqrtf (len3);
378 min_el = len3 <= mmax[min_el] ? 1 : min_el;
379 max_el = len3 > mmax[max_el] ? 1 : max_el;
382 tmp[0] = row0(); tmp[1] = row1(); tmp[2] = row2();
383 tmp[0].x -= evals.x; tmp[1].y -= evals.x; tmp[2].z -= evals.x;
385 vec_tmp[0] =
cross(tmp[0], tmp[1]);
386 vec_tmp[1] =
cross(tmp[0], tmp[2]);
387 vec_tmp[2] =
cross(tmp[1], tmp[2]);
389 len1 =
dot (vec_tmp[0], vec_tmp[0]);
390 len2 =
dot (vec_tmp[1], vec_tmp[1]);
391 len3 =
dot (vec_tmp[2], vec_tmp[2]);
394 if (len1 >= len2 && len1 >= len3)
397 evecs[0] = vec_tmp[0] * rsqrtf (len1);
398 min_el = len3 <= mmax[min_el] ? 0 : min_el;
399 max_el = len3 > mmax[max_el] ? 0 : max_el;
401 else if (len2 >= len1 && len2 >= len3)
404 evecs[0] = vec_tmp[1] * rsqrtf (len2);
405 min_el = len3 <= mmax[min_el] ? 0 : min_el;
406 max_el = len3 > mmax[max_el] ? 0 : max_el;
411 evecs[0] = vec_tmp[2] * rsqrtf (len3);
412 min_el = len3 <= mmax[min_el] ? 0 : min_el;
413 max_el = len3 > mmax[max_el] ? 0 : max_el;
416 unsigned mid_el = 3 - min_el - max_el;
417 evecs[min_el] =
normalized(
cross( evecs[(min_el+1) % 3], evecs[(min_el+2) % 3] ) );
418 evecs[mid_el] =
normalized(
cross( evecs[(mid_el+1) % 3], evecs[(mid_el+2) % 3] ) );
424 volatile float* mat_pkg;
426 __device__ __forceinline__
float m00()
const {
return mat_pkg[0]; }
427 __device__ __forceinline__
float m01()
const {
return mat_pkg[1]; }
428 __device__ __forceinline__
float m02()
const {
return mat_pkg[2]; }
429 __device__ __forceinline__
float m10()
const {
return mat_pkg[1]; }
430 __device__ __forceinline__
float m11()
const {
return mat_pkg[3]; }
431 __device__ __forceinline__
float m12()
const {
return mat_pkg[4]; }
432 __device__ __forceinline__
float m20()
const {
return mat_pkg[2]; }
433 __device__ __forceinline__
float m21()
const {
return mat_pkg[4]; }
434 __device__ __forceinline__
float m22()
const {
return mat_pkg[5]; }
436 __device__ __forceinline__ float3 row0()
const {
return make_float3( m00(), m01(), m02() ); }
437 __device__ __forceinline__ float3 row1()
const {
return make_float3( m10(), m11(), m12() ); }
438 __device__ __forceinline__ float3 row2()
const {
return make_float3( m20(), m21(), m22() ); }
440 __device__ __forceinline__
static bool isMuchSmallerThan (
float x,
float y)
443 constexpr
float prec_sqr = std::numeric_limits<float>::epsilon() * std::numeric_limits<float>::epsilon();
444 return x * x <= prec_sqr * y * y;
__device__ __forceinline__ float3 normalized(const float3 &v)
__device__ __forceinline__ float dot(const float3 &v1, const float3 &v2)
__device__ __forceinline__ void computeRoots3(float c0, float c1, float c2, float3 &roots)
__device__ __forceinline__ void computeRoots2(const float &b, const float &c, float3 &roots)
__device__ __host__ __forceinline__ void swap(T &a, T &b)
__device__ __host__ __forceinline__ float3 cross(const float3 &v1, const float3 &v2)
__device__ __host__ __forceinline__ const float3 & operator[](int i) const
__device__ __host__ __forceinline__ float3 & operator[](int i)
__device__ __forceinline__ void compute(Mat33 &tmp, Mat33 &vec_tmp, Mat33 &evecs, float3 &evals)
static __forceinline__ __device__ float3 unitOrthogonal(const float3 &src)
__device__ __forceinline__ Eigen33(volatile float *mat_pkg_arg)