37 #ifndef PCL_GPU_WARP_REDUCE
38 #define PCL_GPU_WARP_REDUCE
/** \brief Sum the 32 consecutive elements of \a ptr that belong to the calling
  *        thread's warp and return the total to every calling lane.
  *
  * Lanes 0..15 fold the elements at distance 16, 8, 4, 2 and 1 into their own
  * slot. After the last step the first slot of the warp's segment (index
  * `tid - lane`) holds the sum of all 32 elements; the slots of lanes 1..15 are
  * left holding intermediate partial sums. Slots of lanes 16..31 are only read.
  *
  * Preconditions:
  *  - `ptr[tid - lane] .. ptr[tid - lane + 31]` must be valid and initialised,
  *    because lanes 0..15 read up to `ptr[tid + 16]`.
  *  - All non-exited threads of the warp must call this function together: the
  *    barriers below use the default full-warp mask.
  *
  * \param[in,out] ptr  buffer holding one element per thread (presumably shared
  *                     memory -- confirm against the callers)
  * \param[in]     tid  index of the calling thread's element in \a ptr; only
  *                     its low five bits are used to derive the lane
  * \return the value stored in the first slot of the warp's segment, i.e. the
  *         sum of the warp's 32 elements
  */
template <class T>
__device__ __forceinline__ T
warp_reduce (volatile T *ptr, const unsigned int tid = threadIdx.x)
{
  const unsigned int lane = tid & 31; // index of the thread within its warp (0..31)

  // Only the lower half of the warp accumulates: an upper-half lane reading
  // ptr[tid + 16] would step outside the warp's 32-element segment.
  const bool accumulates = lane < 16;

  T partial = ptr[tid];

#pragma unroll
  for (unsigned int offset = 16; offset > 0; offset >>= 1)
  {
    if (accumulates)
      partial = partial + ptr[tid + offset];

    // Every lane must finish reading its neighbour's slot before any lane
    // overwrites its own. `volatile` alone does not order accesses between
    // lanes under independent thread scheduling (Volta and newer).
#if defined(CUDART_VERSION) && CUDART_VERSION >= 9000
    __syncwarp ();
#endif

    if (accumulates)
      ptr[tid] = partial;

    // Publish the stores before the next step's loads (and, after the final
    // step, before every lane reads the total below).
#if defined(CUDART_VERSION) && CUDART_VERSION >= 9000
    __syncwarp ();
#endif
  }

  return ptr[tid - lane]; // first slot of the warp's segment holds the total
}
__device__ __forceinline__ T warp_reduce(volatile T *ptr, const unsigned int tid=threadIdx.x)