37 #ifndef PCL_GPU_BITONIC_SORT_WARP_HPP
38 #define PCL_GPU_BITONIC_SORT_WARP_HPP
/**
 * Exchanges the contents of two objects through a temporary copy.
 *
 * T is deduced from the arguments, so cv-qualified references (for example
 * the volatile elements passed by bitonicSortWarp) are accepted as-is.
 * Callable from both host and device code.
 *
 * @param a first object; holds the former value of b on return
 * @param b second object; holds the former value of a on return
 */
template<typename T>
__host__ __device__ __forceinline__
void swap(T& a, T& b)
{
    T t = a;  // keep the old value of a before it is overwritten
    a = b;
    b = t;
}
/**
 * Warp-wide barrier used between the stages of bitonicSortWarp.
 *
 * Toolkits that provide __syncwarp() (CUDA 9 and newer) get an explicit
 * barrier plus memory ordering among the participating lanes. Older
 * toolkits have no such intrinsic, so the helper compiles to nothing there.
 */
__device__ __forceinline__
void bitonicSortWarpBarrier()
{
#if defined(CUDART_VERSION) && (CUDART_VERSION >= 9000)
    __syncwarp();
#endif
}

/**
 * Sorts 64 key/value pairs in place with a bitonic sorting network.
 *
 * The lane index is threadIdx.x & 31 and every lane performs one
 * compare-exchange per stage, so the 32 comparators of a stage cover all
 * 64 elements. All 32 lanes of the warp must therefore call this function
 * together (the barrier between stages uses the full-warp mask).
 *
 * @param keys 64 keys, reordered in place; compared with operator>
 * @param vals 64 values, permuted together with their keys
 * @param dir  1 sorts keys in ascending order, 0 in descending order
 */
template<typename V, typename K>
__device__ __forceinline__
void bitonicSortWarp(volatile K* keys, volatile V* vals, unsigned int dir = 1)
{
    const unsigned int arrayLength = 64;
    const unsigned int lane = threadIdx.x & 31;

    // Build bitonic sequences of growing length: 2, 4, ..., arrayLength / 2.
    for (unsigned int size = 2; size < arrayLength; size <<= 1)
    {
        // Direction of this lane's sub-sequence: flips every size / 2 lanes.
        const unsigned int ddd = dir ^ ((lane & (size / 2)) != 0);

        for (unsigned int stride = size / 2; stride > 0; stride >>= 1)
        {
            // This stage reads elements other lanes wrote in the previous one.
            bitonicSortWarpBarrier();

            const unsigned int pos = 2 * lane - (lane & (stride - 1));

            if ((keys[pos] > keys[pos + stride]) == ddd)
            {
                swap(keys[pos], keys[pos + stride]);
                swap(vals[pos], vals[pos + stride]);
            }
        }
    }

    // Final merge over the whole array: every lane uses dir directly.
    for (unsigned int stride = arrayLength / 2; stride > 0; stride >>= 1)
    {
        bitonicSortWarpBarrier();

        const unsigned int pos = 2 * lane - (lane & (stride - 1));

        if ((keys[pos] > keys[pos + stride]) == dir)
        {
            swap(keys[pos], keys[pos + stride]);
            swap(vals[pos], vals[pos + stride]);
        }
    }

    // Make the sorted result visible to every lane before returning.
    bitonicSortWarpBarrier();
}
__device__ __forceinline__ void bitonicSortWarp(volatile K *keys, volatile V *vals, unsigned int dir=1)
__device__ __host__ __forceinline__ void swap(T &a, T &b)