Point Cloud Library (PCL)  1.14.0-dev
bitonic_sort.hpp
1 /*
2 * Software License Agreement (BSD License)
3 *
4 * Copyright (c) 2011, Willow Garage, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
17 * * Neither the name of Willow Garage, Inc. nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 * POSSIBILITY OF SUCH DAMAGE.
33 *
34 * Author: Anatoly Baskeheev, Itseez Ltd, (myname.mysurname@mycompany.com)
35 */
36 
37 #ifndef PCL_GPU_BITONIC_SORT_WARP_HPP
38 #define PCL_GPU_BITONIC_SORT_WARP_HPP
39 
40 namespace pcl
41 {
42  namespace device
43  {
44  template<typename T>
45  __device__ __forceinline__ void swap(T& a, T& b) { T t = a; a = b; b = t; }
46 
47  template<typename V, typename K>
48  __device__ __forceinline__ void bitonicSortWarp(volatile K* keys, volatile V* vals, unsigned int dir = 1)
49  {
50  const unsigned int arrayLength = 64;
51  unsigned int lane = threadIdx.x & 31;
52 
53  for(unsigned int size = 2; size < arrayLength; size <<= 1)
54  {
55  //Bitonic merge
56  unsigned int ddd = dir ^ ( (lane & (size / 2)) != 0 );
57 
58  for(unsigned int stride = size / 2; stride > 0; stride >>= 1)
59  {
60  unsigned int pos = 2 * lane - (lane & (stride - 1));
61 
62  if ( (keys[pos] > keys[pos + stride]) == ddd )
63  {
64  swap(keys[pos], keys[pos + stride]);
65  swap(vals[pos], vals[pos + stride]);
66  }
67  }
68  }
69 
70  //ddd == dir for the last bitonic merge step
71  for(unsigned int stride = arrayLength / 2; stride > 0; stride >>= 1)
72  {
73  unsigned int pos = 2 * lane - (lane & (stride - 1));
74 
75  if ( (keys[pos] > keys[pos + stride]) == dir )
76  {
77  swap(keys[pos], keys[pos + stride]);
78  swap(vals[pos], vals[pos + stride]);
79  }
80  }
81  }
82 
83  }
84 }
85 
86 #endif /* PCL_GPU_BITONIC_SORT_WARP_HPP */
@ K
Definition: norms.h:54
__device__ __forceinline__ void bitonicSortWarp(volatile K *keys, volatile V *vals, unsigned int dir=1)
__device__ __host__ __forceinline__ void swap(T &a, T &b)
Definition: utils.hpp:53