Point Cloud Library (PCL)  1.14.1-dev
warp_reduce.hpp
1 /*
2 * Software License Agreement (BSD License)
3 *
4 * Copyright (c) 2011, Willow Garage, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
17 * * Neither the name of Willow Garage, Inc. nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 * POSSIBILITY OF SUCH DAMAGE.
33 *
34 * Author: Anatoly Baksheev, Itseez Ltd, (myname.mysurname@mycompany.com)
35 */
36 
37 #ifndef PCL_GPU_WARP_REDUCE
38 #define PCL_GPU_WARP_REDUCE
39 
40 
41 namespace pcl
42 {
43  namespace device
44  {
/** \brief Sums (via operator+) the 32 consecutive elements that start at the
  * calling warp's first slot of \a ptr and returns the total to every caller.
  *
  * The reduction is done in place: lanes 0..15 repeatedly add the element
  * \c offset slots above their own (offset = 16, 8, 4, 2, 1) and store the
  * running partial back into \c ptr[tid]. After the last step \c ptr[tid - lane]
  * holds the sum of all 32 elements; the other 15 low slots hold intermediate
  * partials and must be treated as clobbered.
  *
  * Each step is split into a read phase and a write phase separated by
  * __syncwarp(). Without these barriers a lane could overwrite \c ptr[tid] for
  * the next step before a neighbouring lane has read the previous value, which
  * is a real hazard on architectures with independent thread scheduling
  * (Volta and newer); \c volatile alone does not order accesses between lanes.
  *
  * Preconditions:
  *  - \c ptr[tid - lane] .. \c ptr[tid - lane + 31] must be valid and initialized.
  *  - Every non-exited thread of the warp must call this function together,
  *    because __syncwarp() is used with the default full-warp mask.
  *  - NOTE(review): \c tid & 31 is assumed to match the hardware lane id, which
  *    holds for the default \c threadIdx.x in a 1D block - confirm for callers
  *    that pass a custom \a tid.
  *  - Requires CUDA 9.0 or newer for __syncwarp().
  *
  * \param[in,out] ptr buffer holding one element per thread (presumably shared
  *                memory - verify against callers); reduced in place
  * \param[in] tid index of the calling thread's element in \a ptr
  * \return the value stored in the warp's first slot after the reduction
  */
template <class T>
__device__ __forceinline__ T warp_reduce ( volatile T *ptr , const unsigned int tid = threadIdx.x )
{
  const unsigned int lane = tid & 31; // index of thread in warp (0..31)

  // Only the lower half of the warp accumulates; the upper half just supplies
  // its input elements and takes part in the barriers.
  const bool accumulates = lane < 16;

  T partial = ptr[tid];

  #pragma unroll
  for (unsigned int offset = 16; offset > 0; offset >>= 1)
  {
    // Read phase: everybody fetches its neighbour's value from the previous step.
    if (accumulates)
      partial = partial + ptr[tid + offset];
    __syncwarp (); // all reads of this step are done before anyone overwrites

    // Write phase: publish the new partial for the next step.
    if (accumulates)
      ptr[tid] = partial;
    __syncwarp (); // all writes of this step are visible before the next reads
  }

  // The trailing barrier above orders lane 0's final store before this read.
  return ptr[tid - lane];
}
63  }
64 }
65 
66 #endif
__device__ __forceinline__ T warp_reduce(volatile T *ptr, const unsigned int tid=threadIdx.x)
Definition: warp_reduce.hpp:46