Point Cloud Library (PCL)  1.14.0-dev
reduce.hpp
1 /*
2 * Software License Agreement (BSD License)
3 *
4 * Copyright (c) 2011, Willow Garage, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
17 * * Neither the name of Willow Garage, Inc. nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 * POSSIBILITY OF SUCH DAMAGE.
33 *
34 * Author: Anatoly Baskeheev, Itseez Ltd, (myname.mysurname@mycompany.com)
35 */
36 
37 #ifndef PCL_GPU_DEVICE_REDUCE_HPP_
38 #define PCL_GPU_DEVICE_REDUCE_HPP_
39 
namespace pcl
{
  namespace device
  {
    namespace detail
    {
      /** \brief Barrier between dependent steps of the final (single-warp) stage of reduce_block.
        *
        * Uses __syncwarp() when the toolkit provides it (CUDA 9.0 and newer). On older
        * toolkits it expands to nothing, so the code falls back to the volatile accesses
        * alone, as the original implementation did.
        */
      __device__ __forceinline__ void reduce_block_sync_warp()
      {
#if defined(CUDART_VERSION) && CUDART_VERSION >= 9000
        __syncwarp();
#endif
      }
    }

    /** \brief In-place tree reduction of the first CTA_SIZE elements of \a data with \a op.
      *
      * Each step combines data[tid] with data[tid + offset] for tid < offset, halving
      * offset from CTA_SIZE / 2 down to 1. When the function returns, the thread with
      * tid == 0 has stored the combined value in data[0]. Elements at other indices hold
      * intermediate partial results and should not be relied upon.
      *
      * \tparam CTA_SIZE number of elements to reduce. Only the power-of-two offsets
      *         512 ... 1 are applied, so the full range is covered only when CTA_SIZE is
      *         a power of two not greater than 1024.
      * \tparam T element type
      * \tparam BinaryFunction callable as op(T, T) returning something assignable to T
      * \param[in,out] data buffer holding at least CTA_SIZE elements; presumably in
      *                shared memory and already synchronised by the caller - TODO confirm
      * \param[in] op combining operation
      * \param[in] tid index of the calling thread inside the reduction (defaults to threadIdx.x)
      *
      * \note For CTA_SIZE >= 128 the function executes __syncthreads(), so every thread
      *       of the block has to call it.
      * \note The last stage runs under `tid < 32` and is synchronised with a warp
      *       barrier; this assumes that the threads with tid < 32 belong to one warp.
      * \note No barrier follows the final write: callers that read data[0] from another
      *       thread have to synchronise first.
      */
    template <unsigned int CTA_SIZE, typename T, typename BinaryFunction>
    __device__ __forceinline__ void reduce_block(volatile T* data, BinaryFunction op, unsigned int tid = threadIdx.x)
    {
      T val = data[tid];

      // Block-wide stages: partners may live in different warps, hence __syncthreads().
      // The barrier is kept outside the `tid < offset` branch so all threads reach it.
      if (CTA_SIZE >= 1024) { if (tid < 512) { data[tid] = val = op(val, data[tid + 512]); } __syncthreads(); }
      if (CTA_SIZE >=  512) { if (tid < 256) { data[tid] = val = op(val, data[tid + 256]); } __syncthreads(); }
      if (CTA_SIZE >=  256) { if (tid < 128) { data[tid] = val = op(val, data[tid + 128]); } __syncthreads(); }
      if (CTA_SIZE >=  128) { if (tid <  64) { data[tid] = val = op(val, data[tid +  64]); } __syncthreads(); }

      if (tid < 32)
      {
        // Single-warp stages. Within one step the indices read, [offset, 2 * offset),
        // and the indices written, [0, offset), are disjoint, so one warp barrier after
        // each step is enough to order it against the next one.
        // The `tid < offset` guards keep every access below CTA_SIZE; without them the
        // upper lanes would read past the reduced range whenever CTA_SIZE < 64.
        if (CTA_SIZE >= 64) {                 data[tid] = val = op(val, data[tid + 32]);   detail::reduce_block_sync_warp(); }
        if (CTA_SIZE >= 32) { if (tid < 16) { data[tid] = val = op(val, data[tid + 16]); } detail::reduce_block_sync_warp(); }
        if (CTA_SIZE >= 16) { if (tid <  8) { data[tid] = val = op(val, data[tid +  8]); } detail::reduce_block_sync_warp(); }
        if (CTA_SIZE >=  8) { if (tid <  4) { data[tid] = val = op(val, data[tid +  4]); } detail::reduce_block_sync_warp(); }
        if (CTA_SIZE >=  4) { if (tid <  2) { data[tid] = val = op(val, data[tid +  2]); } detail::reduce_block_sync_warp(); }
        if (CTA_SIZE >=  2) { if (tid <  1) { data[tid] = val = op(val, data[tid +  1]); } }
      }
    }
  }
}
66 
67 
68 #endif
__device__ __forceinline__ void reduce_block(volatile T *data, BinaryFunction op, unsigned int tid=threadIdx.x)
Definition: reduce.hpp:45