Point Cloud Library (PCL)  1.14.0-dev
scan_block.hpp
1 /*
2 * Software License Agreement (BSD License)
3 *
4 * Copyright (c) 2011, Willow Garage, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
17 * * Neither the name of Willow Garage, Inc. nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 * POSSIBILITY OF SUCH DAMAGE.
33 *
34 * Author: Anatoly Baskeheev, Itseez Ltd, (myname.mysurname@mycompany.com)
35 */
36 
37 #ifndef PCL_GPU_OCTREE_SCAN_BLOCK_HPP
38 #define PCL_GPU_OCTREE_SCAN_BLOCK_HPP
39 
40 
41 namespace pcl
42 {
43  namespace device
44  {
46 
47  template <ScanKind Kind , class T>
48  __device__ __forceinline__ T scan_warp ( volatile T *ptr , const unsigned int idx = threadIdx.x )
49  {
50  const unsigned int lane = idx & 31; // index of thread in warp (0..31)
51 
52  if ( lane >= 1) ptr [idx ] = ptr [idx - 1] + ptr [idx];
53  if ( lane >= 2) ptr [idx ] = ptr [idx - 2] + ptr [idx];
54  if ( lane >= 4) ptr [idx ] = ptr [idx - 4] + ptr [idx];
55  if ( lane >= 8) ptr [idx ] = ptr [idx - 8] + ptr [idx];
56  if ( lane >= 16) ptr [idx ] = ptr [idx - 16] + ptr [idx];
57 
58  if( Kind == inclusive )
59  return ptr [idx ];
60  else
61  return (lane > 0) ? ptr [idx - 1] : 0;
62  }
63 
64  template <ScanKind Kind , class T>
65  __device__ __forceinline__ T scan_block( volatile T *ptr , const unsigned int idx = threadIdx.x )
66  {
67  const unsigned int lane = idx & 31;
68  const unsigned int warpid = idx >> 5;
69 
70  // Step 1: Intra - warp scan in each warp
71  T val = scan_warp <Kind>( ptr , idx );
72 
73  __syncthreads ();
74 
75  // Step 2: Collect per - warp partial results
76 
77  /* if( warpid == 0 )
78  if( lane == 31 )
79  ptr [ warpid ] = ptr [idx ];
80 
81  __syncthreads ();
82 
83  if( warpid > 0 ) */
84  if( lane == 31 )
85  ptr [ warpid ] = ptr [idx ];
86 
87  __syncthreads ();
88 
89  // Step 3: Use 1st warp to scan per - warp results
90  if( warpid == 0 )
91  scan_warp<inclusive>( ptr , idx );
92 
93  __syncthreads ();
94 
95  // Step 4: Accumulate results from Steps 1 and 3
96  if ( warpid > 0)
97  val = ptr [warpid -1] + val;
98 
99  __syncthreads ();
100 
101  // Step 5: Write and return the final result
102  ptr[idx] = val;
103 
104  __syncthreads ();
105 
106  return val ;
107  }
108  }
109 }
110 
111 #endif /* PCL_GPU_OCTREE_SCAN_BLOCK_HPP */
__device__ __forceinline__ T scan_warp(volatile T *ptr, const unsigned int idx=threadIdx.x)
Definition: device.hpp:87
__device__ __forceinline__ T scan_block(volatile T *ptr, const unsigned int idx=threadIdx.x)
Definition: scan_block.hpp:65