Point Cloud Library (PCL)  1.14.0-dev
warp.hpp
1 /*
2 * Software License Agreement (BSD License)
3 *
4 * Copyright (c) 2011, Willow Garage, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
17 * * Neither the name of Willow Garage, Inc. nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 * POSSIBILITY OF SUCH DAMAGE.
33 *
34 * Author: Anatoly Baskeheev, Itseez Ltd, (myname.mysurname@mycompany.com)
35 */
36 
37 #ifndef PCL_DEVICE_UTILS_WARP_HPP_
38 #define PCL_DEVICE_UTILS_WARP_HPP_
39 
40 namespace pcl
41 {
42  namespace device
43  {
44  struct Warp
45  {
46  enum
47  {
48  LOG_WARP_SIZE = 5,
49  WARP_SIZE = 1 << LOG_WARP_SIZE,
51  };
52 
53  /** \brief Returns the warp lane ID of the calling thread. */
54  static __device__ __forceinline__ unsigned int laneId()
55  {
56  unsigned int ret;
57  asm("mov.u32 %0, %laneid;" : "=r"(ret) );
58  return ret;
59  }
60 
61  static __device__ __forceinline__ int laneMaskLe()
62  {
63  unsigned int ret;
64  asm("mov.u32 %0, %lanemask_le;" : "=r"(ret) );
65  return ret;
66  }
67 
68  static __device__ __forceinline__ int laneMaskLt()
69  {
70  unsigned int ret;
71  asm("mov.u32 %0, %lanemask_lt;" : "=r"(ret) );
72  return ret;
73  }
74  static __device__ __forceinline__ unsigned int id()
75  {
76  int tid = threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x;
77  return tid >> LOG_WARP_SIZE;
78  }
79 
80  static __device__ __forceinline__ int binaryInclScan(int ballot_mask)
81  {
82  return __popc(Warp::laneMaskLe() & ballot_mask);
83  }
84 
85  static __device__ __forceinline__ int binaryExclScan(int ballot_mask)
86  {
87  return __popc(Warp::laneMaskLt() & ballot_mask);
88  }
89 
90  template<typename It, typename T>
91  static __device__ __forceinline__ void fill(It beg, It end, const T& value)
92  {
93  for(It t = beg + laneId(); t < end; t += STRIDE)
94  *t = value;
95  }
96 
97  template<typename InIt, typename OutIt>
98  static __device__ __forceinline__ OutIt copy(InIt beg, InIt end, OutIt out)
99  {
100  unsigned int lane = laneId();
101  InIt t = beg + lane;
102  OutIt o = out + lane;
103 
104  for(; t < end; t += STRIDE, o += STRIDE)
105  *o = *t;
106  return o;
107  }
108 
109  template<typename InIt, typename OutIt, class UnOp>
110  static __device__ __forceinline__ OutIt transform(InIt beg, InIt end, OutIt out, UnOp op)
111  {
112  unsigned int lane = laneId();
113  InIt t = beg + lane;
114  OutIt o = out + lane;
115 
116  for(InIt t = beg + laneId(); t < end; t += STRIDE, o += STRIDE)
117  *o = op(*t);
118  return o;
119  }
120 
121  template<typename InIt1, typename InIt2, typename OutIt, class BinOp>
122  static __device__ __forceinline__ OutIt transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
123  {
124  unsigned int lane = laneId();
125  InIt1 t1 = beg1 + lane;
126  InIt2 t2 = beg2 + lane;
127  OutIt o = out + lane;
128 
129  for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, o += STRIDE)
130  *o = op(*t1, *t2);
131  return o;
132  }
133 
134  template<typename OutIt, typename T>
135  static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value)
136  {
137  unsigned int lane = laneId();
138  value += lane;
139 
140  for(OutIt t = beg + lane; t < end; t += STRIDE, value += STRIDE)
141  *t = value;
142  }
143 
144  template<typename T, class BinOp>
145  static __device__ __forceinline__ void reduce(volatile T* buffer, BinOp op)
146  {
147  unsigned int lane = laneId();
148  T val = buffer[lane];
149 
150  if (lane < 16)
151  {
152  buffer[lane] = val = op(val, buffer[lane + 16]);
153  buffer[lane] = val = op(val, buffer[lane + 8]);
154  buffer[lane] = val = op(val, buffer[lane + 4]);
155  buffer[lane] = val = op(val, buffer[lane + 2]);
156  buffer[lane] = val = op(val, buffer[lane + 1]);
157  }
158  }
159 
160  template<typename T, class BinOp>
161  static __device__ __forceinline__ T reduce(volatile T* buffer, T init, BinOp op)
162  {
163  unsigned int lane = laneId();
164  T val = buffer[lane] = init;
165 
166  if (lane < 16)
167  {
168  buffer[lane] = val = op(val, buffer[lane + 16]);
169  buffer[lane] = val = op(val, buffer[lane + 8]);
170  buffer[lane] = val = op(val, buffer[lane + 4]);
171  buffer[lane] = val = op(val, buffer[lane + 2]);
172  buffer[lane] = val = op(val, buffer[lane + 1]);
173  }
174  return buffer[0];
175  }
176  };
177  }
178 }
179 
180 #endif /* PCL_DEVICE_UTILS_WARP_HPP_ */
static __device__ __forceinline__ int laneMaskLe()
Definition: warp.hpp:61
static __device__ __forceinline__ int binaryInclScan(int ballot_mask)
Definition: warp.hpp:80
static __device__ __forceinline__ void reduce(volatile T *buffer, BinOp op)
Definition: warp.hpp:145
static __device__ __forceinline__ unsigned int laneId()
Returns the warp lane ID of the calling thread.
Definition: warp.hpp:54
static __device__ __forceinline__ OutIt transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
Definition: warp.hpp:122
static __device__ __forceinline__ int binaryExclScan(int ballot_mask)
Definition: warp.hpp:85
static __device__ __forceinline__ int laneMaskLt()
Definition: warp.hpp:68
static __device__ __forceinline__ OutIt copy(InIt beg, InIt end, OutIt out)
Definition: warp.hpp:98
static __device__ __forceinline__ void fill(It beg, It end, const T &value)
Definition: warp.hpp:91
static __device__ __forceinline__ T reduce(volatile T *buffer, T init, BinOp op)
Definition: warp.hpp:161
static __device__ __forceinline__ unsigned int id()
Definition: warp.hpp:74
static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value)
Definition: warp.hpp:135
static __device__ __forceinline__ OutIt transform(InIt beg, InIt end, OutIt out, UnOp op)
Definition: warp.hpp:110