/*
 * Copyright 2008-2011 NVIDIA Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <thrust/detail/config.h>

// do not attempt to compile this file, which uses CUDA built-in variables, with any compiler other than nvcc
#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC

namespace thrust
{
namespace detail
{
namespace device
{
namespace cuda
{
namespace block
{
35
template <unsigned int block_size, typename ValueIterator, typename BinaryFunction>
37
void reduce(ValueIterator data, BinaryFunction binary_op)
39
if (block_size >= 1024) { if (threadIdx.x < 512) { data[threadIdx.x] = binary_op(data[threadIdx.x], data[threadIdx.x + 512]); } __syncthreads(); }
40
if (block_size >= 512) { if (threadIdx.x < 256) { data[threadIdx.x] = binary_op(data[threadIdx.x], data[threadIdx.x + 256]); } __syncthreads(); }
41
if (block_size >= 256) { if (threadIdx.x < 128) { data[threadIdx.x] = binary_op(data[threadIdx.x], data[threadIdx.x + 128]); } __syncthreads(); }
42
if (block_size >= 128) { if (threadIdx.x < 64) { data[threadIdx.x] = binary_op(data[threadIdx.x], data[threadIdx.x + 64]); } __syncthreads(); }
43
if (block_size >= 64) { if (threadIdx.x < 32) { data[threadIdx.x] = binary_op(data[threadIdx.x], data[threadIdx.x + 32]); } __syncthreads(); }
44
if (block_size >= 32) { if (threadIdx.x < 16) { data[threadIdx.x] = binary_op(data[threadIdx.x], data[threadIdx.x + 16]); } __syncthreads(); }
45
if (block_size >= 16) { if (threadIdx.x < 8) { data[threadIdx.x] = binary_op(data[threadIdx.x], data[threadIdx.x + 8]); } __syncthreads(); }
46
if (block_size >= 8) { if (threadIdx.x < 4) { data[threadIdx.x] = binary_op(data[threadIdx.x], data[threadIdx.x + 4]); } __syncthreads(); }
47
if (block_size >= 4) { if (threadIdx.x < 2) { data[threadIdx.x] = binary_op(data[threadIdx.x], data[threadIdx.x + 2]); } __syncthreads(); }
48
if (block_size >= 2) { if (threadIdx.x < 1) { data[threadIdx.x] = binary_op(data[threadIdx.x], data[threadIdx.x + 1]); } __syncthreads(); }
51
template <typename ValueIterator, typename BinaryFunction>
53
void reduce_n(ValueIterator data, const unsigned int n, BinaryFunction binary_op)
56
if (n > 512) { if (threadIdx.x < 512 && threadIdx.x + 512 < n) { data[threadIdx.x] = binary_op(data[threadIdx.x], data[threadIdx.x + 512]); } __syncthreads(); }
57
if (n > 256) { if (threadIdx.x < 256 && threadIdx.x + 256 < n) { data[threadIdx.x] = binary_op(data[threadIdx.x], data[threadIdx.x + 256]); } __syncthreads(); }
58
if (n > 128) { if (threadIdx.x < 128 && threadIdx.x + 128 < n) { data[threadIdx.x] = binary_op(data[threadIdx.x], data[threadIdx.x + 128]); } __syncthreads(); }
59
if (n > 64) { if (threadIdx.x < 64 && threadIdx.x + 64 < n) { data[threadIdx.x] = binary_op(data[threadIdx.x], data[threadIdx.x + 64]); } __syncthreads(); }
60
if (n > 32) { if (threadIdx.x < 32 && threadIdx.x + 32 < n) { data[threadIdx.x] = binary_op(data[threadIdx.x], data[threadIdx.x + 32]); } __syncthreads(); }
61
if (n > 16) { if (threadIdx.x < 16 && threadIdx.x + 16 < n) { data[threadIdx.x] = binary_op(data[threadIdx.x], data[threadIdx.x + 16]); } __syncthreads(); }
62
if (n > 8) { if (threadIdx.x < 8 && threadIdx.x + 8 < n) { data[threadIdx.x] = binary_op(data[threadIdx.x], data[threadIdx.x + 8]); } __syncthreads(); }
63
if (n > 4) { if (threadIdx.x < 4 && threadIdx.x + 4 < n) { data[threadIdx.x] = binary_op(data[threadIdx.x], data[threadIdx.x + 4]); } __syncthreads(); }
64
if (n > 2) { if (threadIdx.x < 2 && threadIdx.x + 2 < n) { data[threadIdx.x] = binary_op(data[threadIdx.x], data[threadIdx.x + 2]); } __syncthreads(); }
65
if (n > 1) { if (threadIdx.x < 1 && threadIdx.x + 1 < n) { data[threadIdx.x] = binary_op(data[threadIdx.x], data[threadIdx.x + 1]); } __syncthreads(); }
} // end namespace block
} // end namespace cuda
} // end namespace device
} // end namespace detail
} // end namespace thrust

#endif // THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC