/*
 *  Copyright 2008-2011 NVIDIA Corporation
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

/*! \file scan.inl
 *  \brief Inline file for scan.h.
 */
22
#include <thrust/detail/config.h>
23
#include <thrust/detail/device/cuda/dispatch/scan.h>
24
#include <thrust/detail/static_assert.h>
35
template<typename InputIterator,
36
typename OutputIterator,
37
typename AssociativeOperator>
38
OutputIterator inclusive_scan(InputIterator first,
40
OutputIterator result,
41
AssociativeOperator binary_op)
43
// we're attempting to launch a kernel, assert we're compiling with nvcc
44
// ========================================================================
45
// X Note to the user: If you've found this line due to a compiler error, X
46
// X you need to compile your code using nvcc, rather than g++ or cl.exe X
47
// ========================================================================
48
THRUST_STATIC_ASSERT( (depend_on_instantiation<InputIterator, THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC>::value) );
50
typedef typename thrust::iterator_value<OutputIterator>::type OutputType;
52
// whether to use fast_scan or safe_scan
53
// TODO profile this threshold
54
#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC && CUDA_VERSION >= 3010
55
// CUDA 3.1 and higher support non-pod types in statically-allocated __shared__ memory
56
static const bool use_fast_scan = sizeof(OutputType) <= 16;
58
// CUDA 3.0 and earlier must use safe_scan for non-pod types
59
static const bool use_fast_scan = sizeof(OutputType) <= 16 && thrust::detail::is_pod<OutputType>::value;
62
// XXX WAR nvcc unused variable warning
65
return thrust::detail::device::cuda::dispatch::inclusive_scan
66
(first, last, result, binary_op,
67
thrust::detail::integral_constant<bool, use_fast_scan>());
70
template<typename InputIterator,
71
typename OutputIterator,
73
typename AssociativeOperator>
74
OutputIterator exclusive_scan(InputIterator first,
76
OutputIterator result,
78
AssociativeOperator binary_op)
80
// we're attempting to launch a kernel, assert we're compiling with nvcc
81
// ========================================================================
82
// X Note to the user: If you've found this line due to a compiler error, X
83
// X you need to compile your code using nvcc, rather than g++ or cl.exe X
84
// ========================================================================
85
THRUST_STATIC_ASSERT( (depend_on_instantiation<InputIterator, THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC>::value) );
87
typedef typename thrust::iterator_value<OutputIterator>::type OutputType;
89
// whether to use fast_scan or safe_scan
90
// TODO profile this threshold
91
#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC && CUDA_VERSION >= 3010
92
// CUDA 3.1 and higher support non-pod types in statically-allocated __shared__ memory
93
static const bool use_fast_scan = sizeof(OutputType) <= 16;
95
// CUDA 3.0 and earlier must use safe_scan for non-pod types
96
static const bool use_fast_scan = sizeof(OutputType) <= 16 && thrust::detail::is_pod<OutputType>::value;
99
// XXX WAR nvcc 3.0 unused variable warning
100
(void) use_fast_scan;
102
return thrust::detail::device::cuda::dispatch::exclusive_scan
103
(first, last, result, init, binary_op,
104
thrust::detail::integral_constant<bool, use_fast_scan>());
107
} // end namespace cuda
108
} // end namespace device
109
} // end namespace detail
110
} // end namespace thrust