1
/******************************************************************************
2
* Copyright (c) 2011, Duane Merrill. All rights reserved.
3
* Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved.
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions are met:
7
* * Redistributions of source code must retain the above copyright
8
* notice, this list of conditions and the following disclaimer.
9
* * Redistributions in binary form must reproduce the above copyright
10
* notice, this list of conditions and the following disclaimer in the
11
* documentation and/or other materials provided with the distribution.
12
* * Neither the name of the NVIDIA CORPORATION nor the
13
* names of its contributors may be used to endorse or promote products
14
* derived from this software without specific prior written permission.
16
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
******************************************************************************/
31
* Random-access iterator types
36
#include "thread/thread_load.cuh"
37
#include "util_device.cuh"
38
#include "util_debug.cuh"
39
#include "util_namespace.cuh"
41
/// Optional outer namespace(s)
48
/******************************************************************************
50
*****************************************************************************/
52
#ifndef DOXYGEN_SHOULD_SKIP_THIS    // Do not document

// Anonymous namespace
namespace {

/// Templated texture reference type.  Provides the legacy texture-reference
/// binding path used when texture objects are unavailable (pre-SM30 PTX).
template <typename T>
struct TexIteratorRef
{
    // Texture reference type
    typedef texture<T, cudaTextureType1D, cudaReadModeElementType> TexRef;

    // The texture reference singleton for this value type (one per compilation unit)
    static TexRef ref;

    /// Bind the texture reference to the given device allocation.
    /// Returns the first CUDA error encountered (routed through CubDebug).
    static cudaError_t BindTexture(void *d_in)
    {
        cudaChannelFormatDesc tex_desc = cudaCreateChannelDesc<T>();
        return (CubDebug(cudaBindTexture(NULL, ref, d_in, tex_desc)));
    }

    /// Unbind the texture reference.
    static cudaError_t UnbindTexture()
    {
        return CubDebug(cudaUnbindTexture(ref));
    }
};

// Texture reference definitions
template <typename Value>
typename TexIteratorRef<Value>::TexRef TexIteratorRef<Value>::ref = 0;

} // Anonymous namespace

#endif // DOXYGEN_SHOULD_SKIP_THIS
103
* \addtogroup UtilModule
108
/******************************************************************************
110
*****************************************************************************/
113
* \brief A simple random-access iterator pointing to a range of constant values
116
* ConstantIteratorRA is a random-access iterator that when dereferenced, always
117
* returns the supplied constant of type \p OutputType.
119
* \tparam OutputType The value type of this iterator
121
template <typename OutputType>
class ConstantIteratorRA
{
public:

#ifndef DOXYGEN_SHOULD_SKIP_THIS    // Do not document

    typedef ConstantIteratorRA                  self_type;
    typedef OutputType                          value_type;
    typedef OutputType                          reference;
    typedef OutputType*                         pointer;
    typedef std::random_access_iterator_tag     iterator_category;
    typedef int                                 difference_type;

#endif // DOXYGEN_SHOULD_SKIP_THIS

private:

    OutputType val;     // The constant value reported by every dereference

public:

    /// Constructor
    __host__ __device__ __forceinline__ ConstantIteratorRA(
        const OutputType &val)          ///< Constant value for the iterator instance to report
    :
        val(val)
    {}

#ifndef DOXYGEN_SHOULD_SKIP_THIS    // Do not document

    // Advancing is a no-op: every position reports the same constant value
    __host__ __device__ __forceinline__ self_type operator++()
    {
        return *this;
    }

    __host__ __device__ __forceinline__ self_type operator++(int junk)
    {
        return *this;
    }

    __host__ __device__ __forceinline__ reference operator*()
    {
        return val;
    }

    template <typename SizeT>
    __host__ __device__ __forceinline__ self_type operator+(SizeT n)
    {
        return ConstantIteratorRA(val);
    }

    template <typename SizeT>
    __host__ __device__ __forceinline__ self_type operator-(SizeT n)
    {
        return ConstantIteratorRA(val);
    }

    // Fixed: previously returned ConstantIteratorRA(val) (an iterator) where the
    // declared return type is reference (OutputType); the subscripted value is returned
    template <typename SizeT>
    __host__ __device__ __forceinline__ reference operator[](SizeT n)
    {
        return val;
    }

    __host__ __device__ __forceinline__ pointer operator->()
    {
        return &val;
    }

    __host__ __device__ __forceinline__ bool operator==(const self_type& rhs)
    {
        return (val == rhs.val);
    }

    __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs)
    {
        return (val != rhs.val);
    }

#endif // DOXYGEN_SHOULD_SKIP_THIS
};
208
* \brief A simple random-access transform iterator for applying a transformation operator.
211
* TransformIteratorRA is a random-access iterator that wraps both a native
212
* device pointer of type <tt>InputType*</tt> and a unary conversion functor of
213
* type \p ConversionOp. \p OutputType references are made by pulling \p InputType
214
* values through the \p ConversionOp instance.
216
* \tparam InputType The value type of the pointer being wrapped
217
* \tparam ConversionOp Unary functor type for mapping objects of type \p InputType to type \p OutputType. Must have member <tt>OutputType operator()(const InputType &datum)</tt>.
218
* \tparam OutputType The value type of this iterator
220
template <typename OutputType, typename ConversionOp, typename InputType>
class TransformIteratorRA
{
public:

#ifndef DOXYGEN_SHOULD_SKIP_THIS    // Do not document

    typedef TransformIteratorRA                 self_type;
    typedef OutputType                          value_type;
    typedef OutputType                          reference;
    typedef OutputType*                         pointer;
    typedef std::random_access_iterator_tag     iterator_category;
    typedef int                                 difference_type;

#endif // DOXYGEN_SHOULD_SKIP_THIS

private:

    ConversionOp    conversion_op;  // Unary functor applied to each dereferenced value
    InputType*      ptr;            // Wrapped native pointer

public:

    /**
     * \brief Constructor
     * @param ptr Native pointer to wrap
     * @param conversion_op Unary transformation functor (fixed doc: previously mislabeled "binary")
     */
    __host__ __device__ __forceinline__ TransformIteratorRA(InputType* ptr, ConversionOp conversion_op) :
        conversion_op(conversion_op),
        ptr(ptr)
    {}

#ifndef DOXYGEN_SHOULD_SKIP_THIS    // Do not document

    __host__ __device__ __forceinline__ self_type operator++()
    {
        ptr++;
        return *this;
    }

    __host__ __device__ __forceinline__ self_type operator++(int junk)
    {
        // Postfix: return the un-advanced copy
        self_type retval = *this;
        ptr++;
        return retval;
    }

    __host__ __device__ __forceinline__ reference operator*()
    {
        return conversion_op(*ptr);
    }

    template <typename SizeT>
    __host__ __device__ __forceinline__ self_type operator+(SizeT n)
    {
        TransformIteratorRA retval(ptr + n, conversion_op);
        return retval;
    }

    template <typename SizeT>
    __host__ __device__ __forceinline__ self_type operator-(SizeT n)
    {
        TransformIteratorRA retval(ptr - n, conversion_op);
        return retval;
    }

    template <typename SizeT>
    __host__ __device__ __forceinline__ reference operator[](SizeT n)
    {
        return conversion_op(ptr[n]);
    }

    __host__ __device__ __forceinline__ pointer operator->()
    {
        // FIXME(review): takes the address of a temporary OutputType; the pointer
        // must not be retained past the full expression.  Preserved from the
        // original design -- confirm no caller stores this pointer.
        return &conversion_op(*ptr);
    }

    // Equality compares the wrapped pointer only (the functor is assumed stateless-equivalent)
    __host__ __device__ __forceinline__ bool operator==(const self_type& rhs)
    {
        return (ptr == rhs.ptr);
    }

    __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs)
    {
        return (ptr != rhs.ptr);
    }

#endif // DOXYGEN_SHOULD_SKIP_THIS
};
314
* \brief A simple random-access iterator for loading primitive values through texture cache.
317
* TexIteratorRA is a random-access iterator that wraps a native
318
* device pointer of type <tt>T*</tt>. References made through TexIteratorRA
319
* causes values to be pulled through texture cache.
321
* \par Usage Considerations
322
* - Can only be used with primitive types (e.g., \p char, \p int, \p float), with the exception of \p double
323
 * - Only one TexIteratorRA or TexTransformIteratorRA of a certain value type can be bound at any given time (per host thread)
 *
 * \tparam T The value type of the pointer being wrapped
329
template <typename T>
class TexIteratorRA
{
public:

#ifndef DOXYGEN_SHOULD_SKIP_THIS    // Do not document

    typedef TexIteratorRA                       self_type;
    typedef T                                   value_type;
    typedef T                                   reference;
    typedef T*                                  pointer;
    typedef std::random_access_iterator_tag     iterator_category;
    typedef int                                 difference_type;

#endif // DOXYGEN_SHOULD_SKIP_THIS

    /// Tag identifying iterator type as being texture-bindable
    typedef void TexBindingTag;

private:

    T*                  ptr;                // Wrapped native pointer (host dereference path)
    size_t              tex_align_offset;   // Offset (in items) of this iterator within the bound range
    cudaTextureObject_t tex_obj;            // Texture object (used when PTX >= 300)

public:

    /// Constructor (iterator is unusable until BindTexture succeeds)
    __host__ __device__ __forceinline__ TexIteratorRA()
    {}

    /// \brief Bind iterator to texture reference
    cudaError_t BindTexture(
        T               *ptr,                   ///< Native pointer to wrap that is aligned to cudaDeviceProp::textureAlignment
        size_t          bytes,                  ///< Size of the wrapped range, in bytes (fixed doc: used as res.linear.sizeInBytes below)
        size_t          tex_align_offset = 0)   ///< Offset (in items) from ptr denoting the position of the iterator
    {
        this->ptr = ptr;
        this->tex_align_offset = tex_align_offset;

        int ptx_version;
        cudaError_t error = cudaSuccess;
        if (CubDebug(error = PtxVersion(ptx_version))) return error;
        if (ptx_version >= 300)
        {
            // Use texture object
            cudaChannelFormatDesc channel_desc = cudaCreateChannelDesc<T>();
            cudaResourceDesc res_desc;
            cudaTextureDesc tex_desc;
            memset(&res_desc, 0, sizeof(cudaResourceDesc));
            memset(&tex_desc, 0, sizeof(cudaTextureDesc));
            res_desc.resType = cudaResourceTypeLinear;
            res_desc.res.linear.devPtr = ptr;
            res_desc.res.linear.desc = channel_desc;
            res_desc.res.linear.sizeInBytes = bytes;
            tex_desc.readMode = cudaReadModeElementType;
            return CubDebug(cudaCreateTextureObject(&tex_obj, &res_desc, &tex_desc, NULL));
        }
        else
        {
            // Use texture reference (pre-SM30)
            return TexIteratorRef<T>::BindTexture(ptr);
        }
    }

    /// \brief Unbind iterator to texture reference
    cudaError_t UnbindTexture()
    {
        int ptx_version;
        cudaError_t error = cudaSuccess;
        if (CubDebug(error = PtxVersion(ptx_version))) return error;
        if (ptx_version < 300)
        {
            // Use texture reference
            return TexIteratorRef<T>::UnbindTexture();
        }
        else
        {
            // Use texture object
            return CubDebug(cudaDestroyTextureObject(tex_obj));
        }
    }

#ifndef DOXYGEN_SHOULD_SKIP_THIS    // Do not document

    __host__ __device__ __forceinline__ self_type operator++()
    {
        ptr++;
        tex_align_offset++;
        return *this;
    }

    __host__ __device__ __forceinline__ self_type operator++(int junk)
    {
        // Postfix: return the un-advanced copy
        self_type retval = *this;
        ptr++;
        tex_align_offset++;
        return retval;
    }

    __host__ __device__ __forceinline__ reference operator*()
    {
#if (CUB_PTX_ARCH == 0)
        // Simply dereference the pointer on the host
        return *ptr;
#elif (CUB_PTX_ARCH < 300)
        // Use the texture reference
        return tex1Dfetch(TexIteratorRef<T>::ref, tex_align_offset);
#else
        // Use the texture object
        // Fixed: previously called conversion_op(tex1Dfetch<InputType>(...)),
        // but this class has no conversion_op and no InputType (copy-paste
        // from TexTransformIteratorRA)
        return tex1Dfetch<T>(tex_obj, tex_align_offset);
#endif
    }

    template <typename SizeT>
    __host__ __device__ __forceinline__ self_type operator+(SizeT n)
    {
        TexIteratorRA retval;
        retval.ptr = ptr + n;
        retval.tex_align_offset = tex_align_offset + n;
        retval.tex_obj = tex_obj;   // fixed: texture object was not propagated to the advanced copy
        return retval;
    }

    template <typename SizeT>
    __host__ __device__ __forceinline__ self_type operator-(SizeT n)
    {
        TexIteratorRA retval;
        retval.ptr = ptr - n;
        retval.tex_align_offset = tex_align_offset - n;
        retval.tex_obj = tex_obj;   // fixed: texture object was not propagated to the retreated copy
        return retval;
    }

    template <typename SizeT>
    __host__ __device__ __forceinline__ reference operator[](SizeT n)
    {
#if (CUB_PTX_ARCH == 0)
        // Simply dereference the pointer on the host
        return ptr[n];
#elif (CUB_PTX_ARCH < 300)
        // Use the texture reference
        return tex1Dfetch(TexIteratorRef<T>::ref, tex_align_offset + n);
#else
        // Use the texture object (fixed: no conversion_op/InputType in this class)
        return tex1Dfetch<T>(tex_obj, tex_align_offset + n);
#endif
    }

    __host__ __device__ __forceinline__ pointer operator->()
    {
#if (CUB_PTX_ARCH == 0)
        // Simply dereference the pointer on the host
        return ptr;
#elif (CUB_PTX_ARCH < 300)
        // Use the texture reference
        // FIXME(review): address of a temporary; must not be retained past the full expression
        return &(tex1Dfetch(TexIteratorRef<T>::ref, tex_align_offset));
#else
        // Use the texture object (fixed: no conversion_op/InputType in this class)
        // FIXME(review): address of a temporary; must not be retained past the full expression
        return &(tex1Dfetch<T>(tex_obj, tex_align_offset));
#endif
    }

    // Equality compares the wrapped pointer only
    __host__ __device__ __forceinline__ bool operator==(const self_type& rhs)
    {
        return (ptr == rhs.ptr);
    }

    __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs)
    {
        return (ptr != rhs.ptr);
    }

#endif // DOXYGEN_SHOULD_SKIP_THIS
};
511
 * \brief A simple random-access transform iterator for loading primitive values through texture cache and subsequently applying a transformation operator.
514
* TexTransformIteratorRA is a random-access iterator that wraps both a native
515
* device pointer of type <tt>InputType*</tt> and a unary conversion functor of
516
* type \p ConversionOp. \p OutputType references are made by pulling \p InputType
517
 * values through the texture cache and then transforming them using the
518
* \p ConversionOp instance.
520
* \par Usage Considerations
521
* - Can only be used with primitive types (e.g., \p char, \p int, \p float), with the exception of \p double
522
* - Only one TexIteratorRA or TexTransformIteratorRA of a certain \p InputType can be bound at any given time (per host thread)
524
* \tparam InputType The value type of the pointer being wrapped
525
* \tparam ConversionOp Unary functor type for mapping objects of type \p InputType to type \p OutputType. Must have member <tt>OutputType operator()(const InputType &datum)</tt>.
526
* \tparam OutputType The value type of this iterator
528
template <typename OutputType, typename ConversionOp, typename InputType>
class TexTransformIteratorRA
{
public:

#ifndef DOXYGEN_SHOULD_SKIP_THIS    // Do not document

    typedef TexTransformIteratorRA              self_type;
    typedef OutputType                          value_type;
    typedef OutputType                          reference;
    typedef OutputType*                         pointer;
    typedef std::random_access_iterator_tag     iterator_category;
    typedef int                                 difference_type;

#endif // DOXYGEN_SHOULD_SKIP_THIS

    /// Tag identifying iterator type as being texture-bindable
    typedef void TexBindingTag;

private:

    ConversionOp        conversion_op;      // Unary functor applied to every loaded value
    InputType*          ptr;                // Wrapped native pointer (host dereference path)
    size_t              tex_align_offset;   // Offset (in items) of this iterator within the bound range
    cudaTextureObject_t tex_obj;            // Texture object (used when PTX >= 300)

public:

    /// Constructor (iterator is unusable until BindTexture succeeds)
    __host__ __device__ __forceinline__ TexTransformIteratorRA(
        ConversionOp conversion_op)         ///< Unary transformation functor (fixed doc: previously mislabeled "binary")
    :
        conversion_op(conversion_op)
    {}

    /// \brief Bind iterator to texture reference
    cudaError_t BindTexture(
        InputType*      ptr,                    ///< Native pointer to wrap that is aligned to cudaDeviceProp::textureAlignment
        size_t          bytes,                  ///< Size of the wrapped range, in bytes (fixed doc: used as res.linear.sizeInBytes below)
        size_t          tex_align_offset = 0)   ///< Offset (in items) from ptr denoting the position of the iterator
    {
        this->ptr = ptr;
        this->tex_align_offset = tex_align_offset;

        int ptx_version;
        cudaError_t error = cudaSuccess;
        if (CubDebug(error = PtxVersion(ptx_version))) return error;
        if (ptx_version >= 300)
        {
            // Use texture object
            cudaChannelFormatDesc channel_desc = cudaCreateChannelDesc<InputType>();
            cudaResourceDesc res_desc;
            cudaTextureDesc tex_desc;
            memset(&res_desc, 0, sizeof(cudaResourceDesc));
            memset(&tex_desc, 0, sizeof(cudaTextureDesc));
            res_desc.resType = cudaResourceTypeLinear;
            res_desc.res.linear.devPtr = ptr;
            res_desc.res.linear.desc = channel_desc;
            res_desc.res.linear.sizeInBytes = bytes;
            tex_desc.readMode = cudaReadModeElementType;
            return CubDebug(cudaCreateTextureObject(&tex_obj, &res_desc, &tex_desc, NULL));
        }
        else
        {
            // Use texture reference (pre-SM30)
            return TexIteratorRef<InputType>::BindTexture(ptr);
        }
    }

    /// \brief Unbind iterator to texture reference
    cudaError_t UnbindTexture()
    {
        int ptx_version;
        cudaError_t error = cudaSuccess;
        if (CubDebug(error = PtxVersion(ptx_version))) return error;
        if (ptx_version >= 300)
        {
            // Use texture object
            return CubDebug(cudaDestroyTextureObject(tex_obj));
        }
        else
        {
            // Use texture reference
            return TexIteratorRef<InputType>::UnbindTexture();
        }
    }

#ifndef DOXYGEN_SHOULD_SKIP_THIS    // Do not document

    __host__ __device__ __forceinline__ self_type operator++()
    {
        ptr++;
        tex_align_offset++;
        return *this;
    }

    __host__ __device__ __forceinline__ self_type operator++(int junk)
    {
        // Postfix: return the un-advanced copy
        self_type retval = *this;
        ptr++;
        tex_align_offset++;
        return retval;
    }

    __host__ __device__ __forceinline__ reference operator*()
    {
#if (CUB_PTX_ARCH == 0)
        // Simply dereference the pointer on the host
        return conversion_op(*ptr);
#elif (CUB_PTX_ARCH < 300)
        // Use the texture reference
        return conversion_op(tex1Dfetch(TexIteratorRef<InputType>::ref, tex_align_offset));
#else
        // Use the texture object
        return conversion_op(tex1Dfetch<InputType>(tex_obj, tex_align_offset));
#endif
    }

    template <typename SizeT>
    __host__ __device__ __forceinline__ self_type operator+(SizeT n)
    {
        TexTransformIteratorRA retval(conversion_op);
        retval.ptr = ptr + n;
        retval.tex_align_offset = tex_align_offset + n;
        retval.tex_obj = tex_obj;   // fixed: texture object was not propagated to the advanced copy
        return retval;
    }

    template <typename SizeT>
    __host__ __device__ __forceinline__ self_type operator-(SizeT n)
    {
        TexTransformIteratorRA retval(conversion_op);
        retval.ptr = ptr - n;
        retval.tex_align_offset = tex_align_offset - n;
        retval.tex_obj = tex_obj;   // fixed: texture object was not propagated to the retreated copy
        return retval;
    }

    template <typename SizeT>
    __host__ __device__ __forceinline__ reference operator[](SizeT n)
    {
#if (CUB_PTX_ARCH == 0)
        // Simply dereference the pointer on the host
        return conversion_op(ptr[n]);
#elif (CUB_PTX_ARCH < 300)
        // Use the texture reference
        return conversion_op(tex1Dfetch(TexIteratorRef<InputType>::ref, tex_align_offset + n));
#else
        // Use the texture object
        return conversion_op(tex1Dfetch<InputType>(tex_obj, tex_align_offset + n));
#endif
    }

    __host__ __device__ __forceinline__ pointer operator->()
    {
        // FIXME(review): every branch takes the address of a temporary OutputType;
        // the pointer must not be retained past the full expression.  Preserved
        // from the original design -- confirm no caller stores this pointer.
#if (CUB_PTX_ARCH == 0)
        // Simply dereference the pointer on the host
        return &conversion_op(*ptr);
#elif (CUB_PTX_ARCH < 300)
        // Use the texture reference
        return &conversion_op(tex1Dfetch(TexIteratorRef<InputType>::ref, tex_align_offset));
#else
        // Use the texture object
        return &conversion_op(tex1Dfetch<InputType>(tex_obj, tex_align_offset));
#endif
    }

    // Equality compares the wrapped pointer only (the functor is assumed stateless-equivalent)
    __host__ __device__ __forceinline__ bool operator==(const self_type& rhs)
    {
        return (ptr == rhs.ptr);
    }

    __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs)
    {
        return (ptr != rhs.ptr);
    }

#endif // DOXYGEN_SHOULD_SKIP_THIS
};
715
/** @} */ // end group UtilModule
718
CUB_NS_POSTFIX // Optional outer namespace(s)