/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_CORE_CUDA_HPP__
#define __OPENCV_CORE_CUDA_HPP__

#ifndef __cplusplus
#  error cuda.hpp header must be compiled as C++
#endif

#include "opencv2/core.hpp"
#include "opencv2/core/cuda_types.hpp"

/**
  @defgroup cuda CUDA-accelerated Computer Vision
  @{
    @defgroup cudacore Core part
    @{
      @defgroup cudacore_init Initialization and Information
      @defgroup cudacore_struct Data Structures
    @}
  @}
 */

namespace cv { namespace cuda {

//! @addtogroup cudacore_struct
//! @{

//===================================================================================
// GpuMat
//===================================================================================

/** @brief Base storage class for GPU memory with reference counting.

Its interface matches the Mat interface with the following limitations:

-   no arbitrary dimensions support (only 2D)
-   no functions that return references to their data (because references on GPU are not valid for
    CPU)
-   no expression templates technique support

Beware that the latter limitation may lead to overloaded matrix operators that cause memory
allocations. The GpuMat class is convertible to cuda::PtrStepSz and cuda::PtrStep so it can be
passed directly to the kernel.

@note In contrast with Mat, in most cases GpuMat::isContinuous() == false. This means that rows are
aligned to a size depending on the hardware. Single-row GpuMat is always a continuous matrix.

@note It is not recommended to leave static or global GpuMat variables allocated, that is, to rely
on their destructors. The destruction order of such variables and the CUDA context is undefined. The
GPU memory release function returns an error if the CUDA context has been destroyed before.
 */
class CV_EXPORTS GpuMat
{
public:
    class CV_EXPORTS Allocator
    {
    public:
        virtual ~Allocator() {}

        // allocator must fill data, step and refcount fields
        virtual bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize) = 0;
        virtual void free(GpuMat* mat) = 0;
    };

    //! default allocator
    static Allocator* defaultAllocator();
    static void setDefaultAllocator(Allocator* allocator);

    //! default constructor
    explicit GpuMat(Allocator* allocator = defaultAllocator());

    //! constructs GpuMat of the specified size and type
    GpuMat(int rows, int cols, int type, Allocator* allocator = defaultAllocator());
    GpuMat(Size size, int type, Allocator* allocator = defaultAllocator());

    //! constructs GpuMat and fills it with the specified value _s
    GpuMat(int rows, int cols, int type, Scalar s, Allocator* allocator = defaultAllocator());
    GpuMat(Size size, int type, Scalar s, Allocator* allocator = defaultAllocator());

    //! copy constructor
    GpuMat(const GpuMat& m);

    //! constructor for GpuMat headers pointing to user-allocated data
    GpuMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP);
    GpuMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP);

    //! creates a GpuMat header for a part of the bigger matrix
    GpuMat(const GpuMat& m, Range rowRange, Range colRange);
    GpuMat(const GpuMat& m, Rect roi);

    //! builds GpuMat from host memory (Blocking call)
    explicit GpuMat(InputArray arr, Allocator* allocator = defaultAllocator());

    //! destructor - calls release()
    ~GpuMat();

    //! assignment operators
    GpuMat& operator =(const GpuMat& m);

    //! allocates new GpuMat data unless the GpuMat already has specified size and type
    void create(int rows, int cols, int type);
    void create(Size size, int type);

    //! decreases reference counter, deallocates the data when reference counter reaches 0
    void release();

    //! swaps with other smart pointer
    void swap(GpuMat& mat);

    //! performs data upload to GpuMat (Blocking call)
    void upload(InputArray arr);

    //! performs data upload to GpuMat (Non-Blocking call)
    void upload(InputArray arr, Stream& stream);

    //! performs data download from device to host memory (Blocking call)
    void download(OutputArray dst) const;

    //! performs data download from device to host memory (Non-Blocking call)
    void download(OutputArray dst, Stream& stream) const;

    //! returns deep copy of the GpuMat, i.e. the data is copied
    GpuMat clone() const;

    //! copies the GpuMat content to device memory (Blocking call)
    void copyTo(OutputArray dst) const;

    //! copies the GpuMat content to device memory (Non-Blocking call)
    void copyTo(OutputArray dst, Stream& stream) const;

    //! copies those GpuMat elements to "dst" that are marked with non-zero mask elements (Blocking call)
    void copyTo(OutputArray dst, InputArray mask) const;

    //! copies those GpuMat elements to "dst" that are marked with non-zero mask elements (Non-Blocking call)
    void copyTo(OutputArray dst, InputArray mask, Stream& stream) const;

    //! sets some of the GpuMat elements to s (Blocking call)
    GpuMat& setTo(Scalar s);

    //! sets some of the GpuMat elements to s (Non-Blocking call)
    GpuMat& setTo(Scalar s, Stream& stream);

    //! sets some of the GpuMat elements to s, according to the mask (Blocking call)
    GpuMat& setTo(Scalar s, InputArray mask);

    //! sets some of the GpuMat elements to s, according to the mask (Non-Blocking call)
    GpuMat& setTo(Scalar s, InputArray mask, Stream& stream);

    //! converts GpuMat to another datatype (Blocking call)
    void convertTo(OutputArray dst, int rtype) const;

    //! converts GpuMat to another datatype (Non-Blocking call)
    void convertTo(OutputArray dst, int rtype, Stream& stream) const;

    //! converts GpuMat to another datatype with scaling (Blocking call)
    void convertTo(OutputArray dst, int rtype, double alpha, double beta = 0.0) const;

    //! converts GpuMat to another datatype with scaling (Non-Blocking call)
    void convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const;

    //! converts GpuMat to another datatype with scaling (Non-Blocking call)
    void convertTo(OutputArray dst, int rtype, double alpha, double beta, Stream& stream) const;

    void assignTo(GpuMat& m, int type = -1) const;

    //! returns pointer to y-th row
    uchar* ptr(int y = 0);
    const uchar* ptr(int y = 0) const;

    //! template version of the above method
    template<typename _Tp> _Tp* ptr(int y = 0);
    template<typename _Tp> const _Tp* ptr(int y = 0) const;

    template <typename _Tp> operator PtrStepSz<_Tp>() const;
    template <typename _Tp> operator PtrStep<_Tp>() const;

    //! returns a new GpuMat header for the specified row
    GpuMat row(int y) const;

    //! returns a new GpuMat header for the specified column
    GpuMat col(int x) const;

    //! ... for the specified row span
    GpuMat rowRange(int startrow, int endrow) const;
    GpuMat rowRange(Range r) const;

    //! ... for the specified column span
    GpuMat colRange(int startcol, int endcol) const;
    GpuMat colRange(Range r) const;

    //! extracts a rectangular sub-GpuMat (this is a generalized form of row, rowRange etc.)
    GpuMat operator ()(Range rowRange, Range colRange) const;
    GpuMat operator ()(Rect roi) const;

    //! creates alternative GpuMat header for the same data, with different
    //! number of channels and/or different number of rows
    GpuMat reshape(int cn, int rows = 0) const;

    //! locates GpuMat header within a parent GpuMat
    void locateROI(Size& wholeSize, Point& ofs) const;

    //! moves/resizes the current GpuMat ROI inside the parent GpuMat
    GpuMat& adjustROI(int dtop, int dbottom, int dleft, int dright);

    //! returns true iff the GpuMat data is continuous
    //! (i.e. when there are no gaps between successive rows)
    bool isContinuous() const;

    //! returns element size in bytes
    size_t elemSize() const;

    //! returns the size of element channel in bytes
    size_t elemSize1() const;

    //! returns element type
    int type() const;

    //! returns element depth
    int depth() const;

    //! returns number of channels
    int channels() const;

    //! returns step/elemSize1()
    size_t step1() const;

    //! returns GpuMat size : width == number of columns, height == number of rows
    Size size() const;

    //! returns true if GpuMat data is NULL
    bool empty() const;

    /*! includes several bit-fields:
    - the magic signature
    - continuity flag
    - depth
    - number of channels
    */
    int flags;

    //! the number of rows and columns
    int rows, cols;

    //! a distance between successive rows in bytes; includes the gap if any
    size_t step;

    //! pointer to the data
    uchar* data;

    //! pointer to the reference counter;
    //! when GpuMat points to user-allocated data, the pointer is NULL
    int* refcount;

    //! helper fields used in locateROI and adjustROI
    uchar* datastart;
    const uchar* dataend;

    //! allocator
    Allocator* allocator;
};
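
// Editorial usage sketch (not part of the original header): a minimal blocking
// round trip through device memory, using only the GpuMat members declared above.
// Assumes OpenCV was built with CUDA support and a device is present; the function
// name is illustrative.
inline void exampleGpuMatRoundTrip()
{
    cv::Mat h_src(480, 640, CV_8UC1, cv::Scalar(127)); // host image
    cv::cuda::GpuMat d_src;
    d_src.upload(h_src);                               // blocking host -> device copy

    cv::cuda::GpuMat d_roi = d_src(cv::Rect(0, 0, 320, 240)); // header only, no copy
    d_roi.setTo(cv::Scalar(255));                      // writes into the parent matrix

    cv::Mat h_dst;
    d_src.download(h_dst);                             // blocking device -> host copy
}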

/** @brief Creates a continuous matrix.

@param rows Row count.
@param cols Column count.
@param type Type of the matrix.
@param arr Destination matrix. This parameter changes only if it has a proper type and area (
\f$\texttt{rows} \times \texttt{cols}\f$ ).

Matrix is called continuous if its elements are stored continuously, that is, without gaps at the
end of each row.
 */
CV_EXPORTS void createContinuous(int rows, int cols, int type, OutputArray arr);
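
// Editorial sketch: createContinuous is useful when downstream code expects packed
// rows (step == cols * elemSize()), which an ordinary GpuMat allocation does not
// guarantee. The function name is illustrative.
inline void exampleCreateContinuous()
{
    cv::cuda::GpuMat d_buf;
    cv::cuda::createContinuous(480, 640, CV_32FC1, d_buf);
    CV_Assert(d_buf.isContinuous()); // no gaps between successive rows
}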

/** @brief Ensures that the size of a matrix is big enough and the matrix has a proper type.

@param rows Minimum desired number of rows.
@param cols Minimum desired number of columns.
@param type Desired matrix type.
@param arr Destination matrix.

The function does not reallocate memory if the matrix has proper attributes already.
 */
CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr);
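
// Editorial sketch: reusing one device buffer for inputs of varying size, so the
// allocation happens at most once. The function name and arguments are illustrative.
inline void exampleEnsureSizeIsEnough(const cv::Mat& big, const cv::Mat& small)
{
    cv::cuda::GpuMat d_buf;
    cv::cuda::ensureSizeIsEnough(big.rows, big.cols, big.type(), d_buf); // allocates
    d_buf.upload(big);
    cv::cuda::ensureSizeIsEnough(small.rows, small.cols, small.type(), d_buf);
    d_buf.upload(small); // no reallocation: the existing buffer already fits
}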

//! BufferPool management (must be called before Stream creation)
CV_EXPORTS void setBufferPoolUsage(bool on);
CV_EXPORTS void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount);
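
// Editorial sketch of BufferPool setup; as noted above, both calls must precede the
// first Stream construction. The device id and the 64 MB / 2-stack figures are
// arbitrary example values, and the function name is illustrative.
inline void exampleBufferPoolSetup()
{
    cv::cuda::setBufferPoolUsage(true);                 // enable pooling
    cv::cuda::setBufferPoolConfig(0 /* device id */,
                                  64 * 1024 * 1024, 2); // 2 stacks of 64 MB each
    // Streams created from this point on draw from the configured pool.
}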

//===================================================================================
// HostMem
//===================================================================================

/** @brief Class with reference counting wrapping special memory type allocation functions from CUDA.

Its interface is also Mat-like but with additional memory type parameters.

-   **PAGE_LOCKED** sets a page locked memory type used commonly for fast and asynchronous
    uploading/downloading data from/to GPU.
-   **SHARED** specifies a zero copy memory allocation that enables mapping the host memory to GPU
    address space, if supported.
-   **WRITE_COMBINED** sets the write combined buffer that is not cached by CPU. Such buffers are
    used to supply GPU with data when GPU only reads it. The advantage is a better CPU cache
    utilization.

@note Allocation size of such memory types is usually limited. For more details, see *CUDA 2.2
Pinned Memory APIs* document or *CUDA C Programming Guide*.
 */
class CV_EXPORTS HostMem
{
public:
    enum AllocType { PAGE_LOCKED = 1, SHARED = 2, WRITE_COMBINED = 4 };

    static MatAllocator* getAllocator(AllocType alloc_type = PAGE_LOCKED);

    explicit HostMem(AllocType alloc_type = PAGE_LOCKED);

    HostMem(const HostMem& m);

    HostMem(int rows, int cols, int type, AllocType alloc_type = PAGE_LOCKED);
    HostMem(Size size, int type, AllocType alloc_type = PAGE_LOCKED);

    //! creates from host memory with copying data
    explicit HostMem(InputArray arr, AllocType alloc_type = PAGE_LOCKED);

    ~HostMem();

    HostMem& operator =(const HostMem& m);

    //! swaps with other smart pointer
    void swap(HostMem& b);

    //! returns deep copy of the matrix, i.e. the data is copied
    HostMem clone() const;

    //! allocates new matrix data unless the matrix already has specified size and type.
    void create(int rows, int cols, int type);
    void create(Size size, int type);

    //! creates alternative HostMem header for the same data, with different
    //! number of channels and/or different number of rows
    HostMem reshape(int cn, int rows = 0) const;

    //! decrements reference counter and releases memory if needed.
    void release();

    //! returns matrix header with disabled reference counting for HostMem data.
    Mat createMatHeader() const;

    /** @brief Maps CPU memory to GPU address space and creates the cuda::GpuMat header without reference counting
    for it.

    This can be done only if memory was allocated with the SHARED flag and if it is supported by the
    hardware. Laptops often share video and CPU memory, so address spaces can be mapped, which
    eliminates an extra copy.
     */
    GpuMat createGpuMatHeader() const;

    // Please see cv::Mat for descriptions
    bool isContinuous() const;
    size_t elemSize() const;
    size_t elemSize1() const;
    int type() const;
    int depth() const;
    int channels() const;
    size_t step1() const;
    Size size() const;
    bool empty() const;

    // Please see cv::Mat for descriptions
    int flags;
    int rows, cols;
    size_t step;

    uchar* data;
    int* refcount;

    uchar* datastart;
    const uchar* dataend;

    AllocType alloc_type;
};
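
// Editorial sketch: a page-locked staging buffer. Filling the HostMem through its
// Mat header avoids an extra copy, and the pinned pages make subsequent GPU
// transfers faster. The function name is illustrative.
inline void exampleHostMemStaging()
{
    cv::cuda::HostMem pinned(480, 640, CV_8UC1, cv::cuda::HostMem::PAGE_LOCKED);
    cv::Mat header = pinned.createMatHeader(); // no reference counting, no copy
    header.setTo(cv::Scalar(0));               // write through the Mat view
    cv::cuda::GpuMat d_img;
    d_img.upload(header);                      // upload from pinned memory
}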

/** @brief Page-locks the memory of matrix and maps it for the device(s).

@param m Input matrix.
 */
CV_EXPORTS void registerPageLocked(Mat& m);

/** @brief Unmaps the memory of matrix and makes it pageable again.

@param m Input matrix.
 */
CV_EXPORTS void unregisterPageLocked(Mat& m);
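
// Editorial sketch: temporarily pinning an existing Mat allocation. Pairing the
// register/unregister calls is the caller's responsibility; error handling is
// omitted and the function name is illustrative.
inline void examplePageLockExisting(cv::Mat& m)
{
    cv::cuda::registerPageLocked(m);   // page-lock the existing allocation
    cv::cuda::GpuMat d_m;
    d_m.upload(m);                     // faster transfer from pinned pages
    cv::cuda::unregisterPageLocked(m); // make the memory pageable again
}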

//===================================================================================
// Stream
//===================================================================================

/** @brief This class encapsulates a queue of asynchronous calls.

@note Currently, you may face problems if an operation is enqueued twice with different data. Some
functions use the constant GPU memory, and the next call may update the memory before the previous one
has been finished. But calling different operations asynchronously is safe because each operation
has its own constant buffer. Memory copy/upload/download/set operations to the buffers you hold are
also safe.
 */
class CV_EXPORTS Stream
{
    typedef void (Stream::*bool_type)() const;
    void this_type_does_not_support_comparisons() const {}

public:
    typedef void (*StreamCallback)(int status, void* userData);

    //! creates a new asynchronous stream
    Stream();

    /** @brief Returns true if the current stream queue is finished. Otherwise, it returns false.
     */
    bool queryIfComplete() const;

    /** @brief Blocks the current CPU thread until all operations in the stream are complete.
     */
    void waitForCompletion();

    /** @brief Makes a compute stream wait on an event.
     */
    void waitEvent(const Event& event);

    /** @brief Adds a callback to be called on the host after all currently enqueued items in the stream have
    completed.

    @note Callbacks must not make any CUDA API calls. Callbacks must not perform any synchronization
    that may depend on outstanding device work or other callbacks that are not mandated to run earlier.
    Callbacks without a mandated order (in independent streams) execute in undefined order and may be
    serialized.
     */
    void enqueueHostCallback(StreamCallback callback, void* userData);

    //! return Stream object for default CUDA stream
    static Stream& Null();

    //! returns true if stream object is not default (!= 0)
    operator bool_type() const;

    class Impl;

private:
    Ptr<Impl> impl_;

    Stream(const Ptr<Impl>& impl);

    friend struct StreamAccessor;
    friend class BufferPool;
    friend class DefaultDeviceInitializer;
};
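
// Editorial sketch: overlapping two independent uploads on separate streams, then
// synchronizing. For truly asynchronous copies the host buffers should be
// page-locked (see HostMem above); the function name is illustrative.
inline void exampleTwoStreams(const cv::Mat& a, const cv::Mat& b)
{
    cv::cuda::Stream s1, s2;
    cv::cuda::GpuMat d_a, d_b;
    d_a.upload(a, s1);      // non-blocking enqueue on s1
    d_b.upload(b, s2);      // non-blocking enqueue on s2
    s1.waitForCompletion(); // block until s1 is drained
    s2.waitForCompletion();
}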

class CV_EXPORTS Event
{
public:
    enum CreateFlags
    {
        DEFAULT        = 0x00, /**< Default event flag */
        BLOCKING_SYNC  = 0x01, /**< Event uses blocking synchronization */
        DISABLE_TIMING = 0x02, /**< Event will not record timing data */
        INTERPROCESS   = 0x04  /**< Event is suitable for interprocess use. DisableTiming must be set */
    };

    explicit Event(CreateFlags flags = DEFAULT);

    //! records an event
    void record(Stream& stream = Stream::Null());

    //! queries an event's status
    bool queryIfComplete() const;

    //! waits for an event to complete
    void waitForCompletion();

    //! computes the elapsed time between events
    static float elapsedTime(const Event& start, const Event& end);

    class Impl;

private:
    Ptr<Impl> impl_;

    Event(const Ptr<Impl>& impl);

    friend struct EventAccessor;
};
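
// Editorial sketch: timing a stretch of GPU work with a pair of events recorded on
// the same stream. elapsedTime requires that neither event was created with
// DISABLE_TIMING; the function name is illustrative.
inline float exampleEventTiming(const cv::Mat& src)
{
    cv::cuda::Stream stream;
    cv::cuda::Event start, stop;
    cv::cuda::GpuMat d_src;

    start.record(stream);
    d_src.upload(src, stream);  // the work being measured
    stop.record(stream);

    stop.waitForCompletion();   // ensure the stop event has occurred
    return cv::cuda::Event::elapsedTime(start, stop); // milliseconds
}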

//! @} cudacore_struct

//===================================================================================
// Initialization & Info
//===================================================================================

//! @addtogroup cudacore_init
//! @{

/** @brief Returns the number of installed CUDA-enabled devices.

Use this function before any other CUDA function call. If OpenCV is compiled without CUDA support,
this function returns 0.
 */
CV_EXPORTS int getCudaEnabledDeviceCount();

/** @brief Sets a device and initializes it for the current thread.

@param device System index of a CUDA device starting with 0.

If the call of this function is omitted, a default device is initialized at the first CUDA usage.
 */
CV_EXPORTS void setDevice(int device);
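
// Editorial sketch: guarding CUDA usage and picking a device explicitly. Returns
// false when OpenCV was built without CUDA or no device is installed; the function
// name is illustrative.
inline bool exampleSelectFirstDevice()
{
    if (cv::cuda::getCudaEnabledDeviceCount() < 1)
        return false;       // no CUDA support or no device
    cv::cuda::setDevice(0); // bind device 0 to the current thread
    return true;
}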

/** @brief Returns the current device index set by cuda::setDevice or initialized by default.
 */
CV_EXPORTS int getDevice();

/** @brief Explicitly destroys and cleans up all resources associated with the current device in the current
process.

Any subsequent API call to this device will reinitialize the device.
 */
CV_EXPORTS void resetDevice();

/** @brief Enumeration providing CUDA computing features.
 */
enum FeatureSet
{
    FEATURE_SET_COMPUTE_10 = 10,
    FEATURE_SET_COMPUTE_11 = 11,
    FEATURE_SET_COMPUTE_12 = 12,
    FEATURE_SET_COMPUTE_13 = 13,
    FEATURE_SET_COMPUTE_20 = 20,
    FEATURE_SET_COMPUTE_21 = 21,
    FEATURE_SET_COMPUTE_30 = 30,
    FEATURE_SET_COMPUTE_32 = 32,
    FEATURE_SET_COMPUTE_35 = 35,
    FEATURE_SET_COMPUTE_50 = 50,

    GLOBAL_ATOMICS = FEATURE_SET_COMPUTE_11,
    SHARED_ATOMICS = FEATURE_SET_COMPUTE_12,
    NATIVE_DOUBLE = FEATURE_SET_COMPUTE_13,
    WARP_SHUFFLE_FUNCTIONS = FEATURE_SET_COMPUTE_30,
    DYNAMIC_PARALLELISM = FEATURE_SET_COMPUTE_35
};

//! checks whether current device supports the given feature
CV_EXPORTS bool deviceSupports(FeatureSet feature_set);
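
// Editorial sketch: branching on a device capability before running work that needs
// native double precision; the function name is illustrative.
inline bool exampleHasNativeDouble()
{
    // true only if the current device's compute capability is >= 1.3
    return cv::cuda::deviceSupports(cv::cuda::NATIVE_DOUBLE);
}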

/** @brief Class providing a set of static methods to check what NVIDIA\* card architecture the CUDA module was
built for.

According to the CUDA C Programming Guide Version 3.2: "PTX code produced for some specific compute
capability can always be compiled to binary code of greater or equal compute capability".
 */
class CV_EXPORTS TargetArchs
{
public:
    /** @brief The following method checks whether the module was built with the support of the given feature:

    @param feature_set Features to be checked. See cuda::FeatureSet.
     */
    static bool builtWith(FeatureSet feature_set);

    /** @brief There is a set of methods to check whether the module contains intermediate (PTX) or binary CUDA
    code for the given architecture(s):

    @param major Major compute capability version.
    @param minor Minor compute capability version.
     */
    static bool has(int major, int minor);
    static bool hasPtx(int major, int minor);
    static bool hasBin(int major, int minor);

    static bool hasEqualOrLessPtx(int major, int minor);
    static bool hasEqualOrGreater(int major, int minor);
    static bool hasEqualOrGreaterPtx(int major, int minor);
    static bool hasEqualOrGreaterBin(int major, int minor);
};
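
// Editorial sketch: checking that a device of the given compute capability can run
// this build of the module, either from exact binary code or from PTX that the
// driver can JIT-compile upward (per the quote above). The function name is
// illustrative.
inline bool exampleBinaryOrPtxAvailable(int major, int minor)
{
    return cv::cuda::TargetArchs::hasBin(major, minor) ||
           cv::cuda::TargetArchs::hasEqualOrLessPtx(major, minor);
}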

/** @brief Class providing functionality for querying the specified GPU properties.
 */
class CV_EXPORTS DeviceInfo
{
public:
    //! creates DeviceInfo object for the current GPU
    DeviceInfo();

    /** @brief The constructors.

    @param device_id System index of the CUDA device starting with 0.

    Constructs the DeviceInfo object for the specified device. If device_id parameter is omitted, it
    constructs an object for the current device.
     */
    DeviceInfo(int device_id);

    /** @brief Returns system index of the CUDA device starting with 0.
     */
    int deviceID() const;

    //! ASCII string identifying device
    const char* name() const;

    //! global memory available on device in bytes
    size_t totalGlobalMem() const;

    //! shared memory available per block in bytes
    size_t sharedMemPerBlock() const;

    //! 32-bit registers available per block
    int regsPerBlock() const;

    //! warp size in threads
    int warpSize() const;

    //! maximum pitch in bytes allowed by memory copies
    size_t memPitch() const;

    //! maximum number of threads per block
    int maxThreadsPerBlock() const;

    //! maximum size of each dimension of a block
    Vec3i maxThreadsDim() const;

    //! maximum size of each dimension of a grid
    Vec3i maxGridSize() const;

    //! clock frequency in kilohertz
    int clockRate() const;

    //! constant memory available on device in bytes
    size_t totalConstMem() const;

    //! major compute capability
    int majorVersion() const;

    //! minor compute capability
    int minorVersion() const;

    //! alignment requirement for textures
    size_t textureAlignment() const;

    //! pitch alignment requirement for texture references bound to pitched memory
    size_t texturePitchAlignment() const;

    //! number of multiprocessors on device
    int multiProcessorCount() const;

    //! specifies whether there is a run time limit on kernels
    bool kernelExecTimeoutEnabled() const;

    //! device is integrated as opposed to discrete
    bool integrated() const;

    //! device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer
    bool canMapHostMemory() const;

    enum ComputeMode
    {
        ComputeModeDefault,         /**< default compute mode (Multiple threads can use cudaSetDevice with this device) */
        ComputeModeExclusive,       /**< compute-exclusive-thread mode (Only one thread in one process will be able to use cudaSetDevice with this device) */
        ComputeModeProhibited,      /**< compute-prohibited mode (No threads can use cudaSetDevice with this device) */
        ComputeModeExclusiveProcess /**< compute-exclusive-process mode (Many threads in one process will be able to use cudaSetDevice with this device) */
    };

    //! compute mode
    ComputeMode computeMode() const;

    //! maximum 1D texture size
    int maxTexture1D() const;

    //! maximum 1D mipmapped texture size
    int maxTexture1DMipmap() const;

    //! maximum size for 1D textures bound to linear memory
    int maxTexture1DLinear() const;

    //! maximum 2D texture dimensions
    Vec2i maxTexture2D() const;

    //! maximum 2D mipmapped texture dimensions
    Vec2i maxTexture2DMipmap() const;

    //! maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory
    Vec3i maxTexture2DLinear() const;

    //! maximum 2D texture dimensions if texture gather operations have to be performed
    Vec2i maxTexture2DGather() const;

    //! maximum 3D texture dimensions
    Vec3i maxTexture3D() const;

    //! maximum Cubemap texture dimensions
    int maxTextureCubemap() const;

    //! maximum 1D layered texture dimensions
    Vec2i maxTexture1DLayered() const;

    //! maximum 2D layered texture dimensions
    Vec3i maxTexture2DLayered() const;

    //! maximum Cubemap layered texture dimensions
    Vec2i maxTextureCubemapLayered() const;

    //! maximum 1D surface size
    int maxSurface1D() const;

    //! maximum 2D surface dimensions
    Vec2i maxSurface2D() const;

    //! maximum 3D surface dimensions
    Vec3i maxSurface3D() const;

    //! maximum 1D layered surface dimensions
    Vec2i maxSurface1DLayered() const;

    //! maximum 2D layered surface dimensions
    Vec3i maxSurface2DLayered() const;

    //! maximum Cubemap surface dimensions
    int maxSurfaceCubemap() const;

    //! maximum Cubemap layered surface dimensions
    Vec2i maxSurfaceCubemapLayered() const;

    //! alignment requirements for surfaces
    size_t surfaceAlignment() const;

    //! device can possibly execute multiple kernels concurrently
    bool concurrentKernels() const;

    //! device has ECC support enabled
    bool ECCEnabled() const;

    //! PCI bus ID of the device
    int pciBusID() const;

    //! PCI device ID of the device
    int pciDeviceID() const;

    //! PCI domain ID of the device
    int pciDomainID() const;

    //! true if device is a Tesla device using TCC driver, false otherwise
    bool tccDriver() const;

    //! number of asynchronous engines
    int asyncEngineCount() const;

    //! device shares a unified address space with the host
    bool unifiedAddressing() const;

    //! peak memory clock frequency in kilohertz
    int memoryClockRate() const;

    //! global memory bus width in bits
    int memoryBusWidth() const;

    //! size of L2 cache in bytes
    int l2CacheSize() const;

    //! maximum resident threads per multiprocessor
    int maxThreadsPerMultiProcessor() const;

    //! gets free and total device memory
    void queryMemory(size_t& totalMemory, size_t& freeMemory) const;
    size_t freeMemory() const;
    size_t totalMemory() const;

    /** @brief Provides information on CUDA feature support.

    @param feature_set Features to be checked. See cuda::FeatureSet.

    This function returns true if the device has the specified CUDA feature. Otherwise, it returns false.
     */
    bool supports(FeatureSet feature_set) const;

    /** @brief Checks the CUDA module and device compatibility.

    This function returns true if the CUDA module can be run on the specified device. Otherwise, it
    returns false.
     */
    bool isCompatible() const;

private:
    int device_id_;
};

CV_EXPORTS void printCudaDeviceInfo(int device);
CV_EXPORTS void printShortCudaDeviceInfo(int device);
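
// Editorial sketch: enumerating the installed devices and printing a short summary
// for each one that is compatible with this build of the module. The function name
// is illustrative.
inline void exampleListCompatibleDevices()
{
    int count = cv::cuda::getCudaEnabledDeviceCount();
    for (int id = 0; id < count; ++id)
    {
        cv::cuda::DeviceInfo info(id);
        if (info.isCompatible())
            cv::cuda::printShortCudaDeviceInfo(id); // name, memory, compute capability
    }
}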

//! @} cudacore_init

}} // namespace cv { namespace cuda {

#include "opencv2/core/cuda.inl.hpp"

#endif /* __OPENCV_CORE_CUDA_HPP__ */