2
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
6
* This source code is subject to NVIDIA ownership rights under U.S. and
7
* international Copyright laws. Users and possessors of this source code
8
* are hereby granted a nonexclusive, royalty-free license to use this code
9
* in individual and commercial software.
11
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
12
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
13
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
14
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
15
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
16
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
17
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
18
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
19
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
20
* OR PERFORMANCE OF THIS SOURCE CODE.
22
* U.S. Government End Users. This source code is a "commercial item" as
23
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
24
* "commercial computer software" and "commercial computer software
25
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
26
* and is provided to the U.S. Government only as a commercial end item.
27
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
28
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
29
* source code with only those rights set forth herein.
31
* Any use of this source code in individual and commercial software must
32
* include, in the user documentation and internal comments to the code,
33
* the above Disclaimer and U.S. Government End Users Notice.
36
#ifndef __cuda_cuda_h__
37
#define __cuda_cuda_h__
43
* \name Data types used by CUDA driver
44
* \author NVIDIA Corporation
45
* \brief Data types used by CUDA driver
49
* \defgroup CUDA_TYPES Data types used by CUDA driver
50
* \ingroup CUDA_DRIVER
55
* CUDA API version number
57
#define CUDA_VERSION 3010 /* 3.1 */
62
/*
 * Basic value and handle types used by the driver API declarations in this
 * header. CUdeviceptr and CUdevice are plain integers; every other handle is
 * a pointer to a struct that is only named here, never defined, so callers
 * can pass the handles around but cannot look inside them.
 */
typedef unsigned int CUdeviceptr;                         ///< CUDA device pointer
typedef int CUdevice;                                     ///< CUDA device
typedef struct CUctx_st *CUcontext;                       ///< CUDA context
typedef struct CUmod_st *CUmodule;                        ///< CUDA module
typedef struct CUfunc_st *CUfunction;                     ///< CUDA function
typedef struct CUarray_st *CUarray;                       ///< CUDA array
typedef struct CUtexref_st *CUtexref;                     ///< CUDA texture reference
typedef struct CUsurfref_st *CUsurfref;                   ///< CUDA surface reference
typedef struct CUevent_st *CUevent;                       ///< CUDA event
typedef struct CUstream_st *CUstream;                     ///< CUDA stream
typedef struct CUgraphicsResource_st *CUgraphicsResource; ///< CUDA graphics interop resource
75
typedef struct CUuuid_st { ///< CUDA definition of UUID
79
/************************************
83
***********************************/
86
* Context creation flags
88
typedef enum CUctx_flags_enum {
89
CU_CTX_SCHED_AUTO = 0, ///< Automatic scheduling
90
CU_CTX_SCHED_SPIN = 1, ///< Set spin as default scheduling
91
CU_CTX_SCHED_YIELD = 2, ///< Set yield as default scheduling
92
CU_CTX_SCHED_MASK = 0x3,
93
CU_CTX_BLOCKING_SYNC = 4, ///< Use blocking synchronization
94
CU_CTX_MAP_HOST = 8, ///< Support mapped pinned allocations
95
CU_CTX_LMEM_RESIZE_TO_MAX = 16, ///< Keep local memory allocation after launch
96
CU_CTX_FLAGS_MASK = 0x1f
100
* Event creation flags
102
typedef enum CUevent_flags_enum {
103
CU_EVENT_DEFAULT = 0, ///< Default event flag
104
CU_EVENT_BLOCKING_SYNC = 1 ///< Event uses blocking synchronization
110
typedef enum CUarray_format_enum {
111
CU_AD_FORMAT_UNSIGNED_INT8 = 0x01, ///< Unsigned 8-bit integers
112
CU_AD_FORMAT_UNSIGNED_INT16 = 0x02, ///< Unsigned 16-bit integers
113
CU_AD_FORMAT_UNSIGNED_INT32 = 0x03, ///< Unsigned 32-bit integers
114
CU_AD_FORMAT_SIGNED_INT8 = 0x08, ///< Signed 8-bit integers
115
CU_AD_FORMAT_SIGNED_INT16 = 0x09, ///< Signed 16-bit integers
116
CU_AD_FORMAT_SIGNED_INT32 = 0x0a, ///< Signed 32-bit integers
117
CU_AD_FORMAT_HALF = 0x10, ///< 16-bit floating point
118
CU_AD_FORMAT_FLOAT = 0x20 ///< 32-bit floating point
122
* Texture reference addressing modes
124
typedef enum CUaddress_mode_enum {
125
CU_TR_ADDRESS_MODE_WRAP = 0, ///< Wrapping address mode
126
CU_TR_ADDRESS_MODE_CLAMP = 1, ///< Clamp to edge address mode
127
CU_TR_ADDRESS_MODE_MIRROR = 2 ///< Mirror address mode
131
* Texture reference filtering modes
133
typedef enum CUfilter_mode_enum {
134
CU_TR_FILTER_MODE_POINT = 0, ///< Point filter mode
135
CU_TR_FILTER_MODE_LINEAR = 1 ///< Linear filter mode
141
/**
 * Device attributes. A value of this type selects which property
 * ::cuDeviceGetAttribute() reports for a device.
 *
 * Two pairs of enumerators intentionally share a value: the names without
 * the MAX_ prefix are deprecated aliases kept for source compatibility.
 */
typedef enum CUdevice_attribute_enum {
    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1,              ///< Maximum number of threads per block
    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2,                    ///< Maximum block dimension X
    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3,                    ///< Maximum block dimension Y
    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4,                    ///< Maximum block dimension Z
    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5,                     ///< Maximum grid dimension X
    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6,                     ///< Maximum grid dimension Y
    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7,                     ///< Maximum grid dimension Z
    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8,        ///< Maximum shared memory available per block in bytes
    CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8,            ///< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK
    CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9,              ///< Memory available on device for __constant__ variables in a CUDA C kernel in bytes
    CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10,                         ///< Warp size in threads
    CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11,                         ///< Maximum pitch in bytes allowed by memory copies
    CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12,           ///< Maximum number of 32-bit registers available per block
    CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12,               ///< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK
    CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13,                        ///< Peak clock frequency in kilohertz
    CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14,                 ///< Alignment requirement for textures

    CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15,                       ///< Device can possibly copy memory and execute a kernel concurrently
    CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16,              ///< Number of multiprocessors on device
    CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17,               ///< Specifies whether there is a run time limit on kernels
    CU_DEVICE_ATTRIBUTE_INTEGRATED = 18,                        ///< Device is integrated with host memory
    CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19,               ///< Device can map host memory into CUDA address space
    CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20,                      ///< Compute mode (See ::CUcomputemode for details)
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21,           ///< Maximum 1D texture width
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22,           ///< Maximum 2D texture width
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23,          ///< Maximum 2D texture height
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24,           ///< Maximum 3D texture width
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25,          ///< Maximum 3D texture height
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26,           ///< Maximum 3D texture depth
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27,     ///< Maximum texture array width
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28,    ///< Maximum texture array height
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29, ///< Maximum slices in a texture array
    CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30,                 ///< Alignment requirement for surfaces
    CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31,                ///< Device can possibly execute multiple kernels concurrently
    CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32,                       ///< Device has ECC support enabled
    CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33,                        ///< PCI bus ID of the device
    CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34                      ///< PCI device ID of the device
} CUdevice_attribute;
182
* Legacy device properties
184
typedef struct CUdevprop_st {
185
int maxThreadsPerBlock; ///< Maximum number of threads per block
186
int maxThreadsDim[3]; ///< Maximum size of each dimension of a block
187
int maxGridSize[3]; ///< Maximum size of each dimension of a grid
188
int sharedMemPerBlock; ///< Shared memory available per block in bytes
189
int totalConstantMemory; ///< Constant memory available on device in bytes
190
int SIMDWidth; ///< Warp size in threads
191
int memPitch; ///< Maximum pitch in bytes allowed by memory copies
192
int regsPerBlock; ///< 32-bit registers available per block
193
int clockRate; ///< Clock frequency in kilohertz
194
int textureAlign; ///< Alignment requirement for textures
198
* Function properties
200
/**
 * Function attributes. A value of this type selects which property
 * ::cuFuncGetAttribute() reports for a function.
 */
typedef enum CUfunction_attribute_enum {
    /**
     * The number of threads beyond which a launch of the function would fail.
     * This number depends on both the function and the device on which the
     * function is currently loaded.
     */
    CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,

    /**
     * The size in bytes of statically-allocated shared memory required by
     * this function. This does not include dynamically-allocated shared
     * memory requested by the user at runtime.
     */
    CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1,

    /**
     * The size in bytes of user-allocated constant memory required by this
     * function.
     */
    CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2,

    /**
     * The size in bytes of thread local memory used by this function.
     */
    CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3,

    /**
     * The number of registers used by each thread of this function.
     */
    CU_FUNC_ATTRIBUTE_NUM_REGS = 4,

    /**
     * The PTX virtual architecture version for which the function was compiled.
     */
    CU_FUNC_ATTRIBUTE_PTX_VERSION = 5,

    /**
     * The binary version for which the function was compiled.
     */
    CU_FUNC_ATTRIBUTE_BINARY_VERSION = 6,

    /* Takes the next implicit value (7), i.e. one past the last attribute above. */
    CU_FUNC_ATTRIBUTE_MAX
} CUfunction_attribute;
245
* Function cache configurations
247
typedef enum CUfunc_cache_enum {
248
CU_FUNC_CACHE_PREFER_NONE = 0x00,
249
CU_FUNC_CACHE_PREFER_SHARED = 0x01,
250
CU_FUNC_CACHE_PREFER_L1 = 0x02
256
typedef enum CUmemorytype_enum {
257
CU_MEMORYTYPE_HOST = 0x01, ///< Host memory
258
CU_MEMORYTYPE_DEVICE = 0x02, ///< Device memory
259
CU_MEMORYTYPE_ARRAY = 0x03 ///< Array memory
265
typedef enum CUcomputemode_enum {
266
CU_COMPUTEMODE_DEFAULT = 0, ///< Default compute mode (Multiple contexts allowed per device)
267
CU_COMPUTEMODE_EXCLUSIVE = 1, ///< Compute-exclusive mode (Only one context can be present on this device at a time)
268
CU_COMPUTEMODE_PROHIBITED = 2 ///< Compute-prohibited mode (No contexts can be created on this device at this time)
272
* Online compiler options
274
typedef enum CUjit_option_enum
277
* Max number of registers that a thread may use.\n
278
* Option type: unsigned int
280
CU_JIT_MAX_REGISTERS = 0,
283
* IN: Specifies minimum number of threads per block to target compilation
285
* OUT: Returns the number of threads the compiler actually targeted.
286
* This restricts the resource utilization of the compiler (e.g. max
287
* registers) such that a block with the given number of threads should be
288
* able to launch based on register limitations. Note, this option does not
289
* currently take into account any other resource limitations, such as
290
* shared memory utilization.\n
291
* Option type: unsigned int
293
CU_JIT_THREADS_PER_BLOCK,
296
* Returns a float value in the option of the wall clock time, in
297
* milliseconds, spent creating the cubin\n
303
* Pointer to a buffer in which to print any log messages from PTXAS
304
* that are informational in nature (the buffer size is specified via
305
* option ::CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES) \n
308
CU_JIT_INFO_LOG_BUFFER,
311
* IN: Log buffer size in bytes. Log messages will be capped at this size
312
* (including null terminator)\n
313
* OUT: Amount of log buffer filled with messages\n
314
* Option type: unsigned int
316
CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
319
* Pointer to a buffer in which to print any log messages from PTXAS that
320
* reflect errors (the buffer size is specified via option
321
* ::CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES)\n
324
CU_JIT_ERROR_LOG_BUFFER,
327
* IN: Log buffer size in bytes. Log messages will be capped at this size
328
* (including null terminator)\n
329
* OUT: Amount of log buffer filled with messages\n
330
* Option type: unsigned int
332
CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES,
335
* Level of optimizations to apply to generated code (0 - 4), with 4
336
* being the default and highest level of optimizations.\n
337
* Option type: unsigned int
339
CU_JIT_OPTIMIZATION_LEVEL,
342
* No option value required. Determines the target based on the current
343
* attached context (default)\n
344
* Option type: No option value needed
346
CU_JIT_TARGET_FROM_CUCONTEXT,
349
* Target is chosen based on supplied ::CUjit_target_enum.\n
350
* Option type: unsigned int for enumerated type ::CUjit_target_enum
355
* Specifies choice of fallback strategy if matching cubin is not found.
356
* Choice is based on supplied ::CUjit_fallback_enum.\n
357
* Option type: unsigned int for enumerated type ::CUjit_fallback_enum
359
CU_JIT_FALLBACK_STRATEGY
364
* Online compilation targets
366
typedef enum CUjit_target_enum
368
CU_TARGET_COMPUTE_10 = 0, ///< Compute device class 1.0
369
CU_TARGET_COMPUTE_11, ///< Compute device class 1.1
370
CU_TARGET_COMPUTE_12, ///< Compute device class 1.2
371
CU_TARGET_COMPUTE_13, ///< Compute device class 1.3
372
CU_TARGET_COMPUTE_20 ///< Compute device class 2.0
376
* Cubin matching fallback strategies
378
typedef enum CUjit_fallback_enum
380
/** Prefer to compile ptx */
383
/** Prefer to fall back to compatible binary code */
389
* Flags to register a graphics resource
391
/**
 * Flags accepted when registering a graphics resource.
 * Only the "no flags" value is defined here.
 */
typedef enum CUgraphicsRegisterFlags_enum {
    CU_GRAPHICS_REGISTER_FLAGS_NONE = 0x00 ///< No registration flags set
} CUgraphicsRegisterFlags;
396
* Flags for mapping and unmapping interop resources
398
/**
 * Flags for mapping and unmapping interop resources.
 * NOTE(review): presumably the values passed as `flags` to
 * ::cuGraphicsResourceSetMapFlags() -- confirm against the API reference.
 */
typedef enum CUgraphicsMapResourceFlags_enum {
    CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE = 0x00,         ///< No mapping flags set
    CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01,    ///< Read-only mapping (per the name; semantics not shown here)
    CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02 ///< Write-discard mapping (per the name; semantics not shown here)
} CUgraphicsMapResourceFlags;
405
* Array indices for cube faces
407
/**
 * Array indices for cube faces.
 * The six faces are numbered consecutively from 0, in +X, -X, +Y, -Y, +Z, -Z
 * order.
 */
typedef enum CUarray_cubemap_face_enum {
    CU_CUBEMAP_FACE_POSITIVE_X = 0x00, ///< Positive X face of cubemap
    CU_CUBEMAP_FACE_NEGATIVE_X = 0x01, ///< Negative X face of cubemap
    CU_CUBEMAP_FACE_POSITIVE_Y = 0x02, ///< Positive Y face of cubemap
    CU_CUBEMAP_FACE_NEGATIVE_Y = 0x03, ///< Negative Y face of cubemap
    CU_CUBEMAP_FACE_POSITIVE_Z = 0x04, ///< Positive Z face of cubemap
    CU_CUBEMAP_FACE_NEGATIVE_Z = 0x05  ///< Negative Z face of cubemap
} CUarray_cubemap_face;
419
typedef enum CUlimit_enum {
420
CU_LIMIT_STACK_SIZE = 0x00, ///< GPU thread stack size
421
CU_LIMIT_PRINTF_FIFO_SIZE = 0x01 ///< GPU printf FIFO size
424
/************************************
428
***********************************/
433
typedef enum cudaError_enum {
435
CUDA_SUCCESS = 0, ///< No errors
436
CUDA_ERROR_INVALID_VALUE = 1, ///< Invalid value
437
CUDA_ERROR_OUT_OF_MEMORY = 2, ///< Out of memory
438
CUDA_ERROR_NOT_INITIALIZED = 3, ///< Driver not initialized
439
CUDA_ERROR_DEINITIALIZED = 4, ///< Driver deinitialized
441
CUDA_ERROR_NO_DEVICE = 100, ///< No CUDA-capable device available
442
CUDA_ERROR_INVALID_DEVICE = 101, ///< Invalid device
444
CUDA_ERROR_INVALID_IMAGE = 200, ///< Invalid kernel image
445
CUDA_ERROR_INVALID_CONTEXT = 201, ///< Invalid context
446
CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202, ///< Context already current
447
CUDA_ERROR_MAP_FAILED = 205, ///< Map failed
448
CUDA_ERROR_UNMAP_FAILED = 206, ///< Unmap failed
449
CUDA_ERROR_ARRAY_IS_MAPPED = 207, ///< Array is mapped
450
CUDA_ERROR_ALREADY_MAPPED = 208, ///< Already mapped
451
CUDA_ERROR_NO_BINARY_FOR_GPU = 209, ///< No binary for GPU
452
CUDA_ERROR_ALREADY_ACQUIRED = 210, ///< Already acquired
453
CUDA_ERROR_NOT_MAPPED = 211, ///< Not mapped
454
CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212, ///< Mapped resource not available for access as an array
455
CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213, ///< Mapped resource not available for access as a pointer
456
CUDA_ERROR_ECC_UNCORRECTABLE = 214, ///< Uncorrectable ECC error detected
457
CUDA_ERROR_UNSUPPORTED_LIMIT = 215, ///< CUlimit not supported by device
459
CUDA_ERROR_INVALID_SOURCE = 300, ///< Invalid source
460
CUDA_ERROR_FILE_NOT_FOUND = 301, ///< File not found
461
CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302, ///< Link to a shared object failed to resolve
462
CUDA_ERROR_SHARED_OBJECT_INIT_FAILED = 303, ///< Shared object initialization failed
464
CUDA_ERROR_INVALID_HANDLE = 400, ///< Invalid handle
466
CUDA_ERROR_NOT_FOUND = 500, ///< Not found
468
CUDA_ERROR_NOT_READY = 600, ///< CUDA not ready
470
CUDA_ERROR_LAUNCH_FAILED = 700, ///< Launch failed
471
CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701, ///< Launch exceeded resources
472
CUDA_ERROR_LAUNCH_TIMEOUT = 702, ///< Launch exceeded timeout
473
CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703, ///< Launch with incompatible texturing
475
CUDA_ERROR_POINTER_IS_64BIT = 800, ///< Attempted to retrieve 64-bit pointer via 32-bit API function
476
CUDA_ERROR_SIZE_IS_64BIT = 801, ///< Attempted to retrieve 64-bit size via 32-bit API function
478
CUDA_ERROR_UNKNOWN = 999 ///< Unknown error
482
* If set, host memory is portable between CUDA contexts.
483
* Flag for ::cuMemHostAlloc()
485
#define CU_MEMHOSTALLOC_PORTABLE 0x01
488
* If set, host memory is mapped into CUDA address space and
489
* ::cuMemHostGetDevicePointer() may be called on the host pointer.
490
* Flag for ::cuMemHostAlloc()
492
#define CU_MEMHOSTALLOC_DEVICEMAP 0x02
495
* If set, host memory is allocated as write-combined - fast to write,
496
* faster to DMA, slow to read except via SSE4 streaming load instruction
498
* Flag for ::cuMemHostAlloc()
500
#define CU_MEMHOSTALLOC_WRITECOMBINED 0x04
503
* 2D memory copy parameters
505
typedef struct CUDA_MEMCPY2D_st {
507
unsigned int srcXInBytes, ///< Source X in bytes
509
CUmemorytype srcMemoryType; ///< Source memory type (host, device, array)
510
const void *srcHost; ///< Source host pointer
511
CUdeviceptr srcDevice; ///< Source device pointer
512
CUarray srcArray; ///< Source array reference
513
unsigned int srcPitch; ///< Source pitch (ignored when src is array)
515
unsigned int dstXInBytes, ///< Destination X in bytes
516
dstY; ///< Destination Y
517
CUmemorytype dstMemoryType; ///< Destination memory type (host, device, array)
518
void *dstHost; ///< Destination host pointer
519
CUdeviceptr dstDevice; ///< Destination device pointer
520
CUarray dstArray; ///< Destination array reference
521
unsigned int dstPitch; ///< Destination pitch (ignored when dst is array)
523
unsigned int WidthInBytes; ///< Width of 2D memory copy in bytes
524
unsigned int Height; ///< Height of 2D memory copy
528
* 3D memory copy parameters
530
typedef struct CUDA_MEMCPY3D_st {
532
unsigned int srcXInBytes, ///< Source X in bytes
535
unsigned int srcLOD; ///< Source LOD
536
CUmemorytype srcMemoryType; ///< Source memory type (host, device, array)
537
const void *srcHost; ///< Source host pointer
538
CUdeviceptr srcDevice; ///< Source device pointer
539
CUarray srcArray; ///< Source array reference
540
void *reserved0; ///< Must be NULL
541
unsigned int srcPitch; ///< Source pitch (ignored when src is array)
542
unsigned int srcHeight; ///< Source height (ignored when src is array; may be 0 if Depth==1)
544
unsigned int dstXInBytes, ///< Destination X in bytes
545
dstY, ///< Destination Y
546
dstZ; ///< Destination Z
547
unsigned int dstLOD; ///< Destination LOD
548
CUmemorytype dstMemoryType; ///< Destination memory type (host, device, array)
549
void *dstHost; ///< Destination host pointer
550
CUdeviceptr dstDevice; ///< Destination device pointer
551
CUarray dstArray; ///< Destination array reference
552
void *reserved1; ///< Must be NULL
553
unsigned int dstPitch; ///< Destination pitch (ignored when dst is array)
554
unsigned int dstHeight; ///< Destination height (ignored when dst is array; may be 0 if Depth==1)
556
unsigned int WidthInBytes; ///< Width of 3D memory copy in bytes
557
unsigned int Height; ///< Height of 3D memory copy
558
unsigned int Depth; ///< Depth of 3D memory copy
566
unsigned int Width; ///< Width of array
567
unsigned int Height; ///< Height of array
569
CUarray_format Format; ///< Array format
571
unsigned int NumChannels; ///< Channels per array element
572
} CUDA_ARRAY_DESCRIPTOR;
575
* 3D array descriptor
579
unsigned int Width; ///< Width of 3D array
580
unsigned int Height; ///< Height of 3D array
581
unsigned int Depth; ///< Depth of 3D array
583
CUarray_format Format; ///< Array format
585
unsigned int NumChannels; ///< Channels per array element
587
unsigned int Flags; ///< Flags
588
} CUDA_ARRAY3D_DESCRIPTOR;
590
// if set, the CUDA array contains an array of 2D slices
591
// and the Depth member of CUDA_ARRAY3D_DESCRIPTOR specifies
592
// the number of slices, not the depth of a 3D array.
593
#define CUDA_ARRAY3D_2DARRAY 0x01
595
// this flag must be set in order to bind a surface reference
597
#define CUDA_ARRAY3D_SURFACE_LDST 0x02
600
* Override the texref format with a format inferred from the array.
601
* Flag for ::cuTexRefSetArray()
603
#define CU_TRSA_OVERRIDE_FORMAT 0x01
606
* Read the texture as integers rather than promoting the values to floats
607
* in the range [0,1].
608
* Flag for ::cuTexRefSetFlags()
610
#define CU_TRSF_READ_AS_INTEGER 0x01
613
* Use normalized texture coordinates in the range [0,1) instead of [0,dim).
614
* Flag for ::cuTexRefSetFlags()
616
#define CU_TRSF_NORMALIZED_COORDINATES 0x02
619
* For texture references loaded into the module, use default texunit from
622
/*
 * Default texture unit selector (value -1).
 * NOTE(review): presumably passed as `texunit` to ::cuParamSetTexRef() -- confirm.
 * The replacement list is parenthesized so the macro always expands as a
 * single operand, whatever expression it is pasted into.
 */
#define CU_PARAM_TR_DEFAULT (-1)
625
/** @} */ /* END CUDA_TYPES */
628
/*
 * Calling-convention decoration applied to every driver API prototype below.
 * __stdcall is a Windows-specific keyword, so it is only used when building
 * for Windows; on every other platform CUDAAPI expands to nothing and the
 * compiler's default calling convention applies.
 */
#ifdef _WIN32
#define CUDAAPI __stdcall
#else
#define CUDAAPI
#endif
633
/*********************************
635
*********************************/
636
CUresult CUDAAPI cuInit(unsigned int Flags);
638
/*********************************
639
** Driver Version Query
640
*********************************/
641
CUresult CUDAAPI cuDriverGetVersion(int *driverVersion);
643
/************************************
647
***********************************/
649
CUresult CUDAAPI cuDeviceGet(CUdevice *device, int ordinal);
650
CUresult CUDAAPI cuDeviceGetCount(int *count);
651
CUresult CUDAAPI cuDeviceGetName(char *name, int len, CUdevice dev);
652
CUresult CUDAAPI cuDeviceComputeCapability(int *major, int *minor, CUdevice dev);
653
CUresult CUDAAPI cuDeviceTotalMem(unsigned int *bytes, CUdevice dev);
654
CUresult CUDAAPI cuDeviceGetProperties(CUdevprop *prop, CUdevice dev);
655
CUresult CUDAAPI cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);
657
/************************************
659
** Context management
661
***********************************/
663
CUresult CUDAAPI cuCtxCreate(CUcontext *pctx, unsigned int flags, CUdevice dev );
664
CUresult CUDAAPI cuCtxDestroy( CUcontext ctx );
665
CUresult CUDAAPI cuCtxAttach(CUcontext *pctx, unsigned int flags);
666
CUresult CUDAAPI cuCtxDetach(CUcontext ctx);
667
CUresult CUDAAPI cuCtxPushCurrent( CUcontext ctx );
668
CUresult CUDAAPI cuCtxPopCurrent( CUcontext *pctx );
669
CUresult CUDAAPI cuCtxGetDevice(CUdevice *device);
670
CUresult CUDAAPI cuCtxSynchronize(void);
673
/************************************
677
***********************************/
679
CUresult CUDAAPI cuModuleLoad(CUmodule *module, const char *fname);
680
CUresult CUDAAPI cuModuleLoadData(CUmodule *module, const void *image);
681
CUresult CUDAAPI cuModuleLoadDataEx(CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);
682
CUresult CUDAAPI cuModuleLoadFatBinary(CUmodule *module, const void *fatCubin);
683
CUresult CUDAAPI cuModuleUnload(CUmodule hmod);
684
CUresult CUDAAPI cuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, const char *name);
685
CUresult CUDAAPI cuModuleGetGlobal(CUdeviceptr *dptr, unsigned int *bytes, CUmodule hmod, const char *name);
686
CUresult CUDAAPI cuModuleGetTexRef(CUtexref *pTexRef, CUmodule hmod, const char *name);
687
CUresult CUDAAPI cuModuleGetSurfRef(CUsurfref *pSurfRef, CUmodule hmod, const char *name);
689
/************************************
693
***********************************/
695
CUresult CUDAAPI cuMemGetInfo(unsigned int *free, unsigned int *total);
697
CUresult CUDAAPI cuMemAlloc( CUdeviceptr *dptr, unsigned int bytesize);
698
CUresult CUDAAPI cuMemAllocPitch( CUdeviceptr *dptr,
699
unsigned int *pPitch,
700
unsigned int WidthInBytes,
702
// size of biggest r/w to be performed by kernels on this memory
704
unsigned int ElementSizeBytes
706
CUresult CUDAAPI cuMemFree(CUdeviceptr dptr);
707
CUresult CUDAAPI cuMemGetAddressRange( CUdeviceptr *pbase, unsigned int *psize, CUdeviceptr dptr );
709
CUresult CUDAAPI cuMemAllocHost(void **pp, unsigned int bytesize);
710
CUresult CUDAAPI cuMemFreeHost(void *p);
712
CUresult CUDAAPI cuMemHostAlloc(void **pp, size_t bytesize, unsigned int Flags );
714
CUresult CUDAAPI cuMemHostGetDevicePointer( CUdeviceptr *pdptr, void *p, unsigned int Flags );
715
CUresult CUDAAPI cuMemHostGetFlags( unsigned int *pFlags, void *p );
717
/************************************
719
** Synchronous Memcpy
721
** Intra-device memcpy's done with these functions may execute in parallel with the CPU,
722
** but if host memory is involved, they wait until the copy is done before returning.
724
***********************************/
727
// system <-> device memory
728
CUresult CUDAAPI cuMemcpyHtoD (CUdeviceptr dstDevice, const void *srcHost, unsigned int ByteCount );
729
CUresult CUDAAPI cuMemcpyDtoH (void *dstHost, CUdeviceptr srcDevice, unsigned int ByteCount );
731
// device <-> device memory
732
CUresult CUDAAPI cuMemcpyDtoD (CUdeviceptr dstDevice, CUdeviceptr srcDevice, unsigned int ByteCount );
734
// device <-> array memory
735
CUresult CUDAAPI cuMemcpyDtoA ( CUarray dstArray, unsigned int dstOffset, CUdeviceptr srcDevice, unsigned int ByteCount );
736
CUresult CUDAAPI cuMemcpyAtoD ( CUdeviceptr dstDevice, CUarray srcArray, unsigned int srcOffset, unsigned int ByteCount );
738
// system <-> array memory
739
CUresult CUDAAPI cuMemcpyHtoA( CUarray dstArray, unsigned int dstOffset, const void *srcHost, unsigned int ByteCount );
740
CUresult CUDAAPI cuMemcpyAtoH( void *dstHost, CUarray srcArray, unsigned int srcOffset, unsigned int ByteCount );
742
// array <-> array memory
743
CUresult CUDAAPI cuMemcpyAtoA( CUarray dstArray, unsigned int dstOffset, CUarray srcArray, unsigned int srcOffset, unsigned int ByteCount );
747
CUresult CUDAAPI cuMemcpy2D( const CUDA_MEMCPY2D *pCopy );
748
CUresult CUDAAPI cuMemcpy2DUnaligned( const CUDA_MEMCPY2D *pCopy );
752
CUresult CUDAAPI cuMemcpy3D( const CUDA_MEMCPY3D *pCopy );
754
/************************************
756
** Asynchronous Memcpy
758
** Any host memory involved must be DMA'able (e.g., allocated with cuMemAllocHost).
759
** memcpy's done with these functions execute in parallel with the CPU and, if
760
** the hardware is available, may execute in parallel with the GPU.
761
** Asynchronous memcpy must be accompanied by appropriate stream synchronization.
763
***********************************/
766
// system <-> device memory
767
CUresult CUDAAPI cuMemcpyHtoDAsync (CUdeviceptr dstDevice,
768
const void *srcHost, unsigned int ByteCount, CUstream hStream );
769
CUresult CUDAAPI cuMemcpyDtoHAsync (void *dstHost,
770
CUdeviceptr srcDevice, unsigned int ByteCount, CUstream hStream );
772
// device <-> device memory
773
CUresult CUDAAPI cuMemcpyDtoDAsync (CUdeviceptr dstDevice,
774
CUdeviceptr srcDevice, unsigned int ByteCount, CUstream hStream );
776
// system <-> array memory
777
CUresult CUDAAPI cuMemcpyHtoAAsync( CUarray dstArray, unsigned int dstOffset,
778
const void *srcHost, unsigned int ByteCount, CUstream hStream );
779
CUresult CUDAAPI cuMemcpyAtoHAsync( void *dstHost, CUarray srcArray, unsigned int srcOffset,
780
unsigned int ByteCount, CUstream hStream );
783
CUresult CUDAAPI cuMemcpy2DAsync( const CUDA_MEMCPY2D *pCopy, CUstream hStream );
786
CUresult CUDAAPI cuMemcpy3DAsync( const CUDA_MEMCPY3D *pCopy, CUstream hStream );
788
/************************************
792
***********************************/
793
CUresult CUDAAPI cuMemsetD8( CUdeviceptr dstDevice, unsigned char uc, unsigned int N );
794
CUresult CUDAAPI cuMemsetD16( CUdeviceptr dstDevice, unsigned short us, unsigned int N );
795
CUresult CUDAAPI cuMemsetD32( CUdeviceptr dstDevice, unsigned int ui, unsigned int N );
797
CUresult CUDAAPI cuMemsetD2D8( CUdeviceptr dstDevice, unsigned int dstPitch, unsigned char uc, unsigned int Width, unsigned int Height );
798
CUresult CUDAAPI cuMemsetD2D16( CUdeviceptr dstDevice, unsigned int dstPitch, unsigned short us, unsigned int Width, unsigned int Height );
799
CUresult CUDAAPI cuMemsetD2D32( CUdeviceptr dstDevice, unsigned int dstPitch, unsigned int ui, unsigned int Width, unsigned int Height );
801
/************************************
803
** Function management
805
***********************************/
808
CUresult CUDAAPI cuFuncSetBlockShape (CUfunction hfunc, int x, int y, int z);
809
CUresult CUDAAPI cuFuncSetSharedSize (CUfunction hfunc, unsigned int bytes);
810
CUresult CUDAAPI cuFuncGetAttribute (int *pi, CUfunction_attribute attrib, CUfunction hfunc);
811
CUresult CUDAAPI cuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config);
813
/************************************
817
***********************************/
819
CUresult CUDAAPI cuArrayCreate( CUarray *pHandle, const CUDA_ARRAY_DESCRIPTOR *pAllocateArray );
820
CUresult CUDAAPI cuArrayGetDescriptor( CUDA_ARRAY_DESCRIPTOR *pArrayDescriptor, CUarray hArray );
821
CUresult CUDAAPI cuArrayDestroy( CUarray hArray );
823
CUresult CUDAAPI cuArray3DCreate( CUarray *pHandle, const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray );
824
CUresult CUDAAPI cuArray3DGetDescriptor( CUDA_ARRAY3D_DESCRIPTOR *pArrayDescriptor, CUarray hArray );
827
/************************************
829
** Texture reference management
831
***********************************/
832
CUresult CUDAAPI cuTexRefCreate( CUtexref *pTexRef );
833
CUresult CUDAAPI cuTexRefDestroy( CUtexref hTexRef );
835
CUresult CUDAAPI cuTexRefSetArray( CUtexref hTexRef, CUarray hArray, unsigned int Flags );
836
CUresult CUDAAPI cuTexRefSetAddress( unsigned int *ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, unsigned int bytes );
837
CUresult CUDAAPI cuTexRefSetAddress2D( CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, CUdeviceptr dptr, unsigned int Pitch);
838
CUresult CUDAAPI cuTexRefSetFormat( CUtexref hTexRef, CUarray_format fmt, int NumPackedComponents );
839
CUresult CUDAAPI cuTexRefSetAddressMode( CUtexref hTexRef, int dim, CUaddress_mode am );
840
CUresult CUDAAPI cuTexRefSetFilterMode( CUtexref hTexRef, CUfilter_mode fm );
841
CUresult CUDAAPI cuTexRefSetFlags( CUtexref hTexRef, unsigned int Flags );
843
CUresult CUDAAPI cuTexRefGetAddress( CUdeviceptr *pdptr, CUtexref hTexRef );
844
CUresult CUDAAPI cuTexRefGetArray( CUarray *phArray, CUtexref hTexRef );
845
CUresult CUDAAPI cuTexRefGetAddressMode( CUaddress_mode *pam, CUtexref hTexRef, int dim );
846
CUresult CUDAAPI cuTexRefGetFilterMode( CUfilter_mode *pfm, CUtexref hTexRef );
847
CUresult CUDAAPI cuTexRefGetFormat( CUarray_format *pFormat, int *pNumChannels, CUtexref hTexRef );
848
CUresult CUDAAPI cuTexRefGetFlags( unsigned int *pFlags, CUtexref hTexRef );
850
/************************************
852
** Surface reference management
854
***********************************/
856
CUresult CUDAAPI cuSurfRefSetArray( CUsurfref hSurfRef, CUarray hArray, unsigned int Flags );
857
CUresult CUDAAPI cuSurfRefGetArray( CUarray *phArray, CUsurfref hSurfRef );
859
/************************************
861
** Parameter management
863
***********************************/
865
CUresult CUDAAPI cuParamSetSize (CUfunction hfunc, unsigned int numbytes);
866
CUresult CUDAAPI cuParamSeti (CUfunction hfunc, int offset, unsigned int value);
867
CUresult CUDAAPI cuParamSetf (CUfunction hfunc, int offset, float value);
868
CUresult CUDAAPI cuParamSetv (CUfunction hfunc, int offset, void *ptr, unsigned int numbytes);
869
CUresult CUDAAPI cuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef);
872
/************************************
876
***********************************/
878
CUresult CUDAAPI cuLaunch ( CUfunction f );
879
CUresult CUDAAPI cuLaunchGrid (CUfunction f, int grid_width, int grid_height);
880
CUresult CUDAAPI cuLaunchGridAsync( CUfunction f, int grid_width, int grid_height, CUstream hStream );
882
/************************************
886
***********************************/
887
CUresult CUDAAPI cuEventCreate( CUevent *phEvent, unsigned int Flags );
888
CUresult CUDAAPI cuEventRecord( CUevent hEvent, CUstream hStream );
889
CUresult CUDAAPI cuEventQuery( CUevent hEvent );
890
CUresult CUDAAPI cuEventSynchronize( CUevent hEvent );
891
CUresult CUDAAPI cuEventDestroy( CUevent hEvent );
892
CUresult CUDAAPI cuEventElapsedTime( float *pMilliseconds, CUevent hStart, CUevent hEnd );
894
/************************************
898
***********************************/
899
CUresult CUDAAPI cuStreamCreate( CUstream *phStream, unsigned int Flags );
900
CUresult CUDAAPI cuStreamQuery( CUstream hStream );
901
CUresult CUDAAPI cuStreamSynchronize( CUstream hStream );
902
CUresult CUDAAPI cuStreamDestroy( CUstream hStream );
904
/************************************
908
***********************************/
909
CUresult CUDAAPI cuGraphicsUnregisterResource(CUgraphicsResource resource);
910
CUresult CUDAAPI cuGraphicsSubResourceGetMappedArray( CUarray *pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel );
911
CUresult CUDAAPI cuGraphicsResourceGetMappedPointer( CUdeviceptr *pDevPtr, unsigned int *pSize, CUgraphicsResource resource );
912
CUresult CUDAAPI cuGraphicsResourceSetMapFlags( CUgraphicsResource resource, unsigned int flags );
913
CUresult CUDAAPI cuGraphicsMapResources( unsigned int count, CUgraphicsResource *resources, CUstream hStream );
914
CUresult CUDAAPI cuGraphicsUnmapResources( unsigned int count, CUgraphicsResource *resources, CUstream hStream );
916
/************************************
920
***********************************/
921
CUresult CUDAAPI cuGetExportTable( const void **ppExportTable, const CUuuid *pExportTableId );
923
/************************************
927
***********************************/
929
CUresult CUDAAPI cuCtxSetLimit(CUlimit limit, size_t value);
930
CUresult CUDAAPI cuCtxGetLimit(size_t *pvalue, CUlimit limit);
936
#endif /* __cuda_cuda_h__ */