3
// ************************************************************************
6
// Manycore Performance-Portable Multidimensional Arrays
8
// Copyright (2012) Sandia Corporation
10
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
11
// the U.S. Government retains certain rights in this software.
13
// Redistribution and use in source and binary forms, with or without
14
// modification, are permitted provided that the following conditions are
17
// 1. Redistributions of source code must retain the above copyright
18
// notice, this list of conditions and the following disclaimer.
20
// 2. Redistributions in binary form must reproduce the above copyright
21
// notice, this list of conditions and the following disclaimer in the
22
// documentation and/or other materials provided with the distribution.
24
// 3. Neither the name of the Corporation nor the names of the
25
// contributors may be used to endorse or promote products derived from
26
// this software without specific prior written permission.
28
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
29
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
32
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
35
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
36
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
37
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
38
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
42
// ************************************************************************
46
#ifndef KOKKOS_CUDA_HPP
47
#define KOKKOS_CUDA_HPP
49
#include <Kokkos_Core_fwd.hpp>
51
// If CUDA execution space is enabled then use this header file.
53
#if defined( KOKKOS_HAVE_CUDA )
58
#include <Kokkos_CudaSpace.hpp>
60
#include <Kokkos_Parallel.hpp>
61
#include <Kokkos_Layout.hpp>
62
#include <Kokkos_ScratchSpace.hpp>
63
#include <Kokkos_MemoryTraits.hpp>
64
#include <impl/Kokkos_Tags.hpp>
66
/*--------------------------------------------------------------------------*/
74
/*--------------------------------------------------------------------------*/
79
/// \brief Kokkos Execution Space that uses CUDA to run on GPUs.
81
/// An "execution space" represents a parallel execution model. It tells Kokkos
82
/// how to parallelize the execution of kernels in a parallel_for or
83
/// parallel_reduce. For example, the Threads execution space uses Pthreads or
84
/// C++11 threads on a CPU, the OpenMP execution space uses the OpenMP language
85
/// extensions, and the Serial execution space executes "parallel" kernels
86
/// sequentially. The Cuda execution space uses NVIDIA's CUDA programming
87
/// model to execute kernels in parallel on GPUs.
90
//! \name Type declarations that all Kokkos execution spaces must provide.
93
//! Tag this class as a Kokkos execution space.
94
typedef Cuda execution_space ;
96
#if defined( KOKKOS_USE_CUDA_UVM )
97
//! This execution space's preferred memory space.
98
typedef CudaUVMSpace memory_space ;
100
//! This execution space's preferred memory space.
101
typedef CudaSpace memory_space ;
104
//! The size_type best suited for this execution space.
105
typedef memory_space::size_type size_type ;
107
//! This execution space's preferred array layout.
108
typedef LayoutLeft array_layout ;
110
//! For backward compatibility
111
typedef Cuda device_type ;
113
//! Scratch memory space type for this execution space: ScratchMemorySpace
//! instantiated on Cuda.
typedef ScratchMemorySpace< Cuda > scratch_memory_space ;
116
//--------------------------------------------------
117
//! \name Functions that all Kokkos devices must implement.
120
/// \brief True if and only if this method is being called in a
121
/// thread-parallel function.
122
KOKKOS_INLINE_FUNCTION static int in_parallel() {
123
#if defined( __CUDA_ARCH__ )
130
/** \brief Set the device in a "sleep" state.
132
* This function sets the device in a "sleep" state in which it is
133
* not ready for work. This may consume less resources than if the
134
* device were in an "awake" state, but it may also take time to
135
* bring the device from a sleep state to be ready for work.
137
* \return True if the device is in the "sleep" state, else false if
138
* the device is actively working and could not enter the "sleep"
* state.
*/
143
/// \brief Wake the device from the 'sleep' state so it is ready for work.
145
/// \return True if the device is in the "ready" state, else "false"
146
/// if the device is actively working (which also means that it's
/// not asleep).
150
/// \brief Wait until all dispatched functors complete.
152
/// The parallel_for or parallel_reduce dispatch of a functor may
153
/// return asynchronously, before the functor completes. This
154
/// method does not return until all dispatched functors on this
155
/// device have completed.
158
//! Free any resources being consumed by the device.
159
static void finalize();
161
//! Whether this execution space has been initialized.
162
static int is_initialized();
164
//! Print configuration information to the given output stream.
165
static void print_configuration( std::ostream & , const bool detail = false );
168
//--------------------------------------------------
169
//! \name Cuda space instances
173
//! Construct a Cuda object from an integer instance id. The constructor is
//! explicit, so an int is never implicitly converted to a Cuda.
//! NOTE(review): presumably instance_id selects one of the num_instances
//! instances requested through initialize() — confirm against the definition.
explicit Cuda( const int instance_id );
175
#if defined( KOKKOS_HAVE_CXX11 )
176
Cuda & operator = ( const Cuda & ) = delete ;
179
Cuda & operator = ( const Cuda & );
183
//--------------------------------------------------------------------------
184
//! \name Device-specific functions
187
struct SelectDevice {
189
SelectDevice() : cuda_device_id(0) {}
190
explicit SelectDevice( int id ) : cuda_device_id( id ) {}
193
//! Initialize, telling the CUDA run-time library which device to use.
194
static void initialize( const SelectDevice = SelectDevice()
195
, const size_t num_instances = 1 );
197
/// \brief Cuda device architecture of the selected device.
199
/// This matches the __CUDA_ARCH__ specification.
200
static size_type device_arch();
202
//! Query device count.
203
static size_type detect_device_count();
205
/** \brief Detect the available devices and their architecture
206
* as defined by the __CUDA_ARCH__ specification.
*/
208
static std::vector<unsigned> detect_device_arch();
211
//--------------------------------------------------------------------------
213
//! CUDA stream handle stored in this object. The member is const, so it is
//! set at construction and cannot be reassigned afterwards.
//! NOTE(review): presumably the stream this instance dispatches work on —
//! confirm against the constructor definition.
const cudaStream_t m_stream ;
217
} // namespace Kokkos
219
/*--------------------------------------------------------------------------*/
220
/*--------------------------------------------------------------------------*/
226
struct VerifyExecutionCanAccessMemorySpace
228
, Kokkos::Cuda::scratch_memory_space
231
enum { value = true };
232
KOKKOS_INLINE_FUNCTION static void verify( void ) { }
233
KOKKOS_INLINE_FUNCTION static void verify( const void * ) { }
237
struct VerifyExecutionCanAccessMemorySpace
239
, Kokkos::Cuda::scratch_memory_space
242
enum { value = false };
243
inline static void verify( void ) { CudaSpace::access_error(); }
244
inline static void verify( const void * p ) { CudaSpace::access_error(p); }
248
} // namespace Kokkos
250
/*--------------------------------------------------------------------------*/
251
/*--------------------------------------------------------------------------*/
253
#include <Cuda/Kokkos_CudaExec.hpp>
254
#include <Cuda/Kokkos_Cuda_View.hpp>
255
#include <Cuda/Kokkos_Cuda_Parallel.hpp>
257
//----------------------------------------------------------------------------
259
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
260
#endif /* #ifndef KOKKOS_CUDA_HPP */