3
// ************************************************************************
5
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
6
// Copyright (2012) Sandia Corporation
8
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9
// the U.S. Government retains certain rights in this software.
11
// Redistribution and use in source and binary forms, with or without
12
// modification, are permitted provided that the following conditions are met:
15
// 1. Redistributions of source code must retain the above copyright
16
// notice, this list of conditions and the following disclaimer.
18
// 2. Redistributions in binary form must reproduce the above copyright
19
// notice, this list of conditions and the following disclaimer in the
20
// documentation and/or other materials provided with the distribution.
22
// 3. Neither the name of the Corporation nor the names of the
23
// contributors may be used to endorse or promote products derived from
24
// this software without specific prior written permission.
26
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
40
// ************************************************************************
44
#ifndef KOKKOS_QTHREAD_PARALLEL_HPP
45
#define KOKKOS_QTHREAD_PARALLEL_HPP
49
#include <Kokkos_Parallel.hpp>
51
#include <impl/Kokkos_StaticAssert.hpp>
52
#include <impl/Kokkos_FunctorAdapter.hpp>
54
#include <Qthread/Kokkos_QthreadExec.hpp>
56
//----------------------------------------------------------------------------
61
//----------------------------------------------------------------------------
63
// ParallelFor specialization for a RangePolicy whose execution space is
// Kokkos::Qthread.
//
// NOTE(review): this listing looks lossy -- bare numeric lines are interleaved
// with the code, and braces, access specifiers and some statements are not
// visible. Comments below describe only what is visible; confirm the gaps
// against the upstream header before relying on this text.
template< class FunctorType , class Arg0 , class Arg1 , class Arg2 >
64
class ParallelFor< FunctorType , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Qthread > >
68
// Shorthand for the range policy type this specialization is keyed on.
typedef Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Qthread > Policy ;
70
// Functor and policy are held by value as const members.
const FunctorType m_func ;
71
const Policy m_policy ;
73
// driver overload enabled when PType::work_tag is void.
// Walks i from range.begin() up to (not including) range.end().
// NOTE(review): the loop body is not visible in this listing; presumably it
// calls functor( i ), mirroring the tagged overload below -- confirm.
template< class PType >
74
KOKKOS_FORCEINLINE_FUNCTION static
75
void driver( typename Impl::enable_if<
76
( Impl::is_same< typename PType::work_tag , void >::value )
77
, const FunctorType & >::type functor
78
, const PType & range )
80
const typename PType::member_type e = range.end();
81
for ( typename PType::member_type i = range.begin() ; i < e ; ++i ) {
86
// driver overload enabled when PType::work_tag is NOT void.
// Same iteration as above, but each call passes a default-constructed
// work_tag object as the first functor argument, followed by the index.
template< class PType >
87
KOKKOS_FORCEINLINE_FUNCTION static
88
void driver( typename Impl::enable_if<
89
( ! Impl::is_same< typename PType::work_tag , void >::value )
90
, const FunctorType & >::type functor
91
, const PType & range )
93
const typename PType::member_type e = range.end();
94
for ( typename PType::member_type i = range.begin() ; i < e ; ++i ) {
95
functor( typename PType::work_tag() , i );
99
// Function is called once by every concurrent thread.
//   exec : the calling worker's QthreadExec.
//   arg  : type-erased pointer to this ParallelFor object (see the
//          exec_all call in the constructor, which passes `this`).
100
static void execute( QthreadExec & exec , const void * arg )
102
// Recover the typed object from the opaque argument.
const ParallelFor & self = * ((const ParallelFor *) arg );
104
// Build this worker's WorkRange from the policy plus the worker's rank and
// the worker count, then run the functor over it.
driver( self.m_func , typename Policy::WorkRange( self.m_policy , exec.worker_rank() , exec.worker_size() ) );
106
// All threads wait for completion.
107
exec.exec_all_barrier();
112
// Constructor: hands `execute` and `this` to QthreadExec::exec_all together
// with Qthread::instance().
// NOTE(review): the closing parenthesis and the member initializers for
// m_func / m_policy are not visible in this listing -- confirm.
ParallelFor( const FunctorType & functor
113
, const Policy & policy
118
Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelFor::execute , this );
122
//----------------------------------------------------------------------------
124
// ParallelReduce specialization for a RangePolicy whose execution space is
// Kokkos::Qthread.
//
// NOTE(review): this listing looks lossy -- bare numeric lines are interleaved
// with the code, and braces, access specifiers and some statements are not
// visible. Comments below describe only what is visible; confirm the gaps
// against the upstream header before relying on this text.
template< class FunctorType , class Arg0 , class Arg1 , class Arg2 >
125
class ParallelReduce< FunctorType , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Qthread > >
129
// Policy type plus the functor value helpers, all keyed on the policy's
// work_tag.
typedef Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Qthread > Policy ;
130
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , typename Policy::work_tag > ValueTraits ;
131
typedef Kokkos::Impl::FunctorValueInit< FunctorType , typename Policy::work_tag > ValueInit ;
133
// Pointer / reference types of the reduction value, taken from ValueTraits.
typedef typename ValueTraits::pointer_type pointer_type ;
134
typedef typename ValueTraits::reference_type reference_type ;
136
// Functor and policy are held by value as const members.
const FunctorType m_func ;
137
const Policy m_policy ;
139
// driver overload enabled when PType::work_tag is void.
// Calls functor( i , update ) for each i from range.begin() up to (not
// including) range.end(); `update` is the value being accumulated into.
template< class PType >
140
KOKKOS_FORCEINLINE_FUNCTION static
141
void driver( typename Impl::enable_if<
142
( Impl::is_same< typename PType::work_tag , void >::value )
143
, const FunctorType & >::type functor
144
, reference_type update
145
, const PType & range )
147
const typename PType::member_type e = range.end();
148
for ( typename PType::member_type i = range.begin() ; i < e ; ++i ) {
149
functor( i , update );
153
// driver overload enabled when PType::work_tag is NOT void.
// Same iteration, with a default-constructed work_tag object passed as the
// first functor argument.
template< class PType >
154
KOKKOS_FORCEINLINE_FUNCTION static
155
void driver( typename Impl::enable_if<
156
( ! Impl::is_same< typename PType::work_tag , void >::value )
157
, const FunctorType & >::type functor
158
, reference_type update
159
, const PType & range )
161
const typename PType::member_type e = range.end();
162
for ( typename PType::member_type i = range.begin() ; i < e ; ++i ) {
163
functor( typename PType::work_tag() , i , update );
167
// Per-worker entry point handed to QthreadExec::exec_all by the constructor.
//   exec : the calling worker's QthreadExec.
//   arg  : type-erased pointer to this ParallelReduce object.
static void execute( QthreadExec & exec , const void * arg )
169
// Recover the typed object from the opaque argument.
const ParallelReduce & self = * ((const ParallelReduce *) arg );
172
// NOTE(review): the two lines below are the trailing arguments of a call
// whose head is not visible here -- presumably driver( self.m_func , ... );
// confirm. The first visible argument initializes the value obtained from
// exec.exec_all_reduce_value() through ValueInit::init; the second is this
// worker's WorkRange built from the policy, worker rank and worker count.
, ValueInit::init( self.m_func , exec.exec_all_reduce_value() )
173
, typename Policy::WorkRange( self.m_policy , exec.worker_rank() , exec.worker_size() )
176
// Hand the functor to the executor's reduce step for this work_tag.
exec.template exec_all_reduce<FunctorType, typename Policy::work_tag >( self.m_func );
181
// Constructor: runs the reduction and writes the result to result_view.
// NOTE(review): the member initializers for m_func / m_policy are not
// visible in this listing -- confirm.
template< class HostViewType >
182
ParallelReduce( const FunctorType & functor
183
, const Policy & policy
184
, const HostViewType & result_view )
188
// Size worker scratch using the functor's value_size; the second argument
// is 0 (the team specializations in this file label that slot
// "team shared memory").
QthreadExec::resize_worker_scratch( ValueTraits::value_size( m_func ) , 0 );
190
// Run `execute` through QthreadExec::exec_all with `this` as its argument.
Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelReduce::execute , this );
192
// Fetch the reduce result and view it as the functor's value pointer type.
const pointer_type data = (pointer_type) QthreadExec::exec_all_reduce_result();
194
// Pass the result through FunctorFinal::final for this functor / work_tag.
Kokkos::Impl::FunctorFinal< FunctorType , typename Policy::work_tag >::final( m_func , data );
196
// Copy out only when the result view has a non-null pointer.
if ( result_view.ptr_on_device() ) {
197
const unsigned n = ValueTraits::value_count( m_func );
198
// Element-wise copy of value_count entries into the caller's view.
for ( unsigned i = 0 ; i < n ; ++i ) { result_view.ptr_on_device()[i] = data[i]; }
203
//----------------------------------------------------------------------------
205
// ParallelFor specialization for a TeamPolicy whose execution space is
// Kokkos::Qthread.
//
// NOTE(review): this listing looks lossy -- bare numeric lines are interleaved
// with the code, and braces, access specifiers and some statements are not
// visible. Comments below describe only what is visible; confirm the gaps
// against the upstream header before relying on this text.
template< class FunctorType , class Arg0 , class Arg1 >
206
class ParallelFor< FunctorType , TeamPolicy< Arg0 , Arg1 , Kokkos::Qthread > >
210
// Shorthand for the team policy type this specialization is keyed on.
typedef TeamPolicy< Arg0 , Arg1 , Kokkos::Qthread > Policy ;
212
// Functor and team policy are held by value as const members.
const FunctorType m_func ;
213
const Policy m_team ;
215
// driver overload enabled when TagType is void: calls m_func( member ).
template< class TagType >
216
KOKKOS_FORCEINLINE_FUNCTION
217
void driver( typename Impl::enable_if< Impl::is_same< TagType , void >::value ,
218
const typename Policy::member_type & >::type member ) const
219
{ m_func( member ); }
221
// driver overload enabled when TagType is NOT void: passes a
// default-constructed TagType object ahead of the team member.
template< class TagType >
222
KOKKOS_FORCEINLINE_FUNCTION
223
void driver( typename Impl::enable_if< ! Impl::is_same< TagType , void >::value ,
224
const typename Policy::member_type & >::type member ) const
225
{ m_func( TagType() , member ); }
227
// Per-worker entry point handed to QthreadExec::exec_all by the constructor.
//   exec : the calling worker's QthreadExec.
//   arg  : type-erased pointer to this ParallelFor object.
static void execute( QthreadExec & exec , const void * arg )
229
// Recover the typed object from the opaque argument.
const ParallelFor & self = * ((const ParallelFor *) arg );
231
// Team member handle built from this worker's executor and the team policy.
typename Policy::member_type member( exec , self.m_team );
234
// NOTE(review): lines appear to be missing around the next two statements
// (presumably a loop over the teams assigned to this worker) -- confirm.
// TagType cannot be deduced from the argument, so it is given explicitly.
self.ParallelFor::template driver< typename Policy::work_tag >( member );
235
member.team_barrier();
239
// Barrier across the workers taking part in exec_all.
exec.exec_all_barrier();
244
// Constructor: sizes worker scratch, then runs `execute` through exec_all.
// NOTE(review): the member initializers for m_func / m_team are not visible
// in this listing -- confirm.
ParallelFor( const FunctorType & functor ,
245
const Policy & policy )
249
// No reduction scratch is requested; team shared memory is sized from the
// functor and the policy's team_size().
QthreadExec::resize_worker_scratch
250
( /* reduction memory */ 0
251
, /* team shared memory */ FunctorTeamShmemSize< FunctorType >::value( functor , policy.team_size() ) );
253
// Run `execute` through QthreadExec::exec_all with `this` as its argument.
Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelFor::execute , this );
257
//----------------------------------------------------------------------------
259
// ParallelReduce specialization for a TeamPolicy whose execution space is
// Kokkos::Qthread.
//
// NOTE(review): this listing looks lossy -- bare numeric lines are interleaved
// with the code, and braces, access specifiers and some statements are not
// visible. Comments below describe only what is visible; confirm the gaps
// against the upstream header before relying on this text.
template< class FunctorType , class Arg0 , class Arg1 >
260
class ParallelReduce< FunctorType , TeamPolicy< Arg0 , Arg1 , Kokkos::Qthread > >
264
// Shorthand for the team policy type this specialization is keyed on.
typedef TeamPolicy< Arg0 , Arg1 , Kokkos::Qthread > Policy ;
266
// Functor value helpers, keyed on the policy's work_tag.
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , typename Policy::work_tag > ValueTraits ;
267
typedef Kokkos::Impl::FunctorValueInit< FunctorType , typename Policy::work_tag > ValueInit ;
269
// Pointer / reference types of the reduction value, taken from ValueTraits.
typedef typename ValueTraits::pointer_type pointer_type ;
270
typedef typename ValueTraits::reference_type reference_type ;
272
// Functor and team policy are held by value as const members.
const FunctorType m_func ;
273
const Policy m_team ;
275
// driver overload enabled when TagType is void:
// calls m_func( member , update ).
template< class TagType >
276
KOKKOS_FORCEINLINE_FUNCTION
277
void driver( typename Impl::enable_if< Impl::is_same< TagType , void >::value ,
278
const typename Policy::member_type & >::type member
279
, reference_type update ) const
280
{ m_func( member , update ); }
282
// driver overload enabled when TagType is NOT void: passes a
// default-constructed TagType object ahead of the member and the value.
template< class TagType >
283
KOKKOS_FORCEINLINE_FUNCTION
284
void driver( typename Impl::enable_if< ! Impl::is_same< TagType , void >::value ,
285
const typename Policy::member_type & >::type member
286
, reference_type update ) const
287
{ m_func( TagType() , member , update ); }
289
// Per-worker entry point handed to QthreadExec::exec_all by the constructor.
//   exec : the calling worker's QthreadExec.
//   arg  : type-erased pointer to this ParallelReduce object.
static void execute( QthreadExec & exec , const void * arg )
291
// Recover the typed object from the opaque argument.
const ParallelReduce & self = * ((const ParallelReduce *) arg );
293
// Initialize thread-local value
294
reference_type update = ValueInit::init( self.m_func , exec.exec_all_reduce_value() );
296
// Team member handle built from this worker's executor and the team policy.
typename Policy::member_type member( exec , self.m_team );
299
// NOTE(review): lines appear to be missing around the next two statements
// (presumably a loop over the teams assigned to this worker) -- confirm.
// TagType cannot be deduced from the arguments, so it is given explicitly.
self.ParallelReduce::template driver< typename Policy::work_tag >( member , update );
300
member.team_barrier();
304
// Hand the functor to the executor's reduce step for this work_tag.
exec.template exec_all_reduce< FunctorType , typename Policy::work_tag >( self.m_func );
309
// Constructor: runs the team reduction and writes the result to `result`.
// NOTE(review): the member initializers for m_func / m_team are not visible
// in this listing -- confirm.
template< class ViewType >
310
ParallelReduce( const FunctorType & functor ,
311
const Policy & policy ,
312
const ViewType & result )
316
// Reduction scratch is sized from the functor's value_size; team shared
// memory is sized from the functor and the policy's team_size().
QthreadExec::resize_worker_scratch
317
( /* reduction memory */ ValueTraits::value_size( functor )
318
, /* team shared memory */ FunctorTeamShmemSize< FunctorType >::value( functor , policy.team_size() ) );
320
// Run `execute` through QthreadExec::exec_all with `this` as its argument.
Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelReduce::execute , this );
322
// Fetch the reduce result and view it as the functor's value pointer type.
const pointer_type data = (pointer_type) QthreadExec::exec_all_reduce_result();
324
// Pass the result through FunctorFinal::final for this functor / work_tag.
Kokkos::Impl::FunctorFinal< FunctorType , typename Policy::work_tag >::final( m_func , data );
326
// NOTE(review): the RangePolicy ParallelReduce in this file guards its copy
// with `if ( result_view.ptr_on_device() )`; no such check is visible here.
// Confirm that `result` can never carry a null pointer on this path.
const unsigned n = ValueTraits::value_count( m_func );
327
// Element-wise copy of value_count entries into the caller's view.
for ( unsigned i = 0 ; i < n ; ++i ) { result.ptr_on_device()[i] = data[i]; }
331
//----------------------------------------------------------------------------
332
//----------------------------------------------------------------------------
334
// ParallelScan specialization for a RangePolicy whose execution space is
// Kokkos::Qthread.
//
// NOTE(review): this listing looks lossy -- bare numeric lines are interleaved
// with the code, and braces, access specifiers and some statements are not
// visible. Comments below describe only what is visible; confirm the gaps
// against the upstream header before relying on this text.
template< class FunctorType , class Arg0 , class Arg1 , class Arg2 >
335
class ParallelScan< FunctorType , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Qthread > >
339
// Policy type plus the functor value helpers, all keyed on the policy's
// work_tag.
typedef Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Qthread > Policy ;
340
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , typename Policy::work_tag > ValueTraits ;
341
typedef Kokkos::Impl::FunctorValueInit< FunctorType , typename Policy::work_tag > ValueInit ;
343
// Pointer / reference types of the scan value, taken from ValueTraits.
typedef typename ValueTraits::pointer_type pointer_type ;
344
typedef typename ValueTraits::reference_type reference_type ;
346
// Functor and policy are held by value as const members.
const FunctorType m_func ;
347
const Policy m_policy ;
349
// driver overload enabled when PType::work_tag is void.
// Calls functor( i , update , final ) for each i from range.begin() up to
// (not including) range.end().
// NOTE(review): `final` is used in the body but no parameter declaring it is
// visible; execute() passes a bool between `update` and `range`, so a
// `const bool final` parameter was presumably lost from this listing --
// confirm.
template< class PType >
350
KOKKOS_FORCEINLINE_FUNCTION static
351
void driver( typename Impl::enable_if<
352
( Impl::is_same< typename PType::work_tag , void >::value )
353
, const FunctorType & >::type functor
354
, reference_type update
356
, const PType & range )
358
const typename PType::member_type e = range.end();
359
for ( typename PType::member_type i = range.begin() ; i < e ; ++i ) {
360
functor( i , update , final );
364
// driver overload enabled when PType::work_tag is NOT void.
// Same iteration, with a default-constructed work_tag object passed as the
// first functor argument.
// NOTE(review): the `final` parameter declaration is likewise not visible
// here -- confirm.
template< class PType >
365
KOKKOS_FORCEINLINE_FUNCTION static
366
void driver( typename Impl::enable_if<
367
( ! Impl::is_same< typename PType::work_tag , void >::value )
368
, const FunctorType & >::type functor
369
, reference_type update
371
, const PType & range )
373
const typename PType::member_type e = range.end();
374
for ( typename PType::member_type i = range.begin() ; i < e ; ++i ) {
375
functor( typename PType::work_tag() , i , update , final );
379
// Per-worker entry point handed to QthreadExec::exec_all by the constructor.
//   exec : the calling worker's QthreadExec.
//   arg  : type-erased pointer to this ParallelScan object.
static void execute( QthreadExec & exec , const void * arg )
381
// Recover the typed object from the opaque argument.
const ParallelScan & self = * ((const ParallelScan *) arg );
383
// This worker's WorkRange, built from the policy, worker rank and worker
// count; it is reused by both passes below.
const typename Policy::WorkRange range( self.m_policy , exec.worker_rank() , exec.worker_size() );
385
// Initialize thread-local value
386
reference_type update = ValueInit::init( self.m_func , exec.exec_all_reduce_value() );
388
// First pass over the range, with the final flag set to false.
driver( self.m_func , update , false , range );
390
// Executor-level scan step for this functor / work_tag, run between the
// two passes.
exec.template exec_all_scan< FunctorType , typename Policy::work_tag >( self.m_func );
392
// Second pass over the same range, with the final flag set to true.
driver( self.m_func , update , true , range );
394
// Barrier across the workers taking part in exec_all.
exec.exec_all_barrier();
399
// Constructor: sizes worker scratch, then runs `execute` through exec_all.
// NOTE(review): the closing parenthesis and the member initializers for
// m_func / m_policy are not visible in this listing -- confirm.
ParallelScan( const FunctorType & functor
400
, const Policy & policy
405
// Size worker scratch using the functor's value_size; the second argument
// is 0 (the team specializations in this file label that slot
// "team shared memory").
QthreadExec::resize_worker_scratch( ValueTraits::value_size( m_func ) , 0 );
407
// Run `execute` through QthreadExec::exec_all with `this` as its argument.
Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelScan::execute , this );
412
} // namespace Kokkos
414
//----------------------------------------------------------------------------
415
//----------------------------------------------------------------------------
417
#endif /* #define KOKKOS_QTHREAD_PARALLEL_HPP */