/*
    Copyright 2005-2009 Intel Corporation.  All Rights Reserved.

    This file is part of Threading Building Blocks.

    Threading Building Blocks is free software; you can redistribute it
    and/or modify it under the terms of the GNU General Public License
    version 2 as published by the Free Software Foundation.

    Threading Building Blocks is distributed in the hope that it will be
    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with Threading Building Blocks; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

    As a special exception, you may use this file as part of a free software
    library without restriction.  Specifically, if other files instantiate
    templates or use macros or inline functions from this file, or you compile
    this file and link it with other files to produce an executable, this
    file does not by itself cause the resulting executable to be covered by
    the GNU General Public License.  This exception does not however
    invalidate any other reasons why the executable file might be covered by
    the GNU General Public License.
*/
#ifndef __TBB_parallel_reduce_H
#define __TBB_parallel_reduce_H

#include "task.h"
#include "aligned_space.h"
#include "partitioner.h"
#include <new>
42
//! ITT instrumented routine that stores src into location pointed to by dst.
43
void __TBB_EXPORTED_FUNC itt_store_pointer_with_release_v3( void* dst, void* src );
45
//! ITT instrumented routine that loads pointer from location pointed to by src.
46
void* __TBB_EXPORTED_FUNC itt_load_pointer_with_acquire_v3( const void* src );
48
template<typename T> inline void parallel_reduce_store_body( T*& dst, T* src ) {
49
#if TBB_USE_THREADING_TOOLS
50
itt_store_pointer_with_release_v3(&dst,src);
52
__TBB_store_with_release(dst,src);
53
#endif /* TBB_USE_THREADING_TOOLS */
56
template<typename T> inline T* parallel_reduce_load_body( T*& src ) {
57
#if TBB_USE_THREADING_TOOLS
58
return static_cast<T*>(itt_load_pointer_with_acquire_v3(&src));
60
return __TBB_load_with_acquire(src);
61
#endif /* TBB_USE_THREADING_TOOLS */
64
//! 0 if root, 1 if a left child, 2 if a right child.
65
/** Represented as a char, not enum, for compactness. */
66
typedef char reduction_context;
68
//! Task type use to combine the partial results of parallel_reduce with affinity_partitioner.
69
/** @ingroup algorithms */
70
template<typename Body>
71
class finish_reduce: public task {
72
//! Pointer to body, or NULL if the left child has not yet finished.
74
bool has_right_zombie;
75
const reduction_context my_context;
76
aligned_space<Body,1> zombie_space;
77
finish_reduce( char context ) :
79
has_right_zombie(false),
84
if( has_right_zombie ) {
85
// Right child was stolen.
86
Body* s = zombie_space.begin();
91
parallel_reduce_store_body( static_cast<finish_reduce*>(parent())->my_body, my_body );
94
template<typename Range,typename Body_, typename Partitioner>
95
friend class start_reduce;
98
//! Task type used to split the work of parallel_reduce with affinity_partitioner.
99
/** @ingroup algorithms */
100
template<typename Range, typename Body, typename Partitioner>
101
class start_reduce: public task {
102
typedef finish_reduce<Body> finish_type;
105
typename Partitioner::partition_type my_partition;
106
reduction_context my_context;
107
/*override*/ task* execute();
108
template<typename Body_>
109
friend class finish_reduce;
111
//! Constructor used for root task
112
start_reduce( const Range& range, Body* body, Partitioner& partitioner ) :
115
my_partition(partitioner),
119
//! Splitting constructor used to generate children.
120
/** this becomes left child. Newly constructed object is right child. */
121
start_reduce( start_reduce& parent, split ) :
122
my_body(parent.my_body),
123
my_range(parent.my_range,split()),
124
my_partition(parent.my_partition,split()),
127
my_partition.set_affinity(*this);
128
parent.my_context = 1;
130
//! Update affinity info, if any
131
/*override*/ void note_affinity( affinity_id id ) {
132
my_partition.note_affinity( id );
136
static void run( const Range& range, Body& body, Partitioner& partitioner ) {
137
if( !range.empty() ) {
138
#if !__TBB_EXCEPTIONS || TBB_JOIN_OUTER_TASK_GROUP
139
task::spawn_root_and_wait( *new(task::allocate_root()) start_reduce(range,&body,partitioner) );
141
// Bound context prevents exceptions from body to affect nesting or sibling algorithms,
142
// and allows users to handle exceptions safely by wrapping parallel_for in the try-block.
143
task_group_context context;
144
task::spawn_root_and_wait( *new(task::allocate_root(context)) start_reduce(range,&body,partitioner) );
145
#endif /* __TBB_EXCEPTIONS && !TBB_JOIN_OUTER_TASK_GROUP */
149
static void run( const Range& range, Body& body, Partitioner& partitioner, task_group_context& context ) {
151
task::spawn_root_and_wait( *new(task::allocate_root(context)) start_reduce(range,&body,partitioner) );
153
#endif /* __TBB_EXCEPTIONS */
156
template<typename Range, typename Body, typename Partitioner>
157
task* start_reduce<Range,Body,Partitioner>::execute() {
158
if( my_context==2 ) {
159
finish_type* p = static_cast<finish_type*>(parent() );
160
if( !parallel_reduce_load_body(p->my_body) ) {
161
my_body = new( p->zombie_space.begin() ) Body(*my_body,split());
162
p->has_right_zombie = true;
165
if( !my_range.is_divisible() || my_partition.should_execute_range(*this) ) {
166
(*my_body)( my_range );
168
parallel_reduce_store_body(static_cast<finish_type*>(parent())->my_body, my_body );
169
return my_partition.continue_after_execute_range(*this);
171
finish_type& c = *new( allocate_continuation()) finish_type(my_context);
172
recycle_as_child_of(c);
174
bool delay = my_partition.decide_whether_to_delay();
175
start_reduce& b = *new( c.allocate_child() ) start_reduce(*this,split());
176
my_partition.spawn_or_delay(delay,*this,b);
181
//! Auxiliary class for parallel_reduce; for internal use only.
182
/** The adaptor class that implements \ref parallel_reduce_body_req "parallel_reduce Body"
183
using given \ref parallel_reduce_lambda_req "anonymous function objects".
185
/** @ingroup algorithms */
186
template<typename Range, typename Value, typename RealBody, typename Reduction>
187
class lambda_reduce_body {
189
//FIXME: decide if my_real_body, my_reduction, and identity_element should be copied or referenced
190
// (might require some performance measurements)
192
const Value& identity_element;
193
const RealBody& my_real_body;
194
const Reduction& my_reduction;
196
lambda_reduce_body& operator= ( const lambda_reduce_body& other );
198
lambda_reduce_body( const Value& identity, const RealBody& body, const Reduction& reduction )
199
: identity_element(identity)
201
, my_reduction(reduction)
204
lambda_reduce_body( const lambda_reduce_body& other )
205
: identity_element(other.identity_element)
206
, my_real_body(other.my_real_body)
207
, my_reduction(other.my_reduction)
208
, my_value(other.my_value)
210
lambda_reduce_body( lambda_reduce_body& other, tbb::split )
211
: identity_element(other.identity_element)
212
, my_real_body(other.my_real_body)
213
, my_reduction(other.my_reduction)
214
, my_value(other.identity_element)
216
void operator()(Range& range) {
217
my_value = my_real_body(range, const_cast<const Value&>(my_value));
219
void join( lambda_reduce_body& rhs ) {
220
my_value = my_reduction(const_cast<const Value&>(my_value), const_cast<const Value&>(rhs.my_value));
222
Value result() const {
227
} // namespace internal
230
// Requirements on Range concept are documented in blocked_range.h

/** \page parallel_reduce_body_req Requirements on parallel_reduce body
    Class \c Body implementing the concept of parallel_reduce body must define:
    - \code Body::Body( Body&, split ); \endcode        Splitting constructor.
                                                        Must be able to run concurrently with operator() and method \c join
    - \code Body::~Body(); \endcode                     Destructor
    - \code void Body::operator()( Range& r ); \endcode Function call operator applying body to range \c r
                                                        and accumulating the result
    - \code void Body::join( Body& b ); \endcode        Join results.
                                                        The result in \c b should be merged into the result of \c this
**/

/** \page parallel_reduce_lambda_req Requirements on parallel_reduce anonymous function objects (lambda functions)
    TO BE DOCUMENTED
**/

/** \name parallel_reduce
    See also requirements on \ref range_req "Range" and \ref parallel_reduce_body_req "parallel_reduce Body". **/
//@{
//! Parallel iteration with reduction and default partitioner.
252
/** @ingroup algorithms **/
253
template<typename Range, typename Body>
254
void parallel_reduce( const Range& range, Body& body ) {
255
internal::start_reduce<Range,Body, const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER() );
258
//! Parallel iteration with reduction and simple_partitioner
259
/** @ingroup algorithms **/
260
template<typename Range, typename Body>
261
void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) {
262
internal::start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner );
265
//! Parallel iteration with reduction and auto_partitioner
266
/** @ingroup algorithms **/
267
template<typename Range, typename Body>
268
void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner ) {
269
internal::start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner );
272
//! Parallel iteration with reduction and affinity_partitioner
273
/** @ingroup algorithms **/
274
template<typename Range, typename Body>
275
void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner ) {
276
internal::start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner );
280
//! Parallel iteration with reduction, simple partitioner and user-supplied context.
281
/** @ingroup algorithms **/
282
template<typename Range, typename Body>
283
void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
284
internal::start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner, context );
287
//! Parallel iteration with reduction, auto_partitioner and user-supplied context
288
/** @ingroup algorithms **/
289
template<typename Range, typename Body>
290
void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
291
internal::start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner, context );
294
//! Parallel iteration with reduction, affinity_partitioner and user-supplied context
295
/** @ingroup algorithms **/
296
template<typename Range, typename Body>
297
void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
298
internal::start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner, context );
300
#endif /* __TBB_EXCEPTIONS */
/** parallel_reduce overloads that work with anonymous function objects
    (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/
//! Parallel iteration with reduction and default partitioner.
306
/** @ingroup algorithms **/
307
template<typename Range, typename Value, typename RealBody, typename Reduction>
308
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
309
internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
310
internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
311
::run(range, body, __TBB_DEFAULT_PARTITIONER() );
312
return body.result();
315
//! Parallel iteration with reduction and simple_partitioner.
316
/** @ingroup algorithms **/
317
template<typename Range, typename Value, typename RealBody, typename Reduction>
318
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
319
const simple_partitioner& partitioner ) {
320
internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
321
internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
322
::run(range, body, partitioner );
323
return body.result();
326
//! Parallel iteration with reduction and auto_partitioner
327
/** @ingroup algorithms **/
328
template<typename Range, typename Value, typename RealBody, typename Reduction>
329
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
330
const auto_partitioner& partitioner ) {
331
internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
332
internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
333
::run( range, body, partitioner );
334
return body.result();
337
//! Parallel iteration with reduction and affinity_partitioner
338
/** @ingroup algorithms **/
339
template<typename Range, typename Value, typename RealBody, typename Reduction>
340
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
341
affinity_partitioner& partitioner ) {
342
internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
343
internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
344
::run( range, body, partitioner );
345
return body.result();
349
//! Parallel iteration with reduction, simple partitioner and user-supplied context.
350
/** @ingroup algorithms **/
351
template<typename Range, typename Value, typename RealBody, typename Reduction>
352
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
353
const simple_partitioner& partitioner, task_group_context& context ) {
354
internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
355
internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
356
::run( range, body, partitioner, context );
357
return body.result();
360
//! Parallel iteration with reduction, auto_partitioner and user-supplied context
361
/** @ingroup algorithms **/
362
template<typename Range, typename Value, typename RealBody, typename Reduction>
363
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
364
const auto_partitioner& partitioner, task_group_context& context ) {
365
internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
366
internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
367
::run( range, body, partitioner, context );
368
return body.result();
371
//! Parallel iteration with reduction, affinity_partitioner and user-supplied context
372
/** @ingroup algorithms **/
373
template<typename Range, typename Value, typename RealBody, typename Reduction>
374
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
375
affinity_partitioner& partitioner, task_group_context& context ) {
376
internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
377
internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
378
::run( range, body, partitioner, context );
379
return body.result();
381
#endif /* __TBB_EXCEPTIONS */
386
#endif /* __TBB_parallel_reduce_H */