1
// Copyright 2012 The Go Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
5
// Parallel for algorithm.
8
#include "arch_GOARCH.h"
12
// the thread's iteration space [32lsb, 32msb)
20
byte pad[CacheLineSize];
24
runtime·parforalloc(uint32 nthrmax)
28
// The ParFor object is followed by CacheLineSize padding
29
// and then nthrmax ParForThread.
30
desc = (ParFor*)runtime·malloc(sizeof(ParFor) + CacheLineSize + nthrmax * sizeof(ParForThread));
31
desc->thr = (ParForThread*)((byte*)(desc+1) + CacheLineSize);
32
desc->nthrmax = nthrmax;
36
// For testing from Go
37
// func parforalloc2(nthrmax uint32) *ParFor
39
runtime·parforalloc2(uint32 nthrmax, ParFor *desc)
41
desc = runtime·parforalloc(nthrmax);
46
runtime·parforsetup(ParFor *desc, uint32 nthr, uint32 n, void *ctx, bool wait, void (*body)(ParFor*, uint32))
51
if(desc == nil || nthr == 0 || nthr > desc->nthrmax || body == nil) {
52
runtime·printf("desc=%p nthr=%d count=%d body=%p\n", desc, nthr, n, body);
53
runtime·throw("parfor: invalid args");
68
for(i=0; i<nthr; i++) {
69
begin = (uint64)n*i / nthr;
70
end = (uint64)n*(i+1) / nthr;
71
pos = &desc->thr[i].pos;
72
if(((uintptr)pos & 7) != 0)
73
runtime·throw("parforsetup: pos is not aligned");
74
*pos = (uint64)begin | (((uint64)end)<<32);
78
// For testing from Go
79
// func parforsetup2(desc *ParFor, nthr, n uint32, ctx *byte, wait bool, body func(*ParFor, uint32))
81
runtime·parforsetup2(ParFor *desc, uint32 nthr, uint32 n, void *ctx, bool wait, void *body)
83
runtime·parforsetup(desc, nthr, n, ctx, wait, *(void(**)(ParFor*, uint32))body);
87
runtime·parfordo(ParFor *desc)
90
uint32 tid, begin, end, begin2, try, victim, i;
91
uint64 *mypos, *victimpos, pos, newpos;
92
void (*body)(ParFor*, uint32);
95
// Obtain 0-based thread index.
96
tid = runtime·xadd(&desc->thrseq, 1) - 1;
97
if(tid >= desc->nthr) {
98
runtime·printf("tid=%d nthr=%d\n", tid, desc->nthr);
99
runtime·throw("parfor: invalid tid");
102
// If single-threaded, just execute the for serially.
104
for(i=0; i<desc->cnt; i++)
110
me = &desc->thr[tid];
114
// While there is local work,
115
// bump low index and execute the iteration.
116
pos = runtime·xadd64(mypos, 1);
117
begin = (uint32)pos-1;
118
end = (uint32)(pos>>32);
126
// Out of work, need to steal something.
129
// If we don't see any work for long enough,
130
// increment the done counter...
131
if(try > desc->nthr*4 && !idle) {
133
runtime·xadd(&desc->done, 1);
135
// ...if all threads have incremented the counter,
137
if(desc->done + !idle == desc->nthr) {
139
runtime·xadd(&desc->done, 1);
142
// Choose a random victim for stealing.
143
victim = runtime·fastrand1() % (desc->nthr-1);
146
victimpos = &desc->thr[victim].pos;
147
pos = runtime·atomicload64(victimpos);
149
// See if it has any work.
151
end = (uint32)(pos>>32);
157
runtime·xadd(&desc->done, -1);
160
begin2 = begin + (end-begin)/2;
161
newpos = (uint64)begin | (uint64)begin2<<32;
162
if(runtime·cas64(victimpos, &pos, newpos)) {
168
// Has successfully stolen some work.
170
runtime·throw("parfor: should not be idle");
171
runtime·atomicstore64(mypos, (uint64)begin | (uint64)end<<32);
173
me->nstealcnt += end-begin;
177
if(try < desc->nthr) {
179
} else if (try < 4*desc->nthr) {
181
runtime·procyield(20);
182
// If a caller asked not to wait for the others, exit now
183
// (assume that most work is already done at this point).
184
} else if (!desc->wait) {
186
runtime·xadd(&desc->done, 1);
188
} else if (try < 6*desc->nthr) {
198
runtime·xadd64(&desc->nsteal, me->nsteal);
199
runtime·xadd64(&desc->nstealcnt, me->nstealcnt);
200
runtime·xadd64(&desc->nprocyield, me->nprocyield);
201
runtime·xadd64(&desc->nosyield, me->nosyield);
202
runtime·xadd64(&desc->nsleep, me->nsleep);
210
// For testing from Go
211
// func parforiters(desc *ParFor, tid uintptr) (uintptr, uintptr)
213
runtime·parforiters(ParFor *desc, uintptr tid, uintptr start, uintptr end)
215
start = (uint32)desc->thr[tid].pos;
216
end = (uint32)(desc->thr[tid].pos>>32);