15
15
// You should have received a copy of the GNU Lesser General Public License
16
16
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
18
// Simulate the processing of the current workload
19
// (include jobs that are downloading)
20
// with weighted round-robin (WRR) scheduling.
22
// For efficiency, we simulate an approximation of WRR.
23
// We don't model time-slicing.
24
// Instead we use a continuous model where, at a given point,
25
// each project has a set of running jobs that uses at most all CPUs.
26
// These jobs are assumed to run at a rate proportionate to their avg_ncpus,
27
// and each project gets total CPU proportionate to its RRS.
29
// For coprocessors, we saturate the resource;
30
// i.e. with 2 GPUs, we'd let a 1-GPU app and a 2-GPU app run together.
31
// Otherwise, there'd be the possibility of computing
32
// a nonzero shortfall inappropriately.
34
// Outputs are changes to global state:
35
// - deadline misses (per-project count, per-result flag)
36
// Deadline misses are not counted for tasks
37
// that are too large to run in RAM right now.
38
// - resource shortfalls (per-project and total)
39
// - counts of resources idle now
19
45
#include "boinc_win.h"
25
50
#include "client_state.h"
27
52
#include "client_msgs.h"
54
// Format a short description of the resources used by a job's app version
// into buf: the average CPU count, plus the `ncudas` count (labelled "NV")
// or the `natis` count (labelled "ATI") in the respective branches.
//   rp  - the job; only rp->avp is read.
//   buf - output buffer, written with sprintf(); no length is checked here,
//         so the caller must supply a buffer large enough for the text.
// NOTE(review): the opening `if` of the chain and the closing braces are not
// present in this view, and the bare integer lines look like line-number
// residue from a diff extraction -- confirm against the real file.
inline void rsc_string(RESULT* rp, char* buf) {
55
APP_VERSION* avp = rp->avp;
57
sprintf(buf, "%.2f CPU + %.2f NV", avp->avg_ncpus, avp->ncudas);
58
} else if (avp->natis) {
59
sprintf(buf, "%.2f CPU + %.2f ATI", avp->avg_ncpus, avp->natis);
61
// CPU-only description
sprintf(buf, "%.2f CPU", avp->avg_ncpus);
65
// this is here (rather than rr_sim.h) because its inline functions
29
68
// Simulation-wide bookkeeping for the round-robin simulation:
// the list of jobs currently "running" in the simulation, plus the inline
// helpers that add jobs to and remove jobs from that list.
// NOTE(review): this span appears to interleave two revisions of the struct
// (one that tracks a `coprocs` object and `active_ncpus`, one that updates
// the cpu/cuda/ati `*_work_fetch.sim_nused` counters). Bare integer lines
// look like line-number residue, and closing braces and some statements are
// not present in this view -- confirm against the real file.
struct RR_SIM_STATUS {
30
69
// jobs currently active in the simulation
std::vector<RESULT*> active;
34
// Returns the result of coprocs.sufficient_coprocs() for the job's
// app-version coprocessor requirements
// (presumably "enough free coprocessor instances" -- verify in COPROCS).
inline bool can_run(RESULT* rp) {
35
return coprocs.sufficient_coprocs(
36
rp->avp->coprocs, log_flags.rr_simulation, "rr_sim"
39
74
// Mark job rp as running in the simulation.
//   when - time value used only in the log message.
// Appends rp to `active` and adds its per-resource usage to the
// simulation usage counters.
inline void activate(RESULT* rp, double when) {
40
coprocs.reserve_coprocs(
41
rp->avp->coprocs, rp, log_flags.rr_simulation, "rr_sim"
43
if (log_flags.rr_simulation) {
44
msg_printf(rp->project, MSG_INFO,
45
"[rr_sim] starting at %f: %s", when, rp->name
75
PROJECT* p = rp->project;
76
if (log_flags.rr_simulation) {
79
msg_printf(p, MSG_INFO,
80
"[rr_sim] %.2f: starting %s (%s)",
48
84
active.push_back(rp);
49
active_ncpus += rp->avp->avg_ncpus;
85
cpu_work_fetch.sim_nused += rp->avp->avg_ncpus;
86
cuda_work_fetch.sim_nused += rp->avp->ncudas;
87
ati_work_fetch.sim_nused += rp->avp->natis;
51
89
// remove *rpbest from active set,
52
// and adjust CPU time left for other results
90
// and adjust FLOPS left for other results
54
92
// NOTE(review): the body of the scan loop below is not visible in this view;
// only the usage-counter decrements that follow it are.
inline void remove_active(RESULT* rpbest) {
55
coprocs.free_coprocs(rpbest->avp->coprocs, rpbest, log_flags.rr_simulation, "rr_sim");
56
93
vector<RESULT*>::iterator it = active.begin();
57
94
while (it != active.end()) {
77
active_ncpus -= rpbest->avp->avg_ncpus;
80
// Number of jobs currently active in the simulation.
inline int nactive() {
81
return (int) active.size();
88
coprocs.delete_coprocs();
114
// give back the finished job's share of each resource
cpu_work_fetch.sim_nused -= rpbest->avp->avg_ncpus;
115
cuda_work_fetch.sim_nused -= rpbest->avp->ncudas;
116
ati_work_fetch.sim_nused -= rpbest->avp->natis;
92
127
// Record job rp as running for its project in the simulation:
// append it to this status object's `active` list and add its CPU,
// `ncudas` and `natis` usage to the project's per-resource `sim_nused`
// counters.
// NOTE(review): bare integer lines look like diff line-number residue, and
// the `active_ncpus` update appears to come from an older revision than the
// `*_pwf.sim_nused` updates -- confirm against the real file.
void RR_SIM_PROJECT_STATUS::activate(RESULT* rp) {
93
128
active.push_back(rp);
94
active_ncpus += rp->avp->avg_ncpus;
129
rp->project->cpu_pwf.sim_nused += rp->avp->avg_ncpus;
130
rp->project->cuda_pwf.sim_nused += rp->avp->ncudas;
131
rp->project->ati_pwf.sim_nused += rp->avp->natis;
97
// Decide whether this project may start job rp in the simulation.
//   rp    - candidate job.
//   ncpus - CPU limit to compare against.
// Jobs that use coprocessors are always allowed here; any other job is
// allowed only while the project's active CPU usage is below ncpus.
// NOTE(review): the closing brace is not present in this view.
bool RR_SIM_PROJECT_STATUS::can_run(RESULT* rp, int ncpus) {
98
if (rp->uses_coprocs()) return true;
99
return active_ncpus < ncpus;
101
// Remove a job from this project's simulated active list and subtract its
// resource usage from the project's counters.
// NOTE(review): two signatures appear back to back below (parameter `r` and
// parameter `rp`), which looks like two revisions interleaved by a diff
// extraction; the comparison inside the loop and the closing braces are not
// present in this view -- confirm against the real file.
void RR_SIM_PROJECT_STATUS::remove_active(RESULT* r) {
134
void RR_SIM_PROJECT_STATUS::remove_active(RESULT* rp) {
102
135
std::vector<RESULT*>::iterator it = active.begin();
103
136
while (it != active.end()) {
105
138
// erase() returns the iterator following the removed element
it = active.erase(it);
110
active_ncpus -= r->avp->avg_ncpus;
143
rp->project->cpu_pwf.sim_nused -= rp->avp->avg_ncpus;
144
rp->project->cuda_pwf.sim_nused -= rp->avp->ncudas;
145
rp->project->ati_pwf.sim_nused -= rp->avp->natis;
113
// Set the project's rrsim_proc_rate:
114
// the fraction of CPU that it will get in round-robin mode.
115
// This should not be applied only to coproc jobs.
148
// estimate the rate (FLOPS) that this job will get long-term
149
// with weighted round-robin scheduling
117
// Set rr_sim_status.proc_rate for this project.
//   rrs - total resource share to divide this project's resource_share by.
// The rate is (resource_share / rrs), or 1 in the fallback branch,
// multiplied by gstate.overall_cpu_frac().
// NOTE(review): the declaration of `x` and the condition that selects between
// the two assignments are not present in this view (presumably a guard
// against rrs == 0 -- confirm against the real file).
void PROJECT::set_rrsim_proc_rate(double rrs) {
121
x = resource_share/rrs;
123
x = 1; // pathological case; maybe should be 1/# runnable projects
126
rr_sim_status.proc_rate = x*gstate.overall_cpu_frac();
151
// Set rp->rrsim_flops, the FLOPS rate assumed for job rp in the simulation.
//  - Coprocessor jobs: app-version flops scaled by gstate.overall_gpu_frac().
//  - CPU jobs: three successive scalings, reported as r1/r2/r3 in the log:
//      r1: the job's avg_ncpus, scaled down when the project's simulated CPU
//          usage exceeds gstate.ncpus;
//      r2: further scaled when the project's usage exceeds its share of CPUs;
//      r3: r2 times gstate.overall_cpu_frac().
//    The result is r3 times the app-version flops.
// NOTE(review): the declarations of `x` and `r2`, the early return of the
// coprocessor branch and all closing braces are not present in this view.
// Bare integer lines look like diff line-number residue.
void set_rrsim_flops(RESULT* rp) {
152
// For coproc jobs, use app version estimate
154
if (rp->uses_coprocs()) {
155
rp->rrsim_flops = rp->avp->flops * gstate.overall_gpu_frac();
158
PROJECT* p = rp->project;
160
// For CPU jobs, estimate how many CPU seconds per second this job would get
161
// running with other jobs of this project, ignoring other factors
164
if (p->cpu_pwf.sim_nused > gstate.ncpus) {
165
x = gstate.ncpus/p->cpu_pwf.sim_nused;
167
double r1 = x*rp->avp->avg_ncpus;
169
// if the project's total CPU usage is more than its share, scale
171
double share_cpus = p->cpu_pwf.runnable_share*gstate.ncpus;
172
if (!share_cpus) share_cpus = gstate.ncpus;
173
// deal with projects w/ resource share = 0
175
if (p->cpu_pwf.sim_nused > share_cpus) {
176
r2 *= (share_cpus / p->cpu_pwf.sim_nused);
179
// scale by overall CPU availability
181
double r3 = r2 * gstate.overall_cpu_frac();
183
rp->rrsim_flops = r3 * rp->avp->flops;
127
185
if (log_flags.rr_simulation) {
128
// NOTE(review): this first msg_printf uses `this`, `rrs` and
// rr_sim_status.proc_rate, so it presumably belongs to
// PROJECT::set_rrsim_proc_rate in the other revision; its format string also
// has an unbalanced "(" -- confirm against the real file.
msg_printf(this, MSG_INFO,
129
"[rr_sim] set_rrsim_proc_rate: %f (rrs %f, rs %f, ocf %f",
130
rr_sim_status.proc_rate, rrs, resource_share, gstate.overall_cpu_frac()
186
// rate is logged in units of 1e9 FLOPS
msg_printf(p, MSG_INFO,
187
"[rr_sim] set_rrsim_flops: %.2fG (r1 %.4f r2 %.4f r3 %.4f)",
188
rp->rrsim_flops/1e9, r1, r2, r3
135
194
// Log the deadline misses recorded by the round-robin simulation:
// first per result (rr_sim_misses_deadline), then per project
// (the deadlines_missed counters).
// NOTE(review): two revisions appear interleaved here -- one logging under
// "[cpu_sched_debug]" and reporting only changes relative to
// last_rr_sim_missed_deadline, one logging under "[cpu_sched]" with separate
// CPU / NVIDIA / ATI project counters. The assignments of `rp` and `p` from
// the loop index, the declarations, and the closing braces are not present
// in this view -- confirm against the real file.
void CLIENT_STATE::print_deadline_misses() {
139
198
// per-result report
for (i=0; i<results.size(); i++){
141
if (rp->rr_sim_misses_deadline && !rp->last_rr_sim_missed_deadline) {
142
msg_printf(rp->project, MSG_INFO,
143
"[cpu_sched_debug] Result %s projected to miss deadline.", rp->name
146
else if (!rp->rr_sim_misses_deadline && rp->last_rr_sim_missed_deadline) {
147
msg_printf(rp->project, MSG_INFO,
148
"[cpu_sched_debug] Result %s projected to meet deadline.", rp->name
200
if (rp->rr_sim_misses_deadline) {
201
msg_printf(rp->project, MSG_INFO,
202
"[cpu_sched] Result %s projected to miss deadline.",
152
207
// per-project report
for (i=0; i<projects.size(); i++) {
154
if (p->rr_sim_status.deadlines_missed) {
155
msg_printf(p, MSG_INFO,
156
"[cpu_sched_debug] Project has %d projected deadline misses",
157
p->rr_sim_status.deadlines_missed
209
if (p->cpu_pwf.deadlines_missed) {
210
msg_printf(p, MSG_INFO,
211
"[cpu_sched] Project has %d projected CPU deadline misses",
212
p->cpu_pwf.deadlines_missed
215
if (p->cuda_pwf.deadlines_missed) {
216
msg_printf(p, MSG_INFO,
217
"[cpu_sched] Project has %d projected NVIDIA GPU deadline misses",
218
p->cuda_pwf.deadlines_missed
221
if (p->ati_pwf.deadlines_missed) {
222
msg_printf(p, MSG_INFO,
223
"[cpu_sched] Project has %d projected ATI GPU deadline misses",
224
p->ati_pwf.deadlines_missed
163
// Do a simulation of the current workload
164
// with weighted round-robin (WRR) scheduling.
165
// Include jobs that are downloading.
167
// For efficiency, we simulate a crude approximation of WRR.
168
// We don't model time-slicing.
169
// Instead we use a continuous model where, at a given point,
170
// each project has a set of running jobs that uses at most all CPUs
171
// (and obeys coprocessor limits).
172
// These jobs are assumed to run at a rate proportionate to their avg_ncpus,
173
// and each project gets CPU proportionate to its RRS.
175
// Outputs are changes to global state:
176
// For each project p:
177
// p->rr_sim_deadlines_missed
179
// For each result r:
180
// r->rr_sim_misses_deadline
181
// r->last_rr_sim_missed_deadline
182
// gstate.cpu_shortfall
184
// Deadline misses are not counted for tasks
185
// that are too large to run in RAM right now.
187
230
// Entry point of the round-robin simulation: set up local state and
// emit the "rr_sim start" log line.
// NOTE(review): two revisions appear interleaved in this function (one that
// seeds `rrs` and clones the coprocessor state, one that calls
// work_fetch.rr_init()); bare integer lines look like diff line-number
// residue and some declarations and closing tokens are not present in this
// view -- confirm against the real file.
void CLIENT_STATE::rr_simulation() {
188
double rrs = nearly_runnable_resource_share();
190
231
PROJECT* p, *pbest;
191
232
RESULT* rp, *rpbest;
192
233
RR_SIM_STATUS sim_status;
195
sim_status.coprocs.clone(coprocs, false);
196
236
// compared later against a task's smoothed working-set size
double ar = available_ram();
238
work_fetch.rr_init();
198
240
if (log_flags.rr_simulation) {
199
241
msg_printf(0, MSG_INFO,
200
"[rr_sim] rr_sim start: now %f work_buf_total %f rrs %f ncpus %d",
201
now, work_buf_total(), rrs, ncpus
242
"[rr_sim] rr_sim start: work_buf_total %.2f on_frac %.3f active_frac %.3f",
243
work_buf_total(), time_stats.on_frac, time_stats.active_frac
211
253
// Job-selection phase of rr_simulation().
// Each result that passes the filters below is either activated in the
// simulation or queued as pending.
// NOTE(review): two revisions are interleaved. One activates when both
// can_run() checks pass, otherwise adds the job to the project's pending
// list. The other branches on CUDA / ATI / CPU, activating while instances
// remain and otherwise queueing on the per-resource (GPU) or per-project
// (CPU) pending list. The assignments of `rp` and `p`, the `else` keywords
// and the closing braces are not present in this view.
// Decide what jobs to include in the simulation,
212
// and pick the ones that are initially running
254
// and pick the ones that are initially running.
255
// NOTE: "results" is sorted by increasing arrival time
214
257
for (i=0; i<results.size(); i++) {
259
rp->rr_sim_misses_deadline = false;
216
260
// skip jobs that are excluded from the simulation
if (!rp->nearly_runnable()) continue;
217
261
if (rp->some_download_stalled()) continue;
218
262
if (rp->project->non_cpu_intensive) continue;
219
263
rp->rrsim_flops_left = rp->estimated_flops_remaining();
220
if (rp->rrsim_flops_left <= 0) continue;
265
//if (rp->rrsim_flops_left <= 0) continue;
266
// job may have fraction_done=1 but not be done;
267
// if it's past its deadline, we need to mark it as such
222
if (p->rr_sim_status.can_run(rp, gstate.ncpus) && sim_status.can_run(rp)) {
223
sim_status.activate(rp, now);
224
p->rr_sim_status.activate(rp);
270
p->pwf.has_runnable_jobs = true;
271
p->cpu_pwf.nused_total += rp->avp->avg_ncpus;
272
// CUDA job on a host that has CUDA devices
if (rp->uses_cuda() && host_info.have_cuda()) {
273
p->cuda_pwf.nused_total += rp->avp->ncudas;
274
p->cuda_pwf.has_runnable_jobs = true;
275
if (cuda_work_fetch.sim_nused < host_info.coprocs.cuda.count) {
276
sim_status.activate(rp, 0);
277
p->rr_sim_status.activate(rp);
279
cuda_work_fetch.pending.push_back(rp);
281
// ATI job on a host that has ATI devices
} else if (rp->uses_ati() && host_info.have_ati()) {
282
p->ati_pwf.nused_total += rp->avp->natis;
283
p->ati_pwf.has_runnable_jobs = true;
284
if (ati_work_fetch.sim_nused < host_info.coprocs.ati.count) {
285
sim_status.activate(rp, 0);
286
p->rr_sim_status.activate(rp);
288
ati_work_fetch.pending.push_back(rp);
226
p->rr_sim_status.add_pending(rp);
291
// remaining (CPU) case
p->cpu_pwf.has_runnable_jobs = true;
292
if (p->cpu_pwf.sim_nused + rp->avp->avg_ncpus <= ncpus) {
293
sim_status.activate(rp, 0);
294
p->rr_sim_status.activate(rp);
296
p->rr_sim_status.add_pending(rp);
228
// remember the previous simulation's verdict before resetting the flag
rp->last_rr_sim_missed_deadline = rp->rr_sim_misses_deadline;
229
rp->rr_sim_misses_deadline = false;
230
if (rp->uses_coprocs()) {
231
p->rr_sim_status.has_coproc_jobs = true;
233
p->rr_sim_status.has_cpu_jobs = true;
238
// Post-selection phase of rr_simulation().
// The loop sets each CPU-intensive project's processing rate and sums into
// `trs` the resource shares of projects that have CPU jobs or no coprocessor
// jobs. The following statements record how many CPU / CUDA / ATI instances
// are idle at the start of the simulation (clamped at zero) and then call
// work_fetch.compute_shares().
// NOTE(review): the loop and the idle-instance code appear to come from
// different revisions; the assignment of `p`, the declaration of `trs` and
// the closing braces are not present in this view.
for (i=0; i<projects.size(); i++) {
240
if (p->non_cpu_intensive) continue;
241
p->set_rrsim_proc_rate(rrs);
242
if (!p->rr_sim_status.has_coproc_jobs || p->rr_sim_status.has_cpu_jobs) {
243
trs += p->resource_share;
301
// note the number of idle instances
303
cpu_work_fetch.nidle_now = ncpus - cpu_work_fetch.sim_nused;
304
if (cpu_work_fetch.nidle_now < 0) cpu_work_fetch.nidle_now = 0;
305
if (host_info.have_cuda()) {
306
cuda_work_fetch.nidle_now = host_info.coprocs.cuda.count - cuda_work_fetch.sim_nused;
307
if (cuda_work_fetch.nidle_now < 0) cuda_work_fetch.nidle_now = 0;
309
if (host_info.have_ati()) {
310
ati_work_fetch.nidle_now = host_info.coprocs.ati.count - ati_work_fetch.sim_nused;
311
if (ati_work_fetch.nidle_now < 0) ati_work_fetch.nidle_now = 0;
314
work_fetch.compute_shares();
316
// Start of the main loop of rr_simulation(): runs while any job is active
// in the simulation. This part logs the first job to finish (`rpbest`, of
// project `pbest`) and checks it against its deadline.
// NOTE(review): the code that computes finish times and selects
// rpbest/pbest is not present in this view. Statements and format strings
// are duplicated because two revisions are interleaved (e.g. `diff` is
// computed twice, once with CPU_PESSIMISM_FACTOR and once without).
// Simulation loop.  Keep going until all work done
247
318
// end of the work-buffer window
double buf_end = now + work_buf_total();
249
// Simulation loop.  Keep going until work done
251
319
double sim_now = now;
253
bool all_projects_have_pending = false;
254
320
while (sim_status.active.size()) {
256
322
// compute finish times and see which result finishes first
283
337
if (log_flags.rr_simulation) {
284
338
msg_printf(pbest, MSG_INFO,
285
"[rr_sim] result %s finishes after %f (%f/%f)",
339
"[rr_sim] %.2f: %s finishes after %.2f (%.2fG/%.2fG)",
286
341
rpbest->name, rpbest->rrsim_finish_delay,
287
rpbest->rrsim_flops_left, rpbest->rrsim_flops
342
rpbest->rrsim_flops_left/1e9, rpbest->rrsim_flops/1e9
291
346
// "rpbest" is first result to finish.  Does it miss its deadline?
293
// diff: simulated finish time minus the deadline it is compared against
// (presumably diff > 0 means a miss; the test itself is not in this view)
double diff = sim_now + rpbest->rrsim_finish_delay - ((rpbest->computation_deadline()-now)*CPU_PESSIMISM_FACTOR + now);
348
double diff = (sim_now + rpbest->rrsim_finish_delay) - rpbest->computation_deadline();
295
350
ACTIVE_TASK* atp = lookup_active_task_by_result(rpbest);
296
351
// a task whose smoothed working set exceeds available RAM is only logged
// here (per the file header, such misses are not counted)
if (atp && atp->procinfo.working_set_size_smoothed > ar) {
297
352
if (log_flags.rr_simulation) {
298
353
msg_printf(pbest, MSG_INFO,
299
"[rr_sim] result %s misses deadline but too large to run",
354
"[rr_sim] %s misses deadline but too large to run",
304
359
// record the miss on the result, the project and the per-resource totals
rpbest->rr_sim_misses_deadline = true;
305
pbest->rr_sim_status.deadlines_missed++;
360
if (rpbest->uses_cuda()) {
361
pbest->cuda_pwf.deadlines_missed++;
362
cuda_work_fetch.deadline_missed_instances += rpbest->avp->ncudas;
363
} else if (rpbest->uses_ati()) {
364
pbest->ati_pwf.deadlines_missed++;
365
ati_work_fetch.deadline_missed_instances += rpbest->avp->natis;
367
pbest->cpu_pwf.deadlines_missed++;
368
cpu_work_fetch.deadline_missed_instances += rpbest->avp->avg_ncpus;
306
370
if (log_flags.rr_simulation) {
307
371
msg_printf(pbest, MSG_INFO,
308
"[rr_sim] result %s misses deadline by %f",
372
"[rr_sim] %s misses deadline by %.2f",
309
373
rpbest->name, diff
315
// Accounting step of the rr_simulation() loop for the interval that ends
// when rpbest finishes: saturated time, busy time for jobs that miss their
// deadline, and resource shortfalls inside the work-buffer window.
// NOTE(review): two revisions are interleaved. One computes an overall and
// per-project `cpu_shortfall` by hand, the other delegates to
// `*_work_fetch.accumulate_shortfall()`. As a result `end_time` and `x` are
// each declared twice below; closing braces, `else` keywords and the
// assignment of `p` are not present in this view.
// increment CPU shortfalls if necessary
379
// update saturated time
381
double end_time = sim_now + rpbest->rrsim_finish_delay;
382
// x: time from gstate.now to the end of this interval
double x = end_time - gstate.now;
383
cpu_work_fetch.update_saturated_time(x);
384
if (host_info.have_cuda()) {
385
cuda_work_fetch.update_saturated_time(x);
387
if (host_info.have_ati()) {
388
ati_work_fetch.update_saturated_time(x);
393
if (rpbest->rr_sim_misses_deadline) {
394
// NOTE(review): frac is used as a divisor on the next statement; confirm
// overall_gpu_frac()/overall_cpu_frac() cannot return 0.
double frac = rpbest->uses_coprocs()?gstate.overall_gpu_frac():gstate.overall_cpu_frac();
395
double dur = rpbest->estimated_time_remaining() / frac;
396
cpu_work_fetch.update_busy_time(dur, rpbest->avp->avg_ncpus);
397
if (rpbest->uses_cuda()) {
398
cuda_work_fetch.update_busy_time(dur, rpbest->avp->ncudas);
400
if (rpbest->uses_ati()) {
401
ati_work_fetch.update_busy_time(dur, rpbest->avp->natis);
405
// increment resource shortfalls
317
407
if (sim_now < buf_end) {
318
// check whether all projects have pending jobs;
319
// if so, we won't increment overall CPU shortfall
321
all_projects_have_pending = true;
322
for (i=0; i<projects.size(); i++) {
324
if (p->non_cpu_intensive) continue;
325
if (!p->rr_sim_status.pending.size()) {
326
all_projects_have_pending = false;
330
double end_time = sim_now + rpbest->rrsim_finish_delay;
331
408
// only the part of the interval inside the buffer window counts
if (end_time > buf_end) end_time = buf_end;
332
409
double d_time = end_time - sim_now;
333
double nidle_cpus = ncpus - sim_status.active_ncpus;
334
if (nidle_cpus<0) nidle_cpus = 0;
336
if (nidle_cpus > 0 && !all_projects_have_pending) {
337
cpu_shortfall += d_time*nidle_cpus;
338
if (log_flags.rr_simulation) {
339
msg_printf(0, MSG_INFO,
340
"[rr_sim] new overall CPU shortfall: %f", cpu_shortfall
344
if (log_flags.rr_simulation) {
345
msg_printf(0, MSG_INFO,
346
"[rr_sim] no change in overall CPU shortfall: nidle %f all have pending %d",
347
nidle_cpus, all_projects_have_pending
352
// per-project shortfall, only for projects with no pending jobs
for (i=0; i<projects.size(); i++) {
354
if (p->non_cpu_intensive) continue;
355
if (p->rr_sim_status.pending.size()) continue;
356
// project's fraction of total resource share (1 if trs is zero)
double rsf = trs?p->resource_share/trs:1;
357
double proj_cpu_share = ncpus*rsf;
359
if (log_flags.rr_simulation) {
360
msg_printf(p, MSG_INFO,
361
"[rr_sim] npending %d last ncpus %f cpu share %f",
362
(int)p->rr_sim_status.pending.size(), p->rr_sim_status.active_ncpus, proj_cpu_share
365
double x = proj_cpu_share - p->rr_sim_status.active_ncpus;
367
p->rr_sim_status.cpu_shortfall += d_time*x;
369
if (log_flags.rr_simulation) {
370
msg_printf(p, MSG_INFO,
371
"[rr_sim] new shortfall %f d_time %f proj_cpu_share %f lpan %f",
372
p->rr_sim_status.cpu_shortfall, d_time, proj_cpu_share, p->rr_sim_status.active_ncpus
411
cpu_work_fetch.accumulate_shortfall(d_time);
413
if (host_info.have_cuda()) {
414
cuda_work_fetch.accumulate_shortfall(d_time);
416
if (host_info.have_ati()) {
417
ati_work_fetch.accumulate_shortfall(d_time);
378
421
// Final step of each rr_simulation() loop iteration: retire rpbest, advance
// the simulated clock, and start pending jobs on the freed resources.
// NOTE(review): two revisions are interleaved (one pulls from the project's
// pending list and recomputes project rates, the other pulls from the
// per-resource pending lists). The inner `while` loops that the `break`
// statements belong to, the `else` keywords and the closing braces are not
// present in this view -- confirm against the real file.
sim_status.remove_active(rpbest);
379
422
pbest->rr_sim_status.remove_active(rpbest);
381
// If project has more results, add one or more to active set.
382
// TODO: do this for other projects too, since coproc may have been freed
385
rp = pbest->rr_sim_status.get_pending();
387
if (pbest->rr_sim_status.can_run(rp, gstate.ncpus) && sim_status.can_run(rp)) {
388
sim_status.activate(rp, sim_now);
389
pbest->rr_sim_status.activate(rp);
391
// cannot run yet: put it back on the pending list
pbest->rr_sim_status.add_pending(rp);
396
// If all work done for a project, subtract that project's share
397
// and recompute processing rates
399
if (pbest->rr_sim_status.none_active()) {
400
rrs -= pbest->resource_share;
401
if (log_flags.rr_simulation) {
402
msg_printf(pbest, MSG_INFO,
403
"[rr_sim] decr rrs by %f, new value %f",
404
pbest->resource_share, rrs
407
for (i=0; i<projects.size(); i++) {
409
if (p->non_cpu_intensive) continue;
410
p->set_rrsim_proc_rate(rrs);
414
424
// advance the simulated clock to the moment rpbest finishes
sim_now += rpbest->rrsim_finish_delay;
426
// start new jobs; may need to start more than one
427
// if this job used multiple resource instances
429
// NOTE(review): in the two GPU branches rp comes from the resource-wide
// pending list but is registered with pbest->rr_sim_status; confirm that
// rp->project == pbest is guaranteed, or whether rp->project's status was
// intended.
if (rpbest->uses_cuda()) {
431
if (cuda_work_fetch.sim_nused >= host_info.coprocs.cuda.count) break;
432
if (!cuda_work_fetch.pending.size()) break;
433
// take the oldest queued CUDA job
RESULT* rp = cuda_work_fetch.pending[0];
434
cuda_work_fetch.pending.erase(cuda_work_fetch.pending.begin());
435
sim_status.activate(rp, sim_now-now);
436
pbest->rr_sim_status.activate(rp);
438
} else if (rpbest->uses_ati()) {
440
if (ati_work_fetch.sim_nused >= host_info.coprocs.ati.count) break;
441
if (!ati_work_fetch.pending.size()) break;
442
// take the oldest queued ATI job
RESULT* rp = ati_work_fetch.pending[0];
443
ati_work_fetch.pending.erase(ati_work_fetch.pending.begin());
444
sim_status.activate(rp, sim_now-now);
445
pbest->rr_sim_status.activate(rp);
449
// NOTE(review): rp is dereferenced two statements below; no null check on
// the get_pending() result is visible here (the intervening line is missing
// from this view) -- confirm one exists.
RESULT* rp = pbest->rr_sim_status.get_pending();
451
if (pbest->cpu_pwf.sim_nused + rp->avp->avg_ncpus > ncpus) break;
452
sim_status.activate(rp, sim_now-now);
453
pbest->rr_sim_status.activate(rp);
417
// Tail of rr_simulation(): if the simulated work runs out before the end of
// the work-buffer window, the remaining time (buf_end - sim_now) is charged
// as shortfall.
// NOTE(review): two revisions are interleaved. One adds the tail to
// `cpu_shortfall` (overall, and per project in proportion to resource share)
// and logs the totals; the other calls `*_work_fetch.accumulate_shortfall()`.
// `d_time` is therefore declared twice; the assignment of `p`, the last
// msg_printf argument and the closing braces are not present in this view.
// if simulation ends before end of buffer, take the tail into account
458
// if simulation ends before end of buffer, take the tail into account
419
460
if (sim_now < buf_end) {
420
double d_time = buf_end - sim_now;
421
// all CPUs count as idle for the tail
cpu_shortfall += d_time * ncpus;
422
for (i=0; i<projects.size(); i++) {
424
if (p->non_cpu_intensive) continue;
425
// project's fraction of total resource share (1 if trs is zero)
double rsf = trs?p->resource_share/trs:1;
426
double proj_cpu_share = ncpus*rsf;
427
p->rr_sim_status.cpu_shortfall += d_time*proj_cpu_share;
431
if (log_flags.rr_simulation) {
432
for (i=0; i<projects.size(); i++) {
434
if (p->non_cpu_intensive) continue;
435
if (p->rr_sim_status.cpu_shortfall) {
436
msg_printf(p, MSG_INFO,
437
"[rr_sim] shortfall %f\n", p->rr_sim_status.cpu_shortfall
441
msg_printf(NULL, MSG_INFO,
442
"[rr_sim] done; total shortfall %f\n",
461
double d_time = buf_end - sim_now;
462
cpu_work_fetch.accumulate_shortfall(d_time);
463
if (host_info.have_cuda()) {
464
cuda_work_fetch.accumulate_shortfall(d_time);
466
if (host_info.have_ati()) {
467
ati_work_fetch.accumulate_shortfall(d_time);