1
// This file is part of BOINC.
2
// http://boinc.berkeley.edu
3
// Copyright (C) 2008 University of California
5
// BOINC is free software; you can redistribute it and/or modify it
6
// under the terms of the GNU Lesser General Public License
7
// as published by the Free Software Foundation,
8
// either version 3 of the License, or (at your option) any later version.
10
// BOINC is distributed in the hope that it will be useful,
11
// but WITHOUT ANY WARRANTY; without even the implied warranty of
12
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13
// See the GNU Lesser General Public License for more details.
15
// You should have received a copy of the GNU Lesser General Public License
16
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
20
#include "sched_main.h"
21
#include "sched_msgs.h"
22
#include "sched_config.h"
23
#include "sched_customize.h"
24
#include "sched_types.h"
25
#include "sched_util.h"
28
#include "sched_version.h"
30
// Return true if the given platform name denotes a 64-bit platform,
// i.e. if the name contains the substring "64".
//
// name: NUL-terminated platform name; a NULL pointer is treated as
//       "not a 64-bit platform" rather than being passed to strstr().
//
inline bool is_64b_platform(const char* name) {
    if (!name) return false;
    return strstr(name, "64") != NULL;
}
34
inline void dont_need_message(
35
const char* p, APP_VERSION* avp, CLIENT_APP_VERSION* cavp
37
if (!config.debug_version_select) return;
39
log_messages.printf(MSG_NORMAL,
40
"[version] [AV#%d] Don't need %s jobs, skipping\n",
44
log_messages.printf(MSG_NORMAL,
45
"[version] Don't need %s jobs, skipping anonymous version %d for %s (%s)\n",
46
p, cavp->version_num, cavp->app_name, cavp->plan_class
51
// for new-style requests, check that the app version uses a
52
// resource for which we need work
54
bool need_this_resource(
55
HOST_USAGE& host_usage, APP_VERSION* avp, CLIENT_APP_VERSION* cavp
57
if (g_wreq->rsc_spec_request) {
58
if (host_usage.ncudas) {
59
if (!g_wreq->need_cuda()) {
60
dont_need_message("CUDA", avp, cavp);
63
} else if (host_usage.natis) {
64
if (!g_wreq->need_ati()) {
65
dont_need_message("ATI", avp, cavp);
69
if (!g_wreq->need_cpu()) {
70
dont_need_message("CPU", avp, cavp);
78
// Search g_wreq->host_app_versions for the record
// whose app_version_id equals gavid.
//
// Returns a pointer to the matching element of that vector
// (so the pointer is only valid while the vector is not resized),
// or NULL if there is no such record.
//
static DB_HOST_APP_VERSION* lookup_host_app_version(int gavid) {
    for (unsigned int i=0; i<g_wreq->host_app_versions.size(); i++) {
        DB_HOST_APP_VERSION& hav = g_wreq->host_app_versions[i];
        if (hav.app_version_id == gavid) return &hav;
    }
    return NULL;
}
86
// Tell whether this host is "trusted" for the app version
// identified by gavid.
//
// Looks the record up with lookup_host_app_version();
// if the host has no record for that version the answer is false.
//
// NOTE(review): the statement producing the result for an existing record
// is not visible in this copy of the file. Presumably it returns a
// per-record flag, as app_version_is_reliable() does with havp->reliable
// - confirm against the complete source.
//
static inline bool app_version_is_trusted(int gavid) {
87
DB_HOST_APP_VERSION* havp = lookup_host_app_version(gavid);
88
// no record for this version: not trusted
if (!havp) return false;
92
static inline bool app_version_is_reliable(int gavid) {
93
DB_HOST_APP_VERSION* havp = lookup_host_app_version(gavid);
94
if (!havp) return false;
95
return havp->reliable;
98
// Compute the app version ID used to look up host_app_version records
// for an anonymous-platform version of the given app.
//
// The ID is app.id*1000000 minus the value of hu.resource_type().
// NOTE(review): presumably resource_type() yields a small code that
// distinguishes CPU/CUDA/ATI, so that IDs of different apps cannot collide
// - confirm against the definition of HOST_USAGE::resource_type().
// The multiplication is done in int, so this assumes app IDs small enough
// that app.id*1000000 does not overflow.
//
inline int host_usage_to_gavid(HOST_USAGE& hu, APP& app) {
    return app.id*1000000 - hu.resource_type();
}
102
// scale daily quota by # processors and/or by config.gpu_multiplier
//
// hav: host_app_version record; hav.max_jobs_per_day is the starting value
// hu:  usage of the version; hu.natis selects the ATI case
//
// Each factor (coprocessor count reported in the request,
// config.gpu_multiplier, g_reply->host.p_ncpus) is applied only when it is
// nonzero, so a missing count or an unset multiplier leaves the quota as is.
// With config.debug_quota set, the scaled value is logged.
//
// NOTE(review): several lines of this function are not visible in this copy
// (the test that opens the CUDA case, the closing braces, the printf
// arguments and the return statement). In particular it cannot be told from
// here whether the p_ncpus scaling applies only to non-GPU versions
// - confirm against the complete source.
//
104
inline int scaled_max_jobs_per_day(DB_HOST_APP_VERSION& hav, HOST_USAGE& hu) {
105
int n = hav.max_jobs_per_day;
107
// CUDA case: scale by the number of CUDA devices, then by the GPU multiplier
if (g_request->coprocs.cuda.count) {
108
n *= g_request->coprocs.cuda.count;
110
if (config.gpu_multiplier) {
111
n *= config.gpu_multiplier;
113
// ATI case: same scaling, using the ATI device count
} else if (hu.natis) {
114
if (g_request->coprocs.ati.count) {
115
n *= g_request->coprocs.ati.count;
117
if (config.gpu_multiplier) {
118
n *= config.gpu_multiplier;
121
// scale by the host's CPU count
if (g_reply->host.p_ncpus) {
122
n *= g_reply->host.p_ncpus;
125
if (config.debug_quota) {
126
log_messages.printf(MSG_NORMAL,
127
"[quota] [AV#%d] scaled max jobs per day: %d\n",
135
inline bool daily_quota_exceeded(int gavid, HOST_USAGE& hu) {
136
DB_HOST_APP_VERSION* havp = lookup_host_app_version(gavid);
137
if (!havp) return false;
138
int q = scaled_max_jobs_per_day(*havp, hu);
139
if (havp->n_jobs_today >= q) {
140
if (config.debug_quota) {
141
log_messages.printf(MSG_NORMAL,
142
"[quota] [AV#%d] daily quota exceeded: %d >= %d\n",
143
gavid, havp->n_jobs_today, q
146
havp->daily_quota_exceeded = true;
152
// scan through client's anonymous apps and pick the best one
//
// app:           the app for which a version is wanted
// need_64b:      if set, versions whose platform name is not 64-bit
//                (see is_64b_platform()) are tested for and logged
// reliable_only: if set, versions for which this host is not "reliable"
//                are tested for and logged
//
// Walks g_request->client_app_versions and, among the entries for this app,
// compares cav.host_usage.projected_flops against the best candidate so far.
// "best" starts out as NULL.
// If config.debug_version_select is set, each test below writes a
// "[version]" line to the log.
//
// NOTE(review): many lines of this function are not visible in this copy
// (local declarations, the bodies of most tests, the construction of
// "message" and the return statement). The log texts indicate that a version
// failing a test is rejected, and that a message is queued for the client
// when nothing usable is found - confirm against the complete source.
//
154
CLIENT_APP_VERSION* get_app_version_anonymous(
155
APP& app, bool need_64b, bool reliable_only
158
CLIENT_APP_VERSION* best = NULL;
162
if (config.debug_version_select) {
163
log_messages.printf(MSG_NORMAL,
164
"[version] get_app_version_anonymous: app %s%s\n",
165
app.name, reliable_only?" (reliable only)":""
168
for (i=0; i<g_request->client_app_versions.size(); i++) {
169
CLIENT_APP_VERSION& cav = g_request->client_app_versions[i];
170
// entries with no app pointer are ignored
if (!cav.app) continue;
171
// test: version belongs to a different app
if (cav.app->id != app.id) {
174
// test: job needs a 64-bit version and this one isn't
if (need_64b && !is_64b_platform(cav.platform)) {
177
// ID under which this version's host_app_version record is stored
int gavid = host_usage_to_gavid(cav.host_usage, app);
178
// test: only reliable versions wanted
if (reliable_only && !app_version_is_reliable(gavid)) {
179
if (config.debug_version_select) {
180
log_messages.printf(MSG_NORMAL,
181
"[version] %d %s not reliable\n",
182
cav.version_num, cav.plan_class
187
// test: host already got its daily quota of jobs for this version
// (this call also marks the record as having exceeded its quota)
if (daily_quota_exceeded(gavid, cav.host_usage)) {
188
if (config.debug_version_select) {
189
log_messages.printf(MSG_NORMAL,
190
"[version] %d %s daily quota exceeded\n",
191
cav.version_num, cav.plan_class
196
// test: client's version is older than the app's minimum version
if (cav.version_num < app.min_version) {
197
if (config.debug_version_select) {
198
log_messages.printf(MSG_NORMAL,
199
"[version] %d %s version < min version\n",
200
cav.version_num, cav.plan_class
206
// test: version uses a resource for which no work is requested
if (!need_this_resource(cav.host_usage, NULL, &cav)) {
207
if (config.debug_version_select) {
208
log_messages.printf(MSG_NORMAL,
209
"[version] %d %s don't need resource\n",
210
cav.version_num, cav.plan_class
216
// compare projected FLOPS with the best candidate so far
// NOTE(review): "best" is dereferenced here; the guard for best == NULL
// is not visible in this copy - confirm it exists
if (cav.host_usage.projected_flops > best->host_usage.projected_flops) {
224
if (config.debug_version_select) {
225
log_messages.printf(MSG_NORMAL,
226
"[version] Didn't find anonymous platform app for %s\n",
234
_("Your app_info.xml file doesn't have a usable version of"),
235
app.user_friendly_name
237
add_no_work_message(message);
243
// cav.host_usage.projected_flops
244
// This is the <flops> specified in app_info.xml
245
// If not specified there, it's a conservative estimate
246
// (CPU speed * (ncpus + ngpus))
247
// In either case, this value will be used by the client
248
// to estimate job runtime and runtime limit
249
// est runtime = wu.rsc_fpops_est/x
250
// runtime limit = wu.rsc_fpops_bound/x
251
// x may be way off from the actual speed.
252
// To get accurate runtime est, we need to adjust wu.rsc_fpops_est
255
// cav.host_usage.projected_flops
256
// An estimate of the actual FLOPS the app will get,
257
// based on elapsed time history (if possible).
258
// This is used by the scheduler to estimate runtime.
259
// cav.rsc_fpops_scale
260
// wu.rsc_fpops_est and wu.rsc_fpops_bound will be scaled by this
262
// called at start of send_work().
//
// For each entry of g_request->client_app_versions that has an app:
// - reset cav.rsc_fpops_scale to 1
// - if the version declares no CPUs and no GPUs, assume 1 CPU
// - if it has no projected_flops, use the host's p_fpops
// - if the host's elapsed-time statistics for this version have more than
//   MIN_HOST_SAMPLES samples and a positive average, replace
//   projected_flops by the reciprocal of that average and set
//   rsc_fpops_scale to (old projected_flops)/(new projected_flops)
// With config.debug_version_select set, the outcome is logged.
//
// NOTE(review): some lines of this function are not visible in this copy
// (the declaration of i, closing braces, the start of the condition on
// havp, and some printf arguments) - confirm against the complete source.
//
264
void estimate_flops_anon_platform() {
266
for (i=0; i<g_request->client_app_versions.size(); i++) {
267
CLIENT_APP_VERSION& cav = g_request->client_app_versions[i];
268
if (!cav.app) continue;
270
// default: job size estimates are sent unscaled
cav.rsc_fpops_scale = 1;
272
// a version that declares no resources at all is taken to use one CPU
if (cav.host_usage.avg_ncpus == 0 && cav.host_usage.ncudas == 0 && cav.host_usage.natis == 0) {
273
cav.host_usage.avg_ncpus = 1;
276
// if projected_flops is missing, make a wild guess
278
if (cav.host_usage.projected_flops == 0) {
279
cav.host_usage.projected_flops = g_reply->host.p_fpops;
282
// find the host's statistics for this resource type and app
DB_HOST_APP_VERSION* havp = gavid_to_havp(
283
generalized_app_version_id(
284
cav.host_usage.resource_type(), cav.app->id
288
&& (havp->et.n > MIN_HOST_SAMPLES)
289
// a zero average must not reach the division below
&& (havp->et.get_avg() > 0)
291
double new_flops = 1./havp->et.get_avg();
292
// the scale must be computed before projected_flops is overwritten
cav.rsc_fpops_scale = cav.host_usage.projected_flops/new_flops;
293
cav.host_usage.projected_flops = new_flops;
294
if (config.debug_version_select) {
295
log_messages.printf(MSG_NORMAL,
296
"[version] (%s) setting projected flops to %fG based on ET\n",
297
cav.plan_class, new_flops/1e9
299
log_messages.printf(MSG_NORMAL,
300
"[version] setting rsc_fpops_scale to %g\n",
305
if (config.debug_version_select) {
306
log_messages.printf(MSG_NORMAL,
307
"[version] (%s) using client-supplied flops %fG\n",
308
cav.plan_class, cav.host_usage.projected_flops/1e9
315
// if we have enough statistics to estimate the app version's
316
// actual FLOPS on this host, do so.
318
void estimate_flops(HOST_USAGE& hu, APP_VERSION& av) {
319
DB_HOST_APP_VERSION* havp = gavid_to_havp(av.id);
320
if (havp && havp->et.n > MIN_HOST_SAMPLES) {
321
double new_flops = 1./havp->et.get_avg();
322
hu.projected_flops = new_flops;
323
if (config.debug_version_select) {
324
log_messages.printf(MSG_NORMAL,
325
"[version] [AV#%d] (%s) setting projected flops based on host elapsed time avg: %.2fG\n",
326
av.id, av.plan_class, hu.projected_flops/1e9
331
hu.projected_flops *= av.pfc_scale;
332
if (config.debug_version_select) {
333
log_messages.printf(MSG_NORMAL,
334
"[version] [AV#%d] (%s) adjusting projected flops based on PFC scale: %.2fG\n",
335
av.id, av.plan_class, hu.projected_flops/1e9
339
if (config.debug_version_select) {
340
log_messages.printf(MSG_NORMAL,
341
"[version] [AV#%d] (%s) using unscaled projected flops: %.2fG\n",
342
av.id, av.plan_class, hu.projected_flops/1e9
349
// return a string describing an app version
//
// bav: the version to describe
// buf: receives the NUL-terminated description; it is written with
//      sprintf(), so nothing bounds the length.
//      Callers in this file pass a 256-byte buffer.
//
// Two forms are visible: "anonymous platform (<resource name>)"
// and "[AV#<id>]" using bav.avp->id.
// NOTE(review): the conditions that select between these forms (and any
// other case handled before them) are not visible in this copy of the file
// - confirm against the complete source.
//
351
static void app_version_desc(BEST_APP_VERSION& bav, char* buf) {
357
sprintf(buf, "anonymous platform (%s)", bav.host_usage.resource_name());
359
// dereferences bav.avp, so this form requires a non-NULL avp
sprintf(buf, "[AV#%d]", bav.avp->id);
363
// different OSs have different max user address space for 32 bit apps
//
// The OS is recognized by substring match on g_request->platform.name,
// testing in this order: "windows", "linux", "darwin", "solaris",
// "anonymous".
// The result is compared against wu.rsc_memory_bound by get_app_version()
// to decide whether a job needs a 64-bit app version.
//
// NOTE(review): the value returned by each branch, and the result for a
// platform name matching none of the substrings, are not visible in this
// copy of the file - confirm against the complete source.
//
365
static double max_32b_address_space() {
366
if (strstr(g_request->platform.name, "windows")) {
368
} else if (strstr(g_request->platform.name, "linux")) {
370
} else if (strstr(g_request->platform.name, "darwin")) {
372
} else if (strstr(g_request->platform.name, "solaris")) {
374
} else if (strstr(g_request->platform.name, "anonymous")) {
375
// problem case. assume windows
381
// return BEST_APP_VERSION for the given job and host, or NULL if none
383
// check_req: check whether we still need work for the resource
384
// This check is not done for:
387
// reliable_only: use only versions for which this host is "reliable"
389
// We "memoize" the results, maintaining an array g_wreq->best_app_versions
390
// that maps app ID to the best app version (or NULL).
392
BEST_APP_VERSION* get_app_version(
393
WORKUNIT& wu, bool check_req, bool reliable_only
397
BEST_APP_VERSION* bavp;
398
char message[256], buf[256];
399
bool job_needs_64b = (wu.rsc_memory_bound > max_32b_address_space());
401
if (config.debug_version_select) {
403
log_messages.printf(MSG_NORMAL,
404
"[version] job needs 64-bit app version: mem bnd %f\n",
410
APP* app = ssp->lookup_app(wu.appid);
412
log_messages.printf(MSG_CRITICAL,
413
"WU refers to nonexistent app: %d\n", wu.appid
418
// see if app is already in memoized array
420
std::vector<BEST_APP_VERSION*>::iterator bavi;
421
bavi = g_wreq->best_app_versions.begin();
422
while (bavi != g_wreq->best_app_versions.end()) {
424
if (bavp->appid == wu.appid && (job_needs_64b == bavp->for_64b_jobs)) {
425
if (!bavp->present) {
427
if (config.debug_version_select) {
428
log_messages.printf(MSG_NORMAL,
429
"[version] returning cached NULL\n"
436
// if we're at the jobs-in-progress limit for this
437
// app and resource type, fall through and find another version
439
if (config.max_jobs_in_progress.exceeded(app, bavp->host_usage.uses_gpu())) {
440
if (config.debug_version_select) {
441
app_version_desc(*bavp, buf);
442
log_messages.printf(MSG_NORMAL,
443
"[version] %s: max jobs in progress exceeded\n", buf
446
g_wreq->best_app_versions.erase(bavi);
450
// if we previously chose a CUDA app but don't need more CUDA work,
451
// fall through and find another version
454
&& g_wreq->rsc_spec_request
455
&& bavp->host_usage.ncudas > 0
456
&& !g_wreq->need_cuda()
458
if (config.debug_version_select) {
459
log_messages.printf(MSG_NORMAL,
460
"[version] have CUDA version but no more CUDA work needed\n"
463
g_wreq->best_app_versions.erase(bavi);
470
&& g_wreq->rsc_spec_request
471
&& bavp->host_usage.natis > 0
472
&& !g_wreq->need_ati()
474
if (config.debug_version_select) {
475
log_messages.printf(MSG_NORMAL,
476
"[version] have ATI version but no more ATI work needed\n"
479
g_wreq->best_app_versions.erase(bavi);
486
&& g_wreq->rsc_spec_request
487
&& !bavp->host_usage.ncudas
488
&& !bavp->host_usage.natis
489
&& !g_wreq->need_cpu()
491
if (config.debug_version_select) {
492
log_messages.printf(MSG_NORMAL,
493
"[version] have CPU version but no more CPU work needed\n"
496
g_wreq->best_app_versions.erase(bavi);
500
if (config.debug_version_select) {
501
app_version_desc(*bavp, buf);
502
log_messages.printf(MSG_NORMAL,
503
"[version] returning cached version: %s\n", buf
511
if (config.debug_version_select) {
512
log_messages.printf(MSG_NORMAL,
513
"[version] looking for version of %s\n",
518
bavp = new BEST_APP_VERSION;
519
bavp->appid = wu.appid;
520
bavp->for_64b_jobs = job_needs_64b;
521
if (g_wreq->anonymous_platform) {
522
CLIENT_APP_VERSION* cavp = get_app_version_anonymous(
523
*app, job_needs_64b, reliable_only
526
bavp->present = false;
528
bavp->present = true;
529
bavp->host_usage = cavp->host_usage;
531
int gavid = host_usage_to_gavid(cavp->host_usage, *app);
532
bavp->reliable = app_version_is_reliable(gavid);
533
bavp->trusted = app_version_is_trusted(gavid);
534
if (config.debug_version_select) {
535
app_version_desc(*bavp, buf);
536
log_messages.printf(MSG_NORMAL, "[version] using %s\n", buf);
539
g_wreq->best_app_versions.push_back(bavp);
540
g_wreq->all_best_app_versions.push_back(bavp);
541
if (!bavp->present) return NULL;
545
// Go through the client's platforms,
546
// and scan the app versions for each platform.
547
// Pick the one with highest expected FLOPS
549
// if config.prefer_primary_platform is set:
550
// stop scanning platforms once we find a feasible version
552
bavp->host_usage.projected_flops = 0;
554
bool no_version_for_platform = true;
555
for (i=0; i<g_request->platforms.list.size(); i++) {
556
bool found_feasible_version = false;
557
PLATFORM* p = g_request->platforms.list[i];
558
if (job_needs_64b && !is_64b_platform(p->name)) {
561
for (j=0; j<ssp->napp_versions; j++) {
562
HOST_USAGE host_usage;
563
APP_VERSION& av = ssp->app_versions[j];
564
if (av.appid != wu.appid) continue;
565
if (av.platformid != p->id) continue;
566
no_version_for_platform = false;
568
if (g_request->core_client_version < av.min_core_version) {
569
if (config.debug_version_select) {
570
log_messages.printf(MSG_NORMAL,
571
"[version] [AV#%d] client version %d < min core version %d\n",
572
av.id, g_request->core_client_version, av.min_core_version
575
g_wreq->outdated_client = true;
578
if (strlen(av.plan_class)) {
579
if (!g_request->client_cap_plan_class) {
580
if (config.debug_version_select) {
581
log_messages.printf(MSG_NORMAL,
582
"[version] [AV#%d] client %d lacks plan class capability\n",
583
av.id, g_request->core_client_version
588
if (!app_plan(*g_request, av.plan_class, host_usage)) {
592
host_usage.sequential_app(g_reply->host.p_fpops);
595
// skip versions that go against resource prefs
597
if (host_usage.ncudas && g_wreq->no_cuda) {
598
if (config.debug_version_select) {
599
log_messages.printf(MSG_NORMAL,
600
"[version] [AV#%d] Skipping CUDA version - user prefs say no CUDA\n",
603
g_wreq->no_cuda_prefs = true;
607
if (host_usage.natis && g_wreq->no_ati) {
608
if (config.debug_version_select) {
609
log_messages.printf(MSG_NORMAL,
610
"[version] [AV#%d] Skipping ATI version - user prefs say no ATI\n",
613
g_wreq->no_ati_prefs = true;
617
if (!(host_usage.uses_gpu()) && g_wreq->no_cpu) {
618
if (config.debug_version_select) {
619
log_messages.printf(MSG_NORMAL,
620
"[version] [AV#%d] Skipping CPU version - user prefs say no CPUs\n",
623
g_wreq->no_cpu_prefs = true;
628
if (reliable_only && !app_version_is_reliable(av.id)) {
629
if (config.debug_version_select) {
630
log_messages.printf(MSG_NORMAL,
631
"[version] [AV#%d] not reliable\n", av.id
637
if (daily_quota_exceeded(av.id, host_usage)) {
638
if (config.debug_version_select) {
639
log_messages.printf(MSG_NORMAL,
640
"[version] [AV#%d] daily quota exceeded\n", av.id
646
// skip versions for which we're at the jobs-in-progress limit
648
if (config.max_jobs_in_progress.exceeded(app, host_usage.uses_gpu())) {
652
// skip versions for resources we don't need
654
if (!need_this_resource(host_usage, &av, NULL)) {
658
// at this point we know the version is feasible,
659
// so if config.prefer_primary_platform is set
660
// we won't look any further.
662
found_feasible_version = true;
664
estimate_flops(host_usage, av);
666
// pick the fastest version
668
if (host_usage.projected_flops > bavp->host_usage.projected_flops) {
669
bavp->host_usage = host_usage;
671
bavp->reliable = app_version_is_reliable(av.id);
672
bavp->trusted = app_version_is_trusted(av.id);
674
} // loop over app versions
676
if (config.prefer_primary_platform && found_feasible_version) {
679
} // loop over client platforms
682
if (config.debug_version_select) {
683
log_messages.printf(MSG_NORMAL,
684
"[version] Best version of app %s is [AV#%d] (%.2f GFLOPS)\n",
685
app->name, bavp->avp->id, bavp->host_usage.projected_flops/1e9
688
bavp->present = true;
689
g_wreq->best_app_versions.push_back(bavp);
691
// Here if there's no app version we can use.
693
if (config.debug_version_select) {
694
log_messages.printf(MSG_NORMAL,
695
"[version] returning NULL; platforms:\n"
697
for (i=0; i<g_request->platforms.list.size(); i++) {
698
PLATFORM* p = g_request->platforms.list[i];
699
log_messages.printf(MSG_NORMAL,
705
if (no_version_for_platform) {
708
app->user_friendly_name,
709
_("is not available for your type of computer")
711
add_no_work_message(message);
713
g_wreq->best_app_versions.push_back(bavp);