1
/*___INFO__MARK_BEGIN__*/
2
/*************************************************************************
4
* The Contents of this file are made available subject to the terms of
5
* the Sun Industry Standards Source License Version 1.2
7
* Sun Microsystems Inc., March, 2001
10
* Sun Industry Standards Source License Version 1.2
11
* =================================================
12
* The contents of this file are subject to the Sun Industry Standards
13
* Source License Version 1.2 (the "License"); You may not use this file
14
* except in compliance with the License. You may obtain a copy of the
15
* License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html
17
* Software provided under this License is provided on an "AS IS" basis,
18
* WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
19
* WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
20
* MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
21
* See the License for the specific provisions governing your rights and
22
* obligations concerning the Software.
24
* The Initial Developer of the Original Code is: Sun Microsystems, Inc.
26
* Copyright: 2001 by Sun Microsystems, Inc.
28
* All Rights Reserved.
30
************************************************************************/
31
/*___INFO__MARK_END__*/
42
#include "sge_qinstance.h"
43
#include "sge_centry.h"
44
#include "sge_hostname.h"
45
#include "sge_schedd_conf.h"
48
#include "sge_resource_utilization.h"
49
#include "sge_advance_reservation.h"
51
#include "sge_select_queue.h"
53
/* At that point in time we only keep references in the iterator that
54
* allow for efficiently iterating through the relevant queue end times
55
* that affect resource utilization of a particular job.
57
* Further improvements with sge_qeti_t might allow to notably reduce
58
* the time for the actual resource selection: It might be useful for
59
* example to enhance sge_qeti_t towards keeping all information required
60
* to decide very quickly about eligibility of each host/queue.
64
lList *cr_refs_global;
70
* Assume a parallel job with a license and h_vmem request. At the time
71
* when we seek for reservation the changes with the resource utilization
72
* diagrams relevant for this job are marked with a '+'
74
* After initializing the time iterator using sge_qeti_allocate() the
75
* iterator keeps references to all resource instances (QETI_resource_instance)
76
* shown in the diagram below and all queue end next (QETI_queue_end_next)
77
* references refer to the very end of those resource diagrams.
79
* mpi_pe slots +------+-----------+
80
* Global license +------+-----------+-----+
81
* Host1 h_vmem +------+-----------+
82
* Host2 h_vmem +------+-----------+
83
* Queue1 slots +------+---+-------+-----+
84
* Queue2 slots +--+------+---+-------+
87
* After sge_qeti_first() returned time mark #1 the queue end next
88
* references for Global license and the Queue1 slot point to #2.
89
* Then after sge_qeti_next() returned time mark #2 the queue end
90
* next references for Queue1/2 slot point to #3 whereas all remaining
91
* queue end next references point to #4 ...
97
/****** sge_qeti/sge_qeti_list_add() *******************************************
99
* sge_qeti_list_add() -- Adds a resource utilization to QETI resource list
102
* static int sge_qeti_list_add(lList **lpp, const char *name, lList*
103
* rue_lp, double total, bool must_exist)
109
* lList **lpp - QETI resource list
110
* const char *name - Name of the resource
111
* lList* rue_lp - Resource utilization entry (RUE_Type)
112
* double total - Total resource amount
113
* bool must_exist - If true the entry must exist in 'lpp'.
116
* static int - 0 on success
119
* MT-NOTE: sge_qeti_list_add() is not MT safe
120
*******************************************************************************/
121
static int sge_qeti_list_add(lList **lpp, const char *name, lList* rue_lp, double total, bool must_exist)
123
lListElem *tmp_cr_ref, *ep;
125
DENTER(TOP_LAYER, "sge_qeti_list_add");
127
/* Look up the utilization entry called 'name' in rue_lp. When there is
   none, the result depends on the caller: -1 if the entry was declared
   mandatory (must_exist), 0 otherwise. */
if (!(tmp_cr_ref = lGetElemStr(rue_lp, RUE_name, name))) {
128
DRETURN(must_exist?-1:0);
131
/* Create the target list lazily on first use; the condition is true
   only when the list is missing and could not be created. */
if (!*lpp && !(*lpp = lCreateList("pe_qeti", QETI_Type))) {
135
/* Allocate the new QETI element; the condition is true on failure. */
if (!(ep = lCreateElem(QETI_Type))) {
140
/* The element only stores a reference to the utilization entry found
   above (not a copy) together with the total amount passed in. */
lSetRef(ep, QETI_resource_instance, tmp_cr_ref);
141
lSetDouble(ep, QETI_total, total);
142
lAppendElem(*lpp, ep);
147
/* Adds QETI entries to 'qeti_to_add' for resources of one resource
   container. The callers in this file pass the resource utilization list
   and the consumable config list of a host, a queue or an AR queue.

   qeti_to_add - list that receives the entries (via sge_qeti_list_add())
   rue_list    - utilization entries, looked up by RUE_name
   total_list  - elements looked up by CE_name; CE_doubleval is used as total
   centry_list - complex entry configuration, consulted for CE_consumable
   requests    - explicit requests, iterated and looked up by CE_name
   force_slots - part of the condition of the implicit slot request

   The return statements are not visible here; callers in this file treat
   a result != 0 as failure. */
static int sge_add_qeti_resource_container(lList **qeti_to_add, lList* rue_list,
148
lList* total_list, lList* centry_list, lList* requests, bool force_slots)
150
lListElem *req, *actual, *tep;
152
lListElem *centry_config = NULL;
154
DENTER(TOP_LAYER, "sge_add_qeti_resource_container");
156
/* implicit slot request: taken when total_list has a slots entry
   and the caller set force_slots */
157
if ( ((tep = lGetElemStr(total_list, CE_name, SGE_ATTR_SLOTS)) != NULL) && force_slots) {
162
/* must_exist is true here: a missing slots utilization entry makes
   sge_qeti_list_add() return non-zero */
if (tep && sge_qeti_list_add(qeti_to_add, SGE_ATTR_SLOTS, rue_list,
163
lGetDouble(tep, CE_doubleval), true)) {
168
/* default request: looks at the slots entry of rue_list, if there is one */
169
actual = lGetElemStr(rue_list, RUE_name, SGE_ATTR_SLOTS);
170
if (actual != NULL) {
171
name = lGetString(actual, RUE_name);
172
centry_config = lGetElemStr(centry_list, CE_name, name);
174
/* Only a consumable that was not requested explicitly is handled here.
   NOTE(review): centry_config is not checked for NULL before lGetBool(),
   although the explicit-request loop below does check it - confirm that
   the lookup above cannot fail or that lGetBool() tolerates NULL. */
if (lGetBool(centry_config, CE_consumable)==true && !is_requested(requests, name)) {
175
/* true when total_list has no entry for 'name' or when adding fails */
if (!(tep = lGetElemStr(total_list, CE_name, name)) ||
176
sge_qeti_list_add(qeti_to_add, name, rue_list, lGetDouble(tep, CE_doubleval), true)) {
183
/* explicit requests: every requested resource that is configured as a
   consumable and has an entry in total_list */
184
for_each(req, requests) {
185
name = lGetString(req, CE_name);
186
centry_config = lGetElemStr(centry_list, CE_name, name);
188
if ((centry_config && lGetBool(centry_config, CE_consumable)==true) &&
189
(tep = lGetElemStr(total_list, CE_name, name))) {
190
/* must_exist is false here: a missing utilization entry in rue_list
   is not reported as an error by sge_qeti_list_add() */
if (sge_qeti_list_add(qeti_to_add, name, rue_list, lGetDouble(tep, CE_doubleval), false)) {
201
/* Allocates a zero-initialized iterator and adds the slots utilization
   entry of 'cr_list' to its cr_refs_pe list.
   NOTE(review): the total amount is the hard-coded value 10 and the
   result of sge_qeti_list_add() is ignored - confirm both are intended. */
sge_qeti_t *sge_qeti_allocate2(lList *cr_list)
205
if (!(iter = calloc(1, sizeof(sge_qeti_t)))) {
209
sge_qeti_list_add(&iter->cr_refs_pe, SGE_ATTR_SLOTS, cr_list, 10, true);
213
/* Allocates a queue end time iterator for 'job' and fills its four
   reference lists:
     cr_refs_pe     - slots utilization of the parallel environment 'pe'
     cr_refs_global - utilization entries of the global host
     cr_refs_host   - utilization entries of the hosts in host_list
     cr_refs_queue  - utilization entries of queues / AR queues
   Whenever adding references fails the iterator is released again with
   sge_qeti_release(). The return statements are not visible here;
   presumably NULL is returned in the failure cases - TODO confirm. */
sge_qeti_t *sge_qeti_allocate(lListElem *job, lListElem *pe, lListElem *ckpt,
214
lList *host_list, lList *queue_list, lList *centry_list, lList *acl_list, lList *hgrp_list, lList *ar_list)
216
/* NOTE(review): lGetUlong() result is stored in an int; the value is
   later handed to lGetElemUlong() as the AR_id search key. */
int ar_id = lGetUlong(job, JB_ar);
217
sge_qeti_t *iter = NULL;
218
lListElem *next_queue, *qep, *hep;
219
/* only the hard resource requests of the job are considered */
lList *requests = lGetList(job, JB_hard_resource_list);
221
DENTER(TOP_LAYER, "sge_qeti_allocate");
223
/* calloc() leaves all reference lists of the new iterator NULL */
if (!(iter = calloc(1, sizeof(sge_qeti_t)))) {
229
/* add "slot" resource utilization entry of parallel environment;
   the PE slot count is used as the total amount */
230
if (sge_qeti_list_add(&iter->cr_refs_pe, SGE_ATTR_SLOTS,
231
lGetList(pe, PE_resource_utilization), lGetUlong(pe, PE_slots), true)) {
232
sge_qeti_release(&iter);
237
/* add references to global resource utilization entries
238
that might affect jobs queue end time */
239
if ((hep = host_list_locate(host_list, SGE_GLOBAL_NAME))) {
240
if (sge_add_qeti_resource_container(&iter->cr_refs_global,
241
lGetList(hep, EH_resource_utilization), lGetList(hep, EH_consumable_config_list),
242
centry_list, requests, false)!=0) {
243
sge_qeti_release(&iter);
250
/* add references to per host resource utilization entries
251
that might affect jobs queue end time */
252
for_each (hep, host_list) {
255
const void *queue_iterator = NULL;
257
/* this condition is true for the global host, which was already
   handled above */
if (!strcmp((eh_name=lGetHost(hep, EH_name)), SGE_GLOBAL_NAME)) {
261
/* this condition is true for hosts that can never run the job */
if (sge_host_match_static(job, NULL, hep, centry_list, acl_list) == DISPATCH_NEVER_CAT) {
265
/* There must be at least one queue referenced with the parallel
266
environment that resides at this host. And secondly we only
267
consider those hosts that match this job (statically) */
269
for (next_queue = lGetElemHostFirst(queue_list, QU_qhostname, eh_name, &queue_iterator);
271
next_queue = lGetElemHostNext(queue_list, QU_qhostname, eh_name, &queue_iterator)) {
273
/* this condition is true for queues the PE does not reference */
if (!qinstance_is_pe_referenced(qep, pe)) {
277
/* consider only those queues that match this job (statically) */
278
if (sge_queue_match_static(qep, job, pe, ckpt, centry_list, acl_list, hgrp_list, ar_list) != DISPATCH_OK) {
283
/* utilization entries of the queue instance itself */
if (sge_add_qeti_resource_container(&iter->cr_refs_queue,
284
lGetList(qep, QU_resource_utilization), lGetList(qep, QU_consumable_config_list),
285
centry_list, requests, false)!=0) {
286
sge_qeti_release(&iter);
290
/* Utilization entries of the matching queue reserved by the advance
   reservation: the AR is located by ar_id and its reserved queue by the
   full name of qep. The condition selecting between this and the plain
   queue case above is not visible here - presumably it depends on
   whether the job has an AR id; TODO confirm. */
const char *qname = lGetString(qep, QU_full_name);
292
lListElem *ar_ep = lGetElemUlong(ar_list, AR_id, ar_id);
294
ar_queue = lGetSubStr(ar_ep, QU_full_name, qname, AR_reserved_queues);
295
if (sge_add_qeti_resource_container(&iter->cr_refs_queue,
296
lGetList(ar_queue, QU_resource_utilization), lGetList(ar_queue, QU_consumable_config_list),
297
centry_list, requests, false)!=0) {
298
sge_qeti_release(&iter);
308
/* utilization entries of the host itself */
if (sge_add_qeti_resource_container(&iter->cr_refs_host,
309
lGetList(hep, EH_resource_utilization), lGetList(hep, EH_consumable_config_list),
310
centry_list, requests, false)!=0) {
311
sge_qeti_release(&iter);
318
/* debug output: number of references collected per category */
DPRINTF(("QETI: P %d G %d H %d Q %d\n",
319
lGetNumberOfElem(iter->cr_refs_pe),
320
lGetNumberOfElem(iter->cr_refs_global),
321
lGetNumberOfElem(iter->cr_refs_host),
322
lGetNumberOfElem(iter->cr_refs_queue)));
329
/* (Re)initializes the queue end next reference of every element in
   'cref_lp' to the last entry of the utilization diagram (RUE_utilized)
   of the resource instance the element refers to. */
static void sge_qeti_init_refs(lList *cref_lp)
332
lList *utilization_diagram;
335
DENTER(TOP_LAYER, "sge_qeti_init_refs");
337
for_each(cr_ep, cref_lp) {
338
/* rue_ep is only used for the debug output below */
rue_ep = lGetRef(cr_ep, QETI_resource_instance);
339
utilization_diagram = lGetList((lListElem *)lGetRef(cr_ep, QETI_resource_instance), RUE_utilized);
340
DPRINTF((" QETI INIT: %s %p\n", lGetString(rue_ep, RUE_name), utilization_diagram));
341
/* lLast() correctly returns a NULL reference
342
in case of an empty resource utilization diagram */
343
lSetRef(cr_ep, QETI_queue_end_next, lLast(utilization_diagram));
349
/* an empty resource utilization diagram actually means the resource
350
is available now - thus we can skip it when determining the maximum */
351
/* Raises *max_time to the largest RDE_time among the queue end next
   references of 'cref_lp'; *max_time is never lowered because it is the
   starting value of the running maximum. */
static void sge_qeti_max_end_time(u_long32 *max_time, const lList *cref_lp)
353
lListElem *cr_ep, *ref;
354
u_long32 tmp_time = *max_time;
357
DENTER(TOP_LAYER, "sge_qeti_max_end_time");
359
for_each (cr_ep, cref_lp) {
360
/* rue_ep is only used for the debug output */
rue_ep = lGetRef(cr_ep, QETI_resource_instance);
361
/* no queue end next reference: nothing to compare for this resource */
if (!(ref = lGetRef(cr_ep, QETI_queue_end_next))) {
362
DPRINTF((" QETI END: %s\n", lGetString(rue_ep, RUE_name)));
365
DPRINTF((" QETI END: %s "sge_U32CFormat" ("sge_U32CFormat")\n",
366
lGetString(rue_ep, RUE_name), lGetUlong(ref, RDE_time), tmp_time));
367
tmp_time = MAX(tmp_time, lGetUlong(ref, RDE_time));
369
/* publish the result only after the whole list was scanned */
*max_time = tmp_time;
375
/* switch queue end next references to the next entry
376
whose time is larger or equal the specified time */
377
/* NOTE(review): the loop below keeps going while 'time' is less than the
   entry time, i.e. it stops at an entry whose time is less than or equal
   to 'time' (or at NULL). That does not match the wording "larger or
   equal" above - confirm which one is intended. */
static void sge_qeti_switch_to_next(u_long32 time, lList *cref_lp)
379
lListElem *cr_ep, *ref;
382
DENTER(TOP_LAYER, "sge_qeti_switch_to_next");
386
for_each (cr_ep, cref_lp) {
387
/* rue_ep is only used for the debug output */
rue_ep = lGetRef(cr_ep, QETI_resource_instance);
388
/* a NULL reference means this resource has no further entries */
if (!(ref = lGetRef(cr_ep, QETI_queue_end_next))) {
389
DPRINTF((" QETI NEXT: %s (finished)\n", lGetString(rue_ep, RUE_name)));
393
/* The loop body is not visible here; presumably it steps 'ref' to the
   preceding diagram entry - TODO confirm. */
while (ref && time < lGetUlong(ref, RDE_time)) {
397
/* ref may be NULL at this point, hence the guarded lGetUlong() */
DPRINTF((" QETI NEXT: %s set to "sge_U32CFormat" (%p)\n",
398
lGetString(rue_ep, RUE_name), ref?lGetUlong(ref, RDE_time):0, ref));
399
lSetRef(cr_ep, QETI_queue_end_next, ref);
405
/****** sge_qeti/sge_qeti_next_before() ****************************************
407
* sge_qeti_next_before() -- ???
410
* void sge_qeti_next_before(sge_qeti_t *qeti, u_long32 start)
413
* All queue end next references are set in a way that makes
414
* sge_qeti_next() return a time value that is before (i.e. less than)
418
* sge_qeti_t *qeti - ???
419
* u_long32 start - ???
422
* MT-NOTE: sge_qeti_next_before() is MT safe
423
*******************************************************************************/
424
void sge_qeti_next_before(sge_qeti_t *qeti, u_long32 start)
426
/* Reposition the queue end next references of all four reference
   lists (pe, global, host, queue) relative to 'start'. */
sge_qeti_switch_to_next(start, qeti->cr_refs_pe);
427
sge_qeti_switch_to_next(start, qeti->cr_refs_global);
428
sge_qeti_switch_to_next(start, qeti->cr_refs_host);
429
sge_qeti_switch_to_next(start, qeti->cr_refs_queue);
433
/****** sge_resource_utilization/sge_qeti_first() ******************************
435
* sge_qeti_first() --
438
* u_long32 sge_qeti_first(sge_qeti_t *qeti)
441
* Initialize/Reinitialize Queue End Time Iterator. All queue end next
442
* references are initialized to the queue end of all resource instances.
443
* Before we return the time that is most in the future, queue end next
444
* references are switched to the next entry that is earlier than the time
448
* sge_qeti_t *qeti - ???
454
* MT-NOTE: sge_qeti_first() is MT safe
455
*******************************************************************************/
456
u_long32 sge_qeti_first(sge_qeti_t *qeti)
458
/* running maximum over all reference lists, starting at 0 */
u_long32 all_resources_queue_end_time = 0;
460
DENTER(TOP_LAYER, "sge_qeti_first");
462
/* (re)init all queue end next references */
463
sge_qeti_init_refs(qeti->cr_refs_pe);
464
sge_qeti_init_refs(qeti->cr_refs_global);
465
sge_qeti_init_refs(qeti->cr_refs_host);
466
sge_qeti_init_refs(qeti->cr_refs_queue);
468
/* determine all resources queue end time: each call can only raise
   the value accumulated by the previous calls */
469
sge_qeti_max_end_time(&all_resources_queue_end_time, qeti->cr_refs_pe);
470
sge_qeti_max_end_time(&all_resources_queue_end_time, qeti->cr_refs_global);
471
sge_qeti_max_end_time(&all_resources_queue_end_time, qeti->cr_refs_host);
472
sge_qeti_max_end_time(&all_resources_queue_end_time, qeti->cr_refs_queue);
474
DPRINTF(("sge_qeti_first() determines "sge_u32"\n", all_resources_queue_end_time));
476
/* switch to the next entry with all queue end next references whose
477
time is larger (?) or equal to all resources queue end time */
478
sge_qeti_switch_to_next(all_resources_queue_end_time, qeti->cr_refs_pe);
479
sge_qeti_switch_to_next(all_resources_queue_end_time, qeti->cr_refs_global);
480
sge_qeti_switch_to_next(all_resources_queue_end_time, qeti->cr_refs_host);
481
sge_qeti_switch_to_next(all_resources_queue_end_time, qeti->cr_refs_queue);
484
/* the time determined before the references were switched */
return all_resources_queue_end_time;
487
/****** sge_resource_utilization/sge_qeti_next() *******************************
489
* sge_qeti_next() -- ???
492
* u_long32 sge_qeti_next(sge_qeti_t *qeti)
495
* Return the next time that is most in the future. Then queue end next
496
* references are switched to the next entry that is earlier than the time
500
* sge_qeti_t *qeti - ???
506
* MT-NOTE: sge_qeti_next() is MT safe
507
*******************************************************************************/
508
u_long32 sge_qeti_next(sge_qeti_t *qeti)
510
/* DISPATCH_TIME_NOW is the starting value of the running maximum, so it
   is also the result when no reference list contributes a later time */
u_long32 all_resources_queue_end_time = DISPATCH_TIME_NOW;
512
DENTER(TOP_LAYER, "sge_qeti_next");
514
/* determine all resources queue end time, based on the current
   queue end next references (they are not re-initialized here) */
515
sge_qeti_max_end_time(&all_resources_queue_end_time, qeti->cr_refs_pe);
516
sge_qeti_max_end_time(&all_resources_queue_end_time, qeti->cr_refs_global);
517
sge_qeti_max_end_time(&all_resources_queue_end_time, qeti->cr_refs_host);
518
sge_qeti_max_end_time(&all_resources_queue_end_time, qeti->cr_refs_queue);
520
DPRINTF(("sge_qeti_next() determines "sge_u32"\n", all_resources_queue_end_time));
522
/* switch to the next entry with all queue end next references whose
523
time is larger (?) or equal to all resources queue end time */
524
sge_qeti_switch_to_next(all_resources_queue_end_time, qeti->cr_refs_pe);
525
sge_qeti_switch_to_next(all_resources_queue_end_time, qeti->cr_refs_global);
526
sge_qeti_switch_to_next(all_resources_queue_end_time, qeti->cr_refs_host);
527
sge_qeti_switch_to_next(all_resources_queue_end_time, qeti->cr_refs_queue);
530
/* the time determined before the references were switched */
return all_resources_queue_end_time;
533
/****** sge_resource_utilization/sge_qeti_release() ****************************
535
* sge_qeti_release() -- Release queue end time iterator
538
* void sge_qeti_release(sge_qeti_t *qeti)
541
* Release all resources of the queue end time iterator. Referred
542
* resource utilization diagrams are not affected.
545
* sge_qeti_t *qeti - ???
548
* MT-NOTE: sge_qeti_release() is MT safe
549
*******************************************************************************/
550
void sge_qeti_release(sge_qeti_t **qeti)
552
/* guard against a NULL argument and an already released iterator */
if (qeti == NULL || *qeti == NULL) {
556
/* Free the four reference lists of the iterator. The remainder of the
   function is not visible here. */
lFreeList(&((*qeti)->cr_refs_pe));
557
lFreeList(&((*qeti)->cr_refs_global));
558
lFreeList(&((*qeti)->cr_refs_host));
559
lFreeList(&((*qeti)->cr_refs_queue));