344
342
* decoded value of time in seconds in the unsigned long integer.
347
static int gettime(pres, ret)
346
gettime(resource *pres, unsigned long *ret)
352
349
if (pres->rs_value.at_type != ATR_TYPE_LONG)
395
392
* Accepts a job pointer. Returns the sum of all cpu time consumed for all
396
393
* tasks executed by the job, in seconds, adjusted by cputfactor.
398
static unsigned long cput_sum(pjob)
401
398
static char id[] = "cput_sum";
407
sprintf(log_buffer,"proc_array loop start - jobid = %s",
408
pjob->ji_qs.ji_jobid);
410
log_record(PBSEVENT_DEBUG,0,id,log_buffer);
408
413
for (i = 0; i < nproc; i++)
418
423
cputime += pp->ki_runtime / 1000000;
427
sprintf(log_buffer,"%s: session=%d pid=%d cputime=%llu",
431
(long long unsigned)pp->ki_runtime / 1000000);
433
log_record(PBSEVENT_SYSTEM,0,id,log_buffer);
420
436
DBPRT(("%s: ses %d pid %d cputime %llu\n", id,
421
437
sess_tbl[i], pp->ki_pid, (long long unsigned)pp->ki_runtime / 1000000))
524
539
error(char *string, int value)
529
543
assert(string != NULL);
530
544
assert(*string != '\0');
531
assert(value > PBSE_); /* minimum PBS error number */
532
assert(value <= PBSE_NOSYNCMSTR); /* maximum PBS error number */
533
assert(pbs_err_to_txt[i].err_no != 0);
537
if (pbs_err_to_txt[i].err_no == value)
540
while (pbs_err_to_txt[++i].err_no != 0);
542
assert(pbs_err_to_txt[i].err_txt != NULL);
544
message = *pbs_err_to_txt[i].err_txt;
546
message = pbse_to_txt(value);
546
548
assert(message != NULL);
548
549
assert(*message != '\0');
550
551
(void)fprintf(stderr, msg_momsetlim, string, message);
575
576
* existing limits. Cannot alter those set by setrlimit (kernel)
576
577
* because we are the wrong process.
578
int mom_set_limits(pjob, set_mode)
580
int set_mode; /* SET_LIMIT_SET or SET_LIMIT_ALTER */
582
int set_mode /* SET_LIMIT_SET or SET_LIMIT_ALTER */
582
585
char *id = "mom_set_limits";
611
614
if (strcmp(pname, "cput") == 0)
613
/* cpu time - check, if less than pcput use it */
614
retval = gettime(pres, &value);
616
if (igncput == FALSE)
618
/* cpu time - check, if less than pcput use it */
619
retval = gettime(pres, &value);
616
if (retval != PBSE_NONE)
617
return (error(pname, retval));
621
if (retval != PBSE_NONE)
622
return (error(pname, retval));
619
625
else if (strcmp(pname, "pcput") == 0)
621
/* process cpu time - set */
622
retval = gettime(pres, &value);
624
if (retval != PBSE_NONE)
625
return (error(pname, retval));
627
reslim.rlim_cur = reslim.rlim_max =
628
(unsigned long)((double)value / cputfactor);
630
if (setrlimit(RLIMIT_CPU, &reslim) < 0)
631
return (error("RLIMIT_CPU", PBSE_SYSTEM));
627
if (igncput == FALSE)
629
/* process cpu time - set */
630
retval = gettime(pres, &value);
632
if (retval != PBSE_NONE)
633
return (error(pname, retval));
635
reslim.rlim_cur = reslim.rlim_max =
636
(unsigned long)((double)value / cputfactor);
638
if (setrlimit(RLIMIT_CPU, &reslim) < 0)
639
return (error("RLIMIT_CPU", PBSE_SYSTEM));
633
642
else if (strcmp(pname, "file") == 0) /* set */
651
660
else if (strcmp(pname, "vmem") == 0) /* check */
653
retval = getsize(pres, &value);
655
if (retval != PBSE_NONE)
656
return (error(pname, retval));
658
if ((mem_limit == 0) || (value < mem_limit))
661
else if (strcmp(pname, "pvmem") == 0) /* set */
663
if (set_mode == SET_LIMIT_SET)
662
if (ignvmem == FALSE)
665
664
retval = getsize(pres, &value);
667
666
if (retval != PBSE_NONE)
668
667
return (error(pname, retval));
670
if (value > ULONG_MAX)
671
return (error(pname, PBSE_BADATVAL));
673
669
if ((mem_limit == 0) || (value < mem_limit))
674
670
mem_limit = value;
673
else if (strcmp(pname, "pvmem") == 0) /* set */
675
if (ignvmem == FALSE)
677
if (set_mode == SET_LIMIT_SET)
679
retval = getsize(pres, &value);
681
if (retval != PBSE_NONE)
682
return (error(pname, retval));
684
if (value > ULONG_MAX)
685
return (error(pname, PBSE_BADATVAL));
687
if ((mem_limit == 0) || (value < mem_limit))
677
692
else if (strcmp(pname, "mem") == 0) /* ignore */
680
695
else if (strcmp(pname, "pmem") == 0) /* set */
682
if (set_mode == SET_LIMIT_SET)
684
retval = getsize(pres, &value);
686
if (retval != PBSE_NONE)
687
return (error(pname, retval));
689
reslim.rlim_cur = reslim.rlim_max = value;
691
if (setrlimit(RLIMIT_RSS, &reslim) < 0)
692
return (error("RLIMIT_RSS", PBSE_SYSTEM));
699
if (set_mode == SET_LIMIT_SET)
701
retval = getsize(pres, &value);
703
if (retval != PBSE_NONE)
704
return (error(pname, retval));
706
reslim.rlim_cur = reslim.rlim_max = value;
708
if (setrlimit(RLIMIT_RSS, &reslim) < 0)
709
return (error("RLIMIT_RSS", PBSE_SYSTEM));
695
713
else if (strcmp(pname, "walltime") == 0) /* Check */
1094
1108
sesid = ptask->ti_qs.ti_sid;
1096
1110
if (sesid <= 1)
1114
sprintf(log_buffer,"cannot send signal %d to task (no session id)",
1120
ptask->ti_job->ji_qs.ji_jobid,
1131
sprintf(log_buffer,"sending signal %d to task",
1137
ptask->ti_job->ji_qs.ji_jobid,
1099
1141
if ((err = mom_get_sample()) != PBSE_NONE)
1107
1149
if (sesid != sess_tbl[i])
1110
DBPRT(("%s: send signal %d to pid %d\n", id,
1154
sprintf(log_buffer,"%s: killing pid %d task %d with sig %d",
1157
ptask->ti_qs.ti_task,
1163
ptask->ti_job->ji_qs.ji_jobid,
1112
1167
(void)kill(pp->ki_pid, sig);
1158
1221
* If abort is true, kill it too.
1161
int mach_checkpoint(ptask, file, abort)
1225
mach_checkpoint(task *ptask, char *file, int abort)
1280
sprintf(log_buffer,"proc_array loop start - jobid = %d",
1283
log_record(PBSEVENT_DEBUG,0,id,log_buffer);
1217
1286
for (i = 0; i < nproc; i++)
1508
sprintf(log_buffer,"proc_array loop start - jobid = %d",
1511
log_record(PBSEVENT_DEBUG,0,id,log_buffer);
1436
1514
for (i = 0; i < nproc; i++)