1
/*___INFO__MARK_BEGIN__*/
2
/*************************************************************************
4
* The Contents of this file are made available subject to the terms of
5
* the Sun Industry Standards Source License Version 1.2
7
* Sun Microsystems Inc., March, 2001
10
* Sun Industry Standards Source License Version 1.2
11
* =================================================
12
* The contents of this file are subject to the Sun Industry Standards
13
* Source License Version 1.2 (the "License"); You may not use this file
14
* except in compliance with the License. You may obtain a copy of the
15
* License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html
17
* Software provided under this License is provided on an "AS IS" basis,
18
* WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
19
* WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
20
* MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
21
* See the License for the specific provisions governing your rights and
22
* obligations concerning the Software.
24
* The Initial Developer of the Original Code is: Sun Microsystems, Inc.
26
* Copyright: 2001 by Sun Microsystems, Inc.
28
* All Rights Reserved.
30
************************************************************************/
31
/*___INFO__MARK_END__*/
34
* pdc.c - Portable Data Collector Library and Test Module
38
#if !defined(COMPILE_DC)
44
#include "basis_types.h"
45
#include "sge_language.h"
49
int main(int argc,char *argv[])
51
#ifdef __SGE_COMPILE_WITH_GETTEXT__
52
/* init language output for gettext() , it will use the right language */
53
sge_init_language_func((gettext_func_type) gettext,
54
(setlocale_func_type) setlocale,
55
(bindtextdomain_func_type) bindtextdomain,
56
(textdomain_func_type) textdomain);
57
sge_init_language(NULL,NULL);
58
#endif /* __SGE_COMPILE_WITH_GETTEXT__ */
59
printf("sorry - no pdc for this architecture yet\n");
73
#include <sys/types.h>
77
#include <sys/sysmp.h>
78
#include <sys/syssgi.h>
79
#include <sys/arsess.h>
80
#include <sys/procfs.h>
81
#include <sys/sysinfo.h>
82
#include <sys/tcpipstats.h>
83
#include <sys/systeminfo.h>
89
# include <sys/sysinfo.h>
90
# include <machine/hal_sysinfo.h>
92
# include </sys/include/vm/vm_perf.h>
96
#if defined(NECSX4) || defined(NECSX5)
99
# include <sys/types.h>
100
# include <sys/time.h>
101
# include <sys/resource.h>
105
#include <sys/param.h>
106
#include <sys/table.h>
107
#include <sys/sysinfo.h>
109
#include <sys/session.h>
110
#include <sys/cred.h>
111
#include <sys/aoutdata.h>
112
#include <sys/proc.h>
114
#include <sys/swap.h>
115
#include <sys/acct.h>
116
#include <sys/stat.h>
117
#include <sys/machcons.h>
118
#include "sge_unistd.h"
122
# if defined(_ALL_SOURCE)
125
#include <procinfo.h>
126
#include <sys/types.h>
130
#include <sys/param.h>
131
#include <sys/sysctl.h>
132
#include <sys/user.h>
140
#include <sys/sysctl.h>
141
#include <mach/mach.h>
142
#include <mach/task.h>
143
#include <mach/mach_init.h>
148
#include <sys/param.h>
149
#include <sys/pstat.h>
152
#if defined(LINUX) || defined(ALPHA) || defined(IRIX) || defined(SOLARIS) || defined(DARWIN) || defined (FREEBSD) || defined(NETBSD) || defined(HP1164) || defined(AIX)
163
#elif defined(LINUX) || defined(SOLARIS)
172
static FILE *df = NULL;
176
int getpagesize(void);
182
#include "exec_ifm.h"
185
#include "basis_types.h"
188
#include "sge_feature.h"
189
#include "sge_language.h"
193
int job_collection_interval; /* max job data collection interval */
194
int prc_collection_interval; /* max process data collection interval */
195
int sys_collection_interval; /* max system data collection interval */
198
/* default collection intervals */
199
static ps_config_t ps_config = { 0, 0, 5 };
202
long pagesize; /* size of a page of memory (probably 8k) */
203
int physical_memory; /* size of real mem in KB */
204
char unixname[128]; /* the name of the booted kernel */
210
/* Advance pointer variable `ptr` (of type `type *`) by `nbyte` bytes.
 * All arguments and the whole expansion are parenthesized so that compound
 * arguments (e.g. `cond ? a : b`) and use inside larger expressions bind
 * as intended. `ptr` is expanded twice and must be a side-effect-free lvalue. */
#define INCPTR(type, ptr, nbyte) ((ptr) = (type *)((char *)(ptr) + (nbyte)))
211
#define INCJOBPTR(ptr, nbyte) INCPTR(struct psJob_s, ptr, nbyte)
212
#define INCPROCPTR(ptr, nbyte) INCPTR(struct psProc_s, ptr, nbyte)
215
int sup_groups_in_proc (void) {
216
return(sup_grp_in_proc);
220
#if defined(LINUX) || defined(SOLARIS) || defined(ALPHA) || defined(FREEBSD) || defined(DARWIN)
222
void pdc_kill_addgrpid(gid_t add_grp_id, int sig,
223
tShepherd_trace shepherd_trace)
225
#if defined(LINUX) || defined(SOLARIS) || defined(ALPHA)
226
procfs_kill_addgrpid(add_grp_id, sig, shepherd_trace);
227
#elif defined(FREEBSD)
230
struct kinfo_proc *procs;
231
char kerrbuf[_POSIX2_LINE_MAX];
233
kd = kvm_openfiles(NULL, NULL, NULL, O_RDONLY, kerrbuf);
236
fprintf(stderr, "kvm_openfiles: error %s\n", kerrbuf);
241
procs = kvm_getprocs(kd, KERN_PROC_ALL, 0, &nprocs);
244
fprintf(stderr, "kvm_getprocs: error %s\n", kvm_geterr(kd));
249
for (; nprocs >= 0; nprocs--, procs++) {
250
for (i = 0; i < procs->ki_ngroups; i++) {
251
if (procs->ki_groups[i] == add_grp_id) {
254
if (procs->ki_uid != 0 && procs->ki_ruid != 0 &&
255
procs->ki_svuid != 0 &&
256
procs->ki_rgid != 0 && procs->ki_svgid != 0) {
257
kill(procs->ki_pid, sig);
258
sprintf(err_str, MSG_SGE_KILLINGPIDXY_UI ,
259
sge_u32c(procs->ki_pid), add_grp_id);
261
sprintf(err_str, MSG_SGE_DONOTKILLROOTPROCESSXY_UI ,
262
sge_u32c(procs->ki_pid), add_grp_id);
265
shepherd_trace(err_str);
270
#elif defined(DARWIN)
272
struct kinfo_proc *procs;
273
struct kinfo_proc *procs_begin;
274
int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_ALL, 0 };
277
if (sysctl(mib, 4, NULL, &bufSize, NULL, 0) < 0) {
280
if ((procs = (struct kinfo_proc *)malloc(bufSize)) == NULL) {
283
if (sysctl(mib, 4, procs, &bufSize, NULL, 0) < 0) {
288
nprocs = bufSize/sizeof(struct kinfo_proc);
290
for (; nprocs >= 0; nprocs--, procs++) {
291
for (i = 0; i < procs->kp_eproc.e_ucred.cr_ngroups; i++) {
292
if (procs->kp_eproc.e_ucred.cr_groups[i] == add_grp_id) {
295
if (procs->kp_eproc.e_ucred.cr_uid != 0 && procs->kp_eproc.e_pcred.p_ruid != 0 &&
296
procs->kp_eproc.e_pcred.p_svuid != 0 &&
297
procs->kp_eproc.e_pcred.p_rgid != 0 && procs->kp_eproc.e_pcred.p_svgid != 0) {
298
kill(procs->kp_proc.p_pid, sig);
299
sprintf(err_str, MSG_SGE_KILLINGPIDXY_UI ,
300
sge_u32c(procs->kp_proc.p_pid), add_grp_id);
302
sprintf(err_str, MSG_SGE_DONOTKILLROOTPROCESSXY_UI ,
303
sge_u32c(procs->kp_proc.p_pid), add_grp_id);
306
shepherd_trace(err_str);
315
lnk_link_t * find_job(JobID_t jid) {
318
for (curr=job_list.next; curr != &job_list; curr=curr->next) {
319
if (jid == LNK_DATA(curr, job_elem_t, link)->job.jd_jid)
325
#if defined(NECSX4) || defined(NECSX5)
329
return sysconf(_SC_PAGESIZE);
332
/* Convert a microsecond count to seconds as a double. */
# define MICROSEC2SECS(usecs) ((double)(usecs) / 1.0e6)
338
* This is a structure containing all the fields that we need
339
* out of the arsess_t structure. It is filled in by the
340
* pdc_get_arsess() and pdc_get_arsess64() routines.
360
pdc_get_arsess(pdc_arsess_t *parse, arsess_t *arse)
362
/*
 * Copy the fields the PDC needs out of a native arsess_t (*arse) into the
 * layout-independent pdc_arsess_t (*parse).
 * NOTE(review): the return type and return statement are outside this
 * excerpt; presumably an int status like pdc_get_arsess65/64 - confirm.
 */
/* session identity and bookkeeping */
parse->ash = arse->as_handle;
363
parse->pid = arse->as_pid;
364
parse->prid = arse->as_prid;
365
parse->start = arse->as_start;
366
parse->refcnt = arse->as_refcnt;
367
/* accounting timers (as_timers) */
parse->utime = arse->as_timers.ac_utime;
368
parse->stime = arse->as_timers.ac_stime;
369
parse->bwtime = arse->as_timers.ac_bwtime;
370
parse->rwtime = arse->as_timers.ac_rwtime;
371
parse->qwtime = arse->as_timers.ac_qwtime;
372
/* accounting counters (as_counts) */
parse->mem = arse->as_counts.ac_mem;
373
parse->chr = arse->as_counts.ac_chr;
374
parse->chw = arse->as_counts.ac_chw;
379
* define a 64-bit version of arsess_t for use on 64-bit IRIX
382
typedef struct arsess64 {
383
ash_t as_handle; /* array session handle */
384
prid_t as_prid; /* project ID */
386
lock_t as_lock; /* update lock */
388
struct arsess *as_next; /* next arsess in act/free list */
389
struct arsess *as_prev; /* previous arsess in act list */
394
int as_refcnt; /* reference count */
395
time_t as_start; /* start time (secs since 1970) */
396
time_t as_ticks; /* lbolt at start */
397
pid_t as_pid; /* pid that started this session */
398
ushort_t as_flag; /* various flags */
399
char as_nice; /* initial nice value of as_pid */
401
/* Accounting data */
402
acct_spi_t as_spi; /* Service Provider Information */
403
acct_timers_t as_timers; /* accounting timers */
404
acct_counts_t as_counts; /* accounting counters */
406
__uint64_t as_fill; /* fill for 64-bit structure */
410
typedef struct arsess65 {
411
ash_t as_handle; /* array session handle */
412
prid_t as_prid; /* project ID */
413
int as_refcnt; /* reference count */
414
time_t as_start; /* start time (secs since 1970) */
415
time_t as_ticks; /* lbolt at start */
416
pid_t as_pid; /* pid that started this session */
417
int as_spilen; /* length of Service Provider Info */
418
ushort_t as_flag; /* various flags */
419
char as_nice; /* initial nice value of as_pid */
420
char as_rsrv1[985]; /* reserved */
422
/* Accounting data */
423
char as_spi[1024]; /* Service Provider Info */
424
acct_timers_t as_timers; /* accounting timers */
425
acct_counts_t as_counts; /* accounting counters */
426
char as_rsrv2[1888]; /* reserved */
430
/*
 * Same field copy as pdc_get_arsess(), but the input buffer is reinterpreted
 * as the arsess65_t layout declared in this file. get_arsess_list() selects
 * this converter when the IRIX release string compares >= "6.5".
 *
 * parse  - destination, receives the extracted fields
 * arsein - raw array-session record, typed arsess_t but read as arsess65_t
 *
 * NOTE(review): the return statement is outside this excerpt.
 */
int pdc_get_arsess65(pdc_arsess_t *parse, arsess_t *arsein)
432
/* view the caller's buffer through the 6.5 layout */
arsess65_t *arse = (arsess65_t *)arsein;
434
/* session identity and bookkeeping */
parse->ash = arse->as_handle;
435
parse->pid = arse->as_pid;
436
parse->prid = arse->as_prid;
437
parse->start = arse->as_start;
438
parse->refcnt = arse->as_refcnt;
439
/* accounting timers (as_timers) */
parse->utime = arse->as_timers.ac_utime;
440
parse->stime = arse->as_timers.ac_stime;
441
parse->bwtime = arse->as_timers.ac_bwtime;
442
parse->rwtime = arse->as_timers.ac_rwtime;
443
parse->qwtime = arse->as_timers.ac_qwtime;
444
/* accounting counters (as_counts) */
parse->mem = arse->as_counts.ac_mem;
445
parse->chr = arse->as_counts.ac_chr;
446
parse->chw = arse->as_counts.ac_chw;
451
/*
 * Same field copy as pdc_get_arsess(), but the input buffer is reinterpreted
 * as the arsess64_t layout declared in this file. get_arsess_list() selects
 * this converter when the release is below "6.5" and
 * sysconf(_SC_KERN_POINTERS) reports 64.
 *
 * parse  - destination, receives the extracted fields
 * arsein - raw array-session record, typed arsess_t but read as arsess64_t
 *
 * NOTE(review): the return statement is outside this excerpt.
 */
int pdc_get_arsess64(pdc_arsess_t *parse, arsess_t *arsein)
453
/* view the caller's buffer through the 64-bit layout */
arsess64_t *arse = (arsess64_t *)arsein;
455
/* session identity and bookkeeping */
parse->ash = arse->as_handle;
456
parse->pid = arse->as_pid;
457
parse->prid = arse->as_prid;
458
parse->start = arse->as_start;
459
parse->refcnt = arse->as_refcnt;
460
/* accounting timers (as_timers) */
parse->utime = arse->as_timers.ac_utime;
461
parse->stime = arse->as_timers.ac_stime;
462
parse->bwtime = arse->as_timers.ac_bwtime;
463
parse->rwtime = arse->as_timers.ac_rwtime;
464
parse->qwtime = arse->as_timers.ac_qwtime;
465
/* accounting counters (as_counts) */
parse->mem = arse->as_counts.ac_mem;
466
parse->chr = arse->as_counts.ac_chr;
467
parse->chw = arse->as_counts.ac_chw;
472
static struct nlist mem_nl[] = {
473
{ "vm_perfsum" }, /* PERFSUM */
480
int readk(off_t where, char *addr, int size) {
481
if (lseek(kmem_fd, where, SEEK_SET) == -1)
483
if (read(kmem_fd, addr, size) == -1)
491
/* Yield the larger of two values; each argument is expanded twice. */
#define MAX(x, y) ((y) < (x) ? (x) : (y))
501
read_kernel_table(char *name, void **table, long *size, int *entries)
506
if (tabinfo(name, &tinfo) < 0) {
510
tsize = tinfo.head + (tinfo.ent * tinfo.len);
512
if (*table) free(*table);
513
*table = malloc(tsize);
514
if (*table == NULL) {
517
memset(*table, 0, tsize);
521
if (tabread(name, (char *)*table, tsize, 0) == -1) {
525
if (entries) *entries = tinfo.ent;
531
cvt_comp_t(comp_t comp)
535
/*
 * Decode a packed comp_t accounting value.
 * NOTE(review): the return type, local declarations and the final
 * combination of frac/exp are outside this excerpt.
 */
/* 0x1fffff (all 21 low bits set) is answered with -1 rather than decoded */
if (comp == 0x1fffff) return -1;
536
/* low 16 bits */
frac = comp & 0xffff;
537
/* the 5 bits above the low 16 */
exp = (comp >> 16) & 0x1f;
543
#define PACCT "/usr/adm/acct/day/pacct"
545
/* Convert a clock-tick count to seconds, using the `clk_tck` value that is
 * in scope at the point of use. */
#define CLOCKS2SECS(ticks) ((double)(ticks) / (double)(clk_tck))
548
* read_pacct reads end of process and end of job records from the process
549
* accounting (pacct) file. The process records contain the memory integral
550
* and characters transferred by the process during its lifetime. This
551
* information is not available in the kernel. Instead, it is stored in
552
* the user area of the process while it is running. The end of job
553
* record indicates that a job has completed. This routine is designed
554
* to keep the pacct file open, to handle switching pacct files, and to
555
* handle a corrupted pacct file. If it encounters a corrupted pacct file
556
* it will skip reading until a new pacct file is available. There is a
557
* race condition that read_pacct must protect itself from. When the
558
* shepherd forks the job, the O.S. job ID is set and the job ID is
559
* communicated back to the execd through a file. Once the job ID is
560
* read from the file, then it is communicated to the PDC which will
561
* then recognize any pacct records for processes belonging to the job.
562
* However, while the execd is waiting to read the file, processes running
563
* in the job will likely run and complete and process completion records
564
* will be written to the pacct file. If the PDC reads these pacct records
565
* before the job ID has been registered with the PDC, then these pacct
566
* records will be missed and the memory and I/O usage for the processes
567
* will not be accounted for in the job usage totals. To prevent this, the
568
* PDC will precreate job elements for any processes which it reads in the
569
* pacct data for which a job element does not already exist. If these
570
* precreated jobs are not monitored with a psWatchJob() call within 30
571
* seconds, they will be deleted.
574
#define READ_PACCT_WAIT 0
577
read_pacct(lnk_link_t *job_list, time_t time_stamp)
582
int hdrsize = sizeof(struct achead)+1;
586
SGE_STRUCT_STAT pstat;
587
int more_records = 1;
593
static int corrupted;
594
static fpos_t offset;
596
static FILE *fp = NULL;
597
static SGE_INO_T pacct_inode;
601
df = fopen("/tmp/pacct.out", "w");
605
clk_tck = sysconf(_SC_CLK_TCK);
608
* get inode of pacct file. If it has changed, we know
609
* the old pacct file has been deleted and a new one
610
* has been created. However there may still be records
611
* in the old file that we have not completed reading
612
* so for now we set the newfile flag and will try to
613
* read to the end of the old file before switching to
614
* the new pacct file.
617
if (SGE_STAT(PACCT, &pstat)==0 && pacct_inode != pstat.st_ino)
620
/* don't read corrupted pacct file */
622
if (corrupted && !newfile) {
627
fsetpos(fp, &offset);
630
while (more_records) {
632
while(fp && !feof(fp) && in_window && !corrupted) {
634
if (fread(&hdr, sizeof(hdr), 1, fp) != 1) {
641
if (fread(&flag, 1, 1, fp) != 1) {
646
if (hdr.ah_size > sizeof(acct)) {
651
bytes = hdr.ah_size - hdrsize;
653
if (fread((char *)&acct + hdrsize, bytes, 1, fp) != 1) {
658
if ((flag & ACCTR) == ACCTBASE) {
659
job_elem_t *job_elem;
665
printf("%d. pid=%d uid=%d gid=%d btime=%d utime=%d stime=%d "
667
acct.acctbs.ac_pid, acct.acctbs.ac_uid,
668
acct.acctbs.ac_gid, acct.acctbs.ac_btime,
669
(int)CLOCKS2SECS(cvt_comp_t(acct.acctbs.ac_utime)),
670
(int)CLOCKS2SECS(cvt_comp_t(acct.acctbs.ac_stime)),
671
(int)CLOCKS2SECS(cvt_comp_t(acct.acctbs.ac_etime)));
674
end_time = acct.acctbs.ac_btime +
675
(int)CLOCKS2SECS(cvt_comp_t(acct.acctbs.ac_etime));
677
/* skip pacct records more than a day old */
678
if (end_time < (time_stamp - 60*60*24))
681
in_window = (end_time < (time_stamp - READ_PACCT_WAIT));
686
if (curr=find_job(acct.acctbs.ac_jobid)) {
687
job_elem = LNK_DATA(curr, job_elem_t, link);
688
job = &job_elem->job;
692
fprintf(df, "%d job=%d jid=%d pid=%d uid=%d gid=%d btime=%d "
693
"utime=%d stime=%d etime=%d mem=%d chars=%d\n",
694
time_stamp, job->jd_jid, acct.acctbs.ac_jobid,
695
acct.acctbs.ac_pid, acct.acctbs.ac_uid,
696
acct.acctbs.ac_gid, acct.acctbs.ac_btime,
697
(int)CLOCKS2SECS(cvt_comp_t(acct.acctbs.ac_utime)),
698
(int)CLOCKS2SECS(cvt_comp_t(acct.acctbs.ac_stime)),
699
(int)CLOCKS2SECS(cvt_comp_t(acct.acctbs.ac_etime)),
700
cvt_comp_t(acct.acctbs.ac_mem)*(NBPC/1024)/OS_HZ,
701
cvt_comp_t(acct.acctbs.ac_io));
709
/* If the job is not in the list, add it just in case
710
it is later monitored. If this job is not monitored
711
within 30 seconds of when it was added, it will be
712
deleted. This allows us to account for usage for
713
processes which end before the psWatchJob is called
714
and for processes which end while the execd is down. */
718
fprintf(df, "%d precreating "F64"\n", time_stamp,
719
acct.acctbs.ac_jobid);
724
job_elem = (job_elem_t *)malloc(sizeof(job_elem_t));
725
job = &job_elem->job;
726
memset(job_elem, 0, sizeof(job_elem_t));
727
job_elem->precreated = time_stamp;
728
job_elem->starttime = acct.acctbs.ac_btime;
729
job_elem->job.jd_jid = acct.acctbs.ac_jobid;
730
job_elem->job.jd_length = sizeof(psJob_t);
731
LNK_INIT(&job_elem->procs);
732
LNK_INIT(&job_elem->arses);
733
/* add to job list */
734
LNK_ADD(job_list->prev, &job_elem->link);
738
/* set earliest start time */
739
if (acct.acctbs.ac_btime < job_elem->starttime)
740
job_elem->starttime = acct.acctbs.ac_btime;
742
/* memory used (integral) in K seconds */
744
cvt_comp_t(acct.acctbs.ac_mem)*(NBPC/1024)/OS_HZ;
746
/* characters moved */
747
job->jd_chars += cvt_comp_t(acct.acctbs.ac_io);
749
} else if ((flag & ACCTR) == ACCTEOJ) {
754
printf("%d. jid=%d maxvmem=%d etime=%d\n", jobcount,
755
acct.accteoj.ace_jobid, acct.accteoj.ace_himem,
756
acct.accteoj.ace_etime);
759
end_time = acct.accteoj.ace_etime;
761
/* skip pacct records more than a day old */
762
if (end_time < (time_stamp - 60*60*24))
765
in_window = (end_time < (time_stamp - READ_PACCT_WAIT));
770
if (curr=find_job(acct.accteoj.ace_jobid)) {
771
job_elem_t *job_elem = LNK_DATA(curr, job_elem_t, link);
772
psJob_t *job = &job_elem->job;
775
/* mark job as complete */
779
job->jd_etime = acct.accteoj.ace_etime - job_elem->starttime;
780
if (job->jd_etime < 0) job->jd_etime = 0;
782
/* high-water memory size */
783
himem = cvt_comp_t(acct.accteoj.ace_himem)*NBPC;
784
job->jd_himem = MAX(job->jd_himem, himem);
786
/* file system blocks consumed */
787
job->jd_fsblks = acct.accteoj.ace_fsblkused;
792
fgetpos(fp, &offset);
796
* If we are at the end of the old (deleted) pacct file
797
* or the pacct file is not open or the old file was corrupt,
798
* close the old pacct file and open the new one.
801
if (newfile && (fp==NULL || feof(fp) || corrupted)) {
806
if (SGE_STAT(PACCT, &pstat)==0 && (fp = fopen(PACCT, "r"))) {
807
pacct_inode = pstat.st_ino;
810
fgetpos(fp, &offset);
817
more_records = fp && !feof(fp) && in_window;
833
gettimeofday(&now, NULL);
836
gettimeofday(&now, &tzp);
842
#ifdef PDC_STANDALONE
843
static psSys_t sysdata;
845
#if defined(IRIX) || defined(CRAY)
859
psSetCollectionIntervals(int jobi, int prci, int sysi)
862
ps_config.job_collection_interval = jobi;
865
ps_config.prc_collection_interval = prci;
868
ps_config.sys_collection_interval = sysi;
872
#ifdef PDC_STANDALONE
873
int psRetrieveSystemData(void)
884
off_t swapmax, swapvirt, swaprsrv, swaptot, swapfree;
885
double utime, stime, itime, srtime, wtime, ttime;
887
static uint64 prev_runque, prev_runocc, prev_swpque, prev_swpocc;
888
long clock_tick = sysconf(_SC_CLK_TCK);
890
struct vm_statistics vmstats;
892
static struct sysinfo *si;
894
static struct pw *pw;
896
static struct swapper *sw;
898
double utime, stime, itime, srtime, wtime, ttime;
900
static int prev_runque, prev_runocc, prev_swpque, prev_swpocc;
904
time_t time_stamp = get_gmt();
905
time_t prev_time_stamp;
908
if (time_stamp <= next) {
911
next = time_stamp + ps_config.sys_collection_interval;
913
prev_time_stamp = sysdata.sys_tstamp;
915
/* Time of last snap */
916
sysdata.sys_tstamp = time_stamp;
921
if (sysmp(MP_SAGET, MPSA_SINFO, &si, sizeof(si))<0) {
925
if (sysmp(MP_SAGET, MPSA_RMINFO, &rmi, sizeof(rmi))<0) {
929
if (sysmp(MP_SAGET, MPSA_MINFO, &mi, sizeof(mi))<0) {
935
if (sysmp(MP_SAGET, MPSA_SERR, &se, sizeof(se))<0) {
939
if (sysmp(MP_SAGET, MPSA_DINFO, &di, sizeof(di))<0) {
943
if (sysmp(MP_SAGET, MPSA_TCPIPSTATS, &k, sizeof(k))<0) {
949
if (swapctl(SC_GETFREESWAP, &swapfree)<0) {
953
if (swapctl(SC_GETSWAPMAX, &swapmax)<0) {
957
if (swapctl(SC_GETSWAPVIRT, &swapvirt)<0) {
961
if (swapctl(SC_GETRESVSWAP, &swaprsrv)<0) {
965
if (swapctl(SC_GETSWAPTOT, &swaptot)<0) {
969
/* convert CPU time values to double CPU seconds */
970
utime = (double)si.cpu[CPU_USER] / (double)clock_tick;
971
stime = (double)si.cpu[CPU_KERNEL] / (double)clock_tick;
972
itime = (double)si.cpu[CPU_IDLE] / (double)clock_tick;
974
wtime = (double)si.cpu[CPU_WAIT] / (double)clock_tick;
975
ttime = ((double)si.cpu[CPU_IDLE] + (double)si.cpu[CPU_USER] +
976
(double)si.cpu[CPU_KERNEL] + (double)si.cpu[CPU_WAIT] +
977
(double)si.cpu[CPU_SXBRK] + (double)si.cpu[CPU_INTR]) /
980
/* if this is the first time, initialize base CPU time values */
982
if (!base.initialized) {
983
base.initialized = 1;
987
base.srtime = srtime;
990
prev_runque = si.runque;
991
prev_runocc = si.runocc;
992
prev_swpque = si.swpque;
993
prev_swpocc = si.swpocc;
996
/* total cpu time avail (this int) */
997
sysdata.sys_ttime = ttime - (base.ttime + sysdata.sys_ttimet);
999
/* total cpu time avail (since start) */
1000
sysdata.sys_ttimet = ttime - base.ttime;
1002
/* user time this interval */
1003
sysdata.sys_utime = utime - (base.utime + sysdata.sys_utimet);
1005
/* user time (since start) */
1006
sysdata.sys_utimet = utime - base.utime;
1008
/* system time this interval */
1009
sysdata.sys_stime = stime - (base.stime + sysdata.sys_stimet);
1011
/* system time (since start) */
1012
sysdata.sys_stimet = stime - base.stime;
1014
/* idle time this interval */
1015
sysdata.sys_itime = itime - (base.itime + sysdata.sys_itimet);
1017
/* idle time (since start) */
1018
sysdata.sys_itimet = itime - base.itime;
1020
/* srun wait this interval */
1021
sysdata.sys_srtime = srtime - (base.srtime + sysdata.sys_srtimet);
1023
/* srun wait (since start) */
1024
sysdata.sys_srtimet = srtime - base.srtime;
1026
/* I/O wait time this interval */
1027
sysdata.sys_wtime = wtime - (base.wtime + sysdata.sys_wtimet);
1029
/* I/O wait time (since start) */
1030
sysdata.sys_wtimet = wtime - base.wtime;
1032
/* Total Swap space available */
1033
sysdata.sys_swp_total = (uint64)swaptot * 512;
1035
/* Swap space free */
1036
sysdata.sys_swp_free = (uint64)swapfree * 512;
1038
/* Swap space in use (bytes) */
1039
sysdata.sys_swp_used = ((uint64)swaptot - (uint64)swapfree) * 512;
1041
/* swaprsrv is the amount of space currently reserved by processes
1042
which is not the same as that which is in use by processes
1043
see swapctl(SC_GETRESVSWAP) */
1045
/* Swap space reserved (bytes) */
1046
sysdata.sys_swp_rsvd = (uint64)swaprsrv * 512;
1048
/* Virtual Swap space avail (bytes) */
1049
sysdata.sys_swp_virt = (uint64)swapvirt * 512;
1051
/* Swap rate in bytes/second */
1052
sysdata.sys_swp_rate = 0;
1054
/* Memory available (unused, free) */
1055
sysdata.sys_mem_avail = ((uint64)rmi.freemem + (uint64)rmi.chunkpages) *
1058
/* Memory in use (bytes) (SVD 10/19/98 - s/rmi.availrmem/rmi.physmem/) */
1059
sysdata.sys_mem_used = (uint64)rmi.physmem*pagesize - sysdata.sys_mem_avail;
1061
/* Memory + swap used (bytes) */
1062
sysdata.sys_mswp_used = sysdata.sys_swp_used + sysdata.sys_mem_used;
1064
/* Memory + swap avail (bytes) */
1065
sysdata.sys_mswp_avail = sysdata.sys_swp_free + sysdata.sys_mem_avail;
1067
if ((time_stamp - prev_time_stamp) > 0)
1068
period = (time_stamp - prev_time_stamp);
1072
/* Swap "Occ" delta */
1073
sysdata.sys_swpocc = ((double)si.swpocc - prev_swpocc) / period;
1074
prev_swpocc = si.swpocc;
1076
/* Swap Queue delta */
1077
sysdata.sys_swpque = ((double)si.swpque - prev_swpque) / period;
1078
prev_swpque = si.swpque;
1080
/* Run "Occ" delta */
1081
sysdata.sys_runocc = ((double)si.runocc - prev_runocc) / period;
1082
prev_runocc = si.runocc;
1084
/* Run Queue delta */
1085
sysdata.sys_runque = ((double)si.runque - prev_runque) / period;
1086
if (sysdata.sys_ncpus > 1) sysdata.sys_runque /= sysdata.sys_ncpus;
1087
prev_runque = si.runque;
1089
/* characters read */
1090
sysdata.sys_readch = si.readch;
1092
/* characters written */
1093
sysdata.sys_writech = si.writech;
1095
#elif defined(ALPHA)
1097
struct vm_perf perf;
1099
/* memory information */
1100
/* this is possibly bogus - we work out total # pages by */
1101
/* adding up the free, active, inactive, wired down, and */
1102
/* zero filled. Anyone who knows a better way, TELL ME! */
1103
/* Change: don't use zero filled. */
1105
if (mem_nl[PERFSUM].n_value) {
1106
if (readk((off_t)mem_nl[PERFSUM].n_value,(char *)&perf,sizeof perf))
1107
/* Virtual Swap space avail (bytes) */
1108
sysdata.sys_swp_free = perf.vpf_swapspace*pagesize;
1111
(void) vm_statistics(current_task(),&vmstats);
1114
sysdata.sys_mem_avail = vmstats.free_count*pagesize;
1116
/* Memory in use (bytes) */
1117
/* physical_memory is an int holding KB: widen before scaling to bytes so the
 * product cannot overflow int (a 32-bit int overflows at 2 GB of RAM) */
sysdata.sys_mem_used = ((uint64)physical_memory*1024) - sysdata.sys_mem_avail;
1119
/* Swap space reserved (bytes) */
1120
sysdata.sys_swp_rsvd = sysdata.sys_swp_used + sysdata.sys_mem_used;
1122
/* Memory + swap used (bytes) */
1123
sysdata.sys_mswp_used = sysdata.sys_swp_used + sysdata.sys_mem_used;
1125
/* Memory + swap avail (bytes) */
1126
sysdata.sys_mswp_avail = sysdata.sys_swp_free + sysdata.sys_mem_avail;
1133
clk_tck = sysconf(_SC_CLK_TCK);
1135
if (read_kernel_table(SINFO, (void **)&si, &si_size, NULL)<0)
1138
if (read_kernel_table(PWS, (void **)&pw, &pw_size, NULL)<0)
1141
if (read_kernel_table(SWAPTAB, (void **)&sw, &sw_size, NULL)<0)
1144
/* convert CPU time values to double CPU seconds */
1146
utime = stime = itime = srtime = wtime = ttime = 0;
1148
for (i=0; i<pw->pw_ccpu; i++) {
1149
utime += CLOCKS2SECS(pw->pws[i].pw_userc);
1150
stime += CLOCKS2SECS(pw->pws[i].pw_unixc);
1151
itime += CLOCKS2SECS(pw->pws[i].pw_idlec);
1152
wtime += CLOCKS2SECS(pw->pws[i].pw_syswc);
1153
ttime += CLOCKS2SECS(pw->pws[i].pw_syswc + pw->pws[i].pw_unixc +
1154
pw->pws[i].pw_userc + pw->pws[i].pw_idlec +
1155
pw->pws[i].pw_guestc);
1158
/* if this is the first time, initialize base CPU time values */
1160
if (!base.initialized) {
1161
base.initialized = 1;
1165
base.srtime = srtime;
1168
prev_runque = si->runque;
1169
prev_runocc = si->runocc;
1170
prev_swpque = si->swpque;
1171
prev_swpocc = si->swpocc;
1174
/* total CPUs available (dynamic on Cray) */
1175
sysdata.sys_ncpus = sysconf(_SC_CRAY_NCPU);
1177
/* total cpu time avail (this int) */
1178
sysdata.sys_ttime = ttime - (base.ttime + sysdata.sys_ttimet);
1180
/* total cpu time avail (since start) */
1181
sysdata.sys_ttimet = ttime - base.ttime;
1183
/* user time this interval */
1184
sysdata.sys_utime = utime - (base.utime + sysdata.sys_utimet);
1186
/* user time (since start) */
1187
sysdata.sys_utimet = utime - base.utime;
1189
/* system time this interval */
1190
sysdata.sys_stime = stime - (base.stime + sysdata.sys_stimet);
1192
/* system time (since start) */
1193
sysdata.sys_stimet = stime - base.stime;
1195
/* idle time this interval */
1196
sysdata.sys_itime = itime - (base.itime + sysdata.sys_itimet);
1198
/* idle time (since start) */
1199
sysdata.sys_itimet = itime - base.itime;
1201
/* srun wait this interval */
1202
sysdata.sys_srtime = srtime - (base.srtime + sysdata.sys_srtimet);
1204
/* srun wait (since start) */
1205
sysdata.sys_srtimet = srtime - base.srtime;
1207
/* I/O wait time this interval */
1208
sysdata.sys_wtime = wtime - (base.wtime + sysdata.sys_wtimet);
1210
/* I/O wait time (since start) */
1211
sysdata.sys_wtimet = wtime - base.wtime;
1213
/* Memory available (unused, free) */
1214
sysdata.sys_mem_avail = sysconf(_SC_CRAY_USRMEM) * 8 - si->umemused * NBPC;
1216
/* Memory in use (bytes) */
1217
sysdata.sys_mem_used = si->umemused * NBPC + sysconf(_SC_CRAY_SYSMEM) * 8;
1219
/* Total Swap space available */
1220
sysdata.sys_swp_total = sw->swp_map.bmp_total * sw->swp_wght * 4096;
1222
/* Swap space free */
1223
sysdata.sys_swp_free = sw->swp_map.bmp_avail * sw->swp_wght * 4096;
1225
/* Swap space in use (bytes) */
1226
sysdata.sys_swp_used = sysdata.sys_swp_total - sysdata.sys_swp_free;
1228
/* Swap space reserved (bytes) */
1229
sysdata.sys_swp_rsvd = sysdata.sys_swp_used + sysdata.sys_mem_used;
1231
/* Virtual (CRAY: sys_mem_used + sys_swp_used) Swap space avail (bytes) */
1232
sysdata.sys_swp_virt = sysdata.sys_swp_used + sysdata.sys_mem_used;
1234
/* Memory + swap used (bytes) */
1235
sysdata.sys_mswp_used = sysdata.sys_swp_used + sysdata.sys_mem_used;
1237
/* Memory + swap avail (bytes) */
1238
sysdata.sys_mswp_avail = sysdata.sys_swp_free + sysdata.sys_mem_avail;
1240
/* Swap rate in bytes/second */
1241
sysdata.sys_swp_rate = sw->swp_interv==0 ? (double)0 :
1242
(double)(sw->swp_blksper * 4096) / (double)sw->swp_interv;
1244
if ((time_stamp - prev_time_stamp) > 0)
1245
period = (time_stamp - prev_time_stamp);
1249
/* Swap "Occ" delta */
1250
sysdata.sys_swpocc = ((double)si->swpocc - prev_swpocc) / period;
1251
prev_swpocc = si->swpocc;
1253
/* Swap Queue delta */
1254
sysdata.sys_swpque = ((double)si->swpque - prev_swpque) / period;
1255
prev_swpque = si->swpque;
1257
/* Run "Occ" delta */
1258
sysdata.sys_runocc = ((double)si->runocc - prev_runocc) / period;
1259
prev_runocc = si->runocc;
1261
/* Run Queue delta */
1262
sysdata.sys_runque = ((double)si->runque - prev_runque) / period;
1263
if (sysdata.sys_ncpus > 1) sysdata.sys_runque /= sysdata.sys_ncpus;
1264
prev_runque = si->runque;
1266
/* characters read */
1267
sysdata.sys_readch = si->readch;
1269
/* characters written */
1270
sysdata.sys_writech = si->writech;
1283
for (curr=job_list.next; curr != &job_list; curr=curr->next)
1284
if (LNK_DATA(curr, job_elem_t, link)->precreated == 0)
1291
/* only used on IRIX 6 */
1297
#define ASHMAXINC 100
1299
/* only used on IRIX 6 */
1301
get_arsess_list(lnk_link_t *arsess_list)
1304
static ash_t *ashes;
1311
static int (*get_arsess_p)(pdc_arsess_t *, arsess_t *);
1313
if (get_arsess_p == NULL) {
1314
char irix_release[10];
1315
sysinfo(SI_RELEASE, irix_release, sizeof(irix_release));
1316
if (strcmp(irix_release, "6.5")>=0)
1317
get_arsess_p = &pdc_get_arsess65;
1318
else if (sysconf(_SC_KERN_POINTERS) == 64)
1319
get_arsess_p = &pdc_get_arsess64;
1321
get_arsess_p = &pdc_get_arsess;
1324
if (ashes == NULL) {
1325
ash_max = ASHMAXINC;
1326
ashes = (ash_t *)malloc(sizeof(ash_t)*ash_max);
1327
memset(ashes, 0, sizeof(ash_t)*ash_max);
1330
LNK_INIT(arsess_list);
1332
while ((num_ashes = syssgi(SGI_ENUMASHS, ashes, ash_max)) < 0 &&
1334
ash_max += ASHMAXINC;
1335
ashes = (ash_t *)sge_realloc(ashes, sizeof(ash_t)*ash_max, 1);
1338
if (num_ashes > 0) {
1339
for (i=0; i<num_ashes; i++) {
1340
if (syssgi(SGI_GETARSESS, &ashes[i], &ar) >= 0) {
1341
arsess_elem_t *arse_elem;
1342
arse_elem = malloc(sizeof(arsess_elem_t));
1343
memset(arse_elem, 0, sizeof(arsess_elem_t));
1344
(*get_arsess_p)(&arse_elem->arse, &ar.arse);
1345
LNK_ADD(arsess_list->prev, &arse_elem->link);
1354
/* only used on IRIX 6 */
1356
free_arsess_list(lnk_link_t *arsess_list)
1359
while((curra=arsess_list->next) != arsess_list) {
1361
free(LNK_DATA(curra, arsess_elem_t, link));
1365
/* only used on IRIX 6 */
1366
static arsess_elem_t *
1367
find_arsess(lnk_link_t *arsess_list, ash_t ash)
1370
for(curra=arsess_list->next; curra!=arsess_list; curra=curra->next) {
1371
arsess_elem_t *arsess_elem = LNK_DATA(curra, arsess_elem_t, link);
1372
if (arsess_elem->arse.ash == ash)
1378
/* only used on IRIX 6 */
1380
in_pidlist(pid_t *pidlist, int max, pid_t pid)
1383
for (j=0; pidlist[j] && j<max; j++)
1384
if (pidlist[j] == pid)
1392
free_process_list(job_elem_t *job_elem)
1396
/* free process list */
1397
while((currp=job_elem->procs.next) != &job_elem->procs) {
1399
free(LNK_DATA(currp, proc_elem_t, link));
1404
free_job(job_elem_t *job_elem)
1410
free_process_list(job_elem);
1413
/* free arse list */
1414
while((currp=job_elem->arses.next) != &job_elem->arses) {
1416
free(LNK_DATA(currp, arsess_elem_t, link));
1420
/* free job element */
1424
static int psRetrieveOSJobData(void) {
1425
lnk_link_t *curr, *next;
1426
time_t time_stamp = get_gmt();
1427
static time_t next_time, pnext_time;
1430
lnk_link_t arsess_list;
1431
arsess_elem_t *arse_elem;
1433
static struct proc *pt;
1434
static long pt_size;
1435
static struct sess *st;
1436
static long st_size;
1437
int nproc, nsess, i;
1441
DENTER(TOP_LAYER, "psRetrieveOSJobData");
1443
if (time_stamp <= next_time) {
1446
next_time = time_stamp + ps_config.job_collection_interval;
1450
/* go get all the array sessions */
1452
get_arsess_list(&arsess_list);
1454
#elif defined(ALPHA) || defined(LINUX) || defined(SOLARIS)
1457
/* There is no way to retrieve a pid list containing all processes
1458
of a session id. So we have to iterate through the whole process
1459
table to decide whether a process is needed for a job or not. */
1462
while (!pt_dispatch_proc_to_job(&job_list, time_stamp))
1470
struct procsinfo pinfo[SIZE];
1473
job_elem_t *job_elem;
1474
double old_time = 0.0;
1475
uint64 old_vmem = 0;
1478
while ((count = getprocs(pinfo, sizeof(struct procsinfo), NULL, 0, &index, SIZE)) > 0) {
1481
/* for all processes */
1482
for (i=0; i < count; i++)
1484
for (curr=job_list.next; curr != &job_list; curr=curr->next) {
1487
job_elem = LNK_DATA(curr, job_elem_t, link);
1489
if (job_elem->job.jd_jid == pinfo[i].pi_pgrp) {
1492
proc_elem_t *proc_elem;
1495
for (curr2=job_elem->procs.next; curr2 != &job_elem->procs; curr2=curr2->next) {
1497
proc_elem = LNK_DATA(curr2, proc_elem_t, link);
1499
if (proc_elem->proc.pd_pid == pinfo[i].pi_pid) {
1506
proc_elem = (proc_elem_t *) malloc(sizeof(proc_elem_t));
1508
if (proc_elem == NULL) {
1512
memset(proc_elem, 0, sizeof(proc_elem_t));
1513
proc_elem->proc.pd_length = sizeof(psProc_t);
1514
proc_elem->proc.pd_state = 1; /* active */
1516
LNK_ADD(job_elem->procs.prev, &proc_elem->link);
1517
job_elem->job.jd_proccount++;
1521
/* save previous usage data - needed to build delta usage */
1522
old_time = proc_elem->proc.pd_utime + proc_elem->proc.pd_stime;
1523
old_vmem = proc_elem->vmem;
1526
proc_elem->proc.pd_tstamp = time_stamp;
1527
proc_elem->proc.pd_pid = pinfo[i].pi_pid;
1529
proc_elem->proc.pd_utime = pinfo[i].pi_ru.ru_utime.tv_sec;
1530
proc_elem->proc.pd_stime = pinfo[i].pi_ru.ru_stime.tv_sec;
1532
proc_elem->proc.pd_uid = pinfo[i].pi_uid;
1533
proc_elem->vmem = pinfo[i].pi_dvm +
1534
pinfo[i].pi_tsize + pinfo[i].pi_dsize;
1535
proc_elem->rss = pinfo[i].pi_drss + pinfo[i].pi_trss;
1536
proc_elem->proc.pd_pstart = pinfo[i].pi_start;
1538
proc_elem->mem = ((proc_elem->proc.pd_stime + proc_elem->proc.pd_utime) - old_time) *
1539
(( old_vmem + proc_elem->vmem)/2);
1542
} /* for job_list */
1546
#elif defined(HP1164)
1549
struct pst_status pstat_buffer[SIZE];
1551
job_elem_t *job_elem;
1552
double old_time = 0;
1553
uint64 old_vmem = 0;
1555
while ((count = pstat_getproc(pstat_buffer, sizeof(struct pst_status), SIZE, idx)) > 0) {
1558
/* for all processes */
1559
for (i=0; i < count; i++)
1561
for (curr=job_list.next; curr != &job_list; curr=curr->next) {
1564
job_elem = LNK_DATA(curr, job_elem_t, link);
1566
if (job_elem->job.jd_jid == pstat_buffer[i].pst_pgrp) {
1569
proc_elem_t *proc_elem;
1572
for (curr2=job_elem->procs.next; curr2 != &job_elem->procs; curr2=curr2->next) {
1574
proc_elem = LNK_DATA(curr2, proc_elem_t, link);
1576
if (proc_elem->proc.pd_pid == pstat_buffer[i].pst_pid) {
1583
proc_elem = (proc_elem_t *) malloc(sizeof(proc_elem_t));
1585
if (proc_elem == NULL) {
1589
memset(proc_elem, 0, sizeof(proc_elem_t));
1590
proc_elem->proc.pd_length = sizeof(psProc_t);
1591
proc_elem->proc.pd_state = 1; /* active */
1593
LNK_ADD(job_elem->procs.prev, &proc_elem->link);
1594
job_elem->job.jd_proccount++;
1598
/* save previous usage data - needed to build delta usage */
1599
old_time = proc_elem->proc.pd_utime + proc_elem->proc.pd_stime;
1600
old_vmem = proc_elem->vmem;
1603
proc_elem->proc.pd_tstamp = time_stamp;
1604
proc_elem->proc.pd_pid = pstat_buffer[i].pst_pid;
1606
proc_elem->proc.pd_utime = pstat_buffer[i].pst_utime;
1607
proc_elem->proc.pd_stime = pstat_buffer[i].pst_stime;
1609
proc_elem->proc.pd_uid = pstat_buffer[i].pst_uid;
1610
proc_elem->proc.pd_gid = pstat_buffer[i].pst_gid;
1611
proc_elem->vmem = pstat_buffer[i].pst_vdsize +
1612
pstat_buffer[i].pst_vtsize + pstat_buffer[i].pst_vssize;
1613
proc_elem->rss = pstat_buffer[i].pst_rssize;
1614
proc_elem->proc.pd_pstart = pstat_buffer[i].pst_start;
1616
proc_elem->vmem = proc_elem->vmem * getpagesize();
1617
proc_elem->rss = proc_elem->rss * getpagesize();
1619
proc_elem->mem = ((proc_elem->proc.pd_stime + proc_elem->proc.pd_utime) - old_time) *
1620
(( old_vmem + proc_elem->vmem)/2);
1623
} /* for job_list */
1626
idx = pstat_buffer[count-1].pst_idx + 1;
1629
#elif defined(FREEBSD)
1633
struct kinfo_proc *procs;
1634
char kerrbuf[_POSIX2_LINE_MAX];
1635
job_elem_t *job_elem;
1636
double old_time = 0.0;
1637
uint64 old_vmem = 0;
1639
kd = kvm_openfiles(NULL, NULL, NULL, O_RDONLY, kerrbuf);
1641
DPRINTF(("kvm_openfiles: error %s\n", kerrbuf));
1645
procs = kvm_getprocs(kd, KERN_PROC_ALL, 0, &nprocs);
1646
if (procs == NULL) {
1647
DPRINTF(("kvm_getprocs: error %s\n", kvm_geterr(kd)));
1651
for (; nprocs >= 0; nprocs--, procs++) {
1652
for (curr=job_list.next; curr != &job_list; curr=curr->next) {
1653
job_elem = LNK_DATA(curr, job_elem_t, link);
1655
for (i = 0; i < procs->ki_ngroups; i++) {
1656
if (job_elem->job.jd_jid == procs->ki_groups[i]) {
1658
proc_elem_t *proc_elem;
1661
if (job_elem->job.jd_proccount != 0) {
1662
for (curr2=job_elem->procs.next; curr2 != &job_elem->procs; curr2=curr2->next) {
1663
proc_elem = LNK_DATA(curr2, proc_elem_t, link);
1665
if (proc_elem->proc.pd_pid == procs->ki_pid) {
1672
proc_elem = malloc(sizeof(proc_elem_t));
1673
if (proc_elem == NULL) {
1678
memset(proc_elem, 0, sizeof(proc_elem_t));
1679
proc_elem->proc.pd_length = sizeof(psProc_t);
1680
proc_elem->proc.pd_state = 1; /* active */
1681
proc_elem->proc.pd_pstart = procs->ki_start.tv_sec;
1683
LNK_ADD(job_elem->procs.prev, &proc_elem->link);
1684
job_elem->job.jd_proccount++;
1686
/* save previous usage data - needed to build delta usage */
1687
old_time = proc_elem->proc.pd_utime + proc_elem->proc.pd_stime;
1688
old_vmem = proc_elem->vmem;
1690
proc_elem->proc.pd_tstamp = time_stamp;
1691
proc_elem->proc.pd_pid = procs->ki_pid;
1693
proc_elem->proc.pd_utime = procs->ki_rusage.ru_utime.tv_sec;
1694
proc_elem->proc.pd_stime = procs->ki_rusage.ru_stime.tv_sec;
1696
proc_elem->proc.pd_uid = procs->ki_uid;
1697
proc_elem->proc.pd_gid = procs->ki_rgid;
1698
proc_elem->vmem = procs->ki_size;
1699
proc_elem->rss = procs->ki_rssize;
1700
proc_elem->mem = ((proc_elem->proc.pd_stime + proc_elem->proc.pd_utime) - old_time) *
1701
((old_vmem + proc_elem->vmem)/2);
1708
#elif defined(DARWIN)
1711
struct kinfo_proc *procs;
1712
struct kinfo_proc *procs_begin;
1713
job_elem_t *job_elem;
1714
double old_time = 0.0;
1715
uint64 old_vmem = 0;
1716
int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_ALL, 0 };
1719
if (sysctl(mib, 4, NULL, &bufSize, NULL, 0) < 0) {
1720
DPRINTF(("sysctl() failed(1)\n"));
1723
if ((procs = (struct kinfo_proc *)malloc(bufSize)) == NULL) {
1724
DPRINTF(("malloc() failed\n"));
1727
if (sysctl(mib, 4, procs, &bufSize, NULL, 0) < 0) {
1728
DPRINTF(("sysctl() failed(2)\n"));
1732
procs_begin = procs;
1733
nprocs = bufSize/sizeof(struct kinfo_proc);
1735
for (; nprocs >= 0; nprocs--, procs++) {
1736
for (curr=job_list.next; curr != &job_list; curr=curr->next) {
1737
job_elem = LNK_DATA(curr, job_elem_t, link);
1739
for (i = 0; i < procs->kp_eproc.e_ucred.cr_ngroups; i++) {
1740
if (job_elem->job.jd_jid == procs->kp_eproc.e_ucred.cr_groups[i]) {
1742
proc_elem_t *proc_elem;
1745
if (job_elem->job.jd_proccount != 0) {
1746
for (curr2=job_elem->procs.next; curr2 != &job_elem->procs; curr2=curr2->next) {
1747
proc_elem = LNK_DATA(curr2, proc_elem_t, link);
1749
if (proc_elem->proc.pd_pid == procs->kp_proc.p_pid) {
1756
proc_elem = malloc(sizeof(proc_elem_t));
1757
if (proc_elem == NULL) {
1762
memset(proc_elem, 0, sizeof(proc_elem_t));
1763
proc_elem->proc.pd_length = sizeof(psProc_t);
1764
proc_elem->proc.pd_state = 1; /* active */
1765
proc_elem->proc.pd_pstart = procs->kp_proc.p_starttime.tv_sec;
1767
LNK_ADD(job_elem->procs.prev, &proc_elem->link);
1768
job_elem->job.jd_proccount++;
1770
/* save previous usage data - needed to build delta usage */
1771
old_time = proc_elem->proc.pd_utime + proc_elem->proc.pd_stime;
1772
old_vmem = proc_elem->vmem;
1774
proc_elem->proc.pd_tstamp = time_stamp;
1775
proc_elem->proc.pd_pid = procs->kp_proc.p_pid;
1776
DPRINTF(("pid: %d\n", proc_elem->proc.pd_pid));
1779
struct task_basic_info t_info;
1780
struct task_thread_times_info t_times_info;
1782
unsigned int info_count = TASK_BASIC_INFO_COUNT;
1784
if (task_for_pid(mach_task_self(), proc_elem->proc.pd_pid, &task) != KERN_SUCCESS) {
1785
DPRINTF(("task_for_pid() error"));
1787
if (task_info(task, TASK_BASIC_INFO, (task_info_t)&t_info, &info_count) != KERN_SUCCESS) {
1788
DPRINTF(("task_info() error"));
1790
proc_elem->vmem = t_info.virtual_size/1024;
1791
DPRINTF(("vmem: %d\n", proc_elem->vmem));
1792
proc_elem->rss = t_info.resident_size/1024;
1793
DPRINTF(("rss: %d\n", proc_elem->rss));
1796
info_count = TASK_THREAD_TIMES_INFO_COUNT;
1797
if (task_info(task, TASK_THREAD_TIMES_INFO, (task_info_t)&t_times_info, &info_count) != KERN_SUCCESS) {
1798
DPRINTF(("task_info() error\n"));
1800
proc_elem->proc.pd_utime = t_times_info.user_time.seconds;
1801
/* pd_utime is printed with %f elsewhere in this file; %d would mismatch the argument type */
DPRINTF(("user_time: %f\n", proc_elem->proc.pd_utime));
1802
proc_elem->proc.pd_stime = t_times_info.system_time.seconds;
1803
/* pd_stime is printed with %f elsewhere in this file; %d would mismatch the argument type */
DPRINTF(("system_time: %f\n", proc_elem->proc.pd_stime));
1808
proc_elem->proc.pd_uid = procs->kp_eproc.e_ucred.cr_uid;
1809
DPRINTF(("uid: %d\n", proc_elem->proc.pd_uid));
1810
proc_elem->proc.pd_gid = procs->kp_eproc.e_pcred.p_rgid;
1811
DPRINTF(("gid: %d\n", proc_elem->proc.pd_gid));
1812
proc_elem->mem = ((proc_elem->proc.pd_stime + proc_elem->proc.pd_utime) - old_time) *
1813
((old_vmem + proc_elem->vmem)/2);
1814
DPRINTF(("mem %d\n", proc_elem->mem));
1821
#elif defined(NECSX4) || defined(NECSX5)
1823
for (curr=job_list.next; curr != &job_list; curr=curr->next) {
1824
job_elem_t *job_elem = LNK_DATA(curr, job_elem_t, link);
1825
psJob_t *job = &job_elem->job;
1826
id_t jid = (id_t) job->jd_jid;
1827
struct jresourcecpu resourcecpu;
1828
struct jresourcemem resourcemem;
1829
struct jresourcetmpf resourcetmpf;
1830
struct jresourceproc resourceproc;
1832
u_long32 delta_t = 0;
1834
/* skip precreated jobs */
1835
if (job_elem->precreated)
1838
/* try to get resource information */
1840
if (getresourcej(jid, CURR_ALL, &resourcecpu) == -1) {
1843
delta_t = MICROSEC2SECS(resourcecpu.jr_ucpu)
1844
+ MICROSEC2SECS(resourcecpu.jr_scpu)
1845
- job_elem->utime - job_elem->stime;
1847
job->jd_utime_a = MICROSEC2SECS(resourcecpu.jr_ucpu);
1848
job->jd_stime_a = MICROSEC2SECS(resourcecpu.jr_scpu);
1849
job->jd_utime_c = 0;
1850
job->jd_stime_c = 0;
1852
job_elem->utime = job->jd_utime_a;
1853
job_elem->stime = job->jd_stime_a;
1856
if (getresourcej(jid, CURR_UMEM, &resourcemem) == -1) {
1859
job->jd_mem += resourcemem.jr_umem.mv_used * delta_t;
1862
if (getresourcej(jid, CURR_PROC, &resourceproc) == -1) {
1865
job->jd_refcnt = resourceproc.jr_proc;
1869
if (job->jd_tstamp == 0) {
1873
job->jd_tstamp = time_stamp;
1874
job->jd_etime = time_stamp - job_elem->starttime;
1875
job->jd_vmem = job->jd_mem;
1876
job->jd_rss = job->jd_vmem;
1884
clk_tck = sysconf(_SC_CLK_TCK);
1886
if (read_kernel_table(SESS, (void **)&st, &st_size, &nsess)<0) {
1890
if (read_kernel_table(PROCTAB, (void **)&pt, &pt_size, &nproc)<0) {
1894
/* scan session table */
1896
for(i=0; i<nsess; i++) {
1897
struct sess *ss = &st[i];
1898
if (ss->s_sid == 0) continue;
1899
for (curr=job_list.next; curr != &job_list; curr=curr->next) {
1900
job_elem_t *job_elem = LNK_DATA(curr, job_elem_t, link);
1901
psJob_t *job = &job_elem->job;
1902
if (job_elem->precreated) continue; /* skip precreated jobs */
1903
if (ss->s_sid == job->jd_jid) {
1904
job->jd_uid = ss->s_uid;
1905
if (job->jd_tstamp == 0)
1907
job->jd_tstamp = time_stamp;
1908
job->jd_refcnt = ss->s_nprocs;
1909
job->jd_etime = time_stamp - job_elem->starttime; /* estimate */
1910
job_elem->utime = CLOCKS2SECS(ss->s_ucputime);
1911
job_elem->stime = CLOCKS2SECS(ss->s_scputime);
1912
job->jd_vmem = ss->s_memuse * NBPC;
1913
job->jd_rss = job->jd_vmem;
1914
job->jd_himem = ss->s_memhiwat * NBPC;
1920
/* scan process table */
1922
for(i=0; i<nproc; i++) {
1923
struct proc *pp = &pt[i];
1924
job_elem_t *job_elem;
1927
proc_elem_t *proc_elem;
1930
/* skip blank process entries */
1931
if (pp->p_pid == 0) continue;
1932
if (pp->p_pcomm.pc_cred.cr_sid == 0) continue;
1934
/* search for job based on session ID */
1935
for (curr=job_list.next; curr != &job_list; curr=curr->next) {
1936
job_elem = LNK_DATA(curr, job_elem_t, link);
1937
job = &job_elem->job;
1938
if (job_elem->precreated) continue; /* skip precreated jobs */
1939
if (pp->p_pcomm.pc_cred.cr_sid == job->jd_jid)
1942
if (curr == &job_list) /* if not found, go to next proctab entry */
1945
/* search job's process list for pid */
1946
for(currp=job_elem->procs.next; currp != &job_elem->procs;
1947
currp=currp->next) {
1948
proc_elem = LNK_DATA(currp, proc_elem_t, link);
1949
if (proc_elem->proc.pd_pid == pp->p_pid)
1953
/* if this process is not in process list, chain on new one */
1954
if (currp == &job_elem->procs) {
1955
proc_elem = (proc_elem_t *)malloc(sizeof(proc_elem_t));
1959
memset(proc_elem, 0, sizeof(proc_elem_t));
1960
proc_elem->proc.pd_length = sizeof(psProc_t);
1961
LNK_ADD(job_elem->procs.prev, &proc_elem->link);
1962
job->jd_proccount++;
1965
/* set process fields */
1966
proc = &proc_elem->proc;
1967
proc->pd_tstamp = time_stamp;
1968
proc->pd_pid = pp->p_pid;
1969
proc->pd_uid = pp->p_pcomm.pc_cred.cr_uid;
1970
proc->pd_gid = pp->p_pcomm.pc_cred.cr_groups[0];
1971
proc->pd_acid = pp->p_pcomm.pc_cred.cr_acid;
1972
if (job->jd_gid == -1) {
1973
job->jd_gid = proc->pd_gid; /* job group ID */
1974
job->jd_acid = proc->pd_acid; /* job acct ID */
1977
if (proc->pd_pstart == 0) proc->pd_pstart = time_stamp;
1978
proc->pd_utime = CLOCKS2SECS(pp->p_utime);
1979
proc->pd_stime = CLOCKS2SECS(pp->p_stime);
1980
proc_elem->qwtime = (double)pp->p_pcomm.pc_srunwtime;
1983
/* call routine to get pacct data */
1985
read_pacct(&job_list, time_stamp);
1989
for (curr=job_list.next; curr != &job_list; curr=next) {
1991
job_elem_t *job_elem;
1994
job_elem = LNK_DATA(curr, job_elem_t, link);
1995
job = &job_elem->job;
1997
/* if job has not been watched within 30 seconds of being pre-added
1998
to job list, delete it */
2000
if (job_elem->precreated) {
2002
if ((job_elem->precreated + 30) < time_stamp) {
2006
fprintf(df, "%d deleting precreated "F64"\n", time_stamp,
2012
/* remove from list */
2017
continue; /* skip precreated jobs */
2022
if ((arse_elem = find_arsess(&arsess_list, job->jd_jid)) == NULL) {
2025
job->jd_proccount = 0;
2026
free_process_list(job_elem);
2027
job->jd_utime_c += job->jd_utime_a;
2028
job->jd_stime_c += job->jd_stime_a;
2029
job->jd_bwtime_c += job->jd_bwtime_a;
2030
job->jd_rwtime_c += job->jd_rwtime_a;
2031
job->jd_srtime_c += job->jd_srtime_a;
2032
job->jd_utime_a = 0;
2033
job->jd_stime_a = 0;
2034
job->jd_bwtime_a = 0;
2035
job->jd_rwtime_a = 0;
2036
job->jd_srtime_a = 0;
2039
pid_t pidlist[2048], ses_pidlist[1024];
2040
int pidmax = sizeof(pidlist)/sizeof(pid_t);
2041
int ses_pidmax = sizeof(ses_pidlist)/sizeof(pid_t);
2042
lnk_link_t *curra, *nexta;
2043
pdc_arsess_t *arse = &arse_elem->arse;
2044
static int pagesize;
2047
pagesize = getpagesize();
2049
memset(&pidlist, 0, sizeof(pidlist));
2050
memset(&ses_pidlist, 0, sizeof(ses_pidlist));
2052
/* get pids in the array session */
2054
syssgi(SGI_PIDSINASH, &job->jd_jid, &pidlist, pidmax);
2056
if (job->jd_tstamp == 0) {
2060
job->jd_tstamp = time_stamp;
2061
job->jd_mem = arse->mem * ((double)pagesize/1024.0/(double)HZ);
2062
job->jd_chars = arse->chr + arse->chw;
2063
/* Account ID of this job */
2064
job->jd_acid = arse->prid;
2065
/* total user time used (completed processes) */
2066
job->jd_utime_c = arse->utime*1E-9;
2067
/* total system time used (completed processes) */
2068
job->jd_stime_c = arse->stime*1E-9;
2069
/* total block-io-wait time used (completed processes) */
2070
job->jd_bwtime_c = arse->bwtime*1E-9;
2071
/* total raw-io-wait time used (completed processes) */
2072
job->jd_rwtime_c = arse->rwtime*1E-9;
2073
/* total srun-wait time used (completed processes) */
2074
job->jd_srtime_c = arse->qwtime*1E-9;
2075
/* Elapsed time of the job */
2076
job->jd_etime = time_stamp - arse->start;
2077
/* attached process count (from OS) */
2078
job->jd_refcnt = (long)arse->refcnt;
2080
/* get pids in the POSIX session */
2082
syssgi(SGI_GETSESPID, arse_elem->arse.pid, &ses_pidlist, ses_pidmax);
2084
/* search for any array sessions created in the POSIX session
2085
by checking to see if the pid creating the array session
2086
is one of the POSIX session pids. */
2088
for(curra=arsess_list.next; curra != &arsess_list; curra=nexta) {
2089
arsess_elem_t *arsess_elem = LNK_DATA(curra, arsess_elem_t, link);
2090
pdc_arsess_t *arse = &arsess_elem->arse;
2091
nexta = curra->next;
2093
if (arse->ash != job->jd_jid &&
2094
in_pidlist(ses_pidlist, ses_pidmax, arse->pid)) {
2096
arsess_elem_t *elem;
2098
/* remove array session element from main array session list
2099
and chain it onto the job array session list */
2102
if ((elem=find_arsess(&job_elem->arses, arse->ash))) {
2103
LNK_DELETE(&elem->link);
2106
LNK_ADD(job_elem->arses.prev, &arsess_elem->link);
2108
/* attached process count (from OS) */
2109
job->jd_refcnt += arse->refcnt;
2114
/* get pids for all of the array sessions associated with the job */
2116
for(curra=job_elem->arses.next; curra != &job_elem->arses;
2117
curra=curra->next) {
2119
arsess_elem_t *arsess_elem = LNK_DATA(curra, arsess_elem_t, link);
2120
pdc_arsess_t *arse = &arsess_elem->arse;
2123
/* append pids in this array session to the pidlist */
2125
for(j=0; pidlist[j] && j<pidmax; j++) ;
2127
syssgi(SGI_PIDSINASH, &arse->ash, &pidlist[j],
2131
/* if it is not time to collect process data then just
2132
add the process usage times to the job data. */
2134
if (time_stamp <= pnext_time) {
2137
/* initialize active process times */
2138
job->jd_utime_a = 0;
2139
job->jd_stime_a = 0;
2140
job->jd_bwtime_a = 0;
2141
job->jd_rwtime_a = 0;
2142
job->jd_srtime_a = 0;
2144
for(currp=job_elem->procs.next; currp != &job_elem->procs;
2145
currp=currp->next) {
2147
proc_elem_t *proc_elem = LNK_DATA(currp, proc_elem_t, link);
2148
psProc_t *proc = &proc_elem->proc;
2150
/* Note: if the process interval is larger than the
2151
job interval, then there is a possibility that the
2152
usage for a completed job will be counted both in the
2153
active and complete process usage. We avoid this by
2154
only adding the process's usage to the job usage if the
2155
process is in the ASH table active pid list of the job. */
2158
for (j=0; pidlist[j] && j<sizeof(pidlist)/sizeof(pid_t); j++)
2159
if (pidlist[j] == proc->pd_pid) {
2161
/* total user time used (active processes) */
2162
job->jd_utime_a += proc->pd_utime;
2164
/* total system time used (active processes) */
2165
job->jd_stime_a += proc->pd_stime;
2167
/* total block-io-wait time used (active processes) */
2168
job->jd_bwtime_a += proc_elem->bwtime;
2170
/* total raw-io-wait time used (active processes) */
2171
job->jd_rwtime_a += proc_elem->rwtime;
2173
/* total srun-wait time used (active processes) */
2174
job->jd_srtime_a += proc_elem->qwtime;
2176
/* add active process memory usage to job */
2177
job->jd_mem += proc_elem->mem;
2179
/* add active process I/O usage to job */
2180
job->jd_chars += proc_elem->chars;
2187
proc_elem_t *proc_elem;
2189
lnk_link_t old_procs;
2191
LNK_INIT(&old_procs);
2193
/* save old process list */
2194
if (job_elem->procs.next != &job_elem->procs) {
2195
old_procs.next = job_elem->procs.next;
2196
old_procs.prev = job_elem->procs.prev;
2197
old_procs.next->prev = &old_procs;
2198
old_procs.prev->next = &old_procs;
2199
LNK_INIT(&job_elem->procs);
2202
/* build new process list */
2204
/* initialize active process times */
2205
job->jd_utime_a = 0;
2206
job->jd_stime_a = 0;
2207
job->jd_bwtime_a = 0;
2208
job->jd_rwtime_a = 0;
2209
job->jd_srtime_a = 0;
2213
for (j=0; pidlist[j] && j<sizeof(pidlist)/sizeof(pid_t); j++) {
2215
if ((proc_elem=(proc_elem_t *)malloc(sizeof(proc_elem_t)))) {
2221
psProc_t *proc = &proc_elem->proc;
2223
memset(proc_elem, 0, sizeof(proc_elem_t));
2224
proc->pd_length = sizeof(psProc_t);
2226
/* get data from /proc file system */
2228
sprintf(fname, "/proc/%05ld", pidlist[j]);
2229
fd = open(fname, O_RDONLY);
2230
if (fd < 0) {
   /* proc_elem was just allocated and is not yet linked into the
      job's process list, so release it before skipping this pid */
   free(proc_elem);
   continue;
}
2232
if (ioctl(fd, PIOCPSINFO, &psinfo) < 0 ||
2233
ioctl(fd, PIOCACINFO, &prinfo) < 0) {
2236
pidlist[j] = -pidlist[j]; /* force report of old usage */
2240
proc->pd_tstamp = time_stamp;
2241
proc->pd_pid = pidlist[j];
2242
proc->pd_uid = psinfo.pr_uid;
2243
proc->pd_gid = psinfo.pr_gid;
2244
if (job->jd_uid == -1) {
2245
/* user ID of this job */
2246
job->jd_uid = proc->pd_uid;
2247
/* group ID of this job */
2248
job->jd_gid = proc->pd_gid;
2250
proc->pd_acid = prinfo.pr_prid;
2252
proc->pd_pstart = psinfo.pr_start.tv_sec +
2253
psinfo.pr_start.tv_nsec*1E-9;
2254
proc->pd_utime = prinfo.pr_timers.ac_utime*1E-9;
2255
proc->pd_stime = prinfo.pr_timers.ac_stime*1E-9;
2256
proc_elem->jid = prinfo.pr_ash;
2257
proc_elem->bwtime = prinfo.pr_timers.ac_bwtime*1E-9;
2258
proc_elem->rwtime = prinfo.pr_timers.ac_rwtime*1E-9;
2259
proc_elem->qwtime = prinfo.pr_timers.ac_qwtime*1E-9;
2260
proc_elem->mem = prinfo.pr_counts.ac_mem *
2261
((double)pagesize/1024.0/(double)HZ);
2262
proc_elem->chars = prinfo.pr_counts.ac_chr +
2263
prinfo.pr_counts.ac_chw;
2264
proc_elem->vmem = psinfo.pr_size * pagesize;
2265
proc_elem->rss = psinfo.pr_rssize * pagesize;
2267
job->jd_vmem += proc_elem->vmem;
2269
job->jd_rss += proc_elem->rss;
2271
/* total user time used (active processes) */
2272
job->jd_utime_a += proc->pd_utime;
2274
/* total system time used (active processes) */
2275
job->jd_stime_a += proc->pd_stime;
2277
/* total block-io-wait time used (active processes) */
2278
job->jd_bwtime_a += proc_elem->bwtime;
2280
/* total raw-io-wait time used (active processes) */
2281
job->jd_rwtime_a += proc_elem->rwtime;
2283
/* total srun-wait time used (active processes) */
2284
job->jd_srtime_a += proc_elem->qwtime;
2286
/* add active process memory usage to job */
2287
job->jd_mem += proc_elem->mem;
2289
/* add active process I/O usage to job */
2290
job->jd_chars += proc_elem->chars;
2294
/* add process element to end of process list */
2295
LNK_ADD(job_elem->procs.prev, &proc_elem->link);
2302
job->jd_proccount = proccount;
2303
job->jd_himem = MAX(job->jd_himem, job->jd_vmem);
2305
/* free old process list. If one of the old processes is not
2306
in the new pid list and the old process belongs to a
2307
different ASH than the main job ASH, then accumulate its
2308
usage. Also if a process is in the pid list but is deleted
2309
before we are able to collect its process usage, then report
2310
its process usage as completed usage. */
2313
while((currp=old_procs.next) != &old_procs) {
2314
proc_elem_t *tproc_elem = LNK_DATA(currp, proc_elem_t, link);
2315
psProc_t *tproc = &tproc_elem->proc;
2316
if (!in_pidlist(pidlist, pidmax, tproc->pd_pid)) {
2317
if (job->jd_jid != tproc_elem->jid) {
2318
job_elem->utime += tproc->pd_utime;
2319
job_elem->stime += tproc->pd_stime;
2320
job_elem->bwtime += tproc_elem->bwtime;
2321
job_elem->rwtime += tproc_elem->rwtime;
2322
job_elem->srtime += tproc_elem->qwtime;
2323
job_elem->mem += tproc_elem->mem;
2324
job_elem->chars += tproc_elem->chars;
2325
} else if (in_pidlist(pidlist, pidmax, -tproc->pd_pid)) {
2326
job->jd_utime_c += tproc->pd_utime;
2327
job->jd_stime_c += tproc->pd_stime;
2328
job->jd_bwtime_c += tproc_elem->bwtime;
2329
job->jd_rwtime_c += tproc_elem->rwtime;
2330
job->jd_srtime_c += tproc_elem->qwtime;
2331
job->jd_mem += tproc_elem->mem;
2332
job->jd_chars += tproc_elem->chars;
2341
/* add in usage for completed processes from other ASHes */
2342
job->jd_utime_c += job_elem->utime;
2343
job->jd_stime_c += job_elem->stime;
2344
job->jd_bwtime_c += job_elem->bwtime;
2345
job->jd_rwtime_c += job_elem->rwtime;
2346
job->jd_srtime_c += job_elem->srtime;
2347
job->jd_mem += job_elem->mem;
2348
job->jd_chars += job_elem->chars;
2351
/* add in memory and I/O usage from other ASHes */
2352
for(curra=job_elem->arses.next; curra!=&job_elem->arses;
2353
curra=curra->next) {
2354
arsess_elem_t *arsess_elem = LNK_DATA(curra, arsess_elem_t, link);
2355
pdc_arsess_t *arse = &arsess_elem->arse;
2357
job->jd_mem += arse->mem;
2358
job->jd_chars += arse->chr + arse->chw;
2364
#elif defined(ALPHA) || defined(FREEBSD) || defined(LINUX) || defined(SOLARIS) || defined(HP1164) || defined(DARWIN)
2367
lnk_link_t *currp, *nextp;
2369
/* sum up usage of each process for this job */
2370
proccount = job->jd_proccount;
2371
job->jd_utime_a = job->jd_stime_a = 0;
2375
for(currp=job_elem->procs.next; currp != &job_elem->procs;
2378
proc_elem_t *proc_elem = LNK_DATA(currp, proc_elem_t, link);
2379
psProc_t *proc = &proc_elem->proc;
2381
nextp = currp->next; /* in case currp is deleted */
2383
if (time_stamp == proc->pd_tstamp) {
2384
/* maybe still living */
2385
job->jd_utime_a += proc->pd_utime;
2386
job->jd_stime_a += proc->pd_stime;
2387
job->jd_vmem += proc_elem->vmem;
2388
job->jd_rss += proc_elem->rss;
2389
job->jd_mem += (proc_elem->mem/1024.0);
2391
job->jd_chars += proc_elem->delta_chars;
2394
/* most likely exited */
2395
job->jd_utime_c += proc->pd_utime;
2396
job->jd_stime_c += proc->pd_stime;
2397
job->jd_proccount--;
2399
/* remove process entry from list */
2401
INFO((SGE_EVENT, "lost process "pid_t_fmt" for job "pid_t_fmt" (utime = %f stime = %f)\n",
2402
proc->pd_pid, job->jd_jid, proc->pd_utime, proc->pd_stime));
2408
/* estimate high water memory mark */
2409
if (job->jd_vmem > job->jd_himem)
2410
job->jd_himem = job->jd_vmem;
2416
lnk_link_t *currp, *nextp;
2418
/* If the job was not in the session table, set a timeout after */
2419
/* which we will consider the job complete. The timeout is set */
2420
/* to give us a chance to read the job completion record from */
2421
/* the pacct data during the next interval. */
2423
if (job->jd_tstamp != time_stamp && job->jd_refcnt) {
2424
if (job_elem->timeout == 0)
2425
job_elem->timeout = time_stamp + 5;
2426
else if (job_elem->timeout < time_stamp)
2430
/* set the job's active CPU time to the total CPU time */
2431
/* of the active processes */
2433
job->jd_utime_a = 0;
2434
job->jd_stime_a = 0;
2435
job->jd_srtime_a = 0;
2437
for(currp=job_elem->procs.next; currp != &job_elem->procs;
2439
proc_elem_t *proc_elem = LNK_DATA(currp, proc_elem_t, link);
2440
psProc_t *proc = &proc_elem->proc;
2442
nextp = currp->next; /* in case currp is deleted */
2444
if (time_stamp == proc->pd_tstamp) {
2446
job->jd_utime_a += proc->pd_utime; /* job active user time */
2447
job->jd_stime_a += proc->pd_stime; /* job active system time */
2448
job->jd_srtime_a += proc_elem->qwtime; /* job srun-wait time */
2452
/* process exited, remove process entry from list */
2453
job->jd_proccount--;
2454
job->jd_srtime_c += proc_elem->qwtime; /* job srun-wait time */
2460
/* set the job's completed CPU time to the session CPU time */
2461
/* minus the job's active CPU time */
2463
job->jd_utime_c = MAX(job_elem->utime - job->jd_utime_a, 0);
2464
job->jd_stime_c = MAX(job_elem->stime - job->jd_stime_a, 0);
2471
free_arsess_list(&arsess_list);
2474
if (time_stamp > pnext_time)
2475
pnext_time = time_stamp + ps_config.prc_collection_interval;
2480
static time_t start_time;
2482
int psStartCollector(void)
2484
static int initialized = 0;
2485
#ifdef PDC_STANDALONE
2500
* supplementary groups in proc filesystem?
2502
sup_grp_in_proc = groups_in_proc();
2505
LNK_INIT(&job_list);
2506
start_time = get_gmt();
2509
#ifdef PDC_STANDALONE
2510
/* Length of struct (set@run-time) */
2511
sysdata.sys_length = sizeof(sysdata);
2515
pagesize = getpagesize();
2517
/* retrieve static parameters */
2518
#if defined(LINUX) || defined(ALINUX) || defined(IRIX) || defined(SOLARIS) || defined(DARWIN) || defined(FREEBSD) || defined(NETBSD) || defined(HP1164)
2519
#ifdef PDC_STANDALONE
2520
ncpus = sge_nprocs();
2522
#elif defined(ALPHA)
2524
#ifdef PDC_STANDALONE
2525
/* Number of CPUs */
2526
ncpus = sge_nprocs();
2527
if (getsysinfo(GSI_PHYSMEM, (caddr_t)&physical_memory,sizeof(int),0,NULL)==-1) {
2532
if ((getsysinfo(GSI_BOOTEDFILE, &unixname[1],
2533
sizeof(unixname)-1, /* destination starts at unixname[1], so one byte less is available */
NULL, NULL)) <= 0) {
2534
strcpy(unixname, _PATH_UNIX);
2537
if (nlist(unixname, mem_nl) == -1) {
2540
if (mem_nl[PERFSUM].n_value == 0) {
2544
if ((kmem_fd = open(_PATH_KMEM,O_RDONLY,0)) == -1) {
2552
#ifdef PDC_STANDALONE
2553
ncpus = 0; /* Set in psRetrieveSysData because it is dynamic on Cray */
2557
#ifdef PDC_STANDALONE
2558
sysdata.sys_ncpus = ncpus;
2564
int psStopCollector(void)
2574
int psWatchJob(JobID_t JobID)
2581
df = fopen("/tmp/pacct.out", "w");
2582
fprintf(df, "%d watching "F64"\n", get_gmt(), JobID);
2587
/* if job to watch is not already in the list then add it */
2589
if ((curr=find_job(JobID))) {
2590
LNK_DATA(curr, job_elem_t, link)->precreated = 0;
2592
job_elem_t *job_elem = (job_elem_t *)malloc(sizeof(job_elem_t));
2593
memset(job_elem, 0, sizeof(job_elem_t));
2594
job_elem->starttime = get_gmt();
2595
job_elem->job.jd_jid = JobID;
2596
job_elem->job.jd_length = sizeof(psJob_t);
2597
LNK_INIT(&job_elem->procs);
2598
LNK_INIT(&job_elem->arses);
2599
/* add to job list */
2600
LNK_ADD(job_list.prev, &job_elem->link);
2607
int psIgnoreJob(JobID_t JobID) {
2610
/* if job is in the list, remove it */
2612
if ((curr = find_job(JobID))) {
2614
free_job(LNK_DATA(curr, job_elem_t, link));
2621
struct psStat_s *psStatus(void)
2624
static time_t last_time_stamp;
2625
time_t time_stamp = get_gmt();
2627
if ((pstat = (psStat_t *)malloc(sizeof(psStat_t)))==NULL) {
2631
/* Length of struct (set@run-time) */
2632
pstat->stat_length = sizeof(psStat_t);
2634
/* Time of last sample */
2635
pstat->stat_tstamp = last_time_stamp;
2638
pstat->stat_ifmpid = getpid();
2641
pstat->stat_DCpid = getpid();
2644
pstat->stat_IFMpid = getpid();
2646
/* elapsed time (to *now*, not snap) */
2647
pstat->stat_elapsed = time_stamp - start_time;
2649
/* user CPU time used by DC */
2650
pstat->stat_DCutime = 0;
2652
/* sys CPU time used by DC */
2653
pstat->stat_DCstime = 0;
2655
/* user CPU time used by IFM */
2656
pstat->stat_IFMutime = 0;
2658
/* sys CPU time used by IFM */
2659
pstat->stat_IFMstime = 0;
2661
/* number of jobs tracked */
2662
pstat->stat_jobcount = get_numjobs();
2664
last_time_stamp = time_stamp;
2670
struct psJob_s *psGetOneJob(JobID_t JobID)
2674
job_elem_t *job_elem = NULL;
2681
/* retrieve job data */
2682
psRetrieveOSJobData();
2684
/* see if job is in list */
2686
for (curr=job_list.next; curr != &job_list; curr=curr->next) {
2687
job_elem = LNK_DATA(curr, job_elem_t, link);
2688
if (job_elem->precreated) continue; /* skip precreated jobs */
2689
if (job_elem->job.jd_jid == JobID) {
2696
unsigned long rsize;
2698
job = &job_elem->job;
2699
rsize = sizeof(psJob_t) + job->jd_proccount * sizeof(psProc_t);
2700
if ((rjob = (struct rjob_s *)malloc(rsize))) {
2701
memcpy(&rjob->job, job, sizeof(psJob_t));
2706
for (currp=job_elem->procs.next; currp != &job_elem->procs;
2707
currp=currp->next) {
2708
psProc_t *proc = &(LNK_DATA(currp, proc_elem_t, link)->proc);
2709
memcpy(&rjob->proc[nprocs++], proc, sizeof(psProc_t));
2715
return (struct psJob_s *)rjob;
2719
struct psJob_s *psGetAllJobs(void)
2721
psJob_t *rjob, *jobs;
2724
uint64 jobcount = 0;
2726
/* retrieve job data */
2727
psRetrieveOSJobData();
2729
/* calculate size of return data */
2731
rsize = sizeof(uint64);
2736
for (curr=job_list.next; curr != &job_list; curr=curr->next) {
2737
job_elem_t *job_elem = LNK_DATA(curr, job_elem_t, link);
2738
psJob_t *job = &job_elem->job;
2739
if (job_elem->precreated) continue; /* skip precreated jobs */
2740
rsize += (sizeof(psJob_t) + (job->jd_proccount*sizeof(psProc_t)));
2744
/* allocate space for return data */
2745
if ((rjob = (psJob_t *)malloc(rsize)) == NULL)
2750
/* fill in return data */
2752
*(uint64 *)jobs = jobcount;
2754
INCJOBPTR(jobs, sizeof(uint64));
2759
/* copy the job data */
2760
for (curr=job_list.next; curr != &job_list; curr=curr->next) {
2761
job_elem_t *job_elem = LNK_DATA(curr, job_elem_t, link);
2762
psJob_t *job = &job_elem->job;
2765
if (job_elem->precreated) continue; /* skip precreated jobs */
2766
memcpy(jobs, job, sizeof(psJob_t));
2767
INCJOBPTR(jobs, sizeof(psJob_t));
2769
/* copy the process data */
2770
procs = (psProc_t *)jobs;
2773
for (currp=job_elem->procs.next; currp != &job_elem->procs; currp=currp->next) {
2774
psProc_t *proc = &(LNK_DATA(currp, proc_elem_t, link)->proc);
2775
memcpy(procs, proc, sizeof(psProc_t));
2776
INCPROCPTR(procs, sizeof(psProc_t));
2779
jobs = (psJob_t *)procs;
2787
#ifdef PDC_STANDALONE
2788
/*
 * psGetSysdata - hand out a heap copy of the file-level "sysdata" record.
 *
 * psRetrieveSystemData() is called first; afterwards a psSys_t is
 * allocated with malloc() and sizeof(psSys_t) bytes are copied from
 * sysdata into it.
 *
 * NOTE(review): the declaration of "sd", the body of the malloc-failure
 * branch and the return statement are not visible in this excerpt.
 */
struct psSys_s *psGetSysdata(void)
2792
/* go get system data */
2793
psRetrieveSystemData();
2795
if ((sd = (psSys_t *)malloc(sizeof(psSys_t))) == NULL) {
2798
/* copy the record, so the caller gets its own psSys_t */
memcpy(sd, &sysdata, sizeof(psSys_t));
2811
/*
 * INTOMEGS - scale a value by 1/(1024*1024) and yield the result as a
 * double.  The print routines below use it for the fields that are
 * printed with an "M" suffix.
 *
 * The argument is parenthesized before the cast so that a compound
 * expression (e.g. "a + b" or "c ? a : b") is converted as a whole
 * instead of having the cast bind to its first operand only.
 */
#define INTOMEGS(x) (((double)(x))/(1024*1024))
2818
/*
 * Usage text: prints MSG_SGE_USAGE surrounded by blank lines, followed by
 * one tab-indented line per option (-s -n -p -i -g -j -J -k -K -P -S),
 * each with its MSG_SGE_*_OPT_USAGE description.  Everything goes to
 * stderr.
 *
 * NOTE(review): the header of the enclosing function is not visible in
 * this excerpt.
 */
fprintf(stderr, "\n%s\n\n", MSG_SGE_USAGE);
2819
fprintf(stderr, "\t-s\t%s\n", MSG_SGE_s_OPT_USAGE);
2820
fprintf(stderr, "\t-n\t%s\n", MSG_SGE_n_OPT_USAGE);
2821
fprintf(stderr, "\t-p\t%s\n", MSG_SGE_p_OPT_USAGE);
2822
fprintf(stderr, "\t-i\t%s\n", MSG_SGE_i_OPT_USAGE);
2823
fprintf(stderr, "\t-g\t%s\n", MSG_SGE_g_OPT_USAGE);
2824
fprintf(stderr, "\t-j\t%s\n", MSG_SGE_j_OPT_USAGE);
2825
fprintf(stderr, "\t-J\t%s\n", MSG_SGE_J_OPT_USAGE);
2826
fprintf(stderr, "\t-k\t%s\n", MSG_SGE_k_OPT_USAGE);
2827
fprintf(stderr, "\t-K\t%s\n", MSG_SGE_K_OPT_USAGE);
2828
fprintf(stderr, "\t-P\t%s\n", MSG_SGE_P_OPT_USAGE);
2829
fprintf(stderr, "\t-S\t%s\n", MSG_SGE_S_OPT_USAGE);
2834
/*
 * print_job_data - write the fields of one job record to stdout, one
 * "name=value" line per field, below the MSG_SGE_JOBDATA heading.
 *
 * job: record to print; it is dereferenced without a NULL check.
 *
 * jd_chars, jd_vmem, jd_rss and jd_himem are passed through INTOMEGS()
 * and printed with an "M" suffix; the time fields use "%8.3f".
 */
print_job_data(psJob_t *job)
2836
printf("%s\n", MSG_SGE_JOBDATA );
2837
printf("jd_jid="OSJOBID_FMT"\n", job->jd_jid);
2838
printf("jd_length=%d\n", job->jd_length);
2839
printf("jd_uid="uid_t_fmt"\n", job->jd_uid);
2840
/* the gid is printed with the uid format macro */
printf("jd_gid="uid_t_fmt"\n", job->jd_gid);
2841
/* jd_acid is printed on IRIX and CRAY builds only */
#if defined(IRIX) || defined(CRAY)
2842
printf("jd_acid="F64"\n", job->jd_acid);
2844
/* ctime() text already ends in a newline, so a blank line follows */
printf("jd_tstamp=%s\n", ctime(&job->jd_tstamp));
2845
printf("jd_proccount=%d\n", (int)job->jd_proccount);
2846
printf("jd_refcnt=%d\n", (int)job->jd_refcnt);
2847
printf("jd_etime=%8.3f\n", job->jd_etime);
2848
printf("jd_utime_a=%8.3f\n", job->jd_utime_a);
2849
printf("jd_stime_a=%8.3f\n", job->jd_stime_a);
2851
printf("jd_bwtime_a=%8.3f\n", job->jd_bwtime_a);
2852
printf("jd_rwtime_a=%8.3f\n", job->jd_rwtime_a);
2854
printf("jd_srtime_a=%8.3f\n", job->jd_srtime_a);
2855
printf("jd_utime_c=%8.3f\n", job->jd_utime_c);
2856
printf("jd_stime_c=%8.3f\n", job->jd_stime_c);
2858
printf("jd_bwtime_c=%8.3f\n", job->jd_bwtime_c);
2859
printf("jd_rwtime_c=%8.3f\n", job->jd_rwtime_c);
2861
printf("jd_srtime_c=%8.3f\n", job->jd_srtime_c);
2862
printf("jd_mem="F64"\n", job->jd_mem);
2863
/* the next four values are scaled by INTOMEGS() before printing */
printf("jd_chars=%8.3fM\n", INTOMEGS(job->jd_chars));
2864
printf("jd_vmem=%8.3fM\n", INTOMEGS(job->jd_vmem));
2865
printf("jd_rss=%8.3fM\n", INTOMEGS(job->jd_rss));
2866
printf("jd_himem=%8.3fM\n", INTOMEGS(job->jd_himem));
2868
printf("jd_fsblks="F64"\n", job->jd_fsblks);
2872
/*
 * print_process_data - write the fields of one process record to stdout.
 *
 * Every line starts with a tab, so the output appears indented relative
 * to the job data printed by print_job_data().
 *
 * proc: record to print; it is dereferenced without a NULL check.
 */
print_process_data(psProc_t *proc)
2874
printf("\t******* Process Data *******\n");
2875
printf("\tpd_pid="pid_t_fmt"\n", proc->pd_pid);
2876
printf("\tpd_length=%d\n", (int)proc->pd_length);
2877
/* ctime() text already ends in a newline, so a blank line follows */
printf("\tpd_tstamp=%s\n", ctime(&proc->pd_tstamp));
2878
printf("\tpd_uid="uid_t_fmt"\n", proc->pd_uid);
2879
/* the gid is printed with the uid format macro */
printf("\tpd_gid="uid_t_fmt"\n", proc->pd_gid);
2880
printf("\tpd_acid="F64"\n", proc->pd_acid);
2881
printf("\tpd_state=%d\n", (int)proc->pd_state);
2882
printf("\tpd_pstart=%8.3f\n", proc->pd_pstart);
2883
printf("\tpd_utime=%8.3f\n", proc->pd_utime);
2884
printf("\tpd_stime=%8.3f\n", proc->pd_stime);
2890
/*
 * print_system_data - write the fields of one system record to stdout,
 * one "name=value" line per field, below the MSG_SGE_SYSTEMDATA heading.
 *
 * sys: record to print; it is dereferenced without a NULL check.
 *
 * The sys_swp_total/free/used/virt and sys_mem_avail/used values are
 * passed through INTOMEGS() and printed with an "M" suffix.
 */
print_system_data(psSys_t *sys)
2892
printf("%s\n", MSG_SGE_SYSTEMDATA );
2893
printf("sys_length=%d\n", (int)sys->sys_length);
2894
printf("sys_ncpus=%d\n", (int)sys->sys_ncpus);
2895
/* ctime() text already ends in a newline, so a blank line follows */
printf("sys_tstamp=%s\n", ctime(&sys->sys_tstamp));
2896
/* time counters: each value is printed in a "...t" and a plain variant */
printf("sys_ttimet=%8.3f\n", sys->sys_ttimet);
2897
printf("sys_ttime=%8.3f\n", sys->sys_ttime);
2898
printf("sys_utimet=%8.3f\n", sys->sys_utimet);
2899
printf("sys_utime=%8.3f\n", sys->sys_utime);
2900
printf("sys_stimet=%8.3f\n", sys->sys_stimet);
2901
printf("sys_stime=%8.3f\n", sys->sys_stime);
2902
printf("sys_itimet=%8.3f\n", sys->sys_itimet);
2903
printf("sys_itime=%8.3f\n", sys->sys_itime);
2904
printf("sys_srtimet=%8.3f\n", sys->sys_srtimet);
2905
printf("sys_srtime=%8.3f\n", sys->sys_srtime);
2906
printf("sys_wtimet=%8.3f\n", sys->sys_wtimet);
2907
printf("sys_wtime=%8.3f\n", sys->sys_wtime);
2909
/* swap figures, scaled by INTOMEGS() (sys_swp_rate is printed as is) */
printf("sys_swp_total=%8.3fM\n", INTOMEGS(sys->sys_swp_total));
2910
printf("sys_swp_free=%8.3fM\n", INTOMEGS(sys->sys_swp_free));
2911
printf("sys_swp_used=%8.3fM\n", INTOMEGS(sys->sys_swp_used));
2912
printf("sys_swp_virt=%8.3fM\n", INTOMEGS(sys->sys_swp_virt));
2913
printf("sys_swp_rate=%8.3f\n", sys->sys_swp_rate);
2914
/* memory figures, scaled by INTOMEGS() */
printf("sys_mem_avail=%8.3fM\n", INTOMEGS(sys->sys_mem_avail));
2915
printf("sys_mem_used=%8.3fM\n", INTOMEGS(sys->sys_mem_used));
2917
printf("sys_swpocc=%8.3f\n", sys->sys_swpocc);
2918
printf("sys_swpque=%8.3f\n", sys->sys_swpque);
2919
printf("sys_runocc=%8.3f\n", sys->sys_runocc);
2920
printf("sys_runque=%8.3f\n", sys->sys_runque);
2921
printf("sys_readch="F64"\n", sys->sys_readch);
2922
printf("sys_writech="F64"\n", sys->sys_writech);
2928
/*
 * print_status - write the fields of one status record to stdout, one
 * "name=value" line per field, below the MSG_SGE_STATUS heading.
 *
 * stat: record to print; it is dereferenced without a NULL check.
 *
 * The pid, elapsed and jobcount fields are cast to int for printing; the
 * DC/IFM user and system times use "%8.3f".
 */
print_status(psStat_t *stat)
2930
printf("%s\n", MSG_SGE_STATUS );
2931
printf("stat_length=%d\n", (int)stat->stat_length);
2932
/* ctime() text already ends in a newline, so a blank line follows */
printf("stat_tstamp=%s\n", ctime(&stat->stat_tstamp));
2933
/* stat_ifmpid and stat_IFMpid are two distinct fields (they differ in case) */
printf("stat_ifmpid=%d\n", (int)stat->stat_ifmpid);
2934
printf("stat_DCpid=%d\n", (int)stat->stat_DCpid);
2935
printf("stat_IFMpid=%d\n", (int)stat->stat_IFMpid);
2936
printf("stat_elapsed=%d\n", (int)stat->stat_elapsed);
2937
printf("stat_DCutime=%8.3f\n", stat->stat_DCutime);
2938
printf("stat_DCstime=%8.3f\n", stat->stat_DCstime);
2939
printf("stat_IFMutime=%8.3f\n", stat->stat_IFMutime);
2940
printf("stat_IFMstime=%8.3f\n", stat->stat_IFMstime);
2941
printf("stat_jobcount=%d\n", (int)stat->stat_jobcount);
2946
/*
 * main - command-line driver for the data collector.
 *
 * Steps visible in this function:
 *   1. build two title strings and, when gettext support is compiled in,
 *      set up message translation;
 *   2. parse options with getopt() (option string "g1snpi:S:J:P:j:k:K:");
 *   3. read every remaining argument as an OS job id and pass it to
 *      psWatchJob();
 *   4. hand the job/process/system intervals to
 *      psSetCollectionIntervals();
 *   5. allocate per-job CPU arrays and print an export-file header;
 *   6. fetch status, system and job data; print it, send signals to job
 *      processes, and print per-job CPU shares.
 *
 * NOTE(review): many lines of this function (declarations, case labels,
 * braces, else branches, exits) are not visible in this excerpt, so the
 * conditions and loops that enclose the steps above must be confirmed
 * against the full file.
 */
main(int argc, char **argv)
2948
char sgeview_bar_title[256] = "";
2949
char sgeview_window_title[256] = "";
2953
extern char *optarg;
2958
int use_getonejob = 0;
2964
/* collection intervals; -1 is kept unless an option overrides it */
int sysi=-1, jobi=-1, prci=-1;
2966
double *curr_cpu=NULL, *prev_cpu=NULL, *diff_cpu=NULL;
2967
int jobid_count = 0;
2972
/* ds is backed by the fixed "buffer"; it receives the product name below */
sge_dstring_init(&ds, buffer, sizeof(buffer));
2973
/* the precisions (250, and 100 + 1 + 150) keep both titles within 256 bytes */
sprintf(sgeview_bar_title, "%-.250s", MSG_SGE_CPUUSAGE );
2974
sprintf(sgeview_window_title, "%-.100s %-.150s", feature_get_product_name(FS_SHORT, &ds) ,MSG_SGE_SGEJOBUSAGECOMPARSION );
2976
#ifdef __SGE_COMPILE_WITH_GETTEXT__
2977
/* init language output for gettext() , it will use the right language */
2978
/*
 * NOTE(review): the other main() at the top of this file calls
 * sge_init_language_func() and sge_init_language() for the same purpose;
 * confirm that install_language_func() and sge_lang_init() still exist.
 */
install_language_func((gettext_func_type) gettext,
2979
(setlocale_func_type) setlocale,
2980
(bindtextdomain_func_type) bindtextdomain,
2981
(textdomain_func_type) textdomain);
2982
sge_lang_init(NULL,NULL);
2983
#endif /* __SGE_COMPILE_WITH_GETTEXT__ */
2992
/* options followed by ':' in the option string take an argument */
while ((c = getopt(argc, argv, "g1snpi:S:J:P:j:k:K:")) != -1)
2998
/*
 * keep the option argument as a job label; the labels are printed later
 * as "ljob <label>".
 * NOTE(review): no bound check on jobid_count is visible here.
 */
jobids[jobid_count++] = optarg;
3002
/* no break here, fall into 'k' case */
3005
/* signo is the signal number later passed to kill() */
if (sscanf(optarg, "%d", &signo)!=1) {
3006
fprintf(stderr, MSG_SGE_XISNOTAVALIDSIGNALNUMBER_S , optarg);
3007
fprintf(stderr, "\n");
3025
/* system interval (third argument of psSetCollectionIntervals) */
if (sscanf(optarg, "%d", &sysi)!=1) {
3026
fprintf(stderr, MSG_SGE_XISNOTAVALIDINTERVAL_S, optarg);
3027
/*
 * NOTE(review): a "not a valid signal number" message is printed here
 * after the interval message, where the check on "interval" below prints
 * a newline instead - this looks like a copy/paste slip; confirm.
 */
fprintf(stderr, MSG_SGE_XISNOTAVALIDSIGNALNUMBER_S , optarg);
3033
/* process interval (second argument of psSetCollectionIntervals) */
if (sscanf(optarg, "%d", &prci)!=1) {
3034
fprintf(stderr, MSG_SGE_XISNOTAVALIDINTERVAL_S, optarg);
3040
/* job interval (first argument of psSetCollectionIntervals) */
if (sscanf(optarg, "%d", &jobi)!=1) {
3041
fprintf(stderr, MSG_SGE_XISNOTAVALIDINTERVAL_S, optarg);
3047
if (sscanf(optarg, "%d", &interval)!=1) {
3048
fprintf(stderr, MSG_SGE_XISNOTAVALIDINTERVAL_S, optarg);
3049
fprintf(stderr, "\n");
3060
/* every non-option argument must parse as an OS job id */
for (arg=optind; arg<argc; arg++) {
3061
if (sscanf(argv[arg], OSJOBID_FMT, &osjobid) != 1) {
3062
fprintf(stderr, MSG_SGE_XISNOTAVALIDJOBID_S , argv[arg]);
3063
fprintf(stderr, "\n");
3066
psWatchJob(osjobid);
3070
psSetCollectionIntervals(jobi, prci, sysi);
3074
/* values written into the export-file header below */
int base_interval = 2; /* in tenths of a second */
3075
int sample_rate = 1;
3076
int num_samples = 1000;
3077
int use_winsize = 0;
3079
/*
 * three zero-filled arrays of numjobs doubles: current and previous CPU
 * time per job, and their difference.
 * NOTE(review): the malloc() results are passed to memset() unchecked.
 */
curr_cpu = (double *)malloc(numjobs * sizeof(double));
3080
memset(curr_cpu, 0, numjobs*sizeof(double));
3081
prev_cpu = (double *)malloc(numjobs * sizeof(double));
3082
memset(prev_cpu, 0, numjobs*sizeof(double));
3083
diff_cpu = (double *)malloc(numjobs * sizeof(double));
3084
memset(diff_cpu, 0, numjobs*sizeof(double));
3086
/*
 * export-file header; the "=14 ..." and "=11 ..." records are settings
 * for the consumer of this output, and only the meanings noted inline
 * are documented here.
 */
printf("%s\n", MSG_SGE_GROSVIEWEXPORTFILE );
3087
printf("=14 3\n"); /* arbsize */
3088
printf("=14 2 1\n");
3090
printf("=14 6 800 100\n"); /* winsize(x,y) */
3091
printf("=14 9 %d\n", base_interval);
3092
printf("=14 7 46\n");
3093
printf("=14 8 0\n");
3094
/* numjobs+1: one value per job plus the remainder printed after the shares */
printf("=11 0 0x20000 0x4 0x%x 0x%x 0.000 1.000 0x%x 0 0 0 0 0 0x%x "
3095
"0 0 0x2e 0 0 0x1 0x7 0x4 0x%x 0x%x 0 0 0 0x4 0x1 0x6 "
3096
"0x%x 0x5 0x2e\n", sample_rate, sample_rate, numjobs+1,
3097
num_samples, sample_rate, sample_rate, numjobs+1);
3098
printf("h%s \n", sgeview_bar_title);
3099
/* one label per job: the saved option argument if there is one, else i+1 */
for (i=0; i<numjobs; i++)
3100
if (jobid_count > i)
3101
printf("ljob %s \n", jobids[i]);
3103
printf("ljob %d \n", i+1);
3106
printf("%s\n", sgeview_window_title);
3112
/* ojob keeps the start of the job buffer for free(); jobs is advanced */
psJob_t *jobs, *ojob;
3114
psStat_t *stat = NULL;
3115
psSys_t *sys = NULL;
3116
int jobcount, proccount, i, j, activeprocs;
3124
if (!sgeview && system) {
3126
if ((stat = psStatus()))
3130
if ((sys = psGetSysdata()))
3132
print_system_data(sys);
3136
ojob = jobs = psGetAllJobs();
3138
/* the buffer starts with the job count, followed by the job records */
jobcount = *(uint64 *)jobs;
3139
INCJOBPTR(jobs, sizeof(uint64));
3140
for (i=0; i<jobcount; i++) {
3143
/*
 * CPU time of a job = user + system time of its "_a" and "_c" counters.
 * NOTE(review): the arrays were sized with numjobs but are indexed up to
 * jobcount; assumes jobcount <= numjobs - confirm.
 */
prev_cpu[i] = curr_cpu[i];
3144
curr_cpu[i] = jobs->jd_utime_a + jobs->jd_stime_a +
3145
jobs->jd_utime_c + jobs->jd_stime_c;
3146
} else if (use_getonejob) {
3149
/* fetch this job again on its own; ojp keeps the start of that buffer */
if ((ojp = jp = psGetOneJob(jobs->jd_jid))) {
3150
if (verbose && !killjob)
3152
proccount = jp->jd_proccount;
3153
/* process records follow the job record, jd_length bytes further on */
INCJOBPTR(jp, jp->jd_length);
3154
pp = (psProc_t *)jp;
3155
for (j=0; j<proccount; j++) {
3156
if (verbose && showproc)
3157
print_process_data(pp);
3158
INCPROCPTR(pp, pp->pd_length);
3163
} else if (verbose && !killjob)
3164
print_job_data(jobs);
3166
proccount = jobs->jd_proccount;
3167
activeprocs += proccount;
3168
/* step over the job record to its first process record */
INCJOBPTR(jobs, jobs->jd_length);
3169
procs = (psProc_t *)jobs;
3171
for (j=0; j<proccount; j++) {
3173
/* signal a process only as SGE_SUPERUSER_UID or as the owner of the process */
if (getuid() == SGE_SUPERUSER_UID ||
3174
getuid() == procs->pd_uid) {
3175
if (kill(procs->pd_pid, signo)<0) {
3177
sprintf(buf, "kill("pid_t_fmt", %d)", procs->pd_pid, signo);
3180
printf("kill("pid_t_fmt", %d) issued\n", procs->pd_pid, signo);
3182
/* presumably the branch taken when the uid check above fails - confirm */
fprintf(stderr, "kill: "pid_t_fmt ": %s\n",
3183
procs->pd_pid, MSG_SGE_PERMISSIONDENIED);
3186
if (verbose && showproc && !use_getonejob)
3187
print_process_data(procs);
3188
INCPROCPTR(procs, procs->pd_length);
3191
/* the next job record starts behind the last process record */
jobs = (psJob_t *)procs;
3195
printf("%s\n", MSG_SGE_NOJOBS );
3197
if (sgeview && jobcount>0) {
3199
double cpu_pct, total_cpu = 0, total_cpu_pct = 0;
3201
/* CPU time used by each job since the previous sample, and the total */
for(i=0; i<jobcount; i++) {
3202
diff_cpu[i] = curr_cpu[i] - prev_cpu[i];
3203
total_cpu += diff_cpu[i];
3207
for(i=0; i<jobcount; i++) {
3210
/*
 * share of each job in the total.
 * NOTE(review): a guard against total_cpu == 0 is not visible in this
 * excerpt - confirm that one exists.
 */
cpu_pct = diff_cpu[i] / total_cpu;
3211
total_cpu_pct += cpu_pct;
3212
printf("%8.5f ", cpu_pct);
3214
/* remainder, so that the printed values add up to 1.0 */
printf("%8.5f ", 1.0 - total_cpu_pct);
3219
/* release the buffers obtained from psGetAllJobs() and psStatus() */
if (ojob) free(ojob);
3220
if (stat) free(stat);
3223
/* the statement controlled by this condition is not visible in this excerpt */
if (killjob && (!forcekill || activeprocs == 0))
3233
#endif /* !defined(COMPILE_DC) */