1
/*___INFO__MARK_BEGIN__*/
2
/*************************************************************************
4
* The Contents of this file are made available subject to the terms of
5
* the Sun Industry Standards Source License Version 1.2
7
* Sun Microsystems Inc., March, 2001
10
* Sun Industry Standards Source License Version 1.2
11
* =================================================
12
* The contents of this file are subject to the Sun Industry Standards
13
* Source License Version 1.2 (the "License"); You may not use this file
14
* except in compliance with the License. You may obtain a copy of the
15
* License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html
17
* Software provided under this License is provided on an "AS IS" basis,
18
* WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
19
* WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
20
* MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
21
* See the License for the specific provisions governing your rights and
22
* obligations concerning the Software.
24
* The Initial Developer of the Original Code is: Sun Microsystems, Inc.
26
* Copyright: 2001 by Sun Microsystems, Inc.
28
* All Rights Reserved.
30
************************************************************************/
31
/*___INFO__MARK_END__*/
32
#if !defined(COMPILE_DC)
42
#include <sys/ioctl.h>
43
#include <sys/types.h>
44
#include <sys/signal.h>
46
#if !defined(CRAY) && !defined(NECSX4) && !defined(NECSX5) && !defined(AIX)
47
#include <sys/syscall.h>
51
#include <sys/times.h>
53
#if defined(FREEBSD) || defined(DARWIN)
56
#include <sys/resource.h>
63
# include <sys/user.h>
64
# include <sys/table.h>
65
# include <sys/procfs.h>
69
# include <sys/procfs.h>
73
#include <sys/param.h> /* for HZ (jiffies -> seconds ) */
76
#include "uti/sge_stdio.h"
77
#include "uti/sge_unistd.h"
82
#include "basis_types.h"
91
#if defined(LINUX) || defined(ALPHA) || defined(SOLARIS)
93
# define PROC_DIR "/debug"
95
# define PROC_DIR "/proc"
103
/*
 * One record of the Linux /proc/pid/stat file, member by member.
 * The number that opens each member comment is the position the original
 * author assigned to that member (1 through 35); it appears to be the
 * 1-based index of the corresponding field in the stat line.
 * The closing line of the typedef is not part of this excerpt.
 */
typedef struct _tLinProcStat { /* process Linux /proc/pid/stat structure */
104
int pr_pid; /* 1 process id */
105
char pr_cmd[16]; /* 2 command name */
106
char pr_stat; /* 3 process status */
107
int pr_ppid; /* 4 parent process id */
108
int pr_pgrp; /* 5 process group */
109
int pr_sid; /* 6 session id */
110
int pr_tty; /* 7 tty line MAJOR << ? + MINOR */
111
int pr_tty_pgrp; /* 8 tty process group */
112
long pr_flags; /* 9 process flags PF_* in <linux/sched.h> */
113
long pr_min_flt; /* 10 minor page faults */
114
long pr_cmin_flt; /* 11 children minor page faults */
115
long pr_maj_flt; /* 12 major page faults */
116
long pr_cmaj_flt; /* 13 children major page faults */
117
long pr_utime; /* 14 user time */
118
long pr_stime; /* 15 system time */
119
long pr_cutime; /* 16 children user time */
120
long pr_cstime; /* 17 children system time */
121
long pr_counter; /* 18 jiffies */
122
long pr_pri; /* 19 priority (nice) */
123
long pr_tmout; /* 20 Timeout time for scheduling */
124
long pr_it_real_value; /* 21 itimer real value */
125
long pr_start; /* 22 start of execution in jiffies since boot*/
126
long pr_vsize; /* 23 total size t + d + s NOT pages */
127
long pr_rss; /* 24 resident set size pages */
128
long pr_rlim_cur; /* 25 current rlimit on rss */
129
long pr_start_code; /* 26 start of code */
130
long pr_end_code; /* 27 end of code */
131
long pr_start_stack; /* 28 start of stack */
132
long pr_esp; /* 29 head of stack (stack pointer) */
133
long pr_eip; /* 30 instruction pointer */
134
long pr_signal; /* 31 pending signals mask */
135
long pr_blocked; /* 32 blocked signals mask */
136
long pr_sigignore; /* 33 ignored signals mask */
137
long pr_sigcatch; /* 34 caught signals mask */
138
long pr_wchan; /* 35 WCHAN (seems to be a return address) */
143
/*-----------------------------------------------------------------------*/
144
#if defined(LINUX) || defined(ALPHA) || defined(SOLARIS)
147
static struct dirent *dent;
151
/*
 * groups_in_proc - probe the status file of the calling process for a
 * Groups: line.
 *
 * Opens the file self/status below PROC_DIR and reads it line by line,
 * comparing the first tab-delimited token of every line with Groups:.
 * The values returned for the found and not-found cases, and the handling
 * of the open failure, are on lines that are not visible in this excerpt.
 *
 * NOTE(review): the strtok result is handed to strcmp without a NULL
 * check; a line made up only of tab characters would presumably yield
 * NULL - confirm that such a line cannot occur.
 */
int groups_in_proc (void)
154
FILE* fd = (FILE*) NULL;
156
if (!(fd = fopen(PROC_DIR "/self/status", "r"))) {
159
while (fgets(buf, sizeof(buf), fd)) {
160
if (strcmp("Groups:", strtok(buf, "\t"))==0) {
174
/*
 * find_pid_in_jobs - locate the proc-list link that belongs to a process id.
 *
 * pid       process id compared against proc.pd_pid of each proc element
 * job_list  list head; the traversal starts at job_list->next and ends
 *           when it arrives back at job_list
 *
 * For every job the procs list of that job is walked the same way, from
 * procs.next until the head &job_elem->procs is reached again, and the
 * inner walk stops at the first element whose pd_pid equals pid.
 * The return statements are not part of this excerpt; the caller in
 * touch_time_stamp treats a non-NULL result as the link of the match.
 */
/* search in job list for the pid
175
return the proc element */
176
static lnk_link_t *find_pid_in_jobs(pid_t pid, lnk_link_t *job_list)
178
lnk_link_t *job, *proc = NULL;
179
proc_elem_t *proc_elem = NULL;
180
job_elem_t *job_elem = NULL;
183
* try to find a matching job
185
for (job=job_list->next; job != job_list; job=job->next) {
187
job_elem = LNK_DATA(job, job_elem_t, link);
190
* try to find process in this jobs' proc list
193
for (proc=job_elem->procs.next; proc != &job_elem->procs;
196
proc_elem = LNK_DATA(proc, proc_elem_t, link);
197
if (proc_elem->proc.pd_pid == pid)
198
break; /* found it */
201
if (proc == &job_elem->procs) {
202
/* end of procs list - no process found - try next job */
205
/* found a process */
213
/*
 * touch_time_stamp - refresh the time stamp of an already known process.
 *
 * d_name      text from which the pid is parsed with sscanf and pid_t_fmt;
 *             the callers visible in this file pass dent->d_name
 * time_stamp  value stored into proc.pd_tstamp of the matching proc element
 * job_list    job list handed on to find_pid_in_jobs
 *
 * One INFO line is logged in either case: one text when a proc element was
 * found and stamped, a different text when find_pid_in_jobs found nothing.
 *
 * NOTE(review): the sscanf result is not checked on the visible line, so a
 * d_name that does not parse presumably leaves pid at its previous value;
 * the declaration of pid is not visible here - confirm.
 */
static void touch_time_stamp(const char *d_name, int time_stamp, lnk_link_t *job_list)
216
proc_elem_t *proc_elem;
219
DENTER(TOP_LAYER, "touch_time_stamp");
221
sscanf(d_name, pid_t_fmt, &pid);
222
if ((proc = find_pid_in_jobs(pid, job_list))) {
223
proc_elem = LNK_DATA(proc, proc_elem_t, link);
224
proc_elem->proc.pd_tstamp = time_stamp;
226
INFO((SGE_EVENT, "found job to process %s: set time stamp\n", d_name));
231
INFO((SGE_EVENT, "found no job to process %s\n", d_name));
238
/*
 * procfs_kill_addgrpid - walk the process file system and act on every
 * process that carries add_grp_id among its supplementary group ids.
 *
 * add_grp_id      additional group id to look for; 0 is treated as invalid
 *                 and takes the quick-return branch
 * sig             signal number; the call that delivers it is on a line
 *                 that is not visible in this excerpt
 * shepherd_trace  optional callback; when it is non-NULL, status and error
 *                 texts are formatted into err_str and passed to it
 *
 * Visible steps:
 *  - the size of the group list comes from
 *    sge_sysconf(SGE_SYSCONF_NGROUPS_MAX) and the list is allocated with
 *    malloc; the NGROUPS_MAX and malloc-failed messages are passed to
 *    shepherd_trace
 *  - directory entries are read with readdir; an empty name, the entries
 *    . and .., and names for which atoi yields 0 are tested for and
 *    presumably skipped
 *  - on SOLARIS and ALPHA credentials and groups are fetched with the
 *    PIOCCRED and PIOCGROUPS ioctls on PROC_DIR/name; the other branch
 *    parses the Groups:, Uid: and Gid: lines of PROC_DIR/name/status
 *  - when one of the collected groups equals add_grp_id the pid is taken
 *    from the entry name; a process whose real and effective uid and gid
 *    are all 0 is presumably only reported through the do-not-kill message
 *
 * NOTE(review): in the status-file branch list[groups] is written for each
 * token of the Groups: line and no comparison against max_groups is
 * visible - confirm the index cannot run past the allocation.
 * NOTE(review): label comes straight from strtok and is passed to strcmp;
 * a line consisting only of delimiters would presumably yield NULL -
 * verify.
 */
void procfs_kill_addgrpid(gid_t add_grp_id, int sig,
239
tShepherd_trace shepherd_trace)
246
#if defined(SOLARIS) || defined(ALPHA)
256
DENTER(TOP_LAYER, "procfs_kill_addgrpid");
258
/* quick return in case of invalid add. group id */
259
if (add_grp_id == 0) {
264
max_groups = sge_sysconf(SGE_SYSCONF_NGROUPS_MAX);
266
if (shepherd_trace) {
269
sprintf(err_str, MSG_SGE_NGROUPS_MAXOSRECONFIGURATIONNECESSARY );
270
shepherd_trace(err_str);
273
* INSURE detects a WRITE_OVERFLOW when getgroups was invoked (LINUX).
274
* Is this a bug in the kernel or in INSURE?
277
list = (gid_t*) malloc(2*max_groups*sizeof(gid_t));
279
list = (gid_t*) malloc(max_groups*sizeof(gid_t));
282
if (shepherd_trace) {
285
sprintf(err_str, MSG_SGE_PROCFSKILLADDGRPIDMALLOCFAILED );
286
shepherd_trace(err_str);
291
/* find next valid entry in procfs */
292
while ((dent = readdir(cwd))) {
295
if (!dent->d_name[0])
298
if (!strcmp(dent->d_name, "..") || !strcmp(dent->d_name, "."))
301
if (atoi(dent->d_name) == 0)
304
#if defined(SOLARIS) || defined(ALPHA)
305
sprintf(procnam, "%s/%s", PROC_DIR, dent->d_name);
306
if ((fd = open(procnam, O_RDONLY, 0)) == -1) {
307
DPRINTF(("open(%s) failed: %s\n", procnam, strerror(errno)));
311
if (!strcmp(dent->d_name, "self"))
314
sprintf(procnam, "%s/%s/status", PROC_DIR, dent->d_name);
315
if (!(fp = fopen(procnam, "r")))
319
#if defined(SOLARIS) || defined(ALPHA)
320
/* get number of groups */
321
if (ioctl(fd, PIOCCRED, &proc_cred) == -1) {
326
/* get list of supplementary groups */
327
groups = proc_cred.pr_ngroups;
328
if (ioctl(fd, PIOCGROUPS, list) == -1) {
334
/* get number of groups and current uids, gids
335
* uids[0], gids[0] => UID and GID
336
* uids[1], gids[1] => EUID and EGID
337
* uids[2], gids[2] => SUID and SGID
338
* uids[3], gids[3] => FSUID and FSGID
341
while (fgets(buffer, sizeof(buffer), fp)) {
345
label = strtok(buffer, " \t\n");
347
if (!strcmp("Groups:", label)) {
348
while ((token = strtok((char*) NULL, " \t\n"))) {
349
list[groups]=(gid_t) atol(token);
352
} else if (!strcmp("Uid:", label)) {
355
while ((i < 4) && (token = strtok((char*) NULL, " \t\n"))) {
356
uids[i]=(uid_t) atol(token);
359
} else if (!strcmp("Gid:", label)) {
362
while ((i < 4) && (token = strtok((char*) NULL, " \t\n"))) {
363
gids[i]=(gid_t) atol(token);
371
#if defined(SOLARIS) || defined(ALPHA)
378
/* send the signal to each process which belongs to add_grp_id */
379
for (i = 0; i < groups; i++) {
380
if (list[i] == add_grp_id) {
382
pid = (pid_t) atol(dent->d_name);
385
/* if UID, GID, EUID and EGID == 0
386
* don't kill the process!!! - it could be the rpc.nfs-deamon
388
if (!(uids[0] == 0 && gids[0] == 0 &&
389
uids[1] == 0 && gids[1] == 0)) {
390
#elif defined(SOLARIS) || defined(ALPHA)
391
if (!(proc_cred.pr_ruid == 0 && proc_cred.pr_rgid == 0 &&
392
proc_cred.pr_euid == 0 && proc_cred.pr_egid == 0)) {
395
if (shepherd_trace) {
398
sprintf(err_str, MSG_SGE_KILLINGPIDXY_UI , sge_u32c(pid), groups);
399
shepherd_trace(err_str);
405
if (shepherd_trace) {
408
sprintf(err_str, MSG_SGE_DONOTKILLROOTPROCESSXY_UI ,
409
sge_u32c(atol(dent->d_name)), groups);
410
shepherd_trace(err_str);
425
cwd = opendir(PROC_DIR);
433
/*
 * pt_dispatch_proc_to_job - examine the next procfs entry and account its
 * usage to the job it belongs to.
 *
 * job_list  list of jobs the process is matched against; the remaining
 *           parameters and the return values are on lines that are not
 *           visible in this excerpt (time_stamp is used below and is
 *           presumably one of the parameters)
 *
 * Visible flow:
 *  - entries are taken from readdir until one can be processed; an
 *    existing comment states that one entry is handled per call
 *  - names are screened (empty name, . and .., an optional leading dot,
 *    atoi of the name being 0) before PROC_DIR/name/stat or PROC_DIR/name
 *    is opened
 *  - when open, read, fstat or one of the ioctls fails, an INFO line is
 *    logged if errno is not ENOENT, and touch_time_stamp appears on the
 *    same failure path
 *  - the supplementary group ids of the process are collected in list and
 *    compared with jd_jid of every job in job_list
 *  - a process that is not found in the procs list of the matching job
 *    gets a freshly allocated, zeroed proc_elem_t that is linked in with
 *    LNK_ADD at procs.prev, and jd_proccount is incremented
 *  - user and system time, vmem and, where the platform branch provides
 *    them, rss, uid, gid, start time and io block counts are then stored
 *    in the proc element
 *
 * NOTE(review): after the read call the terminator is stored at
 * BIGLINE-1, not at the number of bytes read; unless buffer is cleared on
 * a line that is not visible here, the parser may see stale bytes -
 * confirm.
 * NOTE(review): the scan format starts with an unbounded %s conversion;
 * presumably its target is pr_cmd, which holds 16 bytes - confirm.
 * NOTE(review): the INFO text logged after the PIOCGROUPS ioctl still
 * names PIOCCRED, which looks like a copy of the message above it.
 * NOTE(review): list[groups] is filled from the Groups: tokens without a
 * visible comparison against max_groups - confirm the bound.
 */
int pt_dispatch_proc_to_job(
434
lnk_link_t *job_list,
440
char buffer[BIGLINE];
448
#if defined(SOLARIS) || defined(ALPHA)
457
proc_elem_t *proc_elem = NULL;
458
job_elem_t *job_elem = NULL;
463
DENTER(TOP_LAYER, "pt_dispatch_proc_to_job");
465
max_groups = sge_sysconf(SGE_SYSCONF_NGROUPS_MAX);
466
if (max_groups <= 0) {
467
ERROR((SGE_EVENT, MSG_SGE_NGROUPS_MAXOSRECONFIGURATIONNECESSARY));
472
list = (gid_t*) malloc(max_groups*sizeof(gid_t));
474
ERROR((SGE_EVENT, MSG_SGE_PTDISPATCHPROCTOJOBMALLOCFAILED));
479
/* find next valid entry in procfs */
480
while ((dent = readdir(cwd))) {
485
if (!dent->d_name[0])
488
if (!strcmp(dent->d_name, "..") || !strcmp(dent->d_name, "."))
491
if (dent->d_name[0] == '.')
492
pidname = &dent->d_name[1];
494
pidname = dent->d_name;
496
if (atoi(pidname) == 0)
500
sprintf(procnam, "%s/%s/stat", PROC_DIR, dent->d_name);
502
sprintf(procnam, "%s/%s", PROC_DIR, dent->d_name);
504
if ((fd = open(procnam, O_RDONLY, 0)) == -1) {
505
if (errno != ENOENT) {
508
INFO((SGE_EVENT, "(uid:"gid_t_fmt" euid:"gid_t_fmt") could not open %s: %s\n",
509
getuid(), geteuid(), procnam, strerror(errno)));
511
INFO((SGE_EVENT, "could not open %s: %s\n", procnam, strerror(errno)));
513
touch_time_stamp(dent->d_name, time_stamp, job_list);
519
** get a list of supplementary group ids to decide
520
** whether this process will be needed;
521
** read also prstatus
527
* Read the line and append a 0-Byte
529
if ((ret = read(fd, buffer, BIGLINE-1))<=0) {
531
if (ret == -1 && errno != ENOENT) {
533
INFO((SGE_EVENT, "could not read %s: %s\n", procnam, strerror(errno)));
535
touch_time_stamp(dent->d_name, time_stamp, job_list);
539
buffer[BIGLINE-1] = '\0';
541
if (SGE_FSTAT(fd, &fst)) {
543
if (errno != ENOENT) {
545
INFO((SGE_EVENT, "could not fstat %s: %s\n", procnam, strerror(errno)));
547
touch_time_stamp(dent->d_name, time_stamp, job_list);
556
"%d %s %c %d %d %d %d %d %lu %lu \
557
%lu %lu %lu %ld %ld %ld %ld %ld %ld %lu \
558
%lu %ld %lu %lu %lu %lu %lu %lu %lu %lu \
559
%lu %lu %lu %lu %lu",
580
&pr.pr_it_real_value,
602
* get number of groups;
603
* get list of supplementary groups
608
FILE* f = (FILE*) NULL;
610
sprintf(procnam, "%s/%s/status", PROC_DIR, dent->d_name);
611
if (!(f = fopen(procnam, "r"))) {
616
while (fgets(buf, sizeof(buf), f)) {
617
if (strcmp("Groups:", strtok(buf, "\t"))==0) {
620
while ((token=strtok((char*) NULL, " "))) {
621
list[groups]=atol(token);
631
# elif defined(SOLARIS) || defined(ALPHA)
636
if (ioctl(fd, PIOCSTATUS, &pr)==-1) {
638
if (errno != ENOENT) {
640
INFO((SGE_EVENT, "could not ioctl(PIOCSTATUS) %s: %s\n", procnam, strerror(errno)));
642
touch_time_stamp(dent->d_name, time_stamp, job_list);
648
* get number of groups
650
ret=ioctl(fd, PIOCCRED, &proc_cred);
653
if (errno != ENOENT) {
655
INFO((SGE_EVENT, "could not ioctl(PIOCCRED) %s: %s\n", procnam, strerror(errno)));
657
touch_time_stamp(dent->d_name, time_stamp, job_list);
663
* get list of supplementary groups
665
groups = proc_cred.pr_ngroups;
666
ret=ioctl(fd, PIOCGROUPS, list);
669
if (errno != ENOENT) {
671
INFO((SGE_EVENT, "could not ioctl(PIOCCRED) %s: %s\n", procnam, strerror(errno)));
673
touch_time_stamp(dent->d_name, time_stamp, job_list);
681
* try to find a matching job
683
for (curr=job_list->next; curr != job_list; curr=curr->next) {
687
job_elem = LNK_DATA(curr, job_elem_t, link);
688
for (group=0; !found_it && group<groups; group++) {
689
if (job_elem->job.jd_jid == list[group]) {
697
if (curr == job_list) { /* this is not a traced process */
702
/* we always read only one entry per function call
703
the while loop is needed to read next one */
709
if (!dent) {/* visited all files in procfs */
714
* try to find process in this jobs' proc list
717
for (curr=job_elem->procs.next; curr != &job_elem->procs;
719
proc_elem = LNK_DATA(curr, proc_elem_t, link);
721
if (proc_elem->proc.pd_pid == pr.pr_pid)
725
if (curr == &job_elem->procs) {
726
/* new process, add a proc element into jobs proc list */
727
if (!(proc_elem=(proc_elem_t *)malloc(sizeof(proc_elem_t)))) {
733
memset(proc_elem, 0, sizeof(proc_elem_t));
734
proc_elem->proc.pd_length = sizeof(psProc_t);
735
proc_elem->proc.pd_state = 1; /* active */
736
LNK_ADD(job_elem->procs.prev, &proc_elem->link);
737
job_elem->job.jd_proccount++;
743
utime = ((double)pr.pr_utime)/HZ;
744
stime = ((double)pr.pr_stime)/HZ;
746
utime = pr.pr_utime.tv_sec + pr.pr_utime.tv_nsec*1E-9;
747
stime = pr.pr_stime.tv_sec + pr.pr_stime.tv_nsec*1E-9;
749
INFO((SGE_EVENT, "new process "pid_t_fmt" for job "pid_t_fmt" (utime = %f stime = %f)\n",
750
pr.pr_pid, job_elem->job.jd_jid, utime, stime));
755
/* save previous usage data - needed to build delta usage */
756
old_time = proc_elem->proc.pd_utime + proc_elem->proc.pd_stime;
757
old_vmem = proc_elem->vmem;
760
proc_elem->proc.pd_tstamp = time_stamp;
762
proc_elem->proc.pd_pid = pr.pr_pid;
764
proc_elem->proc.pd_utime = ((double)pr.pr_utime)/HZ;
765
proc_elem->proc.pd_stime = ((double)pr.pr_stime)/HZ;
766
/* could retrieve uid/gid using stat() on stat file */
767
proc_elem->vmem = pr.pr_vsize;
769
proc_elem->proc.pd_utime = pr.pr_utime.tv_sec + pr.pr_utime.tv_nsec*1E-9;
770
proc_elem->proc.pd_stime = pr.pr_stime.tv_sec + pr.pr_stime.tv_nsec*1E-9;
772
/* Don't care if this part fails */
773
if (ioctl(fd, PIOCPSINFO, &pri) != -1) {
774
proc_elem->proc.pd_uid = pri.pr_uid;
775
proc_elem->proc.pd_gid = pri.pr_gid;
776
proc_elem->vmem = pri.pr_size * pagesize;
777
proc_elem->rss = pri.pr_rssize * pagesize;
778
proc_elem->proc.pd_pstart = pri.pr_start.tv_sec + pri.pr_start.tv_nsec*1E-9;
783
((proc_elem->proc.pd_stime + proc_elem->proc.pd_utime) - old_time) *
784
(( old_vmem + proc_elem->vmem)/2);
787
#define BLOCKSIZE 512
790
uint64 old_ru_ioblock = proc_elem->ru_ioblock;
792
/* need to do a table(2) call for each process to retrieve io usage data */
793
/* get user area stuff */
794
if (table(TBL_UAREA, proc_elem->proc.pd_pid, (char *)&ua, 1, sizeof ua) == 1) {
795
proc_elem->ru_ioblock = (uint64)(ua.u_ru.ru_inblock + ua.u_ru.ru_oublock);
796
proc_elem->delta_chars = (proc_elem->ru_ioblock - old_ru_ioblock)* BLOCKSIZE;
808
#endif /* (!COMPILE_DC) */