3
Kjetil Matheussen 2006.
5
This program is free software; you can redistribute it and/or modify
6
it under the terms of the GNU General Public License as published by
7
the Free Software Foundation; either version 2 of the License, or
8
(at your option) any later version.
10
This program is distributed in the hope that it will be useful,
11
but WITHOUT ANY WARRANTY; without even the implied warranty of
12
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
GNU General Public License for more details.
15
You should have received a copy of the GNU General Public License
16
along with this program; if not, write to the Free Software
17
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20
// Only necessary with old 2.6 kernel (before jan 2006 or thereabout).
21
// 2.4 and newer 2.6 works fine.
29
#include <sys/types.h>
40
#include <glibtop/proclist.h>
41
#include <glibtop/procstate.h>
42
#if LIBGTOP_MAJOR_VERSION<2
43
# include <glibtop/xmalloc.h>
45
#include <glibtop/procuid.h>
46
#include <glibtop/proctime.h>
48
#if LIBGTOP_MAJOR_VERSION<2
49
typedef u_int64_t ui64;
54
// Opens the command-line parsing construct. Declares the loop index `lokke` and
// the `usage` text, starts a loop over argv[1..argc-1] with `a` bound to the
// current argument, and handles "--help"/"-h" by printing `usage` and returning 0.
// Three braces (block, for-body, if-body) are deliberately left open; the other
// OPTARG* macros and OPTARGS_END are written to continue and close them.
// The usage text is printed through "%s" so it is never interpreted as a format string.
#define OPTARGS_BEGIN(das_usage) {int lokke;const char *usage=das_usage;for(lokke=1;lokke<argc;lokke++){char *a=argv[lokke];if(!strcmp("--help",a)||!strcmp("-h",a)){printf("%s",usage);return 0;
55
// Starts the branch for one option: closes the two braces left open by the
// previous branch, then opens an `else if` (with two braces) that matches the
// current argument `a` against either spelling (`name` or `name2`).
// NOTE(review): the double braces presumably pair with an extra `{` written by
// the caller after OPTARGS_BEGIN - confirm against the call site.
#define OPTARG(name,name2) }}else if(!strcmp(name,a)||!strcmp(name2,a)){{
56
// Consumes the next command-line argument and converts it with atoi().
// If the option is the last argument (no value follows), argv[++lokke] would be
// the NULL terminator of argv; instead of handing that to atoi(), print the
// usage text and exit with -1 (the same code OPTARGS_END returns for bad input).
#define OPTARG_GETINT() ((lokke+1<argc) ? atoi(argv[++lokke]) : (fprintf(stderr,"%s",usage),exit(-1),0))
57
// Consumes the next command-line argument and converts it with atof().
// If no value follows the option, print the usage text and exit with -1
// rather than passing the NULL terminator of argv to atof().
#define OPTARG_GETFLOAT() ((lokke+1<argc) ? atof(argv[++lokke]) : (fprintf(stderr,"%s",usage),exit(-1),0.0))
58
// Consumes and yields the next command-line argument as a string.
// If no value follows the option, print the usage text and exit with -1
// instead of yielding the NULL terminator of argv to the caller.
#define OPTARG_GETSTRING() ((lokke+1<argc) ? argv[++lokke] : (fprintf(stderr,"%s",usage),exit(-1),(char*)0))
59
// Starts the branch taken when the current argument is the final one
// (lokke==argc-1) and no earlier option branch matched. It steps `lokke` back by
// one before the branch body runs.
// NOTE(review): the decrement presumably lets an OPTARG_GET*() call in the body
// (which pre-increments lokke) read this same argument - confirm with a caller.
#define OPTARG_LAST() }}else if(lokke==argc-1){lokke--;{
60
// Closes one brace and opens an always-true `else if(1)` branch, i.e. a
// catch-all for arguments that matched none of the preceding branches.
#define OPTARGS_ELSE() }else if(1){
61
// Terminates the option-parsing construct. An argument that matched no branch
// falls into the final `else`, which prints the usage text to stderr and returns
// -1 from the enclosing function; the remaining braces close the for-loop and
// the block opened by OPTARGS_BEGIN.
// The usage text is printed through "%s" so it is never interpreted as a format string.
#define OPTARGS_END }else{fprintf(stderr,"%s",usage);return(-1);}}}
64
static int increasetime=1; // Seconds between each time the SCHED_OTHER thread is increasing the counter.
65
static int checktime=4; // Seconds between each time the SCHED_FIFO thread checks that the counter is increased.
66
static int waittime=8; // Seconds the SCHED_FIFO thread waits before setting the processes back to SCHED_FIFO.
70
int policy; // SCHED_OTHER, SCHED_FIFO or SCHED_RR
72
ui64 start_time; // Creation time of the process.
75
struct das_proclist *proclist;
81
int counter=0; // Make non-static in case the c compiler does a whole-program optimization. :-)
84
static int checkirq=0;
87
static int xmessage_found=1;
90
static void print_error(FILE *where,char *fmt, ...) {
94
vsnprintf (temp, 9998, fmt, ap);
96
syslog(LOG_INFO,temp);
97
fprintf(where,"Das_Watchdog: %s\n",temp);
100
static ui64 get_pid_start_time(pid_t pid){
101
glibtop_proc_time buf={0};
102
glibtop_get_proc_time(&buf,pid);
103
return buf.start_time;
107
static int get_pid_priority(pid_t pid){
108
struct sched_param par;
109
sched_getparam(pid,&par);
110
return par.sched_priority;
113
static int set_pid_priority(pid_t pid,int policy,int priority,char *message,char *name){
114
struct sched_param par={0};
115
par.sched_priority=priority;
116
if((sched_setscheduler(pid,policy,&par)!=0)){
117
print_error(stderr,message,pid,name,strerror(errno));
124
struct das_proclist *get_proclist(int *num_procs){
127
glibtop_proclist proclist_def={0};
128
pid_t *proclist=glibtop_get_proclist(&proclist_def,GLIBTOP_KERN_PROC_ALL,0); //|GLIBTOP_EXCLUDE_SYSTEM,0);
129
struct das_proclist *ret=calloc(sizeof(struct das_proclist),proclist_def.number);
131
*num_procs=proclist_def.number;
133
for(lokke=0;lokke<proclist_def.number;lokke++){
134
pid_t pid=proclist[lokke];
136
ret[lokke].policy=sched_getscheduler(pid);
137
ret[lokke].priority=get_pid_priority(pid);
138
ret[lokke].start_time=get_pid_start_time(pid);
141
#if LIBGTOP_MAJOR_VERSION<2
142
glibtop_free(proclist);
150
struct proclistlist *pll_create(void){
151
struct proclistlist *pll=calloc(1,sizeof(struct proclistlist));
152
pll->proclist=get_proclist(&pll->length);
156
static void pll_delete(struct proclistlist *pll){
163
static pid_t name2pid(char *name){
167
struct das_proclist *proclist=get_proclist(&num_procs);
169
for(lokke=0;lokke<num_procs;lokke++){
170
glibtop_proc_state state;
171
glibtop_get_proc_state(&state,proclist[lokke].pid);
172
if(!strcmp(state.cmd,name)){
173
pid=proclist[lokke].pid;
183
static int is_a_member(int val,int *vals,int num_vals){
185
for(lokke=0;lokke<num_vals;lokke++)
193
// Returns a list of users that might be the one owning the proper .Xauthority file.
194
static int *get_userlist(struct proclistlist *pll, int *num_users){
195
int *ret=calloc(sizeof(int),pll->length);
200
for(lokke=0;lokke<pll->length;lokke++){
201
glibtop_proc_uid uid;
202
glibtop_get_proc_uid(&uid,pll->proclist[lokke].pid);
203
if( ! is_a_member(uid.uid,ret,*num_users)){ // ???
204
ret[*num_users]=uid.uid;
213
static int gettimerpid(char *name,int cpu){
220
sprintf(name,"softirq-timer/%d",cpu);
225
sprintf(name,"ksoftirqd/%d",cpu);
235
static int checksoftirq2(int force,int cpu){
237
pid_t pid=gettimerpid(&name[0],cpu);
239
if(pid==-1) return 0;
243
int policy=sched_getscheduler(pid);
244
int priority=get_pid_priority(pid);
246
if(priority<sched_get_priority_max(SCHED_FIFO)
247
|| policy==SCHED_OTHER
252
print_error(stdout,"Forcing %s to SCHED_FIFO priority %d",name,sched_get_priority_max(SCHED_FIFO));
253
set_pid_priority(pid,SCHED_FIFO,sched_get_priority_max(SCHED_FIFO),"Could not set %d (\"%s\") to SCHED_FIFO (%s).\n\n",name);
254
return checksoftirq2(0,cpu);
258
if(priority<sched_get_priority_max(SCHED_FIFO))
260
"\n\nWarning. The priority of the \"%s\" process is only %d, and not %d. Unless you are using the High Res Timer,\n"
261
"the watchdog will probably not work. If you are using the High Res Timer, please continue doing so and ignore this message.\n",
264
sched_get_priority_max(SCHED_FIFO)
266
if(policy==SCHED_OTHER)
268
"\n\nWarning The \"%s\" process is running SCHED_OTHER. Unless you are using the High Res Timer,\n"
269
"the watchdog will probably not work, and the timing on your machine is probably horrible.\n",
274
print_error(stdout,"\n\nUnless you are using the High Res Timer, you need to add the \"--force\" flag to run das_watchdog reliably.\n");
275
print_error(stdout,"(Things might change though, so it could work despite all warnings above. To test the watchdog, run the \"test_rt\" program.)\n\n");
279
//printf("name: -%s-\n",state.cmd);
286
static int checksoftirq(int force){
290
switch(checksoftirq2(force,cpu)){
307
static char *get_pid_environ_val(pid_t pid,char *val){
309
char *temp = malloc(temp_size);
315
sprintf(temp,"/proc/%d/environ",pid);
325
if (i >= temp_size) {
327
temp = realloc(temp, temp_size);
332
if(foundit==1 && (temp[i]=='\0' || temp[i]==EOF)){
345
if(!strcmp(temp,val)){
359
// Returns 1 in case a message was sent.
360
static int send_xmessage(char *xa_filename,char *message){
361
if(access(xa_filename,R_OK)==0){
362
setenv("XAUTHORITY",xa_filename,1);
364
print_error(stdout,"Trying xauth file \"%s\"",xa_filename);
365
if(system(message)==0)
371
// Returns 1 in case a message was sent.
372
static int send_xmessage_using_XAUTHORITY(struct proclistlist *pll,int lokke,char *message){
374
if(lokke==pll->length)
378
char *xa_filename=get_pid_environ_val(pll->proclist[lokke].pid,"XAUTHORITY");
379
if(xa_filename!=NULL){
380
if(send_xmessage(xa_filename,message)==1){
388
return send_xmessage_using_XAUTHORITY(pll,lokke+1,message);
391
int send_xmessage_using_uids(struct proclistlist *pll, char *message){
394
int *uids=get_userlist(pll,&num_users);
395
for(lokke=0;lokke<num_users;lokke++){
396
char xauthpath[5000];
397
struct passwd *pass=getpwuid(uids[lokke]);
398
sprintf(xauthpath,"%s/.Xauthority",pass->pw_dir);
399
if(send_xmessage(xauthpath,message)==1){
412
static void xmessage_fork(struct proclistlist *pll){
415
set_pid_priority(0,SCHED_FIFO,sched_get_priority_min(SCHED_FIFO),"Unable to set SCHED_FIFO for %d (\"%s\"). (%s)", "the xmessage fork");
417
setenv("DISPLAY",":0.0",1);
419
if( ! xmessage_found)
420
sprintf(message,"xmessage \"WARNING! das_watchdog pauses realtime operations for %d seconds.\"",waittime);
422
sprintf(message,"%s \"WARNING! das_watchdog pauses realtime operations for %d seconds.\"",WHICH_XMESSAGE,waittime);
424
if(send_xmessage_using_uids(pll,message)==0){
425
set_pid_priority(0,SCHED_OTHER,0,"Unable to set SCHED_OTHER for %d (\"%s\"). (%s)", "the xmessage fork"); // send_xmessage_using_XAUTHORITY is too heavy to run in realtime.
426
send_xmessage_using_XAUTHORITY(pll,0,message);
434
// The SCHED_OTHER thread.
435
static void *counter_func(void *arg){
438
set_pid_priority(0,SCHED_FIFO,sched_get_priority_min(SCHED_FIFO),"Unable to set SCHED_FIFO for %d (\"%s\"). (%s)", "the counter_func");
444
print_error(stderr,"counter set to %d",counter);
454
int main(int argc,char **argv){
455
pid_t mypid=getpid();
456
pthread_t counter_thread={0};
466
// Find number of timer processes.
467
while(gettimerpid(NULL,num_cpus)!=-1)
469
timerpids=malloc(sizeof(int)*num_cpus);
473
for(cpu=0;cpu<num_cpus;cpu++)
474
timerpids[cpu]=gettimerpid(NULL,cpu);
480
OPTARGS_BEGIN("Usage: das_watchdog [--force] [--verbose] [--checkirq] [--increasetime n] [--checktime n] [--waittime n]\n"
481
" [ -f] [ -v] [ -c] [ -it n] [ -ct n] [ -wt n]\n"
483
"Additional arguments:\n"
484
"[--version] or [-ve] -> Prints out version.\n"
485
"[--test] or [-te] -> Run a test to see if xmessage is working.\n")
488
OPTARG("--verbose","-v") verbose=1;
490
OPTARG("--force","-f") force=1;
491
OPTARG("--checkirq","-c") checkirq=1; return(checksoftirq(0));
493
OPTARG("--increasetime","-it") increasetime=OPTARG_GETINT();
494
OPTARG("--checktime","-ct") checktime=OPTARG_GETINT();
495
OPTARG("--waittime","-wt") waittime=OPTARG_GETINT();
496
OPTARG("--test","-te") testing=1; verbose=1;
497
OPTARG("--version","-ve") printf("Das Version die Uhr Hund %s nach sein bist.\n",VERSION);exit(0);
501
// Logging to /var/log/messages
503
openlog("das_watchdog", 0, LOG_DAEMON);
504
syslog(LOG_INFO, "started");
510
if(force && checksoftirq(force)<0)
517
print_error(stdout,"Warning, you are not running as root. das_watchdog should be run as an init-script at startup, and not as a normal user.\n");
521
if(access(WHICH_XMESSAGE,X_OK)!=0){
522
print_error(stderr,"Warning, \"xmessage\" is not found or is not an executable. I will try to use the $PATH instead. Hopefully that'll work,");
523
print_error(stderr,"but you might not receive messages to the screen in case das_watchdog has to take action.");
531
if( ! set_pid_priority(0,SCHED_FIFO,sched_get_priority_max(SCHED_FIFO),
532
"Unable to set SCHED_FIFO realtime priority for %d (\"%s\"). (%s). Exiting.",
533
"Der Gewinde nach die Uhr Hund"))
535
if(mlockall(MCL_CURRENT|MCL_FUTURE)==-1)
536
print_error(stderr,"Could not call mlockalll(MCL_CURRENT|MCL_FUTURE) (%s)",strerror(errno));
540
// Start child thread.
542
pthread_create(&counter_thread,NULL,counter_func,NULL);
546
// Main loop. (We are never supposed to exit from this one.)
548
int lastcounter=counter;
552
print_error(stderr," counter read to be %d (lastcounter=%d)",counter,lastcounter);
554
if(lastcounter==counter || testing==1){
556
struct proclistlist *pll=pll_create();
560
print_error(stdout,"Die Uhr Hund stossen sein!");
562
for(lokke=0;lokke<pll->length;lokke++){
563
if(pll->proclist[lokke].policy!=SCHED_OTHER
564
&& pll->proclist[lokke].pid!=mypid
565
&& (!is_a_member(pll->proclist[lokke].pid,timerpids,num_cpus))
568
struct sched_param par={0};
569
par.sched_priority=0;
571
print_error(stdout,"Setting pid %d temporarily to SCHED_OTHER.",pll->proclist[lokke].pid);
572
if(set_pid_priority(pll->proclist[lokke].pid,SCHED_OTHER,0,"Could not set pid %d (\"%s\") to SCHED_OTHER (%s).\n","no name"))
577
if(changedsched>0 || testing==1){
581
sprintf(message,"realtime operations paused for %d seconds.",waittime);
582
syslog(LOG_INFO,message);
592
for(lokke=0;lokke<pll->length;lokke++){
593
if(pll->proclist[lokke].policy != SCHED_OTHER
594
&& pll->proclist[lokke].pid != mypid
595
&& (!is_a_member(pll->proclist[lokke].pid,timerpids,num_cpus))
596
&& pll->proclist[lokke].start_time == get_pid_start_time(pll->proclist[lokke].pid)
599
if(get_pid_priority(pll->proclist[lokke].pid) != 0
600
|| sched_getscheduler(pll->proclist[lokke].pid) != SCHED_OTHER){
602
"Seems like someone else has changed priority and/or scheduling policy for %d in the mean time. I'm not going to do anything.",
603
pll->proclist[lokke].pid);
605
struct sched_param par={0};
606
par.sched_priority=pll->proclist[lokke].priority;
608
print_error(stdout,"Setting pid %d back to realtime priority.",pll->proclist[lokke].pid);
609
set_pid_priority(pll->proclist[lokke].pid,pll->proclist[lokke].policy,pll->proclist[lokke].priority,"Could not set pid %d (\"%s\") to SCHED_FIFO/SCHED_RR (%s).\n\n", "no name");
616
if(testing==1) break;