61
81
/* ------------------------------------------------------------- Definitions */
63
83
EventTable_T Event_Table[]= {
64
{EVENT_CHANGED, "Changed", "Changed not"},
65
{EVENT_CHECKSUM, "Checksum failed", "Checksum passed"},
66
{EVENT_CONNECTION, "Connection failed", "Connection passed"},
67
{EVENT_DATA, "Data access error", "Data access succeeded"},
68
{EVENT_EXEC, "Execution failed", "Execution succeeded"},
69
{EVENT_GID, "GID failed", "GID passed"},
70
{EVENT_ICMP, "ICMP failed", "ICMP passed"},
71
{EVENT_INSTANCE, "Monit instance changed", "Monit instance changed not"},
72
{EVENT_INVALID, "Invalid type", "Type passed"},
73
{EVENT_MATCH, "Regex match", "No regex match"},
74
{EVENT_NONEXIST, "Does not exist", "Exists"},
75
{EVENT_PERMISSION, "Permission failed", "Permission passed"},
76
{EVENT_RESOURCE, "Resource limit matched", "Resource limit passed"},
77
{EVENT_SIZE, "Size failed", "Size passed"},
78
{EVENT_TIMEOUT, "Timeout", "Timeout recovery"},
79
{EVENT_TIMESTAMP, "Timestamp failed", "Timestamp passed"},
80
{EVENT_UID, "UID failed", "UID passed"},
84
{EVENT_ACTION, "Action done", "Action done", "Action done", "Action done"},
85
{EVENT_CHECKSUM, "Checksum failed", "Checksum succeeded", "Checksum changed", "Checksum not changed"},
86
{EVENT_CONNECTION, "Connection failed", "Connection succeeded", "Connection changed", "Connection not changed"},
87
{EVENT_CONTENT, "Content failed", "Content succeeded", "Content match", "Content doesn't match"},
88
{EVENT_DATA, "Data access error", "Data access succeeded", "Data access changed", "Data access not changed"},
89
{EVENT_EXEC, "Execution failed", "Execution succeeded", "Execution changed", "Execution not changed"},
90
{EVENT_FSFLAG, "Filesystem flags failed", "Filesystem flags succeeded", "Filesystem flags changed", "Filesystem flags not changed"},
91
{EVENT_GID, "GID failed", "GID succeeded", "GID changed", "GID not changed"},
92
{EVENT_HEARTBEAT, "Heartbeat failed", "Heartbeat succeeded", "Heartbeat changed", "Heartbeat not changed"},
93
{EVENT_ICMP, "ICMP failed", "ICMP succeeded", "ICMP changed", "ICMP not changed"},
94
{EVENT_INSTANCE, "Monit instance failed", "Monit instance succeeded", "Monit instance changed", "Monit instance not changed"},
95
{EVENT_INVALID, "Invalid type", "Type succeeded", "Type changed", "Type not changed"},
96
{EVENT_NONEXIST, "Does not exist", "Exists", "Existence changed", "Existence not changed"},
97
{EVENT_PERMISSION, "Permission failed", "Permission succeeded", "Permission changed", "Permission not changed"},
98
{EVENT_PID, "PID failed", "PID succeeded", "PID changed", "PID not changed"},
99
{EVENT_PPID, "PPID failed", "PPID succeeded", "PPID changed", "PPID not changed"},
100
{EVENT_RESOURCE, "Resource limit matched", "Resource limit succeeded", "Resource limit changed", "Resource limit not changed"},
101
{EVENT_SIZE, "Size failed", "Size succeeded", "Size changed", "Size not changed"},
102
{EVENT_TIMEOUT, "Timeout", "Timeout recovery", "Timeout changed", "Timeout not changed"},
103
{EVENT_TIMESTAMP, "Timestamp failed", "Timestamp succeeded", "Timestamp changed", "Timestamp not changed"},
104
{EVENT_UID, "UID failed", "UID succeeded", "UID changed", "UID not changed"},
81
105
/* Virtual events */
82
{EVENT_NULL, "No Event", "No Event"},
106
{EVENT_NULL, "No Event", "No Event", "No Event", "No Event"}
102
127
* @param action Description of the event action
103
128
* @param s Optional message describing the event
105
void Event_post(Service_T service, long id, short state, EventAction_T action,
108
Event_T e = service->eventlist;
130
void Event_post(Service_T service, long id, short state, EventAction_T action, char *s, ...) {
112
ASSERT(state == STATE_FAILED || state == STATE_PASSED);
135
ASSERT(state == STATE_FAILED || state == STATE_SUCCEEDED || state == STATE_CHANGED || state == STATE_CHANGEDNOT);
116
/* Only first failed event can initialize the queue for given event type,
117
* thus passed events are ignored until first error. However, in the case
118
* that the error flag is set for the passed event, we will allow it (i.e.
119
* event queue was flushed during monit reload and the service was in
120
* failed state before reload) */
121
if(state != STATE_FAILED && !(service->error & id))
137
if ((e = service->eventlist) == NULL) {
138
/* Only first failed/changed event can initialize the queue for given event type,
139
* thus succeeded events are ignored until first error. */
140
if (state == STATE_SUCCEEDED || state == STATE_CHANGEDNOT)
124
143
/* Initialize event list and add first event. The mandatory information
144
162
e->message = Util_formatString(s, ap, &l);
147
pthread_mutex_init(&e->mutex, NULL);
148
165
service->eventlist = e;
152
167
/* Try to find the event with the same origin and type identification.
153
168
* Each service and each test have its own custom actions object, so
154
169
* we share actions object address to identify event source. */
157
if(e->action == action && e->id == id)
160
e->collected = time(NULL);
162
/* Shift the existing event flags to the left
163
* and set the first bit based on actual state */
165
e->state_map |= state;
167
/* Update the message */
175
e->message = Util_formatString(s, ap, &l);
171
if (e->action == action && e->id == id) {
172
gettimeofday(&e->collected, NULL);
174
/* Shift the existing event flags to the left
175
* and set the first bit based on actual state */
177
e->state_map |= ((state == STATE_SUCCEEDED || state == STATE_CHANGEDNOT) ? 0 : 1);
179
/* Update the message */
186
e->message = Util_formatString(s, ap, &l);
188
/* Only first failed event can initialize the queue for given event type,
189
* thus passed events are ignored until first error */
190
if(state != STATE_FAILED)
195
/* Only first failed/changed event can initialize the queue for given event type,
196
* thus succeeded events are ignored until first error. */
197
if (state == STATE_SUCCEEDED || state == STATE_CHANGEDNOT)
193
200
/* Event was not found in the pending events list, we will add it.
342
321
* @return The Event raw state
344
323
short Event_check_state(Event_T E, short S) {
326
short state = (S == STATE_SUCCEEDED || S == STATE_CHANGEDNOT) ? 0 : 1; /* translate to 0/1 class */
349
328
Service_T service;
354
if(!(service = Event_get_source(E)))
333
if (!(service = Event_get_source(E)))
357
/* Only the true failed state condition can change the initial state */
358
if(S == STATE_PASSED && E->state == STATE_INIT && !(service->error & E->id))
336
/* Only true failed/changed state condition can change the initial state */
337
if (!state && E->state == STATE_INIT && !(service->error & E->id))
363
action = (S == STATE_PASSED)?E->action->passed:E->action->failed;
340
action = !state ? E->action->succeeded : E->action->failed;
365
342
/* Compare as many bits as cycles able to trigger the action */
366
for(i = 0; i < action->cycles; i++)
343
for (i = 0; i < action->cycles; i++) {
368
344
/* Check the state of the particular cycle given by the bit position */
369
345
flag = (E->state_map >> i) & 0x1;
371
347
/* Count occurrences of the posted state */
378
if(count >= action->count && S != E->state)
352
/* the internal instance and action events are handled as changed any time since we need to deliver alert whenever it occurs */
353
if (E->id == EVENT_INSTANCE || E->id == EVENT_ACTION || (count >= action->count && S != E->state))
542
504
/* In the case that all handlers failed, skip the further processing in
543
505
* this cycle. Alert handler is currently defined anytime (either
544
506
* explicitly or localhost by default) */
547
FLAG(Run.handler_flag, HANDLER_COLLECTOR)
549
FLAG(Run.handler_flag, HANDLER_ALERT)
552
FLAG(Run.handler_flag, HANDLER_ALERT))
507
if ( (Run.mmonits && FLAG(Run.handler_flag, HANDLER_MMONIT) && FLAG(Run.handler_flag, HANDLER_ALERT)) || FLAG(Run.handler_flag, HANDLER_ALERT))
557
snprintf(file_name, STRLEN,
559
Run.eventlist_dir, de->d_name);
561
if(!stat(file_name, &st) && S_ISREG(st.st_mode))
510
snprintf(file_name, STRLEN, "%s/%s", Run.eventlist_dir, de->d_name);
512
if (!stat(file_name, &st) && S_ISREG(st.st_mode)) {
564
513
DEBUG("%s: processing queued event %s\n", prog, file_name);
566
if(! (file = fopen(file_name, "r")) )
568
LogError("%s: Processing failed - cannot open the event file %s -- %s\n",
569
prog, file_name, STRERROR);
515
if (! (file = fopen(file_name, "r")) ) {
516
LogError("%s: Processing failed - cannot open the event file %s -- %s\n", prog, file_name, STRERROR);
573
520
/* read event structure version */
574
if(!(version = File_readQueue(file, &size)) || size != sizeof(int)) {
575
LogError("skipping %s - unknown data format\n",
576
file_name, *version);
579
if(*version != EVENT_VERSION)
581
LogError("Aborting event %s - incompatible data format version %d\n",
582
file_name, *version);
521
if (!(version = File_readQueue(file, &size))) {
522
LogError("skipping %s - unknown data format\n", file_name);
525
if (size != sizeof(int)) {
526
LogError("Aborting event %s - invalid size %d\n", file_name, size);
529
if (*version != EVENT_VERSION) {
530
LogError("Aborting event %s - incompatible data format version %d\n", file_name, *version);
586
534
/* read event structure */
587
if(!(e = File_readQueue(file, &size)) || size != sizeof(*e))
535
if (!(e = File_readQueue(file, &size)))
537
if (size != sizeof(*e))
590
540
/* read source */
591
if(!(e->source = File_readQueue(file, &size)))
541
if (!(e->source = File_readQueue(file, &size)))
595
if(!(e->group = File_readQueue(file, &size)))
545
if (!(e->group = File_readQueue(file, &size)))
598
548
/* read message */
599
if(!(e->message = File_readQueue(file, &size)))
549
if (!(e->message = File_readQueue(file, &size)))
602
552
/* read event action */
603
if(!(action = File_readQueue(file, &size)) || size != sizeof(short))
553
if (!(action = File_readQueue(file, &size)))
555
if (size != sizeof(short))
606
if(e->state == STATE_FAILED)
558
if (e->state == STATE_FAILED)
616
564
/* Retry all remaining handlers */
619
if(e->flag & HANDLER_ALERT)
567
if (e->flag & HANDLER_ALERT) {
568
if (Run.handler_init)
623
569
Run.handler_queue[HANDLER_ALERT]++;
625
if((Run.handler_flag & HANDLER_ALERT) != HANDLER_ALERT)
627
if( handle_alert(e) != HANDLER_ALERT )
570
if ((Run.handler_flag & HANDLER_ALERT) != HANDLER_ALERT) {
571
if ( handle_alert(e) != HANDLER_ALERT ) {
629
572
e->flag &= ~HANDLER_ALERT;
630
573
Run.handler_queue[HANDLER_ALERT]--;
634
576
LogError("Alert handler failed, retry scheduled for next cycle\n");
635
577
Run.handler_flag |= HANDLER_ALERT;
641
if(e->flag & HANDLER_COLLECTOR)
645
Run.handler_queue[HANDLER_COLLECTOR]++;
647
if((Run.handler_flag & HANDLER_COLLECTOR) != HANDLER_COLLECTOR)
649
if( handle_collector(e) != HANDLER_COLLECTOR )
651
e->flag &= ~HANDLER_COLLECTOR;
652
Run.handler_queue[HANDLER_COLLECTOR]--;
656
LogError("Collector handler failed, retry scheduled for next cycle\n");
657
Run.handler_flag |= HANDLER_COLLECTOR;
583
if (e->flag & HANDLER_MMONIT) {
584
if (Run.handler_init)
585
Run.handler_queue[HANDLER_MMONIT]++;
586
if ((Run.handler_flag & HANDLER_MMONIT) != HANDLER_MMONIT) {
587
if ( handle_mmonit(e) != HANDLER_MMONIT ) {
588
e->flag &= ~HANDLER_MMONIT;
589
Run.handler_queue[HANDLER_MMONIT]--;
592
LogError("M/Monit handler failed, retry scheduled for next cycle\n");
593
Run.handler_flag |= HANDLER_MMONIT;
662
598
/* If no error persists, remove it from the queue */
663
if(e->flag == HANDLER_PASSED)
665
DEBUG("Removing event %s from the queue for later external delivery\n",
599
if (e->flag == HANDLER_SUCCEEDED) {
600
DEBUG("Removing event %s from the queue for later external delivery\n", file_name);
601
if (unlink(file_name) < 0)
602
LogError("Failed to remove queued event file '%s' -- %s\n", file_name, STRERROR);
603
} else if (handlers_passed > 0) {
604
DEBUG("Updating queued event %s (some handlers passed)\n", file_name);
605
Event_queue_update(e, file_name);
681
624
de = readdir(dir);
683
626
Run.handler_init = FALSE;
696
639
* @param E An event
698
641
static void handle_event(Event_T E) {
703
645
ASSERT(E->action);
704
646
ASSERT(E->action->failed);
705
ASSERT(E->action->passed);
647
ASSERT(E->action->succeeded);
707
/* We will handle only first passed event, recurrent passed events
708
* or insufficient passed events during failed service state are
649
/* We will handle only first succeeded event, recurrent succeeded events
650
* or insufficient succeeded events during failed service state are
709
651
* ignored. Failed events are handled each time. */
710
if(!E->state_changed && (E->state == STATE_PASSED || ((E->state_map & 0x1) ^ 0x1)))
652
if (!E->state_changed && (E->state == STATE_SUCCEEDED || E->state == STATE_CHANGEDNOT || ((E->state_map & 0x1) ^ 0x1)))
717
/* In the case that the service state is yet initializing and error
718
* occured, log it and exit. Passed events in init state are not
720
if(E->state != STATE_INIT || E->state_map & 0x1)
722
if(E->id == EVENT_INSTANCE || E->state == STATE_PASSED) {
723
LogInfo("%s\n", E->message);
725
LogError("%s\n", E->message);
728
if(E->state == STATE_INIT)
734
655
S = Event_get_source(E);
737
657
LogError("Event handling aborted\n");
741
if(E->state == STATE_FAILED)
662
/* In the case that the service state is initializing yet and error
663
* occurred, log it and exit. Succeeded events in init state are not
664
* logged. Instance and action events are logged always with priority
666
if (E->state != STATE_INIT || E->state_map & 0x1) {
667
if (E->state == STATE_SUCCEEDED || E->state == STATE_CHANGEDNOT || E->id == EVENT_INSTANCE || E->id == EVENT_ACTION)
668
LogInfo("'%s' %s\n", S->name, E->message);
670
LogError("'%s' %s\n", S->name, E->message);
672
if (E->state == STATE_INIT)
676
if (E->state == STATE_FAILED || E->state == STATE_CHANGED) {
677
if (E->id != EVENT_INSTANCE && E->id != EVENT_ACTION) { // We are not interested in setting error flag for instance and action events
679
/* The error hint provides second dimension for error bitmap and differentiates between failed/changed event states (failed=0, changed=1) */
680
if (E->state == STATE_CHANGED)
681
S->error_hint |= E->id;
683
S->error_hint &= ~E->id;
744
685
handle_action(E, E->action->failed);
748
687
S->error &= ~E->id;
749
handle_action(E, E->action->passed);
688
handle_action(E, E->action->succeeded);
752
691
/* Possible event state change was handled so we will reset the flag. */
753
692
E->state_changed = FALSE;
758
696
static void handle_action(Event_T E, Action_T A) {
765
E->flag = HANDLER_PASSED;
702
E->flag = HANDLER_SUCCEEDED;
767
if(A->id == ACTION_IGNORE)
704
if (A->id == ACTION_IGNORE)
772
/* Alert and collector event notification are common actions */
707
/* Alert and mmonit event notification are common actions */
708
E->flag |= handle_mmonit(E);
773
709
E->flag |= handle_alert(E);
774
E->flag |= handle_collector(E);
776
711
/* In the case that some subhandler failed, enqueue the event for
777
712
* partial reprocessing */
778
if(E->flag != HANDLER_PASSED)
780
if(Run.eventlist_dir)
713
if (E->flag != HANDLER_SUCCEEDED) {
714
if (Run.eventlist_dir)
782
715
Event_queue_add(E);
786
717
LogError("Aborting event\n");
790
if(!(s = Event_get_source(E)))
720
if (!(s = Event_get_source(E))) {
792
721
LogError("Event action handling aborted\n");
796
if(A->id == ACTION_ALERT)
798
return; /* Already handled */
800
else if(A->id == ACTION_EXEC)
725
/* Action event is handled already. For Instance events
726
* we don't want actions like stop to be executed
727
* to prevent the disabling of system service monitoring */
728
if (A->id == ACTION_ALERT || E->id == EVENT_INSTANCE) {
730
} else if (A->id == ACTION_EXEC) {
731
LogInfo("'%s' exec: %s\n", s->name, A->exec->arg[0]);
802
732
spawn(s, A->exec, Event_get_description(E));
808
(A->id == ACTION_START ||
809
A->id == ACTION_RESTART))
735
if (s->def_timeout && (A->id == ACTION_START || A->id == ACTION_RESTART))
814
if(s->mode == MODE_PASSIVE &&
815
(A->id == ACTION_START ||
816
A->id == ACTION_STOP ||
817
A->id == ACTION_RESTART))
738
if (s->mode == MODE_PASSIVE && (A->id == ACTION_START || A->id == ACTION_STOP || A->id == ACTION_RESTART))
822
741
control_service(s->name, A->id);
829
748
* @param E An event object
831
750
static void Event_queue_add(Event_T E) {
833
751
FILE *file = NULL;
834
752
char file_name[STRLEN];
835
753
int version = EVENT_VERSION;
836
754
short action = Event_get_action(E);
843
ASSERT(E->flag != HANDLER_PASSED);
759
ASSERT(E->flag != HANDLER_SUCCEEDED);
845
if(!File_checkQueueDirectory(Run.eventlist_dir, 0700))
847
LogError("%s: Aborting event - cannot access the directory %s\n",
848
prog, Run.eventlist_dir);
761
if (!File_checkQueueDirectory(Run.eventlist_dir, 0700)) {
762
LogError("%s: Aborting event - cannot access the directory %s\n", prog, Run.eventlist_dir);
852
if(!File_checkQueueLimit(Run.eventlist_dir, Run.eventlist_slots))
766
if (!File_checkQueueLimit(Run.eventlist_dir, Run.eventlist_slots)) {
854
767
LogError("%s: Aborting event - queue over quota\n", prog);
858
set_signal_block(&ns, &os);
860
771
/* compose the file name of actual timestamp and service name */
861
snprintf(file_name, STRLEN,
863
Run.eventlist_dir, (long int)time(NULL), E->source);
772
snprintf(file_name, STRLEN, "%s/%ld_%s", Run.eventlist_dir, (long int)time(NULL), E->source);
865
DEBUG("%s: Adding event to the queue file %s for later delivery\n",
774
DEBUG("%s: Adding event to the queue file %s for later delivery\n", prog, file_name);
868
776
mask = umask(QUEUEMASK);
869
777
file = fopen(file_name, "w");
873
LogError("%s: Aborting event - cannot open the event file %s -- %s\n",
874
prog, file_name, STRERROR);
780
LogError("%s: Aborting event - cannot open the event file %s -- %s\n", prog, file_name, STRERROR);
878
784
/* write event structure version */
879
if(!(rv = File_writeQueue(file, &version, sizeof(int))))
785
if (!(rv = File_writeQueue(file, &version, sizeof(int))))
882
788
/* write event structure */
883
if(!(rv = File_writeQueue(file, E, sizeof(*E))))
789
if (!(rv = File_writeQueue(file, E, sizeof(*E))))
886
792
/* write source */
887
if(!(rv = File_writeQueue(file, E->source, E->source?strlen(E->source)+1:0)))
793
if (!(rv = File_writeQueue(file, E->source, E->source ? strlen(E->source)+1 : 0)))
890
796
/* write group */
891
if(!(rv = File_writeQueue(file, E->group, E->group?strlen(E->group)+1:0)))
797
if (!(rv = File_writeQueue(file, E->group, E->group ? strlen(E->group)+1 : 0)))
894
800
/* write message */
895
if(!(rv = File_writeQueue(file, E->message, E->message?strlen(E->message)+1:0)))
801
if (!(rv = File_writeQueue(file, E->message, E->message ? strlen(E->message)+1 : 0)))
898
804
/* write event action */
899
if(!(rv = File_writeQueue(file, &action, sizeof(short))))
805
if (!(rv = File_writeQueue(file, &action, sizeof(short))))
905
LogError("%s: Aborting event - unable to save event information to %s\n",
911
if(!Run.handler_init && E->flag & HANDLER_ALERT)
811
LogError("%s: Aborting event - unable to save event information to %s\n", prog, file_name);
812
if (unlink(file_name) < 0)
813
LogError("Failed to remove event file '%s' -- %s\n", file_name, STRERROR);
815
if (!Run.handler_init && E->flag & HANDLER_ALERT)
913
816
Run.handler_queue[HANDLER_ALERT]++;
915
if(!Run.handler_init && E->flag & HANDLER_COLLECTOR)
917
Run.handler_queue[HANDLER_COLLECTOR]++;
922
unset_signal_block(&os);
817
if (!Run.handler_init && E->flag & HANDLER_MMONIT)
818
Run.handler_queue[HANDLER_MMONIT]++;
826
* Update the partially handled event in the global queue
827
* @param E An event object
828
* @param file_name File name
830
static void Event_queue_update(Event_T E, const char *file_name) {
832
int version = EVENT_VERSION;
833
short action = Event_get_action(E);
838
ASSERT(E->flag != HANDLER_SUCCEEDED);
840
if (!File_checkQueueDirectory(Run.eventlist_dir, 0700)) {
841
LogError("%s: Aborting event - cannot access the directory %s\n", prog, Run.eventlist_dir);
845
DEBUG("%s: Updating event in the queue file %s for later delivery\n", prog, file_name);
847
mask = umask(QUEUEMASK);
848
file = fopen(file_name, "w");
852
LogError("%s: Aborting event - cannot open the event file %s -- %s\n", prog, file_name, STRERROR);
856
/* write event structure version */
857
if (!(rv = File_writeQueue(file, &version, sizeof(int))))
860
/* write event structure */
861
if (!(rv = File_writeQueue(file, E, sizeof(*E))))
865
if (!(rv = File_writeQueue(file, E->source, E->source ? strlen(E->source)+1 : 0)))
869
if (!(rv = File_writeQueue(file, E->group, E->group ? strlen(E->group)+1 : 0)))
873
if (!(rv = File_writeQueue(file, E->message, E->message ? strlen(E->message)+1 : 0)))
876
/* write event action */
877
if (!(rv = File_writeQueue(file, &action, sizeof(short))))
883
LogError("%s: Aborting event - unable to update event information to %s\n", prog, file_name);
884
if (unlink(file_name) < 0)
885
LogError("Failed to remove event file '%s' -- %s\n", file_name, STRERROR);