4
/*___INFO__MARK_BEGIN__*/
5
/*************************************************************************
7
* The Contents of this file are made available subject to the terms of
8
* the Sun Industry Standards Source License Version 1.2
10
* Sun Microsystems Inc., March, 2001
13
* Sun Industry Standards Source License Version 1.2
14
* =================================================
15
* The contents of this file are subject to the Sun Industry Standards
16
* Source License Version 1.2 (the "License"); You may not use this file
17
* except in compliance with the License. You may obtain a copy of the
18
* License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html
20
* Software provided under this License is provided on an "AS IS" basis,
21
* WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
22
* WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
23
* MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
24
* See the License for the specific provisions governing your rights and
25
* obligations concerning the Software.
27
* The Initial Developer of the Original Code is: Sun Microsystems, Inc.
29
* Copyright: 2003 by Sun Microsystems, Inc.
31
* All Rights Reserved.
33
************************************************************************/
34
/*___INFO__MARK_END__*/
38
#include "basis_types.h"
39
#include "uti/sge_dstring.h"
42
* Monitoring functionality:
43
* -------------------------
45
* - qping health monitoring
47
* - keeping statistics on what is done during a thread loop
49
* - outputting the statistics information via message file or
56
* do a normal data definition, call init and free, when you are done.
57
* You have to call MONITOR_IDLE_TIME and sge_monitor_output. After that
58
* everything is up to you to design...
60
* -----start thread --------------
61
* monitoring_t monitor;
63
* sge_monitor_init(&monitor, "THREAD NAME", <EXTENSION>, <WARNING>, <ERROR>);
67
* MONITOR_IDLE_TIME(<wait for something>, (&monitor), monitor_time, <is_log>);
69
* < do your stuff and monitoring >
71
* sge_monitor_output(&monitor);
73
* sge_monitor_free(&monitor);
74
* ------end thread----------------
78
* The call to MONITOR_IDLE_TIME has to be the first one after the thread loop otherwise
79
* certain parts of the monitoring structure are not correctly initialized.
81
* General statistic methods:
82
* --------------------------
84
* - MONITOR_IDLE_TIME : counts idle time, very important, nothing works without it
85
* - MONITOR_WAIT_TIME : counts wait time (wait for a lock usually)
86
* - MONITOR_MESSAGES : counts how many times the thread loop is executed
87
* - MONITOR_MESSAGES_OUT : counts how many messages are sent
89
* GDI statistics methods:
90
* -----------------------
92
* - MONITOR_GDI : counts GDI requests
93
* - MONITOR_ACK : counts ACKs
94
* - MONITOR_LOAD : counts reports
99
* qping thread warning times in seconds
106
ST_WARNING = 0, /* no timeout for this thread */
111
/* EB: TODO: ST: ??? */
114
* qping thread error times in seconds
121
ST_ERROR = 0, /* no timeout for this thread */
127
* This function definition is the prototype for the output function of a data
130
/* Callback type for an extension's output function: it receives the target
 * buffer, the extension data and the length of the measured interval. */
typedef void (*extension_output)(
131
dstring *info_message, /* target memory buffer */
132
void *monitor_extension, /* contains the monitor extension structure */
133
double time /* length of the time interval */
137
* This enum identifies all available extensions
141
GDI_EXT = 0, /* GDI = request processing thread */
142
EDT_EXT = 1, /* EDT = event delivery thread */
143
TET_EXT = 2, /* TET = timed event thread */
144
LIS_EXT = 3, /* LIS = listener thread */
145
SCH_EXT = 4 /* SCH = scheduler thread */
149
* the monitoring data structure
152
/*--- init data ------------*/
153
const char *thread_name;
154
u_long32 monitor_time; /* stores the time interval for the measuring run */
155
bool log_monitor_mes; /* if true, it logs the monitoring info into the message file */
156
/*--- output data ----------*/
157
dstring *output_line1;
158
dstring *output_line2;
160
int pos; /* position (line) in the qping output structure (kind of thread id) */
161
/*--- work data ------------*/
162
struct timeval now; /* start time of measurement */
163
bool output; /* if true, triggers qping / message output */
164
u_long32 message_in_count; /* incremented by MONITOR_MESSAGES */
165
u_long32 message_out_count; /* incremented by MONITOR_MESSAGES_OUT */
166
double idle; /* idle time, accumulated by MONITOR_IDLE_TIME */
167
double wait; /* wait time, accumulated by MONITOR_WAIT_TIME */
168
/*--- extension data -------*/
169
extension_t ext_type; /* checked by the extension macros before they touch the extension data */
171
u_long32 ext_data_size;
172
extension_output ext_output; /* output callback of the extension */
175
/* Sets up a monitoring structure for the thread called thread_name. ext
 * selects the extension type; warning_timeout / error_timeout are the qping
 * thread warning and error times. Pair with sge_monitor_free(). */
void sge_monitor_init(monitoring_t *monitor, const char *thread_name, extension_t ext,
176
thread_warning_t warning_timeout, thread_error_t error_timeout);
177
/* Counterpart of sge_monitor_init(); call it when the thread is done. */
void sge_monitor_free(monitoring_t *monitor);
178
/* NOTE(review): implementation not visible here; presumably fills
 * *info_message with the qping status text - confirm ownership of the buffer. */
u_long32 sge_monitor_status(char **info_message, u_long32 monitor_time);
179
/* Called by MONITOR_IDLE_TIME with the current time before the thread waits. */
void sge_set_last_wait_time(monitoring_t *monitor, struct timeval after);
181
/* Emits the collected statistics; meant to be called once per thread loop. */
void sge_monitor_output(monitoring_t *monitor);
182
/* NOTE(review): presumably clears the collected counters - confirm in the
 * implementation. */
void sge_monitor_reset(monitoring_t *monitor);
189
#define MONITOR_IDLE_TIME(execute, monitor, output_time, is_log) { \
190
struct timeval before; \
191
gettimeofday(&before, NULL); \
192
sge_set_last_wait_time((monitor), before); \
193
if (output_time > 0) { \
194
struct timeval before; \
195
struct timeval after; \
198
(monitor)->monitor_time = output_time; \
199
(monitor)->log_monitor_mes = is_log; \
200
gettimeofday(&before, NULL); \
201
if ((monitor)->now.tv_sec == 0) { \
202
(monitor)->now = before; \
205
gettimeofday(&after, NULL); \
206
(monitor)->output = ((after.tv_sec-(monitor)->now.tv_sec) >= (monitor)->monitor_time)?true:false; \
207
time = after.tv_usec - before.tv_usec; \
208
time = after.tv_sec - before.tv_sec + (time/1000000); \
209
(monitor)->idle += time; \
217
* This might pose a problem if it is called with another macro.
219
* TODO: it should be customized for read/write locks.
221
#define MONITOR_WAIT_TIME(execute, monitor) if (((monitor) != NULL) && ((monitor)->monitor_time > 0)){ \
222
struct timeval before; \
223
struct timeval after; \
226
gettimeofday(&before, NULL); \
228
gettimeofday(&after, NULL); \
229
time = after.tv_usec - before.tv_usec; \
230
time = after.tv_sec - before.tv_sec + (time/1000000); \
231
(monitor)->wait += time; \
237
/*
 * MONITOR_MESSAGES - counts one execution of the thread loop.
 *
 * Does nothing when monitor is NULL or monitoring is switched off
 * (monitor_time == 0). The argument is fully parenthesized and the expansion
 * is a single statement, so it is safe in an unbraced if/else.
 * Note: monitor is evaluated more than once; pass an expression without
 * side effects.
 */
#define MONITOR_MESSAGES(monitor) \
   do { \
      if (((monitor) != NULL) && ((monitor)->monitor_time > 0)) { \
         (monitor)->message_in_count++; \
      } \
   } while (0)
239
/*
 * MONITOR_MESSAGES_OUT - counts one sent message.
 *
 * Does nothing when monitor is NULL or monitoring is switched off
 * (monitor_time == 0). The expansion is a single statement, so a following
 * "else" binds to the caller's "if" and not to the macro.
 * Note: monitor is evaluated more than once; pass an expression without
 * side effects.
 */
#define MONITOR_MESSAGES_OUT(monitor) \
   do { \
      if (((monitor) != NULL) && ((monitor)->monitor_time > 0)) { \
         (monitor)->message_out_count++; \
      } \
   } while (0)
241
/*--------------------------------*/
242
/* EXTENSION SECTION */
243
/*--------------------------------*/
246
* What you need to do to create a new extension:
248
* - create a new extension_t in the enum
249
* - define a extension data structure
250
* - modify the sge_monitor_init method to handle the new extension type
253
* monitor->ext_data_size = sizeof(m_gdi_t);
254
* monitor->ext_data = malloc(sizeof(m_gdi_t));
255
* monitor->ext_output = &ext_gdi_output;
258
* - write the extension output function
259
* - write the measurement macros
260
* - remember, that the entire extension structure is reset to 0 after the data is printed
265
/* scheduler thread extensions */
268
u_long32 dummy; /* unused */
271
/* GDI message thread extensions */
274
u_long32 gdi_add_count; /* counts the gdi add requests */
275
u_long32 gdi_mod_count; /* counts the gdi mod requests */
276
u_long32 gdi_get_count; /* counts the gdi get requests */
277
u_long32 gdi_del_count; /* counts the gdi del requests */
278
u_long32 gdi_cp_count; /* counts the gdi cp requests */
279
u_long32 gdi_trig_count; /* counts the gdi trig requests */
280
u_long32 gdi_perm_count; /* counts the gdi perm requests */
281
u_long32 gdi_replace_count; /* counts the gdi replace requests */
283
u_long32 eload_count; /* counts the execd load reports */
284
u_long32 econf_count; /* counts the execd conf version requests */
285
u_long32 ejob_count; /* counts the execd job reports */
286
u_long32 eproc_count; /* counts the execd processor reports */
287
u_long32 eack_count; /* counts the execd acks */
289
u_long32 queue_length; /* worker queue length */
292
/*
 * GDI request counters. Each macro increments the matching m_gdi_t counter,
 * but only if monitor is not NULL, monitoring is switched on
 * (monitor_time > 0) and the monitor carries the GDI extension.
 *
 * The argument is fully parenthesized (so "&mon" works as well as "(&mon)")
 * and every expansion is a single statement that is safe in an unbraced
 * if/else. The argument is evaluated more than once; pass an expression
 * without side effects.
 */
#define MONITOR_GDI_ADD(monitor) \
   do { \
      if (((monitor) != NULL) && ((monitor)->monitor_time > 0) && ((monitor)->ext_type == GDI_EXT)) { \
         ((m_gdi_t *)((monitor)->ext_data))->gdi_add_count++; \
      } \
   } while (0)

#define MONITOR_GDI_GET(monitor) \
   do { \
      if (((monitor) != NULL) && ((monitor)->monitor_time > 0) && ((monitor)->ext_type == GDI_EXT)) { \
         ((m_gdi_t *)((monitor)->ext_data))->gdi_get_count++; \
      } \
   } while (0)

#define MONITOR_GDI_MOD(monitor) \
   do { \
      if (((monitor) != NULL) && ((monitor)->monitor_time > 0) && ((monitor)->ext_type == GDI_EXT)) { \
         ((m_gdi_t *)((monitor)->ext_data))->gdi_mod_count++; \
      } \
   } while (0)

#define MONITOR_GDI_DEL(monitor) \
   do { \
      if (((monitor) != NULL) && ((monitor)->monitor_time > 0) && ((monitor)->ext_type == GDI_EXT)) { \
         ((m_gdi_t *)((monitor)->ext_data))->gdi_del_count++; \
      } \
   } while (0)

#define MONITOR_GDI_CP(monitor) \
   do { \
      if (((monitor) != NULL) && ((monitor)->monitor_time > 0) && ((monitor)->ext_type == GDI_EXT)) { \
         ((m_gdi_t *)((monitor)->ext_data))->gdi_cp_count++; \
      } \
   } while (0)

#define MONITOR_GDI_TRIG(monitor) \
   do { \
      if (((monitor) != NULL) && ((monitor)->monitor_time > 0) && ((monitor)->ext_type == GDI_EXT)) { \
         ((m_gdi_t *)((monitor)->ext_data))->gdi_trig_count++; \
      } \
   } while (0)

#define MONITOR_GDI_PERM(monitor) \
   do { \
      if (((monitor) != NULL) && ((monitor)->monitor_time > 0) && ((monitor)->ext_type == GDI_EXT)) { \
         ((m_gdi_t *)((monitor)->ext_data))->gdi_perm_count++; \
      } \
   } while (0)

#define MONITOR_GDI_REPLACE(monitor) \
   do { \
      if (((monitor) != NULL) && ((monitor)->monitor_time > 0) && ((monitor)->ext_type == GDI_EXT)) { \
         ((m_gdi_t *)((monitor)->ext_data))->gdi_replace_count++; \
      } \
   } while (0)
301
/*
 * MONITOR_ACK - counts one ACK on a monitor that carries the GDI extension.
 *
 * Does nothing when monitor is NULL, monitoring is switched off
 * (monitor_time == 0) or the extension type is not GDI_EXT. The expansion is
 * a single statement that is safe in an unbraced if/else; the argument is
 * evaluated more than once.
 *
 * NOTE(review): this increments m_gdi_t.ack_count, but the m_gdi_t fields
 * visible in this header only list eack_count (see MONITOR_EACK). Confirm the
 * field exists, or retire this macro in favor of MONITOR_EACK.
 */
#define MONITOR_ACK(monitor) \
   do { \
      if (((monitor) != NULL) && ((monitor)->monitor_time > 0) && ((monitor)->ext_type == GDI_EXT)) { \
         ((m_gdi_t *)((monitor)->ext_data))->ack_count++; \
      } \
   } while (0)
303
/*
 * Execd report counters. Each macro increments the matching m_gdi_t counter
 * (load reports, conf version requests, job reports, processor reports,
 * acks), but only if monitor is not NULL, monitoring is switched on
 * (monitor_time > 0) and the monitor carries the GDI extension.
 *
 * The argument is fully parenthesized and every expansion is a single
 * statement that is safe in an unbraced if/else. The argument is evaluated
 * more than once; pass an expression without side effects.
 */
#define MONITOR_ELOAD(monitor) \
   do { \
      if (((monitor) != NULL) && ((monitor)->monitor_time > 0) && ((monitor)->ext_type == GDI_EXT)) { \
         ((m_gdi_t *)((monitor)->ext_data))->eload_count++; \
      } \
   } while (0)

#define MONITOR_ECONF(monitor) \
   do { \
      if (((monitor) != NULL) && ((monitor)->monitor_time > 0) && ((monitor)->ext_type == GDI_EXT)) { \
         ((m_gdi_t *)((monitor)->ext_data))->econf_count++; \
      } \
   } while (0)

#define MONITOR_EJOB(monitor) \
   do { \
      if (((monitor) != NULL) && ((monitor)->monitor_time > 0) && ((monitor)->ext_type == GDI_EXT)) { \
         ((m_gdi_t *)((monitor)->ext_data))->ejob_count++; \
      } \
   } while (0)

#define MONITOR_EPROC(monitor) \
   do { \
      if (((monitor) != NULL) && ((monitor)->monitor_time > 0) && ((monitor)->ext_type == GDI_EXT)) { \
         ((m_gdi_t *)((monitor)->ext_data))->eproc_count++; \
      } \
   } while (0)

#define MONITOR_EACK(monitor) \
   do { \
      if (((monitor) != NULL) && ((monitor)->monitor_time > 0) && ((monitor)->ext_type == GDI_EXT)) { \
         ((m_gdi_t *)((monitor)->ext_data))->eack_count++; \
      } \
   } while (0)
309
/*
 * MONITOR_SET_QLEN - stores qlen as the worker queue length in the GDI
 * extension data.
 *
 * Does nothing when monitor is NULL, monitoring is switched off
 * (monitor_time == 0) or the extension type is not GDI_EXT. Both arguments
 * are fully parenthesized and the expansion is a single statement that is
 * safe in an unbraced if/else. monitor is evaluated more than once.
 */
#define MONITOR_SET_QLEN(monitor, qlen) \
   do { \
      if (((monitor) != NULL) && ((monitor)->monitor_time > 0) && ((monitor)->ext_type == GDI_EXT)) { \
         ((m_gdi_t *)((monitor)->ext_data))->queue_length = (qlen); \
      } \
   } while (0)
311
/* listener extension */
313
u_long32 inc_gdi; /* incoming GDI requests */
314
u_long32 inc_ack; /* ack requests */
315
u_long32 inc_ece; /* event client exits */
316
u_long32 inc_rep; /* report request */
319
/*
 * Listener thread counters. Each macro increments the matching m_lis_t
 * counter (incoming GDI requests, ack requests, event client exits, report
 * requests), but only if monitor is not NULL, monitoring is switched on
 * (monitor_time > 0) and the monitor carries the listener extension.
 *
 * The argument is fully parenthesized and every expansion is a single
 * statement that is safe in an unbraced if/else. The argument is evaluated
 * more than once; pass an expression without side effects.
 */
#define MONITOR_INC_GDI(monitor) \
   do { \
      if (((monitor) != NULL) && ((monitor)->monitor_time > 0) && ((monitor)->ext_type == LIS_EXT)) { \
         ((m_lis_t *)((monitor)->ext_data))->inc_gdi++; \
      } \
   } while (0)

#define MONITOR_INC_ACK(monitor) \
   do { \
      if (((monitor) != NULL) && ((monitor)->monitor_time > 0) && ((monitor)->ext_type == LIS_EXT)) { \
         ((m_lis_t *)((monitor)->ext_data))->inc_ack++; \
      } \
   } while (0)

#define MONITOR_INC_ECE(monitor) \
   do { \
      if (((monitor) != NULL) && ((monitor)->monitor_time > 0) && ((monitor)->ext_type == LIS_EXT)) { \
         ((m_lis_t *)((monitor)->ext_data))->inc_ece++; \
      } \
   } while (0)

#define MONITOR_INC_REP(monitor) \
   do { \
      if (((monitor) != NULL) && ((monitor)->monitor_time > 0) && ((monitor)->ext_type == LIS_EXT)) { \
         ((m_lis_t *)((monitor)->ext_data))->inc_rep++; \
      } \
   } while (0)
324
/* event master thread extension */
327
u_long32 count; /* counts the number of runs */
328
u_long32 client_count; /* connected event clients */
329
u_long32 mod_client_count; /* event client modifications */
330
u_long32 ack_count; /* nr of acknowledges */
331
u_long32 new_event_count; /* newly added events */
332
u_long32 added_event_count; /* nr of events added to the event clients */
333
u_long32 skip_event_count; /* nr of events ignored, no client has a subscription */
334
u_long32 blocked_client_count; /* nr of event clients blocked during send */
335
u_long32 busy_client_count; /* nr of event clients busy during send */
338
/*
 * Event master thread counters. Each macro updates the matching m_edt_t
 * field, but only if monitor is not NULL, monitoring is switched on
 * (monitor_time > 0) and the monitor carries the event delivery extension.
 * MONITOR_CLIENT_COUNT adds inc to client_count; all others increment their
 * counter by one.
 *
 * All arguments are fully parenthesized and every expansion is a single
 * statement that is safe in an unbraced if/else. monitor is evaluated more
 * than once; pass an expression without side effects.
 */
#define MONITOR_CLIENT_COUNT(monitor, inc) \
   do { \
      if (((monitor) != NULL) && ((monitor)->monitor_time > 0) && ((monitor)->ext_type == EDT_EXT)) { \
         ((m_edt_t *)((monitor)->ext_data))->client_count += (inc); \
      } \
   } while (0)

#define MONITOR_EDT_COUNT(monitor) \
   do { \
      if (((monitor) != NULL) && ((monitor)->monitor_time > 0) && ((monitor)->ext_type == EDT_EXT)) { \
         ((m_edt_t *)((monitor)->ext_data))->count++; \
      } \
   } while (0)

#define MONITOR_EDT_MOD(monitor) \
   do { \
      if (((monitor) != NULL) && ((monitor)->monitor_time > 0) && ((monitor)->ext_type == EDT_EXT)) { \
         ((m_edt_t *)((monitor)->ext_data))->mod_client_count++; \
      } \
   } while (0)

#define MONITOR_EDT_ACK(monitor) \
   do { \
      if (((monitor) != NULL) && ((monitor)->monitor_time > 0) && ((monitor)->ext_type == EDT_EXT)) { \
         ((m_edt_t *)((monitor)->ext_data))->ack_count++; \
      } \
   } while (0)

#define MONITOR_EDT_NEW(monitor) \
   do { \
      if (((monitor) != NULL) && ((monitor)->monitor_time > 0) && ((monitor)->ext_type == EDT_EXT)) { \
         ((m_edt_t *)((monitor)->ext_data))->new_event_count++; \
      } \
   } while (0)

#define MONITOR_EDT_ADDED(monitor) \
   do { \
      if (((monitor) != NULL) && ((monitor)->monitor_time > 0) && ((monitor)->ext_type == EDT_EXT)) { \
         ((m_edt_t *)((monitor)->ext_data))->added_event_count++; \
      } \
   } while (0)

#define MONITOR_EDT_SKIP(monitor) \
   do { \
      if (((monitor) != NULL) && ((monitor)->monitor_time > 0) && ((monitor)->ext_type == EDT_EXT)) { \
         ((m_edt_t *)((monitor)->ext_data))->skip_event_count++; \
      } \
   } while (0)

#define MONITOR_EDT_BLOCKED(monitor) \
   do { \
      if (((monitor) != NULL) && ((monitor)->monitor_time > 0) && ((monitor)->ext_type == EDT_EXT)) { \
         ((m_edt_t *)((monitor)->ext_data))->blocked_client_count++; \
      } \
   } while (0)

#define MONITOR_EDT_BUSY(monitor) \
   do { \
      if (((monitor) != NULL) && ((monitor)->monitor_time > 0) && ((monitor)->ext_type == EDT_EXT)) { \
         ((m_edt_t *)((monitor)->ext_data))->busy_client_count++; \
      } \
   } while (0)
365
/* timed event thread extension */
368
u_long32 count; /* counts the number of runs */
369
u_long32 event_count; /* nr of pending events */
370
u_long32 exec_count; /* nr of executed events */
373
/*
 * Timed event thread counters. Each macro updates the matching m_tet_t
 * field, but only if monitor is not NULL, monitoring is switched on
 * (monitor_time > 0) and the monitor carries the timed event extension.
 * MONITOR_TET_EVENT adds inc to event_count; the others increment their
 * counter by one.
 *
 * All arguments are fully parenthesized and every expansion is a single
 * statement that is safe in an unbraced if/else. monitor is evaluated more
 * than once; pass an expression without side effects.
 */
#define MONITOR_TET_COUNT(monitor) \
   do { \
      if (((monitor) != NULL) && ((monitor)->monitor_time > 0) && ((monitor)->ext_type == TET_EXT)) { \
         ((m_tet_t *)((monitor)->ext_data))->count++; \
      } \
   } while (0)

#define MONITOR_TET_EVENT(monitor, inc) \
   do { \
      if (((monitor) != NULL) && ((monitor)->monitor_time > 0) && ((monitor)->ext_type == TET_EXT)) { \
         ((m_tet_t *)((monitor)->ext_data))->event_count += (inc); \
      } \
   } while (0)

#define MONITOR_TET_EXEC(monitor) \
   do { \
      if (((monitor) != NULL) && ((monitor)->monitor_time > 0) && ((monitor)->ext_type == TET_EXT)) { \
         ((m_tet_t *)((monitor)->ext_data))->exec_count++; \
      } \
   } while (0)
383
#endif /* _SGE_MONITIR_H */