131
131
}lrm_objectstats;
133
/* define indexes into logmsg_ctrl_defs */
134
#define OP_STAYED_TOO_LONG 0
135
static struct logspam logmsg_ctrl_defs[] = {
136
{ "operation stayed too long in the queue",
137
10, 60, 120, /* max 10 messages in 60s, then delay for 120s */
138
"configuration advice: reduce operation contention "
139
"either by increasing lrmd max_children or by increasing intervals "
140
"of monitor operations"
133
144
#define set_fd_opts(fd,opts) do { \
135
146
if ((flag = fcntl(fd, F_GETFL)) >= 0) { \
3078
3089
op_type = ha_msg_value(op->msg, F_LRM_OP);
3079
if (!op->interval || is_logmsg_due(op)) { /* log non-repeating ops */
3080
lrmd_log(LOG_INFO,"rsc:%s:%d: %s",rsc->id,op->call_id,probe_str(op,op_type));
3082
lrmd_debug(LOG_DEBUG,"rsc:%s:%d: %s",rsc->id,op->call_id,op_type);
3084
3090
op_params = ha_msg_value_str_table(op->msg, F_LRM_PARAM);
3085
3091
params = merge_str_tables(rsc->params,op_params);
3086
3092
ha_msg_mod_str_table(op->msg, F_LRM_PARAM, params);
3125
3131
((op->interval && !is_logmsg_due(op)) ? PT_LOGNORMAL : PT_LOGVERBOSE) : PT_LOGNONE
3126
3132
, op, &ManagedChildTrackOps);
3128
if (op->interval && is_logmsg_due(op)) {
3129
op->t_lastlogmsg = time_longclock();
3134
if (!op->interval || is_logmsg_due(op)) { /* log non-repeating ops */
3135
lrmd_log(LOG_INFO,"rsc:%s %s[%d] (pid %d)",
3136
rsc->id,probe_str(op,op_type),op->call_id,pid);
3138
lrmd_debug(LOG_DEBUG,"rsc:%s %s[%d] (pid %d)",
3139
rsc->id,op_type,op->call_id,pid);
3131
3141
close(stdout_fd[1]);
3132
3142
close(stderr_fd[1]);
3218
3228
, "perform_ra_op:calling RA plugin to perform %s, pid: [%d]"
3219
3229
, op_info(op), getpid());
3220
3230
params = ha_msg_value_str_table(op->msg, F_LRM_PARAM);
3231
if (replace_secret_params(rsc->id, params) < 0) {
3232
/* replacing secrets failed! */
3233
if (!strcmp(op_type,"stop")) {
3234
/* don't fail on stop! */
3236
, "%s:%d: proceeding with the stop operation for %s"
3237
, __FUNCTION__, __LINE__, rsc->id);
3240
, "%s:%d: failed to get secrets for %s, "
3241
"considering resource not configured"
3242
, __FUNCTION__, __LINE__, rsc->id);
3243
exit(EXECRA_NOT_CONFIGURED);
3221
3246
RAExec->execra (rsc->id,
3317
3342
if( proctrack_timedout(p) ) {
3318
lrmd_log(LOG_WARNING, "%s: pid [%d] timed out"
3319
, op_info(op), proctrack_pid(p));
3343
lrmd_log(LOG_WARNING, "%s: pid %d timed out"
3344
, small_op_info(op), proctrack_pid(p));
3320
3345
op_status = LRM_OP_TIMEOUT;
3322
3347
op_status = LRM_OP_ERROR;
3325
3350
rc = RAExec->map_ra_retvalue(exitcode, op_type
3326
3351
, op->first_line_ra_stdout);
3327
if (rc != EXECRA_OK || debug_level > 0) {
3352
if (!op->interval || is_logmsg_due(op) || debug_level > 0) { /* log non-repeating ops */
3328
3353
if (rc == exitcode) {
3329
lrmd_debug2(rc == EXECRA_OK ? LOG_DEBUG : LOG_INFO
3330
, "%s: pid [%d] exited with"
3331
" return code %d", op_info(op), proctrack_pid(p), rc);
3355
, "%s: pid %d exited with"
3356
" return code %d", small_op_info(op), proctrack_pid(p), rc);
3333
lrmd_debug2(rc == EXECRA_OK ? LOG_DEBUG : LOG_INFO
3334
, "%s: pid [%d] exited with"
3359
, "%s: pid %d exited with"
3335
3360
" return code %d (mapped from %d)"
3336
, op_info(op), proctrack_pid(p), rc, exitcode);
3338
if (rc != EXECRA_OK || debug_level > 1) {
3339
lrmd_debug2(LOG_INFO, "Resource Agent output: [%s]"
3340
, op->first_line_ra_stdout);
3361
, small_op_info(op), proctrack_pid(p), rc, exitcode);
3343
3364
if (EXECRA_EXEC_UNKNOWN_ERROR == rc || EXECRA_NO_RA == rc) {
3348
3369
op_status = LRM_OP_DONE;
3372
if (op->interval && is_logmsg_due(op)) {
3373
op->t_lastlogmsg = time_longclock();
3352
3376
ha_msg_mod_int(op->msg, F_LRM_OPSTATUS, op_status)) {
3353
3377
LOG_FAILED_TO_ADD_FIELD("opstatus");
3921
3945
,op->call_id ,op->client_id);
3924
snprintf(info, sizeof(info)
3925
,"operation %s[%d] on %s::%s::%s for client %d"
3926
,lrm_str(op_type), op->call_id
3927
,lrm_str(rsc->class), lrm_str(rsc->type), lrm_str(rsc->id)
3948
if (op->exec_pid > 1) {
3949
snprintf(info, sizeof(info)
3950
,"operation %s[%d] with pid %d on %s for client %d"
3951
,lrm_str(op_type), op->call_id, op->exec_pid, lrm_str(rsc->id)
3954
snprintf(info, sizeof(info)
3955
,"operation %s[%d] on %s for client %d"
3956
,lrm_str(op_type), op->call_id, lrm_str(rsc->id)
3930
3960
if( add_params ) {
3931
3961
param_gstr = g_string_new("");
3968
3998
check_queue_duration(lrmd_op_t* op)
3970
4000
unsigned long t_stay_in_list = 0;
4001
static struct msg_ctrl *ml;
3971
4003
CHECK_ALLOCATED(op, "op", );
3972
4004
t_stay_in_list = longclockto_ms(op->t_perform - op->t_addtolist);
3973
if ( t_stay_in_list > WARNINGTIME_IN_LIST)
4005
if ( t_stay_in_list > WARNINGTIME_IN_LIST)
3975
lrmd_log(LOG_WARNING
3976
, "perform_ra_op: the operation %s stayed in operation "
4008
ml = cl_limit_log_new(logmsg_ctrl_defs + OP_STAYED_TOO_LONG);
4009
cl_limit_log(ml, LOG_WARNING
4010
, "perform_ra_op: the %s stayed in operation "
3977
4011
"list for %lu ms (longer than %d ms)"
3978
, op_info(op), t_stay_in_list
4012
, small_op_info(op), t_stay_in_list
3979
4013
, WARNINGTIME_IN_LIST
3981
4015
if (debug_level >= 2) {