/*****************************************************************************\
 *  src/srun/task_state.c - task state container
 *****************************************************************************
 *  Copyright (C) 2002 The Regents of the University of California.
 *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 *  Written by Mark Grondona <mgrondona@llnl.gov>.
 *  CODE-OCEC-09-009. All rights reserved.
 *
 *  This file is part of SLURM, a resource management program.
 *  For details, see <https://computing.llnl.gov/linux/slurm/>.
 *  Please also read the included file: DISCLAIMER.
 *
 *  SLURM is free software; you can redistribute it and/or modify it under
 *  the terms of the GNU General Public License as published by the Free
 *  Software Foundation; either version 2 of the License, or (at your option)
 *  any later version.
 *
 *  In addition, as a special exception, the copyright holders give permission
 *  to link the code of portions of this program with the OpenSSL library under
 *  certain conditions as described in each individual source file, and
 *  distribute linked combinations including the two. You must obey the GNU
 *  General Public License in all respects for all of the code used other than
 *  OpenSSL. If you modify file(s) with this exception, you may extend this
 *  exception to your version of the file(s), but you are not obligated to do
 *  so. If you do not wish to do so, delete this exception statement from your
 *  version.  If you delete this exception statement from all source files in
 *  the program, then also delete it here.
 *
 *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
 *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 *  details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with SLURM; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
\*****************************************************************************/
46
#include "src/common/xmalloc.h"
47
#include "src/common/bitstring.h"
48
#include "src/common/xassert.h"
50
#include "src/srun/task_state.h"
52
struct task_state_struct {
57
unsigned int first_exit:1;
58
unsigned int first_abnormal_exit:1;
59
bitstr_t *start_failed;
61
bitstr_t *normal_exit;
62
bitstr_t *abnormal_exit;
65
task_state_t task_state_create (int ntasks)
67
task_state_t ts = xmalloc (sizeof (*ts));
69
/* ts is zero filled by xmalloc() */
71
ts->running = bit_alloc (ntasks);
72
ts->start_failed = bit_alloc (ntasks);
73
ts->normal_exit = bit_alloc (ntasks);
74
ts->abnormal_exit = bit_alloc (ntasks);
79
void task_state_destroy (task_state_t ts)
84
bit_free (ts->start_failed);
86
bit_free (ts->running);
88
bit_free (ts->normal_exit);
89
if (ts->abnormal_exit)
90
bit_free (ts->abnormal_exit);
94
static const char *_task_state_type_str (task_state_type_t t)
97
case TS_START_SUCCESS:
98
return ("TS_START_SUCCESS");
99
case TS_START_FAILURE:
100
return ("TS_START_FAILURE");
102
return ("TS_NORMAL_EXIT");
103
case TS_ABNORMAL_EXIT:
104
return ("TS_ABNORMAL_EXIT");
109
void task_state_update (task_state_t ts, int taskid, task_state_type_t t)
111
xassert (ts != NULL);
112
xassert (taskid >= 0);
113
xassert (taskid < ts->n_tasks);
115
debug3("task_state_update(taskid=%d, %s)",
116
taskid, _task_state_type_str (t));
119
case TS_START_SUCCESS:
120
bit_set (ts->running, taskid);
123
case TS_START_FAILURE:
124
bit_set (ts->start_failed, taskid);
127
bit_set (ts->normal_exit, taskid);
128
bit_clear (ts->running, taskid);
131
case TS_ABNORMAL_EXIT:
132
bit_clear (ts->running, taskid);
133
bit_set (ts->abnormal_exit, taskid);
139
xassert ((bit_set_count(ts->abnormal_exit) +
140
bit_set_count(ts->normal_exit)) == ts->n_exited);
143
int task_state_first_exit (task_state_t ts)
145
if (!ts->first_exit && ts->n_exited) {
152
int task_state_first_abnormal_exit (task_state_t ts)
154
if (!ts->first_abnormal_exit && ts->n_abnormal) {
155
ts->first_abnormal_exit = 1;
161
static void _do_log_msg (bitstr_t *b, log_f fn, const char *msg)
164
char *s = bit_set_count (b) == 1 ? "" : "s";
165
(*fn) ("task%s %s: %s\n", s, bit_fmt (buf, sizeof(buf), b), msg);
168
void task_state_print (task_state_t ts, log_f fn)
170
bitstr_t *unseen = bit_alloc (ts->n_tasks);
172
if (bit_set_count (ts->start_failed)) {
173
_do_log_msg (ts->start_failed, fn, "failed to start");
174
bit_or (unseen, ts->start_failed);
176
if (bit_set_count (ts->running)) {
177
_do_log_msg (ts->running, fn, "running");
178
bit_or (unseen, ts->running);
180
if (bit_set_count (ts->abnormal_exit)) {
181
_do_log_msg (ts->abnormal_exit, fn, "exited abnormally");
182
bit_or (unseen, ts->abnormal_exit);
184
if (bit_set_count (ts->normal_exit)) {
185
_do_log_msg (ts->normal_exit, fn, "exited");
186
bit_or (unseen, ts->normal_exit);
189
if (bit_set_count (unseen))
190
_do_log_msg (unseen, fn, "unknown");