1
/*****************************************************************************\
2
* as_mysql_job.c - functions dealing with jobs and job steps.
3
*****************************************************************************
5
* Copyright (C) 2004-2007 The Regents of the University of California.
6
* Copyright (C) 2008-2010 Lawrence Livermore National Security.
7
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
8
* Written by Danny Auble <da@llnl.gov>
10
* This file is part of SLURM, a resource management program.
11
* For details, see <https://computing.llnl.gov/linux/slurm/>.
12
* Please also read the included file: DISCLAIMER.
14
* SLURM is free software; you can redistribute it and/or modify it under
15
* the terms of the GNU General Public License as published by the Free
16
* Software Foundation; either version 2 of the License, or (at your option)
19
* In addition, as a special exception, the copyright holders give permission
20
* to link the code of portions of this program with the OpenSSL library under
21
* certain conditions as described in each individual source file, and
22
* distribute linked combinations including the two. You must obey the GNU
23
* General Public License in all respects for all of the code used other than
24
* OpenSSL. If you modify file(s) with this exception, you may extend this
25
* exception to your version of the file(s), but you are not obligated to do
26
* so. If you do not wish to do so, delete this exception statement from your
27
* version. If you delete this exception statement from all source files in
28
* the program, then also delete it here.
30
* SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
31
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
32
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
35
* You should have received a copy of the GNU General Public License along
36
* with SLURM; if not, write to the Free Software Foundation, Inc.,
37
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
38
\*****************************************************************************/
40
#include "as_mysql_job.h"
41
#include "as_mysql_usage.h"
42
#include "as_mysql_wckey.h"
44
#include "src/common/parse_time.h"
45
#include "src/common/jobacct_common.h"
47
/*
 * _get_db_index - find the job_db_inx of a job that is already in the
 * cluster's job table.
 *
 * mysql_conn supplies the connection and the cluster_name used as the
 * table prefix; submit, jobid and associd are handed to the query
 * formatter (submit through an (int) cast).  The query is run with
 * mysql_db_query_ret() and the first column of the first row is
 * converted with slurm_atoul().  When no row comes back the result is
 * freed and a debug4 message is logged; the wording of that message
 * shows this is an expected situation (the job start has simply not
 * been recorded yet).
 *
 * NOTE(review): this listing carries stray line-number residue and has
 * lost lines: there are no braces, "row" and "db_index" are used
 * without a visible declaration, the tail of the where clause and the
 * return statements are missing, and the comment below is never
 * closed.  Restore the function from upstream before building.
 * NOTE(review): the (int) cast narrows time_t before it is formatted
 * with %d -- confirm that is acceptable on the target platforms.
 */
/* Used in job functions for getting the database index based off the
48
* submit time, job and assoc id. 0 is returned if none is found
50
static int _get_db_index(mysql_conn_t *mysql_conn,
51
time_t submit, uint32_t jobid, uint32_t associd)
53
MYSQL_RES *result = NULL;
56
char *query = xstrdup_printf("select job_db_inx from \"%s_%s\" where "
57
"time_submit=%d and id_job=%u "
59
mysql_conn->cluster_name, job_table,
60
(int)submit, jobid, associd);
62
if (!(result = mysql_db_query_ret(mysql_conn, query, 0))) {
68
row = mysql_fetch_row(result);
70
mysql_free_result(result);
71
debug4("We can't get a db_index for this combo, "
72
"time_submit=%d and id_job=%u and id_assoc=%u. "
73
"We must not have heard about the start yet, "
74
"no big deal, we will get one right after this.",
75
(int)submit, jobid, associd);
78
db_index = slurm_atoul(row[0]);
79
mysql_free_result(result);
84
// _get_user_from_associd - fetch the user name stored for an
// association id.
//
// Selects the "user" column from the association table of the given
// cluster where id_assoc matches associd, logs the query at debug4 and
// runs it with mysql_db_query_ret().  If a row is returned, its first
// column is copied with xstrdup() into "user"; the result set is then
// freed.
//
// The caller visible in this file treats a NULL result as "no user for
// this association".  Presumably "user" is what gets returned and the
// caller owns the copy -- the return statement is not visible here.
//
// NOTE(review): this listing carries stray line-number residue and has
// lost lines (braces, the declarations of row/user/query, the start of
// the "if (!(result =" test, the return).  Restore from upstream
// before building.
static char *_get_user_from_associd(mysql_conn_t *mysql_conn,
85
char *cluster, uint32_t associd)
89
MYSQL_RES *result = NULL;
92
/* Just so we don't have to keep a
93
cache of the associations around we
94
will just query the db for the user
95
name of the association id. Since
96
this should sort of be a rare case
99
query = xstrdup_printf("select user from \"%s_%s\" where id_assoc=%u",
100
cluster, assoc_table, associd);
102
debug4("%d(%s:%d) query\n%s",
103
mysql_conn->conn, THIS_FILE, __LINE__, query);
105
mysql_db_query_ret(mysql_conn, query, 0))) {
111
if ((row = mysql_fetch_row(result)))
112
user = xstrdup(row[0]);
114
mysql_free_result(result);
119
// _get_wckeyid - resolve (and if necessary create) the wckey id to
// store with a job.
//
// wckeyid starts at 0 and is only looked up when
// slurm_get_track_wckey() is true.  In that case the visible code:
//   - obtains the user name from the association id (an error is
//     logged when there is none);
//   - in the "get the default key" section, fills a user record via
//     assoc_mgr_fill_in_user() and sets *name to "*<default_wckey>",
//     or to "*" when the user has no default (the condition that
//     selects this section is not visible in this listing);
//   - asks assoc_mgr_fill_in_wckey() for a wckey matching
//     name/user/cluster;
//   - when that fails, adds the key through as_mysql_add_wckeys() as
//     the slurm user, commits, and repeats the lookup;
//   - takes wckey_rec.id as the result.
//
// *name may be replaced with a newly allocated string (xstrdup_printf).
// The uid parameter is not referenced in the visible lines.
//
// NOTE(review): this listing carries stray line-number residue and has
// lost lines (braces, the declaration of "user", early exits, the
// return, any xfree of "user").  Restore from upstream before
// building.
static uint32_t _get_wckeyid(mysql_conn_t *mysql_conn, char **name,
120
uid_t uid, char *cluster, uint32_t associd)
122
uint32_t wckeyid = 0;
124
if (slurm_get_track_wckey()) {
125
/* Here we are looking for the wckeyid if it doesn't
126
* exist we will create one. We don't need to check
127
* if it is good or not. Right now this is the only
128
* place things are created. We do this only on a job
129
* start, not on a job submit since we don't want to
130
* slow down getting the db_index back to the
133
slurmdb_wckey_rec_t wckey_rec;
136
/* since we are unable to rely on uids here (someone could
137
not have their uid in the system yet) we must
138
first get the user name from the associd */
139
if (!(user = _get_user_from_associd(
140
mysql_conn, cluster, associd))) {
141
error("No user for associd %u", associd);
144
/* get the default key */
146
slurmdb_user_rec_t user_rec;
147
memset(&user_rec, 0, sizeof(slurmdb_user_rec_t));
148
user_rec.uid = NO_VAL;
149
user_rec.name = user;
150
if (assoc_mgr_fill_in_user(mysql_conn, &user_rec,
151
1, NULL) != SLURM_SUCCESS) {
152
error("No user by name of %s assoc %u",
158
if (user_rec.default_wckey)
159
*name = xstrdup_printf("*%s",
160
user_rec.default_wckey);
162
*name = xstrdup_printf("*");
165
/* Look the key up by name, user and cluster; uid is left unset. */
memset(&wckey_rec, 0, sizeof(slurmdb_wckey_rec_t));
166
wckey_rec.name = (*name);
167
wckey_rec.uid = NO_VAL;
168
wckey_rec.user = user;
169
wckey_rec.cluster = cluster;
170
if (assoc_mgr_fill_in_wckey(mysql_conn, &wckey_rec,
171
ACCOUNTING_ENFORCE_WCKEYS,
172
NULL) != SLURM_SUCCESS) {
173
List wckey_list = NULL;
174
slurmdb_wckey_rec_t *wckey_ptr = NULL;
176
/* Lookup failed: build a one-element list holding copies of
 * the name, user and cluster, to be added to the database. */
wckey_list = list_create(slurmdb_destroy_wckey_rec);
178
wckey_ptr = xmalloc(sizeof(slurmdb_wckey_rec_t));
179
wckey_ptr->name = xstrdup((*name));
180
wckey_ptr->user = xstrdup(user);
181
wckey_ptr->cluster = xstrdup(cluster);
182
list_append(wckey_list, wckey_ptr);
183
/* info("adding wckey '%s' '%s' '%s'", */
184
/* wckey_ptr->name, wckey_ptr->user, */
185
/* wckey_ptr->cluster); */
186
/* we have already checked to make
187
sure this was the slurm user before
189
if (as_mysql_add_wckeys(mysql_conn,
190
slurm_get_slurm_user_id(),
193
acct_storage_p_commit(mysql_conn, 1);
194
/* If that worked lets get it */
195
assoc_mgr_fill_in_wckey(mysql_conn, &wckey_rec,
196
ACCOUNTING_ENFORCE_WCKEYS,
199
list_destroy(wckey_list);
202
/* info("got wckeyid of %d", wckey_rec.id); */
203
wckeyid = wckey_rec.id;
209
/* extern functions */
211
/*
 * as_mysql_job_start - record the start (or a resize) of a job in the
 * cluster's job table.
 *
 * mysql_conn: connection; its cluster_name is the table prefix.
 * job_ptr:    job being recorded.  db_index is zeroed while handling a
 *             resize and is assigned from mysql_db_insert_ret_id()
 *             when a row is inserted.
 * Returns ESLURM_DB_CONNECTION when check_connection() fails.  rc
 * starts as SLURM_SUCCESS and receives the result of each
 * mysql_db_query() call; presumably it is the final return value (the
 * remaining return statements are not visible in this listing).
 *
 * Outline, as far as the visible lines show:
 *  - a job with neither a submit time nor a resize time is rejected
 *    with an error message;
 *  - for a resizing job the previous record is completed, the RESIZING
 *    flag is dropped from the recorded state and db_index is cleared;
 *  - begin/submit/start times all come from resize_time when it is
 *    set, otherwise from the job details and start_time;
 *  - with rollup_lock held: when the chosen check time is older than
 *    global_last_rollup and the job row is not already stored with the
 *    same times, global_last_rollup and the hourly/daily/monthly
 *    rollup columns of the last-ran table are pulled back to it;
 *  - the job name is escaped with slurm_add_slash_to_quotes() (or
 *    defaults to "allocation"), the node list defaults to
 *    "None assigned";
 *  - the wckey id is resolved when the job has an association and has
 *    either started or been cancelled;
 *  - with no db_index the row is inserted ("on duplicate key update");
 *    otherwise the row with that job_db_inx is updated;
 *  - after a resize, a suspended job has the suspend record of its old
 *    index closed, and the step rows are re-pointed from the old index
 *    to the new one.
 *
 * NOTE(review): this listing carries stray line-number residue and has
 * lost lines (braces, "else" lines, several declarations such as
 * query/track_steps/row, conditions, returns, and most comment
 * closers).  Restore from upstream before building.
 * NOTE(review): time_t values are formatted with %ld -- confirm time_t
 * is long on every supported platform.
 */
extern int as_mysql_job_start(mysql_conn_t *mysql_conn,
212
struct job_record *job_ptr)
214
int rc=SLURM_SUCCESS;
215
char *nodes = NULL, *jname = NULL, *node_inx = NULL;
217
char *block_id = NULL;
220
time_t begin_time, check_time, start_time, submit_time;
221
uint32_t wckeyid = 0;
222
int job_state, node_cnt = 0;
223
/* Keep the index held on entry; the resize handling below zeroes
 * job_ptr->db_index and the old value is needed at the end. */
uint32_t job_db_inx = job_ptr->db_index;
225
if ((!job_ptr->details || !job_ptr->details->submit_time)
226
&& !job_ptr->resize_time) {
227
error("as_mysql_job_start: "
228
"Not inputing this job, it has no submit time.");
232
if (check_connection(mysql_conn) != SLURM_SUCCESS)
233
return ESLURM_DB_CONNECTION;
235
debug2("as_mysql_slurmdb_job_start() called");
237
job_state = job_ptr->job_state;
239
/* Since we need a new db_inx make sure the old db_inx
240
* removed. This is most likely the only time we are going to
241
* be notified of the change also so make the state without
243
if (IS_JOB_RESIZING(job_ptr)) {
244
/* If we have a db_index lets end the previous record. */
245
if (job_ptr->db_index)
246
as_mysql_job_complete(mysql_conn, job_ptr);
248
error("We don't have a db_index for job %u, "
249
"this should never happen.", job_ptr->job_id);
250
job_state &= (~JOB_RESIZING);
251
job_ptr->db_index = 0;
254
job_state &= JOB_STATE_BASE;
256
/* With a resize time set, all three timestamps are that time. */
if (job_ptr->resize_time) {
257
begin_time = job_ptr->resize_time;
258
submit_time = job_ptr->resize_time;
259
start_time = job_ptr->resize_time;
261
begin_time = job_ptr->details->begin_time;
262
submit_time = job_ptr->details->submit_time;
263
start_time = job_ptr->start_time;
266
/* See what we are hearing about here if no start time. If
267
* this job latest time is before the last roll up we will
268
* need to reset it to look at this job. */
270
check_time = start_time;
272
check_time = begin_time;
274
check_time = submit_time;
276
/* global_last_rollup is read and updated with rollup_lock held. */
slurm_mutex_lock(&rollup_lock);
277
if (check_time < global_last_rollup) {
278
MYSQL_RES *result = NULL;
281
/* check to see if we are hearing about this time for the
284
query = xstrdup_printf("select job_db_inx "
285
"from \"%s_%s\" where id_job=%u and "
286
"time_submit=%ld and time_eligible=%ld "
287
"and time_start=%ld;",
288
mysql_conn->cluster_name,
289
job_table, job_ptr->job_id,
290
submit_time, begin_time, start_time);
291
debug3("%d(%s:%d) query\n%s",
292
mysql_conn->conn, THIS_FILE, __LINE__, query);
294
mysql_db_query_ret(mysql_conn, query, 0))) {
296
slurm_mutex_unlock(&rollup_lock);
300
if ((row = mysql_fetch_row(result))) {
301
mysql_free_result(result);
302
debug4("revieved an update for a "
303
"job (%u) already known about",
305
slurm_mutex_unlock(&rollup_lock);
306
goto no_rollup_change;
308
mysql_free_result(result);
310
if (job_ptr->start_time)
311
debug("Need to reroll usage from %sJob %u "
312
"from %s started then and we are just "
313
"now hearing about it.",
315
job_ptr->job_id, mysql_conn->cluster_name);
317
debug("Need to reroll usage from %sJob %u "
318
"from %s became eligible then and we are just "
319
"now hearing about it.",
321
job_ptr->job_id, mysql_conn->cluster_name);
323
debug("Need to reroll usage from %sJob %u "
324
"from %s was submitted then and we are just "
325
"now hearing about it.",
327
job_ptr->job_id, mysql_conn->cluster_name);
329
global_last_rollup = check_time;
330
slurm_mutex_unlock(&rollup_lock);
332
/* If the times here are later than the daily_rollup
333
or monthly rollup it isn't a big deal since they
334
are always shrunk down to the beginning of each
337
query = xstrdup_printf("update \"%s_%s\" set "
338
"hourly_rollup=%ld, "
339
"daily_rollup=%ld, monthly_rollup=%ld",
340
mysql_conn->cluster_name,
341
last_ran_table, check_time,
342
check_time, check_time);
343
debug3("%d(%s:%d) query\n%s",
344
mysql_conn->conn, THIS_FILE, __LINE__, query);
345
rc = mysql_db_query(mysql_conn, query);
348
slurm_mutex_unlock(&rollup_lock);
352
if (job_ptr->name && job_ptr->name[0])
353
jname = slurm_add_slash_to_quotes(job_ptr->name);
355
jname = xstrdup("allocation");
359
if (job_ptr->nodes && job_ptr->nodes[0])
360
nodes = job_ptr->nodes;
362
nodes = "None assigned";
364
if (job_ptr->batch_flag)
368
block_id = xstrdup(job_ptr->comment);
369
node_cnt = job_ptr->total_nodes;
370
node_inx = job_ptr->network;
372
char temp_bit[BUF_SIZE];
374
if (job_ptr->node_bitmap) {
375
node_inx = bit_fmt(temp_bit, sizeof(temp_bit),
376
job_ptr->node_bitmap);
379
select_g_select_jobinfo_get(job_ptr->select_jobinfo,
380
SELECT_JOBDATA_BLOCK_ID,
382
select_g_select_jobinfo_get(job_ptr->select_jobinfo,
383
SELECT_JOBDATA_NODE_CNT,
386
node_cnt = job_ptr->total_nodes;
390
/* If there is a start_time get the wckeyid. If the job is
391
* cancelled before the job starts we also want to grab it. */
392
if (job_ptr->assoc_id
393
&& (job_ptr->start_time || IS_JOB_CANCELLED(job_ptr)))
394
wckeyid = _get_wckeyid(mysql_conn, &job_ptr->wckey,
396
mysql_conn->cluster_name,
399
/* No database index yet: insert the job, falling back to an update
 * of the existing row when the insert hits a duplicate key. */
if (!job_ptr->db_index) {
401
begin_time = submit_time;
402
query = xstrdup_printf(
403
"insert into \"%s_%s\" "
404
"(id_job, id_assoc, id_qos, id_wckey, id_user, "
405
"id_group, nodelist, id_resv, timelimit, "
406
"time_eligible, time_submit, time_start, "
407
"job_name, track_steps, state, priority, cpus_req, "
408
"cpus_alloc, nodes_alloc",
409
mysql_conn->cluster_name, job_table);
411
if (job_ptr->account)
412
xstrcat(query, ", account");
413
if (job_ptr->partition)
414
xstrcat(query, ", partition");
416
xstrcat(query, ", id_block");
418
xstrcat(query, ", wckey");
420
xstrcat(query, ", node_inx");
423
") values (%u, %u, %u, %u, %u, %u, '%s', %u, %u, "
424
"%ld, %ld, %ld, '%s', %u, %u, %u, %u, %u, %u",
425
job_ptr->job_id, job_ptr->assoc_id,
426
job_ptr->qos_id, wckeyid,
427
job_ptr->user_id, job_ptr->group_id, nodes,
428
job_ptr->resv_id, job_ptr->time_limit,
429
begin_time, submit_time, start_time,
430
jname, track_steps, job_state,
431
job_ptr->priority, job_ptr->details->min_cpus,
432
job_ptr->total_cpus, node_cnt);
434
/* NOTE(review): unlike jname, the account, partition, block_id,
 * wckey and node_inx strings go into the SQL text as they are --
 * confirm they can never contain a quote character. */
if (job_ptr->account)
435
xstrfmtcat(query, ", '%s'", job_ptr->account);
436
if (job_ptr->partition)
437
xstrfmtcat(query, ", '%s'", job_ptr->partition);
439
xstrfmtcat(query, ", '%s'", block_id);
441
xstrfmtcat(query, ", '%s'", job_ptr->wckey);
443
xstrfmtcat(query, ", '%s'", node_inx);
446
") on duplicate key update "
447
"job_db_inx=LAST_INSERT_ID(job_db_inx), "
448
"id_wckey=%u, id_user=%u, id_group=%u, "
449
"nodelist='%s', id_resv=%u, timelimit=%u, "
450
"time_submit=%ld, time_start=%ld, "
451
"job_name='%s', track_steps=%u, id_qos=%u, "
452
"state=greatest(state, %u), priority=%u, "
453
"cpus_req=%u, cpus_alloc=%u, nodes_alloc=%u",
454
wckeyid, job_ptr->user_id, job_ptr->group_id, nodes,
455
job_ptr->resv_id, job_ptr->time_limit,
456
submit_time, start_time,
457
jname, track_steps, job_ptr->qos_id, job_state,
458
job_ptr->priority, job_ptr->details->min_cpus,
459
job_ptr->total_cpus, node_cnt);
461
if (job_ptr->account)
462
xstrfmtcat(query, ", account='%s'", job_ptr->account);
463
if (job_ptr->partition)
464
xstrfmtcat(query, ", partition='%s'",
467
xstrfmtcat(query, ", id_block='%s'", block_id);
469
xstrfmtcat(query, ", wckey='%s'", job_ptr->wckey);
471
xstrfmtcat(query, ", node_inx='%s'", node_inx);
473
debug3("%d(%s:%d) query\n%s",
474
mysql_conn->conn, THIS_FILE, __LINE__, query);
476
if (!(job_ptr->db_index = mysql_db_insert_ret_id(
477
mysql_conn, query))) {
479
error("It looks like the storage has gone "
480
"away trying to reconnect");
481
mysql_db_close_db_connection(
484
check_connection(mysql_conn);
491
/* The row is already known: update it, keyed on job_db_inx. */
query = xstrdup_printf("update \"%s_%s\" set nodelist='%s', ",
492
mysql_conn->cluster_name,
495
if (job_ptr->account)
496
xstrfmtcat(query, "account='%s', ", job_ptr->account);
497
if (job_ptr->partition)
498
xstrfmtcat(query, "partition='%s', ",
501
xstrfmtcat(query, "id_block='%s', ", block_id);
503
xstrfmtcat(query, "wckey='%s', ", job_ptr->wckey);
505
xstrfmtcat(query, "node_inx='%s', ", node_inx);
507
xstrfmtcat(query, "time_start=%ld, job_name='%s', state=%u, "
508
"cpus_alloc=%u, nodes_alloc=%u, id_qos=%u, "
509
"id_assoc=%u, id_wckey=%u, id_resv=%u, timelimit=%u "
510
"where job_db_inx=%d",
511
start_time, jname, job_state,
512
job_ptr->total_cpus, node_cnt, job_ptr->qos_id,
513
job_ptr->assoc_id, wckeyid,
514
job_ptr->resv_id, job_ptr->time_limit,
516
debug3("%d(%s:%d) query\n%s",
517
mysql_conn->conn, THIS_FILE, __LINE__, query);
518
rc = mysql_db_query(mysql_conn, query);
525
/* now we will reset all the steps */
526
if (IS_JOB_RESIZING(job_ptr)) {
527
if (IS_JOB_SUSPENDED(job_ptr))
528
as_mysql_suspend(mysql_conn, job_db_inx, job_ptr);
529
/* Here we aren't sure how many cpus are being changed here in
530
the step since we don't have that information from the
531
job. The resize of steps shouldn't happen very often in
532
the first place (srun --no-kill option), and this don't
533
effect accounting in the first place so it isn't a
536
query = xstrdup_printf("update \"%s_%s\" set job_db_inx=%u "
537
"where job_db_inx=%u;",
538
mysql_conn->cluster_name, step_table,
539
job_ptr->db_index, job_db_inx);
541
debug3("%d(%s:%d) query\n%s",
542
mysql_conn->conn, THIS_FILE, __LINE__, query);
543
rc = mysql_db_query(mysql_conn, query);
550
// as_mysql_modify_job - change the derived exit code and/or derived
// exit string of the most recently submitted job with a given id.
//
// mysql_conn: database connection.
// uid:        id of the requesting user; the job row must have this
//             id_user, and the name for this uid is handed to
//             modify_common().
// job_cond:   must carry a job_id (not NO_VAL) and a cluster name.
// job:        new values; derived_ec is applied when it is not NO_VAL,
//             derived_es is written as a quoted string (the condition
//             guarding it is not visible in this listing).
//
// The visible code logs an error for missing arguments, sets errno to
// SLURM_NO_CHANGE_IN_DATA for a missing job id, a missing cluster, an
// empty change set or a job that cannot be found, then selects the
// newest matching row (order by time_submit desc limit 1), puts one
// "<id_job> submitted at <time>" string in ret_list, and applies the
// change through modify_common().  When that yields SLURM_ERROR an
// error is logged and ret_list is destroyed.  Presumably ret_list (or
// NULL) is what is returned -- the return statements are not visible.
//
// NOTE(review): this listing carries stray line-number residue and has
// lost lines (braces, returns, declarations of row/object/tmp_char,
// frees).  Restore from upstream before building.
// NOTE(review): derived_es goes into the SQL text as it is -- confirm
// it is escaped or validated before it reaches this function.
extern List as_mysql_modify_job(mysql_conn_t *mysql_conn, uint32_t uid,
551
slurmdb_job_modify_cond_t *job_cond,
552
slurmdb_job_rec_t *job)
554
List ret_list = NULL;
555
int rc = SLURM_SUCCESS;
557
char *vals = NULL, *query = NULL, *cond_char = NULL;
558
time_t now = time(NULL);
559
char *user_name = NULL;
560
MYSQL_RES *result = NULL;
563
if (!job_cond || !job) {
564
error("we need something to change");
566
} else if (job_cond->job_id == NO_VAL) {
567
errno = SLURM_NO_CHANGE_IN_DATA;
568
error("Job ID was not specified for job modification\n");
570
} else if (!job_cond->cluster) {
571
errno = SLURM_NO_CHANGE_IN_DATA;
572
error("Cluster was not specified for job modification\n");
574
} else if (check_connection(mysql_conn) != SLURM_SUCCESS)
577
if (job->derived_ec != NO_VAL)
578
xstrfmtcat(vals, ", derived_ec=%u", job->derived_ec);
581
xstrfmtcat(vals, ", derived_es='%s'", job->derived_es);
584
errno = SLURM_NO_CHANGE_IN_DATA;
585
error("No change specified for job modification");
589
/* Here we want to get the last job submitted here */
590
query = xstrdup_printf("select job_db_inx, id_job, time_submit "
591
"from \"%s_%s\" where deleted=0 "
592
"&& id_job=%u && id_user=%u "
593
"order by time_submit desc limit 1;",
594
job_cond->cluster, job_table,
595
job_cond->job_id, uid);
597
debug3("%d(%s:%d) query\n%s",
598
mysql_conn->conn, THIS_FILE, __LINE__, query);
599
if (!(result = mysql_db_query_ret(mysql_conn, query, 0))) {
605
if ((row = mysql_fetch_row(result))) {
607
time_t time_submit = atol(row[2]);
608
slurm_make_time_str(&time_submit, tmp_char, sizeof(tmp_char));
610
/* The row's job_db_inx becomes the condition for the update. */
xstrfmtcat(cond_char, "job_db_inx=%s", row[0]);
611
object = xstrdup_printf("%s submitted at %s", row[1], tmp_char);
613
ret_list = list_create(slurm_destroy_char);
614
list_append(ret_list, object);
615
mysql_free_result(result);
617
errno = SLURM_NO_CHANGE_IN_DATA;
618
debug3("didn't effect anything\n%s", query);
621
mysql_free_result(result);
626
/* Apply vals to the selected row, passing the requester's name. */
user_name = uid_to_string((uid_t) uid);
627
rc = modify_common(mysql_conn, DBD_MODIFY_JOB, now, user_name,
628
job_table, cond_char, vals, job_cond->cluster);
632
if (rc == SLURM_ERROR) {
633
error("Couldn't modify job");
634
list_destroy(ret_list);
641
/*
 * as_mysql_job_complete - write the end of a job (or the end of the
 * pre-resize record of a resizing job) to the cluster's job table.
 *
 * mysql_conn: connection; its cluster_name is the table prefix.
 * job_ptr:    job being completed; db_index may be filled in here.
 * Returns ESLURM_DB_CONNECTION when check_connection() fails and
 * SLURM_SUCCESS on the visible early exits (a job whose end_time is 0,
 * and the path where the job could not be added at completion).  rc
 * receives the result of each mysql_db_query() call; presumably it is
 * the final return value (that return is not visible here).
 *
 * Outline, as far as the visible lines show:
 *  - a job with no db_index, no submit time and no resize time is
 *    rejected with an error message;
 *  - a resizing job ends at resize_time with state JOB_RESIZING;
 *    otherwise end_time and the base job state are used;
 *  - with rollup_lock held, an end time older than global_last_rollup
 *    pulls global_last_rollup and the rollup columns of the last-ran
 *    table back;
 *  - a missing db_index is looked up with _get_db_index(), and if that
 *    finds nothing the job is added via as_mysql_job_start();
 *  - the row is updated with time_end, state, nodelist, derived_ec,
 *    exit_code and kill_requid.
 *
 * NOTE(review): this listing carries stray line-number residue and has
 * lost lines (braces, "else" lines, arguments, frees, the final
 * return, comment closers).  Restore from upstream before building.
 * NOTE(review): the rollup test compares end_time but stores
 * job_ptr->end_time in global_last_rollup, while the query uses
 * end_time; for a resizing job end_time is resize_time, so the two can
 * differ -- confirm which value is intended.
 */
extern int as_mysql_job_complete(mysql_conn_t *mysql_conn,
642
struct job_record *job_ptr)
644
char *query = NULL, *nodes = NULL;
645
int rc = SLURM_SUCCESS, job_state;
646
time_t submit_time, end_time;
648
if (!job_ptr->db_index
649
&& ((!job_ptr->details || !job_ptr->details->submit_time)
650
&& !job_ptr->resize_time)) {
651
error("as_mysql_job_complete: "
652
"Not inputing this job, it has no submit time.");
656
if (check_connection(mysql_conn) != SLURM_SUCCESS)
657
return ESLURM_DB_CONNECTION;
658
debug2("as_mysql_slurmdb_job_complete() called");
660
if (job_ptr->resize_time)
661
submit_time = job_ptr->resize_time;
663
submit_time = job_ptr->details->submit_time;
665
/* A resizing job is closed at its resize time with state
 * JOB_RESIZING. */
if (IS_JOB_RESIZING(job_ptr)) {
666
end_time = job_ptr->resize_time;
667
job_state = JOB_RESIZING;
669
/* If we get an error with this just fall through to avoid an
671
if (job_ptr->end_time == 0) {
672
debug("as_mysql_jobacct: job %u never started",
674
return SLURM_SUCCESS;
676
end_time = job_ptr->end_time;
677
job_state = job_ptr->job_state & JOB_STATE_BASE;
680
slurm_mutex_lock(&rollup_lock);
681
if (end_time < global_last_rollup) {
682
global_last_rollup = job_ptr->end_time;
683
slurm_mutex_unlock(&rollup_lock);
685
query = xstrdup_printf("update \"%s_%s\" set "
686
"hourly_rollup=%ld, "
687
"daily_rollup=%ld, monthly_rollup=%ld",
688
mysql_conn->cluster_name,
689
last_ran_table, end_time,
691
debug3("%d(%s:%d) query\n%s",
692
mysql_conn->conn, THIS_FILE, __LINE__, query);
693
rc = mysql_db_query(mysql_conn, query);
696
slurm_mutex_unlock(&rollup_lock);
698
if (job_ptr->nodes && job_ptr->nodes[0])
699
nodes = job_ptr->nodes;
701
nodes = "None assigned";
703
if (!job_ptr->db_index) {
704
if (!(job_ptr->db_index =
705
_get_db_index(mysql_conn,
708
job_ptr->assoc_id))) {
709
/* If we get an error with this just fall
710
* through to avoid an infinite loop
712
if (as_mysql_job_start(
713
mysql_conn, job_ptr) == SLURM_ERROR) {
714
error("couldn't add job %u at job completion",
716
return SLURM_SUCCESS;
721
query = xstrdup_printf("update \"%s_%s\" set "
722
"time_end=%ld, state=%d, nodelist='%s', "
723
"derived_ec=%d, exit_code=%d, "
724
"kill_requid=%d where job_db_inx=%d;",
725
mysql_conn->cluster_name, job_table,
726
end_time, job_state, nodes,
727
job_ptr->derived_ec, job_ptr->exit_code,
728
job_ptr->requid, job_ptr->db_index);
730
debug3("%d(%s:%d) query\n%s",
731
mysql_conn->conn, THIS_FILE, __LINE__, query);
732
rc = mysql_db_query(mysql_conn, query);
738
// as_mysql_step_start - insert (or refresh) the row for a job step in
// the cluster's step table.
//
// mysql_conn: connection; its cluster_name is the table prefix.
// step_ptr:   step being started; its job's db_index may be filled in
//             here.
// Returns ESLURM_DB_CONNECTION when check_connection() fails and
// SLURM_SUCCESS on the visible early exit where the job could not be
// added.  rc receives the result of mysql_db_query(); presumably it is
// the final return value (that return is not visible here).
//
// Outline, as far as the visible lines show:
//  - a step whose job has no db_index, no submit time and no resize
//    time is rejected with an error message;
//  - submit/start times come from the job's resize_time when set (the
//    step's own start_time wins if it is later), otherwise from the
//    step start time and the job's submit time;
//  - cpus, tasks, nodes, task_dist, node_list and node_inx are chosen
//    by several alternative branches (one of them is for the batch
//    script step, which uses step_ptr->gres as the node name and
//    counts one node/cpu/task); the conditions selecting the other
//    branches are not visible in this listing;
//  - a missing job db_index is looked up with _get_db_index(), and if
//    that finds nothing the job is added via as_mysql_job_start();
//  - the step name is escaped with slurm_add_slash_to_quotes() and the
//    row is written with "insert ... on duplicate key update", state
//    JOB_RUNNING.
//
// NOTE(review): this listing carries stray line-number residue and has
// lost lines (braces, "else"/#ifdef lines, the declaration of query,
// parts of the column and value lists, frees, the final return,
// comment closers).  Restore from upstream before building.
// NOTE(review): node_list and node_inx go into the SQL text as they
// are, and node_inx is formatted with '%s' although it can still be
// NULL on the visible paths -- confirm against upstream.
extern int as_mysql_step_start(mysql_conn_t *mysql_conn,
739
struct step_record *step_ptr)
741
int cpus = 0, tasks = 0, nodes = 0, task_dist = 0;
742
int rc=SLURM_SUCCESS;
743
char node_list[BUFFER_SIZE];
744
char *node_inx = NULL, *step_name = NULL;
745
time_t start_time, submit_time;
748
char *ionodes = NULL;
752
if (!step_ptr->job_ptr->db_index
753
&& ((!step_ptr->job_ptr->details
754
|| !step_ptr->job_ptr->details->submit_time)
755
&& !step_ptr->job_ptr->resize_time)) {
756
error("as_mysql_step_start: "
757
"Not inputing this job, it has no submit time.");
761
if (step_ptr->job_ptr->resize_time) {
762
submit_time = start_time = step_ptr->job_ptr->resize_time;
763
if (step_ptr->start_time > submit_time)
764
start_time = step_ptr->start_time;
766
start_time = step_ptr->start_time;
767
submit_time = step_ptr->job_ptr->details->submit_time;
770
if (check_connection(mysql_conn) != SLURM_SUCCESS)
771
return ESLURM_DB_CONNECTION;
773
tasks = step_ptr->job_ptr->details->num_tasks;
774
cpus = step_ptr->cpu_count;
775
snprintf(node_list, BUFFER_SIZE, "%s",
776
step_ptr->job_ptr->nodes);
777
nodes = step_ptr->step_layout->node_cnt;
778
task_dist = step_ptr->step_layout->task_dist;
779
node_inx = step_ptr->network;
780
} else if (step_ptr->step_id == SLURM_BATCH_SCRIPT) {
781
char temp_bit[BUF_SIZE];
783
if (step_ptr->step_node_bitmap) {
784
node_inx = bit_fmt(temp_bit, sizeof(temp_bit),
785
step_ptr->step_node_bitmap);
787
/* We overload gres with the node name of where the
790
snprintf(node_list, BUFFER_SIZE, "%s", step_ptr->gres);
791
nodes = cpus = tasks = 1;
793
char temp_bit[BUF_SIZE];
795
if (step_ptr->step_node_bitmap) {
796
node_inx = bit_fmt(temp_bit, sizeof(temp_bit),
797
step_ptr->step_node_bitmap);
800
tasks = cpus = step_ptr->job_ptr->details->min_cpus;
801
select_g_select_jobinfo_get(step_ptr->job_ptr->select_jobinfo,
802
SELECT_JOBDATA_IONODES,
805
snprintf(node_list, BUFFER_SIZE,
806
"%s[%s]", step_ptr->job_ptr->nodes, ionodes);
809
snprintf(node_list, BUFFER_SIZE, "%s",
810
step_ptr->job_ptr->nodes);
811
select_g_select_jobinfo_get(step_ptr->job_ptr->select_jobinfo,
812
SELECT_JOBDATA_NODE_CNT,
815
if (!step_ptr->step_layout
816
|| !step_ptr->step_layout->task_cnt) {
817
tasks = cpus = step_ptr->job_ptr->total_cpus;
818
snprintf(node_list, BUFFER_SIZE, "%s",
819
step_ptr->job_ptr->nodes);
820
nodes = step_ptr->job_ptr->total_nodes;
822
cpus = step_ptr->cpu_count;
823
tasks = step_ptr->step_layout->task_cnt;
824
nodes = step_ptr->step_layout->node_cnt;
825
task_dist = step_ptr->step_layout->task_dist;
826
snprintf(node_list, BUFFER_SIZE, "%s",
827
step_ptr->step_layout->node_list);
832
if (!step_ptr->job_ptr->db_index) {
833
if (!(step_ptr->job_ptr->db_index =
834
_get_db_index(mysql_conn,
836
step_ptr->job_ptr->job_id,
837
step_ptr->job_ptr->assoc_id))) {
838
/* If we get an error with this just fall
839
* through to avoid an infinite loop
841
if (as_mysql_job_start(mysql_conn, step_ptr->job_ptr)
843
error("couldn't add job %u at step start",
844
step_ptr->job_ptr->job_id);
845
return SLURM_SUCCESS;
850
step_name = slurm_add_slash_to_quotes(step_ptr->name);
852
/* we want to print a -1 for the requid so leave it a
854
/* The stepid could be -2 so use %d not %u */
855
query = xstrdup_printf(
856
"insert into \"%s_%s\" (job_db_inx, id_step, time_start, "
858
"cpus_alloc, nodes_alloc, task_cnt, nodelist, "
859
"node_inx, task_dist) "
860
"values (%d, %d, %d, '%s', %d, %d, %d, %d, "
862
"on duplicate key update cpus_alloc=%d, nodes_alloc=%d, "
863
"task_cnt=%d, time_end=0, state=%d, "
864
"nodelist='%s', node_inx='%s', task_dist=%d",
865
mysql_conn->cluster_name, step_table,
866
step_ptr->job_ptr->db_index,
868
(int)start_time, step_name,
869
JOB_RUNNING, cpus, nodes, tasks, node_list, node_inx, task_dist,
870
cpus, nodes, tasks, JOB_RUNNING,
871
node_list, node_inx, task_dist);
872
debug3("%d(%s:%d) query\n%s",
873
mysql_conn->conn, THIS_FILE, __LINE__, query);
874
rc = mysql_db_query(mysql_conn, query);
881
/*
 * as_mysql_step_complete - write the end time, final state, exit code
 * and accounting figures of a job step to the cluster's step table.
 *
 * mysql_conn: connection; its cluster_name is the table prefix.
 * step_ptr:   step being completed.  Its jobacct data supplies the
 *             usage figures (a zeroed dummy record is used when there
 *             is none); requid is set to -1 on the path that ends in
 *             JOB_COMPLETE; the job's db_index may be filled in here.
 * Returns ESLURM_DB_CONNECTION when check_connection() fails and
 * SLURM_SUCCESS on the visible early exit where the job could not be
 * added.  rc receives the result of mysql_db_query(); presumably it is
 * the final return value (that return is not visible here).
 *
 * Outline, as far as the visible lines show:
 *  - a step whose job has no db_index, no submit time and no resize
 *    time is rejected with an error message;
 *  - submit/start times are chosen as in as_mysql_step_start();
 *  - cpus and tasks are chosen by several alternative branches whose
 *    selecting conditions are only partly visible;
 *  - a negative elapsed time is clamped to 0;
 *  - the completion state is derived from the exit code;
 *  - the totals in jobacct are divided by cpus to get averages;
 *  - a missing job db_index is looked up with _get_db_index(), and if
 *    that finds nothing the job is added via as_mysql_job_start();
 *  - the step row is updated, keyed on job_db_inx and id_step.
 *
 * NOTE(review): this listing carries stray line-number residue and has
 * lost lines (braces, "else"/#ifdef lines, declarations of
 * now/elapsed/comp_status/query, several format arguments, the final
 * return, comment closers).  Restore from upstream before building.
 */
extern int as_mysql_step_complete(mysql_conn_t *mysql_conn,
882
struct step_record *step_ptr)
887
int cpus = 0, tasks = 0;
888
struct jobacctinfo *jobacct = (struct jobacctinfo *)step_ptr->jobacct;
889
struct jobacctinfo dummy_jobacct;
890
double ave_vsize = 0, ave_rss = 0, ave_pages = 0;
891
double ave_cpu = 0, ave_cpu2 = 0;
893
int rc =SLURM_SUCCESS;
894
uint32_t exit_code = 0;
895
time_t start_time, submit_time;
897
if (!step_ptr->job_ptr->db_index
898
&& ((!step_ptr->job_ptr->details
899
|| !step_ptr->job_ptr->details->submit_time)
900
&& !step_ptr->job_ptr->resize_time)) {
901
error("as_mysql_step_complete: "
902
"Not inputing this job, it has no submit time.");
906
if (step_ptr->job_ptr->resize_time) {
907
submit_time = start_time = step_ptr->job_ptr->resize_time;
908
if (step_ptr->start_time > submit_time)
909
start_time = step_ptr->start_time;
911
start_time = step_ptr->start_time;
912
submit_time = step_ptr->job_ptr->details->submit_time;
915
if (jobacct == NULL) {
916
/* JobAcctGather=slurmdb_gather/none, no data to process */
917
memset(&dummy_jobacct, 0, sizeof(dummy_jobacct));
918
jobacct = &dummy_jobacct;
921
if (check_connection(mysql_conn) != SLURM_SUCCESS)
922
return ESLURM_DB_CONNECTION;
925
now = step_ptr->job_ptr->end_time;
926
tasks = step_ptr->job_ptr->details->num_tasks;
927
cpus = step_ptr->cpu_count;
928
} else if (step_ptr->step_id == SLURM_BATCH_SCRIPT) {
934
tasks = cpus = step_ptr->job_ptr->details->min_cpus;
937
if (!step_ptr->step_layout || !step_ptr->step_layout->task_cnt)
938
tasks = cpus = step_ptr->job_ptr->total_cpus;
940
cpus = step_ptr->cpu_count;
941
tasks = step_ptr->step_layout->task_cnt;
946
if ((elapsed = (now - start_time)) < 0)
947
elapsed = 0; /* For *very* short jobs, if clock is wrong */
949
/* Derive the completion state from the exit code: killed by a
 * signal gives JOB_CANCELLED, any other non-zero code JOB_FAILED. */
exit_code = step_ptr->exit_code;
950
if (WIFSIGNALED(exit_code)) {
951
comp_status = JOB_CANCELLED;
952
} else if (exit_code)
953
comp_status = JOB_FAILED;
955
step_ptr->requid = -1;
956
comp_status = JOB_COMPLETE;
959
/* figure out the ave of the totals sent */
961
/* NOTE(review): the divisions below use cpus as the divisor; any
 * guard against cpus == 0 is not visible in this listing. */
ave_vsize = (double)jobacct->tot_vsize;
962
ave_vsize /= (double)cpus;
963
ave_rss = (double)jobacct->tot_rss;
964
ave_rss /= (double)cpus;
965
ave_pages = (double)jobacct->tot_pages;
966
ave_pages /= (double)cpus;
967
ave_cpu = (double)jobacct->tot_cpu;
968
ave_cpu /= (double)cpus;
971
if (jobacct->min_cpu != NO_VAL) {
972
ave_cpu2 = (double)jobacct->min_cpu;
975
if (!step_ptr->job_ptr->db_index) {
976
if (!(step_ptr->job_ptr->db_index =
977
_get_db_index(mysql_conn,
979
step_ptr->job_ptr->job_id,
980
step_ptr->job_ptr->assoc_id))) {
981
/* If we get an error with this just fall
982
* through to avoid an infinite loop
984
if (as_mysql_job_start(mysql_conn, step_ptr->job_ptr)
986
error("couldn't add job %u "
987
"at step completion",
988
step_ptr->job_ptr->job_id);
989
return SLURM_SUCCESS;
994
/* The stepid could be -2 so use %d not %u */
995
query = xstrdup_printf(
996
"update \"%s_%s\" set time_end=%d, state=%d, "
997
"kill_requid=%d, exit_code=%d, "
998
"user_sec=%u, user_usec=%u, "
999
"sys_sec=%u, sys_usec=%u, "
1000
"max_vsize=%u, max_vsize_task=%u, "
1001
"max_vsize_node=%u, ave_vsize=%f, "
1002
"max_rss=%u, max_rss_task=%u, "
1003
"max_rss_node=%u, ave_rss=%f, "
1004
"max_pages=%u, max_pages_task=%u, "
1005
"max_pages_node=%u, ave_pages=%f, "
1006
"min_cpu=%f, min_cpu_task=%u, "
1007
"min_cpu_node=%u, ave_cpu=%f "
1008
"where job_db_inx=%d and id_step=%d",
1009
mysql_conn->cluster_name, step_table, (int)now,
1014
jobacct->user_cpu_sec,
1015
/* user microseconds */
1016
jobacct->user_cpu_usec,
1017
/* system seconds */
1018
jobacct->sys_cpu_sec,
1019
/* system microsecs */
1020
jobacct->sys_cpu_usec,
1021
jobacct->max_vsize, /* max vsize */
1022
jobacct->max_vsize_id.taskid, /* max vsize task */
1023
jobacct->max_vsize_id.nodeid, /* max vsize node */
1024
ave_vsize, /* ave vsize */
1025
jobacct->max_rss, /* max rss */
1026
jobacct->max_rss_id.taskid, /* max rss task */
1027
jobacct->max_rss_id.nodeid, /* max rss node */
1028
ave_rss, /* ave rss */
1029
jobacct->max_pages, /* max pages */
1030
jobacct->max_pages_id.taskid, /* max pages task */
1031
jobacct->max_pages_id.nodeid, /* max pages node */
1032
ave_pages, /* ave pages */
1033
ave_cpu2, /* min cpu */
1034
jobacct->min_cpu_id.taskid, /* min cpu task */
1035
jobacct->min_cpu_id.nodeid, /* min cpu node */
1036
ave_cpu, /* ave cpu */
1037
step_ptr->job_ptr->db_index, step_ptr->step_id);
1038
debug3("%d(%s:%d) query\n%s",
1039
mysql_conn->conn, THIS_FILE, __LINE__, query);
1040
rc = mysql_db_query(mysql_conn, query);
1046
/*
 * as_mysql_suspend - record a suspend or resume of a job in the job,
 * suspend and step tables of the cluster.
 *
 * mysql_conn: connection; its cluster_name is the table prefix.
 * old_db_inx: index of the record the job had before a resize; it is
 *             used as the target index on the resizing path, where an
 *             error is logged about a missing old index (the test
 *             guarding that message is not visible in this listing).
 * job_ptr:    job being suspended or resumed; db_index may be filled
 *             in here.
 * Returns ESLURM_DB_CONNECTION when check_connection() fails and
 * SLURM_SUCCESS on the visible early exit where the job could not be
 * added.  rc receives the result of each mysql_db_query() call;
 * presumably it is the final return value (that return is not visible
 * here).
 *
 * Outline, as far as the visible lines show:
 *  - a missing db_index is looked up with _get_db_index(), and if that
 *    finds nothing the job is added via as_mysql_job_start();
 *  - for a resizing job the open suspend record (time_end=0) of the
 *    old index is closed at suspend_time; otherwise the job's own
 *    db_index is used;
 *  - the job row gets time_suspended = suspend_time - time_suspended
 *    and the base job state;
 *  - a suspended job gets a new suspend record with time_end 0; the
 *    other query closes the open suspend record;
 *  - if the query did not return SLURM_ERROR, the open step rows
 *    (time_end=0) of the job are updated the same way.
 *
 * NOTE(review): this listing carries stray line-number residue and has
 * lost lines (braces, "else" lines, declarations of query and
 * submit_time, the xstrdup_printf/xstrfmtcat call heads, the final
 * return, comment closers).  Restore from upstream before building.
 * NOTE(review): the step-table update stores job_ptr->job_state as it
 * is, while the job-table update masks it with JOB_STATE_BASE --
 * confirm the difference is intended.
 */
extern int as_mysql_suspend(mysql_conn_t *mysql_conn,
1047
uint32_t old_db_inx,
1048
struct job_record *job_ptr)
1051
int rc = SLURM_SUCCESS;
1053
uint32_t job_db_inx;
1055
if (check_connection(mysql_conn) != SLURM_SUCCESS)
1056
return ESLURM_DB_CONNECTION;
1058
/* With a resize time set, that time stands in for the submit time. */
if (job_ptr->resize_time)
1059
submit_time = job_ptr->resize_time;
1061
submit_time = job_ptr->details->submit_time;
1063
if (!job_ptr->db_index) {
1064
if (!(job_ptr->db_index =
1065
_get_db_index(mysql_conn,
1068
job_ptr->assoc_id))) {
1069
/* If we get an error with this just fall
1070
* through to avoid an infinite loop
1072
if (as_mysql_job_start(
1073
mysql_conn, job_ptr) == SLURM_ERROR) {
1074
error("couldn't suspend job %u",
1076
return SLURM_SUCCESS;
1081
if (IS_JOB_RESIZING(job_ptr)) {
1083
error("No old db inx given for job %u cluster %s, "
1084
"can't update suspend table.",
1085
job_ptr->job_id, mysql_conn->cluster_name);
1088
job_db_inx = old_db_inx;
1090
"update \"%s_%s\" set time_end=%d where "
1091
"job_db_inx=%u && time_end=0;",
1092
mysql_conn->cluster_name, suspend_table,
1093
(int)job_ptr->suspend_time, job_db_inx);
1096
job_db_inx = job_ptr->db_index;
1098
/* use job_db_inx for this one since we want to update the
1099
supend time of the job before it was resized.
1102
"update \"%s_%s\" set time_suspended=%d-time_suspended, "
1103
"state=%d where job_db_inx=%d;",
1104
mysql_conn->cluster_name, job_table,
1105
(int)job_ptr->suspend_time,
1106
job_ptr->job_state & JOB_STATE_BASE,
1108
if (IS_JOB_SUSPENDED(job_ptr))
1110
"insert into \"%s_%s\" (job_db_inx, id_assoc, "
1111
"time_start, time_end) values (%u, %u, %d, 0);",
1112
mysql_conn->cluster_name, suspend_table,
1113
job_ptr->db_index, job_ptr->assoc_id,
1114
(int)job_ptr->suspend_time);
1117
"update \"%s_%s\" set time_end=%d where "
1118
"job_db_inx=%u && time_end=0;",
1119
mysql_conn->cluster_name, suspend_table,
1120
(int)job_ptr->suspend_time, job_ptr->db_index);
1121
debug3("%d(%s:%d) query\n%s",
1122
mysql_conn->conn, THIS_FILE, __LINE__, query);
1124
rc = mysql_db_query(mysql_conn, query);
1127
if (rc != SLURM_ERROR) {
1129
"update \"%s_%s\" set "
1130
"time_suspended=%u-time_suspended, "
1131
"state=%d where job_db_inx=%u and time_end=0",
1132
mysql_conn->cluster_name, step_table,
1133
(int)job_ptr->suspend_time,
1134
job_ptr->job_state, job_ptr->db_index);
1135
rc = mysql_db_query(mysql_conn, query);
1142
// as_mysql_flush_jobs_on_cluster - close every job of the cluster that
// is still open in the database (time_end=0), stamping it with
// event_time.
//
// mysql_conn: connection; its cluster_name is the table prefix.
// event_time: time written as the end time of the flushed records.
// Returns ESLURM_DB_CONNECTION when check_connection() fails.  rc
// receives the result of mysql_db_query(); presumably it is the final
// return value (that return is not visible in this listing).
//
// Outline, as far as the visible lines show:
//  - select job_db_inx and state of all job rows with time_end=0;
//  - build two " || "-joined condition strings of "job_db_inx=<n>"
//    terms: suspended_char from rows in state JOB_SUSPENDED, and
//    id_char (whether it covers every row or only the remaining ones
//    cannot be told from the visible lines);
//  - for the suspended set: set time_suspended to
//    event_time - time_suspended in the job and step tables and close
//    the suspend-table records at event_time;
//  - for the id_char set: set state to JOB_CANCELLED and time_end to
//    event_time in the job and step tables;
//  - log and run the assembled query text.
//
// NOTE(review): this listing carries stray line-number residue and has
// lost lines (braces, "else" lines, declarations of query/row, the
// xstrfmtcat call heads and the tails of several where clauses, frees,
// the final return, a comment closer).  Restore from upstream before
// building.
extern int as_mysql_flush_jobs_on_cluster(
1143
mysql_conn_t *mysql_conn, time_t event_time)
1145
int rc = SLURM_SUCCESS;
1146
/* put end times for a clean start */
1147
MYSQL_RES *result = NULL;
1150
/* Condition strings built from the rows selected below. */
char *id_char = NULL;
1151
char *suspended_char = NULL;
1153
if (check_connection(mysql_conn) != SLURM_SUCCESS)
1154
return ESLURM_DB_CONNECTION;
1156
/* First we need to get the job_db_inx's and states so we can clean up
1157
* the suspend table and the step table
1159
query = xstrdup_printf(
1160
"select distinct t1.job_db_inx, t1.state from \"%s_%s\" "
1161
"as t1 where t1.time_end=0;",
1162
mysql_conn->cluster_name, job_table);
1163
debug3("%d(%s:%d) query\n%s",
1164
mysql_conn->conn, THIS_FILE, __LINE__, query);
1166
mysql_db_query_ret(mysql_conn, query, 0))) {
1172
while ((row = mysql_fetch_row(result))) {
1173
int state = slurm_atoul(row[1]);
1174
if (state == JOB_SUSPENDED) {
1176
xstrfmtcat(suspended_char,
1177
" || job_db_inx=%s", row[0]);
1179
xstrfmtcat(suspended_char, "job_db_inx=%s",
1184
xstrfmtcat(id_char, " || job_db_inx=%s", row[0]);
1186
xstrfmtcat(id_char, "job_db_inx=%s", row[0]);
1188
mysql_free_result(result);
1190
if (suspended_char) {
1192
"update \"%s_%s\" set "
1193
"time_suspended=%ld-time_suspended "
1195
mysql_conn->cluster_name, job_table,
1196
event_time, suspended_char);
1198
"update \"%s_%s\" set "
1199
"time_suspended=%ld-time_suspended "
1201
mysql_conn->cluster_name, step_table,
1202
event_time, suspended_char);
1204
"update \"%s_%s\" set time_end=%ld where (%s) "
1206
mysql_conn->cluster_name, suspend_table,
1207
event_time, suspended_char);
1208
xfree(suspended_char);
1212
"update \"%s_%s\" set state=%d, "
1213
"time_end=%ld where %s;",
1214
mysql_conn->cluster_name, job_table,
1215
JOB_CANCELLED, event_time, id_char);
1217
"update \"%s_%s\" set state=%d, "
1218
"time_end=%ld where %s;",
1219
mysql_conn->cluster_name, step_table,
1220
JOB_CANCELLED, event_time, id_char);
1225
debug3("%d(%s:%d) query\n%s",
1226
mysql_conn->conn, THIS_FILE, __LINE__, query);
1228
rc = mysql_db_query(mysql_conn, query);