7
7
* Written by Danny Auble <da@llnl.gov>
9
9
* This file is part of SLURM, a resource management program.
10
* For details, see <http://www.llnl.gov/linux/slurm/>.
10
* For details, see <https://computing.llnl.gov/linux/slurm/>.
11
* Please also read the included file: DISCLAIMER.
12
13
* SLURM is free software; you can redistribute it and/or modify it under
13
14
* the terms of the GNU General Public License as published by the Free
195
197
dbd_acct_coord_msg_t get_msg;
196
198
int rc, resp_code;
200
memset(&get_msg, 0, sizeof(dbd_acct_coord_msg_t));
198
201
get_msg.acct_list = acct_list;
199
202
get_msg.cond = user_cond;
292
299
extern int acct_storage_p_add_wckeys(void *db_conn, uint32_t uid,
295
302
slurmdbd_msg_t req;
296
303
dbd_list_msg_t get_msg;
297
304
int rc, resp_code;
306
memset(&get_msg, 0, sizeof(dbd_list_msg_t));
299
307
get_msg.my_list = wckey_list;
301
309
req.msg_type = DBD_ADD_WCKEYS;
320
extern int acct_storage_p_add_reservation(void *db_conn,
321
acct_reservation_rec_t *resv)
324
dbd_rec_msg_t get_msg;
327
memset(&get_msg, 0, sizeof(dbd_rec_msg_t));
330
req.msg_type = DBD_ADD_RESV;
333
rc = slurm_send_slurmdbd_recv_rc_msg(SLURMDBD_VERSION,
336
if(resp_code != SLURM_SUCCESS)
312
342
extern List acct_storage_p_modify_users(void *db_conn, uint32_t uid,
313
343
acct_user_cond_t *user_cond,
314
344
acct_user_rec_t *user)
593
extern int acct_storage_p_modify_reservation(void *db_conn,
594
acct_reservation_rec_t *resv)
597
dbd_rec_msg_t get_msg;
600
memset(&get_msg, 0, sizeof(dbd_rec_msg_t));
603
req.msg_type = DBD_MODIFY_RESV;
606
rc = slurm_send_slurmdbd_recv_rc_msg(SLURMDBD_VERSION,
609
if(resp_code != SLURM_SUCCESS)
557
615
extern List acct_storage_p_remove_users(void *db_conn, uint32_t uid,
558
616
acct_user_cond_t *user_cond)
903
extern int acct_storage_p_remove_reservation(void *db_conn,
904
acct_reservation_rec_t *resv)
907
dbd_rec_msg_t get_msg;
910
memset(&get_msg, 0, sizeof(dbd_rec_msg_t));
913
req.msg_type = DBD_REMOVE_RESV;
916
rc = slurm_send_slurmdbd_recv_rc_msg(SLURMDBD_VERSION,
919
if(resp_code != SLURM_SUCCESS)
842
925
extern List acct_storage_p_get_users(void *db_conn, uid_t uid,
843
926
acct_user_cond_t *user_cond)
955
1041
return ret_list;
1044
extern List acct_storage_p_get_config(void)
1046
slurmdbd_msg_t req, resp;
1047
dbd_list_msg_t *got_msg;
1049
List ret_list = NULL;
1051
req.msg_type = DBD_GET_CONFIG;
1053
rc = slurm_send_recv_slurmdbd_msg(SLURMDBD_VERSION, &req, &resp);
1055
if (rc != SLURM_SUCCESS)
1056
error("slurmdbd: DBD_GET_CONFIG failure: %m");
1057
else if (resp.msg_type == DBD_RC) {
1058
dbd_rc_msg_t *msg = resp.data;
1059
if(msg->return_code == SLURM_SUCCESS) {
1060
info("%s", msg->comment);
1061
ret_list = list_create(NULL);
1063
error("%s", msg->comment);
1064
slurmdbd_free_rc_msg(SLURMDBD_VERSION, msg);
1065
} else if (resp.msg_type != DBD_GOT_CONFIG) {
1066
error("slurmdbd: response type not DBD_GOT_CONFIG: %u",
1069
got_msg = (dbd_list_msg_t *) resp.data;
1070
ret_list = got_msg->my_list;
1071
got_msg->my_list = NULL;
1072
slurmdbd_free_list_msg(SLURMDBD_VERSION, got_msg);
958
1078
extern List acct_storage_p_get_associations(void *db_conn, uid_t uid,
959
1079
acct_association_cond_t *assoc_cond)
1083
1206
return ret_list;
1209
extern List acct_storage_p_get_reservations(void *mysql_conn, uid_t uid,
1210
acct_reservation_cond_t *resv_cond)
1212
slurmdbd_msg_t req, resp;
1213
dbd_cond_msg_t get_msg;
1214
dbd_list_msg_t *got_msg;
1216
List ret_list = NULL;
1218
memset(&get_msg, 0, sizeof(dbd_cond_msg_t));
1219
get_msg.cond = resv_cond;
1221
req.msg_type = DBD_GET_RESVS;
1222
req.data = &get_msg;
1223
rc = slurm_send_recv_slurmdbd_msg(SLURMDBD_VERSION, &req, &resp);
1225
if (rc != SLURM_SUCCESS)
1226
error("slurmdbd: DBD_GET_RESVS failure: %m");
1227
else if (resp.msg_type == DBD_RC) {
1228
dbd_rc_msg_t *msg = resp.data;
1229
if(msg->return_code == SLURM_SUCCESS) {
1230
info("%s", msg->comment);
1231
ret_list = list_create(NULL);
1233
error("%s", msg->comment);
1234
slurmdbd_free_rc_msg(SLURMDBD_VERSION, msg);
1235
} else if (resp.msg_type != DBD_GOT_RESVS) {
1236
error("slurmdbd: response type not DBD_GOT_RESVS: %u",
1239
got_msg = (dbd_list_msg_t *) resp.data;
1240
/* do this just for this type since it could be called
1241
* multiple times, and if we send back an empty list
1242
* instead of no list we will only call this once.
1244
if(!got_msg->my_list)
1245
ret_list = list_create(NULL);
1247
ret_list = got_msg->my_list;
1248
got_msg->my_list = NULL;
1249
slurmdbd_free_list_msg(SLURMDBD_VERSION, got_msg);
1086
1255
extern List acct_storage_p_get_txn(void *db_conn, uid_t uid,
1087
1256
acct_txn_cond_t *txn_cond)
1197
1368
extern int acct_storage_p_roll_usage(void *db_conn,
1369
time_t sent_start, time_t sent_end,
1370
uint16_t archive_data)
1200
1372
slurmdbd_msg_t req;
1201
1373
dbd_roll_usage_msg_t get_msg;
1202
1374
int rc, resp_code;
1376
memset(&get_msg, 0, sizeof(dbd_roll_usage_msg_t));
1377
get_msg.end = sent_end;
1204
1378
get_msg.start = sent_start;
1379
get_msg.archive_data = archive_data;
1206
1381
req.msg_type = DBD_ROLL_USAGE;
1237
1413
my_reason = node_ptr->reason;
1415
memset(&req, 0, sizeof(dbd_node_state_msg_t));
1239
1416
req.cluster_name = cluster;
1240
1417
req.cpu_count = cpus;
1241
1418
req.hostlist = node_ptr->name;
1242
1419
req.new_state = DBD_NODE_STATE_DOWN;
1243
1420
req.event_time = event_time;
1244
1421
req.reason = my_reason;
1422
req.state = node_ptr->node_state;
1245
1423
msg.msg_type = DBD_NODE_STATE;
1246
1424
msg.data = &req;
1258
1437
slurmdbd_msg_t msg;
1259
1438
dbd_node_state_msg_t req;
1440
memset(&req, 0, sizeof(dbd_node_state_msg_t));
1261
1441
req.cluster_name = cluster;
1262
1442
req.hostlist = node_ptr->name;
1263
1443
req.new_state = DBD_NODE_STATE_UP;
1282
1463
int rc = SLURM_ERROR;
1284
1465
debug2("Sending info for cluster %s", cluster);
1466
memset(&req, 0, sizeof(dbd_cluster_procs_msg_t));
1285
1467
req.cluster_name = cluster;
1468
req.cluster_nodes = cluster_nodes;
1286
1469
req.proc_count = procs;
1287
1470
req.event_time = event_time;
1288
1471
msg.msg_type = DBD_CLUSTER_PROCS;
1301
1484
dbd_register_ctld_msg_t req;
1302
1485
info("Registering slurmctld for cluster %s at port %u with slurmdbd.",
1303
1486
cluster, port);
1487
memset(&req, 0, sizeof(dbd_register_ctld_msg_t));
1304
1489
req.cluster_name = cluster;
1305
1490
req.port = port;
1306
1491
msg.msg_type = DBD_REGISTER_CTLD;
1367
1554
slurmdbd_msg_t msg, msg_rc;
1368
1555
dbd_job_start_msg_t req;
1369
dbd_job_start_rc_msg_t *resp;
1556
dbd_id_rc_msg_t *resp;
1370
1557
char *block_id = NULL;
1371
1558
int rc = SLURM_SUCCESS;
1559
char temp_bit[BUF_SIZE];
1373
1561
if (!job_ptr->details || !job_ptr->details->submit_time) {
1374
1562
error("jobacct_storage_p_job_start: "
1375
1563
"Not inputing this job, it has no submit time.");
1376
1564
return SLURM_ERROR;
1566
memset(&req, 0, sizeof(dbd_job_start_msg_t));
1379
1568
req.alloc_cpus = job_ptr->total_procs;
1380
1569
req.cluster = cluster_name;
1384
1573
select_g_get_jobinfo(job_ptr->select_jobinfo,
1385
1574
SELECT_DATA_BLOCK_ID,
1576
select_g_get_jobinfo(job_ptr->select_jobinfo,
1577
SELECT_DATA_NODE_CNT,
1580
req.alloc_nodes = job_ptr->node_cnt;
1388
1582
req.block_id = block_id;
1389
1583
req.db_index = job_ptr->db_index;
1394
1588
req.job_state = job_ptr->job_state & (~JOB_COMPLETING);
1395
1589
req.name = job_ptr->name;
1396
1590
req.nodes = job_ptr->nodes;
1591
if(job_ptr->node_bitmap)
1592
req.node_inx = bit_fmt(temp_bit, sizeof(temp_bit),
1593
job_ptr->node_bitmap);
1397
1595
req.partition = job_ptr->partition;
1398
1596
req.req_cpus = job_ptr->num_procs;
1597
req.resv_id = job_ptr->resv_id;
1399
1598
req.priority = job_ptr->priority;
1400
1599
req.start_time = job_ptr->start_time;
1600
req.timelimit = job_ptr->time_limit;
1601
req.wckey = job_ptr->wckey;
1401
1602
if (job_ptr->details)
1402
1603
req.submit_time = job_ptr->details->submit_time;
1403
1604
req.uid = job_ptr->user_id;
1426
1627
xfree(block_id);
1427
1628
return SLURM_ERROR;
1429
} else if (msg_rc.msg_type != DBD_JOB_START_RC) {
1430
error("slurmdbd: response type not DBD_GOT_JOBS: %u",
1630
} else if (msg_rc.msg_type != DBD_ID_RC) {
1631
error("slurmdbd: response type not DBD_ID_RC: %u",
1431
1632
msg_rc.msg_type);
1433
resp = (dbd_job_start_rc_msg_t *) msg_rc.data;
1434
job_ptr->db_index = resp->db_index;
1634
resp = (dbd_id_rc_msg_t *) msg_rc.data;
1635
job_ptr->db_index = resp->id;
1435
1636
rc = resp->return_code;
1436
1637
//info("here got %d for return code", resp->return_code);
1437
slurmdbd_free_job_start_rc_msg(SLURMDBD_VERSION, resp);
1638
slurmdbd_free_id_rc_msg(SLURMDBD_VERSION, resp);
1439
1640
xfree(block_id);
1457
1658
return SLURM_ERROR;
1661
memset(&req, 0, sizeof(dbd_job_comp_msg_t));
1460
1663
req.assoc_id = job_ptr->assoc_id;
1461
1664
req.db_index = job_ptr->db_index;
1462
1665
req.end_time = job_ptr->end_time;
1483
1686
extern int jobacct_storage_p_step_start(void *db_conn,
1484
1687
struct step_record *step_ptr)
1689
uint32_t cpus = 0, tasks = 0, nodes = 0, task_dist = 0;
1487
1690
char node_list[BUFFER_SIZE];
1488
1691
slurmdbd_msg_t msg;
1489
1692
dbd_step_start_msg_t req;
1693
char temp_bit[BUF_SIZE];
1492
1696
char *ionodes = NULL;
1494
cpus = step_ptr->job_ptr->num_procs;
1698
cpus = tasks = step_ptr->job_ptr->num_procs;
1495
1699
select_g_get_jobinfo(step_ptr->job_ptr->select_jobinfo,
1496
1700
SELECT_DATA_IONODES,
1503
1707
snprintf(node_list, BUFFER_SIZE, "%s",
1504
1708
step_ptr->job_ptr->nodes);
1710
select_g_get_jobinfo(step_ptr->job_ptr->select_jobinfo,
1711
SELECT_DATA_NODE_CNT,
1508
1714
if (!step_ptr->step_layout || !step_ptr->step_layout->task_cnt) {
1509
cpus = step_ptr->job_ptr->total_procs;
1715
cpus = tasks = step_ptr->job_ptr->total_procs;
1510
1716
snprintf(node_list, BUFFER_SIZE, "%s",
1511
1717
step_ptr->job_ptr->nodes);
1718
nodes = step_ptr->job_ptr->node_cnt;
1513
cpus = step_ptr->step_layout->task_cnt;
1720
cpus = step_ptr->cpu_count;
1721
tasks = step_ptr->step_layout->task_cnt;
1722
nodes = step_ptr->step_layout->node_cnt;
1723
task_dist = step_ptr->step_layout->task_dist;
1514
1724
snprintf(node_list, BUFFER_SIZE, "%s",
1515
1725
step_ptr->step_layout->node_list);
1523
1733
"Not inputing this job, it has no submit time.");
1524
1734
return SLURM_ERROR;
1736
memset(&req, 0, sizeof(dbd_step_start_msg_t));
1527
1738
req.assoc_id = step_ptr->job_ptr->assoc_id;
1528
1739
req.db_index = step_ptr->job_ptr->db_index;
1529
1740
req.job_id = step_ptr->job_ptr->job_id;
1530
1741
req.name = step_ptr->name;
1531
1742
req.nodes = node_list;
1743
if(step_ptr->step_node_bitmap)
1744
req.node_inx = bit_fmt(temp_bit, sizeof(temp_bit),
1745
step_ptr->step_node_bitmap);
1746
req.node_cnt = nodes;
1532
1747
req.start_time = step_ptr->start_time;
1533
1748
if (step_ptr->job_ptr->details)
1534
1749
req.job_submit_time = step_ptr->job_ptr->details->submit_time;
1535
1750
req.step_id = step_ptr->step_id;
1751
if (step_ptr->step_layout)
1752
req.task_dist = step_ptr->step_layout->task_dist;
1753
req.task_dist = task_dist;
1536
1754
req.total_procs = cpus;
1755
req.total_tasks = tasks;
1538
1757
msg.msg_type = DBD_STEP_START;
1539
1758
msg.data = &req;
1550
1769
extern int jobacct_storage_p_step_complete(void *db_conn,
1551
1770
struct step_record *step_ptr)
1772
uint32_t cpus = 0, tasks = 0;
1554
1773
char node_list[BUFFER_SIZE];
1555
1774
slurmdbd_msg_t msg;
1556
1775
dbd_step_comp_msg_t req;
1575
1794
if (!step_ptr->step_layout || !step_ptr->step_layout->task_cnt) {
1576
cpus = step_ptr->job_ptr->total_procs;
1795
cpus = tasks = step_ptr->job_ptr->total_procs;
1577
1796
snprintf(node_list, BUFFER_SIZE, "%s",
1578
1797
step_ptr->job_ptr->nodes);
1580
cpus = step_ptr->step_layout->task_cnt;
1799
cpus = step_ptr->cpu_count;
1800
tasks = step_ptr->step_layout->task_cnt;
1581
1801
snprintf(node_list, BUFFER_SIZE, "%s",
1582
1802
step_ptr->step_layout->node_list);
1591
1811
return SLURM_ERROR;
1814
memset(&req, 0, sizeof(dbd_step_comp_msg_t));
1594
1816
req.assoc_id = step_ptr->job_ptr->assoc_id;
1595
1817
req.db_index = step_ptr->job_ptr->db_index;
1596
1818
req.end_time = time(NULL); /* called at step completion */
1622
1845
slurmdbd_msg_t msg;
1623
1846
dbd_job_suspend_msg_t req;
1848
memset(&req, 0, sizeof(dbd_job_suspend_msg_t));
1625
1850
req.assoc_id = job_ptr->assoc_id;
1626
1851
req.job_id = job_ptr->job_id;
1627
1852
req.db_index = job_ptr->db_index;
1754
1983
extern int acct_storage_p_update_shares_used(void *db_conn,
1755
1984
List shares_used)
1758
dbd_list_msg_t shares_used_msg;
1761
shares_used_msg.my_list = shares_used;
1763
req.msg_type = DBD_UPDATE_SHARES_USED;
1764
req.data = &shares_used_msg;
1765
rc = slurm_send_slurmdbd_recv_rc_msg(SLURMDBD_VERSION,
1768
if(resp_code != SLURM_SUCCESS)
1986
return SLURM_SUCCESS;
1774
1989
extern int acct_storage_p_flush_jobs_on_cluster(void *db_conn, char *cluster,
1780
1995
info("Ending any jobs in accounting that were running when controller "
1781
1996
"went down on cluster %s", cluster);
1998
memset(&req, 0, sizeof(dbd_cluster_procs_msg_t));
1782
2000
req.cluster_name = cluster;
1783
2001
req.proc_count = 0;
1784
2002
req.event_time = event_time;
1785
2004
msg.msg_type = DBD_FLUSH_JOBS;
1786
2005
msg.data = &req;