1
/*___INFO__MARK_BEGIN__*/
2
/*************************************************************************
4
* The Contents of this file are made available subject to the terms of
5
* the Sun Industry Standards Source License Version 1.2
7
* Sun Microsystems Inc., March, 2001
10
* Sun Industry Standards Source License Version 1.2
11
* =================================================
12
* The contents of this file are subject to the Sun Industry Standards
13
* Source License Version 1.2 (the "License"); You may not use this file
14
* except in compliance with the License. You may obtain a copy of the
15
* License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html
17
* Software provided under this License is provided on an "AS IS" basis,
18
* WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
19
* WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
20
* MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
21
* See the License for the specific provisions governing your rights and
22
* obligations concerning the Software.
24
* The Initial Developer of the Original Code is: Sun Microsystems, Inc.
26
* Copyright: 2001 by Sun Microsystems, Inc.
28
* All Rights Reserved.
30
************************************************************************/
31
/*___INFO__MARK_END__*/
33
#include <sys/types.h>
35
#include <sys/resource.h>
42
#include "basis_types.h"
43
#include "config_file.h"
44
#include "err_trace.h"
45
#include "execution_states.h"
46
#include "uti/sge_stdio.h"
47
#include "uti/sge_uidgid.h"
48
#include "msg_common.h"
50
bool shepherd_write_pid_file(pid_t pid, dstring *errmsg)
55
fp = fopen("pid", "w");
57
if (fprintf(fp, pid_t_fmt"\n", pid) < 0) {
58
sge_dstring_sprintf(errmsg, MSG_FILE_CANNOT_WRITE_SS, "pid", strerror(errno));
62
sge_dstring_sprintf(errmsg, MSG_FILE_CANNOT_FLUSH_SS, "pid", strerror(errno));
68
sge_dstring_sprintf(errmsg, MSG_FILE_NOOPEN_SS, "pid", strerror(errno));
73
sge_dstring_sprintf(errmsg, MSG_FILE_NOCLOSE_SS, "pid", strerror(errno));
78
shepherd_read_qrsh_pid_file(const char *filename, pid_t *qrsh_pid,
79
int *replace_qrsh_pid)
84
fp = fopen(filename, "r");
86
int arguments = fscanf(fp, pid_t_fmt, qrsh_pid);
91
/* set pid from qrsh_starter as job_pid */
92
sprintf(buffer, pid_t_fmt, *qrsh_pid);
93
/* TODO: it would be better to use add_or_replace here */
94
add_config_entry("job_pid", buffer);
95
*replace_qrsh_pid = 0;
97
shepherd_trace("could not read qrsh_pid file");
103
* CR 6588743 - raising a shepherd_error here would set the queue in
104
* error state and rerun the job
106
shepherd_trace(MSG_FILE_NOOPEN_SS, filename, strerror(errno));
112
* CR 6588743 - raising a shepherd_error here would set the queue in
113
* error state and rerun the job
115
shepherd_trace(MSG_FILE_NOCLOSE_SS, filename, strerror(errno));
120
shepherd_write_usage_file(u_long32 wait_status, int exit_status,
121
int child_signal, u_long32 start_time,
122
u_long32 end_time, struct rusage *rusage)
125
const char *const filename = "usage";
128
shepherd_trace("writing usage file to \"usage\"");
130
fp = fopen(filename, "w");
133
* the wait status is returned by japi_wait()
134
* see sge_reportL.h for bitmask and macro definition
136
FPRINTF((fp, "wait_status="sge_u32"\n", wait_status));
137
FPRINTF((fp, "exit_status=%d\n", exit_status));
138
FPRINTF((fp, "signal=%d\n", child_signal));
140
FPRINTF((fp, "start_time=%d\n", (int) start_time));
141
FPRINTF((fp, "end_time=%d\n", (int) end_time));
142
FPRINTF((fp, "ru_wallclock="sge_u32"\n", (u_long32) end_time-start_time));
143
#if defined(NEC_ACCOUNTING_ENTRIES)
144
/* Additional accounting information for NEC SX-4 and SX-5 */
145
#if defined(NECSX4) || defined(NECSX5)
147
FPRINTF((fp, "necsx_necsx4="sge_u32"\n", 1));
148
#elif defined(NECSX5)
149
FPRINTF((fp, "necsx_necsx5="sge_u32"\n", 1));
151
FPRINTF((fp, "necsx_base_prty="sge_u32"\n", 0));
152
FPRINTF((fp, "necsx_time_slice="sge_u32"\n", 0));
153
FPRINTF((fp, "necsx_num_procs="sge_u32"\n", 0));
154
FPRINTF((fp, "necsx_kcore_min="sge_u32"\n", 0));
155
FPRINTF((fp, "necsx_mean_size="sge_u32"\n", 0));
156
FPRINTF((fp, "necsx_maxmem_size="sge_u32"\n", 0));
157
FPRINTF((fp, "necsx_chars_trnsfd="sge_u32"\n", 0));
158
FPRINTF((fp, "necsx_blocks_rw="sge_u32"\n", 0));
159
FPRINTF((fp, "necsx_inst="sge_u32"\n", 0));
160
FPRINTF((fp, "necsx_vector_inst="sge_u32"\n", 0));
161
FPRINTF((fp, "necsx_vector_elmt="sge_u32"\n", 0));
162
FPRINTF((fp, "necsx_vec_exe="sge_u32"\n", 0));
163
FPRINTF((fp, "necsx_flops="sge_u32"\n", 0));
164
FPRINTF((fp, "necsx_conc_flops="sge_u32"\n", 0));
165
FPRINTF((fp, "necsx_fpec="sge_u32"\n", 0));
166
FPRINTF((fp, "necsx_cmcc="sge_u32"\n", 0));
167
FPRINTF((fp, "necsx_bccc="sge_u32"\n", 0));
168
FPRINTF((fp, "necsx_mt_open="sge_u32"\n", 0));
169
FPRINTF((fp, "necsx_io_blocks="sge_u32"\n", 0));
170
FPRINTF((fp, "necsx_multi_single="sge_u32"\n", 0));
171
FPRINTF((fp, "necsx_max_nproc="sge_u32"\n", 0));
175
FPRINTF((fp, "ru_utime=%f\n", (double)rusage->ru_utime.tv_sec + (double)rusage->ru_utime.tv_usec / 1000000.0));
176
FPRINTF((fp, "ru_stime=%f\n", (double)rusage->ru_stime.tv_sec + (double)rusage->ru_stime.tv_usec / 1000000.0));
177
FPRINTF((fp, "ru_maxrss=%ld\n", rusage->ru_maxrss));
178
FPRINTF((fp, "ru_ixrss=%ld\n", rusage->ru_ixrss));
180
FPRINTF((fp, "ru_ismrss=%ld\n", rusage->ru_ismrss));
182
FPRINTF((fp, "ru_idrss=%ld\n", rusage->ru_idrss));
183
FPRINTF((fp, "ru_isrss=%ld\n", rusage->ru_isrss));
184
FPRINTF((fp, "ru_minflt=%ld\n", rusage->ru_minflt));
185
FPRINTF((fp, "ru_majflt=%ld\n", rusage->ru_majflt));
186
FPRINTF((fp, "ru_nswap=%ld\n", rusage->ru_nswap));
187
FPRINTF((fp, "ru_inblock=%ld\n", rusage->ru_inblock));
188
FPRINTF((fp, "ru_oublock=%ld\n", rusage->ru_oublock));
189
FPRINTF((fp, "ru_msgsnd=%ld\n", rusage->ru_msgsnd));
190
FPRINTF((fp, "ru_msgrcv=%ld\n", rusage->ru_msgrcv));
191
FPRINTF((fp, "ru_nsignals=%ld\n", rusage->ru_nsignals));
192
FPRINTF((fp, "ru_nvcsw=%ld\n", rusage->ru_nvcsw));
193
FPRINTF((fp, "ru_nivcsw=%ld\n", rusage->ru_nivcsw));
198
shepherd_error(1, MSG_FILE_NOOPEN_SS, filename, strerror(errno));
204
shepherd_error(1, MSG_FILE_NOCLOSE_SS, filename, strerror(errno));
209
shepherd_write_job_pid_file(const char *job_pid)
212
const char *const filename = "job_pid";
215
fp = fopen(filename, "w");
217
FPRINTF((fp, "%s\n", job_pid));
220
shepherd_error(1, MSG_FILE_NOOPEN_SS, filename, strerror(errno));
226
shepherd_error(1, MSG_FILE_NOCLOSE_SS, filename, strerror(errno));
231
shepherd_write_sig_info_file(const char *filename, const char *task_id,
232
u_long32 exit_status)
237
fp = fopen(filename, "a");
239
FPRINTF((fp, "%s "sge_u32"\n", task_id, exit_status));
242
shepherd_error(1, MSG_FILE_NOOPEN_SS, filename, strerror(errno));
248
shepherd_error(1, MSG_FILE_NOCLOSE_SS, filename, strerror(errno));
253
bool shepherd_write_osjobid_file(const char *osjobid)
256
const char *const filename = "osjobid";
259
fp = fopen(filename, "w");
261
FPRINTF((fp, "%s\n", osjobid));
264
shepherd_error(1, MSG_FILE_NOOPEN_SS, filename, strerror(errno));
270
shepherd_error(1, MSG_FILE_NOCLOSE_SS, filename, strerror(errno));
275
shepherd_write_processor_set_number_file(int proc_set)
278
const char *const filename = "processor_set_number";
281
fp = fopen(filename, "w");
283
FPRINTF((fp, "%d\n", proc_set));
286
shepherd_error(1, MSG_FILE_NOOPEN_SS, filename, strerror(errno));
292
shepherd_error(1, MSG_FILE_NOCLOSE_SS, filename, strerror(errno));
297
shepherd_write_shepherd_about_to_exit_file(void)
300
const char *const filename = "shepherd_about_to_exit";
303
fd = fopen(filename, "w");
307
shepherd_error(1, MSG_FILE_NOOPEN_SS, filename, strerror(errno));
312
shepherd_error(1, MSG_FILE_NOCLOSE_SS, filename, strerror(errno));
317
shepherd_read_exit_status_file(int *return_code)
321
const char *const filename = "exit_status";
323
fp = fopen(filename, "r");
325
int arguments = fscanf(fp, "%d\n", return_code);
326
/* retrieve first exit status from exit status file */
328
if (arguments != 1) {
329
shepherd_trace("could not read exit_status file");
330
*return_code = ESSTATE_NO_EXITSTATUS;
334
shepherd_error(1, MSG_FILE_NOOPEN_SS, filename, strerror(errno));
340
shepherd_error(1, MSG_FILE_NOCLOSE_SS, filename, strerror(errno));
345
shepherd_read_qrsh_file(const char* pid_file_name, pid_t *qrsh_pid)
350
fp = fopen(pid_file_name, "r");
352
int arguments = fscanf(fp, pid_t_fmt, qrsh_pid);
354
/* retrieve the qrsh pid from the pid file */
355
if (arguments != 1) {
356
shepherd_trace("could not read qrsh_pid_file '%s'", pid_file_name);
363
* CR 6588743 - raising a shepherd_error here would set the queue in
364
* error state and rerun the job
366
shepherd_trace(MSG_FILE_NOOPEN_SS, pid_file_name, strerror(errno));
372
* CR 6588743 - raising a shepherd_error here would set the queue in
373
* error state and rerun the job
375
shepherd_trace(MSG_FILE_NOCLOSE_SS, pid_file_name, strerror(errno));
380
shepherd_read_processor_set_number_file(int *proc_set)
384
const char *const filename = "processor_set_number";
386
fp = fopen(filename, "r");
388
int arguments = fscanf(fp, "%d", proc_set);
390
if (arguments != 1) {
391
shepherd_trace("could not read processor_set_number file");
396
shepherd_error(1, MSG_FILE_NOOPEN_SS, filename, strerror(errno));
402
shepherd_error(1, MSG_FILE_NOCLOSE_SS, filename, strerror(errno));
406
#if defined(IRIX) || defined(CRAY) || defined(NECSX4) || defined(NECSX5)
408
shepherd_read_osjobid_file(
411
#elif defined(NECSX4) || defined(NECSX5)
421
const char *const filename = "osjobid";
423
fp = fopen(filename, "r");
428
arguments = fscanf(fp, "%lld\n", return_code);
430
arguments = fscanf(fp, "%d\n", return_code);
433
if (arguments != 1) {
434
shepherd_trace("could not read osjobid file");
440
if (is_error == true) {
441
shepherd_error(1, MSG_FILE_NOOPEN_SS, filename, strerror(errno));
443
shepherd_trace(MSG_FILE_NOOPEN_SS, filename, strerror(errno));
449
shepherd_error(1, MSG_FILE_NOCLOSE_SS, filename, strerror(errno));
455
create_checkpointed_file(int ckpt_is_in_arena)
457
const char *const filename = "checkpointed";
460
fp = fopen(filename, "w");
462
if (ckpt_is_in_arena) {
463
FPRINTF((fp, "1\n"));
467
shepherd_error(1, MSG_FILE_NOOPEN_SS, filename, strerror(errno));
472
shepherd_error(1, MSG_FILE_NOCLOSE_SS, filename, strerror(errno));
477
checkpointed_file_exists(void)
480
return !SGE_STAT("checkpointed", &buf);