1
/*___INFO__MARK_BEGIN__*/
2
/*************************************************************************
4
* The Contents of this file are made available subject to the terms of
5
* the Sun Industry Standards Source License Version 1.2
7
* Sun Microsystems Inc., March, 2001
10
* Sun Industry Standards Source License Version 1.2
11
* =================================================
12
* The contents of this file are subject to the Sun Industry Standards
13
* Source License Version 1.2 (the "License"); You may not use this file
14
* except in compliance with the License. You may obtain a copy of the
15
* License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html
17
* Software provided under this License is provided on an "AS IS" basis,
18
* WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
19
* WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
20
* MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
21
* See the License for the specific provisions governing your rights and
22
* obligations concerning the Software.
24
* The Initial Developer of the Original Code is: Sun Microsystems, Inc.
26
* Copyright: 2001 by Sun Microsystems, Inc.
28
* All Rights Reserved.
30
************************************************************************/
31
/*___INFO__MARK_END__*/
41
/* this timeout is in effect with SGE commprocs */
42
/* parenthesized so the product is evaluated as one unit inside larger
   expressions (e.g. x / SGE_COMMPROC_TIMEOUT) */
#define SGE_COMMPROC_TIMEOUT (60*5)
44
#include "japi/drmaa.h"
45
#include "japi/japi.h"
46
#include "japi/japiP.h"
48
#include "cull/cull_list.h"
50
#include "gdi/sge_gdi.h"
52
#include "sgeobj/sge_jobL.h"
53
#include "sgeobj/sge_answer.h"
55
#include "uti/sge_profiling.h"
56
#include "uti/sge_stdio.h"
58
#include "comm/commlib.h"
61
#include "rmon_monitoring_level.h"
63
#include "gdi/sge_gdi_ctx.h"
65
#include "msg_common.h"
74
/*
 * Advance to the next command-line argument.
 *
 * argc is a pointer to the argument counter, argv a pointer to the
 * argument vector. The counter is decremented, the vector pointer is
 * incremented, and the macro evaluates to the element the vector now
 * points at. Each parameter is parenthesized so arbitrary pointer
 * expressions can be passed; argv is expanded twice, so pass an
 * expression without side effects.
 */
#define NEXT_ARGV(argc, argv) \
   ((*(argc))--, (*(argv))++, (*(argv))[0])
85
ST_SUBMIT_NO_RUN_WAIT,
87
- submit jobs that won't run
91
/* - multiple submission threads
92
- wait is done by main thread */
94
MT_SUBMIT_BEFORE_INIT_WAIT,
95
/* - no drmaa_init() was called
96
- multiple threads try to submit but fail
97
- when drmaa_init() is called by main thread
99
- wait is done by main thread */
102
/* - drmaa_init() is called multiple times
103
- first time it must succeed - second time it must fail
104
- then drmaa_exit() is called */
107
/* - drmaa_init() is called
108
- then drmaa_exit() is called multiple times
109
- first time it must succeed - second time it must fail */
111
MT_EXIT_DURING_SUBMIT,
112
/* - drmaa_init() is called
113
- multiple submission threads submitting (delayed) a series
115
- during submission main thread does drmaa_exit() */
118
/* - drmaa_init() is called
119
- multiple submission threads submit jobs and wait for these jobs
120
- when all threads are finished main thread calls drmaa_exit() */
122
MT_EXIT_DURING_SUBMIT_OR_WAIT,
123
/* - drmaa_init() is called
124
- multiple submission threads submit jobs and wait for these jobs
125
- while submission threads are waiting for their jobs the main
126
thread calls drmaa_exit() */
129
/* - drmaa_init() is called
130
- a bulk job is submitted and waited
131
- then drmaa_exit() is called */
133
ST_BULK_SINGLESUBMIT_WAIT_INDIVIDUAL,
134
/* - drmaa_init() is called
135
- bulk and sequential jobs are submitted
136
- all jobs are waited individually
137
- then drmaa_exit() is called */
139
ST_SUBMITMIXTURE_SYNC_ALL_DISPOSE,
140
/* - drmaa_init() is called
141
- submit a mixture of single and bulk jobs
142
- do drmaa_synchronize(DRMAA_JOB_IDS_SESSION_ALL, dispose)
143
to wait for all jobs to finish
144
- then drmaa_exit() is called */
146
ST_SUBMITMIXTURE_SYNC_ALL_NODISPOSE,
147
/* - drmaa_init() is called
148
- submit a mixture of single and bulk jobs
149
- do drmaa_synchronize(DRMAA_JOB_IDS_SESSION_ALL, no-dispose)
150
to wait for all jobs to finish
151
- do drmaa_wait(DRMAA_JOB_IDS_SESSION_ANY) until
152
DRMAA_ERRNO_INVALID_JOB to reap all jobs
153
- then drmaa_exit() is called */
155
ST_SUBMITMIXTURE_SYNC_ALLIDS_DISPOSE,
156
/* - drmaa_init() is called
157
- submit a mixture of single and bulk jobs
158
- do drmaa_synchronize(all_jobids, dispose)
159
to wait for all jobs to finish
160
- then drmaa_exit() is called */
162
ST_SUBMITMIXTURE_SYNC_ALLIDS_NODISPOSE,
163
/* - drmaa_init() is called
164
- submit a mixture of single and bulk jobs
165
- do drmaa_synchronize(all_jobids, no-dispose)
166
to wait for all jobs to finish
167
- do drmaa_wait(DRMAA_JOB_IDS_SESSION_ANY) until
168
DRMAA_ERRNO_INVALID_JOB to reap all jobs
169
- then drmaa_exit() is called */
172
ST_SUBMIT_PAUSE_SUBMIT_SYNC,
173
/* - drmaa_init() is called
175
- do a long sleep(SGE_COMMPROC_TIMEOUT+)
176
- another job is submitted
177
- do drmaa_synchronize(DRMAA_JOB_IDS_SESSION_ALL, dispose)
178
- then drmaa_exit() is called */
180
ST_INPUT_FILE_FAILURE,
181
ST_OUTPUT_FILE_FAILURE,
182
ST_ERROR_FILE_FAILURE,
183
/* - drmaa_init() is called
184
- a job is submitted with input/output/error path specification
185
that must cause the job to fail
186
- use drmaa_synchronize() to ensure job was started
187
- drmaa_job_ps() must return DRMAA_PS_FAILED
188
- drmaa_wait() must report drmaa_wifaborted() -> true
189
- then drmaa_exit() is called */
191
ST_SUBMIT_IN_HOLD_RELEASE,
192
/* - drmaa_init() is called
193
- a job is submitted with a user hold
194
- use drmaa_job_ps() to verify user hold state
195
- hold state is released using drmaa_control()
197
- then drmaa_exit() is called
198
(still requires manual testing)
201
ST_SUBMIT_IN_HOLD_DELETE,
202
/* - drmaa_init() is called
203
- a job is submitted with a user hold
204
- use drmaa_job_ps() to verify user hold state
205
- job is terminated using drmaa_control()
207
- then drmaa_exit() is called
208
(still requires manual testing)
211
ST_BULK_SUBMIT_IN_HOLD_SINGLE_RELEASE,
212
/* - drmaa_init() is called
213
- a bulk job is submitted with a user hold
214
- hold state is released separately for each task using drmaa_control()
215
- the job ids are waited
216
- then drmaa_exit() is called
217
(still requires manual testing)
220
ST_BULK_SUBMIT_IN_HOLD_SESSION_RELEASE,
221
/* - drmaa_init() is called
222
- a bulk job is submitted with a user hold
223
- hold state is released for the session using drmaa_control()
224
- the job ids are waited
225
- then drmaa_exit() is called
226
(still requires manual testing)
229
ST_BULK_SUBMIT_IN_HOLD_SESSION_DELETE,
230
/* - drmaa_init() is called
231
- a bulk job is submitted with a user hold
232
- use drmaa_job_ps() to verify user hold state
233
- all session jobs are terminated using drmaa_control()
234
- the job ids are waited
235
- then drmaa_exit() is called
236
(still requires manual testing)
239
ST_BULK_SUBMIT_IN_HOLD_SINGLE_DELETE,
240
/* - drmaa_init() is called
241
- a bulk job is submitted with a user hold
242
- use drmaa_job_ps() to verify user hold state
243
- all session jobs are terminated using drmaa_control()
244
- the job ids are waited
245
- then drmaa_exit() is called
246
(still requires manual testing)
249
ST_INPUT_BECOMES_OUTPUT,
250
/* - drmaa_init() is called
251
- job input is prepared in local file
252
- a job is submitted that echoes its input to output
254
- then drmaa_exit() is called
255
- job output must be identical to job input
256
(this requires manual testing) */
259
/* - drmaa_init() is called
260
- drmaa_job_ps() is used to retrieve DRMAA state
261
for each jobid passed *manually* in argv
262
- then drmaa_exit() is called
263
(requires manual testing)
267
/* - drmaa_init() is called
268
- drmaa_control() is used to change DRMAA job state
269
for each jobid passed *manually* in argv
270
- drmaa_control() must return with the exit status passed in argv
271
- then drmaa_exit() is called
272
(still manual testing)
276
/* - drmaa_init() is called
277
- 255 jobs are submitted
278
- job i returns i as exit status (8 bit)
279
- drmaa_wait() verifies each job returned the
281
- then drmaa_exit() is called */
284
/* - drmaa_init() is called
285
- drmaa_get_attribute_names() is called
286
- the names of all supported non vector attributes are printed
287
- then drmaa_exit() is called */
290
/* - drmaa_init() is called
291
- drmaa_get_vector_attribute_names() is called
292
- the names of all supported vector attributes are printed
293
- then drmaa_exit() is called */
296
/* - drmaa_version() is called
297
- version information is printed */
300
/* - drmaa_get_contact() is called
301
- the contact string is printed
302
- drmaa_init() is called
303
- drmaa_get_contact() is called
304
- the contact string is printed
305
- then drmaa_exit() is called */
308
/* - drmaa_get_DRM_system() is called
309
- the DRM system name is printed
310
- drmaa_init() is called
311
- drmaa_get_DRM_system() is called
312
- the DRM system name is printed
313
- then drmaa_exit() is called */
316
/* - drmaa_get_DRM_system() is called
317
- the contact string is printed
318
- drmaa_init() is called
319
- drmaa_get_DRMAA_implementation() is called
320
- the DRMAA implementation name is printed
321
- then drmaa_exit() is called */
323
ST_EMPTY_SESSION_WAIT,
324
/* - drmaa_init() is called
325
- drmaa_wait() must return DRMAA_ERRNO_INVALID_JOB
326
- then drmaa_exit() is called */
328
ST_EMPTY_SESSION_SYNCHRONIZE_DISPOSE,
329
/* - drmaa_init() is called
330
- drmaa_synchronize(DRMAA_JOB_IDS_SESSION_ALL, dispose=true) must return DRMAA_ERRNO_SUCCESS
331
- then drmaa_exit() is called */
333
ST_EMPTY_SESSION_SYNCHRONIZE_NODISPOSE,
334
/* - drmaa_init() is called
335
- drmaa_synchronize(DRMAA_JOB_IDS_SESSION_ALL, dispose=false) must return DRMAA_ERRNO_SUCCESS
336
- then drmaa_exit() is called */
338
ST_EMPTY_SESSION_CONTROL,
339
/* - drmaa_init() is called
340
- drmaa_control(DRMAA_JOB_IDS_SESSION_ALL, <passed control operation>) must return DRMAA_ERRNO_SUCCESS
341
- then drmaa_exit() is called */
343
ST_SUBMIT_SUSPEND_RESUME_WAIT,
344
/* - drmaa_init() is called
345
- a single job is submitted
346
- drmaa_job_ps() is used to actively wait until job is running
347
- drmaa_control() is used to suspend the job
348
- drmaa_job_ps() is used to verify job was suspended
349
- drmaa_control() is used to resume the job
350
- drmaa_job_ps() is used to verify job was resumed
351
- drmaa_wait() is used to wait for the job's regular end
352
- then drmaa_exit() is called */
354
ST_SUBMIT_POLLING_WAIT_TIMEOUT,
355
/* - drmaa_init() is called
356
- a single job is submitted
357
- repeatedly drmaa_wait() with a timeout is used until job is finished
358
- then drmaa_exit() is called */
360
ST_SUBMIT_POLLING_WAIT_ZEROTIMEOUT,
361
/* - drmaa_init() is called
362
- a single job is submitted
363
- repeatedly do drmaa_wait(DRMAA_TIMEOUT_NO_WAIT) + sleep() until job is finished
364
- then drmaa_exit() is called */
366
ST_SUBMIT_POLLING_SYNCHRONIZE_TIMEOUT,
367
/* - drmaa_init() is called
368
- a single job is submitted
369
- repeatedly drmaa_synchronize() with a timeout is used until job is finished
370
- then drmaa_exit() is called */
372
ST_SUBMIT_POLLING_SYNCHRONIZE_ZEROTIMEOUT,
373
/* - drmaa_init() is called
374
- a single job is submitted
375
- repeatedly do drmaa_synchronize(DRMAA_TIMEOUT_NO_WAIT) + sleep() until job is finished
376
- then drmaa_exit() is called */
379
/* Need to test all DRMAA attributes:
380
DRMAA_REMOTE_COMMAND - implicit
389
DRMAA_NATIVE_SPECIFICATION - test if it works and if it clashes
400
ST_TRANSFER_FILES_SINGLE_JOB,
401
ST_TRANSFER_FILES_BULK_JOB,
402
/* Set Job InputHost:/InputPath, OutputHost:/OutputPath, ErrorHost:/ErrorPath */
404
ST_RESERVATION_FINISH_ORDER,
405
/* ensure three jobs finish in the order foreseen for reservation */
407
ST_BACKFILL_FINISH_ORDER,
408
/* ensure three jobs finish in the order foreseen for backfilling */
411
/* ensure 7 jobs finish in the order foreseen for wildcard parallel jobs */
414
/* - drmaa_init() is called
415
- drmaa_set_attribute() is called for an invalid attribute
416
- then drmaa_exit() is called */
418
ST_UNSUPPORTED_VATTR,
419
/* - drmaa_init() is called
420
- drmaa_set_vector_attribute() is called for an invalid attribute
421
- then drmaa_exit() is called */
423
ST_SYNCHRONIZE_NONEXISTANT,
425
- Create job template.
427
- Delete job template.
428
- Use job id to create unknown, valid job id.
429
- Synchronize against unknown id.
430
- Wait for real job to finish.
433
ST_RECOVERABLE_SESSION,
435
- Create job template.
437
- Delete job template.
440
- Wait for job to finish.
444
/* - Test that each error code has the right value. */
447
const struct test_name2number_map {
448
char *test_name; /* name of the test */
449
int test_number; /* number the test is internally mapped to */
450
int nargs; /* number of test case arguments required */
451
char *opt_arguments; /* description of test case arguments for usage output */
454
/* all automated tests - ST_* and MT_* tests */
455
{ "ALL_AUTOMATED", ALL_TESTS, 3, "<sleeper_job> <exit_arg_job> <email_addr>" },
457
/* one application thread - automated tests only */
458
{ "ST_ERROR_CODES", ST_ERROR_CODES, 0, "" },
459
{ "ST_MULT_INIT", ST_MULT_INIT, 0, "" },
460
{ "ST_MULT_EXIT", ST_MULT_EXIT, 0, "" },
461
{ "ST_SUPPORTED_ATTR", ST_SUPPORTED_ATTR, 0, "" },
462
{ "ST_SUPPORTED_VATTR", ST_SUPPORTED_VATTR, 0, "" },
463
{ "ST_VERSION", ST_VERSION, 0, "" },
464
{ "ST_DRM_SYSTEM", ST_DRM_SYSTEM, 0, "" },
465
{ "ST_DRMAA_IMPL", ST_DRMAA_IMPL, 0, "" },
466
{ "ST_CONTACT", ST_CONTACT, 0, "" },
467
{ "ST_EMPTY_SESSION_WAIT", ST_EMPTY_SESSION_WAIT, 0, "" },
468
{ "ST_EMPTY_SESSION_SYNCHRONIZE_DISPOSE", ST_EMPTY_SESSION_SYNCHRONIZE_DISPOSE, 0, "" },
469
{ "ST_EMPTY_SESSION_SYNCHRONIZE_NODISPOSE", ST_EMPTY_SESSION_SYNCHRONIZE_NODISPOSE, 0, "" },
470
{ "ST_EMPTY_SESSION_CONTROL", ST_EMPTY_SESSION_CONTROL, 1, "DRMAA_CONTROL_*" },
471
{ "ST_SUBMIT_WAIT", ST_SUBMIT_WAIT, 1, "<sleeper_job>" },
472
{ "ST_SUBMIT_NO_RUN_WAIT", ST_SUBMIT_NO_RUN_WAIT, 1, "<sleeper_job>" },
473
{ "ST_BULK_SUBMIT_WAIT", ST_BULK_SUBMIT_WAIT, 1, "<sleeper_job>" },
474
{ "ST_BULK_SINGLESUBMIT_WAIT_INDIVIDUAL", ST_BULK_SINGLESUBMIT_WAIT_INDIVIDUAL, 1, "<sleeper_job>" },
475
{ "ST_SUBMITMIXTURE_SYNC_ALL_DISPOSE", ST_SUBMITMIXTURE_SYNC_ALL_DISPOSE, 1, "<sleeper_job>" },
476
{ "ST_SUBMITMIXTURE_SYNC_ALL_NODISPOSE", ST_SUBMITMIXTURE_SYNC_ALL_NODISPOSE, 1, "<sleeper_job>" },
477
{ "ST_SUBMITMIXTURE_SYNC_ALLIDS_DISPOSE", ST_SUBMITMIXTURE_SYNC_ALLIDS_DISPOSE, 1, "<sleeper_job>" },
478
{ "ST_SUBMITMIXTURE_SYNC_ALLIDS_NODISPOSE", ST_SUBMITMIXTURE_SYNC_ALLIDS_NODISPOSE, 1, "<sleeper_job>" },
479
{ "ST_SUBMIT_PAUSE_SUBMIT_SYNC", ST_SUBMIT_PAUSE_SUBMIT_SYNC, 1, "<sleeper_job>" },
480
{ "ST_EXIT_STATUS", ST_EXIT_STATUS, 1, "<exit_arg_job>" },
481
{ "ST_INPUT_FILE_FAILURE", ST_INPUT_FILE_FAILURE, 1, "<sleeper_job>" },
482
{ "ST_OUTPUT_FILE_FAILURE", ST_OUTPUT_FILE_FAILURE, 1, "<sleeper_job>" },
483
{ "ST_ERROR_FILE_FAILURE", ST_ERROR_FILE_FAILURE, 1, "<sleeper_job>" },
484
{ "ST_SUBMIT_IN_HOLD_RELEASE", ST_SUBMIT_IN_HOLD_RELEASE, 1, "<sleeper_job>" },
485
{ "ST_SUBMIT_IN_HOLD_DELETE", ST_SUBMIT_IN_HOLD_DELETE, 1, "<sleeper_job>" },
486
{ "ST_BULK_SUBMIT_IN_HOLD_SESSION_RELEASE", ST_BULK_SUBMIT_IN_HOLD_SESSION_RELEASE, 1, "<sleeper_job>" },
487
{ "ST_BULK_SUBMIT_IN_HOLD_SINGLE_RELEASE", ST_BULK_SUBMIT_IN_HOLD_SINGLE_RELEASE, 1, "<sleeper_job>" },
488
{ "ST_BULK_SUBMIT_IN_HOLD_SESSION_DELETE", ST_BULK_SUBMIT_IN_HOLD_SESSION_DELETE, 1, "<sleeper_job>" },
489
{ "ST_BULK_SUBMIT_IN_HOLD_SINGLE_DELETE", ST_BULK_SUBMIT_IN_HOLD_SINGLE_DELETE, 1, "<sleeper_job>" },
490
{ "ST_SUBMIT_POLLING_WAIT_TIMEOUT", ST_SUBMIT_POLLING_WAIT_TIMEOUT, 1, "<sleeper_job>" },
491
{ "ST_SUBMIT_POLLING_WAIT_ZEROTIMEOUT", ST_SUBMIT_POLLING_WAIT_ZEROTIMEOUT, 1, "<sleeper_job>" },
492
{ "ST_SUBMIT_POLLING_SYNCHRONIZE_TIMEOUT", ST_SUBMIT_POLLING_SYNCHRONIZE_TIMEOUT, 1, "<sleeper_job>" },
493
{ "ST_SUBMIT_POLLING_SYNCHRONIZE_ZEROTIMEOUT", ST_SUBMIT_POLLING_SYNCHRONIZE_ZEROTIMEOUT, 1, "<sleeper_job>" },
494
{ "ST_UNSUPPORTED_ATTR", ST_UNSUPPORTED_ATTR, 0, "" },
495
{ "ST_UNSUPPORTED_VATTR", ST_UNSUPPORTED_VATTR, 0, "" },
496
{ "ST_SYNCHRONIZE_NONEXISTANT", ST_SYNCHRONIZE_NONEXISTANT, 1, "<sleeper_job>" },
497
{ "ST_RECOVERABLE_SESSION", ST_RECOVERABLE_SESSION, 1, "<sleeper_job>" },
499
/* multiple application threads - automated tests only */
500
{ "MT_SUBMIT_WAIT", MT_SUBMIT_WAIT, 1, "<sleeper_job>" },
501
{ "MT_SUBMIT_BEFORE_INIT_WAIT", MT_SUBMIT_BEFORE_INIT_WAIT, 1, "<sleeper_job>" },
502
{ "MT_EXIT_DURING_SUBMIT", MT_EXIT_DURING_SUBMIT, 1, "<sleeper_job>" },
503
{ "MT_SUBMIT_MT_WAIT", MT_SUBMIT_MT_WAIT, 1, "<sleeper_job>" },
504
{ "MT_EXIT_DURING_SUBMIT_OR_WAIT", MT_EXIT_DURING_SUBMIT_OR_WAIT, 1, "<sleeper_job>" },
506
/* ------------------------------------------------------------------------------------ */
507
/* tests that require test suite to be run in an automated fashion (file name creation) */
508
{ "ST_INPUT_BECOMES_OUTPUT", ST_INPUT_BECOMES_OUTPUT, 2, "<input_path> <output_path>" },
509
{ "ST_ATTRIBUTE_CHECK", ST_ATTRIBUTE_CHECK, 2, "<exit_arg_job> <email_addr>" },
510
{ "ST_SUBMIT_SUSPEND_RESUME_WAIT", ST_SUBMIT_SUSPEND_RESUME_WAIT, 1, "<sleeper_job>" },
512
/* tests that test_drmaa can't test in an automated fashion (so far) */
513
{ "ST_DRMAA_JOB_PS", ST_DRMAA_JOB_PS, 1, "<jobid> ..." },
514
{ "ST_DRMAA_CONTROL", ST_DRMAA_CONTROL, 3, "DRMAA_CONTROL_* DRMAA_ERRNO_* <jobid> ..." },
515
{ "ST_USAGE_CHECK", ST_USAGE_CHECK, 1, "<exit_job>" },
517
{ "ST_TRANSFER_FILES_SINGLE_JOB", ST_TRANSFER_FILES_SINGLE_JOB, 6, "<sleeper_job> <file_staging_flags "
518
"{\"i\"|\"o\"|\"e\" }> <merge_stderr {\"y\"|\"n\"}> <[inputhost]:/inputpath> <[outputhost]:/outputpath> <[errorhost]:/errorpath>" },
521
{ "ST_TRANSFER_FILES_BULK_JOB", ST_TRANSFER_FILES_BULK_JOB, 6, "<sleeper_job> <file_staging_flags "
522
"{\"i\"|\"o\"|\"e\" }> <<merge_stderr {\"y\"|\"n\"}> [inputhost]:/inputpath> <[outputhost]:/outputpath> <[errorhost]:/errorpath>" },
524
/* tests that have nothing to do with drmaa */
525
{ "ST_RESERVATION_FINISH_ORDER", ST_RESERVATION_FINISH_ORDER, 4, "<sleeper_job> <native_spec0> <native_spec1> <native_spec2>" },
526
{ "ST_BACKFILL_FINISH_ORDER", ST_BACKFILL_FINISH_ORDER, 4, "<sleeper_job> <native_spec0> <native_spec1> <native_spec2>" },
527
{ "ST_WILD_PARALLEL", ST_WILD_PARALLEL, 4, "<sleeper_job> <native_spec0> <native_spec1> <native_spec2>" },
531
#define FIRST_NON_AUTOMATED_TEST ST_INPUT_BECOMES_OUTPUT
533
static int test(sge_gdi_ctx_class_t *ctx, int *argc, char **argv[], int parse_args);
534
static int submit_and_wait(int n);
535
static int submit_sleeper(int n);
536
static int submit_input_mirror(int n, const char *mirror_job,
537
const char *input_path, const char *output_path,
538
const char *error_path, int join, char* hostname);
539
static int do_submit(drmaa_job_template_t *jt, int n);
540
static int wait_all_jobs(int n);
541
static int wait_n_jobs(int n);
542
static drmaa_job_template_t *create_sleeper_job_template(int seconds,
545
static drmaa_job_template_t *create_exit_job_template(const char *exit_job,
547
static void report_session_key(void);
548
static void *submit_and_wait_thread (void *v);
549
static void *submit_sleeper_thread (void *v);
551
int str2drmaa_state(const char *str);
552
static int str2drmaa_ctrl(const char *str);
553
static int str2drmaa_errno(const char *str);
554
static const char *drmaa_state2str(int state);
555
static const char *drmaa_ctrl2str(int control);
556
static const char *drmaa_errno2str(int ctrl);
558
static void array_job_run_sequence_adapt(int **sequence, int job_id, int count);
560
/* Forward declaration, stated once.
   NOTE(review): judging by the name, this sets a DRMAA path attribute with
   a ':' prepended to the value -- confirm against the definition. */
static int set_path_attribute_plus_colon(drmaa_job_template_t *jt,
                                         const char *name, const char *value,
                                         char *error_diagnosis,
                                         size_t error_diag_len);
568
static bool test_error_code(char *name, int code, int expected);
569
static void report_wrong_job_finish(const char *comment, const char *jobid,
576
static int test_dispatch_order_njobs(int n, test_job_t jobs[], char *jsr_str);
577
static int job_run_sequence_verify(int pos, const char *all_jobids[], int *order[]);
578
static int **job_run_sequence_parse(char *jrs_str);
580
static int test_case;
581
static int is_sun_grid_engine;
583
/* global test case parameters */
584
char *sleeper_job = NULL,
593
static void init_jobids(const char *jobids[], int size)
596
for (i = 0; i < size; i++) {
601
static void free_jobids(const char *jobids[], int size)
604
for (i = 0; i < size; i++) {
605
if (jobids[i] != NULL) {
611
static void usage(void)
614
fprintf(stderr, "usage: test_drmaa <test_case>\n");
616
fprintf(stderr, " <test_case> is one of the keywords below including the enlisted test case arguments\n");
617
for (i=0; test_map[i].test_name; i++)
618
fprintf(stderr, "\t%-45.45s %s\n", test_map[i].test_name, test_map[i].opt_arguments);
620
fprintf(stderr, " <sleeper_job> is an executable job that sleeps <argv1> seconds\n");
621
fprintf(stderr, " the job must be executable at the target machine\n");
622
fprintf(stderr, " <mirror_job> is an executable job that returns it's stdin stream to stdout (e.g. /bin/cat)\n");
623
fprintf(stderr, " the job must be executable at the target machine\n\n");
624
fprintf(stderr, " <exit_arg_job> is an executable job that exits <argv1> as exit status\n");
625
fprintf(stderr, " the job must be executable at the target machine\n");
626
fprintf(stderr, " <input_path> is the path of an input file\n");
627
fprintf(stderr, " the user must have read access to this file at the target machine\n");
628
fprintf(stderr, " <output_path> is the path of an output file\n");
629
fprintf(stderr, " the user must have write access to this file at the target machine\n");
630
fprintf(stderr, " <email_addr> is an email address to which to send \n");
631
fprintf(stderr, " job completion notices\n");
632
fprintf(stderr, " <native_spec0> a native specification\n");
633
fprintf(stderr, " <native_spec1> a native specification\n");
634
fprintf(stderr, " <native_spec2> a native specification\n");
639
int main(int argc, char *argv[])
643
char diag[DRMAA_ERROR_STRING_BUFFER];
644
sge_gdi_ctx_class_t *ctx = NULL;
647
DENTER_MAIN(TOP_LAYER, "qsub");
652
/* Print out an advisory */
653
printf ("The DRMAA test suite is now starting.\n");
655
/* figure out which DRM system we are using */
657
char drm_name[DRMAA_DRM_SYSTEM_BUFFER];
658
if (drmaa_get_DRM_system(drm_name, 255, diag, sizeof(diag)-1)!=DRMAA_ERRNO_SUCCESS) {
659
fprintf(stderr, "drmaa_get_DRM_system() failed: %s\n", diag);
663
printf("Connecting to DRM system \"%s\"\n", drm_name);
664
if (!strncmp(drm_name, "SGE", 3))
665
is_sun_grid_engine = 1;
667
is_sun_grid_engine = 0;
671
** since drmaa doesn't give an explicit handle to the context and sge_gdi
672
** is used below, we provide our own context here
674
if (sge_gdi2_setup(&ctx, JAPI, MAIN_THREAD, &alp) != AE_OK) {
675
answer_list_output(&alp);
676
SGE_EXIT((void**)&ctx, 1);
680
/* map test name to test number */
681
for (i=0; test_map[i].test_name; i++)
682
if (!strcasecmp(argv[1], test_map[i].test_name))
684
if (!test_map[i].test_name) {
685
fprintf(stderr, "test_drmaa: %s is not a valid test name\n", argv[1]);
688
test_case = test_map[i].test_number;
690
if ((argc-1) < test_map[i].nargs)
693
if (test_case == ALL_TESTS) {
695
sleeper_job = NEXT_ARGV(&argc, &argv);
696
exit_job = NEXT_ARGV(&argc, &argv);
697
email_addr = NEXT_ARGV(&argc, &argv);
699
for (i=1; test_map[i].test_name && test_map[i].test_number != FIRST_NON_AUTOMATED_TEST && success; i++) {
700
test_case = test_map[i].test_number;
701
printf("---------------------\n");
702
printf("starting test #%d (%s)\n", i, test_map[i].test_name);
705
case ST_EMPTY_SESSION_CONTROL:
708
const int ctrl_ops[] = { DRMAA_CONTROL_SUSPEND, DRMAA_CONTROL_RESUME,
709
DRMAA_CONTROL_HOLD, DRMAA_CONTROL_RELEASE, DRMAA_CONTROL_TERMINATE, -1 };
710
for (i=0; ctrl_ops[i] != -1; i++) {
711
ctrl_op = ctrl_ops[i];
712
if (test(ctx, &argc, &argv, 0)!=0) {
713
printf("test \"%s\" with \"%s\" failed\n",
714
test_map[i].test_name, drmaa_ctrl2str(ctrl_ops[i]));
719
printf("successfully finished test \"%s\" with \"%s\"\n",
720
test_map[i].test_name, drmaa_ctrl2str(ctrl_ops[i]));
726
if (test(ctx, &argc, &argv, 0)!=0) {
727
printf("test #%d failed\n", i);
732
printf("successfully finished test #%d\n", i);
740
printf("starting test \"%s\"\n", test_map[i].test_name);
741
if (test(ctx, &argc, &argv, 1)!=0) {
742
printf("test \"%s\" failed\n", test_map[i].test_name);
747
printf("successfully finished test \"%s\"\n", test_map[i].test_name);
750
sge_gdi2_shutdown((void**)&ctx);
751
sge_gdi_ctx_class_destroy(&ctx);
758
static int test(sge_gdi_ctx_class_t *ctx, int *argc, char **argv[], int parse_args)
760
bool bBulkJob = false;
762
int job_chunk = JOB_CHUNK;
763
char diagnosis[DRMAA_ERROR_STRING_BUFFER];
764
drmaa_job_template_t *jt = NULL;
766
int do_while_end = 0;
771
if (test_error_code("DRMAA_ERRNO_SUCCESS", DRMAA_ERRNO_SUCCESS, 0) &&
772
test_error_code("DRMAA_ERRNO_INTERNAL_ERROR", DRMAA_ERRNO_INTERNAL_ERROR, 1) &&
773
test_error_code("DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE", DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE, 2) &&
774
test_error_code("DRMAA_ERRNO_AUTH_FAILURE", DRMAA_ERRNO_AUTH_FAILURE, 3) &&
775
test_error_code("DRMAA_ERRNO_INVALID_ARGUMENT", DRMAA_ERRNO_INVALID_ARGUMENT, 4) &&
776
test_error_code("DRMAA_ERRNO_NO_ACTIVE_SESSION", DRMAA_ERRNO_NO_ACTIVE_SESSION, 5) &&
777
test_error_code("DRMAA_ERRNO_NO_MEMORY", DRMAA_ERRNO_NO_MEMORY, 6) &&
778
test_error_code("DRMAA_ERRNO_INVALID_CONTACT_STRING", DRMAA_ERRNO_INVALID_CONTACT_STRING, 7) &&
779
test_error_code("DRMAA_ERRNO_DEFAULT_CONTACT_STRING_ERROR", DRMAA_ERRNO_DEFAULT_CONTACT_STRING_ERROR, 8) &&
780
test_error_code("DRMAA_ERRNO_NO_DEFAULT_CONTACT_STRING_SELECTED", DRMAA_ERRNO_NO_DEFAULT_CONTACT_STRING_SELECTED, 9) &&
781
test_error_code("DRMAA_ERRNO_DRMS_INIT_FAILED", DRMAA_ERRNO_DRMS_INIT_FAILED, 10) &&
782
test_error_code("DRMAA_ERRNO_ALREADY_ACTIVE_SESSION", DRMAA_ERRNO_ALREADY_ACTIVE_SESSION, 11) &&
783
test_error_code("DRMAA_ERRNO_DRMS_EXIT_ERROR", DRMAA_ERRNO_DRMS_EXIT_ERROR, 12) &&
784
test_error_code("DRMAA_ERRNO_INVALID_ATTRIBUTE_FORMAT", DRMAA_ERRNO_INVALID_ATTRIBUTE_FORMAT, 13) &&
785
test_error_code("DRMAA_ERRNO_INVALID_ATTRIBUTE_VALUE", DRMAA_ERRNO_INVALID_ATTRIBUTE_VALUE, 14) &&
786
test_error_code("DRMAA_ERRNO_CONFLICTING_ATTRIBUTE_VALUES", DRMAA_ERRNO_CONFLICTING_ATTRIBUTE_VALUES, 15) &&
787
test_error_code("DRMAA_ERRNO_TRY_LATER", DRMAA_ERRNO_TRY_LATER, 16) &&
788
test_error_code("DRMAA_ERRNO_DENIED_BY_DRM", DRMAA_ERRNO_DENIED_BY_DRM, 17) &&
789
test_error_code("DRMAA_ERRNO_INVALID_JOB", DRMAA_ERRNO_INVALID_JOB, 18) &&
790
test_error_code("DRMAA_ERRNO_RESUME_INCONSISTENT_STATE", DRMAA_ERRNO_RESUME_INCONSISTENT_STATE, 19) &&
791
test_error_code("DRMAA_ERRNO_SUSPEND_INCONSISTENT_STATE", DRMAA_ERRNO_SUSPEND_INCONSISTENT_STATE, 20) &&
792
test_error_code("DRMAA_ERRNO_HOLD_INCONSISTENT_STATE", DRMAA_ERRNO_HOLD_INCONSISTENT_STATE, 21) &&
793
test_error_code("DRMAA_ERRNO_RELEASE_INCONSISTENT_STATE", DRMAA_ERRNO_RELEASE_INCONSISTENT_STATE, 22) &&
794
test_error_code("DRMAA_ERRNO_EXIT_TIMEOUT", DRMAA_ERRNO_EXIT_TIMEOUT, 23) &&
795
test_error_code("DRMAA_ERRNO_NO_RUSAGE", DRMAA_ERRNO_NO_RUSAGE, 24) &&
796
test_error_code("DRMAA_ERRNO_NO_MORE_ELEMENTS", DRMAA_ERRNO_NO_MORE_ELEMENTS, 25)
806
/* no test case arguments */
808
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
809
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
812
report_session_key();
813
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_ALREADY_ACTIVE_SESSION) {
814
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
817
if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
818
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
826
/* no test case arguments */
828
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
829
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
832
report_session_key();
833
if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
834
fprintf(stderr, "drmaa_exit(1) failed: %s\n", diagnosis);
837
if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_NO_ACTIVE_SESSION) {
838
fprintf(stderr, "drmaa_exit(2) failed: %s\n", diagnosis);
845
case ST_SUBMIT_NO_RUN_WAIT:
847
int n = (test_case == ST_SUBMIT_WAIT)?JOB_CHUNK:1;
851
sleeper_job = NEXT_ARGV(argc, argv);
853
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
854
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
857
report_session_key();
859
if (!(jt = create_sleeper_job_template(5, 0, 0))) {
860
fprintf(stderr, "create_sleeper_job_template() failed\n");
864
if (test_case == ST_SUBMIT_NO_RUN_WAIT) {
865
drmaa_set_attribute(jt, DRMAA_NATIVE_SPECIFICATION, "-l a=fantasy_os -now yes -w n", NULL, 0);
868
for (i=0; i<n; i++) {
869
if (drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis, sizeof(diagnosis)-1)!=DRMAA_ERRNO_SUCCESS) {
870
fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis);
873
printf("submitted job \"%s\"\n", jobid);
876
drmaa_delete_job_template(jt, NULL, 0);
878
if (wait_all_jobs(n) != DRMAA_ERRNO_SUCCESS) {
881
if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
882
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
888
case ST_SUBMIT_POLLING_WAIT_TIMEOUT:
889
case ST_SUBMIT_POLLING_WAIT_ZEROTIMEOUT:
890
case ST_SUBMIT_POLLING_SYNCHRONIZE_TIMEOUT:
891
case ST_SUBMIT_POLLING_SYNCHRONIZE_ZEROTIMEOUT:
894
const int timeout = 5;
895
const char *session_all[] = { DRMAA_JOB_IDS_SESSION_ALL, NULL };
898
sleeper_job = NEXT_ARGV(argc, argv);
900
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
901
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
904
report_session_key();
906
if (!(jt = create_sleeper_job_template(5, 0, 0))) {
907
fprintf(stderr, "create_sleeper_job_template() failed\n");
911
if (drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis, sizeof(diagnosis)-1)!=DRMAA_ERRNO_SUCCESS) {
912
fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis);
915
printf("submitted job \"%s\"\n", jobid);
917
drmaa_delete_job_template(jt, NULL, 0);
921
case ST_SUBMIT_POLLING_WAIT_TIMEOUT:
922
while ((drmaa_errno=drmaa_wait(jobid, NULL, 0, NULL, timeout, NULL,
923
diagnosis, sizeof(diagnosis)-1))!=DRMAA_ERRNO_SUCCESS) {
924
if (drmaa_errno != DRMAA_ERRNO_EXIT_TIMEOUT) {
925
fprintf(stderr, "drmaa_wait(\"%s\", timeout = %d) failed: %s (%s)\n",
926
jobid, timeout, diagnosis, drmaa_strerror(drmaa_errno));
929
printf("still waiting for job \"%s\" to finish\n", jobid);
933
case ST_SUBMIT_POLLING_WAIT_ZEROTIMEOUT:
934
while ((drmaa_errno=drmaa_wait(jobid, NULL, 0, NULL, DRMAA_TIMEOUT_NO_WAIT, NULL,
935
diagnosis, sizeof(diagnosis)-1))!=DRMAA_ERRNO_SUCCESS) {
936
if (drmaa_errno != DRMAA_ERRNO_EXIT_TIMEOUT) {
937
fprintf(stderr, "drmaa_wait(\"%s\", no timeout) failed: %s (%s)\n",
938
jobid, diagnosis, drmaa_strerror(drmaa_errno));
941
printf("still waiting for job \"%s\" to finish\n", jobid);
943
printf("slept %d seconds\n", timeout);
947
case ST_SUBMIT_POLLING_SYNCHRONIZE_TIMEOUT:
948
while ((drmaa_errno=drmaa_synchronize(session_all, timeout, 1,
949
diagnosis, sizeof(diagnosis)-1))!=DRMAA_ERRNO_SUCCESS) {
950
if (drmaa_errno != DRMAA_ERRNO_EXIT_TIMEOUT) {
951
fprintf(stderr, "drmaa_synchronize(\"%s\", timeout = %d) failed: %s (%s)\n",
952
jobid, timeout, diagnosis, drmaa_strerror(drmaa_errno));
955
printf("still trying to synchronize with job \"%s\" to finish\n", jobid);
959
case ST_SUBMIT_POLLING_SYNCHRONIZE_ZEROTIMEOUT:
960
while ((drmaa_errno=drmaa_synchronize(session_all, DRMAA_TIMEOUT_NO_WAIT, 1,
961
diagnosis, sizeof(diagnosis)-1))!=DRMAA_ERRNO_SUCCESS) {
962
if (drmaa_errno != DRMAA_ERRNO_EXIT_TIMEOUT) {
963
fprintf(stderr, "drmaa_synchronize(\"%s\", no timeout) failed: %s (%s)\n",
964
jobid, diagnosis, drmaa_strerror(drmaa_errno));
967
printf("still trying to synchronize with job \"%s\" to finish\n", jobid);
969
printf("slept %d seconds\n", timeout);
974
if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
975
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
983
pthread_t submitter_threads[NTHREADS];
987
sleeper_job = NEXT_ARGV(argc, argv);
989
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
990
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
993
report_session_key();
995
for (i=0; i<NTHREADS; i++)
996
pthread_create(&submitter_threads[i], NULL, submit_sleeper_thread, &job_chunk);
997
for (i=0; i<NTHREADS; i++)
998
if (pthread_join(submitter_threads[i], NULL))
999
printf("pthread_join() returned != 0\n");
1000
if (wait_all_jobs(n) != DRMAA_ERRNO_SUCCESS) {
1004
if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
1005
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
1011
case MT_SUBMIT_BEFORE_INIT_WAIT:
1013
pthread_t submitter_threads[NTHREADS];
1017
sleeper_job = NEXT_ARGV(argc, argv);
1018
printf("sleeper_job = %s\n", sleeper_job);
1019
for (i=0; i<NTHREADS; i++) {
1020
pthread_create(&submitter_threads[i], NULL, submit_sleeper_thread, &job_chunk);
1023
/* delay drmaa_init() */
1025
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
1026
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
1029
report_session_key();
1031
for (i=0; i<NTHREADS; i++)
1032
if (pthread_join(submitter_threads[i], NULL))
1033
printf("pthread_join() returned != 0\n");
1035
if (wait_all_jobs(n) != DRMAA_ERRNO_SUCCESS) {
1038
if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
1039
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
1045
case MT_EXIT_DURING_SUBMIT:
1047
pthread_t submitter_threads[NTHREADS];
1050
sleeper_job = NEXT_ARGV(argc, argv);
1052
putenv("SGE_DELAY_AFTER_SUBMIT=20");
1054
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
1055
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
1058
report_session_key();
1060
for (i=0; i<NTHREADS; i++)
1061
pthread_create(&submitter_threads[i], NULL, submit_sleeper_thread, &job_chunk);
1063
if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
1064
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
1067
printf("drmaa_exit() succeeded\n");
1069
putenv("SGE_DELAY_AFTER_SUBMIT=0");
1071
for (i=0; i<NTHREADS; i++)
1072
if (pthread_join(submitter_threads[i], NULL))
1073
printf("pthread_join() returned != 0\n");
1077
case MT_SUBMIT_MT_WAIT:
1079
pthread_t submitter_threads[NTHREADS];
1082
sleeper_job = NEXT_ARGV(argc, argv);
1084
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
1085
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
1088
report_session_key();
1090
for (i=0; i<NTHREADS; i++)
1091
pthread_create(&submitter_threads[i], NULL, submit_and_wait_thread, &job_chunk);
1093
for (i=0; i<NTHREADS; i++)
1094
if (pthread_join(submitter_threads[i], NULL))
1095
printf("pthread_join() returned != 0\n");
1097
if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
1098
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
1104
case MT_EXIT_DURING_SUBMIT_OR_WAIT:
1106
pthread_t submitter_threads[NTHREADS];
1109
sleeper_job = NEXT_ARGV(argc, argv);
1111
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
1112
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
1115
report_session_key();
1117
for (i=0; i<NTHREADS; i++)
1118
pthread_create(&submitter_threads[i], NULL, submit_and_wait_thread, &job_chunk);
1120
if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
1121
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
1124
printf("drmaa_exit() succeeded\n");
1126
for (i=0; i<NTHREADS; i++)
1127
if (pthread_join(submitter_threads[i], NULL))
1128
printf("pthread_join() returned != 0\n");
1132
case ST_BULK_SUBMIT_WAIT:
1135
sleeper_job = NEXT_ARGV(argc, argv);
1137
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
1138
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
1141
report_session_key();
1142
if (!(jt = create_sleeper_job_template(5, 1, 0))) {
1143
fprintf(stderr, "create_sleeper_job_template() failed\n");
1146
for (i=0; i<NBULKS; i++) {
1148
drmaa_job_ids_t *jobids;
1152
if ((drmaa_errno=drmaa_run_bulk_jobs(&jobids, jt, 1, JOB_CHUNK, 1, diagnosis, sizeof(diagnosis)-1))!=DRMAA_ERRNO_SUCCESS) {
1153
printf("failed submitting bulk job (%s): %s\n", drmaa_strerror(drmaa_errno), diagnosis);
1157
printf("submitted bulk job with jobids:\n");
1159
drmaa_errno = drmaa_get_num_job_ids(jobids, &size);
1161
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
1162
fprintf(stderr, "failed getting # job ids: %s\n", drmaa_strerror(drmaa_errno));
1166
for (j = 0; j < size; j++) {
1167
drmaa_errno = drmaa_get_next_job_id(jobids, jobid, sizeof(jobid)-1);
1169
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
1170
printf("failed getting job id: %s\n", drmaa_strerror(drmaa_errno));
1173
printf("\t \"%s\"\n", jobid);
1176
drmaa_errno = drmaa_get_next_job_id(jobids, jobid, sizeof(jobid)-1);
1178
if (drmaa_errno != DRMAA_ERRNO_NO_MORE_ELEMENTS) {
1179
fprintf(stderr, "Got incorrect return value from drmaa_get_next_job_id()\n");
1183
drmaa_release_job_ids(jobids);
1185
drmaa_delete_job_template(jt, NULL, 0);
1187
if (wait_n_jobs(JOB_CHUNK*NBULKS) != DRMAA_ERRNO_SUCCESS) {
1190
if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
1191
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
1197
case ST_BULK_SINGLESUBMIT_WAIT_INDIVIDUAL:
1199
const int size_all_jobids = NBULKS*JOB_CHUNK + JOB_CHUNK + 1;
1200
const char *all_jobids[NBULKS*JOB_CHUNK + JOB_CHUNK + 1];
1204
init_jobids(all_jobids, size_all_jobids);
1207
sleeper_job = NEXT_ARGV(argc, argv);
1210
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
1211
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
1214
report_session_key();
1217
* submit some bulk jobs
1219
if (!(jt = create_sleeper_job_template(5, 1, 0))) {
1220
fprintf(stderr, "create_sleeper_job_template() failed\n");
1223
for (i = 0; i < NBULKS; i++) {
1224
drmaa_job_ids_t *jobids;
1227
while ((drmaa_errno=drmaa_run_bulk_jobs(&jobids, jt, 1, JOB_CHUNK, 1, diagnosis,
1228
sizeof(diagnosis)-1))==DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) {
1229
fprintf(stderr, "drmaa_run_bulk_jobs() failed - retry: %s\n", diagnosis);
1232
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
1233
fprintf(stderr, "drmaa_run_bulk_jobs() failed: %s\n", diagnosis);
1234
free_jobids(all_jobids, size_all_jobids);
1238
printf("submitted bulk job with jobids:\n");
1239
for (j = 0; j < JOB_CHUNK; j++) {
1240
drmaa_get_next_job_id(jobids, jobid, sizeof(jobid)-1);
1241
all_jobids[pos++] = strdup(jobid);
1242
printf("\t \"%s\"\n", jobid);
1244
drmaa_release_job_ids(jobids);
1247
drmaa_delete_job_template(jt, NULL, 0);
1250
* submit some sequential jobs
1252
if (!(jt = create_sleeper_job_template(5, 0, 0))) {
1253
fprintf(stderr, "create_sleeper_job_template() failed\n");
1254
free_jobids(all_jobids, size_all_jobids);
1257
for (i=0; i<JOB_CHUNK; i++) {
1258
while ((drmaa_errno=drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis,
1259
sizeof(diagnosis)-1)) == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) {
1260
fprintf(stderr, "drmaa_run_job() failed - retry: %s\n", diagnosis);
1263
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
1264
fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis);
1265
free_jobids(all_jobids, size_all_jobids);
1268
all_jobids[pos++] = strdup(jobid);
1269
printf("\t \"%s\"\n", jobid);
1272
/* set string array end mark */
1273
all_jobids[pos] = NULL;
1275
drmaa_delete_job_template(jt, NULL, 0);
1278
* wait all those jobs
1280
for (pos=0; pos<NBULKS*JOB_CHUNK + JOB_CHUNK; pos++) {
1283
drmaa_errno = drmaa_wait(all_jobids[pos], jobid, sizeof(jobid)-1,
1284
&stat, DRMAA_TIMEOUT_WAIT_FOREVER, NULL, diagnosis, sizeof(diagnosis)-1);
1285
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
1286
fprintf(stderr, "drmaa_wait(%s) failed - retry: %s\n", all_jobids[pos], diagnosis);
1289
} while (drmaa_errno == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE);
1291
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
1292
fprintf(stderr, "drmaa_wait(%s) failed: %s\n", all_jobids[pos], diagnosis);
1293
free_jobids(all_jobids, size_all_jobids);
1296
printf("waited job \"%s\"\n", all_jobids[pos]);
1297
FREE(all_jobids[pos]);
1300
free_jobids(all_jobids, size_all_jobids);
1302
if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
1303
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
1309
case ST_SUBMITMIXTURE_SYNC_ALL_DISPOSE:
1310
/* - drmaa_init() is called
1311
- submit a mixture of single and bulk jobs
1312
- do drmaa_synchronize(DRMAA_JOB_IDS_SESSION_ALL, dispose)
1313
to wait for all jobs to finish
1314
- then drmaa_exit() is called */
1316
const char *session_all[] = { DRMAA_JOB_IDS_SESSION_ALL, NULL };
1320
sleeper_job = NEXT_ARGV(argc, argv);
1322
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
1323
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
1326
report_session_key();
1329
* submit some bulk jobs
1331
if (!(jt = create_sleeper_job_template(5, 1, 0))) {
1332
fprintf(stderr, "create_sleeper_job_template() failed\n");
1335
for (i=0; i<NBULKS; i++) {
1336
drmaa_job_ids_t *jobids;
1339
while ((drmaa_errno=drmaa_run_bulk_jobs(&jobids, jt, 1, JOB_CHUNK, 1, diagnosis,
1340
sizeof(diagnosis)-1))==DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) {
1341
fprintf(stderr, "drmaa_run_bulk_jobs() failed - retry: %s\n", diagnosis);
1344
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
1345
fprintf(stderr, "drmaa_run_bulk_jobs() failed: %s\n", diagnosis);
1349
printf("submitted bulk job with jobids:\n");
1350
for (j=0; j<JOB_CHUNK; j++) {
1351
drmaa_get_next_job_id(jobids, jobid, sizeof(jobid)-1);
1352
printf("\t \"%s\"\n", jobid);
1354
drmaa_release_job_ids(jobids);
1357
drmaa_delete_job_template(jt, NULL, 0);
1360
* submit some sequential jobs
1362
if (!(jt = create_sleeper_job_template(5, 0, 0))) {
1363
fprintf(stderr, "create_sleeper_job_template() failed\n");
1366
for (i=0; i<JOB_CHUNK; i++) {
1367
while ((drmaa_errno=drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis,
1368
sizeof(diagnosis)-1)) == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) {
1369
fprintf(stderr, "drmaa_run_job() failed - retry: %s\n", diagnosis);
1372
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
1373
fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis);
1376
printf("\t \"%s\"\n", jobid);
1378
drmaa_delete_job_template(jt, NULL, 0);
1381
* synchronize with all jobs
1383
drmaa_errno = drmaa_synchronize(session_all, DRMAA_TIMEOUT_WAIT_FOREVER, 1, diagnosis, sizeof(diagnosis)-1);
1384
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
1385
fprintf(stderr, "drmaa_synchronize(DRMAA_JOB_IDS_SESSION_ALL, dispose) failed: %s\n", diagnosis);
1388
printf("waited all jobs\n");
1389
if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
1390
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
1396
case ST_SUBMITMIXTURE_SYNC_ALL_NODISPOSE:
1398
/* - drmaa_init() is called
1399
- submit a mixture of single and bulk jobs
1400
- do drmaa_synchronize(DRMAA_JOB_IDS_SESSION_ALL, no-dispose)
1401
to wait for all jobs to finish
1402
- do drmaa_wait(DRMAA_JOB_IDS_SESSION_ANY) until
1403
DRMAA_ERRNO_INVALID_JOB to reap all jobs
1404
- then drmaa_exit() is called */
1407
int size_all_jobids = NBULKS*JOB_CHUNK + JOB_CHUNK + 1;
1408
const char *all_jobids[NBULKS*JOB_CHUNK + JOB_CHUNK + 1];
1409
const char *session_all[] = { DRMAA_JOB_IDS_SESSION_ALL, NULL };
1413
init_jobids(all_jobids, size_all_jobids);
1416
sleeper_job = NEXT_ARGV(argc, argv);
1418
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
1419
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
1422
report_session_key();
1425
* submit some bulk jobs
1427
if (!(jt = create_sleeper_job_template(5, 1, 0))) {
1428
fprintf(stderr, "create_sleeper_job_template() failed\n");
1431
for (i=0; i<NBULKS; i++) {
1432
drmaa_job_ids_t *jobids;
1435
while ((drmaa_errno=drmaa_run_bulk_jobs(&jobids, jt, 1, JOB_CHUNK, 1, diagnosis,
1436
sizeof(diagnosis)-1))==DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) {
1437
fprintf(stderr, "drmaa_run_bulk_jobs() failed - retry: %s\n", diagnosis);
1440
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
1441
fprintf(stderr, "drmaa_run_bulk_jobs() failed: %s\n", diagnosis);
1442
free_jobids(all_jobids, size_all_jobids);
1446
printf("submitted bulk job with jobids:\n");
1447
for (j=0; j<JOB_CHUNK; j++) {
1448
drmaa_get_next_job_id(jobids, jobid, sizeof(jobid)-1);
1449
all_jobids[pos++] = strdup(jobid);
1450
printf("\t \"%s\"\n", jobid);
1452
drmaa_release_job_ids(jobids);
1455
drmaa_delete_job_template(jt, NULL, 0);
1458
* submit some sequential jobs
1460
if (!(jt = create_sleeper_job_template(5, 0, 0))) {
1461
fprintf(stderr, "create_sleeper_job_template() failed\n");
1464
for (i=0; i<JOB_CHUNK; i++) {
1465
while ((drmaa_errno=drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis,
1466
sizeof(diagnosis)-1)) == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) {
1467
fprintf(stderr, "drmaa_run_job() failed - retry: %s\n", diagnosis);
1470
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
1471
fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis);
1472
free_jobids(all_jobids, size_all_jobids);
1475
printf("\t \"%s\"\n", jobid);
1476
all_jobids[pos++] = strdup(jobid);
1479
/* set string array end mark */
1480
all_jobids[pos] = NULL;
1482
drmaa_delete_job_template(jt, NULL, 0);
1485
* synchronize with all jobs
1487
drmaa_errno = drmaa_synchronize(session_all, DRMAA_TIMEOUT_WAIT_FOREVER, 0, diagnosis, sizeof(diagnosis)-1);
1488
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
1489
fprintf(stderr, "drmaa_synchronize(DRMAA_JOB_IDS_SESSION_ALL, dispose) failed: %s\n", diagnosis);
1490
free_jobids(all_jobids, size_all_jobids);
1493
printf("synchronized with all jobs\n");
1496
* wait all those jobs
1498
for (pos=0; pos<NBULKS*JOB_CHUNK + JOB_CHUNK; pos++) {
1501
drmaa_errno = drmaa_wait(all_jobids[pos], jobid, sizeof(jobid)-1,
1502
&stat, DRMAA_TIMEOUT_WAIT_FOREVER, NULL, diagnosis, sizeof(diagnosis)-1);
1503
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
1504
fprintf(stderr, "drmaa_wait(%s) failed - retry: %s\n", all_jobids[pos], diagnosis);
1507
} while (drmaa_errno == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE);
1509
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
1510
fprintf(stderr, "drmaa_wait(%s) failed: %s\n", all_jobids[pos], diagnosis);
1511
free_jobids(all_jobids, size_all_jobids);
1514
printf("waited job \"%s\"\n", all_jobids[pos]);
1515
FREE(all_jobids[pos]);
1517
free_jobids(all_jobids, size_all_jobids);
1519
if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
1520
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
1526
case ST_SUBMITMIXTURE_SYNC_ALLIDS_DISPOSE:
1527
/* - drmaa_init() is called
1528
- submit a mixture of single and bulk jobs
1529
- do drmaa_synchronize(all_jobids, dispose)
1530
to wait for all jobs to finish
1531
- then drmaa_exit() is called */
1533
int size_all_jobids = NBULKS*JOB_CHUNK + JOB_CHUNK + 1;
1534
const char *all_jobids[NBULKS*JOB_CHUNK + JOB_CHUNK + 1];
1538
init_jobids(all_jobids, size_all_jobids);
1541
sleeper_job = NEXT_ARGV(argc, argv);
1543
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
1544
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
1547
report_session_key();
1550
* submit some bulk jobs
1552
if (!(jt = create_sleeper_job_template(5, 1, 0))) {
1553
fprintf(stderr, "create_sleeper_job_template() failed\n");
1556
for (i=0; i<NBULKS; i++) {
1557
drmaa_job_ids_t *jobids;
1560
while ((drmaa_errno=drmaa_run_bulk_jobs(&jobids, jt, 1, JOB_CHUNK, 1, diagnosis,
1561
sizeof(diagnosis)-1))==DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) {
1562
fprintf(stderr, "drmaa_run_bulk_jobs() failed - retry: %s\n", diagnosis);
1565
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
1566
fprintf(stderr, "drmaa_run_bulk_jobs() failed: %s\n", diagnosis);
1567
free_jobids(all_jobids, size_all_jobids);
1571
printf("submitted bulk job with jobids:\n");
1572
for (j=0; j<JOB_CHUNK; j++) {
1573
drmaa_get_next_job_id(jobids, jobid, sizeof(jobid)-1);
1574
printf("\t \"%s\"\n", jobid);
1575
all_jobids[pos++] = strdup(jobid);
1577
drmaa_release_job_ids(jobids);
1580
drmaa_delete_job_template(jt, NULL, 0);
1583
* submit some sequential jobs
1585
if (!(jt = create_sleeper_job_template(5, 0, 0))) {
1586
fprintf(stderr, "create_sleeper_job_template() failed\n");
1587
free_jobids(all_jobids, size_all_jobids);
1590
for (i=0; i<JOB_CHUNK; i++) {
1591
while ((drmaa_errno=drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis,
1592
sizeof(diagnosis)-1)) == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) {
1593
fprintf(stderr, "drmaa_run_job() failed - retry: %s\n", diagnosis);
1596
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
1597
fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis);
1598
free_jobids(all_jobids, size_all_jobids);
1601
printf("\t \"%s\"\n", jobid);
1602
all_jobids[pos++] = strdup(jobid);
1605
/* set string array end mark */
1606
all_jobids[pos] = NULL;
1608
drmaa_delete_job_template(jt, NULL, 0);
1611
* synchronize with all jobs
1613
drmaa_errno = drmaa_synchronize(all_jobids, DRMAA_TIMEOUT_WAIT_FOREVER, 1, diagnosis, sizeof(diagnosis)-1);
1614
free_jobids(all_jobids, size_all_jobids);
1616
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
1617
fprintf(stderr, "drmaa_synchronize(DRMAA_JOB_IDS_SESSION_ALL, dispose) failed: %s\n", diagnosis);
1620
printf("waited all jobs\n");
1621
if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
1622
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
1628
case ST_SUBMITMIXTURE_SYNC_ALLIDS_NODISPOSE:
1630
/* - drmaa_init() is called
1631
- submit a mixture of single and bulk jobs
1632
- do drmaa_synchronize(all_jobids, no-dispose)
1633
to wait for all jobs to finish
1634
- do drmaa_wait(DRMAA_JOB_IDS_SESSION_ANY) until
1635
DRMAA_ERRNO_INVALID_JOB to reap all jobs
1636
- then drmaa_exit() is called */
1639
int size_all_jobids = NBULKS*JOB_CHUNK + JOB_CHUNK+1;
1640
const char *all_jobids[NBULKS*JOB_CHUNK + JOB_CHUNK+1];
1644
init_jobids(all_jobids, size_all_jobids);
1647
sleeper_job = NEXT_ARGV(argc, argv);
1649
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
1650
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
1653
report_session_key();
1656
* submit some bulk jobs
1658
if (!(jt = create_sleeper_job_template(5, 1, 0))) {
1659
fprintf(stderr, "create_sleeper_job_template() failed\n");
1662
for (i=0; i<NBULKS; i++) {
1663
drmaa_job_ids_t *jobids;
1666
while ((drmaa_errno=drmaa_run_bulk_jobs(&jobids, jt, 1, JOB_CHUNK, 1, diagnosis,
1667
sizeof(diagnosis)-1))==DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) {
1668
fprintf(stderr, "drmaa_run_bulk_jobs() failed - retry: %s\n", diagnosis);
1671
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
1672
fprintf(stderr, "drmaa_run_bulk_jobs() failed: %s\n", diagnosis);
1673
free_jobids(all_jobids, size_all_jobids);
1677
printf("submitted bulk job with jobids:\n");
1678
for (j=0; j<JOB_CHUNK; j++) {
1679
drmaa_get_next_job_id(jobids, jobid, sizeof(jobid)-1);
1680
all_jobids[pos++] = strdup(jobid);
1681
printf("\t \"%s\"\n", jobid);
1683
drmaa_release_job_ids(jobids);
1685
drmaa_delete_job_template(jt, NULL, 0);
1688
* submit some sequential jobs
1690
if (!(jt = create_sleeper_job_template(5, 0, 0))) {
1691
fprintf(stderr, "create_sleeper_job_template() failed\n");
1692
free_jobids(all_jobids, size_all_jobids);
1695
for (i=0; i<JOB_CHUNK; i++) {
1696
while ((drmaa_errno=drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis,
1697
sizeof(diagnosis)-1)) == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) {
1698
fprintf(stderr, "drmaa_run_job() failed - retry: %s\n", diagnosis);
1701
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
1702
fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis);
1703
free_jobids(all_jobids, size_all_jobids);
1706
printf("\t \"%s\"\n", jobid);
1707
all_jobids[pos++] = strdup(jobid);
1710
/* set string array end mark */
1711
all_jobids[pos] = NULL;
1713
drmaa_delete_job_template(jt, NULL, 0);
1716
* synchronize with all jobs
1718
drmaa_errno = drmaa_synchronize(all_jobids, DRMAA_TIMEOUT_WAIT_FOREVER, 0, diagnosis, sizeof(diagnosis)-1);
1719
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
1720
fprintf(stderr, "drmaa_synchronize(DRMAA_JOB_IDS_SESSION_ALL, dispose) failed: %s\n", diagnosis);
1721
free_jobids(all_jobids, size_all_jobids);
1724
printf("synchronized with all jobs\n");
1727
* wait all those jobs
1729
for (pos=0; pos<NBULKS*JOB_CHUNK + JOB_CHUNK; pos++) {
1732
drmaa_errno = drmaa_wait(all_jobids[pos], jobid, sizeof(jobid)-1,
1733
&stat, DRMAA_TIMEOUT_WAIT_FOREVER, NULL, diagnosis, sizeof(diagnosis)-1);
1734
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
1735
fprintf(stderr, "drmaa_wait(%s) failed - retry: %s\n", all_jobids[pos], diagnosis);
1738
} while (drmaa_errno == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE);
1740
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
1741
fprintf(stderr, "drmaa_wait(%s) failed: %s\n", all_jobids[pos], diagnosis);
1742
free_jobids(all_jobids, size_all_jobids);
1745
printf("waited job \"%s\"\n", all_jobids[pos]);
1747
free_jobids(all_jobids, size_all_jobids);
1749
if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
1750
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
1756
case ST_SUBMIT_PAUSE_SUBMIT_SYNC:
1757
/* - drmaa_init() is called
1758
- a job is submitted
1759
- do a long sleep(SGE_COMMPROC_TIMEOUT+30) to enforce the SGE commproc timeout
1760
- another job is submitted
1761
- do drmaa_synchronize(DRMAA_JOB_IDS_SESSION_ALL, dispose)
1762
- then drmaa_exit() is called */
1764
int size_all_jobids = 2 + 1;
1765
const char *all_jobids[2 + 1];
1769
init_jobids(all_jobids, size_all_jobids);
1772
sleeper_job = NEXT_ARGV(argc, argv);
1774
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
1775
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
1778
report_session_key();
1781
* submit some sequential jobs
1783
if (!(jt = create_sleeper_job_template(5, 0, 0))) {
1784
fprintf(stderr, "create_sleeper_job_template() failed\n");
1787
for (i=0; i<2; i++) {
1788
drmaa_errno = drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis,
1789
sizeof(diagnosis)-1);
1790
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
1791
fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis);
1792
free_jobids(all_jobids, size_all_jobids);
1795
printf("\t \"%s\"\n", jobid);
1796
all_jobids[pos++] = strdup(jobid);
1799
* enforce SGE commproc timeout
1800
* this timeout must be handled transparently by DRMAA implementation
1803
printf("sleeping %d seconds\n", SGE_COMMPROC_TIMEOUT+30);
1804
sleep(SGE_COMMPROC_TIMEOUT+30);
1807
/* set string array end mark */
1808
all_jobids[pos] = NULL;
1810
drmaa_delete_job_template(jt, NULL, 0);
1813
* synchronize with all jobs
1815
drmaa_errno = drmaa_synchronize(all_jobids, DRMAA_TIMEOUT_WAIT_FOREVER, 1, diagnosis, sizeof(diagnosis)-1);
1816
free_jobids(all_jobids, size_all_jobids);
1817
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
1818
fprintf(stderr, "drmaa_synchronize(DRMAA_JOB_IDS_SESSION_ALL, dispose) failed: %s\n", diagnosis);
1821
printf("waited all jobs\n");
1823
if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
1824
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
1830
case ST_INPUT_FILE_FAILURE:
1831
case ST_ERROR_FILE_FAILURE:
1832
case ST_OUTPUT_FILE_FAILURE:
1834
int aborted, stat, remote_ps;
1836
const char *session_all[] = { DRMAA_JOB_IDS_SESSION_ALL, NULL };
1839
sleeper_job = NEXT_ARGV(argc, argv);
1841
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
1842
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
1845
report_session_key();
1847
/* submit a job that must fail */
1848
drmaa_allocate_job_template(&jt, NULL, 0);
1849
drmaa_set_attribute(jt, DRMAA_REMOTE_COMMAND, sleeper_job, NULL, 0);
1851
switch (test_case) {
1852
case ST_OUTPUT_FILE_FAILURE:
1853
drmaa_set_attribute(jt, DRMAA_JOIN_FILES, "y", NULL, 0);
1854
drmaa_set_attribute(jt, DRMAA_OUTPUT_PATH, ":/etc/passwd", NULL, 0);
1857
case ST_ERROR_FILE_FAILURE:
1858
drmaa_set_attribute(jt, DRMAA_JOIN_FILES, "n", NULL, 0);
1859
drmaa_set_attribute(jt, DRMAA_ERROR_PATH, ":/etc/passwd", NULL, 0);
1862
case ST_INPUT_FILE_FAILURE:
1863
drmaa_set_attribute(jt, DRMAA_INPUT_PATH, ":<not existing file>", NULL, 0);
1867
if ((drmaa_errno = drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis,
1868
sizeof(diagnosis)-1)) != DRMAA_ERRNO_SUCCESS) {
1869
fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis);
1872
drmaa_delete_job_template(jt, NULL, 0);
1873
printf("submitted job \"%s\"\n", jobid);
1875
/* synchronize with job to finish but do not dispose job finish information */
1876
if ((drmaa_errno = drmaa_synchronize(session_all, DRMAA_TIMEOUT_WAIT_FOREVER, 0,
1877
diagnosis, sizeof(diagnosis)-1))!=DRMAA_ERRNO_SUCCESS) {
1878
fprintf(stderr, "drmaa_synchronize(DRMAA_JOB_IDS_SESSION_ALL, dispose) failed: %s\n", diagnosis);
1881
printf("synchronized with job finish\n");
1884
drmaa_errno = drmaa_job_ps(jobid, &remote_ps, diagnosis, sizeof(diagnosis)-1);
1885
if (remote_ps != DRMAA_PS_FAILED) {
1886
fprintf(stderr, "job \"%s\" is not in failed state: %s\n",
1887
jobid, drmaa_state2str(remote_ps));
1892
if ((drmaa_errno = drmaa_wait(jobid, NULL, 0, &stat, DRMAA_TIMEOUT_NO_WAIT, NULL,
1893
diagnosis, sizeof(diagnosis)-1)) != DRMAA_ERRNO_SUCCESS) {
1894
printf("drmaa_wait() failed %s: %s\n", drmaa_strerror(drmaa_errno), diagnosis);
1898
/* job finish information */
1899
drmaa_wifaborted(&aborted, stat, diagnosis, sizeof(diagnosis)-1);
1901
fprintf(stderr, "job \"%s\" failed but drmaa_wifaborted() returns false\n",
1905
printf("waited job \"%s\" that never ran\n", jobid);
1907
if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
1908
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
1914
case ST_SUPPORTED_ATTR:
1915
case ST_SUPPORTED_VATTR:
1916
/* - drmaa_init() is called
1917
- drmaa_get_attribute_names()/drmaa_get_vector_attribute_names() is called
1918
- the names of all supported non vector/vector attributes are printed
1919
- then drmaa_exit() is called */
1921
drmaa_attr_names_t *vector;
1922
char attr_name[DRMAA_ATTR_BUFFER];
1925
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
1926
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
1930
if (test_case == ST_SUPPORTED_ATTR)
1931
drmaa_errno = drmaa_get_attribute_names(&vector, diagnosis, sizeof(diagnosis)-1);
1933
drmaa_errno = drmaa_get_vector_attribute_names(&vector, diagnosis, sizeof(diagnosis)-1);
1935
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
1936
fprintf(stderr, "drmaa_get_attribute_names()/drmaa_get_vector_attribute_names() failed: %s\n",
1941
drmaa_errno = drmaa_get_num_attr_names(vector, &size);
1943
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
1944
fprintf(stderr, "drmaa_get_num_attr_names() failed: %s\n", drmaa_strerror(drmaa_errno));
1948
while ((drmaa_errno=drmaa_get_next_attr_name(vector, attr_name, sizeof(attr_name)-1))==DRMAA_ERRNO_SUCCESS) {
1950
printf("%s\n", attr_name);
1953
/* we don't need vector any longer - free it */
1954
drmaa_release_attr_names(vector);
1958
fprintf(stderr, "Got incorrect size from drmaa_get_num_attr_names()\n");
1962
if (drmaa_errno != DRMAA_ERRNO_NO_MORE_ELEMENTS) {
1963
fprintf(stderr, "Got incorrect return value from drmaa_get_next_attr_name()\n");
1967
if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
1968
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
1975
/* - drmaa_version() is called
1976
- version information is printed */
1978
unsigned int major, minor;
1980
if (drmaa_version(&major, &minor, diagnosis, sizeof(diagnosis)-1)
1981
!=DRMAA_ERRNO_SUCCESS) {
1982
fprintf(stderr, "drmaa_version() failed: %s\n", diagnosis);
1986
printf("version %d.%d\n", major, minor);
1988
if ((major != 1) || (minor != 0)) {
1989
fprintf(stderr, "drmaa_version() failed -- incorrect version number : %d.%d\n", major, minor);
1999
char output_string[1024];
2001
if (test_case == ST_CONTACT)
2002
drmaa_errno = drmaa_get_contact(output_string, sizeof(output_string)-1,
2003
diagnosis, sizeof(diagnosis)-1);
2004
else if (test_case == ST_DRM_SYSTEM)
2005
drmaa_errno = drmaa_get_DRM_system(output_string, sizeof(output_string)-1,
2006
diagnosis, sizeof(diagnosis)-1);
2007
else if (test_case == ST_DRMAA_IMPL)
2008
drmaa_errno = drmaa_get_DRMAA_implementation(output_string, sizeof(output_string)-1,
2009
diagnosis, sizeof(diagnosis)-1);
2011
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
2012
fprintf(stderr, "drmaa_get_contact()/drmaa_get_DRM_system() failed: %s\n", diagnosis);
2016
if (test_case == ST_CONTACT)
2017
printf("drmaa_get_contact() returned \"%s\" before init\n", output_string);
2018
else if (test_case == ST_DRM_SYSTEM)
2019
printf("drmaa_get_DRM_system() returned \"%s\" before init\n", output_string);
2020
else if (test_case == ST_DRMAA_IMPL)
2021
printf("drmaa_get_DRMAA_implementation() returned \"%s\" before init\n", output_string);
2023
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
2024
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
2028
if (test_case == ST_CONTACT)
2029
drmaa_errno = drmaa_get_contact(output_string, sizeof(output_string)-1,
2030
diagnosis, sizeof(diagnosis)-1);
2031
else if (test_case == ST_DRM_SYSTEM)
2032
drmaa_errno = drmaa_get_DRM_system(output_string, sizeof(output_string)-1,
2033
diagnosis, sizeof(diagnosis)-1);
2034
else if (test_case == ST_DRMAA_IMPL)
2035
drmaa_errno = drmaa_get_DRMAA_implementation(output_string, sizeof(output_string)-1,
2036
diagnosis, sizeof(diagnosis)-1);
2038
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
2039
fprintf(stderr, "drmaa_get_contact()/drmaa_get_DRM_system() failed: %s\n", diagnosis);
2043
if (test_case == ST_CONTACT)
2044
printf("drmaa_get_contact() returned \"%s\" after init\n", output_string);
2045
else if (test_case == ST_DRM_SYSTEM)
2046
printf("drmaa_get_DRM_system() returned \"%s\" after init\n", output_string);
2047
else if (test_case == ST_DRMAA_IMPL)
2048
printf("drmaa_get_DRMAA_implementation() returned \"%s\" after init\n", output_string);
2050
if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
2051
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
2057
case ST_INPUT_BECOMES_OUTPUT:
2059
const char *mirror_job = "/bin/cat",
2061
*output_path = NULL;
2064
const char *mirror_text = "thefoxjumps...";
2065
char* local_host_name = NULL;
2068
input_path = NEXT_ARGV(argc, argv);
2069
output_path = NEXT_ARGV(argc, argv);
2072
if (!(fp = fopen(input_path, "w"))) {
2073
fprintf(stderr, "fopen(w) failed: %s\n", strerror(errno));
2076
fprintf(fp, "%s\n", mirror_text);
2079
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
2080
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
2083
report_session_key();
2085
cl_com_gethostname(&local_host_name, NULL, NULL, NULL);
2086
if (local_host_name == NULL) {
2087
fprintf(stderr, "can't get local hostname\n");
2090
if (submit_input_mirror(1, mirror_job, input_path, output_path,
2091
NULL, 1, local_host_name)!=DRMAA_ERRNO_SUCCESS) {
2094
FREE(local_host_name);
2096
if (wait_n_jobs(1) != DRMAA_ERRNO_SUCCESS) {
2100
if (!(fp=fopen(output_path, "r"))) {
2101
fprintf(stderr, "fopen(%s) failed: %s\n", output_path, strerror(errno));
2104
fscanf(fp, "%s", buffer);
2105
if (strcmp(buffer, mirror_text)) {
2106
fprintf(stderr, "wrong output file: %s\n", buffer);
2110
if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
2111
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
2117
case ST_SUBMIT_IN_HOLD_RELEASE:
2118
case ST_SUBMIT_IN_HOLD_DELETE:
2120
const char *session_all[] = { DRMAA_JOB_IDS_SESSION_ALL, NULL };
2125
sleeper_job = NEXT_ARGV(argc, argv);
2127
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
2128
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
2131
report_session_key();
2132
if (!(jt = create_sleeper_job_template(5, 0, 1))) {
2133
fprintf(stderr, "create_sleeper_job_template() failed\n");
2136
if (drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis, sizeof(diagnosis)-1)!=DRMAA_ERRNO_SUCCESS) {
2137
fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis);
2140
drmaa_delete_job_template(jt, NULL, 0);
2141
printf("submitted job in hold state \"%s\"\n", jobid);
2142
if (drmaa_job_ps(jobid, &job_state, diagnosis, sizeof(diagnosis)-1)!=DRMAA_ERRNO_SUCCESS) {
2143
fprintf(stderr, "drmaa_job_ps(\"%s\")) failed: %s\n", jobid, diagnosis);
2146
if (job_state != DRMAA_PS_USER_ON_HOLD && job_state != DRMAA_PS_USER_SYSTEM_ON_HOLD) {
2147
fprintf(stderr, "job \"%s\" is not in user hold state: %s\n",
2148
jobid, drmaa_state2str(job_state));
2151
printf("verified user hold state for job \"%s\"\n", jobid);
2153
if (test_case == ST_SUBMIT_IN_HOLD_RELEASE) {
2154
if (drmaa_control(jobid, DRMAA_CONTROL_RELEASE, diagnosis, sizeof(diagnosis)-1)!=DRMAA_ERRNO_SUCCESS) {
2155
fprintf(stderr, "drmaa_control(%s, DRMAA_CONTROL_RELEASE) failed: %s\n",
2159
printf("released user hold state for job \"%s\"\n", jobid);
2161
if (drmaa_control(jobid, DRMAA_CONTROL_TERMINATE, diagnosis, sizeof(diagnosis)-1)!=DRMAA_ERRNO_SUCCESS) {
2162
fprintf(stderr, "drmaa_control(%s, DRMAA_CONTROL_TERMINATE) failed: %s\n",
2166
printf("terminated job in hold state \"%s\"\n", jobid);
2169
/* wait for the job to finish, but do not dispose of the job finish information */
2170
if ((drmaa_errno = drmaa_synchronize(session_all, DRMAA_TIMEOUT_WAIT_FOREVER, 0,
2171
diagnosis, sizeof(diagnosis)-1))!=DRMAA_ERRNO_SUCCESS) {
2172
fprintf(stderr, "drmaa_synchronize(DRMAA_JOB_IDS_SESSION_ALL, dispose) failed: %s\n", diagnosis);
2175
printf("synchronized with job finish\n");
2177
/* report job finish state */
2178
if (drmaa_job_ps(jobid, &job_state, diagnosis, sizeof(diagnosis)-1)!=DRMAA_ERRNO_SUCCESS) {
2179
fprintf(stderr, "drmaa_job_ps(\"%s\")) failed: %s\n", jobid, diagnosis);
2182
printf("state of job \"%s\" is now %s\n", jobid, drmaa_state2str(job_state));
2183
if ((test_case == ST_SUBMIT_IN_HOLD_RELEASE && job_state != DRMAA_PS_DONE) ||
2184
(test_case != ST_SUBMIT_IN_HOLD_RELEASE && job_state != DRMAA_PS_FAILED)) {
2185
fprintf(stderr, "job \"%s\" terminated with unexpected state \"%s\"\n", jobid, drmaa_state2str(job_state));
2189
if (wait_n_jobs(1) != DRMAA_ERRNO_SUCCESS) {
2192
if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
2193
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
2199
case ST_BULK_SUBMIT_IN_HOLD_SESSION_RELEASE:
2200
case ST_BULK_SUBMIT_IN_HOLD_SINGLE_RELEASE:
2201
case ST_BULK_SUBMIT_IN_HOLD_SESSION_DELETE:
2202
case ST_BULK_SUBMIT_IN_HOLD_SINGLE_DELETE:
2204
const char *session_all[] = { DRMAA_JOB_IDS_SESSION_ALL, NULL };
2205
int size_all_jobids = JOB_CHUNK + 1;
2206
const char *all_jobids[JOB_CHUNK + 1];
2207
int job_state, pos = 0;
2210
init_jobids(all_jobids, size_all_jobids);
2213
sleeper_job = NEXT_ARGV(argc, argv);
2215
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
2216
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
2219
report_session_key();
2221
if (!(jt = create_sleeper_job_template(5, 1, 1))) {
2222
fprintf(stderr, "create_sleeper_job_template() failed\n");
2227
* Submit a bulk job in hold and verify state using drmaa_job_ps()
2230
drmaa_job_ids_t *jobids;
2232
if ((drmaa_errno=drmaa_run_bulk_jobs(&jobids, jt, 1, JOB_CHUNK, 1, diagnosis, sizeof(diagnosis)-1))!=DRMAA_ERRNO_SUCCESS) {
2233
printf("failed submitting bulk job (%s): %s\n", drmaa_strerror(drmaa_errno), diagnosis);
2236
printf("submitted bulk job with jobids:\n");
2237
for (j=0; j<JOB_CHUNK; j++) {
2239
drmaa_get_next_job_id(jobids, jobid, sizeof(jobid)-1);
2240
printf("\t \"%s\"\n", jobid);
2242
/* copy jobid into jobid array */
2243
all_jobids[pos++] = strdup(jobid);
2245
if (drmaa_job_ps(jobid, &job_state, diagnosis, sizeof(diagnosis)-1)!=DRMAA_ERRNO_SUCCESS) {
2246
fprintf(stderr, "drmaa_job_ps(\"%s\")) failed: %s\n", jobid, diagnosis);
2247
free_jobids(all_jobids, size_all_jobids);
2250
if (job_state != DRMAA_PS_USER_ON_HOLD && job_state != DRMAA_PS_USER_SYSTEM_ON_HOLD) {
2251
fprintf(stderr, "job \"%s\" is not in user hold state: %s\n",
2252
jobid, drmaa_state2str(job_state));
2253
free_jobids(all_jobids, size_all_jobids);
2257
drmaa_release_job_ids(jobids);
2259
drmaa_delete_job_template(jt, NULL, 0);
2261
printf("verified user hold state for bulk job\n");
2264
* Release or terminate all jobs using drmaa_control() depending on the test case
2265
* drmaa_control() is applied multiple times on all tasks or on the whole session
2267
if (test_case == ST_BULK_SUBMIT_IN_HOLD_SINGLE_RELEASE ||
2268
test_case == ST_BULK_SUBMIT_IN_HOLD_SINGLE_DELETE) {
2270
if (test_case == ST_BULK_SUBMIT_IN_HOLD_SINGLE_RELEASE)
2271
ctrl_op = DRMAA_CONTROL_RELEASE;
2273
ctrl_op = DRMAA_CONTROL_TERMINATE;
2275
for (pos=0; pos<JOB_CHUNK; pos++) {
2276
if (drmaa_control(all_jobids[pos], ctrl_op, diagnosis,
2277
sizeof(diagnosis)-1)!=DRMAA_ERRNO_SUCCESS) {
2278
fprintf(stderr, "drmaa_control(%s, %s) failed: %s\n",
2279
all_jobids[pos], drmaa_ctrl2str(ctrl_op), diagnosis);
2280
free_jobids(all_jobids, size_all_jobids);
2285
if (test_case == ST_BULK_SUBMIT_IN_HOLD_SESSION_RELEASE)
2286
ctrl_op = DRMAA_CONTROL_RELEASE;
2288
ctrl_op = DRMAA_CONTROL_TERMINATE;
2290
if (drmaa_control(DRMAA_JOB_IDS_SESSION_ALL, ctrl_op, diagnosis,
2291
sizeof(diagnosis)-1)!=DRMAA_ERRNO_SUCCESS) {
2292
fprintf(stderr, "drmaa_control(%s, %s) failed: %s\n",
2293
DRMAA_JOB_IDS_SESSION_ALL, drmaa_ctrl2str(ctrl_op), diagnosis);
2294
free_jobids(all_jobids, size_all_jobids);
2298
printf("released/terminated all jobs\n");
2300
/* wait for all jobs to finish, but do not dispose of the job finish information */
2301
if ((drmaa_errno = drmaa_synchronize(session_all, DRMAA_TIMEOUT_WAIT_FOREVER, 0,
2302
diagnosis, sizeof(diagnosis)-1))!=DRMAA_ERRNO_SUCCESS) {
2303
fprintf(stderr, "drmaa_synchronize(DRMAA_JOB_IDS_SESSION_ALL, dispose) failed: %s\n", diagnosis);
2304
free_jobids(all_jobids, size_all_jobids);
2307
printf("synchronized with job finish\n");
2310
* Verify job state of all jobs in the job id array
2312
for (pos=0; pos<JOB_CHUNK; pos++) {
2313
if (drmaa_job_ps(all_jobids[pos], &job_state, diagnosis, sizeof(diagnosis)-1)!=DRMAA_ERRNO_SUCCESS) {
2314
fprintf(stderr, "drmaa_job_ps(\"%s\")) failed: %s\n", all_jobids[pos], diagnosis);
2315
free_jobids(all_jobids, size_all_jobids);
2319
printf("state of job \"%s\" is now %s\n", all_jobids[pos], drmaa_state2str(job_state));
2320
if (((test_case == ST_BULK_SUBMIT_IN_HOLD_SINGLE_RELEASE ||
2321
test_case == ST_BULK_SUBMIT_IN_HOLD_SESSION_RELEASE) && job_state != DRMAA_PS_DONE) ||
2322
((test_case == ST_BULK_SUBMIT_IN_HOLD_SINGLE_DELETE ||
2323
test_case == ST_BULK_SUBMIT_IN_HOLD_SESSION_DELETE) && job_state != DRMAA_PS_FAILED)) {
2324
fprintf(stderr, "job \"%s\" terminated with unexpected state \"%s\"\n", all_jobids[pos], drmaa_state2str(job_state));
2325
free_jobids(all_jobids, size_all_jobids);
2330
free_jobids(all_jobids, size_all_jobids);
2332
if (wait_n_jobs(JOB_CHUNK) != DRMAA_ERRNO_SUCCESS) {
2335
if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
2336
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
2342
case ST_SUBMIT_SUSPEND_RESUME_WAIT:
2344
int job_state, stat, exited, exit_status;
2348
sleeper_job = NEXT_ARGV(argc, argv);
2350
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
2351
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
2355
/* submit a job that runs long enough to be suspended and resumed */
2356
if (!(jt = create_sleeper_job_template(30, 0, 0))) {
2357
fprintf(stderr, "create_sleeper_job_template() failed\n");
2360
if (drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis, sizeof(diagnosis)-1)!=DRMAA_ERRNO_SUCCESS) {
2361
fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis);
2364
printf("submitted job \"%s\"\n", jobid);
2365
drmaa_delete_job_template(jt, NULL, 0);
2367
/* wait until job is running */
2369
if (drmaa_job_ps(jobid, &job_state, diagnosis, sizeof(diagnosis)-1)!=DRMAA_ERRNO_SUCCESS) {
2370
fprintf(stderr, "drmaa_job_ps() failed: %s\n", diagnosis);
2373
if (job_state != DRMAA_PS_RUNNING)
2375
} while (job_state != DRMAA_PS_RUNNING);
2376
printf("job \"%s\" is now running\n", jobid);
2378
/* drmaa_control() is used to suspend the job */
2379
if ((drmaa_errno=drmaa_control(jobid, DRMAA_CONTROL_SUSPEND, diagnosis,
2380
sizeof(diagnosis)-1))!=DRMAA_ERRNO_SUCCESS) {
2381
fprintf(stderr, "drmaa_control(\"%s\", DRMAA_CONTROL_SUSPEND) failed: %s (%s)\n",
2382
jobid, diagnosis, drmaa_strerror(drmaa_errno));
2385
printf("suspended job \"%s\"\n", jobid);
2387
/* drmaa_job_ps() is used to verify job was suspended */
2388
if (drmaa_job_ps(jobid, &job_state, diagnosis, sizeof(diagnosis)-1)!=DRMAA_ERRNO_SUCCESS) {
2389
fprintf(stderr, "drmaa_job_ps() failed: %s\n", diagnosis);
2392
if (job_state != DRMAA_PS_USER_SUSPENDED) {
2393
fprintf(stderr, "drmaa_job_ps(\"%s\") failed returns unexpected job state after "
2394
"job suspension: %s\n", jobid, drmaa_state2str(job_state));
2397
printf("verified suspend was done for job \"%s\"\n", jobid);
2399
/* drmaa_control() is used to resume the job */
2400
if ((drmaa_errno=drmaa_control(jobid, DRMAA_CONTROL_RESUME, diagnosis,
2401
sizeof(diagnosis)-1))!=DRMAA_ERRNO_SUCCESS) {
2402
fprintf(stderr, "drmaa_control(\"%s\", DRMAA_CONTROL_RESUME) failed: %s (%s)\n",
2403
jobid, diagnosis, drmaa_strerror(drmaa_errno));
2406
printf("resumed job \"%s\"\n", jobid);
2408
/* drmaa_job_ps() is used to verify job was resumed */
2409
if (drmaa_job_ps(jobid, &job_state, diagnosis, sizeof(diagnosis)-1)!=DRMAA_ERRNO_SUCCESS) {
2410
fprintf(stderr, "drmaa_job_ps() failed: %s\n", diagnosis);
2413
if (job_state != DRMAA_PS_RUNNING) {
2414
fprintf(stderr, "drmaa_job_ps(\"%s\") failed returns unexpected job state after "
2415
"job resume: %s\n", jobid, drmaa_state2str(job_state));
2418
printf("verified resume was done for job \"%s\"\n", jobid);
2420
/* drmaa_wait() is used to wait for the job's regular end */
2421
if ((drmaa_errno=drmaa_wait(jobid, NULL, 0, &stat,
2422
DRMAA_TIMEOUT_WAIT_FOREVER, NULL, diagnosis, sizeof(diagnosis)-1))!=DRMAA_ERRNO_SUCCESS) {
2423
fprintf(stderr, "drmaa_wait(\"%s\") failed: %s\n", jobid, diagnosis);
2427
drmaa_wifexited(&exited, stat, NULL, 0);
2428
if (!exited || (drmaa_wexitstatus(&exit_status, stat, NULL, 0), exit_status != 0)) {
2429
report_wrong_job_finish("expected regular job end", jobid, stat);
2432
printf("job \"%s\" finished as expected\n", jobid);
2434
if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
2435
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
2441
case ST_EMPTY_SESSION_WAIT:
2442
case ST_EMPTY_SESSION_SYNCHRONIZE_DISPOSE:
2443
case ST_EMPTY_SESSION_SYNCHRONIZE_NODISPOSE:
2445
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
2446
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
2450
switch (test_case) {
2451
case ST_EMPTY_SESSION_WAIT:
2452
/* drmaa_wait() must return DRMAA_ERRNO_INVALID_JOB */
2453
if ((drmaa_errno=drmaa_wait(DRMAA_JOB_IDS_SESSION_ANY, NULL, 0, NULL,
2454
DRMAA_TIMEOUT_WAIT_FOREVER, NULL, diagnosis, sizeof(diagnosis)-1))!=DRMAA_ERRNO_INVALID_JOB) {
2455
fprintf(stderr, "drmaa_wait(empty session) failed: %s\n", diagnosis);
2459
case ST_EMPTY_SESSION_SYNCHRONIZE_DISPOSE:
2460
case ST_EMPTY_SESSION_SYNCHRONIZE_NODISPOSE:
2462
const char *session_all[] = { DRMAA_JOB_IDS_SESSION_ALL, NULL };
2463
/* drmaa_synchronize(DRMAA_JOB_IDS_SESSION_ALL) must return DRMAA_ERRNO_SUCCESS */
2464
if ((drmaa_errno=drmaa_synchronize(session_all, DRMAA_TIMEOUT_WAIT_FOREVER,
2465
(test_case == ST_EMPTY_SESSION_SYNCHRONIZE_DISPOSE) ? 1 : 0,
2466
diagnosis, sizeof(diagnosis)-1))!=DRMAA_ERRNO_SUCCESS) {
2467
fprintf(stderr, "drmaa_synchronize(empty session) failed: %s\n", diagnosis);
2474
if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
2475
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
2481
case ST_EMPTY_SESSION_CONTROL:
2485
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
2486
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
2490
/* parse control operation */
2492
s = NEXT_ARGV(argc, argv);
2493
if ((ctrl_op = str2drmaa_ctrl(s)) == -1) {
2494
fprintf(stderr, "unknown DRMAA control operation \"%s\"\n", s);
2499
if ((drmaa_errno=drmaa_control(DRMAA_JOB_IDS_SESSION_ALL, ctrl_op,
2500
diagnosis, sizeof(diagnosis)-1))!=DRMAA_ERRNO_SUCCESS) {
2501
fprintf(stderr, "drmaa_control(empty_session, %s) failed: %s (%s)\n",
2502
drmaa_ctrl2str(ctrl_op), diagnosis, drmaa_strerror(drmaa_errno));
2506
if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
2507
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
2513
case ST_DRMAA_JOB_PS:
2515
char diagnosis[1024];
2518
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
2519
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
2522
report_session_key();
2524
while ( *argc > 1) {
2527
/* for this test args must always be parsed */
2528
jobid = NEXT_ARGV(argc, argv);
2529
if (drmaa_job_ps(jobid, &state, diagnosis, sizeof(diagnosis)-1)
2530
!=DRMAA_ERRNO_SUCCESS) {
2531
fprintf(stderr, "drmaa_job_ps(\"%s\") failed: %s\n", jobid, diagnosis);
2534
printf("%20s %s\n", jobid, drmaa_state2str(state));
2537
if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
2538
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
2544
case ST_DRMAA_CONTROL:
2546
char diagnosis[1024];
2547
const char *s, *jobid;
2548
int drmaa_target_errno, drmaa_errno, drmaa_control_op = -1;
2550
/* parse control operation */
2551
s = NEXT_ARGV(argc, argv);
2552
if ((drmaa_control_op = str2drmaa_ctrl(s)) == -1) {
2553
fprintf(stderr, "unknown DRMAA control operation \"%s\"\n", s);
2556
/* parse the expected errno value */
2557
s = NEXT_ARGV(argc, argv);
2558
if ((drmaa_target_errno = str2drmaa_errno(s)) == -1) {
2559
fprintf(stderr, "unknown DRMAA errno constant \"%s\"\n", s);
2563
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
2564
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
2567
report_session_key();
2569
while ( *argc > 1) {
2570
jobid = NEXT_ARGV(argc, argv);
2571
if ((drmaa_errno=drmaa_control(jobid, drmaa_control_op, diagnosis, sizeof(diagnosis)-1))
2572
!= drmaa_target_errno) {
2574
if (drmaa_target_errno == DRMAA_ERRNO_SUCCESS) {
2575
fprintf(stderr, "drmaa_control(\"%s\", %s) failed: %s (%s)\n", jobid,
2576
drmaa_ctrl2str(drmaa_control_op), diagnosis, drmaa_strerror(drmaa_errno));
2579
fprintf(stderr, "drmaa_control(\"%s\", %s) returned with wrong errno "
2580
"(%s) instead of %s: %s\n", jobid, drmaa_ctrl2str(drmaa_control_op),
2581
drmaa_errno2str(drmaa_errno), drmaa_errno2str(drmaa_target_errno),
2582
drmaa_errno==DRMAA_ERRNO_SUCCESS?"<no error>":diagnosis);
2586
printf("%20s %s\n", jobid, drmaa_ctrl2str(drmaa_control_op));
2589
if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
2590
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
2596
case ST_EXIT_STATUS:
2597
/* - drmaa_init() is called
2598
- 255 jobs are submitted
2599
- job i returns i as exit status (8 bit)
2600
- drmaa_wait() verifies each job returned the expected exit status
2602
- then drmaa_exit() is called */
2604
char diagnosis[1024];
2605
int size_all_jobids = 256;
2606
const char *all_jobids[256];
2607
const char *job_argv[2];
2611
init_jobids(all_jobids, size_all_jobids);
2614
exit_job = NEXT_ARGV(argc, argv);
2617
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
2618
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
2621
report_session_key();
2624
* submit sequential jobs
2626
if (!(jt = create_exit_job_template(exit_job, 0))) {
2627
fprintf(stderr, "create_exit_job_template() failed\n");
2630
for (i=0; i<255; i++) {
2632
/* parametrize exit job with job argument */
2633
sprintf(buffer, "%d", i);
2634
job_argv[0] = buffer;
2636
drmaa_set_vector_attribute(jt, DRMAA_V_ARGV, job_argv, NULL, 0);
2638
while ((drmaa_errno=drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis,
2639
sizeof(diagnosis)-1)) == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) {
2640
fprintf(stderr, "drmaa_run_job() failed - retry: %s\n", diagnosis);
2643
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
2644
fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis);
2645
free_jobids(all_jobids, size_all_jobids);
2648
printf("\t \"%s\"\n", jobid);
2649
all_jobids[i] = strdup(jobid);
2652
/* set string array end mark */
2653
all_jobids[i] = NULL;
2655
drmaa_delete_job_template(jt, NULL, 0);
2658
* wait for all jobs and verify exit status
2661
for (i=0; i<255; i++) {
2663
int exit_status = 0;
2667
drmaa_errno = drmaa_wait(all_jobids[i], jobid, sizeof(jobid)-1,
2668
&stat, DRMAA_TIMEOUT_WAIT_FOREVER, NULL, diagnosis, sizeof(diagnosis)-1);
2669
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
2670
fprintf(stderr, "drmaa_wait(%s) failed - retry: %s\n", all_jobids[i], diagnosis);
2673
} while (drmaa_errno == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE);
2675
printf("job %d with job id %s finished\n", i, all_jobids[i]);
2676
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
2677
fprintf(stderr, "drmaa_wait(%s) failed: %s\n", all_jobids[i], diagnosis);
2678
free_jobids(all_jobids, size_all_jobids);
2682
drmaa_wifexited(&exited, stat, NULL, 0);
2684
fprintf(stderr, "job \"%s\" did not exit cleanly\n", all_jobids[i]);
2685
free_jobids(all_jobids, size_all_jobids);
2688
drmaa_wexitstatus(&exit_status, stat, NULL, 0);
2689
if (exit_status != i) {
2690
fprintf(stderr, "job \"%s\" returned wrong exit status %d instead of %d\n",
2691
all_jobids[i], exit_status, i);
2692
free_jobids(all_jobids, size_all_jobids);
2698
free_jobids(all_jobids, size_all_jobids);
2700
printf("waited all jobs\n");
2701
if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
2702
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
2708
case ST_ATTRIBUTE_CHECK:
2709
/* Need to test all DRMAA attributes:
2710
DRMAA_REMOTE_COMMAND
2713
- wait for job and check status code
2716
- submit job in hold state with name
2717
- use drmaa_job_ps() to verify state
2718
- use custom routine to verify name
2723
- reuse ST_INPUT_BECOMES_OUTPUT with files in /tmp
2725
- submit tar job with error path set
2726
- wait for job to finish
2727
- check contents of error file
2729
- submit tar job to create a sample tar file
2730
- wait for job to finish
2731
- submit tar job with output path and join files set
2732
- wait for job to finish
2733
- check contents of output file
2735
- submit job with job category containing -h and -N
2736
- use drmaa_job_ps() to verify state
2737
- use sge_gdi to verify name
2739
- submit job with job category containing -h and -N and DRMAA_JOB_NAME
2740
- use sge_gdi to verify name
2741
- use drmaa_job_ps() to verify state
2743
- wait for job to complete
2744
DRMAA_NATIVE_SPECIFICATION
2745
- submit job with -h and -N
2746
- use drmaa_job_ps() to verify state
2747
- use sge_gdi to verify name
2749
- submit job with -h and -N and DRMAA_JOB_NAME
2750
- use sge_gdi to verify name
2751
- use drmaa_job_ps() to verify state
2753
- wait for job to complete
2756
- submit job with start time +1 min
2757
- wait for job to complete
2758
- compare the current time to stored time
2760
- submit echo job with output path set
2761
- wait for job to finish
2766
int status, job_state, exit_status;
2767
int failed_test = 0, test_failed = 0;
2768
char diagnosis[1024];
2769
const char *job_argv[4];
2770
char jobid[1024], new_jobid[1024];
2772
lList *alp, *job_lp;
2775
const char *mirror_text = "thefoxjumps...";
2776
mirror_job = "/bin/cat";
2777
input_path = "test.in";
2778
output_path = "test.out";
2779
error_path = "test.err";
2782
exit_job = NEXT_ARGV(argc, argv);
2783
email_addr = NEXT_ARGV(argc, argv);
2786
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
2787
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
2790
report_session_key();
2793
* test remote command and argv
2796
printf ("Testing remote command and argv\n");
2797
printf ("Getting job template\n");
2799
drmaa_allocate_job_template(&jt, NULL, 0);
2802
fprintf(stderr, "drmaa_allocate_job_template() failed\n");
2807
printf ("Filling job template\n");
2810
drmaa_set_vector_attribute(jt, DRMAA_V_ARGV, job_argv, NULL, 0);
2811
drmaa_set_attribute(jt, DRMAA_REMOTE_COMMAND, exit_job, NULL, 0);
2813
printf ("Running job\n");
2814
while ((drmaa_errno=drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis,
2815
sizeof(diagnosis)-1)) == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) {
2816
fprintf(stderr, "drmaa_run_job() failed - retry: %s\n", diagnosis);
2820
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
2821
fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis);
2826
printf ("Waiting for job to complete\n");
2828
drmaa_errno = drmaa_wait(jobid, new_jobid, sizeof(jobid)-1,
2829
&status, DRMAA_TIMEOUT_WAIT_FOREVER, NULL, diagnosis, sizeof(diagnosis)-1);
2830
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
2831
fprintf(stderr, "drmaa_wait(%s) failed - retry: %s\n", jobid, diagnosis);
2834
} while (drmaa_errno == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE);
2836
printf("Job with job id %s finished\n", jobid);
2838
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
2839
fprintf(stderr, "drmaa_wait(%s) failed: %s\n", jobid, diagnosis);
2844
drmaa_wexitstatus(&exit_status, status, NULL, 0);
2846
if (exit_status != 5) {
2847
fprintf(stderr, "job \"%s\" returned wrong exit status %d instead of 5\n",
2848
jobid, exit_status);
2851
else if (!failed_test) {
2852
printf ("Test succeeded!\n");
2854
} while (do_while_end);
2856
if (jt != NULL) { drmaa_delete_job_template(jt, NULL, 0); jt = NULL; }
2858
if (failed_test) test_failed = 1;
2860
printf("=====================\n");
2863
* testing job submission state and job name
2867
printf ("Testing job submission state and job name\n");
2868
printf ("Getting job template\n");
2869
drmaa_allocate_job_template(&jt, NULL, 0);
2872
fprintf(stderr, "drmaa_allocate_job_template() failed\n");
2877
printf ("Filling job template\n");
2880
drmaa_set_vector_attribute(jt, DRMAA_V_ARGV, job_argv, NULL, 0);
2881
drmaa_set_attribute(jt, DRMAA_REMOTE_COMMAND, exit_job, NULL, 0);
2882
drmaa_set_attribute(jt, DRMAA_JS_STATE, DRMAA_SUBMISSION_STATE_HOLD, NULL, 0);
2883
drmaa_set_attribute(jt, DRMAA_JOB_NAME, "ExitTest", NULL, 0);
2885
printf ("Running job\n");
2886
while ((drmaa_errno=drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis,
2887
sizeof(diagnosis)-1)) == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) {
2888
fprintf(stderr, "drmaa_run_job() failed - retry: %s\n", diagnosis);
2892
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
2893
fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis);
2898
printf ("Getting job name for job %lu from GDI\n", (unsigned long)atol(jobid));
2900
lCondition* where = lWhere("%T(%I==%u)", JB_Type, JB_job_number, (u_long32)atol(jobid));
2901
lEnumeration *what = lWhat("%T (%I %I)", JB_Type, JB_job_number, JB_job_name);
2902
alp = ctx->gdi(ctx, SGE_JOB_LIST, SGE_GDI_GET, &job_lp, where, what);
2903
job_ep = lFirst(job_lp);
2908
int tmp_ret = answer_list_print_err_warn(&alp, "GDI Critical", "GDI Error: ", "Message from GDI: ");
2911
fprintf (stderr, "problem talking to gdi\n");
2917
if (job_ep == NULL) {
2918
printf ("No such job number.\n");
2922
else if ((job_ep != NULL) && (strcmp (lGetString (job_ep, JB_job_name), "ExitTest") != 0)) {
2923
fprintf(stderr, "Job \"%s\" name was \"%s\" instead of \"ExitTest\"\n",
2924
jobid, lGetString (job_ep, JB_job_name));
2930
printf ("Checking job state\n");
2931
if (drmaa_job_ps(jobid, &job_state, diagnosis, sizeof(diagnosis)-1)
2932
!=DRMAA_ERRNO_SUCCESS) {
2933
fprintf(stderr, "drmaa_job_ps(\"%s\") failed: %s\n", jobid, diagnosis);
2938
if (job_state != DRMAA_PS_USER_ON_HOLD && job_state != DRMAA_PS_USER_SYSTEM_ON_HOLD) {
2939
fprintf (stderr, "Job \"%s\" was not in hold state\n", jobid);
2943
printf ("Releasing job\n");
2944
if (drmaa_control(jobid, DRMAA_CONTROL_RELEASE, diagnosis,
2945
sizeof(diagnosis)-1)!=DRMAA_ERRNO_SUCCESS) {
2946
fprintf(stderr, "drmaa_control(%s, %s) failed: %s\n",
2947
jobid, drmaa_ctrl2str(DRMAA_CONTROL_RELEASE), diagnosis);
2952
printf ("Waiting for job to complete\n");
2954
drmaa_errno = drmaa_wait(jobid, new_jobid, sizeof(jobid)-1,
2955
&status, DRMAA_TIMEOUT_WAIT_FOREVER, NULL, diagnosis, sizeof(diagnosis)-1);
2956
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
2957
fprintf(stderr, "drmaa_wait(%s) failed - retry: %s\n", jobid, diagnosis);
2960
} while (drmaa_errno == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE);
2962
printf("job with job id %s finished\n", jobid);
2964
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
2965
fprintf(stderr, "drmaa_wait(%s) failed: %s\n", jobid, diagnosis);
2970
printf ("Test succeeded!\n");
2972
} while (do_while_end);
2974
if (jt != NULL) { drmaa_delete_job_template(jt, NULL, 0); jt = NULL; }
2976
if (failed_test) test_failed = 1;
2978
printf("=====================\n");
2981
* testing working directory, input stream and output stream
2987
printf ("Testing working directory, input stream and output stream\n");
2989
printf ("Writing input file\n");
2991
strcpy (abs_path, "/tmp/");
2992
if (!(fp = fopen (strcat (abs_path, input_path), "w"))) {
2993
fprintf(stderr, "fopen(%s, w) failed: %s\n", abs_path, strerror(errno));
2998
fprintf(fp, "%s\n", mirror_text);
3001
printf ("Clearing output file\n");
3002
strcpy (abs_path, "/tmp/");
3003
if ((unlink (strcat (abs_path, output_path)) == -1) && (errno != ENOENT)) {
3004
fprintf(stderr, "unlink(%s) failed: %s\n", abs_path, strerror(errno));
3009
printf ("Getting job template\n");
3010
drmaa_allocate_job_template(&jt, NULL, 0);
3013
fprintf(stderr, "drmaa_allocate_job_template() failed\n");
3018
printf ("Filling job template\n");
3019
drmaa_set_attribute(jt, DRMAA_WD, "/tmp", NULL, 0);
3020
drmaa_set_attribute(jt, DRMAA_REMOTE_COMMAND, mirror_job, NULL, 0);
3021
set_path_attribute_plus_colon(jt, DRMAA_INPUT_PATH, input_path, NULL, 0);
3022
set_path_attribute_plus_colon(jt, DRMAA_OUTPUT_PATH, output_path, NULL, 0);
3024
printf ("Running job\n");
3025
while ((drmaa_errno=drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis,
3026
sizeof(diagnosis)-1)) == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) {
3027
fprintf(stderr, "drmaa_run_job() failed - retry: %s\n", diagnosis);
3031
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
3032
fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis);
3037
printf ("Waiting for job to complete\n");
3039
drmaa_errno = drmaa_wait(jobid, new_jobid, sizeof(jobid)-1,
3040
&status, DRMAA_TIMEOUT_WAIT_FOREVER, NULL, diagnosis, sizeof(diagnosis)-1);
3041
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
3042
fprintf(stderr, "drmaa_wait(%s) failed - retry: %s\n", jobid, diagnosis);
3045
} while (drmaa_errno == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE);
3047
printf("Job with job id %s finished\n", jobid);
3049
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
3050
fprintf(stderr, "drmaa_wait(%s) failed: %s\n", jobid, diagnosis);
3055
strcpy (abs_path, "/tmp/");
3056
if (!(fp=fopen(strcat (abs_path, output_path), "r"))) {
3057
fprintf(stderr, "fopen(%s) failed: %s\n", abs_path, strerror(errno));
3062
fscanf(fp, "%s", buffer);
3063
if (strcmp(buffer, mirror_text)) {
3064
fprintf(stderr, "Wrong output file: %s\n", buffer);
3067
else if (!failed_test) {
3068
printf ("Test succeeded!\n");
3070
} while (do_while_end);
3072
if (jt != NULL) { drmaa_delete_job_template(jt, NULL, 0); jt = NULL; }
3074
if (failed_test) test_failed = 1;
3076
printf("=====================\n");
3079
* testing error path
3083
printf ("Testing error stream\n");
3085
printf ("Clearing error file\n");
3087
strcpy (abs_path, "/tmp/");
3088
if ((unlink (strcat (abs_path, error_path)) == -1) && (errno != ENOENT)) {
3089
fprintf(stderr, "unlink(%s) failed: %s\n", abs_path, strerror(errno));
3094
printf ("Getting job template\n");
3095
drmaa_allocate_job_template(&jt, NULL, 0);
3098
fprintf(stderr, "drmaa_allocate_job_template() failed\n");
3103
printf ("Filling job template\n");
3104
drmaa_set_attribute(jt, DRMAA_WD, "/tmp", NULL, 0);
3105
drmaa_set_attribute(jt, DRMAA_REMOTE_COMMAND, "tar", NULL, 0);
3106
set_path_attribute_plus_colon(jt, DRMAA_ERROR_PATH, error_path, NULL, 0);
3108
printf ("Running job\n");
3109
while ((drmaa_errno=drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis,
3110
sizeof(diagnosis)-1)) == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) {
3111
fprintf(stderr, "drmaa_run_job() failed - retry: %s\n", diagnosis);
3115
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
3116
fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis);
3121
printf ("Waiting for job to complete\n");
3123
drmaa_errno = drmaa_wait(jobid, new_jobid, sizeof(jobid)-1,
3124
&status, DRMAA_TIMEOUT_WAIT_FOREVER, NULL, diagnosis, sizeof(diagnosis)-1);
3125
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
3126
fprintf(stderr, "drmaa_wait(%s) failed - retry: %s\n", jobid, diagnosis);
3129
} while (drmaa_errno == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE);
3131
printf("Job with job id %s finished\n", jobid);
3133
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
3134
fprintf(stderr, "drmaa_wait(%s) failed: %s\n", jobid, diagnosis);
3139
strcpy (abs_path, "/tmp/");
3140
if (!(fp=fopen(strcat (abs_path, error_path), "r"))) {
3141
fprintf(stderr, "fopen(%s) failed: %s\n", abs_path, strerror(errno));
3146
fscanf(fp, "%10c", buffer);
3148
if (strcmp("Usage: tar", buffer) != 0) {
3149
fprintf(stderr, "wrong output file: %s\n", buffer);
3152
else if (!failed_test) {
3153
printf ("Test succeeded!\n");
3155
} while (do_while_end);
3157
if (jt != NULL) { drmaa_delete_job_template(jt, NULL, 0); jt = NULL; }
3159
if (failed_test) test_failed = 1;
3161
printf("=====================\n");
3164
* testing join files
3167
/* First submit job to create tar file */
3168
char *tar_path = "test.tar";
3171
printf ("Testing join files\n");
3172
printf ("Running job to prepare data\n");
3173
printf ("Clearing output file\n");
3174
strcpy (abs_path, "/tmp/");
3175
if ((unlink (strcat (abs_path, output_path)) == -1) && (errno != ENOENT)) {
3176
fprintf(stderr, "unlink(%s) failed: %s\n", abs_path, strerror(errno));
3181
printf ("Clearing tar file\n");
3182
strcpy (abs_path, "/tmp/");
3183
if ((unlink (strcat (abs_path, tar_path)) == -1) && (errno != ENOENT)) {
3184
fprintf(stderr, "unlink(%s) failed: %s\n", abs_path, strerror(errno));
3189
printf ("Getting job template\n");
3190
drmaa_allocate_job_template(&jt, NULL, 0);
3193
fprintf(stderr, "drmaa_allocate_job_template() failed\n");
3198
printf ("Filling job template\n");
3199
job_argv[0] = "cvf";
3200
job_argv[1] = tar_path;
3201
job_argv[2] = input_path;
3203
drmaa_set_vector_attribute(jt, DRMAA_V_ARGV, job_argv, NULL, 0);
3204
drmaa_set_attribute(jt, DRMAA_WD, "/tmp", NULL, 0);
3205
drmaa_set_attribute(jt, DRMAA_REMOTE_COMMAND, "tar", NULL, 0);
3207
printf ("Running job\n");
3208
while ((drmaa_errno=drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis,
3209
sizeof(diagnosis)-1)) == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) {
3210
fprintf(stderr, "drmaa_run_job() failed - retry: %s\n", diagnosis);
3214
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
3215
fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis);
3220
drmaa_delete_job_template(jt, NULL, 0);
3223
printf ("Waiting for job to complete\n");
3225
drmaa_errno = drmaa_wait(jobid, new_jobid, sizeof(jobid)-1,
3226
&status, DRMAA_TIMEOUT_WAIT_FOREVER, NULL, diagnosis, sizeof(diagnosis)-1);
3227
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
3228
fprintf(stderr, "drmaa_wait(%s) failed - retry: %s\n", jobid, diagnosis);
3231
} while (drmaa_errno == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE);
3233
printf("Job with job id %s finished\n", jobid);
3235
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
3236
fprintf(stderr, "drmaa_wait(%s) failed: %s\n", jobid, diagnosis);
3241
/* submit job to read tar file */
3242
printf ("Running job to read data\n");
3243
printf ("Getting job template\n");
3244
drmaa_allocate_job_template(&jt, NULL, 0);
3247
fprintf(stderr, "drmaa_allocate_job_template() failed\n");
3252
printf ("Filling job template\n");
3253
job_argv[0] = "tvf";
3254
job_argv[1] = "test.tar";
3256
drmaa_set_vector_attribute(jt, DRMAA_V_ARGV, job_argv, NULL, 0);
3257
drmaa_set_attribute(jt, DRMAA_WD, "/tmp", NULL, 0);
3258
drmaa_set_attribute(jt, DRMAA_REMOTE_COMMAND, "tar", NULL, 0);
3259
drmaa_set_attribute(jt, DRMAA_JOIN_FILES, "y", NULL, 0);
3260
set_path_attribute_plus_colon(jt, DRMAA_OUTPUT_PATH, output_path, NULL, 0);
3262
printf ("Running job\n");
3263
while ((drmaa_errno=drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis,
3264
sizeof(diagnosis)-1)) == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) {
3265
fprintf(stderr, "drmaa_run_job() failed - retry: %s\n", diagnosis);
3269
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
3270
fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis);
3275
printf ("Waiting for job to complete\n");
3277
drmaa_errno = drmaa_wait(jobid, new_jobid, sizeof(jobid)-1,
3278
&status, DRMAA_TIMEOUT_WAIT_FOREVER, NULL, diagnosis, sizeof(diagnosis)-1);
3279
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
3280
fprintf(stderr, "drmaa_wait(%s) failed - retry: %s\n", jobid, diagnosis);
3283
} while (drmaa_errno == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE);
3285
printf("Job with job id %s finished\n", jobid);
3287
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
3288
fprintf(stderr, "drmaa_wait(%s) failed: %s\n", jobid, diagnosis);
3293
strcpy (abs_path, "/tmp/");
3294
if (!(fp=fopen(strcat (abs_path, output_path), "r"))) {
3295
fprintf(stderr, "fopen(%s) failed: %s\n", abs_path, strerror(errno));
3300
fscanf(fp, "%14c%*[^\n]\n", buffer);
3302
if (strcmp("tar: blocksize", buffer) != 0) {
3303
fprintf(stderr, "missing stderr from output file: %s\n", buffer);
3307
fscanf(fp, "%4c\n", buffer);
3309
if (strcmp("-rw-", buffer) != 0) {
3310
fprintf(stderr, "missing stdout from output file: %s\n", buffer);
3313
else if (!failed_test) {
3314
printf ("Test succeeded!\n");
3316
} while (do_while_end);
3318
if (jt != NULL) { drmaa_delete_job_template(jt, NULL, 0); jt = NULL; }
3320
if (failed_test) test_failed = 1;
3322
printf("=====================\n");
3325
* testing job category
3328
printf ("Testing job category\n");
3329
printf ("$SGE_ROOT/$SGE_CELL/common/qtask should contain the following entry:\n");
3330
printf ("test.cat -N ExitTest -h\n");
3332
/* first test that it works */
3333
printf ("Testing job category standalone\n");
3334
printf ("Getting job template\n");
3335
drmaa_allocate_job_template(&jt, NULL, 0);
3338
fprintf(stderr, "drmaa_allocate_job_template() failed\n");
3343
printf ("Filling job template\n");
3346
drmaa_set_vector_attribute(jt, DRMAA_V_ARGV, job_argv, NULL, 0);
3347
drmaa_set_attribute(jt, DRMAA_REMOTE_COMMAND, exit_job, NULL, 0);
3348
drmaa_set_attribute(jt, DRMAA_JOB_CATEGORY, "test.cat", NULL, 0);
3350
printf ("Running job\n");
3351
while ((drmaa_errno=drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis,
3352
sizeof(diagnosis)-1)) == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) {
3353
fprintf(stderr, "drmaa_run_job() failed - retry: %s\n", diagnosis);
3357
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
3358
fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis);
3363
drmaa_delete_job_template(jt, NULL, 0);
3366
printf ("Getting job name for job %lu from GDI\n", (unsigned long)atol(jobid));
3368
lCondition *where = lWhere("%T(%I==%u)", JB_Type, JB_job_number, (u_long32)atol(jobid));
3369
lEnumeration *what = lWhat("%T (%I %I)", JB_Type, JB_job_number, JB_job_name);
3370
alp = ctx->gdi(ctx, SGE_JOB_LIST, SGE_GDI_GET, &job_lp, where, what);
3371
job_ep = lFirst(job_lp);
3376
int tmp_ret = answer_list_print_err_warn(&alp, "GDI Critical", "GDI Error: ", "Message from GDI: ");
3379
fprintf (stderr, "problem talking to gdi\n");
3386
if (job_ep == NULL) {
3387
printf ("No such job number.\n");
3391
else if ((job_ep != NULL) && (strcmp (lGetString (job_ep, JB_job_name), "ExitTest") != 0)) {
3392
fprintf(stderr, "Job \"%s\" name was \"%s\" instead of \"ExitTest\"\n",
3393
jobid, lGetString (job_ep, JB_job_name));
3399
printf ("Checking job state\n");
3400
if (drmaa_job_ps(jobid, &job_state, diagnosis, sizeof(diagnosis)-1)
3401
!=DRMAA_ERRNO_SUCCESS) {
3402
fprintf(stderr, "drmaa_job_ps(\"%s\") failed: %s\n", jobid, diagnosis);
3407
if (job_state != DRMAA_PS_USER_ON_HOLD && job_state != DRMAA_PS_USER_SYSTEM_ON_HOLD) {
3408
fprintf (stderr, "Job \"%s\" was not in hold state\n", jobid);
3412
printf ("Releasing job\n");
3413
if (drmaa_control(jobid, DRMAA_CONTROL_RELEASE, diagnosis,
3414
sizeof(diagnosis)-1)!=DRMAA_ERRNO_SUCCESS) {
3415
fprintf(stderr, "drmaa_control(%s, %s) failed: %s\n",
3416
jobid, drmaa_ctrl2str(DRMAA_CONTROL_RELEASE), diagnosis);
3421
printf ("Waiting for job to complete\n");
3423
drmaa_errno = drmaa_wait(jobid, new_jobid, sizeof(jobid)-1,
3424
&status, DRMAA_TIMEOUT_WAIT_FOREVER, NULL, diagnosis, sizeof(diagnosis)-1);
3425
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
3426
fprintf(stderr, "drmaa_wait(%s) failed - retry: %s\n", jobid, diagnosis);
3429
} while (drmaa_errno == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE);
3431
printf("Job with job id %s finished\n", jobid);
3433
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
3434
fprintf(stderr, "drmaa_wait(%s) failed: %s\n", jobid, diagnosis);
3439
/* then test that it doesn't override DRMAA attributes */
3440
printf ("Testing job category v/s DRMAA attributes\n");
3441
printf ("Getting job template\n");
3442
drmaa_allocate_job_template(&jt, NULL, 0);
3445
fprintf(stderr, "drmaa_allocate_job_template() failed\n");
3450
printf ("Filling job template\n");
3453
drmaa_set_vector_attribute(jt, DRMAA_V_ARGV, job_argv, NULL, 0);
3454
drmaa_set_attribute(jt, DRMAA_REMOTE_COMMAND, exit_job, NULL, 0);
3455
drmaa_set_attribute(jt, DRMAA_JOB_NAME, "DRMAAExitTest", NULL, 0);
3456
drmaa_set_attribute(jt, DRMAA_JOB_CATEGORY, "test.cat", NULL, 0);
3458
printf ("Running job\n");
3459
while ((drmaa_errno=drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis,
3460
sizeof(diagnosis)-1)) == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) {
3461
fprintf(stderr, "drmaa_run_job() failed - retry: %s\n", diagnosis);
3465
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
3466
fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis);
3471
printf ("Getting job name for job %lu from GDI\n", (unsigned long)atol(jobid));
3473
lCondition *where = lWhere("%T(%I==%u)", JB_Type, JB_job_number, (u_long32)atol(jobid));
3474
lEnumeration *what = lWhat("%T (%I %I)", JB_Type, JB_job_number, JB_job_name);
3475
alp = ctx->gdi(ctx, SGE_JOB_LIST, SGE_GDI_GET, &job_lp, where, what);
3476
job_ep = lFirst(job_lp);
3481
int tmp_ret = answer_list_print_err_warn(&alp, "GDI Critical", "GDI Error: ", "Message from GDI: ");
3484
fprintf (stderr, "problem talking to gdi\n");
3490
if (job_ep == NULL) {
3491
printf ("No such job number.\n");
3495
else if ((job_ep != NULL) && (strcmp (lGetString (job_ep, JB_job_name), "DRMAAExitTest") != 0)) {
3496
fprintf(stderr, "Job \"%s\" name was \"%s\" instead of \"DRMAAExitTest\"\n",
3497
jobid, lGetString (job_ep, JB_job_name));
3503
printf ("Checking job state\n");
3504
if (drmaa_job_ps(jobid, &job_state, diagnosis, sizeof(diagnosis)-1)
3505
!=DRMAA_ERRNO_SUCCESS) {
3506
fprintf(stderr, "drmaa_job_ps(\"%s\") failed: %s\n", jobid, diagnosis);
3511
if (job_state != DRMAA_PS_USER_ON_HOLD && job_state != DRMAA_PS_USER_SYSTEM_ON_HOLD) {
3512
fprintf (stderr, "job \"%s\" was not in hold state\n", jobid);
3516
printf ("Releasing job\n");
3517
if (drmaa_control(jobid, DRMAA_CONTROL_RELEASE, diagnosis,
3518
sizeof(diagnosis)-1)!=DRMAA_ERRNO_SUCCESS) {
3519
fprintf(stderr, "drmaa_control(%s, %s) failed: %s\n",
3520
jobid, drmaa_ctrl2str(DRMAA_CONTROL_RELEASE), diagnosis);
3525
printf ("Waiting for job to complete\n");
3527
drmaa_errno = drmaa_wait(jobid, new_jobid, sizeof(jobid)-1,
3528
&status, DRMAA_TIMEOUT_WAIT_FOREVER, NULL, diagnosis, sizeof(diagnosis)-1);
3529
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
3530
fprintf(stderr, "drmaa_wait(%s) failed - retry: %s\n", jobid, diagnosis);
3533
} while (drmaa_errno == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE);
3535
printf("Job with job id %s finished\n", jobid);
3537
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
3538
fprintf(stderr, "drmaa_wait(%s) failed: %s\n", jobid, diagnosis);
3542
else if (!failed_test) {
3543
printf ("Test succeeded!\n");
3545
} while (do_while_end);
3547
if (jt != NULL) { drmaa_delete_job_template(jt, NULL, 0); jt = NULL; }
3549
if (failed_test) test_failed = 1;
3551
printf("=====================\n");
3554
* testing native specification
3557
printf ("Testing native specification\n");
3558
/* first test that it works */
3559
printf ("Testing native specification standalone\n");
3560
printf ("Getting job template\n");
3561
drmaa_allocate_job_template(&jt, NULL, 0);
3564
fprintf(stderr, "drmaa_allocate_job_template() failed\n");
3569
printf ("Filling job template\n");
3572
drmaa_set_vector_attribute(jt, DRMAA_V_ARGV, job_argv, NULL, 0);
3573
drmaa_set_attribute(jt, DRMAA_REMOTE_COMMAND, exit_job, NULL, 0);
3574
drmaa_set_attribute(jt, DRMAA_NATIVE_SPECIFICATION, "-h -N ExitTest", NULL, 0);
3576
printf ("Running job\n");
3577
while ((drmaa_errno=drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis,
3578
sizeof(diagnosis)-1)) == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) {
3579
fprintf(stderr, "drmaa_run_job() failed - retry: %s\n", diagnosis);
3583
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
3584
fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis);
3589
drmaa_delete_job_template(jt, NULL, 0);
3592
printf ("Getting job name for job %lu from GDI\n", (unsigned long)atol(jobid));
3594
lCondition* where = lWhere("%T(%I==%u)", JB_Type, JB_job_number, (u_long32)atol(jobid));
3595
lEnumeration *what = lWhat("%T (%I %I)", JB_Type, JB_job_number, JB_job_name);
3596
alp = ctx->gdi(ctx, SGE_JOB_LIST, SGE_GDI_GET, &job_lp, where, what);
3597
job_ep = lFirst(job_lp);
3602
int tmp_ret = answer_list_print_err_warn(&alp, "GDI Critical", "GDI Error: ", "Message from GDI: ");
3605
fprintf (stderr, "problem talking to gdi\n");
3612
if (job_ep == NULL) {
3613
printf ("No such job number.\n");
3617
else if ((job_ep != NULL) && (strcmp (lGetString (job_ep, JB_job_name), "ExitTest") != 0)) {
3618
fprintf(stderr, "Job \"%s\" name was \"%s\" instead of \"ExitTest\"\n",
3619
jobid, lGetString (job_ep, JB_job_name));
3625
printf ("Checking job state\n");
3626
if (drmaa_job_ps(jobid, &job_state, diagnosis, sizeof(diagnosis)-1)
3627
!=DRMAA_ERRNO_SUCCESS) {
3628
fprintf(stderr, "drmaa_job_ps(\"%s\") failed: %s\n", jobid, diagnosis);
3633
if (job_state != DRMAA_PS_USER_ON_HOLD && job_state != DRMAA_PS_USER_SYSTEM_ON_HOLD) {
3634
fprintf (stderr, "Job \"%s\" was not in hold state\n", jobid);
3638
printf ("Releasing job\n");
3639
if (drmaa_control(jobid, DRMAA_CONTROL_RELEASE, diagnosis,
3640
sizeof(diagnosis)-1)!=DRMAA_ERRNO_SUCCESS) {
3641
fprintf(stderr, "drmaa_control(%s, %s) failed: %s\n",
3642
jobid, drmaa_ctrl2str(DRMAA_CONTROL_RELEASE), diagnosis);
3647
printf ("Waiting for job to complete\n");
3649
drmaa_errno = drmaa_wait(jobid, new_jobid, sizeof(jobid)-1,
3650
&status, DRMAA_TIMEOUT_WAIT_FOREVER, NULL, diagnosis, sizeof(diagnosis)-1);
3651
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
3652
fprintf(stderr, "drmaa_wait(%s) failed - retry: %s\n", jobid, diagnosis);
3655
} while (drmaa_errno == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE);
3657
printf("Job with job id %s finished\n", jobid);
3659
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
3660
fprintf(stderr, "drmaa_wait(%s) failed: %s\n", jobid, diagnosis);
3665
/* then test that it doesn't override DRMAA attributes */
3666
printf ("Testing native specification v/s DRMAA attributes\n");
3667
printf ("Getting job template\n");
3668
drmaa_allocate_job_template(&jt, NULL, 0);
3671
fprintf(stderr, "drmaa_allocate_job_template() failed\n");
3676
printf ("Filling job template\n");
3679
drmaa_set_vector_attribute(jt, DRMAA_V_ARGV, job_argv, NULL, 0);
3680
drmaa_set_attribute(jt, DRMAA_REMOTE_COMMAND, exit_job, NULL, 0);
3681
drmaa_set_attribute(jt, DRMAA_JOB_NAME, "DRMAAExitTest", NULL, 0);
3682
drmaa_set_attribute(jt, DRMAA_NATIVE_SPECIFICATION, "-h -N ExitTest", NULL, 0);
3684
printf ("Running job\n");
3685
while ((drmaa_errno=drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis,
3686
sizeof(diagnosis)-1)) == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) {
3687
fprintf(stderr, "drmaa_run_job() failed - retry: %s\n", diagnosis);
3691
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
3692
fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis);
3697
printf ("Getting job name for job %lu from GDI\n", (unsigned long)atol(jobid));
3699
lCondition *where = lWhere ("%T(%I==%u)", JB_Type, JB_job_number, (u_long32)atol(jobid));
3700
lEnumeration *what = lWhat("%T (%I %I)", JB_Type, JB_job_number, JB_job_name);
3701
alp = ctx->gdi(ctx, SGE_JOB_LIST, SGE_GDI_GET, &job_lp, where, what);
3702
job_ep = lFirst(job_lp);
3707
int tmp_ret = answer_list_print_err_warn(&alp, "GDI Critical", "GDI Error: ", "Message from GDI: ");
3710
fprintf (stderr, "problem talking to gdi\n");
3716
if (job_ep == NULL) {
3717
printf ("No such job number.\n");
3721
else if ((job_ep != NULL) && (strcmp (lGetString (job_ep, JB_job_name), "DRMAAExitTest") != 0)) {
3722
fprintf(stderr, "Job \"%s\" name was \"%s\" instead of \"DRMAAExitTest\"\n",
3723
jobid, lGetString (job_ep, JB_job_name));
3729
printf ("Checking job state\n");
3730
if (drmaa_job_ps(jobid, &job_state, diagnosis, sizeof(diagnosis)-1)
3731
!=DRMAA_ERRNO_SUCCESS) {
3732
fprintf(stderr, "drmaa_job_ps(\"%s\") failed: %s\n", jobid, diagnosis);
3737
if (job_state != DRMAA_PS_USER_ON_HOLD && job_state != DRMAA_PS_USER_SYSTEM_ON_HOLD) {
3738
fprintf (stderr, "job \"%s\" was not in hold state\n", jobid);
3742
printf ("Releasing job\n");
3743
if (drmaa_control(jobid, DRMAA_CONTROL_RELEASE, diagnosis,
3744
sizeof(diagnosis)-1)!=DRMAA_ERRNO_SUCCESS) {
3745
fprintf(stderr, "drmaa_control(%s, %s) failed: %s\n",
3746
jobid, drmaa_ctrl2str(DRMAA_CONTROL_RELEASE), diagnosis);
3751
printf ("Waiting for job to complete\n");
3753
drmaa_errno = drmaa_wait(jobid, new_jobid, sizeof(jobid)-1,
3754
&status, DRMAA_TIMEOUT_WAIT_FOREVER, NULL, diagnosis, sizeof(diagnosis)-1);
3755
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
3756
fprintf(stderr, "drmaa_wait(%s) failed - retry: %s\n", jobid, diagnosis);
3759
} while (drmaa_errno == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE);
3761
printf("Job with job id %s finished\n", jobid);
3763
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
3764
fprintf(stderr, "drmaa_wait(%s) failed: %s\n", jobid, diagnosis);
3768
else if (!failed_test) {
3769
printf ("Test succeeded!\n");
3771
} while (do_while_end);
3773
if (jt != NULL) { drmaa_delete_job_template(jt, NULL, 0); jt = NULL; }
3775
if (failed_test) test_failed = 1;
3777
printf("=====================\n");
3780
* testing start time
3785
struct tm timelater;
3788
printf ("Testing start time\n");
3789
printf ("Getting job template\n");
3790
drmaa_allocate_job_template(&jt, NULL, 0);
3793
fprintf(stderr, "drmaa_allocate_job_template() failed\n");
3798
printf ("Filling job template\n");
3801
drmaa_set_vector_attribute(jt, DRMAA_V_ARGV, job_argv, NULL, 0);
3802
drmaa_set_attribute(jt, DRMAA_REMOTE_COMMAND, exit_job, NULL, 0);
3805
localtime_r (&now, &timenow);
3809
/* This fails at midnight. */
3810
if (timenow.tm_min == 0) {
3814
sprintf (timestr, "%.4d/%.2d/%.2d %.2d:%.2d:%.2d", timenow.tm_year + 1900,
3815
timenow.tm_mon + 1, timenow.tm_mday, timenow.tm_hour,
3816
timenow.tm_min, timenow.tm_sec);
3817
printf ("%s\n", timestr);
3819
drmaa_set_attribute(jt, DRMAA_START_TIME, timestr, NULL, 0);
3821
printf ("Running job\n");
3822
while ((drmaa_errno=drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis,
3823
sizeof(diagnosis)-1)) == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) {
3824
fprintf(stderr, "drmaa_run_job() failed - retry: %s\n", diagnosis);
3828
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
3829
fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis);
3834
printf ("Waiting for job to complete\n");
3836
drmaa_errno = drmaa_wait(jobid, new_jobid, sizeof(jobid)-1,
3837
&status, DRMAA_TIMEOUT_WAIT_FOREVER, NULL, diagnosis, sizeof(diagnosis)-1);
3838
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
3839
fprintf(stderr, "drmaa_wait(%s) failed - retry: %s\n", jobid, diagnosis);
3842
} while (drmaa_errno == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE);
3846
printf("Job with job id %s finished\n", jobid);
3848
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
3849
fprintf(stderr, "drmaa_wait(%s) failed: %s\n", jobid, diagnosis);
3854
localtime_r (&now, &timenow);
3855
localtime_r (&later, &timelater);
3857
time_diff = (((timelater.tm_hour * 60) + timelater.tm_min) * 60 + timelater.tm_sec) -
3858
(((timenow.tm_hour * 60) + timenow.tm_min) * 60 + timenow.tm_sec);
3860
/* Allow 20 seconds for scheduling and run time. This test will fail
3861
* if run at midnight. */
3862
if (time_diff > 80) {
3863
printf ("Job took %d seconds longer than expected\n", time_diff - 60);
3866
else if (time_diff < 0) {
3867
printf ("Job finished in %d seconds\n",
3868
((timelater.tm_hour * 60) + timelater.tm_min) * 60 + timelater.tm_sec);
3871
else if (!failed_test) {
3872
printf ("Test succeeded!\n");
3874
} while (do_while_end);
3876
if (jt != NULL) { drmaa_delete_job_template(jt, NULL, 0); jt = NULL; }
3878
if (failed_test) test_failed = 1;
3880
printf("=====================\n");
3883
* testing job environment
3887
const char *job_env[2];
3890
printf ("Testing job environment\n");
3892
printf ("Clearing output file\n");
3893
strcpy (abs_path, "/tmp/");
3894
if ((unlink (strcat (abs_path, output_path)) == -1) && (errno != ENOENT)) {
3895
fprintf(stderr, "unlink(%s) failed: %s\n", abs_path, strerror(errno));
3900
printf ("Getting job template\n");
3901
drmaa_allocate_job_template(&jt, NULL, 0);
3904
fprintf(stderr, "drmaa_allocate_job_template() failed\n");
3909
printf ("Filling job template\n");
3910
job_argv[0] = "$YOU_ARE_MY_SUNSHINE";
3912
drmaa_set_vector_attribute(jt, DRMAA_V_ARGV, job_argv, NULL, 0);
3913
job_env[0] = "YOU_ARE_MY_SUNSHINE=MyOnlySunshine";
3915
drmaa_set_vector_attribute(jt, DRMAA_V_ENV, job_env, NULL, 0);
3916
drmaa_set_attribute(jt, DRMAA_WD, "/tmp", NULL, 0);
3917
drmaa_set_attribute(jt, DRMAA_REMOTE_COMMAND, "/usr/bin/echo", NULL, 0);
3918
drmaa_set_attribute(jt, DRMAA_NATIVE_SPECIFICATION, "-shell y", NULL, 0);
3919
set_path_attribute_plus_colon(jt, DRMAA_OUTPUT_PATH, output_path, NULL, 0);
3921
printf ("Running job\n");
3922
while ((drmaa_errno=drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis,
3923
sizeof(diagnosis)-1)) == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) {
3924
fprintf(stderr, "drmaa_run_job() failed - retry: %s\n", diagnosis);
3928
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
3929
fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis);
3934
printf ("Waiting for job to complete\n");
3936
drmaa_errno = drmaa_wait(jobid, new_jobid, sizeof(jobid)-1,
3937
&status, DRMAA_TIMEOUT_WAIT_FOREVER, NULL, diagnosis, sizeof(diagnosis)-1);
3938
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
3939
fprintf(stderr, "drmaa_wait(%s) failed - retry: %s\n", jobid, diagnosis);
3942
} while (drmaa_errno == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE);
3944
printf("Job with job id %s finished\n", jobid);
3946
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
3947
fprintf(stderr, "drmaa_wait(%s) failed: %s\n", jobid, diagnosis);
3952
strcpy (abs_path, "/tmp/");
3953
if (!(fp=fopen(strcat (abs_path, output_path), "r"))) {
3954
fprintf(stderr, "fopen(%s) failed: %s\n", abs_path, strerror(errno));
3959
fscanf(fp, "%s", buffer);
3960
if (strcmp(buffer, "MyOnlySunshine")) {
3961
fprintf(stderr, "Wrong output file: %s\n", buffer);
3964
else if (!failed_test) {
3965
printf ("Test succeeded!\n");
3967
} while (do_while_end);
3969
if (jt != NULL) { drmaa_delete_job_template(jt, NULL, 0); jt = NULL; }
3971
if (failed_test) test_failed = 1;
3973
printf("=====================\n");
3976
* testing email address
3980
const char *email[3];
3982
printf ("Testing email address\n");
3983
printf ("$SGE_ROOT/$SGE_CELL/common/sge_request should contain the following entry:\n");
3985
printf ("Getting job template\n");
3986
drmaa_allocate_job_template(&jt, NULL, 0);
3989
fprintf(stderr, "drmaa_allocate_job_template() failed\n");
3994
printf ("Filling job template\n");
3995
email[0] = email_addr;
3997
drmaa_set_vector_attribute(jt, DRMAA_V_EMAIL, email, NULL, 0);
4000
drmaa_set_vector_attribute(jt, DRMAA_V_ARGV, job_argv, NULL, 0);
4001
drmaa_set_attribute(jt, DRMAA_REMOTE_COMMAND, exit_job, NULL, 0);
4003
printf ("Running job\n");
4004
while ((drmaa_errno=drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis,
4005
sizeof(diagnosis)-1)) == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) {
4006
fprintf(stderr, "drmaa_run_job() failed - retry: %s\n", diagnosis);
4010
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
4011
fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis);
4016
printf ("Waiting for job to complete\n");
4018
drmaa_errno = drmaa_wait(jobid, new_jobid, sizeof(jobid)-1,
4019
&status, DRMAA_TIMEOUT_WAIT_FOREVER, NULL, diagnosis, sizeof(diagnosis)-1);
4020
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
4021
fprintf(stderr, "drmaa_wait(%s) failed - retry: %s\n", jobid, diagnosis);
4024
} while (drmaa_errno == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE);
4026
printf("Job with job id %s finished\n", jobid);
4028
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
4029
fprintf(stderr, "drmaa_wait(%s) failed: %s\n", jobid, diagnosis);
4034
printf ("Check for email to find out if the test succeeded.\n");
4035
} while (do_while_end);
4037
if (jt != NULL) { drmaa_delete_job_template(jt, NULL, 0); jt = NULL; }
4039
if (failed_test) test_failed = 1;
4043
* testing email suppression
4045
printf("=====================\n");
4048
const char *email[2];
4050
printf ("Testing email supression\n");
4051
printf ("$SGE_ROOT/$SGE_CELL/common/sge_request should contain the following entry:\n");
4053
printf ("Getting job template\n");
4054
drmaa_allocate_job_template(&jt, NULL, 0);
4057
fprintf(stderr, "drmaa_allocate_job_template() failed\n");
4062
printf ("Filling job template\n");
4063
email[0] = email_addr;
4065
drmaa_set_vector_attribute(jt, DRMAA_V_EMAIL, email, NULL, 0);
4068
drmaa_set_vector_attribute(jt, DRMAA_V_ARGV, job_argv, NULL, 0);
4069
drmaa_set_attribute(jt, DRMAA_BLOCK_EMAIL, "1", NULL, 0);
4070
drmaa_set_attribute(jt, DRMAA_REMOTE_COMMAND, exit_job, NULL, 0);
4072
printf ("Running job\n");
4073
while ((drmaa_errno=drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis,
4074
sizeof(diagnosis)-1)) == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) {
4075
fprintf(stderr, "drmaa_run_job() failed - retry: %s\n", diagnosis);
4079
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
4080
fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis);
4085
printf ("Waiting for job to complete\n");
4087
drmaa_errno = drmaa_wait(jobid, new_jobid, sizeof(jobid)-1,
4088
&status, DRMAA_TIMEOUT_WAIT_FOREVER, NULL, diagnosis, sizeof(diagnosis)-1);
4089
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
4090
fprintf(stderr, "drmaa_wait(%s) failed - retry: %s\n", jobid, diagnosis);
4093
} while (drmaa_errno == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE);
4095
printf("Job with job id %s finished\n", jobid);
4097
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
4098
fprintf(stderr, "drmaa_wait(%s) failed: %s\n", jobid, diagnosis);
4103
printf ("Check for email to find out if the test failed.\n");
4104
} while (do_while_end);
4106
if (jt != NULL) { drmaa_delete_job_template(jt, NULL, 0); jt = NULL; }
4108
if (failed_test) test_failed = 1;
4113
if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
4114
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
4120
case ST_USAGE_CHECK:
4122
char jobid[1024], value[128], new_jobid[1024];
4123
drmaa_attr_values_t *rusage = NULL;
4128
exit_job = NEXT_ARGV(argc, argv);
4130
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
4131
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
4135
report_session_key();
4137
jt = create_exit_job_template(exit_job, 0);
4139
printf ("Running job\n");
4140
while ((drmaa_errno=drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis,
4141
sizeof(diagnosis)-1)) == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) {
4142
fprintf(stderr, "drmaa_run_job() failed - retry: %s\n", diagnosis);
4146
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
4147
fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis);
4151
if (jt != NULL) { drmaa_delete_job_template(jt, NULL, 0); jt = NULL; }
4153
printf ("Waiting for job to complete\n");
4155
drmaa_errno = drmaa_wait(jobid, new_jobid, sizeof(jobid)-1,
4156
&status, DRMAA_TIMEOUT_WAIT_FOREVER, &rusage, diagnosis, sizeof(diagnosis)-1);
4157
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
4158
fprintf(stderr, "drmaa_wait(%s) failed - retry: %s\n", jobid, diagnosis);
4161
} while (drmaa_errno == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE);
4163
printf("Job with job id %s finished\n", jobid);
4165
if (drmaa_errno == DRMAA_ERRNO_NO_RUSAGE) {
4166
fprintf(stderr, "drmaa_wait(%s) did not return usage information.\n", jobid);
4168
} else if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
4169
fprintf(stderr, "drmaa_wait(%s) failed: %s\n", jobid, diagnosis);
4170
drmaa_release_attr_values(rusage);
4172
} else if (rusage == NULL) {
4173
fprintf (stderr, "drmaa_wait(%s) did not return usage information and did not return DRMAA_ERRNO_NO_RUSAGE\n", jobid);
4177
drmaa_errno = drmaa_get_num_attr_values(rusage, &size);
4179
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
4180
fprintf(stderr, "drmaa_get_num_attr_values() failed: %s\n", drmaa_strerror(drmaa_errno));
4181
drmaa_release_attr_values(rusage);
4186
while ((drmaa_errno=drmaa_get_next_attr_value(rusage, value, 127))==DRMAA_ERRNO_SUCCESS) {
4188
printf("%s\n", value);
4191
drmaa_release_attr_values(rusage);
4195
fprintf(stderr, "Got incorrect size from drmaa_get_num_attr_values()\n");
4199
if (drmaa_errno != DRMAA_ERRNO_NO_MORE_ELEMENTS) {
4200
fprintf(stderr, "Got incorrect return value from drmaa_get_next_attr_value()\n");
4206
case ST_TRANSFER_FILES_BULK_JOB:
4208
case ST_TRANSFER_FILES_SINGLE_JOB:
4210
int aborted, stat, remote_ps;
4215
char attr_name[DRMAA_ATTR_BUFFER];
4216
drmaa_attr_names_t *vector = NULL;
4217
const char *session_all[] = { DRMAA_JOB_IDS_SESSION_ALL, NULL };
4220
sleeper_job = NEXT_ARGV(argc, argv);
4222
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
4223
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
4226
report_session_key();
4228
/* submit a working job from a local directory to the execution host */
4229
drmaa_allocate_job_template(&jt, NULL, 0);
4231
drmaa_errno = drmaa_get_attribute_names(&vector, diagnosis, sizeof(diagnosis)-1);
4232
while((drmaa_errno=drmaa_get_next_attr_name(vector, attr_name,
4233
sizeof(attr_name)-1)) == DRMAA_ERRNO_SUCCESS) {
4234
if( strcmp( attr_name, "drmaa_transfer_files" )==0 ) {
4239
/* we don't need vector any longer - free it */
4240
drmaa_release_attr_names(vector);
4244
fprintf( stderr, "DRMAA_TRANSFER_FILES is not supported!\n" );
4247
drmaa_set_attribute(jt, DRMAA_REMOTE_COMMAND, sleeper_job, NULL, 0);
4249
szTemp = NEXT_ARGV(argc,argv);
4250
drmaa_set_attribute(jt, DRMAA_TRANSFER_FILES, szTemp, NULL, 0);
4252
szTemp = NEXT_ARGV(argc,argv);
4253
drmaa_set_attribute(jt, DRMAA_JOIN_FILES, szTemp, NULL, 0);
4255
if( !strcmp( (szPath=NEXT_ARGV(argc,argv)), "NULL" )) {
4258
drmaa_set_attribute(jt, DRMAA_INPUT_PATH, szPath, NULL, 0);
4260
if( !strcmp( (szPath=NEXT_ARGV(argc,argv)), "NULL" )) {
4263
drmaa_set_attribute(jt, DRMAA_OUTPUT_PATH,szPath, NULL, 0);
4265
if( !strcmp( (szPath=NEXT_ARGV(argc,argv)), "NULL" )) {
4268
drmaa_set_attribute(jt, DRMAA_ERROR_PATH, szPath, NULL, 0);
4271
drmaa_job_ids_t *jobids;
4274
if((drmaa_errno=drmaa_run_bulk_jobs(&jobids, jt, 1, 3, 1,
4275
diagnosis, sizeof(diagnosis)-1))!=DRMAA_ERRNO_SUCCESS) {
4276
printf("failed submitting bulk job (%s): %s\n", drmaa_strerror(drmaa_errno), diagnosis);
4280
printf("submitted bulk job with jobids:\n");
4281
for (j=0; j<3; j++) {
4282
drmaa_get_next_job_id(jobids, jobid, sizeof(jobid)-1);
4283
printf("\t \"%s\"\n", jobid);
4285
drmaa_release_job_ids(jobids);
4288
/* submit a single (non-bulk) job using the same job template */
4289
if((drmaa_errno = drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis,
4290
sizeof(diagnosis)-1)) != DRMAA_ERRNO_SUCCESS) {
4291
fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis);
4296
drmaa_delete_job_template(jt, NULL, 0);
4298
/* synchronize with job to finish but do not dispose job finish information */
4299
if ((drmaa_errno = drmaa_synchronize(session_all, DRMAA_TIMEOUT_WAIT_FOREVER, 0,
4300
diagnosis, sizeof(diagnosis)-1))!=DRMAA_ERRNO_SUCCESS) {
4301
fprintf(stderr, "drmaa_synchronize(DRMAA_JOB_IDS_SESSION_ALL, dispose) failed: %s\n", diagnosis);
4304
printf("synchronized with job finish\n");
4307
drmaa_errno = drmaa_job_ps(jobid, &remote_ps, diagnosis, sizeof(diagnosis)-1);
4308
if (remote_ps == DRMAA_PS_FAILED) {
4309
fprintf(stderr, "job \"%s\" is not in failed state: %s\n",
4310
jobid, drmaa_state2str(remote_ps));
4315
if ((drmaa_errno = drmaa_wait(jobid, NULL, 0, &stat, DRMAA_TIMEOUT_WAIT_FOREVER, NULL,
4316
diagnosis, sizeof(diagnosis)-1)) != DRMAA_ERRNO_SUCCESS) {
4317
printf("drmaa_wait() failed %s: %s\n", drmaa_strerror(drmaa_errno), diagnosis);
4321
/* job finish information */
4322
drmaa_wifaborted(&aborted, stat, diagnosis, sizeof(diagnosis)-1);
4325
"job \"%s\" failed but drmaa_wifaborted() returns false\n", jobid);
4328
printf("waited job \"%s\" that never ran\n", jobid);
4330
if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
4331
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
4337
case ST_RESERVATION_FINISH_ORDER:
4338
case ST_BACKFILL_FINISH_ORDER:
4340
test_job_t job_spec[3];
4343
sleeper_job = NEXT_ARGV(argc, argv);
4344
job_spec[0].native = NEXT_ARGV(argc, argv);
4345
job_spec[0].time = 10;
4346
job_spec[1].native = NEXT_ARGV(argc, argv);
4347
job_spec[1].time = 10;
4348
job_spec[2].native = NEXT_ARGV(argc, argv);
4349
job_spec[2].time = 10;
4352
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
4353
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
4356
report_session_key();
4358
if (test_dispatch_order_njobs(3, job_spec, test_case==ST_RESERVATION_FINISH_ORDER?"0-1-2":"0,2-1")) {
4362
if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
4363
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
4369
case ST_WILD_PARALLEL:
4371
test_job_t job_spec[7];
4374
sleeper_job = NEXT_ARGV(argc, argv);
4376
job_spec[0].native = NEXT_ARGV(argc, argv);
4377
job_spec[0].time = 20;
4378
job_spec[1].native = job_spec[0].native;
4379
job_spec[1].time = 20;
4380
job_spec[2].native = job_spec[0].native;
4381
job_spec[2].time = 20;
4382
job_spec[3].native = job_spec[0].native;
4383
job_spec[3].time = 20;
4385
job_spec[4].native = NEXT_ARGV(argc, argv);
4386
job_spec[4].time = 20;
4387
job_spec[5].native = job_spec[4].native;
4388
job_spec[5].time = 20;
4390
job_spec[6].native = NEXT_ARGV(argc, argv);
4391
job_spec[6].time = 20;
4394
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
4395
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
4398
report_session_key();
4400
if (test_dispatch_order_njobs(7, job_spec, "6-4,5-0,1,2,3")) {
4404
if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
4405
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
4411
case ST_UNSUPPORTED_ATTR:
4412
case ST_UNSUPPORTED_VATTR:
4414
drmaa_job_template_t *jt = NULL;
4415
const char *values[2];
4420
if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
4421
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
4425
/* submit a working job from a local directory to the execution host */
4426
drmaa_allocate_job_template(&jt, NULL, 0);
4428
if (test_case == ST_UNSUPPORTED_ATTR) {
4429
drmaa_errno = drmaa_set_attribute(jt, "blah", "blah", diagnosis, sizeof(diagnosis)-1);
4431
drmaa_errno = drmaa_set_vector_attribute(jt, "blah", (const char**)values, diagnosis, sizeof(diagnosis)-1);
4434
drmaa_delete_job_template(jt, NULL, 0);
4437
if (drmaa_errno != DRMAA_ERRNO_INVALID_ARGUMENT) {
4438
fprintf(stderr, "drmaa_set_attribute()/drmaa_set_vector_attribute() allowed invalid attribute\n");
4442
if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
4443
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
4449
case ST_SYNCHRONIZE_NONEXISTANT:
4452
const char *all_jobids[2];
4454
int drmaa_errno = DRMAA_ERRNO_SUCCESS;
4457
sleeper_job = NEXT_ARGV(argc, argv);
4460
drmaa_errno = drmaa_init(NULL, diagnosis, sizeof(diagnosis) - 1);
4462
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
4463
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
4467
report_session_key();
4469
jt = create_sleeper_job_template(5, 0, 0);
4472
fprintf(stderr, "create_sleeper_job_template() failed\n");
4476
drmaa_errno = drmaa_run_job(jobid, sizeof(jobid) - 1, jt, diagnosis,
4477
sizeof(diagnosis) - 1);
4479
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
4480
fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis);
4484
printf("submitted job \"%s\"\n", jobid);
4486
drmaa_delete_job_template(jt, NULL, 0);
4488
/* Convert job id into a number and add 1. */
4489
new_id = strtol(jobid, NULL, 10) + 1;
4490
printf ("Last job id is %s. Using %d.\n", jobid, new_id);
4492
/* Build job id list. */
4493
sprintf(jobid, "%d", new_id);
4494
all_jobids[0] = jobid;
4495
all_jobids[1] = NULL;
4497
/* Synchronize on the new job id. */
4498
drmaa_errno = drmaa_synchronize(all_jobids, DRMAA_TIMEOUT_WAIT_FOREVER,
4500
DRMAA_ERROR_STRING_BUFFER);
4502
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
4503
fprintf(stderr, "Synchronize on non-existant job id failed\n");
4507
drmaa_errno = wait_all_jobs(1);
4509
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
4513
drmaa_errno = drmaa_exit(diagnosis, sizeof(diagnosis) - 1);
4515
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
4516
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
4522
case ST_RECOVERABLE_SESSION:
4524
char jobid1[DRMAA_JOBNAME_BUFFER + 1];
4525
char jobid2[DRMAA_JOBNAME_BUFFER + 1];
4526
char jobid3[DRMAA_JOBNAME_BUFFER + 1];
4527
char jobid4[DRMAA_JOBNAME_BUFFER + 1];
4528
char buffer[DRMAA_JOBNAME_BUFFER + 1];
4529
char contact[DRMAA_CONTACT_BUFFER + 1];
4530
int drmaa_errno = DRMAA_ERRNO_SUCCESS;
4533
drmaa_job_ids_t *bulk_job_ids = NULL;
4534
drmaa_attr_values_t *rusage = NULL;
4537
sleeper_job = NEXT_ARGV(argc, argv);
4540
if (drmaa_init("", diagnosis, DRMAA_ERROR_STRING_BUFFER) != DRMAA_ERRNO_SUCCESS) {
4541
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
4545
if (drmaa_get_contact(contact, DRMAA_CONTACT_BUFFER, diagnosis,
4546
DRMAA_ERROR_STRING_BUFFER) != DRMAA_ERRNO_SUCCESS) {
4547
fprintf(stderr, "drmaa_get_contact() failed: %s\n", diagnosis);
4551
printf ("Contact string is \"%s\"\n", contact);
4554
jt = create_sleeper_job_template(120, 0, 0);
4557
fprintf(stderr, "create_job_template() failed\n");
4562
drmaa_errno = drmaa_run_job(jobid1, DRMAA_JOBNAME_BUFFER, jt, diagnosis,
4563
DRMAA_ERROR_STRING_BUFFER);
4565
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
4566
fprintf(stderr, "drmaa_run_job() failed: %s %s\n", diagnosis,
4567
drmaa_strerror(drmaa_errno));
4572
drmaa_delete_job_template(jt, diagnosis, DRMAA_ERROR_STRING_BUFFER);
4574
/* Run short job. */
4575
jt = create_sleeper_job_template(10, 0, 0);
4578
fprintf(stderr, "create_job_template() failed\n");
4583
drmaa_errno = drmaa_run_job(jobid2, DRMAA_JOBNAME_BUFFER, jt, diagnosis,
4584
DRMAA_ERROR_STRING_BUFFER);
4586
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
4587
fprintf(stderr, "drmaa_run_job() failed: %s %s\n", diagnosis,
4588
drmaa_strerror(drmaa_errno));
4593
drmaa_delete_job_template(jt, diagnosis, DRMAA_ERROR_STRING_BUFFER);
4596
jt = create_sleeper_job_template(10, 1, 1);
4599
fprintf(stderr, "create_job_template() failed\n");
4604
drmaa_errno = drmaa_run_bulk_jobs(&bulk_job_ids, jt, 1, 2, 1,
4606
DRMAA_ERROR_STRING_BUFFER);
4608
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
4609
fprintf(stderr, "drmaa_run_bulk_jobs() failed: %s %s\n", diagnosis,
4610
drmaa_strerror(drmaa_errno));
4615
drmaa_delete_job_template(jt, diagnosis, DRMAA_ERROR_STRING_BUFFER);
4617
/* Release one of the bulk jobs */
4618
drmaa_get_next_job_id(bulk_job_ids, jobid3, DRMAA_JOBNAME_BUFFER);
4619
drmaa_get_next_job_id(bulk_job_ids, jobid4, DRMAA_JOBNAME_BUFFER);
4620
drmaa_release_job_ids(bulk_job_ids);
4622
drmaa_errno = drmaa_control(jobid3, DRMAA_CONTROL_RELEASE, diagnosis,
4623
DRMAA_ERROR_STRING_BUFFER);
4625
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
4626
fprintf(stderr, "drmaa_control() failed: %s %s\n", diagnosis,
4627
drmaa_strerror(drmaa_errno));
4632
/* Stop and restart the session. */
4633
if (drmaa_exit(diagnosis, DRMAA_ERROR_STRING_BUFFER) != DRMAA_ERRNO_SUCCESS) {
4634
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
4638
/* Sleep long enough for the short jobs to finish. */
4639
printf ("Sleeping for 60 seconds\n");
4641
printf ("Done sleeping\n");
4643
if (drmaa_init(contact, diagnosis, DRMAA_ERROR_STRING_BUFFER) != DRMAA_ERRNO_SUCCESS) {
4644
fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
4648
/* Wait for the long job to finish. */
4649
drmaa_errno = drmaa_wait(jobid1, buffer, DRMAA_JOBNAME_BUFFER, &stat,
4650
DRMAA_TIMEOUT_WAIT_FOREVER, &rusage, diagnosis,
4651
DRMAA_ERROR_STRING_BUFFER);
4653
/* we don't use rusage here - free it! */
4654
drmaa_release_attr_values(rusage);
4657
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
4658
fprintf(stderr, "drmaa_wait() failed: %s\n", diagnosis);
4663
/* Wait for the short job to finish. */
4664
drmaa_errno = drmaa_wait(jobid2, buffer, DRMAA_JOBNAME_BUFFER, &stat,
4665
DRMAA_TIMEOUT_WAIT_FOREVER, &rusage, diagnosis,
4666
DRMAA_ERROR_STRING_BUFFER);
4668
/* we don't use rusage here - free it! */
4669
drmaa_release_attr_values(rusage);
4672
if ((drmaa_errno != DRMAA_ERRNO_INVALID_JOB) &&
4673
(drmaa_errno != DRMAA_ERRNO_NO_RUSAGE)) {
4674
fprintf(stderr, "drmaa_wait() did not fail as expected: %s\n",
4680
/* Wait for the bulk jobs to finish. */
4681
drmaa_errno = drmaa_wait(jobid3, buffer, DRMAA_JOBNAME_BUFFER, &stat,
4682
DRMAA_TIMEOUT_WAIT_FOREVER, &rusage, diagnosis,
4683
DRMAA_ERROR_STRING_BUFFER);
4685
/* we don't use rusage here - free it! */
4686
drmaa_release_attr_values(rusage);
4689
/* This one must be found, because another task in this job is still
4690
* held. The only option is to complain about no rusage info. */
4691
if (drmaa_errno != DRMAA_ERRNO_NO_RUSAGE) {
4692
fprintf(stderr, "drmaa_wait() did not fail as expected: %s\n",
4698
drmaa_errno = drmaa_wait(jobid4, buffer, DRMAA_JOBNAME_BUFFER, &stat,
4699
DRMAA_TIMEOUT_NO_WAIT, &rusage, diagnosis,
4700
DRMAA_ERROR_STRING_BUFFER);
4702
/* we don't use rusage here - free it! */
4703
drmaa_release_attr_values(rusage);
4706
if (drmaa_errno != DRMAA_ERRNO_EXIT_TIMEOUT) {
4707
fprintf(stderr, "drmaa_wait() did not fail as expected: %s\n",
4713
drmaa_errno = drmaa_control(jobid4, DRMAA_CONTROL_TERMINATE, diagnosis,
4714
DRMAA_ERROR_STRING_BUFFER);
4716
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
4717
fprintf(stderr, "drmaa_control() failed: %s %s\n", diagnosis,
4718
drmaa_strerror(drmaa_errno));
4724
if (drmaa_exit(diagnosis, DRMAA_ERROR_STRING_BUFFER) != DRMAA_ERRNO_SUCCESS) {
4725
fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
4729
if (exit_code != 0) {
4741
fprintf(stderr, MSG_FILE_ERRORCLOSEINGXY_SS, input_path, strerror(errno));
4746
/* Thread entry point that runs submit_and_wait() and always returns NULL.
 * NOTE(review): the lines that derive `n` from `vp` are not visible in this
 * listing -- confirm against the full source. */
static void *submit_and_wait_thread (void *vp) {
4756
submit_and_wait (n);
4758
return (void *)NULL;
4761
static int submit_and_wait(int n)
4763
int ret = DRMAA_ERRNO_SUCCESS;
4765
ret = submit_sleeper(n);
4767
if (ret == DRMAA_ERRNO_SUCCESS) {
4768
ret = wait_all_jobs(n);
4774
/* Thread entry point of the sleeper submission threads; always returns NULL.
 * NOTE(review): the statements between the signature and the return are not
 * visible in this listing -- presumably they call submit_sleeper(); confirm. */
static void *submit_sleeper_thread (void *vp) {
4786
return (void *)NULL;
4789
/* Build a job template that runs exit_job: working directory
 * DRMAA_PLACEHOLDER_HD, remote command exit_job, an argument vector taken
 * from job_argv, joined output files and an output path.
 * NOTE(review): the lines that fill job_argv, the conditions selecting among
 * the three DRMAA_OUTPUT_PATH settings and the return statement are not
 * visible in this listing -- confirm against the full source. */
static drmaa_job_template_t *create_exit_job_template(const char *exit_job, int as_bulk_job)
4791
const char *job_argv[2];
4792
drmaa_job_template_t *jt = NULL;
4793
int ret = DRMAA_ERRNO_SUCCESS;
4795
ret = drmaa_allocate_job_template(&jt, NULL, 0);
4797
if (ret == DRMAA_ERRNO_SUCCESS) {
4798
ret = drmaa_set_attribute(jt, DRMAA_WD, DRMAA_PLACEHOLDER_HD, NULL, 0);
4801
if (ret == DRMAA_ERRNO_SUCCESS) {
4802
ret = drmaa_set_attribute(jt, DRMAA_REMOTE_COMMAND, exit_job, NULL, 0);
4805
if (ret == DRMAA_ERRNO_SUCCESS) {
4808
ret = drmaa_set_vector_attribute(jt, DRMAA_V_ARGV, job_argv, NULL, 0);
4811
if (ret == DRMAA_ERRNO_SUCCESS) {
4812
ret = drmaa_set_attribute(jt, DRMAA_JOIN_FILES, "y", NULL, 0);
4817
ret = drmaa_set_attribute(jt, DRMAA_OUTPUT_PATH, ":"DRMAA_PLACEHOLDER_HD"/DRMAA_JOB.$JOB_ID", NULL, 0);
4820
ret = drmaa_set_attribute(jt, DRMAA_OUTPUT_PATH, ":"DRMAA_PLACEHOLDER_HD"/DRMAA_JOB.$JOB_ID."DRMAA_PLACEHOLDER_INCR, NULL, 0);
4823
if (ret == DRMAA_ERRNO_SUCCESS) {
4824
/* no output please */
4825
ret = drmaa_set_attribute(jt, DRMAA_OUTPUT_PATH, ":/dev/null", NULL, 0);
4829
if (ret == DRMAA_ERRNO_SUCCESS) {
4837
/* Build a job template that runs the global sleeper_job with `seconds` as
 * its first argument: working directory DRMAA_PLACEHOLDER_HD, joined output
 * files, an output path and optionally the DRMAA_SUBMISSION_STATE_HOLD
 * submission state.
 * NOTE(review): the declaration of `buffer`, the conditions on as_bulk_job
 * and in_hold, and the return statement are not visible in this listing --
 * presumably in_hold selects the hold state; confirm against the full
 * source. */
static drmaa_job_template_t *create_sleeper_job_template(int seconds, int as_bulk_job, int in_hold)
4839
const char *job_argv[2];
4840
drmaa_job_template_t *jt = NULL;
4842
int ret = DRMAA_ERRNO_SUCCESS;
4844
ret = drmaa_allocate_job_template(&jt, NULL, 0);
4846
if (ret == DRMAA_ERRNO_SUCCESS) {
4847
ret = drmaa_set_attribute(jt, DRMAA_WD, DRMAA_PLACEHOLDER_HD, NULL, 0);
4850
if (ret == DRMAA_ERRNO_SUCCESS) {
4851
ret = drmaa_set_attribute(jt, DRMAA_REMOTE_COMMAND, sleeper_job, NULL, 0);
4854
if (ret == DRMAA_ERRNO_SUCCESS) {
4855
/* the sleep duration is passed to the job as a decimal string */
sprintf(buffer, "%d", seconds);
4856
job_argv[0] = buffer;
4858
ret = drmaa_set_vector_attribute(jt, DRMAA_V_ARGV, job_argv, NULL, 0);
4861
if (ret == DRMAA_ERRNO_SUCCESS) {
4862
ret = drmaa_set_attribute(jt, DRMAA_JOIN_FILES, "y", NULL, 0);
4865
if (ret == DRMAA_ERRNO_SUCCESS) {
4868
ret = drmaa_set_attribute(jt, DRMAA_OUTPUT_PATH, ":"DRMAA_PLACEHOLDER_HD"/DRMAA_JOB.$JOB_ID", NULL, 0);
4871
ret = drmaa_set_attribute(jt, DRMAA_OUTPUT_PATH, ":"DRMAA_PLACEHOLDER_HD"/DRMAA_JOB.$JOB_ID."DRMAA_PLACEHOLDER_INCR, NULL, 0);
4874
/* no output please */
4875
ret = drmaa_set_attribute(jt, DRMAA_OUTPUT_PATH, ":/dev/null", NULL, 0);
4879
if (ret == DRMAA_ERRNO_SUCCESS) {
4881
ret = drmaa_set_attribute(jt, DRMAA_JS_STATE, DRMAA_SUBMISSION_STATE_HOLD, NULL, 0);
4885
if (ret == DRMAA_ERRNO_SUCCESS) {
4893
/* Create a 10 second sleeper job template, submit it n times through
 * do_submit() and delete the template afterwards.
 * NOTE(review): a guard on `jt` and the return statement are not visible in
 * this listing -- confirm against the full source. */
static int submit_sleeper(int n)
4895
drmaa_job_template_t *jt;
4896
int ret = DRMAA_ERRNO_SUCCESS;
4898
jt = create_sleeper_job_template(10, 0, 0);
4901
ret = do_submit(jt, n);
4903
/* We don't care about the error code from this one. It doesn't affect
4905
drmaa_delete_job_template(jt, NULL, 0);
4912
static int submit_input_mirror(int n, const char *mirror_job,
4913
const char *input_path, const char *output_path,
4914
const char *error_path, int join, char* hostname)
4916
drmaa_job_template_t *jt = NULL;
4918
int ret = DRMAA_ERRNO_SUCCESS;
4920
ret = drmaa_allocate_job_template(&jt, NULL, 0);
4922
if (ret == DRMAA_ERRNO_SUCCESS) {
4923
ret = drmaa_set_attribute(jt, DRMAA_WD, DRMAA_PLACEHOLDER_HD, NULL, 0);
4926
if (ret == DRMAA_ERRNO_SUCCESS) {
4927
ret = drmaa_set_attribute(jt, DRMAA_REMOTE_COMMAND, mirror_job, NULL, 0);
4931
* we use the local host for the cat job, because when job is running
4932
* on other hosts there my be NFS problems when reading the input_path file
4934
if (ret == DRMAA_ERRNO_SUCCESS && hostname != NULL) {
4935
snprintf(buffer, 10000, "-l h=%s", hostname);
4936
ret = drmaa_set_attribute(jt, DRMAA_NATIVE_SPECIFICATION, buffer, NULL, 0);
4939
if (ret == DRMAA_ERRNO_SUCCESS) {
4941
ret = drmaa_set_attribute(jt, DRMAA_JOIN_FILES, "y", NULL, 0);
4944
ret = drmaa_set_attribute(jt, DRMAA_JOIN_FILES, "n", NULL, 0);
4948
if (ret == DRMAA_ERRNO_SUCCESS) {
4950
strcpy(buffer, ":");
4951
strcat(buffer, input_path);
4952
ret = drmaa_set_attribute(jt, DRMAA_INPUT_PATH, buffer, NULL, 0);
4956
if (ret == DRMAA_ERRNO_SUCCESS) {
4958
strcpy(buffer, ":");
4959
strcat(buffer, output_path);
4960
ret = drmaa_set_attribute(jt, DRMAA_OUTPUT_PATH, buffer, NULL, 0);
4964
if (ret == DRMAA_ERRNO_SUCCESS) {
4966
strcpy(buffer, ":");
4967
strcat(buffer, error_path);
4968
ret = drmaa_set_attribute(jt, DRMAA_ERROR_PATH, buffer, NULL, 0);
4972
if (ret == DRMAA_ERRNO_SUCCESS) {
4973
ret = do_submit(jt, n);
4975
/* We don't care about the error code here because it doesn't affect
4977
drmaa_delete_job_template(jt, NULL, 0);
4983
/* Submit the job template jt n times with drmaa_run_job() and report each
 * submission on stdout. DRMAA_ERRNO_NO_ACTIVE_SESSION is treated as success
 * for the MT_EXIT_DURING_SUBMIT_OR_WAIT and MT_EXIT_DURING_SUBMIT test
 * cases; any other error is remembered in `error`.
 * NOTE(review): the declarations of `i` and `jobid`, the retry loop around
 * drmaa_run_job() and the return statement are not visible in this listing
 * -- presumably `error` is returned; confirm against the full source. */
static int do_submit(drmaa_job_template_t *jt, int n)
4986
char diagnosis[1024];
4988
int drmaa_errno = DRMAA_ERRNO_SUCCESS;
4989
int error = DRMAA_ERRNO_SUCCESS;
4992
for (i=0; i<n; i++) {
4996
drmaa_errno = drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis, sizeof(diagnosis)-1);
4998
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
4999
printf("failed submitting job (%s)\n", drmaa_strerror(drmaa_errno));
5002
/* Only retry on "try again" error. */
5003
if (drmaa_errno == DRMAA_ERRNO_TRY_LATER) {
5004
printf("retry: %s\n", diagnosis);
5012
if (drmaa_errno == DRMAA_ERRNO_SUCCESS) {
5013
printf("submitted job \"%s\"\n", jobid);
5015
printf("unable to submit job\n");
5018
if (((test_case == MT_EXIT_DURING_SUBMIT_OR_WAIT) ||
5019
(test_case == MT_EXIT_DURING_SUBMIT)) &&
5020
(drmaa_errno == DRMAA_ERRNO_NO_ACTIVE_SESSION)) {
5021
/* It's supposed to do that. */
5022
drmaa_errno = DRMAA_ERRNO_SUCCESS;
5025
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
5026
/* If there is ever an error, we will return an error. */
5027
error = drmaa_errno;
5034
/* Repeatedly call drmaa_wait() on DRMAA_JOB_IDS_SESSION_ANY while it
 * succeeds, printing each reaped job id. After the loop,
 * DRMAA_ERRNO_INVALID_JOB is mapped to success, as are
 * DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE for MT_EXIT_DURING_SUBMIT_OR_WAIT
 * and DRMAA_ERRNO_NO_RUSAGE for ST_SUBMIT_NO_RUN_WAIT.
 * NOTE(review): the declarations of `jobid` and `stat`, the `do {` opener,
 * the use of `n` to stop after the last job and the return statement are not
 * visible in this listing -- confirm against the full source. */
static int wait_all_jobs(int n)
5037
int drmaa_errno = DRMAA_ERRNO_SUCCESS;
5039
drmaa_attr_values_t *rusage = NULL;
5042
drmaa_errno = drmaa_wait(DRMAA_JOB_IDS_SESSION_ANY, jobid, sizeof(jobid)-1, &stat, DRMAA_TIMEOUT_WAIT_FOREVER, &rusage, NULL, 0);
5043
/* we don't use rusage here - free it! */
5044
drmaa_release_attr_values(rusage);
5047
if (drmaa_errno == DRMAA_ERRNO_SUCCESS) {
5048
printf("waited job \"%s\"\n", jobid);
5051
printf("waited for last job\n");
5055
} else if (drmaa_errno != DRMAA_ERRNO_INVALID_JOB) {
5056
printf("drmaa_wait() returned %s\n", drmaa_strerror(drmaa_errno));
5058
} while (drmaa_errno == DRMAA_ERRNO_SUCCESS);
5060
/* that means we got all */
5061
if (drmaa_errno == DRMAA_ERRNO_INVALID_JOB) {
5062
printf("no more jobs to wait\n");
5063
drmaa_errno = DRMAA_ERRNO_SUCCESS;
5065
else if (((drmaa_errno == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) &&
5066
(test_case == MT_EXIT_DURING_SUBMIT_OR_WAIT)) ||
5067
((drmaa_errno == DRMAA_ERRNO_NO_RUSAGE) &&
5068
(test_case == ST_SUBMIT_NO_RUN_WAIT))) {
5069
/* It's supposed to do that. */
5070
drmaa_errno = DRMAA_ERRNO_SUCCESS;
5076
/* Call drmaa_wait() on DRMAA_JOB_IDS_SESSION_ANY once per job for n jobs,
 * without requesting rusage, and print each reaped job id. Errors are
 * remembered in `error`.
 * NOTE(review): the declarations of `i`, `jobid` and `stat`, the retry loop
 * around drmaa_wait() and the return statement are not visible in this
 * listing -- presumably `error` is returned; confirm against the full
 * source. */
static int wait_n_jobs(int n)
5080
int drmaa_errno = DRMAA_ERRNO_SUCCESS;
5081
int error = DRMAA_ERRNO_SUCCESS;
5084
for (i=0; i<n; i++) {
5087
drmaa_errno = drmaa_wait(DRMAA_JOB_IDS_SESSION_ANY, jobid,
5088
sizeof(jobid)-1, &stat,
5089
DRMAA_TIMEOUT_WAIT_FOREVER, NULL, NULL, 0);
5091
if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
5092
printf("failed waiting for job (%s)\n", drmaa_strerror(drmaa_errno));
5095
/* Only retry on "try again" error. */
5096
if (drmaa_errno == DRMAA_ERRNO_TRY_LATER) {
5097
printf("retry...\n");
5105
if (drmaa_errno == DRMAA_ERRNO_SUCCESS) {
5106
printf("waited job \"%s\"\n", jobid);
5108
/* If there is ever an error, we will return an error. */
5109
error = drmaa_errno;
5116
static void report_session_key(void)
5118
if (is_sun_grid_engine) {
5119
const char *session_key = getenv("SGE_SESSION_KEY");
5121
printf("got \"%s\" as session key\n", session_key);
5123
printf("no session key set\n");
5129
static init_signal_handling()
5131
struct sigaction nact;
5133
nact.sa_handler = SIG_IGN;
5135
sigaction(SIGPIPE, &act, NULL);
5140
const struct drmaa_errno_descr_s {
5143
} errno_vector[] = {
5144
{ "DRMAA_ERRNO_SUCCESS", DRMAA_ERRNO_SUCCESS },
5145
{ "DRMAA_ERRNO_INTERNAL_ERROR", DRMAA_ERRNO_INTERNAL_ERROR },
5146
{ "DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE", DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE },
5147
{ "DRMAA_ERRNO_AUTH_FAILURE", DRMAA_ERRNO_AUTH_FAILURE },
5148
{ "DRMAA_ERRNO_INVALID_ARGUMENT", DRMAA_ERRNO_INVALID_ARGUMENT },
5149
{ "DRMAA_ERRNO_NO_ACTIVE_SESSION", DRMAA_ERRNO_NO_ACTIVE_SESSION },
5150
{ "DRMAA_ERRNO_NO_MEMORY", DRMAA_ERRNO_NO_MEMORY },
5151
{ "DRMAA_ERRNO_INVALID_CONTACT_STRING", DRMAA_ERRNO_INVALID_CONTACT_STRING },
5152
{ "DRMAA_ERRNO_DEFAULT_CONTACT_STRING_ERROR", DRMAA_ERRNO_DEFAULT_CONTACT_STRING_ERROR },
5153
{ "DRMAA_ERRNO_DRMS_INIT_FAILED", DRMAA_ERRNO_DRMS_INIT_FAILED },
5154
{ "DRMAA_ERRNO_ALREADY_ACTIVE_SESSION", DRMAA_ERRNO_ALREADY_ACTIVE_SESSION },
5155
{ "DRMAA_ERRNO_DRMS_EXIT_ERROR", DRMAA_ERRNO_DRMS_EXIT_ERROR },
5156
{ "DRMAA_ERRNO_INVALID_ATTRIBUTE_FORMAT", DRMAA_ERRNO_INVALID_ATTRIBUTE_FORMAT },
5157
{ "DRMAA_ERRNO_INVALID_ATTRIBUTE_VALUE", DRMAA_ERRNO_INVALID_ATTRIBUTE_VALUE },
5158
{ "DRMAA_ERRNO_CONFLICTING_ATTRIBUTE_VALUES", DRMAA_ERRNO_CONFLICTING_ATTRIBUTE_VALUES },
5159
{ "DRMAA_ERRNO_TRY_LATER", DRMAA_ERRNO_TRY_LATER },
5160
{ "DRMAA_ERRNO_DENIED_BY_DRM", DRMAA_ERRNO_DENIED_BY_DRM },
5161
{ "DRMAA_ERRNO_INVALID_JOB", DRMAA_ERRNO_INVALID_JOB },
5162
{ "DRMAA_ERRNO_RESUME_INCONSISTENT_STATE", DRMAA_ERRNO_RESUME_INCONSISTENT_STATE },
5163
{ "DRMAA_ERRNO_SUSPEND_INCONSISTENT_STATE", DRMAA_ERRNO_SUSPEND_INCONSISTENT_STATE },
5164
{ "DRMAA_ERRNO_HOLD_INCONSISTENT_STATE", DRMAA_ERRNO_HOLD_INCONSISTENT_STATE },
5165
{ "DRMAA_ERRNO_RELEASE_INCONSISTENT_STATE", DRMAA_ERRNO_RELEASE_INCONSISTENT_STATE },
5166
{ "DRMAA_ERRNO_EXIT_TIMEOUT", DRMAA_ERRNO_EXIT_TIMEOUT },
5167
{ "DRMAA_ERRNO_NO_RUSAGE", DRMAA_ERRNO_NO_RUSAGE },
5172
/****** test_drmaa/str2drmaa_errno() ********************************************
5174
* str2drmaa_errno() -- Map string into DRMAA errno constant
5177
* static int str2drmaa_errno(const char *str)
5180
* Map string into DRMAA errno constant.
5183
* const char *str - name of a DRMAA errno constant, e.g. "DRMAA_ERRNO_SUCCESS"
5186
* static int - DRMAA_ERRNO_* constant or -1 on failure
5187
*******************************************************************************/
5188
static int str2drmaa_errno(const char *str)
5191
for (i=0; errno_vector[i].descr != NULL; i++)
5192
if (!strcmp(errno_vector[i].descr, str))
5193
return errno_vector[i].drmaa_errno;
5197
/****** test_drmaa/drmaa_errno2str() *******************************************
5199
* drmaa_errno2str() -- Map DRMAA errno constant into string
5202
* static const char* drmaa_errno2str(int drmaa_errno)
5205
* Map DRMAA errno constant into string
5208
* int drmaa_errno - Any DRMAA errno
5211
* static const char* - String representation
5212
*******************************************************************************/
5213
static const char *drmaa_errno2str(int drmaa_errno)
5216
for (i=0; errno_vector[i].descr != NULL; i++)
5217
if (errno_vector[i].drmaa_errno == drmaa_errno)
5218
return errno_vector[i].descr;
5219
return "DRMAA_ERRNO_???UNKNOWN???";
5222
const struct ctrl_descr_s {
5226
{ "DRMAA_CONTROL_SUSPEND", DRMAA_CONTROL_SUSPEND },
5227
{ "DRMAA_CONTROL_RESUME", DRMAA_CONTROL_RESUME },
5228
{ "DRMAA_CONTROL_HOLD", DRMAA_CONTROL_HOLD },
5229
{ "DRMAA_CONTROL_RELEASE", DRMAA_CONTROL_RELEASE },
5230
{ "DRMAA_CONTROL_TERMINATE", DRMAA_CONTROL_TERMINATE },
5234
/****** test_drmaa/drmaa_ctrl2str() ********************************************
5236
* drmaa_ctrl2str() -- Map DRMAA control constant into string
5239
* static const char* drmaa_ctrl2str(int ctrl)
5242
* Map DRMAA control constant into string
5245
* int ctrl - Any DRMAA_CONTROL_* value
5248
* static const char* - DRMAA constant name or "unknown" string
5250
*******************************************************************************/
5251
static const char *drmaa_ctrl2str(int ctrl)
5254
for (i=0; ctrl_vector[i].descr != NULL; i++)
5255
if (ctrl_vector[i].ctrl == ctrl)
5256
return ctrl_vector[i].descr;
5257
return "DRMAA_CONTROL_???UNKNOWN???";
5260
/****** test_drmaa/str2drmaa_ctrl() ********************************************
5262
* str2drmaa_ctrl() -- Map string into DRMAA control constant
5265
* static int str2drmaa_ctrl(const char *str)
5268
* Map string into DRMAA control constant.
5271
* const char *str - name of a DRMAA control constant, e.g. "DRMAA_CONTROL_HOLD"
5274
* static int - DRMAA_CONTROL_* constant or -1 on failure
5275
*******************************************************************************/
5276
static int str2drmaa_ctrl(const char *str)
5279
for (i=0; ctrl_vector[i].descr != NULL; i++) {
5280
if (!strcmp(ctrl_vector[i].descr, str))
5281
return ctrl_vector[i].ctrl;
5286
const struct state_descr_s {
5289
} state_vector[] = {
5290
{ "DRMAA_PS_UNDETERMINED", DRMAA_PS_UNDETERMINED },
5291
{ "DRMAA_PS_QUEUED_ACTIVE", DRMAA_PS_QUEUED_ACTIVE },
5292
{ "DRMAA_PS_SYSTEM_ON_HOLD", DRMAA_PS_SYSTEM_ON_HOLD },
5293
{ "DRMAA_PS_USER_ON_HOLD ", DRMAA_PS_USER_ON_HOLD },
5294
{ "DRMAA_PS_USER_SYSTEM_ON_HOLD", DRMAA_PS_USER_SYSTEM_ON_HOLD },
5295
{ "DRMAA_PS_RUNNING", DRMAA_PS_RUNNING },
5296
{ "DRMAA_PS_SYSTEM_SUSPENDED", DRMAA_PS_SYSTEM_SUSPENDED },
5297
{ "DRMAA_PS_USER_SUSPENDED", DRMAA_PS_USER_SUSPENDED },
5298
{ "DRMAA_PS_USER_SYSTEM_SUSPENDED", DRMAA_PS_USER_SYSTEM_SUSPENDED },
5299
{ "DRMAA_PS_DONE", DRMAA_PS_DONE },
5300
{ "DRMAA_PS_FAILED", DRMAA_PS_FAILED },
5304
/****** test_drmaa/drmaa_state2str() *******************************************
5306
* drmaa_state2str() -- Map DRMAA state constant into string
5309
* static const char* drmaa_state2str(int state)
5312
* Map DRMAA state constant into string
5315
* int state - Any DRMAA_PS_* value.
5318
* static const char* - DRMAA_PS_* constant name or "DRMAA_PS_???UNKNOWN???"
5319
*******************************************************************************/
5320
static const char *drmaa_state2str(int state)
5323
for (i=0; state_vector[i].descr != NULL; i++)
5324
if (state_vector[i].state == state)
5325
return state_vector[i].descr;
5326
return "DRMAA_PS_???UNKNOWN???";
5329
/****** test_drmaa/str2drmaa_state() *******************************************
5331
* str2drmaa_state() -- Map string into DRMAA state constant
5334
* int str2drmaa_state(const char *str)
5337
* Map string into DRMAA state constant.
5344
*******************************************************************************/
5345
int str2drmaa_state(const char *str)
5348
for (i=0; state_vector[i].descr != NULL; i++)
5349
if (!strcmp(state_vector[i].descr, str))
5350
return state_vector[i].state;
5355
/****** test_drmaa/report_wrong_job_finish() ***********************************
5357
* report_wrong_job_finish() -- Report how job finished
5360
* static void report_wrong_job_finish(const char *comment, const char
5364
* Report how job finished based on the stat value returned by drmaa_wait().
5365
* The information is printed to stderr.
5368
* const char *comment - provided by the caller
5369
* const char *jobid - provided by the caller
5370
* int stat - stat value as returned by drmaa_wait()
5371
*******************************************************************************/
5372
static void report_wrong_job_finish(const char *comment, const char *jobid, int stat)
5374
int aborted, exited, exit_status, signaled;
5376
drmaa_wifaborted(&aborted, stat, NULL, 0);
5378
fprintf(stderr, "%s: job \"%s\" never ran\n", comment, jobid);
5380
drmaa_wifexited(&exited, stat, NULL, 0);
5382
drmaa_wexitstatus(&exit_status, stat, NULL, 0);
5383
fprintf(stderr, "%s: job \"%s\" finished regularly with exit status %d\n",
5384
comment, jobid, exit_status);
5386
drmaa_wifsignaled(&signaled, stat, NULL, 0);
5388
char termsig[DRMAA_SIGNAL_BUFFER+1];
5389
drmaa_wtermsig(termsig, DRMAA_SIGNAL_BUFFER, stat, NULL, 0);
5390
fprintf(stderr, "%s: job \"%s\" finished due to signal %s\n",
5391
comment, jobid, termsig);
5393
fprintf(stderr, "%s: job \"%s\" finished with unclear conditions\n",
5399
/* Search command_line for a "-t" option, read the start value and, when
 * present before the next blank, the end ("-") and increment (":") values
 * into *start, *end and *incr, and copy the rest of the command line over
 * the option text.
 * NOTE(review): defaults, range checks, the error path that leads to the
 * "could not parse" message and the return value are not visible in this
 * listing -- confirm against the full source.
 * NOTE(review): the strcpy() calls copy within the same string (source and
 * destination overlap), which the C standard leaves undefined; memmove()
 * would be the safe form.
 * NOTE(review): end_value/incr_value are compared with end_t_option even
 * though strstr() may have returned NULL for it -- check the hidden lines. */
static bool extract_array_command(char *command_line, int *start, int *end, int *incr)
5402
char *t_option = NULL;
5403
char *start_value = NULL;
5404
char *end_value = NULL;
5405
char *incr_value = NULL;
5406
char *end_t_option = NULL;
5412
t_option = strstr(command_line, "-t");
5414
if (t_option != NULL) {
5416
/* skips "-t" and one more character, presumably the separating blank */
start_value = t_option + 3;
5418
*start = atoi(start_value);
5424
end_t_option = strstr(start_value, " ");
5425
end_value = strstr(start_value, "-");
5426
incr_value = strstr(start_value, ":");
5428
if ((end_value != NULL) && (end_value < end_t_option)) {
5429
*end = atoi(end_value+1);
5435
if ((incr_value != NULL) && (incr_value < end_t_option)) {
5436
*incr = atoi(incr_value+1);
5448
if (end_t_option != NULL) {
5449
strcpy(t_option, end_t_option+1);
5459
if (end_t_option != NULL) {
5460
strcpy(t_option, end_t_option);
5469
fprintf(stderr, "could not parse \"%s\" for -t option\n", command_line);
5471
if (end_t_option != NULL) {
5472
strcpy(t_option, end_t_option);
5481
/* Walk the NULL-terminated array `order`.
 * NOTE(review): the loop body is not visible in this listing -- presumably
 * it frees every group and then the array itself; confirm. */
static void free_order(int **order)
5484
while (order[i] != NULL) {
5491
/* Submit njobs sleeper jobs in hold state, each with its own native
 * specification (as a bulk job when extract_array_command() finds a "-t"
 * option), release them all at once, and check with
 * job_run_sequence_verify() that they finish in the order described by
 * jsr_str.
 * NOTE(review): several declarations (`jobid`, `stat`, `start`, `end`,
 * `incr`, `counter`), the branch selecting bulk versus single submission,
 * the `do {` opener and all return statements are not visible in this
 * listing -- confirm against the full source.
 * NOTE(review): all_jobids has 10 slots while bulk submissions append one id
 * per task; verify that callers cannot exceed it. */
static int test_dispatch_order_njobs(int njobs, test_job_t job[], char *jsr_str)
5493
char diagnosis[DRMAA_ERROR_STRING_BUFFER];
5494
const char *all_jobids[10];
5496
drmaa_job_template_t *jt;
5497
int drmaa_errno, i, pos = 0;
5499
int **order = job_run_sequence_parse(jsr_str);
5502
if (order == NULL) {
5503
fprintf(stderr, "failed parsing job run sequence string\n");
5507
init_jobids(all_jobids, njobs);
5509
/* submit jobs in hold */
5510
for (i = 0; i < njobs; i++) {
5514
bool bulk_job = false;
5516
bulk_job = extract_array_command(job[i].native, &start, &end, &incr);
5518
jt = create_sleeper_job_template(job[i].time, 0, 1);
5519
drmaa_set_attribute(jt, DRMAA_NATIVE_SPECIFICATION, job[i].native, NULL, 0);
5522
drmaa_job_ids_t *bulkJobId;
5523
if (drmaa_run_bulk_jobs(&bulkJobId, jt, start, end, incr,
5524
diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
5525
fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis);
5530
while (drmaa_get_next_job_id(bulkJobId, jobid, sizeof(jobid)-1) == DRMAA_ERRNO_SUCCESS) {
5531
all_jobids[pos++] = strdup(jobid);
5532
printf("submitted job \"%s\"\n", jobid);
5535
array_job_run_sequence_adapt(order,i,counter);
5536
drmaa_release_job_ids(bulkJobId);
5538
if (drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) {
5539
fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis);
5541
free_jobids(all_jobids, njobs);
5545
printf("submitted job \"%s\"\n", jobid);
5546
all_jobids[pos++] = strdup(jobid);
5548
drmaa_delete_job_template(jt, NULL, 0);
5550
all_jobids[pos] = NULL;
5555
/* release all jobs of the session in one operation to ensure they get runnable at once for scheduler */
5556
if (drmaa_control(DRMAA_JOB_IDS_SESSION_ALL, DRMAA_CONTROL_RELEASE, diagnosis, sizeof(diagnosis)-1)!=DRMAA_ERRNO_SUCCESS) {
5557
fprintf(stderr, "drmaa_control(DRMAA_JOB_IDS_SESSION_ALL, DRMAA_CONTROL_RELEASE) failed: %s\n", diagnosis);
5559
free_jobids(all_jobids, njobs);
5564
drmaa_errno = drmaa_wait(DRMAA_JOB_IDS_SESSION_ANY, jobid, sizeof(jobid)-1, &stat,
5565
DRMAA_TIMEOUT_WAIT_FOREVER, NULL, NULL, 0);
5566
if (drmaa_errno == DRMAA_ERRNO_SUCCESS) {
5568
printf("waited job \"%s\"\n", jobid);
5570
/* map jobid to job index */
5572
for (i = 0; i < njobs; i++) {
5573
if (all_jobids[i] != NULL && strcmp(jobid, all_jobids[i]) == 0) {
5579
fprintf(stderr, "drmaa_wait() returned unexpected job: %s\n", jobid);
5580
free_jobids(all_jobids, njobs);
5586
if (job_run_sequence_verify(pos, all_jobids, order)) {
5587
free_jobids(all_jobids, njobs);
5592
/* NULL-ify finished ones */
5593
FREE(all_jobids[pos]);
5596
printf("waited for last job\n");
5599
} else if (drmaa_errno != DRMAA_ERRNO_INVALID_JOB) {
5600
printf("drmaa_wait() returned %s\n", drmaa_strerror(drmaa_errno));
5602
} while (drmaa_errno == DRMAA_ERRNO_SUCCESS);
5604
free_jobids(all_jobids, njobs);
5609
/* Check that the job with index pos was allowed to finish now: find the
 * group in `order` that contains pos, then complain on stderr about every
 * job of a previous group whose entry in all_jobids is still non-NULL.
 * NOTE(review): the declaration of `group`, the bounds of the loop over the
 * previous groups and the return values are not visible in this listing --
 * the caller treats a non-zero result as failure; confirm against the full
 * source. */
static int job_run_sequence_verify(int pos, const char *all_jobids[], int *order[])
5611
int test_index, i, j;
5612
int found_group = 0;
5615
/* search the group this job belongs to */
5616
for (i=0; order[i]; i++) {
5618
for (j=0; group[j] != -1; j++) {
5619
if (group[j] == pos) {
5628
fprintf(stderr, "test broken: could not find job index %d in finish order scheme\n", pos);
5632
/* complain about previous group job that did not finish earlier */
5635
for (j=0; order[i][j] != -1; j++) {
5636
test_index = order[i][j];
5637
if (all_jobids[test_index] != NULL) {
5638
fprintf(stderr, "order broken: job \"%s\" [%d] did not finish before job \"%s\" [%d]\n",
5639
all_jobids[test_index], test_index, all_jobids[pos], pos);
5649
/****** test_drmaa/job_run_sequence_parse() ************************************
5651
* job_run_sequence_parse() -- Parse a job run sequence string
5654
* static int** job_run_sequence_parse(char *jrs_str)
5657
* Parse job run sequence strings into order data structures.
5660
* char *jrs_str - job run sequence string, e.g. "0,2-1"
5666
* For example, the strings "0-1-2", "0,2-1" and "0,1-2-3" are parsed
5667
* into data structures like the following ones:
5669
* int rr0[] = { 0, -1 };
5670
* int rr1[] = { 1, -1 };
5671
* int rr2[] = { 2, -1 };
5672
* int *rr_order[] = { rr0, rr1, rr2, NULL };
5674
* int bf0[] = { 0, 2, -1 };
5675
* int bf1[] = { 1, -1 };
5676
* int *bf_order[] = { bf0, bf1, NULL };
5678
* int st0[] = { 0, 1, -1 };
5679
* int st1[] = { 2, -1 };
5680
* int st2[] = { 3, -1 };
5681
* int *st_order[] = { st0, st1, st2, NULL };
5682
*******************************************************************************/
5683
#define GROUP_CHUNK 5
#define NUMBER_CHUNK 10
/*
 * Parse a job run sequence string such as "0,2-1" into a NULL-terminated
 * array of groups, each group being an int array terminated by -1.
 * Groups are separated by '-', the numbers inside a group by ','; empty
 * groups and empty numbers are skipped, non-numeric text counts as 0.
 *
 * jrs_str - the sequence string; it is only read
 *
 * Returns the heap-allocated sequence (the caller owns the array and every
 * group), or NULL when jrs_str is NULL or memory runs out.
 */
static int **job_run_sequence_parse(char *jrs_str)
{
   const char *cursor = jrs_str;
   int **sequence = NULL;
   size_t groups_total = GROUP_CHUNK;
   size_t groups_used = 0;
   size_t g;

   if (jrs_str == NULL) {
      return NULL;
   }

   printf("parsing sequence: \"%s\"\n", jrs_str);

   /* one extra slot for the terminating NULL */
   sequence = malloc(sizeof(*sequence) * (groups_total + 1));
   if (sequence == NULL) {
      return NULL;
   }
   sequence[0] = NULL;

   /* groups are delimited by dashes '-' */
   for (;;) {
      const char *group_end;
      int *group;
      size_t numbers_total = NUMBER_CHUNK;
      size_t numbers_used = 0;

      cursor += strspn(cursor, "-");
      if (*cursor == '\0') {
         break;
      }
      group_end = cursor + strcspn(cursor, "-");

      if (groups_used == groups_total) {
         int **grown;

         /* sizes are in bytes: scale the element count by the element size */
         groups_total += GROUP_CHUNK;
         grown = realloc(sequence, sizeof(*sequence) * (groups_total + 1));
         if (grown == NULL) {
            goto fail;
         }
         sequence = grown;
      }

      /* one extra slot for the terminating -1 */
      group = malloc(sizeof(*group) * (numbers_total + 1));
      if (group == NULL) {
         goto fail;
      }

      /* sequence numbers within a group are delimited by comma ',' */
      for (;;) {
         const char *token_end;
         char *parsed_end = NULL;
         long value;

         cursor += strspn(cursor, ",");
         if (cursor >= group_end) {
            break;
         }
         token_end = cursor + strcspn(cursor, ",-");

         if (numbers_used == numbers_total) {
            int *grown;

            numbers_total += NUMBER_CHUNK;
            grown = realloc(group, sizeof(*group) * (numbers_total + 1));
            if (grown == NULL) {
               free(group);
               goto fail;
            }
            group = grown;
         }

         value = strtol(cursor, &parsed_end, 10);
         if (parsed_end > token_end) {
            /* a blank token let strtol() read into the next one */
            value = 0;
         }
         group[numbers_used++] = (int)value;
         cursor = token_end;
      }
      group[numbers_used] = -1;

      sequence[groups_used++] = group;
      sequence[groups_used] = NULL;
      cursor = group_end;
   }

   return sequence;

fail:
   for (g = 0; g < groups_used; g++) {
      free(sequence[g]);
   }
   free(sequence);
   return NULL;
}
5744
/* Rewrite the finish-order `sequence` after job index job_id turned out to
 * be an array job with `count` tasks: further indices (job_id + 1 ...) are
 * written behind job_id inside its group, and every index greater than
 * job_id is shifted up by count - 1. The modified order is printed to
 * stdout.
 * NOTE(review): the declarations and updates of `x`, `y` and `dy` and the
 * statements between the visible loops are not visible in this listing --
 * in particular whether the group array is grown before the extra entries
 * are written; confirm against the full source. */
static void array_job_run_sequence_adapt(int **sequence, int job_id, int count)
5753
printf("modify finish order:\n");
5755
while (sequence[x] != NULL) {
5757
while (sequence[x][y] != -1) {
5759
if (sequence[x][y] == job_id) {
5762
printf("%d ", sequence[x][y]);
5764
while (sequence[x][y] != -1) {
5768
for (; dy < (count-1); dy++) {
5769
sequence[x][y+dy] = job_id + dy + 1;
5770
sequence[x][y+dy+1] = -1;
5771
printf("[%d] ", sequence[x][y+dy]);
5776
else if (sequence[x][y] > job_id) {
5777
sequence[x][y] += (count-1);
5778
printf("(%d) ", sequence[x][y]);
5782
printf("%d ", sequence[x][y]);
5792
/*
 * Set a job template attribute to ":" followed by value, i.e. a path with an
 * empty host part.
 *
 * jt              - job template to modify
 * name            - attribute name
 * value           - path without the leading colon
 * error_diagnosis - buffer for an error message, may be NULL
 * error_diag_len  - size of error_diagnosis
 *
 * Returns the result of drmaa_set_attribute(), or
 * DRMAA_ERRNO_INVALID_ARGUMENT when value is NULL or too long for the local
 * buffer.
 */
static int set_path_attribute_plus_colon(drmaa_job_template_t *jt,
                                         const char *name, const char *value,
                                         char *error_diagnosis,
                                         size_t error_diag_len)
{
   char path_buffer[10000];
   int len;

   if (value == NULL) {
      if (error_diagnosis != NULL && error_diag_len > 0) {
         snprintf(error_diagnosis, error_diag_len, "no path value given");
      }
      return DRMAA_ERRNO_INVALID_ARGUMENT;
   }

   /* bounded formatting: an over-long path is rejected, never written past
    * the end of path_buffer */
   len = snprintf(path_buffer, sizeof(path_buffer), ":%s", value);
   if (len < 0 || (size_t)len >= sizeof(path_buffer)) {
      if (error_diagnosis != NULL && error_diag_len > 0) {
         snprintf(error_diagnosis, error_diag_len, "path value is too long");
      }
      return DRMAA_ERRNO_INVALID_ARGUMENT;
   }

   return drmaa_set_attribute(jt, name, path_buffer, error_diagnosis,
                              error_diag_len);
}
5804
static bool test_error_code(char *name, int code, int expected)
5806
if (code != expected) {
5807
fprintf(stderr, "%s = %d; should be %d\n", name, code, expected);