1
/* Template for the remote job exportation interface to GNU Make.
2
Copyright (C) 1988, 1989, 1992, 1993, 1996 Free Software Foundation, Inc.
3
This file is part of GNU Make.
5
GNU Make is free software; you can redistribute it and/or modify
6
it under the terms of the GNU General Public License as published by
7
the Free Software Foundation; either version 2, or (at your option)
10
GNU Make is distributed in the hope that it will be useful,
11
but WITHOUT ANY WARRANTY; without even the implied warranty of
12
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
GNU General Public License for more details.
15
You should have received a copy of the GNU General Public License
16
along with GNU Make; see the file COPYING. If not, write to
17
the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18
Boston, MA 02111-1307, USA. */
21
#include <string.h> /* strerror */
22
#include <sys/types.h>
23
#include <sys/stat.h> /* file permissions */
25
#include <sys/wait.h> /* waitpid */
27
#include <stdio.h> /* remove */
28
#include <stdlib.h> /* getenv() */
29
#include <netdb.h> /* MAXHOSTNAMELEN */
30
#include <sys/param.h>
31
#include <unistd.h> /* usleep */
32
#include <limits.h> /* PATH_MAX */
33
#include <signal.h> /* kill */
36
# include <sys/param.h>
40
#include <rpc/types.h> /* MAXHOSTNAMELEN */
43
#if defined(AIX43) || defined(sgi)
44
#include <sys/param.h> /* MAXHOSTNAMELEN */
47
/****** Interactive/qmake/--Introduction ***************************************
50
* qmake -- Scheduled parallel distributed make
53
* qmake <sge options> -- <gmake options>
56
* Scheduled, parallel, distributed make.
57
* qmake is implemented based on GNU make 3.78.1 using the remote stub
59
* qmake will start a parallel job and run make tasks as tasks in the
60
* parallel job using the Grid Engine qrsh -inherit command.
63
* sge options - all options that can be specified with qsub command
64
* gmake options - all possible gmake options
67
* returncode from gmake or EXIT_FAILURE, if remote mechanism fails.
68
* On error, an appropriate error description is written to stderr.
71
* qmake -cwd -v PATH -- -j 5
72
* Build sge system: aimk -qmake -parallel 10
75
* Not yet internationalized
76
* Should be updated to latest gmake version or (better) be contributed
77
* to the GNU make project
79
****************************************************************************
82
/****** Interactive/qmake/-Typedefs ***************************************
85
* Typedefs -- type definitions
93
* struct hostfile_info {
98
* struct finished_job {
99
* struct finished_job *next;
108
* struct job_info - Used to store all child processes of a qmake process with
109
* an offset to the hostfile of the host, on which the
110
* executed command runs.
112
* struct hostfile_info - Information about hostfile, is stored at the beginning
114
* Contains the number of free slots (used to control
115
* multiple parallel recursive makes) and the offset
116
* to the next free host.
118
* struct finished_job - Information about finished jobs, that have not
119
* yet been waited for by gmake.
120
* Linked list, new jobs are appended.
122
****************************************************************************
130
struct hostfile_info {
135
struct finished_job {
136
struct finished_job *next;
144
/****** Interactive/qmake/-Defines ***************************************
147
* Defines -- constant and macro definitions
150
* #define LOCK_SLEEP_TIME 500
151
* #define WAIT_SLOT_TIME 5
154
* LOCK_SLEEP_TIME - Defines how long qmake should (u)sleep
155
* after an unsuccessful try to get a lock to the
157
* WAIT_SLOT_TIME - Time to wait, if no slot is free in hostfile
159
****************************************************************************
163
# define LOCK_SLEEP_TIME 1
164
# define usleep sleep
166
# define LOCK_SLEEP_TIME 500 /* [ms] */
169
#define WAIT_SLOT_TIME 5 /* [s] */
172
/****** Interactive/qmake/-Global_Variables ************************************
175
* global Variables -- global Variables used for remote mechanism
179
* int qrsh_wrapper_cmdline;
183
* char *program_name = 0;
185
* const char *remote_description = "xxx";
189
* int remote_enabled = 0;
192
* char **sge_argv = NULL;
194
* int sge_v_argc = 0;
195
* char **sge_v_argv = NULL;
197
* int gmake_argc = 0;
198
* char **gmake_argv = NULL;
202
* char *lockfile_name = NULL;
204
* int hostfile_locked = 0;
206
* char *hostfile_name = NULL;
207
* char *sge_hostfile_name = NULL;
209
* int host_count = 0;
210
* char *jobid = NULL;
211
* char *makelevel = NULL;
213
* struct job_info jobs[];
218
* be_verbose - flag that indicates verbose output of program
220
* qrsh_wrapper_cmdline - the option -v QRSH_WRAPPER[=value] was specified
221
* in the commandline options
222
* dynamic_mode - are tasks started with qrsh -inherit within an
223
* existing parallel job, or are they submitted as
224
* individual qrsh jobs?
225
* program_name - argv[0] that was used to start the actual process
226
* remote_description - string that is output with --version text from
228
* localhost - fully qualified host name of the host,
229
* where qmake is executing
230
* remote_enabled - flag that indicates, if remote execution of
231
* commands is enabled
232
* sge_argc - argument counter for sge options
233
* sge_argv - argument vector for sge options
234
* sge_v_argc - argument counter for sge -v options
235
* sge_v_argv - argument vector for sge -v options
236
* gmake_argc - argument counter for gmake options
237
* gmake_argv - argument vector for gmake options
238
* pass_cwd - do we have to pass option -cwd to qrsh for rules?
239
* lockfile_name - name of the qmake lockfile for access of the
241
* hostfile_locked - flag that tells if we currently own a lock
243
* hostfile - filehandle to qmake hostfile
244
* hostfile_name - name of the qmake hostfile
245
* sge_hostfile_name - name of the sge hostfile
246
* host_count - number of hosts resp. slots in hostfile
247
* jobid - environment variable JOBID
248
* makelevel - environment variable MAKELEVEL or ""
249
* jobs - array with information about running children of
251
* next_job - next free position in array jobs
252
* saved_status - first job that has finished and not been waited
255
****************************************************************************
259
int qrsh_wrapper_cmdline = 0;
263
static char *program_name = 0;
265
const char *remote_description = "distributed make\nload balancing by Grid Engine\n";
267
static char *localhost;
269
static int remote_enabled = 0;
271
static int sge_argc = 0;
272
static char **sge_argv = NULL;
274
static int sge_v_argc = 0;
275
static char **sge_v_argv = NULL;
277
static int gmake_argc = 0;
278
static char **gmake_argv = NULL;
280
static int pass_cwd = 0;
282
static char *lockfile_name = NULL;
284
static int hostfile_locked = 0;
285
static int hostfile = -1;
286
static char *hostfile_name = NULL;
287
static char *sge_hostfile_name = NULL;
289
static int host_count = 0;
291
static char *jobid = NULL;
292
static char *makelevel = NULL;
294
static struct job_info *jobs;
295
static int next_job = 0;
297
struct finished_job *saved_status = NULL;
299
static int read_remote_status(int *exit_code_ptr, int *signal_ptr, int *coredump_ptr, int block);
300
static void read_and_save_remote_status(void);
302
/****** Interactive/qmake/remote_exit() ***************************************
305
* remote_exit() -- exit qmake
308
* static void remote_exit(int code, const char *message,
309
* const char *reason);
312
* Outputs the error messages passed as parameters to stderr
313
* and then exits with the error code passed as parameter.
317
* message - message to output before exit, should describe the
318
* situation when error occurs
319
* reason - description of the error reason, e.g. result from
320
* system call strerror(errno)
326
* #include <stdlib.h>
327
* #include <string.h>
332
* if(write(filehandle, buffer, size) != size) {
333
* remote_exit(EXIT_FAILURE, "writing to file failed", strerror(errno));
336
****************************************************************************
338
static void remote_exit(int code, const char *message, const char *reason)
341
fprintf(stderr, "remote_exit called\n");
345
fprintf(stderr, message);
349
fprintf(stderr, ": %s\n", reason);
352
fprintf(stderr, "qmake: *** exit triggered from remote module\n");
358
/* debugging code, dump qmake's hostfile */
359
static void dump_hostfile() {
360
char buffer[MAXHOSTNAMELEN];
362
struct hostfile_info hostinfo;
365
lseek(hostfile, 0, SEEK_SET);
366
read(hostfile, &hostinfo, sizeof(hostfile_info));
367
printf("---------- Hostfile --------------\n");
368
printf("\t%d\t%d\n", hostinfo.free_slots, hostinfo.offset);
369
for(i = 0; i < host_count; i++) {
370
read(hostfile, &lock, sizeof(char));
371
read(hostfile, buffer, MAXHOSTNAMELEN);
372
printf("\t%d\t%s\n", lock, buffer);
374
printf("----------------------------------\n");
378
/****** Interactive/qmake/read_hostinfo() ***************************************
381
* read_hostinfo() -- read info record from qmake hostfile
384
* static struct hostfile_info *read_hostinfo();
387
* reads and returns the hostfile_info record at the beginning of
388
* the qmake hostfile.
389
* If an error occurs, the program exits.
394
* Pointer to a static structure hostfile_info containing data from
400
* The result points to a static buffer. This buffer will be overwritten
401
* on subsequent calls to read_hostinfo().
402
* It is in the responsibility of the caller to save values for later use.
405
* Interactive/qmake/write_hostinfo()
407
****************************************************************************
410
static struct hostfile_info *read_hostinfo()
412
static struct hostfile_info hostinfo;
414
if(lseek(hostfile, 0, SEEK_SET) < 0) {
415
remote_exit(EXIT_FAILURE, "unable to position in qmake hostfile", strerror(errno));
418
if(read(hostfile, &hostinfo, sizeof(hostfile_info)) != sizeof(hostfile_info)) {
419
remote_exit(EXIT_FAILURE, "unable to read from qmake hostfile", strerror(errno));
424
/****** Interactive/qmake/write_hostinfo() ***************************************
427
* write_hostinfo() -- write info record to qmake hostfile
430
* static void write_hostinfo(const struct hostfile_info *hostinfo);
433
* Writes the record given with parameter hostinfo to the qmake
435
* On error, the program will exit.
438
* hostinfo - pointer to structure hostfile_info to write to the hostfile
441
* Interactive/qmake/read_hostinfo()
443
****************************************************************************
445
static void write_hostinfo(const struct hostfile_info *hostinfo)
447
if(lseek(hostfile, 0, SEEK_SET) < 0) {
448
remote_exit(EXIT_FAILURE, "unable to position in qmake hostfile", strerror(errno));
451
if(write(hostfile, hostinfo, sizeof(hostfile_info)) != sizeof(hostfile_info)) {
452
remote_exit(EXIT_FAILURE, "unable to write to qmake hostfile", strerror(errno));
456
/****** Interactive/qmake/get_host_count() ***************************************
459
* get_host_count() -- get number of entries in qmake hostfile
462
* static int get_host_count();
465
* Calculates the number of entries in the hostfile.
466
* Uses the filehandle "hostfile".
467
* If access to the hostfile fails, qmake will exit with an appropriate
468
* error description and error code.
471
* host_count - number of entries in hostfile
474
* The filehandle "hostfile" (global variable) must have been initialized
475
* by a call to open(...) and be a valid qmake hostfile.
478
* Interactive/qmake/-Global_Variables
479
* Interactive/qmake/init_remote
480
* Interactive/qmake/create_hostfile
482
****************************************************************************
484
static int get_host_count()
486
struct stat fileinfo;
489
if(fstat(hostfile, &fileinfo) < 0) {
490
remote_exit(EXIT_FAILURE, "cannot access fileinfo for qmake hostfile", strerror(errno));
493
host_count = (fileinfo.st_size - sizeof(hostfile_info)) / (MAXHOSTNAMELEN + sizeof(char));
496
fprintf(stdout, "number of slots for qmake execution is %d\n", host_count);
503
/****** Interactive/qmake/create_hostfile() ***************************************
506
* create_hostfile -- create special qmake hostfile
509
* static void create_hostfile();
512
* Creates a qmake hostfile from the sge hostfile created for the
513
* requested parallel environment.
515
* The qmake hostfile is a binary file with the following structure:
516
* <info><host1><host2> ... <hostn>
517
* <info> is a structure containing <free_slots> and <offset>
518
* <free_slots> is the number of still free slots/hosts in
520
* <offset> is the offset to the next host to use
521
* <hostn> describes one host and has the following structure:
523
* <lock> is one character describing, if the host is in use
524
* (1 means in use, 0 means free)
525
* <hostname> is a fixed size character array with the hostname,
526
* padded with 0 bytes
528
* If any system call (file access) fails, qmake exits with an appropriate
529
* error description and error code.
532
* The global variables sge_hostfile_name and hostfile_name must be
536
* Interactive/qmake/-Global_Variables
537
* Interactive/qmake/init_remote()
539
****************************************************************************
541
static void create_hostfile()
544
char line [MAXHOSTNAMELEN + 20];
545
struct hostfile_info hostinfo;
548
fprintf(stdout, "creating qmake hostfile\n");
551
/* open qmake hostfile */
552
hostfile = open(hostfile_name, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
554
if(errno == EEXIST) {
557
remote_exit(EXIT_FAILURE, "cannot open qmake hostfile", strerror(errno));
561
/* open sge hostfile */
562
sge_hostfile = fopen(sge_hostfile_name, "r");
563
if(sge_hostfile == NULL) {
564
fprintf(stderr, "cannot open sge hostfile %s\ndisable remote execution\n", sge_hostfile_name);
571
/* write initial offset */
572
hostinfo.free_slots = 0;
574
write_hostinfo(&hostinfo);
576
/* parse sge hostfile and write qmake hostfile */
577
while(fgets(line, MAXHOSTNAMELEN + 20, sge_hostfile)) {
578
char buffer[MAXHOSTNAMELEN];
579
char hostname[MAXHOSTNAMELEN + 1];
584
sscanf(line, "%s %d", hostname, &slots);
585
memset(buffer, 0, MAXHOSTNAMELEN);
586
strncpy(buffer, hostname, MAXHOSTNAMELEN - 1);
587
hostinfo.free_slots += slots;
589
for(i = 0; i < slots; i++) {
590
if(write(hostfile, &lock, sizeof(char)) != sizeof(char)) {
591
remote_exit(EXIT_FAILURE, "cannot write to qmake hostfile", strerror(errno));
593
if(write(hostfile, buffer, MAXHOSTNAMELEN) != MAXHOSTNAMELEN) {
594
remote_exit(EXIT_FAILURE, "cannot write to qmake hostfile", strerror(errno));
599
/* rewrite actual hostinfo */
600
write_hostinfo(&hostinfo);
604
fclose(sge_hostfile);
607
/****** Interactive/qmake/init_remote() ***************************************
610
* init_remote() -- initialize remote execution of jobs
613
* static void init_remote();
616
* Initializes global variables for remote execution:
619
* - sge_hostfile_name
623
* Creates qmake hostfile
626
* If an error occurs that makes remote execution impossible,
627
* qmake exits with an appropriate error message and error code.
630
* Interactive/qmake/-Global_Variables
631
* Interactive/qmake/create_hostfile()
633
****************************************************************************
636
static void init_remote()
645
/* tmpdir from environment */
646
tmpdir = getenv("TMPDIR");
648
remote_exit(EXIT_FAILURE, "cannot read environment variable TMPDIR",
653
sprintf(buffer, "%s/qmake_lockfile", tmpdir);
654
lockfile_name = (char *)malloc(strlen(buffer) + 1);
655
strcpy(lockfile_name, buffer);
658
sprintf(buffer, "%s/qmake_hostfile", tmpdir);
659
hostfile_name = (char *)malloc(strlen(buffer) + 1);
660
strcpy(hostfile_name, buffer);
662
/* sge_hostfile_name */
663
c = getenv("PE_HOSTFILE");
665
remote_exit(EXIT_FAILURE, "cannot read environment variable "
666
"PE_HOSTFILE", strerror(errno));
669
sge_hostfile_name = (char *)malloc(strlen(c) + 1);
670
strcpy(sge_hostfile_name, c);
673
fprintf(stdout, "sge hostfile = %s\n", sge_hostfile_name);
674
fprintf(stdout, "qmake hostfile = %s\n", hostfile_name);
675
fprintf(stdout, "qmake lockfile = %s\n", lockfile_name);
681
hostfile = open(hostfile_name, O_RDWR);
683
remote_exit(EXIT_FAILURE, "cannot open qmake hostfile",
687
/* host_count - it is already initialized from -j option, but make sure
688
* it really matches the information from the pe_hostfile
690
host_count = get_host_count();
694
jobs = (struct job_info *)malloc(host_count * sizeof(job_info));
696
remote_exit(EXIT_FAILURE, "malloc failed", NULL);
699
for(i = 0; i < host_count; i++) {
705
/****** Interactive/qmake/lock_hostfile() ***************************************
708
* lock_hostfile() -- get lock on hostfile
711
* static void lock_hostfile();
714
* Locks access to the qmake hostfile. The lock is achieved by
715
* creating a lockfile.
716
* The name of the lockfile is contained in the global variable
717
* lockfile_name, that is set by function init_remote.
718
* The function loops until an exclusive create operation can be
719
* performed on the lockfile. Between two loops, qmake sleeps for
720
* a time period defined by LOCK_SLEEP_TIME. Note: the value is passed to
* usleep(), which takes microseconds (or to sleep() where usleep is mapped to it).
721
* If an error occurs when creating the lockfile, qmake exits with an
722
* appropriate error message and error code.
725
* Probably, a timeout for getting the lock should be introduced to
726
* avoid deadlocks in case of errors.
729
* Interactive/qmake/-Defines
730
* Interactive/qmake/-Global_Variables
731
* Interactive/qmake/init_remote()
732
* Interactive/qmake/unlock_hostfile()
734
****************************************************************************
737
static void lock_hostfile()
742
if((lockfile = open(lockfile_name, O_CREAT | O_EXCL, S_IRUSR)) >= 0) {
746
fprintf(stdout, "obtained lock to qmake lockfile\n");
751
if(errno == EEXIST) {
753
fprintf(stdout, "waiting for lock to qmake lockfile\n");
755
usleep(LOCK_SLEEP_TIME);
757
remote_exit(EXIT_FAILURE, "unable to access lockfile", strerror(errno));
762
/****** Interactive/qmake/unlock_hostfile() ***************************************
765
* unlock_hostfile() -- unlock hostfile
768
* static void unlock_hostfile();
771
* Remove the lock to the qmake hostfile.
772
* The lockfile (lockfile_name) is removed.
775
* Interactive/qmake/-Global_Variables
776
* Interactive/qmake/init_remote()
777
* Interactive/qmake/lock_hostfile()
779
****************************************************************************
782
static void unlock_hostfile()
784
if(hostfile_locked) {
786
remove(lockfile_name);
788
fprintf(stdout, "clearing lock to hostfile\n");
793
/****** Interactive/qmake/unlock_hostentry() ***************************************
796
* unlock_hostentry() -- unlock one host in hostfile
799
* static void unlock_hostentry(off_t offset);
802
* Unlock one hostentry in the hostfile. The lock field for the host
803
* given by parameter offset is set to 0.
804
* If an error occurs in positioning in the hostfile or writing the
805
* lockfield, qmake exits with an appropriate error message and
809
* offset - offset to the host to be unlocked. Describes the position
810
* of the host in the hostfile, counted from 0 (e.g. 1 for the 2nd host).
813
* Interactive/qmake/next_host();
815
****************************************************************************
818
static void unlock_hostentry(off_t offset) {
820
struct hostfile_info *hostinfo;
826
fprintf(stdout, "unlock_hostentry %d\n", (int) offset);
829
if(lseek(hostfile, offset * (MAXHOSTNAMELEN + sizeof(char)) + sizeof(hostfile_info), SEEK_SET) < 0) {
830
remote_exit(EXIT_FAILURE, "unable to position in qmake hostfile", strerror(errno));
833
if(write(hostfile, &lock, sizeof(char)) != sizeof(char)) {
834
remote_exit(EXIT_FAILURE, "unable to write to qmake hostfile", strerror(errno));
837
/* update number of free slots in hostinfo */
838
hostinfo = read_hostinfo();
839
hostinfo->free_slots++;
840
write_hostinfo(hostinfo);
846
/****** Interactive/qmake/next_host() ***************************************
849
* next_host -- determine next free remote host
852
* static const char *next_host();
855
* Determines the next host to be used for a remote operation.
856
* Waits until a host is free (info from hostfile_info.free_slots),
857
* during this time, check for finished child processes and store
858
* info about them in structures of type finished_job.
859
* Then reads the host pointed to by hostfile_info.offset.
860
* Checks, if the host is really free, while not,
861
* - the next host from the hostfile is read
862
* - it is checked, whether this host is still used/locked
863
* The found host is locked.
864
* The offset of the found host is stored in the jobs array.
865
* If an error occurs during file operations, qmake exits with an
866
* appropriate error message and error code.
869
* hostname - pointer to character string with name of host to use
871
****************************************************************************
874
static const char *next_host()
876
struct hostfile_info *hostinfo;
880
static char buffer[MAXHOSTNAMELEN];
882
/* wait until slot free */
884
/* lock and read hostinfo */
886
hostinfo = read_hostinfo();
888
/* if no slot free, unlock to allow others to free slots and wait */
889
if(hostinfo->free_slots == 0) {
893
fprintf(stdout, "waiting for a free slot\n");
896
sleep(WAIT_SLOT_TIME);
897
/* check for dead children */
898
read_and_save_remote_status();
900
} while(hostinfo->free_slots == 0);
902
hostinfo->free_slots--;
904
/* search free host */
907
file_offset = hostinfo->offset * (MAXHOSTNAMELEN + sizeof(char)) + sizeof(hostfile_info);
909
/* try to find unlocked host */
910
if(lseek(hostfile, file_offset, SEEK_SET) < 0) {
911
remote_exit(EXIT_FAILURE, "unable to position in qmake hostfile", strerror(errno));
914
if(read(hostfile, &lock, sizeof(char)) != sizeof(char)) {
915
remote_exit(EXIT_FAILURE, "unable to read from qmake hostfile", strerror(errno));
917
offset = hostinfo->offset;
918
hostinfo->offset = (hostinfo->offset + 1) % host_count;
924
if(lseek(hostfile, file_offset, SEEK_SET) < 0) {
925
remote_exit(EXIT_FAILURE, "unable to position in qmake hostfile", strerror(errno));
928
if(write(hostfile, &lock, sizeof(char)) != sizeof(char)) {
929
remote_exit(EXIT_FAILURE, "unable to write to qmake hostfile", strerror(errno));
933
if(read(hostfile, buffer, MAXHOSTNAMELEN) != MAXHOSTNAMELEN) {
934
remote_exit(EXIT_FAILURE, "unable to read from qmake hostfile", strerror(errno));
937
/* rewrite hostinfo */
938
write_hostinfo(hostinfo);
944
jobs[next_job].offset = offset;
947
fprintf(stdout, "next host for qmake job is %s\n", buffer);
953
static void build_submit_argv()
958
/* copy sge -v options for later calls to qrsh -inherit
959
* for dynamic mode, we also have to copy job requests (-l)
961
for(i = 0; i < sge_argc; i++) {
962
if(strcmp(sge_argv[i], "-V") == 0) {
964
} else if(strcmp(sge_argv[i], "-v") == 0) {
967
} else if (dynamic_mode && strcmp(sge_argv[i], "-l") == 0) {
975
sge_v_argv = (char **)malloc(v_counter * sizeof(char *));
977
if(sge_v_argv == NULL) {
978
remote_exit(EXIT_FAILURE, "malloc failed", strerror(errno));
982
for(i = 0; i < sge_argc; i++) {
983
if(strcmp(sge_argv[i], "-v") == 0 && (i + 1) < sge_argc) {
984
sge_v_argv[sge_v_argc++] = sge_argv[i];
985
sge_v_argv[sge_v_argc++] = sge_argv[i + 1];
986
} else if(strcmp(sge_argv[i], "-V") == 0) {
987
sge_v_argv[sge_v_argc++] = sge_argv[i];
988
} else if (dynamic_mode &&
989
strcmp(sge_argv[i], "-l") == 0 && (i + 1) < sge_argc) {
990
sge_v_argv[sge_v_argc++] = sge_argv[i];
991
sge_v_argv[sge_v_argc++] = sge_argv[i + 1];
996
/****** Interactive/qmake/parse_options() ***************************************
999
* parse_options() -- split sge and gmake options
1002
* static int parse_options(int *p_argc, char **p_argv[]);
1005
* Parses the command line options passed to qmake.
1006
* If the keyword "-verbose" is found, start verbose reporting of
1008
* Splits sge and gmake options at the option "--".
1009
* If memory for the new argument vectors cannot be allocated, qmake
1010
* exits with an appropriate error message and error code.
1013
* p_argc - pointer to argument counter
1014
* p_argv - pointer to argument vector
1017
* if everything is OK: 1
1018
* if no sge options found (no option --): 0 --> standard gmake
1020
****************************************************************************
1023
static int parse_options(int *p_argc, char **p_argv[])
1029
int first_gmake_option;
1031
int unhandled_recursive = 0;
1036
/* detect if there are any sge parameters and count them */
1037
first_gmake_option = 0;
1038
for(i = 1; i < argc; i++) {
1039
if(!strcmp(argv[i], "--")) {
1040
first_gmake_option = i;
1045
/* no -- option set to split sge and gmake options? */
1046
/* if JOBID, assume we are started recursively from any shellscript fragments */
1047
/* and insert -inherit */
1049
if(first_gmake_option == 0) {
1051
unhandled_recursive = 1;
1057
if(unhandled_recursive) {
1058
/* case: might be recursive make call */
1059
/* try to copy sge parameters from environment variable */
1060
char *passed_sge_options = getenv("RECURSIVE_QMAKE_OPTIONS");
1061
if(passed_sge_options != NULL) {
1062
/* count number of sge options */
1064
char *s = passed_sge_options;
1073
sge_argv = (char **)malloc(counter * sizeof(char *));
1075
if(sge_argv == NULL) {
1076
remote_exit(EXIT_FAILURE, "malloc failed", strerror(errno));
1079
sge_argv[sge_argc++] = strtok(passed_sge_options, "\n");
1080
for(i = 1; i < counter; i++) {
1081
sge_argv[sge_argc++] = strtok(NULL, "\n");
1085
/* case: normal qmake call */
1086
/* copy sge parameters */
1088
sge_argv = (char **)malloc(first_gmake_option * sizeof(char *));
1090
if(sge_argv == NULL) {
1091
remote_exit(EXIT_FAILURE, "malloc failed", strerror(errno));
1094
for(i = 0; i < first_gmake_option; i++) {
1095
sge_argv[sge_argc++] = argv[i];
1099
/* determine dynamic_mode */
1100
dynamic_mode = getenv("PE") == NULL;
1102
/* parse sge options and set some flags */
1103
for(i = 0; i < sge_argc; i++) {
1104
if(strcmp(sge_argv[i], "-verbose") == 0) {
1106
} else if(strcmp(sge_argv[i], "-cwd") == 0) {
1108
} else if(strcmp(sge_argv[i], "-v") == 0) {
1111
if(strncmp(sge_argv[i], "QRSH_WRAPPER",
1112
sizeof("QRSH_WRAPPER") - 1) == 0) {
1113
qrsh_wrapper_cmdline = 1;
1119
/* copy gmake parameters */
1121
gmake_argv = (char **)malloc((argc - first_gmake_option) * sizeof(char *));
1123
if(gmake_argv == NULL) {
1124
remote_exit(EXIT_FAILURE, "malloc failed", strerror(errno));
1127
gmake_argv[gmake_argc++] = argv[0];
1128
for(i = first_gmake_option + 1; i < argc; i++, gmake_argc++) {
1129
gmake_argv[gmake_argc] = argv[i];
1132
/* in case of dynamic mode, set number of slots to use from -j option */
1134
for(i = 0; i < gmake_argc; i++) {
1135
if(strcmp(gmake_argv[i], "-j") == 0) {
1137
/* no further parameter - would core dump? */
1138
/* JG: TODO: in dynamic mode, -j without parameter would be ok.
1139
* it would mean: allow any number of parallel jobs.
1140
* to allow this, we have to change storage of job_info from
1141
* array to linked list.
1143
if(i >= gmake_argc) {
1144
remote_exit(EXIT_FAILURE, "-j option requires parameter", NULL);
1147
host_count = atoi(gmake_argv[i]);
1152
if(host_count < 1) {
1157
/* return new gmake parameters */
1158
*p_argc = gmake_argc;
1159
*p_argv = gmake_argv;
1163
fprintf(stdout, "dynamic task allocation mode\n");
1166
for(i = 0; i < sge_argc; i++) {
1167
fprintf(stdout, "sge_argv[%d] = %s\n", i, sge_argv[i]);
1169
for(i = 0; i < sge_v_argc; i++) {
1170
fprintf(stdout, "sge_v_argv[%d] = %s\n", i, sge_v_argv[i]);
1172
for(i = 0; i < gmake_argc; i++) {
1173
fprintf(stdout, "gmake_argv[%d] = %s\n", i, gmake_argv[i]);
1180
/****** Interactive/qmake/inherit_job() ***************************************
1183
* inherit_job() -- is qmake option -inherit set?
1186
* static int inherit_job();
1189
* Checks if the option "-inherit" is contained in the sge options.
1190
* If yes, then it is deleted from the sge options.
1193
* 1, if sge options contain "-inherit", else 0
1195
****************************************************************************
1198
static int inherit_job()
1202
for(i = 0; i < sge_argc; i++) {
1203
if(!strcmp(sge_argv[i], "-inherit")) {
1205
for(j = i; j < sge_argc; j++) {
1206
sge_argv[j] = sge_argv[j + 1];
1215
/****** Interactive/qmake/set_default_options() ********************************
1218
* set_default_options() -- initialize remote mechanism before gmake startup
1221
* void set_default_options();
1224
* Adds default options to the sge argument vector:
1225
* - if no resource request is contained, insert resource request for
1226
* the architecture (-l arch=$SGE_ARCH)
1227
* If a system call fails (reading environment, malloc), qmake exits with
1228
* an appropriate error message and error code.
1230
****************************************************************************
1233
void set_default_options()
1237
static char buffer[1024];
1238
int insert_resource_request = 1;
1240
/* check if sge options contain resource requests */
1241
for(i = 0; i < sge_argc; i++) {
1242
if(!strcmp(sge_argv[i], "-l")) {
1243
insert_resource_request = 0;
1248
if(insert_resource_request) {
1250
/* determine architecture */
1251
architecture = getenv("SGE_ARCH");
1252
if(architecture == NULL || strlen(architecture) == 0) {
1253
fprintf(stdout, "qmake: *** cannot determine architecture from environment variable SGE_ARCH\n");
1254
fprintf(stdout, " no default architecture set\n");
1258
/* if no resource requests, insert to use same architecture */
1259
/* copy old sge options */
1261
sge_argv = (char **)malloc((sge_argc + 3) * sizeof(char *));
1263
if(sge_argv == NULL) {
1264
remote_exit(EXIT_FAILURE, "malloc failed", strerror(errno));
1267
/* copy existing options */
1268
for(i = 0; i < sge_argc; i++) {
1269
sge_argv[i] = argv[i];
1272
/* append architecture */
1273
if(insert_resource_request) {
1274
sprintf(buffer, "arch=%s", architecture);
1277
fprintf(stdout, "setting default options: -l %s\n", buffer);
1280
sge_argv[sge_argc++] = "-l";
1281
sge_argv[sge_argc++] = buffer;
1284
/* free old sge_argv */
1291
/****** Interactive/qmake/equalize_nslots() ************************************
1294
* equalize_nslots() -- equalize -j option with NSLOTS environment
1297
* static void equalize_nslots(int *p_argc, char **p_argv[]);
1300
* Reads the number of slots from environment variable NSLOTS,
1301
* if -j option is not set or differs from NSLOTS, it is
1302
* inserted/corrected.
1303
* If a system call fails (reading environment, malloc), qmake exits
1304
* with an appropriate error message and error code.
1307
* p_argc - pointer to argument counter
1308
* p_argv - pointer to argument vector
1310
****************************************************************************
1312
static void equalize_nslots(int *p_argc, char **p_argv[])
1318
/* get NSLOTS from environment */
1319
nslots = getenv("NSLOTS");
1320
if(nslots == NULL) {
1321
remote_exit(EXIT_FAILURE, "NSLOTS not set in environment", strerror(errno));
1324
/* if -j option differs, set NSLOTS as -j option */
1325
for(i = 0; i < gmake_argc; i++) {
1326
if(!strcmp(gmake_argv[i], "-j")) {
1328
/* no further parameter - would core dump? */
1329
if(i >= gmake_argc) {
1330
remote_exit(EXIT_FAILURE, "-j option requires parameter", NULL);
1332
/* NSLOTS differs from -j parameter? */
1333
if(strcmp(gmake_argv[i], nslots)) {
1335
fprintf(stdout, "equalizing -j option with NSLOTS environment: -j %s\n", nslots);
1337
gmake_argv[i] = nslots;
1346
fprintf(stdout, "inserting -j option from NSLOTS environment: -j %s\n", nslots);
1349
/* no -j option set */
1350
/* copy old gmake options */
1352
gmake_argv = (char **)malloc((gmake_argc + 2) * sizeof(char *));
1354
if(gmake_argv == NULL) {
1355
remote_exit(EXIT_FAILURE, "malloc failed", strerror(errno));
1358
for(i = 0; i < gmake_argc; i++) {
1359
gmake_argv[i] = argv[i];
1362
/* append -j option with the value of NSLOTS */
1363
gmake_argv[gmake_argc++] = "-j";
1364
gmake_argv[gmake_argc++] = nslots;
1366
/* free old gmake_argv */
1369
/* pass new options to gmake */
1370
*p_argc = gmake_argc;
1371
*p_argv = gmake_argv;
1374
/****** Interactive/qmake/equalize_pe_j() ***************************************
1377
* equalize_pe_j() -- equalize number of slots from -pe and -j options
1380
* static void equalize_pe_j();
1383
* If no parallel environment is requested in the sge options
1384
* and more than 1 slot is requested by the gmake -j option,
1385
* a request for a parallel environment "make" with a range
1386
* of slots from 1 to the value given with the -j option is
1387
* inserted into the sge options.
1388
* If an error occurs (invalid -j option, malloc) qmake exits with
1389
* an appropriate error message and error code.
1391
****************************************************************************
1394
/*
 * Derive a "-pe make 1-<n>" request for the sge options from the gmake
 * "-j <n>" option; the search for an existing "-pe" option comes first.
 *
 * Calls remote_exit(EXIT_FAILURE, ...) when "-j" has no parameter or when
 * the enlarged sge argument vector cannot be allocated.
 */
static void equalize_pe_j()
1399
/* static storage: a pointer to this buffer is stored in sge_argv below
   and therefore has to stay valid after this function returns */
static char buffer[100];
1401
/* -pe sge option set? Then take this one */
1402
for(i = 0; i < sge_argc; i++) {
1403
if(!strcmp(sge_argv[i], "-pe")) {
1408
/* gmake option -j requests more than 1 slot? */
1409
for(i = 0; i < gmake_argc; i++) {
1410
if(!strcmp(gmake_argv[i], "-j")) {
1412
/* no further parameter - would core dump? */
1413
/* NOTE(review): this bound check presumably relies on i having been
   advanced to the parameter position beforehand - confirm */
if(i >= gmake_argc) {
1414
remote_exit(EXIT_FAILURE, "-j option requires parameter", NULL);
1417
/* atoi() reports no conversion errors; non-numeric text yields 0 */
nslots = atoi(gmake_argv[i]);
1427
fprintf(stdout, "inserting pe request to sge options: -pe make 1-%d\n", nslots);
1430
/* insert pe into sge options */
1431
/* copy old sge options */
1433
/* three additional slots: "-pe", "make" and the slot range */
sge_argv = (char **)malloc((sge_argc + 3) * sizeof(char *));
1435
if(sge_argv == NULL) {
1436
remote_exit(EXIT_FAILURE, "malloc failed", strerror(errno));
1439
for(i = 0; i < sge_argc; i++) {
1440
/* presumably argv still refers to the previous sge_argv - confirm */
sge_argv[i] = argv[i];
1444
/* slot range from 1 up to the value of the -j option */
sprintf(buffer, "1-%d", nslots);
1445
sge_argv[sge_argc++] = "-pe";
1446
sge_argv[sge_argc++] = "make";
1447
sge_argv[sge_argc++] = buffer;
1449
/* free old sge_argv */
1455
/****** Interactive/qmake/submit_qmake() ***************************************
1458
* submit_qmake() -- start a scheduled qmake with qrsh
1461
* static void submit_qmake();
1464
* Builds a new argument vector for a qrsh call to start a scheduled
1466
* Inserts the option -inherit into the argument vector to signal
1467
* qmake that it is called from qrsh or from within a batch script.
1468
* Pass option -verbose to qrsh and the scheduled qmake.
1469
* qrsh is called by forking and exec to qrsh.
1470
* The parent process waits for qrsh to exit and then exits with
1471
* the exit status from qrsh or EXIT_FAILURE, if qrsh exited because
1474
****************************************************************************
1476
/*
 * Build the command line
 *    qrsh -noshell <sge options> <qmake> -inherit [-verbose] [-cwd]
 *         [-v QRSH_WRAPPER] <sge -v options> -- <gmake options>
 * run it in a child process via execvp() and, in the parent, wait for it
 * and leave with the exit status of qrsh.
 */
static void submit_qmake()
1482
int insert_qrsh_wrapper = 0;
1484
/* do we have to pass QRSH_WRAPPER from environment? */
1485
if(!qrsh_wrapper_cmdline && getenv("QRSH_WRAPPER") != NULL) {
1486
insert_qrsh_wrapper = 1;
1489
/* build argv for qrsh */
1491
/* sge_argv[0] and gmake_argv[0] are not copied as options (both loops
   below start at index 1), gmake_argv[0] is inserted once as the command;
   the constant 4 then accounts for "qrsh", "-noshell", "-inherit", "--"
   and one further slot, presumably the terminating NULL - confirm.
   Assumes pass_cwd is either 0 or 1. */
argv = (char **)malloc((sge_argc + sge_v_argc + gmake_argc + 4 + (be_verbose ? 1 : 0) + (insert_qrsh_wrapper ? 2 : 0) + pass_cwd) * sizeof(char *));
1494
remote_exit(EXIT_FAILURE, "malloc failed", strerror(errno));
1497
argv[argc++] = "qrsh";
1499
argv[argc++] = "-noshell";
1501
/* sge options, without the program name in sge_argv[0] */
for(i = 1; i < sge_argc; i++) {
1502
argv[argc++] = sge_argv[i];
1505
/* the command qrsh shall start: this make program itself */
argv[argc++] = gmake_argv[0];
1507
argv[argc++] = "-inherit";
1510
argv[argc++] = "-verbose";
1514
argv[argc++] = "-cwd";
1517
if(insert_qrsh_wrapper) {
1518
argv[argc++] = "-v";
1519
argv[argc++] = "QRSH_WRAPPER";
1522
for(i = 0; i < sge_v_argc; i++) {
1523
argv[argc++] = sge_v_argv[i];
1526
/* "--" separates the sge options from the gmake options */
argv[argc++] = "--";
1527
for(i = 1; i < gmake_argc; i++) {
1528
argv[argc++] = gmake_argv[i];
1533
/* build subprocess */
1537
remote_exit(EXIT_FAILURE, "unable to create qrsh process", strerror(errno));
1541
/* in parent, wait for child to exit */
1545
if(waitpid(qrsh_pid, &status, 0) == qrsh_pid) {
1546
if(WIFEXITED(status)) {
1547
/* normal termination of qrsh: hand its exit status to our caller */
exit(WEXITSTATUS(status));
1550
if(WIFSIGNALED(status)) {
1552
sprintf(buffer, "qrsh exited on signal %d", WTERMSIG(status));
1553
remote_exit(EXIT_FAILURE, buffer, NULL);
1558
/* in child, start qrsh */
1560
fprintf(stdout, "creating scheduled qmake\n");
1561
for(i = 0; i < argc; i++) {
1562
fprintf(stdout, "argv[%3d] = %s\n", i, argv[i]);
1566
execvp("qrsh", argv);
1567
/* execvp() only returns if qrsh could not be started */
remote_exit(EXIT_FAILURE, "start of qrsh failed", strerror(errno));
1572
/****** Interactive/qmake/remote_options() *************************************
1575
* remote_options() -- initialize remote mechanism before gmake startup
1578
* void remote_options(int *p_argc, char **p_argv[]);
1581
* Determine the qmake startmode and create an appropriate program
1583
* The following start modes are defined:
1584
* - qmake called interactively without any special options
1585
* - qmake called interactively with special options
1586
* - qmake called from sge (qrsh or batch job)
1587
* - qmake called recursively from scheduled qmake
1590
* p_argc - pointer to argument counter
1591
* p_argv - pointer to argument vector
1593
****************************************************************************
1596
/*
 * Entry point called with the command line before gmake evaluates it.
 * Reads JOB_ID and MAKELEVEL from the environment, remembers the program
 * name and splits the command line into sge and gmake options; the
 * remaining steps depend on the detected start mode.
 *
 * p_argc - pointer to argument counter, handed on to parse_options()
 *          and equalize_nslots()
 * p_argv - pointer to argument vector, handed on likewise
 */
void remote_options(int *p_argc, char **p_argv[])
1598
jobid = getenv("JOB_ID");
1599
makelevel = getenv("MAKELEVEL");
1601
/* store program name to detect recursive make calls */
1602
/* private copy of argv[0], one extra byte for the terminating '\0' */
program_name = (char *)malloc(strlen((*p_argv)[0]) + 1);
1603
if(program_name == 0) {
1604
remote_exit(EXIT_FAILURE, "malloc failed", strerror(errno));
1606
strcpy(program_name, (*p_argv)[0]);
1608
/* split sge and gmake options */
1609
if(!parse_options(p_argc, p_argv)) {
1610
/* no sge options set: behave as gmake */
1615
fprintf(stdout, "determine qmake startmode\n");
1618
/* option -inherit set? */
1620
/* is MAKELEVEL set in environment? */
1621
if(makelevel == NULL) {
1624
/* in non dynamic mode, equalize NSLOTS (from pe) with -j option */
1625
if (!dynamic_mode) {
1626
equalize_nslots(p_argc, p_argv);
1629
/* enable remote execution */
1631
build_submit_argv();
1633
/* set default sge options (architecture) */
1634
set_default_options();
1635
build_submit_argv();
1636
/* equalize -pe and -j options */
1637
/* JG: If no pe is given, we use dynamic allocation */
1641
/* start a scheduled qmake with qrsh, wait for qrsh to exit and exit */
1646
/****** Interactive/qmake/remote_setup() ***************************************
1649
* remote_setup() -- setup remote mechanism after gmake startup
1652
* void remote_setup();
1655
* If remote execution is enabled, initialize the remote mechanisms.
1656
* Calls init_remote and
1657
* initializes the global variables
1659
* If an error occurs in a system call (gethostname, gethostbyname, malloc)
1660
* qmake exits with an appropriate error message and error code.
1663
* Initialization of localhost should be moved to init_remote.
1666
* Interactive/qmake/init_remote()
1667
* Interactive/qmake/remote_cleanup()
1669
****************************************************************************
1672
/*
 * When remote execution is enabled, determine the name of the local host
 * (gethostname() followed by gethostbyname()) and store a heap copy of the
 * resolved h_name in the global variable localhost.
 * Calls remote_exit(EXIT_FAILURE, ...) if one of the calls fails.
 */
void remote_setup ()
1674
/* static storage is zero initialized; gethostname() below is given one
   byte less than the buffer size, so the last byte stays '\0' */
static char hostbuffer[1024];
1675
struct hostent *hostinfo;
1677
/* if remote enabled, initialize filenames, hostfile, filehandles, number of hosts */
1678
if(remote_enabled) {
1681
if(gethostname(hostbuffer, 1023) != 0) {
1682
remote_exit(EXIT_FAILURE, "gethostname failed", strerror(errno));
1685
hostinfo = gethostbyname(hostbuffer);
1686
if(hostinfo == NULL) {
1687
/* NOTE(review): gethostbyname() reports its error in h_errno, not in
   errno, so strerror(errno) may print an unrelated message here */
remote_exit(EXIT_FAILURE, "gethostbyname failed", strerror(errno));
1690
/* h_name points into storage owned by the resolver, keep a private copy */
localhost = (char *)malloc(strlen(hostinfo->h_name) + 1);
1691
if(localhost == NULL) {
1692
remote_exit(EXIT_FAILURE, "malloc failed", strerror(errno));
1695
strcpy(localhost, hostinfo->h_name);
1699
/****** Interactive/qmake/remote_cleanup() *************************************
1702
* remote_cleanup() -- cleanup remote mechanism before exit
1705
* void remote_cleanup();
1708
* Cleans up some files ...
1713
* Probably not complete.
1716
* Interactive/qmake/remote_setup()
1718
****************************************************************************
1721
/*
 * Cleanup hook of the remote mechanism; it only acts when remote_enabled
 * is set, and the hostfile handling applies to non dynamic mode only.
 */
void remote_cleanup ()
1723
/* if remote start is enabled */
1724
if(remote_enabled) {
1726
fprintf(stdout, "cleanup of remote mechanism\n");
1729
/* the hostfile related steps below are taken in non dynamic mode */
if (!dynamic_mode) {
1730
/* close hostfile */
1735
/* unlock, if exit is forced within locked situation */
1740
/* Return nonzero if the next job should be done remotely. */
1742
/****** Interactive/qmake/start_remote_job_p() *********************************
1745
* start_remote_job_p() -- shall next job be started remote?
1748
* int start_remote_job_p(int first_p);
1751
* If remote execution is enabled, prepare job_info record for next
1752
* job and return true (1).
1755
* first_p - no idea what this parameter is for :-(
1758
* start_remote - 0 if next job shall be started locally,
1759
* 1 if it may be started remotely
1762
* Probably determination of next host to use and the decision, if this
1763
* host is really the local host (-> local execution) should be done here.
1766
* Interactive/qmake/start_remote_job
1768
****************************************************************************
1771
/*
 * Tell gmake whether the next job may be started through the remote
 * mechanism.  In non dynamic mode a free entry of the jobs array is
 * looked up for it beforehand.
 *
 * first_p - not referenced by any of the statements shown below
 */
int start_remote_job_p (int first_p)
1773
/* if remote is enabled, always return true */
1774
if(remote_enabled) {
1775
if (!dynamic_mode) {
1778
/* set pointer to next free entry in job_info */
1780
/* an entry whose pid is 0 is treated as unused */
for(i = 0; i < host_count; i++) {
1781
if(jobs[i].pid == 0) {
1787
/* -1 is taken as "no free entry found" and is fatal */
if(next_job == -1) {
1788
remote_exit(EXIT_FAILURE, "disaranged job_info list", NULL);
1795
"enabling next task to be scheduled as Grid Engine "
1799
"enabling next task to be executed as Grid Engine "
1811
/****** Interactive/qmake/is_recursive_make() **********************************
1814
* is_recursive_make -- is a command to execute a recursive make?
1817
* static int is_recursive_make(const char *argv_0)
1820
* Determines from the command name to call (argv[0]), if a job
1821
* is a recursive call to make.
1822
* If the command to execute ends with the name (argv[0]) of the
1823
* actual make process, a recursive make is detected.
1826
* argv_0 - command to execute
1829
* 0, if no recursive make is detected
1830
* 1, if a recursive make is detected
1833
* This function will only detect directly called recursive make calls,
1834
* if make is called from within a shellscript, it will not be detected.
1837
* Interactive/qmake/might_be_recursive_make()
1838
* Interactive/qmake/start_remote_job()
1840
****************************************************************************
1843
/*
 * Check whether the command argv_0 ends with program_name, i.e. whether
 * the command to execute is this make program itself.
 *
 * argv_0 - command to execute
 */
static int is_recursive_make(const char *argv_0) {
1846
/* strstr() delivers the FIRST occurrence of program_name in argv_0 */
substring = strstr(argv_0, program_name);
1847
if(substring != NULL) {
1848
/* equal only if nothing follows the occurrence, i.e. argv_0 ends with
   program_name.  NOTE(review): if program_name also occurs earlier in
   argv_0 (e.g. as a directory name in the path), the first occurrence is
   compared and the match at the end is not found. */
if(strcmp(substring, program_name) == 0) {
1850
fprintf(stdout, "detected recursive make - starting on local machine\n");
1859
/****** Interactive/qmake/might_be_recursive_make() ****************************
1862
* might_be_recursive_make -- might a command to exec be recursive make?
1865
* static int might_be_recursive_make(char argv[])
1868
* Tries to detect recursive make calls, that are done from within shell
1869
* script fragments in a makefile - this case is not handled by function
1870
* is_recursive_make().
1871
* Scans through the argument vector and searches for occurrences of the
1872
* name of the actual make process (argv[0]).
1875
* argv - argument vector of the command to execute
1878
* 1, if a potential recursive make is detected,
1882
* This function will probably deliver true much too often.
1883
* Imagine, you call qmake and the compiler is passed a define containing
1884
* the string "qmake":
1885
* cc -DMADE_BY=qmake -c foo.c
1886
* In this case each cc call will be supposed to be a recursive make and
1887
* be executed locally without considering the number of slots reserved
1888
* on this host - this behavior might lead to overload on the local host.
1891
* Interactive/qmake/is_recursive_make()
1892
* Interactive/qmake/start_remote_job()
1894
****************************************************************************
1897
/*
 * Search every element of the NULL terminated vector argv for an
 * occurrence of program_name anywhere inside the element.
 *
 * argv - argument vector of the command to execute
 */
static int might_be_recursive_make(char *argv[]) {
1900
for(i = 0; argv[i] != NULL; i++) {
1901
/* substring match: program_name may appear at any position */
if(strstr(argv[i], program_name) != NULL) {
1902
fprintf(stdout, "\nthis call might lead to a recursive qmake call:\n");
1903
/* show the argument that contains the program name */
fprintf(stdout, "%s\n", argv[i]);
1904
fprintf(stdout, "starting on local machine\n\n");
1912
/*
 * Look through the NULL terminated vector args for an element containing
 * "SGE_RREQ=" and deliver a heap copy (strdup) of the request text found
 * there.  The copy is handed to the caller, who is responsible for
 * releasing it with free().
 *
 * args - argument vector of the command to execute
 */
static char *get_sge_resource_request(char **args)
1917
/* stop at the end of the vector or as soon as a request was extracted */
while (*arg != NULL && ret == NULL) {
1919
s = strstr(*arg, "SGE_RREQ=");
1921
/* skip the keyword itself */
s += strlen("SGE_RREQ=");
1923
fprintf(stderr, "syntax error in sge resource request\n");
1928
/* scan up to the closing double quote or the end of the string */
while (*s != '\0' && *s != '"') {
1933
fprintf(stderr, "syntax error in sge resource request\n");
1935
ret = strdup(buffer);
1946
/*
 * Count the tokens of a resource request string.
 *
 * Tokens are separated by one or more blanks or tabs; leading and trailing
 * separators are ignored.  The string is only scanned, it is neither
 * copied nor modified, so no allocation can fail and the hidden state of
 * strtok() is left untouched.
 *
 * request - resource request string, may be NULL
 *
 * returns the number of tokens, 0 for a NULL, empty or blank-only request
 */
static int count_sge_resource_request(const char *request)
{
   const char *pos;
   int count = 0;

   if (request == NULL) {
      return 0;
   }

   /* skip leading separators, then alternate between token and separators */
   pos = request + strspn(request, " \t");
   while (*pos != '\0') {
      count++;
      pos += strcspn(pos, " \t");
      pos += strspn(pos, " \t");
   }

   return count;
}
1962
/*
 * Append the tokens of a resource request string to an argument vector.
 *
 * Tokens are separated by one or more blanks or tabs.  Every token is
 * stored as a separate heap allocated string, starting at args[argc].
 * The request string itself is only scanned, not copied or modified.
 *
 * request - resource request string, may be NULL
 * args    - argument vector; the caller has to provide room for all tokens
 *           (see count_sge_resource_request())
 * argc    - index of the first free element in args
 *
 * returns the index following the last element written.  If memory for a
 * token cannot be allocated, a message is written to stderr and copying
 * stops; the vector then contains the tokens copied so far and no NULL
 * entry is stored in between.
 */
static int copy_sge_resource_request(const char *request, char **args, int argc)
{
   const char *pos;

   if (request == NULL || args == NULL) {
      return argc;
   }

   pos = request + strspn(request, " \t");
   while (*pos != '\0') {
      size_t len = strcspn(pos, " \t");
      char *token = malloc(len + 1);

      if (token == NULL) {
         fprintf(stderr, "qmake: malloc failed while copying sge resource request\n");
         break;
      }

      memcpy(token, pos, len);
      token[len] = '\0';
      args[argc++] = token;

      /* continue behind the token and the separators following it */
      pos += len;
      pos += strspn(pos, " \t");
   }

   return argc;
}
1977
/* Start a remote job running the command in ARGV,
1978
with environment from ENVP. It gets standard input from STDIN_FD. On
1979
failure, return nonzero. On success, return zero, and set *USED_STDIN
1980
to nonzero if it will actually use STDIN_FD, zero if not, set *ID_PTR to
1981
a unique identification, and set *IS_REMOTE to zero if the job is local,
1982
nonzero if it is remote (meaning *ID_PTR is a process ID). */
1984
/****** Interactive/qmake/start_remote_job() ***********************************
1987
* start_remote_job() -- start a remote job
1990
* int start_remote_job(char **argv, char **envp,
1992
* int *is_remote, int *id_ptr, int *used_stdin);
1995
* Starts a make task.
1997
* If the task is a recursive make call or looks as if it could be a
1998
* recursive make, it is started on the local host.
2000
* The next free execution host is read from qmake's hostfile,
2001
* if it is the localhost, the task is started locally.
2003
* The commandline and the tasks environment are setup and
2004
* the task is started by forking and executing qrsh -inherit ...
2006
* Some administrative information is passed back to the caller.
2009
* argv - argument vector of task to start
2010
* envp - pointer to process environment
2011
* stdin_fd - stdin filehandle, if != 0, stdin will be closed in
2012
* calls to qrsh (qrsh -nostdin)
2013
* is_remote - will task be executed on remote host?
2014
* id_ptr - pid of forked child process
2015
* used_stdin - did we use stdin?
2018
* 0 if function completed successfully
2021
* Interactive/qmake/next_host()
2022
* Interactive/qmake/is_recursive_make()
2023
* Interactive/qmake/might_be_recursive_make()
2026
****************************************************************************
2028
/* maximum number of changed environment variables handed to a task */
#define ADDTL_ENV_VARS 100
2029
/* maximum string length of the variable name list and of the
   RECURSIVE_QMAKE_OPTIONS setting (buffers have one more byte) */
#define ADDTL_ENV_SIZE 4095
2030
/*
 * Start the command in argv as a task.  The environment in envp is
 * compared with the current environment to find the variables that have
 * to be exported, recursive make calls are detected, and a qrsh command
 * line is assembled and executed with execvp().
 */
int start_remote_job (char **argv, char **envp,
2031
int stdin_fd, int *is_remote,
2032
int *id_ptr, int *used_stdin)
2035
const char *hostname;
2036
int exec_remote = 1;
2037
int recursive_make = 0;
2038
/* complete "NAME=value" strings of the variables to export */
char *addtl_env[ADDTL_ENV_VARS];
2039
/* list of the variable names, passed to qrsh with option -v */
char addtl_env_pass[ADDTL_ENV_SIZE + 1];
2040
/* the RECURSIVE_QMAKE_OPTIONS setting for recursive make calls */
char envvar[ADDTL_ENV_SIZE + 1];
2043
addtl_env_pass[0] = '\0';
2050
* Parse the given environment and search for variables that are not in the
2051
* current environment or have a changed value.
2052
* These variables have to be set in the execution environment,
2053
* if the task is started by qrsh, the variable names have to be part of a
2056
while (*env != NULL) {
2057
char *copy, *variable, *value, *old_value;
2059
/* we have to dup env as we split it into variable and value */
2060
copy = strdup(*env);
2061
variable = strtok(copy, "=");
2062
/* NOTE(review): the second strtok() call stops at the next '=' as well,
   so a value containing '=' is compared only up to that character */
value = strtok(NULL, "=");
2063
if (value == NULL) {
2067
/* retrieve variable from current environment */
2068
old_value = getenv(variable);
2070
/* if variable isn't set in current environment, or has been changed */
2071
if (old_value == NULL || strcmp(old_value, value) != 0) {
2072
if (var_idx >= ADDTL_ENV_VARS) {
2073
free(copy); copy = NULL;
2074
fprintf(stderr, "qmake: too many additional environment variables to set\n");
2077
/* store additional environment to set */
2078
/* the complete, unsplit "NAME=value" entry is duplicated here */
addtl_env[var_idx++] = strdup(*env);
2080
/* store variable name for -v option */
2081
/* strict ">=" leaves room for the separating "," added below */
if (strlen(addtl_env_pass) + strlen(variable) >= ADDTL_ENV_SIZE) {
2082
free(copy); copy = NULL;
2083
fprintf(stderr, "qmake: additional environment variable names exeed buffer\n");
2087
strcat(addtl_env_pass, ",");
2089
strcat(addtl_env_pass, variable);
2094
/* free the duplicated env entry */
2095
free(copy); copy = NULL;
2099
/* addtl_env is a NULL terminated list (array) */
2100
addtl_env[var_idx] = NULL;
2103
fprintf(stdout, "export the following environment variables: %s\n", addtl_env_pass);
2107
/* check for recursive make */
2108
if (is_recursive_make(argv[0])) {
2109
/* force local execution */
2113
hostname = "dynamic mode";
2115
hostname = localhost;
2116
/* give back the host entry reserved for this job */
unlock_hostentry(jobs[next_job].offset);
2118
jobs[next_job].offset = -1;
2120
if (might_be_recursive_make(argv)) {
2121
/* argv contains program name */
2124
hostname = "dynamic mode";
2126
hostname = localhost;
2127
unlock_hostentry(jobs[next_job].offset);
2129
jobs[next_job].offset = -1;
2130
/* dump environment variable RECURSIVE_QMAKE_OPTIONS */
2134
/* the individual options are separated by newline characters */
strcpy(envvar, "RECURSIVE_QMAKE_OPTIONS=-inherit");
2137
strcat(envvar, "\n-cwd");
2141
strcat(envvar, "\n-verbose");
2144
for(i = 0; i < sge_v_argc; i++) {
2145
/* strict ">=" leaves room for the separating newline added below */
if (strlen(envvar) + strlen(sge_v_argv[i]) >= ADDTL_ENV_SIZE) {
2146
fprintf(stderr, "qmake: RECURSIVE_QMAKE_OPTIONS too big\n");
2149
strcat(envvar, "\n");
2150
strcat(envvar, sge_v_argv[i]);
2154
fprintf(stdout, "saving sge options: %s\n", envvar);
2158
/* remote execution possible */
2160
hostname = "dynamic mode";
2162
hostname = next_host();
2167
/* can we use stdin? */
2172
printf("%s\n", hostname);
2181
/* report the pid of the started child and remember it for this job */
*id_ptr = child_pid;
2182
jobs[next_job].pid = child_pid;
2184
/* free the additional environment variables */
2186
while (addtl_env[i] != NULL) {
2188
addtl_env[i] = NULL;
2193
int argc, no_args, no_requests, i;
2195
char *resource_request = NULL;
2197
/* set PARENT env var */
2198
if(getenv("JOB_ID")) {
2199
/* NOTE(review): sprintf() does not limit the length of the JOB_ID
   value written into this 1024 byte buffer */
static char buffer[1024];
2200
sprintf(buffer, "PARENT=%s", getenv("JOB_ID"));
2209
/* count arguments */
2210
/* the post-increment also counts the terminating NULL entry */
while(argv[no_args++]);
2212
/* do we have individual job requests? */
2214
resource_request = get_sge_resource_request(argv);
2215
if (resource_request != NULL) {
2217
fprintf(stdout, "add SGE resource request for this rule: %s\n",
2220
no_requests = count_sge_resource_request(resource_request);
2224
/* NOTE(review): sge_v_argc is counted once here, while sge_v_argv is
   appended by two loops below, the second one apparently for recursive
   make calls together with further options - confirm that the size is
   sufficient for that case */
args = (char **)malloc((no_args + no_requests + sge_v_argc + 8 + pass_cwd + be_verbose) * sizeof(char *));
2227
args[argc++] = "qrsh";
2229
args[argc++] = "-noshell";
2232
args[argc++] = "-verbose";
2235
if (!dynamic_mode) {
2236
args[argc++] = "-inherit";
2240
args[argc++] = "-nostdin";
2244
args[argc++] = "-cwd";
2248
args[argc++] = "-now";
2249
args[argc++] = "no";
2251
if (resource_request) {
2252
/* appends the request tokens and delivers the new argument count */
argc = copy_sge_resource_request(resource_request, args, argc);
2253
free(resource_request);
2254
resource_request = NULL;
2258
if (addtl_env_pass[0] != '\0') {
2259
args[argc++] = "-v";
2260
args[argc++] = addtl_env_pass;
2263
for(i = 0; i < sge_v_argc; i++) {
2264
args[argc++] = sge_v_argv[i];
2267
if (!dynamic_mode) {
2268
/* cast removes the const qualifier, the string is not modified here */
args[argc++] = (char *)hostname;
2272
/* a recursive make gets the qmake options again behind the program name */
if (recursive_make) {
2273
args[argc++] = argv[0];
2275
args[argc++] = "-verbose";
2277
args[argc++] = "-inherit";
2280
args[argc++] = "-cwd";
2283
if (addtl_env_pass[0] != '\0') {
2284
args[argc++] = "-v";
2285
args[argc++] = addtl_env_pass;
2288
for(i = 0; i < sge_v_argc; i++) {
2289
args[argc++] = sge_v_argv[i];
2292
args[argc++] = "--";
2297
/* append the (remaining) arguments of the task itself */
for(; argv[i] != NULL; i++) {
2298
args[argc++] = argv[i];
2304
fprintf(stdout, "starting job: \n");
2305
for(i = 0; args[i] != NULL; i++) {
2306
fprintf(stdout, "args[%3d] = %s\n", i, args[i]);
2310
/* set the RECURSIVE_QMAKE_OPTIONS environment variable */
2311
if ( envvar[0] != 0 ) {
2315
/* set the additional environment variables */
2317
while (addtl_env[i] != NULL) {
2318
/* putenv() keeps the pointer, the strings are heap copies made above */
putenv(addtl_env[i]);
2322
execvp(args[0], args);
2327
/* Get the status of a dead remote child. Block waiting for one to die
2328
if BLOCK is nonzero. Set *EXIT_CODE_PTR to the exit status, *SIGNAL_PTR
2329
to the termination signal or zero if it exited normally, and *COREDUMP_PTR
2330
nonzero if it dumped core. Return the ID of the child that died,
2331
0 if we would have to block and !BLOCK, or < 0 if there were none. */
2333
/****** qmake/remote_status() ***************************************
2336
* remote_status() -- return status of dead children
2339
* int remote_status(int *exit_code_ptr, int *signal_ptr,
2340
* int *coredump_ptr, int block);
2343
* Reports to the caller (gmake) information about the next child that
2345
* First checks, whether information has been cached in saved_status,
2346
* if yes, returns information about the first record in saved_status
2348
* if not, calls read_remote_status to check for recently finished
2352
* exit_code_ptr - see RESULT
2353
* signal_ptr - see RESULT
2354
* coredump_ptr - see RESULT
2355
* block - flag whether to block when waiting for child to die
2358
* remote_status - the pid of the dead child,
2359
* 0 if we would have to block and block is 0
2361
* exit_code_ptr - exit code of the child process
2362
* signal_ptr - 0 when process exited normally, else the signal by
2363
* which the process was terminated
2364
* coredump_ptr - nonzero, if the childprocess dumped core
2373
* qmake/read_remote_status()
2375
****************************************************************************
2378
/*
 * Deliver the status of a finished child to gmake.  A record queued in
 * the list saved_status is consumed first; without such a record the
 * request is passed on to read_remote_status().
 *
 * exit_code_ptr - receives the exit code of the child
 * signal_ptr    - receives the signal stored for the child
 * coredump_ptr  - receives the core dump flag stored for the child
 * block         - handed on to read_remote_status()
 */
int remote_status (int *exit_code_ptr, int *signal_ptr,
2379
int *coredump_ptr, int block)
2384
fprintf(stdout, "gmake requesting status of dead child processes\n");
2387
if(saved_status != NULL) {
2388
struct finished_job *jobinfo;
2390
/* unlink the first record from the list of saved states */
jobinfo = saved_status;
2391
saved_status = saved_status->next;
2393
/* copy the saved information to the caller's variables */
child_pid = jobinfo->pid;
2394
*exit_code_ptr = jobinfo->exit_code;
2395
*signal_ptr = jobinfo->signal;
2396
*coredump_ptr = jobinfo->coredump;
2403
/* nothing saved: ask the system for a finished child */
return read_remote_status(exit_code_ptr, signal_ptr, coredump_ptr, block);
2406
/****** qmake/read_and_save_remote_status() ***************************************
2409
* read_and_save_remote_status() -- read and cache status of dead children
2412
* static void read_and_save_remote_status(int *exit_code_ptr, int *signal_ptr,
2413
* int *coredump_ptr, int block);
2416
* Waits or checks for dead child processes by calling read_remote_status.
2417
* If a child has finished, stores information from read_remote_status
2418
* to a finished_job structure and appends it to the global list
2432
* qmake/read_remote_status()
2434
****************************************************************************
2437
/*
 * Poll for a finished child without blocking and keep the result: the
 * status is copied into a newly allocated finished_job record which is
 * linked into the list saved_status.
 */
static void read_and_save_remote_status()
2439
static struct finished_job jobinfo;
2442
/* last argument 0: do not block while checking for a finished child */
child_pid = read_remote_status(&(jobinfo.exit_code), &(jobinfo.signal), &(jobinfo.coredump), 0);
2445
struct finished_job *job;
2447
jobinfo.pid = child_pid;
2449
/* NOTE(review): sizeof(finished_job) needs a typedef (or object) named
   finished_job in scope, a struct tag alone is not sufficient - confirm.
   NOTE(review): confirm that the malloc() result is checked before it is
   used as the memcpy() destination. */
job = (struct finished_job *)malloc(sizeof(finished_job));
2450
memcpy(job, &jobinfo, sizeof(finished_job));
2453
/* empty list: handled separately from the general case below */
if(saved_status == NULL) {
2456
struct finished_job *j;
2459
/* walk to the last record of the list */
while(j->next != NULL) {
2467
/****** qmake/read_remote_status() ***************************************
2470
* read_remote_status() -- return status of dead children
2473
* static int read_remote_status(int *exit_code_ptr, int *signal_ptr,
2474
* int *coredump_ptr, int block);
2477
* Waits or checks for dead child processes.
2479
* - the pid of a dead child process
2481
* - if applicable, the signal by which a process was terminated
2482
* - if applicable, a flag that the process dumped core
2483
* Cleans up the job_info for a dead child, unlocks the host where
2484
* the child process was executed and removes a possibly existing
2488
* exit_code_ptr - see RESULT
2489
* signal_ptr - see RESULT
2490
* coredump_ptr - see RESULT
2491
* block - flag whether to block when waiting for child to die
2494
* remote_status - the pid of the dead child,
2495
* 0 if we would have to block and block is 0
2497
* exit_code_ptr - exit code of the child process
2498
* signal_ptr - 0 when process exited normally, else the signal by
2499
* which the process was terminated
2500
* coredump_ptr - nonzero, if the childprocess dumped core
2510
****************************************************************************
2513
/*
 * Wait for (block != 0) or poll for (block == 0) a terminated child with
 * waitpid(), decode its status and release the entry of the jobs array
 * that belongs to the child.
 *
 * exit_code_ptr - receives the exit code if the child exited normally
 * signal_ptr    - receives the signal if the child was terminated by one
 * coredump_ptr  - receives the WCOREDUMP() result in that case
 * block         - nonzero: wait until a child has terminated
 */
static int read_remote_status(int *exit_code_ptr, int *signal_ptr, int *coredump_ptr, int block)
2523
/* suppress misleading error messages */
2526
/* get info about dead children */
2527
/* pid -1: any child; WNOHANG makes waitpid() return 0 instead of
   blocking if no child has changed state yet */
child_pid = waitpid(-1, &status, block ? 0 : WNOHANG);
2529
/* waitpid failed? */
2530
/* covers the error case (-1) as well as "nothing to report" (0) */
if(child_pid <= 0) {
2532
fprintf(stderr, "waiting for child failed: %s\n", errno == 0 ? "timeout" : strerror(errno));
2537
/* waitpid reported stopped or continued child? */
2538
/* NOTE(review): waitpid() above is called without WUNTRACED and
   WCONTINUED, so these two states are not requested from it */
if(WIFSTOPPED(status)) {
2539
fprintf(stderr, "child %d was stopped\n", child_pid);
2544
if(WIFCONTINUED(status)) {
2545
fprintf(stderr, "child %d is continuing\n", child_pid);
2550
if(WIFEXITED(status)) {
2551
*exit_code_ptr = WEXITSTATUS(status);
2554
if(WIFSIGNALED(status)) {
2555
*signal_ptr = WTERMSIG(status);
2557
/* WCOREDUMP() is a widespread extension, not part of POSIX */
*coredump_ptr = WCOREDUMP(status);
2561
/* cleanup job_info */
2562
for(i = 0; i < host_count; i++) {
2563
if(jobs[i].pid == child_pid) {
2564
/* release the host on which the finished child was running */
unlock_hostentry(jobs[i].offset);
2566
jobs[i].offset = -1;
2574
/****** qmake/block_remote_children() ***************************************
2577
* block_remote_children() -- ??
2580
* void block_remote_children();
2583
* Block asynchronous notification of remote child death.
2584
* If this notification is done by raising the child termination
2585
* signal, do not block that signal.
2594
* No idea, what this function is meant for :-(
2595
* Hope, we will not need it.
2600
* qmake/unblock_remote_children()
2602
****************************************************************************
2605
/*
 * Hook called by gmake; the only statement shown here writes a trace
 * message to stdout, no blocking of notifications is performed by it.
 */
void block_remote_children ()
2608
fprintf(stdout, "gmake called block_remote_children()\n");
2614
/* Restore asynchronous notification of remote child death.
2615
If this is done by raising the child termination signal,
2616
do not unblock that signal. */
2617
/****** qmake/unblock_remote_children() ***************************************
2620
* unblock_remote_children() -- ??
2623
* void unblock_remote_children();
2626
* Restore asynchronous notification of remote child death.
2627
* If this is done by raising the child termination signal,
2628
* do not unblock that signal.
2637
* No idea, what this function is meant for :-(
2638
* Hope, we will not need it.
2643
* qmake/block_remote_children()
2645
****************************************************************************
2648
/*
 * Hook called by gmake; the only statement shown here writes a trace
 * message to stdout, no unblocking of notifications is performed by it.
 */
void unblock_remote_children ()
2651
fprintf(stdout, "gmake called unblock_remote_children()\n");
2657
/* Send signal SIG to child ID. Return 0 if successful, -1 if not. */
2658
/****** qmake/remote_kill() ***************************************
2661
* remote_kill -- send a signal to remote job
2664
* int remote_kill(int id, int sig);
2667
* Sends the signal given as parameter to the given process.
2670
* id - process id of child process to notify
2671
* sig - signal to send to child process
2674
* result of kill system call: 0 if OK, else -1
2684
****************************************************************************
2687
/*
 * Send signal sig to the process id by means of kill().
 *
 * id  - process id of the child process to notify
 * sig - signal to send
 *
 * returns the result of kill()
 */
int remote_kill (int id, int sig)
2690
fprintf(stdout, "gmake requested to send signal %d to process %d\n", sig, id);
2693
/* the result of kill() is passed to the caller unchanged */
return kill(id, sig);
2696
/****** qmake/main() ***************************************
2699
* main() -- main program for testing some functions
2702
* int main(int argc, char *argv[]);
2705
* main function to create a test program. It will be compiled,
2706
* if the define TEST_REMOTE is set.
2707
* The following features/functions will be tested:
2708
* - splitting of sge and gmake options
2709
* - parsing of option -inherit
2711
* - create -pe option from -j option
2712
* - setting of architecture as default resource request
2713
* - creation of a qmake hostfile
2714
* - initialization of filenames, filehandles, number of slots
2715
* - determination of next host to use
2718
* - set the environment variable TMPDIR
2719
* - create a sge pe hostfile in $TMPDIR
2721
* - call test program with commandline options
2722
* - analyze output of test program
2725
* argc - number of commandline parameters
2726
* argv - commandline parameters, first is program name
2729
* exitcode - always 0
2733
* setenv TMPDIR /tmp
2735
* cat >$TMPDIR/hostfile
2736
* SOWA.mydomain.de 1 sowa UNDEFINED
2737
* BALROG.mydomain.de 2 balrog UNDEFINED
2738
* BILBUR.mydomain.de 2 bilbur UNDEFINED
2739
* SARUMAN.mydomain.de 1 saruman UNDEFINED
2741
* compiling test program:
2742
* gcc -o test -DTEST_REMOTE remote-sge.c
2744
* ./test -pe make 3 -- -j 3 all install
2749
* !!!! Test program no longer works !!!!
2753
****************************************************************************
2757
int main(int argc, char *argv[])
2761
printf("\nTest program for gmake remote functions using Sge/SGE\n");
2763
printf("\nTesting commandline parsing ... \n");
2765
printf("\tbefore splitting\n");
2766
for(i = 0; i < argc; i++) {
2767
printf("\t\targv[%d] = %s\n", i, argv[i]);
2770
parse_options(&argc, &argv);
2772
printf("\tafter splitting\n");
2773
for(i = 0; i < sge_argc; i++) {
2774
printf("\t\tsge_argv[%d] = %s\n", i, sge_argv[i]);
2776
for(i = 0; i < gmake_argc; i++) {
2777
printf("\t\tgmake_argv[%d] = %s\n", i, gmake_argv[i]);
2779
for(i = 0; i < argc; i++) {
2780
printf("\t\targv[%d] = %s\n", i, argv[i]);
2783
printf("\thandling of option -inherit - recognize and strip\n");
2784
printf("\t\t%s\n", inherit_job() ? "found -inherit option" : "no -inherit option set");
2786
for(i = 0; i < sge_argc; i++) {
2787
printf("\t\tsge_argv[%d] = %s\n", i, sge_argv[i]);
2790
printf("\tcreate -pe option from -j option\n");
2791
/* JG: If no pe is given, we use dynamic allocation */
2795
for(i = 0; i < sge_argc; i++) {
2796
printf("\t\tsge_argv[%d] = %s\n", i, sge_argv[i]);
2799
printf("\tequalize NSLOTS with -j option\n");
2800
equalize_nslots(&argc, &argv);
2801
for(i = 0; i < argc; i++) {
2802
printf("\t\targv[%d] = %s\n", i, argv[i]);
2805
printf("\tdefault options ...\n");
2806
set_default_options();
2808
for(i = 0; i < sge_argc; i++) {
2809
printf("\t\tsge_argv[%d] = %s\n", i, sge_argv[i]);
2814
printf("\ninitializing remote execution ... ");
2818
printf("\ntesting qmake hostfile and next_host function\n");
2820
printf("\tnumber of slots is %d\n", host_count);
2824
for(i = 0; i < 20; i++) {
2825
/* reserve a job_info entry */
2826
start_remote_job_p(1);
2827
jobs[next_job].pid = 123;
2830
printf("\thost for job %3d = %s\n", i, next_host());
2832
/* release some hosts */
2833
if(jobs[next_job].offset % 3) {
2834
unlock_hostentry(jobs[next_job].offset);
2835
jobs[next_job].pid = 0;
2836
jobs[next_job].offset = -1;