3
cl -O2 -Ot -W4 multiruns.c
4
cc -o multiruns -O2 multiruns.c -lm
6
multiruns <rstfile1> <rstfile2> ... <lnLColumn>
8
Examples (comparing three runs with lnL in column 19 in rst1):
10
multiruns rst1.r1 rst1.r2 rst1.r3 19
11
multiruns a/rst1 b/rst1 c/rst1 19
13
March 2003, Ziheng Yang
14
September 2005, changed tworuns into multiruns, ziheng yang
16
This program compares outputs from multiple separate ML runs analyzing
17
many data sets (using ndata) to assemble a result file. Because of local
18
peaks and convergence problems, multiple runs for the same analysis may not
19
generate the same results. Then we should use the results corresponding to
20
the highest lnL. This program takes input files which have summary results
21
from multiple runs, one line for each data set. The program takes one line
22
from each of the input files and compares the first field, which is an index
23
column and should be identical between the input files, and an lnL column.
24
The program decides which run generated the highest lnL, and copies the line
25
from that run into the output file: out.txt.
27
This is useful when you analyze the same set of simulated replicate data
28
sets multiple times, using different starting values. For example, codeml
29
may write a line of output in rst1 for each data set, including parameter
30
estimates and lnL. You can then use this program to compare the rst1 output
31
files from multiple runs to generate one output file. The program allows the
32
fields to be either numerical or text, but the first (index) and lnL columns
43
/* Capacity of the fields[] array that main() passes to splitline(). */
#define MAXNFIELDS 1000
44
/* Size of each per-file line buffer; also the length limit main() gives fgets(). */
#define MAXLLINE 64000
46
/* Prints "Error: <message>." and terminates the program. */
void error2 (char * message);
47
/* fopen() wrapper; on a failed open for reading it asks the user for another path. */
FILE *gfopen(char *filename, char *mode);
48
/* Records the start offset of each field of line in fields[]; the return value
   is used by main() as the number of fields. */
int splitline (char line[], int fields[]);
51
/* Merge the result files of several runs into out.txt.
 *
 * Command line: multiruns <file1> <file2> ... <lnLcolumn>
 * The last argument is read as the lnL column; the arguments before it are
 * the input files.  For every record (one line per input file) the line whose
 * lnL value is largest is written to the output file, and a one-line summary
 * is printed to stdout.
 *
 * NOTE(review): several lines of this function (opening brace, the statement
 * that sets nfile, some closing braces, and the end of the function) are not
 * present in this listing -- confirm against the complete source.
 */
int main(int argc, char* argv[])
53
FILE *fout, *fin[MAXNFIILES];
54
char infile[MAXNFIILES][96]={"rst1.r1", "rst1.r2"}, outfile[96]="out.txt";
55
int index=0, nfile, nfileread, lnLcolumn=13, i, nrecords=0, lline=MAXLLINE;
56
int nfields[MAXNFIILES],fields[MAXNFIELDS], minf, maxf, miss[MAXNFIILES];
57
char *line[MAXNFIILES];
58
double lnL[MAXNFIILES], lnLmin, lnLmax, indexfield[MAXNFIILES], y;
60
puts("Usage: \n\tmultiruns <file1> <file2> ... <lnLcolumn>\n");
64
/* Open every input file and give each one its own line buffer. */
for(i=0; i<nfile; i++) {
65
/* NOTE(review): strcpy is unbounded; infile[i] holds 96 bytes, so a longer
   path in argv overflows it. */
strcpy(infile[i], argv[1+i]);
66
fin[i]=gfopen(infile[i],"r");
67
if((line[i]=(char*)malloc(MAXLLINE*sizeof(char)))==NULL) error2("oom");
68
printf("%s ", infile[i]);
70
printf(" ==> %s\n\n", outfile);
72
/* The last command-line argument is the lnL column.
   NOTE(review): the sscanf result is not checked; on a non-numeric argument
   lnLcolumn keeps its initial value of 13.  lnLcolumn is used below as a
   direct index into fields[]; whether it is adjusted from a 1-based column
   number is not visible here -- confirm. */
sscanf(argv[argc-1], "%d", &lnLcolumn);
74
fout=(FILE*)gfopen(outfile,"w");
76
/* One iteration per record: read one line from each input file. */
for(nrecords=0; ; nrecords++) {
77
for(i=0,nfileread=0; i<nfile; i++) {
78
/* Reset this file's state; miss[i] starts out as 1 ("missing"). */
nfields[i]=0; lnL[i]=0; line[i][0]='\0'; miss[i]=1;
79
/* A file with no further line is skipped for this record. */
if(!fgets(line[i], lline, fin[i])) continue;
80
nfields[i]=splitline(line[i],fields);
82
/* Parse the index field (field 0) and, if the line has enough fields, lnL. */
if(nfields[i]>index) sscanf(line[i]+fields[index], "%lf", &indexfield[i]);
83
if(nfields[i]>lnLcolumn) {
84
sscanf(line[i]+fields[lnLcolumn], "%lf", &lnL[i]);
89
/* NOTE(review): the statements that update nfileread and miss[i] are not
   visible here; presumably nfileread counts the files that supplied a usable
   line, so the loop ends once every input is exhausted -- confirm. */
if(nfileread==0) break;
90
/* Check that the index field agrees across files.  y==-1 serves as the
   "not yet set" marker, so an index value of -1 is indistinguishable from
   unset. */
for(i=0,y=-1; i<nfile; i++) {
92
if(y==-1) y=indexfield[i];
93
else if(y!=indexfield[i]) error2("index field different");
96
/* Find the smallest and largest lnL and the files (minf, maxf) they came from. */
for(i=0,lnLmin=1e300,lnLmax=-1e300; i<nfile; i++) {
98
if(lnL[i]<lnLmin) { lnLmin=lnL[i]; minf=i; }
99
if(lnL[i]>lnLmax) { lnLmax=lnL[i]; maxf=i; }
102
/* Copy the winning run's line to the output file. */
fprintf(fout, "%s", line[maxf]);
104
/* Summary: '+'/'-' per file from miss[], then min, max and their difference. */
printf("record %4d (", nrecords+1);
105
for(i=0; i<nfile; i++) printf("%c", (miss[i]?'-':'+'));
106
printf(") %10.3f (%d) - %10.3f (%d) = %8.3f\n",
107
lnLmin, minf+1, lnLmax, maxf+1, lnLmin-lnLmax);
109
printf("\nwrote %d records into %s\n", nrecords, outfile);
110
/* Release the input streams and line buffers.
   NOTE(review): closing fout is not visible in this listing -- confirm. */
for(i=0; i<nfile; i++) { fclose(fin[i]); free(line[i]); }
114
/* Report a fatal error and terminate the program.
 *
 * Prints "\nError: <message>.\n" to stdout and then calls exit(-1); it never
 * returns.  The format string already appends a period, so callers should
 * pass the message without trailing punctuation.
 *
 * message: NUL-terminated text describing the failure.
 */
void error2 (char * message)
{
   printf("\nError: %s.\n", message);
   exit(-1);
}
117
/* Open filename with fopen() in the given mode.
 *
 * An empty or NULL name is reported through error2().  After an error message
 * about the open, a mode without 'r' exits with status -1; otherwise the user
 * is asked once for a replacement path, which is read into filename, and the
 * open is retried.
 *
 * NOTE(review): the declaration of fp, the test on fopen()'s result, the
 * braces and the final return are not present in this listing -- confirm
 * against the complete source.
 */
FILE *gfopen(char *filename, char *mode)
121
/* NOTE(review): error2() appends its own '.', so this message prints with a
   doubled period. */
if(filename==NULL || filename[0]==0)
122
error2("file name empty.");
124
fp=(FILE*)fopen(filename, mode);
126
/* NOTE(review): presumably the lines below run only when fopen() returned
   NULL; the guarding condition is not visible here -- confirm. */
printf("\nerror when opening file %s\n", filename);
127
/* Only modes containing 'r' get the interactive retry. */
if(!strchr(mode,'r')) exit(-1);
128
printf("tell me the full path-name of the file? ");
129
/* NOTE(review): "%s" has no field width and writes the reply into the
   caller's filename buffer, so a long reply can overflow it; the scanf
   result is not checked either. */
scanf("%s", filename);
130
if((fp=(FILE*)fopen(filename, mode))!=NULL) return(fp);
131
puts("Can't find the file. I give up.");
137
int splitline (char line[], int fields[])
139
/* This finds out how many fields there are in the line, and marks the starting
140
positions of the fields.
141
Fields are separated by spaces, and texts are allowed as well.
143
int lline=64000, i, nfields=0, InSpace=1;
146
for(i=0; i<lline && *p && *p!='\n'; i++,p++) {
153
/* if(nfields>MAXNFIELDS) puts("raise MAXNFIELDS?"); */