1
/*-----------------------------------------------------------------------
2
BOXSHADE 3.3 (May 1997)
4
this program makes multiple-aligned output from either
5
- PRETTY files from old LINEUP
6
- MSF files from new PILEUP
11
various kinds of shading can be applied to identical/similar residues
12
Output formats supported are
13
- ANSI for display on terminal with ANSI.SYS loaded (PC version)
14
- VT100 for display on VT100 compatible terminals
15
- CRT for display on the main screen ( * PC-version only! * )
17
- Encapsulated POSTSCRIPT (for export to graphics programs)
18
- HPGL for output on plotters or export to graphics programs
19
- RTF (Rich text format) for export to word processing programs
20
- ReGIS graphics for display on terminals with ReGIS capabilities (DEC)
21
- ReGIS file for printing after conversion with RETOS
22
- LJ250 color printer format (DEC)
23
- ASCII output showing either conserved or variable residues
24
- FIG file output for use with xfig 2.1 program
25
- PICT file, mostly used by Mac graphics progs, also many PC progs
26
- HTML output for Web publishing
28
This program might still contain bugs and is not particularly
29
user-friendly. It is completely public-domain and may be
30
passed around and modified without any notice to the author.
32
the authors' addresses are:
34
Kay Hofmann Michael D. Baron
35
Bioinformatics Group BBSRC Institute for Animal Health
36
ISREC Pirbright, Surrey GU24 0NF
37
CH-1066 Epalinges s/Lausanne U.K.
48
for any comments write an E-mail to
49
Kay: khofmann@isrec-sun1.unil.ch
50
Michael: baron@bbsrc.ac.uk (though kay wrote 99% of the program and is more
51
likely to be of help, especially with input and output problems)
52
Hartmut: hsc@techfak.uni-kiel.de (don't send Kay or Michael any questions
53
concerning the 'C' version of boxshade)
56
3.3 -converted from Pascal to C, includes several bug fixes over Pascal v3.21,
57
mostly in page switching code. Rewritten Postscript driver: Compact
58
output following the document structure conventions (DSC), additional
59
color mode added. Enhanced memory allocation allows bigger alignments
60
to be processed. Added ruler feature and /grp & /sim command line
61
parameter. Added HTML output.
62
3.2 -added PICT output, somewhat experimental, all feedback gratefully
63
received (by MDB). There are two options, either to output the Text
64
with the shading (T) or just the shading (S); the latter option is for those
65
who find the full file too big for their computer/program, and want to
66
combine the shading with a simple formatted text version of their multiple
67
sequence file e.g. the output from PRETTY (remember to set the block
68
size equal to the linelength).
69
3.0 -major changes to shading strategy introduced by Michael Baron,
70
see Documentation for details
71
-addition of a new set of files (*.grp) and a few new command
72
line qualifiers necessary for this new strategy
73
-added support for ASCII output, showing one complete sequence
74
and either conserved or deviating residues in the other sequences
75
-added FIG output for use with the x-windows based public domain
76
graphics program xfig 2.1
77
-bug fixes in the reading routine, boxshade can now import files
78
created by clustalW and the MSF format created by readseq
80
2.7 -unified source code for VAX/VMS, AXP/OSF1 and MSDOS (Turbo Pascal)
81
-all features of previous 2.6DOS version available for OSF1 and VMS
82
-reads parameter and alignment files in both DOS and UNIX format
83
-writes output files in native format of the machine. UNIX or DOS
84
style text files can be forced by the qualifiers /unix or /dos
85
-added /check qualifier for listing of command line parameters
86
-added the option to hide the master sequence (see documentation)
87
-finally dropped UIS support
90
2.6DOS -added RTF output
92
2.5DOS -added HPGL output
93
-added consensus formation and output
94
-added the /NUMDEF qualifier for default numbering
95
-added the /SPLIT qualifier for splitting output to one page per file.
97
/SYMBCONS qualifiers for output of consensus sequence
99
2.4DOS -initial MSDOS version
100
-ported from VMS version 2.4
103
2.4a -fixed a bug (terminal size problems)
105
2.4 -modified POSTSCRIPT/EPS output, allows scaling of fonts.
106
(.PAR file structure modified, BOX.PSP now obsolete)
107
-support of vertical POSTSCRIPT output
108
-support of output on VT-series terminals
109
-support of input from CLUSTAL V
110
-slight reordering of interactive input
111
-minor bug in command line handling fixed
112
-minor bug in EPS-bounding box fixed
114
2.3 -default parameter management
115
-option for shading according to similarity to a instead of a consensus sequence
116
-support of GCG V7 MSF-format
118
2.2 -support of sequence numbering in output
119
-support of MALIGNED data files
121
2.1 -output code completely rewritten for allowing easier modification
122
-support of ENCAPSULATED POSTSCRIPT output
123
-more compact POSTSCRIPT output files
124
-multipage output for all devices
125
-modifications in PRETTY and CLUSTAL reading routines
126
for coping with slightly altered formats
128
2.0 -added POSTSCRIPT and ReGIS output routines
129
-added a version that compiles without VWS (BOX_NO_UIS)
131
-creation of a .COM file for setting logicals
134
-CLUSTAL and PRETTY input
135
-UIS/VWS and LJ250 sixel output
137
----------------------------------------------------------------------- */
141
#include "bx_types.h"
148
/* DNAPEP() yields "dna" or "pep" depending on dnaflag; it is substituted
   into the default file-name patterns such as "box_%s.par". */
#define DNAPEP() (dnaflag ? "dna" : "pep")
150
/* The 20 residue letters.  The position of a letter in this string is the
   index used for the simtable/grptable lookups further down in this file. */
char *aaset = "ACDEFGHIKLMNPQRSTVWY";
151
/* Lower-case counterpart of aaset (same order). */
char *aasetlow = "acdefghiklmnpqrstvwy";
155
/* Output driver; assigned from the switch on outputmode in ask(). */
static GraphicsDevice *OutDev = NULL;
157
/* Row indices in seq[]/col[] used for the extra "consensus" and ruler
   lines by graphics_out(); both start at -1. */
static int cons_idx = -1;
158
static int ruler_idx = -1;
160
/*--------------------------------------------------------------------------*/
161
/* user query section */
162
/*--------------------------------------------------------------------------*/
164
/*
 * explain_cl: print the list of recognised command line parameters, echo
 * the current command line `cl` and prompt for text to add to it.
 *
 * NOTE(review): the statements that read the reply and return are not
 * visible in this extract.  Presumably the reply is stored in the static
 * buffer ncl and a pointer to it is returned (the caller appends the
 * result to its command line) -- confirm against the full file.
 */
static char *explain_cl(char *cl)
166
static char ncl[256];
168
printf("Allowed command line parameters are:\n");
169
printf(C_SEP "help show this text\n");
170
printf(C_SEP "check show this text and extend command line\n");
171
printf(C_SEP "def use defaults, no unnecessary questions\n");
172
printf(C_SEP "numdef use default numbering\n");
173
printf(C_SEP "dna assume DNA sequences, use box_dna.par\n");
174
printf(C_SEP "split create separate files for multiple pages\n");
175
printf(C_SEP "toseq=xxx shading according to sequence No. xxx\n");
176
printf(C_SEP "in=xxxxx xxxxx is input file name\n");
177
printf(C_SEP "out=xxxxx xxxxx is output file name\n");
178
printf(C_SEP "par=xxxxx xxxxx is parameter file name\n");
179
printf(C_SEP "sim=xxxxx xxxxx is file name for similar residues def.\n");
180
printf(C_SEP "grp=xxxxx xxxxx is file name for grouping residues def.\n");
181
printf(C_SEP "thr=x x is the fraction of sequences that must agree"
182
" for a consensus\n");
183
printf(C_SEP "dev=x x is output device class (see documentation)\n");
184
printf(C_SEP "type=x x is input file format (see documentation)\n");
185
printf(C_SEP "ruler print ruler line\n");
186
printf(C_SEP "cons create consensus line\n");
187
printf(C_SEP "symbcons=xyz xyz are consensus symbols\n");
188
printf(C_SEP "symbcons=\"xyz\" if the one above does not work, try this one\n");
189
printf(C_SEP "unix output files lines are terminated with LF only\n");
190
printf(C_SEP "mac output files lines are terminated with CR only\n");
191
printf(C_SEP "dos output files lines are terminated with CRLF\n");
192
/* printf("On unix systems, use the dash (-) as parameter delimiter\n\n"); */
194
printf("actual command line: %s\n", cl);
195
printf("add to command line: ");
202
/*
 * get_cl_filename: extract the value that follows `tag` in the command
 * line `cl`.  p is positioned just behind the tag and advanced until a
 * NUL, a blank or the parameter separator c_sep is met.
 *
 * NOTE(review): the lookup of p1, the copy into Result and the return
 * statement are not visible in this extract; p1 appears to be the 1-based
 * position of the tag (hence the "- 1") -- confirm.
 */
static char *get_cl_filename(char *Result, char *cl, char *tag)
208
p = cl + p1 + strlen(tag) - 1;
210
while (*p != '\0' && *p != ' ' && *p != c_sep)
217
/*
 * get_cl_real: return the value following `tag` in `cl`, converted with
 * str2real().  The value text ends at a NUL, a blank or c_sep.
 *
 * NOTE(review): how the text is copied into `dummy` is not visible in this
 * extract; the size of dummy and any bound on the copy need confirming.
 */
static double get_cl_real(char *cl, char *tag)
224
p = cl + p1 + strlen(tag) - 1;
226
while (*p != '\0' && *p != ' ' && *p != c_sep)
229
return( str2real(dummy) );
233
/*
 * get_cl_int: return the value following `tag` in `cl`, converted with
 * str2int().  The value text ends at a NUL, a blank or c_sep.
 *
 * NOTE(review): how the text is copied into `dummy` is not visible in this
 * extract; the size of dummy and any bound on the copy need confirming.
 */
static int get_cl_int(char *cl, char *tag)
240
p = cl + p1 + strlen(tag) - 1;
242
while (*p != '\0' && *p != ' ' && *p != c_sep)
245
return( str2int(dummy) );
249
/*
 * process_command_line: join argv[1..argc-1] into the buffer cl and scan
 * it with indx() for every recognised option (C_SEP followed by a
 * keyword), setting the matching global for each one found.
 *
 * indx() is used throughout as "> 0 means found"; together with the
 * cl[idx + n] accesses below it appears to return a 1-based position.
 */
static void process_command_line(int argc, char **argv)
254
save_binpath(argv[0]);
257
for (idx=1; idx < argc; idx++) {
258
/* NOTE(review): unbounded append; the declaration (and so the capacity)
   of cl is not visible in this extract -- confirm long command lines
   cannot overflow it. */
strcat(cl, argv[idx]);
262
if ( indx(cl, C_SEP "help") > 0) {
267
if ( indx(cl, C_SEP "check") > 0) {
269
/* the text returned by explain_cl() is appended to the command line */
strcat(cl, explain_cl(cl) );
271
if (indx(cl, C_SEP "dna") > 0)
275
if (indx(cl, C_SEP "def") > 0)
276
interactflag = FALSE;
280
/* NOTE(review): unlike the other file options the tag tested here has no
   '=', while the value is fetched with "in=" below; any option that merely
   starts with C_SEP "in" would satisfy this test. */
if (indx(cl, C_SEP "in") > 0) {
282
get_cl_filename(inname, cl, C_SEP "in=");
286
if (indx(cl, C_SEP "out=") > 0) {
288
get_cl_filename(outname, cl, C_SEP "out=");
292
if (indx(cl, C_SEP "par=") > 0) {
294
get_cl_filename(parname, cl, C_SEP "par=");
298
if (indx(cl, C_SEP "sim=") > 0) {
300
get_cl_filename(clsimname, cl, C_SEP "sim=");
304
if (indx(cl, C_SEP "grp=") > 0) {
306
get_cl_filename(clgrpname, cl, C_SEP "grp=");
310
if ( (idx=indx(cl, C_SEP "type=")) > 0) {
312
/* single character directly behind "type=" */
inputmode = cl[idx + 5];
316
if (indx(cl, C_SEP "thr=") > 0) {
318
thrfrac = get_cl_real(cl, C_SEP "thr=");
319
/* NOTE(review): get_cl_real() returns double; the cast to unsigned drops
   the fraction, so a value such as 1.5 is not caught by this test, and
   converting a negative value to unsigned is undefined in C.  A direct
   floating-point range check looks like what is intended -- confirm. */
if ((unsigned)thrfrac > 1)
324
if (indx(cl, C_SEP "toseq=") > 0) {
325
clseqconsflag = TRUE;
327
consensnum = get_cl_int(cl, C_SEP "toseq=");
329
clseqconsflag = FALSE;
331
if ( (idx=indx(cl, C_SEP "dev=")) > 0) {
333
/* single character directly behind "dev=" */
outputmode = cl[idx + 4];
337
if (indx(cl, C_SEP "cons") > 0) {
343
if (indx(cl, C_SEP "ruler") > 0) {
348
if ( (idx=indx(cl, C_SEP "symbcons=")) > 0) {
349
clsymbconsflag = TRUE;
353
/* an opening quote character shifts the three symbols by `incr` */
if (cl[idx + 9] == '"' ||
356
symbcons[0] = cl[idx + incr + 9];
357
symbcons[1] = cl[idx + incr + 10];
358
symbcons[2] = cl[idx + incr + 11];
360
clsymbconsflag = FALSE;
362
if (indx(cl, C_SEP "split") > 0)
367
/* end-of-line convention of output files; first match wins */
EOLmode = EOL_default;
368
if (indx(cl, C_SEP "unix" ) > 0) EOLmode = EOL_unix; else
369
if (indx(cl, C_SEP "dos") > 0) EOLmode = EOL_dos; else
370
if (indx(cl, C_SEP "mac") > 0) EOLmode = EOL_mac;
372
if (indx(cl, C_SEP "numdef") > 0 || !interactflag)
378
/*
 * SimGrp: locate a similarity/group definition file.
 *
 * The name is formatted into fn from `template` with DNAPEP() as the only
 * argument.  If that file exists TRUE is returned; otherwise a name
 * prefixed with the BOXDIR logical and d_sep is tried.  If the file is
 * still missing and `explain` is NULL the function returns FALSE;
 * otherwise the user is prompted (with `explain` as label) for a name.
 *
 * NOTE(review): `template` is used as a printf format.  ask() also passes
 * clsimname/clgrpname, which come from the command line, so a '%' in a
 * user-supplied file name would be interpreted as a conversion -- confirm
 * and consider copying such names verbatim.
 * NOTE(review): several statements (completion of the BOXDIR path, reading
 * the reply, definition of `neu`) are not visible in this extract.
 */
static BOOL SimGrp(char *template, char *fn, char *explain) {
381
sprintf(fn, template, DNAPEP());
382
if (fexist(fn)) return TRUE;
385
sprintf(fn, "%s%c", get_logical("BOXDIR"), d_sep);
387
if (fexist(fn)) return TRUE;
389
if (explain == NULL) return FALSE;
391
printf("%s %s does not exist, enter filename: ", explain, neu);
393
if (fexist(fn)) return TRUE;
395
printf("\007file does not exist \n");
399
/*
 * ask: establish all run parameters.
 *
 * 1. Locate the default-parameter file (current directory, then the BOXDIR
 *    logical, then a name asked from the user) and read the defaults from
 *    its ":GENERAL" section, one item per line.
 * 2. Blank the sequence buffers and print the program banner.
 * 3. Ask for every setting that was not already fixed on the command line
 *    (each prompt is guarded by interactflag and the matching cl...flag):
 *    input file and type, output device, consensus/ruler options,
 *    threshold, line length, names/numbers, similarity and group files,
 *    identity/similarity matrix output.
 * 4. Select the output driver OutDev from outputmode.
 *
 * An empty reply to a prompt keeps the default shown between (* ... *).
 *
 * NOTE(review): many short statements (loop heads, else branches, the
 * bodies of most one-line ifs) are not visible in this extract, so the
 * description above is limited to what the remaining lines show.
 */
static void ask(void)
407
/* default parameter file: ./box_<dna|pep>.par, then BOXDIR */
sprintf(parname, "box_%s.par", DNAPEP());
408
if (!fexist(parname))
409
sprintf(parname, "%s%cbox_%s.par",
410
get_logical("BOXDIR"), d_sep, DNAPEP());
411
if (!fexist(parname)) {
414
printf("Default-parameter file %s does not exist, enter filename: ",
417
ok = fexist(parname);
419
printf("\007file does not exist \n");
422
parfile = fopen(parname, TXT_RD);
423
/* NOTE(review): assert() is the only check on fopen(); it disappears when
   compiled with NDEBUG -- confirm that is acceptable. */
assert(parfile != NULL);
425
/* skip forward to the line that starts with ":GENERAL" */
Fgets(line_, 256, parfile);
426
} while (indx(line_, ":GENERAL") != 1);
427
Fgets(line_, 256, parfile);
429
inputmode = line_[0];
430
Fgets(line_, 256, parfile);
432
outputmode = line_[0];
433
Fgets(line_, 256, parfile);
434
if (line_[0] == 'Y' || line_[0] == 'y')
439
masternormal = FALSE;
440
/* NOTE(review): the fscanf() results in this function are not checked */
fscanf(parfile, "%d%*[^\n]", &outlen);
442
Fgets(line_, 256, parfile);
443
if (line_[0] == 'Y' || line_[0] == 'y')
447
Fgets(line_, 256, parfile);
448
if (line_[0] == 'Y' || line_[0] == 'y')
452
fscanf(parfile, "%d%*[^\n]", &interlines);
454
Fgets(line_, 256, parfile);
455
if (line_[0] == 'Y' || line_[0] == 'y')
459
Fgets(line_, 256, parfile);
460
if (line_[0] == 'Y' || line_[0] == 'y')
465
Fgets(line_, 256, parfile);
467
if (line_[0] == 'Y' || line_[0] == 'y')
473
Fgets(line_, 256, parfile);
474
/* consensus symbols from the file only if not given on the command line */
if (!clsymbconsflag) {
475
symbcons[0] = line_[0];
476
symbcons[1] = line_[1];
477
symbcons[2] = line_[2];
480
Fgets(line_, 256, parfile);
482
if (line_[0] == 'Y' || line_[0] == 'y')
488
Fgets(line_, 256, parfile);
490
if (line_[0] == 'Y' || line_[0] == 'y')
496
fscanf(parfile, "%lg%*[^\n]", &thrfrac);
499
Fgets(line_, 256, parfile);
502
/* blank-fill every sequence buffer */
for (i = 0; i < max_no_seq; i++)
503
memset(seq[i], ' ', max_no_res);
506
printf("BOXSHADE %s\n", BOXSHADE_ver);
508
"This program makes multiple-aligned output from either\n"
509
"PILEUP-MSF, CLUSTAL-ALN, MALIGNED-data and ESEE-save files\n"
510
"(limited to a maximum of %d sequences with up to %d elements each)\n",
511
max_no_seq, max_no_res);
513
"Various kinds of shading can be applied to identical/similar residues\n"
514
"Output is written to screen or to a file in the following formats:\n"
515
"ANSI/VT100, PS/EPS, RTF, HPGL, ReGIS, LJ250-printer, ASCII, xFIG,\n"
520
/**** ask for infile ****/
525
printf("name of aligned input-file : ");
531
printf("\007aligned input file does not exist \n");
535
/**** ask for infile type ****/
537
/* the file name extension is inspected to pre-select the input type */
if (indx(inname, ".pre") > 0 || indx(inname, ".PRE") > 0 ||
538
indx(inname, ".msf") > 0 || indx(inname, ".MSF") > 0)
541
if (indx(inname, ".aln") > 0 || indx(inname, ".ALN") > 0)
544
if (indx(inname, ".mal") > 0 || indx(inname, ".MAL") > 0)
547
if (indx(inname, ".ese") > 0 || indx(inname, ".ESE") > 0)
552
if (indx(inname, ".phy") > 0 || indx(inname, ".PHY") > 0)
555
if (interactflag && !cltypeflag) {
557
printf("Do you want to process (1) Lineup-PRETTY/Pileup-MSF file\n"
558
" (2) CLUSTAL .ALN file\n"
559
" (3) MALIGNED data file\n"
560
" (4) ESEE save file\n"
561
" (5) PHYLIP file (* %c *) : ",
563
Fgets(instring, 51, stdin);
564
/* empty reply: fall back to the current default */
if (*instring == '\0')
565
sprintf(instring, "%c", inputmode);
566
if (indx("12345", instring) == 0) {
568
printf("\007---> Please choose a supported type\n");
571
} while (indx("12345", instring) <= 0);
574
if (*instring != '\0')
575
inputmode = instring[0];
580
/* output device menu; the o... constants are the selector characters */
printf("Output suitable for (%2c) POSTSCRIPT\n", oPS);
581
printf(" (%2c) encapsulated POSTSCRIPT\n", oEPS);
582
printf(" (%2c) HPGL\n", oHPGL);
583
printf(" (%2c) RTF (Rich Text Format)\n", oRTF);
585
printf(" (%2c) PC-screen (PCs only!)\n", oCRT);
587
printf(" (%2c) ANSI-screen (PC-version)\n", oANSI);
588
printf(" (%2c) VT100-screen (DEC-version)\n", oVT);
589
printf(" (%2c) ReGIS-screen (25 lines each\n", oREGISt);
590
printf(" (%2c) ReGIS-file (without breaks)\n", oREGISp);
591
printf(" (%2c) LJ250-printer file\n", oLJ250);
592
printf(" (%2c) ASCII file\n", oASCII);
593
printf(" (%2c) FIG file (for XFIG)\n", oFIG);
594
printf(" (%2c) PICT file\n", oPICT);
595
printf(" (%2c) HTML file\n", oHTML);
596
printf(" current: (* %c *) : ", outputmode);
598
Fgets(instring, 51, stdin);
599
if (*instring == '\0')
600
sprintf(instring, "%c", outputmode);
601
if (indx(allowed_devices, instring) == 0) {
603
printf("\007---> Please choose a supported type\n");
606
} while (indx(allowed_devices, instring) <= 0);
608
if (*instring != '\0')
609
outputmode = instring[0];
612
if (outputmode == oASCII)
621
"-------------------------------------------------------------------\n");
623
printf("similarity to a single sequence? (* y *) : ");
625
printf("similarity to a single sequence? (* n *) : ");
626
Fgets(instring, 51, stdin);
627
} while (indx("nNyY", instring) <= 0 && *instring != '\0');
629
seqconsflag = (toupper(*instring) == 'Y');
633
if (interactflag && !clconsflag) {
635
printf("-------------------------------------------------------------------\n");
636
printf("display consensus line ? (* %c *) : ", YESNO(consflag) );
637
Fgets(instring, 51, stdin);
638
} while (indx("nNyY", instring) <= 0 && *instring != '\0');
639
if (*instring != '\0')
640
consflag = (toupper(instring[0]) == 'Y');
642
if (clconsflag && !clsymbconsflag)
643
printf("-------------------------------------------------------------------\n\n");
646
if (interactflag && consflag && !clsymbconsflag) {
648
"Enter now a string of 3 symbols to be used for consensus display\n"
649
" representing different/all-similar/all-identical residues\n"
650
" see documentation for symbol definition, examples are:\n"
651
" \" .*\" or \"-LU\" (ommit quotes but use blanks)\n"
653
Fgets(instring, 51, stdin);
654
if (*instring != '\0') {
655
/* insist on exactly three symbols */
while (strlen(instring) != 3) {
656
printf("\007please enter string of THREE (3) symbols !\n");
658
Fgets(instring, 51, stdin);
660
symbcons[0] = instring[0];
661
symbcons[1] = instring[1];
662
symbcons[2] = instring[2];
667
if (interactflag && !rulerflag) {
669
printf("-------------------------------------------------------------------\n"
670
"display ruler line ? (* %c *) : ", YESNO(rulerflag) );
671
Fgets(instring, 51, stdin);
672
} while (indx("nNyY", instring) <= 0 && *instring != '\0');
673
if (*instring != '\0')
674
rulerflag = (toupper(instring[0]) == 'Y');
678
if (interactflag && !clthrflag && outputmode != oASCII) {
682
printf("--------------------------------------------------------------\n"
683
"The threshold is the fraction of residues that must be identical\n"
684
"or similar for shading to occur\n"
685
"Value for threshold (* %6.2f *):", thrfrac);
686
Fgets(instring, 51, stdin);
687
if (*instring == '\0')
691
inno = str2real((void *)instring);
692
/* accept only a cleanly parsed value in (0, 1] */
if (*depend_err == '\0' && inno <= 1.0 && inno > 0.0) {
697
printf("The fraction must be between 0 and 1\n\n");
701
if (outputmode == oASCII)
707
printf("-------------------------------------------------------------------\n"
709
"How many sequence characters per line (* %3d *) : ",
711
Fgets(instring, 51, stdin);
712
if (*instring != '\0')
713
outlen = str2int((void *)instring);
715
} while (outlen <= min_outlen || outlen >= max_outlen);
721
printf("should sequence name be printed (* %c *) : ", YESNO(seqnameflag));
722
Fgets(instring, 51, stdin);
723
} while (indx("nNyY", instring) <= 0 && *instring != '\0');
725
if (*instring != '\0')
726
seqnameflag = (toupper(instring[0]) == 'Y');
729
if (interactflag && !rulerflag) {
731
printf("should position numbers be printed (* %c *) : ", YESNO(seqnumflag));
732
Fgets(instring, 51, stdin);
734
} while (indx("nNyY", instring) <= 0 && *instring != '\0');
736
if (*instring != '\0')
737
seqnumflag = (toupper(instring[0]) == 'Y');
742
printf("How many lines between two sequence blocks (* %2d *) : ",
744
Fgets(instring, 51, stdin);
745
if (*instring != '\0')
746
interlines = str2int((void *)instring);
748
} while (interlines <= 0 || interlines >= 100);
754
printf("special label for similar residues (* %c *) : ", YESNO(simflag));
755
Fgets(instring, 51, stdin);
756
} while (indx("nNyY", instring) <= 0 && *instring != '\0');
758
if (*instring != '\0')
759
simflag = (toupper(instring[0]) == 'Y');
763
/* similarity / group files: a command line name is tried silently (NULL
   label); the default pattern is tried with a prompt on failure */
clsimflag = SimGrp(clsimname, simname, NULL);
765
SimGrp("box_%s.sim", simname, "Similarity-file");
767
clgrpflag = SimGrp(clgrpname, grpname, NULL);
769
SimGrp("box_%s.grp", grpname, "Group-file");
772
sprintf(simname, "box_%s.sim", DNAPEP());
773
sprintf(grpname, "box_%s.grp", DNAPEP());
774
if (!fexist(simname))
775
sprintf(simname, "%s%cbox_%s.sim",
776
get_logical("BOXDIR"), d_sep, DNAPEP());
777
if (!fexist(simname)) {
780
printf("Similarity-file %s does not exist, enter filename: ", simname);
782
ok = fexist(simname);
784
printf("\007file does not exist \n");
788
if (!fexist(grpname))
789
sprintf(grpname, "%s%cbox_%s.grp",
790
get_logical("BOXDIR"), d_sep, DNAPEP());
791
if (!fexist(grpname)) {
794
printf("Group-file %s does not exist, enter filename: ", grpname);
796
ok = fexist(grpname);
798
printf("\007file does not exist \n");
808
printf("special label for identical residues in all sequences (* y *) : ");
810
printf("special label for identical residues in all sequences (* n *) : ");
811
Fgets(instring, 51, stdin);
812
} while (indx("nNyY", instring) <= 0 && *instring != '\0');
814
if (*instring != '\0')
815
globalflag = (toupper(instring[0]) == 'Y');
817
for (i = 0; i <= 4; i++)
820
/* bind the driver table that matches the chosen output device */
switch (outputmode) {
822
case oCRT: OutDev = &Crt; break;
824
case oANSI: OutDev = &Ansi; break;
825
case oPS: OutDev = &Postscript; break;
826
case oEPS: OutDev = &Eps; break;
827
case oHPGL: OutDev = &Hpgl; break;
828
case oRTF: OutDev = &Rtf; break;
829
case oREGISt: OutDev = &RegisT; break;
830
case oREGISp: OutDev = &RegisP; break;
831
case oVT: OutDev = &Vt; break;
832
case oLJ250: OutDev = &Lj250; break;
833
case oASCII: OutDev = &Ascii; break;
834
case oFIG: OutDev = &Fig; break;
835
case oPICT: OutDev = &Pict; break;
836
case oHTML: OutDev = &Html; break;
843
printf("create identity / similarity matrix (* n *) : ");
844
Fgets(instring, 51, stdin);
845
} while (indx("nNyY", instring) <= 0 && *instring != '\0');
847
if (*instring != '\0')
848
ident_sim = (toupper(instring[0]) == 'Y');
851
/**** ask for matrix file ****/
853
printf("name of matrix output-file : ");
864
/*
 * ask_numbers: for every sequence show its name with the suggested first
 * position number startno[i] and read an optional replacement.  An empty
 * reply keeps the suggestion.
 *
 * NOTE(review): the statement executed after the depend_err test is not
 * visible in this extract; strcmp() is non-zero when depend_err differs
 * from "str2int", i.e. presumably when the conversion succeeded.
 */
static void ask_numbers(void)
869
printf("-------------------------------------------------------\n"
871
"You requested sequence numbering in the output.\n"
872
"Enter the number of the first sequence position\n"
873
"or confirm the suggestion of the program\n" );
875
for (i = 0; i < no_seq; i++) {
876
printf("%s (* %4d *) : ", seqname[i], startno[i]);
877
Fgets(instring, 11, stdin);
878
if (*instring != '\0') {
879
inno = str2int((void *)instring);
880
if (strcmp(depend_err, "str2int"))
889
/*
 * ask_seqcons: choose the sequence all others are compared to.
 *
 * A command line choice (consensnum) outside 1..no_seq is discarded by
 * clearing clseqconsflag.  Without a usable command line value the
 * sequences are listed and a number in 1..no_seq is requested (default 1).
 * Afterwards the user is asked whether to hide that sequence and, for
 * non-interactive runs or command line selection, masternormal is forced
 * to FALSE; otherwise the "all-normal rendition" question is asked.
 *
 * NOTE(review): several loop heads / else branches are not visible in
 * this extract, so the exact nesting of the steps above needs confirming.
 */
static void ask_seqcons(void)
895
if (consensnum < 1 || consensnum > no_seq)
896
clseqconsflag = FALSE;
900
{ /*only go into this asking routine if nothing on the command line*/
901
consensnum = 1; /*1 is the default*/
903
printf("-------------------------------------------------------\n"
905
"You requested consensus formation to a single sequence.\n"
906
"Choose now the sequence to compare to the other ones.\n" );
907
/* sequences are presented with 1-based numbers */
for (i = 1; i <= no_seq; i++)
908
printf("(%2d) %s\n", i, seqname[i - 1]);
910
printf("No. of sequence: (* %3d *) : ", consensnum);
911
Fgets(instring, 11, stdin);
912
if (*instring != '\0')
913
consensnum = str2int((void *)instring);
914
} while (consensnum < 1 || consensnum > no_seq);
920
"-------------------------------------------------------------------\n");
921
printf("hide this sequence? (* %c *) : ", YESNO(hideflag));
922
Fgets(instring, 11, stdin);
923
} while (indx("nNyY", instring) <= 0 && *instring != '\0');
924
if (*instring != '\0')
925
hideflag = (toupper(instring[0]) == 'Y');
929
if (outputmode == oASCII) {
934
if (!interactflag || clseqconsflag) {
935
masternormal = FALSE;
941
printf("show this sequence in all-normal rendition? (* n *) : ");
942
Fgets(instring, 11, stdin);
943
} while (indx("nNyY", instring) <= 0 && *instring != '\0');
945
/* NOTE(review): every other prompt in this file tests != '\0' before
   taking the reply; here the test is == '\0' and the statement between
   this line and the assignment below is not visible in this extract --
   confirm which branch the assignment belongs to. */
if (*instring == '\0')
947
masternormal = (toupper(instring[0]) == 'Y');
951
/*--------------------------------------------------------------------------*/
952
/* read input files and comparison-table */
953
/*--------------------------------------------------------------------------*/
955
/*
 * read_cmp: fill simtable and grptable from the files simname and grpname.
 *
 * Both 20x20 tables are cleared, the diagonal is set TRUE, then:
 *  - similarity file: lines up to the first one containing ".." are
 *    skipped; on each following line the first character is a residue
 *    and the characters from index 2 onward are residues similar to it;
 *    the relation is stored symmetrically.
 *  - group file: same header skip; every pair of residues that occur on
 *    one line is marked as belonging together, symmetrically.
 *
 * p1/p2 are 1-based positions in aaset, 0 meaning "not a residue letter".
 */
static void read_cmp(void)
957
/* Read the .sim and .grp files */
960
FILE *simfile, *grpfile;
966
for (i = 0; i <= 19; i++) {
967
for (j = 0; j <= 19; j++) {
968
simtable[i][j] = FALSE;
969
grptable[i][j] = FALSE;
972
/* every residue is similar to / grouped with itself */
for (i = 0; i <= 19; i++) {
973
simtable[i][i] = TRUE;
974
grptable[i][i] = TRUE;
977
simfile = fopen(simname, TXT_RD);
978
/* NOTE(review): assert() is the only failure check and vanishes with
   NDEBUG. */
assert(simfile != NULL);
980
Fgets(line_, 256, simfile);
981
} while (!feof(simfile) && indx(line_, "..") <= 0);
982
/* NOTE(review): feof() is tested before the read; whether the final
   iteration sees an empty line depends on Fgets(), which is not visible
   here. */
while (!feof(simfile)) {
983
Fgets(line_, 256, simfile);
984
if (*line_ == '\0') continue;
985
cp = strchr(aaset, line_[0]);
986
if (cp == NULL) p1 = 0;
987
else p1 = (int)(cp-aaset)+1;
990
for (i = 2; i < ll; i++) {
991
cp = strchr(aaset, line_[i]);
992
if (cp == NULL) p2 = 0;
993
else p2 = (int)(cp-aaset)+1;
995
/* NOTE(review): with p1 or p2 == 0 these subscripts would be -1; a guard
   is not visible in this extract (short lines are missing) -- confirm
   one exists. */
simtable[p1 - 1][p2 - 1] = TRUE;
996
simtable[p2 - 1][p1 - 1] = TRUE;
1002
grpfile = fopen(grpname, TXT_RD);
1003
assert(grpfile != NULL);
1005
Fgets(line_, 256, grpfile);
1006
} while (!feof(grpfile) && indx(line_, "..") <= 0);
1007
while (!feof(grpfile)) {
1008
Fgets(line_, 256, grpfile);
1009
/* a group needs at least two members */
if ((ll=strlen(line_)) < 2) continue;
1010
for (j = 1; j < ll; j++) {
1011
cp = strchr(aaset, line_[j-1]);
1012
if (cp == NULL) p1 = 0;
1013
else p1 = (int)(cp-aaset)+1;
1015
for (i = j; i < ll; i++) {
1016
cp = strchr(aaset, line_[i]);
1017
if (cp == NULL) p2 = 0;
1018
else p2 = (int)(cp-aaset)+1;
1020
/* NOTE(review): same possible -1 subscript as for simtable above. */
grptable[p1 - 1][p2 - 1] = TRUE;
1021
grptable[p2 - 1][p1 - 1] = TRUE;
1028
if (simfile != NULL)
1030
if (grpfile != NULL)
1034
/* Lookup table from character code to residue index; filled by
   build_aaset_table().  Entries below zero mean "not in aaset". */
static int aaset_idx[256];
1035
/* residue index of ch; the cast keeps the subscript in 0..255 */
#define IDX_aaset(ch) ( aaset_idx[(unsigned char)(ch)] )
1036
/* non-zero if ch is one of the residue letters */
#define IN_aaset(ch) ( IDX_aaset(ch) >= 0 )
1038
/*
 * build_aaset_table: initialise aaset_idx[] so that each letter of aaset
 * maps to a running counter value.
 *
 * NOTE(review): the statement inside the first loop and the reset of ch
 * before the second loop are not visible in this extract; given IN_aaset()
 * tests ">= 0", presumably all entries are first set to a negative value
 * and ch restarts at 0 -- confirm.
 * NOTE(review): the subscript *ap is a plain char, whereas IDX_aaset()
 * casts to unsigned char.  Harmless for the ASCII letters in aaset, but
 * the two should agree.
 */
static void build_aaset_table(void) {
1042
for (ch = 0; ch < 256; ++ch)
1045
for (ap = &aaset[0]; *ap != '\0'; ++ap)
1046
aaset_idx[*ap] = ch++;
1049
/*
 * sim: TRUE if residues a and b are marked similar in simtable.
 * Returns FALSE when either character is not in aaset.
 * Relies on aaset_idx[] having been filled by build_aaset_table().
 */
static BOOL sim(char a, char b)
1053
idx1 = IDX_aaset(a);
1054
if (idx1 < 0) return FALSE;
1055
idx2 = IDX_aaset(b);
1056
if (idx2 < 0) return FALSE;
1058
return simtable[idx1][idx2];
1062
/*
 * grp: TRUE if residues a and b are marked as one group in grptable.
 * Returns FALSE when either character is not in aaset.
 * Relies on aaset_idx[] having been filled by build_aaset_table().
 */
static BOOL grp(char a, char b)
1066
idx1 = IDX_aaset(a);
1067
if (idx1 < 0) return FALSE;
1068
idx2 = IDX_aaset(b);
1069
if (idx2 < 0) return FALSE;
1071
return grptable[idx1][idx2];
1074
/*
 * make_consensus_length: set consenslen to the largest seqlen[i] after
 * trailing '-', '.' and blank characters have been discounted, and
 * report it.
 *
 * NOTE(review): the body of the while loop (presumably decrementing
 * seqlen[i]) and the initialisation of consenslen are not visible in this
 * extract.  The loop condition has no lower bound, so a sequence made up
 * only of filler characters would reach subscript -1 -- confirm.
 */
static void make_consensus_length(void)
1079
for (i = 0; i < no_seq; i++) {
1080
while (seq[i][seqlen[i] - 1] == '-' || seq[i][seqlen[i] - 1] == '.' ||
1081
seq[i][seqlen[i] - 1] == ' ')
1083
if (seqlen[i] > consenslen)
1084
consenslen = seqlen[i];
1086
printf("consensus length is %d\n", consenslen);
1090
/*
 * make_consensus: fill cons[0..consenslen-1].
 *
 * thr is thrfrac * no_seq rounded to the nearest integer.  In one mode the
 * consensus is simply a copy of sequence consensnum.  Otherwise, for every
 * column, each sequence j gets idcount[j] (sequences with the identical
 * residue) and simcount[j] (sequences whose residue is in the same group);
 * the maxima are located and, if they reach thr, cons[i] becomes the
 * residue itself (identity consensus) or its lower-case form (group
 * consensus).  A row of dots is printed as progress indicator.
 *
 * NOTE(review): the mode test, the counter updates and the handling of
 * `unique` are on lines not visible in this extract, and several comment
 * terminators are missing too; the body below is therefore left untouched
 * and the summary above is limited to what the remaining lines show.
 */
static void make_consensus(void)
1093
int idcount[MAX_NO_SEQ];
1094
int simcount[MAX_NO_SEQ];
1096
int maxidcount, maxsimcount, idindex, simindex;
1099
/* set consensus length = length of longest sequence (not counting dots,
1100
spaces, etc. at the "far" end. May be a problem here for some strange
1102
/* calculate the threshold # of sequences */
1103
thr = (int)(0.5 + thrfrac * no_seq);
1106
for (i = 0; i < consenslen; i++)
1107
cons[i] = seq[consensnum - 1][i];
1111
printf("Building consensus "); fflush(stdout);
1113
/* build a lookup table for 'strchr(aaset,ch)-aaset' */
1114
build_aaset_table();
1117
for (i = 0; i < consenslen; i++) {
1118
int p = (int)((50.0*i) / consenslen);
1120
printf("."); fflush(stdout);
1123
for (j = 0; j < no_seq; j++) {
1126
for (k = 0; k < no_seq; k++) {
1127
/* increment idcount AND simcount if the two residues are the same */
1128
char seq_ji = seq[j][i];
1129
if ( IN_aaset(seq_ji)) {
1130
if (seq[k][i] == seq_ji) {
1134
/* increment only simcount if residues are not the same but fall into
1136
if (grp(seq[k][i], seq_ji))
1141
/* Find the maximum values in idcount and simcount, along with the
1142
indices of those maxima */
1145
maxidcount = idcount[0];
1146
maxsimcount = simcount[0];
1149
for (j = 2; j <= no_seq; j++) {
1150
if (idcount[j - 1] > maxidcount) {
1152
maxidcount = idcount[j - 1];
1154
if (simcount[j - 1] > maxsimcount) {
1155
maxsimcount = simcount[j - 1];
1159
/* check here for the case where several residues/types may have achieved
1161
if (maxidcount >= thr) { /*only look if max is high enough*/
1163
for (j = 0; j < no_seq; j++) {
1164
if (maxidcount == idcount[j] && seq[idindex - 1][i] != seq[j][i])
1168
cons[i] = seq[idindex - 1][i];
1170
/* if there is an equally high idcount for a different residue then
1171
there can't be a single residue consensus */
1172
if (maxsimcount >= thr && !unique) {
1174
for (j = 0; j < no_seq; j++) {
1175
if (maxsimcount == simcount[j] &&
1176
!grp(seq[simindex - 1][i], seq[j][i]))
1180
cons[i] = tolower(seq[simindex - 1][i]);
1181
/*if maxsimcount is not unique and the other residue is NOT in the same
1182
similarity group then there is so consensus based on similarity. If
1183
the two residues with the same similarity score are in the same
1184
similarity group, flag that consensus position by making the
1192
/*
 * make_colors: derive the shading class col[j][i] of every residue and
 * the consensus-line character conschar[i] for every column.
 *
 * Per column the residues identical to cons[i] (idcount) and those similar
 * to it (simcount) are counted.  A lower-case cons[i] denotes a group
 * consensus and is compared via grp() against its upper-case form.
 * Shading values named in the code: 1 = conserved, 2 = similar to the
 * consensus, 3 = identical in all sequences.
 *
 * conschar[i] is taken from symbcons[2] when all sequences agree, from
 * symbcons[1] when idcount + simcount reaches thr, otherwise from
 * symbcons[0]; a symbol 'U'/'L' (either case) is replaced by the upper-/
 * lower-case consensus residue.  The action for 'B' is on lines not
 * visible in this extract.
 *
 * NOTE(review): counter updates and several assignments are missing from
 * this extract; the body below is left untouched.
 */
static void make_colors(void)
1194
int i, j, idcount, simcount, pc;
1196
printf("Colorizing "); fflush(stdout);
1199
for (i = 0; i < consenslen; i++) {
1200
int p = (int)((50.0*i) / consenslen);
1202
printf("."); fflush(stdout);
1207
if (strchr(aasetlow, cons[i]) != NULL)
1208
for (j = 0; j < no_seq; j++) {
1210
if (grp(seq[j][i], toupper(cons[i]))) {
1215
for (j = 0; j < no_seq; j++) {
1216
if (seq[j][i] == cons[i])
1219
if (sim(seq[j][i], cons[i]))
1223
} /*count the ids and sims as they are used later for consensus line*/
1225
if (idcount == no_seq &&
1226
strchr(aaset, cons[i]) != NULL ) {
1227
for (j = 0; j < no_seq; j++)
1229
/*if all sequences the same at this point then colour them identical(3)*/
1231
if (idcount + simcount >= thr &&
1232
strchr(aaset, cons[i]) != NULL ) {
1233
for (j = 0; j < no_seq; j++) {
1234
if (seq[j][i] == cons[i])
1235
col[j][i] = 1; /*=> conserved residue*/
1236
else if (sim(seq[j][i], cons[i]))
1238
/*=> similar to consensus(2)*/
1243
/*do shading and count similar residues for the case of a group
1244
consensus; note that in this case there cannot be any residues marked as
1245
'identical', by definition*/
1248
if (idcount == no_seq) {
1249
conschar[i] = symbcons[2];
1250
if (toupper(conschar[i]) == 'U')
1251
conschar[i] = toupper(cons[i]);
1252
else if (toupper(conschar[i]) == 'L')
1253
conschar[i] = tolower(cons[i]);
1254
else if (toupper(conschar[i]) == 'B')
1256
} else if (idcount + simcount >= thr) {
1257
conschar[i] = symbcons[1];
1258
if (toupper(conschar[i]) == 'U')
1259
conschar[i] = toupper(cons[i]);
1260
else if (toupper(conschar[i]) == 'L')
1261
conschar[i] = tolower(cons[i]);
1262
else if (toupper(conschar[i]) == 'B')
1265
conschar[i] = symbcons[0];
1266
if (toupper(conschar[i]) == 'U')
1267
conschar[i] = toupper(cons[i]);
1268
else if (toupper(conschar[i]) == 'L')
1269
conschar[i] = tolower(cons[i]);
1270
else if (toupper(conschar[i]) == 'B')
1279
/*
 * make_lowcase: convert residues to lower case where requested.
 *
 * Returns immediately when none of lc[0..4] is set.  Otherwise every
 * sequence is walked and residues are lower-cased, printing dots as a
 * progress indicator.
 *
 * NOTE(review): the condition selecting which residues are converted
 * (presumably lc[] indexed by the residue's shading class) is on a line
 * not visible in this extract -- confirm.
 */
static void make_lowcase(void)
1283
if ( ! (lc[0] || lc[1] || lc[2] || lc[3] || lc[4]) )
1284
return; /* nothing to do ! */
1286
printf("Lowercase "); fflush(stdout);
1289
for (i = 0; i < no_seq; i++) {
1290
int p = (int)((20.0*i) / no_seq);
1292
printf("."); fflush(stdout);
1295
for (j = 0; j < seqlen[i]; j++) {
1297
seq[i][j] = tolower(seq[i][j]);
1304
/*
 * prepare_names: bring all sequence names to one common width.
 *
 * First pass: locate the end of each name ignoring trailing blanks and
 * record the longest name length in seqname_outlen.  Second pass: pad
 * every name on the right with blanks up to seqname_outlen.
 *
 * NOTE(review): the statements that actually cut the trailing blanks are
 * not visible in this extract.  The padding writes into seqname[i] in
 * place; its capacity is not visible here -- confirm it can hold
 * seqname_outlen characters plus the terminator.
 */
static void prepare_names(void)
1310
for (i = 0; i < no_seq; i++) {
1311
cp = seqname[i] + strlen(seqname[i]);
1312
while (cp != seqname[i] && *(cp-1) == ' ')
1314
if (strlen(seqname[i]) > seqname_outlen)
1315
seqname_outlen = strlen(seqname[i]);
1317
for (i = 0; i < no_seq; i++) {
1318
int sl = strlen(seqname[i]);
1319
int p = seqname_outlen - sl;
1321
/* "%*s" with an empty string emits p blanks */
sprintf(seqname[i]+sl, "%*s", p, "");
1326
/*
 * prepare_numbers: precompute the position-number strings prenum[i][b]
 * shown in front of each output block b of sequence i.
 *
 * All entries are first set to seqnumlen blanks; block 0 gets startno[i].
 * While walking the sequence a running count is kept, and at every block
 * boundary (j a multiple of outlen) the number of the next position is
 * stored unless it would lie beyond the end of the sequence.
 *
 * NOTE(review): the increment of count and the update of bn are on lines
 * not visible in this extract; from the visible condition, upper-case
 * residues and every position of the ruler row appear to be what is
 * counted -- confirm.
 */
static void prepare_numbers(void)
1328
int count, bn, i, j;
1330
for (i = 0; i < no_seq; i++) {
1331
for (j = 1; j < max_no_block; j++)
1332
sprintf(prenum[i][j], "%*s", seqnumlen, "");
1333
sprintf(prenum[i][0], "%*d", seqnumlen, startno[i]);
1334
count = startno[i] - 1;
1336
for (j = 1; j <= seqlen[i]; j++) {
1337
if ( isupper(seq[i][j - 1])
1339
|| (i == ruler_idx) )
1341
if (j % outlen == 0) {
1343
if (count + 1 < seqlen[i] + startno[i])
1344
sprintf(prenum[i][bn-1], "%*d", seqnumlen, count + 1);
1351
/*--------------------------------------------------------------------------*/
1352
/* graphics section */
1353
/*--------------------------------------------------------------------------*/
1355
/*
 * graphics_init: initialise the selected output driver and derive the
 * number of text lines per page from the device extent (dev_miny ..
 * dev_maxy) and the line height dev_ysize.
 * NOTE(review): further statements of this function may be missing from
 * this extract.
 */
static void graphics_init(double *xpos, double *ypos)
1358
OutDev->Init(xpos, ypos);
1359
lines_per_page = (int)((dev_maxy - dev_miny) / dev_ysize);
1363
/*
 * graphics_setcolor: switch the driver to colour colno.  The test against
 * act_color is there to skip the driver call when colno is already active.
 * NOTE(review): the statement controlled by the if and the update of
 * act_color are on lines not visible in this extract -- confirm.
 */
static void graphics_setcolor(int colno) {
1365
if (colno == act_color)
1367
OutDev->Setcolor(colno);
1371
/* graphics_charout: forward one character to the selected output driver. */
static void graphics_charout(char c, double *xpos, double *ypos) {
1372
OutDev->Charout(c, xpos, ypos);
1375
/*
 * GenericStringOut: string output built on the driver's Charout: each
 * character of s is emitted, followed by one blank.
 * NOTE(review): the advance of s inside the loop is on a line not visible
 * in this extract.
 */
void GenericStringOut(char *s, double *xpos, double *ypos) {
1376
while (*s != '\0') {
1377
OutDev->Charout(*s, xpos, ypos);
1380
OutDev->Charout(' ', xpos, ypos);
1383
/* graphics_stringout: forward a string to the selected output driver. */
static void graphics_stringout(char *s, double *xpos, double *ypos) {
1384
OutDev->Stringout(s, xpos, ypos);
1387
/* graphics_newline: ask the selected output driver to start a new line. */
static void graphics_newline(double *xpos, double *ypos) {
1388
OutDev->Newline(xpos, ypos);
1391
/*
 * graphics_newpage: ask the selected output driver to start a new page.
 * NOTE(review): further statements of this function may be missing from
 * this extract.
 */
static void graphics_newpage(double *xpos, double *ypos) {
1392
OutDev->Newpage(xpos, ypos);
1396
static void graphics_exit(void) {
1400
/*
 * Produce the shaded alignment on the selected output device.
 *
 * Preparation of extra rows (each only when the matching flag is set):
 *   - consensus row at cons_idx, named "consensus", filled from conschar
 *   - row consensnum - 1 recoloured to colour 4 (seqconsflag && masternormal)
 *   - row consensnum - 1 overwritten by shifting the following rows down
 *     one slot (seqconsflag && hideflag)
 *   - ruler row at ruler_idx: dots, a leading '1' and decimal numbers placed
 *     around every tenth column
 *
 * Output: the alignment is cut into no_blocks blocks of outlen columns.
 * For every block and every sequence the name, the prenum text and the
 * residues are sent to the device, each residue in its colour from col.
 * Between blocks interlines line breaks are emitted when the next block
 * still fits on the page.
 *
 * Paging: once linecount reaches lines_per_page, linecount is taken off
 * lines_left; if lines remain, a new page is begun either by storing
 * pagesymbol in outname at index indx(outname, ".") - 2 and calling
 * graphics_init again, or by calling graphics_newpage.
 * NOTE(review): the condition choosing between those two paths, the
 * maintenance of linecount/pagesymbol and any change of no_seq are not
 * visible here -- confirm before relying on this description.
 */
static void graphics_out(void)
1402
double xpos, ypos; /*current position of text cursor*/
1409
/* number of outlen-wide blocks needed to cover consenslen columns */
no_blocks = (consenslen - 1) / outlen + 1;
1410
/* consensus row, only while no_seq is below max_no_seq */
if (consflag && no_seq < max_no_seq) {
1412
seqlen[cons_idx] = consenslen;
1413
strcpy(seqname[cons_idx], "consensus");
1414
startno[cons_idx] = 1;
1415
for (i = 0; i < consenslen; i++) {
1416
seq[cons_idx][i] = conschar[i];
1417
col[cons_idx][i] = 4; /*set colour to predefined "title" type*/
1421
/* row consensnum - 1 gets colour 4 in every column */
if (seqconsflag && masternormal) {
1422
for (i = 0; i < consenslen; i++)
1423
col[consensnum - 1][i] = 4;
1425
/* rows consensnum .. no_seq-1 move down one slot, replacing row consensnum - 1 */
if (seqconsflag && hideflag) {
1426
if (consensnum < no_seq) {
1427
for (i = consensnum; i < no_seq; i++) {
1428
seqlen[i - 1] = seqlen[i];
1429
strcpy(seqname[i - 1], seqname[i]);
1430
startno[i - 1] = startno[i];
1431
memcpy(seq[i - 1], seq[i], max_no_res * sizeof(char));
1432
memcpy(col[i - 1], col[i], max_no_res * sizeof(byte));
1438
/* ruler row, only while no_seq is below max_no_seq */
if (rulerflag && no_seq < max_no_seq) {
1440
seqlen[ruler_idx] = consenslen;
1441
strcpy(seqname[ruler_idx], "");
1442
startno[ruler_idx] = 1;
1443
for (i = 0; i < consenslen; i++) {
1444
seq[ruler_idx][i] = '.';
1445
col[ruler_idx][i] = 4; /*set colour to predefined "title" type*/
1447
seq[ruler_idx][0] = '1';
1448
/* one candidate number per tenth column */
for (i=10; i <= consenslen; i+=10) {
1453
sprintf(no, "%d", j);
1455
} while ( ((j-1)%outlen)+k > outlen || j > consenslen);
1456
/* copy the k digits into the ruler only if they fit */
if (j+k-1 <= consenslen && (j%10)+k < 9)
1457
memcpy(&seq[ruler_idx][j-1], no, k);
1467
/* total number of output lines: all blocks plus the gaps between them */
lines_left = no_blocks * (no_seq + interlines) - interlines;
1468
graphics_init(&xpos, &ypos);
1470
for (i = 0; i < no_blocks; i++) {
1471
for (j = 0; j < no_seq; j++) {
1473
graphics_setcolor(4); /*set colours to "title" type*/
1474
/* for (k = 0; k < seqname_outlen; k++)
1475
graphics_charout(seqname[j][k], &xpos, &ypos);
1476
graphics_charout(' ', &xpos, &ypos);
1478
graphics_stringout(seqname[j], &xpos, &ypos);
1481
graphics_setcolor(4);
1483
int sl = strlen(prenum[j][i]);
1484
for (k = 0; k < sl; k++)
1485
graphics_charout(prenum[j][i][k], &xpos, &ypos);
1487
graphics_charout(' ', &xpos, &ypos);
1489
graphics_stringout(prenum[j][i], &xpos, &ypos);
1491
for (k = 0; k < outlen; k++) {
1492
if (i * outlen + k < consenslen) {
1493
graphics_setcolor(col[j][i * outlen + k]);
1494
graphics_charout(seq[j][i * outlen + k], &xpos, &ypos);
1498
if (linecount >= lines_per_page) {
1499
lines_left -= linecount;
1501
if (lines_left > 0) {
1505
outname[indx(outname, ".") - 2] = pagesymbol;
1506
graphics_init(&xpos, &ypos);
1508
graphics_newpage(&xpos, &ypos);
1511
graphics_newline(&xpos, &ypos);
1513
if (linecount + interlines + no_seq <= lines_per_page)
1514
{ /*will the next block fit?*/
1515
for (j = 0; j < interlines; j++) {
1516
graphics_newline(&xpos, &ypos);
1520
/* skipping interlines ... */
1521
lines_left -= interlines;
1522
lines_left -= linecount;
1523
if (lines_left > 0) {
1527
outname[indx(outname, ".") - 2] = pagesymbol;
1528
graphics_init(&xpos, &ypos);
1531
graphics_newpage(&xpos, &ypos);
1541
/*
 * Write the pairwise identity/similarity table of all sequences to outf.
 * One output row per sequence i (1-based): the sequence name, then for every
 * earlier sequence j the percentage 100 * id / conlen, then "----" for the
 * sequence itself, then for every later sequence j the percentage
 * 100 * sim / conlen. A column counts towards id when both residues are
 * equal, and towards sim when they are equal or grp() reports them as
 * belonging together.
 * Progress text is printed to stdout while the table is built.
 *
 * outf  stream receiving the table
 * goon  NOTE(review): its use is not visible here -- presumably it enables
 *       the progress dots when outf is a file; confirm.
 *
 * NOTE(review): how conlen, id and sim are reset/accumulated and how
 * fl1/fl2 (results of IN_aaset) filter columns is not visible here; a
 * conlen of 0 would make the divisions undefined -- confirm it is guarded.
 */
static void do_out(FILE *outf, BOOL goon)
1543
int i, j, k, id, sim, conlen;
1550
printf("Building identity/similarity matrix "); fflush(stdout);
1553
for (i = 1; i <= no_seq; i++) {
1555
/* p = progress of row i on a scale of 20 */
int p = (int)((20.0*i) / no_seq);
1557
printf("."); fflush(stdout);
1561
fprintf(outf, "%s ", seqname[i - 1]);
1563
/* columns before the diagonal: identity with earlier sequences */
for (j = 0; j < i - 1; j++) {
1566
for (k = 0; k < consenslen; k++) {
1567
fl1 = IN_aaset(seq[i-1][k]);
1568
fl2 = IN_aaset(seq[j ][k]);
1571
if (seq[i-1][k] == seq[j][k])
1575
fprintf(outf, " %5.1f", 100.0 * id / conlen);
1577
/* diagonal entry */
fprintf(outf, "%6s", "----");
1578
/* columns after the diagonal: similarity with later sequences */
for (j = i; j < no_seq; j++) {
1581
for (k = 0; k < consenslen; k++) {
1582
fl1 = IN_aaset(seq[i-1][k]);
1583
fl2 = IN_aaset(seq[j ][k]);
1586
if (seq[i-1][k] == seq[j][k] || grp(seq[i-1][k], seq[j][k]))
1590
fprintf(outf, " %5.1f", 100.0 * sim / conlen);
1599
/*
 * Emit the identity/similarity table: into the file named by identname when
 * that name is non-empty (do_out with goon = TRUE), and via
 * do_out(stdout, FALSE) for standard output.
 * NOTE(review): the lines separating the two do_out calls are not visible
 * here -- presumably the stdout call is the else branch; confirm.
 * NOTE(review): outopen() is called inside assert(); when the file is built
 * with NDEBUG the whole expression is removed, the file is never opened and
 * idf.f is used uninitialised. The call should be moved out of the assert.
 */
static void ident_sim_out(void)
1601
if (*identname != '\0') {
1603
assert( outopen(&idf, identname) != NULL );
1604
do_out(idf.f, TRUE);
1607
do_out(stdout, FALSE);
1611
/* * * * * * * * * * * * * * * * * * * * * * * * * *
1612
* Allocate memory -- actually this should be done *
1613
* after we know how many sequences there are and *
1614
* how long each is -- future work */
1615
/*
 * First allocation stage, sized by the compile-time limits: allocates the
 * row-pointer tables seq, col and prenum for MAX_NO_SEQ rows, records the
 * limits in max_no_seq / max_no_res, and gives every seq row max_no_res
 * characters.
 * The tables come from Malloc, the rows from plain malloc with an explicit
 * NULL test.
 * NOTE(review): what happens when a row allocation fails is not visible
 * here -- confirm.
 */
static void allocate1(void)
1622
seq = Malloc(MAX_NO_SEQ * sizeof(char *));
1623
col = Malloc(MAX_NO_SEQ * sizeof(byte *));
1624
prenum = Malloc(MAX_NO_SEQ * sizeof(numtype *));
1626
max_no_seq = MAX_NO_SEQ;
1627
max_no_res = MAX_NO_RES;
1628
for (i = 0; i < MAX_NO_SEQ; i++) {
1629
seq[i] = malloc(max_no_res * sizeof(char));
1632
if (seq[i] == NULL) {
1645
/*
 * Second allocation stage, run once the input dimensions are known.
 *   - need is raised by one each for consflag and rulerflag and capped at
 *     max_no_seq; a loop then visits rows max_no_seq-1 down to need.
 *   - if consenslen is below half of max_no_res, rows no_seq down to 0 of
 *     seq are shrunk to consenslen + 1 characters and max_no_res is lowered
 *     to match.
 *   - cons and conschar get max_no_res characters each; every one of the
 *     max_no_seq rows gets a col array of max_no_res bytes and a prenum
 *     array of max_no_block entries.
 * NOTE(review): the initial value of need and the body of the first loop
 * are not visible here -- presumably need starts at no_seq and the loop
 * releases unused seq rows; confirm.
 * NOTE(review): the shrink loop starts at index no_seq, i.e. it touches
 * no_seq + 1 rows; confirm that row no_seq is always a valid (or NULL)
 * pointer at this point.
 * NOTE(review): the realloc result overwrites seq[i] directly and is only
 * checked by assert(), which disappears under NDEBUG.
 */
static void allocate2(void)
1650
if (consflag) ++need;
1651
if (rulerflag) ++need;
1652
if (need > max_no_seq) need = max_no_seq;
1654
for (i=max_no_seq-1; i >= need; --i)
1656
/* only shrink when at least half of each row would be saved */
if (consenslen < max_no_res / 2) {
1657
for (i=no_seq; i >= 0; --i) {
1658
seq[i] = realloc(seq[i], (consenslen+1)*sizeof(char));
1659
assert(seq[i] != NULL);
1664
max_no_res = consenslen+1;
1666
cons = Malloc(max_no_res * sizeof(char));
1668
conschar = Malloc(max_no_res * sizeof(char));
1670
for (i = 0; i < max_no_seq; i++) {
1671
col[i] = Malloc(max_no_res * sizeof(byte));
1673
prenum[i] = Malloc(max_no_block * sizeof(numtype));
1677
/*--------------------------------------------------------------------------*/
1679
/*--------------------------------------------------------------------------*/
1681
int main(int argc, char **argv)
1684
process_command_line(argc, argv);
1687
switch (inputmode) {
1688
case '1': read_file_pretty();
1690
case '2': read_file_clustal();
1692
case '3': read_file_maligned();
1694
case '4': read_file_esee();
1696
case '5': read_file_phylip();
1700
if (seqnumflag && !numdefflag)
1706
make_consensus_length();