1
/* $Id: blastpgp.c,v 6.104 2001/10/12 14:55:41 dondosha Exp $ */
2
/**************************************************************************
6
* This software/database is categorized as "United States Government *
7
* Work" under the terms of the United States Copyright Act. It was *
8
* produced as part of the author's official duties as a Government *
9
* employee and thus can not be copyrighted. This software/database is *
10
* freely available to the public for use without a copyright notice. *
11
* Restrictions can not be placed on its present or future use. *
13
* Although all reasonable efforts have been taken to ensure the accuracy *
14
* and reliability of the software and data, the National Library of *
15
* Medicine (NLM) and the U.S. Government do not and can not warrant the *
16
* performance or results that may be obtained by using this software, *
17
* data, or derivative works thereof. The NLM and the U.S. Government *
18
* disclaim any and all warranties, expressed or implied, as to the *
19
* performance, merchantability or fitness for any particular purpose or *
22
* In any work or product derived from this material, proper attribution *
23
* of the author(s) as the source of the software or data would be *
26
**************************************************************************
28
* $Log: blastpgp.c,v $
29
* Revision 6.104 2001/10/12 14:55:41 dondosha
30
* Changed description of the -t option
32
* Revision 6.103 2001/08/29 19:06:37 madden
33
* added variable posComputationcalled in Main, added parameter posComputationCalled to PGPrintPosMatrix
35
* Revision 6.102 2001/08/28 17:34:35 madden
36
* Add -m 9 as tabular output with comments
38
* Revision 6.101 2001/07/30 16:27:42 dondosha
39
* Do not call PGPOutTextMessages with XML output option
41
* Revision 6.100 2001/07/25 19:40:15 dondosha
42
* Multiply hitlist_size by 2 when going to next query if when tweak_parameters set
44
* Revision 6.99 2001/07/24 18:16:55 madden
45
* Set error_return when freeing
47
* Revision 6.98 2001/07/09 19:37:31 kans
48
* return 0 instead of NULL to fix Mac compiler error
50
* Revision 6.97 2001/07/03 20:50:33 madden
51
* Commented out call to PrintTabularOutputHeader
53
* Revision 6.96 2001/06/22 13:48:39 dondosha
54
* Declare variable vnp
56
* Revision 6.95 2001/06/21 21:56:37 dondosha
57
* Fixed memory leak: destroy all error returns; uncommented ObjMgrFreeCache
59
* Revision 6.94 2001/06/15 21:19:28 dondosha
60
* Added tabular output option -m 8
62
* Revision 6.93 2001/06/11 20:31:19 shavirin
63
* Added possibility to make Batch searches in iteractive mode.
65
* Revision 6.92 2001/04/13 20:46:01 madden
66
* Changed default e-value threshold for inclusion in multipass model from 0.002 to 0.005, Changed default constant in pseudocounts for multipass version from 7 to 9
68
* Revision 6.91 2001/04/10 19:20:52 madden
69
* Unsuppress some options suppressed for the release
71
* Revision 6.90 2001/03/30 22:02:04 madden
72
* Suppress options not yet ready for prime-time
74
* Revision 6.89 2000/11/30 17:10:13 shavirin
75
* Initialized to NULL xml pointer.
77
* Revision 6.88 2000/11/28 20:48:49 shavirin
78
* Adopted for usage with many-iterations XML.
80
* Revision 6.87 2000/11/22 21:57:08 shavirin
81
* Added passing of iteration number into XML output.
83
* Revision 6.86 2000/11/22 21:41:52 shavirin
84
* Removed problem with mutiple iteration XML output.
86
* Revision 6.85 2000/11/22 17:34:32 madden
87
* Change NULL to 0 for an intvalue
89
* Revision 6.84 2000/11/22 17:28:31 madden
90
* Enable decline-to-align
92
* Revision 6.83 2000/10/27 21:26:50 shavirin
93
* Produce valid XML output if no hits found.
95
* Revision 6.82 2000/10/27 19:14:41 madden
96
* Change description of -b option
98
* Revision 6.81 2000/10/24 13:51:33 shavirin
99
* Removed unused parameter from the function BXMLPrintOutput().
101
* Revision 6.80 2000/10/23 19:58:22 dondosha
102
* Open and close AsnIo outside of call(s) to BXMLPrintOutput
104
* Revision 6.79 2000/10/17 19:39:20 shavirin
105
* Fixed compilation problems on Mac.
107
* Revision 6.78 2000/10/13 20:58:01 madden
108
* Add YES_TO_DECLINE_TO_ALIGN define and disable same
110
* Revision 6.77 2000/09/28 15:48:30 dondosha
111
* Open <PRE> block in PGPFormatFooter
113
* Revision 6.76 2000/09/27 19:31:44 dondosha
114
* Use original square substitution matrix to pass to txalign on all iterations
116
* Revision 6.75 2000/09/21 17:54:46 dondosha
117
* Do not pass matrix to txalign in case of query-anchored formatting
119
* Revision 6.74 2000/09/13 18:34:35 dondosha
120
* Create BLAST_Matrix from ScoreBlk before converting it to txalign-style matrix
122
* Revision 6.73 2000/09/12 21:51:38 dondosha
123
* Pass the correct scoring matrix to ShowTextAlignFromAnnot
125
* Revision 6.72 2000/08/28 21:55:01 shavirin
126
* Added option (m = 7) to print XML output.
128
* Revision 6.71 2000/08/22 20:18:21 shavirin
129
* Added support for HTML output if decline-to-align parameter is set.
131
* Revision 6.70 2000/08/18 21:31:12 madden
132
* no longer need to raise E-value threshold for composition-based statistics
134
* Revision 6.69 2000/08/08 21:47:55 shavirin
135
* Added parameter decline_align and possibility to print then
136
* discontinuous alignment.
138
* Revision 6.68 2000/07/26 19:04:14 shavirin
139
* Set overriding default threshold only if input value != 0.
141
* Revision 6.67 2000/07/25 18:14:06 shavirin
142
* WARNING: This is no-turning-back changed related to S&W Blast from
145
* Revision 6.66 2000/07/21 20:46:40 madden
146
* Threshold set to default if arg is zero
148
* Revision 6.65 2000/06/27 15:25:19 madden
149
* Changed master-slave to query-anchored
151
* Revision 6.64 2000/03/29 17:32:55 madden
152
* Commented out yet another call to ObjMgrFreeCache
154
* Revision 6.63 2000/03/27 21:32:00 shavirin
155
* Use function ShowTextAlignFromAnnot2 instead of ShowTextAlignFromAnnot3
157
* Revision 6.62 2000/03/23 15:30:22 shavirin
158
* Removed call to ObjMgrFreeCache()
160
* Revision 6.61 2000/03/07 21:59:07 shavirin
161
* Now will use PSSM Matrix to show positives in PSI Blast
163
* Revision 6.60 2000/03/02 21:06:09 shavirin
164
* Added -U option, that allows to consider low characters in FASTA files
165
* as filtered regions (for blastn, blastp and tblastn).
167
* Revision 6.58 2000/01/07 22:01:04 shavirin
168
* Fixed problem with printing alignment.
170
* Revision 6.57 1999/12/21 21:34:05 shavirin
171
* Fixed some memory leaks.
173
* Revision 6.56 1999/11/08 19:12:41 shavirin
174
* Fixed minor SGI warning.
176
* Revision 6.55 1999/10/21 20:27:52 shavirin
177
* Fixed bug resulted in failue to print out seqannot. (-O)
179
* Revision 6.53 1999/10/14 15:52:51 shavirin
180
* Added possibility to make search by list of gis. Fixed some bugs.
182
* Revision 6.52 1999/10/05 17:41:08 shavirin
183
* Corrected in accordance to blast.c changes.
185
* Revision 6.51 1999/09/24 16:06:15 shavirin
186
* Matrix is set to NULL if no matrix calculation produced.
188
* Revision 6.50 1999/09/22 17:53:25 shavirin
189
* Now functions will collect messages in ValNodePtr before printing out.
191
* Revision 6.49 1999/09/21 16:01:46 shavirin
192
* Rearanged all file. Main function was disintegrated into few small
195
* Revision 6.48 1999/08/27 18:17:42 shavirin
196
* Added new parameter to command line - decline_align.
198
* Revision 6.47 1999/08/26 14:58:06 madden
199
* Use float for db length
201
* Revision 6.46 1999/08/04 13:26:49 madden
204
* Revision 6.45 1999/03/31 16:58:04 madden
205
* Removed static FindProt and FindNuc
207
* Revision 6.44 1999/03/24 18:16:33 madden
208
* zero-out groupOrder and featureOrder
210
* Revision 6.43 1999/03/21 19:43:23 madden
211
* Added -Q option to store ASCII version of last position-specific matrix in a file
213
* Revision 6.42 1999/03/04 14:17:20 egorov
214
* Added new parameter to BlastMaskTheResidues() function for correct masking
215
* when query is seqloc. The paramter is not used in this file and is 0.
217
* Revision 6.41 1999/02/18 21:13:11 madden
218
* Check for non-pattern search before call to BlastPruneSapStructDestruct
220
* Revision 6.40 1999/02/10 21:12:27 madden
221
* Added HTML and GI list option, fixed filtering
223
* Revision 6.39 1999/01/22 17:24:51 madden
224
* added line breaks for alignment views
226
* Revision 6.38 1998/12/29 20:03:14 kans
227
* calls UseLocalAsnloadDataAndErrMsg at startup
229
* Revision 6.37 1998/12/17 21:54:39 madden
230
* Added call to BlastPruneHitsFromSeqAlign for other than first round
232
* Revision 6.36 1998/12/16 14:13:36 madden
233
* Fix for more than one pattern in query
235
* Revision 6.35 1998/11/19 14:04:34 madden
236
* Changed message level to SEV_WARNING
238
* Revision 6.34 1998/11/16 16:29:41 madden
239
* Added ErrSetMessageLevel(SEV_INFO) and fixed call to PrintKAParametersExtra
241
* Revision 6.33 1998/09/28 12:33:02 madden
242
* Used BlastErrorPrint
244
* Revision 6.31 1998/09/17 19:54:31 madden
245
* Removed fillCandLambda
247
* Revision 6.29 1998/09/10 22:38:24 madden
248
* Moved convertSeqAlignListToValNodeList and convertValNodeListToSeqAlignList
250
* Revision 6.28 1998/09/09 21:20:02 madden
251
* AS fixed warnings, removed stderr fprintf, added call to PrintKAParametersExtra
253
* Revision 6.27 1998/09/09 16:10:49 madden
256
* Revision 6.26 1998/07/17 15:41:36 madden
257
* Added effective search space flag
259
* Revision 6.24 1998/06/14 19:44:46 madden
262
* Revision 6.23 1998/06/12 21:32:27 madden
263
* Removed extra FnPtr cast
265
* Revision 6.22 1998/06/10 13:33:39 madden
266
* change -h from 0.01 to 0.001
268
* Revision 6.21 1998/06/05 21:48:43 madden
269
* Added -K and -L options
271
* Revision 6.20 1998/05/18 18:01:05 madden
272
* Changed args to allow filter options to be changed
274
* Revision 6.19 1998/05/01 18:31:03 egorov
275
* Add new parametes to BLASTOptionSetGapParam()
277
* Revision 6.18 1998/04/30 14:32:33 madden
278
* init_buff_ex arg changed to 90 for reference
280
* Revision 6.17 1998/04/29 14:29:31 madden
281
* Made reference line longer
283
* Revision 6.16 1998/04/01 22:49:13 madden
284
* Print No hits found message
286
* Revision 6.15 1998/02/25 20:50:50 madden
287
* Added arg for db length
289
* Revision 6.14 1998/02/24 22:48:36 madden
290
* Removed options for culling
292
* Revision 6.13 1998/01/02 14:33:37 madden
295
* Revision 6.12 1997/12/31 17:48:56 madden
296
* Added wordsize option
298
* Revision 6.11 1997/12/23 21:09:44 madden
299
* Added -K and -L for range-dependent blast
301
* Revision 6.10 1997/12/23 20:57:44 madden
302
* Changes for checkpointing
304
* Revision 6.9 1997/11/19 14:26:46 madden
305
* Removed extra break statement
307
* Revision 6.8 1997/11/18 22:24:24 madden
308
* Added call to BLASTOptionSetGapParams
310
* Revision 6.7 1997/11/08 22:04:43 madden
311
* Called BlastOtherReturnsPrepare earlier to before posMatrix is deleted
313
* Revision 6.6 1997/10/27 22:26:49 madden
314
* Added call to ObjMgrFreeCache(0)
316
* Revision 6.5 1997/10/23 20:26:15 madden
317
* Use of init_buff_ex rather than init_buff
319
* Revision 6.4 1997/10/22 21:56:49 madden
320
* Changed default values
322
* Revision 6.3 1997/10/07 21:33:34 madden
325
* Revision 6.2 1997/09/18 22:25:02 madden
326
* b and v options now work
328
* Revision 6.1 1997/09/16 16:40:50 madden
329
* Dbinfo printing changed for multiple db searches
331
* Revision 6.0 1997/08/25 18:19:19 madden
332
* Revision changed to 6.0
334
* Revision 1.24 1997/08/24 19:38:23 madden
335
* Used function BlastOtherReturnsPrepare
337
* Revision 1.23 1997/08/14 21:48:57 madden
338
* Added descriptions and alignments options
340
* Revision 1.22 1997/07/29 19:33:05 madden
341
* Added TXALIGN_SHOW_QS flag
343
* Revision 1.21 1997/07/28 18:36:45 madden
344
* Replaced printf with ErrPostEx and fprintf
346
* Revision 1.20 1997/07/28 16:59:21 madden
347
* Added include for simutil.h
349
* Revision 1.19 1997/07/28 16:50:51 madden
350
* Changed call to ShowTextAlignFromAnnot
352
* Revision 1.18 1997/07/22 19:18:40 madden
353
* printing version, etc.
355
* Revision 1.17 1997/06/25 14:06:21 madden
356
* minor changes to check convergence
358
* Revision 1.16 1997/06/23 20:51:29 madden
359
* Added matrix option
361
* Revision 1.15 1997/06/20 19:30:00 madden
362
* Added align_type info, support for SeqAligns
364
* Revision 1.14 1997/05/23 20:54:48 madden
365
* Added -Z option for final gapped alignment
367
* Revision 1.13 1997/05/07 15:06:01 madden
368
* replacement of search by compactSearch
370
* Revision 1.12 1997/04/21 14:09:27 madden
371
* Added four new master-slave alignment types.
373
* Revision 1.11 1997/04/11 19:03:47 madden
374
* Changes to ignore query ID and show master-slave alignments.
376
* Revision 1.10 1997/04/10 19:28:28 madden
377
* COMMAND_LINE replaced by ALL_ROUNDS.
379
* Revision 1.9 1997/04/10 13:27:32 madden
380
* Added COMMAND_LINE define, option for multiple alignments.
382
* Revision 1.8 1997/04/07 21:44:50 madden
383
* Removed unused variable stats.
385
* Revision 1.7 1997/04/04 21:13:33 madden
386
* Used function BioseqBlastEngineCore, remove PrivateScoreFunc.
388
* Revision 1.6 1997/04/04 16:38:04 madden
389
* Changed filtering option, ObjMgrHold.
391
* Revision 1.5 1997/03/21 20:30:02 madden
392
* Expect value arg made a float.
394
* Revision 1.4 1997/03/13 21:18:51 madden
395
* Changed default extension value to 1 from 2.
397
* Revision 1.3 1997/02/19 21:43:04 madden
398
* Extensive changes for psi-blast. blastp runs now occur multiple times.
400
* Revision 1.2 1997/02/12 15:16:58 madden
401
* Changed from blast2 to new formatting.
403
* Revision 1.1 1997/01/16 21:35:42 madden
406
* Revision 1.1 1997/01/16 21:34:23 madden
417
#include <blastpri.h>
420
#include <gapxdrop.h>
423
#include <sqnutils.h>
424
#include <xmlblast.h>
425
#include <ddvcreate.h>
427
/* Used by the callback function. */
428
FILE *global_fp=NULL;
430
Callback to print out ticks, in UNIX only due to file systems
434
static int LIBCALLBACK
435
tick_callback(Int4 sequence_number, Int4 number_of_positive_hits)
440
fprintf(global_fp, "%s", ".");
446
static int LIBCALLBACK
447
star_callback(Int4 sequence_number, Int4 number_of_positive_hits)
450
fprintf(global_fp, "%s", "*");
456
#define YES_TO_DECLINE_TO_ALIGN
458
#define NUMARG (sizeof(myargs)/sizeof(myargs[0]))
460
static Args myargs [] = {
461
{ "Database", /* 0 */
462
"nr", NULL, NULL, FALSE, 'd', ARG_STRING, 0.0, 0, NULL},
463
{ "Query File", /* 1 */
464
"stdin", NULL, NULL, FALSE, 'i', ARG_FILE_IN, 0.0, 0, NULL},
465
{ "Multiple Hits window size (zero for single hit algorithm)", /* 2 */
466
"40", NULL, NULL, FALSE, 'A', ARG_INT, 0.0, 0, NULL},
467
{ "Threshold for extending hits", /* 3 */
468
"0", NULL, NULL, FALSE, 'f', ARG_INT, 0.0, 0, NULL},
469
{ "Expectation value (E)", /* 4 */
470
"10.0", NULL, NULL, FALSE, 'e', ARG_FLOAT, 0.0, 0, NULL},
471
{ "alignment view options:\n0 = pairwise,\n1 = query-anchored showing identities,\n2 = query-anchored no identities,\n3 = flat query-anchored, show identities,\n4 = flat query-anchored, no identities,\n5 = query-anchored no identities and blunt ends,\n6 = flat query-anchored, no identities and blunt ends,\n7 = XML Blast output,\n8 = Tabular output, \n9 = Tabular output with comments", /* 5 */
472
"0", NULL, NULL, FALSE, 'm', ARG_INT, 0.0, 0, NULL},
473
{ "Output File for Alignment", /* 6 */
474
"stdout", NULL, NULL, TRUE, 'o', ARG_FILE_OUT, 0.0, 0, NULL},
475
{ "Dropoff (X) for blast extensions in bits (default if zero)", /* 7 */
476
"7.0", NULL, NULL, FALSE, 'y', ARG_FLOAT, 0.0, 0, NULL},
477
{ "0 for multiple hits 1-pass, 1 for single hit 1-pass, 2 for 2-pass", /* 8 */
478
"0", NULL, NULL, FALSE, 'P', ARG_INT, 0.0, 0, NULL},
479
{ "Filter query sequence with SEG", /* 9 */
480
"F", NULL, NULL, FALSE, 'F', ARG_STRING, 0.0, 0, NULL},
481
{ "Cost to open a gap", /* 10 */
482
"11", NULL, NULL, FALSE, 'G', ARG_INT, 0.0, 0, NULL},
483
{ "Cost to extend a gap", /* 11 */
484
"1", NULL, NULL, FALSE, 'E', ARG_INT, 0.0, 0, NULL},
485
{ "X dropoff value for gapped alignment (in bits)", /* 12 */
486
"15", NULL, NULL, FALSE, 'X', ARG_INT, 0.0, 0, NULL},
487
{ "Number of bits to trigger gapping", /* 13 */
488
"22.0", NULL, NULL, FALSE, 'N', ARG_FLOAT, 0.0, 0, NULL},
490
"T", NULL, NULL, FALSE, 'g', ARG_BOOLEAN, 0.0, 0, NULL},
491
{ "Start of required region in query", /* 15 */
492
"1", NULL, NULL, FALSE, 'S', ARG_INT, 0.0, 0, NULL},
493
{ "End of required region in query (-1 indicates end of query)", /* 16 */
494
"-1", NULL, NULL, FALSE, 'H', ARG_INT, 0.0, 0, NULL},
495
{ "Number of processors to use", /* 17 */
496
"1", NULL, NULL, FALSE, 'a', ARG_INT, 0.0, 0, NULL},
497
{ "Show GI's in deflines", /* 18 */
498
"F", NULL, NULL, FALSE, 'I', ARG_BOOLEAN, 0.0, 0, NULL},
499
{ "e-value threshold for inclusion in multipass model", /* 19 */
500
"0.005", NULL, NULL, FALSE, 'h', ARG_FLOAT, 0.0, 0, NULL},
501
{ "Constant in pseudocounts for multipass version", /* 20 */
502
"9", NULL, NULL, FALSE, 'c', ARG_INT, 0.0, 0, NULL},
503
{ "Maximum number of passes to use in multipass version", /* 21 */
504
"1", NULL, NULL, FALSE, 'j', ARG_INT, 0.0, 0, NULL},
505
{ "Believe the query defline", /* 22 */
506
"F", NULL, NULL, FALSE, 'J', ARG_BOOLEAN, 0.0, 0, NULL},
507
{ "X dropoff value for final gapped alignment (in bits)", /* 23 */
508
"25", NULL, NULL, FALSE, 'Z', ARG_INT, 0.0, 0, NULL},
509
{ "SeqAlign file ('Believe the query defline' must be TRUE)", /* 24 */
510
NULL, NULL, NULL, TRUE, 'O', ARG_FILE_OUT, 0.0, 0, NULL},
512
"BLOSUM62", NULL, NULL, FALSE, 'M', ARG_STRING, 0.0, 0, NULL},
513
{ "Number of database sequences to show one-line descriptions for (V)", /* 26 */
514
"500", NULL, NULL, FALSE, 'v', ARG_INT, 0.0, 0, NULL},
515
{ "Number of database sequence to show alignments for (B)", /* 27 */
516
"250", NULL, NULL, FALSE, 'b', ARG_INT, 0.0, 0, NULL},
517
{ "Output File for PSI-BLAST Checkpointing", /* 28 */
518
NULL, NULL, NULL, TRUE, 'C', ARG_FILE_OUT, 0.0, 0, NULL},
519
{ "Input File for PSI-BLAST Restart", /* 29 */
520
NULL, NULL, NULL, TRUE, 'R', ARG_FILE_IN, 0.0, 0, NULL},
521
{ "Word size, default if zero", /* 30 */
522
"0", NULL, NULL, FALSE, 'W', ARG_INT, 0.0, 0, NULL},
523
{ "Effective length of the database (use zero for the real size)", /* 31 */
524
"0", NULL, NULL, FALSE, 'z', ARG_INT, 0.0, 0, NULL},
525
{ "Number of best hits from a region to keep", /* 32 */
526
"0", NULL, NULL, FALSE, 'K', ARG_INT, 0.0, 0, NULL},
527
{ "Compute locally optimal Smith-Waterman alignments", /*33*/
528
"F", NULL, NULL, FALSE, 's', ARG_BOOLEAN, 0.0, 0, NULL},
529
{ "Effective length of the search space (use zero for the real size)", /* 34 */
530
"0", NULL, NULL, FALSE, 'Y', ARG_FLOAT, 0.0, 0, NULL},
531
{ "program option for PHI-BLAST", /* 35 */
532
"blastpgp", NULL, NULL, FALSE, 'p', ARG_STRING, 0.0, 0, NULL},
533
{ "Hit File for PHI-BLAST", /* 36 */
534
"hit_file", NULL, NULL, FALSE, 'k', ARG_FILE_IN, 0.0, 0, NULL},
535
{ "Produce HTML output", /* 37 */
536
"F", NULL, NULL, FALSE, 'T', ARG_BOOLEAN, 0.0, 0, NULL},
537
{ "Output File for PSI-BLAST Matrix in ASCII", /* 38 */
538
NULL, NULL, NULL, TRUE, 'Q', ARG_FILE_OUT, 0.0, 0, NULL},
539
{ "Input Alignment File for PSI-BLAST Restart", /* 39 */
540
NULL, NULL, NULL, TRUE, 'B', ARG_FILE_IN, 0.0, 0, NULL},
541
{ "Restrict search of database to list of GI's", /* 40 */
542
NULL, NULL, NULL, TRUE, 'l', ARG_STRING, 0.0, 0, NULL},
543
{"Use lower case filtering of FASTA sequence", /* 41 */
544
"F", NULL,NULL,TRUE,'U',ARG_BOOLEAN, 0.0,0,NULL},
545
{ "Use composition based statistics", /*42*/
546
"T", NULL, NULL, FALSE, 't', ARG_BOOLEAN, 0.0, 0, NULL},
547
#ifdef YES_TO_DECLINE_TO_ALIGN
548
{ "Cost to decline alignment (disabled when 0)", /* 43 */
549
"0", NULL, NULL, FALSE, 'L', ARG_INT, 0.0, 0, NULL},
553
typedef struct _pgp_blast_options {
554
BLAST_OptionsBlkPtr options;
555
CharPtr blast_database;
556
BioseqPtr query_bsp, fake_bsp;
557
Int4 number_of_descriptions, number_of_alignments;
561
Boolean believe_query;
562
Uint4 align_options, print_options;
563
/* PHI-PSI Blast variables */
564
Uint1 featureOrder[FEATDEF_ANY];
565
Uint1 groupOrder[FEATDEF_ANY];
569
seedSearchItems *seedSearch;
570
Boolean is_xml_output;
571
} PGPBlastOptions, PNTR PGPBlastOptionsPtr;
573
void PGPGetPrintOptions(Boolean gapped, Uint4Ptr align_options_out,
574
Uint4Ptr print_options_out)
576
Uint4 print_options, align_options;
580
print_options += TXALIGN_SHOW_NO_OF_SEGS;
583
align_options += TXALIGN_COMPRESS;
584
align_options += TXALIGN_END_NUM;
586
if (myargs[18].intvalue) {
587
align_options += TXALIGN_SHOW_GI;
588
print_options += TXALIGN_SHOW_GI;
591
if (myargs[37].intvalue) {
592
align_options += TXALIGN_HTML;
593
print_options += TXALIGN_HTML;
596
if (myargs[5].intvalue != 0) {
597
align_options += TXALIGN_MASTER;
598
if (myargs[5].intvalue == 1 || myargs[5].intvalue == 3)
599
align_options += TXALIGN_MISMATCH;
600
if (myargs[5].intvalue == 3 || myargs[5].intvalue == 4 || myargs[5].intvalue == 6)
601
align_options += TXALIGN_FLAT_INS;
602
if (myargs[5].intvalue == 5 || myargs[5].intvalue == 6)
603
align_options += TXALIGN_BLUNT_END;
605
align_options += TXALIGN_MATRIX_VAL;
606
align_options += TXALIGN_SHOW_QS;
609
*align_options_out = align_options;
610
*print_options_out = print_options;
614
void PGPFreeBlastOptions(PGPBlastOptionsPtr bop)
617
bop->options = BLASTOptionDelete(bop->options);
619
FileClose(bop->infp);
620
FileClose(bop->outfp);
622
MemFree(bop->blast_database);
623
MemFree(bop->patfile);
624
MemFree(bop->seedSearch);
631
PGPBlastOptionsPtr PGPReadBlastOptions(void)
633
PGPBlastOptionsPtr bop;
634
BLAST_OptionsBlkPtr options;
639
bop = MemNew(sizeof(PGPBlastOptions));
641
bop->blast_database = StringSave(myargs [0].strvalue);
643
if ((bop->infp = FileOpen(myargs [1].strvalue, "r")) == NULL) {
644
ErrPostEx(SEV_FATAL, 0, 0, "blast: Unable to open input file %s\n",
645
myargs [1].strvalue);
649
if (myargs [6].strvalue != NULL) {
650
if ((bop->outfp = FileOpen(myargs [6].strvalue, "w")) == NULL) {
651
ErrPostEx(SEV_FATAL, 0, 0, "blast: Unable to open output "
652
"file %s\n", myargs [6].strvalue);
657
bop->number_of_descriptions = myargs[26].intvalue;
658
bop->number_of_alignments = myargs[27].intvalue;
660
if (myargs[22].intvalue != 0)
661
bop->believe_query = TRUE;
663
if (myargs[24].strvalue != NULL) {
665
if (bop->believe_query == FALSE) {
666
ErrPostEx(SEV_FATAL, 0, 0,
667
"-J option must be TRUE to use this option");
671
if ((bop->aip_out = AsnIoOpen (myargs[24].strvalue,"w")) == NULL) {
672
ErrPostEx(SEV_FATAL, 0, 0, "blast: Unable to open output "
673
"file %s\n", myargs[24].strvalue);
678
options = BLASTOptionNew("blastp", (Boolean)myargs[14].intvalue);
679
bop->options = options;
681
if(myargs[41].intvalue) {
682
if((sep = FastaToSeqEntryForDb (bop->infp, FALSE, NULL, bop->believe_query, NULL, NULL, &options->query_lcase_mask)) == NULL) {
683
ErrPostEx(SEV_FATAL, 0, 0, "Unable to read input FASTA file\n");
687
if((sep = FastaToSeqEntryEx(bop->infp, FALSE, NULL, bop->believe_query)) == NULL) {
688
ErrPostEx(SEV_FATAL, 0, 0, "Unable to read input FASTA file\n");
693
SeqEntryExplore(sep, &bop->query_bsp, FindProt);
694
/* sep->data.ptrvalue = NULL;
695
SeqEntryFree(sep); */
697
if (bop->query_bsp == NULL) {
698
ErrPostEx(SEV_FATAL, 0, 0, "Unable to obtain bioseq\n");
702
/* Set default gap params for matrix. */
703
BLASTOptionSetGapParams(options, myargs[25].strvalue, 0, 0);
705
if(myargs[5].intvalue == 7) {
706
bop->is_xml_output = TRUE;
707
} else if (myargs[5].intvalue != 8 && myargs[5].intvalue != 9) {
708
PGPGetPrintOptions(options->gapped_calculation, &bop->align_options,
709
&bop->print_options);
712
/* decrement by one to agree with program values. */
713
options->required_start = myargs[15].intvalue - 1;
714
options->required_end = myargs[16].intvalue;
715
if (options->required_end != -1) {
716
options->required_end--;
719
options->window_size = myargs [2].intvalue;
721
if(myargs [3].intvalue)
722
options->threshold_second = (Int4) myargs [3].intvalue;
724
options->dropoff_2nd_pass = myargs [7].floatvalue;
725
options->expect_value = (Nlm_FloatHi) myargs [4].floatvalue;
726
options->hitlist_size = MAX(bop->number_of_descriptions,
727
bop->number_of_alignments);
729
if (myargs[14].intvalue != 0) {
730
if (myargs[8].intvalue == 0) {
731
options->two_pass_method = FALSE;
732
options->multiple_hits_only = TRUE;
733
} else if (myargs[8].intvalue == 1) {
734
options->two_pass_method = FALSE;
735
options->multiple_hits_only = FALSE;
737
options->two_pass_method = TRUE;
738
options->multiple_hits_only = FALSE;
740
options->gap_open = myargs[10].intvalue;
741
options->gap_extend = myargs[11].intvalue;
743
#ifdef YES_TO_DECLINE_TO_ALIGN
744
if(myargs[43].intvalue != 0) {
745
options->discontinuous = TRUE;
746
options->decline_align = myargs[43].intvalue;
748
options->discontinuous = FALSE;
749
options->decline_align = INT2_MAX;
753
options->gap_x_dropoff = myargs[12].intvalue;
754
options->gap_x_dropoff_final = myargs[23].intvalue;
755
options->gap_trigger = myargs[13].floatvalue;
758
if (StringICmp(myargs[9].strvalue, "T") == 0) {
759
options->filter_string = StringSave("S");
761
options->filter_string = StringSave(myargs[9].strvalue);
764
options->number_of_cpus = (Int2) myargs[17].intvalue;
767
options->isPatternSearch = FALSE;
769
if (0 != (StringCmp("blastpgp",myargs[35].strvalue))) {
770
options->isPatternSearch = TRUE;
771
options->discontinuous = FALSE;
772
options->decline_align = INT2_MAX;
773
bop->program_flag = convertProgramToFlag(myargs[35].strvalue,
777
if (options->isPatternSearch) {
778
bop->patfile = StringSave(myargs[36].strvalue);
779
if ((bop->patfp = FileOpen(bop->patfile, "r")) == NULL) {
780
ErrPostEx(SEV_FATAL, 0, 0, "blast: Unable to open pattern "
781
"file %s\n", bop->patfile);
785
bop->seedSearch = (seedSearchItems *)
786
ckalloc(sizeof(seedSearchItems));
789
if(options->isPatternSearch)
790
fillCandLambda(bop->seedSearch, myargs[25].strvalue, options);
792
options->ethresh = (Nlm_FloatHi) myargs[19].floatvalue;
793
options->pseudoCountConst = myargs[20].intvalue;
794
options->maxNumPasses = myargs[21].intvalue;
795
/*zero out e-value threshold if it will not be used*/
796
if (options->maxNumPasses == 1)
797
options->ethresh = 0.0;
798
if (myargs[30].intvalue)
799
options->wordsize = myargs[30].intvalue;
800
if (myargs[31].intvalue)
801
options->db_length = (Int8) myargs[31].intvalue;
803
options->hsp_range_max = myargs[32].intvalue;
804
if (options->hsp_range_max != 0)
805
options->perform_culling = TRUE;
807
options->block_width = 20; /* Default value - previously '-L' parameter */
809
if (myargs[34].floatvalue)
810
options->searchsp_eff = (Nlm_FloatHi) myargs[34].floatvalue;
812
options->tweak_parameters = (Boolean) myargs[42].intvalue;
813
options->smith_waterman = (Boolean) myargs[33].intvalue;
815
if (bop->options->tweak_parameters) {
816
/*allows for extra matches in first pass of screening,
818
bop->options->original_expect_value = bop->options->expect_value;
819
bop->options->hitlist_size *= 2;
823
/* Seting list of gis to restrict search */
825
if (myargs[40].strvalue) {
826
options->gifile = StringSave(myargs[40].strvalue);
829
options = BLASTOptionValidate(options, "blastp");
834
if(bop->believe_query) {
835
bop->fake_bsp = bop->query_bsp;
837
bop->fake_bsp = BlastMakeFakeBioseq(bop->query_bsp, NULL);
839
BLASTUpdateSeqIdInSeqInt(bop->options->query_lcase_mask,
846
Boolean PGPReadNextQuerySequence(PGPBlastOptionsPtr bop)
850
/* Cleaning up stuff from previous query run */
852
bop->options->query_lcase_mask = SeqLocSetFree(bop->options->query_lcase_mask);
854
if (bop->believe_query == TRUE) { /* Not corect - to be fixed */
855
BioseqFree(bop->query_bsp);
857
BioseqFree(bop->fake_bsp);
860
if(myargs[41].intvalue) {
861
if((sep = FastaToSeqEntryForDb (bop->infp, FALSE, NULL, bop->believe_query, NULL, NULL, &bop->options->query_lcase_mask)) == NULL) {
865
if((sep = FastaToSeqEntryEx(bop->infp, FALSE, NULL,
866
bop->believe_query)) == NULL) {
871
bop->query_bsp = NULL;
872
SeqEntryExplore(sep, &bop->query_bsp, FindProt);
874
if (bop->query_bsp == NULL) {
875
ErrPostEx(SEV_FATAL, 0, 0, "Unable to obtain bioseq\n");
879
if(bop->believe_query) {
880
bop->fake_bsp = bop->query_bsp;
882
bop->fake_bsp = BlastMakeFakeBioseq(bop->query_bsp, NULL);
884
BLASTUpdateSeqIdInSeqInt(bop->options->query_lcase_mask,
891
Boolean PGPFormatHeader(PGPBlastOptionsPtr bop)
893
Boolean html = myargs[37].intvalue;
896
fprintf(bop->outfp, "<PRE>\n");
899
BlastPrintVersionInfo("blastp", html, bop->outfp);
900
fprintf(bop->outfp, "\n");
901
BlastPrintReference(html, 90, bop->outfp);
902
fprintf(bop->outfp, "\n");
903
AcknowledgeBlastQuery(bop->query_bsp, 70,
904
bop->outfp, bop->believe_query, html);
905
PrintDbInformation(bop->blast_database, TRUE, 70, bop->outfp, html);
910
Boolean PGPFormatFooter(PGPBlastOptionsPtr bop, BlastSearchBlkPtr search)
913
ValNodePtr mask_loc, vnp;
914
BLAST_KarlinBlkPtr ka_params=NULL, ka_params_gap=NULL;
915
TxDfDbInfoPtr dbinfo=NULL, dbinfo_head;
916
CharPtr params_buffer=NULL;
917
ValNodePtr other_returns;
918
BLAST_MatrixPtr blast_matrix;
919
Boolean html = myargs[37].intvalue;
922
fprintf(bop->outfp, "<PRE>\n");
924
other_returns = BlastOtherReturnsPrepare(search);
927
for (vnp=other_returns; vnp; vnp = vnp->next) {
928
switch (vnp->choice) {
930
dbinfo = vnp->data.ptrvalue;
933
ka_params = vnp->data.ptrvalue;
936
ka_params_gap = vnp->data.ptrvalue;
939
params_buffer = vnp->data.ptrvalue;
942
blast_matrix = vnp->data.ptrvalue;
943
BLAST_MatrixDestruct(blast_matrix);
945
case SEQLOC_MASKING_NOTSET:
946
case SEQLOC_MASKING_PLUS1:
947
case SEQLOC_MASKING_PLUS2:
948
case SEQLOC_MASKING_PLUS3:
949
case SEQLOC_MASKING_MINUS1:
950
case SEQLOC_MASKING_MINUS2:
951
case SEQLOC_MASKING_MINUS3:
952
ValNodeAddPointer(&mask_loc, vnp->choice, vnp->data.ptrvalue);
960
dbinfo_head = dbinfo;
962
PrintDbReport(dbinfo, 70, bop->outfp);
963
dbinfo = dbinfo->next;
965
dbinfo_head = TxDfDbInfoDestruct(dbinfo_head);
967
if (ka_params && !bop->options->isPatternSearch) {
968
PrintKAParameters(ka_params->Lambda, ka_params->K, ka_params->H, 70, bop->outfp, FALSE);
972
if (bop->options->isPatternSearch)
973
PrintKAParametersExtra(ka_params_gap->Lambda, ka_params_gap->K, ka_params_gap->H, bop->seedSearch->paramC, 70, bop->outfp, FALSE);
975
PrintKAParameters(ka_params_gap->Lambda, ka_params_gap->K, ka_params_gap->H, 70, bop->outfp, FALSE);
979
MemFree(ka_params_gap);
981
PrintTildeSepLines(params_buffer, 70, bop->outfp);
982
MemFree(params_buffer);
988
Boolean PGPrintPosMatrix(CharPtr filename, posSearchItems *posSearch,
989
compactSearchItems *compactSearch,
990
Boolean posComputationCalled)
994
if ((fp = FileOpen(filename, "w")) == NULL) {
995
ErrPostEx(SEV_FATAL, 0, 0, "Unable to open matrix output file %s\n",
1000
/* a diagnostic, partly an option with -Q. */
1001
outputPosMatrix(posSearch, compactSearch, fp, posComputationCalled);
1007
SeqAlignPtr PGPSeedSearch(PGPBlastOptionsPtr bop, BlastSearchBlkPtr search,
1008
posSearchItems *posSearch,
1009
SeqLocPtr PNTR seqloc_duplicate,
1010
SeqAlignPtr PNTR PNTR lastSeqAligns,
1011
Int4Ptr numLastSeqAligns)
1013
Uint1Ptr query = NULL; /*query sequence read in*/
1014
Uint1Ptr unfilter_query = NULL; /*needed if seg will filter query*/
1015
SeqLocPtr seg_slp; /*pointer to structure for seg filtering*/
1016
Int4 i, queryLength; /*length of query sequence*/
1020
SeqLocPtr seqloc, next;
1021
ValNodePtr seedReturn; /*return value from seedEngineCore, which
1022
is a list of lists of SeqAligns, one
1023
list of SeqAligns per pattern occurrence*/
1025
ValNodePtr vnp, info_vnp; /* Output text messages from seedEngineCore() */
1027
query = BlastGetSequenceFromBioseq(bop->fake_bsp, &queryLength);
1028
seg_slp = BlastBioseqFilter(bop->fake_bsp,
1029
bop->options->filter_string);
1031
unfilter_query = MemNew((queryLength + 1) * sizeof(Uint1));
1032
for (i = 0; i < queryLength; i++)
1033
unfilter_query[i] = query[i];
1034
BlastMaskTheResidues(query,queryLength,21,seg_slp,FALSE, 0);
1037
search->gap_align = GapAlignBlkNew(1,1);
1038
search->gap_align->gap_open = bop->options->gap_open;
1039
search->gap_align->gap_extend = bop->options->gap_extend;
1042
search->gap_align->decline_align = INT2_MAX;
1044
#ifdef YES_TO_DECLINE_TO_ALIGN
1045
if(myargs[43].intvalue != 0) {
1046
search->gap_align->decline_align = myargs[43].intvalue;
1048
search->gap_align->decline_align = INT2_MAX;
1052
/* search->gap_align->decline_align = (-(BLAST_SCORE_MIN)); */
1053
/* search->gap_align->decline_align = myargs[41].intvalue; */
1055
search->gap_align->x_parameter = bop->options->gap_x_dropoff
1056
*NCBIMATH_LN2/bop->seedSearch->paramLambda;
1057
search->gap_align->matrix = search->sbp->matrix;
1058
initProbs(bop->seedSearch);
1059
init_order(search->gap_align->matrix, bop->program_flag,
1060
FALSE, bop->seedSearch);
1062
for(i = 0; i < queryLength; i++)
1063
query[i] = bop->seedSearch->order[query[i]];
1065
if (unfilter_query) {
1066
for(i = 0; i < queryLength; i++)
1067
unfilter_query[i] = bop->seedSearch->order[unfilter_query[i]];
1071
seedReturn = seedEngineCore(search, bop->options, query,
1073
bop->blast_database, bop->patfile,
1075
bop->patfp, FALSE, FALSE,
1076
bop->seedSearch, bop->options->ethresh,
1077
myargs[34].floatvalue, posSearch,
1078
&seqloc, TRUE, &info_vnp);
1079
if (!bop->is_xml_output)
1080
PGPOutTextMessages(info_vnp, bop->outfp);
1081
ValNodeFreeData(info_vnp);
1083
if (search->error_return) {
1084
BlastErrorPrint(search->error_return);
1085
for (vnp = search->error_return; vnp; vnp = vnp->next) {
1086
BlastDestroyErrorMessage((BlastErrorMsgPtr)vnp->data.ptrvalue);
1088
search->error_return = ValNodeFree(search->error_return);
1090
*seqloc_duplicate = seqloc;
1091
head = convertValNodeListToSeqAlignList(seedReturn, lastSeqAligns,
1094
bop->featureOrder[FEATDEF_REGION] = 1;
1095
bop->groupOrder[FEATDEF_REGION] = 1;
1096
annot = bop->fake_bsp->annot = SeqAnnotNew();
1097
bop->fake_bsp->annot->type = 1; /* ftable. */
1099
next = seqloc->next;
1101
sfp->location = seqloc;
1102
sfp->data.choice = SEQFEAT_REGION;
1103
sfp->data.value.ptrvalue = StringSave("pattern");
1106
annot->next = SeqAnnotNew();
1107
annot = annot->next;
1115
if (unfilter_query != NULL)
1116
unfilter_query = MemFree(unfilter_query);
1121
void PGPFormatMainOutput(SeqAlignPtr head, PGPBlastOptionsPtr bop,
1122
BlastSearchBlkPtr search, Int4 thisPassNum,
1123
SeqAlignPtr PNTR lastSeqAligns,
1124
Int4 numLastSeqAligns, SeqLocPtr seed_seqloc,
1125
Int2Ptr posRepeatSequences)
1127
SeqAnnotPtr seqannot;
1128
ValNodePtr pruneSeed, seedReturn;
1129
BlastPruneSapStructPtr prune;
1130
BLAST_MatrixPtr matrix;
1131
Int4Ptr PNTR txmatrix;
1132
BioseqPtr query_bsp;
1135
fprintf(bop->outfp, "\n\n ***** No hits found ******\n\n");
1139
if (myargs[5].intvalue == 8 || myargs[5].intvalue == 9) {
1140
query_bsp = (bop->believe_query) ? bop->query_bsp : bop->fake_bsp;
1141
if (myargs[5].intvalue == 9)
1142
PrintTabularOutputHeader(bop->blast_database, query_bsp, NULL,
1143
"blastp", thisPassNum, bop->believe_query,
1145
BlastPrintTabulatedResults(head, query_bsp, NULL,
1146
bop->number_of_alignments,
1148
bop->believe_query, 0, 0, bop->outfp);
1152
seqannot = SeqAnnotNew();
1154
AddAlignInfoToSeqAnnot(seqannot, 2);
1155
seqannot->data = head;
1157
if (search->pbp->maxNumPasses != 1) {
1158
fprintf(bop->outfp, "\nResults from round %d\n",
1165
/* ------- Printing deflines for the BLAST output ------- */
1167
if (thisPassNum == 1) {
1168
search->positionBased = FALSE;
1169
if (!bop->options->isPatternSearch) {
1170
prune = BlastPruneHitsFromSeqAlign(head,
1171
bop->number_of_descriptions, NULL);
1172
PrintDefLinesFromSeqAlign(prune->sap, 80, bop->outfp,
1173
bop->print_options, FIRST_PASS, NULL);
1175
seedReturn = convertSeqAlignListToValNodeList(head,lastSeqAligns,
1178
pruneSeed = SeedPruneHitsFromSeedReturn(seedReturn,
1179
bop->number_of_descriptions);
1180
PrintDefLinesExtra(pruneSeed, 80, bop->outfp, bop->print_options,
1181
FIRST_PASS, NULL, seed_seqloc);
1184
prune = BlastPruneHitsFromSeqAlign(head,
1185
bop->number_of_descriptions, NULL);
1187
PrintDefLinesFromSeqAlign(prune->sap, 80, bop->outfp,
1189
NOT_FIRST_PASS_REPEATS,
1190
posRepeatSequences);
1191
PrintDefLinesFromSeqAlign(prune->sap, 80, bop->outfp,
1192
bop->print_options, NOT_FIRST_PASS_NEW,
1193
posRepeatSequences);
1195
PrintDefLinesFromSeqAlign(prune->sap, 80, bop->outfp,
1196
bop->print_options, FIRST_PASS, NULL);
1197
} /*thisPassNum == 1 */
1199
/* ------- --------------------------------------- ------- */
1201
if (ALL_ROUNDS && search->posConverged) {
1202
fprintf(bop->outfp, "\nCONVERGED!\n");
1209
if (search->sbp->matrix) {
1210
matrix = BLAST_MatrixFill(search->sbp, FALSE);
1211
txmatrix = BlastMatrixToTxMatrix(matrix);
1214
if (!(bop->options->isPatternSearch)) {
1215
prune = BlastPruneHitsFromSeqAlign(head, bop->number_of_alignments,
1217
seqannot->data = prune->sap;
1219
if(bop->options->discontinuous) {
1220
if(!DDV_DisplayBlastPairList(prune->sap, search->mask,
1223
bop->align_options & TXALIGN_HTML ? 6 : 1)) {
1226
" -------- Failure to print alignment... --------"
1230
} else { /* Old type formating */
1231
if (myargs[5].intvalue != 0) {
1232
ShowTextAlignFromAnnot2(seqannot, 60, bop->outfp,
1233
bop->featureOrder, bop->groupOrder,
1234
bop->align_options, NULL,
1238
ShowTextAlignFromAnnot2(seqannot, 60, bop->outfp,
1239
bop->featureOrder, bop->groupOrder,
1240
bop->align_options, txmatrix,
1241
search->mask, FormatScoreFunc,
1246
/* seqannot->data = head; */
1249
if (bop->number_of_alignments < bop->number_of_descriptions) {
1250
pruneSeed = SeedPruneHitsFromSeedReturn(pruneSeed,
1251
bop->number_of_alignments);
1255
if (myargs[5].intvalue != 0) {
1256
ShowTextAlignFromAnnotExtra(bop->query_bsp, pruneSeed,
1257
seed_seqloc, 60, bop->outfp,
1258
bop->featureOrder, bop->groupOrder,
1259
bop->align_options, NULL,
1260
search->mask, NULL);
1262
ShowTextAlignFromAnnotExtra(bop->query_bsp, pruneSeed,
1263
seed_seqloc, 60, bop->outfp,
1264
bop->featureOrder, bop->groupOrder,
1265
bop->align_options, txmatrix,
1266
search->mask, FormatScoreFunc);
1270
if (!(bop->options->isPatternSearch)) {
1271
prune = BlastPruneSapStructDestruct(prune);
1274
search->positionBased = TRUE;
1278
matrix = BLAST_MatrixDestruct(matrix);
1280
txmatrix = TxMatrixDestruct(txmatrix);
1282
seqannot->data = NULL;
1283
seqannot = SeqAnnotFree(seqannot);
1288
void PGPSeqAlignOut(PGPBlastOptionsPtr bop, SeqAlignPtr head)
1290
SeqAnnotPtr seqannot;
1292
if (!bop->aip_out || !head)
1295
seqannot = SeqAnnotNew();
1297
AddAlignInfoToSeqAnnot(seqannot, 2);
1298
seqannot->data = head;
1299
SeqAnnotAsnWrite((SeqAnnotPtr) seqannot, bop->aip_out, NULL);
1300
AsnIoReset(bop->aip_out);
1301
bop->aip_out = AsnIoClose(bop->aip_out);
1302
seqannot->data = NULL;
1303
seqannot = SeqAnnotFree(seqannot);
1309
PGPBlastOptionsPtr bop;
1310
BlastSearchBlkPtr search;
1311
SeqAlignPtr head = NULL;
1312
SeqLocPtr seed_seqloc = NULL;
1314
/* used for psi-blast */
1316
posSearchItems *posSearch;
1317
compactSearchItems *compactSearch;
1318
Boolean recoverCheckpoint = FALSE;
1319
Boolean alreadyRecovered = FALSE;
1320
Boolean freqCheckpoint = FALSE;
1321
Boolean alignCheckpoint = FALSE;
1322
Boolean posComputationCalled = FALSE;
1323
Boolean checkReturn = FALSE;
1324
Boolean next_query = FALSE;
1325
Boolean tabular_output;
1327
SeqAlignPtr PNTR lastSeqAligns = NULL;
1328
/*keeps track of the last SeqAlign in
1329
each list of seedReturn so that the
1330
2-level list can be converted to a 1-level
1331
list and then back to 2-level*/
1332
Int4 numLastSeqAligns = 0;
1334
PSIXmlPtr psixp = NULL;
1337
/* ----- End of definitions ----- */
1339
if (! GetArgs ("blastpgp", NUMARG, myargs))
1341
ErrSetMessageLevel(SEV_WARNING);
1343
UseLocalAsnloadDataAndErrMsg ();
1345
if (! SeqEntryLoad())
1348
bop = PGPReadBlastOptions();
1350
/* Here we will start main do/while loop over many query entries in
1351
the input file. If there is an option for checkpoint recover or
1352
Output File for PSI-BLAST Checkpointing all but first entries in
1353
the input file will be discarded */
1356
search = BLASTSetUpSearchWithReadDb(bop->fake_bsp, "blastp",
1357
bop->query_bsp->length,
1358
bop->blast_database,
1359
bop->options, NULL);
1364
if(bop->is_xml_output) {
1366
if((aip = AsnIoOpen(myargs[6].strvalue, "wx")) == NULL)
1369
psixp = PSIXmlInit(aip, "blastp", bop->blast_database,
1375
if ((NULL != myargs[29].strvalue) || (NULL != myargs[39].strvalue)) {
1376
recoverCheckpoint = TRUE;
1377
if (NULL != myargs[29].strvalue) {
1378
freqCheckpoint = TRUE;
1379
alignCheckpoint = FALSE;
1381
freqCheckpoint = FALSE;
1382
alignCheckpoint = TRUE;
1386
if (recoverCheckpoint)
1387
search->positionBased = TRUE;
1389
search->positionBased = FALSE;
1391
global_fp = bop->outfp;
1392
tabular_output = (myargs[5].intvalue == 8 || myargs[5].intvalue == 9);
1394
if(!bop->is_xml_output && !tabular_output) {
1395
PGPFormatHeader(bop);
1400
compactSearch = NULL;
1401
search->posConverged = FALSE;
1402
global_fp = bop->outfp;
1403
search->error_return = NULL;
1405
if (recoverCheckpoint) {
1406
posSearch = (posSearchItems *) MemNew(1 * sizeof(posSearchItems));
1407
compactSearch = compactSearchNew(compactSearch);
1408
copySearchItems(compactSearch, search, bop->options->matrix);
1409
posInitializeInformation(posSearch,search);
1411
if (freqCheckpoint) {
1412
checkReturn = posReadCheckpoint(posSearch, compactSearch, myargs[29].strvalue, &(search->error_return));
1413
search->sbp->posMatrix = posSearch->posMatrix;
1414
if (NULL == search->sbp->posFreqs)
1415
search->sbp->posFreqs = allocatePosFreqs(compactSearch->qlength, compactSearch->alphabetSize);
1416
copyPosFreqs(posSearch->posFreqs,search->sbp->posFreqs, compactSearch->qlength, compactSearch->alphabetSize);
1418
search->sbp->posMatrix = BposComputation(posSearch, search, compactSearch, myargs[39].strvalue, myargs[28].strvalue, &(search->error_return));
1419
posComputationCalled = TRUE;
1420
if (NULL == search->sbp->posMatrix)
1421
checkReturn = FALSE;
1427
if (search->error_return) {
1428
BlastErrorPrint(search->error_return);
1429
for (vnp = search->error_return; vnp; vnp = vnp->next) {
1430
BlastDestroyErrorMessage((BlastErrorMsgPtr)vnp->data.ptrvalue);
1432
search->error_return = ValNodeFree(search->error_return);
1435
ErrPostEx(SEV_FATAL, 0, 0, "blast: Error recovering from checkpoint");
1439
/* Print out Pos matrix if necessary */
1440
if (myargs[38].strvalue != NULL)
1441
PGPrintPosMatrix(myargs[38].strvalue, posSearch, compactSearch, posComputationCalled);
1446
if (thisPassNum > 1)
1447
bop->options->isPatternSearch = FALSE;
1450
head = SeqAlignSetFree(head);
1453
if(!bop->is_xml_output && !tabular_output) {
1454
search->thr_info->tick_callback = tick_callback;
1455
fprintf(global_fp, "%s", "Searching");
1459
if (1 == thisPassNum && (!recoverCheckpoint)) {
1461
posSearch = (posSearchItems *)
1462
MemNew(1 * sizeof(posSearchItems));
1465
/* ----- Here is real BLAST search done ------- */
1467
if (bop->options->isPatternSearch &&
1468
(1 == thisPassNum && (!recoverCheckpoint))) {
1469
head = PGPSeedSearch(bop, search, posSearch,
1471
&lastSeqAligns, &numLastSeqAligns);
1473
if ((bop->options->tweak_parameters) && (thisPassNum > 1)) {
1474
/*allows for extra matches in first pass of screening,
1475
hitlist_size will be restored in
1476
BioseqBlastEngineCore for the second pass. */
1477
bop->options->original_expect_value =
1478
bop->options->expect_value;
1479
bop->options->hitlist_size *= 2;
1482
if ((1 == thisPassNum) && (!recoverCheckpoint))
1483
head = BioseqBlastEngineCore(search, bop->options, NULL);
1485
head = BioseqBlastEngineCore(search, bop->options,
1486
search->sbp->posMatrix);
1488
/* ---------------------------------------------- */
1490
if (recoverCheckpoint) {
1491
compactSearchDestruct(compactSearch);
1492
recoverCheckpoint = FALSE;
1493
alreadyRecovered = TRUE;
1496
if (thisPassNum == 1) {
1497
compactSearch = compactSearchNew(compactSearch);
1499
MemFree(compactSearch->standardProb);
1502
copySearchItems(compactSearch, search, bop->options->matrix);
1505
/* The next two calls (after the "if") are diagnostics
1506
for Stephen. Don't perform this if only one pass will
1507
be made (i.e., standard BLAST) */
1509
if (ALL_ROUNDS && 1 != search->pbp->maxNumPasses) {
1510
if ((1 == thisPassNum) && (!alreadyRecovered))
1511
posInitializeInformation(posSearch, search);
1512
posPrintInformation(posSearch, search, thisPassNum);
1516
if(!bop->is_xml_output && !tabular_output) {
1517
fprintf(global_fp, "%s", "done\n\n");
1522
if (thisPassNum == 1) {
1523
ReadDBBioseqFetchEnable ("blastpgp", bop->blast_database,
1527
/* Have things converged? */
1528
if (ALL_ROUNDS && search->pbp->maxNumPasses != 1) {
1529
posConvergenceTest(posSearch, search, head, thisPassNum);
1534
search->positionBased = TRUE;
1536
/* Here is all BLAST formating of the main output done */
1538
if(bop->is_xml_output) {
1540
ValNodePtr other_returns;
1543
other_returns = BlastOtherReturnsPrepare(search);
1546
iterp = BXMLBuildOneIteration(head, other_returns, bop->options->is_ooframe, !bop->options->gapped_calculation, thisPassNum, "No hits found");
1548
iterp = BXMLBuildOneIteration(head, other_returns, bop->options->is_ooframe, !bop->options->gapped_calculation, thisPassNum, search->posConverged ? "CONVERGED" : NULL);
1551
IterationAsnWrite(iterp, psixp->aip, psixp->atp);
1552
AsnIoFlush(psixp->aip);
1554
IterationFree(iterp);
1555
BlastOtherReturnsFree(other_returns);
1557
PGPFormatMainOutput(head, bop, search, thisPassNum,
1558
lastSeqAligns, numLastSeqAligns,
1559
seed_seqloc, posSearch->posRepeatSequences);
1562
if (alreadyRecovered) {
1563
posCheckpointFreeMemory(posSearch, compactSearch->qlength);
1564
alreadyRecovered = FALSE;
1567
if (ALL_ROUNDS && thisPassNum > 1) {
1568
posCleanup(posSearch, compactSearch);
1571
if (!search->posConverged && (search->pbp->maxNumPasses == 0 ||
1572
(thisPassNum < search->pbp->maxNumPasses))) {
1574
search->sbp->posMatrix =
1575
CposComputation(posSearch, search, compactSearch,
1576
head, myargs[28].strvalue,
1577
(bop->options->isPatternSearch &&
1579
&(search->error_return), 1.0); /*AAS*/
1580
posComputationCalled = TRUE;
1581
if (search->error_return) {
1582
BlastErrorPrint(search->error_return);
1583
for (vnp = search->error_return; vnp; vnp = vnp->next) {
1584
BlastDestroyErrorMessage((BlastErrorMsgPtr)vnp->data.ptrvalue);
1586
search->error_return = ValNodeFree(search->error_return);
1589
search->sbp->posMatrix =
1590
WposComputation(compactSearch, head, search->sbp->posFreqs);
1591
posComputationCalled = TRUE;
1594
/* DEBUG Printing of the matrix */
1599
fd = FileOpen("omatrix.out", "w");
1600
for(i = 0; i < bop->query_bsp->length; i++) {
1601
for(j = 0; j < 26; j++) {
1602
fprintf(fd, "%d ", search->sbp->posMatrix[i][j]);
1610
search->sbp->posMatrix = NULL;
1613
if (ALL_ROUNDS && thisPassNum > 1) {
1614
MemFree(posSearch->posRepeatSequences);
1617
if (!search->posConverged && (0 == search->pbp->maxNumPasses ||
1618
thisPassNum < search->pbp->maxNumPasses)) {
1620
/* Print out pos matrix if necessary */
1621
if (ALL_ROUNDS && (myargs[38].strvalue != NULL))
1622
PGPrintPosMatrix(myargs[38].strvalue, posSearch,
1623
compactSearch, posComputationCalled);
1626
} while (( 0 == search->pbp->maxNumPasses || thisPassNum < (search->pbp->maxNumPasses)) && (! (search->posConverged)));
1628
if (bop->aip_out != NULL && head != NULL)
1629
PGPSeqAlignOut(bop, head);
1631
head = SeqAlignSetFree(head);
1633
/* Here we will print out footer of BLAST output */
1635
if(!bop->is_xml_output && !tabular_output) {
1636
PGPFormatFooter(bop, search);
1639
/* PGPOneQueryCleanup */
1641
if (bop->options->isPatternSearch) {
1642
bop->seedSearch = MemFree(bop->seedSearch);
1643
FileClose(bop->patfp);
1647
posFreeInformation(posSearch);
1650
compactSearchDestruct(compactSearch);
1651
search = BlastSearchBlkDestruct(search);
1653
/* If these options are set we are not going to proceed with another
1654
queryes in the input file if any */
1656
if(recoverCheckpoint || myargs[38].strvalue != NULL)
1660
next_query = PGPReadNextQuerySequence(bop);
1661
if (bop->options->tweak_parameters)
1662
bop->options->hitlist_size *= 2;
1663
} while (next_query); /* End of main do {} while (); loop */
1665
ReadDBBioseqFetchDisable();
1672
PGPFreeBlastOptions(bop);
1678
/* Nothing below this line is executable code */
1680
#ifdef PRINT_ONLY_ALIGNMENT
1685
seqannot = SeqAnnotFree(seqannot);
1687
seqannot = SeqAnnotNew();
1689
AddAlignInfoToSeqAnnot(seqannot, 2);
1690
seqannot->data = head;
1691
aip = AsnIoOpen("stdout", "w");
1692
SeqAnnotAsnWrite(seqannot, aip, NULL);
1696
seqannot->data = NULL;
1697
seqannot = SeqAnnotFree(seqannot);