1
static char const rcsid[] = "$Id: rpsblast.c,v 6.75 2005/08/29 14:45:34 camacho Exp $";
1
static char const rcsid[] = "$Id: rpsblast.c,v 6.82 2006/04/26 12:47:48 madden Exp $";
3
/* $Id: rpsblast.c,v 6.75 2005/08/29 14:45:34 camacho Exp $
3
/* $Id: rpsblast.c,v 6.82 2006/04/26 12:47:48 madden Exp $
4
4
* ===========================================================================
6
6
* PUBLIC DOMAIN NOTICE
32
32
* Initial Version Creation Date: 12/14/1999
36
36
* File Description:
37
37
* Main file for RPS BLAST program
39
39
* $Log: rpsblast.c,v $
40
* Revision 6.82 2006/04/26 12:47:48 madden
41
* Use SBlastMessage in place of Blast_Message
43
* Revision 6.81 2006/04/21 14:34:50 madden
44
* BLAST_GetQuerySeqLoc prototype change
46
* Revision 6.80 2006/04/20 15:32:37 papadopo
47
* if query IDs are actually used, verify that there are no duplicate IDs
49
* Revision 6.79 2006/01/23 16:44:06 papadopo
50
* change signature of FillHitSavingOptions
52
* Revision 6.78 2006/01/10 20:44:10 madden
53
* Use SBlastSeqalignArray
55
* Revision 6.77 2006/01/06 16:36:14 papadopo
56
* 1. By default, log messages go to stderr and not a logfile
57
* 2. Do not continue execution after a fatal error is encountered
59
* Revision 6.76 2005/12/22 14:22:19 papadopo
60
* change signature of BLAST_FillLookupTableOptions
40
62
* Revision 6.75 2005/08/29 14:45:34 camacho
41
63
* From Ilya Dondoshansky:
42
64
* Retrieve mask_at_hash option from the SBlastOptions structure instead of
284
306
#include <algo/blast/api/blast_format.h>
285
307
#include <algo/blast/api/blast_api.h>
286
308
#include <algo/blast/api/blast_seq.h>
309
#include <algo/blast/api/blast_seqalign.h>
310
#include <algo/blast/api/blast_message_api.h>
289
313
#include "/am/purew/solaris2/new/../purify/purify-4.5-solaris2/purify.h"
393
417
"F", NULL, NULL, FALSE, 'T', ARG_BOOLEAN, 0.0, 0, NULL},
394
418
/* OPT_LOGIFLE */
395
419
{"Logfile name ",
396
"rpsblast.log", NULL,NULL,TRUE,'l',ARG_FILE_OUT, 0.0,0,NULL},
420
"stderr", NULL,NULL,TRUE,'l',ARG_FILE_OUT, 0.0,0,NULL},
398
422
{"Use lower case filtering of FASTA sequence",
399
423
"F", NULL,NULL,TRUE,'U',ARG_BOOLEAN, 0.0,0,NULL},
426
450
BlastScoringOptions* score_options = options->score_options;
427
451
BlastEffectiveLengthsOptions* eff_len_options = options->eff_len_options;
428
452
BlastDatabaseOptions* db_options = options->db_options;
453
Blast_Message *core_msg = NULL;
430
455
BLAST_FillLookupTableOptions(lookup_options, kProgram,
431
456
FALSE, /* megablast */
432
457
0, /* default threshold */
433
0, /* default wordsize */
434
FALSE);/* no variable wordsize */
458
0); /* default wordsize */
436
460
BLAST_FillQuerySetUpOptions(query_setup_options, kProgram,
437
461
myargs[OPT_FILTER].strvalue, 0);
460
484
MAX(myargs[OPT_NUM_DESC].intvalue,
461
485
myargs[OPT_NUM_RESULTS].intvalue),
487
0, /* turn off culling */
488
0); /* min diag separation */
465
490
if (myargs[OPT_SEARCHSP].floatvalue != 0) {
466
491
eff_len_options->searchsp_eff = (Int8) myargs[OPT_SEARCHSP].floatvalue;
476
501
/* Validate the options. */
477
502
status = BLAST_ValidateOptions(kProgram, ext_options, score_options,
478
503
lookup_options, word_options, hit_options,
479
&sum_returns->error);
505
sum_returns->error = Blast_MessageToSBlastMessage(core_msg, NULL, NULL, FALSE);
518
544
EBlastProgramType program_number;
519
545
char* dbname = NULL;
523
548
Int4 query_from = 0, query_to = 0;
524
549
SBlastOptions* options = NULL;
525
SeqLoc* query_slp = NULL;
526
SeqAlign* seqalign = NULL;
527
550
BlastFormattingInfo* format_info = NULL;
528
551
Blast_SummaryReturn* sum_returns=Blast_SummaryReturnNew();
529
552
Int4 num_queries = 0;
530
553
SeqLoc* lcase_mask = NULL;
531
SeqLoc* filter_loc = NULL;
532
554
const int kMaxConcatLength = 40000;
533
555
Blast_SummaryReturn* full_sum_returns = NULL;
556
Boolean believe_query = (Boolean) myargs[OPT_BELIEVE_QUERY].intvalue;
535
558
/* select protein or nucleotide query, and choose
536
559
the appropriate program type */
552
575
options = SBlastOptionsFree(options);
553
576
if (sum_returns->error) {
554
Blast_SummaryReturnsPostError(sum_returns);
577
SBlastMessageErrPost(sum_returns->error);
555
578
sum_returns = Blast_SummaryReturnFree(sum_returns);
583
SBlastOptionsSetBelieveQuery(options, believe_query);
560
585
/* initialize the database and then the RPS_specific data files */
562
587
dbname = myargs[OPT_DB].strvalue;
592
617
myargs[OPT_NUM_RESULTS].intvalue,
593
618
(Boolean) myargs[OPT_HTML].intvalue,
594
619
FALSE, (Boolean) myargs[OPT_SHOW_GI].intvalue,
595
(Boolean) myargs[OPT_BELIEVE_QUERY].intvalue);
597
622
BLAST_PrintOutputHeader(format_info);
599
624
/* Loop over sets of queries. */
626
SBlastSeqalignArray* seqalign_arr=NULL;
627
SeqLoc* query_slp = NULL;
628
SeqLoc* filter_loc = NULL;
602
631
if ((Boolean)myargs[OPT_LCASE].intvalue) {
604
633
BLAST_GetQuerySeqLoc(infp, query_is_na, 0, kMaxConcatLength,
605
634
query_from, query_to, &lcase_mask,
606
635
&query_slp, &ctr, &num_queries,
607
myargs[OPT_BELIEVE_QUERY].intvalue, 0);
610
639
BLAST_GetQuerySeqLoc(infp, query_is_na, 0, kMaxConcatLength,
611
640
query_from, query_to, NULL,
612
641
&query_slp, &ctr, &num_queries,
613
myargs[OPT_BELIEVE_QUERY].intvalue, 0);
616
645
if (letters_read <= 0)
648
if (believe_query && BlastSeqlocsHaveDuplicateIDs(query_slp)) {
649
ErrPostEx(SEV_FATAL, 1, 0,
650
"Duplicate IDs detected; please ensure that "
651
"all query sequence identifiers are unique");
619
654
/* Call database search function. Pass NULL for tabular formatting
620
655
* structure pointer, because on-the-fly tabular formatting is not
621
656
* allowed for RPS BLAST.
624
659
Blast_DatabaseSearch(query_slp, dbname, lcase_mask, options, NULL,
625
&seqalign, &filter_loc, sum_returns);
660
&seqalign_arr, &filter_loc, sum_returns);
627
662
/* Free the lower case mask in SeqLoc form. */
628
663
lcase_mask = Blast_ValNodeMaskListFree(lcase_mask);
635
670
/* Post warning or error messages, no matter what the search status
637
Blast_SummaryReturnsPostError(sum_returns);
672
SBlastMessageErrPost(sum_returns->error);
639
674
if (status != 0) {
640
675
ErrPostEx(SEV_FATAL, 1, 0, "BLAST search failed");
646
681
/* format results */
648
683
if (myargs[OPT_ASNOUT].strvalue) {
649
AsnIoPtr asnout = AsnIoOpen(myargs[OPT_ASNOUT].strvalue, (char*)"w");
650
GenericSeqAlignSetAsnWrite(seqalign, asnout);
651
asnout = AsnIoClose(asnout);
684
/* This just prints out the ASN.1 to a secondary file. */
685
BlastFormattingInfo* asn_format_info = NULL;
686
BlastFormattingInfoNew(eAlignViewAsnText, options,
687
blast_program, dbname,
688
myargs[OPT_ASNOUT].strvalue, &asn_format_info);
690
BlastFormattingInfoSetUpOptions(asn_format_info,
691
myargs[OPT_NUM_DESC].intvalue,
692
myargs[OPT_NUM_DESC].intvalue,
698
BLAST_FormatResults(seqalign_arr, num_queries, query_slp,
699
NULL, asn_format_info, sum_returns);
700
asn_format_info = BlastFormattingInfoFree(asn_format_info);
655
BLAST_FormatResults(seqalign, num_queries, query_slp, filter_loc,
704
BLAST_FormatResults(seqalign_arr, num_queries, query_slp, filter_loc,
656
705
format_info, sum_returns);
658
707
/* finish cleanup */
659
708
filter_loc = Blast_ValNodeMaskListFree(filter_loc);
660
seqalign = SeqAlignSetFree(seqalign);
661
709
query_slp = SeqLocSetFree(query_slp);
710
seqalign_arr = SBlastSeqalignArrayFree(seqalign_arr);
663
712
/* Update the cumulative summary returns structure and clean the returns
664
713
substructures for the current search iteration. */
1187
1236
if (!GetArgs(buf, NUM_ARGS, myargs))
1190
if ( !ErrSetLog (myargs[OPT_LOGFILE].strvalue) ) { /* Logfile */
1194
ErrSetOpts (ERR_CONTINUE, ERR_LOG_ON);
1239
if (strcmp(myargs[OPT_LOGFILE].strvalue, "stderr") != 0) {
1240
if ( !ErrSetLog (myargs[OPT_LOGFILE].strvalue) ) { /* Logfile */
1197
1246
UseLocalAsnloadDataAndErrMsg ();