33
33
******************************************************************************/
36
36
* $Log: blast.h,v $
37
* Revision 6.68 2004/06/30 12:28:20 madden
38
* Removed some function prototypes and moved to blfmtutl.h
40
* Revision 6.67 2003/03/25 22:21:53 boemker
41
* Clarified behavior of BLAST_Wizard.
43
* Revision 6.66 2003/03/25 19:58:18 boemker
44
* Moved code to initialize search options from blastcgicmd.cpp to here, as
47
* Revision 6.65 2003/03/24 19:42:14 madden
48
* Changes to support query concatenation for blastn and tblastn
50
* Revision 6.64 2003/01/14 20:28:54 madden
51
* New function BLASTAddBlastDBTitleToSeqAnnotEx
53
* Revision 6.63 2002/10/22 17:57:48 camacho
54
* Changes to B2SPssmMultipleQueries
56
* Revision 6.62 2002/10/21 23:13:36 camacho
57
* Added B2SPssmOnTheFly functions
59
* Revision 6.61 2002/09/18 20:23:20 camacho
60
* Added BLASTCalculateSearchSpace
62
* Revision 6.60 2002/09/02 21:15:20 camacho
63
* Changed comment for psi-blast2sequences
65
* Revision 6.59 2002/08/30 18:56:02 dondosha
66
* Made BlastMakeTempProteinBioseq and HackSeqLocId public: needed for Cn3D
68
* Revision 6.58 2002/08/29 20:44:38 camacho
69
* Added description of psi-blast2sequences
71
* Revision 6.57 2002/08/09 19:39:20 camacho
72
* Added constants for some blast search parameters
74
* Revision 6.56 2002/08/01 20:47:24 dondosha
75
* Prototypes changed for megablast functions related to traceback
77
* Revision 6.55 2002/07/02 17:08:00 dondosha
78
* Reverse previous change - not needed
80
* Revision 6.54 2002/07/01 22:52:06 dondosha
81
* Added CheckStartForGappedAlignmentEx with an extra window size parameter
83
* Revision 6.53 2002/05/28 22:00:12 camacho
84
* *** empty log message ***
86
* Revision 6.52 2002/05/13 13:51:33 dondosha
87
* Made two functions public
89
* Revision 6.51 2002/05/09 15:35:51 dondosha
90
* Added BLASTOptionNewEx function with an extra argument for megablast
92
* Revision 6.50 2002/03/14 16:11:40 camacho
93
* Extended BlastTwoSequences to allow comparison between sequence and PSSM
95
* Revision 6.49 2002/02/15 23:36:23 dondosha
96
* Correction for megablast with non-greedy extensions
37
98
* Revision 6.48 2001/07/09 15:12:47 shavirin
38
99
* Functions BLbasicSmithWatermanScoreOnly() and BLSmithWatermanFindStart()
39
100
* used to calculate Smith-waterman alignments on low level become external.
391
456
/* BLASTOptionSetGapParams: takes a BLAST options block, a CharPtr named
   matrix and two Int4 values (open, extended); returns an Int2.
   NOTE(review): presumably stores gap-open/gap-extension costs for the
   named matrix in options and returns a status code -- confirm against
   the implementation. */
Int2 LIBCALL BLASTOptionSetGapParams PROTO((BLAST_OptionsBlkPtr options, CharPtr matrix, Int4 open, Int4 extended));
458
/********************* BLASTCalculateSearchSpace **************************
459
Purpose: Calculate the effective search space for a gapped search with a
460
minimal set of options. Assumes the query is a protein sequence (i.e.,
461
no multiple contexts to consider).
462
Parameters: options [in]: Blast options structure
463
nseq [in]: Number of sequences in the database
464
dblen [in]: Length of the database
465
qlen [in]: Length of the query sequence
466
Returns: Effective search space
467
**************************************************************************/
468
FloatHi LIBCALL BLASTCalculateSearchSpace PROTO((BLAST_OptionsBlkPtr options,
469
Int4 nseq, Int8 dblen, Int4 qlen));
395
472
The setup functions; call these before running BLAST.
402
479
/* BLASTSetUpSearchByLocWithReadDb: setup function taking the query as a
   SeqLocPtr (slp), a program name, qlen, a database name and an options
   block; returns a BlastSearchBlkPtr.
   index_callback is a caller-supplied function receiving two Int4 values
   (done, positives) and returning int.
   NOTE(review): qlen is presumably the query length and the callback a
   progress hook -- confirm; failure behaviour is not visible here. */
BlastSearchBlkPtr LIBCALL BLASTSetUpSearchByLocWithReadDb PROTO((SeqLocPtr slp, CharPtr prog_name, Int4 qlen, CharPtr dbname, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *index_callback)PROTO((Int4 done, Int4 positives))));
404
/* BLASTSetUpSearchByLocWithReadDbEx (form WITHOUT mult_queries): same
   leading parameters as BLASTSetUpSearchByLocWithReadDb plus seqid_list,
   gi_list and gi_list_total; returns a BlastSearchBlkPtr.
   NOTE(review): a second declaration of this same function, with an
   additional trailing QueriesPtr mult_queries parameter, follows directly
   after this one. Two declarations with different parameter lists are
   incompatible in one translation unit -- this looks like the superseded
   form; confirm and keep only one. */
BlastSearchBlkPtr LIBCALL BLASTSetUpSearchByLocWithReadDbEx PROTO((SeqLocPtr slp, CharPtr prog_name, Int4 qlen, CharPtr dbname, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *index_callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total));
481
/* BLASTSetUpSearchByLocWithReadDbEx (form WITH mult_queries): extended
   setup taking, in addition to the query location, program name, qlen,
   database name, options and index_callback, a SeqIdPtr seqid_list, a
   gi_list with its element count gi_list_total, and mult_queries.
   mult_queries holds information about the individual queries that were
   concatenated when the -B option is used.
   Returns a BlastSearchBlkPtr.
   NOTE(review): seqid_list/gi_list presumably restrict the database
   search to the listed sequences -- confirm. A declaration of the same
   name without mult_queries also appears just before this one; only one
   of the two can be kept. */
BlastSearchBlkPtr LIBCALL BLASTSetUpSearchByLocWithReadDbEx PROTO((SeqLocPtr slp, CharPtr prog_name, Int4 qlen, CharPtr dbname, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *index_callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total, QueriesPtr mult_queries));
482
/* --KM added mult_queries param: struct holding info about individual queries that got concatenated when -B option used */
406
484
/* BLASTSetUpSearch: setup function taking the query as a BioseqPtr
   (query_bsp) together with a program name, qlen, an Int8 dblen, an
   all_words structure, an options block and an index_callback taking
   (Int4 done, Int4 positives) and returning int.
   Returns a BlastSearchBlkPtr.
   NOTE(review): unlike the ...WithReadDb variants no database name is
   passed; dblen is presumably the database length supplied by the
   caller -- confirm. */
BlastSearchBlkPtr LIBCALL BLASTSetUpSearch PROTO((BioseqPtr query_bsp, CharPtr prog_name, Int4 qlen, Int8 dblen, BlastAllWordPtr all_words, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *index_callback)PROTO((Int4 done, Int4 positives))));
441
519
/* BlastTwoSequencesByLocEx: takes two sequence locations (slp1, slp2), a
   program name and an options block; returns a SeqAlignPtr.
   other_returns and error_returns are ValNodePtr* parameters.
   NOTE(review): these two presumably receive auxiliary results and error
   messages for the caller to free -- confirm ownership. */
SeqAlignPtr LIBCALL BlastTwoSequencesByLocEx PROTO((SeqLocPtr slp1, SeqLocPtr slp2, CharPtr progname, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns));
443
/* BlastTwoSequencesByLocWithCallback (form WITHOUT matrix): same
   parameters as BlastTwoSequencesByLocEx plus handle_results, a
   caller-supplied function taking a VoidPtr (srch) and returning int.
   NOTE(review): a later declaration of this same function adds a trailing
   BLAST_MatrixPtr matrix parameter. The two parameter lists are
   incompatible in one translation unit -- this looks like the superseded
   form; confirm and keep only one. */
SeqAlignPtr LIBCALL BlastTwoSequencesByLocWithCallback PROTO((SeqLocPtr slp1, SeqLocPtr slp2, CharPtr progname, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns, int (LIBCALLBACK *handle_results)PROTO((VoidPtr srch))));
521
/* Notes for psi-blast2sequences (compare a PSSM with sequence slp2): (CC)
522
* =============================
523
* 1) This functionality requires (at least) the residue frequencies
524
* (BLAST_MatrixPtr->posFreqs) to compute the PSSM. If the PSSM is
525
* provided (BLAST_MatrixPtr->matrix), then this takes precedence and the
526
* PSSM will not be recomputed. Please note that the PSSM and residue
527
* frequencies are matrices with dimensions 26 by query_length+1. The last
528
* row should be set to BLAST_SCORE_MIN (for PSSMs) or 0.0 (for residue
529
* frequencies). 26 is the alphabet size (also defined as PRO_ALPHABET_SIZE in
532
* 2) The slp1 parameter is the master sequence for the PSSM (used to display
533
* the alignment) and can only be shorter than the PSSM (the PSSM will be
534
* trimmed accordingly).
536
* 3) If the scalingFactor is set to 0.0 (default in the options structure),
537
* the PSSM will be calculated in the same way as psiblast (blastpgp) does it
538
* (that is, without scaling the PSSM). In order to use composition-based
539
* statistics (default in psiblast), please set the options->tweak_parameters
540
* option to TRUE. This is *not* the default in the options structure.
542
* 4) Also, if the scalingFactor is not 0.0 in the options parameter, this
543
* value will be used to scale the PSSM only if it is calculated by this
544
* function (if the PSSM is calculated outside this function it is assumed
545
* that the PSSM has been scaled already and that the same scalingFactor
546
* that was used to create it is passed into this function). This value is
547
* also used to multiply various parameters such as gap costs, X dropoff
548
* values, when performing the matrix rescaling, and to adjust the scores
549
* and Lambda parameters when performing the traceback stage.
550
* The matrix rescaling step will take place prior to the traceback stage.
551
* This functionality resembles what rpsblast/impala do.
553
/* BlastTwoSequencesByLocWithCallback (form WITH matrix): compares slp1
   with slp2 and returns a SeqAlignPtr. handle_results is a caller-supplied
   function taking a VoidPtr (srch) and returning int.
   matrix carries the PSSM and/or residue frequencies for the
   psi-blast2sequences mode; in that mode slp1 is the master sequence for
   the PSSM and slp2 the sequence it is compared with.
   NOTE(review): passing a null matrix presumably selects the ordinary
   two-sequence comparison -- confirm. An earlier declaration of this name
   without the matrix parameter also appears in this header; only one of
   the two can be kept. */
SeqAlignPtr LIBCALL BlastTwoSequencesByLocWithCallback PROTO((SeqLocPtr slp1, SeqLocPtr slp2, CharPtr progname, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns, int (LIBCALLBACK *handle_results)PROTO((VoidPtr srch)), BLAST_MatrixPtr matrix));
445
555
/* BlastTwoSequencesEx: Bioseq-based counterpart of the ...ByLoc variants;
   takes the two sequences as BioseqPtr (bsp1, bsp2), a program name and
   an options block, and returns a SeqAlignPtr.
   other_returns and error_returns are ValNodePtr* parameters.
   NOTE(review): presumably out-parameters owned by the caller on
   return -- confirm. */
SeqAlignPtr LIBCALL BlastTwoSequencesEx PROTO((BioseqPtr bsp1, BioseqPtr bsp2, CharPtr progname, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns));
457
567
BlastSearchBlkPtr LIBCALL
458
568
BlastSequencesOnTheFlyEx PROTO((BlastSearchBlkPtr search, BioseqPtr subject_bsp));
570
/*** PSIBLAST2Sequences API ***/
572
B2SPssmSetupSearch PROTO((BlastSearchBlkPtr search, SeqLocPtr pssm_slp,
573
BLAST_MatrixPtr matrix));
576
B2SPssmCleanUpSearch PROTO((BlastSearchBlkPtr search, BLAST_MatrixPtr matrix));
579
B2SPssmOnTheFly PROTO((BlastSearchBlkPtr search, BioseqPtr subj_bsp));
582
B2SPssmOnTheFlyByLoc PROTO((BlastSearchBlkPtr search, SeqLocPtr subj_slp));
584
/* Compare pssm against all sequences in target_seqs.
585
Returns an array of length ntargets with the corresponding alignments.
586
Caller is responsible for deallocating the return value */
587
SeqAlignPtr * LIBCALL
588
B2SPssmMultipleQueries PROTO((SeqLocPtr pssm_slp, BLAST_MatrixPtr matrix,
589
SeqLocPtr *target_seqs, Int4 ntargets, BLAST_OptionsBlkPtr options));
591
/*** END PSIBLAST2Sequences API ***/
460
594
/* SumBlastGetGappedAlignmentTraceback: takes a search block, a hit
   number, two Boolean flags (reverse, ordinal_number) and a subject
   sequence buffer with its length; returns a SeqAlignPtr.
   NOTE(review): presumably hit_number indexes the search's hit list and
   ordinal_number selects how the subject is identified -- confirm against
   the implementation. */
SeqAlignPtr LIBCALL SumBlastGetGappedAlignmentTraceback PROTO((BlastSearchBlkPtr search, Int4 hit_number, Boolean reverse, Boolean ordinal_number, Uint1Ptr subject, Int4 subject_length));
596
/* SumBlastGetGappedAlignmentEx: extended variant returning a Boolean.
   Beyond the parameters of SumBlastGetGappedAlignmentTraceback it takes
   do_traceback, a SeqAlignPtr PNTR seqalignP, a BlastHitRangePtr bhrp and
   an Int2 query_number.
   NOTE(review): seqalignP is presumably the output alignment, filled only
   when do_traceback is TRUE; the meaning of the Boolean result is not
   visible here -- confirm. */
Boolean LIBCALL SumBlastGetGappedAlignmentEx PROTO((BlastSearchBlkPtr search, Int4 hit_number, Boolean reverse, Boolean ordinal_number, Uint1Ptr subject, Int4 subject_length, Boolean do_traceback, SeqAlignPtr PNTR seqalignP, BlastHitRangePtr bhrp, Int2 query_number));
464
599
Performs a complete BLAST search and returns a SeqAlign.
469
604
/* BioseqBlastEngineWithCallback: search-engine entry point taking the
   query as a BioseqPtr, a program name, a database name and an options
   block; returns a SeqAlignPtr.
   callback receives (Int4 done, Int4 positives) and returns int;
   handle_results receives a VoidPtr (srch) and returns int.
   NOTE(review): other_returns/error_returns are presumably out-parameters
   and callback a progress hook -- confirm. */
SeqAlignPtr LIBCALL BioseqBlastEngineWithCallback PROTO((BioseqPtr bsp, CharPtr progname, CharPtr database, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), int (LIBCALLBACK *handle_results)PROTO((VoidPtr srch))));
606
/* BioseqBlastEngineWithCallbackMult: same parameter list as
   BioseqBlastEngineWithCallback with one extra trailing argument,
   QueriesPtr mult_queries. Returns a SeqAlignPtr.
   NOTE(review): mult_queries presumably describes concatenated queries,
   as for the other mult_queries parameters in this header -- confirm. */
SeqAlignPtr LIBCALL BioseqBlastEngineWithCallbackMult PROTO((BioseqPtr bsp, CharPtr progname, CharPtr database, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), int (LIBCALLBACK *handle_results)PROTO((VoidPtr srch)), QueriesPtr mult_queries)); /* AM: Added mult_queries param. */
471
608
/* BioseqBlastEngineEx: Bioseq-query search entry point that additionally
   takes seqid_list, gi_list and gi_list_total; it has a progress-style
   callback (Int4 done, Int4 positives) but no handle_results parameter.
   Returns a SeqAlignPtr.
   NOTE(review): seqid_list/gi_list presumably limit the search to the
   listed database sequences -- confirm. */
SeqAlignPtr LIBCALL BioseqBlastEngineEx PROTO((BioseqPtr bsp, CharPtr progname, CharPtr database, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total));
473
610
/* BioseqBlastEngineByLoc: search entry point taking the query as a
   SeqLocPtr (slp) rather than a BioseqPtr, plus program name, database
   name, options, the two ValNodePtr* return lists and a callback taking
   (Int4 done, Int4 positives). Returns a SeqAlignPtr. */
SeqAlignPtr LIBCALL BioseqBlastEngineByLoc PROTO((SeqLocPtr slp, CharPtr progname, CharPtr database, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives))));
475
612
/* BioseqBlastEngineByLocEx: BioseqBlastEngineByLoc's parameter list
   extended with seqid_list, gi_list and gi_list_total. Returns a
   SeqAlignPtr.
   NOTE(review): gi_list_total is presumably the number of entries in
   gi_list -- confirm. */
SeqAlignPtr LIBCALL BioseqBlastEngineByLocEx PROTO((SeqLocPtr slp, CharPtr progname, CharPtr database, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total));
477
/* BioseqBlastEngineByLocWithCallback: BioseqBlastEngineByLocEx's
   parameter list plus handle_results, a caller-supplied function taking a
   VoidPtr (srch) and returning int. Returns a SeqAlignPtr.
   NOTE(review): an identical declaration of this function appears again
   directly after this one; the repetition is harmless to the compiler but
   redundant -- confirm and drop one. */
SeqAlignPtr LIBCALL BioseqBlastEngineByLocWithCallback PROTO((SeqLocPtr slp, CharPtr progname, CharPtr database, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total, int (LIBCALLBACK *handle_results)PROTO((VoidPtr srch))));
614
/* BioseqBlastEngineByLocWithCallback: SeqLoc-query search entry point
   with both a (done, positives) callback and a handle_results callback
   taking a VoidPtr (srch), plus seqid_list, gi_list and gi_list_total.
   Returns a SeqAlignPtr.
   NOTE(review): handle_results is presumably invoked with the search
   structure so the caller can process results itself -- confirm. */
SeqAlignPtr LIBCALL BioseqBlastEngineByLocWithCallback PROTO((SeqLocPtr slp, CharPtr progname, CharPtr database, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total, int (LIBCALLBACK *handle_results)PROTO((VoidPtr srch))));
616
/* BioseqBlastEngineByLocWithCallbackMult: same parameter list as
   BioseqBlastEngineByLocWithCallback with one extra trailing argument,
   QueriesPtr mult_queries. Returns a SeqAlignPtr.
   NOTE(review): mult_queries presumably describes the individual queries
   of a concatenated query -- confirm. */
SeqAlignPtr LIBCALL BioseqBlastEngineByLocWithCallbackMult PROTO((SeqLocPtr slp, CharPtr progname, CharPtr database, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total, int (LIBCALLBACK *handle_results)PROTO((VoidPtr srch)), QueriesPtr mult_queries)); /* --KM added mult_queries param */
479
618
Prints error messages.
482
621
/* BlastErrorPrint: takes a ValNodePtr error_return and returns nothing.
   NOTE(review): presumably prints each error message held in the list;
   the output destination is not visible here -- confirm. */
void LIBCALL BlastErrorPrint PROTO((ValNodePtr error_return));
483
622
/* BlastErrorPrintExtra: variant of BlastErrorPrint with two additional
   arguments, a Boolean errpostex and a FILE* fp. Returns nothing.
   NOTE(review): presumably errpostex selects reporting through the
   toolkit error-posting facility and fp is an alternative output
   stream -- confirm against the implementation. */
void LIBCALL BlastErrorPrintExtra PROTO((ValNodePtr error_return, Boolean errpostex, FILE* fp));
486
Prints some header information.
489
/* BlastGetVersionNumber: takes no arguments and returns a CharPtr.
   NOTE(review): presumably the BLAST version string; whether the caller
   must free it is not visible here -- confirm. */
CharPtr LIBCALL BlastGetVersionNumber PROTO((void));
491
/* BlastGetReference: takes a Boolean html and returns a CharPtr.
   NOTE(review): presumably the literature reference text, marked up as
   HTML when html is TRUE; ownership of the result is not visible
   here -- confirm. */
CharPtr LIBCALL BlastGetReference PROTO((Boolean html));
493
/* BlastPrintReference: takes a Boolean html, an Int4 line_length and an
   output FILE* (outfp); returns a Boolean.
   NOTE(review): presumably writes the reference text to outfp wrapped at
   line_length and reports success -- confirm. */
Boolean LIBCALL BlastPrintReference PROTO((Boolean html, Int4 line_length, FILE *outfp));
494
/* MegaBlastPrintReference: same parameter list and return type as
   BlastPrintReference (html, line_length, outfp -> Boolean).
   NOTE(review): presumably prints the MegaBLAST-specific reference --
   confirm. */
Boolean LIBCALL MegaBlastPrintReference PROTO((Boolean html, Int4 line_length, FILE *outfp));
496
/* BlastGetPhiReference: takes a Boolean html and returns a CharPtr.
   NOTE(review): presumably the PHI-BLAST counterpart of
   BlastGetReference -- confirm, including who owns the result. */
CharPtr LIBCALL BlastGetPhiReference PROTO((Boolean html));
498
/* BlastPrintPhiReference: same parameter list and return type as
   BlastPrintReference (html, line_length, outfp -> Boolean).
   NOTE(review): presumably prints the PHI-BLAST reference -- confirm. */
Boolean LIBCALL BlastPrintPhiReference PROTO((Boolean html, Int4 line_length, FILE *outfp));
500
/* BlastPrintVersionInfo: takes a program name, a Boolean html and an
   output FILE* (outfp); returns a Boolean. Declared without LIBCALL,
   unlike most neighbouring prototypes.
   NOTE(review): presumably writes program/version information to
   outfp -- confirm. */
Boolean BlastPrintVersionInfo PROTO((CharPtr program, Boolean html, FILE *outfp));
501
/* BlastPrintVersionInfoEx: BlastPrintVersionInfo's parameters with
   explicit version and date strings inserted before outfp; returns a
   Boolean. Declared without LIBCALL.
   NOTE(review): presumably prints the supplied version/date instead of
   built-in values -- confirm. */
Boolean BlastPrintVersionInfoEx PROTO((CharPtr program, Boolean html, CharPtr version, CharPtr date, FILE *outfp));
503
/* BlastGetReleaseDate: takes no arguments and returns a CharPtr.
   NOTE(review): presumably the release-date string; ownership of the
   result is not visible here -- confirm. */
CharPtr LIBCALL BlastGetReleaseDate PROTO((void));
505
625
/* BlastGetProgramNumber: maps a program name string (blast_program) to a
   Uint1 value.
   NOTE(review): the value returned for an unrecognised name is not
   visible here -- confirm. */
Uint1 LIBCALL BlastGetProgramNumber PROTO((CharPtr blast_program));
506
626
/* BlastGetProgramName: maps a Uint1 program number to a CharPtr.
   NOTE(review): presumably the inverse of BlastGetProgramNumber; whether
   the result is static or must be freed is not visible here -- confirm. */
CharPtr LIBCALL BlastGetProgramName PROTO((Uint1 number));
556
676
/* ------ Functions related to Smith-Waterman algorithm ------ */
558
/* BLbasicSmithWatermanScoreOnly (declared here without the PROTO
   wrapper): takes a match sequence and a query, each with its length, a
   score matrix, gap open/extend costs, a Karlin block, an effective
   search space and a positionSpecific flag. Returns an Nlm_FloatHi.
   matchSeqEnd, queryEnd and score are pointer parameters.
   NOTE(review): the pointers presumably receive the alignment end
   positions and raw score, and the return value is presumably an
   e-value -- confirm. The same function is declared again in this header
   using PROTO((...)). */
Nlm_FloatHi BLbasicSmithWatermanScoreOnly(Uint1 * matchSeq, Int4 matchSeqLength, Uint1 *query, Int4 queryLength, BLAST_Score **matrix, Int4 gapOpen, Int4 gapExtend, Int4 *matchSeqEnd, Int4 *queryEnd, Int4 *score, BLAST_KarlinBlkPtr kbp, Nlm_FloatHi effSearchSpace, Boolean positionSpecific);
560
/* BLSmithWatermanFindStart (declared here without the PROTO wrapper):
   takes the two sequences with lengths, a score matrix, gap costs, the
   end positions (matchSeqEnd, queryEnd) and a score by value, and two
   pointer parameters matchSeqStart and queryStart. Returns an Int4.
   NOTE(review): presumably locates the alignment start corresponding to
   the given end/score and writes it through the two pointers; the meaning
   of the return value is not visible here -- confirm. The same function
   is declared again in this header using PROTO((...)). */
Int4 BLSmithWatermanFindStart(Uint1 * matchSeq, Int4 matchSeqLength, Uint1 *query, Int4 queryLength, BLAST_Score **matrix, Int4 gapOpen, Int4 gapExtend, Int4 matchSeqEnd, Int4 queryEnd, Int4 score, Int4 *matchSeqStart, Int4 *queryStart, Boolean positionSpecific);
678
/* BLbasicSmithWatermanScoreOnly: low-level Smith-Waterman routine taking
   a match sequence and a query (each with its length), a score matrix,
   gap open/extend costs, a Karlin block, an effective search space and a
   positionSpecific flag. Returns an Nlm_FloatHi.
   matchSeqEnd, queryEnd and score are pointer parameters.
   NOTE(review): the pointers presumably receive the end coordinates and
   score of the best local alignment; the return value is presumably an
   e-value derived from kbp and effSearchSpace -- confirm. */
Nlm_FloatHi BLbasicSmithWatermanScoreOnly PROTO((Uint1 * matchSeq, Int4 matchSeqLength, Uint1 *query, Int4 queryLength, BLAST_Score **matrix, Int4 gapOpen, Int4 gapExtend, Int4 *matchSeqEnd, Int4 *queryEnd, Int4 *score, BLAST_KarlinBlkPtr kbp, Nlm_FloatHi effSearchSpace, Boolean positionSpecific));
680
/* BLSmithWatermanFindStart: low-level Smith-Waterman routine taking the
   two sequences with lengths, a score matrix, gap costs, known end
   positions (matchSeqEnd, queryEnd) and score by value, plus pointer
   parameters matchSeqStart and queryStart. Returns an Int4.
   NOTE(review): presumably finds the start of the alignment ending at the
   given positions and writes it through the pointers; the meaning of the
   return value is not visible here -- confirm. */
Int4 BLSmithWatermanFindStart PROTO((Uint1 * matchSeq, Int4 matchSeqLength, Uint1 *query, Int4 queryLength, BLAST_Score **matrix, Int4 gapOpen, Int4 gapExtend, Int4 matchSeqEnd, Int4 queryEnd, Int4 score, Int4 *matchSeqStart, Int4 *queryStart, Boolean positionSpecific));
683
CheckStartForGappedAlignment PROTO((BlastSearchBlkPtr search, BLAST_HSPPtr hsp, Uint1Ptr query, Uint1Ptr subject, Int4Ptr PNTR matrix));
685
/* GetStartForGappedAlignment: takes a search block, an HSP, query and
   subject sequence buffers and a score matrix; returns an Int4.
   NOTE(review): presumably returns the position within the HSP from which
   a gapped extension should start; which coordinate system the value uses
   is not visible here -- confirm. */
Int4 GetStartForGappedAlignment PROTO((BlastSearchBlkPtr search, BLAST_HSPPtr hsp, Uint1Ptr query, Uint1Ptr subject, Int4Ptr PNTR matrix));
687
BioseqPtr BlastMakeTempProteinBioseq PROTO((Uint1Ptr sequence, Int4 length,
690
/* HackSeqLocId: takes a SeqLocPtr and a SeqIdPtr and returns nothing.
   The revision log in this header records that it was made public because
   Cn3D needs it.
   NOTE(review): presumably replaces the Seq-id referenced by slp with id;
   whether id is copied or adopted is not visible here -- confirm. */
void HackSeqLocId PROTO((SeqLocPtr slp, SeqIdPtr id));
692
/* --------------------------------------------------------------------
694
* BLAST_Wizard & related functions.
696
* Use BLAST_WizardOptionsBlkInit to initialize a
697
* BLAST_WizardOptionsBlk by setting every field to zero or FALSE.
699
* Use BLAST_WizardOptionsBlkDone to free any memory owned by a
700
* BLAST_WizardOptionsBlk, excluding the memory for the
701
* BLAST_WizardOptionsBlk itself, which should be allocated on the
704
* Use BLAST_WizardOptionsMaskInit to initialize a
705
* BLAST_WizardOptionsMask by setting every field to FALSE, indicating
706
* that the corresponding fields of some BLAST_WizardOptionsBlk aren't
709
* Use BLAST_Wizard to initialize a BLAST_WizardOptionsBlk according
710
* to program, service, options, and mask. Alignments, descriptions,
711
* and errors are output parameters. Alignments and descriptions are
712
* optional; error is required. BLAST_Wizard returns a null pointer
713
* if and only if *error != 0 on exit. The returned object must be
714
* freed with BLASTOptionDelete; if *error != 0, then *error must be
715
* freed with MemFree.
717
* --------------------------------------------------------------------
721
BLAST_WizardOptionsBlkInit(
722
BLAST_WizardOptionsBlkPtr options);
725
BLAST_WizardOptionsBlkDone(
726
BLAST_WizardOptionsBlkPtr options);
729
BLAST_WizardOptionsMaskInit(
730
BLAST_WizardOptionsMaskPtr mask);
736
BLAST_WizardOptionsBlkPtr options,
737
BLAST_WizardOptionsMaskPtr mask,
562
742
/* ----------------------------------------------------------- */