1
/* $Id: blast_gapalign.h,v 1.54 2004/08/10 14:52:00 ivanov Exp $
2
* ===========================================================================
5
* National Center for Biotechnology Information
7
* This software/database is a "United States Government Work" under the
8
* terms of the United States Copyright Act. It was written as part of
9
* the author's official duties as a United States Government employee and
10
* thus cannot be copyrighted. This software/database is freely available
11
* to the public for use. The National Library of Medicine and the U.S.
12
* Government have not placed any restriction on its use or reproduction.
14
* Although all reasonable efforts have been taken to ensure the accuracy
15
* and reliability of the software and data, the NLM and the U.S.
16
* Government do not and cannot warrant the performance or results that
17
* may be obtained by using this software or data. The NLM and the U.S.
18
* Government disclaim all warranties, express or implied, including
19
* warranties of performance, merchantability or fitness for any particular
22
* Please cite the author in any work or product based on this material.
24
* ===========================================================================
26
* Author: Ilya Dondoshansky
30
/** @file blast_gapalign.h
31
* Structures and functions prototypes used for BLAST gapped extension
32
* @todo FIXME: elaborate on contents.
35
#ifndef __BLAST_GAPALIGN__
36
#define __BLAST_GAPALIGN__
38
#include <algo/blast/core/blast_def.h>
39
#include <algo/blast/core/blast_options.h>
40
#include <algo/blast/core/blast_extend.h>
41
#include <algo/blast/core/gapinfo.h>
42
#include <algo/blast/core/greedy_align.h>
43
#include <algo/blast/core/blast_hits.h>
49
/** Diagonal distance cutoff when looking for HSP inclusion in Mega BLAST */
50
#define MB_DIAG_NEAR 30
52
/** Diagonal distance between HSPs for which one can be considered included in
53
the other in Mega BLAST */
54
#define MB_DIAG_CLOSE 6
56
/** Split subject sequences if longer than this */
57
#define MAX_DBSEQ_LEN 5000000
59
/** Structure supporting the gapped alignment */
60
typedef struct BlastGapAlignStruct {
61
Boolean positionBased; /**< Is this PSI-BLAST? */
62
Int4 position_offset; /**< Offset into the PSSM for the present sequence */
63
GapStateArrayStruct* state_struct; /**< Structure to keep extension
65
GapEditBlock* edit_block; /**< The traceback (gap) information */
66
SGreedyAlignMem* greedy_align_mem;/**< Preallocated memory for the greedy
68
BlastScoreBlk* sbp; /**< Pointer to the scoring information block */
69
Int4 gap_x_dropoff; /**< X-dropoff parameter to use */
70
Int4 query_start, query_stop;/**< Return values: query offsets */
71
Int4 subject_start, subject_stop;/**< Return values: subject offsets */
72
Int4 score; /**< Return value: alignment score */
73
double percent_identity;/**< Return value: percent identity - filled only
74
by the greedy non-affine alignment algorithm */
75
} BlastGapAlignStruct;
77
/** Initializes the BlastGapAlignStruct structure
78
* @param score_params Parameters related to scoring alignments [in]
79
* @param ext_params parameters related to gapped extension [in]
80
* @param max_subject_length Maximum length of any subject sequence (needed
81
* for greedy extension allocation only) [in]
82
* @param sbp The scoring information block [in]
83
* @param gap_align_ptr The BlastGapAlignStruct structure [out]
86
BLAST_GapAlignStructNew(const BlastScoringParameters* score_params,
87
const BlastExtensionParameters* ext_params,
88
Uint4 max_subject_length, BlastScoreBlk* sbp,
89
BlastGapAlignStruct** gap_align_ptr);
91
/** Deallocates memory in the BlastGapAlignStruct structure */
94
BLAST_GapAlignStructFree(BlastGapAlignStruct* gap_align);
96
/** Mega BLAST function performing gapped alignment:
97
* Sorts initial HSPs by diagonal;
99
* - Removes HSP if it is included in another already extended HSP;
100
* - If required, performs ungapped extension;
101
* - Performs greedy gapped extension;
102
* - Saves qualifying HSPs with gapped information into an HSP list
104
* @param program_number Not needed: added for prototype consistency.
105
* @param query The query sequence [in]
106
* @param query_info Query information structure, containing offsets into
107
* the concatenated sequence [in]
108
* @param subject The subject sequence [in]
109
* @param gap_align A placeholder for gapped alignment information and
110
* score block. [in] [out]
111
* @param score_params Options related to scoring alignments [in]
112
* @param ext_params Options related to alignment extension [in]
113
* @param hit_params Options related to saving HSPs [in]
114
* @param init_hitlist Contains all the initial hits [in]
115
* @param hsp_list_ptr List of HSPs with full extension information [out]
116
* @param gapped_stats Return statistics (not filled if NULL) [out]
118
Int2 BLAST_MbGetGappedScore(EBlastProgramType program_number,
119
BLAST_SequenceBlk* query, BlastQueryInfo* query_info,
120
BLAST_SequenceBlk* subject,
121
BlastGapAlignStruct* gap_align,
122
const BlastScoringParameters* score_params,
123
const BlastExtensionParameters* ext_params,
124
const BlastHitSavingParameters* hit_params,
125
BlastInitHitList* init_hitlist,
126
BlastHSPList** hsp_list_ptr, BlastGappedStats* gapped_stats);
130
/** Performs gapped extension for all non-Mega BLAST programs, given
131
* that ungapped extension has been done earlier.
132
* Sorts initial HSPs by score (from ungapped extension);
133
* Deletes HSPs that are included in already extended HSPs;
134
* Performs gapped extension;
135
* Saves HSPs into an HSP list.
136
* @param program_number Type of BLAST program [in]
137
* @param query The query sequence block [in]
138
* @param query_info Query information structure, containing offsets into
139
* the concatenated sequence [in]
140
* @param subject The subject sequence block [in]
141
* @param gap_align The auxiliary structure for gapped alignment [in]
142
* @param score_params Options and parameters related to scoring [in]
143
* @param ext_params Options and parameters related to extensions [in]
144
* @param hit_params Options related to saving hits [in]
145
* @param init_hitlist List of initial HSPs (offset pairs with additional
146
* information from the ungapped alignment performed earlier) [in]
147
* @param hsp_list_ptr Structure containing all saved HSPs [out]
148
* @param gapped_stats Return statistics (not filled if NULL) [out]
150
Int2 BLAST_GetGappedScore (EBlastProgramType program_number,
151
BLAST_SequenceBlk* query, BlastQueryInfo* query_info,
152
BLAST_SequenceBlk* subject,
153
BlastGapAlignStruct* gap_align,
154
const BlastScoringParameters* score_params,
155
const BlastExtensionParameters* ext_params,
156
const BlastHitSavingParameters* hit_params,
157
BlastInitHitList* init_hitlist,
158
BlastHSPList** hsp_list_ptr, BlastGappedStats* gapped_stats);
160
/** Perform a gapped alignment with traceback
161
* @param program Type of BLAST program [in]
162
* @param query The query sequence [in]
163
* @param subject The subject sequence [in]
164
* @param gap_align The gapped alignment structure [in] [out]
165
* @param score_params Scoring parameters [in]
166
* @param q_start Offset in query where to start alignment [in]
167
* @param s_start Offset in subject where to start alignment [in]
168
* @param query_length Maximal allowed extension in query [in]
169
* @param subject_length Maximal allowed extension in subject [in]
171
Int2 BLAST_GappedAlignmentWithTraceback(EBlastProgramType program,
172
Uint1* query, Uint1* subject,
173
BlastGapAlignStruct* gap_align,
174
const BlastScoringParameters* score_params,
175
Int4 q_start, Int4 s_start, Int4 query_length, Int4 subject_length);
177
/** Greedy gapped alignment, with or without traceback.
178
* Given two sequences, relevant options and an offset pair, fills the
179
* gap_align structure with alignment endpoints and, if traceback is
180
* performed, gap information.
181
* @param query The query sequence [in]
182
* @param subject The subject sequence [in]
183
* @param query_length The query sequence length [in]
184
* @param subject_length The subject sequence length [in]
185
* @param gap_align The structure holding various information and memory
186
* needed for gapped alignment [in] [out]
187
* @param score_params Parameters related to scoring alignments [in]
188
* @param q_off Starting offset in query [in]
189
* @param s_off Starting offset in subject [in]
190
* @param compressed_subject Is subject sequence compressed? [in]
191
* @param do_traceback Should traceback be saved? [in]
194
BLAST_GreedyGappedAlignment(Uint1* query, Uint1* subject,
195
Int4 query_length, Int4 subject_length, BlastGapAlignStruct* gap_align,
196
const BlastScoringParameters* score_params,
197
Int4 q_off, Int4 s_off, Boolean compressed_subject, Boolean do_traceback);
199
/** Perform a gapped alignment with traceback for PHI BLAST
200
* @param query The query sequence [in]
201
* @param subject The subject sequence [in]
202
* @param gap_align The gapped alignment structure [in] [out]
203
* @param score_params Scoring parameters [in]
204
* @param q_start Offset in query where to start alignment [in]
205
* @param s_start Offset in subject where to start alignment [in]
206
* @param query_length Maximal allowed extension in query [in]
207
* @param subject_length Maximal allowed extension in subject [in]
209
Int2 PHIGappedAlignmentWithTraceback(Uint1* query, Uint1* subject,
210
BlastGapAlignStruct* gap_align,
211
const BlastScoringParameters* score_params,
212
Int4 q_start, Int4 s_start, Int4 query_length, Int4 subject_length);
214
/** Convert initial HSP list to an HSP list: to be used in ungapped search.
215
* Ungapped data must be available in the initial HSP list for this function
217
* @param init_hitlist List of initial HSPs with ungapped extension
219
* @param query_info Query information structure, containing offsets into
220
* the concatenated queries/strands/frames [in]
221
* @param subject Subject sequence block containing frame information [in]
222
* @param hit_options Hit saving options [in]
223
* @param hsp_list_ptr HSPs in the final form [out]
225
Int2 BLAST_GetUngappedHSPList(BlastInitHitList* init_hitlist,
226
BlastQueryInfo* query_info, BLAST_SequenceBlk* subject,
227
const BlastHitSavingOptions* hit_options,
228
BlastHSPList** hsp_list_ptr);
230
/** Preliminary gapped alignment for PHI BLAST.
231
* @param program_number Type of BLAST program [in]
232
* @param query The query sequence block [in]
233
* @param query_info Query information structure, containing offsets into
234
* the concatenated sequence [in]
235
* @param subject The subject sequence block [in]
236
* @param gap_align The auxiliary structure for gapped alignment [in]
237
* @param score_params Options related to scoring [in]
238
* @param ext_params Options and parameters related to extensions [in]
239
* @param hit_params Options related to saving hits [in]
240
* @param init_hitlist List of initial HSPs, including offset pairs and
241
* pattern match lengths [in]
242
* @param hsp_list_ptr Structure containing all saved HSPs [out]
243
* @param gapped_stats Return statistics (not filled if NULL) [out]
245
Int2 PHIGetGappedScore (EBlastProgramType program_number,
246
BLAST_SequenceBlk* query, BlastQueryInfo* query_info,
247
BLAST_SequenceBlk* subject,
248
BlastGapAlignStruct* gap_align,
249
const BlastScoringParameters* score_params,
250
const BlastExtensionParameters* ext_params,
251
const BlastHitSavingParameters* hit_params,
252
BlastInitHitList* init_hitlist,
253
BlastHSPList** hsp_list_ptr, BlastGappedStats* gapped_stats);
255
/** Adjusts range of subject sequence to be passed for gapped extension,
256
* taking into account the length and starting position of the alignment in
258
* @param subject_offset_ptr Start of the subject range [out]
259
* @param subject_length_ptr Length of the subject range [out]
260
* @param query_offset Offset in query from which alignment starts [in]
261
* @param query_length Length of the query sequence [in]
262
* @param start_shift The offset by which the output range is shifted with
263
* respect to the full subject sequence [out]
266
AdjustSubjectRange(Int4* subject_offset_ptr, Int4* subject_length_ptr,
267
Int4 query_offset, Int4 query_length, Int4* start_shift);
269
/** Function to look for the highest scoring window (of size HSP_MAX_WINDOW)
270
* in an HSP and return the middle of this. Used by the gapped-alignment
271
* functions to start the gapped alignments.
272
* @param query The query sequence [in]
273
* @param subject The subject sequence [in]
274
* @param sbp Scoring block, containing matrix [in]
275
* @param q_start Starting offset in query [in]
276
* @param q_length Length of HSP in query [in]
277
* @param s_start Starting offset in subject [in]
278
* @param s_length Length of HSP in subject [in]
279
* @return The offset at which alignment should be started [out]
282
BlastGetStartForGappedAlignment (Uint1* query, Uint1* subject,
283
const BlastScoreBlk* sbp, Uint4 q_start, Uint4 q_length,
284
Uint4 s_start, Uint4 s_length);
289
#endif /* !__BLAST_GAPALIGN__ */