1
/* $Id: greedy_align.c,v 1.38 2005/11/18 14:43:58 madden Exp $
1
/* $Id: greedy_align.c,v 1.41 2006/02/02 17:52:23 papadopo Exp $
2
2
* ===========================================================================
4
4
* PUBLIC DOMAIN NOTICE
37
37
#ifndef SKIP_DOXYGEN_PROCESSING
38
38
static char const rcsid[] =
39
"$Id: greedy_align.c,v 1.38 2005/11/18 14:43:58 madden Exp $";
39
"$Id: greedy_align.c,v 1.41 2006/02/02 17:52:23 papadopo Exp $";
40
40
#endif /* SKIP_DOXYGEN_PROCESSING */
42
42
#include <algo/blast/core/greedy_align.h>
140
140
@param diag_lower Array of lower bounds on diagonal index [in]
141
141
@param diag_upper Array of upper bounds on diagonal index [in]
142
142
@param d Starting distance [in][out]
143
@param diag Starting diagonal [in]
143
@param diag The diagonal of the current traceback position [in]
144
144
@param op_cost The sum of the match and mismatch scores [in]
145
145
@param seq2_index The offset into the second sequence after the traceback
146
146
operation has completed [out]
154
154
Int4 new_seq2_index;
156
/* the goal here is to choose the largest seq2 offset
157
that leads to the current distance. There are three
158
choices possible and each is checked in turn */
156
160
if (diag >= diag_lower[(*d) - op_cost] &&
157
161
diag <= diag_upper[(*d) - op_cost]) {
186
190
@param diag_lower Array of lower bounds on diagonal index [in]
187
191
@param diag_upper Array of upper bounds on diagonal index [in]
188
192
@param d Starting distance [in][out]
189
@param diag Starting diagonal [in]
193
@param diag The diagonal of the current traceback position [in]
190
194
@param gap_open open a gap [in]
191
195
@param gap_extend (Modified) cost to extend a gap [in]
192
196
@param IorD The state of the traceback at present [in]
201
205
Int4 new_seq2_index;
202
206
Int4 gap_open_extend = gap_open + gap_extend;
208
/* as with the previous routine, the traceback operation
209
that leads to the current one must be determined. Either
210
a gap is opened from a previous run of matches, or a
211
gap is extended. If several traceback choices are
212
available, choose the one that generates the largest
215
/* if the previous traceback operation is a gap, then it
216
starts one diagonal away from the current diagonal... */
204
218
if (IorD == eGapAlignIns)
205
219
new_diag = diag - 1;
207
221
new_diag = diag + 1;
223
/* ...and its seq2 offset is fixed */
209
225
if (new_diag >= diag_lower[(*d) - gap_extend] &&
210
226
new_diag <= diag_upper[(*d) - gap_extend]) {
220
236
new_seq2_index = -100;
239
/* make the previous traceback operation a match if it is
240
valid to do so and the resulting seq2 offset exceeds the
241
one derived from extending a gap */
223
243
if (new_diag >= diag_lower[(*d) - gap_open_extend] &&
224
244
new_diag <= diag_upper[(*d) - gap_open_extend] &&
236
256
/** During the traceback for a non-affine greedy alignment,
237
compute the distance that will result from the next
257
compute the diagonal that will result from the next
238
258
traceback operation
240
260
@param last_seq2_off Array of offsets into the second sequence;
245
265
@param diag Index of diagonal that produced the starting distance [in]
246
266
@param seq2_index The offset into the second sequence after the traceback
247
267
operation has completed [out]
248
@return The next distance remaining after the traceback operation
268
@return The diagonal resulting from the next traceback operation
251
272
s_GetNextNonAffineTback(Int4 **last_seq2_off, Int4 d,
252
273
Int4 diag, Int4 *seq2_index)
275
/* choose the traceback operation that results in the
276
largest seq2 offset at this point, then compute the
277
new diagonal that is implied by the operation */
254
279
if (last_seq2_off[d-1][diag-1] >
255
280
MAX(last_seq2_off[d-1][diag], last_seq2_off[d-1][diag+1])) {
256
281
*seq2_index = last_seq2_off[d-1][diag-1];
282
return diag - 1; /* gap in seq2 */
259
284
if (last_seq2_off[d-1][diag] > last_seq2_off[d-1][diag+1]) {
260
285
*seq2_index = last_seq2_off[d-1][diag];
286
return diag; /* match */
263
288
*seq2_index = last_seq2_off[d-1][diag+1];
289
return diag + 1; /* gap in seq1 */
267
292
/** see greedy_align.h for description */
296
321
the way. Instead of score, this code tracks the 'distance'
297
322
(number of mismatches plus number of gaps) between seq1
298
323
and seq2. Instead of walking through sequence offsets, it
299
walks through diagonals that can achieve a given distance */
324
walks through diagonals that can achieve a given distance.
326
Note that in what follows, the numbering of diagonals implies
327
a dot matrix where increasing seq1 offsets go to the right on
328
the x axis, and increasing seq2 offsets go up the y axis.
329
The gapped alignment thus proceeds up and to the right in
330
the graph, and diagonals are numbered increasing to the right */
381
412
seq1_index = index;
383
414
if (index == len1 || index == len2) {
415
/* Return the number of differences, which is zero here */
384
417
if (edit_block != NULL)
385
418
GapPrelimEditBlockAdd(edit_block, eGapAlignSub, index);
386
return 0; /* This function returns number of differences, here it is zero. */
389
422
/* set up the memory pool */
651
683
new_diag = s_GetNextNonAffineTback(last_seq2_off, d,
652
684
best_diag, &new_seq2_index);
653
new_seq1_index = new_seq2_index + new_diag - diag_origin;
655
686
if (new_diag == best_diag) {
764
794
the way. Instead of score, this code tracks the 'distance'
765
795
(number of mismatches plus number of gaps) between seq1
766
796
and seq2. Instead of walking through sequence offsets, it
767
walks through diagonals that can achieve a given distance */
797
walks through diagonals that can achieve a given distance
799
Note that in what follows, the numbering of diagonals implies
800
a dot matrix where increasing seq1 offsets go to the right on
801
the x axis, and increasing seq2 offsets go up the y axis.
802
The gapped alignment thus proceeds up and to the right in
803
the graph, and diagonals are numbered increasing to the right */
858
894
seq1_index = index;
860
896
if (index == len1 || index == len2) {
897
/* return the score of the run of matches */
861
898
if (edit_block != NULL)
862
899
GapPrelimEditBlockAdd(edit_block, eGapAlignSub, index);
863
return (index*match_score);
900
return index * match_score;
866
903
/* set up the memory pool */
888
925
can come from distances further back than d-1 (which is
889
926
sufficient for non-affine alignment). Where non-affine
890
927
alignment only needs to track the current bounds on diagonals
891
to test, the present code must also track bounds for
892
max_penalty previous distances. These share the same
893
preallocated array */
928
to test, the present code must also track the upper and
929
lower bounds on diagonals for max_penalty previous distances.
930
These share the same preallocated array */
895
932
diag_lower = aux_data->diag_bounds;
896
933
diag_upper = aux_data->diag_bounds +
1211
1248
diag_lower, diag_upper, &d, best_diag,
1212
1249
op_cost, &new_seq2_index);
1214
if (seq2_index - new_seq2_index > 0)
1251
/* Note that a block of substitutions is issued only
1252
if the seq2 offset returned does not exceed the
1253
seq2 offset expected. The above may be called several
1254
times before this happens, possibly because other
1255
paths can achieve a better seq2 offset than we're
1256
looking for but those other paths are not reachable
1257
from the current point in the traceback. Each call
1258
to s_GetNextAffineTbackFromMatch reduces the current
1259
distance, so eventually the process terminates with
1260
a valid block of substitutions */
1262
if (seq2_index - new_seq2_index > 0) {
1215
1263
GapPrelimEditBlockAdd(edit_block, eGapAlignSub,
1216
1264
seq2_index - new_seq2_index);
1218
seq2_index = new_seq2_index;
1265
seq2_index = new_seq2_index;
1220
1268
else if (state == eGapAlignIns) {
1222
1270
state = s_GetNextAffineTbackFromIndel(last_seq2_off,
1223
1271
diag_lower, diag_upper, &d, best_diag,
1224
1272
gap_open, gap_extend, eGapAlignIns);
1226
1274
GapPrelimEditBlockAdd(edit_block, eGapAlignIns, 1);
1230
1278
GapPrelimEditBlockAdd(edit_block, eGapAlignDel, 1);
1231
1279
state = s_GetNextAffineTbackFromIndel(last_seq2_off,
1232
1280
diag_lower, diag_upper, &d, best_diag,