552
567
} else if (kGreedyTraceback) {
553
568
BLAST_GreedyGappedAlignment(query, adjusted_subject,
554
569
query_length, adjusted_s_length, gap_align,
555
score_params, q_start, s_start, FALSE, TRUE);
570
score_params, q_start, s_start, FALSE, TRUE,
557
573
BLAST_GappedAlignmentWithTraceback(program_number, query,
558
574
adjusted_subject, gap_align, score_params, q_start, s_start,
559
query_length, adjusted_s_length);
575
query_length, adjusted_s_length,
579
fence_error = (fence_hit && *fence_hit);
562
585
if (gap_align->score >= cutoff) {
563
586
Boolean delete_hsp = FALSE;
564
587
Blast_HSPUpdateWithTraceback(gap_align, hsp);
612
635
sfree(frame_offsets_a);
615
/* Remove any HSPs that share a starting or ending diagonal
616
with a higher-scoring HSP. */
617
Blast_HSPListPurgeHSPsWithCommonEndpoints(program_number, hsp_list);
619
/* Sort HSPs by score again, as the scores might have changed. */
620
Blast_HSPListSortByScore(hsp_list);
622
/* Remove any HSPs that are contained within other HSPs.
623
Since the list is sorted by score already, any HSP
624
contained by a previous HSP is guaranteed to have a
625
lower score, and may be purged. */
626
Blast_IntervalTreeReset(tree);
627
for (index = 0; index < hsp_list->hspcnt; index++) {
628
hsp = hsp_array[index];
639
/* Remove any HSPs that share a starting or ending diagonal
640
with a higher-scoring HSP. */
641
Blast_HSPListPurgeHSPsWithCommonEndpoints(program_number, hsp_list);
643
/* Sort HSPs by score again, as the scores might have changed. */
644
Blast_HSPListSortByScore(hsp_list);
646
/* Remove any HSPs that are contained within other HSPs.
647
Since the list is sorted by score already, any HSP
648
contained by a previous HSP is guaranteed to have a
649
lower score, and may be purged. */
650
Blast_IntervalTreeReset(tree);
651
for (index = 0; index < hsp_list->hspcnt; index++) {
652
hsp = hsp_array[index];
630
if (BlastIntervalTreeContainsHSP(tree, hsp, query_info, 0)) {
631
hsp_array[index] = Blast_HSPFree(hsp);
634
BlastIntervalTreeAddHSP(hsp, tree, query_info,
654
if (BlastIntervalTreeContainsHSP(tree, hsp, query_info,
655
hit_options->min_diag_separation)) {
656
hsp_array[index] = Blast_HSPFree(hsp);
659
BlastIntervalTreeAddHSP(hsp, tree, query_info,
638
665
tree = Blast_IntervalTreeFree(tree);
640
Blast_HSPListPurgeNullHSPs(hsp_list);
642
667
/* Free the local query_info structure, if necessary (RPS tblastn only) */
643
668
if (query_info != query_info_in)
644
669
sfree(query_info);
646
s_HSPListPostTracebackUpdate(program_number, hsp_list, query_info_in,
647
score_params, hit_params, sbp,
648
subject_blk->length);
672
/* Roll back this function by restoring the input HSP list. */
674
Blast_HSPListSwap(hsp_list, orig_hsplist);
675
Blast_HSPListFree(orig_hsplist);
677
if (gap_align->edit_script) {
678
gap_align->edit_script =
679
GapEditScriptDelete(gap_align->edit_script);
682
Blast_HSPListPurgeNullHSPs(hsp_list);
685
Blast_HSPListFree(orig_hsplist);
687
s_HSPListPostTracebackUpdate(program_number, hsp_list, query_info_in,
688
score_params, hit_params, sbp,
689
subject_blk->length);
747
789
EBlastEncoding Blast_TracebackGetEncoding(EBlastProgramType program_number)
749
EBlastEncoding retval = eBlastEncodingError;
751
switch (program_number) {
752
case eBlastTypeBlastn:
753
case eBlastTypePhiBlastn:
754
retval = eBlastEncodingNucleotide;
756
case eBlastTypeBlastp:
757
case eBlastTypeRpsBlast:
758
case eBlastTypeBlastx:
759
case eBlastTypeRpsTblastn:
760
case eBlastTypePsiBlast:
761
case eBlastTypePhiBlastp:
762
retval = eBlastEncodingProtein;
764
case eBlastTypeTblastn:
765
case eBlastTypeTblastx:
766
retval = eBlastEncodingNcbi4na;
769
retval = eBlastEncodingError;
791
if (Blast_SubjectIsProtein(program_number)) {
792
return eBlastEncodingProtein;
793
} else if (Blast_SubjectIsTranslated(program_number)) {
794
return eBlastEncodingNcbi4na;
796
return eBlastEncodingNucleotide;
775
801
/** Delete extra subject sequences hits, if after-traceback hit list size is
776
802
* smaller than preliminary hit list size.
777
803
* @param results All results after traceback, assumed already sorted by best
957
984
encoding = Blast_TracebackGetEncoding(program_number);
958
985
memset((void*) &seq_arg, 0, sizeof(seq_arg));
987
/* the first Karlin block must be valid for traceback to
988
be performed; if it is not valid (i.e. if the first query
989
was completely masked), make up a valid Karlin block.
990
This is possible because all gapped Karlin blocks currently
993
if (sbp->kbp_gap[0] == NULL) {
995
for (index = query_info->first_context;
996
index <= query_info->last_context; index++) {
997
if (sbp->kbp_gap[index] != NULL) {
998
sbp->kbp_gap[0] = sbp->kbp_gap[index];
960
1005
while (BlastHSPStreamRead(hsp_stream, &hsp_list)
961
1006
!= kBlastHSPStream_Eof) {
1115
1160
query_info, gap_align, score_params,
1116
1161
ext_params, hit_params, rps_info, results,
1117
1162
interrupt_search, progress_info);
1118
} else if ((program_number == eBlastTypeBlastp ||
1119
program_number == eBlastTypeTblastn ||
1120
program_number == eBlastTypePhiBlastp ||
1121
program_number == eBlastTypePsiBlast) &&
1122
(ext_params->options->compositionBasedStats > 0 ||
1123
ext_params->options->eTbackExt == eSmithWatermanTbck)) {
1163
} else if (ext_params->options->compositionBasedStats > 0 ||
1164
ext_params->options->eTbackExt == eSmithWatermanTbck) {
1125
1166
Blast_RedoAlignmentCore(program_number, query, query_info, sbp,
1126
hsp_stream, seq_src, gen_code_string,
1127
score_params, ext_params, hit_params,
1167
hsp_stream, seq_src, default_db_genetic_code,
1168
score_params, ext_params, hit_params,
1128
1169
psi_options, results);
1130
1172
BlastSeqSrcGetSeqArg seq_arg;
1131
1173
EBlastEncoding encoding = Blast_TracebackGetEncoding(program_number);
1132
Boolean perform_traceback =
1133
(score_params->options->gapped_calculation &&
1134
(ext_params->options->ePrelimGapExt != eGreedyWithTracebackExt));
1174
Boolean perform_traceback = score_params->options->gapped_calculation;
1135
1175
const Boolean kPhiBlast = Blast_ProgramIsPhiBlast(program_number);
1176
BlastHSPStreamResultBatch *batch =
1177
Blast_HSPStreamResultBatchInit(query_info->num_queries);
1178
const Boolean kSubjectRanges = !(hit_params->restricted_align ||
1179
score_params->options->is_ooframe ||
1180
ext_params->options->eTbackExt ==
1181
eSmithWatermanTbck);
1137
1183
memset((void*) &seq_arg, 0, sizeof(seq_arg));
1139
/* Retrieve all HSP lists from the HSPStream. */
1140
while (BlastHSPStreamRead(hsp_stream, &hsp_list)
1185
/* Retrieve all HSP lists from the HSPStream that contain
1186
hits to the next subject OID. */
1187
while (BlastHSPStreamBatchRead(hsp_stream, batch)
1141
1188
!= kBlastHSPStream_Eof) {
1143
1190
/* check for interrupt */
1144
1191
if (interrupt_search && (*interrupt_search)(progress_info) == TRUE) {
1145
hsp_list = Blast_HSPListFree(hsp_list);
1192
Blast_HSPStreamResultBatchReset(batch);
1146
1193
status = BLASTERR_INTERRUPTED;
1150
/* Perform traceback here, if necessary. */
1197
/* traceback will require fetching the subject sequence */
1151
1199
if (perform_traceback) {
1152
seq_arg.oid = hsp_list->oid;
1200
seq_arg.oid = batch->hsplist_array[0]->oid;
1153
1201
seq_arg.encoding = encoding;
1202
seq_arg.enable_ranges = kSubjectRanges;
1203
seq_arg.check_oid_exclusion = TRUE;
1154
1205
BlastSequenceBlkClean(seq_arg.seq);
1155
if (BlastSeqSrcGetSequence(seq_src, (void*) &seq_arg) < 0)
1206
if (BlastSeqSrcGetSequence(seq_src, (void*) &seq_arg) < 0) {
1207
Blast_HSPStreamResultBatchReset(batch);
1211
/* If the subject is translated and the BlastSeqSrc implementation
1212
* doesn't provide a genetic code string, use the default genetic
1213
* code for all subjects (as in the C toolkit) */
1214
if (Blast_SubjectIsTranslated(program_number) &&
1215
seq_arg.seq->gen_code_string == NULL) {
1216
seq_arg.seq->gen_code_string =
1217
GenCodeSingletonFind(default_db_genetic_code);
1218
ASSERT(seq_arg.seq->gen_code_string);
1158
1221
if (BlastSeqSrcGetTotLen(seq_src) == 0) {
1159
1222
/* This is not a database search, so effective search spaces
1169
1232
if ((status = BLAST_OneSubjectUpdateParameters(program_number,
1170
1233
seq_arg.seq->length, score_params->options,
1171
1234
query_info, sbp, hit_params,
1172
NULL, eff_len_params)) != 0)
1235
NULL, eff_len_params)) != 0) {
1236
Blast_HSPStreamResultBatchReset(batch);
1177
s_PHITracebackFromHSPList(program_number, hsp_list, query,
1178
seq_arg.seq, gap_align, sbp,
1179
score_params, hit_params,
1180
query_info, pattern_blk);
1242
/* process all the hits to this subject sequence, one
1245
for (i = 0; i < batch->num_hsplists; i++) {
1247
hsp_list = batch->hsplist_array[i];
1249
if (perform_traceback) {
1251
s_PHITracebackFromHSPList(program_number, hsp_list, query,
1252
seq_arg.seq, gap_align, sbp,
1253
score_params, hit_params,
1254
query_info, pattern_blk);
1256
Boolean fence_hit = FALSE;
1257
Boolean * fence_hit_ptr = NULL;
1259
/* prepare to deal with partial subject sequence
1260
ranges, if these are configured and a previous
1261
HSP list has not turned them off */
1262
if (seq_arg.enable_ranges == TRUE)
1263
fence_hit_ptr = &fence_hit;
1265
Blast_TracebackFromHSPList(program_number, hsp_list, query,
1266
seq_arg.seq, query_info,
1267
gap_align, sbp, score_params,
1268
ext_params->options, hit_params,
1269
seq_arg.seq->gen_code_string,
1273
/* Disable range support and refetch the
1274
(whole) subject sequence */
1276
seq_arg.enable_ranges = FALSE;
1277
BlastSeqSrcReleaseSequence(seq_src, (void*)&seq_arg);
1278
BlastSeqSrcGetSequence(seq_src, (void*) &seq_arg);
1280
/* Retry the alignment */
1282
Blast_TracebackFromHSPList(program_number, hsp_list,
1284
query_info, gap_align,
1286
ext_params->options,
1288
seq_arg.seq->gen_code_string,
1182
Blast_TracebackFromHSPList(program_number, hsp_list, query,
1183
seq_arg.seq, query_info, gap_align,
1185
ext_params->options, hit_params,
1189
BlastSeqSrcReleaseSequence(seq_src, (void*)&seq_arg);
1190
} else if (! score_params->options->gapped_calculation) {
1191
/* Compute bit scores for searches where the traceback
1192
phase is seperated from the preliminary search. */
1294
/* traceback skipped; compute bit scores for searches
1295
where the traceback phase is seperated from the
1296
preliminary search. */
1194
Blast_HSPListGetBitScores(hsp_list, FALSE, sbp);
1298
Blast_HSPListGetBitScores(hsp_list, FALSE, sbp);
1197
/* Free HSP list structure if all HSPs have been deleted. */
1198
if (hsp_list->hspcnt == 0) {
1199
hsp_list = Blast_HSPListFree(hsp_list);
1301
/* Free HSP list if all HSPs have been deleted. */
1303
batch->hsplist_array[i] = NULL;
1304
if (hsp_list->hspcnt == 0) {
1305
hsp_list = Blast_HSPListFree(hsp_list);
1308
Blast_HSPResultsInsertHSPList(results, hsp_list,
1309
hit_params->options->hitlist_size);
1311
} /* loop over one HSPList batch */
1313
if (perform_traceback) {
1314
BlastSeqSrcReleaseSequence(seq_src, (void*)&seq_arg);
1203
Blast_HSPResultsInsertHSPList(results, hsp_list,
1204
hit_params->options->hitlist_size);
1317
} /* loop over all batches */
1319
batch = Blast_HSPStreamResultBatchFree(batch);
1206
1320
BlastSequenceBlkFree(seq_arg.seq);