2021
2023
return best_score;
2026
/** Simple wrapper around binary search function for BlastQueryInfo structure
2027
* to obtain the context in which this initial ungapped HSP is found
2028
* @param query_info Query information structure [in]
2029
* @param init_hsp Initial HSP [in]
2031
static NCBI_INLINE Int4
2032
s_GetUngappedHSPContext(const BlastQueryInfo* query_info,
2033
const BlastInitHSP* init_hsp)
2035
return BSearchContextInfo(init_hsp->offsets.qs_offsets.q_off, query_info);
2038
/** Adjust the HSP offsets in the initial ungapped HSP structure given the
2040
* @param init_hsp Initial HSP [in|out]
2041
* @param query_start Starting offset of the query sequence in the query info
2044
static NCBI_INLINE void
2045
s_AdjustInitialHSPOffsets(BlastInitHSP* init_hsp, Int4 query_start)
2047
init_hsp->offsets.qs_offsets.q_off -= query_start;
2048
if (init_hsp->ungapped_data) {
2049
init_hsp->ungapped_data->q_start -= query_start;
2053
/** Set up a BLAST_SequenceBlk structure for a single query sequence
2054
* @param concatenated_query Query sequence block for concatenated query [in]
2055
* @param query_info Query information structure for concatenated query [in]
2056
* @param context Context in which the HSP ocurred [in]
2057
* @param single_query query Output sequence block which will contain adjusted
2058
* sequence pointer and sequence length [out]
2059
* @param query_start Starting offset of the single query sequence in the
2060
* concatenated query [out]
2063
s_SetUpLocalBlastSequenceBlk(const BLAST_SequenceBlk* concatenated_query,
2064
const BlastQueryInfo* query_info, Int4 context,
2065
BLAST_SequenceBlk* single_query,
2068
Int4 query_length = 0;
2069
if (concatenated_query->oof_sequence) {
2070
/* Out-of-frame blastx case: all frames of the same parity are mixed
2071
together in a special sequence. */
2072
Int4 query_end_pt = 0;
2073
Int4 mixed_frame_context = context - context % CODON_LENGTH;
2075
*query_start = query_info->contexts[mixed_frame_context].query_offset;
2077
query_info->contexts[mixed_frame_context+CODON_LENGTH-1].query_offset +
2078
query_info->contexts[mixed_frame_context+CODON_LENGTH-1].query_length;
2079
query_length = query_end_pt - *query_start;
2080
single_query->sequence = NULL;
2081
single_query->oof_sequence = concatenated_query->oof_sequence + *query_start;
2083
*query_start = query_info->contexts[context].query_offset;
2084
query_length = query_info->contexts[context].query_length;
2085
single_query->sequence = concatenated_query->sequence + *query_start;
2086
single_query->oof_sequence = NULL;
2088
single_query->length = query_length;
2024
2092
/** Find the HSP offsets relative to the individual query sequence instead of
2025
* the concatenated sequence.
2093
* the concatenated sequence and retrieve relevant portions of the query
2026
2095
* @param query Query sequence block [in]
2027
2096
* @param query_info Query information structure, including context offsets
2029
* @param init_hsp Initial HSP [in] [out]
2030
* @param query_out Optional: query sequence block with modified sequence
2098
* @param init_hsp Initial HSP, this will be modified to hold the adjusted
2099
* offsets [in] [out]
2100
* @param query_out query sequence block with modified sequence
2031
2101
* pointer and sequence length [out]
2032
* @param init_hsp_out Optional: Pointer to initial HSP structure to hold
2033
* adjusted offsets; the input init_hsp will be modified if
2034
* this parameter is NULL [out]
2035
* @param context_out Which context this HSP belongs to? [out]
2102
* @param context Which context this HSP belongs to? [out]
2038
s_GetRelativeCoordinates(const BLAST_SequenceBlk* query,
2039
BlastQueryInfo* query_info,
2040
BlastInitHSP* init_hsp, BLAST_SequenceBlk* query_out,
2041
BlastInitHSP* init_hsp_out, Int4* context_out)
2044
Int4 query_start = 0, query_length = 0;
2046
context = BSearchContextInfo(init_hsp->offsets.qs_offsets.q_off, query_info);
2048
if (query && query->oof_sequence) {
2049
/* Out-of-frame blastx case: all frames of the same parity are mixed
2050
together in a special sequence. */
2051
Int4 query_end_pt = 0;
2052
Int4 mixed_frame_context = context - context % CODON_LENGTH;
2054
query_start = query_info->contexts[mixed_frame_context].query_offset;
2056
query_info->contexts[mixed_frame_context+CODON_LENGTH-1].query_offset +
2057
query_info->contexts[mixed_frame_context+CODON_LENGTH-1].query_length;
2059
query_end_pt - query_start;
2061
query_start = query_info->contexts[context].query_offset;
2062
query_length = query_info->contexts[context].query_length;
2065
if (query && query_out) {
2066
if (query->oof_sequence) {
2067
query_out->sequence = NULL;
2068
query_out->oof_sequence = query->oof_sequence + query_start;
2070
query_out->sequence = query->sequence + query_start;
2071
query_out->oof_sequence = NULL;
2073
query_out->length = query_length;
2077
init_hsp_out->offsets.qs_offsets.q_off =
2078
init_hsp->offsets.qs_offsets.q_off - query_start;
2079
init_hsp_out->offsets.qs_offsets.s_off =
2080
init_hsp->offsets.qs_offsets.s_off;
2081
if (init_hsp->ungapped_data) {
2082
if (!init_hsp_out->ungapped_data) {
2083
init_hsp_out->ungapped_data = (BlastUngappedData*)
2084
BlastMemDup(init_hsp->ungapped_data, sizeof(BlastUngappedData));
2086
memcpy(init_hsp_out->ungapped_data, init_hsp->ungapped_data,
2087
sizeof(BlastUngappedData));
2089
init_hsp_out->ungapped_data->q_start =
2090
init_hsp->ungapped_data->q_start - query_start;
2093
init_hsp->offsets.qs_offsets.q_off -= query_start;
2094
if (init_hsp->ungapped_data)
2095
init_hsp->ungapped_data->q_start -= query_start;
2098
*context_out = context;
2101
Int2 BLAST_MbGetGappedScore(EBlastProgramType program_number,
2102
BLAST_SequenceBlk* query, BlastQueryInfo* query_info,
2103
BLAST_SequenceBlk* subject,
2104
BlastGapAlignStruct* gap_align,
2105
const BlastScoringParameters* score_params,
2106
const BlastExtensionParameters* ext_params,
2107
const BlastHitSavingParameters* hit_params,
2108
BlastInitHitList* init_hitlist,
2109
BlastHSPList** hsp_list_ptr, BlastGappedStats* gapped_stats)
2111
const BlastExtensionOptions* ext_options = ext_params->options;
2113
BlastInitHSP* init_hsp;
2114
BlastHSPList* hsp_list;
2115
const BlastHitSavingOptions* hit_options = hit_params->options;
2116
BLAST_SequenceBlk query_tmp;
2118
BlastIntervalTree *tree;
2120
if (*hsp_list_ptr == NULL)
2121
*hsp_list_ptr = hsp_list = Blast_HSPListNew(hit_options->hsp_num_max);
2123
hsp_list = *hsp_list_ptr;
2125
tree = Blast_IntervalTreeInit(0, query->length + 1,
2126
0, subject->length + 1);
2128
for (index=0; index<init_hitlist->total; index++) {
2130
Int4 q_start, q_end, s_start, s_end, score;
2132
init_hsp = &init_hitlist->init_hsp_array[index];
2134
/* Change query coordinates to relative in the initial HSP right here */
2135
s_GetRelativeCoordinates(query, query_info, init_hsp, &query_tmp, NULL,
2138
if (!init_hsp->ungapped_data) {
2139
q_start = q_end = init_hsp->offsets.qs_offsets.q_off;
2140
s_start = s_end = init_hsp->offsets.qs_offsets.s_off;
2143
q_start = init_hsp->ungapped_data->q_start;
2144
q_end = q_start + init_hsp->ungapped_data->length;
2145
s_start = init_hsp->ungapped_data->s_start;
2146
s_end = s_start + init_hsp->ungapped_data->length;
2147
score = init_hsp->ungapped_data->score;
2150
tmp_hsp.score = score;
2151
tmp_hsp.context = context;
2152
tmp_hsp.query.offset = q_start;
2153
tmp_hsp.query.end = q_end;
2154
tmp_hsp.query.frame = query_info->contexts[context].frame;
2155
tmp_hsp.subject.offset = s_start;
2156
tmp_hsp.subject.end = s_end;
2157
tmp_hsp.subject.frame = 1;
2159
if (!BlastIntervalTreeContainsHSP(tree, &tmp_hsp, query_info,
2163
++gapped_stats->extensions;
2165
BLAST_GreedyGappedAlignment(query_tmp.sequence,
2166
subject->sequence, query_tmp.length, subject->length, gap_align,
2167
score_params, init_hsp->offsets.qs_offsets.q_off,
2168
init_hsp->offsets.qs_offsets.s_off, (Boolean) TRUE,
2169
(Boolean) (ext_options->ePrelimGapExt == eGreedyWithTracebackExt));
2171
if (gap_align->score >= hit_options->cutoff_score) {
2172
/* gap_align contains alignment endpoints; init_hsp contains
2173
the offsets to start the alignment from, if traceback is to
2174
be performed later */
2176
Blast_HSPInit(gap_align->query_start, gap_align->query_stop,
2177
gap_align->subject_start, gap_align->subject_stop,
2178
init_hsp->offsets.qs_offsets.q_off,
2179
init_hsp->offsets.qs_offsets.s_off, context,
2180
query_info->contexts[context].frame, 1,
2181
gap_align->score, &(gap_align->edit_script),
2183
Blast_HSPListSaveHSP(hsp_list, new_hsp);
2184
BlastIntervalTreeAddHSP(new_hsp, tree, query_info,
2189
gap_align->edit_script = GapEditScriptDelete(gap_align->edit_script);
2194
tree = Blast_IntervalTreeFree(tree);
2105
s_AdjustHspOffsetsAndGetQueryData(const BLAST_SequenceBlk* query,
2106
const BlastQueryInfo* query_info,
2107
BlastInitHSP* init_hsp,
2108
BLAST_SequenceBlk* query_out,
2111
Int4 query_start = 0;
2119
*context = s_GetUngappedHSPContext(query_info, init_hsp);
2120
s_SetUpLocalBlastSequenceBlk(query, query_info, *context,
2121
query_out, &query_start);
2122
s_AdjustInitialHSPOffsets(init_hsp, query_start);
2210
2136
Blast_PrelimEditBlockToGapEditScript (GapPrelimEditBlock* rev_prelim_tback,
2211
2137
GapPrelimEditBlock* fwd_prelim_tback)
2213
GapEditScript* esp_start = NULL,* esp;
2139
Boolean merge_ops = FALSE;
2214
2141
GapPrelimEditScript *op;
2217
2146
if (rev_prelim_tback == NULL || fwd_prelim_tback == NULL)
2220
/* turn the right extension into a linked list (built
2221
last to first). Reverse the edit script in the process */
2223
for (i=0; i < fwd_prelim_tback->num_ops; i++) {
2224
esp = (GapEditScript*) malloc(sizeof(GapEditScript));
2149
/* The fwd_prelim_tback script will get reversed here as the traceback started from the highest scoring point
2150
and worked backwards. The rev_prelim_tback script does NOT get reversed. Since it was reversed when the
2151
traceback was produced it's already "forward" */
2153
if (fwd_prelim_tback->num_ops > 0 && rev_prelim_tback->num_ops > 0 &&
2154
fwd_prelim_tback->edit_ops[(fwd_prelim_tback->num_ops)-1].op_type ==
2155
rev_prelim_tback->edit_ops[(rev_prelim_tback->num_ops)-1].op_type)
2158
size = fwd_prelim_tback->num_ops+rev_prelim_tback->num_ops;
2162
esp = GapEditScriptNew(size);
2165
for (i=0; i < rev_prelim_tback->num_ops; i++) {
2166
op = rev_prelim_tback->edit_ops + i;
2167
esp->op_type[index] = op->op_type;
2168
esp->num[index] = op->num;
2172
if (fwd_prelim_tback->num_ops == 0)
2176
esp->num[index-1] += fwd_prelim_tback->edit_ops[(fwd_prelim_tback->num_ops)-1].num;
2178
/* If we merge, then we skip the first one. */
2180
i = fwd_prelim_tback->num_ops - 2;
2182
i = fwd_prelim_tback->num_ops - 1;
2184
for (; i >= 0; i--) {
2225
2185
op = fwd_prelim_tback->edit_ops + i;
2226
esp->op_type = op->op_type;
2228
esp->next = esp_start;
2232
/* if the first operation in the forward script matches the
2233
last operation in the reverse script, merge the two
2236
if (rev_prelim_tback->num_ops == 0)
2239
i = rev_prelim_tback->num_ops - 1;
2240
op = rev_prelim_tback->edit_ops + i;
2241
if (esp_start && esp_start->op_type == op->op_type) {
2242
esp_start->num += op->num;
2246
/* prepend the reverse script to the list of actions */
2248
for (; i >= 0; i--) {
2249
esp = (GapEditScript*) malloc(sizeof(GapEditScript));
2250
op = rev_prelim_tback->edit_ops + i;
2251
esp->op_type = op->op_type;
2253
esp->next = esp_start;
2186
esp->op_type[index] = op->op_type;
2187
esp->num[index] = op->num;
2260
2194
/** Fills the BlastGapAlignStruct structure with the results of a gapped
2723
2657
return max_offset;
2726
/** Test for an array of ungapped HSPs to decide whether gapped alignment
2727
* should be performed on all of them. If the best HSP already has score above
2728
* the cutoff for gapped alignments, then just return. Otherwise attempt
2729
* gapped alignment for all HSPs with scores above gap trigger, and see any of
2730
* them receives score above the cutoff.
2731
* @param program_number Type of BLAST program [in]
2732
* @param query Query sequence [in]
2733
* @param query_info Additional query information [in]
2734
* @param subject Subject sequence [in]
2735
* @param gap_align Gapped alignment structure [in]
2736
* @param score_params Scoring parameters [in]
2737
* @param hit_params Hit saving parameters [in]
2738
* @param init_hitlist Initial hit list obtained by ungapped alignment [in]
2739
* @param gapped_stats Gapped extension stats [in]
2740
* @return TRUE if the set of initial HSPs has passed the test, so gapped
2741
* alignment needs to be performed.
2744
s_BlastGappedScorePrelimTest(EBlastProgramType program_number,
2745
BLAST_SequenceBlk* query, BlastQueryInfo* query_info,
2746
BLAST_SequenceBlk* subject,
2747
BlastGapAlignStruct* gap_align,
2748
const BlastScoringParameters* score_params,
2749
const BlastHitSavingParameters* hit_params,
2750
BlastInitHitList* init_hitlist,
2751
BlastGappedStats* gapped_stats)
2753
BlastInitHSP* init_hsp = NULL;
2754
BlastInitHSP* init_hsp_array;
2755
Int4 init_hsp_count;
2757
BLAST_SequenceBlk query_tmp;
2759
Int4 **rpsblast_pssms = NULL; /* Pointer to concatenated PSSMs in
2760
RPS-BLAST database */
2761
Boolean further_process = FALSE;
2767
cutoff_score = hit_params->cutoff_score;
2768
is_prot = (program_number != eBlastTypeBlastn &&
2769
program_number != eBlastTypePhiBlastn);
2771
if (program_number == eBlastTypeRpsTblastn ||
2772
program_number == eBlastTypeRpsBlast) {
2773
rpsblast_pssms = gap_align->sbp->psi_matrix->pssm->data;
2776
ASSERT(Blast_InitHitListIsSortedByScore(init_hitlist));
2778
init_hsp_array = init_hitlist->init_hsp_array;
2779
init_hsp_count = init_hitlist->total;
2781
/* If no initial HSP passes the score threshold corresponding to the e-value
2782
cutoff so far, check if any would do after gapped alignment, and exit if
2784
Only attempt to extend initial HSPs whose scores are already above
2787
if (init_hsp_array[0].ungapped_data &&
2788
init_hsp_array[0].ungapped_data->score < cutoff_score) {
2789
BlastInitHSP init_hsp_tmp;
2790
init_hsp_tmp.ungapped_data = NULL;
2791
for (index=0; index<init_hsp_count; index++) {
2792
init_hsp = &init_hsp_array[index];
2795
++gapped_stats->extra_extensions;
2796
++gapped_stats->extensions;
2799
/* Don't modify initial HSP's coordinates here, because it will be
2800
done again if further processing is required */
2801
s_GetRelativeCoordinates(query, query_info, init_hsp, &query_tmp,
2802
&init_hsp_tmp, &context);
2804
gap_align->sbp->psi_matrix->pssm->data = rpsblast_pssms +
2805
query_info->contexts[context].query_offset;
2807
if(is_prot && !score_params->options->is_ooframe) {
2809
BlastGetStartForGappedAlignment(query_tmp.sequence,
2810
subject->sequence, gap_align->sbp,
2811
init_hsp_tmp.ungapped_data->q_start,
2812
init_hsp_tmp.ungapped_data->length,
2813
init_hsp_tmp.ungapped_data->s_start,
2814
init_hsp_tmp.ungapped_data->length);
2815
init_hsp_tmp.offsets.qs_offsets.s_off +=
2816
max_offset - init_hsp_tmp.offsets.qs_offsets.q_off;
2817
init_hsp_tmp.offsets.qs_offsets.q_off = max_offset;
2822
s_BlastProtGappedAlignment(program_number, &query_tmp,
2823
subject, gap_align, score_params, &init_hsp_tmp);
2826
s_BlastDynProgNtGappedAlignment(&query_tmp, subject,
2827
gap_align, score_params, &init_hsp_tmp);
2830
further_process = FALSE;
2833
if (gap_align->score >= cutoff_score) {
2834
further_process = TRUE;
2838
sfree(init_hsp_tmp.ungapped_data);
2841
further_process = TRUE;
2843
++gapped_stats->seqs_ungapped_passed;
2846
if (rpsblast_pssms) {
2847
gap_align->sbp->psi_matrix->pssm->data = rpsblast_pssms;
2850
return further_process;
2853
2660
Int2 BLAST_GetGappedScore (EBlastProgramType program_number,
2854
2661
BLAST_SequenceBlk* query, BlastQueryInfo* query_info,
2855
2662
BLAST_SequenceBlk* subject,
3356
3179
/* form the final edit block */
3358
*edit_script_ptr = e_script =
3359
Blast_PrelimEditBlockToGapEditScript(rev_prelim_tback,
3180
e_script = Blast_PrelimEditBlockToGapEditScript(tmp_prelim_tback, fwd_prelim_tback);
3181
GapPrelimEditBlockFree(tmp_prelim_tback);
3362
3183
/* postprocess the edit script */
3365
while (e_script != NULL) {
3185
for (i=0; i<e_script->size; i++)
3187
int total_actions=0;
3367
3188
/* Count the number of nucleotides in the next
3368
3189
traceback operation and delete any traceback operations
3369
3190
that would make the alignment too long. This check is
3370
3191
not needed for in-frame alignment because the
3371
3192
traceback is never changed after it is computed */
3373
last_op = e_script->op_type;
3194
last_op = e_script->op_type[i];
3375
3196
if (last_op == eGapAlignIns)
3376
3197
last_op = eGapAlignSub;
3377
i = last_op * e_script->num;
3379
if (num_nuc + i >= nucl_align_length) {
3380
e_script->num = (nucl_align_length - num_nuc +
3199
total_actions = last_op * e_script->num[i];
3201
if (num_nuc + total_actions >= nucl_align_length) {
3202
e_script->num[i] = (nucl_align_length - num_nuc +
3381
3203
last_op - 1) / last_op;
3382
e_script->next = GapEditScriptDelete(e_script->next);
3204
break; /* We delete the rest of the script. */
3388
/* Only one frame shift operation at a time is
3207
num_nuc += total_actions;;
3210
e_script->size = MIN(i+1, e_script->size); /* If we broke out early then we truncate the edit script. */
3213
for (i=0; i<e_script->size; i++)
3215
if (e_script->op_type[i] % 3 != 0 && e_script->num[i] > 1) {
3216
extra_needed += e_script->num[i] - 1;
3222
GapEditScript* new_esp = GapEditScriptNew(extra_needed+e_script->size);
3224
for (i=0; i<e_script->size; i++)
3226
/* Only one frame shift operation at a time is
3389
3227
allowed in a single edit script element. */
3390
last_op = e_script->op_type;
3391
if (last_op % 3 != 0 && e_script->num > 1) {
3392
Int4 num_ops = e_script->num;
3393
GapEditScript *last = e_script->next;
3394
e_script->next = NULL;
3396
for (i = 1; i < num_ops; i++) {
3397
e_script = GapEditScriptNew(e_script);
3398
e_script->op_type = last_op;
3401
e_script->next = last;
3403
e_script = e_script->next;
3228
new_esp->num[new_esp_i] = e_script->num[i];
3229
new_esp->op_type[new_esp_i] = e_script->op_type[i];
3231
last_op = e_script->op_type[i];
3232
if (last_op % 3 != 0 && e_script->num[i] > 1) {
3233
Int4 num_ops = e_script->num[i];
3235
new_esp->num[new_esp_i-1] = 1;
3236
for (esp_index = 1; esp_index < num_ops; esp_index++) {
3237
new_esp->num[new_esp_i] = 1;
3238
new_esp->op_type[new_esp_i] = last_op;
3243
e_script = GapEditScriptDelete(e_script);
3246
*edit_script_ptr = e_script;
3406
3248
/* finally, add one to the size of any block of substitutions
3407
3249
that follows a frame shift op */
3409
e_script = *edit_script_ptr;
3410
last_op = e_script->op_type;
3411
e_script = e_script->next;
3412
while (e_script != NULL) {
3413
if (e_script->op_type == eGapAlignSub && (last_op % 3) != 0)
3416
last_op = e_script->op_type;
3417
e_script = e_script->next;
3250
last_op = e_script->op_type[0];
3251
for (i=1; i<e_script->size; i++)
3253
if (e_script->op_type[i] == eGapAlignSub && (last_op % 3) != 0)
3254
(e_script->num[i])++;
3256
last_op = e_script->op_type[i];
3420
GapPrelimEditBlockFree(tmp_prelim_tback);