211
206
* parameters [in]
212
207
* @param ext_params Gapped extension parameters [in]
213
208
* @param hit_params Hit saving parameters [in]
214
* @param db_options Database options [in]
215
209
* @param diagnostics Hit counts and other diagnostics [in] [out]
216
210
* @param aux_struct Structure containing different auxiliary data and memory
217
211
* for the preliminary stage of the BLAST search [in]
218
* @param hsp_list_out List of HSPs found for a given subject sequence [in]
212
* @param hsp_list_out_ptr List of HSPs found for a given subject sequence [out]
213
* @param interrupt_search function callback to allow interruption of BLAST
214
* search [in, optional]
215
* @param progress_info contains information about the progress of the current
216
* BLAST search [in|out]
221
s_BlastSearchEngineCore(EBlastProgramType program_number, BLAST_SequenceBlk* query,
222
BlastQueryInfo* query_info_in, BLAST_SequenceBlk* subject,
223
LookupTableWrap* lookup, BlastGapAlignStruct* gap_align,
220
s_BlastSearchEngineOneContext(EBlastProgramType program_number,
221
BLAST_SequenceBlk* query, BlastQueryInfo* query_info,
222
BLAST_SequenceBlk* subject, Int4 orig_length, LookupTableWrap* lookup,
223
BlastGapAlignStruct* gap_align,
224
224
const BlastScoringParameters* score_params,
225
225
const BlastInitialWordParameters* word_params,
226
226
const BlastExtensionParameters* ext_params,
227
227
const BlastHitSavingParameters* hit_params,
228
const BlastDatabaseOptions* db_options,
229
228
BlastDiagnostics* diagnostics,
230
229
BlastCoreAuxStruct* aux_struct,
231
BlastHSPList** hsp_list_out)
230
BlastHSPList** hsp_list_out_ptr,
231
TInterruptFnPtr interrupt_search,
232
SBlastProgress* progress_info)
233
BlastInitHitList* init_hitlist = aux_struct->init_hitlist;
234
BlastHSPList* hsp_list = aux_struct->hsp_list;
235
Uint1* translation_buffer = NULL;
236
Int4* frame_offsets = NULL;
237
Int4* frame_offsets_a = NULL; /* Will be freed if non-null */
238
BlastHitSavingOptions* hit_options = hit_params->options;
239
BlastScoringOptions* score_options = score_params->options;
240
BlastHSPList* combined_hsp_list = NULL;
242
Uint4 context, first_context, last_context;
243
Int4 orig_length = subject->length;
244
Uint1* orig_sequence = subject->sequence;
247
BlastUngappedStats* ungapped_stats = NULL;
248
BlastGappedStats* gapped_stats = NULL;
249
BlastQueryInfo* query_info = query_info_in;
250
Boolean prelim_traceback =
251
(ext_params->options->ePrelimGapExt == eGreedyWithTracebackExt);
253
const Boolean kTranslatedSubject =
254
(Blast_SubjectIsTranslated(program_number) || program_number == eBlastTypeRpsTblastn);
255
const Boolean kNucleotide = (program_number == eBlastTypeBlastn ||
256
program_number == eBlastTypePhiBlastn);
258
if (kTranslatedSubject) {
261
if (score_options->is_ooframe) {
262
BLAST_GetAllTranslations(orig_sequence, eBlastEncodingNcbi2na,
263
orig_length, db_options->gen_code_string, &translation_buffer,
264
&frame_offsets, &subject->oof_sequence);
265
subject->oof_sequence_allocated = TRUE;
266
frame_offsets_a = frame_offsets;
267
} else if (program_number == eBlastTypeRpsTblastn ) {
268
/* For RPS tblastn, subject is actually query, which has already
269
been translated during the setup stage. */
270
translation_buffer = orig_sequence - 1;
271
frame_offsets_a = frame_offsets =
272
ContextOffsetsToOffsetArray(query_info_in);
274
BLAST_GetAllTranslations(orig_sequence, eBlastEncodingNcbi2na,
275
orig_length, db_options->gen_code_string, &translation_buffer,
276
&frame_offsets, NULL);
277
frame_offsets_a = frame_offsets;
279
} else if (kNucleotide) {
287
*hsp_list_out = NULL;
289
if (gap_align->positionBased)
290
matrix = gap_align->sbp->psi_matrix->pssm->data;
292
matrix = gap_align->sbp->matrix->data;
294
hsp_num_max = (hit_options->hsp_num_max ? hit_options->hsp_num_max : INT4_MAX);
297
ungapped_stats = diagnostics->ungapped_stat;
298
gapped_stats = diagnostics->gapped_stat;
301
/* Substitute query info by concatenated database info for RPS BLAST search */
302
if (Blast_ProgramIsRpsBlast(program_number)) {
303
BlastRPSLookupTable* lut = (BlastRPSLookupTable*) lookup->lut;
304
query_info = (BlastQueryInfo*) calloc(1, sizeof(BlastQueryInfo));
305
query_info->num_queries = lut->num_profiles;
306
query_info->last_context = lut->num_profiles - 1;
307
/* Since this will really be "subject info", not "query info",
308
pass program argument such that all frames will be set to 0. */
309
s_RPSOffsetArrayToContextOffsets(query_info,
310
lut->rps_seq_offsets);
313
/* Loop over frames of the subject sequence */
314
for (context=first_context; context<=last_context; context++) {
234
Int2 status = 0; /* return value */
315
235
Int4 chunk; /* loop variable below. */
316
236
Int4 num_chunks; /* loop variable below. */
317
237
Int4 offset = 0; /* Used as offset into subject sequence (if chunked) */
318
238
Int4 total_subject_length; /* Length of subject sequence used when split. */
320
if (kTranslatedSubject) {
322
BLAST_ContextToFrame(eBlastTypeBlastx, context);
324
translation_buffer + frame_offsets[context] + 1;
326
frame_offsets[context+1] - frame_offsets[context] - 1;
328
subject->frame = context;
239
BlastHSPList* combined_hsp_list = NULL;
240
BlastHSPList* hsp_list = NULL;
241
BlastInitHitList* init_hitlist = aux_struct->init_hitlist;
242
BlastScoringOptions* score_options = score_params->options;
243
BlastUngappedStats* ungapped_stats = NULL;
244
BlastGappedStats* gapped_stats = NULL;
246
const Boolean kPrelimTraceback =
247
(ext_params->options->ePrelimGapExt == eGreedyWithTracebackExt);
248
const Boolean kTranslatedSubject =
249
(Blast_SubjectIsTranslated(program_number) || program_number == eBlastTypeRpsTblastn);
250
const Boolean kNucleotide = (program_number == eBlastTypeBlastn ||
251
program_number == eBlastTypePhiBlastn);
252
const int kHspNumMax = BlastHspNumMax(score_options->gapped_calculation, hit_params->options);
255
ungapped_stats = diagnostics->ungapped_stat;
256
gapped_stats = diagnostics->gapped_stat;
259
if (gap_align->positionBased)
260
matrix = gap_align->sbp->psi_matrix->pssm->data;
262
matrix = gap_align->sbp->matrix->data;
331
264
/* Split subject sequence into chunks if it is too long */
332
265
num_chunks = (subject->length - DBSEQ_CHUNK_OVERLAP) /
333
266
(MAX_DBSEQ_LEN - DBSEQ_CHUNK_OVERLAP) + 1;
334
267
total_subject_length = subject->length;
336
269
for (chunk = 0; chunk < num_chunks; ++chunk) {
270
/* Delete if not done in last loop iteration to prevent memory leak. */
271
hsp_list = Blast_HSPListFree(hsp_list); /* In case this was not freed in above loop. */
338
273
offset += subject->length - DBSEQ_CHUNK_OVERLAP;
339
274
if (kNucleotide) {
400
335
/* The subject ordinal id is not yet filled in this HSP list */
401
336
hsp_list->oid = subject->oid;
338
/* check for interrupt */
339
if (interrupt_search && (*interrupt_search)(progress_info) == TRUE) {
340
combined_hsp_list = Blast_HSPListFree(combined_hsp_list);
341
BlastInitHitListReset(init_hitlist);
342
status = BLASTERR_INTERRUPTED;
403
346
Blast_HSPListAdjustOffsets(hsp_list, offset);
404
347
/* Allow merging of HSPs either if traceback is already
405
348
available, or if it is an ungapped search */
406
Blast_HSPListsMerge(hsp_list, &combined_hsp_list, hsp_num_max, offset,
407
(Boolean)(prelim_traceback || !score_options->gapped_calculation));
349
status = Blast_HSPListsMerge(&hsp_list, &combined_hsp_list, kHspNumMax, offset,
350
(Boolean)(kPrelimTraceback || !score_options->gapped_calculation));
408
351
} /* End loop on chunks of subject sequence */
410
if (Blast_HSPListAppend(combined_hsp_list, hsp_list_out, hsp_num_max)) {
353
hsp_list = Blast_HSPListFree(hsp_list); /* In case this was not freed in above loop. */
355
*hsp_list_out_ptr = combined_hsp_list;
360
/** Clean up function for s_BlastSearchEngineCore
361
* @param program_number BLAST program type [in]
362
* @param query_info Query information structure local to
363
* s_BlastSearchEngineCore, which may or may not be deallocated [in]
364
* @param query_info_in Query information [in]
365
* @param translation_buffer buffer containing translated sequence data [in]
366
* @param frame_offsets_a FIXME
369
s_BlastSearchEngineCoreCleanUp(EBlastProgramType program_number,
370
BlastQueryInfo* query_info,
371
const BlastQueryInfo* query_info_in,
372
Uint1* translation_buffer,
373
Int4* frame_offsets_a)
375
/* Free the local query info structure when needed (in RPS BLAST). */
376
if (query_info != query_info_in)
377
BlastQueryInfoFree(query_info);
379
/* Free translation buffer and frame offsets, except for RPS tblastn,
380
* where they are taken from different structures, and hence shouldn't
383
if (program_number != eBlastTypeRpsTblastn) {
384
if (translation_buffer) {
385
sfree(translation_buffer);
389
if (frame_offsets_a) {
390
sfree(frame_offsets_a);
394
/** The core of the BLAST search: comparison between the (concatenated)
395
* query against one subject sequence. Translation of the subject sequence
396
* into 6 frames is done inside, if necessary. If subject sequence is
397
* too long, it can be split into several chunks.
398
* @param program_number BLAST program type [in]
399
* @param query Query sequence structure [in]
400
* @param query_info_in Query information [in]
401
* @param subject Subject sequence structure [in]
402
* @param lookup Lookup table [in]
403
* @param gap_align Structure for gapped alignment information [in]
404
* @param score_params Scoring parameters [in]
405
* @param word_params Initial word finding and ungapped extension
407
* @param ext_params Gapped extension parameters [in]
408
* @param hit_params Hit saving parameters [in]
409
* @param db_options Database options [in]
410
* @param diagnostics Hit counts and other diagnostics [in] [out]
411
* @param aux_struct Structure containing different auxiliary data and memory
412
* for the preliminary stage of the BLAST search [in]
413
* @param hsp_list_out_ptr List of HSPs found for a given subject sequence [in]
414
* @param interrupt_search function callback to allow interruption of BLAST
415
* search [in, optional]
416
* @param progress_info contains information about the progress of the current
417
* BLAST search [in|out]
420
s_BlastSearchEngineCore(EBlastProgramType program_number, BLAST_SequenceBlk* query,
421
BlastQueryInfo* query_info_in, BLAST_SequenceBlk* subject,
422
LookupTableWrap* lookup, BlastGapAlignStruct* gap_align,
423
const BlastScoringParameters* score_params,
424
const BlastInitialWordParameters* word_params,
425
const BlastExtensionParameters* ext_params,
426
const BlastHitSavingParameters* hit_params,
427
const BlastDatabaseOptions* db_options,
428
BlastDiagnostics* diagnostics,
429
BlastCoreAuxStruct* aux_struct,
430
BlastHSPList** hsp_list_out_ptr,
431
TInterruptFnPtr interrupt_search,
432
SBlastProgress* progress_info)
434
BlastHSPList* hsp_list_out=NULL;
435
Uint1* translation_buffer = NULL;
436
Int4* frame_offsets = NULL;
437
Int4* frame_offsets_a = NULL; /* Will be freed if non-null */
438
BlastHitSavingOptions* hit_options = hit_params->options;
439
BlastScoringOptions* score_options = score_params->options;
441
Uint4 context, first_context, last_context;
442
Int4 orig_length = subject->length;
443
Uint1* orig_sequence = subject->sequence;
444
BlastQueryInfo* query_info = query_info_in;
446
const Boolean kTranslatedSubject =
447
(Blast_SubjectIsTranslated(program_number) || program_number == eBlastTypeRpsTblastn);
448
const Boolean kNucleotide = (program_number == eBlastTypeBlastn ||
449
program_number == eBlastTypePhiBlastn);
450
const int kHspNumMax = BlastHspNumMax(score_options->gapped_calculation, hit_options);
452
*hsp_list_out_ptr = NULL;
454
if (kTranslatedSubject) {
457
if (score_options->is_ooframe) {
458
BLAST_GetAllTranslations(orig_sequence, eBlastEncodingNcbi2na,
459
orig_length, db_options->gen_code_string, &translation_buffer,
460
&frame_offsets, &subject->oof_sequence);
461
subject->oof_sequence_allocated = TRUE;
462
frame_offsets_a = frame_offsets;
463
} else if (program_number == eBlastTypeRpsTblastn ) {
464
/* For RPS tblastn, subject is actually query, which has already
465
been translated during the setup stage. */
466
translation_buffer = orig_sequence - 1;
467
frame_offsets_a = frame_offsets =
468
ContextOffsetsToOffsetArray(query_info_in);
470
BLAST_GetAllTranslations(orig_sequence, eBlastEncodingNcbi2na,
471
orig_length, db_options->gen_code_string, &translation_buffer,
472
&frame_offsets, NULL);
473
frame_offsets_a = frame_offsets;
475
} else if (kNucleotide) {
484
/* Substitute query info by concatenated database info for RPS BLAST search */
485
if (Blast_ProgramIsRpsBlast(program_number)) {
486
BlastRPSLookupTable* lut = (BlastRPSLookupTable*) lookup->lut;
487
query_info = BlastQueryInfoNew(eBlastTypeRpsBlast, lut->num_profiles);
488
/* Since this will really be "subject info", not "query info",
489
pass program argument such that all frames will be set to 0. */
490
s_RPSOffsetArrayToContextOffsets(query_info, lut->rps_seq_offsets);
493
/* Loop over frames of the subject sequence */
494
for (context=first_context; context<=last_context; context++) {
495
BlastHSPList* hsp_list_for_chunks = NULL;
496
if (kTranslatedSubject) {
498
BLAST_ContextToFrame(eBlastTypeBlastx, context);
500
translation_buffer + frame_offsets[context] + 1;
502
frame_offsets[context+1] - frame_offsets[context] - 1;
504
subject->frame = context;
506
status = s_BlastSearchEngineOneContext(program_number, query, query_info,
507
subject, orig_length, lookup,
508
gap_align, score_params,
509
word_params, ext_params,
510
hit_params, diagnostics,
511
aux_struct, &hsp_list_for_chunks,
512
interrupt_search, progress_info);
517
if (Blast_HSPListAppend(&hsp_list_for_chunks, &hsp_list_out, kHspNumMax)) {
414
combined_hsp_list = Blast_HSPListFree(combined_hsp_list);
522
/* if searching was interrupted, delete accumulated results
523
but continue execution so temporary structures get freed */
524
if (interrupt_search && (*interrupt_search)(progress_info) == TRUE) {
525
status = BLASTERR_INTERRUPTED;
415
528
} /* End loop on frames */
417
530
/* Restore the original contents of the subject block */
418
531
subject->length = orig_length;
419
532
subject->sequence = orig_sequence;
424
hsp_list = *hsp_list_out;
535
hsp_list_out = Blast_HSPListFree(hsp_list_out);
536
s_BlastSearchEngineCoreCleanUp(program_number, query_info,
537
query_info_in, translation_buffer,
426
542
if (hit_params->link_hsp_params) {
427
status = BLAST_LinkHsps(program_number, hsp_list, query_info,
543
status = BLAST_LinkHsps(program_number, hsp_list_out, query_info,
428
544
subject->length, gap_align->sbp, hit_params->link_hsp_params,
429
545
score_options->gapped_calculation);
430
546
} else if (!Blast_ProgramIsPhiBlast(program_number) &&
432
548
/* Calculate e-values for all HSPs. Skip this step
433
549
for PHI and RPS BLAST, since calculating the E values
434
550
requires precomputation that has not been done yet */
435
status = Blast_HSPListGetEvalues(query_info, hsp_list,
551
status = Blast_HSPListGetEvalues(query_info, hsp_list_out,
436
552
score_options->gapped_calculation,
437
553
gap_align->sbp, 0, 1.0);
440
/* Free the local query info structure when needed (in RPS BLAST). */
441
if (query_info != query_info_in)
442
BlastQueryInfoFree(query_info);
444
556
/* Discard HSPs that don't pass the e-value test. */
445
status = Blast_HSPListReapByEvalue(hsp_list, hit_options);
557
status = Blast_HSPListReapByEvalue(hsp_list_out, hit_options);
447
559
/* If there are no HSPs left, destroy the HSP list too. */
448
if (hsp_list && hsp_list->hspcnt == 0)
449
*hsp_list_out = hsp_list = Blast_HSPListFree(hsp_list);
560
if (hsp_list_out && hsp_list_out->hspcnt == 0)
561
*hsp_list_out_ptr = hsp_list_out = Blast_HSPListFree(hsp_list_out);
451
if (gapped_stats && hsp_list && hsp_list->hspcnt > 0) {
563
if (diagnostics && diagnostics->gapped_stat && hsp_list_out && hsp_list_out->hspcnt > 0) {
564
BlastGappedStats* gapped_stats = diagnostics->gapped_stat;
452
565
++gapped_stats->num_seqs_passed;
453
gapped_stats->good_extensions += hsp_list->hspcnt;
456
/* Free translation buffer and frame offsets, except for RPS tblastn,
457
* where they are taken from different structures, and hence shouldn't
460
if (program_number != eBlastTypeRpsTblastn) {
461
if (translation_buffer) {
462
sfree(translation_buffer);
466
if (frame_offsets_a) {
467
sfree(frame_offsets_a);
566
gapped_stats->good_extensions += hsp_list_out->hspcnt;
569
s_BlastSearchEngineCoreCleanUp(program_number, query_info, query_info_in,
570
translation_buffer, frame_offsets_a);
572
*hsp_list_out_ptr = hsp_list_out;
936
1084
const PSIBlastOptions* psi_options,
937
1085
const BlastDatabaseOptions* db_options,
938
1086
BlastHSPStream* hsp_stream, const BlastRPSInfo* rps_info,
939
BlastDiagnostics* diagnostics, BlastHSPResults** results)
1087
BlastDiagnostics* diagnostics, BlastHSPResults** results,
1088
TInterruptFnPtr interrupt_search,
1089
SBlastProgress* progress_info)
941
1091
Int4 status = 0;
942
1092
BlastScoringParameters* score_params = NULL;
943
1093
BlastExtensionParameters* ext_params = NULL;
944
1094
BlastHitSavingParameters* hit_params = NULL;
945
1095
BlastEffectiveLengthsParameters* eff_len_params = NULL;
946
BlastGapAlignStruct* gap_align;
1096
BlastGapAlignStruct* gap_align = NULL;
947
1097
SPHIPatternSearchBlk* pattern_blk = NULL;
950
1100
BLAST_GapAlignSetUp(program_number, seq_src, score_options,
951
1101
eff_len_options, ext_options, hit_options, query_info, sbp,
952
1102
&score_params, &ext_params, &hit_params, &eff_len_params,
1103
&gap_align)) != 0) {
1104
s_BlastRunFullSearchCleanUp(gap_align, score_params, ext_params,
1105
hit_params, eff_len_params);
957
1110
BLAST_PreliminarySearchEngine(program_number, query, query_info,
958
1111
seq_src, gap_align, score_params, lookup_wrap, word_options,
959
1112
ext_params, hit_params, eff_len_params, psi_options,
960
db_options, hsp_stream, diagnostics)) != 0)
1113
db_options, hsp_stream, diagnostics, interrupt_search,
1114
progress_info)) != 0) {
1115
s_BlastRunFullSearchCleanUp(gap_align, score_params, ext_params,
1116
hit_params, eff_len_params);
963
1120
/* Prohibit any subsequent writing to the HSP stream. */
964
1121
BlastHSPStreamClose(hsp_stream);