56
56
#ifndef SKIP_DOXYGEN_PROCESSING
57
57
static char const rcsid[] =
58
"$Id: blast_engine.c,v 1.270 2010/08/06 14:24:46 kazimird Exp $";
58
"$Id: blast_engine.c,v 1.279 2011/05/31 16:09:30 kazimird Exp $";
59
59
#endif /* SKIP_DOXYGEN_PROCESSING */
61
61
#include <algo/blast/core/blast_engine.h>
85
85
NCBI_XBLAST_EXPORT const int kBlastMajorVersion = 2;
86
86
NCBI_XBLAST_EXPORT const int kBlastMinorVersion = 2;
87
NCBI_XBLAST_EXPORT const int kBlastPatchVersion = 24;
88
NCBI_XBLAST_EXPORT const char* kBlastReleaseDate = "Aug-02-2010";
87
NCBI_XBLAST_EXPORT const int kBlastPatchVersion = 25;
88
NCBI_XBLAST_EXPORT const char* kBlastReleaseDate = "Feb-01-2011";
90
90
/** Structure to be passed to s_BlastSearchEngineCore, containing pointers
91
91
to various preallocated structures and arrays. */
162
162
backup->num_soft_ranges = 1;
163
163
backup->sm_index = 0;
165
if (subject->mask_type == DB_MASK_SOFT) {
165
if (subject->mask_type == eSoftSubjMasking) {
166
166
ASSERT (backup->seq_ranges);
167
167
ASSERT (backup->num_seq_ranges >= 1);
168
168
backup->soft_ranges = backup->seq_ranges;
169
169
backup->num_soft_ranges = backup->num_seq_ranges;
170
} else if (subject->mask_type == DB_MASK_HARD) {
170
} else if (subject->mask_type == eHardSubjMasking) {
171
171
ASSERT (backup->seq_ranges);
172
172
ASSERT (backup->num_seq_ranges >= 1);
173
173
backup->hard_ranges = backup->seq_ranges;
261
261
/* if soft masking is off */
262
if (subject->mask_type != DB_MASK_SOFT) {
262
if (subject->mask_type != eSoftSubjMasking) {
263
263
s_AllocateSeqRange(subject, backup, 1);
264
264
subject->seq_ranges[0].left = residual;
265
265
subject->seq_ranges[0].right = subject->length;
632
632
Uint4 context, first_context, last_context;
633
633
BlastQueryInfo* query_info = query_info_in;
634
634
Int4 orig_length = subject->length;
635
Int4 stat_length = subject->length;
636
// To support rmblastn -RMH-
637
BlastScoreBlk* sbp = gap_align->sbp;
636
639
const Boolean kTranslatedSubject =
637
640
(Blast_SubjectIsTranslated(program_number) || program_number == eBlastTypeRpsTblastn);
647
650
if (kTranslatedSubject) {
649
652
s_BackupSubject(subject, &backup);
650
if (subject->mask_type) {
653
if (subject->mask_type != eNoSubjMasking) {
651
654
s_AllocateSeqRange(subject, &backup, backup.num_seq_ranges);
653
656
subject->num_seq_ranges = 0;
710
713
subject->frame = BLAST_ContextToFrame(eBlastTypeBlastx, context);
711
714
subject->sequence = translation_buffer + frame_offsets[context] + 1;
712
715
subject->length = frame_offsets[context+1] - frame_offsets[context] - 1;
716
if (subject->length > 0) stat_length = subject->length;
714
718
/* perform per-context mask translation */
715
719
if (context == 0) { /* first positive context */
770
774
status = BLAST_LinkHsps(program_number, hsp_list_out, query_info,
771
775
subject->length, gap_align->sbp, hit_params->link_hsp_params,
772
776
score_options->gapped_calculation);
773
} else if (!Blast_ProgramIsPhiBlast(program_number) &&
774
!Blast_ProgramIsRpsBlast(program_number)) {
777
} else if (!Blast_ProgramIsPhiBlast(program_number)
778
&& !(Blast_ProgramIsRpsBlast(program_number) && !sbp->gbp) ){
775
779
/* Calculate e-values for all HSPs. Skip this step
776
for PHI and RPS BLAST, since calculating the E values
780
for PHI or RPS with old FSC, since calculating the E values
777
781
requires precomputation that has not been done yet */
778
status = Blast_HSPListGetEvalues(query_info, hsp_list_out,
782
Boolean isRPS = FALSE;
783
double scale_factor = 1.0;
784
if (Blast_ProgramIsRpsBlast(program_number)) {
786
scale_factor = score_params->scale_factor;
788
status = Blast_HSPListGetEvalues(query_info, stat_length, hsp_list_out,
779
789
score_options->gapped_calculation,
780
gap_align->sbp, 0, 1.0);
790
isRPS, gap_align->sbp, 0, scale_factor);
791
sbp->matrix_only_scoring = FALSE;
783
/* Discard HSPs that don't pass the e-value test. */
784
status = Blast_HSPListReapByEvalue(hsp_list_out, hit_options);
794
/* Use score threshold rather than evalue if
795
* matrix_only_scoring is used. -RMH-
797
if ( sbp->matrix_only_scoring )
799
status = Blast_HSPListReapByRawScore(hsp_list_out, hit_options);
801
/* Discard HSPs that don't pass the e-value test. */
802
status = Blast_HSPListReapByEvalue(hsp_list_out, hit_options);
786
805
/* If there are no HSPs left, destroy the HSP list too. */
787
806
if (hsp_list_out && hsp_list_out->hspcnt == 0)
1122
1141
/* iterate over all subject sequences */
1123
1142
while ( (seq_arg.oid = BlastSeqSrcIteratorNext(seq_src, itr))
1124
1143
!= BLAST_SEQSRC_EOF) {
1125
1145
if (seq_arg.oid == BLAST_SEQSRC_ERROR)
1127
1147
if (BlastSeqSrcGetSequence(seq_src, &seq_arg) < 0)
1157
1179
GenCodeSingletonFind(db_options->genetic_code);
1159
1181
ASSERT(seq_arg.seq->gen_code_string);
1182
stat_length /= CODON_LENGTH;
1162
1185
s_BlastSearchEngineCore(program_number, query, query_info,
1193
1216
hit_params->link_hsp_params,
1194
1217
gapped_calculation);
1196
Blast_HSPListGetEvalues(query_info, hsp_list,
1219
Blast_HSPListGetEvalues(query_info, stat_length,
1220
hsp_list, gapped_calculation, FALSE,
1200
status = Blast_HSPListReapByEvalue(hsp_list,
1201
hit_params->options);
1223
/* Use score threshold rather than evalue if
1224
* matrix_only_scoring is used. -RMH-
1226
if ( sbp->matrix_only_scoring )
1228
status = Blast_HSPListReapByRawScore(hsp_list,
1229
hit_params->options);
1231
status = Blast_HSPListReapByEvalue(hsp_list,
1232
hit_params->options);
1203
1235
/* Calculate and fill the bit scores, since there will be no
1204
1236
traceback stage where this can be done. */
1205
1237
Blast_HSPListGetBitScores(hsp_list, gapped_calculation, sbp);
1271
Blast_RunPreliminarySearch(EBlastProgramType program,
1272
BLAST_SequenceBlk* query,
1273
BlastQueryInfo* query_info,
1274
const BlastSeqSrc* seq_src,
1275
const BlastScoringOptions* score_options,
1277
LookupTableWrap* lookup_wrap,
1278
const BlastInitialWordOptions* word_options,
1279
const BlastExtensionOptions* ext_options,
1280
const BlastHitSavingOptions* hit_options,
1281
const BlastEffectiveLengthsOptions* eff_len_options,
1282
const PSIBlastOptions* psi_options,
1283
const BlastDatabaseOptions* db_options,
1284
BlastHSPStream* hsp_stream,
1285
BlastDiagnostics* diagnostics)
1287
return Blast_RunPreliminarySearchWithInterrupt(program,
1288
query, query_info, seq_src, score_options, sbp, lookup_wrap,
1289
word_options, ext_options, hit_options, eff_len_options,
1290
psi_options, db_options, hsp_stream, diagnostics, NULL, NULL);
1239
Blast_RunPreliminarySearch(EBlastProgramType program,
1294
Blast_RunPreliminarySearchWithInterrupt(EBlastProgramType program,
1240
1295
BLAST_SequenceBlk* query,
1241
1296
BlastQueryInfo* query_info,
1242
1297
const BlastSeqSrc* seq_src,
1250
1305
const PSIBlastOptions* psi_options,
1251
1306
const BlastDatabaseOptions* db_options,
1252
1307
BlastHSPStream* hsp_stream,
1253
BlastDiagnostics* diagnostics)
1308
BlastDiagnostics* diagnostics,
1309
TInterruptFnPtr interrupt_search, SBlastProgress* progress_info)
1255
1311
Int2 status = 0;
1256
1312
BlastScoringParameters* score_params = NULL;/**< Scoring parameters */
1279
1335
lookup_wrap, word_options,
1280
1336
ext_params, hit_params, eff_len_params,
1281
1337
psi_options, db_options, hsp_stream,
1282
local_diagnostics, 0, 0)) != 0)
1338
local_diagnostics, interrupt_search,
1339
progress_info)) != 0)
1285
1342
/* Do not destruct score block here */