47
49
further manipulation.
49
51
******************************************************************************
52
54
* $Log: blast.c,v $
55
* Revision 6.406 2004/05/21 13:53:37 dondosha
56
* Fix in BLASTMergeHitLists
58
* Revision 6.405 2004/04/28 14:37:06 madden
59
* Changes from Mike Gertz
60
* - modified the link_hsps routine to apply the gap_prob parameter to
61
* the result of BlastSmallGapSumE and BlastLargeGapSumE.
62
* - further modified link_hsps to use BlastGapDecayDivisor to weight
63
* tests based on multiple collections of HSPs.
64
* - removed all reference to gap_prob from the new_link_hsps.
65
* - further modified new_link_hsps to use BlastGapDecayDivisor to weight
66
* tests based on multiple collections of HSPs.
68
* Revision 6.404 2004/04/20 14:55:47 morgulis
69
* 1. Fixed query offsets in results when -B option is used.
70
* 2. Fixes for lower case masking handling with -B option.
72
* Revision 6.403 2004/04/13 21:03:30 madden
73
* Use ignore_gilist Boolean to determine whether gilist lookup should occur
75
* Revision 6.402 2004/03/31 17:58:51 papadopo
76
* Mike Gertz' changes for length adjustment calculations
78
* Revision 6.401 2004/03/22 15:35:39 dondosha
79
* 1. Do not allow cutoff score for saving HSPs to be smaller than gap trigger;
80
* 2. When merging hitlists with a restriction on number of HSPs, keep best
83
* Revision 6.400 2004/02/26 15:52:29 papadopo
84
* Mike Gertz' modifications to unify handling of gapped Karlin blocks between protein and nucleotide searches
86
* Revision 6.399 2004/02/24 14:07:00 camacho
87
* Use approximate sequence length calculation for entrez-limited
88
* nucleotide blast databases.
90
* Revision 6.398 2004/02/03 17:54:16 dondosha
91
* Correction to revision 6.391 in function BlastGetDbChunk
93
* Revision 6.397 2004/01/06 22:37:10 dondosha
94
* Use BLAST_HSPfree function
96
* Revision 6.396 2003/12/29 15:42:46 coulouri
97
* tblastn query concatenation fixes from morgulis
99
* Revision 6.395 2003/12/12 16:01:23 madden
100
* Change to signature of BlastCutoffs, remove BlastCutoffs_simple
102
* Revision 6.394 2003/12/10 17:05:27 dondosha
103
* Added function ReevaluateScoreWithAmbiguities to reevaluate score for one HSP; use it after greedy traceback
105
* Revision 6.393 2003/11/19 18:09:13 dondosha
106
* Use consistent rounding in length adjustment calculation
108
* Revision 6.392 2003/11/10 20:15:29 dondosha
109
* Bug fix in BLASTMergeHsps
111
* Revision 6.391 2003/10/23 17:46:17 dondosha
112
* Fix in BlastGetDbChunk for looking up ordinal ids within a range
114
* Revision 6.390 2003/08/08 16:36:21 dondosha
115
* 1. Treat final_db_seq as 1 beyond the final sequence; 0 is an exception, meaning end of database.
116
* 2. Added more meaningful error message when query length is less than wordsize.
118
* Revision 6.389 2003/05/30 17:20:10 coulouri
121
* Revision 6.388 2003/05/14 20:35:58 camacho
122
* Allow searching empty databases
124
* Revision 6.387 2003/05/13 16:02:53 coulouri
125
* make ErrPostEx(SEV_FATAL, ...) exit with nonzero status
127
* Revision 6.386 2003/05/12 12:23:43 camacho
128
* Sanity check for number of sequences & db length
130
* Revision 6.385 2003/04/23 15:15:36 camacho
131
* Moved reading of gi list to readdb
133
* Revision 6.384 2003/03/24 19:42:13 madden
134
* Changes to support query concatenation for blastn and tblastn
136
* Revision 6.383 2003/03/14 22:33:44 dondosha
137
* Do not increase preliminary hitlist size for ungapped search
139
* Revision 6.382 2003/03/06 19:10:42 madden
140
* Allow search->pbp->process_num to be > 1 if MT enabled
142
* Revision 6.381 2003/03/05 21:30:24 dondosha
143
* Fix in BlastMakeCopyQueryDNAP for single-strand OOF search
145
* Revision 6.380 2002/12/24 14:12:03 dondosha
146
* Removed accidental duplicate lines
148
* Revision 6.379 2002/12/10 23:13:22 bealer
149
* Fix do_the_blast_run and BlastGetDbChunk to calculate beginning and ending
150
* sequence numbers correctly.
151
* Fix BlastGetDbChunk to use precise start and end points, not nearest
153
* Fix do_the_blast_run and BlastGetDbChunk to handle mixed oidlist / real db
154
* multiple database scenarios.
156
* Revision 6.378 2002/12/04 22:39:51 bealer
157
* Undo previous set of changes.
159
* Revision 6.377 2002/11/25 19:53:34 bealer
160
* Remove extraneous commented code.
162
* Revision 6.376 2002/11/25 19:50:26 bealer
163
* Prevent extra work by BlastGetDbChunk when OID lists are used.
165
* Revision 6.375 2002/11/13 18:03:10 dondosha
166
* Correction in BlastReevaluateWithAmbiguities
168
* Revision 6.374 2002/11/08 14:58:43 kans
169
* first argument to NlmReadMFILE must be cast as Uint1Ptr - Mac compiler picked up this inconsistency with the prototype
171
* Revision 6.373 2002/11/07 21:06:15 camacho
172
* Made GetGisFromFile work even without mmap
174
* Revision 6.372 2002/11/04 22:55:56 dondosha
175
* For blastn, calculate number of identities in BlastReevaluateWithAmbiguities
177
* Revision 6.371 2002/10/28 21:44:03 madden
178
* Added comments about gap-free extensions
180
* Revision 6.370 2002/09/18 20:23:19 camacho
181
* Added BLASTCalculateSearchSpace
183
* Revision 6.369 2002/09/11 20:46:25 camacho
184
* Removed deprecated BlastSeqIdListPtr code
186
* Revision 6.368 2002/08/30 18:56:02 dondosha
187
* Made BlastMakeTempProteinBioseq and HackSeqLocId public: needed for Cn3D
189
* Revision 6.367 2002/08/30 15:42:48 dondosha
190
* In blastn, use ewp structure only for the first context
192
* Revision 6.366 2002/08/22 13:39:45 camacho
193
* Close the header and sequence files only if allocated
195
* Revision 6.365 2002/08/07 21:37:47 camacho
196
* Do not remove the search block prematurely in do_gapped_blast_search
198
* Revision 6.364 2002/08/06 17:33:50 madden
199
* Fix return value problem
201
* Revision 6.363 2002/07/19 17:55:47 dondosha
202
* 1.Return 0 status from BLASTPerformFinalSearch when database sequence has 0 length;
203
* 2. Do not destroy search block too early.
205
* Revision 6.362 2002/07/15 18:53:27 camacho
206
* Small fix to previous commit
208
* Revision 6.361 2002/07/14 17:18:13 camacho
209
* Fixed small memory leak in do_blast_search/do_gapped_blast_search
211
* Revision 6.360 2002/07/12 18:02:55 dondosha
212
* Do not call AdjustOffsetsInMaskLoc if no lower case mask
214
* Revision 6.359 2002/07/12 16:06:26 dondosha
215
* Adjust offsets and remove unneeded lower case mask locations when query is a subsequence
217
* Revision 6.358 2002/06/27 13:01:26 kans
218
* BlastGetVirtualOIDList is LIBCALL
220
* Revision 6.357 2002/06/26 00:56:28 camacho
222
* 1. Fixed bug when searching a mixture of real and mask databases.
223
* 2. Clean up of code that calculates the number of sequences and database
226
* Revision 6.356 2002/06/25 16:43:45 dondosha
227
* Get out from all search loops if bad status returned, meaning process ran out of memory
229
* Revision 6.355 2002/06/25 13:11:22 madden
230
* Fix UMR for status in do_gapped_blast_search
232
* Revision 6.354 2002/06/21 21:49:10 camacho
233
* Removed references to thr_info->blast_seqid_list in BlastGetDbChunk
235
* Revision 6.353 2002/06/12 15:43:09 dondosha
236
* Potential uninitialized variable bug fixed
238
* Revision 6.352 2002/06/12 15:33:25 dondosha
239
* Corrected integer types of the variable holding return status in 2 functions
241
* Revision 6.351 2002/06/11 20:40:04 dondosha
242
* Correction to previous change
244
* Revision 6.350 2002/06/11 14:44:45 dondosha
245
* Return status from some functions instead of search block pointer
247
* Revision 6.349 2002/06/05 15:30:34 coulouri
248
* Move signal handling to blastsrv.c
250
* Revision 6.348 2002/05/20 22:49:10 dondosha
251
* Fix for the Mega BLAST case when database sequence is split, and an HSP is accidentally extended across the boundary to a completely masked query
253
* Revision 6.347 2002/05/15 19:51:01 dondosha
254
* Do a sanity check for the final db sequence parameter
256
* Revision 6.346 2002/04/23 16:01:27 madden
257
* Fix for ungapped search of arbitrary matrix
259
* Revision 6.345 2002/04/23 15:40:10 madden
260
* Fix for effective length change and ungapped blast
262
* Revision 6.344 2002/04/19 21:22:30 madden
263
* Added protection for matrices that are only empty strings
265
* Revision 6.343 2002/04/18 12:07:05 madden
266
* Check for Selenocysteine in Bioseq, replace with X
268
* Revision 6.342 2002/04/17 17:30:15 madden
269
* Call getAlphaBeta only for gapped alignments
271
* Revision 6.341 2002/04/16 15:42:15 madden
272
* Save mask1 for lookup table hashing only (change for neighboring)
274
* Revision 6.340 2002/04/04 21:19:15 dondosha
275
* Corrections for megablast with non-greedy extensions
277
* Revision 6.339 2002/03/26 21:20:50 dondosha
278
* 1. Make hitlist size larger for preliminary gapped alignment
279
* 2. Pass readdb structure to megablast set up if it is already initialized
281
* Revision 6.338 2002/03/26 16:46:40 madden
282
* Move calculation of effective lengths to BlastCalculateEffectiveLengths
284
* Revision 6.337 2002/03/06 18:34:31 dondosha
285
* Pass the filtered locations back from the megablast engine to use in formatting
287
* Revision 6.336 2002/02/27 22:39:00 dondosha
288
* Fixed bug in splitting long database sequences for translated searches
290
* Revision 6.335 2002/02/27 17:43:20 dondosha
291
* Made effective database length option work properly
293
* Revision 6.334 2002/02/26 22:25:20 dondosha
294
* Return error as soon as it is found that matrix name is not supported
296
* Revision 6.333 2002/02/26 17:37:40 dondosha
297
* Fixed bug in BlastNtWordFinder for word sizes > 12
299
* Revision 6.332 2002/02/26 15:03:13 dondosha
300
* Accidental newline in sprintf removed
302
* Revision 6.331 2002/02/25 23:26:57 dondosha
303
* Changed error to warning if no letters to be indexed just on one context
53
305
* Revision 6.330 2002/01/04 22:01:33 coulouri
54
306
* Fixed BlastSetLimits() to work under linux
2776
3050
Boolean done=FALSE;
2777
3051
Int4 ordinal_id;
3052
OIDListPtr virtual_oidlist = NULL;
2780
3053
*id_list_number = 0;
2782
3055
NlmMutexLockEx(&thr_info->db_mutex);
2783
3056
if (thr_info->realdb_done) {
2784
if (rdfp->oidlist) {
3057
if (virtual_oidlist = BlastGetVirtualOIDList(rdfp)) {
2785
3058
/* Virtual database. Create id_list using mask file */
2786
OIDListPtr oidlist = rdfp->oidlist;
2788
Int4 maskindex, base, i;
2791
Int4 total_mask = oidlist->total/MASK_WORD_SIZE + 1;
2794
if (thr_info->gi_current < total_mask) {
2797
maskindex = thr_info->gi_current;
2798
base = thr_info->gi_current * MASK_WORD_SIZE;
2802
/* for each long-word mask */
2803
mask = Nlm_SwapUint4(oidlist->list[maskindex]);
2807
if (mask & (((Uint4)0x1)<<(MASK_WORD_SIZE-1))) {
2808
id_list[oidindex++] = base + i;
3061
thr_info->final_db_seq = MIN(thr_info->final_db_seq, virtual_oidlist->total);
3063
gi_end = thr_info->final_db_seq;
3065
if (thr_info->gi_current < gi_end) {
3067
Int4 gi_start = thr_info->gi_current;
3068
Int4 bit_start = gi_start % MASK_WORD_SIZE;
3071
for(gi = gi_start; (gi < gi_end) && (oidindex < thr_info->db_chunk_size);) {
3072
Int4 bit_end = ((gi_end - gi + bit_start) < MASK_WORD_SIZE) ? (gi_end - gi + bit_start) : MASK_WORD_SIZE;
3075
Uint4 mask_index = gi / MASK_WORD_SIZE;
3076
Uint4 mask_word = Nlm_SwapUint4(virtual_oidlist->list[mask_index]);
3079
for(bit = bit_start; bit<bit_end; bit++) {
3080
Uint4 bitshift = (MASK_WORD_SIZE-1)-bit;
3082
if ((mask_word >> bitshift) & 1) {
3083
id_list[ oidindex++ ] = (gi - bit_start) + bit;
2814
base += MASK_WORD_SIZE;
2815
if (maskindex >= total_mask ||
2816
oidindex > thr_info->db_chunk_size) {
3088
gi += bit_end - bit_start;
2820
thr_info->gi_current = maskindex;
3092
thr_info->gi_current = gi;
2821
3093
*id_list_number = oidindex;
2822
BlastTickProc(thr_info->gi_current, thr_info);
3094
BlastTickProc(thr_info->gi_current/32, thr_info);
2830
} else if (thr_info->blast_seqid_list) {
2831
/* global_seqid_list indicates there is a list of selected ID's,
2832
global_seqid_ptr is the position in the list. */
2833
sip = thr_info->blast_seqid_list->seqid_ptr;
2835
NlmMutexUnlock(thr_info->db_mutex);
2839
ordinal_id = SeqId2OrdinalId(rdfp, sip);
2840
while (ordinal_id < 0 && sip) {
2841
thr_info->blast_seqid_list->seqid_ptr =
2842
thr_info->blast_seqid_list->seqid_ptr->next;
2843
sip = thr_info->blast_seqid_list->seqid_ptr;
2844
ordinal_id = SeqId2OrdinalId(rdfp, sip);
2847
if (thr_info->blast_seqid_list->seqid_ptr) {
2848
*start = ordinal_id;
2849
*stop = ordinal_id+1;
2851
thr_info->blast_seqid_list->seqid_ptr =
2852
thr_info->blast_seqid_list->seqid_ptr->next;
3103
int real_readdb_entries;
3104
int total_readdb_entries;
3107
real_readdb_entries = readdb_get_num_entries_total_real(rdfp);
3108
total_readdb_entries = readdb_get_num_entries_total(rdfp);
3109
final_real_seq = MIN( real_readdb_entries, thr_info->final_db_seq );
2858
3111
/* we have real database with start/stop specified */
2859
if (thr_info->db_mutex) {
3112
if (thr_info->db_mutex) {
3113
/* Emit a tick if needed. */
3114
BlastTickProc(thr_info->db_chunk_last, thr_info);
3115
*start = thr_info->db_chunk_last;
3116
if (thr_info->db_chunk_last < final_real_seq) {
3117
*stop = MIN((thr_info->db_chunk_last +
3118
thr_info->db_chunk_size), final_real_seq);
3119
} else {/* Already finished. */
3120
*stop = thr_info->db_chunk_last;
2861
/* Emit a tick if needed. */
2862
BlastTickProc(thr_info->db_chunk_last, thr_info);
2863
*start = thr_info->db_chunk_last;
2864
if (thr_info->db_chunk_last < thr_info->final_db_seq) {
2865
*stop = MIN((thr_info->db_chunk_last +
2866
thr_info->db_chunk_size),
2867
thr_info->final_db_seq);
2868
} else {/* Already finished. */
2869
*stop = thr_info->db_chunk_last;
2870
thr_info->realdb_done = TRUE;
2872
thr_info->db_chunk_last = *stop;
2874
if (*stop != thr_info->final_db_seq) {
2876
*start = thr_info->last_db_seq;
2877
*stop = thr_info->final_db_seq;
2879
thr_info->realdb_done = TRUE;
3122
/* Change parameters for oidlist processing. */
3123
thr_info->realdb_done = TRUE;
3125
thr_info->db_chunk_last = *stop;
3127
if (*stop != final_real_seq) {
3129
*start = thr_info->last_db_seq;
3130
*stop = final_real_seq;
3132
thr_info->realdb_done = TRUE;
3134
if (total_readdb_entries == real_readdb_entries) {
3137
thr_info->gi_current = final_real_seq;
2884
3143
NlmMutexUnlock(thr_info->db_mutex);
2889
/* Signal handler for SIGXCPU, exceeded cpu time soft limit. */
2892
timeout_shutdown(int flag)
2896
time_out_boolean = TRUE;
2898
/* this probably isn't the best way to do this.
2899
* probably ought to be rewritten with posix sigaction() instead.
2900
* -coulouri 20020104
2904
signal(SIGXCPU, SIG_IGN);
2914
* call setrlimit() on unix systems to cap cpu time.
2915
* changed to ignore number of cpus, since this limit is per process,
2916
* not per thread. also added a hard limit at 2x the soft limit.
2917
* -coulouri 20020104
2921
BlastSetLimits(Int4 cpu_limit, Int2 num_cpu)
2932
rl.rlim_cur = cpu_limit;
2933
rl.rlim_max = 2 * cpu_limit;
2935
if (setrlimit(RLIMIT_CPU, &rl) == -1) return FALSE;
2937
if (signal(SIGXCPU, timeout_shutdown) == SIG_ERR) return FALSE;
2941
time_out_boolean = FALSE;
2945
3147
static void do_on_exit_func(VoidPtr ptr)
2947
3149
BlastSearchBlkPtr search;
2958
3160
do_gapped_blast_search(VoidPtr ptr)
2961
BlastSearchBlkPtr search;
2963
Int4 index, index1, start=0, stop=0, id_list_length;
2964
Int4Ptr id_list=NULL;
2966
search = (BlastSearchBlkPtr) ptr;
2967
if (search->thr_info->blast_gi_list || search->rdfp->oidlist) {
2968
id_list = MemNew((search->thr_info->db_chunk_size+33)
3163
BlastSearchBlkPtr search;
3165
Int4 index, index1, start=0, stop=0, id_list_length;
3166
Int4Ptr id_list=NULL;
3167
Uint4 i; /* AM: Support for query concatenation. */
3169
search = (BlastSearchBlkPtr) ptr;
3170
if (search->thr_info->blast_gi_list || BlastGetVirtualOIDList(search->rdfp))
3172
id_list = MemNew((search->thr_info->db_chunk_size+33)*sizeof(Int4));
3175
if (NlmThreadsAvailable() && search->pbp->process_num > 1)
3176
NlmThreadAddOnExit(do_on_exit_func, ptr);
3179
while (BlastGetDbChunk(search->rdfp, &start, &stop, id_list,
3180
&id_list_length, search->thr_info) != TRUE)
3182
if (id_list && id_list_length)
3184
for (index=0; index<id_list_length; index++)
3186
index1 = id_list[index];
3187
if ((status = BLASTPerformSearchWithReadDb(search, index1)) != 0)
3190
if (search->prog_number == blast_type_blastx
3191
|| search->prog_number == blast_type_tblastn
3192
|| search->prog_number == blast_type_psitblastn)
3193
status = BlastLinkHsps(search);
3195
status = BlastReapHitlistByEvalue(search);
3196
if (search->handle_results)
3197
search->handle_results((VoidPtr) search);
3199
BlastSaveCurrentHitlist(search);
3200
/* Emit a tick if needed and we're not MT. */
3201
if (search->thr_info->db_mutex == NULL)
3202
BlastTickProc(index1, search->thr_info);
3203
if (time_out_boolean == TRUE)
3206
} else if (!search->thr_info->realdb_done) {
3207
for (index=start; index<stop; index++)
3209
if ((status = BLASTPerformSearchWithReadDb(search, index)) != 0)
3212
/* AM: Support for query concatenation. */
3213
if( !search->mult_queries )
3215
if (search->prog_number == blast_type_blastx
3216
|| search->prog_number == blast_type_tblastn
3217
|| search->prog_number == blast_type_psitblastn)
3218
status = BlastLinkHsps(search);
3220
status = BlastReapHitlistByEvalue(search);
3221
if (search->handle_results)
3222
search->handle_results((VoidPtr) search);
3224
BlastSaveCurrentHitlist(search);
3226
else /* AM: Support for query concatenation. */
3228
InitHitLists( search );
3229
search->mult_queries->use_mq = TRUE;
3230
search->mult_queries->delete_current_hitlist = FALSE;
3232
for( i = 0; i < search->mult_queries->NumQueries; ++i )
3234
search->mult_queries->current_query = i;
3236
if( search->prog_number == blast_type_blastx
3237
|| search->prog_number == blast_type_tblastn
3238
|| search->prog_number == blast_type_psitblastn )
3239
status = BlastLinkHsps( search );
3241
status = BlastReapHitlistByEvalue( search );
3243
if( search->handle_results )
3244
search->handle_results( (VoidPtr)search );
3246
BlastSaveCurrentHitlist( search );
3249
if( search->mult_queries->delete_current_hitlist )
3251
search->current_hitlist
3252
= BlastHitListDestruct( search->current_hitlist );
3255
search->mult_queries->use_mq = FALSE;
3256
BlastHitListPurge( search->current_hitlist );
3259
/* Emit a tick if needed and we're not MT. */
3260
if (search->thr_info->db_mutex == NULL)
3261
BlastTickProc(index, search->thr_info);
3262
if (time_out_boolean == TRUE)
2972
NlmThreadAddOnExit(do_on_exit_func, ptr);
2975
while (BlastGetDbChunk(search->rdfp, &start, &stop, id_list, &id_list_length, search->thr_info) != TRUE)
2979
for (index=0; index<id_list_length; index++)
2981
index1 = id_list[index];
2982
search = BLASTPerformSearchWithReadDb(search, index1);
2983
if (search->prog_number == blast_type_blastx
2984
|| search->prog_number == blast_type_tblastn
2985
|| search->prog_number == blast_type_psitblastn)
2987
status = BlastLinkHsps(search);
2989
status = BlastReapHitlistByEvalue(search);
2990
if (search->handle_results)
2991
search->handle_results((VoidPtr) search);
2993
BlastSaveCurrentHitlist(search);
2994
/* Emit a tick if needed and we're not MT. */
2995
if (search->thr_info->db_mutex == NULL)
2996
BlastTickProc(index1, search->thr_info);
2997
if (time_out_boolean == TRUE)
3001
for (index=start; index<stop; index++)
3003
search = BLASTPerformSearchWithReadDb(search, index);
3005
if (search->prog_number == blast_type_blastx
3006
|| search->prog_number == blast_type_tblastn
3007
|| search->prog_number == blast_type_psitblastn)
3009
status = BlastLinkHsps(search);
3011
status = BlastReapHitlistByEvalue(search);
3012
if (search->handle_results)
3013
search->handle_results((VoidPtr) search);
3015
BlastSaveCurrentHitlist(search);
3016
/* Emit a tick if needed and we're not MT. */
3017
if (search->thr_info->db_mutex == NULL)
3018
BlastTickProc(index, search->thr_info);
3019
if (time_out_boolean == TRUE)
3023
/* Get out if "stop" was the last seq. */
3024
if (time_out_boolean)
3029
id_list = MemFree(id_list);
3031
return (VoidPtr) search;
3266
/* Get out if "stop" was the last seq. */
3267
if (time_out_boolean || status)
3272
id_list = MemFree(id_list);
3274
if (NlmThreadsAvailable() && search->pbp->process_num > 1)
3275
NlmThreadRemoveOnExit(do_on_exit_func, ptr);
3277
return (VoidPtr) search;
3038
3284
BlastSearchBlkPtr search;
3040
3286
Int4 index, index1, start=0, stop=0, id_list_length;
3041
3287
Int4Ptr id_list=NULL;
3288
Uint4 i; /* AM: Query multiplexing. */
3043
3290
search = (BlastSearchBlkPtr) ptr;
3044
if (search->thr_info->blast_gi_list || search->rdfp->oidlist) {
3291
if (search->thr_info->blast_gi_list || BlastGetVirtualOIDList(search->rdfp))
3045
3293
id_list = MemNew((search->thr_info->db_chunk_size+33)
3046
3294
*sizeof(Int4));
3049
NlmThreadAddOnExit(do_on_exit_func, ptr);
3297
if (NlmThreadsAvailable() && search->pbp->process_num > 1)
3298
NlmThreadAddOnExit(do_on_exit_func, ptr);
3051
3300
while (BlastGetDbChunk(search->rdfp, &start, &stop, id_list,
3052
3301
&id_list_length, search->thr_info) != TRUE) {
3053
3302
if (search->thr_info->realdb_done && id_list) {
3054
3303
for (index=0; index<id_list_length; index++) {
3055
3304
index1 = id_list[index];
3056
search = BLASTPerformSearchWithReadDb(search, index1);
3057
if (search->pbp->do_sum_stats == TRUE &&
3058
!search->pbp->mb_params)
3059
status = BlastLinkHsps(search);
3061
status = BlastGetNonSumStatsEvalue(search);
3062
if (!search->pbp->mb_params || !search->handle_results)
3305
if ((status = BLASTPerformSearchWithReadDb(search, index1))
3308
if (!search->pbp->mb_params) {
3309
if (search->pbp->do_sum_stats == TRUE)
3310
status = BlastLinkHsps(search);
3312
status = BlastGetNonSumStatsEvalue(search);
3063
3313
status = BlastReapHitlistByEvalue(search);
3064
if (search->pbp->mb_params)
3065
MegaBlastReevaluateWithAmbiguities(search, index1);
3066
else if (!search->handle_results)
3067
status = BlastReevaluateWithAmbiguities(search, index1);
3314
if (!search->handle_results)
3315
status = BlastReevaluateWithAmbiguities(search, index1);
3317
MegaBlastReevaluateWithAmbiguities(search);
3069
3320
if (search->handle_results)
3070
3321
search->handle_results((VoidPtr) search);
3085
3336
} else if (!search->thr_info->realdb_done) {
3086
3337
for (index=start; index<stop; index++) {
3087
search = BLASTPerformSearchWithReadDb(search, index);
3088
if (search->pbp->do_sum_stats == TRUE &&
3089
!search->pbp->mb_params)
3090
status = BlastLinkHsps(search);
3092
status = BlastGetNonSumStatsEvalue(search);
3093
if (!search->pbp->mb_params ||
3094
!search->handle_results)
3338
if ((status = BLASTPerformSearchWithReadDb(search, index))
3341
if (!search->pbp->mb_params) {
3342
if (search->pbp->do_sum_stats == TRUE)
3343
status = BlastLinkHsps(search);
3345
status = BlastGetNonSumStatsEvalue(search);
3095
3346
status = BlastReapHitlistByEvalue(search);
3096
if (search->pbp->mb_params)
3097
MegaBlastReevaluateWithAmbiguities(search, index);
3098
else if (!search->handle_results)
3099
status = BlastReevaluateWithAmbiguities(search, index);
3347
if (!search->handle_results)
3348
status = BlastReevaluateWithAmbiguities(search, index);
3350
MegaBlastReevaluateWithAmbiguities(search);
3101
3352
if (search->handle_results)
3102
3353
search->handle_results((VoidPtr) search);
3103
3354
else if (!search->pbp->mb_params)
3104
BlastSaveCurrentHitlist(search);
3355
{ /* AM: Query multiplexing. */
3356
if( !search->mult_queries )
3357
BlastSaveCurrentHitlist(search);
3360
InitHitLists( search );
3361
search->mult_queries->use_mq = TRUE;
3362
search->mult_queries->delete_current_hitlist = FALSE;
3364
for( i = 0; i < search->mult_queries->NumQueries; ++i )
3366
search->mult_queries->current_query = i;
3367
BlastSaveCurrentHitlist(search);
3370
if( search->mult_queries->delete_current_hitlist )
3372
search->current_hitlist
3373
= BlastHitListDestruct( search->current_hitlist );
3376
search->mult_queries->use_mq = FALSE;
3377
BlastHitListPurge( search->current_hitlist );
3106
3381
MegaBlastSaveCurrentHitlist(search);
3138
3417
ReadDBFILEPtr rdfp;
3139
3418
TNlmThread PNTR thread_array;
3140
3419
VoidPtr status=NULL;
3420
int num_entries_total;
3421
int num_entries_total_real;
3142
3425
if (search == NULL)
3428
num_entries_total = readdb_get_num_entries_total (search->rdfp);
3429
num_entries_total_real = readdb_get_num_entries_total_real(search->rdfp);
3431
/* Set 'done with read db' according to whether real databases are present */
3433
if (num_entries_total_real) {
3434
search->thr_info->realdb_done = FALSE;
3436
search->thr_info->realdb_done = TRUE;
3439
/* Make sure first, last sequence indices are in-range (0, NUM-1) */
3441
/* NOTE: search->pbp->final_seq is 1 beyond the last sequence ordinal id,
3442
except when it's <=0, which means search to the last sequence in the
3444
/* search->thr_info versions are not. */
3446
if (search->pbp->final_db_seq > 0) {
3447
end_seq = MIN(search->pbp->final_db_seq, num_entries_total);
3449
end_seq = num_entries_total;
3452
start_seq = MAX(0, MIN(search->pbp->first_db_seq, end_seq));
3454
/* Set BlastGetDbChunk()'s pointers and counters */
3456
search->thr_info->last_db_seq =
3457
search->thr_info->gi_current =
3458
search->thr_info->db_chunk_last = start_seq;
3460
search->thr_info->final_db_seq = end_seq;
3145
3463
search->thr_info->db_chunk_size = BLAST_DB_CHUNK_SIZE;
3148
readdb_get_totals(search->rdfp, &total_length, &num_seq);
3150
3466
num_seq = search->dbseq_num;
3152
3467
search->thr_info->db_incr = num_seq / BLAST_NTICKS;
3153
search->thr_info->last_db_seq = search->pbp->first_db_seq; /* The 1st sequence to compare against. */
3154
if (search->pbp->final_db_seq == 0)
3155
search->thr_info->final_db_seq = readdb_get_num_entries_total_real(search->rdfp);
3157
search->thr_info->final_db_seq = search->pbp->final_db_seq;
3159
search->thr_info->db_chunk_last = search->pbp->first_db_seq;
3161
if (search->thr_info->blast_gi_list || search->rdfp->oidlist)
3162
search->thr_info->gi_current = 0;
3164
/* guess if we need to search any real database */
3165
if (search->thr_info->final_db_seq == 0) {
3166
/* that means that we have only mask and no real database should be searched */
3167
search->thr_info->realdb_done = TRUE;
3169
/* otherwise, we need to start searching all real databases */
3170
search->thr_info->realdb_done = FALSE;
3173
BlastSetLimits(search->pbp->cpu_limit, search->pbp->process_num);
3174
3470
if (NlmThreadsAvailable() && search->pbp->process_num > 1) {
3175
3471
rdfp = search->rdfp;
3176
3472
number_of_entries = INT4_MAX;
3488
/* This function use PACKED INT as lcmask */
3489
static SeqLocPtr blastMergeFilterLocs(SeqLocPtr filter_slp,
3490
SeqLocPtr lcmask, Boolean translate,
3491
Int2 frame, Int4 length)
3494
SeqLocPtr slp, dup_slp;
3496
if(filter_slp == NULL && lcmask == NULL)
3774
/* Adjust offsets in the mask locations list; discard locations outside of
3777
AdjustOffsetsInMaskLoc(SeqLocPtr mask_loc, Int4 start, Int4 length)
3779
SeqLocPtr slp, last_slp = NULL, next_slp, head = NULL;
3785
if (mask_loc->choice == SEQLOC_PACKED_INT)
3786
slp = (SeqLocPtr) mask_loc->data.ptrvalue;
3787
else if (mask_loc->choice == SEQLOC_INT)
3789
else /* Should be impossible */
3793
if (slp->choice == SEQLOC_INT) {
3794
loc = (SeqIntPtr) slp->data.ptrvalue;
3797
if (loc->from >= length || loc->to < 0) {
3798
next_slp = slp->next;
3802
if (loc->to > length)
3807
last_slp->next = slp;
3815
next_slp = slp->next;
3821
last_slp->next = NULL;
3823
if (mask_loc->choice == SEQLOC_PACKED_INT) {
3824
mask_loc->data.ptrvalue = head;
3831
/* This function use PACKED INT as slp2 */
3832
SeqLocPtr blastMergeFilterLocs(SeqLocPtr slp1, SeqLocPtr slp2, Boolean translate,
3833
Int2 frame, Int4 length)
3836
SeqLocPtr slp, dup_slp, dup_head;
3838
if(slp1 == NULL && slp2 == NULL)
3502
dup_slp = blastDuplicateSeqLocInt(lcmask);
3504
/* Request to translate means, that lcmask is DNA SeqLoc, that should be
3844
dup_slp = blastDuplicateSeqLocInt(slp2);
3846
/* Request to translate means, that slp2 is DNA SeqLoc, that should be
3505
3847
translated into protein SeqLoc corresponding to the specific frame */
3507
3849
if(translate) {
3508
3850
BlastConvertDNASeqLoc(dup_slp, frame, length);
3511
if(filter_slp == NULL) {
3512
3854
return dup_slp;
3515
3857
/* OK We have 2 not NULL filters - merging... */
3517
if(filter_slp->choice == SEQLOC_PACKED_INT)
3518
slp = (SeqLocPtr) filter_slp->data.ptrvalue;
3859
if(slp1->choice == SEQLOC_PACKED_INT)
3860
slp = (SeqLocPtr) slp1->data.ptrvalue;
3864
if (dup_slp->choice == SEQLOC_PACKED_INT) {
3866
dup_slp = (SeqLocPtr) dup_slp->data.ptrvalue;
3522
3870
if(slp == NULL) {
3523
3871
ErrPostEx(SEV_WARNING, 0, 0, "Invalid filter detected");
3524
filter_slp->data.ptrvalue = dup_slp->data.ptrvalue;
3525
dup_slp->data.ptrvalue = NULL;
3526
SeqLocFree(dup_slp);
3872
slp1->data.ptrvalue = dup_slp;
3530
3876
while(slp->next != NULL)
3533
slp->next = dup_slp->data.ptrvalue;
3534
dup_slp->data.ptrvalue = NULL;
3535
SeqLocFree(dup_slp);
3879
slp->next = dup_slp;
3541
3885
/* This function is used to filter one frame of the translated DNA
4015
FloatHi LIBCALL BLASTCalculateSearchSpace(BLAST_OptionsBlkPtr options,
4016
Int4 nseq, Int8 dblen, Int4 qlen)
4018
Int4 length_adjustment, qlen_eff;
4020
BLAST_KarlinBlkPtr kbp;
4023
if (options == NULL)
4026
kbp = BlastKarlinBlkCreate();
4027
BlastKarlinBlkGappedCalcEx(kbp, options->gap_open, options->gap_extend,
4028
options->decline_align, options->matrix, NULL);
4030
if (StringCmp(options->program_name, "blastn") != 0 &&
4031
options->gapped_calculation ) {
4032
Nlm_FloatHi alpha, beta; /*alpha and beta for new scoring system */
4033
getAlphaBeta(options->matrix,&alpha,&beta,options->gapped_calculation,
4034
options->gap_open, options->gap_extend);
4035
BlastComputeLengthAdjustment(kbp->K,
4036
kbp->logK, alpha/kbp->Lambda, beta,
4039
&length_adjustment );
4041
BlastComputeLengthAdjustment(kbp->K, kbp->logK, 1/kbp->H, 0.0,
4044
&length_adjustment );
4047
kbp = BlastKarlinBlkDestruct(kbp);
4049
qlen_eff = qlen - length_adjustment;
4050
dblen_eff = dblen - nseq*length_adjustment;
4051
searchsp = qlen_eff * dblen_eff;
3669
4056
#define DROPOFF_NUMBER_OF_BITS 10.0
3670
4057
#define INDEX_THR_MIN_SIZE 20000
3671
4058
Int2 LIBCALL BLASTSetUpSearchInternalByLoc (BlastSearchBlkPtr search, SeqLocPtr query_slp, BioseqPtr query_bsp, CharPtr prog_name, Int4 qlen, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *callback)(Int4 done, Int4 positives))
3691
4078
Uint1Ptr sequence;
3692
4079
Uint1Ptr query_seq, query_seq_start, query_seq_rev, query_seq_start_rev;
3693
4080
ValNodePtr vnp;
3694
Nlm_FloatHi alpha, beta; /*alpha and beta for new scoring system*/
3695
Nlm_FloatHi KtoUse, logKtoUse; /*K to use in length adjustment calculation*/
3696
Nlm_FloatHi LambdaToUse; /*Lambda to use in length adjuatment*/
4081
Int4 query_loc_start;
4083
/* AM: Temporaries to compute effective lengths of individual queries. */
4084
IntArray lengths_eff;
4085
IntArray length_adj_tmp;
4086
Int4 le_iter, length_tmp;
4089
/* AM: To support individual masking in the case of query multiplexing. */
4090
SeqLocPtr *concat_filter_slp, *concat_private_slp, *concat_private_slp_rev,
4091
* indiv_filter_slp, *indiv_private_slp, *indiv_private_slp_rev;
4092
SeqLocPtr ConcatLCaseMask;
4093
Boolean * indiv_mask_at_hash;
4094
QueriesPtr mult_queries = NULL;
3698
4096
if (options == NULL)
3700
ErrPostEx(SEV_FATAL, 0, 0, "BLAST_OptionsBlkPtr is NULL\n");
4098
ErrPostEx(SEV_FATAL, 1, 0, "BLAST_OptionsBlkPtr is NULL\n");
3704
4102
if (query_slp == NULL && query_bsp == NULL)
3706
ErrPostEx(SEV_FATAL, 0, 0, "Query is NULL\n");
4104
ErrPostEx(SEV_FATAL, 1, 0, "Query is NULL\n");
4108
/* AM: Support for query multiplexing. */
4109
mult_queries = search->mult_queries;
4114
= (SeqLocPtr *)MemNew( mult_queries->NumQueries*sizeof( SeqLocPtr ) );
4116
= (SeqLocPtr *)MemNew( mult_queries->NumQueries*sizeof( SeqLocPtr ) );
4118
= (SeqLocPtr *)MemNew( mult_queries->NumQueries*sizeof( SeqLocPtr ) );
4119
concat_private_slp_rev
4120
= (SeqLocPtr *)MemNew( mult_queries->NumQueries*sizeof( SeqLocPtr ) );
4122
= (SeqLocPtr *)MemNew( mult_queries->NumQueries*sizeof( SeqLocPtr ) );
4123
indiv_private_slp_rev
4124
= (SeqLocPtr *)MemNew( mult_queries->NumQueries*sizeof( SeqLocPtr ) );
4126
= (Boolean *)MemNew( mult_queries->NumQueries*sizeof( Boolean ) );
3710
4129
query_seq = NULL; /* Gets rid of warning. */
3711
4130
query_seq_rev = NULL; /* Gets rid of warning. */
3712
4131
query_seq_start = NULL; /* Gets rid of warning. */
4143
query_loc_start = SeqLocStart(query_slp);
4144
if (query_slp->choice != SEQLOC_WHOLE &&
4145
search->pbp->query_lcase_mask) {
4146
search->pbp->query_lcase_mask =
4147
AdjustOffsetsInMaskLoc(search->pbp->query_lcase_mask,
4148
query_loc_start, SeqLocLen(query_slp));
3724
4151
strand = SeqLocStrand(query_slp);
3725
4152
if (strand == Seq_strand_unknown || strand == Seq_strand_plus || strand == Seq_strand_both)
3727
private_slp = SeqLocIntNew(SeqLocStart(query_slp), SeqLocStop(query_slp), Seq_strand_plus, SeqLocId(query_slp));
4154
private_slp = SeqLocIntNew(query_loc_start, SeqLocStop(query_slp), Seq_strand_plus, SeqLocId(query_slp));
4156
/* AM: Support for query multiplexing. */
4158
for( i = 0; i < mult_queries->NumQueries; ++i )
4160
indiv_private_slp[i]
4162
mult_queries->QueryEnds[i] - mult_queries->QueryStarts[i],
4164
mult_queries->FakeBsps[i]->id );
4165
concat_private_slp[i]
4166
= SeqLocIntNew( mult_queries->QueryStarts[i],
4167
mult_queries->QueryEnds[i],
4169
SeqLocId( query_slp ) );
3729
4172
if (strand == Seq_strand_minus || strand == Seq_strand_both)
3731
private_slp_rev = SeqLocIntNew(SeqLocStart(query_slp), SeqLocStop(query_slp), Seq_strand_minus, SeqLocId(query_slp));
4174
private_slp_rev = SeqLocIntNew(query_loc_start, SeqLocStop(query_slp), Seq_strand_minus, SeqLocId(query_slp));
4176
/* AM: Support for query multiplexing. */
4178
for( i = 0; i < mult_queries->NumQueries; ++i )
4180
indiv_private_slp_rev[i]
4182
mult_queries->QueryEnds[i] - mult_queries->QueryStarts[i],
4184
mult_queries->FakeBsps[i]->id );
4185
concat_private_slp_rev[i]
4186
= SeqLocIntNew( mult_queries->QueryStarts[i],
4187
mult_queries->QueryEnds[i],
4189
SeqLocId( query_slp ) );
3733
4192
private_slp_delete = TRUE;
3734
4193
if (search->prog_number==blast_type_blastn)
3863
4322
if(!(search->pbp->is_rps_blast &&
3864
4323
(!StringCmp(prog_name, "tblastn")||
3865
4324
!StringCmp(prog_name, "psitblastn")))) {
3867
filter_slp = BlastSeqLocFilterEx(private_slp, options->filter_string, &mask_at_hash);
3868
else if (private_slp_rev)
3869
filter_slp = BlastSeqLocFilterEx(private_slp_rev, options->filter_string, &mask_at_hash);
3871
/* If lower case characters were detected in the input
3872
their locations will be masked out */
3874
if(search->pbp->query_lcase_mask != NULL) {
3875
filter_slp = blastMergeFilterLocs(filter_slp, search->pbp->query_lcase_mask, FALSE, 0, 0);
4325
/* AM: Query multiplexing. */
4329
filter_slp = BlastSeqLocFilterEx(private_slp, options->filter_string, &mask_at_hash);
4330
else if (private_slp_rev)
4331
filter_slp = BlastSeqLocFilterEx(private_slp_rev, options->filter_string, &mask_at_hash);
4333
/* If lower case characters were detected in the input
4334
their locations will be masked out */
4336
if(search->pbp->query_lcase_mask != NULL) {
4337
filter_slp = blastMergeFilterLocs(filter_slp, search->pbp->query_lcase_mask, FALSE, 0, 0);
4341
for( i = 0; i < mult_queries->NumQueries; ++i )
4343
if( indiv_private_slp[i] )
4346
= BlastSeqLocFilterEx( indiv_private_slp[i],
4347
options->filter_string,
4348
indiv_mask_at_hash + i );
4349
concat_filter_slp[i]
4350
= BlastSeqLocFilterEx( concat_private_slp[i],
4351
options->filter_string,
4352
indiv_mask_at_hash + i );
4354
else if( indiv_private_slp_rev[i] )
4357
= BlastSeqLocFilterEx( indiv_private_slp_rev[i],
4358
options->filter_string,
4359
indiv_mask_at_hash + i );
4360
concat_filter_slp[i]
4361
= BlastSeqLocFilterEx( concat_private_slp_rev[i],
4362
options->filter_string,
4363
indiv_mask_at_hash + i );
4366
if( mult_queries->LCaseMasks && mult_queries->LCaseMasks[i] )
4368
indiv_filter_slp[i] = blastMergeFilterLocs( indiv_filter_slp[i],
4369
(SeqLocPtr)mult_queries->LCaseMasks[i]->data.ptrvalue,
4371
ConcatLCaseMask = ConcatSeqLoc( mult_queries, mult_queries->LCaseMasks[i],
4372
SeqLocId( query_slp ), i );
4373
concat_filter_slp[i] = blastMergeFilterLocs( concat_filter_slp[i],
4374
(SeqLocPtr)ConcatLCaseMask->data.ptrvalue,
4178
4748
for (index=search->first_context; index<=search->last_context; index++)
4750
/* AM: Changed to support query multiplexing. */
4180
4751
if (search->prog_number != blast_type_blastn ||
4181
4752
index>search->first_context ||
4182
4753
search->last_context==search->first_context)
4183
status = BlastScoreBlkFill(search->sbp, (CharPtr)
4184
search->context[index].query->sequence,search->context[index].query->length, index);
4755
if( search->prog_number == blast_type_tblastn
4756
&& search->mult_queries )
4758
for( i = 0; i < search->mult_queries->NumQueries; ++i )
4760
status = BlastScoreBlkFill(
4762
((CharPtr)search->context[index].query->sequence)
4763
+ search->mult_queries->QueryStarts[i],
4764
search->mult_queries->QueryEnds[i]
4765
- search->mult_queries->QueryStarts[i] + 1,
4770
search->mult_queries->lambda_array[i]
4771
= search->sbp->kbp_std[search->first_context]->Lambda;
4775
if( search->mult_queries->LambdaMin
4776
> search->sbp->kbp_std[search->first_context]->Lambda )
4777
search->mult_queries->LambdaMin
4778
= search->sbp->kbp_std[search->first_context]->Lambda;
4780
if( search->mult_queries->LambdaMax
4781
< search->sbp->kbp_std[search->first_context]->Lambda )
4782
search->mult_queries->LambdaMax
4783
= search->sbp->kbp_std[search->first_context]->Lambda;
4785
if( search->mult_queries->LogKMin
4786
> search->sbp->kbp_std[search->first_context]->logK )
4787
search->mult_queries->LogKMin
4788
= search->sbp->kbp_std[search->first_context]->logK;
4790
if( search->mult_queries->LogKMax
4791
< search->sbp->kbp_std[search->first_context]->logK )
4792
search->mult_queries->LogKMax
4793
= search->sbp->kbp_std[search->first_context]->logK;
4797
search->mult_queries->LambdaMin
4798
= search->mult_queries->LambdaMax
4799
= search->sbp->kbp_std[search->first_context]->Lambda;
4800
search->mult_queries->LogKMin
4801
= search->mult_queries->LogKMax
4802
= search->sbp->kbp_std[search->first_context]->logK;
4808
= BlastScoreBlkFill(search->sbp, (CharPtr)
4809
search->context[index].query->sequence,
4810
search->context[index].query->length,
4186
status = BlastScoreBlkFill(search->sbp, (CharPtr)
4187
search->context[index].query->sequence,
4188
search->context[index+1].query->length,
4816
= BlastScoreBlkFill(search->sbp, (CharPtr)
4817
search->context[index].query->sequence,
4818
search->context[index+1].query->length,
4190
4822
if (status != 0)
4192
4824
sprintf(buffer, "Unable to calculate Karlin-Altschul params, check query sequence");
4251
4891
goto BlastSetUpReturn;
4253
if (search->pbp->gapped_calculation &&
4254
StringCmp(search->prog_name, "blastn") != 0)
4255
min_query_length = (Int4) 1/(search->sbp->kbp_gap_std[search->first_context]->K);
4257
min_query_length = (Int4) 1/(search->sbp->kbp[search->first_context]->K);
4259
if (search->prog_number != blast_type_blastn)
4262
getAlphaBeta(options->matrix,&alpha,&beta,search->pbp->gapped_calculation, search->pbp->gap_open, search->pbp->gap_extend);
4263
if (search->pbp->gapped_calculation) {
4264
KtoUse = search->sbp->kbp_gap_std[search->first_context]->K;
4265
logKtoUse = search->sbp->kbp_gap_std[search->first_context]->logK;
4266
LambdaToUse = search->sbp->kbp_gap_std[search->first_context]->Lambda;
4270
KtoUse = search->sbp->kbp[search->first_context]->K;
4271
logKtoUse = search->sbp->kbp[search->first_context]->logK;
4272
LambdaToUse = search->sbp->kbp[search->first_context]->Lambda;
4275
last_length_adjustment = 0;
4276
for (index=0; index<5; index++)
4278
if (search->prog_number != blast_type_blastn)
4280
length_adjustment = Nlm_Nint(((logKtoUse+log((Nlm_FloatHi)(length-last_length_adjustment)*(Nlm_FloatHi)(MAX(search->dbseq_num, (search->dblen)-(search->dbseq_num*last_length_adjustment)))))*alpha/LambdaToUse) + beta);
4284
length_adjustment = (Int4) ((search->sbp->kbp[search->first_context]->logK)+log((Nlm_FloatHi)(length-last_length_adjustment)*(Nlm_FloatHi)(MAX(1, (search->dblen)-(search->dbseq_num*last_length_adjustment)))))/(search->sbp->kbp[search->first_context]->H);
4286
if (length_adjustment >= length-min_query_length)
4288
length_adjustment = length-min_query_length;
4292
if (ABS(last_length_adjustment-length_adjustment) <= 1)
4294
last_length_adjustment = length_adjustment;
4893
if (options->gapped_calculation &&
4894
StringCmp(options->program_name, "blastn") != 0) {
4896
BLAST_KarlinBlkPtr kbp_gap =
4897
search->sbp->kbp_gap_std[search->first_context];
4898
Nlm_FloatHi alpha, beta; /*alpha and beta for the scoring system */
4899
getAlphaBeta(options->matrix,&alpha,&beta,options->gapped_calculation,
4900
options->gap_open, options->gap_extend);
4902
BlastComputeLengthAdjustment(kbp_gap->K,
4904
alpha/kbp_gap->Lambda, beta,
4906
search->dblen, search->dbseq_num,
4907
&length_adjustment );
4909
effective_query_length = length - length_adjustment;
4911
/* AM: If concatenating queries, then compute effective lengths of
4912
individual queries. */
4913
if( search->mult_queries )
4915
search->mult_queries->TotalLength = length;
4917
(IntArray) MemNew( sizeof( Int4 )*
4918
search->mult_queries->NumQueries );
4920
(IntArray)MemNew( sizeof( Int4 )*
4921
search->mult_queries->NumQueries );
4924
le_iter < search->mult_queries->NumQueries;
4926
length_tmp = search->mult_queries->QueryEnds[le_iter]
4927
- search->mult_queries->QueryStarts[le_iter]
4929
length_adj_tmp[le_iter] = 0;
4931
BlastComputeLengthAdjustment(kbp_gap->K,
4933
alpha/kbp_gap->Lambda,
4936
search->dblen, search->dbseq_num,
4937
&length_adj_tmp[le_iter] );
4939
lengths_eff[le_iter] = length_tmp - length_adj_tmp[le_iter];
4941
search->mult_queries->EffLengths[le_iter] =
4942
lengths_eff[le_iter];
4943
search->mult_queries->Adjustments[le_iter] =
4944
length_adj_tmp[le_iter];
4946
if( search->mult_queries->MinLen > length_tmp )
4947
search->mult_queries->MinLen = length_tmp;
4949
if( search->mult_queries->MinLenEff > lengths_eff[le_iter] )
4950
search->mult_queries->MinLenEff = lengths_eff[le_iter];
4954
else /* this is an ungapped alignment or the program is blastn */
4956
BLAST_KarlinBlkPtr kbp = search->sbp->kbp[search->first_context];
4958
BlastComputeLengthAdjustment( kbp->K, kbp->logK, 1/kbp->H, 0.0,
4960
search->dblen, search->dbseq_num,
4961
&length_adjustment );
4963
effective_query_length = length - length_adjustment;
4965
/* AM: If concatenating queries, then compute effective lengths of
4966
individual queries. */
4967
if( search->mult_queries ) {
4968
search->mult_queries->TotalLength = length;
4970
(IntArray)MemNew( sizeof( Int4 )*
4971
search->mult_queries->NumQueries );
4973
(IntArray)MemNew( sizeof( Int4 )*
4974
search->mult_queries->NumQueries );
4977
le_iter < search->mult_queries->NumQueries;
4979
length_tmp = search->mult_queries->QueryEnds[le_iter]
4980
- search->mult_queries->QueryStarts[le_iter]
4982
length_adj_tmp[le_iter] = 0;
4984
BlastComputeLengthAdjustment( kbp->K, kbp->logK,
4987
search->dblen, search->dbseq_num,
4988
&(length_adj_tmp[le_iter]) );
4990
lengths_eff[le_iter] = length_tmp - length_adj_tmp[le_iter];
4991
search->mult_queries->EffLengths[le_iter] =
4992
lengths_eff[le_iter];
4993
search->mult_queries->Adjustments[le_iter] =
4994
length_adj_tmp[le_iter];
4996
if( search->mult_queries->MinLen > length_tmp )
4997
search->mult_queries->MinLen = length_tmp;
4999
if( search->mult_queries->MinLenEff > lengths_eff[le_iter] )
5000
search->mult_queries->MinLenEff = lengths_eff[le_iter];
4296
5005
search->length_adjustment = MAX(length_adjustment, 0);
4298
if (search->prog_number != blast_type_blastn)
4299
search->dblen_eff = MAX(search->dbseq_num, search->dblen - search->dbseq_num*search->length_adjustment);
4301
search->dblen_eff = MAX(1, search->dblen - search->dbseq_num*search->length_adjustment);
4302
effective_query_length = MAX(length - search->length_adjustment, min_query_length);
5007
if (!search->dblen_eff) {
5009
search->dblen - search->dbseq_num*search->length_adjustment;
5010
/* AM: If concatenating queries find effective db lengths for each query. */
5011
if( search->mult_queries )
5013
for( le_iter = 0; le_iter < search->mult_queries->NumQueries;
5016
if( search->prog_number == blast_type_blastn )
5017
search->mult_queries->DbLenEff[le_iter]
5018
= MAX( 1, search->dblen
5019
- search->dbseq_num*length_adj_tmp[le_iter] );
5021
search->mult_queries->DbLenEff[le_iter]
5022
= MAX( search->dbseq_num,
5024
- search->dbseq_num*length_adj_tmp[le_iter] );
4304
5029
for (index=search->first_context; index<=search->last_context; index++)
4306
5031
search->context[index].query->effective_length = effective_query_length;
5034
/* AM: Setting up effective search spaces for individual queries. */
4309
5035
if (search->searchsp_eff == 0)
4310
5037
search->searchsp_eff = ((Nlm_FloatHi) search->dblen_eff)*((Nlm_FloatHi) effective_query_length);
5039
if( search->mult_queries )
5040
for( le_iter = 0; le_iter < search->mult_queries->NumQueries; ++le_iter )
5042
search->mult_queries->SearchSpEff[le_iter]
5043
= ((Nlm_FloatHi)search->mult_queries->DbLenEff[le_iter])
5044
* ((Nlm_FloatHi)lengths_eff[le_iter]);
5046
if( lengths_eff[le_iter] == search->mult_queries->MinLenEff )
5047
search->mult_queries->MinSearchSpEff
5048
= search->mult_queries->SearchSpEff[le_iter];
5051
else if( search->mult_queries )
5052
for( le_iter = 0; le_iter < search->mult_queries->NumQueries; ++le_iter )
5053
search->mult_queries->SearchSpEff[le_iter] = search->searchsp_eff;
4312
5055
/* The default is that cutoff_s was not set and is zero. */
4313
5056
if (options->cutoff_s == 0)
4654
This function reads in a list of gi's from a file.
4655
The file may be either in binary or text format.
4657
The binary gilist format has the following construction:
4659
1.) 1st 4 bytes: a 'magic' number: UINT4_MAX
4660
2.) 2nd 4 bytes: total number of gi's in the file (call this value 'number').
4661
3.) 'number' set of 4 bytes, allowing 4 bytes for each gi.
4663
The function GetGisFromFile first checks what the first 4 bytes
4664
of a file are, if they are the 'magic' number, then it proceeds
4665
to read values assuming a binary format. If they are not the
4666
'magic' number, then a text format is assumed.
4668
The binary gilist can be produced from a text gilist using the
4669
function readdb_MakeGiFileBinary.
4673
#define LINE_LEN 1024
4674
5443
BlastDoubleInt4Ptr
4675
5444
GetGisFromFile (CharPtr gifile, Int4Ptr gi_list_size)
4678
BlastDoubleInt4Ptr gi_list;
4680
Int4 index = 0, value, chunk_size = 24, number;
4682
Char line[LINE_LEN];
4686
Char file_name[PATH_MAX], blast_dir[PATH_MAX];
4689
* first looking in current directory, then checking .ncbirc,
4690
* then $BLASTDB and then assuming BLASTDB_DIR
4692
if (FileLength(gifile) > 0) {
4693
char *path = Nlm_FilePathFind(gifile);
4694
if (StringLen(path) > 0) {
4695
StringCpy(blast_dir, path);
4697
StringCpy(blast_dir, ".");
4702
if (getenv("BLASTDB"))
4703
Nlm_GetAppParam("NCBI", "BLAST", "BLASTDB", getenv("BLASTDB"), blast_dir, PATH_MAX);
4706
Nlm_GetAppParam ("NCBI", "BLAST", "BLASTDB", BLASTDB_DIR, blast_dir, PATH_MAX);
4708
sprintf(file_name, "%s%s%s", blast_dir, DIRDELIMSTR, FileNameFind(gifile));
4712
mfp = NlmOpenMFILE(file_name);
4714
ErrPostEx(SEV_ERROR, 0, 0, "Unable to open file %s", file_name);
5446
BlastDoubleInt4Ptr retval = NULL;
5447
Int4ListPtr gilist = NULL;
5450
if ( !(gilist = Int4ListReadFromFile(gifile)))
4718
tmp_value = (Uint4Ptr) mfp->mmp;
4719
if (SwapUint4(*tmp_value) == READDB_MAGIC_NUMBER) {
4721
tmp_value = (Uint4Ptr) mfp->mmp;
4722
number = SwapUint4(*tmp_value);
4723
gi_list = MemNew(number * sizeof(BlastDoubleInt4));
4725
while (index<number) {
4727
tmp_value = (Uint4Ptr) mfp->mmp;
4728
gi_list[index++].gi = SwapUint4(*tmp_value);
4730
*gi_list_size = number;
4731
mfp = NlmCloseMFILE(mfp);
4733
mfp = NlmCloseMFILE(mfp);
4734
if (!(gifp = FileOpen(file_name, "r"))) {
4735
ErrPostEx(SEV_ERROR, 0, 0, "Unable to open file %s", file_name);
4739
gi_list = MemNew(chunk_size * sizeof(BlastDoubleInt4));
4741
while (FileGets(line, LINE_LEN, gifp)) {
4743
/* do correct casting */
4744
status = sscanf(line, "%ld", &tmplong);
4747
/* skip non-valid lines */
4749
/* do we have enough space in gi_list ? */
4750
if (chunk_size < index + 1) {
4752
gi_list = Realloc(gi_list, chunk_size * sizeof(BlastDoubleInt4));
4755
gi_list[index++].gi = value;
4759
*gi_list_size = index;
5453
retval = (BlastDoubleInt4Ptr) MemNew(sizeof(BlastDoubleInt4)*gilist->count);
5458
*gi_list_size = gilist->count;
5460
for (i = 0; i < gilist->count; i++)
5461
retval[i].gi = gilist->i[i];
5463
gilist = Int4ListFree(gilist);
4768
5468
BlastSearchBlkPtr LIBCALL
4825
5535
query_length = query_bsp->length;
5537
/* Increase the hitlist size for the preliminary gapped alignment */
5538
if (options->gapped_calculation)
5539
hitlist_size = MIN(2*options->hitlist_size, options->hitlist_size + 50);
5541
hitlist_size = options->hitlist_size;
5543
/* AM: Query multiplexing */
5546
for( i = 0; i < mult_queries->NumQueries; ++i )
5547
mult_queries->result_info[i].results
5548
= (BLASTResultHitlistPtr *)MemNew(
5549
(hitlist_size + 1)*sizeof( BLASTResultHitlistPtr ) );
5551
mult_queries->max_results_per_query = hitlist_size;
5552
hitlist_size *= mult_queries->NumQueries;
4827
5555
/* On the first call query length is used for the subject length. */
4828
search = BlastSearchBlkNewExtra(options->wordsize, query_length, dbname, multiple_hits, 0, options->threshold_second, options->hitlist_size, prog_name, NULL, first_context, last_context, rdfp, options->window_size);
5556
search = BlastSearchBlkNewExtra(options->wordsize, query_length, dbname, multiple_hits, 0, options->threshold_second, hitlist_size, prog_name, NULL, first_context, last_context, rdfp, options->window_size);
4831
5559
readdb_get_totals_ex(search->rdfp, &(dblen), &(search->dbseq_num), TRUE);
4833
if (seqid_list && !options->use_real_db_size)
4834
BlastAdjustDbNumbers(search->rdfp, &(dblen), &(search->dbseq_num),
4835
seqid_list, NULL, NULL, NULL, 0);
4837
/* Create virtual database if any of the databases have gi lists or
4838
ordinal id masks, or if gi list is provided from options */
4839
BlastProcessGiLists(search, options, gi_list, &gi_list_total);
4840
/* Intended for use when a list of gi's is sent in, but the real size is needed. */
4841
/* It's probably still necessary to call BlastAdjustDbNumbers, but it would be nice
4842
if this were not required. */
4843
if (gi_list_total > 0 && !options->use_real_db_size)
4844
BlastAdjustDbNumbers(search->rdfp, &(dblen),
4845
&(search->dbseq_num), NULL, NULL,
4846
search->rdfp->oidlist, NULL, 0);
4849
/* use length and num of seqs of the database from alias file */
4850
if (search->rdfp->aliaslen && !gi_list)
4851
dblen = search->rdfp->aliaslen;
4852
if (search->rdfp->aliasnseq && !gi_list)
4853
search->dbseq_num = search->rdfp->aliasnseq;
5561
if (!options->ignore_gilist)
5563
/* Create virtual database if any of the databases have gi lists or
5564
ordinal id masks, or if gi list is provided from options */
5565
BlastProcessGiLists(search, options, gi_list, gi_list_total);
5567
/* search->thr_info->blast_gi_list will be non-NULL if gi_list or
5568
* options->gilist or options->gifile was non-NULL and therefore
5569
* intersected with any oidlists in the search->rdfp(s). If this is the
5570
* case, we need to recalculate the database length and number of
5572
if (search->thr_info->blast_gi_list && !options->use_real_db_size)
5573
readdb_get_totals_ex3(search->rdfp, &dblen, &search->dbseq_num,
5574
FALSE, TRUE, eApproximate);
4855
5577
/* command-line/options trump alias file. */
4856
5578
if (options->db_length > 0)
4857
dblen = options->db_length;
5579
dblen = options->db_length;
4858
5580
if (options->dbseq_num > 0)
4859
5581
search->dbseq_num = options->dbseq_num;
4860
5582
if (options->searchsp_eff > 0)
4916
5645
BLASTSetUpSearchByLocWithReadDb(SeqLocPtr query_slp, CharPtr prog_name, Int4 qlen, CharPtr dbname, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)))
4919
return BLASTSetUpSearchWithReadDbInternal (query_slp, NULL, prog_name, qlen, dbname, options, callback, NULL, NULL, 0, NULL);
5648
return BLASTSetUpSearchWithReadDbInternalMult (query_slp, NULL, prog_name, qlen, dbname, options, callback, NULL, NULL, 0, NULL, NULL);
5649
/* --KM pass NULL mult_queries */
4923
5653
BlastSearchBlkPtr LIBCALL
4924
BLASTSetUpSearchByLocWithReadDbEx(SeqLocPtr query_slp, CharPtr prog_name, Int4 qlen, CharPtr dbname, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total)
5654
BLASTSetUpSearchByLocWithReadDbEx(SeqLocPtr query_slp, CharPtr prog_name, Int4 qlen, CharPtr dbname, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total, QueriesPtr mult_queries)
5655
/* --KM added mult_queries param */
4927
return BLASTSetUpSearchWithReadDbInternal (query_slp, NULL, prog_name, qlen, dbname, options, callback, seqid_list, gi_list, gi_list_total, NULL);
5658
return BLASTSetUpSearchWithReadDbInternalMult (query_slp, NULL, prog_name, qlen, dbname, options, callback, seqid_list, gi_list, gi_list_total, NULL, mult_queries);
5659
/* --KM pass mult_queries */
4929
5661
static BlastSearchBlkPtr
4930
5662
BLASTSetUpSearchEx (SeqLocPtr query_slp, BioseqPtr query_bsp, CharPtr prog_name, Int4 qlen, Int8 dblen, BlastAllWordPtr all_words, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)))
5429
6168
ABS(diag_compare_hsps(&hspp1[index], &hspp2[index1])) <
5430
6169
OVERLAP_DIAG_CLOSE) {
5431
6170
if (merge_hsps) {
5432
if (BLASTMergeHsps(search, hspp1[index], hspp2[index1], start)) {
5433
hspp2[index1]->gap_info =
5434
GapXEditBlockDelete(hspp2[index1]->gap_info);
5435
hspp2[index1] = MemFree(hspp2[index1]);
6171
if (BLASTMergeHsps(search, hspp1[index], hspp2[index1],
6173
/* Free the second HSP. */
6174
hspp2[index1] = BLAST_HSPFree(hspp2[index1]);
5438
6176
} else { /* No gap information available */
5439
6177
if (BLASTHspContained(hspp1[index], hspp2[index1])) {
5440
hspp1[index] = MemFree(hspp1[index]);
6178
/* Point the first HSP to the new HSP; */
6179
hspp1[index] = BLAST_HSPFree(hspp1[index]);
5441
6180
hspp1[index] = hspp2[index1];
5442
6181
hspp2[index1] = NULL;
5443
} else if (BLASTHspContained(hspp2[index1], hspp1[index]))
5444
hspp2[index1] = MemFree(hspp2[index1]);
6182
/* This HSP has been removed, so break out of the inner
6185
} else if (BLASTHspContained(hspp2[index1], hspp1[index])) {
6186
hspp2[index1] = BLAST_HSPFree(hspp2[index1]);
6190
/* This and remaining HSPs are too far from the one being
5451
new_hspcnt += hspcnt1;
5452
for (index=0; index<hspcnt2; index++) {
5453
if (hspp2[index] != NULL)
6197
HspArrayPurge(hitlist2->hsp_array, hitlist2->hspcnt, FALSE);
6199
/* The new number of HSPs is now the sum of the remaining counts in the
6200
two lists, but if there is a restriction on the number of HSPs to keep,
6201
it might have to be reduced. */
6202
new_hspcnt = hitlist2->hspcnt + hitlist1->hspcnt;
6203
if (search->pbp->hsp_num_max)
6204
new_hspcnt = MIN(new_hspcnt, search->pbp->hsp_num_max);
5457
6206
if (new_hspcnt >= hitlist1->hspmax-1 && hitlist1->do_not_reallocate == FALSE) {
5458
new_hsp_array = (BLAST_HSPPtr PNTR) Realloc(hitlist1->hsp_array, new_hspcnt*2*sizeof(BLAST_HSPPtr));
6207
Int4 new_allocated = 2*new_hspcnt;
6208
if (search->pbp->hsp_num_max)
6209
new_allocated = MIN(new_allocated, search->pbp->hsp_num_max);
6210
new_hsp_array = (BLAST_HSPPtr PNTR)
6211
Realloc(hitlist1->hsp_array, new_allocated*sizeof(BLAST_HSPPtr));
5459
6212
if (new_hsp_array == NULL) {
5460
6213
ErrPostEx(SEV_WARNING, 0, 0, "UNABLE to reallocate in BlastSaveCurrentHsp for ordinal id %ld, continuing with fixed array of %ld HSP's", (long) search->subject_id, (long) hitlist1->hspmax);
5461
6214
hitlist1->do_not_reallocate = TRUE;
5463
6216
hitlist1->hsp_array = new_hsp_array;
5464
hitlist1->hspmax = 2*new_hspcnt;
5468
for (index=0; index<hspcnt1; index++)
5469
hitlist1->hsp_array[index_array[index]] = hspp1[index];
5470
for (index=hitlist1->hspcnt, index1=0;
5471
index1<hitlist2->hspcnt && index<hitlist1->hspmax; index1++) {
5472
if (hitlist2->hsp_array[index1] != NULL)
5473
hitlist1->hsp_array[index++] = hitlist2->hsp_array[index1];
5475
for (index1=0; index1<hspcnt2 && index<hitlist1->hspmax; index1++) {
5476
if (hspp2[index1] != NULL)
5477
hitlist1->hsp_array[index++] = hspp2[index1];
5480
hspp1 = MemFree(hspp1);
5481
hspp2 = MemFree(hspp2);
5482
index_array = MemFree(index_array);
6217
hitlist1->hspmax = new_allocated;
6219
new_hspcnt = MIN(new_hspcnt, hitlist1->hspmax);
6222
if (new_hspcnt >= hitlist2->hspcnt + hitlist1->hspcnt) {
6223
/* All HSPs from both arrays are saved */
6224
for (index=hitlist1->hspcnt, index1=0;
6225
index1<hitlist2->hspcnt; index1++) {
6226
if (hitlist2->hsp_array[index1] != NULL)
6227
hitlist1->hsp_array[index++] = hitlist2->hsp_array[index1];
6230
/* Not all HSPs are be saved; sort both arrays by score and save only
6231
the new_hspcnt best ones.
6232
For the merged set of HSPs, allocate array the same size as in the
6234
new_hsp_array = (BLAST_HSP**)
6235
malloc(hitlist1->hspmax*sizeof(BLAST_HSP*));
6236
HeapSort(hitlist1->hsp_array, hitlist1->hspcnt,
6237
sizeof(BLAST_HSP*), score_compare_hsps);
6238
HeapSort(hitlist2->hsp_array, hitlist2->hspcnt, sizeof(BLAST_HSP*),
6239
score_compare_hsps);
6240
index1 = index2 = 0;
6241
for (index = 0; index < new_hspcnt; ++index) {
6242
if (index1 < hitlist1->hspcnt &&
6243
(index2 >= hitlist2->hspcnt ||
6244
(hitlist1->hsp_array[index1]->score >=
6245
hitlist2->hsp_array[index2]->score))) {
6246
new_hsp_array[index] = hitlist1->hsp_array[index1];
6249
new_hsp_array[index] = hitlist2->hsp_array[index2];
6253
/* Free the extra HSPs that could not be saved */
6254
for ( ; index1 < hitlist1->hspcnt; ++index1) {
6255
hitlist1->hsp_array[index1] =
6256
BLAST_HSPFree(hitlist1->hsp_array[index1]);
6258
for ( ; index2 < hitlist2->hspcnt; ++index2) {
6259
hitlist2->hsp_array[index2] =
6260
BLAST_HSPFree(hitlist2->hsp_array[index2]);
6262
/* Point hitlist1's HSP array to the new one */
6263
hitlist1->hsp_array = (BLAST_HSP**) MemFree(hitlist1->hsp_array);
6264
hitlist1->hsp_array = new_hsp_array;
5484
6267
hitlist1->hspcnt = index;
6268
/* Second HSP list now does not own any HSPs */
6269
hitlist2->hspcnt = 0;
5485
6271
return hitlist1;
8927
9870
old_index = new_index;
8929
if (hitlist_count == hitlist_max)
8930
{ /* The list is full, delete the last entry. */
8931
BlastFreeHeap(search, results[hitlist_max-1]);
8932
if (results[hitlist_max-1]->seqalign)
8933
SeqAlignSetFree(results[hitlist_max-1]->seqalign);
8934
results[hitlist_max-1] = BLASTResultHitlistFreeEx(search, results[hitlist_max-1]);
8935
result_struct->hitlist_count--;
8936
hitlist_count = result_struct->hitlist_count;
9873
/* AM: Query multiplexing. */
9876
if (hitlist_count == hitlist_max)
9877
{ /* The list is full, delete the last entry. */
9878
BlastFreeHeap(search, results[hitlist_max-1]);
9879
if (results[hitlist_max-1]->seqalign)
9880
SeqAlignSetFree(results[hitlist_max-1]->seqalign);
9881
results[hitlist_max-1] = BLASTResultHitlistFreeEx(search, results[hitlist_max-1]);
9882
result_struct->hitlist_count--;
9883
hitlist_count = result_struct->hitlist_count;
9885
if (hitlist_max > 1)
9886
Nlm_MemMove((results+new_index+1), (results+new_index), (hitlist_count-new_index)*sizeof(results[0]));
9890
new_index = ResultIndex( current_evalue, high_score,
9892
results, hitlist_count );
9894
tmp_num_results = result_info->NumResults;
9895
del_index = hitlist_count;
9896
mq_new_index = ResultIndex( current_evalue, high_score,
9898
result_info->results,
9899
result_info->NumResults );
9901
if( mq_new_index == mult_queries->max_results_per_query )
9902
{ /* AM: The list is full and new result is too low --- do nothing. */
9903
search->mult_queries->delete_current_hitlist = TRUE;
9905
= BLASTResultHitlistFreeEx( search, result_hitlist );
9907
if( search->thr_info->results_mutex )
9908
NlmMutexUnlock( search->thr_info->results_mutex );
9913
if( result_info->NumResults
9914
== mult_queries->max_results_per_query )
9915
{ /* AM: must remove the worst result for this query. */
9917
= result_info->results[result_info->NumResults - 1];
9919
del_index = ResultIndex1( mq_worst_result,
9920
results, hitlist_count );
9921
BlastFreeHeap( search, results[del_index] );
9923
if( results[del_index]->seqalign )
9924
SeqAlignSetFree( results[del_index]->seqalign );
9927
= BLASTResultHitlistFreeEx( search,
9928
results[del_index] );
9929
hitlist_count = --result_struct->hitlist_count;
9932
if( hitlist_max > 1 )
9933
if( new_index < del_index )
9934
Nlm_MemMove( results + new_index + 1,
9935
results + new_index,
9936
(del_index - new_index)
9937
*sizeof( results[0] ) );
9938
else if( del_index < new_index )
9939
Nlm_MemMove( results + del_index,
9940
results + del_index + 1,
9941
(new_index - del_index)
9942
*sizeof( results[0] ) );
9944
if( mult_queries->max_results_per_query > 1 )
9945
Nlm_MemMove( result_info->results + mq_new_index + 1,
9946
result_info->results + mq_new_index,
9947
(result_info->NumResults - mq_new_index)
9948
*sizeof( results[0] ) );
9950
result_info->NumResults = tmp_num_results;
8938
if (hitlist_max > 1)
8939
Nlm_MemMove((results+new_index+1), (results+new_index), (hitlist_count-new_index)*sizeof(results[0]));
8942
9954
{ /* Case of K=1 and the first hit is eliminated */
8944
9956
BlastInsertList2Heap(search, result_hitlist);
9958
/* AM: Query multiplexing. */
9959
if( mult_queries ) mq_new_index = 0;
8948
9963
{ /* First hit to be stored. */
8950
9965
BlastInsertList2Heap(search, result_hitlist);
9967
/* AM: Query multiplexing. */
9968
if( mult_queries ) mq_new_index = 0;
9023
10051
meff = (Nlm_FloatHi) search->context[search->first_context].query->length;
9024
10052
if (pbp->mb_params)
9025
BlastCutoffs_simple(&s, &e, kbp, searchsp, TRUE);
10053
BlastCutoffs(&s, &e, kbp, searchsp, TRUE, search->pbp->gap_decay_rate );
9028
if (pbp->gapped_calculation && search->prog_number != blast_type_blastn)
9029
BlastCutoffs_simple(&s, &e, kbp_gap, searchsp, FALSE);
10056
if (pbp->gapped_calculation)
10057
{ /* AM: Changed to support query concatenation. */
10058
if( !search->mult_queries )
10059
BlastCutoffs(&s, &e, kbp_gap, searchsp, FALSE, 0.0 );
10061
BlastCutoffs( &s, &e, kbp_gap,
10062
search->mult_queries->MinSearchSpEff,
9031
BlastCutoffs_simple(&s, &e, kbp, searchsp, FALSE);
10066
{ /* AM: Changed to support query concatenation. */
10067
if( !search->mult_queries )
10068
BlastCutoffs(&s, &e, kbp, searchsp, FALSE, 0.0 );
10070
BlastCutoffs( &s, &e, kbp, search->mult_queries->MinSearchSpEff,
9033
10074
/* Determine the secondary cutoff score, S2, to use */
9034
10075
if (e2 == 0. && !pbp->cutoff_s2_set)
9048
10089
BlastCutoffs(&s2, &e2, kbp, meff, avglen, TRUE);
9050
if (pbp->gapped_calculation && search->prog_number != blast_type_blastn)
9051
BlastCutoffs(&s2, &e2, kbp_gap, MIN(avglen,meff), avglen, TRUE);
9053
BlastCutoffs(&s2, &e2, kbp, MIN(avglen,meff), avglen, TRUE);
10091
if (pbp->gapped_calculation)
10093
if( !search->mult_queries )
10094
BlastCutoffs(&s2, &e2, kbp_gap, MIN(avglen,meff) * avglen,
10095
TRUE, search->pbp->gap_decay_rate );
10097
BlastCutoffs( &s2, &e2, kbp_gap,
10098
MIN( avglen,search->mult_queries->MinLen ) * avglen,
10099
TRUE, search->pbp->gap_decay_rate );
10102
{ /* AM: Changed to support query concatenation. */
10103
if( !search->mult_queries )
10104
BlastCutoffs(&s2, &e2, kbp, MIN(avglen,meff) * avglen,
10105
TRUE, search->pbp->gap_decay_rate );
10107
BlastCutoffs( &s2, &e2, kbp,
10108
MIN(avglen,2*(search->mult_queries->MinLen)) * avglen,
10109
TRUE, search->pbp->gap_decay_rate );
9054
10111
/* Adjust s2 to be in line with s, as necessary */
9055
10112
s2 = MAX(s2, 1);
10162
11258
of links, as this was subtracted out for purposes of the
10163
11259
comparison above. */
10164
11260
best[0]->hsp_link.sum[0] += (best[0]->hsp_link.num[0])*cutoff[0];
10165
prob[0] = BlastSmallGapSumE(kbp[search->first_context], gap_size, gap_prob, gap_decay_rate, best[0]->hsp_link.num[0],best[0]->hsp_link.sum[0], best[0]->hsp_link.xsum[0], search->context[search->first_context].query->effective_length, subject_length, FALSE);
11262
/* AM: Support for query concatenation. */
11263
if( best[0]->hsp_link.num[0] > 1 && gap_prob == 0 ) {
11264
prob[0] = INT4_MAX;
11266
if( !search->mult_queries )
11268
BlastSmallGapSumE(kbp[search->first_context],
11270
best[0]->hsp_link.num[0],
11271
best[0]->hsp_link.xsum[0],
11272
search->context[search->first_context].
11273
query->effective_length,
11275
BlastGapDecayDivisor(gap_decay_rate,
11280
BlastSmallGapSumE( kbp[search->first_context],
11282
best[0]->hsp_link.num[0],
11283
best[0]->hsp_link.xsum[0],
11284
search->mult_queries->
11285
EffLengths[query_num],
11287
BlastGapDecayDivisor(gap_decay_rate,
11290
if( best[0]->hsp_link.num[0] > 1 ) {
11291
prob[0] /= gap_prob;
11292
if( prob[0] > INT4_MAX ) prob[0] = INT4_MAX;
10166
11296
best[1]->hsp_link.sum[1] += (best[1]->hsp_link.num[1])*cutoff[1];
10167
prob[1] = BlastLargeGapSumE(kbp[search->first_context], gap_prob, gap_decay_rate, best[1]->hsp_link.num[1],best[1]->hsp_link.sum[1], best[1]->hsp_link.xsum[1], search->context[search->first_context].query->effective_length, subject_length, FALSE);
11298
/* AM: Support for query concatenation. */
11299
if( 1 - gap_prob == 0.0 && best[1]->hsp_link.num[1] > 1 ) {
11300
prob[1] = INT4_MAX;
11302
if( !search->mult_queries )
11304
BlastLargeGapSumE(kbp[search->first_context],
11305
best[1]->hsp_link.num[1],
11306
best[1]->hsp_link.xsum[1],
11307
search->context[search->first_context].
11308
query->effective_length,
11310
BlastGapDecayDivisor(gap_decay_rate,
11315
BlastLargeGapSumE( kbp[search->first_context],
11316
best[1]->hsp_link.num[1],
11317
best[1]->hsp_link.xsum[1],
11318
search->mult_queries->
11319
EffLengths[query_num],
11321
BlastGapDecayDivisor(gap_decay_rate,
11325
if( best[1]->hsp_link.num[1] > 1 ) {
11326
prob[1] /= 1 - gap_prob;
11327
if( prob[1] > INT4_MAX ) prob[1] = INT4_MAX;
10168
11330
ordering_method = prob[0]<=prob[1] ? 0:1;