~ubuntu-branches/ubuntu/edgy/ncbi-tools6/edgy

« back to all changes in this revision

Viewing changes to tools/blast.c

  • Committer: Bazaar Package Importer
  • Author(s): Aaron M. Ucko
  • Date: 2004-06-26 00:18:09 UTC
  • mfrom: (1.1.1 upstream)
  • Revision ID: james.westby@ubuntu.com-20040626001809-ma39ub7j6dbh8r3t
Tags: 6.1.20040616-1
* New upstream release.
* debian/blast2.docs: adjusted for new arrangement (a separate
  source-tree directory full of HTML files).
* debian/{control,lib*-dbg.install,rules}: switch to new-style -dbg
  packages containing just the stripped-out symbols.
* debian/{installman,ncbi-tools-bin.install,rules}: upstream has dropped
  f(asta)merge.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
/* $Id: blast.c,v 6.330 2002/01/04 22:01:33 coulouri Exp $
 
1
static char const rcsid[] = "$Id: blast.c,v 6.406 2004/05/21 13:53:37 dondosha Exp $";
 
2
 
 
3
/* $Id: blast.c,v 6.406 2004/05/21 13:53:37 dondosha Exp $
2
4
* ===========================================================================
3
5
*
4
6
*                            PUBLIC DOMAIN NOTICE
47
49
        further manipulation.
48
50
 
49
51
******************************************************************************
50
 
 * $Revision: 6.330 $
 
52
 * $Revision: 6.406 $
51
53
 *
52
54
 * $Log: blast.c,v $
 
55
 * Revision 6.406  2004/05/21 13:53:37  dondosha
 
56
 * Fix in BLASTMergeHitLists
 
57
 *
 
58
 * Revision 6.405  2004/04/28 14:37:06  madden
 
59
 * Changes from Mike Gertz
 
60
 *  - modified the link_hsps routine to apply the gap_prob parameter to
 
61
 *     the result of BlastSmallGapSumE and BlastLargeGapSumE.
 
62
 *   - further modified link_hsps to use BlastGapDecayDivisor to weight
 
63
 *     tests based on multiple collections of HSPs.
 
64
 *   - removed all reference to gap_prob from the new_link_hsps.
 
65
 *   - further modified new_link_hsps to use BlastGapDecayDivisor to weight
 
66
 *     tests based on multiple collections of HSPs.
 
67
 *
 
68
 * Revision 6.404  2004/04/20 14:55:47  morgulis
 
69
 * 1. Fixed query offsets in results when -B option is used.
 
70
 * 2. Fixes for lower case masking handling with -B option.
 
71
 *
 
72
 * Revision 6.403  2004/04/13 21:03:30  madden
 
73
 * Use ignore_gilist Boolean to determine whether gilist lookup should occur
 
74
 *
 
75
 * Revision 6.402  2004/03/31 17:58:51  papadopo
 
76
 * Mike Gertz' changes for length adjustment calculations
 
77
 *
 
78
 * Revision 6.401  2004/03/22 15:35:39  dondosha
 
79
 * 1. Do not allow cutoff score for saving HSPs to be smaller than gap trigger;
 
80
 * 2. When merging hitlists with a restriction on number of HSPs, keep best
 
81
 *    scoring ones.
 
82
 *
 
83
 * Revision 6.400  2004/02/26 15:52:29  papadopo
 
84
 * Mike Gertz' modifications to unify handling of gapped Karlin blocks between protein and nucleotide searches
 
85
 *
 
86
 * Revision 6.399  2004/02/24 14:07:00  camacho
 
87
 * Use approximate sequence length calculation for entrez-limited
 
88
 * nucleotide blast databases.
 
89
 *
 
90
 * Revision 6.398  2004/02/03 17:54:16  dondosha
 
91
 * Correction to revision 6.391 in function BlastGetDbChunk
 
92
 *
 
93
 * Revision 6.397  2004/01/06 22:37:10  dondosha
 
94
 * Use BLAST_HSPfree function
 
95
 *
 
96
 * Revision 6.396  2003/12/29 15:42:46  coulouri
 
97
 * tblastn query concatenation fixes from morgulis
 
98
 *
 
99
 * Revision 6.395  2003/12/12 16:01:23  madden
 
100
 * Change to signature of BlastCutoffs, remove BlastCutoffs_simple
 
101
 *
 
102
 * Revision 6.394  2003/12/10 17:05:27  dondosha
 
103
 * Added function ReevaluateScoreWithAmbiguities to reevaluate score for one HSP; use it after greedy traceback
 
104
 *
 
105
 * Revision 6.393  2003/11/19 18:09:13  dondosha
 
106
 * Use consistent rounding in length adjustment calculation
 
107
 *
 
108
 * Revision 6.392  2003/11/10 20:15:29  dondosha
 
109
 * Bug fix in BLASTMergeHsps
 
110
 *
 
111
 * Revision 6.391  2003/10/23 17:46:17  dondosha
 
112
 * Fix in BlastGetDbChunk for looking up ordinal ids within a range
 
113
 *
 
114
 * Revision 6.390  2003/08/08 16:36:21  dondosha
 
115
 * 1. Treat final_db_seq as 1 beyond the final sequence; 0 is an exception, meaning end of database.
 
116
 * 2. Added more meaningful error message when query length is less than wordsize.
 
117
 *
 
118
 * Revision 6.389  2003/05/30 17:20:10  coulouri
 
119
 * add rcsid
 
120
 *
 
121
 * Revision 6.388  2003/05/14 20:35:58  camacho
 
122
 * Allow searching empty databases
 
123
 *
 
124
 * Revision 6.387  2003/05/13 16:02:53  coulouri
 
125
 * make ErrPostEx(SEV_FATAL, ...) exit with nonzero status
 
126
 *
 
127
 * Revision 6.386  2003/05/12 12:23:43  camacho
 
128
 * Sanity check for number of sequences & db length
 
129
 *
 
130
 * Revision 6.385  2003/04/23 15:15:36  camacho
 
131
 * Moved reading of gi list to readdb
 
132
 *
 
133
 * Revision 6.384  2003/03/24 19:42:13  madden
 
134
 * Changes to support query concatenation for blastn and tblastn
 
135
 *
 
136
 * Revision 6.383  2003/03/14 22:33:44  dondosha
 
137
 * Do not increase preliminary hitlist size for ungapped search
 
138
 *
 
139
 * Revision 6.382  2003/03/06 19:10:42  madden
 
140
 * Allow search->pbp->process_num to be > 1 if MT enabled
 
141
 *
 
142
 * Revision 6.381  2003/03/05 21:30:24  dondosha
 
143
 * Fix in BlastMakeCopyQueryDNAP for single-strand OOF search
 
144
 *
 
145
 * Revision 6.380  2002/12/24 14:12:03  dondosha
 
146
 * Removed accidental duplicate lines
 
147
 *
 
148
 * Revision 6.379  2002/12/10 23:13:22  bealer
 
149
 * Fix do_the_blast_run and BlastGetDbChunk to calculate beginning and ending
 
150
 * sequence numbers correctly.
 
151
 * Fix BlastGetDbChunk to use precise start and end points, not nearest
 
152
 * multiples of 32.
 
153
 * Fix do_the_blast_run and BlastGetDbChunk to handle mixed oidlist / real db
 
154
 * multiple database scenarios.
 
155
 *
 
156
 * Revision 6.378  2002/12/04 22:39:51  bealer
 
157
 * Undo previous set of changes.
 
158
 *
 
159
 * Revision 6.377  2002/11/25 19:53:34  bealer
 
160
 * Remove extraneous commented code.
 
161
 *
 
162
 * Revision 6.376  2002/11/25 19:50:26  bealer
 
163
 * Prevent extra work by BlastGetDbChunk when OID lists are used.
 
164
 *
 
165
 * Revision 6.375  2002/11/13 18:03:10  dondosha
 
166
 * Correction in BlastReevaluateWithAmbiguities
 
167
 *
 
168
 * Revision 6.374  2002/11/08 14:58:43  kans
 
169
 * first argument to NlmReadMFILE must be cast as Uint1Ptr - Mac compiler picked up this inconsistency with the prototype
 
170
 *
 
171
 * Revision 6.373  2002/11/07 21:06:15  camacho
 
172
 * Made GetGisFromFile work even without mmap
 
173
 *
 
174
 * Revision 6.372  2002/11/04 22:55:56  dondosha
 
175
 * For blastn, calculate number of identities in BlastReevaluateWithAmbiguities
 
176
 *
 
177
 * Revision 6.371  2002/10/28 21:44:03  madden
 
178
 * Added comments about gap-free extensions
 
179
 *
 
180
 * Revision 6.370  2002/09/18 20:23:19  camacho
 
181
 * Added BLASTCalculateSearchSpace
 
182
 *
 
183
 * Revision 6.369  2002/09/11 20:46:25  camacho
 
184
 * Removed deprecated BlastSeqIdListPtr code
 
185
 *
 
186
 * Revision 6.368  2002/08/30 18:56:02  dondosha
 
187
 * Made BlastMakeTempProteinBioseq and HackSeqLocId public: needed for Cn3D
 
188
 *
 
189
 * Revision 6.367  2002/08/30 15:42:48  dondosha
 
190
 * In blastn, use ewp structure only for the first context
 
191
 *
 
192
 * Revision 6.366  2002/08/22 13:39:45  camacho
 
193
 * Close the header and sequence files only if allocated
 
194
 *
 
195
 * Revision 6.365  2002/08/07 21:37:47  camacho
 
196
 * Do not remove the search block prematurely in do_gapped_blast_search
 
197
 *
 
198
 * Revision 6.364  2002/08/06 17:33:50  madden
 
199
 * Fix return value problem
 
200
 *
 
201
 * Revision 6.363  2002/07/19 17:55:47  dondosha
 
202
 * 1.Return 0 status from BLASTPerformFinalSearch when database sequence has 0 length;
 
203
 * 2. Do not destroy search block too early.
 
204
 *
 
205
 * Revision 6.362  2002/07/15 18:53:27  camacho
 
206
 * Small fix to previous commit
 
207
 *
 
208
 * Revision 6.361  2002/07/14 17:18:13  camacho
 
209
 * Fixed small memory leak in do_blast_search/do_gapped_blast_search
 
210
 *
 
211
 * Revision 6.360  2002/07/12 18:02:55  dondosha
 
212
 * Do not call AdjustOffsetsInMaskLoc if no lower case mask
 
213
 *
 
214
 * Revision 6.359  2002/07/12 16:06:26  dondosha
 
215
 * Adjust offsets and remove unneeded lower case mask locations when query is a subsequence
 
216
 *
 
217
 * Revision 6.358  2002/06/27 13:01:26  kans
 
218
 * BlastGetVirtualOIDList is LIBCALL
 
219
 *
 
220
 * Revision 6.357  2002/06/26 00:56:28  camacho
 
221
 *
 
222
 * 1. Fixed bug when searching a mixture of real and mask databases.
 
223
 * 2. Clean up of code that calculates the number of sequences and database
 
224
 *    length.
 
225
 *
 
226
 * Revision 6.356  2002/06/25 16:43:45  dondosha
 
227
 * Get out from all search loops if bad status returned, meaning process ran out of memory
 
228
 *
 
229
 * Revision 6.355  2002/06/25 13:11:22  madden
 
230
 * Fix UMR for status in do_gapped_blast_search
 
231
 *
 
232
 * Revision 6.354  2002/06/21 21:49:10  camacho
 
233
 * Removed references to thr_info->blast_seqid_list in BlastGetDbChunk
 
234
 *
 
235
 * Revision 6.353  2002/06/12 15:43:09  dondosha
 
236
 * Potential uninitialized variable bug fixed
 
237
 *
 
238
 * Revision 6.352  2002/06/12 15:33:25  dondosha
 
239
 * Corrected integer types of the variable holding return status in 2 functions
 
240
 *
 
241
 * Revision 6.351  2002/06/11 20:40:04  dondosha
 
242
 * Correction to previous change
 
243
 *
 
244
 * Revision 6.350  2002/06/11 14:44:45  dondosha
 
245
 * Return status from some functions instead of search block pointer
 
246
 *
 
247
 * Revision 6.349  2002/06/05 15:30:34  coulouri
 
248
 * Move signal handling to blastsrv.c
 
249
 *
 
250
 * Revision 6.348  2002/05/20 22:49:10  dondosha
 
251
 * Fix for the Mega BLAST case when database sequence is split, and an HSP is accidentally extended across the boundary to a completely masked query
 
252
 *
 
253
 * Revision 6.347  2002/05/15 19:51:01  dondosha
 
254
 * Do a sanity check for the final db sequence parameter
 
255
 *
 
256
 * Revision 6.346  2002/04/23 16:01:27  madden
 
257
 * Fix for ungapped search of arbitrary matrix
 
258
 *
 
259
 * Revision 6.345  2002/04/23 15:40:10  madden
 
260
 * Fix for effective length change and ungapped blast
 
261
 *
 
262
 * Revision 6.344  2002/04/19 21:22:30  madden
 
263
 * Added protection for matrices that are only empty strings
 
264
 *
 
265
 * Revision 6.343  2002/04/18 12:07:05  madden
 
266
 * Check for Selenocysteine in Bioseq, replace with X
 
267
 *
 
268
 * Revision 6.342  2002/04/17 17:30:15  madden
 
269
 * Call getAlphaBeta only for gapped alignments
 
270
 *
 
271
 * Revision 6.341  2002/04/16 15:42:15  madden
 
272
 * Save mask1 for lookup table hashing only (change for neighboring)
 
273
 *
 
274
 * Revision 6.340  2002/04/04 21:19:15  dondosha
 
275
 * Corrections for megablast with non-greedy extensions
 
276
 *
 
277
 * Revision 6.339  2002/03/26 21:20:50  dondosha
 
278
 * 1. Make hitlist size larger for preliminary gapped alignment
 
279
 * 2. Pass readdb structure to megablast set up if it is already initialized
 
280
 *
 
281
 * Revision 6.338  2002/03/26 16:46:40  madden
 
282
 * Move calculation of effective lengths to BlastCalculateEffectiveLengths
 
283
 *
 
284
 * Revision 6.337  2002/03/06 18:34:31  dondosha
 
285
 * Pass the filtered locations back from the megablast engine to use in formatting
 
286
 *
 
287
 * Revision 6.336  2002/02/27 22:39:00  dondosha
 
288
 * Fixed bug in splitting long database sequences for translated searches
 
289
 *
 
290
 * Revision 6.335  2002/02/27 17:43:20  dondosha
 
291
 * Made effective database length option work properly
 
292
 *
 
293
 * Revision 6.334  2002/02/26 22:25:20  dondosha
 
294
 * Return error as soon as it is found that matrix name is not supported
 
295
 *
 
296
 * Revision 6.333  2002/02/26 17:37:40  dondosha
 
297
 * Fixed bug in BlastNtWordFinder for word sizes > 12
 
298
 *
 
299
 * Revision 6.332  2002/02/26 15:03:13  dondosha
 
300
 * Accidental newline in sprintf removed
 
301
 *
 
302
 * Revision 6.331  2002/02/25 23:26:57  dondosha
 
303
 * Changed error to warning if no letters to be indexed just on one context
 
304
 *
53
305
 * Revision 6.330  2002/01/04 22:01:33  coulouri
54
306
 * Fixed BlastSetLimits() to work under linux
55
307
 *
2096
2348
#include <gapxdrop.h>
2097
2349
#include <dust.h>
2098
2350
 
2099
 
/* For rlimit stuff. */
2100
 
#if defined(OS_UNIX)
2101
 
#include <sys/resource.h>
2102
 
#include <signal.h>
2103
 
#endif
2104
 
 
2105
2351
#include <mbalign.h>
2106
2352
#include <mblast.h>
 
2353
 
2107
2354
/* 
2108
2355
The last database sequence a tick (progress indicator) was issued for
2109
2356
and the increments (i.e., number of db sequences completed) that a tick 
2114
2361
/*
2115
2362
        Set to TRUE if the process has timed out.
2116
2363
*/
2117
 
Boolean time_out_boolean=FALSE;
 
2364
volatile Boolean time_out_boolean;
2118
2365
 
2119
2366
/*
2120
2367
        SeqId lists if only a certain number of the database sequences will be
2403
2650
                x_variable = y_variable*(gap_size*gap_size);
2404
2651
                x_variable /= (gap_prob + MY_EPS);
2405
2652
                *cutoff_s_second= (BLAST_Score) floor((log(x_variable)/kbp->Lambda)) + 1;
 
2653
                /* Don't allow this cutoff to be too small */
 
2654
                *cutoff_s_second = MAX(*cutoff_s_second, search->pbp->gap_trigger);
2406
2655
                *ignore_small_gaps = FALSE;
2407
2656
           }
2408
2657
           else
2463
2712
                                if (hsp->query.offset > search->required_start ||
2464
2713
                                        hsp->query.end < search->required_end)
2465
2714
                                {
2466
 
                                        hsp_array[index] = MemFree(hsp_array[index]);
 
2715
                                        hsp_array[index] = BLAST_HSPFree(hsp_array[index]);
2467
2716
                                }
2468
2717
                                else
2469
2718
                                {
2481
2730
                                }
2482
2731
                                else
2483
2732
                                {
2484
 
                                        hsp_array[index] = MemFree(hsp_array[index]);
 
2733
                                        hsp_array[index] = BLAST_HSPFree(hsp_array[index]);
2485
2734
                                }
2486
2735
                                        
2487
2736
                                        
2546
2795
        SeqPortPtr spp=NULL;
2547
2796
        Uint1Ptr nt_seq, nt_seq_start, subject, subject_start, query, old_query_s, old_query_f, new_query_s, new_query_f=NULL;
2548
2797
        Uint1Ptr query_start, query_end, subject_real_start=NULL;
 
2798
        Int4 num_ident;
2549
2799
 
2550
2800
/* Only nucl. db's. */
2551
2801
        if (search->prog_number == blast_type_blastp || search->prog_number == blast_type_blastx)
2560
2810
                return 0;
2561
2811
 
2562
2812
/* Check if there are ambiguites at all, return 0 if there are none. */
2563
 
        if(readdb_ambchar_present(search->rdfp, sequence_number) == FALSE)
 
2813
        if(search->prog_number != blast_type_blastn &&
 
2814
           readdb_ambchar_present(search->rdfp, sequence_number) == FALSE) {
 
2815
           
2564
2816
                return 0;
2565
 
 
 
2817
        }
2566
2818
        current_hitlist = search->current_hitlist;
2567
2819
        hspcnt = current_hitlist->hspcnt;
2568
2820
        hspcnt_max = current_hitlist->hspcnt_max;
2681
2933
 
2682
2934
                score = 0;
2683
2935
                sum = 0;
 
2936
                num_ident = 0;
2684
2937
                subject = subject_start;
2685
2938
                old_query_s = query_start + hsp_array[index]->query.offset;
2686
2939
                old_query_f = query_start + hsp_array[index]->query.end; 
2688
2941
                new_query_s = old_query_s;
2689
2942
                for (query=old_query_s; query<old_query_f; query++, subject++)
2690
2943
                {
2691
 
                        if ((sum += matrix[*query][*subject]) <= 0)
 
2944
                   if (*query == *subject)
 
2945
                      ++num_ident;
 
2946
 
 
2947
                        if ((sum += matrix[*query][*subject]) < 0)
2692
2948
                        {
2693
2949
                                if (score > 0)
2694
2950
                                {
2695
 
                                        subject = subject_start + (new_query_f-old_query_s); 
2696
2951
                                        if (score >= search->pbp->cutoff_s2)
2697
2952
                                        {
2698
2953
                                                break;
2699
2954
                                        }
2700
2955
                                }
2701
2956
                                score = sum = 0;
 
2957
                                num_ident = 0;
2702
2958
                                new_query_s = new_query_f = query;
2703
2959
                        }
2704
2960
                        else if (sum > score)
2705
2961
                        {       /* Start of scoring regime. */
2706
 
                                if (score == 0)
2707
 
                                        new_query_s = query;
2708
 
                                score = sum;
2709
 
                                new_query_f = query+1;
 
2962
                           if (score == 0)
 
2963
                              new_query_s = query;
 
2964
                           score = sum;
 
2965
                           new_query_f = query+1;
2710
2966
                        }
2711
2967
                }
2712
2968
 
2719
2975
                        hsp_array[index]->subject.offset = hsp_array[index]->subject.offset + new_query_s - old_query_s;
2720
2976
                        hsp_array[index]->subject.end = hsp_array[index]->subject.end + new_query_f - old_query_f;
2721
2977
                        hsp_array[index]->subject.length = hsp_array[index]->subject.end - hsp_array[index]->subject.offset;
 
2978
                        hsp_array[index]->num_ident = num_ident;
2722
2979
                        hsp_array[index]->linked_set = FALSE;
2723
2980
                        hsp_array[index]->start_of_chain = FALSE;
2724
2981
                        Nlm_MemSet((VoidPtr) &(hsp_array[index]->hsp_link), 0, sizeof(BLAST_HSP_LINK));
2726
2983
                }
2727
2984
                else
2728
2985
                { /* Delete if this is now below the cutoff score. */
2729
 
                        hsp_array[index] = MemFree(hsp_array[index]);
 
2986
                        hsp_array[index] = BLAST_HSPFree(hsp_array[index]);
2730
2987
                }
2731
2988
 
2732
2989
                if (StringCmp(search->prog_name, "blastn") != 0)
2756
3013
        return status;
2757
3014
}
2758
3015
 
 
3016
/* Auxiliary function to retrieve the virtual oidlist attached to the
 
3017
 * rdfp_chain. Returns a pointer to the OIDList, called should *NOT* modify
 
3018
 * this copy. Assumes that this function is called after BlastProcessGiLists
 
3019
 * has been called (while setting up the search) */
 
3020
OIDListPtr LIBCALL BlastGetVirtualOIDList(ReadDBFILEPtr rdfp_chain)
 
3021
{
 
3022
    OIDListPtr virtual_oidlist = NULL;
 
3023
 
 
3024
    while (rdfp_chain) {
 
3025
        if (virtual_oidlist = rdfp_chain->oidlist) {
 
3026
            break;
 
3027
        }
 
3028
        rdfp_chain = rdfp_chain->next;
 
3029
    }
 
3030
    return virtual_oidlist;
 
3031
}
 
3032
 
2759
3033
/*
2760
3034
        Function to assign chunks of the database to a thread.  
2761
3035
        The "start" and "stop" points are returned by the arguments.
2775
3049
{
2776
3050
    Boolean done=FALSE;
2777
3051
    Int4 ordinal_id;
2778
 
    SeqIdPtr sip;
2779
 
 
 
3052
    OIDListPtr virtual_oidlist = NULL;
2780
3053
    *id_list_number = 0;
2781
3054
    
2782
3055
    NlmMutexLockEx(&thr_info->db_mutex);
2783
3056
    if (thr_info->realdb_done) {
2784
 
        if (rdfp->oidlist) {
 
3057
        if (virtual_oidlist = BlastGetVirtualOIDList(rdfp)) {
2785
3058
            /* Virtual database.   Create id_list using mask file */
2786
 
            OIDListPtr oidlist = rdfp->oidlist;
2787
 
            Uint4       mask;
2788
 
            Int4        maskindex, base, i;
2789
 
            Boolean     cont;
2790
 
            Int4        oidindex;
2791
 
            Int4        total_mask = oidlist->total/MASK_WORD_SIZE + 1;
2792
 
 
2793
 
 
2794
 
            if (thr_info->gi_current < total_mask) {
2795
 
 
2796
 
                oidindex = 0;
2797
 
                maskindex = thr_info->gi_current;
2798
 
                base = thr_info->gi_current * MASK_WORD_SIZE;
2799
 
                cont = TRUE;
2800
 
 
2801
 
                while (cont) {
2802
 
                    /* for each long-word mask */
2803
 
                    mask = Nlm_SwapUint4(oidlist->list[maskindex]);
2804
 
 
2805
 
                    i = 0;
2806
 
                    while (mask) {
2807
 
                        if (mask & (((Uint4)0x1)<<(MASK_WORD_SIZE-1))) {
2808
 
                            id_list[oidindex++] = base + i;
 
3059
            Int4 gi_end       = 0;
 
3060
            
 
3061
            thr_info->final_db_seq = MIN(thr_info->final_db_seq, virtual_oidlist->total);
 
3062
            
 
3063
            gi_end = thr_info->final_db_seq;
 
3064
 
 
3065
            if (thr_info->gi_current < gi_end) {
 
3066
                Int4 oidindex  = 0;
 
3067
                Int4 gi_start  = thr_info->gi_current;
 
3068
                Int4 bit_start = gi_start % MASK_WORD_SIZE;
 
3069
                Int4 gi;
 
3070
                
 
3071
                for(gi = gi_start; (gi < gi_end) && (oidindex < thr_info->db_chunk_size);) {
 
3072
                    Int4 bit_end = ((gi_end - gi + bit_start) < MASK_WORD_SIZE) ? (gi_end - gi + bit_start) : MASK_WORD_SIZE;
 
3073
                    Int4 bit;
 
3074
                    
 
3075
                    Uint4 mask_index = gi / MASK_WORD_SIZE;
 
3076
                    Uint4 mask_word  = Nlm_SwapUint4(virtual_oidlist->list[mask_index]);
 
3077
                    
 
3078
                    if ( mask_word ) {
 
3079
                        for(bit = bit_start; bit<bit_end; bit++) {
 
3080
                            Uint4 bitshift = (MASK_WORD_SIZE-1)-bit;
 
3081
                            
 
3082
                            if ((mask_word >> bitshift) & 1) {
 
3083
                                id_list[ oidindex++ ] = (gi - bit_start) + bit;
 
3084
                            }
2809
3085
                        }
2810
 
                        mask <<= 1;
2811
 
                        i++;
2812
 
                    }
2813
 
                    maskindex++;
2814
 
                    base += MASK_WORD_SIZE;
2815
 
                    if (maskindex >= total_mask ||
2816
 
                            oidindex > thr_info->db_chunk_size) {
2817
 
                        cont = FALSE;
2818
 
                    }
 
3086
                    }
 
3087
                    
 
3088
                    gi += bit_end - bit_start;
 
3089
                    bit_start = 0;
2819
3090
                }
2820
 
                thr_info->gi_current = maskindex;
 
3091
                
 
3092
                thr_info->gi_current = gi;
2821
3093
                *id_list_number = oidindex;
2822
 
                BlastTickProc(thr_info->gi_current, thr_info);
 
3094
                BlastTickProc(thr_info->gi_current/32, thr_info);
2823
3095
            } else {
2824
3096
                done = TRUE;
2825
3097
            }
2826
 
 
2827
 
        } else {
2828
 
            done = TRUE;
2829
 
        }
2830
 
    } else if (thr_info->blast_seqid_list) {
2831
 
        /* global_seqid_list indicates there is a list of selected ID's, 
2832
 
           global_seqid_ptr is the position in the list. */
2833
 
        sip = thr_info->blast_seqid_list->seqid_ptr;
2834
 
        if (sip == NULL) {
2835
 
            NlmMutexUnlock(thr_info->db_mutex);
2836
 
            return TRUE;
2837
 
        }
2838
 
 
2839
 
        ordinal_id = SeqId2OrdinalId(rdfp, sip);
2840
 
        while (ordinal_id < 0 && sip) {
2841
 
            thr_info->blast_seqid_list->seqid_ptr = 
2842
 
                thr_info->blast_seqid_list->seqid_ptr->next;
2843
 
            sip = thr_info->blast_seqid_list->seqid_ptr;
2844
 
            ordinal_id = SeqId2OrdinalId(rdfp, sip);
2845
 
        }
2846
 
 
2847
 
        if (thr_info->blast_seqid_list->seqid_ptr) {
2848
 
            *start = ordinal_id;
2849
 
            *stop = ordinal_id+1;
2850
 
            done = FALSE;
2851
 
            thr_info->blast_seqid_list->seqid_ptr = 
2852
 
                thr_info->blast_seqid_list->seqid_ptr->next;
2853
 
        } else {
2854
 
            done = TRUE;
2855
 
        }
2856
 
 
 
3098
            
 
3099
        } else {
 
3100
            done = TRUE;
 
3101
        }
2857
3102
    } else {
 
3103
        int real_readdb_entries;
 
3104
        int total_readdb_entries;
 
3105
        int final_real_seq;
 
3106
 
 
3107
        real_readdb_entries  = readdb_get_num_entries_total_real(rdfp);
 
3108
        total_readdb_entries = readdb_get_num_entries_total(rdfp);
 
3109
        final_real_seq       = MIN( real_readdb_entries, thr_info->final_db_seq );
 
3110
        
2858
3111
        /* we have real database with start/stop specified */
2859
 
        if (thr_info->db_mutex) {
 
3112
        if (thr_info->db_mutex) {
 
3113
            /* Emit a tick if needed. */
 
3114
            BlastTickProc(thr_info->db_chunk_last, thr_info);
 
3115
            *start = thr_info->db_chunk_last;
 
3116
            if (thr_info->db_chunk_last < final_real_seq) {
 
3117
                *stop = MIN((thr_info->db_chunk_last + 
 
3118
                    thr_info->db_chunk_size), final_real_seq);
 
3119
            } else {/* Already finished. */
 
3120
                *stop = thr_info->db_chunk_last;
2860
3121
 
2861
 
            /* Emit a tick if needed. */
2862
 
            BlastTickProc(thr_info->db_chunk_last, thr_info);
2863
 
            *start = thr_info->db_chunk_last;
2864
 
            if (thr_info->db_chunk_last < thr_info->final_db_seq) {
2865
 
                *stop = MIN((thr_info->db_chunk_last + 
2866
 
                            thr_info->db_chunk_size), 
2867
 
                        thr_info->final_db_seq);
2868
 
            } else {/* Already finished. */
2869
 
                *stop = thr_info->db_chunk_last;
2870
 
                thr_info->realdb_done = TRUE;
2871
 
            }
2872
 
            thr_info->db_chunk_last = *stop;
2873
 
        } else {
2874
 
            if (*stop != thr_info->final_db_seq) {
2875
 
                done = FALSE;
2876
 
                *start = thr_info->last_db_seq;
2877
 
                *stop = thr_info->final_db_seq;
2878
 
            } else {
2879
 
                thr_info->realdb_done = TRUE;
2880
 
                done = TRUE;
2881
 
            }
2882
 
        }
 
3122
                /* Change parameters for oidlist processing. */
 
3123
                thr_info->realdb_done  = TRUE;
 
3124
            }
 
3125
            thr_info->db_chunk_last = *stop;
 
3126
        } else {
 
3127
            if (*stop != final_real_seq) {
 
3128
                done = FALSE;
 
3129
                *start = thr_info->last_db_seq;
 
3130
                *stop  = final_real_seq;
 
3131
            } else {
 
3132
                thr_info->realdb_done = TRUE;
 
3133
                
 
3134
                if (total_readdb_entries == real_readdb_entries) {
 
3135
                    done = TRUE;
 
3136
                } else {
 
3137
                    thr_info->gi_current = final_real_seq;
 
3138
                }
 
3139
            }
 
3140
        }
2883
3141
    }
 
3142
    
2884
3143
    NlmMutexUnlock(thr_info->db_mutex);
2885
 
    
2886
3144
    return done;
2887
3145
}
2888
3146
 
2889
 
/* Signal handler for SIGXCPU, exceeded cpu time soft limit. */
2890
 
 
2891
 
static void 
2892
 
timeout_shutdown(int flag)
2893
 
 
2894
 
{
2895
 
#ifdef OS_UNIX
2896
 
  time_out_boolean = TRUE;
2897
 
 
2898
 
  /* this probably isn't the best way to do this.
2899
 
   * probably ought to be rewritten with posix sigaction() instead.
2900
 
   * -coulouri 20020104
2901
 
   */
2902
 
 
2903
 
#ifdef SIG_IGN
2904
 
  signal(SIGXCPU, SIG_IGN);
2905
 
#else
2906
 
  sigignore(SIGXCPU);
2907
 
#endif
2908
 
 
2909
 
#endif
2910
 
}
2911
 
 
2912
 
 
2913
 
/* 
2914
 
 * call setrlimit() on unix systems to cap cpu time.
2915
 
 * changed to ignore number of cpus, since this limit is per process,
2916
 
 * not per thread. also added a hard limit at 2x the soft limit.
2917
 
 * -coulouri 20020104
2918
 
 */
2919
 
 
2920
 
static Boolean
2921
 
BlastSetLimits(Int4 cpu_limit, Int2 num_cpu)
2922
 
{
2923
 
#ifdef OS_UNIX
2924
 
  struct rlimit   rl;
2925
 
 
2926
 
  if (cpu_limit <= 0)
2927
 
    return FALSE;
2928
 
 
2929
 
  if (num_cpu < 1)
2930
 
    return FALSE;
2931
 
 
2932
 
  rl.rlim_cur = cpu_limit;
2933
 
  rl.rlim_max = 2 * cpu_limit;
2934
 
 
2935
 
  if (setrlimit(RLIMIT_CPU, &rl) == -1) return FALSE;
2936
 
  
2937
 
  if (signal(SIGXCPU, timeout_shutdown) == SIG_ERR) return FALSE;
2938
 
 
2939
 
#endif
2940
 
 
2941
 
  time_out_boolean = FALSE;
2942
 
  return TRUE;
2943
 
}
2944
 
 
2945
3147
static void do_on_exit_func(VoidPtr ptr)
2946
3148
{
2947
3149
    BlastSearchBlkPtr search;
2958
3160
do_gapped_blast_search(VoidPtr ptr)
2959
3161
 
2960
3162
{
2961
 
        BlastSearchBlkPtr search;
2962
 
        Int2 status;
2963
 
        Int4 index, index1, start=0, stop=0, id_list_length;
2964
 
        Int4Ptr id_list=NULL;
2965
 
 
2966
 
        search = (BlastSearchBlkPtr) ptr;
2967
 
        if (search->thr_info->blast_gi_list || search->rdfp->oidlist) {
2968
 
            id_list = MemNew((search->thr_info->db_chunk_size+33)
2969
 
                             *sizeof(Int4));
 
3163
    BlastSearchBlkPtr search;
 
3164
    Int2 status=0;
 
3165
    Int4 index, index1, start=0, stop=0, id_list_length;
 
3166
    Int4Ptr id_list=NULL;
 
3167
    Uint4 i; /* AM: Support for query concatenation. */
 
3168
 
 
3169
    search = (BlastSearchBlkPtr) ptr;
 
3170
    if (search->thr_info->blast_gi_list || BlastGetVirtualOIDList(search->rdfp))
 
3171
    {
 
3172
        id_list = MemNew((search->thr_info->db_chunk_size+33)*sizeof(Int4));
 
3173
    }
 
3174
    
 
3175
    if (NlmThreadsAvailable() && search->pbp->process_num > 1)
 
3176
        NlmThreadAddOnExit(do_on_exit_func, ptr);
 
3177
 
 
3178
 
 
3179
    while (BlastGetDbChunk(search->rdfp, &start, &stop, id_list, 
 
3180
                           &id_list_length, search->thr_info) != TRUE)
 
3181
    {
 
3182
        if (id_list && id_list_length)
 
3183
        {
 
3184
            for (index=0; index<id_list_length; index++)
 
3185
            {
 
3186
                index1 = id_list[index];
 
3187
                if ((status = BLASTPerformSearchWithReadDb(search, index1)) != 0)
 
3188
                    break;
 
3189
                               
 
3190
                if (search->prog_number == blast_type_blastx 
 
3191
                    || search->prog_number == blast_type_tblastn 
 
3192
                    || search->prog_number == blast_type_psitblastn)
 
3193
                   status = BlastLinkHsps(search);
 
3194
                            
 
3195
                status = BlastReapHitlistByEvalue(search);
 
3196
                if (search->handle_results)
 
3197
                    search->handle_results((VoidPtr) search);
 
3198
                else
 
3199
                    BlastSaveCurrentHitlist(search);
 
3200
                /* Emit a tick if needed and we're not MT. */
 
3201
                if (search->thr_info->db_mutex == NULL)
 
3202
                    BlastTickProc(index1, search->thr_info);
 
3203
                if (time_out_boolean == TRUE)
 
3204
                    break;      
 
3205
            }
 
3206
        } else if (!search->thr_info->realdb_done) {
 
3207
            for (index=start; index<stop; index++)
 
3208
            {
 
3209
                if ((status = BLASTPerformSearchWithReadDb(search, index)) != 0)
 
3210
                    break;
 
3211
 
 
3212
                /* AM: Support for query concatenation. */
 
3213
                if( !search->mult_queries )
 
3214
                {
 
3215
                  if (search->prog_number == blast_type_blastx 
 
3216
                      || search->prog_number == blast_type_tblastn 
 
3217
                      || search->prog_number == blast_type_psitblastn)
 
3218
                     status = BlastLinkHsps(search);
 
3219
 
 
3220
                  status = BlastReapHitlistByEvalue(search);
 
3221
                  if (search->handle_results)
 
3222
                      search->handle_results((VoidPtr) search);
 
3223
                  else
 
3224
                      BlastSaveCurrentHitlist(search);
 
3225
                }
 
3226
                else /* AM: Support for query concatenation. */
 
3227
                {
 
3228
                  InitHitLists( search );
 
3229
                  search->mult_queries->use_mq = TRUE;
 
3230
                  search->mult_queries->delete_current_hitlist = FALSE;
 
3231
 
 
3232
                  for( i = 0; i < search->mult_queries->NumQueries; ++i )
 
3233
                  {
 
3234
                    search->mult_queries->current_query = i;
 
3235
 
 
3236
                    if( search->prog_number == blast_type_blastx 
 
3237
                        || search->prog_number == blast_type_tblastn 
 
3238
                        || search->prog_number == blast_type_psitblastn )
 
3239
                      status = BlastLinkHsps( search );
 
3240
 
 
3241
                    status = BlastReapHitlistByEvalue( search );
 
3242
 
 
3243
                    if( search->handle_results )
 
3244
                      search->handle_results( (VoidPtr)search );
 
3245
                    else
 
3246
                      BlastSaveCurrentHitlist( search );
 
3247
                  }
 
3248
 
 
3249
                  if( search->mult_queries->delete_current_hitlist )
 
3250
                  {
 
3251
                    search->current_hitlist
 
3252
                      = BlastHitListDestruct( search->current_hitlist );
 
3253
                  }
 
3254
 
 
3255
                  search->mult_queries->use_mq = FALSE;
 
3256
                  BlastHitListPurge( search->current_hitlist );
 
3257
                }
 
3258
 
 
3259
                /* Emit a tick if needed and we're not MT. */
 
3260
                if (search->thr_info->db_mutex == NULL)
 
3261
                    BlastTickProc(index, search->thr_info);
 
3262
                if (time_out_boolean == TRUE)
 
3263
                    break;      
 
3264
            }
2970
3265
        }
2971
 
 
2972
 
        NlmThreadAddOnExit(do_on_exit_func, ptr);
2973
 
 
2974
 
 
2975
 
        while (BlastGetDbChunk(search->rdfp, &start, &stop, id_list, &id_list_length, search->thr_info) != TRUE)
2976
 
        {
2977
 
            if (id_list)
2978
 
            {
2979
 
                for (index=0; index<id_list_length; index++)
2980
 
                {
2981
 
                        index1 = id_list[index];
2982
 
                        search = BLASTPerformSearchWithReadDb(search, index1);
2983
 
           if (search->prog_number == blast_type_blastx 
2984
 
               || search->prog_number == blast_type_tblastn 
2985
 
               || search->prog_number == blast_type_psitblastn)
2986
 
 
2987
 
                                status = BlastLinkHsps(search);
2988
 
 
2989
 
                        status = BlastReapHitlistByEvalue(search);
2990
 
                        if (search->handle_results)
2991
 
                                search->handle_results((VoidPtr) search);
2992
 
                        else
2993
 
                                BlastSaveCurrentHitlist(search);
2994
 
                        /* Emit a tick if needed and we're not MT. */
2995
 
                        if (search->thr_info->db_mutex == NULL)
2996
 
                            BlastTickProc(index1, search->thr_info);
2997
 
                        if (time_out_boolean == TRUE)
2998
 
                            break;      
2999
 
                }
3000
 
             } else {
3001
 
                for (index=start; index<stop; index++)
3002
 
                {
3003
 
                        search = BLASTPerformSearchWithReadDb(search, index);
3004
 
 
3005
 
                       if (search->prog_number == blast_type_blastx 
3006
 
                           || search->prog_number == blast_type_tblastn 
3007
 
                           ||  search->prog_number == blast_type_psitblastn)
3008
 
 
3009
 
                                status = BlastLinkHsps(search);
3010
 
 
3011
 
                        status = BlastReapHitlistByEvalue(search);
3012
 
                        if (search->handle_results)
3013
 
                                search->handle_results((VoidPtr) search);
3014
 
                        else
3015
 
                                BlastSaveCurrentHitlist(search);
3016
 
                        /* Emit a tick if needed and we're not MT. */
3017
 
                        if (search->thr_info->db_mutex == NULL)
3018
 
                                BlastTickProc(index, search->thr_info);
3019
 
                        if (time_out_boolean == TRUE)
3020
 
                                break;  
3021
 
                }
3022
 
             }
3023
 
                /* Get out if "stop" was the last seq. */
3024
 
                if (time_out_boolean)
3025
 
                        break;
3026
 
        }
3027
 
 
3028
 
        if (id_list)
3029
 
                id_list = MemFree(id_list);
3030
 
 
3031
 
        return (VoidPtr) search;
 
3266
        /* Get out if "stop" was the last seq. */
 
3267
        if (time_out_boolean || status)
 
3268
            break;
 
3269
    }
 
3270
 
 
3271
    if (id_list)
 
3272
        id_list = MemFree(id_list);
 
3273
 
 
3274
    if (NlmThreadsAvailable() && search->pbp->process_num > 1)
 
3275
        NlmThreadRemoveOnExit(do_on_exit_func, ptr);
 
3276
 
 
3277
    return (VoidPtr) search;
3032
3278
3033
3279
 
3034
3280
static VoidPtr
3036
3282
 
3037
3283
{
3038
3284
    BlastSearchBlkPtr search;
3039
 
    Int2 status;
 
3285
    Int2 status = 0;
3040
3286
    Int4 index, index1, start=0, stop=0, id_list_length;
3041
3287
    Int4Ptr id_list=NULL;
 
3288
    Uint4 i; /* AM: Query multiplexing. */
3042
3289
 
3043
3290
    search = (BlastSearchBlkPtr) ptr;
3044
 
    if (search->thr_info->blast_gi_list || search->rdfp->oidlist) {
 
3291
        if (search->thr_info->blast_gi_list || BlastGetVirtualOIDList(search->rdfp))
 
3292
    {
3045
3293
        id_list = MemNew((search->thr_info->db_chunk_size+33)
3046
3294
                         *sizeof(Int4));
3047
3295
    }
3048
3296
 
3049
 
    NlmThreadAddOnExit(do_on_exit_func, ptr);
 
3297
    if (NlmThreadsAvailable() && search->pbp->process_num > 1)
 
3298
        NlmThreadAddOnExit(do_on_exit_func, ptr);
3050
3299
    
3051
3300
    while (BlastGetDbChunk(search->rdfp, &start, &stop, id_list,
3052
3301
                           &id_list_length, search->thr_info) != TRUE) {
3053
3302
        if (search->thr_info->realdb_done && id_list) {
3054
3303
            for (index=0; index<id_list_length; index++) {
3055
3304
                index1 = id_list[index];
3056
 
                search = BLASTPerformSearchWithReadDb(search, index1);
3057
 
                if (search->pbp->do_sum_stats == TRUE && 
3058
 
                    !search->pbp->mb_params)
3059
 
                    status = BlastLinkHsps(search);
3060
 
                else
3061
 
                    status = BlastGetNonSumStatsEvalue(search);
3062
 
                if (!search->pbp->mb_params || !search->handle_results)
 
3305
                if ((status = BLASTPerformSearchWithReadDb(search, index1))
 
3306
                    != 0)
 
3307
                   break;
 
3308
                if (!search->pbp->mb_params) {
 
3309
                   if (search->pbp->do_sum_stats == TRUE)
 
3310
                      status = BlastLinkHsps(search);
 
3311
                   else
 
3312
                      status = BlastGetNonSumStatsEvalue(search);
3063
3313
                   status = BlastReapHitlistByEvalue(search);
3064
 
                if (search->pbp->mb_params)
3065
 
                   MegaBlastReevaluateWithAmbiguities(search, index1);
3066
 
                else if (!search->handle_results)
3067
 
                   status = BlastReevaluateWithAmbiguities(search, index1);
 
3314
                   if (!search->handle_results)
 
3315
                      status = BlastReevaluateWithAmbiguities(search, index1);
 
3316
                } else {
 
3317
                   MegaBlastReevaluateWithAmbiguities(search);
 
3318
                }
3068
3319
                
3069
3320
                if (search->handle_results)
3070
3321
                    search->handle_results((VoidPtr) search);
3084
3335
            }
3085
3336
        } else if (!search->thr_info->realdb_done) {
3086
3337
            for (index=start; index<stop; index++) {
3087
 
                search = BLASTPerformSearchWithReadDb(search, index);
3088
 
                if (search->pbp->do_sum_stats == TRUE && 
3089
 
                    !search->pbp->mb_params)
3090
 
                    status = BlastLinkHsps(search);
3091
 
                else
3092
 
                    status = BlastGetNonSumStatsEvalue(search);
3093
 
                if (!search->pbp->mb_params ||
3094
 
                    !search->handle_results) 
 
3338
                if ((status = BLASTPerformSearchWithReadDb(search, index))
 
3339
                    != 0)
 
3340
                   break;
 
3341
                if (!search->pbp->mb_params) {
 
3342
                   if (search->pbp->do_sum_stats == TRUE)
 
3343
                      status = BlastLinkHsps(search);
 
3344
                   else
 
3345
                      status = BlastGetNonSumStatsEvalue(search);
3095
3346
                   status = BlastReapHitlistByEvalue(search);
3096
 
                if (search->pbp->mb_params)
3097
 
                   MegaBlastReevaluateWithAmbiguities(search, index);
3098
 
                else if (!search->handle_results)
3099
 
                   status = BlastReevaluateWithAmbiguities(search, index);
3100
 
                
 
3347
                   if (!search->handle_results)
 
3348
                      status = BlastReevaluateWithAmbiguities(search, index);
 
3349
                } else {
 
3350
                   MegaBlastReevaluateWithAmbiguities(search);
 
3351
                }
3101
3352
                if (search->handle_results)
3102
3353
                   search->handle_results((VoidPtr) search);
3103
3354
                else if (!search->pbp->mb_params)
3104
 
                   BlastSaveCurrentHitlist(search);
 
3355
                { /* AM: Query multiplexing. */
 
3356
                  if( !search->mult_queries )
 
3357
                    BlastSaveCurrentHitlist(search);
 
3358
                  else
 
3359
                  {
 
3360
                    InitHitLists( search );
 
3361
                    search->mult_queries->use_mq = TRUE;
 
3362
                    search->mult_queries->delete_current_hitlist = FALSE;
 
3363
 
 
3364
                    for( i = 0; i < search->mult_queries->NumQueries; ++i )
 
3365
                    {
 
3366
                      search->mult_queries->current_query = i;
 
3367
                      BlastSaveCurrentHitlist(search);
 
3368
                    }
 
3369
                    
 
3370
                    if( search->mult_queries->delete_current_hitlist )
 
3371
                    {
 
3372
                      search->current_hitlist
 
3373
                        = BlastHitListDestruct( search->current_hitlist );
 
3374
                    }
 
3375
 
 
3376
                    search->mult_queries->use_mq = FALSE;
 
3377
                    BlastHitListPurge( search->current_hitlist );
 
3378
                  }
 
3379
                }
3105
3380
                else
3106
3381
                   MegaBlastSaveCurrentHitlist(search);
3107
3382
 
3116
3391
                    break;      
3117
3392
            }
3118
3393
        }
 
3394
 
3119
3395
        /* Get out if "stop" was the last seq. */
3120
 
        if (time_out_boolean)
 
3396
        if (time_out_boolean || status)
3121
3397
            break;
3122
3398
    }
3123
3399
    
3124
3400
    if (id_list)
3125
3401
        id_list = MemFree(id_list);
 
3402
 
 
3403
    if (NlmThreadsAvailable() && search->pbp->process_num > 1)
 
3404
        NlmThreadRemoveOnExit(do_on_exit_func, ptr);
3126
3405
    
3127
3406
    return (VoidPtr) search;
3128
3407
3138
3417
    ReadDBFILEPtr rdfp;
3139
3418
    TNlmThread PNTR thread_array;
3140
3419
    VoidPtr status=NULL;
 
3420
    int num_entries_total;
 
3421
    int num_entries_total_real;
 
3422
    int start_seq;
 
3423
    int end_seq;
3141
3424
    
3142
3425
    if (search == NULL)
3143
3426
        return;
3144
3427
    
 
3428
    num_entries_total      = readdb_get_num_entries_total     (search->rdfp);
 
3429
    num_entries_total_real = readdb_get_num_entries_total_real(search->rdfp);
 
3430
 
 
3431
    /* Set 'done with read db' according to whether real databases are present */
 
3432
    
 
3433
    if (num_entries_total_real) {
 
3434
        search->thr_info->realdb_done = FALSE;
 
3435
    } else {
 
3436
        search->thr_info->realdb_done = TRUE;
 
3437
    }
 
3438
    
 
3439
    /* Make sure first, last sequence indices are in-range (0, NUM-1) */
 
3440
    
 
3441
    /* NOTE: search->pbp->final_seq is 1 beyond the last sequence ordinal id,
 
3442
       except when it's <=0, which means search to the last sequence in the 
 
3443
       database. */
 
3444
    /* search->thr_info versions are not. */
 
3445
    
 
3446
    if (search->pbp->final_db_seq > 0) {
 
3447
        end_seq = MIN(search->pbp->final_db_seq, num_entries_total);
 
3448
    } else {
 
3449
        end_seq = num_entries_total;
 
3450
    }
 
3451
    
 
3452
    start_seq = MAX(0, MIN(search->pbp->first_db_seq, end_seq));
 
3453
    
 
3454
    /* Set BlastGetDbChunk()'s pointers and counters */
 
3455
    
 
3456
    search->thr_info->last_db_seq       =
 
3457
        search->thr_info->gi_current    =
 
3458
        search->thr_info->db_chunk_last = start_seq;
 
3459
    
 
3460
    search->thr_info->final_db_seq = end_seq;
 
3461
    
 
3462
    /* Chunk size */
3145
3463
    search->thr_info->db_chunk_size = BLAST_DB_CHUNK_SIZE;
3146
3464
    
3147
 
#if 0    
3148
 
    readdb_get_totals(search->rdfp, &total_length, &num_seq);   
3149
 
#else
 
3465
    /* Tick control */
3150
3466
    num_seq = search->dbseq_num;
3151
 
#endif
3152
3467
    search->thr_info->db_incr = num_seq / BLAST_NTICKS;
3153
 
    search->thr_info->last_db_seq = search->pbp->first_db_seq;  /* The 1st sequence to compare against. */
3154
 
    if (search->pbp->final_db_seq == 0)
3155
 
        search->thr_info->final_db_seq = readdb_get_num_entries_total_real(search->rdfp);
3156
 
    else
3157
 
        search->thr_info->final_db_seq = search->pbp->final_db_seq;
3158
 
    
3159
 
    search->thr_info->db_chunk_last = search->pbp->first_db_seq;
3160
 
    
3161
 
    if (search->thr_info->blast_gi_list || search->rdfp->oidlist)
3162
 
        search->thr_info->gi_current = 0;
 
3468
    
3163
3469
 
3164
 
    /* guess if we need to search any real database */
3165
 
    if (search->thr_info->final_db_seq == 0) {
3166
 
        /* that means that we have only mask and no real database should be searched */
3167
 
        search->thr_info->realdb_done = TRUE;
3168
 
    } else {
3169
 
        /* otherwise, we need to start searching all real databases */
3170
 
        search->thr_info->realdb_done = FALSE;
3171
 
    }
3172
 
    
3173
 
    BlastSetLimits(search->pbp->cpu_limit, search->pbp->process_num);   
3174
3470
    if (NlmThreadsAvailable() && search->pbp->process_num > 1) {
3175
3471
        rdfp = search->rdfp;
3176
3472
        number_of_entries = INT4_MAX;
3194
3490
            array[index] = BlastSearchBlkDuplicate(search);
3195
3491
            if (array[index] == NULL) {
3196
3492
               search->pbp->process_num = index;
3197
 
               ErrPostEx(SEV_WARNING, 0, 0, "Number of threads reduced to %d", index);
 
3493
               ErrPostEx(SEV_WARNING, 0, 0, "Number of threads reduced to %d", index);
3198
3494
               break;
3199
3495
            }
3200
3496
        }
3201
3497
        
3202
3498
        thread_array = (TNlmThread PNTR) MemNew((search->pbp->process_num)*sizeof(TNlmThread));
3203
3499
        for (index=0; index<search->pbp->process_num; index++) {
3204
 
            if (search->pbp->gapped_calculation &&
3205
 
                StringCmp(search->prog_name, "blastn") != 0)
 
3500
            if (search->pbp->gapped_calculation && StringCmp(search->prog_name, "blastn") != 0)
3206
3501
                thread_array[index] = NlmThreadCreateEx(do_gapped_blast_search, (VoidPtr) array[index], THREAD_RUN|THREAD_BOUND, eTP_Default, NULL, NULL);
3207
3502
            else
3208
3503
                thread_array[index] = NlmThreadCreateEx(do_blast_search, (VoidPtr) array[index], THREAD_RUN|THREAD_BOUND, eTP_Default, NULL, NULL);
3251
3546
            search->real_gap_number_of_hsps += array[index]->real_gap_number_of_hsps;
3252
3547
#endif
3253
3548
            /* Not copied at thread start. */
3254
 
            /* search->blast_seqid_list = NULL; */
3255
3549
            array[index] = BlastSearchBlkDestruct(array[index]);        
3256
3550
        }
3257
3551
        array = MemFree(array);
3265
3559
        NlmMutexDestroy(search->thr_info->ambiguities_mutex);
3266
3560
        search->thr_info->ambiguities_mutex = NULL;
3267
3561
    } else {
3268
 
        if (search->pbp->gapped_calculation &&
3269
 
            StringCmp(search->prog_name, "blastn") != 0)
 
3562
        if (search->pbp->gapped_calculation && StringCmp(search->prog_name, "blastn") != 0)
3270
3563
            do_gapped_blast_search((VoidPtr) search);
3271
3564
        else
3272
 
           do_blast_search((VoidPtr) search);
 
3565
            do_blast_search((VoidPtr) search);
3273
3566
    }
3274
 
    search->rdfp = ReadDBCloseMHdrAndSeqFiles(search->rdfp); 
 
3567
    if (search->rdfp->parameters & READDB_CONTENTS_ALLOCATED)
 
3568
        search->rdfp = ReadDBCloseMHdrAndSeqFiles(search->rdfp); 
3275
3569
    if (time_out_boolean) {
3276
3570
        sprintf(buffer, "CPU limit exceeded");
3277
3571
        BlastConstructErrorMessage("Blast", buffer, 2, &(search->error_return));
3278
 
        search->timed_out = TRUE;
 
3572
        search->timed_out = TRUE;
3279
3573
    } 
3280
 
    else
3281
 
      /*
3282
 
       * We've finished the search without timing out,
3283
 
       * so we can ignore the soft limit now. -coulouri 20020104
3284
 
       */
3285
 
      {
3286
 
        #ifdef OS_UNIX
3287
 
 
3288
 
        #ifdef SIG_IGN
3289
 
        signal(SIGXCPU, SIG_IGN);
3290
 
        #else
3291
 
        sigignore(SIGXCPU);
3292
 
        #endif
3293
 
 
3294
 
        #endif
3295
 
      }
3296
 
    
 
3574
 
3297
3575
    return;
3298
3576
}
3299
3577
 
3435
3713
                assert(0); */
3436
3714
        }
3437
3715
}
3438
 
/* This function assume specific structure of SeqLoc !! */
3439
 
SeqLocPtr  blastDuplicateSeqLocInt(SeqLocPtr lcmask)
 
3716
/* This function duplicates a SEQLOC_PACKED_INT or a SEQLOC_INT type of SeqLoc */
 
3717
SeqLocPtr  blastDuplicateSeqLocInt(SeqLocPtr slp_head)
3440
3718
{
3441
 
    SeqLocPtr dup_slp, slp, dup_head;
 
3719
    SeqLocPtr dup_slp, slp, dup_head = NULL;
3442
3720
    SeqIntPtr sqip;
3443
3721
 
3444
 
    if(lcmask == NULL)
 
3722
    if(slp_head == NULL)
3445
3723
        return NULL;
3446
3724
 
3447
 
    /* Top level SeqLoc */    
3448
 
    dup_head = dup_slp = ValNodeNew(NULL);
3449
 
    dup_slp->choice = lcmask->choice;
3450
 
 
3451
3725
    /* First seqLoc in lower level */
3452
3726
 
3453
 
    slp = lcmask->data.ptrvalue;
 
3727
    if (slp_head->choice == SEQLOC_PACKED_INT) {
 
3728
       slp = slp_head->data.ptrvalue;
 
3729
       dup_head = ValNodeNew(NULL);
 
3730
       dup_head->choice = slp_head->choice;
 
3731
    } else if (slp_head->choice == SEQLOC_INT) {
 
3732
       slp = slp_head;
 
3733
    } else { 
 
3734
       return NULL;
 
3735
    }
3454
3736
    sqip = slp->data.ptrvalue;
3455
3737
    
3456
 
    dup_slp->data.ptrvalue = (VoidPtr) SeqLocIntNew(sqip->from, sqip->to, sqip->strand, sqip->id);
3457
 
    
3458
 
    dup_slp = dup_slp->data.ptrvalue;
3459
 
    
 
3738
    /* Top level SeqLoc */    
 
3739
 
 
3740
    dup_slp = (VoidPtr) SeqLocIntNew(sqip->from, sqip->to, sqip->strand, sqip->id);
 
3741
    if (dup_head)
 
3742
       dup_head->data.ptrvalue = dup_slp;
 
3743
    else
 
3744
       dup_head = dup_slp;
 
3745
 
3460
3746
    /* Loop over all SeqIntPtr s in this SeqLoc */
3461
 
    
3462
3747
    for(slp = slp->next; slp != NULL; slp = slp->next) {
3463
3748
        sqip = slp->data.ptrvalue;
3464
3749
        dup_slp->next = (VoidPtr) SeqLocIntNew(sqip->from, sqip->to, sqip->strand, sqip->id);
3485
3770
    }
3486
3771
    return;
3487
3772
}
3488
 
/* This function use PACKED INT as lcmask */
3489
 
static SeqLocPtr blastMergeFilterLocs(SeqLocPtr filter_slp, 
3490
 
                                      SeqLocPtr lcmask, Boolean translate,
3491
 
                                      Int2 frame, Int4 length)
3492
 
{
3493
 
 
3494
 
    SeqLocPtr slp, dup_slp;
3495
 
 
3496
 
    if(filter_slp == NULL && lcmask == NULL)
 
3773
 
 
3774
/* Adjust offsets in the mask locations list; discard locations outside of
 
3775
   the range */
 
3776
static SeqLocPtr 
 
3777
AdjustOffsetsInMaskLoc(SeqLocPtr mask_loc, Int4 start, Int4 length)
 
3778
{
 
3779
   SeqLocPtr slp, last_slp = NULL, next_slp, head = NULL;
 
3780
   SeqIntPtr loc;
 
3781
 
 
3782
   if (!mask_loc)
 
3783
      return NULL;
 
3784
 
 
3785
   if (mask_loc->choice == SEQLOC_PACKED_INT)
 
3786
      slp = (SeqLocPtr) mask_loc->data.ptrvalue;
 
3787
   else if (mask_loc->choice == SEQLOC_INT)
 
3788
      slp = mask_loc;
 
3789
   else /* Should be impossible */ 
 
3790
      return NULL;
 
3791
 
 
3792
   while (slp) {
 
3793
      if (slp->choice == SEQLOC_INT) {
 
3794
         loc = (SeqIntPtr) slp->data.ptrvalue;
 
3795
         loc->from -= start;
 
3796
         loc->to -= start;
 
3797
         if (loc->from >= length || loc->to < 0) {
 
3798
            next_slp = slp->next;
 
3799
            SeqLocFree(slp);
 
3800
            slp = next_slp;
 
3801
         } else {
 
3802
            if (loc->to > length)
 
3803
               loc->to = length;
 
3804
            if (loc->from < 0)
 
3805
               loc->from = 0;
 
3806
            if (last_slp) {
 
3807
               last_slp->next = slp;
 
3808
            } else {
 
3809
               head = slp;
 
3810
            }
 
3811
            last_slp = slp;
 
3812
            slp = slp->next;
 
3813
         }
 
3814
      } else {
 
3815
         next_slp = slp->next;
 
3816
         SeqLocFree(slp);
 
3817
         slp = next_slp;
 
3818
      }
 
3819
   }
 
3820
   if (last_slp)
 
3821
      last_slp->next = NULL;
 
3822
   
 
3823
   if (mask_loc->choice == SEQLOC_PACKED_INT) {
 
3824
      mask_loc->data.ptrvalue = head;
 
3825
      return mask_loc;
 
3826
   } else {
 
3827
      return head;
 
3828
   }
 
3829
 
3830
 
 
3831
/* This function use PACKED INT as slp2 */
 
3832
SeqLocPtr blastMergeFilterLocs(SeqLocPtr slp1, SeqLocPtr slp2, Boolean translate,
 
3833
                               Int2 frame, Int4 length)
 
3834
{
 
3835
 
 
3836
    SeqLocPtr slp, dup_slp, dup_head;
 
3837
 
 
3838
    if(slp1 == NULL && slp2 == NULL)
3497
3839
        return NULL;
3498
3840
 
3499
 
    if(lcmask == NULL)
3500
 
        return filter_slp;
3501
 
    
3502
 
    dup_slp = blastDuplicateSeqLocInt(lcmask);
3503
 
 
3504
 
    /* Request to translate means, that lcmask is DNA SeqLoc, that should be
 
3841
    if(slp2 == NULL)
 
3842
        return slp1;
 
3843
 
 
3844
    dup_slp = blastDuplicateSeqLocInt(slp2);
 
3845
 
 
3846
    /* Request to translate means, that slp2 is DNA SeqLoc, that should be
3505
3847
       translated into protein SeqLoc corresponding to the specific frame */
3506
3848
 
3507
3849
    if(translate) {
3508
3850
        BlastConvertDNASeqLoc(dup_slp, frame, length);
3509
3851
    }
3510
3852
    
3511
 
    if(filter_slp == NULL) {
 
3853
    if(slp1 == NULL) {
3512
3854
        return dup_slp;
3513
3855
    }
3514
3856
 
3515
3857
    /* OK We have 2 not NULL filters - merging... */
3516
3858
    
3517
 
    if(filter_slp->choice == SEQLOC_PACKED_INT)
3518
 
        slp = (SeqLocPtr) filter_slp->data.ptrvalue;
 
3859
    if(slp1->choice == SEQLOC_PACKED_INT)
 
3860
        slp = (SeqLocPtr) slp1->data.ptrvalue;
3519
3861
    else
3520
 
        slp = filter_slp;
 
3862
        slp = slp1;
 
3863
 
 
3864
    if (dup_slp->choice == SEQLOC_PACKED_INT) {
 
3865
       dup_head = dup_slp;
 
3866
       dup_slp = (SeqLocPtr) dup_slp->data.ptrvalue;
 
3867
       MemFree(dup_head);
 
3868
    }
3521
3869
    
3522
3870
    if(slp == NULL) {
3523
3871
        ErrPostEx(SEV_WARNING, 0, 0, "Invalid filter detected");
3524
 
        filter_slp->data.ptrvalue = dup_slp->data.ptrvalue;
3525
 
        dup_slp->data.ptrvalue = NULL;
3526
 
        SeqLocFree(dup_slp);
 
3872
        slp1->data.ptrvalue = dup_slp;
3527
3873
    } 
3528
3874
    else
3529
3875
    {
3530
3876
        while(slp->next != NULL)
3531
 
                 slp = slp->next;
 
3877
           slp = slp->next;
3532
3878
    
3533
 
        slp->next = dup_slp->data.ptrvalue;
3534
 
        dup_slp->data.ptrvalue = NULL;
3535
 
        SeqLocFree(dup_slp);
 
3879
        slp->next = dup_slp;
3536
3880
     }
3537
3881
    
3538
 
    return filter_slp;
 
3882
    return slp1;
3539
3883
}
3540
3884
 
3541
3885
/* This function is used to filter one frame of the translated DNA
3592
3936
 
3593
3937
    bsbpp = MemNew(sizeof(BlastSequenceBlkPtr)*2);
3594
3938
    for(m = 0; m < 2; m++) {
3595
 
        
3596
 
        bsbpp[m] = (BlastSequenceBlkPtr) MemNew(sizeof(BlastSequenceBlk));
3597
 
        
3598
 
        buff_size = bsbpp_in[m]->length+3*CODON_LENGTH;
3599
 
        bsbpp[m]->sequence_start = MemNew(buff_size);
3600
 
 
3601
 
        MemCpy(bsbpp[m]->sequence_start, 
3602
 
               bsbpp_in[m]->sequence_start, buff_size);
3603
 
 
3604
 
        bsbpp[m]->sequence = bsbpp_in[m]->sequence;
3605
 
        
3606
 
        bsbpp[m]->length = bsbpp_in[m]->length;
3607
 
        bsbpp[m]->original_length = bsbpp_in[m]->original_length;
3608
 
        bsbpp[m]->effective_length = bsbpp_in[m]->effective_length;
 
3939
       if (bsbpp_in[m]) {
 
3940
          bsbpp[m] = (BlastSequenceBlkPtr) MemNew(sizeof(BlastSequenceBlk));
 
3941
        
 
3942
          buff_size = bsbpp_in[m]->length+3*CODON_LENGTH;
 
3943
          bsbpp[m]->sequence_start = MemNew(buff_size);
 
3944
 
 
3945
          MemCpy(bsbpp[m]->sequence_start, 
 
3946
                 bsbpp_in[m]->sequence_start, buff_size);
 
3947
 
 
3948
          bsbpp[m]->sequence = bsbpp_in[m]->sequence;
 
3949
          
 
3950
          bsbpp[m]->length = bsbpp_in[m]->length;
 
3951
          bsbpp[m]->original_length = bsbpp_in[m]->original_length;
 
3952
          bsbpp[m]->effective_length = bsbpp_in[m]->effective_length;
 
3953
       }
3609
3954
    }
3610
3955
 
3611
3956
    return bsbpp;
3666
4011
    return bsbpp;
3667
4012
}
3668
4013
 
 
4014
 
 
4015
FloatHi LIBCALL BLASTCalculateSearchSpace(BLAST_OptionsBlkPtr options, 
 
4016
        Int4 nseq, Int8 dblen, Int4 qlen)
 
4017
{
 
4018
    Int4 length_adjustment, qlen_eff;
 
4019
    Int8 dblen_eff;
 
4020
    BLAST_KarlinBlkPtr kbp;
 
4021
    FloatHi searchsp;
 
4022
 
 
4023
    if (options == NULL)
 
4024
        return 0;
 
4025
 
 
4026
    kbp = BlastKarlinBlkCreate();
 
4027
    BlastKarlinBlkGappedCalcEx(kbp, options->gap_open, options->gap_extend,
 
4028
            options->decline_align, options->matrix, NULL);
 
4029
 
 
4030
    if (StringCmp(options->program_name, "blastn") != 0 && 
 
4031
        options->gapped_calculation ) {
 
4032
        Nlm_FloatHi alpha, beta; /*alpha and beta for new scoring system */
 
4033
        getAlphaBeta(options->matrix,&alpha,&beta,options->gapped_calculation,
 
4034
                     options->gap_open, options->gap_extend);
 
4035
        BlastComputeLengthAdjustment(kbp->K,
 
4036
                                     kbp->logK, alpha/kbp->Lambda, beta, 
 
4037
                                     qlen,
 
4038
                                     dblen, nseq,
 
4039
                                     &length_adjustment );
 
4040
    } else {
 
4041
        BlastComputeLengthAdjustment(kbp->K, kbp->logK, 1/kbp->H, 0.0, 
 
4042
                                     qlen, 
 
4043
                                     dblen, nseq,
 
4044
                                     &length_adjustment );
 
4045
    }
 
4046
 
 
4047
    kbp = BlastKarlinBlkDestruct(kbp);
 
4048
    
 
4049
    qlen_eff   = qlen - length_adjustment;
 
4050
    dblen_eff  = dblen - nseq*length_adjustment;
 
4051
    searchsp   = qlen_eff * dblen_eff;
 
4052
    
 
4053
    return searchsp;
 
4054
}
 
4055
 
3669
4056
#define DROPOFF_NUMBER_OF_BITS 10.0
3670
4057
#define INDEX_THR_MIN_SIZE 20000
3671
4058
Int2 LIBCALL BLASTSetUpSearchInternalByLoc (BlastSearchBlkPtr search, SeqLocPtr query_slp, BioseqPtr query_bsp, CharPtr prog_name, Int4 qlen, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *callback)(Int4 done, Int4 positives))
3677
4064
        Char buffer[128];
3678
4065
        Int2 retval = 0, status, last_index;
3679
4066
        Int4 effective_query_length, query_length, full_query_length,
3680
 
                index, length, length_adjustment=0, last_length_adjustment, min_query_length;
 
4067
                index, length, length_adjustment=0;
3681
4068
        Int4 array_size, max_length, block_width;
3682
4069
        Int4Ptr open, extend;
3683
4070
        Nlm_FloatHiPtr lambda, K, H;
3691
4078
        Uint1Ptr sequence;
3692
4079
        Uint1Ptr query_seq, query_seq_start, query_seq_rev, query_seq_start_rev;
3693
4080
        ValNodePtr vnp;
3694
 
        Nlm_FloatHi alpha, beta; /*alpha and beta for new scoring system*/
3695
 
        Nlm_FloatHi KtoUse, logKtoUse; /*K to use in length adjustment calculation*/
3696
 
        Nlm_FloatHi LambdaToUse; /*Lambda to use in length adjuatment*/
 
4081
        Int4 query_loc_start;
 
4082
 
 
4083
        /* AM: Temporaries to compute effective lengths of individual queries. */
 
4084
        IntArray lengths_eff; 
 
4085
        IntArray length_adj_tmp;
 
4086
        Int4 le_iter, length_tmp;
 
4087
        Int4 i, j;
 
4088
 
 
4089
        /* AM: To support individual masking in the case of query multiplexing. */
 
4090
        SeqLocPtr *concat_filter_slp, *concat_private_slp, *concat_private_slp_rev,
 
4091
                  * indiv_filter_slp, *indiv_private_slp, *indiv_private_slp_rev;
 
4092
        SeqLocPtr ConcatLCaseMask;
 
4093
        Boolean * indiv_mask_at_hash;
 
4094
        QueriesPtr mult_queries = NULL;
3697
4095
 
3698
4096
        if (options == NULL)
3699
4097
        {
3700
 
                ErrPostEx(SEV_FATAL, 0, 0, "BLAST_OptionsBlkPtr is NULL\n");
 
4098
                ErrPostEx(SEV_FATAL, 1, 0, "BLAST_OptionsBlkPtr is NULL\n");
3701
4099
                return 1;
3702
4100
        }
3703
4101
 
3704
4102
        if (query_slp == NULL && query_bsp == NULL)
3705
4103
        {
3706
 
                ErrPostEx(SEV_FATAL, 0, 0, "Query is NULL\n");
 
4104
                ErrPostEx(SEV_FATAL, 1, 0, "Query is NULL\n");
3707
4105
                return 1;
3708
4106
        }
3709
4107
 
 
4108
        /* AM: Support for query multiplexing. */
 
4109
        mult_queries = search->mult_queries;
 
4110
 
 
4111
        if( mult_queries )
 
4112
        {
 
4113
          concat_filter_slp 
 
4114
            = (SeqLocPtr *)MemNew( mult_queries->NumQueries*sizeof( SeqLocPtr ) );
 
4115
          indiv_filter_slp 
 
4116
            = (SeqLocPtr *)MemNew( mult_queries->NumQueries*sizeof( SeqLocPtr ) );
 
4117
          concat_private_slp 
 
4118
            = (SeqLocPtr *)MemNew( mult_queries->NumQueries*sizeof( SeqLocPtr ) );
 
4119
          concat_private_slp_rev 
 
4120
            = (SeqLocPtr *)MemNew( mult_queries->NumQueries*sizeof( SeqLocPtr ) );
 
4121
          indiv_private_slp 
 
4122
            = (SeqLocPtr *)MemNew( mult_queries->NumQueries*sizeof( SeqLocPtr ) );
 
4123
          indiv_private_slp_rev 
 
4124
            = (SeqLocPtr *)MemNew( mult_queries->NumQueries*sizeof( SeqLocPtr ) );
 
4125
          indiv_mask_at_hash
 
4126
            = (Boolean *)MemNew( mult_queries->NumQueries*sizeof( Boolean ) );
 
4127
        }
 
4128
 
3710
4129
        query_seq = NULL;       /* Gets rid of warning. */
3711
4130
        query_seq_rev = NULL;   /* Gets rid of warning. */
3712
4131
        query_seq_start = NULL; /* Gets rid of warning. */
3721
4140
 
3722
4141
        if (query_slp)
3723
4142
        {
 
4143
           query_loc_start = SeqLocStart(query_slp);
 
4144
           if (query_slp->choice != SEQLOC_WHOLE && 
 
4145
               search->pbp->query_lcase_mask) {
 
4146
              search->pbp->query_lcase_mask = 
 
4147
                 AdjustOffsetsInMaskLoc(search->pbp->query_lcase_mask, 
 
4148
                                        query_loc_start, SeqLocLen(query_slp));
 
4149
           }
 
4150
 
3724
4151
                strand = SeqLocStrand(query_slp);
3725
4152
                if (strand == Seq_strand_unknown || strand == Seq_strand_plus || strand == Seq_strand_both)
3726
4153
                {
3727
 
                        private_slp = SeqLocIntNew(SeqLocStart(query_slp), SeqLocStop(query_slp), Seq_strand_plus, SeqLocId(query_slp));
 
4154
                        private_slp = SeqLocIntNew(query_loc_start, SeqLocStop(query_slp), Seq_strand_plus, SeqLocId(query_slp));
 
4155
 
 
4156
                  /* AM: Support for query multiplexing. */
 
4157
                  if( mult_queries )
 
4158
                    for( i = 0; i < mult_queries->NumQueries; ++i )
 
4159
                    {
 
4160
                      indiv_private_slp[i] 
 
4161
                        = SeqLocIntNew( 0,
 
4162
                                        mult_queries->QueryEnds[i] - mult_queries->QueryStarts[i],
 
4163
                                        Seq_strand_plus,
 
4164
                                        mult_queries->FakeBsps[i]->id );
 
4165
                      concat_private_slp[i]
 
4166
                        = SeqLocIntNew( mult_queries->QueryStarts[i],
 
4167
                                        mult_queries->QueryEnds[i],
 
4168
                                        Seq_strand_plus,
 
4169
                                        SeqLocId( query_slp ) );
 
4170
                    }
3728
4171
                }
3729
4172
                if (strand == Seq_strand_minus || strand == Seq_strand_both)
3730
4173
                {
3731
 
                        private_slp_rev = SeqLocIntNew(SeqLocStart(query_slp), SeqLocStop(query_slp), Seq_strand_minus, SeqLocId(query_slp));
 
4174
                        private_slp_rev = SeqLocIntNew(query_loc_start, SeqLocStop(query_slp), Seq_strand_minus, SeqLocId(query_slp));
 
4175
 
 
4176
                  /* AM: Support for query multiplexing. */
 
4177
                  if( mult_queries )
 
4178
                    for( i = 0; i < mult_queries->NumQueries; ++i )
 
4179
                    {
 
4180
                      indiv_private_slp_rev[i] 
 
4181
                        = SeqLocIntNew( 0,
 
4182
                                        mult_queries->QueryEnds[i] - mult_queries->QueryStarts[i],
 
4183
                                        Seq_strand_minus,
 
4184
                                        mult_queries->FakeBsps[i]->id );
 
4185
                      concat_private_slp_rev[i] 
 
4186
                        = SeqLocIntNew( mult_queries->QueryStarts[i],
 
4187
                                        mult_queries->QueryEnds[i],
 
4188
                                        Seq_strand_minus,
 
4189
                                        SeqLocId( query_slp ) );
 
4190
                    }
3732
4191
                }
3733
4192
                private_slp_delete = TRUE;
3734
4193
                if (search->prog_number==blast_type_blastn)
3863
4322
            if(!(search->pbp->is_rps_blast &&
3864
4323
                                  (!StringCmp(prog_name, "tblastn")||
3865
4324
                                   !StringCmp(prog_name, "psitblastn")))) {
3866
 
                if (private_slp)
3867
 
                    filter_slp = BlastSeqLocFilterEx(private_slp, options->filter_string, &mask_at_hash);
3868
 
                else if (private_slp_rev)
3869
 
                    filter_slp = BlastSeqLocFilterEx(private_slp_rev, options->filter_string, &mask_at_hash);
3870
 
                
3871
 
                /* If lower case characters were detected in the input
3872
 
                   their locations will be masked out */
3873
 
                
3874
 
                if(search->pbp->query_lcase_mask != NULL) {
3875
 
                    filter_slp = blastMergeFilterLocs(filter_slp, search->pbp->query_lcase_mask, FALSE, 0, 0);
 
4325
                /* AM: Query multiplexing. */
 
4326
                if( !mult_queries )
 
4327
                {
 
4328
                  if (private_slp)
 
4329
                      filter_slp = BlastSeqLocFilterEx(private_slp, options->filter_string, &mask_at_hash);
 
4330
                  else if (private_slp_rev)
 
4331
                      filter_slp = BlastSeqLocFilterEx(private_slp_rev, options->filter_string, &mask_at_hash);
 
4332
 
 
4333
                  /* If lower case characters were detected in the input
 
4334
                     their locations will be masked out */
 
4335
                
 
4336
                  if(search->pbp->query_lcase_mask != NULL) {
 
4337
                      filter_slp = blastMergeFilterLocs(filter_slp, search->pbp->query_lcase_mask, FALSE, 0, 0);
 
4338
                  }
3876
4339
                }
 
4340
                else
 
4341
                  for( i = 0; i < mult_queries->NumQueries; ++i )
 
4342
                  {
 
4343
                    if( indiv_private_slp[i] )
 
4344
                    {
 
4345
                      indiv_filter_slp[i]
 
4346
                        = BlastSeqLocFilterEx( indiv_private_slp[i], 
 
4347
                                               options->filter_string, 
 
4348
                                               indiv_mask_at_hash + i );
 
4349
                      concat_filter_slp[i]
 
4350
                        = BlastSeqLocFilterEx( concat_private_slp[i],
 
4351
                                               options->filter_string,
 
4352
                                               indiv_mask_at_hash + i );
 
4353
                    }
 
4354
                    else if( indiv_private_slp_rev[i] )
 
4355
                    {
 
4356
                      indiv_filter_slp[i] 
 
4357
                        = BlastSeqLocFilterEx( indiv_private_slp_rev[i],
 
4358
                                               options->filter_string,
 
4359
                                               indiv_mask_at_hash + i );
 
4360
                      concat_filter_slp[i]
 
4361
                        = BlastSeqLocFilterEx( concat_private_slp_rev[i],
 
4362
                                               options->filter_string,
 
4363
                                               indiv_mask_at_hash + i );
 
4364
                    }
 
4365
 
 
4366
                    if( mult_queries->LCaseMasks && mult_queries->LCaseMasks[i] )
 
4367
                    {
 
4368
                      indiv_filter_slp[i] = blastMergeFilterLocs( indiv_filter_slp[i],
 
4369
                                                                  (SeqLocPtr)mult_queries->LCaseMasks[i]->data.ptrvalue, 
 
4370
                                                                  FALSE, 0, 0 );
 
4371
                      ConcatLCaseMask = ConcatSeqLoc( mult_queries, mult_queries->LCaseMasks[i], 
 
4372
                                                      SeqLocId( query_slp ), i );
 
4373
                      concat_filter_slp[i] = blastMergeFilterLocs( concat_filter_slp[i],
 
4374
                                                                   (SeqLocPtr)ConcatLCaseMask->data.ptrvalue,
 
4375
                                                                   FALSE, 0, 0 );
 
4376
                    }
 
4377
                  }
3877
4378
            }
3878
4379
        }
3879
4380
 
3882
4383
        */
3883
4384
 
3884
4385
        if(StringCmp(prog_name, "blastn") == 0) {
 
4386
          /* AM: Changed to support query multiplexing. */
 
4387
          if( !mult_queries )
3885
4388
                if (filter_slp && !mask_at_hash)
3886
4389
                        ValNodeAddPointer(&(search->mask), SEQLOC_MASKING_NOTSET, filter_slp);
 
4390
                else
 
4391
                        ValNodeAddPointer(&(search->mask1), SEQLOC_MASKING_NOTSET, filter_slp);
 
4392
          else
 
4393
            for( i = 0; i < mult_queries->NumQueries; ++i )
 
4394
              if( !indiv_mask_at_hash[i] )
 
4395
                ValNodeAddPointer( &(search->mask), SEQLOC_MASKING_NOTSET, indiv_filter_slp[i] );
 
4396
              else
 
4397
                ValNodeAddPointer( &(search->mask1), SEQLOC_MASKING_NOTSET, indiv_filter_slp[i] );
3887
4398
        }
3888
4399
 
3889
4400
 
3893
4404
        {
3894
4405
                spp = SeqPortNewByLoc(private_slp, Seq_code_ncbistdaa);
3895
4406
                SeqPortSet_do_virtual(spp, TRUE);
3896
 
                if (filter_slp && !mask_at_hash)
 
4407
 
 
4408
                /* AM: Changed to support query multiplexing. */
 
4409
                if( !mult_queries )
 
4410
                  if (filter_slp && !mask_at_hash)
3897
4411
                        ValNodeAddPointer(&(search->mask), SEQLOC_MASKING_NOTSET, filter_slp);
 
4412
                  else
 
4413
                        ValNodeAddPointer(&(search->mask1), SEQLOC_MASKING_NOTSET, filter_slp);
 
4414
                else
 
4415
                  for( i = 0; i < mult_queries->NumQueries; ++i )
 
4416
                    if( !indiv_mask_at_hash[i] )
 
4417
                      ValNodeAddPointer( &(search->mask), SEQLOC_MASKING_NOTSET, indiv_filter_slp[i] );
 
4418
                    else
 
4419
                      ValNodeAddPointer( &(search->mask1), SEQLOC_MASKING_NOTSET, indiv_filter_slp[i] );
3898
4420
        }
3899
4421
        else if (StringCmp(prog_name, "blastx") == 0 || StringCmp(prog_name, "tblastx") == 0 || StringCmp(prog_name, "blastn") == 0)
3900
4422
        {
3911
4433
        }
3912
4434
        else
3913
4435
        {
3914
 
                ErrPostEx(SEV_FATAL, 0, 0, "Only blastn, blastp, blastx, tblastn tblastx is allowed\n");
 
4436
                ErrPostEx(SEV_FATAL, 1, 0, "Only blastn, blastp, blastx, tblastn tblastx is allowed\n");
3915
4437
                retval = 1;
3916
4438
                goto BlastSetUpReturn;
3917
4439
        }
3927
4449
 
3928
4450
                        if (IS_residue(residue))
3929
4451
                        {
 
4452
                                if (residue == 24) /* 24 is Selenocysteine. */
 
4453
                                {
 
4454
                                        residue = 21; /* change Selenocysteine to X. */
 
4455
                                        sprintf(buffer, "Selenocysteine (U) at position %ld replaced by X", 
 
4456
                                                (long) index+1);
 
4457
                                        BlastConstructErrorMessage("Blast", buffer, 1, &(search->error_return));
 
4458
                                }
3930
4459
                                query_seq[index] = residue;
3931
4460
                                index++;
3932
4461
                        }
3943
4472
                                else
3944
4473
                                        BlastMaskTheResidues(query_seq, full_query_length, 15, filter_slp, FALSE, SeqLocStart(private_slp));
3945
4474
                        }
 
4475
 
 
4476
                        /* AM: query multiplexing. */
 
4477
                        if( mult_queries )
 
4478
                          for( i = 0; i < mult_queries->NumQueries; ++i )
 
4479
                            if( concat_filter_slp[i] )
 
4480
                              BlastMaskTheResidues( query_seq, 
 
4481
                                                    full_query_length,
 
4482
                                                    15, concat_filter_slp[i], FALSE, 
 
4483
                                                    SeqLocStart( private_slp ) );
 
4484
 
3946
4485
                        for (index=0; index<=query_length+1; index++)
3947
4486
                                query_seq_start[index] = ncbi4na_to_blastna[query_seq_start[index]];
3948
4487
                }
3958
4497
                {
3959
4498
                        if (IS_residue(residue))
3960
4499
                        {
 
4500
                                if (residue == 24) /* 24 is Selenocysteine. */
 
4501
                                {
 
4502
                                        residue = 21; /* change Selenocysteine to X. */
 
4503
                                        sprintf(buffer, "Selenocysteine (U) at position %ld replaced by X", 
 
4504
                                                (long) index+1);
 
4505
                                        BlastConstructErrorMessage("Blast", buffer, 1, &(search->error_return));
 
4506
                                }
3961
4507
                                query_seq_rev[index] = residue;
3962
4508
                                index++;
3963
4509
                        }
3974
4520
                           else
3975
4521
                              BlastMaskTheResidues(query_seq_rev, full_query_length, 15, filter_slp, TRUE, full_query_length - SeqLocStop(private_slp_rev) - 1);
3976
4522
                        }
 
4523
 
 
4524
                        /* AM: query multiplexing. */
 
4525
                        if( mult_queries )
 
4526
                          for( i = 0; i < mult_queries->NumQueries; ++i )
 
4527
                            if( concat_filter_slp[i] )
 
4528
                              BlastMaskTheResidues( query_seq_rev, 
 
4529
                                                    full_query_length,
 
4530
                                                    15, concat_filter_slp[i], TRUE, 
 
4531
                                                    full_query_length 
 
4532
                                                      - SeqLocStop( private_slp_rev ) );
 
4533
 
3977
4534
                        for (index=0; index<=query_length+1; index++)
3978
4535
                                query_seq_start_rev[index] =
3979
4536
                                   ncbi4na_to_blastna[query_seq_start_rev[index]];
4042
4599
                        else
4043
4600
                                BlastMaskTheResidues(query_seq, full_query_length, 21, filter_slp, FALSE, SeqLocStart(private_slp));
4044
4601
                }
 
4602
 
 
4603
                /* AM: query multiplexing. */
 
4604
                if( mult_queries )
 
4605
                  for( i = 0; i < mult_queries->NumQueries; ++i )
 
4606
                    if( concat_filter_slp[i] )
 
4607
                      BlastMaskTheResidues( query_seq, full_query_length,
 
4608
                                            21, concat_filter_slp[i], FALSE,
 
4609
                                            SeqLocStart( private_slp ) );
 
4610
 
4045
4611
                BlastSequenceAddSequence(search->context[0].query, NULL, query_seq_start, query_length, query_length, 0);
4046
4612
        }
4047
4613
        else if (StringCmp(prog_name, "blastx") == 0  || StringCmp(prog_name, "tblastx") == 0)
4086
4652
                        }
4087
4653
                        if (filter_slp && !mask_at_hash)
4088
4654
                                ValNodeAddPointer(&(search->mask), FrameToDefine(search->context[index].query->frame), filter_slp);
 
4655
                        else
 
4656
                                ValNodeAddPointer(&(search->mask1), FrameToDefine(search->context[index].query->frame), filter_slp);
4089
4657
                   }
4090
4658
                   BlastSequenceAddSequence(search->context[index].query, NULL, sequence, length, query_length, 0);
4091
4659
                }
4114
4682
 
4115
4683
        if (mask_at_hash)
4116
4684
        { /* No longer needed. */
 
4685
/*
4117
4686
                filter_slp = SeqLocSetFree(filter_slp);
 
4687
*/
4118
4688
        }
4119
4689
        
4120
4690
/* Set the ambiguous residue before the ScoreBlk is filled. */
4125
4695
        }
4126
4696
        else
4127
4697
        {
4128
 
                if(options->matrix!=NULL) {
 
4698
                if(options->matrix!=NULL && *(options->matrix) != NULLB) {
4129
4699
                     search->sbp->read_in_matrix = TRUE;
4130
4700
                } else {
4131
4701
                     search->sbp->read_in_matrix = FALSE;
4177
4747
        retval = 0;
4178
4748
        for (index=search->first_context; index<=search->last_context; index++)
4179
4749
        {
 
4750
           /* AM: Changed to support query multiplexing. */
4180
4751
           if (search->prog_number != blast_type_blastn || 
4181
4752
               index>search->first_context || 
4182
4753
               search->last_context==search->first_context)
4183
 
              status = BlastScoreBlkFill(search->sbp, (CharPtr)
4184
 
                                           search->context[index].query->sequence,search->context[index].query->length, index);
 
4754
           {
 
4755
             if( search->prog_number == blast_type_tblastn
 
4756
                 && search->mult_queries )
 
4757
             {
 
4758
               for( i = 0; i < search->mult_queries->NumQueries; ++i )
 
4759
               {
 
4760
                 status = BlastScoreBlkFill( 
 
4761
                   search->sbp, 
 
4762
                   ((CharPtr)search->context[index].query->sequence)
 
4763
                     + search->mult_queries->QueryStarts[i],
 
4764
                   search->mult_queries->QueryEnds[i]
 
4765
                     - search->mult_queries->QueryStarts[i] + 1,
 
4766
                   index );
 
4767
 
 
4768
                 if( status ) break;
 
4769
 
 
4770
                 search->mult_queries->lambda_array[i]
 
4771
                   = search->sbp->kbp_std[search->first_context]->Lambda;
 
4772
 
 
4773
                 if( i )
 
4774
                 {
 
4775
                   if( search->mult_queries->LambdaMin
 
4776
                       > search->sbp->kbp_std[search->first_context]->Lambda )
 
4777
                     search->mult_queries->LambdaMin
 
4778
                       = search->sbp->kbp_std[search->first_context]->Lambda;
 
4779
 
 
4780
                   if( search->mult_queries->LambdaMax
 
4781
                       < search->sbp->kbp_std[search->first_context]->Lambda )
 
4782
                     search->mult_queries->LambdaMax
 
4783
                       = search->sbp->kbp_std[search->first_context]->Lambda;
 
4784
 
 
4785
                   if( search->mult_queries->LogKMin
 
4786
                       > search->sbp->kbp_std[search->first_context]->logK )
 
4787
                     search->mult_queries->LogKMin
 
4788
                       = search->sbp->kbp_std[search->first_context]->logK;
 
4789
 
 
4790
                   if( search->mult_queries->LogKMax
 
4791
                       < search->sbp->kbp_std[search->first_context]->logK )
 
4792
                     search->mult_queries->LogKMax
 
4793
                       = search->sbp->kbp_std[search->first_context]->logK;
 
4794
                 }
 
4795
                 else
 
4796
                 {
 
4797
                   search->mult_queries->LambdaMin
 
4798
                     = search->mult_queries->LambdaMax
 
4799
                     = search->sbp->kbp_std[search->first_context]->Lambda;
 
4800
                   search->mult_queries->LogKMin 
 
4801
                     = search->mult_queries->LogKMax
 
4802
                     = search->sbp->kbp_std[search->first_context]->logK;
 
4803
                 }
 
4804
               }
 
4805
             }
 
4806
 
 
4807
             status 
 
4808
               = BlastScoreBlkFill(search->sbp, (CharPtr)
 
4809
                                   search->context[index].query->sequence,
 
4810
                                   search->context[index].query->length, 
 
4811
                                   index);
 
4812
           }
4185
4813
           else
4186
 
              status = BlastScoreBlkFill(search->sbp, (CharPtr)
4187
 
                                         search->context[index].query->sequence, 
4188
 
                                         search->context[index+1].query->length, 
4189
 
                                         index);
 
4814
           {
 
4815
             status 
 
4816
               = BlastScoreBlkFill(search->sbp, (CharPtr)
 
4817
                                   search->context[index].query->sequence, 
 
4818
                                   search->context[index+1].query->length, 
 
4819
                                   index);
 
4820
           }
 
4821
 
4190
4822
                if (status != 0)
4191
4823
                {
4192
4824
                        sprintf(buffer, "Unable to calculate Karlin-Altschul params, check query sequence");
4213
4845
 
4214
4846
        search->sbp->kbp_gap = search->sbp->kbp_gap_std;
4215
4847
        search->sbp->kbp = search->sbp->kbp_std;
4216
 
        if (StringCmp(prog_name, "blastn") != 0)
 
4848
        if (search->pbp->gapped_calculation && StringCmp(prog_name, "blastn") != 0)
4217
4849
        {
4218
4850
                array_size = BlastKarlinGetMatrixValues(search->sbp->name, &open, &extend, &lambda, &K, &H, NULL);
4219
4851
                if (array_size > 0)
4233
4865
                        MemFree(lambda);
4234
4866
                        MemFree(K);
4235
4867
                        MemFree(H);
4236
 
                }
 
4868
                } else {
 
4869
                   /* This can only happen in case of unsupported matrix! */
 
4870
                   sprintf(buffer, 
 
4871
                           "matrix %s is not supported\n",
 
4872
                           search->sbp->name);
 
4873
                   BlastConstructErrorMessage("BLASTSetUpSearch", buffer, 2,
 
4874
                                              &search->error_return);
 
4875
                   retval = 1;
 
4876
                }
4237
4877
                if (search->sbp->kbp_ideal == NULL)
4238
4878
                        search->sbp->kbp_ideal = BlastKarlinBlkStandardCalcEx(search->sbp);
4239
4879
        }
4250
4890
        if (retval)
4251
4891
           goto BlastSetUpReturn;
4252
4892
 
4253
 
        if (search->pbp->gapped_calculation &&
4254
 
                StringCmp(search->prog_name, "blastn") != 0)
4255
 
                min_query_length = (Int4) 1/(search->sbp->kbp_gap_std[search->first_context]->K);
4256
 
        else
4257
 
                min_query_length = (Int4) 1/(search->sbp->kbp[search->first_context]->K);
4258
 
 
4259
 
        if (search->prog_number != blast_type_blastn)
4260
 
        {
4261
 
        
4262
 
                getAlphaBeta(options->matrix,&alpha,&beta,search->pbp->gapped_calculation, search->pbp->gap_open, search->pbp->gap_extend);
4263
 
                if (search->pbp->gapped_calculation) {
4264
 
                        KtoUse = search->sbp->kbp_gap_std[search->first_context]->K;
4265
 
                        logKtoUse = search->sbp->kbp_gap_std[search->first_context]->logK;
4266
 
                        LambdaToUse = search->sbp->kbp_gap_std[search->first_context]->Lambda;
4267
 
                }
4268
 
                else 
4269
 
                {
4270
 
                        KtoUse = search->sbp->kbp[search->first_context]->K;
4271
 
                        logKtoUse = search->sbp->kbp[search->first_context]->logK;
4272
 
                        LambdaToUse = search->sbp->kbp[search->first_context]->Lambda;
4273
 
                }
4274
 
        }
4275
 
        last_length_adjustment = 0;
4276
 
        for (index=0; index<5; index++)
4277
 
        {
4278
 
                if (search->prog_number != blast_type_blastn)
4279
 
                {
4280
 
                        length_adjustment = Nlm_Nint(((logKtoUse+log((Nlm_FloatHi)(length-last_length_adjustment)*(Nlm_FloatHi)(MAX(search->dbseq_num, (search->dblen)-(search->dbseq_num*last_length_adjustment)))))*alpha/LambdaToUse) + beta);
4281
 
                }
4282
 
                else
4283
 
                {
4284
 
                        length_adjustment = (Int4) ((search->sbp->kbp[search->first_context]->logK)+log((Nlm_FloatHi)(length-last_length_adjustment)*(Nlm_FloatHi)(MAX(1, (search->dblen)-(search->dbseq_num*last_length_adjustment)))))/(search->sbp->kbp[search->first_context]->H);
4285
 
                }
4286
 
                if (length_adjustment >= length-min_query_length)
4287
 
                {
4288
 
                        length_adjustment = length-min_query_length;
4289
 
                        break;
4290
 
                }
4291
 
        
4292
 
                if (ABS(last_length_adjustment-length_adjustment) <= 1)
4293
 
                        break;
4294
 
                last_length_adjustment = length_adjustment;
4295
 
        }
 
4893
        if (options->gapped_calculation &&
 
4894
        StringCmp(options->program_name, "blastn") != 0) {
 
4895
        
 
4896
        BLAST_KarlinBlkPtr kbp_gap =
 
4897
          search->sbp->kbp_gap_std[search->first_context];
 
4898
        Nlm_FloatHi alpha, beta; /*alpha and beta for the scoring system */
 
4899
        getAlphaBeta(options->matrix,&alpha,&beta,options->gapped_calculation,
 
4900
                     options->gap_open, options->gap_extend);
 
4901
 
 
4902
        BlastComputeLengthAdjustment(kbp_gap->K,
 
4903
                                     kbp_gap->logK,
 
4904
                                     alpha/kbp_gap->Lambda, beta, 
 
4905
                                     length,
 
4906
                                     search->dblen, search->dbseq_num,
 
4907
                                     &length_adjustment );
 
4908
        
 
4909
        effective_query_length = length - length_adjustment;
 
4910
 
 
4911
        /* AM: If concatenating queries, then compute effective lengths of 
 
4912
           individual queries. */
 
4913
        if( search->mult_queries )
 
4914
                {
 
4915
            search->mult_queries->TotalLength = length;
 
4916
            lengths_eff =
 
4917
                (IntArray) MemNew( sizeof( Int4 )*
 
4918
                                   search->mult_queries->NumQueries );
 
4919
            length_adj_tmp =
 
4920
                (IntArray)MemNew( sizeof( Int4 )*
 
4921
                                  search->mult_queries->NumQueries );
 
4922
 
 
4923
            for( le_iter = 0;
 
4924
                 le_iter < search->mult_queries->NumQueries;
 
4925
                 ++le_iter ) {
 
4926
                length_tmp = search->mult_queries->QueryEnds[le_iter]
 
4927
                    - search->mult_queries->QueryStarts[le_iter] 
 
4928
                    + 1;
 
4929
                length_adj_tmp[le_iter] = 0;
 
4930
                
 
4931
                BlastComputeLengthAdjustment(kbp_gap->K,
 
4932
                                             kbp_gap->logK,
 
4933
                                             alpha/kbp_gap->Lambda,
 
4934
                                             beta, 
 
4935
                                             length_tmp,
 
4936
                                             search->dblen, search->dbseq_num,
 
4937
                                             &length_adj_tmp[le_iter] );
 
4938
 
 
4939
                lengths_eff[le_iter] = length_tmp - length_adj_tmp[le_iter];
 
4940
                
 
4941
                search->mult_queries->EffLengths[le_iter] =
 
4942
                    lengths_eff[le_iter];
 
4943
                search->mult_queries->Adjustments[le_iter] =
 
4944
                    length_adj_tmp[le_iter];
 
4945
            
 
4946
                    if( search->mult_queries->MinLen > length_tmp )
 
4947
                      search->mult_queries->MinLen = length_tmp;
 
4948
 
 
4949
            if( search->mult_queries->MinLenEff > lengths_eff[le_iter] )
 
4950
                search->mult_queries->MinLenEff = lengths_eff[le_iter];
 
4951
                  }
 
4952
                }
 
4953
    }
 
4954
        else /* this is an ungapped alignment or the program is blastn */
 
4955
        {
 
4956
        BLAST_KarlinBlkPtr kbp = search->sbp->kbp[search->first_context];
 
4957
 
 
4958
        BlastComputeLengthAdjustment( kbp->K, kbp->logK, 1/kbp->H, 0.0, 
 
4959
                                 length, 
 
4960
                                 search->dblen, search->dbseq_num,
 
4961
                                 &length_adjustment );
 
4962
 
 
4963
        effective_query_length = length - length_adjustment;
 
4964
 
 
4965
        /* AM: If concatenating queries, then compute effective lengths of 
 
4966
           individual queries. */
 
4967
        if( search->mult_queries ) {
 
4968
            search->mult_queries->TotalLength = length;
 
4969
            lengths_eff =
 
4970
                (IntArray)MemNew( sizeof( Int4 )*
 
4971
                                  search->mult_queries->NumQueries );
 
4972
            length_adj_tmp =
 
4973
                (IntArray)MemNew( sizeof( Int4 )*
 
4974
                                  search->mult_queries->NumQueries );
 
4975
 
 
4976
            for( le_iter = 0;
 
4977
                 le_iter < search->mult_queries->NumQueries;
 
4978
                 ++le_iter ) {
 
4979
                length_tmp = search->mult_queries->QueryEnds[le_iter]
 
4980
                    - search->mult_queries->QueryStarts[le_iter] 
 
4981
                    + 1;
 
4982
                length_adj_tmp[le_iter] = 0;
 
4983
 
 
4984
                BlastComputeLengthAdjustment( kbp->K, kbp->logK,
 
4985
                                              1/kbp->H, 0.0, 
 
4986
                                              length_tmp, 
 
4987
                                              search->dblen, search->dbseq_num,
 
4988
                                              &(length_adj_tmp[le_iter]) );
 
4989
 
 
4990
                lengths_eff[le_iter] = length_tmp - length_adj_tmp[le_iter];
 
4991
                search->mult_queries->EffLengths[le_iter] =
 
4992
                    lengths_eff[le_iter];
 
4993
                search->mult_queries->Adjustments[le_iter] =
 
4994
                    length_adj_tmp[le_iter];
 
4995
                    
 
4996
                if( search->mult_queries->MinLen > length_tmp )
 
4997
                    search->mult_queries->MinLen = length_tmp;
 
4998
                
 
4999
                if( search->mult_queries->MinLenEff > lengths_eff[le_iter] )
 
5000
                    search->mult_queries->MinLenEff = lengths_eff[le_iter];
 
5001
            }
 
5002
                }
 
5003
    }
 
5004
 
4296
5005
        search->length_adjustment = MAX(length_adjustment, 0);
4297
5006
 
4298
 
        if (search->prog_number != blast_type_blastn)
4299
 
                search->dblen_eff = MAX(search->dbseq_num, search->dblen - search->dbseq_num*search->length_adjustment); 
4300
 
        else
4301
 
                search->dblen_eff = MAX(1, search->dblen - search->dbseq_num*search->length_adjustment);
4302
 
        effective_query_length = MAX(length - search->length_adjustment, min_query_length);
 
5007
    if (!search->dblen_eff) {
 
5008
        search->dblen_eff =
 
5009
            search->dblen - search->dbseq_num*search->length_adjustment;
 
5010
        /* AM: If concatenating queries find effective db lengths for each query. */
 
5011
           if( search->mult_queries )
 
5012
           {
 
5013
             for( le_iter = 0; le_iter < search->mult_queries->NumQueries; 
 
5014
                  ++le_iter )
 
5015
             {
 
5016
               if( search->prog_number == blast_type_blastn )
 
5017
                 search->mult_queries->DbLenEff[le_iter]
 
5018
                   = MAX( 1, search->dblen 
 
5019
                             - search->dbseq_num*length_adj_tmp[le_iter] );
 
5020
               else
 
5021
                 search->mult_queries->DbLenEff[le_iter]
 
5022
                   = MAX( search->dbseq_num, 
 
5023
                          search->dblen 
 
5024
                          - search->dbseq_num*length_adj_tmp[le_iter] );
 
5025
             }
 
5026
           }
 
5027
        }
4303
5028
        
4304
5029
        for (index=search->first_context; index<=search->last_context; index++)
4305
5030
        {
4306
5031
                search->context[index].query->effective_length = effective_query_length;
4307
5032
        }
4308
5033
 
 
5034
        /* AM: Setting up effective search spaces for individual queries. */
4309
5035
        if (search->searchsp_eff == 0)
 
5036
        {
4310
5037
                search->searchsp_eff = ((Nlm_FloatHi) search->dblen_eff)*((Nlm_FloatHi) effective_query_length);
4311
5038
 
 
5039
                if( search->mult_queries )
 
5040
                  for( le_iter = 0; le_iter < search->mult_queries->NumQueries; ++le_iter )
 
5041
                  {
 
5042
                    search->mult_queries->SearchSpEff[le_iter]
 
5043
                      = ((Nlm_FloatHi)search->mult_queries->DbLenEff[le_iter])
 
5044
                      * ((Nlm_FloatHi)lengths_eff[le_iter]);
 
5045
 
 
5046
                    if( lengths_eff[le_iter] == search->mult_queries->MinLenEff )
 
5047
                      search->mult_queries->MinSearchSpEff
 
5048
                        = search->mult_queries->SearchSpEff[le_iter];
 
5049
                  }
 
5050
        }
 
5051
        else if( search->mult_queries )
 
5052
          for( le_iter = 0; le_iter < search->mult_queries->NumQueries; ++le_iter )
 
5053
            search->mult_queries->SearchSpEff[le_iter] = search->searchsp_eff;
 
5054
 
4312
5055
        /* The default is that cutoff_s was not set and is zero. */
4313
5056
        if (options->cutoff_s == 0)
4314
5057
        {
4351
5094
        search->pbp->maxNumPasses = options->maxNumPasses;
4352
5095
        search->pbp->pseudoCountConst = options->pseudoCountConst;
4353
5096
 
4354
 
        search->pbp->process_num = options->number_of_cpus;
 
5097
        if (NlmThreadsAvailable()) /* ONly allow more than one cpu if MT compiled. */
 
5098
                search->pbp->process_num = options->number_of_cpus;
 
5099
        else
 
5100
                search->pbp->process_num = 1;
 
5101
 
4355
5102
        search->pbp->cpu_limit = options->cpu_limit;
4356
5103
        search->pbp->gap_decay_rate = options->gap_decay_rate;
4357
5104
        search->pbp->gap_size = options->gap_size;
4377
5124
                {
4378
5125
                        search->pbp->gap_x_dropoff = (BLAST_Score) (options->gap_x_dropoff*NCBIMATH_LN2 / search->sbp->kbp_gap[search->first_context]->Lambda);
4379
5126
                        search->pbp->gap_x_dropoff_final = (BLAST_Score) (options->gap_x_dropoff_final*NCBIMATH_LN2 / search->sbp->kbp_gap[search->first_context]->Lambda);
 
5127
 
 
5128
                  /* AM: Change to support query multiplexing. */
 
5129
                  if( StringCmp( search->prog_name, "tblastn" ) == 0
 
5130
                      && search->mult_queries )
 
5131
                  {
 
5132
                    search->pbp->gap_trigger 
 
5133
                      = (BLAST_Score)( ( options->gap_trigger*NCBIMATH_LN2
 
5134
                                           + search->mult_queries->LogKMin )
 
5135
                                       /search->mult_queries->LambdaMax );
 
5136
                  }
 
5137
                  else
4380
5138
                        search->pbp->gap_trigger = (BLAST_Score) ((options->gap_trigger*NCBIMATH_LN2+search->sbp->kbp[search->first_context]->logK)/ search->sbp->kbp[search->first_context]->Lambda);
4381
5139
                }
4382
5140
                else
4451
5209
                        avglen = total_length/total_number;
4452
5210
        }
4453
5211
 
 
5212
        /* EMG - For blastn, copy the ungapped Karlin blocks 
 
5213
           to the gapped ones. */
 
5214
        if( search->prog_number == blast_type_blastn ) {
 
5215
                int tmp_index;
 
5216
                for (tmp_index = search->first_context;
 
5217
                     tmp_index <= search->last_context;
 
5218
                     tmp_index++ ) {
 
5219
                        BLAST_KarlinBlk *kbp_gap;
 
5220
                        BLAST_KarlinBlk *kbp;
 
5221
 
 
5222
                        search->sbp->kbp_gap_std[tmp_index] = 
 
5223
                                        BlastKarlinBlkCreate();
 
5224
                        kbp_gap = search->sbp->kbp_gap_std[tmp_index];
 
5225
                        kbp     = search->sbp->kbp_std[tmp_index];
 
5226
                        
 
5227
                        kbp_gap->Lambda = kbp->Lambda;
 
5228
                        kbp_gap->K      = kbp->K;
 
5229
                        kbp_gap->logK   = kbp->logK;
 
5230
                        kbp_gap->H      = kbp->H;
 
5231
                        kbp_gap->paramC = kbp->paramC;
 
5232
                }
 
5233
                search->sbp->kbp_gap = search->sbp->kbp_gap_std;
 
5234
        }
 
5235
 
4454
5236
        if (blast_set_parameters(search, options->dropoff_1st_pass, options->dropoff_2nd_pass, avglen, search->searchsp_eff, options->window_size) != 0) {
4455
5237
           retval = 1;
4456
5238
           goto BlastSetUpReturn;
4497
5279
        {
4498
5280
                if (search->context[search->first_context].query->length < options->wordsize)
4499
5281
                {
4500
 
                        BlastConstructErrorMessage("Blast", buffer, 2, &(search->error_return));
4501
 
                        BlastConstructErrorMessage("Blast", 
4502
 
                                "Query must be at least wordsize", 2, &(search->error_return));
4503
 
                        retval = 1;
4504
 
                        goto BlastSetUpReturn;
 
5282
                   Char tmp_buffer[128];
 
5283
                   sprintf(tmp_buffer, 
 
5284
                           "Query length %ld is less than wordsize %ld",
 
5285
                       search->context[search->first_context].query->length,
 
5286
                           options->wordsize);
 
5287
                   BlastConstructErrorMessage("Blast", buffer, 2,
 
5288
                                              &(search->error_return));
 
5289
                   BlastConstructErrorMessage("Blast", 
 
5290
                      tmp_buffer, 2, &(search->error_return));
 
5291
                   retval = 1;
 
5292
                   goto BlastSetUpReturn;
4505
5293
                }
4506
5294
        }
4507
5295
                
4530
5318
                            status = BlastFindWords(search, 0, search->context[index].query->length, options->threshold_second, (Uint1) index);
4531
5319
                        else
4532
5320
                            status = BlastNewFindWords(search, 0, search->context[index].query->length, options->threshold_second, (Uint1) index);
4533
 
                        if (status != 0) {
 
5321
                        if (status < 0) {
4534
5322
                            search->thr_info->awake_index = FALSE;
4535
 
                            ErrPostEx(SEV_WARNING, 0, 0, "BlastFindWords returned non-zero status");
 
5323
                            ErrPostEx(SEV_WARNING, 0, 0, 
 
5324
                               "BlastFindWords returned non-zero status");
4536
5325
                            retval = 1;
4537
5326
                            goto BlastSetUpReturn;
4538
5327
                        }
4559
5348
                if (status > 0)
4560
5349
                {
4561
5350
                        search->thr_info->awake_index = FALSE;
4562
 
                        sprintf(buffer, "No valid letters to be indexed");
4563
 
                        BlastConstructErrorMessage("Blast", buffer, 2, &(search->error_return));
4564
 
                        retval = 1;
4565
 
                        goto BlastSetUpReturn;
 
5351
                        sprintf(buffer, "No valid letters to be indexed on context %d", index);
 
5352
                        /* This is just a warning */
 
5353
                        BlastConstructErrorMessage("Blast", buffer, 1,
 
5354
                                                   &(search->error_return));
4566
5355
                }
4567
5356
                else if (status < 0)
4568
5357
                {
4578
5367
           else
4579
5368
              mb_lookup_position_aux_destruct(search->wfp->lookup);
4580
5369
        }
 
5370
 
 
5371
 
4581
5372
        /* 
4582
5373
        Turn off the index thread by setting this flag.  Don't wait for a join, as the
4583
5374
        search will take much longer than the one second for this to die.
4649
5440
        return TRUE;
4650
5441
}
4651
5442
 
4652
 
 
4653
 
/*
4654
 
        This function reads in a list of gi's from a file.
4655
 
The file may be either in binary or text format.  
4656
 
 
4657
 
The binary gilist format has the following construction:
4658
 
 
4659
 
1.) 1st 4 bytes: a 'magic' number: UINT4_MAX
4660
 
2.) 2nd 4 bytes: total number of gi's in the file (call this value 'number').
4661
 
3.) 'number' set of 4 bytes, allowing 4 bytes for each gi.
4662
 
 
4663
 
The function GetGisFromFile first checks what the first 4 bytes
4664
 
of a file are, if they are the 'magic' number, then it proceeds
4665
 
to read values assuming a binary format.  If they are not the
4666
 
'magic' number, then a text format is assumed.
4667
 
 
4668
 
The binary gilist can be produced from a text gilist using the
4669
 
function readdb_MakeGiFileBinary.
4670
 
 
4671
 
*/
4672
 
 
4673
 
#define LINE_LEN        1024
4674
5443
BlastDoubleInt4Ptr 
4675
5444
GetGisFromFile (CharPtr gifile, Int4Ptr gi_list_size)
4676
 
 
4677
5445
{
4678
 
    BlastDoubleInt4Ptr  gi_list;
4679
 
    FILE                *gifp = NULL;
4680
 
    Int4                index = 0, value, chunk_size = 24, number;
4681
 
    Int2                status;
4682
 
    Char                line[LINE_LEN];
4683
 
    long                tmplong;
4684
 
    NlmMFILEPtr         mfp;
4685
 
    Uint4Ptr            tmp_value;
4686
 
    Char                file_name[PATH_MAX], blast_dir[PATH_MAX];
4687
 
    
4688
 
    /**
4689
 
     * first looking in current directory, then checking .ncbirc,
4690
 
     * then $BLASTDB and then assuming BLASTDB_DIR
4691
 
     */
4692
 
    if (FileLength(gifile) > 0) {
4693
 
       char *path = Nlm_FilePathFind(gifile);
4694
 
       if (StringLen(path) > 0) {
4695
 
          StringCpy(blast_dir, path);
4696
 
       } else {
4697
 
          StringCpy(blast_dir, ".");
4698
 
       }
4699
 
       MemFree(path);
4700
 
    } else {
4701
 
#ifdef OS_UNIX
4702
 
       if (getenv("BLASTDB"))
4703
 
          Nlm_GetAppParam("NCBI", "BLAST", "BLASTDB", getenv("BLASTDB"), blast_dir, PATH_MAX);
4704
 
       else
4705
 
#endif
4706
 
          Nlm_GetAppParam ("NCBI", "BLAST", "BLASTDB", BLASTDB_DIR, blast_dir, PATH_MAX);
4707
 
    }
4708
 
    sprintf(file_name, "%s%s%s", blast_dir, DIRDELIMSTR, FileNameFind(gifile));
4709
 
 
4710
 
 
4711
 
    
4712
 
    mfp = NlmOpenMFILE(file_name);
4713
 
    if (mfp == NULL) {
4714
 
        ErrPostEx(SEV_ERROR, 0, 0, "Unable to open file %s", file_name);
 
5446
    BlastDoubleInt4Ptr retval = NULL;
 
5447
    Int4ListPtr gilist = NULL;
 
5448
    register Int4 i;
 
5449
 
 
5450
    if ( !(gilist = Int4ListReadFromFile(gifile)))
4715
5451
        return NULL;
4716
 
    }
4717
 
    
4718
 
    tmp_value = (Uint4Ptr) mfp->mmp;
4719
 
    if (SwapUint4(*tmp_value) == READDB_MAGIC_NUMBER) {
4720
 
        mfp->mmp += 4;
4721
 
        tmp_value = (Uint4Ptr) mfp->mmp;
4722
 
        number = SwapUint4(*tmp_value);
4723
 
        gi_list = MemNew(number * sizeof(BlastDoubleInt4));
4724
 
        index = 0;
4725
 
        while (index<number) {
4726
 
            mfp->mmp += 4;
4727
 
            tmp_value = (Uint4Ptr) mfp->mmp;
4728
 
            gi_list[index++].gi = SwapUint4(*tmp_value);
4729
 
        }
4730
 
        *gi_list_size = number;
4731
 
        mfp = NlmCloseMFILE(mfp);
4732
 
    } else {
4733
 
        mfp = NlmCloseMFILE(mfp);
4734
 
        if (!(gifp = FileOpen(file_name, "r"))) {
4735
 
            ErrPostEx(SEV_ERROR, 0, 0, "Unable to open file %s", file_name);
4736
 
            return NULL;
4737
 
        }
4738
 
        
4739
 
        gi_list = MemNew(chunk_size * sizeof(BlastDoubleInt4));
4740
 
        
4741
 
        while (FileGets(line, LINE_LEN, gifp)) {
4742
 
            
4743
 
            /* do correct casting */
4744
 
            status = sscanf(line, "%ld", &tmplong);
4745
 
            value = tmplong;
4746
 
            
4747
 
            /* skip non-valid lines */
4748
 
            if (status > 0) {
4749
 
                                /* do we have enough space in gi_list ? */
4750
 
                if (chunk_size < index + 1) {
4751
 
                    chunk_size *= 2;
4752
 
                    gi_list = Realloc(gi_list, chunk_size * sizeof(BlastDoubleInt4));
4753
 
                }
4754
 
                
4755
 
                gi_list[index++].gi = value;
4756
 
            }
4757
 
        }
4758
 
        
4759
 
        *gi_list_size = index;
4760
 
    }
4761
 
 
4762
 
    if(gifp != NULL)
4763
 
        FileClose(gifp);
4764
 
 
4765
 
    return gi_list;
 
5452
 
 
5453
    retval = (BlastDoubleInt4Ptr) MemNew(sizeof(BlastDoubleInt4)*gilist->count);
 
5454
    if (!retval)
 
5455
        return retval;
 
5456
 
 
5457
    if (gi_list_size)
 
5458
        *gi_list_size = gilist->count;
 
5459
 
 
5460
    for (i = 0; i < gilist->count; i++)
 
5461
        retval[i].gi = gilist->i[i];
 
5462
 
 
5463
    gilist = Int4ListFree(gilist);
 
5464
 
 
5465
    return retval;
4766
5466
}
4767
5467
 
4768
5468
BlastSearchBlkPtr LIBCALL
4781
5481
                                                    prog_name, 0,
4782
5482
                                                    dbname, options, callback,
4783
5483
                                                    seqid_list, gi_list,
4784
 
                                                    gi_list_total, NULL);
 
5484
                                                    gi_list_total, rdfp);
4785
5485
   else
4786
5486
      return BLASTSetUpSearchWithReadDbInternal(query_slp, query_bsp,
4787
5487
                                                prog_name, qlen,
4791
5491
}
4792
5492
 
4793
5493
 
4794
 
 
4795
5494
BlastSearchBlkPtr
4796
5495
BLASTSetUpSearchWithReadDbInternal (SeqLocPtr query_slp, BioseqPtr query_bsp, CharPtr prog_name, Int4 qlen, CharPtr dbname, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total, ReadDBFILEPtr rdfp)
 
5496
{
 
5497
        return BLASTSetUpSearchWithReadDbInternalMult(query_slp, query_bsp, prog_name, qlen, dbname, options, callback, seqid_list, gi_list, gi_list_total, rdfp, NULL);
 
5498
}
 
5499
 
 
5500
 
 
5501
 
 
5502
BlastSearchBlkPtr
 
5503
BLASTSetUpSearchWithReadDbInternalMult (SeqLocPtr query_slp, BioseqPtr query_bsp, CharPtr prog_name, Int4 qlen, CharPtr dbname, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total, ReadDBFILEPtr rdfp, QueriesPtr mult_queries)
 
5504
/* --KM added mult_queries param */
4797
5505
 
4798
5506
{
4799
5507
 
4803
5511
    Int8        dblen;
4804
5512
    Int4        query_length;
4805
5513
    Nlm_FloatHi searchsp_eff=0;
4806
 
    
 
5514
    Int4        hitlist_size;
 
5515
    Int4 i; /* AM: Query multiplexing. */
 
5516
 
4807
5517
    /* Allocate default options if none are allocated yet. */
4808
5518
    if (options == NULL) {
4809
5519
        options = BLASTOptionNew(prog_name, FALSE);
4824
5534
    else
4825
5535
        query_length = query_bsp->length;
4826
5536
    
 
5537
    /* Increase the hitlist size for the preliminary gapped alignment */
 
5538
    if (options->gapped_calculation)
 
5539
       hitlist_size = MIN(2*options->hitlist_size, options->hitlist_size + 50);
 
5540
    else 
 
5541
       hitlist_size = options->hitlist_size;
 
5542
 
 
5543
    /* AM: Query multiplexing */
 
5544
    if( mult_queries )
 
5545
    {
 
5546
      for( i = 0; i < mult_queries->NumQueries; ++i )
 
5547
        mult_queries->result_info[i].results 
 
5548
          = (BLASTResultHitlistPtr *)MemNew( 
 
5549
              (hitlist_size + 1)*sizeof( BLASTResultHitlistPtr ) );
 
5550
 
 
5551
      mult_queries->max_results_per_query = hitlist_size;
 
5552
      hitlist_size *= mult_queries->NumQueries;
 
5553
    }
 
5554
    
4827
5555
    /* On the first call query length is used for the subject length. */
4828
 
    search = BlastSearchBlkNewExtra(options->wordsize, query_length, dbname, multiple_hits, 0, options->threshold_second, options->hitlist_size, prog_name, NULL, first_context, last_context, rdfp, options->window_size);
 
5556
    search = BlastSearchBlkNewExtra(options->wordsize, query_length, dbname, multiple_hits, 0, options->threshold_second, hitlist_size, prog_name, NULL, first_context, last_context, rdfp, options->window_size);
4829
5557
    
4830
5558
    if (search) {
4831
5559
       readdb_get_totals_ex(search->rdfp, &(dblen), &(search->dbseq_num), TRUE);
4832
5560
 
4833
 
       if (seqid_list && !options->use_real_db_size)
4834
 
          BlastAdjustDbNumbers(search->rdfp, &(dblen), &(search->dbseq_num),
4835
 
                               seqid_list, NULL, NULL, NULL, 0);
4836
 
   
4837
 
       /* Create virtual database if any of the databases have gi lists or 
4838
 
          ordinal id masks, or if gi list is provided from options */
4839
 
       BlastProcessGiLists(search, options, gi_list, &gi_list_total);
4840
 
       /* Intended for use when a list of gi's is sent in, but the real size is needed. */
4841
 
        /* It's probably still necessary to call BlastAdjustDbNumbers, but it would be nice
4842
 
           if this were not required. */
4843
 
       if (gi_list_total > 0 && !options->use_real_db_size)
4844
 
          BlastAdjustDbNumbers(search->rdfp, &(dblen), 
4845
 
                               &(search->dbseq_num), NULL, NULL, 
4846
 
                               search->rdfp->oidlist, NULL, 0);
4847
 
 
4848
 
#if 0
4849
 
        /* use length and num of seqs of the database from alias file */
4850
 
        if (search->rdfp->aliaslen && !gi_list)
4851
 
            dblen = search->rdfp->aliaslen;
4852
 
        if (search->rdfp->aliasnseq && !gi_list) 
4853
 
            search->dbseq_num = search->rdfp->aliasnseq;
4854
 
#endif
 
5561
       if (!options->ignore_gilist)
 
5562
       {
 
5563
           /* Create virtual database if any of the databases have gi lists or 
 
5564
              ordinal id masks, or if gi list is provided from options */
 
5565
           BlastProcessGiLists(search, options, gi_list, gi_list_total);
 
5566
 
 
5567
           /* search->thr_info->blast_gi_list will be non-NULL if gi_list or 
 
5568
            * options->gilist or options->gifile was non-NULL and therefore
 
5569
            * intersected with any oidlists in the search->rdfp(s). If this is the
 
5570
            * case, we need to recalculate the database length and number of
 
5571
            * sequences */
 
5572
           if (search->thr_info->blast_gi_list && !options->use_real_db_size)
 
5573
               readdb_get_totals_ex3(search->rdfp, &dblen, &search->dbseq_num,
 
5574
                                 FALSE, TRUE, eApproximate);
 
5575
       }
 
5576
 
4855
5577
        /* command-line/options trump alias file. */
4856
5578
        if (options->db_length > 0)
4857
 
            dblen = options->db_length;
 
5579
           dblen = options->db_length;
4858
5580
        if (options->dbseq_num > 0)
4859
5581
            search->dbseq_num = options->dbseq_num;
4860
5582
        if (options->searchsp_eff > 0)
4867
5589
            searchsp_eff /= 3.0;
4868
5590
        }
4869
5591
        search->dblen = dblen;
 
5592
        if (options->db_length > 0)
 
5593
           search->dblen_eff = dblen;
4870
5594
        search->searchsp_eff = searchsp_eff;
 
5595
        /* AM: Moved next two lines here to be able to use mult_queries 
 
5596
               in BLASTSetUpSearchInternalByLoc() */
 
5597
        /* --KM put mult_queries, from Main, into the search structure */
 
5598
        search->mult_queries = mult_queries;
4871
5599
        status = BLASTSetUpSearchInternalByLoc (search, query_slp, query_bsp, prog_name, qlen, options, callback);
4872
5600
        if (status != 0) {
4873
5601
            ErrPostEx(SEV_WARNING, 0, 0, "SetUpBlastSearch failed.");
4874
5602
            search->query_invalid = TRUE;
4875
5603
        }
4876
 
        
 
5604
 
4877
5605
        if (search->pbp->mb_params) 
4878
5606
            search = GreedyAlignMemAlloc(search);
4879
5607
        else 
4880
5608
            search->abmp = NULL;
4881
5609
        
4882
 
        search->rdfp = ReadDBCloseMHdrAndSeqFiles(search->rdfp);
 
5610
        if (search->rdfp->parameters & READDB_CONTENTS_ALLOCATED)
 
5611
            search->rdfp = ReadDBCloseMHdrAndSeqFiles(search->rdfp);
4883
5612
    }
4884
5613
    
4885
5614
    if (options_alloc)
4897
5626
BLASTSetUpSearchWithReadDb(BioseqPtr query_bsp, CharPtr prog_name, Int4 qlen, CharPtr dbname, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)))
4898
5627
 
4899
5628
{
4900
 
        return BLASTSetUpSearchWithReadDbInternal (NULL, query_bsp, prog_name, qlen, dbname, options, callback, NULL, NULL, 0, NULL);
 
5629
        return BLASTSetUpSearchWithReadDbInternal(NULL, query_bsp, prog_name, qlen, dbname, options, callback, NULL, NULL, 0, NULL);
4901
5630
}
4902
5631
 
4903
5632
BlastSearchBlkPtr LIBCALL 
4916
5645
BLASTSetUpSearchByLocWithReadDb(SeqLocPtr query_slp, CharPtr prog_name, Int4 qlen, CharPtr dbname, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)))
4917
5646
 
4918
5647
{
4919
 
        return BLASTSetUpSearchWithReadDbInternal (query_slp, NULL, prog_name, qlen, dbname, options, callback, NULL, NULL, 0, NULL);
 
5648
        return BLASTSetUpSearchWithReadDbInternalMult (query_slp, NULL, prog_name, qlen, dbname, options, callback, NULL, NULL, 0, NULL, NULL);
 
5649
        /* --KM pass NULL mult_queries */
4920
5650
}
4921
5651
 
4922
5652
 
4923
5653
BlastSearchBlkPtr LIBCALL 
4924
 
BLASTSetUpSearchByLocWithReadDbEx(SeqLocPtr query_slp, CharPtr prog_name, Int4 qlen, CharPtr dbname, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total)
 
5654
BLASTSetUpSearchByLocWithReadDbEx(SeqLocPtr query_slp, CharPtr prog_name, Int4 qlen, CharPtr dbname, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total, QueriesPtr mult_queries)  
 
5655
/* --KM added mult_queries param */
4925
5656
 
4926
5657
{
4927
 
        return BLASTSetUpSearchWithReadDbInternal (query_slp, NULL, prog_name, qlen, dbname, options, callback, seqid_list, gi_list, gi_list_total, NULL);
 
5658
        return BLASTSetUpSearchWithReadDbInternalMult (query_slp, NULL, prog_name, qlen, dbname, options, callback, seqid_list, gi_list, gi_list_total, NULL, mult_queries);
 
5659
        /* --KM pass mult_queries */
4928
5660
}
4929
5661
static BlastSearchBlkPtr
4930
5662
BLASTSetUpSearchEx (SeqLocPtr query_slp, BioseqPtr query_bsp, CharPtr prog_name, Int4 qlen, Int8 dblen, BlastAllWordPtr all_words, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)))
4935
5667
        Int2 status, first_context, last_context;
4936
5668
        Int4 actual_query_length=0;
4937
5669
        Nlm_FloatHi searchsp_eff=0;
 
5670
        Int4        hitlist_size;
4938
5671
 
4939
5672
        /* Allocate default options if no are allocated yet. */
4940
5673
        if (options == NULL)
4970
5703
        BlastGetFirstAndLastContext(prog_name, query_slp, &first_context, &last_context, options->strand_option);
4971
5704
 
4972
5705
/* On the first call query length is used for the subject length. */
4973
 
        search = BlastSearchBlkNew(options->wordsize, actual_query_length, NULL, multiple_hits, 0, options->threshold_second, options->hitlist_size, prog_name, all_words, first_context, last_context, options->window_size);
 
5706
        hitlist_size = MIN(2*options->hitlist_size, 
 
5707
                           options->hitlist_size + 50);
 
5708
 
 
5709
        search = BlastSearchBlkNew(options->wordsize, actual_query_length, NULL, multiple_hits, 0, options->threshold_second, hitlist_size, prog_name, all_words, first_context, last_context, options->window_size);
4974
5710
 
4975
5711
        if (search)
4976
5712
        {
5294
6030
      if (new_segment1->s_end <= new_segment2->s_end) {
5295
6031
         new_segment1 = new_segment1->next;
5296
6032
         num1++;
5297
 
      }
5298
 
      if (new_segment1->s_end >= new_segment2->s_end) {
 
6033
      } else {
5299
6034
         new_segment2 = new_segment2->next;
5300
6035
         num2++;
5301
6036
      }
5376
6111
BLASTMergeHitLists(BlastSearchBlkPtr search, BLAST_HitListPtr hitlist1, 
5377
6112
                   BLAST_HitListPtr hitlist2, Int4 start, Boolean merge_hsps)
5378
6113
{
5379
 
   BLAST_HSPPtr hsp, PNTR hspp1, PNTR hspp2;
5380
 
   Int4 index, index1, hspcnt1, hspcnt2, new_hspcnt = 0;
 
6114
   BLAST_HSPPtr hsp, hsp_var, PNTR hspp1, PNTR hspp2;
 
6115
   Int4 index, index1, index2;
 
6116
   Int4 hspcnt1, hspcnt2, new_hspcnt = 0;
5381
6117
   BLAST_HSPPtr PNTR new_hsp_array;
5382
 
   Int4Ptr index_array;
5383
6118
 
5384
6119
   if (hitlist1 == NULL) {
5385
6120
      hitlist1 = (BLAST_HitListPtr)
5396
6131
   }
5397
6132
 
5398
6133
   hspcnt1 = hspcnt2 = 0;
5399
 
   hspp1 = (BLAST_HSPPtr PNTR) MemNew(hitlist1->hspcnt*sizeof(BLAST_HSPPtr));
5400
 
   hspp2 = (BLAST_HSPPtr PNTR) MemNew(hitlist2->hspcnt*sizeof(BLAST_HSPPtr));
5401
 
   index_array = (Int4Ptr) MemNew(hitlist1->hspcnt*sizeof(Int4));
5402
6134
 
5403
 
   for (index=0; index<hitlist1->hspcnt; index++) {
 
6135
   /* Put all HSPs that intersect the overlap region at the front of the
 
6136
      respective HSP arrays. */
 
6137
   for (index = 0; index < hitlist1->hspcnt; index++) {
5404
6138
      hsp = hitlist1->hsp_array[index];
5405
6139
      if (hsp->subject.end > start) {
5406
 
         index_array[hspcnt1] = index;
5407
 
         hspp1[hspcnt1++] = hsp;
5408
 
         hitlist1->hsp_array[index] = NULL;
5409
 
      } else
5410
 
         new_hspcnt++;
 
6140
         /* At least part of this HSP lies in the overlap strip. */
 
6141
         hsp_var = hitlist1->hsp_array[hspcnt1];
 
6142
         hitlist1->hsp_array[hspcnt1] = hsp;
 
6143
         hitlist1->hsp_array[index] = hsp_var;
 
6144
         ++hspcnt1;
 
6145
      }
5411
6146
   }
5412
 
   for (index=0; index<hitlist2->hspcnt; index++) {
 
6147
   for (index = 0; index < hitlist2->hspcnt; index++) {
5413
6148
      hsp = hitlist2->hsp_array[index];
5414
6149
      if (hsp->subject.offset < start + DBSEQ_CHUNK_OVERLAP) {
5415
 
         hspp2[hspcnt2++] = hsp;
5416
 
         hitlist2->hsp_array[index] = NULL;
5417
 
      } else
5418
 
         new_hspcnt++;
 
6150
         /* At least part of this HSP lies in the overlap strip. */
 
6151
         hsp_var = hitlist2->hsp_array[hspcnt2];
 
6152
         hitlist2->hsp_array[hspcnt2] = hsp;
 
6153
         hitlist2->hsp_array[index] = hsp_var;
 
6154
         ++hspcnt2;
 
6155
      }
5419
6156
   }
 
6157
   hspp1 = hitlist1->hsp_array;
 
6158
   hspp2 = hitlist2->hsp_array;
5420
6159
 
5421
6160
   HeapSort(hspp1, hspcnt1, sizeof(BLAST_HSPPtr), diag_compare_hsps);
5422
6161
   HeapSort(hspp2, hspcnt2, sizeof(BLAST_HSPPtr), diag_compare_hsps);
5429
6168
             ABS(diag_compare_hsps(&hspp1[index], &hspp2[index1])) < 
5430
6169
             OVERLAP_DIAG_CLOSE) {
5431
6170
            if (merge_hsps) {
5432
 
               if (BLASTMergeHsps(search, hspp1[index], hspp2[index1], start)) {
5433
 
                  hspp2[index1]->gap_info = 
5434
 
                     GapXEditBlockDelete(hspp2[index1]->gap_info);
5435
 
                  hspp2[index1] = MemFree(hspp2[index1]);
5436
 
                  break;
 
6171
               if (BLASTMergeHsps(search, hspp1[index], hspp2[index1], 
 
6172
                                  start)) {
 
6173
                  /* Free the second HSP. */
 
6174
                  hspp2[index1] = BLAST_HSPFree(hspp2[index1]);
5437
6175
               }
5438
6176
            } else { /* No gap information available */
5439
6177
               if (BLASTHspContained(hspp1[index], hspp2[index1])) {
5440
 
                  hspp1[index] = MemFree(hspp1[index]);
 
6178
                  /* Point the first HSP to the new HSP; */
 
6179
                  hspp1[index] = BLAST_HSPFree(hspp1[index]);
5441
6180
                  hspp1[index] = hspp2[index1];
5442
6181
                  hspp2[index1] = NULL;
5443
 
               } else if (BLASTHspContained(hspp2[index1], hspp1[index]))
5444
 
                  hspp2[index1] = MemFree(hspp2[index1]);
5445
 
 
 
6182
                  /* This HSP has been removed, so break out of the inner 
 
6183
                     loop */
 
6184
                  break;
 
6185
               } else if (BLASTHspContained(hspp2[index1], hspp1[index])) {
 
6186
                  hspp2[index1] = BLAST_HSPFree(hspp2[index1]);
 
6187
               }
5446
6188
            }
 
6189
         } else {
 
6190
            /* This and remaining HSPs are too far from the one being 
 
6191
               checked */
 
6192
            break;
5447
6193
         }
5448
6194
      }
5449
6195
   }
5450
6196
 
5451
 
   new_hspcnt += hspcnt1;
5452
 
   for (index=0; index<hspcnt2; index++) {
5453
 
      if (hspp2[index] != NULL)
5454
 
         new_hspcnt++;
5455
 
   }
 
6197
   HspArrayPurge(hitlist2->hsp_array, hitlist2->hspcnt, FALSE);
 
6198
 
 
6199
   /* The new number of HSPs is now the sum of the remaining counts in the 
 
6200
      two lists, but if there is a restriction on the number of HSPs to keep,
 
6201
      it might have to be reduced. */
 
6202
   new_hspcnt = hitlist2->hspcnt + hitlist1->hspcnt;
 
6203
   if (search->pbp->hsp_num_max)
 
6204
      new_hspcnt = MIN(new_hspcnt, search->pbp->hsp_num_max);
5456
6205
   
5457
6206
   if (new_hspcnt >= hitlist1->hspmax-1 && hitlist1->do_not_reallocate == FALSE) {
5458
 
      new_hsp_array = (BLAST_HSPPtr PNTR) Realloc(hitlist1->hsp_array, new_hspcnt*2*sizeof(BLAST_HSPPtr));
 
6207
      Int4 new_allocated = 2*new_hspcnt;
 
6208
      if (search->pbp->hsp_num_max)
 
6209
         new_allocated = MIN(new_allocated, search->pbp->hsp_num_max);
 
6210
      new_hsp_array = (BLAST_HSPPtr PNTR) 
 
6211
         Realloc(hitlist1->hsp_array, new_allocated*sizeof(BLAST_HSPPtr));
5459
6212
      if (new_hsp_array == NULL) {
5460
6213
         ErrPostEx(SEV_WARNING, 0, 0, "UNABLE to reallocate in BlastSaveCurrentHsp for ordinal id %ld, continuing with fixed array of %ld HSP's", (long) search->subject_id, (long) hitlist1->hspmax);
5461
6214
         hitlist1->do_not_reallocate = TRUE; 
5462
6215
      } else {
5463
6216
         hitlist1->hsp_array = new_hsp_array;
5464
 
         hitlist1->hspmax = 2*new_hspcnt;
5465
 
      }
5466
 
   }
5467
 
 
5468
 
   for (index=0; index<hspcnt1; index++) 
5469
 
      hitlist1->hsp_array[index_array[index]] = hspp1[index];
5470
 
   for (index=hitlist1->hspcnt, index1=0; 
5471
 
        index1<hitlist2->hspcnt && index<hitlist1->hspmax; index1++) {
5472
 
      if (hitlist2->hsp_array[index1] != NULL)
5473
 
         hitlist1->hsp_array[index++] = hitlist2->hsp_array[index1];
5474
 
   }
5475
 
   for (index1=0; index1<hspcnt2 && index<hitlist1->hspmax; index1++) {
5476
 
      if (hspp2[index1] != NULL)
5477
 
         hitlist1->hsp_array[index++] = hspp2[index1];
5478
 
   }
5479
 
 
5480
 
   hspp1 = MemFree(hspp1);
5481
 
   hspp2 = MemFree(hspp2);
5482
 
   index_array = MemFree(index_array);
 
6217
         hitlist1->hspmax = new_allocated;
 
6218
      }
 
6219
      new_hspcnt = MIN(new_hspcnt, hitlist1->hspmax);
 
6220
   }
 
6221
 
 
6222
   if (new_hspcnt >= hitlist2->hspcnt + hitlist1->hspcnt) {
 
6223
      /* All HSPs from both arrays are saved */
 
6224
      for (index=hitlist1->hspcnt, index1=0; 
 
6225
           index1<hitlist2->hspcnt; index1++) {
 
6226
         if (hitlist2->hsp_array[index1] != NULL)
 
6227
            hitlist1->hsp_array[index++] = hitlist2->hsp_array[index1];
 
6228
      }
 
6229
   } else {
 
6230
      /* Not all HSPs are be saved; sort both arrays by score and save only
 
6231
         the new_hspcnt best ones. 
 
6232
         For the merged set of HSPs, allocate array the same size as in the 
 
6233
         old HSP list. */
 
6234
      new_hsp_array = (BLAST_HSP**) 
 
6235
         malloc(hitlist1->hspmax*sizeof(BLAST_HSP*));
 
6236
      HeapSort(hitlist1->hsp_array, hitlist1->hspcnt, 
 
6237
               sizeof(BLAST_HSP*), score_compare_hsps);
 
6238
      HeapSort(hitlist2->hsp_array, hitlist2->hspcnt, sizeof(BLAST_HSP*),
 
6239
               score_compare_hsps);
 
6240
      index1 = index2 = 0;
 
6241
      for (index = 0; index < new_hspcnt; ++index) {
 
6242
         if (index1 < hitlist1->hspcnt &&
 
6243
             (index2 >= hitlist2->hspcnt ||
 
6244
             (hitlist1->hsp_array[index1]->score >= 
 
6245
             hitlist2->hsp_array[index2]->score))) {
 
6246
            new_hsp_array[index] = hitlist1->hsp_array[index1];
 
6247
            ++index1;
 
6248
         } else {
 
6249
            new_hsp_array[index] = hitlist2->hsp_array[index2];
 
6250
            ++index2;
 
6251
         }
 
6252
      }
 
6253
      /* Free the extra HSPs that could not be saved */
 
6254
      for ( ; index1 < hitlist1->hspcnt; ++index1) {
 
6255
         hitlist1->hsp_array[index1] = 
 
6256
            BLAST_HSPFree(hitlist1->hsp_array[index1]);
 
6257
      }
 
6258
      for ( ; index2 < hitlist2->hspcnt; ++index2) {
 
6259
         hitlist2->hsp_array[index2] = 
 
6260
            BLAST_HSPFree(hitlist2->hsp_array[index2]);
 
6261
      }
 
6262
      /* Point hitlist1's HSP array to the new one */
 
6263
      hitlist1->hsp_array = (BLAST_HSP**) MemFree(hitlist1->hsp_array);
 
6264
      hitlist1->hsp_array = new_hsp_array;
 
6265
   }
5483
6266
   
5484
6267
   hitlist1->hspcnt = index;
 
6268
   /* Second HSP list now does not own any HSPs */
 
6269
   hitlist2->hspcnt = 0;
 
6270
 
5485
6271
   return hitlist1;
5486
6272
}
5487
6273
 
5496
6282
   FloatHi searchsp_eff;
5497
6283
   BLAST_KarlinBlkPtr PNTR kbp;
5498
6284
   Int4 context;
 
6285
   Uint4 query_num; /* AM: Support for query concatenation. */
5499
6286
 
5500
 
   if (search->pbp->gapped_calculation && 
5501
 
       search->prog_number != blast_type_blastn)
 
6287
   if (search->pbp->gapped_calculation)
5502
6288
      kbp = search->sbp->kbp_gap;
5503
6289
   else
5504
6290
      kbp = search->sbp->kbp;
5514
6300
                                       (Int4) (search->last_context+1));
5515
6301
         else
5516
6302
            context = (Int4) hsp->context;
5517
 
         searchsp_eff = (FloatHi) search->dblen_eff *
5518
 
            (FloatHi) search->context[context].query->effective_length;
 
6303
 
 
6304
            /* AM: Changed to support query concatenation. */
 
6305
            if( !search->mult_queries )
 
6306
              searchsp_eff = (FloatHi) search->dblen_eff *
 
6307
                             (FloatHi) search->context[context].query->effective_length;
 
6308
            else
 
6309
            {
 
6310
              query_num = GetQueryNum( search->mult_queries,
 
6311
                                       hsp->query.offset,
 
6312
                                       hsp->query.end,
 
6313
                                       hsp->query.frame );
 
6314
              searchsp_eff = search->mult_queries->SearchSpEff[query_num];
 
6315
            }
5519
6316
         
5520
 
         hsp->evalue = BlastKarlinStoE_simple(hsp->score, kbp[context], 
5521
 
                                              searchsp_eff);
5522
 
         if (hsp->evalue > 10*search->pbp->cutoff_e) {
5523
 
            hsp->gap_info = GapXEditBlockDelete(hsp->gap_info);
5524
 
            hsp = MemFree(hsp);
5525
 
            search->current_hitlist->hsp_array[index] = NULL;
 
6317
         if (kbp[context]) {
 
6318
            /* kbp[context] == NULL means that this alignment has been 
 
6319
               extended across the boundary between different query sequences.
 
6320
               Leave it like this for now */
 
6321
            hsp->evalue = BlastKarlinStoE_simple(hsp->score, kbp[context], 
 
6322
                                                 searchsp_eff);
 
6323
 
 
6324
            if (hsp->evalue > 10*search->pbp->cutoff_e) {
 
6325
               hsp = BLAST_HSPFree(hsp);
 
6326
               search->current_hitlist->hsp_array[index] = NULL;
 
6327
            }
5526
6328
         }
5527
6329
      }
5528
6330
   }
5534
6336
/*
5535
6337
        Performs a BLAST search using a sequence from obtained from readdb.
5536
6338
*/
5537
 
BlastSearchBlkPtr LIBCALL
 
6339
Int2 LIBCALL
5538
6340
BLASTPerformSearchWithReadDb (BlastSearchBlkPtr search, Int4 sequence_number)
5539
6341
 
5540
6342
{
5546
6348
        search->dblen_eff_real += MAX(subject_length-search->length_adjustment, 1);
5547
6349
        search->subject_id = sequence_number;
5548
6350
 
5549
 
        search->dblen_eff_real += MAX(subject_length-search->length_adjustment, 1);
5550
 
        search->subject_id = sequence_number;
5551
 
 
5552
 
        search = BLASTPerformSearch(search, subject_length, subject_seq); 
5553
 
 
5554
 
        return search;
 
6351
        return BLASTPerformSearch(search, subject_length, subject_seq); 
5555
6352
}
5556
6353
 
5557
6354
/*
5560
6357
        BLASTPerformSearchWithReadDb) and when only two seqs are being
5561
6358
        compared.
5562
6359
*/
5563
 
BlastSearchBlkPtr LIBCALL
 
6360
Int2 LIBCALL
5564
6361
BLASTPerformSearch (BlastSearchBlkPtr search, Int4 subject_length, Uint1Ptr subject_seq)
5565
6362
 
5566
6363
{
5575
6372
                status = BLASTPerformFinalSearch(search, subject_length, subject_seq);
5576
6373
        }
5577
6374
        
5578
 
        return search;
 
6375
        return status;
5579
6376
}
5580
6377
 
5581
6378
/*
5670
6467
 
5671
6468
                if (status < 0)
5672
6469
                {               /* Error */
5673
 
                        ErrPostEx(SEV_FATAL, 0, 0, "BlastExtendWordSearch returned non-zero status");
 
6470
                        ErrPostEx(SEV_FATAL, 1, 0, "BlastExtendWordSearch returned non-zero status");
5674
6471
                        return 1;
5675
6472
                }
5676
6473
        }
5711
6508
 
5712
6509
{
5713
6510
    BLAST_HitListPtr current_hitlist, hitlist = NULL;
5714
 
    Int2 inner_frame, inner_frame_max, status, inner_frame_min;
 
6511
    Int2 inner_frame, inner_frame_max, inner_frame_min, status;
5715
6512
    Int4 real_length, length, start = 0, num_chunks, index;
5716
6513
    Uint1Ptr prot_seq;
5717
6514
    
5718
6515
    BlastHitListPurge(search->current_hitlist);
5719
6516
    if (subject_length == 0)
5720
 
        return 1;
 
6517
       /* Normal return */
 
6518
        return 0;
5721
6519
 
5722
6520
    BlastSequenceAddSequence(search->subject, NULL, subject_seq-1, subject_length, subject_length, 0);
5723
6521
    search->current_hitlist_purge = TRUE; /* The default. */
5818
6616
           length = MIN(real_length-start, MAX_DBSEQ_LEN);
5819
6617
           search->subject->length = length;
5820
6618
           /* THE BLAST SEARCH _IS_ HERE! */
5821
 
           status = BlastExtendWordSearch(search, search->pbp->multiple_hits_only);
5822
 
           
 
6619
           if (BlastExtendWordSearch(search, search->pbp->multiple_hits_only) < 0) {
 
6620
              /* Error occurred in BlastExtendWordSearch */
 
6621
              return 1;
 
6622
           }
5823
6623
           /* HSP's were not saved in any special order, sort. */
5824
6624
           current_hitlist = search->current_hitlist;
5825
6625
           if (current_hitlist && current_hitlist->do_not_reallocate == FALSE)
5832
6632
#if 1
5833
6633
           else if (!search->pbp->do_sum_stats && !search->pbp->mb_params) {
5834
6634
              status = BlastNTPreliminaryGappedScore(search, search->subject->sequence, search->subject->length);
 
6635
              if (status < 0)
 
6636
                 return status;
5835
6637
              status = BlastNTGetGappedScore(search, search->subject->length, search->subject->sequence);
 
6638
              if (status < 0)
 
6639
                 return status;
5836
6640
           }  
5837
6641
#endif
5838
6642
           if (num_chunks > 1) {
5846
6650
              }
5847
6651
              start += length - DBSEQ_CHUNK_OVERLAP;
5848
6652
              search->subject->original_length = start;
5849
 
              search->subject->sequence += 
5850
 
                 (length - DBSEQ_CHUNK_OVERLAP)/READDB_COMPRESSION_RATIO;
 
6653
              if (search->prog_number == blast_type_blastn)
 
6654
                 search->subject->sequence += 
 
6655
                    (length - DBSEQ_CHUNK_OVERLAP)/READDB_COMPRESSION_RATIO;
 
6656
              else
 
6657
                 search->subject->sequence += length - DBSEQ_CHUNK_OVERLAP;
5851
6658
              search->current_hitlist->hspcnt = 
5852
6659
                 search->current_hitlist->hspcnt_max = 0;
5853
6660
           }
5857
6664
    if (hitlist) {
5858
6665
       MemFree(search->current_hitlist->hsp_array);
5859
6666
       MemCpy(search->current_hitlist, hitlist, sizeof(BLAST_HitList));
 
6667
       MemFree(hitlist);
5860
6668
       if (!search->pbp->mb_params)
5861
6669
          search->subject->sequence = search->subject->sequence_start + 1;
5862
6670
    }
5989
6797
static Int4
5990
6798
BlastExtendWordSearch(BlastSearchBlkPtr search, Boolean multiple_hits)
5991
6799
{
5992
 
        Int2 status=0;
 
6800
        Int4 status=0;
5993
6801
 
5994
6802
 
5995
6803
        /* multiple hits structure needed to perform mh extensions. */
6378
7186
    register Int4 num_hits;
6379
7187
    register Int4 next_nhits;
6380
7188
    
 
7189
    Int4 i; /*AM: Temporary */
 
7190
 
6381
7191
    BLAST_ExtendWordParamsPtr     ewp_params;
6382
7192
    Boolean                     prelim, succeed_to_right;
6383
7193
    ModLAEntry *mod_lt=lookup->mod_lt;
6432
7242
            s_off = (Int4) (s - subject0);
6433
7243
            next_lindex = (((lookup_index) & mask)<<char_size) + *(s+1);
6434
7244
            next_pv_val = pv_array[next_lindex>>PV_ARRAY_BTS];
6435
 
            for (;;) {
 
7245
 
 
7246
            for (;;) { 
6436
7247
                do {
6437
7248
                    /* lookup a contiguous word. */
6438
7249
                    s++;
6522
7333
            
6523
7334
            next_lindex = (((lookup_index) & mask)<<char_size) + *(s+1);
6524
7335
            next_pv_val = pv_array[next_lindex>>PV_ARRAY_BTS];
 
7336
 
6525
7337
            for (;;) {
6526
7338
                do {
6527
7339
                    /* lookup a contiguous word. */
7142
7954
        register BLAST_Score    score, sum;
7143
7955
        register BLAST_ScorePtr PNTR    matrix;
7144
7956
        register BLAST_Score    x, X;
 
7957
        Uint4 query_num; /* AM: Support for query multiplexing. */
7145
7958
        
7146
7959
        
7147
7960
 
7168
7981
        q = query + q_off;
7169
7982
        s =  search->subject->sequence + s_off; 
7170
7983
 
7171
 
        X=pbp->X;
 
7984
        /* AM: Support for query multiplexing. */
 
7985
        if( search->prog_number == blast_type_tblastn && search->mult_queries )
 
7986
        {
 
7987
          query_num = GetQueryNum( search->mult_queries, q_off - word_width + 1, 
 
7988
                                   q_off + 1, 0 );
 
7989
          X = search->mult_queries->dropoff_2nd_pass_array[query_num];
 
7990
        }
 
7991
        else X=pbp->X;
 
7992
 
7172
7993
        matrix = sbp->matrix;
7173
7994
 
7174
7995
        score=0;
7199
8020
 
7200
8021
        q_left--;
7201
8022
 
 
8023
/******************************************************************
 
8024
 
 
8025
The extension procedure used here is to:
 
8026
 
 
8027
1.) keep on extending as long as it increases the total score so far, record this
 
8028
maximum score and the corresponding extents as each new maximum score is reached.
 
8029
 
 
8030
2.) if extending decreases the total score so far then keep on extending
 
8031
until the score has dropped by "X" from the last maximum score to explore
 
8032
whether it is only a local minima that has been encountered:
 
8033
 
 
8034
        a.) if the score drops by "X" from the last maximum score, then stop
 
8035
        the extension and record the last maximum score as well as the 
 
8036
        corresponding extents for query and subject.
 
8037
 
 
8038
        b.) if the score recovers again and becomes higher than the last maximum
 
8039
        score, reset the maximum score so far as well as the corresponding
 
8040
        query and subject offsets.
 
8041
 
 
8042
 
 
8043
3.) When the end of a sequence (either query or subject) is encountered record the last 
 
8044
maximum score as well as the corresponding extents.
 
8045
        
 
8046
 
 
8047
 
 
8048
In the "while" loop below the maximum score is the variable "score" and "sum"
 
8049
is the change since the maximum score was last recorded (i.e., the variable
 
8050
"score" was modified).  
 
8051
 
 
8052
Both x and X are negative and the outer "while" loops continues
 
8053
as long as sum is less negative than x.  Iterations of the "while" 
 
8054
loop with "sum" containing a negative value corresponds to 2.) above.
 
8055
 
 
8056
The inner do-while loop is executed only as long as each extension
 
8057
increases the maximum score, corresponding to 1.) above.
 
8058
 
 
8059
There is no explicit check for the end of a sequence here, but
 
8060
between sequences in the blast database there is a "sentinel"
 
8061
byte.  If this sentinel byte is encountered then matrix[*q][*s]
 
8062
will be much more negative than "X" so that the extension will
 
8063
stop.  This corresponds to 3.) above.
 
8064
 
 
8065
*******************************************************************/
 
8066
 
7202
8067
        sum = 0;
7203
8068
        x = X;
7204
8069
        while (sum > x)
7225
8090
                q = q_right = q_best_right;
7226
8091
                s = search->subject->sequence + (q - query) + diag;
7227
8092
                sum = 0;
7228
 
/* "score" is actually the "maxscore", if sum drops by "score", then the
7229
 
total new score is zero and the extension can stop. */
 
8093
/**************************************************************
 
8094
 
 
8095
The extension to the right is performed in the same way as the extension
 
8096
to the left, except that the extension can stop if the score
 
8097
drops by X or becomes negative, in which case the last maximum score
 
8098
is recorded.
 
8099
 
 
8100
*****************************************************************/
7230
8101
                if ((x = -score) < X)
7231
8102
                        x = X;
7232
8103
                while (sum > x)
8055
8926
                        BlastHitListPurge(search->current_hitlist);
8056
8927
        }
8057
8928
        
8058
 
        compressed_wordsize = lookup->wordsize;
 
8929
        compressed_wordsize = lookup->reduced_wordsize;
8059
8930
        wordsize = wfp->wordsize;
8060
 
        extra_bytes = lookup->wordsize - lookup->reduced_wordsize;
 
8931
        extra_bytes = lookup->wordsize - compressed_wordsize;
8061
8932
 
8062
8933
/* The subject sequence is too short, exit this function now. */
8063
8934
        if (wordsize > search->subject->length)
8067
8938
        lookup_index = index;
8068
8939
/* Determines when to stop scanning the database; does not include remainder. */
8069
8940
        s_end = subject0 + (search->subject->length)/READDB_COMPRESSION_RATIO;
8070
 
        compression_factor = wfp->compression_ratio*lookup->reduced_wordsize;
8071
 
        virtual_wordsize = wordsize - READDB_COMPRESSION_RATIO*compressed_wordsize;
 
8941
        compression_factor = wfp->compression_ratio*compressed_wordsize;
 
8942
        virtual_wordsize = wordsize - READDB_COMPRESSION_RATIO*lookup->wordsize;
8072
8943
        search_context = search->context;
8073
8944
        query_length = search_context[search->first_context].query->length;
8074
8945
        extra_bytes_needed = extra_bytes;
8689
9560
        return 1;
8690
9561
    else if (h1->score > h2->score)
8691
9562
        return -1;
 
9563
 
 
9564
    if( h1->subject_offset < h2->subject_offset )
 
9565
       return 1;
 
9566
    if( h1->subject_offset > h2->subject_offset )
 
9567
          return -1;
 
9568
 
 
9569
    if( h1->subject_length < h2->subject_length )
 
9570
       return 1;
 
9571
    if( h1->subject_length > h2->subject_length )
 
9572
       return -1;
 
9573
 
8692
9574
    else return 0;
8693
9575
}
8694
9576
/*
8717
9599
        Int2 deleted;
8718
9600
        Int4 query_length;
8719
9601
        SeqIdPtr subject_id=NULL;
 
9602
        Int4 align_length;
 
9603
 
 
9604
        /* AM: Query multiplexing. */
 
9605
        QueriesPtr mult_queries = NULL;
 
9606
        Uint4 current_query = 0;
 
9607
        MQ_ResultInfoPtr result_info = NULL;
 
9608
        Int4 mq_new_index, del_index;
 
9609
        BLASTResultHitlistPtr mq_worst_result = NULL;
 
9610
        Uint4 tmp_num_results;
8720
9611
 
8721
9612
        if (search == NULL)
8722
9613
                return 0;       
8727
9618
                return 0;
8728
9619
        }
8729
9620
 
8730
 
        current_hitlist = search->current_hitlist;
 
9621
        /* AM: Support for query concatenation. */
 
9622
        if( !search->mult_queries )
 
9623
          current_hitlist = search->current_hitlist;
 
9624
        else
 
9625
          current_hitlist = search->mult_queries->HitListArray[
 
9626
            search->mult_queries->current_query];
8731
9627
 
8732
9628
        retval = current_hitlist->hspcnt;
8733
9629
 
8734
 
        if (search->pbp->gapped_calculation &&
8735
 
                search->prog_number != blast_type_blastn)
 
9630
        /* AM: Support for query concatenation. */
 
9631
        if( search->mult_queries && !retval ) return 0;
 
9632
 
 
9633
 
 
9634
        if (search->pbp->gapped_calculation)
8736
9635
                kbp = search->sbp->kbp_gap[search->first_context];
8737
9636
        else
8738
9637
                kbp = search->sbp->kbp[search->first_context];
8770
9669
                        hsp_array[index].number = hsp->num;
8771
9670
                        hsp_array[index].score = hsp->score;
8772
9671
                        hsp_array[index].e_value = hsp->evalue;
8773
 
                        hsp_array[index].p_value = hsp->pvalue;
 
9672
                        hsp_array[index].num_ident = hsp->num_ident;
8774
9673
                        hsp_array[index].bit_score = ((hsp->score*kbp->Lambda) -
8775
9674
                                                      kbp->logK)/NCBIMATH_LN2;
8776
9675
                        if (search->prog_number==blast_type_blastn) {
8831
9730
        hitlist_max = result_struct->hitlist_max;
8832
9731
        results = result_struct->results;
8833
9732
 
 
9733
        /* AM: Query multiplexing. */
 
9734
        if( search->mult_queries )
 
9735
        {
 
9736
          mult_queries = search->mult_queries;
 
9737
          current_query = mult_queries->current_query;
 
9738
          result_info = mult_queries->result_info + current_query;
 
9739
        }
 
9740
 
8834
9741
        /* Record the worst evalue for ReevaluateWithAmbiguities. */
8835
9742
        if (hitlist_count == hitlist_max)
8836
9743
        {
8844
9751
        {
8845
9752
                if (hitlist_count == hitlist_max)
8846
9753
                {       /* Array is full, delete the entry. */
 
9754
                  if( !mult_queries ) /* AM: Query multiplexing. */
8847
9755
                        search->current_hitlist =
8848
9756
                           BlastHitListDestruct(search->current_hitlist);
 
9757
                  else search->mult_queries->delete_current_hitlist = TRUE;
 
9758
 
8849
9759
                        result_hitlist = BLASTResultHitlistFreeEx(search, result_hitlist);
8850
9760
                        if (search->thr_info->results_mutex)
8851
9761
                            NlmMutexUnlock(search->thr_info->results_mutex); /* Free mutex. */
8852
9762
                        return 0;
8853
9763
                }
8854
9764
                else
8855
 
                {       /* Add to end of array. */
 
9765
                {       
 
9766
                  /* AM: Query multiplexing. */
 
9767
                  if( !mult_queries )
 
9768
                        /* Add to end of array. */
8856
9769
                        deleted = BlastInsertList2Heap(search, result_hitlist);
 
9770
                  else
 
9771
                  {
 
9772
                    if( result_info->NumResults 
 
9773
                          == mult_queries->max_results_per_query )
 
9774
                    { /* AM: No more results for this query. */
 
9775
                      search->mult_queries->delete_current_hitlist = TRUE;
 
9776
                      result_hitlist 
 
9777
                        = BLASTResultHitlistFreeEx( search, result_hitlist );
 
9778
 
 
9779
                      if( search->thr_info->results_mutex )
 
9780
                        NlmMutexUnlock( search->thr_info->results_mutex );
 
9781
 
 
9782
                      return 0;
 
9783
                    }
 
9784
                    else /* AM: Append to results_struct and to local. */
 
9785
                      deleted = BlastInsertList2Heap(search, result_hitlist);
 
9786
                  }
8857
9787
                }
8858
9788
 
8859
9789
                if (deleted == 1) 
 
9790
                {
 
9791
                  /* AM: Query multiplexing. */
 
9792
                  if( mult_queries ) MQ_UpdateResultLists( mult_queries );
 
9793
 
8860
9794
                        hitlist_count = result_struct->hitlist_count =
8861
9795
                           BlastPurgeResultList(results, hitlist_count);
 
9796
                }
8862
9797
                else if (deleted == 0) 
8863
9798
                {
8864
9799
                        result_hitlist = BLASTResultHitlistFreeEx(search, result_hitlist);
8867
9802
                        return retval;
8868
9803
                }
8869
9804
                new_index = hitlist_count;
 
9805
 
 
9806
                /* AM: Query multiplexing. */
 
9807
                if( mult_queries ) mq_new_index = result_info->NumResults;
8870
9808
        }
8871
9809
        else
8872
9810
        {
8874
9812
          {
8875
9813
            deleted = BlastInsertList2Heap(search, result_hitlist);
8876
9814
            if (deleted == 1) 
 
9815
            {
 
9816
              /* AM: Query multiplexing. */
 
9817
              if( mult_queries ) MQ_UpdateResultLists( mult_queries );
 
9818
 
8877
9819
              hitlist_count = result_struct->hitlist_count =
8878
9820
                 BlastPurgeResultList(results, hitlist_count);
 
9821
            }
8879
9822
            else if (deleted == 0) {
8880
9823
              result_hitlist = BLASTResultHitlistFreeEx(search, result_hitlist);
8881
9824
              if (search->thr_info->results_mutex)
8926
9869
                        }
8927
9870
                        old_index = new_index;
8928
9871
                    }
8929
 
                    if (hitlist_count == hitlist_max)
8930
 
                    {   /* The list is full, delete the last entry. */
8931
 
                        BlastFreeHeap(search, results[hitlist_max-1]);
8932
 
                        if (results[hitlist_max-1]->seqalign)
8933
 
                           SeqAlignSetFree(results[hitlist_max-1]->seqalign);
8934
 
                        results[hitlist_max-1] = BLASTResultHitlistFreeEx(search, results[hitlist_max-1]);
8935
 
                        result_struct->hitlist_count--; 
8936
 
                        hitlist_count = result_struct->hitlist_count;   
 
9872
                    
 
9873
                    /* AM: Query multiplexing. */
 
9874
                    if( !mult_queries )
 
9875
                    {
 
9876
                      if (hitlist_count == hitlist_max)
 
9877
                      { /* The list is full, delete the last entry. */
 
9878
                          BlastFreeHeap(search, results[hitlist_max-1]);
 
9879
                          if (results[hitlist_max-1]->seqalign)
 
9880
                             SeqAlignSetFree(results[hitlist_max-1]->seqalign);
 
9881
                          results[hitlist_max-1] = BLASTResultHitlistFreeEx(search, results[hitlist_max-1]);
 
9882
                          result_struct->hitlist_count--;       
 
9883
                          hitlist_count = result_struct->hitlist_count; 
 
9884
                      }
 
9885
                      if (hitlist_max > 1)
 
9886
                          Nlm_MemMove((results+new_index+1), (results+new_index), (hitlist_count-new_index)*sizeof(results[0]));
 
9887
                    }
 
9888
                    else
 
9889
                    {
 
9890
                      new_index = ResultIndex( current_evalue, high_score,
 
9891
                                               search->subject_id, 
 
9892
                                               results, hitlist_count );
 
9893
 
 
9894
                      tmp_num_results = result_info->NumResults;
 
9895
                      del_index = hitlist_count;
 
9896
                      mq_new_index = ResultIndex( current_evalue, high_score,
 
9897
                                                  search->subject_id, 
 
9898
                                                  result_info->results,
 
9899
                                                  result_info->NumResults );
 
9900
                      
 
9901
                      if( mq_new_index == mult_queries->max_results_per_query )
 
9902
                      { /* AM: The list is full and new result is too low --- do nothing. */
 
9903
                        search->mult_queries->delete_current_hitlist = TRUE;
 
9904
                        result_hitlist 
 
9905
                          = BLASTResultHitlistFreeEx( search, result_hitlist );
 
9906
 
 
9907
                        if( search->thr_info->results_mutex )
 
9908
                          NlmMutexUnlock( search->thr_info->results_mutex );
 
9909
 
 
9910
                        return 0;
 
9911
                      }
 
9912
 
 
9913
                      if( result_info->NumResults 
 
9914
                            == mult_queries->max_results_per_query )
 
9915
                      { /* AM: must remove the worst result for this query. */
 
9916
                        mq_worst_result 
 
9917
                          = result_info->results[result_info->NumResults - 1];
 
9918
                        --tmp_num_results;
 
9919
                        del_index = ResultIndex1( mq_worst_result,
 
9920
                                                  results, hitlist_count );
 
9921
                        BlastFreeHeap( search, results[del_index] );
 
9922
 
 
9923
                        if( results[del_index]->seqalign )
 
9924
                          SeqAlignSetFree( results[del_index]->seqalign );
 
9925
 
 
9926
                        results[del_index] 
 
9927
                          = BLASTResultHitlistFreeEx( search, 
 
9928
                                                      results[del_index] );
 
9929
                        hitlist_count = --result_struct->hitlist_count;
 
9930
                      }
 
9931
 
 
9932
                      if( hitlist_max > 1 )
 
9933
                        if( new_index < del_index )
 
9934
                          Nlm_MemMove( results + new_index + 1, 
 
9935
                                       results + new_index,
 
9936
                                       (del_index - new_index)
 
9937
                                         *sizeof( results[0] ) );
 
9938
                        else if( del_index < new_index )
 
9939
                          Nlm_MemMove( results + del_index,
 
9940
                                       results + del_index + 1,
 
9941
                                       (new_index - del_index)
 
9942
                                         *sizeof( results[0] ) );
 
9943
 
 
9944
                      if( mult_queries->max_results_per_query > 1 )
 
9945
                        Nlm_MemMove( result_info->results + mq_new_index + 1,
 
9946
                                     result_info->results + mq_new_index,
 
9947
                                     (result_info->NumResults - mq_new_index)
 
9948
                                       *sizeof( results[0] ) );
 
9949
 
 
9950
                      result_info->NumResults = tmp_num_results;
8937
9951
                    }
8938
 
                    if (hitlist_max > 1)
8939
 
                        Nlm_MemMove((results+new_index+1), (results+new_index), (hitlist_count-new_index)*sizeof(results[0]));
8940
9952
            }
8941
9953
            else
8942
9954
            {  /* Case of K=1 and the first hit is eliminated */
8943
9955
                new_index = 0;
8944
9956
                BlastInsertList2Heap(search, result_hitlist);
 
9957
 
 
9958
              /* AM: Query multiplexing. */
 
9959
              if( mult_queries ) mq_new_index = 0;
8945
9960
            }
8946
9961
          }
8947
9962
        else
8948
9963
          {     /* First hit to be stored. */
8949
9964
            new_index = 0;
8950
9965
            BlastInsertList2Heap(search, result_hitlist);
 
9966
 
 
9967
            /* AM: Query multiplexing. */
 
9968
            if( mult_queries ) mq_new_index = 0;
8951
9969
          }
8952
9970
        }
8953
9971
        
8955
9973
        {
8956
9974
                results[new_index] = result_hitlist;
8957
9975
                result_struct->hitlist_count++; 
 
9976
 
 
9977
          /* AM: Query multiplexing. */
 
9978
          if( mult_queries )
 
9979
          {
 
9980
            result_info->results[mq_new_index] = result_hitlist;
 
9981
            ++result_info->NumResults;
 
9982
          }
8958
9983
        }
8959
9984
 
8960
9985
        /* We need to sort all hits by score/e_value in results[new_index] */
8961
9986
 
8962
 
 
8963
9987
        HeapSort(results[new_index]->hsp_array, results[new_index]->hspcnt, 
8964
9988
                 sizeof(BLASTResultHsp), RPSResultHspScoreCmp);
8965
9989
 
8985
10009
        BLAST_Score     s, s2;
8986
10010
        BLAST_Score     dropoff_1st_pass, dropoff_2nd_pass;
8987
10011
        Int2 index;
 
10012
        Int4 i; /* AM: Support for query multiplexing. */
8988
10013
        
8989
10014
        Nlm_FloatHi meff, e, e2;
 
10015
        Int2 last_context;
8990
10016
 
8991
10017
        if (search == NULL)
8992
10018
                return 1;
9005
10031
        if (kbp == NULL && kbp_gap == NULL)
9006
10032
                return 1;
9007
10033
 
9008
 
        for (index=search->first_context; index<=search->last_context; index++)
 
10034
        last_context = (search->prog_number == blast_type_blastn) ?
 
10035
           search->first_context : search->last_context;
 
10036
        for (index=search->first_context; index<=last_context; index++)
9009
10037
        {
9010
10038
                ewp = search->context[index].ewp;
9011
10039
                if (ewp == NULL && !pbp->mb_params)
9022
10050
 
9023
10051
        meff = (Nlm_FloatHi) search->context[search->first_context].query->length;
9024
10052
        if (pbp->mb_params)
9025
 
           BlastCutoffs_simple(&s, &e, kbp, searchsp, TRUE);
 
10053
           BlastCutoffs(&s, &e, kbp, searchsp, TRUE, search->pbp->gap_decay_rate );
9026
10054
        else
9027
10055
        {
9028
 
           if (pbp->gapped_calculation && search->prog_number != blast_type_blastn)
9029
 
                BlastCutoffs_simple(&s, &e, kbp_gap, searchsp, FALSE);
 
10056
           if (pbp->gapped_calculation) 
 
10057
     { /* AM: Changed to support query concatenation. */
 
10058
       if( !search->mult_queries )
 
10059
         BlastCutoffs(&s, &e, kbp_gap, searchsp, FALSE, 0.0 );
 
10060
       else
 
10061
         BlastCutoffs( &s, &e, kbp_gap,
 
10062
                       search->mult_queries->MinSearchSpEff, 
 
10063
                       FALSE, 0.0 );
 
10064
     }
9030
10065
           else
9031
 
                BlastCutoffs_simple(&s, &e, kbp, searchsp, FALSE);
 
10066
     { /* AM: Changed to support query concatenation. */
 
10067
       if( !search->mult_queries )
 
10068
         BlastCutoffs(&s, &e, kbp, searchsp, FALSE, 0.0 );
 
10069
       else
 
10070
         BlastCutoffs( &s, &e, kbp, search->mult_queries->MinSearchSpEff, 
 
10071
                        FALSE, 0.0 ); 
 
10072
     }
9032
10073
        }
9033
10074
        /* Determine the secondary cutoff score, S2, to use */
9034
10075
        if (e2 == 0. && !pbp->cutoff_s2_set)
9047
10088
/*
9048
10089
                BlastCutoffs(&s2, &e2, kbp, meff, avglen, TRUE);
9049
10090
*/
9050
 
                if (pbp->gapped_calculation && search->prog_number != blast_type_blastn)
9051
 
                        BlastCutoffs(&s2, &e2, kbp_gap, MIN(avglen,meff), avglen, TRUE);
9052
 
                else
9053
 
                        BlastCutoffs(&s2, &e2, kbp, MIN(avglen,meff), avglen, TRUE);
 
10091
    if (pbp->gapped_calculation)
 
10092
    {
 
10093
      if( !search->mult_queries )
 
10094
        BlastCutoffs(&s2, &e2, kbp_gap, MIN(avglen,meff) * avglen,
 
10095
                     TRUE, search->pbp->gap_decay_rate );
 
10096
      else
 
10097
        BlastCutoffs( &s2, &e2, kbp_gap, 
 
10098
                      MIN( avglen,search->mult_queries->MinLen ) * avglen,
 
10099
                      TRUE, search->pbp->gap_decay_rate ); 
 
10100
    }
 
10101
    else
 
10102
    { /* AM: Changed to support query concatenation. */
 
10103
      if( !search->mult_queries )
 
10104
        BlastCutoffs(&s2, &e2, kbp, MIN(avglen,meff) * avglen,
 
10105
                     TRUE, search->pbp->gap_decay_rate );
 
10106
      else
 
10107
        BlastCutoffs( &s2, &e2, kbp, 
 
10108
                      MIN(avglen,2*(search->mult_queries->MinLen)) * avglen,
 
10109
                      TRUE, search->pbp->gap_decay_rate ); 
 
10110
    }
9054
10111
                /* Adjust s2 to be in line with s, as necessary */
9055
10112
                s2 = MAX(s2, 1);
9056
10113
                if (s2 > s)
9078
10135
        
9079
10136
        dropoff_1st_pass = (BLAST_Score) ceil((Nlm_FloatHi) dropoff_number_of_bits_1st_pass * NCBIMATH_LN2 / kbp->Lambda);
9080
10137
        dropoff_1st_pass = (BLAST_Score) MIN((Nlm_FloatHi) dropoff_1st_pass, s);
9081
 
        dropoff_2nd_pass = (BLAST_Score) ceil((Nlm_FloatHi) dropoff_number_of_bits_2nd_pass * NCBIMATH_LN2 / kbp->Lambda);
 
10138
 
 
10139
        /* AM: Change to support query multiplexing. */
 
10140
        if( search->prog_number == blast_type_tblastn && search->mult_queries )
 
10141
          dropoff_2nd_pass = (BLAST_Score)ceil( 
 
10142
            (Nlm_FloatHi)dropoff_number_of_bits_2nd_pass*NCBIMATH_LN2
 
10143
              /search->mult_queries->LambdaMin );
 
10144
        else
 
10145
          dropoff_2nd_pass = (BLAST_Score) ceil((Nlm_FloatHi) dropoff_number_of_bits_2nd_pass * NCBIMATH_LN2 / kbp->Lambda);
 
10146
 
9082
10147
        dropoff_2nd_pass = (BLAST_Score) MIN((Nlm_FloatHi) dropoff_2nd_pass, s);        
 
10148
 
 
10149
        /* AM: Change to support query multiplexing. */
 
10150
        if( search->prog_number == blast_type_tblastn && search->mult_queries )
 
10151
          for( i = 0; i < search->mult_queries->NumQueries; ++i )
 
10152
            search->mult_queries->dropoff_2nd_pass_array[i]
 
10153
              = - (BLAST_Score)ceil( (Nlm_FloatHi)dropoff_number_of_bits_2nd_pass*NCBIMATH_LN2
 
10154
                                     /search->mult_queries->lambda_array[i] );
 
10155
 
9083
10156
        /* The drop-off parameter MUST be negative. */
9084
10157
        pbp->dropoff_1st_pass = -dropoff_1st_pass;
9085
10158
        pbp->dropoff_2nd_pass = -dropoff_2nd_pass;
9145
10218
        BLAST_HSPPtr hsp;
9146
10219
        Int4 index;
9147
10220
 
9148
 
        hitlist = search->current_hitlist;
 
10221
        /* AM: Support for query concatenation. */
 
10222
        if( !search->mult_queries || !search->mult_queries->use_mq )
 
10223
          hitlist = search->current_hitlist;
 
10224
        else
 
10225
          hitlist = search->mult_queries->HitListArray[
 
10226
            search->mult_queries->current_query];
 
10227
 
9149
10228
        if (hitlist && hitlist->hspcnt > 0)
9150
10229
        {
9151
10230
           /* Link up the HSP's for this hitlist. */
9329
10408
   return end;
9330
10409
}
9331
10410
 
 
10411
 
9332
10412
#define WINDOW_SIZE 20
9333
10413
static FloatHi SumHSPEvalue(BlastSearchBlkPtr search, BLAST_HSPPtr head_hsp, 
9334
10414
                            BLAST_HSPPtr hsp, BLAST_ScorePtr sumscore)
9351
10431
      MAX(head_hsp->score, head_hsp->sumscore);
9352
10432
   score_prime = *sumscore * search->sbp->kbp_gap[search->first_context]->Lambda;
9353
10433
 
9354
 
   sum_evalue = 
 
10434
   sum_evalue =
9355
10435
      BlastUnevenGapSumE(search->sbp->kbp_gap[search->first_context],
9356
 
                         2*WINDOW_SIZE, search->pbp->longest_intron + WINDOW_SIZE, 
9357
 
                         gap_prob, gap_decay_rate, num,*sumscore, score_prime, 
9358
 
                         search->context[search->first_context].query->effective_length, 
9359
 
                         subject_length, FALSE);
 
10436
                         2*WINDOW_SIZE,
 
10437
                         search->pbp->longest_intron + WINDOW_SIZE,
 
10438
                         num, score_prime,
 
10439
                         search->context[search->first_context].
 
10440
                         query->effective_length,
 
10441
                         subject_length,
 
10442
                         BlastGapDecayDivisor(gap_decay_rate, num));
 
10443
 
9360
10444
   sum_evalue *= (FloatHi) search->dblen / subject_length;
9361
10445
   return sum_evalue;
9362
10446
}
9632
10716
   return head_hsp;
9633
10717
}
9634
10718
 
 
10719
 
9635
10720
/*
9636
10721
        This function orders and "links" the HSP's.  It does this
9637
10722
        by first ordering them backwards (with "rev_compare_hsps") and 
9693
10778
        Int4 path_changed;  /* will be set if an element is removed that may change an existing path */
9694
10779
        Int4 first_pass, use_current_max; 
9695
10780
        LinkHelpStruct *lh_helper=0;
 
10781
        Uint4 query_num; /* AM: to support query concatenation. */
9696
10782
 
9697
10783
        if (search == NULL || hitlist == NULL)
9698
10784
                return NULL;
9705
10791
        }
9706
10792
        lh_helper= hitlist->lh_helper;
9707
10793
 
9708
 
        if (search->pbp->gapped_calculation && search->prog_number != blast_type_blastn)
 
10794
        if (search->pbp->gapped_calculation)
9709
10795
        {
9710
10796
                kbp = search->sbp->kbp_gap;
9711
10797
        }
9715
10801
        }
9716
10802
 
9717
10803
        total_number_of_hsps = hitlist->hspcnt;
9718
 
        subject_length = MAX((search->subject->length - search->length_adjustment), 1);
 
10804
 
 
10805
        /* AM: Support for query concatenation */
 
10806
        if( !search->mult_queries )
 
10807
          subject_length = MAX((search->subject->length - search->length_adjustment), 1);
 
10808
        else
 
10809
        {
 
10810
          query_num = GetQueryNum( search->mult_queries, hsp_array[0]->query.offset,
 
10811
                                   hsp_array[0]->query.end, hsp_array[0]->query.frame );
 
10812
          subject_length = MAX((search->subject->length 
 
10813
                                - search->mult_queries->Adjustments[query_num]), 1);
 
10814
        }
9719
10815
        
9720
10816
        if (StringCmp(search->prog_name, "tblastn") == 0
9721
10817
            || StringCmp(search->prog_name, "tblastx") == 0
10162
11258
                    of links, as this was subtracted out for purposes of the
10163
11259
                    comparison above. */
10164
11260
                    best[0]->hsp_link.sum[0] += (best[0]->hsp_link.num[0])*cutoff[0];
10165
 
                    prob[0] = BlastSmallGapSumE(kbp[search->first_context], gap_size, gap_prob, gap_decay_rate, best[0]->hsp_link.num[0],best[0]->hsp_link.sum[0], best[0]->hsp_link.xsum[0], search->context[search->first_context].query->effective_length, subject_length, FALSE);
 
11261
 
 
11262
                    /* AM: Support for query concatenation. */
 
11263
            if( best[0]->hsp_link.num[0] > 1 && gap_prob == 0 ) {
 
11264
              prob[0] = INT4_MAX;
 
11265
            } else {
 
11266
              if( !search->mult_queries )
 
11267
                prob[0] =
 
11268
                  BlastSmallGapSumE(kbp[search->first_context],
 
11269
                                    gap_size,
 
11270
                                    best[0]->hsp_link.num[0],
 
11271
                                    best[0]->hsp_link.xsum[0],
 
11272
                                    search->context[search->first_context].
 
11273
                                    query->effective_length,
 
11274
                                    subject_length,
 
11275
                                    BlastGapDecayDivisor(gap_decay_rate,
 
11276
                                                         best[0]->
 
11277
                                                         hsp_link.num[0]));
 
11278
              else
 
11279
                prob[0] =
 
11280
                  BlastSmallGapSumE( kbp[search->first_context],
 
11281
                                     gap_size,
 
11282
                                     best[0]->hsp_link.num[0],
 
11283
                                     best[0]->hsp_link.xsum[0],
 
11284
                                     search->mult_queries->
 
11285
                                     EffLengths[query_num],
 
11286
                                     subject_length,
 
11287
                                     BlastGapDecayDivisor(gap_decay_rate,
 
11288
                                                          best[0]->
 
11289
                                                          hsp_link.num[0]));
 
11290
              if( best[0]->hsp_link.num[0] > 1 ) {
 
11291
                prob[0] /= gap_prob;
 
11292
                if( prob[0] > INT4_MAX ) prob[0] = INT4_MAX;
 
11293
              }
 
11294
            }
 
11295
 
10166
11296
                    best[1]->hsp_link.sum[1] += (best[1]->hsp_link.num[1])*cutoff[1];
10167
 
                    prob[1] = BlastLargeGapSumE(kbp[search->first_context], gap_prob, gap_decay_rate, best[1]->hsp_link.num[1],best[1]->hsp_link.sum[1], best[1]->hsp_link.xsum[1], search->context[search->first_context].query->effective_length, subject_length, FALSE);
 
11297
 
 
11298
                    /* AM: Support for query concatenation. */
 
11299
            if( 1 - gap_prob == 0.0 && best[1]->hsp_link.num[1] > 1 ) {
 
11300
              prob[1] = INT4_MAX;
 
11301
            } else{
 
11302
              if( !search->mult_queries )
 
11303
                prob[1] =
 
11304
                  BlastLargeGapSumE(kbp[search->first_context],
 
11305
                                    best[1]->hsp_link.num[1],
 
11306
                                    best[1]->hsp_link.xsum[1],
 
11307
                                    search->context[search->first_context].
 
11308
                                    query->effective_length,
 
11309
                                    subject_length,
 
11310
                                    BlastGapDecayDivisor(gap_decay_rate,
 
11311
                                                         best[1]->
 
11312
                                                         hsp_link.num[1]));
 
11313
              else
 
11314
                prob[1] =
 
11315
                  BlastLargeGapSumE( kbp[search->first_context],
 
11316
                                     best[1]->hsp_link.num[1],
 
11317
                                     best[1]->hsp_link.xsum[1],
 
11318
                                     search->mult_queries->
 
11319
                                     EffLengths[query_num],
 
11320
                                     subject_length,
 
11321
                                     BlastGapDecayDivisor(gap_decay_rate,
 
11322
                                                          best[1]->
 
11323
                                                          hsp_link.num[1]));
 
11324
 
 
11325
              if( best[1]->hsp_link.num[1] > 1 ) {
 
11326
                prob[1] /= 1 - gap_prob;
 
11327
                if( prob[1] > INT4_MAX ) prob[1] = INT4_MAX;
 
11328
              }
 
11329
            }
10168
11330
                    ordering_method = prob[0]<=prob[1] ? 0:1;
10169
11331
                  }
10170
11332
                  else
10172
11334
                    /* We only consider the case of big gaps. */
10173
11335
                    best[1]->hsp_link.sum[1] += (best[1]->hsp_link.num[1])*cutoff[1];
10174
11336
                    /* gap_prob=0 here as small gaps are NOT considered. */
10175
 
                    prob[1] = BlastLargeGapSumE(kbp[search->first_context], 0.0, gap_decay_rate, best[1]->hsp_link.num[1],best[1]->hsp_link.sum[1], best[1]->hsp_link.xsum[1], search->context[search->first_context].query->effective_length, subject_length, FALSE);
 
11337
 
 
11338
                    /* AM: Support for query concatenation. */
 
11339
            if( !search->mult_queries )
 
11340
              prob[1] =
 
11341
                BlastLargeGapSumE(kbp[search->first_context],
 
11342
                                  best[1]->hsp_link.num[1],
 
11343
                                  best[1]->hsp_link.xsum[1],
 
11344
                                  search->context[search->first_context].
 
11345
                                  query->effective_length,
 
11346
                                  subject_length,
 
11347
                                  BlastGapDecayDivisor(gap_decay_rate,
 
11348
                                                       best[1]->
 
11349
                                                       hsp_link.num[1]));
 
11350
            else
 
11351
              prob[1] =
 
11352
                BlastLargeGapSumE( kbp[search->first_context],
 
11353
                                   best[1]->hsp_link.num[1],
 
11354
                                   best[1]->hsp_link.xsum[1],
 
11355
                                   search->mult_queries->EffLengths[query_num],
 
11356
                                   subject_length,
 
11357
                                   BlastGapDecayDivisor(gap_decay_rate,
 
11358
                                                        best[1]->
 
11359
                                                        hsp_link.num[1]));
10176
11360
                    ordering_method = 1;
10177
11361
                  }
10178
11362
                }
10181
11365
                    /* We only consider the case of big gaps. */
10182
11366
                    best[1]->hsp_link.sum[1] += (best[1]->hsp_link.num[1])*cutoff[1];
10183
11367
                    /* gap_prob=0 here as small gaps are NOT considered. */
10184
 
                    prob[1] = BlastLargeGapSumE(kbp[search->first_context], 0.0, gap_decay_rate, best[1]->hsp_link.num[1],best[1]->hsp_link.sum[1], best[1]->hsp_link.xsum[1], search->context[search->first_context].query->effective_length, subject_length, TRUE);
10185
 
                    ordering_method = 1;
 
11368
 
 
11369
                    /* AM: Support for query concatenation. */
 
11370
            if( !search->mult_queries )
 
11371
              prob[1] =
 
11372
                BlastLargeGapSumE( kbp[search->first_context],
 
11373
                                   best[1]->hsp_link.num[1],
 
11374
                                   best[1]->hsp_link.xsum[1],
 
11375
                                   search->context[search->first_context].
 
11376
                                   query->effective_length,
 
11377
                                   subject_length,
 
11378
                                   BlastGapDecayDivisor(gap_decay_rate,
 
11379
                                                        best[1]->
 
11380
                                                        hsp_link.num[1]));
 
11381
            else
 
11382
              prob[1] =
 
11383
                BlastLargeGapSumE( kbp[search->first_context],
 
11384
                                   best[1]->hsp_link.num[1],
 
11385
                                   best[1]->hsp_link.xsum[1],
 
11386
                                   search->mult_queries->EffLengths[query_num],
 
11387
                                   subject_length,
 
11388
                                   BlastGapDecayDivisor(gap_decay_rate,
 
11389
                                                        best[1]->
 
11390
                                                        hsp_link.num[1]));
 
11391
 
 
11392
            ordering_method = 1;
10186
11393
                }
10187
11394
 
10188
11395
                best[ordering_method]->start_of_chain = TRUE;
10189
11396
                
10190
 
                prob[ordering_method] *= 
 
11397
                /* AM: Support for query concatenation. */
 
11398
                if( !search->mult_queries )
 
11399
                  prob[ordering_method] *= 
10191
11400
                        ((Nlm_FloatHi)search->searchsp_eff/((Nlm_FloatHi)subject_length*search->context[search->first_context].query->effective_length));
 
11401
                else
 
11402
                {
 
11403
                  prob[ordering_method] 
 
11404
                    *= ((Nlm_FloatHi)search->mult_queries->SearchSpEff[query_num]
 
11405
                       /((Nlm_FloatHi)subject_length
 
11406
                       *search->mult_queries->EffLengths[query_num]));
 
11407
                }
 
11408
 
10192
11409
                best[ordering_method]->evalue = prob[ordering_method];
10193
11410
 
10194
11411
/* remove the links that have been ordered already. */
10326
11543
        if (search == NULL)
10327
11544
                return 1;
10328
11545
 
10329
 
        cutoff = search->pbp->cutoff_e;
10330
 
 
10331
 
        hitlist = search->current_hitlist;
 
11546
        cutoff = search->pbp->cutoff_e;
 
11547
 
 
11548
        /* AM: Support for query concatenation. */
 
11549
        if( !search->mult_queries 
 
11550
            || search->prog_number != blast_type_tblastn 
 
11551
            || !search->mult_queries->use_mq )
 
11552
          hitlist = search->current_hitlist;
 
11553
        else
 
11554
          hitlist = search->mult_queries->HitListArray[
 
11555
            search->mult_queries->current_query];
 
11556
 
10332
11557
        if (hitlist)
10333
11558
        {
10334
11559
                hitlist->hspcnt_max = hitlist->hspcnt;
10339
11564
                        if (hsp->evalue > cutoff &&
10340
11565
                                (search->pbp->no_check_score || search->pbp->cutoff_s > hsp->score))
10341
11566
                        {
10342
 
                            hsp_array[index]->gap_info = 
10343
 
                               GapXEditBlockDelete(hsp_array[index]->gap_info);
10344
 
                            hsp_array[index] = MemFree(hsp_array[index]);
 
11567
                            hsp_array[index] = BLAST_HSPFree(hsp_array[index]);
10345
11568
                            hsp_deleted = TRUE;
10346
11569
                        }
10347
11570
                        else
10348
11571
                        {
10349
 
                                hsp->pvalue = BlastKarlinEtoP(hsp->evalue);
 
11572
                                /*hsp->pvalue = BlastKarlinEtoP(hsp->evalue);*/
10350
11573
                                hsp_cnt++;
10351
11574
                        }
10352
11575
                }
10369
11592
                        search->number_of_seqs_better_E++;
10370
11593
                }
10371
11594
        }
10372
 
        search->current_hitlist = hitlist;
 
11595
 
 
11596
        /* AM: Support for query concatenation. */
 
11597
        if( !search->mult_queries 
 
11598
            || search->prog_number != blast_type_tblastn 
 
11599
            || !search->mult_queries->use_mq )
 
11600
          search->current_hitlist = hitlist;
 
11601
        else
 
11602
          search->mult_queries->HitListArray[
 
11603
            search->mult_queries->current_query] = hitlist;
 
11604
 
10373
11605
        return 0;
10374
11606
}
10375
11607
 
10388
11620
        BLAST_KarlinBlkPtr PNTR kbp;
10389
11621
        Int4 hsp_cnt;
10390
11622
        Int4 index;
 
11623
        /* AM: Added to support query concatencation. */
 
11624
        Int4 query_num;
10391
11625
 
10392
11626
        if (search == NULL)
10393
11627
                return 1;
10394
11628
 
10395
 
        if (search->pbp->gapped_calculation && search->prog_number != blast_type_blastn)
 
11629
        if (search->pbp->gapped_calculation)
10396
11630
        {
10397
11631
                kbp = search->sbp->kbp_gap;
10398
11632
        }
10409
11643
                for (index=0; index<hsp_cnt; index++)
10410
11644
                {
10411
11645
                        hsp = hsp_array[index];
10412
 
                        if (!search->pbp->mb_params)
10413
 
                           hsp->evalue = BlastKarlinStoE_simple(hsp->score, kbp[hsp->context], search->searchsp_eff);
 
11646
                        if (!search->pbp->mb_params) 
 
11647
                        {
 
11648
                          /* AM: changed to support query concatenation. */
 
11649
                          if( !search->mult_queries )
 
11650
                            hsp->evalue = BlastKarlinStoE_simple(hsp->score, 
 
11651
                                                                 kbp[hsp->context], 
 
11652
                                                                 search->searchsp_eff);
 
11653
                          else
 
11654
                          {
 
11655
                            query_num = GetQueryNum( search->mult_queries,
 
11656
                                                     hsp->query.offset,
 
11657
                                                     hsp->query.end,
 
11658
                                                     hsp->query.frame );
 
11659
                            hsp->evalue = BlastKarlinStoE_simple( hsp->score,
 
11660
                                                                  kbp[hsp->context],
 
11661
                                                                  search->mult_queries->SearchSpEff[query_num] );
 
11662
                          }
 
11663
                        }
10414
11664
                        else {
10415
11665
                           FloatHi searchsp_eff;
10416
 
                           hsp->context = BinarySearchInt4(hsp->query.offset, search->query_context_offsets, (Int4) (search->last_context+1));
10417
 
                           searchsp_eff = (FloatHi) search->dblen_eff *
 
11666
                           hsp->context = BinarySearchInt4(hsp->query.offset,
 
11667
                                                           search->query_context_offsets, (Int4) (search->last_context+1));
 
11668
                           if (kbp[hsp->context]) {
 
11669
                              searchsp_eff = (FloatHi) search->dblen_eff *
10418
11670
                              (FloatHi) search->context[hsp->context].query->effective_length;
10419
 
                           hsp->evalue = BlastKarlinStoE_simple(hsp->score, kbp[hsp->context], searchsp_eff);
 
11671
                              hsp->evalue = BlastKarlinStoE_simple(hsp->score,
 
11672
                                   kbp[hsp->context], searchsp_eff);
 
11673
                           }
10420
11674
                        }
10421
11675
                }
10422
11676
        }