~ubuntu-branches/ubuntu/feisty/ncbi-tools6/feisty

« back to all changes in this revision

Viewing changes to algo/blast/core/blast_kappa.c

  • Committer: Bazaar Package Importer
  • Author(s): Barry deFreese
  • Date: 2006-07-19 23:28:07 UTC
  • mfrom: (1.1.5 upstream)
  • Revision ID: james.westby@ubuntu.com-20060719232807-et3cdmcjgmnyleyx
Tags: 6.1.20060507-3ubuntu1
Re-merge with Debian

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
/* $Id: blast_kappa.c,v 1.62 2005/12/02 17:16:51 madden Exp $
 
1
/* $Id: blast_kappa.c,v 1.71 2006/05/03 14:30:59 madden Exp $
2
2
 * ==========================================================================
3
3
 *
4
4
 *                            PUBLIC DOMAIN NOTICE
34
34
 
35
35
#ifndef SKIP_DOXYGEN_PROCESSING
36
36
static char const rcsid[] =
37
 
"$Id: blast_kappa.c,v 1.62 2005/12/02 17:16:51 madden Exp $";
 
37
"$Id: blast_kappa.c,v 1.71 2006/05/03 14:30:59 madden Exp $";
38
38
#endif /* SKIP_DOXYGEN_PROCESSING */
39
39
 
40
40
#include <float.h>
48
48
#include <algo/blast/core/blast_filter.h>
49
49
#include <algo/blast/core/link_hsps.h>
50
50
#include "blast_psi_priv.h"
51
 
#include "matrix_freq_ratios.h"
52
51
#include "blast_gapalign_priv.h"
53
52
#include "blast_posit.h"
54
53
#include "blast_hits_priv.h"
55
54
 
56
55
#include <algo/blast/composition_adjustment/nlm_linear_algebra.h>
57
 
#include <algo/blast/composition_adjustment/composition_constants.h>
58
 
#include <algo/blast/composition_adjustment/composition_adjustment.h>
59
56
#include <algo/blast/composition_adjustment/compo_heap.h>
60
 
#include <algo/blast/composition_adjustment/smith_waterman.h>
61
57
#include <algo/blast/composition_adjustment/redo_alignment.h>
 
58
#include <algo/blast/composition_adjustment/matrix_frequency_data.h>
 
59
 
 
60
 
 
61
/** Compile-time option; if set to a true value, then blastp runs
 
62
    that use Blast_RedoAlignmentCore to compute the traceback will not
 
63
    SEG the subject sequence */
 
64
#ifndef KAPPA_BLASTP_NO_SEG_SEQUENCE
 
65
#define KAPPA_BLASTP_NO_SEG_SEQUENCE 0
 
66
#endif
 
67
 
 
68
 
 
69
/** Compile-time option; if set to a true value, then blastp runs
 
70
    that use Blast_RedoAlignmentCore to compute the traceback will not
 
71
    SEG the subject sequence */
 
72
#ifndef KAPPA_TBLASTN_NO_SEG_SEQUENCE
 
73
#define KAPPA_TBLASTN_NO_SEG_SEQUENCE 0
 
74
#endif
 
75
 
62
76
 
63
77
/**
64
 
 * Scale the scores in an HSP list and reset the bit scores.
 
78
 * Given a list of HSPs with (possibly) high-precision scores, rescale
 
79
 * the scores to have standard precision and set the scale-independent
 
80
 * bit scores.  This routine does *not* resort the list; it is assumed
 
81
 * that the list is already sorted according to e-values that have been
 
82
 * computed using the initial, higher-precision scores.
65
83
 *
66
84
 * @param hsp_list          the HSP list
67
85
 * @param logK              Karlin-Altschul statistical parameter [in]
68
86
 * @param lambda            Karlin-Altschul statistical parameter [in]
69
87
 * @param scoreDivisor      the value by which reported scores are to be
70
 
 * @todo rename to something which is more intention revealing, merge with
71
 
 * function of the same name in blast_traceback.c
72
88
 */
73
 
/* WHY */
74
89
static void
75
 
s_HSPListRescaleScores(BlastHSPList * hsp_list,
76
 
                       double lambda,
77
 
                       double logK,
78
 
                       double scoreDivisor)
 
90
s_HSPListNormalizeScores(BlastHSPList * hsp_list,
 
91
                         double lambda,
 
92
                         double logK,
 
93
                         double scoreDivisor)
79
94
{
80
95
    int hsp_index;
81
96
    for(hsp_index = 0; hsp_index < hsp_list->hspcnt; hsp_index++) {
87
102
    }
88
103
}
89
104
 
 
105
 
90
106
/**
91
107
 * Remove from a hitlist all HSPs that are completely contained in an
92
108
 * HSP that occurs earlier in the list and that:
98
114
 * @param hspcnt            length of hsp_array
99
115
 */
100
116
static void
101
 
s_HitlistReapContained(
102
 
                       BlastHSP * hsp_array[],
103
 
                       Int4 * hspcnt)
 
117
s_HitlistReapContained(BlastHSP * hsp_array[], Int4 * hspcnt)
104
118
{
105
119
    Int4 iread;       /* iteration index used to read the hitlist */
106
120
    Int4 iwrite;      /* iteration index used to write to the hitlist */
110
124
 
111
125
    for (iread = 1;  iread < *hspcnt;  iread++) {
112
126
        /* for all HSPs in the hitlist */
113
 
        Int4         ireadBack;  /* iterator over indices less than iread */
114
 
        BlastHSP    *hsp1;       /* an HSP that is a candidate for deletion */
 
127
        Int4      ireadBack;  /* iterator over indices less than iread */
 
128
        BlastHSP *hsp1;       /* an HSP that is a candidate for deletion */
115
129
 
116
130
        hsp1 = hsp_array[iread];
117
131
        for (ireadBack = 0;  ireadBack < iread && hsp1 != NULL;  ireadBack++) {
118
 
            /* for all HSPs before hsp1 in the hitlist and while hsp1 has not
119
 
               been deleted */
 
132
            /* for all HSPs before hsp1 in the hitlist and while hsp1
 
133
             * has not been deleted */
120
134
            BlastHSP *hsp2;    /* an HSP that occurs earlier in hsp_array
121
135
                                * than hsp1 */
122
136
            hsp2 = hsp_array[ireadBack];
157
171
}
158
172
 
159
173
 
 
174
/** A callback used to free an EditScript that has been stored in a
 
175
 * BlastCompo_Alignment. */
160
176
static void s_FreeEditScript(void * edit_script)
161
177
{
162
178
    if (edit_script != NULL)
173
189
 *
174
190
 * @param alignments A list of distinct alignments; freed before return [in]
175
191
 * @param oid        Ordinal id of a database sequence [in]
176
 
 * @return Allocated and filled BlastHSPList strucutre.
 
192
 * @param queryInfo  information about all queries in this search [in]
 
193
 * @return Allocated and filled BlastHSPList structure.
177
194
 */
178
195
static BlastHSPList *
179
 
s_HSPListFromDistinctAlignments(
180
 
                                BlastCompo_Alignment ** alignments,
181
 
                                int oid)
 
196
s_HSPListFromDistinctAlignments(BlastCompo_Alignment ** alignments,
 
197
                                int oid,
 
198
                                BlastQueryInfo* queryInfo)
182
199
{
183
 
    const int unknown_value = 0;
184
 
    BlastHSPList * hsp_list = Blast_HSPListNew(0);
185
 
    BlastCompo_Alignment * align;
 
200
    int status;                    /* return code for any routine called */
 
201
    const int unknown_value = 0;   /* dummy constant to use when a
 
202
                                      parameter value is not known */
 
203
    BlastCompo_Alignment * align;  /* an alignment in the list */
 
204
    BlastHSPList * hsp_list;       /* the new HSP list */
186
205
 
 
206
    hsp_list = Blast_HSPListNew(0);
 
207
    if (hsp_list == NULL) {
 
208
        return NULL;
 
209
    }
187
210
    hsp_list->oid = oid;
188
211
 
189
212
    for (align = *alignments;  NULL != align;  align = align->next) {
190
213
        BlastHSP * new_hsp = NULL;
191
214
        GapEditScript * editScript = align->context;
 
215
        int query_offset, queryStart, queryEnd;
192
216
        align->context = NULL;
193
 
        Blast_HSPInit(align->queryStart,   align->queryEnd,
194
 
                      align->matchStart,   align->matchEnd,
195
 
                      unknown_value, unknown_value,
196
 
                      0, 0, align->frame, align->score,
197
 
                      &editScript, &new_hsp);
 
217
        
 
218
        query_offset = queryInfo->contexts[align->queryIndex].query_offset;
 
219
        queryStart = align->queryStart - query_offset;
 
220
        queryEnd = align->queryEnd - query_offset;
198
221
 
 
222
        status = Blast_HSPInit(queryStart, queryEnd,
 
223
                               align->matchStart, align->matchEnd,
 
224
                               unknown_value, unknown_value,
 
225
                               align->queryIndex, 
 
226
                               0, (Int2) align->frame, align->score,
 
227
                               &editScript, &new_hsp);
 
228
        switch (align->matrix_adjust_rule) {
 
229
        case eDontAdjustMatrix:
 
230
            new_hsp->comp_adjustment_method = eNoCompositionBasedStats;
 
231
            break;
 
232
        case eCompoScaleOldMatrix:
 
233
            new_hsp->comp_adjustment_method = eCompositionBasedStats;
 
234
            break;
 
235
        default:
 
236
            new_hsp->comp_adjustment_method = eCompositionMatrixAdjust;
 
237
            break;
 
238
        }
 
239
        if (status != 0)
 
240
            break;
199
241
        /* At this point, the subject and possibly the query sequence have
200
242
         * been filtered; since it is not clear that num_ident of the
201
243
         * filtered sequences, rather than the original, is desired,
202
 
         * explictly leave num_ident blank. */
 
244
         * explicitly leave num_ident blank. */
203
245
        new_hsp->num_ident = 0;
204
246
 
205
 
        Blast_HSPListSaveHSP(hsp_list, new_hsp);
206
 
    }
207
 
    BlastCompo_AlignmentsFree(alignments, s_FreeEditScript);
208
 
    Blast_HSPListSortByScore(hsp_list);
209
 
 
 
247
        status = Blast_HSPListSaveHSP(hsp_list, new_hsp);
 
248
        if (status != 0)
 
249
            break;
 
250
    }
 
251
    if (status == 0) {
 
252
        BlastCompo_AlignmentsFree(alignments, s_FreeEditScript);
 
253
        Blast_HSPListSortByScore(hsp_list);
 
254
    } else {
 
255
        hsp_list = Blast_HSPListFree(hsp_list);
 
256
    }
210
257
    return hsp_list;
211
258
}
212
259
 
213
260
 
214
 
static void
 
261
/**
 
262
 * Adding evalues to a list of HSPs and remove those that do not have
 
263
 * sufficiently good (low) evalue.
 
264
 *
 
265
 * @param *pbestScore      best (highest) score in the list
 
266
 * @param *pbestEvalue     best (lowest) evalue in the list
 
267
 * @param hsp_list         the list
 
268
 * @param subject_length   length of the subject sequence
 
269
 * @param program_number   the type of BLAST search being performed
 
270
 * @param queryInfo        information about the queries
 
271
 * @param sbp              the score block for this search
 
272
 * @param hitParams        parameters used to assign evalues and
 
273
 *                         decide whether to save hits.
 
274
 * @param pvalueForThisPair  composition p-value
 
275
 * @param LambdaRatio        lambda ratio, if available
 
276
 * @param subject_id         index of subject
 
277
 *
 
278
 * @return 0 on success; -1 on failure (can fail because some methods
 
279
 *         of generating evalues use auxiliary structures)
 
280
 */
 
281
static int
215
282
s_HitlistEvaluateAndPurge(int * pbestScore, double *pbestEvalue,
216
283
                          BlastHSPList * hsp_list,
217
284
                          int subject_length,
219
286
                          BlastQueryInfo* queryInfo,
220
287
                          BlastScoreBlk* sbp,
221
288
                          const BlastHitSavingParameters* hitParams,
222
 
                          int do_link_hsps)
 
289
                          double pvalueForThisPair,
 
290
                          double LambdaRatio,
 
291
                          int subject_id)
223
292
{
 
293
    int status = 0;
224
294
    *pbestEvalue = DBL_MAX;
225
295
    *pbestScore  = 0;
226
 
    if (do_link_hsps) {
227
 
        BLAST_LinkHsps(program_number, hsp_list,
228
 
                       queryInfo, subject_length,
229
 
                       sbp, hitParams->link_hsp_params, TRUE);
 
296
    if (hitParams->link_hsp_params) {
 
297
        status = BLAST_LinkHsps(program_number, hsp_list, queryInfo,
 
298
                                subject_length, sbp,
 
299
                                hitParams->link_hsp_params, TRUE);
230
300
    } else {
231
 
        Blast_HSPListGetEvalues(queryInfo, hsp_list, TRUE, sbp,
232
 
                                0.0, /* use a non-zero gap decay only when
233
 
                                        linking hsps */
234
 
                                1.0); /* Use scaling factor equal to
235
 
                                         1, because both scores and
236
 
                                         Lambda are scaled, so they
237
 
                                         will cancel each other. */
238
 
    }
239
 
    Blast_HSPListReapByEvalue(hsp_list, hitParams->options);
240
 
    if (hsp_list->hspcnt > 0) {
241
 
        *pbestEvalue = hsp_list->best_evalue;
242
 
        *pbestScore  = hsp_list->hsp_array[0]->score;
243
 
    }
 
301
        status =
 
302
            Blast_HSPListGetEvalues(queryInfo, hsp_list, TRUE, sbp,
 
303
                                    0.0, /* use a non-zero gap decay
 
304
                                            only when linking HSPs */
 
305
                                    1.0); /* Use scaling factor equal to
 
306
                                             1, because both scores and
 
307
                                             Lambda are scaled, so they
 
308
                                             will cancel each other. */
 
309
    }
 
310
    if (status == 0) {
 
311
        Blast_HSPListReapByEvalue(hsp_list, hitParams->options);
 
312
        if (hsp_list->hspcnt > 0) {
 
313
            *pbestEvalue = hsp_list->best_evalue;
 
314
            *pbestScore  = hsp_list->hsp_array[0]->score;
 
315
        }
 
316
    }
 
317
    return status == 0 ? 0 : -1;
244
318
}
245
319
 
246
320
 
 
321
/**
 
322
 * A callback routine: compute lambda for the given score
 
323
 * probabilities.
 
324
 * (@sa calc_lambda_type).
 
325
 */
247
326
static double
248
327
s_CalcLambda(double probs[], int min_score, int max_score, double lambda0)
249
328
{
250
 
    int i, n;
251
 
    double avg;
252
 
    Blast_ScoreFreq freq;
 
329
   
 
330
    int i;                 /* loop index */      
 
331
    int score_range;       /* range of possible scores */
 
332
    double avg;            /* expected score of aligning two characters */
 
333
    Blast_ScoreFreq freq;  /* score frequency data */
253
334
 
254
 
    n = max_score - min_score + 1;
 
335
    score_range = max_score - min_score + 1;
255
336
    avg = 0.0;
256
 
    for (i = 0;  i < n;  i++) {
 
337
    for (i = 0;  i < score_range;  i++) {
257
338
        avg += (min_score + i) * probs[i];
258
339
    }
259
340
    freq.score_min = min_score;
264
345
    freq.sprob = &probs[-min_score];
265
346
    freq.score_avg = avg;
266
347
 
267
 
    return  Blast_KarlinLambdaNR(&freq, lambda0);
268
 
}
269
 
 
270
 
 
271
 
/** Return the a matrix of the frequency ratios that underlie the
272
 
 * score matrix being used on this pass. The returned matrix
273
 
 * is position-specific, so if we are in the first pass, use
274
 
 * query to convert the 20x20 standard matrix into a position-specific
275
 
 * variant. matrixName is the name of the underlying 20x20
276
 
 * score matrix used. numPositions is the length of the query;
277
 
 * startNumerator is the matrix of frequency ratios as stored
278
 
 * in posit.h. It needs to be divided by the frequency of the
279
 
 * second character to get the intended ratio
280
 
 * @param sbp statistical information for blast [in]
281
 
 * @param query the query sequence [in]
282
 
 * @param matrixName name of the underlying matrix [in]
283
 
 * @param startNumerator matrix of frequency ratios as stored
284
 
 *      in posit.h. It needs to be divided by the frequency of the
285
 
 *      second character to get the intended ratio [in]
286
 
 * @param numPositions length of the query [in]
287
 
 */
288
 
static void
 
348
    return Blast_KarlinLambdaNR(&freq, lambda0);
 
349
}
 
350
 
 
351
 
 
352
/** Fill a two-dimensional array with the frequency ratios that
 
353
 * underlie a position specific score matrix (PSSM).
 
354
 *
 
355
 * @param returnRatios     a two-dimensional array with BLASTAA_SIZE
 
356
 *                         columns
 
357
 * @param numPositions     the number of rows in returnRatios
 
358
 * @param query            query sequence data, of length numPositions
 
359
 * @param matrixName       the name of the position independent matrix
 
360
 *                         corresponding to this PSSM
 
361
 * @param startNumerator   position-specific data used to generate the
 
362
 *                         PSSM
 
363
 * @return   0 on success; -1 if the named matrix isn't known, or if
 
364
 *           there was a memory error
 
365
 * @todo find out what start numerator is.
 
366
 */
 
367
static int
 
368
s_GetPosBasedStartFreqRatios(double ** returnRatios,
 
369
                             Int4 numPositions,
 
370
                             Uint1 * query,
 
371
                             const char *matrixName,
 
372
                             double **startNumerator)
 
373
{
 
374
    Int4 i,j;                            /* loop indices */
 
375
    SFreqRatios * stdFreqRatios = NULL;  /* frequency ratios for the
 
376
                                            named matrix. */
 
377
    double *standardProb;                /* probabilities of each
 
378
                                            letter*/
 
379
    const double kPosEpsilon = 0.0001;   /* values below this cutoff
 
380
                                            are treated specially */
 
381
 
 
382
    stdFreqRatios = _PSIMatrixFrequencyRatiosNew(matrixName);
 
383
    if (stdFreqRatios == NULL) {
 
384
        return -1;
 
385
    }
 
386
    for (i = 0;  i < numPositions;  i++) {
 
387
        for (j = 0;  j < BLASTAA_SIZE;  j++) {
 
388
            returnRatios[i][j] = stdFreqRatios->data[query[i]][j];
 
389
        }
 
390
    }
 
391
    stdFreqRatios = _PSIMatrixFrequencyRatiosFree(stdFreqRatios);
 
392
 
 
393
    standardProb = BLAST_GetStandardAaProbabilities();
 
394
    if(standardProb == NULL) {
 
395
        return -1;
 
396
    }
 
397
    /*reverse multiplication done in posit.c*/
 
398
    for (i = 0;  i < numPositions;  i++) {
 
399
        for (j = 0;  j < BLASTAA_SIZE;  j++) {
 
400
            if ((standardProb[query[i]] > kPosEpsilon) &&
 
401
                (standardProb[j] > kPosEpsilon) &&
 
402
                (j != eStopChar) && (j != eXchar) &&
 
403
                (startNumerator[i][j] > kPosEpsilon)) {
 
404
                returnRatios[i][j] = startNumerator[i][j] / standardProb[j];
 
405
            }
 
406
        }
 
407
    }
 
408
    sfree(standardProb);
 
409
 
 
410
    return 0;
 
411
}
 
412
 
 
413
 
 
414
/**
 
415
 * Fill a two-dimensional array with the frequency ratios that underlie the
 
416
 * named score matrix.
 
417
 *
 
418
 * @param returnRatios  a two-dimensional array of size
 
419
 *                      BLASTAA_SIZE x  BLASTAA_SIZE
 
420
 * @param matrixName    the name of a matrix
 
421
 * @return   0 on success; -1 if the named matrix isn't known, or if
 
422
 *           there was a memory error
 
423
 */
 
424
static int
289
425
s_GetStartFreqRatios(double ** returnRatios,
290
 
                     Uint1 * query,
291
 
                     const char *matrixName,
292
 
                     double **startNumerator,
293
 
                     Int4 numPositions,
294
 
                     Boolean positionSpecific)
 
426
                     const char *matrixName)
295
427
{
296
 
    Int4 i,j;
 
428
    /* Loop indices */
 
429
    int i,j;
 
430
    /* Frequency ratios for the matrix */
297
431
    SFreqRatios * stdFreqRatios = NULL;
298
 
    const double kPosEpsilon = 0.0001;
299
432
 
300
433
    stdFreqRatios = _PSIMatrixFrequencyRatiosNew(matrixName);
301
 
    if (positionSpecific) {
302
 
        for (i = 0;  i < numPositions;  i++) {
303
 
            for (j = 0;  j < BLASTAA_SIZE;  j++) {
304
 
                returnRatios[i][j] = stdFreqRatios->data[query[i]][j];
305
 
            }
306
 
        }
307
 
    } else {
308
 
        for (i = 0;  i < BLASTAA_SIZE;  i++) {
309
 
            for (j = 0;  j < BLASTAA_SIZE;  j++) {
310
 
                returnRatios[i][j] = stdFreqRatios->data[i][j];
311
 
            }
 
434
    if (stdFreqRatios == NULL) {
 
435
        return -1;
 
436
    }
 
437
    for (i = 0;  i < BLASTAA_SIZE;  i++) {
 
438
        for (j = 0;  j < BLASTAA_SIZE;  j++) {
 
439
            returnRatios[i][j] = stdFreqRatios->data[i][j];
312
440
        }
313
441
    }
314
442
    stdFreqRatios = _PSIMatrixFrequencyRatiosFree(stdFreqRatios);
315
443
 
316
 
    if (positionSpecific) {
317
 
        double *standardProb; /*probabilities of each letter*/
318
 
        standardProb = BLAST_GetStandardAaProbabilities();
319
 
 
320
 
        /*reverse multiplication done in posit.c*/
321
 
        for (i = 0;  i < numPositions;  i++) {
322
 
            for (j = 0;  j < BLASTAA_SIZE;  j++) {
323
 
                if ((standardProb[query[i]] > kPosEpsilon) &&
324
 
                    (standardProb[j] > kPosEpsilon) &&
325
 
                    (j != eStopChar) && (j != eXchar) &&
326
 
                    (startNumerator[i][j] > kPosEpsilon)) {
327
 
                    returnRatios[i][j] = startNumerator[i][j]/standardProb[j];
328
 
                }
329
 
            }
330
 
        }
331
 
        sfree(standardProb);
332
 
    }
 
444
    return 0;
333
445
}
334
446
 
335
447
 
336
448
/** SCALING_FACTOR is a multiplicative factor used to get more bits of
337
449
 * precision in the integer matrix scores. It cannot be arbitrarily
338
 
 * large because we do not want total alignment scores to exceedto
 
450
 * large because we do not want total alignment scores to exceed
339
451
 * -(BLAST_SCORE_MIN) */
340
452
#define SCALING_FACTOR 32
341
453
 
342
454
 
343
455
/**
344
 
 * produce a scaled-up version of the position-specific matrix
345
 
 * starting from posFreqs
 
456
 * Produce a scaled-up version of the position-specific matrix
 
457
 * with a given set of position-specific residue frequencies.
346
458
 *
347
459
 * @param fillPosMatrix     is the matrix to be filled
348
 
 * @param nonposMatrix      is the underlying position-independent matrix,
349
 
 *                          used to fill positions where frequencies are
350
 
 *                          irrelevant
351
 
 * @param sbp               stores various parameters of the search
352
460
 * @param matrixName        name of the standard substitution matrix [in]
353
461
 * @param posFreqs          PSSM's frequency ratios [in]
354
462
 * @param query             Query sequence data [in]
355
463
 * @param queryLength       Length of the query sequence above [in]
 
464
 * @param sbp               stores various parameters of the search
 
465
 * @param scale_factor      amount by which ungapped parameters should be
 
466
 *                          scaled.
 
467
 * @return 0 on success; -1 on failure
356
468
 */
357
469
static int
358
 
s_ScalePosMatrix(int **fillPosMatrix,
359
 
                 int **nonposMatrix,
360
 
                 const char *matrixName,
361
 
                 double **posFreqs,
362
 
                 Uint1 *query,
 
470
s_ScalePosMatrix(int ** fillPosMatrix,
 
471
                 const char * matrixName,
 
472
                 double ** posFreqs,
 
473
                 Uint1 * query,
363
474
                 int queryLength,
364
 
                 BlastScoreBlk* sbp)
 
475
                 BlastScoreBlk* sbp,
 
476
                 double scale_factor)
365
477
{
 
478
    /* Data used by scaling routines */    
366
479
    Kappa_posSearchItems *posSearch = NULL;
 
480
    /* A reduced collection of search parameters used by PSI-blast */
367
481
    Kappa_compactSearchItems *compactSearch = NULL;
 
482
    /* Representation of a PSSM internal to PSI-blast */
368
483
    _PSIInternalPssmData* internal_pssm = NULL;
369
 
    int status = PSI_SUCCESS;
 
484
    /* return code */
 
485
    int status = 0;
370
486
 
371
487
    posSearch = Kappa_posSearchItemsNew(queryLength, matrixName,
372
488
                                        fillPosMatrix, posFreqs);
373
489
    compactSearch = Kappa_compactSearchItemsNew(query, queryLength, sbp);
374
 
 
375
490
    /* Copy data into new structures */
376
491
    internal_pssm = _PSIInternalPssmDataNew(queryLength, BLASTAA_SIZE);
 
492
    if (posSearch == NULL || compactSearch == NULL || internal_pssm == NULL) {
 
493
        status = -1;
 
494
        goto cleanup;
 
495
    }
377
496
    _PSICopyMatrix_int(internal_pssm->pssm, posSearch->posMatrix,
378
497
                       internal_pssm->ncols, internal_pssm->nrows);
379
498
    _PSICopyMatrix_int(internal_pssm->scaled_pssm,
384
503
                          internal_pssm->nrows);
385
504
    status = _PSIConvertFreqRatiosToPSSM(internal_pssm, query, sbp,
386
505
                                         compactSearch->standardProb);
387
 
    if (status != PSI_SUCCESS) {
388
 
        internal_pssm = _PSIInternalPssmDataFree(internal_pssm);
389
 
        posSearch = Kappa_posSearchItemsFree(posSearch);
390
 
        compactSearch = Kappa_compactSearchItemsFree(compactSearch);
391
 
        return status;
 
506
    if (status != 0) {
 
507
        goto cleanup;
392
508
    }
393
509
    /* Copy data from new structures to posSearchItems */
394
510
    _PSICopyMatrix_int(posSearch->posMatrix, internal_pssm->pssm,
400
516
                          internal_pssm->freq_ratios,
401
517
                          internal_pssm->ncols, internal_pssm->nrows);
402
518
    status = Kappa_impalaScaling(posSearch, compactSearch, (double)
403
 
                                 SCALING_FACTOR, FALSE, sbp);
404
 
    if (status != 0) {
405
 
        internal_pssm = _PSIInternalPssmDataFree(internal_pssm);
406
 
        posSearch = Kappa_posSearchItemsFree(posSearch);
407
 
        compactSearch = Kappa_compactSearchItemsFree(compactSearch);
408
 
        return status;
409
 
    }
 
519
                                 scale_factor, FALSE, sbp);
 
520
cleanup:
410
521
    internal_pssm = _PSIInternalPssmDataFree(internal_pssm);
411
522
    posSearch = Kappa_posSearchItemsFree(posSearch);
412
523
    compactSearch = Kappa_compactSearchItemsFree(compactSearch);
 
524
 
413
525
    return status;
414
526
}
415
527
 
416
528
 
 
529
/**
 
530
 * Convert an array of HSPs to a list of BlastCompo_Alignment objects.
 
531
 * The context field of each BlastCompo_Alignment is set to point to the
 
532
 * corresponding HSP.
 
533
 *
 
534
 * @param hsp_array             an array of HSPs
 
535
 * @param hspcnt                the length of hsp_array
 
536
 * @param queryInfo            information about the concatenated query
 
537
 * @param localScalingFactor    the amount by which this search is scaled
 
538
 *
 
539
 * @return the new list of alignments; or NULL if there is an out-of-memory
 
540
 *         error (or if the original array is empty)
 
541
 */
417
542
static BlastCompo_Alignment *
418
 
s_ResultHspToDistinctAlign(BlastQueryInfo* queryInfo,
419
 
                           BlastHSP * hsp_array[], Int4 hspcnt,
 
543
s_ResultHspToDistinctAlign(BlastHSP * hsp_array[], Int4 hspcnt,
 
544
                           BlastQueryInfo* queryInfo,
420
545
                           double localScalingFactor)
421
546
{
422
 
    BlastCompo_Alignment *aligns = NULL, *tail = NULL, *new_align = NULL;
423
 
    int hsp_index;
 
547
    BlastCompo_Alignment * aligns = NULL;      /* new list of alignments */
 
548
    BlastCompo_Alignment * tail = NULL;        /* last element in aligns */
 
549
    int hsp_index;                             /* loop index */
 
550
    
424
551
    for (hsp_index = 0;  hsp_index < hspcnt;  hsp_index++) {
425
 
        int queryIndex, queryEnd, matchEnd;
426
 
        BlastHSP * hsp = hsp_array[hsp_index];
427
 
        queryEnd = hsp->query.end;
428
 
        matchEnd = hsp->subject.end;
429
 
        /* YIKES! how do we handle multiple queries */
430
 
        /*
431
 
          if(search->mult_queries != NULL) {
432
 
          queryIndex =
433
 
          GetQueryNum(search->mult_queries,
434
 
          hsp->query_offset, queryEnd - 1, 0);
435
 
          } else {
436
 
          queryIndex = 0;
437
 
          }
 
552
        BlastHSP * hsp = hsp_array[hsp_index]; /* current HSP */
 
553
        BlastCompo_Alignment * new_align;      /* newly-created alignment */
 
554
        int queryStart;                        /* start of the query context
 
555
                                                  in the concatenated query */
 
556
        int queryOffset, queryEnd;             /* coordinates of the
 
557
                                                  query portion of the
 
558
                                                  concatenated query */
 
559
        /* Incoming alignments will have coordinates of the query
 
560
           portion relative to a particular query context; they must
 
561
           be shifted for used in the composition_adjustment library.
438
562
        */
439
 
        queryIndex = 0;
 
563
        queryStart = queryInfo->contexts[hsp->context].query_offset;
 
564
        queryOffset = hsp->query.offset + queryStart;
 
565
        queryEnd = hsp->query.end + queryStart;
440
566
        new_align =
441
 
            BlastCompo_AlignmentNew(hsp->score * localScalingFactor,
442
 
                                    eNoCompositionAdjustment,
443
 
                                    hsp->query.offset, queryEnd, queryIndex,
444
 
                                    hsp->subject.offset, matchEnd,
 
567
            BlastCompo_AlignmentNew((int) (hsp->score * localScalingFactor),
 
568
                                    eDontAdjustMatrix,
 
569
                                    queryOffset, queryEnd, hsp->context,
 
570
                                    hsp->subject.offset, hsp->subject.end,
445
571
                                    hsp->subject.frame, hsp);
446
572
        if (new_align == NULL) /* out of memory */
447
573
            goto error_return;
448
 
        if (tail == NULL) {
 
574
        if (tail == NULL) { /* if the list aligns is empty; */
 
575
            /* make new_align the first element in the list */
449
576
            aligns = new_align;
450
577
        } else {
 
578
            /* otherwise add new_align to the end of the list */
451
579
            tail->next = new_align;
452
580
        }
453
581
        tail = new_align;
462
590
 
463
591
/**
464
592
 * Redo a S-W alignment using an x-drop alignment.  The result will
465
 
 * usually be the same as the S-W alignment. The call to ALIGN
 
593
 * usually be the same as the S-W alignment. The call to ALIGN_EX
466
594
 * attempts to force the endpoints of the alignment to match the
467
595
 * optimal endpoints determined by the Smith-Waterman algorithm.
468
 
 * ALIGN is used, so that if the data structures for storing BLAST
 
596
 * ALIGN_EX is used, so that if the data structures for storing BLAST
469
597
 * alignments are changed, the code will not break
470
598
 *
471
599
 * @param query         the query data
476
604
 * @param matchStart    start of the alignment in the subject sequence
477
605
 * @param matchEnd      end of the alignment in the query sequence,
478
606
 *                      as computed by the Smith-Waterman algorithm
479
 
 * @param scoringParams Settings for gapped alignment.[in]
480
607
 * @param gap_align     parameters for a gapped alignment
 
608
 * @param scoringParams Settings for gapped alignment.[in]
481
609
 * @param score         score computed by the Smith-Waterman algorithm
482
 
 * @param localScalingFactor    the factor by which the scoring system has
483
 
 *                              been scaled in order to obtain greater
484
 
 *                              precision
485
610
 * @param queryAlignmentExtent  length of the alignment in the query sequence,
486
611
 *                              as computed by the x-drop algorithm
487
612
 * @param matchAlignmentExtent  length of the alignment in the subject
490
615
 *                              algorithm
491
616
 */
492
617
static void
493
 
s_SWFindFinalEndsUsingXdrop(
494
 
                            BlastCompo_SequenceData * query,
 
618
s_SWFindFinalEndsUsingXdrop(BlastCompo_SequenceData * query,
495
619
                            Int4 queryStart,
496
620
                            Int4 queryEnd,
497
621
                            BlastCompo_SequenceData * subject,
500
624
                            BlastGapAlignStruct* gap_align,
501
625
                            const BlastScoringParameters* scoringParams,
502
626
                            Int4 score,
503
 
                            double localScalingFactor,
504
627
                            Int4 * queryAlignmentExtent,
505
628
                            Int4 * matchAlignmentExtent,
506
629
                            Int4 * newScore)
509
632
                                   * method rather than Smith-Waterman */
510
633
    Int4 doublingCount = 0;       /* number of times X-dropoff had to be
511
634
                                   * doubled */
 
635
    Int4 gap_x_dropoff_orig = gap_align->gap_x_dropoff;
512
636
 
513
637
    GapPrelimEditBlockReset(gap_align->rev_prelim_tback);
514
638
    GapPrelimEditBlockReset(gap_align->fwd_prelim_tback);
528
652
        }
529
653
    } while((XdropAlignScore < score) && (doublingCount < 3));
530
654
 
 
655
    gap_align->gap_x_dropoff = gap_x_dropoff_orig;
531
656
    *newScore = XdropAlignScore;
532
657
}
533
658
 
534
659
 
535
660
/**
536
 
 * A Kappa_MatchingSequence represents a subject sequence to be aligned
537
 
 * with the query.  This abstract sequence is used to hide the
538
 
 * complexity associated with actually obtaining and releasing the
539
 
 * data for a matching sequence, e.g. reading the sequence from a DB
540
 
 * or translating it from a nucleotide sequence.
541
 
 *
542
 
 * We draw a distinction between a sequence itself, and strings of
543
 
 * data that may be obtained from the sequence.  The amino
544
 
 * acid/nucleotide data is represented by an object of type
545
 
 * BlastCompo_SequenceData.  There may be more than one instance of
546
 
 * BlastCompo_SequenceData per Kappa_MatchingSequence, each representing a
547
 
 * different range in the sequence, or a different translation frame.
 
661
 * BLAST-specific information that is associated with a
 
662
 * BlastCompo_MatchingSequence.
548
663
 */
549
 
typedef struct Kappa_SequenceLocalData {
 
664
typedef struct
 
665
BlastKappa_SequenceInfo {
550
666
    EBlastProgramType prog_number; /**< identifies the type of blast
551
667
                                        search being performed. The type
552
668
                                        of search determines how sequence
558
674
                                     structure was designed to be
559
675
                                     allocated on the stack, i.e.: in
560
676
                                     Kappa_MatchingSequenceInitialize) */
561
 
} Kappa_SequenceLocalData;
 
677
} BlastKappa_SequenceInfo;
 
678
 
 
679
 
 
680
/** Release the resources associated with a matching sequence. */
 
681
static void
 
682
s_MatchingSequenceRelease(BlastCompo_MatchingSequence * self)
 
683
{
 
684
    if (self != NULL) {
 
685
        BlastKappa_SequenceInfo * local_data = self->local_data;
 
686
        BlastSeqSrcReleaseSequence(local_data->seq_src,
 
687
                                   (void*)&local_data->seq_arg);
 
688
        BlastSequenceBlkFree(local_data->seq_arg.seq);
 
689
        free(self->local_data);
 
690
        self->local_data = NULL;
 
691
    }
 
692
}
562
693
 
563
694
 
564
695
/**
573
704
 * @param gen_code_string   genetic code for translated queries
574
705
 * @param subject_index     index of the matching sequence in the database
575
706
 */
576
 
static void
577
 
s_MatchingSequenceInitialize(
578
 
                             BlastCompo_MatchingSequence * self,
 
707
static int
 
708
s_MatchingSequenceInitialize(BlastCompo_MatchingSequence * self,
579
709
                             EBlastProgramType program_number,
580
710
                             const BlastSeqSrc* seqSrc,
581
711
                             const Uint1* gen_code_string,
582
712
                             Int4 subject_index)
583
713
{
584
 
    Kappa_SequenceLocalData * local_data =
585
 
        malloc(sizeof(Kappa_SequenceLocalData));
586
 
    self->local_data = local_data;
587
 
 
588
 
    local_data->seq_src      = seqSrc;
589
 
    local_data->prog_number  = program_number;
590
 
    local_data->genetic_code = gen_code_string;
591
 
 
592
 
    memset((void*) &local_data->seq_arg, 0, sizeof(local_data ->seq_arg));
593
 
    local_data->seq_arg.oid = self->index = subject_index;
594
 
 
595
 
    if( program_number == eBlastTypeTblastn ) {
596
 
        local_data->seq_arg.encoding = eBlastEncodingNcbi4na;
597
 
    } else {
598
 
        local_data->seq_arg.encoding = eBlastEncodingProtein;
599
 
    }
600
 
    if (BlastSeqSrcGetSequence(seqSrc, (void*) &local_data->seq_arg) >= 0) {
601
 
        self->length =
602
 
            BlastSeqSrcGetSeqLen(seqSrc, (void*) &local_data->seq_arg);
603
 
    } else {
604
 
        self->length = 0;
605
 
    }
606
 
}
607
 
 
608
 
 
609
 
/** Release the resources associated with a matching sequence. */
610
 
static void
611
 
s_MatchingSequenceRelease(BlastCompo_MatchingSequence * self)
612
 
{
613
 
    if (self != NULL) {
614
 
        Kappa_SequenceLocalData * local_data = self->local_data;
615
 
        BlastSeqSrcReleaseSequence(local_data->seq_src,
616
 
                                   (void*)&local_data->seq_arg);
617
 
        BlastSequenceBlkFree(local_data->seq_arg.seq);
618
 
        free(self->local_data);
619
 
        self->local_data = NULL;
 
714
    BlastKappa_SequenceInfo * seq_info;  /* BLAST-specific sequence
 
715
                                            information */
 
716
    self->length = 0;
 
717
    self->local_data = NULL;
 
718
 
 
719
    seq_info = malloc(sizeof(BlastKappa_SequenceInfo));
 
720
    if (seq_info != NULL) {
 
721
        self->local_data = seq_info;
 
722
 
 
723
        seq_info->seq_src      = seqSrc;
 
724
        seq_info->prog_number  = program_number;
 
725
        seq_info->genetic_code = gen_code_string;
 
726
 
 
727
        memset((void*) &seq_info->seq_arg, 0, sizeof(seq_info->seq_arg));
 
728
        seq_info->seq_arg.oid = self->index = subject_index;
 
729
 
 
730
        if( program_number == eBlastTypeTblastn ) {
 
731
            seq_info->seq_arg.encoding = eBlastEncodingNcbi4na;
 
732
        } else {
 
733
            seq_info->seq_arg.encoding = eBlastEncodingProtein;
 
734
        }
 
735
        if (BlastSeqSrcGetSequence(seqSrc, (void*) &seq_info->seq_arg) >= 0) {
 
736
            self->length =
 
737
                BlastSeqSrcGetSeqLen(seqSrc, (void*) &seq_info->seq_arg);
 
738
        } else {
 
739
            self->length = 0;
 
740
        }
 
741
    }
 
742
    if (self->length == 0) {
 
743
        /* Could not obtain the required data */
 
744
        s_MatchingSequenceRelease(self);
 
745
        return -1;
 
746
    } else {
 
747
        return 0;
620
748
    }
621
749
}
622
750
 
628
756
 
629
757
 
630
758
/**
 
759
 * Filter low complexity regions from the sequence data; uses the SEG
 
760
 * algorithm.
 
761
 *
 
762
 * @param seqData            data to be filtered
 
763
 * @param program_name       type of search being performed
 
764
 * @return   0 for success; -1 for out-of-memory
 
765
 */
 
766
static int
 
767
s_DoSegSequenceData(BlastCompo_SequenceData * seqData,
 
768
                    EBlastProgramType program_name)
 
769
{
 
770
    int status = 0;
 
771
    BlastSeqLoc* mask_seqloc = NULL;
 
772
    SBlastFilterOptions* filter_options = NULL;
 
773
 
 
774
    status = BlastFilteringOptionsFromString(program_name,
 
775
                                             BLASTP_MASK_INSTRUCTIONS,
 
776
                                             &filter_options, NULL);
 
777
    if (status == 0) {
 
778
        status = BlastSetUp_Filter(program_name, seqData->data,
 
779
                                   seqData->length, 0, filter_options,
 
780
                                   &mask_seqloc, NULL);
 
781
        filter_options = SBlastFilterOptionsFree(filter_options);
 
782
    }
 
783
    if (status == 0) {
 
784
        Blast_MaskTheResidues(seqData->data, seqData->length,
 
785
                              FALSE, mask_seqloc, FALSE, 0);
 
786
    }
 
787
    if (mask_seqloc != NULL) {
 
788
        mask_seqloc = BlastSeqLocFree(mask_seqloc);
 
789
    }
 
790
    return status;
 
791
}
 
792
 
 
793
 
 
794
/**
631
795
 * Obtain a string of translated data
632
796
 *
633
797
 * @param self          the sequence from which to obtain the data [in]
634
798
 * @param range         the range and translation frame to get [in]
635
799
 * @param seqData       the resulting data [out]
 
800
 *
 
801
 * @return 0 on success; -1 on failure
636
802
 */
637
 
static void
 
803
static int
638
804
s_SequenceGetTranslatedRange(const BlastCompo_MatchingSequence * self,
639
805
                             const BlastCompo_SequenceRange * range,
640
806
                             BlastCompo_SequenceData * seqData )
641
807
{
642
 
    ASSERT( 0 && "Not implemented" );
 
808
    int status = 0;
 
809
    BlastKappa_SequenceInfo * local_data; /* BLAST-specific
 
810
                                             information associated
 
811
                                             with the sequence */
 
812
    Uint1 * translation_buffer;           /* a buffer for the translated,
 
813
                                             amino-acid sequence */
 
814
    Int4 translated_length;  /* length of the translated sequence */
 
815
    int translation_frame;   /* frame in which to translate */
 
816
    Uint1 * na_sequence;     /* the nucleotide sequence */
 
817
    int translation_start;   /* location in na_sequence to start
 
818
                                translating */
 
819
    int num_nucleotides;     /* the number of nucleotides to be translated */
 
820
    
 
821
    local_data = self->local_data;
 
822
    na_sequence = local_data->seq_arg.seq->sequence_start;
 
823
 
 
824
    /* Initialize seqData to nil, in case this routine fails */
 
825
    seqData->buffer = NULL;
 
826
    seqData->data = NULL;
 
827
    seqData->length = 0;
 
828
 
 
829
    translation_frame = range->context;
 
830
    if (translation_frame > 0) {
 
831
        translation_start = 3 * range->begin;
 
832
    } else {
 
833
        translation_start =
 
834
            self->length - 3 * range->end + translation_frame + 1;
 
835
    }
 
836
    num_nucleotides =
 
837
        3 * (range->end - range->begin) + ABS(translation_frame) - 1;
 
838
    
 
839
    status = Blast_GetPartialTranslation(na_sequence + translation_start,
 
840
                                         num_nucleotides,
 
841
                                         (Int2) translation_frame,
 
842
                                         local_data->genetic_code,
 
843
                                         &translation_buffer,
 
844
                                         &translated_length,
 
845
                                         NULL);
 
846
    if (status == 0) {
 
847
        seqData->buffer = translation_buffer;
 
848
        seqData->data   = translation_buffer + 1;
 
849
        seqData->length = translated_length;
 
850
 
 
851
        if ( !(KAPPA_TBLASTN_NO_SEG_SEQUENCE) ) {
 
852
            status = s_DoSegSequenceData(seqData, eBlastTypeTblastn);
 
853
            if (status != 0) {
 
854
                free(seqData->buffer);
 
855
                seqData->buffer = NULL;
 
856
                seqData->data = NULL;
 
857
                seqData->length = 0;
 
858
            }
 
859
        }
 
860
    }
 
861
    return status;
 
862
}
 
863
 
 
864
 
 
865
/**
 
866
 * Get a string of protein data from a protein sequence.
 
867
 *
 
868
 * @param self          a protein sequence [in]
 
869
 * @param range         the range to get [in]
 
870
 * @param seqData       the resulting data [out]
 
871
 *
 
872
 * @return 0 on success; -1 on failure
 
873
 */
 
874
static int
 
875
s_SequenceGetProteinRange(const BlastCompo_MatchingSequence * self,
 
876
                          const BlastCompo_SequenceRange * range,
 
877
                          BlastCompo_SequenceData * seqData )
 
878
{
 
879
    int status = 0;       /* return status */
 
880
    Int4       idx;       /* loop index */
 
881
    Uint1     *origData;  /* the unfiltered data for the sequence */
 
882
    /* BLAST-specific sequence information */
 
883
    BlastKappa_SequenceInfo * local_data = self->local_data;
 
884
 
 
885
    seqData->data = NULL;
 
886
    seqData->length = 0;
 
887
    /* Copy the entire sequence (necessary for SEG filtering.) */
 
888
    seqData->buffer  = calloc((self->length + 2), sizeof(Uint1));
 
889
    if (seqData->buffer == NULL) {
 
890
        return -1;
 
891
    }
 
892
    /* First and last characters of the buffer MUST be '\0', which is
 
893
     * true here because the buffer was allocated using calloc. */
 
894
    seqData->data    = seqData->buffer + 1;
 
895
    seqData->length  = self->length;
 
896
 
 
897
    origData = local_data->seq_arg.seq->sequence;
 
898
    for (idx = 0;  idx < seqData->length;  idx++) {
 
899
        /* Copy the sequence data, replacing occurrences of amino acid
 
900
         * number 24 (Selenocysteine) with number 21 (Undetermined or
 
901
         * atypical). */
 
902
        if (origData[idx] != 24) {
 
903
            seqData->data[idx] = origData[idx];
 
904
        } else {
 
905
            seqData->data[idx] = 21;
 
906
            fprintf(stderr, "Selenocysteine (U) at position %ld"
 
907
                    " replaced by X\n",
 
908
                    (long) idx + 1);
 
909
        }
 
910
    }
 
911
    if ( !(KAPPA_BLASTP_NO_SEG_SEQUENCE) ) {
 
912
        status = s_DoSegSequenceData(seqData, eBlastTypeBlastp);
 
913
    }
 
914
    /* Fit the data to the range. */
 
915
    seqData ->data    = &seqData->data[range->begin - 1];
 
916
    *seqData->data++  = '\0';
 
917
    seqData ->length  = range->end - range->begin;
 
918
 
 
919
    if (status != 0) {
 
920
        free(seqData->buffer);
 
921
        seqData->buffer = NULL;
 
922
        seqData->data = NULL;
 
923
    }
 
924
    return status;
643
925
}
644
926
 
645
927
 
649
931
 * @param self          sequence information [in]
650
932
 * @param range        range specifying the range of data [in]
651
933
 * @param seqData       the sequence data obtained [out]
 
934
 *
 
935
 * @return 0 on success; -1 on failure
652
936
 */
653
937
static int
654
 
s_SequenceGetRange(
655
 
                   const BlastCompo_MatchingSequence * self,
 
938
s_SequenceGetRange(const BlastCompo_MatchingSequence * self,
656
939
                   const BlastCompo_SequenceRange * range,
657
940
                   BlastCompo_SequenceData * seqData )
658
941
{
659
 
    Kappa_SequenceLocalData * local_data = self->local_data;
660
 
    if (local_data->prog_number ==  eBlastTypeTblastn) {
 
942
    BlastKappa_SequenceInfo * seq_info = self->local_data;
 
943
    if (seq_info->prog_number ==  eBlastTypeTblastn) {
661
944
        /* The sequence must be translated. */
662
 
        s_SequenceGetTranslatedRange(self, range, seqData);
663
 
    } else {
664
 
        /* The sequence does not need to be translated. */
665
 
        Int4       idx;
666
 
        Uint1     *origData;  /* the unfiltered data for the sequence */
667
 
 
668
 
        /* Copy the entire sequence (necessary for SEG filtering.) */
669
 
        seqData->buffer  = calloc((self->length + 2), sizeof(Uint1));
670
 
        /* First and last characters of the buffer MUST be '\0', which is
671
 
         * true here because the buffer was allocated using calloc. */
672
 
        seqData->data    = seqData->buffer + 1;
673
 
        seqData->length  = self->length;
674
 
 
675
 
        origData = local_data->seq_arg.seq->sequence;
676
 
        for (idx = 0;  idx < seqData->length;  idx++) {
677
 
            /* Copy the sequence data, replacing occurrences of amino acid
678
 
             * number 24 (Selenocysteine) with number 21 (Undetermined or
679
 
             * atypical). */
680
 
            if (origData[idx] != 24) {
681
 
                seqData->data[idx] = origData[idx];
682
 
            } else {
683
 
                seqData->data[idx] = 21;
684
 
                fprintf(stderr, "Selenocysteine (U) at position %ld"
685
 
                        " replaced by X\n",
686
 
                        (long) idx + 1);
687
 
            }
688
 
        }
689
 
#ifndef KAPPA_NO_SEG_SEQUENCE
690
 
        /* take as input an amino acid  string and its length; compute
691
 
         * a filtered amino acid string and return the filtered string */
692
 
        {{
693
 
            BlastSeqLoc* mask_seqloc;
694
 
            const EBlastProgramType k_program_name = eBlastTypeBlastp;
695
 
            SBlastFilterOptions* filter_options;
696
 
 
697
 
            BlastFilteringOptionsFromString(k_program_name,
698
 
                                            BLASTP_MASK_INSTRUCTIONS,
699
 
                                            &filter_options, NULL);
700
 
            BlastSetUp_Filter(k_program_name, seqData->data, seqData->length,
701
 
                              0, filter_options, &mask_seqloc, NULL);
702
 
            filter_options = SBlastFilterOptionsFree(filter_options);
703
 
 
704
 
            Blast_MaskTheResidues(seqData->data, seqData->length,
705
 
                                  FALSE, mask_seqloc, FALSE, 0);
706
 
            mask_seqloc = BlastSeqLocFree(mask_seqloc);
707
 
        }}
708
 
#endif
709
 
        /* Fit the data to the range. */
710
 
        seqData ->data    = &seqData->data[range->begin - 1];
711
 
        *seqData->data++  = '\0';
712
 
        seqData ->length  = range->end - range->begin;
713
 
    } /* end else the sequence does not need to be translated */
714
 
    return 0;
715
 
}
716
 
 
717
 
 
718
 
/**
719
 
 * Computes an appropriate starting point for computing the traceback
720
 
 * for an HSP.  The start point depends on the matrix, the window, and
721
 
 * the filtered sequence, and so may not be the start point saved in
722
 
 * the HSP during the preliminary gapped extension.
723
 
 *
724
 
 * @param q_start       the start point in the query [out]
725
 
 * @param s_start       the start point in the subject [out]
726
 
 * @param sbp           general scoring info (includes the matrix) [in]
727
 
 * @param positionBased is this search position-specific? [in]
728
 
 * @param hsp           the HSP to be considered [in]
729
 
 * @param window        the window used to compute the traceback [in]
730
 
 * @param query         the query data [in]
731
 
 * @param subject       the subject data [in]
732
 
 */
733
 
/* WHY */
734
 
static void
735
 
s_StartingPointForHit(Int4 * q_start,
736
 
                      Int4 * s_start,
737
 
                      const BlastScoreBlk* sbp,
738
 
                      Boolean positionBased,
739
 
                      BlastHSP * hsp,
740
 
                      BlastCompo_SequenceRange * range,
741
 
                      BlastCompo_SequenceData * query,
742
 
                      BlastCompo_SequenceData * subject)
743
 
{
744
 
    hsp->subject.offset       -= range->begin;
745
 
    hsp->subject.gapped_start -= range->begin;
746
 
 
747
 
    if(BLAST_CheckStartForGappedAlignment(hsp, query->data,
748
 
                                          subject->data, sbp)) {
749
 
        /* We may use the starting point supplied by the HSP. */
750
 
        *q_start = hsp->query.gapped_start;
751
 
        *s_start = hsp->subject.gapped_start;
752
 
    } else {
753
 
        /* We must recompute the start for the gapped alignment, as the
754
 
           one in the HSP was unacceptable.*/
755
 
        *q_start =
756
 
            BlastGetStartForGappedAlignment(query->data,
757
 
                                            subject->data, sbp,
758
 
                                            hsp->query.offset,
759
 
                                            hsp->query.end -
760
 
                                            hsp->query.offset,
761
 
                                            hsp->subject.offset,
762
 
                                            hsp->subject.end -
763
 
                                            hsp->subject.offset);
764
 
        *s_start =
765
 
            (hsp->subject.offset - hsp->query.offset) + *q_start;
 
945
        return s_SequenceGetTranslatedRange(self, range, seqData);
 
946
    } else {
 
947
        return s_SequenceGetProteinRange(self, range, seqData);
766
948
    }
767
949
}
768
950
 
769
951
 
770
 
struct Blast_GappingParamsContext {
771
 
    BlastGapAlignStruct * gap_align;
772
 
    const BlastScoringParameters* scoringParams;
773
 
    BlastScoreBlk* sbp;
774
 
    double localScalingFactor;
775
 
    Int4 prog_number;
776
 
};
777
 
typedef struct Blast_GappingParamsContext Blast_GappingParamsContext;
 
952
/** Data and data-structures needed to perform a gapped alignment */
 
953
typedef struct BlastKappa_GappingParamsContext {
 
954
    const BlastScoringParameters*
 
955
        scoringParams;                /**< scoring parameters for a
 
956
                                           gapped alignment */
 
957
    BlastGapAlignStruct * gap_align;  /**< additional parameters for a
 
958
                                           gapped alignment */
 
959
    BlastScoreBlk* sbp;               /**< the score block for this search */
 
960
    double localScalingFactor;        /**< the amount by which this
 
961
                                           search has been scaled */
 
962
    EBlastProgramType prog_number;    /**< the type of search being
 
963
                                           performed */
 
964
} BlastKappa_GappingParamsContext;
778
965
 
779
966
 
780
967
/**
781
 
 * Reads a GapAlignBlk that has been used to compute a traceback, and
782
 
 * return a BlastCompo_Alignment representing the alignment.
783
 
 *
784
 
 * @param gap_align         the GapAlignBlk
785
 
 * @param window            the window used to compute the traceback
 
968
 * Reads a BlastGapAlignStruct that has been used to compute a
 
969
 * traceback, and return a BlastCompo_Alignment representing the
 
970
 * alignment.  The BlastGapAlignStruct is in coordinates local to the
 
971
 * ranges being aligned; the resulting alignment is in coordinates w.r.t.
 
972
 * the whole query and subject.
 
973
 *
 
974
 * @param gap_align         the BlastGapAlignStruct
 
975
 * @param *edit_script      the edit script from the alignment; on exit
 
976
 *                          NULL.  The edit_script is usually
 
977
 *                          gap_align->edit_script, but we don't want
 
978
 *                          an implicit side effect on the gap_align.
 
979
 * @param query_range       the range of the query used in this alignment
 
980
 * @param subject_range     the range of the subject used in this alignment
 
981
 * @param matrix_adjust_rule   the rule used to compute the scoring matrix
 
982
 *
 
983
 * @return the new alignment on success or NULL on error
786
984
 */
787
985
static BlastCompo_Alignment *
788
986
s_NewAlignmentFromGapAlign(BlastGapAlignStruct * gap_align,
 
987
                           GapEditScript ** edit_script,
789
988
                           BlastCompo_SequenceRange * query_range,
790
989
                           BlastCompo_SequenceRange * subject_range,
791
 
                           int whichMode)
 
990
                           EMatrixAdjustRule matrix_adjust_rule)
792
991
{
 
992
    /* parameters to BlastCompo_AlignmentNew */
793
993
    int queryStart, queryEnd, queryIndex, matchStart, matchEnd, frame;
794
994
    BlastCompo_Alignment * obj; /* the new alignment */
795
995
 
 
996
    /* In the composition_adjustment library, the query start/end are
 
997
       indices into the concatenated query, and so must be shifted.  */
796
998
    queryStart = gap_align->query_start + query_range->begin;
797
999
    queryEnd   = gap_align->query_stop + query_range->begin;
798
1000
    queryIndex = query_range->context;
800
1002
    matchEnd   = gap_align->subject_stop  + subject_range->begin;
801
1003
    frame      = subject_range->context;
802
1004
 
803
 
    obj = BlastCompo_AlignmentNew(gap_align->score, whichMode,
 
1005
    obj = BlastCompo_AlignmentNew(gap_align->score, matrix_adjust_rule,
804
1006
                                  queryStart, queryEnd, queryIndex,
805
1007
                                  matchStart, matchEnd, frame,
806
 
                                  gap_align->edit_script);
807
 
    gap_align->edit_script = NULL;
 
1008
                                  *edit_script);
 
1009
    if (obj != NULL) {
 
1010
        *edit_script = NULL;
 
1011
    }
808
1012
    return obj;
809
1013
}
810
1014
 
811
1015
 
812
 
/**
813
 
 * Create a new BlastCompo_Alignment and append the list of
814
 
 * alignments represented by "next."
815
 
 *
816
 
 * @param query         query sequence data
817
 
 * @param queryStart    the start of the alignment in the query
818
 
 * @param queryEnd      the end of the alignment in the query
819
 
 * @param subject       subject sequence data
820
 
 * @param matchStart    the start of the alignment in the subject range
821
 
 * @param matchEnd      the end of the alignment in the subject range
822
 
 * @param score         the score of this alignment
823
 
 * @param window        the subject window of this alignment
824
 
 * @param gap_align     alignment info for gapped alignments
825
 
 * @param scoringParams Settings for gapped alignment.[in]
826
 
 * @param localScalingFactor    the factor by which the scoring system has
827
 
 *                              been scaled in order to obtain greater
828
 
 *                              precision
829
 
 * @param prog_number   the type of alignment being performed
830
 
 * @param next          preexisting list of alignments [out]
 
1016
/** A callback used when performing SmithWaterman alignments:
 
1017
 * Calculate the traceback for one alignment by performing an x-drop
 
1018
 * alignment in the forward direction, possibly increasing the x-drop
 
1019
 * parameter until the desired score is attained.
 
1020
 *
 
1021
 * The start, end and score of the alignment should be obtained
 
1022
 * using the Smith-Waterman algorithm before this routine is called.
 
1023
 *
 
1024
 * @param *pnewAlign       the new alignment
 
1025
 * @param *pqueryEnd       on entry, the end of the alignment in the
 
1026
 *                         query, as computed by the Smith-Waterman
 
1027
 *                         algorithm.  On exit, the end as computed by
 
1028
 *                         the x-drop algorithm
 
1029
 * @param *pmatchEnd       like as *pqueryEnd, but for the subject
 
1030
 *                         sequence
 
1031
 * @param queryStart       the starting point in the query
 
1032
 * @param matchStart       the starting point in the subject
 
1033
 * @param score            the score of the alignment, as computed by
 
1034
 *                         the Smith-Waterman algorithm
 
1035
 * @param query            query sequence data
 
1036
 * @param query_range      range of this query in the concatenated
 
1037
 *                         query
 
1038
 * @param ccat_query_length   total length of the concatenated query
 
1039
 * @param subject          subject sequence data
 
1040
 * @param subject_range    range of subject_data in the translated
 
1041
 *                         query, in amino acid coordinates
 
1042
 * @param full_subject_length   length of the full subject sequence
 
1043
 * @param gapping_params        parameters used to compute gapped
 
1044
 *                              alignments
 
1045
 * @param matrix_adjust_rule    the rule used to compute the scoring matrix
 
1046
 *
 
1047
 * @returns 0   (posts a fatal error if it fails)
 
1048
 * @sa new_xdrop_align_type
831
1049
 */
832
1050
static int
833
1051
s_NewAlignmentUsingXdrop(BlastCompo_Alignment ** pnewAlign,
835
1053
                         Int4 queryStart, Int4 matchStart, Int4 score,
836
1054
                         BlastCompo_SequenceData * query,
837
1055
                         BlastCompo_SequenceRange * query_range,
838
 
                         Int4 queryLength,
 
1056
                         Int4 ccat_query_length,
839
1057
                         BlastCompo_SequenceData * subject,
840
1058
                         BlastCompo_SequenceRange * subject_range,
841
 
                         Int4 subjectLength,
 
1059
                         Int4 full_subject_length,
842
1060
                         BlastCompo_GappingParams * gapping_params,
843
 
                         ECompoAdjustModes whichMode)
 
1061
                         EMatrixAdjustRule matrix_adjust_rule)
844
1062
{
845
1063
    Int4 newScore;
846
1064
    /* Extent of the alignment as computed by an x-drop alignment
847
1065
     * (usually the same as (queryEnd - queryStart) and (matchEnd -
848
1066
     * matchStart)) */
849
1067
    Int4 queryExtent, matchExtent;
850
 
    BlastCompo_Alignment * obj;  /* the new object */
851
 
    Blast_GappingParamsContext * context = gapping_params->context;
 
1068
    BlastCompo_Alignment * obj = NULL;  /* the new object */
 
1069
    /* BLAST-specific parameters needed compute an X-drop alignment */
 
1070
    BlastKappa_GappingParamsContext * context = gapping_params->context;
 
1071
    /* Auxiliarly structure for computing gapped alignments */
852
1072
    BlastGapAlignStruct * gap_align = context->gap_align;
 
1073
    /* Scoring parameters for gapped alignments */
853
1074
    const BlastScoringParameters* scoringParams = context->scoringParams;
854
 
    double localScalingFactor = context->localScalingFactor;
855
 
    GapEditScript* editScript;
 
1075
    /* A structure containing the traceback of a gapped alignment */
 
1076
    GapEditScript* editScript = NULL;
 
1077
 
 
1078
    /* suppress unused parameter warnings; this is a callback
 
1079
       function, so these parameter cannot be deleted */
 
1080
    (void) ccat_query_length;
 
1081
    (void) full_subject_length;
 
1082
 
 
1083
    gap_align->gap_x_dropoff = gapping_params->x_dropoff;
856
1084
 
857
1085
    s_SWFindFinalEndsUsingXdrop(query,   queryStart, *pqueryEnd,
858
1086
                                subject, matchStart, *pmatchEnd,
859
1087
                                gap_align, scoringParams,
860
 
                                score, localScalingFactor,
861
 
                                &queryExtent, &matchExtent,
 
1088
                                score, &queryExtent, &matchExtent,
862
1089
                                &newScore);
863
1090
    *pqueryEnd = queryStart + queryExtent;
864
1091
    *pmatchEnd = matchStart + matchExtent;
866
1093
    editScript =
867
1094
        Blast_PrelimEditBlockToGapEditScript(gap_align->rev_prelim_tback,
868
1095
                                             gap_align->fwd_prelim_tback);
869
 
    obj = BlastCompo_AlignmentNew(newScore, whichMode,
870
 
                                  queryStart, *pqueryEnd,
871
 
                                  query_range->context,
872
 
                                  matchStart, *pmatchEnd,
873
 
                                  subject_range->context, editScript);
 
1096
    if (editScript != NULL) {
 
1097
        /* Shifted values of the endpoints */
 
1098
        Int4 aqueryStart =  queryStart + query_range->begin;
 
1099
        Int4 aqueryEnd   = *pqueryEnd  + query_range->begin;
 
1100
        Int4 amatchStart =  matchStart + subject_range->begin;
 
1101
        Int4 amatchEnd   = *pmatchEnd  + subject_range->begin;
 
1102
 
 
1103
        obj = BlastCompo_AlignmentNew(newScore, matrix_adjust_rule,
 
1104
                                      aqueryStart, aqueryEnd,
 
1105
                                      query_range->context,
 
1106
                                      amatchStart, amatchEnd,
 
1107
                                      subject_range->context, editScript);
 
1108
        if (obj == NULL) {
 
1109
            GapEditScriptDelete(editScript);
 
1110
        }
 
1111
    }
874
1112
    *pnewAlign = obj;
875
1113
 
876
 
    return 0;
 
1114
    return obj != NULL ? 0 : -1;
877
1115
}
878
1116
 
879
1117
 
 
1118
/**
 
1119
 * A callback: calculate the traceback for one alignment by
 
1120
 * performing an x-drop alignment in both directions
 
1121
 *
 
1122
 * @param in_align         the existing alignment, without traceback
 
1123
 * @param matrix_adjust_rule    the rule used to compute the scoring matrix
 
1124
 * @param query_data       query sequence data
 
1125
 * @param query_range      range of this query in the concatenated
 
1126
 *                         query
 
1127
 * @param ccat_query_length   total length of the concatenated query
 
1128
 * @param subject_data     subject sequence data
 
1129
 * @param subject_range    range of subject_data in the translated
 
1130
 *                         query, in amino acid coordinates
 
1131
 * @param full_subject_length   length of the full subject sequence
 
1132
 * @param gapping_params        parameters used to compute gapped
 
1133
 *                              alignments
 
1134
 * @sa redo_one_alignment_type
 
1135
 */
880
1136
static BlastCompo_Alignment *
881
1137
s_RedoOneAlignment(BlastCompo_Alignment * in_align,
882
 
                   ECompoAdjustModes whichMode,
 
1138
                   EMatrixAdjustRule matrix_adjust_rule,
883
1139
                   BlastCompo_SequenceData * query_data,
884
1140
                   BlastCompo_SequenceRange * query_range,
885
1141
                   int ccat_query_length,
888
1144
                   int full_subject_length,
889
1145
                   BlastCompo_GappingParams * gapping_params)
890
1146
{
891
 
    Int4 q_start, s_start;
892
 
    Blast_GappingParamsContext * context = gapping_params->context;
 
1147
    int status;                /* return code */
 
1148
    Int4 q_start, s_start;     /* starting point in query and subject */
 
1149
    /* BLAST-specific parameters needed to compute a gapped alignment */
 
1150
    BlastKappa_GappingParamsContext * context = gapping_params->context;
 
1151
    /* Score block for this search */
893
1152
    BlastScoreBlk* sbp = context->sbp;
 
1153
    /* Auxiliary structure for computing gapped alignments */
894
1154
    BlastGapAlignStruct* gapAlign = context->gap_align;
895
 
    Boolean positionBased = (sbp->psi_matrix ? TRUE : FALSE);
 
1155
    /* The preliminary gapped HSP that were are recomputing */
896
1156
    BlastHSP * hsp = in_align->context;
897
1157
 
898
 
    s_StartingPointForHit(&q_start, &s_start, sbp, positionBased,
899
 
                          hsp, subject_range, query_data, subject_data);
 
1158
    /* suppress unused parameter warnings; this is a callback
 
1159
       function, so these parameter cannot be deleted */
 
1160
    (void) ccat_query_length;
 
1161
    (void) full_subject_length;
 
1162
 
 
1163
    /* Shift the subject offset and gapped start to be offsets
 
1164
       into the translated subject_range; shifting in this manner
 
1165
       is necessary for BLAST_CheckStartForGappedAlignment */
 
1166
    hsp->subject.offset       -= subject_range->begin;
 
1167
    hsp->subject.end          -= subject_range->begin;
 
1168
    hsp->subject.gapped_start -= subject_range->begin;
 
1169
 
 
1170
    if(BLAST_CheckStartForGappedAlignment(hsp, query_data->data,
 
1171
                                          subject_data->data, sbp)) {
 
1172
        /* We may use the starting point supplied by the HSP. */
 
1173
        q_start = hsp->query.gapped_start;
 
1174
        s_start = hsp->subject.gapped_start;
 
1175
    } else {
 
1176
        /* We must recompute the start for the gapped alignment, as the
 
1177
           one in the HSP was unacceptable.*/
 
1178
        q_start =
 
1179
            BlastGetStartForGappedAlignment(query_data->data,
 
1180
                                            subject_data->data, sbp,
 
1181
                                            hsp->query.offset,
 
1182
                                            hsp->query.end -
 
1183
                                            hsp->query.offset,
 
1184
                                            hsp->subject.offset,
 
1185
                                            hsp->subject.end -
 
1186
                                            hsp->subject.offset);
 
1187
        s_start =
 
1188
            (hsp->subject.offset - hsp->query.offset) + q_start;
 
1189
    }
 
1190
    /* Undo the shift so there is no side effect on the incoming HSP
 
1191
       list. */
 
1192
    hsp->subject.offset       += subject_range->begin;
 
1193
    hsp->subject.end          += subject_range->begin;
 
1194
    hsp->subject.gapped_start += subject_range->begin;
 
1195
 
900
1196
    gapAlign->gap_x_dropoff = gapping_params->x_dropoff;
901
1197
 
902
 
    BLAST_GappedAlignmentWithTraceback(context->prog_number,
903
 
                                       query_data->data,
904
 
                                       subject_data->data, gapAlign,
905
 
                                       context->scoringParams,
906
 
                                       q_start, s_start,
907
 
                                       query_data->length,
908
 
                                       subject_data->length);
909
 
    return s_NewAlignmentFromGapAlign(gapAlign, query_range, subject_range,
910
 
                                      whichMode);
 
1198
    status =
 
1199
        BLAST_GappedAlignmentWithTraceback(context->prog_number,
 
1200
                                           query_data->data,
 
1201
                                           subject_data->data, gapAlign,
 
1202
                                           context->scoringParams,
 
1203
                                           q_start, s_start,
 
1204
                                           query_data->length,
 
1205
                                           subject_data->length);
 
1206
    if (status == 0) {
 
1207
        return s_NewAlignmentFromGapAlign(gapAlign, &gapAlign->edit_script,
 
1208
                                          query_range, subject_range,
 
1209
                                          matrix_adjust_rule);
 
1210
    } else {
 
1211
        return NULL;
 
1212
    }
911
1213
}
912
1214
 
913
1215
 
914
1216
/**
915
 
 * A s_SearchParameters represents the data needed by
916
 
 * RedoAlignmentCore to adjust the parameters of a search, including
917
 
 * the original value of these parameters
 
1217
 * A BlastKappa_SavedParameters holds the value of certain search
 
1218
 * parameters on entry to RedoAlignmentCore.  These values are
 
1219
 * restored on exit.
918
1220
 */
919
 
typedef struct s_SearchParameters {
 
1221
typedef struct BlastKappa_SavedParameters {
920
1222
    Int4          gap_open;    /**< a penalty for the existence of a gap */
921
1223
    Int4          gapExtend;   /**< a penalty for each residue in the
922
1224
                                    gap */
927
1229
    double original_expect_value;   /**< expect value on entry */
928
1230
    /** copy of the original gapped Karlin-Altschul block
929
1231
     * corresponding to the first context */
930
 
    Blast_KarlinBlk* kbp_gap_orig;
931
 
    /** pointer to the array of gapped Karlin-Altschul block for all
932
 
     * contexts; needed to restore the search to its original
933
 
     * configuration.  */
934
 
    Blast_KarlinBlk** orig_kbp_gap_array;
935
 
} s_SearchParameters;
 
1232
    Blast_KarlinBlk** kbp_gap_orig;
 
1233
    Int4             num_queries;   /**< Number of queries in this search */
 
1234
} BlastKappa_SavedParameters;
936
1235
 
937
1236
 
938
1237
/**
939
 
 * Release the data associated with a s_SearchParameters and
 
1238
 * Release the data associated with a BlastKappa_SavedParameters and
940
1239
 * delete the object
941
1240
 * @param searchParams the object to be deleted [in][out]
942
1241
 */
943
1242
static void
944
 
s_SearchParametersFree(s_SearchParameters ** searchParams)
 
1243
s_SavedParametersFree(BlastKappa_SavedParameters ** searchParams)
945
1244
{
946
1245
    /* for convenience, remove one level of indirection from searchParams */
947
 
    s_SearchParameters *sp = *searchParams;
948
 
 
949
 
    if(sp->kbp_gap_orig) Blast_KarlinBlkFree(sp->kbp_gap_orig);
950
 
 
951
 
    Nlm_Int4MatrixFree(&sp->origMatrix);
952
 
 
 
1246
    BlastKappa_SavedParameters *sp = *searchParams;
 
1247
 
 
1248
    if (sp != NULL) {
 
1249
        if (sp->kbp_gap_orig != NULL) {
 
1250
            int i;
 
1251
            for (i = 0;  i < sp->num_queries;  i++) {
 
1252
                if (sp->kbp_gap_orig[i] != NULL)
 
1253
                    Blast_KarlinBlkFree(sp->kbp_gap_orig[i]);
 
1254
            }
 
1255
            free(sp->kbp_gap_orig);
 
1256
        }
 
1257
        if (sp->origMatrix != NULL)
 
1258
            Nlm_Int4MatrixFree(&sp->origMatrix);
 
1259
    }
953
1260
    sfree(*searchParams);
954
1261
    *searchParams = NULL;
955
1262
}
956
1263
 
957
1264
 
958
1265
/**
959
 
 * Create a new instance of s_SearchParameters
 
1266
 * Create a new instance of BlastKappa_SavedParameters
960
1267
 *
961
 
 * @param rows              number of rows in the scoring matrix
962
 
 * @param adjustParameters  if >0, use composition-based statistics
963
 
 * @param numQueries        the number of queries in the concatenated
964
 
 *                          query
965
 
 * @param positionBased     if true, the search is position-based
 
1268
 * @param rows               number of rows in the scoring matrix
 
1269
 * @param numQueries         number of queries in this search
 
1270
 * @param compo_adjust_mode  if >0, use composition-based statistics
 
1271
 * @param positionBased      if true, the search is position-based
966
1272
 */
967
 
static s_SearchParameters *
968
 
s_SearchParametersNew(
969
 
                      Int4 rows,
970
 
                      Int4 adjustParameters,
971
 
                      Boolean positionBased)
 
1273
static BlastKappa_SavedParameters *
 
1274
s_SavedParametersNew(Int4 rows,
 
1275
                     Int4 numQueries,
 
1276
                     ECompoAdjustModes compo_adjust_mode,
 
1277
                     Boolean positionBased)
972
1278
{
973
 
    s_SearchParameters *sp;   /* the new object */
974
 
    sp = malloc(sizeof(s_SearchParameters));
 
1279
    int i;
 
1280
    BlastKappa_SavedParameters *sp;   /* the new object */
 
1281
    sp = malloc(sizeof(BlastKappa_SavedParameters));
975
1282
 
976
 
    sp->orig_kbp_gap_array = NULL;
 
1283
    if (sp == NULL) {
 
1284
        goto error_return;
 
1285
    }
977
1286
    sp->kbp_gap_orig       = NULL;
978
1287
    sp->origMatrix         = NULL;
979
1288
 
980
 
    sp->kbp_gap_orig = Blast_KarlinBlkNew();
981
 
    if (adjustParameters) {
 
1289
    sp->kbp_gap_orig = calloc(numQueries, sizeof(Blast_KarlinBlk*));
 
1290
    if (sp->kbp_gap_orig == NULL) {
 
1291
        goto error_return;
 
1292
    }
 
1293
    sp->num_queries = numQueries;
 
1294
    for (i = 0;  i < numQueries;  i++) {
 
1295
        sp->kbp_gap_orig[i] = NULL;
 
1296
    }
 
1297
    if (compo_adjust_mode != eNoCompositionBasedStats) {
982
1298
        if (positionBased) {
983
1299
            sp->origMatrix = Nlm_Int4MatrixNew(rows, BLASTAA_SIZE);
984
1300
        } else {
985
1301
            sp->origMatrix = Nlm_Int4MatrixNew(BLASTAA_SIZE, BLASTAA_SIZE);
986
1302
        }
 
1303
        if (sp->origMatrix == NULL)
 
1304
            goto error_return;
987
1305
    }
988
1306
    return sp;
 
1307
error_return:
 
1308
    s_SavedParametersFree(&sp);
 
1309
    return NULL;
989
1310
}
990
1311
 
991
1312
 
993
1314
 * Record the initial value of the search parameters that are to be
994
1315
 * adjusted.
995
1316
 *
996
 
 * @param searchParams     holds the recorded values [out]
997
 
 * @param search           the search parameters [in]
998
 
 * @param query            a list of query data [in]
999
 
 * @param numQueries       the length of the array query [in]
 
1317
 * @param searchParams       holds the recorded values [out]
 
1318
 * @param sbp                a score block [in]
 
1319
 * @param scoring            gapped alignment parameters [in]
 
1320
 * @param query_length       length of the concatenated query [in]
 
1321
 * @param compo_adjust_mode  composition adjustment mode [in]
 
1322
 * @param positionBased     is this search position-based [in]
1000
1323
 */
1001
 
static void
1002
 
s_RecordInitialSearch(s_SearchParameters * searchParams,
1003
 
                      BLAST_SequenceBlk * queryBlk,
1004
 
                      BlastQueryInfo* queryInfo,
 
1324
static int
 
1325
s_RecordInitialSearch(BlastKappa_SavedParameters * searchParams,
1005
1326
                      BlastScoreBlk* sbp,
1006
1327
                      const BlastScoringParameters* scoring,
1007
1328
                      int query_length,
1008
 
                      Boolean adjustParameters,
 
1329
                      ECompoAdjustModes compo_adjust_mode,
1009
1330
                      Boolean positionBased)
1010
1331
{
1011
 
    Blast_KarlinBlk* kbp;     /* statistical parameters used to evaluate a
1012
 
                               * query-subject pair */
1013
 
    /* YIKES! How do I get these! */
1014
 
    /*
1015
 
      searchParams->original_expect_value = search->pbp->cutoff_e;
1016
 
    */
1017
 
    searchParams->gap_open   = scoring->gap_open;
1018
 
    searchParams->gapExtend  = scoring->gap_extend;
1019
 
    searchParams->gapDecline = scoring->decline_align;
 
1332
    int i;
 
1333
 
 
1334
    searchParams->gap_open     = scoring->gap_open;
 
1335
    searchParams->gapExtend    = scoring->gap_extend;
 
1336
    searchParams->gapDecline   = scoring->decline_align;
1020
1337
    searchParams->scale_factor = scoring->scale_factor;
1021
1338
 
1022
 
    searchParams->orig_kbp_gap_array   = sbp->kbp_gap;
1023
 
    kbp = sbp->kbp_gap[0];
1024
 
    Blast_KarlinBlkCopy(searchParams->kbp_gap_orig, kbp);
 
1339
    for (i = 0;  i < searchParams->num_queries;  i++) { 
 
1340
        if (sbp->kbp_gap[i] != NULL) {
 
1341
            /* There is a kbp_gap for query i and it must be copied */
 
1342
            searchParams->kbp_gap_orig[i] = Blast_KarlinBlkNew();
 
1343
            if (searchParams->kbp_gap_orig[i] == NULL) {
 
1344
                return -1;
 
1345
            }
 
1346
            Blast_KarlinBlkCopy(searchParams->kbp_gap_orig[i],
 
1347
                                sbp->kbp_gap[i]);
 
1348
        }
 
1349
    }
1025
1350
 
1026
 
    if (adjustParameters) {
1027
 
        Int4 **matrix;
1028
 
        Int4 i, j;                  /* iteration indices */
1029
 
        int rows;
 
1351
    if (compo_adjust_mode != eNoCompositionBasedStats) {
 
1352
        Int4 **matrix;              /* scoring matrix */
 
1353
        int j;                      /* iteration index */
 
1354
        int rows;                   /* number of rows in matrix */
1030
1355
        if (positionBased) {
1031
1356
            matrix = sbp->psi_matrix->pssm->data;
1032
1357
            rows = query_length;
1040
1365
            }
1041
1366
        }
1042
1367
    }
 
1368
    return 0;
1043
1369
}
1044
1370
 
1045
1371
 
1046
1372
/**
1047
1373
 * Rescale the search parameters in the search object and options
1048
1374
 * object to obtain more precision.
 
1375
 *
 
1376
 * @param sbp               score block to be rescaled
 
1377
 * @param sp                scoring parameters to be rescaled
 
1378
 * @param num_queries       number of queries in this search
 
1379
 * @param scale_factor      amount by which to scale this search
1049
1380
 */
1050
1381
static void
1051
 
s_RescaleSearch(s_SearchParameters * sp,
1052
 
                BLAST_SequenceBlk* queryBlk,
1053
 
                BlastQueryInfo* queryInfo,
1054
 
                BlastScoreBlk* sbp,
1055
 
                BlastScoringParameters* scoringParams,
1056
 
                double localScalingFactor,
1057
 
                Boolean positionBased)
 
1382
s_RescaleSearch(BlastScoreBlk* sbp,
 
1383
                BlastScoringParameters* sp,
 
1384
                int num_queries,
 
1385
                double scale_factor)
1058
1386
{
1059
 
    Blast_KarlinBlk* kbp;     /* the statistical parameters used to
1060
 
                               * evaluate alignments of a
1061
 
                               * query-subject pair */
1062
 
    kbp = sbp->kbp_gap[0];
1063
 
    kbp->Lambda /= localScalingFactor;
1064
 
    kbp->logK = log(kbp->K);
 
1387
    int i;
 
1388
    for (i = 0;  i < num_queries;  i++) {
 
1389
        if (sbp->kbp_gap[i] != NULL) {
 
1390
            Blast_KarlinBlk * kbp = sbp->kbp_gap[i];
 
1391
            kbp->Lambda /= scale_factor;
 
1392
            kbp->logK = log(kbp->K);
 
1393
        }
 
1394
    }
1065
1395
 
1066
 
    /* YIKES! and what about the cutoff_e */
1067
 
    /*
1068
 
      search->pbp->cutoff_e   = options->kappa_expect_value;
1069
 
    */
1070
 
    scoringParams->gap_open   = BLAST_Nint(sp->gap_open  * localScalingFactor);
1071
 
    scoringParams->gap_extend = BLAST_Nint(sp->gapExtend * localScalingFactor);
1072
 
    scoringParams->scale_factor = localScalingFactor;
1073
 
    if (sp->gapDecline != INT2_MAX) {
1074
 
        scoringParams->decline_align =
1075
 
            BLAST_Nint(sp->gapDecline * localScalingFactor);
 
1396
    sp->gap_open = BLAST_Nint(sp->gap_open  * scale_factor);
 
1397
    sp->gap_extend = BLAST_Nint(sp->gap_extend * scale_factor);
 
1398
    sp->scale_factor = scale_factor;
 
1399
    if (sp->decline_align != INT2_MAX) {
 
1400
        sp->decline_align = BLAST_Nint(sp->decline_align * scale_factor);
1076
1401
    }
1077
1402
}
1078
1403
 
1079
1404
 
1080
1405
/**
1081
 
 * Restore the parameters that were adjusted to their original values
1082
 
 * @param searchParams      a record of the original values [in]
1083
 
 * @param search            the search to be restored [out]
1084
 
 * @param options           the option block to be restored [out]
1085
 
 * @param matrix            the scoring matrix to be restored [out]
1086
 
 * @param SmithWaterman     if true, we have performed a Smith-Waterman
1087
 
 *                          alignment with these search parameters [in]
 
1406
 * Restore the parameters that were adjusted to their original values.
 
1407
 *
 
1408
 * @param sbp                the score block to be restored
 
1409
 * @param scoring            the scoring parameters to be restored
 
1410
 * @param searchParams       the initial recorded values of the parameters
 
1411
 * @param query_length       the concatenated query length
 
1412
 * @param positionBased      is this search position-based
 
1413
 * @param compo_adjust_mode  mode of composition adjustment
1088
1414
 */
1089
1415
static void
1090
 
s_RestoreSearch(s_SearchParameters * searchParams,
1091
 
                BlastScoreBlk* sbp,
1092
 
                Int4 ** matrix,
 
1416
s_RestoreSearch(BlastScoreBlk* sbp,
 
1417
                BlastScoringParameters* scoring,
 
1418
                const BlastKappa_SavedParameters * searchParams,
1093
1419
                int query_length,
1094
 
                BlastScoringParameters* scoring,
1095
1420
                Boolean positionBased,
1096
 
                Boolean adjustParameters)
 
1421
                ECompoAdjustModes compo_adjust_mode)
1097
1422
{
1098
 
    Blast_KarlinBlk* kbp;       /* statistical parameters used to
1099
 
                                   evaluate the significance of
1100
 
                                   alignment of a query-subject
1101
 
                                   pair */
1102
 
    Int4 i, j;
1103
 
    /* YIKES! More stuff I don't know how to deal with */
1104
 
    /*
1105
 
      search->pbp->gap_x_dropoff_final = searchParams->gap_x_dropoff_final;
1106
 
      search->pbp->cutoff_e      = searchParams->original_expect_value;
1107
 
      search->pbp->gap_open      = searchParams->gap_open;
1108
 
      search->pbp->gap_extend    = searchParams->gapExtend;
1109
 
      search->pbp->decline_align = searchParams->gapDecline;
1110
 
      GapAlignBlkDelete(search->gap_align);
1111
 
      search->gap_align    = searchParams->orig_gap_align;
1112
 
      search->sbp->kbp_gap = searchParams->orig_kbp_gap_array;
1113
 
    */
1114
 
    kbp = sbp->kbp_gap[0];
1115
 
    Blast_KarlinBlkCopy(kbp, searchParams->kbp_gap_orig);
1116
 
 
1117
 
    if(adjustParameters) {
1118
 
        int rows;
 
1423
    int i;
 
1424
 
 
1425
    scoring->gap_open = searchParams->gap_open;
 
1426
    scoring->gap_extend = searchParams->gapExtend;
 
1427
    scoring->decline_align = searchParams->gapDecline;
 
1428
    scoring->scale_factor = searchParams->scale_factor;
 
1429
 
 
1430
    for (i = 0;  i < searchParams->num_queries;  i++) {
 
1431
        if (sbp->kbp_gap[i] != NULL) {
 
1432
            Blast_KarlinBlkCopy(sbp->kbp_gap[i],
 
1433
                                searchParams->kbp_gap_orig[i]);
 
1434
        }
 
1435
    }
 
1436
    if(compo_adjust_mode != eNoCompositionBasedStats) {
 
1437
        int  j;             /* iteration index */
 
1438
        Int4 ** matrix;     /* matrix to be restored */
 
1439
        int rows;           /* number of rows in the matrix */
 
1440
 
1119
1441
        if (positionBased) {
 
1442
            matrix = sbp->psi_matrix->pssm->data;
1120
1443
            rows = query_length;
1121
1444
        } else {
 
1445
            matrix = sbp->matrix->data;
1122
1446
            rows = BLASTAA_SIZE;
1123
1447
        }
1124
 
        for(i = 0; i < rows; i++) {
1125
 
            for(j = 0; j < BLASTAA_SIZE; j++) {
 
1448
        for (i = 0;  i < rows;  i++) {
 
1449
            for (j = 0;  j < BLASTAA_SIZE;  j++) {
1126
1450
                matrix[i][j] = searchParams->origMatrix[i][j];
1127
1451
            }
1128
1452
        }
1130
1454
}
1131
1455
 
1132
1456
 
1133
 
static void
 
1457
/**
 
1458
 * Initialize an object of type Blast_MatrixInfo.
 
1459
 *
 
1460
 * @param self            object being initialized
 
1461
 * @param queryBlk        the query sequence data
 
1462
 * @param sbp             score block for this search
 
1463
 * @param scale_factor    amount by which ungapped parameters should be
 
1464
 *                        scaled
 
1465
 * @param matrixName      name of the matrix
 
1466
 */
 
1467
static int
1134
1468
s_MatrixInfoInit(Blast_MatrixInfo * self,
1135
 
                 double localScalingFactor,
1136
1469
                 BLAST_SequenceBlk* queryBlk,
1137
 
                 BlastQueryInfo* queryInfo,
1138
1470
                 BlastScoreBlk* sbp,
1139
 
                 BlastScoringParameters* scoringParams,
1140
 
                 Boolean positionBased,
 
1471
                 double scale_factor,
1141
1472
                 const char * matrixName)
1142
1473
{
1143
 
    Uint1 * query;             /* the query sequence */
1144
 
    int queryLength;
1145
 
    /*  Int4 queryLength; */          /* the length of the query sequence */
1146
 
    double initialUngappedLambda;
1147
 
 
1148
 
    /* YIKES! */
1149
 
    /*
1150
 
      query       = search->context[0].query->sequence;
1151
 
      queryLength = search->context[0].query->length;
1152
 
    */
1153
 
    query = &queryBlk->sequence[0];
1154
 
    queryLength = queryInfo->contexts[0].query_length;
 
1474
    int status = 0;    /* return status */
 
1475
    int lenName;       /* length of matrixName as a string */
 
1476
 
 
1477
    /* copy the matrix name (strdup is not standard C) */
 
1478
    lenName = strlen(matrixName);
 
1479
    if (NULL == (self->matrixName = malloc(lenName + 1))) {
 
1480
        return -1;
 
1481
    }
 
1482
    memcpy(self->matrixName, matrixName, lenName + 1);
 
1483
 
1155
1484
    if (self->positionBased) {
1156
 
        /*  YIKES!
1157
 
            if(sbp->posFreqs == NULL) {
1158
 
            sbp->posFreqs =
1159
 
            allocatePosFreqs(queryLength, BLASTAA_SIZE);
1160
 
            }
1161
 
        */
1162
 
        s_GetStartFreqRatios(self->startFreqRatios, query, matrixName,
1163
 
                             sbp->psi_matrix->freq_ratios, queryLength,
1164
 
                             TRUE);
1165
 
        s_ScalePosMatrix(self->startMatrix, sbp->matrix->data,
1166
 
                         matrixName,sbp->psi_matrix->freq_ratios, query,
1167
 
                         queryInfo->max_length, sbp);
1168
 
        initialUngappedLambda = sbp->kbp_psi[0]->Lambda;
 
1485
        status = s_GetPosBasedStartFreqRatios(self->startFreqRatios,
 
1486
                                              queryBlk->length,
 
1487
                                              queryBlk->sequence,
 
1488
                                              matrixName,
 
1489
                                              sbp->psi_matrix->freq_ratios);
 
1490
        if (status == 0) {
 
1491
            status = s_ScalePosMatrix(self->startMatrix, matrixName,
 
1492
                                      sbp->psi_matrix->freq_ratios,
 
1493
                                      queryBlk->sequence,
 
1494
                                      queryBlk->length, sbp, scale_factor);
 
1495
            self->ungappedLambda = sbp->kbp_psi[0]->Lambda / scale_factor;
 
1496
        }
1169
1497
    } else {
1170
 
        s_GetStartFreqRatios(self->startFreqRatios, query, matrixName,
1171
 
                             NULL, BLASTAA_SIZE, FALSE);
1172
 
        initialUngappedLambda = sbp->kbp_ideal->Lambda;
1173
 
    }
1174
 
    self->ungappedLambda = initialUngappedLambda / localScalingFactor;
1175
 
    if ( !positionBased ) {
1176
 
        SFreqRatios * freqRatios;  /* frequency ratios for the matrix */
1177
 
 
1178
 
        freqRatios = _PSIMatrixFrequencyRatiosNew(matrixName);
1179
 
        /*
1180
 
          if (freqRatios == NULL) {
1181
 
          ErrPostEx(SEV_FATAL, 1, 0, "blastpgp: Cannot adjust parameters "
1182
 
          "for matrix %s\n", matrixName);
1183
 
          }
1184
 
        */
1185
 
        Blast_Int4MatrixFromFreq(self->startMatrix, BLASTAA_SIZE,
1186
 
                                 freqRatios->data, self->ungappedLambda);
1187
 
        freqRatios = _PSIMatrixFrequencyRatiosFree(freqRatios);
1188
 
    }
1189
 
    self->matrixName = strdup(matrixName);
1190
 
}
1191
 
 
1192
 
 
1193
 
static void
1194
 
s_GetQueryInfo(BlastCompo_QueryInfo **pquery, int * pnumQueries,
1195
 
               Uint1 * ccat_query, BlastQueryInfo* queryInfo)
1196
 
{
1197
 
    int query_index;
1198
 
    int numQueries = queryInfo->num_queries;
1199
 
    BlastCompo_QueryInfo * query = calloc(numQueries,
1200
 
                                          sizeof(BlastCompo_QueryInfo));
1201
 
    *pnumQueries = numQueries;
1202
 
    *pquery = query;
1203
 
    for (query_index = 0;  query_index < numQueries;  query_index++) {
1204
 
        query[query_index].eff_search_space =
1205
 
            queryInfo->contexts[query_index].eff_searchsp;
1206
 
    }
1207
 
    for (query_index = 0;  query_index < numQueries;  query_index++) {
1208
 
        query[query_index].origin =
1209
 
            queryInfo->contexts[query_index].query_offset;
1210
 
        query[query_index].seq.data = &ccat_query[query[query_index].origin];
1211
 
        query[query_index].seq.length =
1212
 
            queryInfo->contexts[query_index].query_length;
1213
 
    }
1214
 
    for (query_index = 0;  query_index < numQueries;  query_index++) {
1215
 
        Blast_ReadAaComposition(&query[query_index].composition,
1216
 
                                query[query_index].seq.data,
1217
 
                                query[query_index].seq.length);
1218
 
    }
1219
 
}
1220
 
 
1221
 
 
1222
 
static void
1223
 
s_GappingParamsInit(Blast_GappingParamsContext * context,
1224
 
                    BlastCompo_GappingParams * gapping_params,
1225
 
                    BlastGapAlignStruct * gap_align,
1226
 
                    const BlastScoringParameters* scoring,
1227
 
                    BlastScoreBlk* sbp,
1228
 
                    double localScalingFactor,
1229
 
                    Int4 program_number,
1230
 
                    double Lambda)
1231
 
{
1232
 
    context->gap_align = gap_align;
1233
 
    context->scoringParams = scoring;
1234
 
    context->sbp = sbp;
1235
 
    context->localScalingFactor = localScalingFactor;
1236
 
    context->prog_number = program_number;
1237
 
 
1238
 
    gapping_params->gap_open = scoring->gap_open;
1239
 
    gapping_params->gap_extend = scoring->gap_extend;
1240
 
    gapping_params->decline_align = scoring->decline_align;
1241
 
    /* YIKES! different x-dropoff due to different pass through the
1242
 
       blast code */
1243
 
    gapping_params->x_dropoff = gap_align->gap_x_dropoff;
1244
 
    gapping_params->context = context;
1245
 
}
1246
 
 
1247
 
 
 
1498
        self->ungappedLambda = sbp->kbp_ideal->Lambda / scale_factor;
 
1499
        status = s_GetStartFreqRatios(self->startFreqRatios, matrixName);
 
1500
        if (status == 0) {
 
1501
            Blast_Int4MatrixFromFreq(self->startMatrix, self->startFreqRatios,
 
1502
                                     self->ungappedLambda);
 
1503
        }
 
1504
    }
 
1505
    return status;
 
1506
}
 
1507
 
 
1508
 
 
1509
/**
 
1510
 * Save information about all queries in an array of objects of type
 
1511
 * BlastCompo_QueryInfo.
 
1512
 *
 
1513
 * @param query_data        query sequence data
 
1514
 * @param blast_query_info  information about all queries, as an
 
1515
 *                          internal blast data structure
 
1516
 *
 
1517
 * @return the new array on success, or NULL on error
 
1518
 */
 
1519
static BlastCompo_QueryInfo *
 
1520
s_GetQueryInfo(Uint1 * query_data, BlastQueryInfo * blast_query_info)
 
1521
{
 
1522
    int i;                   /* loop index */
 
1523
    BlastCompo_QueryInfo *
 
1524
        compo_query_info;    /* the new array */
 
1525
    int num_queries;         /* the number of queries/elements in
 
1526
                                compo_query_info */
 
1527
 
 
1528
    num_queries = blast_query_info->num_queries;
 
1529
    compo_query_info = calloc(num_queries, sizeof(BlastCompo_QueryInfo));
 
1530
    if (compo_query_info != NULL) {
 
1531
        for (i = 0;  i < num_queries;  i++) {
 
1532
            BlastCompo_QueryInfo * query_info = &compo_query_info[i];
 
1533
            BlastContextInfo * query_context = &blast_query_info->contexts[i];
 
1534
 
 
1535
            query_info->eff_search_space =
 
1536
                (double) query_context->eff_searchsp;
 
1537
            query_info->origin = query_context->query_offset;
 
1538
            query_info->seq.data = &query_data[query_info->origin];
 
1539
            query_info->seq.length = query_context->query_length;
 
1540
 
 
1541
            Blast_ReadAaComposition(&query_info->composition,
 
1542
                                    query_info->seq.data,
 
1543
                                    query_info->seq.length);
 
1544
        }
 
1545
    }
 
1546
    return compo_query_info;
 
1547
}
 
1548
 
 
1549
 
 
1550
/**
 
1551
 * Create a new object of type BlastCompo_GappingParams.  The new
 
1552
 * object contains the parameters needed by the composition adjustment
 
1553
 * library to compute a gapped alignment.
 
1554
 *
 
1555
 * @param context     the data structures needed by callback functions
 
1556
 *                    that perform the gapped alignments.
 
1557
 * @param extendParams parameters used for a gapped extension
 
1558
 * @param num_queries  the number of queries in the concatenated query
 
1559
 */
 
1560
static BlastCompo_GappingParams *
 
1561
s_GappingParamsNew(BlastKappa_GappingParamsContext * context,
 
1562
                   const BlastExtensionParameters* extendParams,
 
1563
                   int num_queries)
 
1564
{
 
1565
    int i;
 
1566
    double min_lambda = DBL_MAX;   /* smallest gapped Lambda */
 
1567
    const BlastScoringParameters * scoring = context->scoringParams;
 
1568
    const BlastExtensionOptions * options = extendParams->options;
 
1569
    /* The new object */
 
1570
    BlastCompo_GappingParams * gapping_params = NULL;
 
1571
 
 
1572
    gapping_params = malloc(sizeof(BlastCompo_GappingParams));
 
1573
    if (gapping_params != NULL) {
 
1574
        gapping_params->gap_open = scoring->gap_open;
 
1575
        gapping_params->gap_extend = scoring->gap_extend;
 
1576
        gapping_params->decline_align = scoring->decline_align;
 
1577
        gapping_params->context = context;
 
1578
    }
 
1579
    
 
1580
    for (i = 0;  i < num_queries;  i++) {
 
1581
        if (context->sbp->kbp_gap[i] != NULL &&
 
1582
            context->sbp->kbp_gap[i]->Lambda < min_lambda) {
 
1583
            min_lambda = context->sbp->kbp_gap[i]->Lambda;
 
1584
        }
 
1585
    }
 
1586
    gapping_params->x_dropoff = (Int4)
 
1587
        MAX(options->gap_x_dropoff_final*NCBIMATH_LN2 / min_lambda,
 
1588
            extendParams->gap_x_dropoff_final);
 
1589
    context->gap_align->gap_x_dropoff = gapping_params->x_dropoff;
 
1590
 
 
1591
    return gapping_params;
 
1592
}
 
1593
 
 
1594
 
 
1595
/** Callbacks used by the Blast_RedoOneMatch* routines */
1248
1596
static const Blast_RedoAlignCallbacks
1249
1597
redo_align_callbacks = {
1250
1598
    s_CalcLambda, s_SequenceGetRange, s_RedoOneAlignment,
1251
 
    s_NewAlignmentUsingXdrop
 
1599
    s_NewAlignmentUsingXdrop, s_FreeEditScript
1252
1600
};
1253
1601
 
1254
1602
 
 
1603
/** 
 
1604
 * Read the parameters required for the Blast_RedoOneMatch* functions from
 
1605
 * the corresponding parameters in standard BLAST datatypes.  Return a new
 
1606
 * object representing these parameters.
 
1607
 */
1255
1608
static Blast_RedoAlignParams *
1256
 
s_GetAlignParams(Blast_GappingParamsContext * context,
1257
 
                 EBlastProgramType program_number,
1258
 
                 BlastGapAlignStruct * gap_align,
 
1609
s_GetAlignParams(BlastKappa_GappingParamsContext * context,
1259
1610
                 BLAST_SequenceBlk * queryBlk,
1260
1611
                 BlastQueryInfo* queryInfo,
1261
 
                 BlastScoreBlk* sbp,
1262
 
                 BlastScoringParameters* scoringParams,
1263
 
                 const BlastExtensionParameters* extendParams,
1264
1612
                 const BlastHitSavingParameters* hitParams,
1265
 
                 const PSIBlastOptions* psiOptions,
1266
 
                 const char * matrixName,
1267
 
                 double localScalingFactor,
1268
 
                 int adjustParameters)
 
1613
                 const BlastExtensionParameters* extendParams)
1269
1614
{
1270
 
    int rows;
1271
 
    int cutoff_s;
1272
 
    double cutoff_e;
1273
 
    BlastCompo_GappingParams * gapping_params = NULL;
1274
 
    Blast_MatrixInfo * scaledMatrixInfo;
1275
 
    Blast_KarlinBlk* kbp;
1276
 
    int subject_is_translated = program_number == eBlastTypeTblastn;
1277
 
    /* YIKES! wrong test for do_link_hsps */
1278
 
    int do_link_hsps = program_number == eBlastTypeTblastn;
1279
 
    Boolean positionBased = (sbp->psi_matrix ? TRUE : FALSE);
1280
 
 
 
1615
    int status = 0;    /* status code */
 
1616
    int rows;          /* number of rows in the scoring matrix */
 
1617
    int cutoff_s;      /* cutoff score for saving an alignment */
 
1618
    double cutoff_e;   /* cutoff evalue for saving an alignment */
 
1619
    BlastCompo_GappingParams *
 
1620
        gapping_params = NULL;    /* parameters needed to compute a gapped
 
1621
                                     alignment */
 
1622
    Blast_MatrixInfo *
 
1623
        scaledMatrixInfo;         /* information about the scoring matrix */
 
1624
    /* does this kind of search translate the database sequence */
 
1625
    int subject_is_translated = context->prog_number == eBlastTypeTblastn;
 
1626
    /* is this a positiion-based search */
 
1627
    Boolean positionBased = (Boolean) (context->sbp->psi_matrix != NULL);
 
1628
    /* will BLAST_LinkHsps be called to assign e-values */
 
1629
    Boolean do_link_hsps = (Boolean) (hitParams->link_hsp_params != NULL);
 
1630
    ECompoAdjustModes compo_adjust_mode =
 
1631
        (ECompoAdjustModes) extendParams->options->compositionBasedStats;
 
1632
    
1281
1633
    if (do_link_hsps) {
1282
 
        ASSERT( 0 && "Which cutoff needed here?" );
1283
 
        /*     cutoff_s = search->pbp->cutoff_s2 * localScalingFactor; */
 
1634
        cutoff_s =
 
1635
            (int) (hitParams->cutoff_score * context->localScalingFactor);
1284
1636
    } else {
1285
1637
        /* There is no cutoff score; we consider e-values instead */
1286
1638
        cutoff_s = 0;
1288
1640
    cutoff_e = hitParams->options->expect_value;
1289
1641
    rows = positionBased ? queryInfo->max_length : BLASTAA_SIZE;
1290
1642
    scaledMatrixInfo = Blast_MatrixInfoNew(rows, positionBased);
1291
 
    s_MatrixInfoInit(scaledMatrixInfo, localScalingFactor,
1292
 
                     queryBlk, queryInfo, sbp, scoringParams,
1293
 
                     positionBased, matrixName);
1294
 
    kbp = sbp->kbp_gap[0];
1295
 
    gapping_params = malloc(sizeof(BlastCompo_GappingParams));
1296
 
    s_GappingParamsInit(context, gapping_params, gap_align, scoringParams,
1297
 
                        sbp, localScalingFactor, program_number,
1298
 
                        kbp->Lambda);
1299
 
    return
1300
 
        Blast_RedoAlignParamsNew(&scaledMatrixInfo, &gapping_params,
1301
 
                                 adjustParameters, positionBased,
1302
 
                                 subject_is_translated,
1303
 
                                 queryInfo->max_length, cutoff_s, cutoff_e,
1304
 
                                 do_link_hsps, kbp->Lambda, kbp->logK,
1305
 
                                 &redo_align_callbacks);
 
1643
    status = s_MatrixInfoInit(scaledMatrixInfo, queryBlk, context->sbp,
 
1644
                              context->localScalingFactor,
 
1645
                              context->scoringParams->options->matrix);
 
1646
    if (status != 0) {
 
1647
        return NULL;
 
1648
    }
 
1649
    gapping_params = s_GappingParamsNew(context, extendParams,
 
1650
                                        queryInfo->num_queries);
 
1651
    if (gapping_params == NULL) {
 
1652
        return NULL;
 
1653
    } else {
 
1654
        return
 
1655
            Blast_RedoAlignParamsNew(&scaledMatrixInfo, &gapping_params,
 
1656
                                     compo_adjust_mode, positionBased,
 
1657
                                     subject_is_translated,
 
1658
                                     queryInfo->max_length, cutoff_s, cutoff_e,
 
1659
                                     do_link_hsps, &redo_align_callbacks);
 
1660
    }
1306
1661
}
1307
1662
 
1308
1663
 
1309
1664
/**
1310
 
 * Convert a BlastCompo_Heap to a flat list of SeqAligns. Note that
1311
 
 * there may be more than one alignment per element in the heap.  The
1312
 
 * new list preserves the order of the SeqAligns associated with each
1313
 
 * HeapRecord. (@todo this function is named as it is for
1314
 
 * compatibility with kappa.c, rename in the future)
 
1665
 * Convert an array of BlastCompo_Heap objects to a BlastHSPResults structure.
1315
1666
 *
1316
 
 * @param self           a BlastCompo_Heap
1317
1667
 * @param results        BLAST core external results structure (pre-SeqAlign)
1318
1668
 *                       [out]
 
1669
 * @param heaps          an array of BlastCompo_Heap objects
1319
1670
 * @param hitlist_size   size of each list in the results structure above [in]
1320
1671
 */
1321
1672
static void
1322
 
s_HeapToFlatList(BlastCompo_Heap * self, BlastHSPResults * results,
1323
 
                 Int4 hitlist_size)
1324
 
{
1325
 
    BlastHSPList* hsp_list;
1326
 
    BlastHitList* hitlist =
1327
 
        results->hitlist_array[0] = Blast_HitListNew(hitlist_size);
1328
 
 
1329
 
    hsp_list = NULL;
 
1673
s_FillResultsFromCompoHeaps(BlastHSPResults * results,
 
1674
                            BlastCompo_Heap heaps[],
 
1675
                            Int4 hitlist_size)
 
1676
{
 
1677
    int query_index;   /* loop index */
 
1678
    int num_queries;   /* Number of queries in this search */
 
1679
 
 
1680
    num_queries = results->num_queries; 
 
1681
    for (query_index = 0;  query_index < num_queries;  query_index++) {
 
1682
        BlastHSPList* hsp_list;
 
1683
        BlastHitList* hitlist;
 
1684
        BlastCompo_Heap * heap = &heaps[query_index];
 
1685
 
 
1686
        results->hitlist_array[query_index] = Blast_HitListNew(hitlist_size);
 
1687
        hitlist = results->hitlist_array[query_index];
 
1688
 
 
1689
        while (NULL != (hsp_list = BlastCompo_HeapPop(heap))) {
 
1690
            Blast_HitListUpdate(hitlist, hsp_list);
 
1691
        }
 
1692
    }
 
1693
    Blast_HSPResultsReverseOrder(results);
 
1694
}
 
1695
 
 
1696
 
 
1697
/** Remove all matches from a BlastCompo_Heap. */
 
1698
static void s_ClearHeap(BlastCompo_Heap * self)
 
1699
{
 
1700
    BlastHSPList* hsp_list = NULL;   /* an element of the heap */
 
1701
 
1330
1702
    while (NULL != (hsp_list = BlastCompo_HeapPop(self))) {
1331
 
        Blast_HitListUpdate(hitlist, hsp_list);
 
1703
        hsp_list = Blast_HSPListFree(hsp_list);
1332
1704
    }
1333
1705
}
1334
1706
 
1335
1707
 
1336
1708
/**
1337
 
 *  Top level routine to recompute alignments for each
1338
 
 *  match found by the gapped BLAST algorithm
1339
 
 *
1340
 
 *  @param search           is the structure with all the information about
1341
 
 *                          the search
1342
 
 *  @param options          is used to pass certain command line options
1343
 
 *                          taken in by BLAST
1344
 
 *  @param hitlist_count    is the number of old matches
1345
 
 *  @param adjustParameters determines whether we are to adjust the
1346
 
 *                          Karlin-Altschul parameters and score matrix
1347
 
 *  @param SmithWaterman    determines whether the new local alignments
1348
 
 *                          should be computed by the optimal Smith-Waterman
1349
 
 *                          algorithm; SmithWaterman false means that
1350
 
 *                          alignments will be recomputed by the current
1351
 
 *                          X-drop algorithm as implemented in the procedure
1352
 
 *                          ALIGN.
1353
 
 *  @return                 a array of lists of SeqAlign; each element
1354
 
 *                          in the array is a list of SeqAligns for
1355
 
 *                          one query in the concatenated query.
1356
 
 *  It is assumed that at least one of adjustParameters and
1357
 
 *  SmithWaterman is >0 or true when this procedure is called A linked list
1358
 
 *  of alignments is returned; the alignments are sorted according to
1359
 
 *  the lowest E-value of the best alignment for each matching
1360
 
 *  sequence; alignments for the same matching sequence are in the
1361
 
 *  list consecutively regardless of the E-value of the secondary
1362
 
 *  alignments. Ties in sorted order are much rarer than for the
1363
 
 *  standard BLAST method, but are broken deterministically based on
1364
 
 *  the index of the matching sequences in the database.
 
1709
 *  Recompute alignments for each match found by the gapped BLAST
 
1710
 *  algorithm.
1365
1711
 */
1366
1712
Int2
1367
1713
Blast_RedoAlignmentCore(EBlastProgramType program_number,
1377
1723
                        const PSIBlastOptions* psiOptions,
1378
1724
                        BlastHSPResults* results)
1379
1725
{
1380
 
    double localScalingFactor;            /* the factor by which to
1381
 
                                           * scale the scoring system in
1382
 
                                           * order to obtain greater
1383
 
                                           * precision */
 
1726
    int status_code = 0;                    /* return value code */
 
1727
    /* the factor by which to scale the scoring system in order to
 
1728
     * obtain greater precision */
 
1729
    double localScalingFactor;
 
1730
    /* the values of the search parameters that will be recorded, altered
 
1731
     * in the search structure in this routine, and then restored before
 
1732
     * the routine exits. */
 
1733
    BlastKappa_SavedParameters *savedParams = NULL;
 
1734
    /* forbidden ranges for each database position (used in
 
1735
     * Smith-Waterman alignments) */
 
1736
    Blast_ForbiddenRanges forbidden = {0,};
 
1737
    /* a collection of alignments for each query sequence with
 
1738
     * sequences from the database */
 
1739
    BlastCompo_Heap * redoneMatches = NULL;
 
1740
    /* stores all fields needed for computing a compositionally
 
1741
     * adjusted score matrix using Newton's method */
 
1742
    Blast_CompositionWorkspace *NRrecord = NULL;
 
1743
    /* loop index */
 
1744
    int query_index;
 
1745
    /* number of queries in the concatenated query */
 
1746
    int numQueries;
 
1747
    /* keeps track of gapped alignment params */
 
1748
    BlastGapAlignStruct* gapAlign = NULL;
 
1749
    /* All alignments above this value will be reported, no matter how
 
1750
     * many. */
 
1751
    double inclusion_ethresh;
 
1752
    /* array of lists of alignments for each query to this subject */
 
1753
    BlastCompo_Alignment ** alignments = NULL;
 
1754
 
 
1755
    BlastCompo_QueryInfo * query_info = NULL;
 
1756
    Blast_RedoAlignParams * redo_align_params = NULL;
 
1757
    Boolean positionBased = (Boolean) (sbp->psi_matrix != NULL);
 
1758
    ECompoAdjustModes compo_adjust_mode =
 
1759
        (ECompoAdjustModes) extendParams->options->compositionBasedStats;
 
1760
    Boolean smithWaterman =
 
1761
        (Boolean) (extendParams->options->eTbackExt == eSmithWatermanTbck);
 
1762
    /* alignment data for the current query-subject match */
 
1763
    BlastHSPList* thisMatch = NULL;
 
1764
    /* existing alignments for a match */
 
1765
    BlastCompo_Alignment * incoming_aligns = NULL;
1384
1766
    Int4      **matrix;                   /* score matrix */
1385
 
    s_SearchParameters *searchParams; /* the values of the search
1386
 
                                       * parameters that will be
1387
 
                                       * recorded, altered in the
1388
 
                                       * search structure in this
1389
 
                                       * routine, and then restored
1390
 
                                       * before the routine
1391
 
                                       * exits. */
1392
 
    Blast_ForbiddenRanges forbidden;    /* forbidden ranges for each
1393
 
                                           * database position (used
1394
 
                                           * in Smith-Waterman
1395
 
                                           * alignments)
1396
 
                                           */
1397
 
    BlastCompo_Heap * redoneMatches;  /* a collection of alignments
1398
 
                                       * for each query sequence with
1399
 
                                       * sequences from the
1400
 
                                       * database */
1401
 
    Blast_CompositionWorkspace
1402
 
        *NRrecord = NULL;        /* stores all fields needed for
1403
 
                                  * computing a compositionally adjusted
1404
 
                                  * score matrix using Newton's method */
1405
 
    Int4 query_index;            /* loop index */
1406
 
    Int4 numQueries;             /* number of queries in the
1407
 
                                    concatenated query */
1408
 
    BlastGapAlignStruct* gapAlign;        /* keeps track of gapped
1409
 
                                             alignment params */
1410
 
    double inclusion_ethresh;    /* All alignments above this value will be
1411
 
                                    reported, no matter how many. */
1412
 
    BlastCompo_QueryInfo * query_info = NULL;
1413
 
    Blast_RedoAlignParams * redo_align_params;
1414
 
    Boolean positionBased = (sbp->psi_matrix ? TRUE : FALSE);
1415
 
    Boolean adjustParameters = extendParams->options->compositionBasedStats;
1416
 
    Boolean SmithWaterman;
1417
 
    int status_code;
1418
 
    BlastHSPList* thisMatch = NULL;  /* alignment data for the
1419
 
                                      * current query-subject
1420
 
                                      * match */
1421
 
    BlastCompo_Alignment * incoming_aligns;  /* existing algnments
1422
 
                                                for a match */
1423
 
    Blast_GappingParamsContext gapping_params_context;
1424
 
    int do_link_hsps;
1425
 
 
 
1767
    BlastKappa_GappingParamsContext gapping_params_context;
 
1768
 
 
1769
    double pvalueForThisPair = (-1); /* p-value for this match
 
1770
                                        for composition; -1 == no adjustment*/
 
1771
    double LambdaRatio; /*lambda ratio*/
 
1772
    /* which test function do we use to see if a composition-adjusted
 
1773
       p-value is desired; value needs to be passed in eventually*/
 
1774
    int compositionTestIndex = 0;
 
1775
 
 
1776
    if (positionBased) {
 
1777
        matrix = sbp->psi_matrix->pssm->data;
 
1778
    } else {
 
1779
        matrix = sbp->matrix->data;
 
1780
    }
1426
1781
    /**** Validate parameters *************/
 
1782
    if (matrix == NULL) {
 
1783
        return -1;
 
1784
    }
1427
1785
    if (0 == strcmp(scoringParams->options->matrix, "BLOSUM62_20") &&
1428
 
       !adjustParameters) {
1429
 
        return 0;                   /* BLOSUM62_20 only makes sense if
1430
 
                                     * adjustParameters is on */
 
1786
        compo_adjust_mode == eNoCompositionBasedStats) {
 
1787
        return -1;                   /* BLOSUM62_20 only makes sense if
 
1788
                                      * compo_adjust_mode is on */
1431
1789
    }
1432
1790
    if (positionBased) {
1433
 
        adjustParameters = adjustParameters ? 1 : 0;
1434
 
    }
1435
 
    if (extendParams->options->eTbackExt == eSmithWatermanTbck) {
1436
 
        SmithWaterman = TRUE;
1437
 
    } else {
1438
 
        SmithWaterman = FALSE;
1439
 
    }
 
1791
        /* Position based searches can only use traditional
 
1792
         * composition based stats */
 
1793
        if ((int) compo_adjust_mode > 1) {
 
1794
            compo_adjust_mode = eCompositionBasedStats;
 
1795
        }
 
1796
        /* A position-based search can only have one query */
 
1797
        ASSERT(queryInfo->num_queries == 1);
 
1798
        ASSERT(queryBlk->length == (Int4)sbp->psi_matrix->pssm->ncols);
 
1799
    }
 
1800
    if ((int) compo_adjust_mode > 1 &&
 
1801
        !Blast_FrequencyDataIsAvailable(scoringParams->options->matrix)) {
 
1802
        return -1;   /* Unsupported matrix */
 
1803
    }
 
1804
    /*****************/
1440
1805
    inclusion_ethresh =
1441
1806
        (psiOptions != NULL) ? psiOptions->inclusion_ethresh : 0;
1442
1807
 
1443
 
    /*****************/
1444
 
    /* Initialize searchParams */
1445
 
    searchParams =
1446
 
        s_SearchParametersNew(queryInfo->max_length, adjustParameters,
 
1808
    /* Initialize savedParams */
 
1809
    savedParams =
 
1810
        s_SavedParametersNew(queryInfo->max_length, queryInfo->num_queries,
 
1811
                             compo_adjust_mode, positionBased);
 
1812
    if (savedParams == NULL) {
 
1813
        status_code = -1;
 
1814
        goto function_cleanup;
 
1815
    }
 
1816
    status_code =
 
1817
        s_RecordInitialSearch(savedParams, sbp, scoringParams,
 
1818
                              queryInfo->max_length, compo_adjust_mode,
1447
1819
                              positionBased);
1448
 
    s_RecordInitialSearch(searchParams, queryBlk, queryInfo, sbp,
1449
 
                          scoringParams, queryInfo->max_length,
1450
 
                          adjustParameters, positionBased);
1451
 
    if (adjustParameters) {
 
1820
    if (status_code != 0) {
 
1821
        goto function_cleanup;
 
1822
    }
 
1823
    if (compo_adjust_mode != eNoCompositionBasedStats) {
1452
1824
        if((0 == strcmp(scoringParams->options->matrix, "BLOSUM62_20"))) {
1453
1825
            localScalingFactor = SCALING_FACTOR / 10;
1454
1826
        } else {
1457
1829
    } else {
1458
1830
        localScalingFactor = 1.0;
1459
1831
    }
1460
 
    s_RescaleSearch(searchParams, queryBlk, queryInfo, sbp, scoringParams,
1461
 
                    localScalingFactor, positionBased);
1462
 
    /********/
1463
 
    if (positionBased) {
1464
 
        matrix = sbp->psi_matrix->pssm->data;
1465
 
        if ( !matrix ) {
1466
 
            /* YIKES! error return
1467
 
               Char* msg =
1468
 
               "Cannot perform position-specific search without a PSSM";
1469
 
               BlastConstructErrorMessage("RedoAlignmentCore", msg, 3,
1470
 
               &(search->error_return));
1471
 
               return NULL;
1472
 
            */
1473
 
        }
1474
 
    } else {
1475
 
        matrix = sbp->matrix->data;
1476
 
    }
1477
 
    if ((status_code=BLAST_GapAlignStructNew(scoringParams,
1478
 
                                             extendParams,
1479
 
                                             BlastSeqSrcGetMaxSeqLen(seqSrc),
1480
 
                                             sbp, &gapAlign)) != 0) {
1481
 
        return status_code;
1482
 
    }
1483
 
    gapAlign->gap_x_dropoff =
1484
 
        extendParams->gap_x_dropoff_final * localScalingFactor;
 
1832
    s_RescaleSearch(sbp, scoringParams, queryInfo->num_queries,
 
1833
                    localScalingFactor);
 
1834
    status_code =
 
1835
        BLAST_GapAlignStructNew(scoringParams, extendParams,
 
1836
                                BlastSeqSrcGetMaxSeqLen(seqSrc), sbp,
 
1837
                                &gapAlign);
 
1838
    if (status_code != 0) {
 
1839
        return (Int2) status_code;
 
1840
    }
 
1841
    gapping_params_context.gap_align = gapAlign;
 
1842
    gapping_params_context.scoringParams = scoringParams;
 
1843
    gapping_params_context.sbp = sbp;
 
1844
    gapping_params_context.localScalingFactor = localScalingFactor;
 
1845
    gapping_params_context.prog_number = program_number;
1485
1846
    redo_align_params =
1486
 
        s_GetAlignParams(&gapping_params_context, program_number,
1487
 
                         gapAlign, queryBlk, queryInfo,
1488
 
                         sbp, scoringParams, extendParams, hitParams,
1489
 
                         psiOptions, scoringParams->options->matrix,
1490
 
                         localScalingFactor, adjustParameters);
1491
 
    do_link_hsps = redo_align_params->do_link_hsps;
1492
 
 
1493
 
    s_GetQueryInfo(&query_info, &numQueries, queryBlk->sequence, queryInfo);
1494
 
    if(SmithWaterman) {
1495
 
        Blast_ForbiddenRangesInitialize(&forbidden, queryInfo->max_length);
 
1847
        s_GetAlignParams(&gapping_params_context, queryBlk, queryInfo, 
 
1848
                         hitParams, extendParams);
 
1849
    if (redo_align_params == NULL) {
 
1850
        status_code = -1;
 
1851
        goto function_cleanup;
 
1852
    }
 
1853
    numQueries = queryInfo->num_queries;
 
1854
    query_info = s_GetQueryInfo(queryBlk->sequence, queryInfo);
 
1855
    if (query_info == NULL) {
 
1856
        status_code = -1;
 
1857
        goto function_cleanup;
 
1858
    }
 
1859
    if(smithWaterman) {
 
1860
        status_code =
 
1861
            Blast_ForbiddenRangesInitialize(&forbidden, queryInfo->max_length);
 
1862
        if (status_code != 0) {
 
1863
            goto function_cleanup;
 
1864
        }
1496
1865
    }
1497
1866
    redoneMatches = calloc(numQueries, sizeof(BlastCompo_Heap));
 
1867
    if (redoneMatches == NULL) {
 
1868
        status_code = -1;
 
1869
        goto function_cleanup;
 
1870
    }
1498
1871
    for (query_index = 0;  query_index < numQueries;  query_index++) {
1499
 
        BlastCompo_HeapInitialize(&redoneMatches[query_index],
1500
 
                                  hitParams->options->hitlist_size,
1501
 
                                  inclusion_ethresh);
 
1872
        status_code =
 
1873
            BlastCompo_HeapInitialize(&redoneMatches[query_index],
 
1874
                                      hitParams->options->hitlist_size,
 
1875
                                      inclusion_ethresh);
 
1876
        if (status_code != 0) {
 
1877
            goto function_cleanup;
 
1878
        }
1502
1879
    }
1503
 
    if( adjustParameters > 1 && !positionBased ) {
 
1880
    if( (int) compo_adjust_mode > 1 && !positionBased ) {
1504
1881
        NRrecord = Blast_CompositionWorkspaceNew();
1505
 
        Blast_CompositionWorkspaceInit(NRrecord,
1506
 
                                       scoringParams->options->matrix);
 
1882
        status_code =
 
1883
            Blast_CompositionWorkspaceInit(NRrecord,
 
1884
                                           scoringParams->options->matrix);
 
1885
        if (status_code != 0) {
 
1886
            goto function_cleanup;
 
1887
        }
 
1888
    }
 
1889
    alignments = calloc(numQueries, sizeof(BlastCompo_Alignment *));
 
1890
    if (alignments == NULL) {
 
1891
        status_code = -1;
 
1892
        goto function_cleanup;
1507
1893
    }
1508
1894
    while (BlastHSPStreamRead(hsp_stream, &thisMatch) != kBlastHSPStream_Eof) {
1509
1895
        /* for all matching sequences */
1510
 
        BlastCompo_MatchingSequence matchingSeq; /* the data for a matching
1511
 
                                                  * database sequence */
1512
 
        BlastCompo_Alignment ** alignments; /* array of lists of
1513
 
                                             * alignments for each
1514
 
                                             * query to this subject */
1515
 
        alignments = calloc(numQueries, sizeof(BlastCompo_Alignment *));
 
1896
        Blast_KarlinBlk * kbp;
1516
1897
 
 
1898
        /* the data for a matching database sequence */
 
1899
        BlastCompo_MatchingSequence matchingSeq = {0,};
1517
1900
        if(thisMatch->hsp_array == NULL) {
1518
1901
            continue;
1519
1902
        }
1522
1905
            break;
1523
1906
        }
1524
1907
        /* Get the sequence for this match */
1525
 
        s_MatchingSequenceInitialize(&matchingSeq, program_number,
1526
 
                                     seqSrc, gen_code_string, thisMatch->oid);
 
1908
        status_code =
 
1909
            s_MatchingSequenceInitialize(&matchingSeq, program_number,
 
1910
                                         seqSrc, gen_code_string,
 
1911
                                         thisMatch->oid);
 
1912
        if (status_code != 0) {
 
1913
            goto match_loop_cleanup;
 
1914
        }
1527
1915
        incoming_aligns =
1528
 
            s_ResultHspToDistinctAlign(queryInfo, thisMatch->hsp_array,
1529
 
                                       thisMatch->hspcnt, localScalingFactor);
1530
 
        if (SmithWaterman) {
1531
 
            Blast_RedoOneMatchSmithWaterman(alignments,
1532
 
                                            redo_align_params,
1533
 
                                            incoming_aligns,
1534
 
                                            thisMatch->hspcnt,
1535
 
                                            &matchingSeq, query_info,
1536
 
                                            numQueries, matrix,
1537
 
                                            NRrecord, &forbidden,
1538
 
                                            redoneMatches);
 
1916
            s_ResultHspToDistinctAlign(thisMatch->hsp_array, thisMatch->hspcnt,
 
1917
                                       queryInfo, localScalingFactor);
 
1918
        if (incoming_aligns == NULL) {
 
1919
            status_code = -1;
 
1920
            goto match_loop_cleanup;
 
1921
        }
 
1922
        /* All alignments in thisMatch should be to the same query */
 
1923
        kbp = sbp->kbp_gap[thisMatch->query_index];
 
1924
        if (smithWaterman) {
 
1925
            status_code =
 
1926
                Blast_RedoOneMatchSmithWaterman(alignments,
 
1927
                                                redo_align_params,
 
1928
                                                incoming_aligns,
 
1929
                                                thisMatch->hspcnt,
 
1930
                                                kbp->Lambda, kbp->logK,
 
1931
                                                &matchingSeq, query_info,
 
1932
                                                numQueries, matrix,
 
1933
                                                NRrecord, &forbidden,
 
1934
                                                redoneMatches,
 
1935
                                                &pvalueForThisPair,
 
1936
                                                compositionTestIndex,
 
1937
                                                &LambdaRatio);
1539
1938
        } else {
1540
 
            Blast_RedoOneMatch(alignments, redo_align_params,
1541
 
                               incoming_aligns, thisMatch->hspcnt,
1542
 
                               &matchingSeq, queryInfo->max_length,
1543
 
                               query_info, numQueries, matrix,
1544
 
                               NRrecord);
 
1939
            status_code =
 
1940
                Blast_RedoOneMatch(alignments, redo_align_params,
 
1941
                                   incoming_aligns, thisMatch->hspcnt,
 
1942
                                   kbp->Lambda, &matchingSeq,
 
1943
                                   queryInfo->max_length, query_info,
 
1944
                                   numQueries, matrix, NRrecord,
 
1945
                                   &pvalueForThisPair,
 
1946
                                   compositionTestIndex,
 
1947
                                   &LambdaRatio);
 
1948
        }
 
1949
        if (status_code != 0) {
 
1950
            goto match_loop_cleanup;
1545
1951
        }
1546
1952
        for (query_index = 0;  query_index < numQueries;  query_index++) {
1547
1953
            /* Loop over queries */
1548
1954
            if( alignments[query_index] != NULL) { /* alignments were found */
1549
 
                double bestEvalue;   /* best evalue among alignments in the
 
1955
                double bestEvalue;   /* best e-value among alignments in the
1550
1956
                                        hitlist */
1551
1957
                Int4 bestScore;      /* best score among alignments in
1552
1958
                                        the hitlist */
1553
 
                BlastHSPList * hsp_list; /* a hitlist containing the
1554
 
                                          * newly-computed alignments */
1555
 
                void * discardedAligns;
 
1959
                /* a hitlist containing the newly-computed alignments */
 
1960
                BlastHSPList * hsp_list = NULL;
 
1961
                void * discardedAligns = NULL;
1556
1962
                hsp_list =
1557
1963
                    s_HSPListFromDistinctAlignments(&alignments[query_index],
1558
 
                                                    matchingSeq.index);
 
1964
                                                    matchingSeq.index,
 
1965
                                                    queryInfo);
 
1966
                if (hsp_list == NULL) {
 
1967
                    status_code = -1;
 
1968
                    goto match_loop_cleanup;
 
1969
                }
1559
1970
                if (hsp_list->hspcnt > 1) {
1560
1971
                    s_HitlistReapContained(hsp_list->hsp_array,
1561
1972
                                           &hsp_list->hspcnt);
1562
1973
                }
1563
 
                s_HitlistEvaluateAndPurge(&bestScore, &bestEvalue,
1564
 
                                          hsp_list,
1565
 
                                          matchingSeq.length,
1566
 
                                          program_number, queryInfo,
1567
 
                                          sbp, hitParams,
1568
 
                                          do_link_hsps);
 
1974
                status_code =
 
1975
                    s_HitlistEvaluateAndPurge(&bestScore, &bestEvalue,
 
1976
                                              hsp_list,
 
1977
                                              matchingSeq.length,
 
1978
                                              program_number,
 
1979
                                              queryInfo, sbp,
 
1980
                                              hitParams,
 
1981
                                              pvalueForThisPair, LambdaRatio,
 
1982
                                              0);
 
1983
                if (status_code != 0) {
 
1984
                    goto query_loop_cleanup;
 
1985
                }
1569
1986
                if (bestEvalue <= hitParams->options->expect_value &&
1570
1987
                    BlastCompo_HeapWouldInsert(&redoneMatches[query_index],
1571
1988
                                               bestEvalue, bestScore,
1572
1989
                                               thisMatch->oid)) {
1573
 
                    s_HSPListRescaleScores(hsp_list, redo_align_params->Lambda,
1574
 
                                           redo_align_params->logK,
1575
 
                                           localScalingFactor);
1576
 
 
1577
 
                    BlastCompo_HeapInsert(&redoneMatches[query_index],
1578
 
                                          hsp_list, bestEvalue,
1579
 
                                          bestScore, thisMatch->oid,
1580
 
                                          &discardedAligns);
 
1990
                    /* The best alignment is significant */
 
1991
                    s_HSPListNormalizeScores(hsp_list,
 
1992
                                             kbp->Lambda, kbp->logK,
 
1993
                                             localScalingFactor);
 
1994
                    status_code =
 
1995
                        BlastCompo_HeapInsert(&redoneMatches[query_index],
 
1996
                                              hsp_list, bestEvalue,
 
1997
                                              bestScore, thisMatch->oid,
 
1998
                                              &discardedAligns);
 
1999
                    if (status_code == 0) {
 
2000
                        hsp_list = NULL;
 
2001
                    } else {
 
2002
                        goto query_loop_cleanup;
 
2003
                    }
1581
2004
                    if (discardedAligns != NULL) {
1582
2005
                        Blast_HSPListFree(discardedAligns);
1583
2006
                    }
1584
 
                } else { /* the best alignment is not significant */
1585
 
                    Blast_HSPListFree(hsp_list);
1586
 
                } /* end if the best alignment is significant */
 
2007
                }
 
2008
query_loop_cleanup:
 
2009
                Blast_HSPListFree(hsp_list);
 
2010
                if (status_code != 0) {
 
2011
                    goto match_loop_cleanup;
 
2012
                }
1587
2013
            } /* end if any alignments were found */
1588
2014
        } /* end loop over queries */
 
2015
match_loop_cleanup:
 
2016
        if (status_code != 0) {
 
2017
            for (query_index = 0;  query_index < numQueries;  query_index++) {
 
2018
                BlastCompo_AlignmentsFree(&alignments[query_index],
 
2019
                                          s_FreeEditScript);
 
2020
            }
 
2021
        }
1589
2022
        s_MatchingSequenceRelease(&matchingSeq);
1590
2023
        thisMatch = Blast_HSPListFree(thisMatch);
1591
 
        sfree(alignments);
1592
2024
        BlastCompo_AlignmentsFree(&incoming_aligns, NULL);
 
2025
        if (status_code != 0) {
 
2026
            goto function_cleanup;
 
2027
        }
1593
2028
    }
1594
2029
    /* end for all matching sequences */
1595
 
    /* YIKES! handle multiple queries
1596
 
       for (query_index = 0;  query_index < numQueries;  query_index++) {
1597
 
       results[query_index] =
1598
 
       BlastCompo_HeapToFlatList(&redoneMatches[query_index]);
1599
 
       }
1600
 
    */
1601
 
    s_HeapToFlatList(&redoneMatches[0], results,
1602
 
                     hitParams->options->hitlist_size);
1603
 
    /* Clean up */
 
2030
function_cleanup:
 
2031
    sfree(alignments);
 
2032
    if (status_code == 0) {
 
2033
        s_FillResultsFromCompoHeaps(results, redoneMatches,
 
2034
                                    hitParams->options->hitlist_size);
 
2035
    } else {
 
2036
        if (redoneMatches != NULL) {
 
2037
            s_ClearHeap(&redoneMatches[0]);
 
2038
        }
 
2039
    }
1604
2040
    free(query_info);
1605
2041
    Blast_RedoAlignParamsFree(&redo_align_params);
1606
 
    for (query_index = 0;  query_index < numQueries;  query_index++) {
1607
 
        BlastCompo_HeapRelease(&redoneMatches[query_index]);
 
2042
    if (redoneMatches != NULL) {
 
2043
        for (query_index = 0;  query_index < numQueries;  query_index++) {
 
2044
            BlastCompo_HeapRelease(&redoneMatches[query_index]);
 
2045
        }
 
2046
        sfree(redoneMatches); redoneMatches = NULL;
1608
2047
    }
1609
 
    sfree(redoneMatches); redoneMatches = NULL;
1610
 
    if(SmithWaterman) {
 
2048
    if (smithWaterman) {
1611
2049
        Blast_ForbiddenRangesRelease(&forbidden);
1612
2050
    }
1613
 
    gapAlign = BLAST_GapAlignStructFree(gapAlign);
1614
 
    s_RestoreSearch(searchParams, sbp, matrix, queryInfo->max_length,
1615
 
                    scoringParams, positionBased, adjustParameters);
1616
 
    s_SearchParametersFree(&searchParams);
1617
 
    if (NULL != NRrecord) {
1618
 
        Blast_CompositionWorkspaceFree(&NRrecord);
 
2051
    if (gapAlign != NULL) {
 
2052
        gapAlign = BLAST_GapAlignStructFree(gapAlign);
1619
2053
    }
1620
 
    return 0;
 
2054
    s_RestoreSearch(sbp, scoringParams, savedParams, queryBlk->length,
 
2055
                    positionBased, compo_adjust_mode);
 
2056
    s_SavedParametersFree(&savedParams);
 
2057
    Blast_CompositionWorkspaceFree(&NRrecord);
 
2058
 
 
2059
    return (Int2) status_code;
1621
2060
}