48
48
#include <algo/blast/core/blast_filter.h>
49
49
#include <algo/blast/core/link_hsps.h>
50
50
#include "blast_psi_priv.h"
51
#include "matrix_freq_ratios.h"
52
51
#include "blast_gapalign_priv.h"
53
52
#include "blast_posit.h"
54
53
#include "blast_hits_priv.h"
56
55
#include <algo/blast/composition_adjustment/nlm_linear_algebra.h>
57
#include <algo/blast/composition_adjustment/composition_constants.h>
58
#include <algo/blast/composition_adjustment/composition_adjustment.h>
59
56
#include <algo/blast/composition_adjustment/compo_heap.h>
60
#include <algo/blast/composition_adjustment/smith_waterman.h>
61
57
#include <algo/blast/composition_adjustment/redo_alignment.h>
58
#include <algo/blast/composition_adjustment/matrix_frequency_data.h>
61
/** Compile-time option; if set to a true value, then blastp runs
62
that use Blast_RedoAlignmentCore to compute the traceback will not
63
SEG the subject sequence */
64
#ifndef KAPPA_BLASTP_NO_SEG_SEQUENCE
65
#define KAPPA_BLASTP_NO_SEG_SEQUENCE 0
69
/** Compile-time option; if set to a true value, then blastp runs
70
that use Blast_RedoAlignmentCore to compute the traceback will not
71
SEG the subject sequence */
72
#ifndef KAPPA_TBLASTN_NO_SEG_SEQUENCE
73
#define KAPPA_TBLASTN_NO_SEG_SEQUENCE 0
64
* Scale the scores in an HSP list and reset the bit scores.
78
* Given a list of HSPs with (possibly) high-precision scores, rescale
79
* the scores to have standard precision and set the scale-independent
80
* bit scores. This routine does *not* resort the list; it is assumed
81
* that the list is already sorted according to e-values that have been
82
* computed using the initial, higher-precision scores.
66
84
* @param hsp_list the HSP list
67
85
* @param logK Karlin-Altschul statistical parameter [in]
68
86
* @param lambda Karlin-Altschul statistical parameter [in]
69
87
* @param scoreDivisor the value by which reported scores are to be
70
* @todo rename to something which is more intention revealing, merge with
71
* function of the same name in blast_traceback.c
75
s_HSPListRescaleScores(BlastHSPList * hsp_list,
90
s_HSPListNormalizeScores(BlastHSPList * hsp_list,
81
96
for(hsp_index = 0; hsp_index < hsp_list->hspcnt; hsp_index++) {
174
190
* @param alignments A list of distinct alignments; freed before return [in]
175
191
* @param oid Ordinal id of a database sequence [in]
176
* @return Allocated and filled BlastHSPList strucutre.
192
* @param queryInfo information about all queries in this search [in]
193
* @return Allocated and filled BlastHSPList structure.
178
195
static BlastHSPList *
179
s_HSPListFromDistinctAlignments(
180
BlastCompo_Alignment ** alignments,
196
s_HSPListFromDistinctAlignments(BlastCompo_Alignment ** alignments,
198
BlastQueryInfo* queryInfo)
183
const int unknown_value = 0;
184
BlastHSPList * hsp_list = Blast_HSPListNew(0);
185
BlastCompo_Alignment * align;
200
int status; /* return code for any routine called */
201
const int unknown_value = 0; /* dummy constant to use when a
202
parameter value is not known */
203
BlastCompo_Alignment * align; /* an alignment in the list */
204
BlastHSPList * hsp_list; /* the new HSP list */
206
hsp_list = Blast_HSPListNew(0);
207
if (hsp_list == NULL) {
187
210
hsp_list->oid = oid;
189
212
for (align = *alignments; NULL != align; align = align->next) {
190
213
BlastHSP * new_hsp = NULL;
191
214
GapEditScript * editScript = align->context;
215
int query_offset, queryStart, queryEnd;
192
216
align->context = NULL;
193
Blast_HSPInit(align->queryStart, align->queryEnd,
194
align->matchStart, align->matchEnd,
195
unknown_value, unknown_value,
196
0, 0, align->frame, align->score,
197
&editScript, &new_hsp);
218
query_offset = queryInfo->contexts[align->queryIndex].query_offset;
219
queryStart = align->queryStart - query_offset;
220
queryEnd = align->queryEnd - query_offset;
222
status = Blast_HSPInit(queryStart, queryEnd,
223
align->matchStart, align->matchEnd,
224
unknown_value, unknown_value,
226
0, (Int2) align->frame, align->score,
227
&editScript, &new_hsp);
228
switch (align->matrix_adjust_rule) {
229
case eDontAdjustMatrix:
230
new_hsp->comp_adjustment_method = eNoCompositionBasedStats;
232
case eCompoScaleOldMatrix:
233
new_hsp->comp_adjustment_method = eCompositionBasedStats;
236
new_hsp->comp_adjustment_method = eCompositionMatrixAdjust;
199
241
/* At this point, the subject and possibly the query sequence have
200
242
* been filtered; since it is not clear that num_ident of the
201
243
* filtered sequences, rather than the original, is desired,
202
* explictly leave num_ident blank. */
244
* explicitly leave num_ident blank. */
203
245
new_hsp->num_ident = 0;
205
Blast_HSPListSaveHSP(hsp_list, new_hsp);
207
BlastCompo_AlignmentsFree(alignments, s_FreeEditScript);
208
Blast_HSPListSortByScore(hsp_list);
247
status = Blast_HSPListSaveHSP(hsp_list, new_hsp);
252
BlastCompo_AlignmentsFree(alignments, s_FreeEditScript);
253
Blast_HSPListSortByScore(hsp_list);
255
hsp_list = Blast_HSPListFree(hsp_list);
262
* Adding evalues to a list of HSPs and remove those that do not have
263
* sufficiently good (low) evalue.
265
* @param *pbestScore best (highest) score in the list
266
* @param *pbestEvalue best (lowest) evalue in the list
267
* @param hsp_list the list
268
* @param subject_length length of the subject sequence
269
* @param program_number the type of BLAST search being performed
270
* @param queryInfo information about the queries
271
* @param sbp the score block for this search
272
* @param hitParams parameters used to assign evalues and
273
* decide whether to save hits.
274
* @param pvalueForThisPair composition p-value
275
* @param LambdaRatio lambda ratio, if available
276
* @param subject_id index of subject
278
* @return 0 on success; -1 on failure (can fail because some methods
279
* of generating evalues use auxiliary structures)
215
282
s_HitlistEvaluateAndPurge(int * pbestScore, double *pbestEvalue,
216
283
BlastHSPList * hsp_list,
217
284
int subject_length,
219
286
BlastQueryInfo* queryInfo,
220
287
BlastScoreBlk* sbp,
221
288
const BlastHitSavingParameters* hitParams,
289
double pvalueForThisPair,
224
294
*pbestEvalue = DBL_MAX;
227
BLAST_LinkHsps(program_number, hsp_list,
228
queryInfo, subject_length,
229
sbp, hitParams->link_hsp_params, TRUE);
296
if (hitParams->link_hsp_params) {
297
status = BLAST_LinkHsps(program_number, hsp_list, queryInfo,
299
hitParams->link_hsp_params, TRUE);
231
Blast_HSPListGetEvalues(queryInfo, hsp_list, TRUE, sbp,
232
0.0, /* use a non-zero gap decay only when
234
1.0); /* Use scaling factor equal to
235
1, because both scores and
236
Lambda are scaled, so they
237
will cancel each other. */
239
Blast_HSPListReapByEvalue(hsp_list, hitParams->options);
240
if (hsp_list->hspcnt > 0) {
241
*pbestEvalue = hsp_list->best_evalue;
242
*pbestScore = hsp_list->hsp_array[0]->score;
302
Blast_HSPListGetEvalues(queryInfo, hsp_list, TRUE, sbp,
303
0.0, /* use a non-zero gap decay
304
only when linking HSPs */
305
1.0); /* Use scaling factor equal to
306
1, because both scores and
307
Lambda are scaled, so they
308
will cancel each other. */
311
Blast_HSPListReapByEvalue(hsp_list, hitParams->options);
312
if (hsp_list->hspcnt > 0) {
313
*pbestEvalue = hsp_list->best_evalue;
314
*pbestScore = hsp_list->hsp_array[0]->score;
317
return status == 0 ? 0 : -1;
322
* A callback routine: compute lambda for the given score
324
* (@sa calc_lambda_type).
248
327
s_CalcLambda(double probs[], int min_score, int max_score, double lambda0)
252
Blast_ScoreFreq freq;
330
int i; /* loop index */
331
int score_range; /* range of possible scores */
332
double avg; /* expected score of aligning two characters */
333
Blast_ScoreFreq freq; /* score frequency data */
254
n = max_score - min_score + 1;
335
score_range = max_score - min_score + 1;
256
for (i = 0; i < n; i++) {
337
for (i = 0; i < score_range; i++) {
257
338
avg += (min_score + i) * probs[i];
259
340
freq.score_min = min_score;
264
345
freq.sprob = &probs[-min_score];
265
346
freq.score_avg = avg;
267
return Blast_KarlinLambdaNR(&freq, lambda0);
271
/** Return the a matrix of the frequency ratios that underlie the
272
* score matrix being used on this pass. The returned matrix
273
* is position-specific, so if we are in the first pass, use
274
* query to convert the 20x20 standard matrix into a position-specific
275
* variant. matrixName is the name of the underlying 20x20
276
* score matrix used. numPositions is the length of the query;
277
* startNumerator is the matrix of frequency ratios as stored
278
* in posit.h. It needs to be divided by the frequency of the
279
* second character to get the intended ratio
280
* @param sbp statistical information for blast [in]
281
* @param query the query sequence [in]
282
* @param matrixName name of the underlying matrix [in]
283
* @param startNumerator matrix of frequency ratios as stored
284
* in posit.h. It needs to be divided by the frequency of the
285
* second character to get the intended ratio [in]
286
* @param numPositions length of the query [in]
348
return Blast_KarlinLambdaNR(&freq, lambda0);
352
/** Fill a two-dimensional array with the frequency ratios that
353
* underlie a position specific score matrix (PSSM).
355
* @param returnRatios a two-dimensional array with BLASTAA_SIZE
357
* @param numPositions the number of rows in returnRatios
358
* @param query query sequence data, of length numPositions
359
* @param matrixName the name of the position independent matrix
360
* corresponding to this PSSM
361
* @param startNumerator position-specific data used to generate the
363
* @return 0 on success; -1 if the named matrix isn't known, or if
364
* there was a memory error
365
* @todo find out what start numerator is.
368
s_GetPosBasedStartFreqRatios(double ** returnRatios,
371
const char *matrixName,
372
double **startNumerator)
374
Int4 i,j; /* loop indices */
375
SFreqRatios * stdFreqRatios = NULL; /* frequency ratios for the
377
double *standardProb; /* probabilities of each
379
const double kPosEpsilon = 0.0001; /* values below this cutoff
380
are treated specially */
382
stdFreqRatios = _PSIMatrixFrequencyRatiosNew(matrixName);
383
if (stdFreqRatios == NULL) {
386
for (i = 0; i < numPositions; i++) {
387
for (j = 0; j < BLASTAA_SIZE; j++) {
388
returnRatios[i][j] = stdFreqRatios->data[query[i]][j];
391
stdFreqRatios = _PSIMatrixFrequencyRatiosFree(stdFreqRatios);
393
standardProb = BLAST_GetStandardAaProbabilities();
394
if(standardProb == NULL) {
397
/*reverse multiplication done in posit.c*/
398
for (i = 0; i < numPositions; i++) {
399
for (j = 0; j < BLASTAA_SIZE; j++) {
400
if ((standardProb[query[i]] > kPosEpsilon) &&
401
(standardProb[j] > kPosEpsilon) &&
402
(j != eStopChar) && (j != eXchar) &&
403
(startNumerator[i][j] > kPosEpsilon)) {
404
returnRatios[i][j] = startNumerator[i][j] / standardProb[j];
415
* Fill a two-dimensional array with the frequency ratios that underlie the
416
* named score matrix.
418
* @param returnRatios a two-dimensional array of size
419
* BLASTAA_SIZE x BLASTAA_SIZE
420
* @param matrixName the name of a matrix
421
* @return 0 on success; -1 if the named matrix isn't known, or if
422
* there was a memory error
289
425
s_GetStartFreqRatios(double ** returnRatios,
291
const char *matrixName,
292
double **startNumerator,
294
Boolean positionSpecific)
426
const char *matrixName)
430
/* Frequency ratios for the matrix */
297
431
SFreqRatios * stdFreqRatios = NULL;
298
const double kPosEpsilon = 0.0001;
300
433
stdFreqRatios = _PSIMatrixFrequencyRatiosNew(matrixName);
301
if (positionSpecific) {
302
for (i = 0; i < numPositions; i++) {
303
for (j = 0; j < BLASTAA_SIZE; j++) {
304
returnRatios[i][j] = stdFreqRatios->data[query[i]][j];
308
for (i = 0; i < BLASTAA_SIZE; i++) {
309
for (j = 0; j < BLASTAA_SIZE; j++) {
310
returnRatios[i][j] = stdFreqRatios->data[i][j];
434
if (stdFreqRatios == NULL) {
437
for (i = 0; i < BLASTAA_SIZE; i++) {
438
for (j = 0; j < BLASTAA_SIZE; j++) {
439
returnRatios[i][j] = stdFreqRatios->data[i][j];
314
442
stdFreqRatios = _PSIMatrixFrequencyRatiosFree(stdFreqRatios);
316
if (positionSpecific) {
317
double *standardProb; /*probabilities of each letter*/
318
standardProb = BLAST_GetStandardAaProbabilities();
320
/*reverse multiplication done in posit.c*/
321
for (i = 0; i < numPositions; i++) {
322
for (j = 0; j < BLASTAA_SIZE; j++) {
323
if ((standardProb[query[i]] > kPosEpsilon) &&
324
(standardProb[j] > kPosEpsilon) &&
325
(j != eStopChar) && (j != eXchar) &&
326
(startNumerator[i][j] > kPosEpsilon)) {
327
returnRatios[i][j] = startNumerator[i][j]/standardProb[j];
336
448
/** SCALING_FACTOR is a multiplicative factor used to get more bits of
337
449
* precision in the integer matrix scores. It cannot be arbitrarily
338
* large because we do not want total alignment scores to exceedto
450
* large because we do not want total alignment scores to exceed
339
451
* -(BLAST_SCORE_MIN) */
340
452
#define SCALING_FACTOR 32
344
* produce a scaled-up version of the position-specific matrix
345
* starting from posFreqs
456
* Produce a scaled-up version of the position-specific matrix
457
* with a given set of position-specific residue frequencies.
347
459
* @param fillPosMatrix is the matrix to be filled
348
* @param nonposMatrix is the underlying position-independent matrix,
349
* used to fill positions where frequencies are
351
* @param sbp stores various parameters of the search
352
460
* @param matrixName name of the standard substitution matrix [in]
353
461
* @param posFreqs PSSM's frequency ratios [in]
354
462
* @param query Query sequence data [in]
355
463
* @param queryLength Length of the query sequence above [in]
464
* @param sbp stores various parameters of the search
465
* @param scale_factor amount by which ungapped parameters should be
467
* @return 0 on success; -1 on failure
358
s_ScalePosMatrix(int **fillPosMatrix,
360
const char *matrixName,
470
s_ScalePosMatrix(int ** fillPosMatrix,
471
const char * matrixName,
478
/* Data used by scaling routines */
366
479
Kappa_posSearchItems *posSearch = NULL;
480
/* A reduced collection of search parameters used by PSI-blast */
367
481
Kappa_compactSearchItems *compactSearch = NULL;
482
/* Representation of a PSSM internal to PSI-blast */
368
483
_PSIInternalPssmData* internal_pssm = NULL;
369
int status = PSI_SUCCESS;
371
487
posSearch = Kappa_posSearchItemsNew(queryLength, matrixName,
372
488
fillPosMatrix, posFreqs);
373
489
compactSearch = Kappa_compactSearchItemsNew(query, queryLength, sbp);
375
490
/* Copy data into new structures */
376
491
internal_pssm = _PSIInternalPssmDataNew(queryLength, BLASTAA_SIZE);
492
if (posSearch == NULL || compactSearch == NULL || internal_pssm == NULL) {
377
496
_PSICopyMatrix_int(internal_pssm->pssm, posSearch->posMatrix,
378
497
internal_pssm->ncols, internal_pssm->nrows);
379
498
_PSICopyMatrix_int(internal_pssm->scaled_pssm,
400
516
internal_pssm->freq_ratios,
401
517
internal_pssm->ncols, internal_pssm->nrows);
402
518
status = Kappa_impalaScaling(posSearch, compactSearch, (double)
403
SCALING_FACTOR, FALSE, sbp);
405
internal_pssm = _PSIInternalPssmDataFree(internal_pssm);
406
posSearch = Kappa_posSearchItemsFree(posSearch);
407
compactSearch = Kappa_compactSearchItemsFree(compactSearch);
519
scale_factor, FALSE, sbp);
410
521
internal_pssm = _PSIInternalPssmDataFree(internal_pssm);
411
522
posSearch = Kappa_posSearchItemsFree(posSearch);
412
523
compactSearch = Kappa_compactSearchItemsFree(compactSearch);
530
* Convert an array of HSPs to a list of BlastCompo_Alignment objects.
531
* The context field of each BlastCompo_Alignment is set to point to the
534
* @param hsp_array an array of HSPs
535
* @param hspcnt the length of hsp_array
536
* @param queryInfo information about the concatenated query
537
* @param localScalingFactor the amount by which this search is scaled
539
* @return the new list of alignments; or NULL if there is an out-of-memory
540
* error (or if the original array is empty)
417
542
static BlastCompo_Alignment *
418
s_ResultHspToDistinctAlign(BlastQueryInfo* queryInfo,
419
BlastHSP * hsp_array[], Int4 hspcnt,
543
s_ResultHspToDistinctAlign(BlastHSP * hsp_array[], Int4 hspcnt,
544
BlastQueryInfo* queryInfo,
420
545
double localScalingFactor)
422
BlastCompo_Alignment *aligns = NULL, *tail = NULL, *new_align = NULL;
547
BlastCompo_Alignment * aligns = NULL; /* new list of alignments */
548
BlastCompo_Alignment * tail = NULL; /* last element in aligns */
549
int hsp_index; /* loop index */
424
551
for (hsp_index = 0; hsp_index < hspcnt; hsp_index++) {
425
int queryIndex, queryEnd, matchEnd;
426
BlastHSP * hsp = hsp_array[hsp_index];
427
queryEnd = hsp->query.end;
428
matchEnd = hsp->subject.end;
429
/* YIKES! how do we handle multiple queries */
431
if(search->mult_queries != NULL) {
433
GetQueryNum(search->mult_queries,
434
hsp->query_offset, queryEnd - 1, 0);
552
BlastHSP * hsp = hsp_array[hsp_index]; /* current HSP */
553
BlastCompo_Alignment * new_align; /* newly-created alignment */
554
int queryStart; /* start of the query context
555
in the concatenated query */
556
int queryOffset, queryEnd; /* coordinates of the
558
concatenated query */
559
/* Incoming alignments will have coordinates of the query
560
portion relative to a particular query context; they must
561
be shifted for used in the composition_adjustment library.
563
queryStart = queryInfo->contexts[hsp->context].query_offset;
564
queryOffset = hsp->query.offset + queryStart;
565
queryEnd = hsp->query.end + queryStart;
441
BlastCompo_AlignmentNew(hsp->score * localScalingFactor,
442
eNoCompositionAdjustment,
443
hsp->query.offset, queryEnd, queryIndex,
444
hsp->subject.offset, matchEnd,
567
BlastCompo_AlignmentNew((int) (hsp->score * localScalingFactor),
569
queryOffset, queryEnd, hsp->context,
570
hsp->subject.offset, hsp->subject.end,
445
571
hsp->subject.frame, hsp);
446
572
if (new_align == NULL) /* out of memory */
447
573
goto error_return;
574
if (tail == NULL) { /* if the list aligns is empty; */
575
/* make new_align the first element in the list */
449
576
aligns = new_align;
578
/* otherwise add new_align to the end of the list */
451
579
tail->next = new_align;
453
581
tail = new_align;
573
704
* @param gen_code_string genetic code for translated queries
574
705
* @param subject_index index of the matching sequence in the database
577
s_MatchingSequenceInitialize(
578
BlastCompo_MatchingSequence * self,
708
s_MatchingSequenceInitialize(BlastCompo_MatchingSequence * self,
579
709
EBlastProgramType program_number,
580
710
const BlastSeqSrc* seqSrc,
581
711
const Uint1* gen_code_string,
582
712
Int4 subject_index)
584
Kappa_SequenceLocalData * local_data =
585
malloc(sizeof(Kappa_SequenceLocalData));
586
self->local_data = local_data;
588
local_data->seq_src = seqSrc;
589
local_data->prog_number = program_number;
590
local_data->genetic_code = gen_code_string;
592
memset((void*) &local_data->seq_arg, 0, sizeof(local_data ->seq_arg));
593
local_data->seq_arg.oid = self->index = subject_index;
595
if( program_number == eBlastTypeTblastn ) {
596
local_data->seq_arg.encoding = eBlastEncodingNcbi4na;
598
local_data->seq_arg.encoding = eBlastEncodingProtein;
600
if (BlastSeqSrcGetSequence(seqSrc, (void*) &local_data->seq_arg) >= 0) {
602
BlastSeqSrcGetSeqLen(seqSrc, (void*) &local_data->seq_arg);
609
/** Release the resources associated with a matching sequence. */
611
s_MatchingSequenceRelease(BlastCompo_MatchingSequence * self)
614
Kappa_SequenceLocalData * local_data = self->local_data;
615
BlastSeqSrcReleaseSequence(local_data->seq_src,
616
(void*)&local_data->seq_arg);
617
BlastSequenceBlkFree(local_data->seq_arg.seq);
618
free(self->local_data);
619
self->local_data = NULL;
714
BlastKappa_SequenceInfo * seq_info; /* BLAST-specific sequence
717
self->local_data = NULL;
719
seq_info = malloc(sizeof(BlastKappa_SequenceInfo));
720
if (seq_info != NULL) {
721
self->local_data = seq_info;
723
seq_info->seq_src = seqSrc;
724
seq_info->prog_number = program_number;
725
seq_info->genetic_code = gen_code_string;
727
memset((void*) &seq_info->seq_arg, 0, sizeof(seq_info->seq_arg));
728
seq_info->seq_arg.oid = self->index = subject_index;
730
if( program_number == eBlastTypeTblastn ) {
731
seq_info->seq_arg.encoding = eBlastEncodingNcbi4na;
733
seq_info->seq_arg.encoding = eBlastEncodingProtein;
735
if (BlastSeqSrcGetSequence(seqSrc, (void*) &seq_info->seq_arg) >= 0) {
737
BlastSeqSrcGetSeqLen(seqSrc, (void*) &seq_info->seq_arg);
742
if (self->length == 0) {
743
/* Could not obtain the required data */
744
s_MatchingSequenceRelease(self);
759
* Filter low complexity regions from the sequence data; uses the SEG
762
* @param seqData data to be filtered
763
* @param program_name type of search being performed
764
* @return 0 for success; -1 for out-of-memory
767
s_DoSegSequenceData(BlastCompo_SequenceData * seqData,
768
EBlastProgramType program_name)
771
BlastSeqLoc* mask_seqloc = NULL;
772
SBlastFilterOptions* filter_options = NULL;
774
status = BlastFilteringOptionsFromString(program_name,
775
BLASTP_MASK_INSTRUCTIONS,
776
&filter_options, NULL);
778
status = BlastSetUp_Filter(program_name, seqData->data,
779
seqData->length, 0, filter_options,
781
filter_options = SBlastFilterOptionsFree(filter_options);
784
Blast_MaskTheResidues(seqData->data, seqData->length,
785
FALSE, mask_seqloc, FALSE, 0);
787
if (mask_seqloc != NULL) {
788
mask_seqloc = BlastSeqLocFree(mask_seqloc);
631
795
* Obtain a string of translated data
633
797
* @param self the sequence from which to obtain the data [in]
634
798
* @param range the range and translation frame to get [in]
635
799
* @param seqData the resulting data [out]
801
* @return 0 on success; -1 on failure
638
804
s_SequenceGetTranslatedRange(const BlastCompo_MatchingSequence * self,
639
805
const BlastCompo_SequenceRange * range,
640
806
BlastCompo_SequenceData * seqData )
642
ASSERT( 0 && "Not implemented" );
809
BlastKappa_SequenceInfo * local_data; /* BLAST-specific
810
information associated
812
Uint1 * translation_buffer; /* a buffer for the translated,
813
amino-acid sequence */
814
Int4 translated_length; /* length of the translated sequence */
815
int translation_frame; /* frame in which to translate */
816
Uint1 * na_sequence; /* the nucleotide sequence */
817
int translation_start; /* location in na_sequence to start
819
int num_nucleotides; /* the number of nucleotides to be translated */
821
local_data = self->local_data;
822
na_sequence = local_data->seq_arg.seq->sequence_start;
824
/* Initialize seqData to nil, in case this routine fails */
825
seqData->buffer = NULL;
826
seqData->data = NULL;
829
translation_frame = range->context;
830
if (translation_frame > 0) {
831
translation_start = 3 * range->begin;
834
self->length - 3 * range->end + translation_frame + 1;
837
3 * (range->end - range->begin) + ABS(translation_frame) - 1;
839
status = Blast_GetPartialTranslation(na_sequence + translation_start,
841
(Int2) translation_frame,
842
local_data->genetic_code,
847
seqData->buffer = translation_buffer;
848
seqData->data = translation_buffer + 1;
849
seqData->length = translated_length;
851
if ( !(KAPPA_TBLASTN_NO_SEG_SEQUENCE) ) {
852
status = s_DoSegSequenceData(seqData, eBlastTypeTblastn);
854
free(seqData->buffer);
855
seqData->buffer = NULL;
856
seqData->data = NULL;
866
* Get a string of protein data from a protein sequence.
868
* @param self a protein sequence [in]
869
* @param range the range to get [in]
870
* @param seqData the resulting data [out]
872
* @return 0 on success; -1 on failure
875
s_SequenceGetProteinRange(const BlastCompo_MatchingSequence * self,
876
const BlastCompo_SequenceRange * range,
877
BlastCompo_SequenceData * seqData )
879
int status = 0; /* return status */
880
Int4 idx; /* loop index */
881
Uint1 *origData; /* the unfiltered data for the sequence */
882
/* BLAST-specific sequence information */
883
BlastKappa_SequenceInfo * local_data = self->local_data;
885
seqData->data = NULL;
887
/* Copy the entire sequence (necessary for SEG filtering.) */
888
seqData->buffer = calloc((self->length + 2), sizeof(Uint1));
889
if (seqData->buffer == NULL) {
892
/* First and last characters of the buffer MUST be '\0', which is
893
* true here because the buffer was allocated using calloc. */
894
seqData->data = seqData->buffer + 1;
895
seqData->length = self->length;
897
origData = local_data->seq_arg.seq->sequence;
898
for (idx = 0; idx < seqData->length; idx++) {
899
/* Copy the sequence data, replacing occurrences of amino acid
900
* number 24 (Selenocysteine) with number 21 (Undetermined or
902
if (origData[idx] != 24) {
903
seqData->data[idx] = origData[idx];
905
seqData->data[idx] = 21;
906
fprintf(stderr, "Selenocysteine (U) at position %ld"
911
if ( !(KAPPA_BLASTP_NO_SEG_SEQUENCE) ) {
912
status = s_DoSegSequenceData(seqData, eBlastTypeBlastp);
914
/* Fit the data to the range. */
915
seqData ->data = &seqData->data[range->begin - 1];
916
*seqData->data++ = '\0';
917
seqData ->length = range->end - range->begin;
920
free(seqData->buffer);
921
seqData->buffer = NULL;
922
seqData->data = NULL;
649
931
* @param self sequence information [in]
650
932
* @param range range specifying the range of data [in]
651
933
* @param seqData the sequence data obtained [out]
935
* @return 0 on success; -1 on failure
655
const BlastCompo_MatchingSequence * self,
938
s_SequenceGetRange(const BlastCompo_MatchingSequence * self,
656
939
const BlastCompo_SequenceRange * range,
657
940
BlastCompo_SequenceData * seqData )
659
Kappa_SequenceLocalData * local_data = self->local_data;
660
if (local_data->prog_number == eBlastTypeTblastn) {
942
BlastKappa_SequenceInfo * seq_info = self->local_data;
943
if (seq_info->prog_number == eBlastTypeTblastn) {
661
944
/* The sequence must be translated. */
662
s_SequenceGetTranslatedRange(self, range, seqData);
664
/* The sequence does not need to be translated. */
666
Uint1 *origData; /* the unfiltered data for the sequence */
668
/* Copy the entire sequence (necessary for SEG filtering.) */
669
seqData->buffer = calloc((self->length + 2), sizeof(Uint1));
670
/* First and last characters of the buffer MUST be '\0', which is
671
* true here because the buffer was allocated using calloc. */
672
seqData->data = seqData->buffer + 1;
673
seqData->length = self->length;
675
origData = local_data->seq_arg.seq->sequence;
676
for (idx = 0; idx < seqData->length; idx++) {
677
/* Copy the sequence data, replacing occurrences of amino acid
678
* number 24 (Selenocysteine) with number 21 (Undetermined or
680
if (origData[idx] != 24) {
681
seqData->data[idx] = origData[idx];
683
seqData->data[idx] = 21;
684
fprintf(stderr, "Selenocysteine (U) at position %ld"
689
#ifndef KAPPA_NO_SEG_SEQUENCE
690
/* take as input an amino acid string and its length; compute
691
* a filtered amino acid string and return the filtered string */
693
BlastSeqLoc* mask_seqloc;
694
const EBlastProgramType k_program_name = eBlastTypeBlastp;
695
SBlastFilterOptions* filter_options;
697
BlastFilteringOptionsFromString(k_program_name,
698
BLASTP_MASK_INSTRUCTIONS,
699
&filter_options, NULL);
700
BlastSetUp_Filter(k_program_name, seqData->data, seqData->length,
701
0, filter_options, &mask_seqloc, NULL);
702
filter_options = SBlastFilterOptionsFree(filter_options);
704
Blast_MaskTheResidues(seqData->data, seqData->length,
705
FALSE, mask_seqloc, FALSE, 0);
706
mask_seqloc = BlastSeqLocFree(mask_seqloc);
709
/* Fit the data to the range. */
710
seqData ->data = &seqData->data[range->begin - 1];
711
*seqData->data++ = '\0';
712
seqData ->length = range->end - range->begin;
713
} /* end else the sequence does not need to be translated */
719
* Computes an appropriate starting point for computing the traceback
720
* for an HSP. The start point depends on the matrix, the window, and
721
* the filtered sequence, and so may not be the start point saved in
722
* the HSP during the preliminary gapped extension.
724
* @param q_start the start point in the query [out]
725
* @param s_start the start point in the subject [out]
726
* @param sbp general scoring info (includes the matrix) [in]
727
* @param positionBased is this search position-specific? [in]
728
* @param hsp the HSP to be considered [in]
729
* @param window the window used to compute the traceback [in]
730
* @param query the query data [in]
731
* @param subject the subject data [in]
735
s_StartingPointForHit(Int4 * q_start,
737
const BlastScoreBlk* sbp,
738
Boolean positionBased,
740
BlastCompo_SequenceRange * range,
741
BlastCompo_SequenceData * query,
742
BlastCompo_SequenceData * subject)
744
hsp->subject.offset -= range->begin;
745
hsp->subject.gapped_start -= range->begin;
747
if(BLAST_CheckStartForGappedAlignment(hsp, query->data,
748
subject->data, sbp)) {
749
/* We may use the starting point supplied by the HSP. */
750
*q_start = hsp->query.gapped_start;
751
*s_start = hsp->subject.gapped_start;
753
/* We must recompute the start for the gapped alignment, as the
754
one in the HSP was unacceptable.*/
756
BlastGetStartForGappedAlignment(query->data,
763
hsp->subject.offset);
765
(hsp->subject.offset - hsp->query.offset) + *q_start;
945
return s_SequenceGetTranslatedRange(self, range, seqData);
947
return s_SequenceGetProteinRange(self, range, seqData);
770
struct Blast_GappingParamsContext {
771
BlastGapAlignStruct * gap_align;
772
const BlastScoringParameters* scoringParams;
774
double localScalingFactor;
777
typedef struct Blast_GappingParamsContext Blast_GappingParamsContext;
952
/** Data and data-structures needed to perform a gapped alignment */
953
typedef struct BlastKappa_GappingParamsContext {
954
const BlastScoringParameters*
955
scoringParams; /**< scoring parameters for a
957
BlastGapAlignStruct * gap_align; /**< additional parameters for a
959
BlastScoreBlk* sbp; /**< the score block for this search */
960
double localScalingFactor; /**< the amount by which this
961
search has been scaled */
962
EBlastProgramType prog_number; /**< the type of search being
964
} BlastKappa_GappingParamsContext;
781
* Reads a GapAlignBlk that has been used to compute a traceback, and
782
* return a BlastCompo_Alignment representing the alignment.
784
* @param gap_align the GapAlignBlk
785
* @param window the window used to compute the traceback
968
* Reads a BlastGapAlignStruct that has been used to compute a
969
* traceback, and return a BlastCompo_Alignment representing the
970
* alignment. The BlastGapAlignStruct is in coordinates local to the
971
* ranges being aligned; the resulting alignment is in coordinates w.r.t.
972
* the whole query and subject.
974
* @param gap_align the BlastGapAlignStruct
975
* @param *edit_script the edit script from the alignment; on exit
976
* NULL. The edit_script is usually
977
* gap_align->edit_script, but we don't want
978
* an implicit side effect on the gap_align.
979
* @param query_range the range of the query used in this alignment
980
* @param subject_range the range of the subject used in this alignment
981
* @param matrix_adjust_rule the rule used to compute the scoring matrix
983
* @return the new alignment on success or NULL on error
787
985
static BlastCompo_Alignment *
788
986
s_NewAlignmentFromGapAlign(BlastGapAlignStruct * gap_align,
987
GapEditScript ** edit_script,
789
988
BlastCompo_SequenceRange * query_range,
790
989
BlastCompo_SequenceRange * subject_range,
990
EMatrixAdjustRule matrix_adjust_rule)
992
/* parameters to BlastCompo_AlignmentNew */
793
993
int queryStart, queryEnd, queryIndex, matchStart, matchEnd, frame;
794
994
BlastCompo_Alignment * obj; /* the new alignment */
996
/* In the composition_adjustment library, the query start/end are
997
indices into the concatenated query, and so must be shifted. */
796
998
queryStart = gap_align->query_start + query_range->begin;
797
999
queryEnd = gap_align->query_stop + query_range->begin;
798
1000
queryIndex = query_range->context;
800
1002
matchEnd = gap_align->subject_stop + subject_range->begin;
801
1003
frame = subject_range->context;
803
obj = BlastCompo_AlignmentNew(gap_align->score, whichMode,
1005
obj = BlastCompo_AlignmentNew(gap_align->score, matrix_adjust_rule,
804
1006
queryStart, queryEnd, queryIndex,
805
1007
matchStart, matchEnd, frame,
806
gap_align->edit_script);
807
gap_align->edit_script = NULL;
1010
*edit_script = NULL;
813
* Create a new BlastCompo_Alignment and append the list of
814
* alignments represented by "next."
816
* @param query query sequence data
817
* @param queryStart the start of the alignment in the query
818
* @param queryEnd the end of the alignment in the query
819
* @param subject subject sequence data
820
* @param matchStart the start of the alignment in the subject range
821
* @param matchEnd the end of the alignment in the subject range
822
* @param score the score of this alignment
823
* @param window the subject window of this alignment
824
* @param gap_align alignment info for gapped alignments
825
* @param scoringParams Settings for gapped alignment.[in]
826
* @param localScalingFactor the factor by which the scoring system has
827
* been scaled in order to obtain greater
829
* @param prog_number the type of alignment being performed
830
* @param next preexisting list of alignments [out]
1016
/** A callback used when performing SmithWaterman alignments:
1017
* Calculate the traceback for one alignment by performing an x-drop
1018
* alignment in the forward direction, possibly increasing the x-drop
1019
* parameter until the desired score is attained.
1021
* The start, end and score of the alignment should be obtained
1022
* using the Smith-Waterman algorithm before this routine is called.
1024
* @param *pnewAlign the new alignment
1025
* @param *pqueryEnd on entry, the end of the alignment in the
1026
* query, as computed by the Smith-Waterman
1027
* algorithm. On exit, the end as computed by
1028
* the x-drop algorithm
1029
* @param *pmatchEnd like as *pqueryEnd, but for the subject
1031
* @param queryStart the starting point in the query
1032
* @param matchStart the starting point in the subject
1033
* @param score the score of the alignment, as computed by
1034
* the Smith-Waterman algorithm
1035
* @param query query sequence data
1036
* @param query_range range of this query in the concatenated
1038
* @param ccat_query_length total length of the concatenated query
1039
* @param subject subject sequence data
1040
* @param subject_range range of subject_data in the translated
1041
* query, in amino acid coordinates
1042
* @param full_subject_length length of the full subject sequence
1043
* @param gapping_params parameters used to compute gapped
1045
* @param matrix_adjust_rule the rule used to compute the scoring matrix
1047
* @returns 0 (posts a fatal error if it fails)
1048
* @sa new_xdrop_align_type
833
1051
s_NewAlignmentUsingXdrop(BlastCompo_Alignment ** pnewAlign,
835
1053
Int4 queryStart, Int4 matchStart, Int4 score,
836
1054
BlastCompo_SequenceData * query,
837
1055
BlastCompo_SequenceRange * query_range,
1056
Int4 ccat_query_length,
839
1057
BlastCompo_SequenceData * subject,
840
1058
BlastCompo_SequenceRange * subject_range,
1059
Int4 full_subject_length,
842
1060
BlastCompo_GappingParams * gapping_params,
843
ECompoAdjustModes whichMode)
1061
EMatrixAdjustRule matrix_adjust_rule)
846
1064
/* Extent of the alignment as computed by an x-drop alignment
847
1065
* (usually the same as (queryEnd - queryStart) and (matchEnd -
848
1066
* matchStart)) */
849
1067
Int4 queryExtent, matchExtent;
850
BlastCompo_Alignment * obj; /* the new object */
851
Blast_GappingParamsContext * context = gapping_params->context;
1068
BlastCompo_Alignment * obj = NULL; /* the new object */
1069
/* BLAST-specific parameters needed compute an X-drop alignment */
1070
BlastKappa_GappingParamsContext * context = gapping_params->context;
1071
/* Auxiliarly structure for computing gapped alignments */
852
1072
BlastGapAlignStruct * gap_align = context->gap_align;
1073
/* Scoring parameters for gapped alignments */
853
1074
const BlastScoringParameters* scoringParams = context->scoringParams;
854
double localScalingFactor = context->localScalingFactor;
855
GapEditScript* editScript;
1075
/* A structure containing the traceback of a gapped alignment */
1076
GapEditScript* editScript = NULL;
1078
/* suppress unused parameter warnings; this is a callback
1079
function, so these parameter cannot be deleted */
1080
(void) ccat_query_length;
1081
(void) full_subject_length;
1083
gap_align->gap_x_dropoff = gapping_params->x_dropoff;
857
1085
s_SWFindFinalEndsUsingXdrop(query, queryStart, *pqueryEnd,
858
1086
subject, matchStart, *pmatchEnd,
859
1087
gap_align, scoringParams,
860
score, localScalingFactor,
861
&queryExtent, &matchExtent,
1088
score, &queryExtent, &matchExtent,
863
1090
*pqueryEnd = queryStart + queryExtent;
864
1091
*pmatchEnd = matchStart + matchExtent;
888
1144
int full_subject_length,
889
1145
BlastCompo_GappingParams * gapping_params)
891
Int4 q_start, s_start;
892
Blast_GappingParamsContext * context = gapping_params->context;
1147
int status; /* return code */
1148
Int4 q_start, s_start; /* starting point in query and subject */
1149
/* BLAST-specific parameters needed to compute a gapped alignment */
1150
BlastKappa_GappingParamsContext * context = gapping_params->context;
1151
/* Score block for this search */
893
1152
BlastScoreBlk* sbp = context->sbp;
1153
/* Auxiliary structure for computing gapped alignments */
894
1154
BlastGapAlignStruct* gapAlign = context->gap_align;
895
Boolean positionBased = (sbp->psi_matrix ? TRUE : FALSE);
1155
/* The preliminary gapped HSP that were are recomputing */
896
1156
BlastHSP * hsp = in_align->context;
898
s_StartingPointForHit(&q_start, &s_start, sbp, positionBased,
899
hsp, subject_range, query_data, subject_data);
1158
/* suppress unused parameter warnings; this is a callback
1159
function, so these parameter cannot be deleted */
1160
(void) ccat_query_length;
1161
(void) full_subject_length;
1163
/* Shift the subject offset and gapped start to be offsets
1164
into the translated subject_range; shifting in this manner
1165
is necessary for BLAST_CheckStartForGappedAlignment */
1166
hsp->subject.offset -= subject_range->begin;
1167
hsp->subject.end -= subject_range->begin;
1168
hsp->subject.gapped_start -= subject_range->begin;
1170
if(BLAST_CheckStartForGappedAlignment(hsp, query_data->data,
1171
subject_data->data, sbp)) {
1172
/* We may use the starting point supplied by the HSP. */
1173
q_start = hsp->query.gapped_start;
1174
s_start = hsp->subject.gapped_start;
1176
/* We must recompute the start for the gapped alignment, as the
1177
one in the HSP was unacceptable.*/
1179
BlastGetStartForGappedAlignment(query_data->data,
1180
subject_data->data, sbp,
1184
hsp->subject.offset,
1186
hsp->subject.offset);
1188
(hsp->subject.offset - hsp->query.offset) + q_start;
1190
/* Undo the shift so there is no side effect on the incoming HSP
1192
hsp->subject.offset += subject_range->begin;
1193
hsp->subject.end += subject_range->begin;
1194
hsp->subject.gapped_start += subject_range->begin;
900
1196
gapAlign->gap_x_dropoff = gapping_params->x_dropoff;
902
BLAST_GappedAlignmentWithTraceback(context->prog_number,
904
subject_data->data, gapAlign,
905
context->scoringParams,
908
subject_data->length);
909
return s_NewAlignmentFromGapAlign(gapAlign, query_range, subject_range,
1199
BLAST_GappedAlignmentWithTraceback(context->prog_number,
1201
subject_data->data, gapAlign,
1202
context->scoringParams,
1205
subject_data->length);
1207
return s_NewAlignmentFromGapAlign(gapAlign, &gapAlign->edit_script,
1208
query_range, subject_range,
1209
matrix_adjust_rule);
915
* A s_SearchParameters represents the data needed by
916
* RedoAlignmentCore to adjust the parameters of a search, including
917
* the original value of these parameters
1217
* A BlastKappa_SavedParameters holds the value of certain search
1218
* parameters on entry to RedoAlignmentCore. These values are
919
typedef struct s_SearchParameters {
1221
typedef struct BlastKappa_SavedParameters {
920
1222
Int4 gap_open; /**< a penalty for the existence of a gap */
921
1223
Int4 gapExtend; /**< a penalty for each residue in the
927
1229
double original_expect_value; /**< expect value on entry */
928
1230
/** copy of the original gapped Karlin-Altschul block
929
1231
* corresponding to the first context */
930
Blast_KarlinBlk* kbp_gap_orig;
931
/** pointer to the array of gapped Karlin-Altschul block for all
932
* contexts; needed to restore the search to its original
934
Blast_KarlinBlk** orig_kbp_gap_array;
935
} s_SearchParameters;
1232
Blast_KarlinBlk** kbp_gap_orig;
1233
Int4 num_queries; /**< Number of queries in this search */
1234
} BlastKappa_SavedParameters;
939
* Release the data associated with a s_SearchParameters and
1238
* Release the data associated with a BlastKappa_SavedParameters and
940
1239
* delete the object
941
1240
* @param searchParams the object to be deleted [in][out]
944
s_SearchParametersFree(s_SearchParameters ** searchParams)
1243
s_SavedParametersFree(BlastKappa_SavedParameters ** searchParams)
946
1245
/* for convenience, remove one level of indirection from searchParams */
947
s_SearchParameters *sp = *searchParams;
949
if(sp->kbp_gap_orig) Blast_KarlinBlkFree(sp->kbp_gap_orig);
951
Nlm_Int4MatrixFree(&sp->origMatrix);
1246
BlastKappa_SavedParameters *sp = *searchParams;
1249
if (sp->kbp_gap_orig != NULL) {
1251
for (i = 0; i < sp->num_queries; i++) {
1252
if (sp->kbp_gap_orig[i] != NULL)
1253
Blast_KarlinBlkFree(sp->kbp_gap_orig[i]);
1255
free(sp->kbp_gap_orig);
1257
if (sp->origMatrix != NULL)
1258
Nlm_Int4MatrixFree(&sp->origMatrix);
953
1260
sfree(*searchParams);
954
1261
*searchParams = NULL;
959
* Create a new instance of s_SearchParameters
1266
* Create a new instance of BlastKappa_SavedParameters
961
* @param rows number of rows in the scoring matrix
962
* @param adjustParameters if >0, use composition-based statistics
963
* @param numQueries the number of queries in the concatenated
965
* @param positionBased if true, the search is position-based
1268
* @param rows number of rows in the scoring matrix
1269
* @param numQueries number of queries in this search
1270
* @param compo_adjust_mode if >0, use composition-based statistics
1271
* @param positionBased if true, the search is position-based
967
static s_SearchParameters *
968
s_SearchParametersNew(
970
Int4 adjustParameters,
971
Boolean positionBased)
1273
static BlastKappa_SavedParameters *
1274
s_SavedParametersNew(Int4 rows,
1276
ECompoAdjustModes compo_adjust_mode,
1277
Boolean positionBased)
973
s_SearchParameters *sp; /* the new object */
974
sp = malloc(sizeof(s_SearchParameters));
1280
BlastKappa_SavedParameters *sp; /* the new object */
1281
sp = malloc(sizeof(BlastKappa_SavedParameters));
976
sp->orig_kbp_gap_array = NULL;
977
1286
sp->kbp_gap_orig = NULL;
978
1287
sp->origMatrix = NULL;
980
sp->kbp_gap_orig = Blast_KarlinBlkNew();
981
if (adjustParameters) {
1289
sp->kbp_gap_orig = calloc(numQueries, sizeof(Blast_KarlinBlk*));
1290
if (sp->kbp_gap_orig == NULL) {
1293
sp->num_queries = numQueries;
1294
for (i = 0; i < numQueries; i++) {
1295
sp->kbp_gap_orig[i] = NULL;
1297
if (compo_adjust_mode != eNoCompositionBasedStats) {
982
1298
if (positionBased) {
983
1299
sp->origMatrix = Nlm_Int4MatrixNew(rows, BLASTAA_SIZE);
985
1301
sp->origMatrix = Nlm_Int4MatrixNew(BLASTAA_SIZE, BLASTAA_SIZE);
1303
if (sp->origMatrix == NULL)
1308
s_SavedParametersFree(&sp);
993
1314
* Record the initial value of the search parameters that are to be
996
* @param searchParams holds the recorded values [out]
997
* @param search the search parameters [in]
998
* @param query a list of query data [in]
999
* @param numQueries the length of the array query [in]
1317
* @param searchParams holds the recorded values [out]
1318
* @param sbp a score block [in]
1319
* @param scoring gapped alignment parameters [in]
1320
* @param query_length length of the concatenated query [in]
1321
* @param compo_adjust_mode composition adjustment mode [in]
1322
* @param positionBased is this search position-based [in]
1002
s_RecordInitialSearch(s_SearchParameters * searchParams,
1003
BLAST_SequenceBlk * queryBlk,
1004
BlastQueryInfo* queryInfo,
1325
s_RecordInitialSearch(BlastKappa_SavedParameters * searchParams,
1005
1326
BlastScoreBlk* sbp,
1006
1327
const BlastScoringParameters* scoring,
1007
1328
int query_length,
1008
Boolean adjustParameters,
1329
ECompoAdjustModes compo_adjust_mode,
1009
1330
Boolean positionBased)
1011
Blast_KarlinBlk* kbp; /* statistical parameters used to evaluate a
1012
* query-subject pair */
1013
/* YIKES! How do I get these! */
1015
searchParams->original_expect_value = search->pbp->cutoff_e;
1017
searchParams->gap_open = scoring->gap_open;
1018
searchParams->gapExtend = scoring->gap_extend;
1019
searchParams->gapDecline = scoring->decline_align;
1334
searchParams->gap_open = scoring->gap_open;
1335
searchParams->gapExtend = scoring->gap_extend;
1336
searchParams->gapDecline = scoring->decline_align;
1020
1337
searchParams->scale_factor = scoring->scale_factor;
1022
searchParams->orig_kbp_gap_array = sbp->kbp_gap;
1023
kbp = sbp->kbp_gap[0];
1024
Blast_KarlinBlkCopy(searchParams->kbp_gap_orig, kbp);
1339
for (i = 0; i < searchParams->num_queries; i++) {
1340
if (sbp->kbp_gap[i] != NULL) {
1341
/* There is a kbp_gap for query i and it must be copied */
1342
searchParams->kbp_gap_orig[i] = Blast_KarlinBlkNew();
1343
if (searchParams->kbp_gap_orig[i] == NULL) {
1346
Blast_KarlinBlkCopy(searchParams->kbp_gap_orig[i],
1026
if (adjustParameters) {
1028
Int4 i, j; /* iteration indices */
1351
if (compo_adjust_mode != eNoCompositionBasedStats) {
1352
Int4 **matrix; /* scoring matrix */
1353
int j; /* iteration index */
1354
int rows; /* number of rows in matrix */
1030
1355
if (positionBased) {
1031
1356
matrix = sbp->psi_matrix->pssm->data;
1032
1357
rows = query_length;
1047
1373
* Rescale the search parameters in the search object and options
1048
1374
* object to obtain more precision.
1376
* @param sbp score block to be rescaled
1377
* @param sp scoring parameters to be rescaled
1378
* @param num_queries number of queries in this search
1379
* @param scale_factor amount by which to scale this search
1051
s_RescaleSearch(s_SearchParameters * sp,
1052
BLAST_SequenceBlk* queryBlk,
1053
BlastQueryInfo* queryInfo,
1055
BlastScoringParameters* scoringParams,
1056
double localScalingFactor,
1057
Boolean positionBased)
1382
s_RescaleSearch(BlastScoreBlk* sbp,
1383
BlastScoringParameters* sp,
1385
double scale_factor)
1059
Blast_KarlinBlk* kbp; /* the statistical parameters used to
1060
* evaluate alignments of a
1061
* query-subject pair */
1062
kbp = sbp->kbp_gap[0];
1063
kbp->Lambda /= localScalingFactor;
1064
kbp->logK = log(kbp->K);
1388
for (i = 0; i < num_queries; i++) {
1389
if (sbp->kbp_gap[i] != NULL) {
1390
Blast_KarlinBlk * kbp = sbp->kbp_gap[i];
1391
kbp->Lambda /= scale_factor;
1392
kbp->logK = log(kbp->K);
1066
/* YIKES! and what about the cutoff_e */
1068
search->pbp->cutoff_e = options->kappa_expect_value;
1070
scoringParams->gap_open = BLAST_Nint(sp->gap_open * localScalingFactor);
1071
scoringParams->gap_extend = BLAST_Nint(sp->gapExtend * localScalingFactor);
1072
scoringParams->scale_factor = localScalingFactor;
1073
if (sp->gapDecline != INT2_MAX) {
1074
scoringParams->decline_align =
1075
BLAST_Nint(sp->gapDecline * localScalingFactor);
1396
sp->gap_open = BLAST_Nint(sp->gap_open * scale_factor);
1397
sp->gap_extend = BLAST_Nint(sp->gap_extend * scale_factor);
1398
sp->scale_factor = scale_factor;
1399
if (sp->decline_align != INT2_MAX) {
1400
sp->decline_align = BLAST_Nint(sp->decline_align * scale_factor);
1081
* Restore the parameters that were adjusted to their original values
1082
* @param searchParams a record of the original values [in]
1083
* @param search the search to be restored [out]
1084
* @param options the option block to be restored [out]
1085
* @param matrix the scoring matrix to be restored [out]
1086
* @param SmithWaterman if true, we have performed a Smith-Waterman
1087
* alignment with these search parameters [in]
1406
* Restore the parameters that were adjusted to their original values.
1408
* @param sbp the score block to be restored
1409
* @param scoring the scoring parameters to be restored
1410
* @param searchParams the initial recorded values of the parameters
1411
* @param query_length the concatenated query length
1412
* @param positionBased is this search position-based
1413
* @param compo_adjust_mode mode of composition adjustment
1090
s_RestoreSearch(s_SearchParameters * searchParams,
1416
s_RestoreSearch(BlastScoreBlk* sbp,
1417
BlastScoringParameters* scoring,
1418
const BlastKappa_SavedParameters * searchParams,
1093
1419
int query_length,
1094
BlastScoringParameters* scoring,
1095
1420
Boolean positionBased,
1096
Boolean adjustParameters)
1421
ECompoAdjustModes compo_adjust_mode)
1098
Blast_KarlinBlk* kbp; /* statistical parameters used to
1099
evaluate the significance of
1100
alignment of a query-subject
1103
/* YIKES! More stuff I don't know how to deal with */
1105
search->pbp->gap_x_dropoff_final = searchParams->gap_x_dropoff_final;
1106
search->pbp->cutoff_e = searchParams->original_expect_value;
1107
search->pbp->gap_open = searchParams->gap_open;
1108
search->pbp->gap_extend = searchParams->gapExtend;
1109
search->pbp->decline_align = searchParams->gapDecline;
1110
GapAlignBlkDelete(search->gap_align);
1111
search->gap_align = searchParams->orig_gap_align;
1112
search->sbp->kbp_gap = searchParams->orig_kbp_gap_array;
1114
kbp = sbp->kbp_gap[0];
1115
Blast_KarlinBlkCopy(kbp, searchParams->kbp_gap_orig);
1117
if(adjustParameters) {
1425
scoring->gap_open = searchParams->gap_open;
1426
scoring->gap_extend = searchParams->gapExtend;
1427
scoring->decline_align = searchParams->gapDecline;
1428
scoring->scale_factor = searchParams->scale_factor;
1430
for (i = 0; i < searchParams->num_queries; i++) {
1431
if (sbp->kbp_gap[i] != NULL) {
1432
Blast_KarlinBlkCopy(sbp->kbp_gap[i],
1433
searchParams->kbp_gap_orig[i]);
1436
if(compo_adjust_mode != eNoCompositionBasedStats) {
1437
int j; /* iteration index */
1438
Int4 ** matrix; /* matrix to be restored */
1439
int rows; /* number of rows in the matrix */
1119
1441
if (positionBased) {
1442
matrix = sbp->psi_matrix->pssm->data;
1120
1443
rows = query_length;
1445
matrix = sbp->matrix->data;
1122
1446
rows = BLASTAA_SIZE;
1124
for(i = 0; i < rows; i++) {
1125
for(j = 0; j < BLASTAA_SIZE; j++) {
1448
for (i = 0; i < rows; i++) {
1449
for (j = 0; j < BLASTAA_SIZE; j++) {
1126
1450
matrix[i][j] = searchParams->origMatrix[i][j];
1458
* Initialize an object of type Blast_MatrixInfo.
1460
* @param self object being initialized
1461
* @param queryBlk the query sequence data
1462
* @param sbp score block for this search
1463
* @param scale_factor amount by which ungapped parameters should be
1465
* @param matrixName name of the matrix
1134
1468
s_MatrixInfoInit(Blast_MatrixInfo * self,
1135
double localScalingFactor,
1136
1469
BLAST_SequenceBlk* queryBlk,
1137
BlastQueryInfo* queryInfo,
1138
1470
BlastScoreBlk* sbp,
1139
BlastScoringParameters* scoringParams,
1140
Boolean positionBased,
1471
double scale_factor,
1141
1472
const char * matrixName)
1143
Uint1 * query; /* the query sequence */
1145
/* Int4 queryLength; */ /* the length of the query sequence */
1146
double initialUngappedLambda;
1150
query = search->context[0].query->sequence;
1151
queryLength = search->context[0].query->length;
1153
query = &queryBlk->sequence[0];
1154
queryLength = queryInfo->contexts[0].query_length;
1474
int status = 0; /* return status */
1475
int lenName; /* length of matrixName as a string */
1477
/* copy the matrix name (strdup is not standard C) */
1478
lenName = strlen(matrixName);
1479
if (NULL == (self->matrixName = malloc(lenName + 1))) {
1482
memcpy(self->matrixName, matrixName, lenName + 1);
1155
1484
if (self->positionBased) {
1157
if(sbp->posFreqs == NULL) {
1159
allocatePosFreqs(queryLength, BLASTAA_SIZE);
1162
s_GetStartFreqRatios(self->startFreqRatios, query, matrixName,
1163
sbp->psi_matrix->freq_ratios, queryLength,
1165
s_ScalePosMatrix(self->startMatrix, sbp->matrix->data,
1166
matrixName,sbp->psi_matrix->freq_ratios, query,
1167
queryInfo->max_length, sbp);
1168
initialUngappedLambda = sbp->kbp_psi[0]->Lambda;
1485
status = s_GetPosBasedStartFreqRatios(self->startFreqRatios,
1489
sbp->psi_matrix->freq_ratios);
1491
status = s_ScalePosMatrix(self->startMatrix, matrixName,
1492
sbp->psi_matrix->freq_ratios,
1494
queryBlk->length, sbp, scale_factor);
1495
self->ungappedLambda = sbp->kbp_psi[0]->Lambda / scale_factor;
1170
s_GetStartFreqRatios(self->startFreqRatios, query, matrixName,
1171
NULL, BLASTAA_SIZE, FALSE);
1172
initialUngappedLambda = sbp->kbp_ideal->Lambda;
1174
self->ungappedLambda = initialUngappedLambda / localScalingFactor;
1175
if ( !positionBased ) {
1176
SFreqRatios * freqRatios; /* frequency ratios for the matrix */
1178
freqRatios = _PSIMatrixFrequencyRatiosNew(matrixName);
1180
if (freqRatios == NULL) {
1181
ErrPostEx(SEV_FATAL, 1, 0, "blastpgp: Cannot adjust parameters "
1182
"for matrix %s\n", matrixName);
1185
Blast_Int4MatrixFromFreq(self->startMatrix, BLASTAA_SIZE,
1186
freqRatios->data, self->ungappedLambda);
1187
freqRatios = _PSIMatrixFrequencyRatiosFree(freqRatios);
1189
self->matrixName = strdup(matrixName);
1194
s_GetQueryInfo(BlastCompo_QueryInfo **pquery, int * pnumQueries,
1195
Uint1 * ccat_query, BlastQueryInfo* queryInfo)
1198
int numQueries = queryInfo->num_queries;
1199
BlastCompo_QueryInfo * query = calloc(numQueries,
1200
sizeof(BlastCompo_QueryInfo));
1201
*pnumQueries = numQueries;
1203
for (query_index = 0; query_index < numQueries; query_index++) {
1204
query[query_index].eff_search_space =
1205
queryInfo->contexts[query_index].eff_searchsp;
1207
for (query_index = 0; query_index < numQueries; query_index++) {
1208
query[query_index].origin =
1209
queryInfo->contexts[query_index].query_offset;
1210
query[query_index].seq.data = &ccat_query[query[query_index].origin];
1211
query[query_index].seq.length =
1212
queryInfo->contexts[query_index].query_length;
1214
for (query_index = 0; query_index < numQueries; query_index++) {
1215
Blast_ReadAaComposition(&query[query_index].composition,
1216
query[query_index].seq.data,
1217
query[query_index].seq.length);
1223
s_GappingParamsInit(Blast_GappingParamsContext * context,
1224
BlastCompo_GappingParams * gapping_params,
1225
BlastGapAlignStruct * gap_align,
1226
const BlastScoringParameters* scoring,
1228
double localScalingFactor,
1229
Int4 program_number,
1232
context->gap_align = gap_align;
1233
context->scoringParams = scoring;
1235
context->localScalingFactor = localScalingFactor;
1236
context->prog_number = program_number;
1238
gapping_params->gap_open = scoring->gap_open;
1239
gapping_params->gap_extend = scoring->gap_extend;
1240
gapping_params->decline_align = scoring->decline_align;
1241
/* YIKES! different x-dropoff due to different pass through the
1243
gapping_params->x_dropoff = gap_align->gap_x_dropoff;
1244
gapping_params->context = context;
1498
self->ungappedLambda = sbp->kbp_ideal->Lambda / scale_factor;
1499
status = s_GetStartFreqRatios(self->startFreqRatios, matrixName);
1501
Blast_Int4MatrixFromFreq(self->startMatrix, self->startFreqRatios,
1502
self->ungappedLambda);
1510
* Save information about all queries in an array of objects of type
1511
* BlastCompo_QueryInfo.
1513
* @param query_data query sequence data
1514
* @param blast_query_info information about all queries, as an
1515
* internal blast data structure
1517
* @return the new array on success, or NULL on error
1519
static BlastCompo_QueryInfo *
1520
s_GetQueryInfo(Uint1 * query_data, BlastQueryInfo * blast_query_info)
1522
int i; /* loop index */
1523
BlastCompo_QueryInfo *
1524
compo_query_info; /* the new array */
1525
int num_queries; /* the number of queries/elements in
1528
num_queries = blast_query_info->num_queries;
1529
compo_query_info = calloc(num_queries, sizeof(BlastCompo_QueryInfo));
1530
if (compo_query_info != NULL) {
1531
for (i = 0; i < num_queries; i++) {
1532
BlastCompo_QueryInfo * query_info = &compo_query_info[i];
1533
BlastContextInfo * query_context = &blast_query_info->contexts[i];
1535
query_info->eff_search_space =
1536
(double) query_context->eff_searchsp;
1537
query_info->origin = query_context->query_offset;
1538
query_info->seq.data = &query_data[query_info->origin];
1539
query_info->seq.length = query_context->query_length;
1541
Blast_ReadAaComposition(&query_info->composition,
1542
query_info->seq.data,
1543
query_info->seq.length);
1546
return compo_query_info;
1551
* Create a new object of type BlastCompo_GappingParams. The new
1552
* object contains the parameters needed by the composition adjustment
1553
* library to compute a gapped alignment.
1555
* @param context the data structures needed by callback functions
1556
* that perform the gapped alignments.
1557
* @param extendParams parameters used for a gapped extension
1558
* @param num_queries the number of queries in the concatenated query
1560
static BlastCompo_GappingParams *
1561
s_GappingParamsNew(BlastKappa_GappingParamsContext * context,
1562
const BlastExtensionParameters* extendParams,
1566
double min_lambda = DBL_MAX; /* smallest gapped Lambda */
1567
const BlastScoringParameters * scoring = context->scoringParams;
1568
const BlastExtensionOptions * options = extendParams->options;
1569
/* The new object */
1570
BlastCompo_GappingParams * gapping_params = NULL;
1572
gapping_params = malloc(sizeof(BlastCompo_GappingParams));
1573
if (gapping_params != NULL) {
1574
gapping_params->gap_open = scoring->gap_open;
1575
gapping_params->gap_extend = scoring->gap_extend;
1576
gapping_params->decline_align = scoring->decline_align;
1577
gapping_params->context = context;
1580
for (i = 0; i < num_queries; i++) {
1581
if (context->sbp->kbp_gap[i] != NULL &&
1582
context->sbp->kbp_gap[i]->Lambda < min_lambda) {
1583
min_lambda = context->sbp->kbp_gap[i]->Lambda;
1586
gapping_params->x_dropoff = (Int4)
1587
MAX(options->gap_x_dropoff_final*NCBIMATH_LN2 / min_lambda,
1588
extendParams->gap_x_dropoff_final);
1589
context->gap_align->gap_x_dropoff = gapping_params->x_dropoff;
1591
return gapping_params;
1595
/** Callbacks used by the Blast_RedoOneMatch* routines */
1248
1596
static const Blast_RedoAlignCallbacks
1249
1597
redo_align_callbacks = {
1250
1598
s_CalcLambda, s_SequenceGetRange, s_RedoOneAlignment,
1251
s_NewAlignmentUsingXdrop
1599
s_NewAlignmentUsingXdrop, s_FreeEditScript
1604
* Read the parameters required for the Blast_RedoOneMatch* functions from
1605
* the corresponding parameters in standard BLAST datatypes. Return a new
1606
* object representing these parameters.
1255
1608
static Blast_RedoAlignParams *
1256
s_GetAlignParams(Blast_GappingParamsContext * context,
1257
EBlastProgramType program_number,
1258
BlastGapAlignStruct * gap_align,
1609
s_GetAlignParams(BlastKappa_GappingParamsContext * context,
1259
1610
BLAST_SequenceBlk * queryBlk,
1260
1611
BlastQueryInfo* queryInfo,
1262
BlastScoringParameters* scoringParams,
1263
const BlastExtensionParameters* extendParams,
1264
1612
const BlastHitSavingParameters* hitParams,
1265
const PSIBlastOptions* psiOptions,
1266
const char * matrixName,
1267
double localScalingFactor,
1268
int adjustParameters)
1613
const BlastExtensionParameters* extendParams)
1273
BlastCompo_GappingParams * gapping_params = NULL;
1274
Blast_MatrixInfo * scaledMatrixInfo;
1275
Blast_KarlinBlk* kbp;
1276
int subject_is_translated = program_number == eBlastTypeTblastn;
1277
/* YIKES! wrong test for do_link_hsps */
1278
int do_link_hsps = program_number == eBlastTypeTblastn;
1279
Boolean positionBased = (sbp->psi_matrix ? TRUE : FALSE);
1615
int status = 0; /* status code */
1616
int rows; /* number of rows in the scoring matrix */
1617
int cutoff_s; /* cutoff score for saving an alignment */
1618
double cutoff_e; /* cutoff evalue for saving an alignment */
1619
BlastCompo_GappingParams *
1620
gapping_params = NULL; /* parameters needed to compute a gapped
1623
scaledMatrixInfo; /* information about the scoring matrix */
1624
/* does this kind of search translate the database sequence */
1625
int subject_is_translated = context->prog_number == eBlastTypeTblastn;
1626
/* is this a positiion-based search */
1627
Boolean positionBased = (Boolean) (context->sbp->psi_matrix != NULL);
1628
/* will BLAST_LinkHsps be called to assign e-values */
1629
Boolean do_link_hsps = (Boolean) (hitParams->link_hsp_params != NULL);
1630
ECompoAdjustModes compo_adjust_mode =
1631
(ECompoAdjustModes) extendParams->options->compositionBasedStats;
1281
1633
if (do_link_hsps) {
1282
ASSERT( 0 && "Which cutoff needed here?" );
1283
/* cutoff_s = search->pbp->cutoff_s2 * localScalingFactor; */
1635
(int) (hitParams->cutoff_score * context->localScalingFactor);
1285
1637
/* There is no cutoff score; we consider e-values instead */
1288
1640
cutoff_e = hitParams->options->expect_value;
1289
1641
rows = positionBased ? queryInfo->max_length : BLASTAA_SIZE;
1290
1642
scaledMatrixInfo = Blast_MatrixInfoNew(rows, positionBased);
1291
s_MatrixInfoInit(scaledMatrixInfo, localScalingFactor,
1292
queryBlk, queryInfo, sbp, scoringParams,
1293
positionBased, matrixName);
1294
kbp = sbp->kbp_gap[0];
1295
gapping_params = malloc(sizeof(BlastCompo_GappingParams));
1296
s_GappingParamsInit(context, gapping_params, gap_align, scoringParams,
1297
sbp, localScalingFactor, program_number,
1300
Blast_RedoAlignParamsNew(&scaledMatrixInfo, &gapping_params,
1301
adjustParameters, positionBased,
1302
subject_is_translated,
1303
queryInfo->max_length, cutoff_s, cutoff_e,
1304
do_link_hsps, kbp->Lambda, kbp->logK,
1305
&redo_align_callbacks);
1643
status = s_MatrixInfoInit(scaledMatrixInfo, queryBlk, context->sbp,
1644
context->localScalingFactor,
1645
context->scoringParams->options->matrix);
1649
gapping_params = s_GappingParamsNew(context, extendParams,
1650
queryInfo->num_queries);
1651
if (gapping_params == NULL) {
1655
Blast_RedoAlignParamsNew(&scaledMatrixInfo, &gapping_params,
1656
compo_adjust_mode, positionBased,
1657
subject_is_translated,
1658
queryInfo->max_length, cutoff_s, cutoff_e,
1659
do_link_hsps, &redo_align_callbacks);
1310
* Convert a BlastCompo_Heap to a flat list of SeqAligns. Note that
1311
* there may be more than one alignment per element in the heap. The
1312
* new list preserves the order of the SeqAligns associated with each
1313
* HeapRecord. (@todo this function is named as it is for
1314
* compatibility with kappa.c, rename in the future)
1665
* Convert an array of BlastCompo_Heap objects to a BlastHSPResults structure.
1316
* @param self a BlastCompo_Heap
1317
1667
* @param results BLAST core external results structure (pre-SeqAlign)
1669
* @param heaps an array of BlastCompo_Heap objects
1319
1670
* @param hitlist_size size of each list in the results structure above [in]
1322
s_HeapToFlatList(BlastCompo_Heap * self, BlastHSPResults * results,
1325
BlastHSPList* hsp_list;
1326
BlastHitList* hitlist =
1327
results->hitlist_array[0] = Blast_HitListNew(hitlist_size);
1673
s_FillResultsFromCompoHeaps(BlastHSPResults * results,
1674
BlastCompo_Heap heaps[],
1677
int query_index; /* loop index */
1678
int num_queries; /* Number of queries in this search */
1680
num_queries = results->num_queries;
1681
for (query_index = 0; query_index < num_queries; query_index++) {
1682
BlastHSPList* hsp_list;
1683
BlastHitList* hitlist;
1684
BlastCompo_Heap * heap = &heaps[query_index];
1686
results->hitlist_array[query_index] = Blast_HitListNew(hitlist_size);
1687
hitlist = results->hitlist_array[query_index];
1689
while (NULL != (hsp_list = BlastCompo_HeapPop(heap))) {
1690
Blast_HitListUpdate(hitlist, hsp_list);
1693
Blast_HSPResultsReverseOrder(results);
1697
/** Remove all matches from a BlastCompo_Heap. */
1698
static void s_ClearHeap(BlastCompo_Heap * self)
1700
BlastHSPList* hsp_list = NULL; /* an element of the heap */
1330
1702
while (NULL != (hsp_list = BlastCompo_HeapPop(self))) {
1331
Blast_HitListUpdate(hitlist, hsp_list);
1703
hsp_list = Blast_HSPListFree(hsp_list);
1337
* Top level routine to recompute alignments for each
1338
* match found by the gapped BLAST algorithm
1340
* @param search is the structure with all the information about
1342
* @param options is used to pass certain command line options
1344
* @param hitlist_count is the number of old matches
1345
* @param adjustParameters determines whether we are to adjust the
1346
* Karlin-Altschul parameters and score matrix
1347
* @param SmithWaterman determines whether the new local alignments
1348
* should be computed by the optimal Smith-Waterman
1349
* algorithm; SmithWaterman false means that
1350
* alignments will be recomputed by the current
1351
* X-drop algorithm as implemented in the procedure
1353
* @return a array of lists of SeqAlign; each element
1354
* in the array is a list of SeqAligns for
1355
* one query in the concatenated query.
1356
* It is assumed that at least one of adjustParameters and
1357
* SmithWaterman is >0 or true when this procedure is called A linked list
1358
* of alignments is returned; the alignments are sorted according to
1359
* the lowest E-value of the best alignment for each matching
1360
* sequence; alignments for the same matching sequence are in the
1361
* list consecutively regardless of the E-value of the secondary
1362
* alignments. Ties in sorted order are much rarer than for the
1363
* standard BLAST method, but are broken deterministically based on
1364
* the index of the matching sequences in the database.
1709
* Recompute alignments for each match found by the gapped BLAST
1367
1713
Blast_RedoAlignmentCore(EBlastProgramType program_number,
1377
1723
const PSIBlastOptions* psiOptions,
1378
1724
BlastHSPResults* results)
1380
double localScalingFactor; /* the factor by which to
1381
* scale the scoring system in
1382
* order to obtain greater
1726
int status_code = 0; /* return value code */
1727
/* the factor by which to scale the scoring system in order to
1728
* obtain greater precision */
1729
double localScalingFactor;
1730
/* the values of the search parameters that will be recorded, altered
1731
* in the search structure in this routine, and then restored before
1732
* the routine exits. */
1733
BlastKappa_SavedParameters *savedParams = NULL;
1734
/* forbidden ranges for each database position (used in
1735
* Smith-Waterman alignments) */
1736
Blast_ForbiddenRanges forbidden = {0,};
1737
/* a collection of alignments for each query sequence with
1738
* sequences from the database */
1739
BlastCompo_Heap * redoneMatches = NULL;
1740
/* stores all fields needed for computing a compositionally
1741
* adjusted score matrix using Newton's method */
1742
Blast_CompositionWorkspace *NRrecord = NULL;
1745
/* number of queries in the concatenated query */
1747
/* keeps track of gapped alignment params */
1748
BlastGapAlignStruct* gapAlign = NULL;
1749
/* All alignments above this value will be reported, no matter how
1751
double inclusion_ethresh;
1752
/* array of lists of alignments for each query to this subject */
1753
BlastCompo_Alignment ** alignments = NULL;
1755
BlastCompo_QueryInfo * query_info = NULL;
1756
Blast_RedoAlignParams * redo_align_params = NULL;
1757
Boolean positionBased = (Boolean) (sbp->psi_matrix != NULL);
1758
ECompoAdjustModes compo_adjust_mode =
1759
(ECompoAdjustModes) extendParams->options->compositionBasedStats;
1760
Boolean smithWaterman =
1761
(Boolean) (extendParams->options->eTbackExt == eSmithWatermanTbck);
1762
/* alignment data for the current query-subject match */
1763
BlastHSPList* thisMatch = NULL;
1764
/* existing alignments for a match */
1765
BlastCompo_Alignment * incoming_aligns = NULL;
1384
1766
Int4 **matrix; /* score matrix */
1385
s_SearchParameters *searchParams; /* the values of the search
1386
* parameters that will be
1387
* recorded, altered in the
1388
* search structure in this
1389
* routine, and then restored
1390
* before the routine
1392
Blast_ForbiddenRanges forbidden; /* forbidden ranges for each
1393
* database position (used
1397
BlastCompo_Heap * redoneMatches; /* a collection of alignments
1398
* for each query sequence with
1399
* sequences from the
1401
Blast_CompositionWorkspace
1402
*NRrecord = NULL; /* stores all fields needed for
1403
* computing a compositionally adjusted
1404
* score matrix using Newton's method */
1405
Int4 query_index; /* loop index */
1406
Int4 numQueries; /* number of queries in the
1407
concatenated query */
1408
BlastGapAlignStruct* gapAlign; /* keeps track of gapped
1410
double inclusion_ethresh; /* All alignments above this value will be
1411
reported, no matter how many. */
1412
BlastCompo_QueryInfo * query_info = NULL;
1413
Blast_RedoAlignParams * redo_align_params;
1414
Boolean positionBased = (sbp->psi_matrix ? TRUE : FALSE);
1415
Boolean adjustParameters = extendParams->options->compositionBasedStats;
1416
Boolean SmithWaterman;
1418
BlastHSPList* thisMatch = NULL; /* alignment data for the
1419
* current query-subject
1421
BlastCompo_Alignment * incoming_aligns; /* existing algnments
1423
Blast_GappingParamsContext gapping_params_context;
1767
BlastKappa_GappingParamsContext gapping_params_context;
1769
double pvalueForThisPair = (-1); /* p-value for this match
1770
for composition; -1 == no adjustment*/
1771
double LambdaRatio; /*lambda ratio*/
1772
/* which test function do we use to see if a composition-adjusted
1773
p-value is desired; value needs to be passed in eventually*/
1774
int compositionTestIndex = 0;
1776
if (positionBased) {
1777
matrix = sbp->psi_matrix->pssm->data;
1779
matrix = sbp->matrix->data;
1426
1781
/**** Validate parameters *************/
1782
if (matrix == NULL) {
1427
1785
if (0 == strcmp(scoringParams->options->matrix, "BLOSUM62_20") &&
1428
!adjustParameters) {
1429
return 0; /* BLOSUM62_20 only makes sense if
1430
* adjustParameters is on */
1786
compo_adjust_mode == eNoCompositionBasedStats) {
1787
return -1; /* BLOSUM62_20 only makes sense if
1788
* compo_adjust_mode is on */
1432
1790
if (positionBased) {
1433
adjustParameters = adjustParameters ? 1 : 0;
1435
if (extendParams->options->eTbackExt == eSmithWatermanTbck) {
1436
SmithWaterman = TRUE;
1438
SmithWaterman = FALSE;
1791
/* Position based searches can only use traditional
1792
* composition based stats */
1793
if ((int) compo_adjust_mode > 1) {
1794
compo_adjust_mode = eCompositionBasedStats;
1796
/* A position-based search can only have one query */
1797
ASSERT(queryInfo->num_queries == 1);
1798
ASSERT(queryBlk->length == (Int4)sbp->psi_matrix->pssm->ncols);
1800
if ((int) compo_adjust_mode > 1 &&
1801
!Blast_FrequencyDataIsAvailable(scoringParams->options->matrix)) {
1802
return -1; /* Unsupported matrix */
1440
1805
inclusion_ethresh =
1441
1806
(psiOptions != NULL) ? psiOptions->inclusion_ethresh : 0;
1444
/* Initialize searchParams */
1446
s_SearchParametersNew(queryInfo->max_length, adjustParameters,
1808
/* Initialize savedParams */
1810
s_SavedParametersNew(queryInfo->max_length, queryInfo->num_queries,
1811
compo_adjust_mode, positionBased);
1812
if (savedParams == NULL) {
1814
goto function_cleanup;
1817
s_RecordInitialSearch(savedParams, sbp, scoringParams,
1818
queryInfo->max_length, compo_adjust_mode,
1447
1819
positionBased);
1448
s_RecordInitialSearch(searchParams, queryBlk, queryInfo, sbp,
1449
scoringParams, queryInfo->max_length,
1450
adjustParameters, positionBased);
1451
if (adjustParameters) {
1820
if (status_code != 0) {
1821
goto function_cleanup;
1823
if (compo_adjust_mode != eNoCompositionBasedStats) {
1452
1824
if((0 == strcmp(scoringParams->options->matrix, "BLOSUM62_20"))) {
1453
1825
localScalingFactor = SCALING_FACTOR / 10;
1458
1830
localScalingFactor = 1.0;
1460
s_RescaleSearch(searchParams, queryBlk, queryInfo, sbp, scoringParams,
1461
localScalingFactor, positionBased);
1463
if (positionBased) {
1464
matrix = sbp->psi_matrix->pssm->data;
1466
/* YIKES! error return
1468
"Cannot perform position-specific search without a PSSM";
1469
BlastConstructErrorMessage("RedoAlignmentCore", msg, 3,
1470
&(search->error_return));
1475
matrix = sbp->matrix->data;
1477
if ((status_code=BLAST_GapAlignStructNew(scoringParams,
1479
BlastSeqSrcGetMaxSeqLen(seqSrc),
1480
sbp, &gapAlign)) != 0) {
1483
gapAlign->gap_x_dropoff =
1484
extendParams->gap_x_dropoff_final * localScalingFactor;
1832
s_RescaleSearch(sbp, scoringParams, queryInfo->num_queries,
1833
localScalingFactor);
1835
BLAST_GapAlignStructNew(scoringParams, extendParams,
1836
BlastSeqSrcGetMaxSeqLen(seqSrc), sbp,
1838
if (status_code != 0) {
1839
return (Int2) status_code;
1841
gapping_params_context.gap_align = gapAlign;
1842
gapping_params_context.scoringParams = scoringParams;
1843
gapping_params_context.sbp = sbp;
1844
gapping_params_context.localScalingFactor = localScalingFactor;
1845
gapping_params_context.prog_number = program_number;
1485
1846
redo_align_params =
1486
s_GetAlignParams(&gapping_params_context, program_number,
1487
gapAlign, queryBlk, queryInfo,
1488
sbp, scoringParams, extendParams, hitParams,
1489
psiOptions, scoringParams->options->matrix,
1490
localScalingFactor, adjustParameters);
1491
do_link_hsps = redo_align_params->do_link_hsps;
1493
s_GetQueryInfo(&query_info, &numQueries, queryBlk->sequence, queryInfo);
1495
Blast_ForbiddenRangesInitialize(&forbidden, queryInfo->max_length);
1847
s_GetAlignParams(&gapping_params_context, queryBlk, queryInfo,
1848
hitParams, extendParams);
1849
if (redo_align_params == NULL) {
1851
goto function_cleanup;
1853
numQueries = queryInfo->num_queries;
1854
query_info = s_GetQueryInfo(queryBlk->sequence, queryInfo);
1855
if (query_info == NULL) {
1857
goto function_cleanup;
1861
Blast_ForbiddenRangesInitialize(&forbidden, queryInfo->max_length);
1862
if (status_code != 0) {
1863
goto function_cleanup;
1497
1866
redoneMatches = calloc(numQueries, sizeof(BlastCompo_Heap));
1867
if (redoneMatches == NULL) {
1869
goto function_cleanup;
1498
1871
for (query_index = 0; query_index < numQueries; query_index++) {
1499
BlastCompo_HeapInitialize(&redoneMatches[query_index],
1500
hitParams->options->hitlist_size,
1873
BlastCompo_HeapInitialize(&redoneMatches[query_index],
1874
hitParams->options->hitlist_size,
1876
if (status_code != 0) {
1877
goto function_cleanup;
1503
if( adjustParameters > 1 && !positionBased ) {
1880
if( (int) compo_adjust_mode > 1 && !positionBased ) {
1504
1881
NRrecord = Blast_CompositionWorkspaceNew();
1505
Blast_CompositionWorkspaceInit(NRrecord,
1506
scoringParams->options->matrix);
1883
Blast_CompositionWorkspaceInit(NRrecord,
1884
scoringParams->options->matrix);
1885
if (status_code != 0) {
1886
goto function_cleanup;
1889
alignments = calloc(numQueries, sizeof(BlastCompo_Alignment *));
1890
if (alignments == NULL) {
1892
goto function_cleanup;
1508
1894
while (BlastHSPStreamRead(hsp_stream, &thisMatch) != kBlastHSPStream_Eof) {
1509
1895
/* for all matching sequences */
1510
BlastCompo_MatchingSequence matchingSeq; /* the data for a matching
1511
* database sequence */
1512
BlastCompo_Alignment ** alignments; /* array of lists of
1513
* alignments for each
1514
* query to this subject */
1515
alignments = calloc(numQueries, sizeof(BlastCompo_Alignment *));
1896
Blast_KarlinBlk * kbp;
1898
/* the data for a matching database sequence */
1899
BlastCompo_MatchingSequence matchingSeq = {0,};
1517
1900
if(thisMatch->hsp_array == NULL) {
1524
1907
/* Get the sequence for this match */
1525
s_MatchingSequenceInitialize(&matchingSeq, program_number,
1526
seqSrc, gen_code_string, thisMatch->oid);
1909
s_MatchingSequenceInitialize(&matchingSeq, program_number,
1910
seqSrc, gen_code_string,
1912
if (status_code != 0) {
1913
goto match_loop_cleanup;
1527
1915
incoming_aligns =
1528
s_ResultHspToDistinctAlign(queryInfo, thisMatch->hsp_array,
1529
thisMatch->hspcnt, localScalingFactor);
1530
if (SmithWaterman) {
1531
Blast_RedoOneMatchSmithWaterman(alignments,
1535
&matchingSeq, query_info,
1537
NRrecord, &forbidden,
1916
s_ResultHspToDistinctAlign(thisMatch->hsp_array, thisMatch->hspcnt,
1917
queryInfo, localScalingFactor);
1918
if (incoming_aligns == NULL) {
1920
goto match_loop_cleanup;
1922
/* All alignments in thisMatch should be to the same query */
1923
kbp = sbp->kbp_gap[thisMatch->query_index];
1924
if (smithWaterman) {
1926
Blast_RedoOneMatchSmithWaterman(alignments,
1930
kbp->Lambda, kbp->logK,
1931
&matchingSeq, query_info,
1933
NRrecord, &forbidden,
1936
compositionTestIndex,
1540
Blast_RedoOneMatch(alignments, redo_align_params,
1541
incoming_aligns, thisMatch->hspcnt,
1542
&matchingSeq, queryInfo->max_length,
1543
query_info, numQueries, matrix,
1940
Blast_RedoOneMatch(alignments, redo_align_params,
1941
incoming_aligns, thisMatch->hspcnt,
1942
kbp->Lambda, &matchingSeq,
1943
queryInfo->max_length, query_info,
1944
numQueries, matrix, NRrecord,
1946
compositionTestIndex,
1949
if (status_code != 0) {
1950
goto match_loop_cleanup;
1546
1952
for (query_index = 0; query_index < numQueries; query_index++) {
1547
1953
/* Loop over queries */
1548
1954
if( alignments[query_index] != NULL) { /* alignments were found */
1549
double bestEvalue; /* best evalue among alignments in the
1955
double bestEvalue; /* best e-value among alignments in the
1551
1957
Int4 bestScore; /* best score among alignments in
1553
BlastHSPList * hsp_list; /* a hitlist containing the
1554
* newly-computed alignments */
1555
void * discardedAligns;
1959
/* a hitlist containing the newly-computed alignments */
1960
BlastHSPList * hsp_list = NULL;
1961
void * discardedAligns = NULL;
1557
1963
s_HSPListFromDistinctAlignments(&alignments[query_index],
1966
if (hsp_list == NULL) {
1968
goto match_loop_cleanup;
1559
1970
if (hsp_list->hspcnt > 1) {
1560
1971
s_HitlistReapContained(hsp_list->hsp_array,
1561
1972
&hsp_list->hspcnt);
1563
s_HitlistEvaluateAndPurge(&bestScore, &bestEvalue,
1566
program_number, queryInfo,
1975
s_HitlistEvaluateAndPurge(&bestScore, &bestEvalue,
1981
pvalueForThisPair, LambdaRatio,
1983
if (status_code != 0) {
1984
goto query_loop_cleanup;
1569
1986
if (bestEvalue <= hitParams->options->expect_value &&
1570
1987
BlastCompo_HeapWouldInsert(&redoneMatches[query_index],
1571
1988
bestEvalue, bestScore,
1572
1989
thisMatch->oid)) {
1573
s_HSPListRescaleScores(hsp_list, redo_align_params->Lambda,
1574
redo_align_params->logK,
1575
localScalingFactor);
1577
BlastCompo_HeapInsert(&redoneMatches[query_index],
1578
hsp_list, bestEvalue,
1579
bestScore, thisMatch->oid,
1990
/* The best alignment is significant */
1991
s_HSPListNormalizeScores(hsp_list,
1992
kbp->Lambda, kbp->logK,
1993
localScalingFactor);
1995
BlastCompo_HeapInsert(&redoneMatches[query_index],
1996
hsp_list, bestEvalue,
1997
bestScore, thisMatch->oid,
1999
if (status_code == 0) {
2002
goto query_loop_cleanup;
1581
2004
if (discardedAligns != NULL) {
1582
2005
Blast_HSPListFree(discardedAligns);
1584
} else { /* the best alignment is not significant */
1585
Blast_HSPListFree(hsp_list);
1586
} /* end if the best alignment is significant */
2009
Blast_HSPListFree(hsp_list);
2010
if (status_code != 0) {
2011
goto match_loop_cleanup;
1587
2013
} /* end if any alignments were found */
1588
2014
} /* end loop over queries */
2016
if (status_code != 0) {
2017
for (query_index = 0; query_index < numQueries; query_index++) {
2018
BlastCompo_AlignmentsFree(&alignments[query_index],
1589
2022
s_MatchingSequenceRelease(&matchingSeq);
1590
2023
thisMatch = Blast_HSPListFree(thisMatch);
1592
2024
BlastCompo_AlignmentsFree(&incoming_aligns, NULL);
2025
if (status_code != 0) {
2026
goto function_cleanup;
1594
2029
/* end for all matching sequences */
1595
/* YIKES! handle multiple queries
1596
for (query_index = 0; query_index < numQueries; query_index++) {
1597
results[query_index] =
1598
BlastCompo_HeapToFlatList(&redoneMatches[query_index]);
1601
s_HeapToFlatList(&redoneMatches[0], results,
1602
hitParams->options->hitlist_size);
2032
if (status_code == 0) {
2033
s_FillResultsFromCompoHeaps(results, redoneMatches,
2034
hitParams->options->hitlist_size);
2036
if (redoneMatches != NULL) {
2037
s_ClearHeap(&redoneMatches[0]);
1604
2040
free(query_info);
1605
2041
Blast_RedoAlignParamsFree(&redo_align_params);
1606
for (query_index = 0; query_index < numQueries; query_index++) {
1607
BlastCompo_HeapRelease(&redoneMatches[query_index]);
2042
if (redoneMatches != NULL) {
2043
for (query_index = 0; query_index < numQueries; query_index++) {
2044
BlastCompo_HeapRelease(&redoneMatches[query_index]);
2046
sfree(redoneMatches); redoneMatches = NULL;
1609
sfree(redoneMatches); redoneMatches = NULL;
2048
if (smithWaterman) {
1611
2049
Blast_ForbiddenRangesRelease(&forbidden);
1613
gapAlign = BLAST_GapAlignStructFree(gapAlign);
1614
s_RestoreSearch(searchParams, sbp, matrix, queryInfo->max_length,
1615
scoringParams, positionBased, adjustParameters);
1616
s_SearchParametersFree(&searchParams);
1617
if (NULL != NRrecord) {
1618
Blast_CompositionWorkspaceFree(&NRrecord);
2051
if (gapAlign != NULL) {
2052
gapAlign = BLAST_GapAlignStructFree(gapAlign);
2054
s_RestoreSearch(sbp, scoringParams, savedParams, queryBlk->length,
2055
positionBased, compo_adjust_mode);
2056
s_SavedParametersFree(&savedParams);
2057
Blast_CompositionWorkspaceFree(&NRrecord);
2059
return (Int2) status_code;