1
static char const rcsid[] = "$Id: kappa.c,v 6.87 2006/06/29 17:43:39 papadopo Exp $";
1
static char const rcsid[] = "$Id: kappa.c,v 6.90 2008/02/08 21:51:38 camacho Exp $";
3
/* $Id: kappa.c,v 6.87 2006/06/29 17:43:39 papadopo Exp $
3
/* $Id: kappa.c,v 6.90 2008/02/08 21:51:38 camacho Exp $
4
4
* ==========================================================================
6
6
* PUBLIC DOMAIN NOTICE
1223
1223
self->local_data = NULL;
1226
#define MINUMUM_FRACTION_NEAR_IDENTICAL 0.98
1229
* Test whether the aligned parts of two sequences that
1230
* have a high-scoring gapless alignment are nearly identical
1232
* @param seqData subject sequence
1233
* @param queryData query sequence
1234
* @param queryOffset offset for align if there are multiple queries
1235
* @param align information about the alignment
1236
* @param rangeOffset offset for subject sequence (used for tblastn)
1238
* @return TRUE if the aligned portions are nearly identical
1240
static Boolean testNearIdentical(const BlastCompo_SequenceData *seqData,
1241
const BlastCompo_SequenceData *queryData,
1242
const int queryOffset,
1243
const BlastCompo_Alignment *align,
1244
const int rangeOffset)
1246
int numIdentical = 0;
1247
double fractionIdentical;
1248
int qPos, sPos; /*positions in query and subject;*/
1249
int qEnd; /*end of query*/
1251
qPos = align->queryStart - queryOffset;
1252
qEnd = align->queryEnd - queryOffset;
1253
sPos = align->matchStart - rangeOffset;
1254
while (qPos < qEnd) {
1255
if (queryData->data[qPos] == seqData->data[sPos])
1260
fractionIdentical = ((double) numIdentical/
1261
(double) (align->queryEnd - align->queryStart));
1262
if (fractionIdentical >= MINUMUM_FRACTION_NEAR_IDENTICAL)
1227
1269
/** Character to use for masked residues */
1228
1270
#define BLASTP_MASK_RESIDUE eXchar
1237
1279
* @param range the range, in amino acid coordinates, of data
1238
1280
* to get; includes the translation frame [in]
1239
1281
* @param seqData the resulting data [out]
1282
* @param queryData the query sequence [in]
1283
* @param queryOffset offset for align if there are multiple queries
1284
* @param align information about the alignment between query and subject
1285
* @param shouldTestIdentical did alignment pass a preliminary test in
1286
* redo_alignment.c that indicates the sequence
1287
* pieces may be near identical
1242
1290
Kappa_SequenceGetTranslatedRange(const BlastCompo_MatchingSequence * self,
1243
1291
const BlastCompo_SequenceRange * range,
1244
BlastCompo_SequenceData * seqData )
1292
BlastCompo_SequenceData * seqData,
1293
const BlastCompo_SequenceData * queryData,
1294
const int queryOffset,
1295
const BlastCompo_Alignment *align,
1296
const Boolean shouldTestIdentical)
1247
1299
Int4 nucleotide_start; /* position of the first nucleotide to be
1303
1355
seg_slp = BlastBioseqFilter(bsp_temp, BLASTP_MASK_INSTRUCTIONS);
1305
1357
HackSeqLocId(seg_slp, local_data->bsp_db->id);
1306
BlastMaskTheResidues(seqData->data, seqData->length,
1358
if ((!shouldTestIdentical) ||
1359
(shouldTestIdentical &&
1360
(!testNearIdentical(seqData, queryData, queryOffset, align, range->begin))))
1361
BlastMaskTheResidues(seqData->data, seqData->length,
1307
1362
BLASTP_MASK_RESIDUE, seg_slp, FALSE, 0);
1308
1363
seg_slp = SeqLocSetFree(seg_slp);
1323
1378
* @param range the range, in amino acid coordinates, of data
1325
1380
* @param seqData the sequence data obtained [out]
1381
* @param queryData the query sequence [in]
1382
* @param queryOffset offset for align if there are multiple queries
1383
* @param align information about the alignment between query and subject
1384
* @param shouldTestIdentical did alignment pass a preliminary test in
1385
* redo_alignment.c that indicates the sequence
1386
* pieces may be near identical
1327
1388
* @returns always 0 (posts a fatal error on failure rather than
1328
1389
* returning an error code.)
1331
1392
Kappa_SequenceGetRange(
1332
1393
const BlastCompo_MatchingSequence * self,
1333
1394
const BlastCompo_SequenceRange * range,
1334
BlastCompo_SequenceData * seqData )
1395
BlastCompo_SequenceData * seqData,
1396
const BlastCompo_SequenceData * queryData,
1397
const int queryOffset,
1398
const BlastCompo_Alignment *align,
1399
const Boolean shouldTestIdentical)
1336
1401
Kappa_SequenceLocalData * local_data = self->local_data;
1337
1402
if(local_data->prog_number == blast_type_tblastn) {
1338
1403
/* The sequence must be translated. */
1339
Kappa_SequenceGetTranslatedRange(self, range, seqData);
1404
Kappa_SequenceGetTranslatedRange(self, range, seqData, queryData,
1406
align, shouldTestIdentical);
1341
1408
/* The sequence does not need to be translated. */
1342
1409
/* Obtain the entire sequence (necessary for SEG filtering.) */
1406
1473
BlastBioseqFilter(local_data->bsp_db, BLASTP_MASK_INSTRUCTIONS);
1408
BlastMaskTheResidues(seqData->data, seqData->length,
1409
BLASTP_MASK_RESIDUE, seg_slp, FALSE, 0);
1475
if ((!shouldTestIdentical) ||
1476
(shouldTestIdentical &&
1477
(!testNearIdentical(seqData, queryData, queryOffset, align, 0))))
1478
BlastMaskTheResidues(seqData->data, seqData->length,
1479
BLASTP_MASK_RESIDUE, seg_slp, FALSE, 0);
1410
1480
seg_slp = SeqLocSetFree(seg_slp);
2082
2152
redo_align_callbacks = {
2083
2153
Kappa_CalcLambda, Kappa_SequenceGetRange, Kappa_RedoOneAlignment,
2084
2154
Kappa_NewAlignmentUsingXdrop, Kappa_FreeEditBlock
2088
2158
/** Get a set of alignment parameters, for use by Blast_RedoOneMatch or
2336
2406
ErrPostEx(SEV_FATAL, E_NoMemory, 0, "Failed to allocate memory");
2338
2408
significantMatches = Nlm_Calloc(numQueries, sizeof(BlastCompo_Heap));
2409
ASSERT(options->ethresh != 0.0);
2339
2410
for (query_index = 0; query_index < numQueries; query_index++) {
2341
2412
BlastCompo_HeapInitialize(&significantMatches[query_index],