7
#define TRACE_SEQPAIR 0
10
extern SCOREMATRIX VTML_LA;
11
extern SCOREMATRIX PAM200;
12
extern SCOREMATRIX PAM200NoCenter;
13
extern SCOREMATRIX VTML_SP;
14
extern SCOREMATRIX VTML_SPNoCenter;
15
extern SCOREMATRIX NUC_SP;
17
SCORE g_SPScoreLetters;
20
static SCORE TermGapScore(bool Gap)
29
return g_scoreGapOpen/2;
34
return g_scoreGapExtend;
37
Quit("TermGapScore?!");
41
SCORE ScoreSeqPairLetters(const MSA &msa1, unsigned uSeqIndex1,
42
const MSA &msa2, unsigned uSeqIndex2)
44
const unsigned uColCount = msa1.GetColCount();
45
const unsigned uColCount2 = msa2.GetColCount();
46
if (uColCount != uColCount2)
47
Quit("ScoreSeqPairLetters, different lengths");
52
Log("ScoreSeqPairLetters\n");
54
msaTmp.SetSize(2, uColCount);
55
msaTmp.CopySeq(0, msa1, uSeqIndex1);
56
msaTmp.CopySeq(1, msa2, uSeqIndex2);
61
SCORE scoreLetters = 0;
63
bool bGapping1 = false;
64
bool bGapping2 = false;
66
unsigned uColStart = 0;
67
bool bLeftTermGap = false;
68
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
70
bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
71
bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
76
uColStart = uColIndex;
81
unsigned uColEnd = uColCount - 1;
82
bool bRightTermGap = false;
83
for (int iColIndex = (int) uColCount - 1; iColIndex >= 0; --iColIndex)
85
bool bGap1 = msa1.IsGap(uSeqIndex1, iColIndex);
86
bool bGap2 = msa2.IsGap(uSeqIndex2, iColIndex);
91
uColEnd = (unsigned) iColIndex;
97
Log("LeftTermGap=%d RightTermGap=%d\n", bLeftTermGap, bRightTermGap);
100
for (unsigned uColIndex = uColStart; uColIndex <= uColEnd; ++uColIndex)
102
unsigned uLetter1 = msa1.GetLetterEx(uSeqIndex1, uColIndex);
103
if (uLetter1 >= g_AlphaSize)
105
unsigned uLetter2 = msa2.GetLetterEx(uSeqIndex2, uColIndex);
106
if (uLetter2 >= g_AlphaSize)
109
SCORE scoreMatch = (*g_ptrScoreMatrix)[uLetter1][uLetter2];
110
scoreLetters += scoreMatch;
115
SCORE ScoreSeqPairGaps(const MSA &msa1, unsigned uSeqIndex1,
116
const MSA &msa2, unsigned uSeqIndex2)
118
const unsigned uColCount = msa1.GetColCount();
119
const unsigned uColCount2 = msa2.GetColCount();
120
if (uColCount != uColCount2)
121
Quit("ScoreSeqPairGaps, different lengths");
126
Log("ScoreSeqPairGaps\n");
128
msaTmp.SetSize(2, uColCount);
129
msaTmp.CopySeq(0, msa1, uSeqIndex1);
130
msaTmp.CopySeq(1, msa2, uSeqIndex2);
136
bool bGapping1 = false;
137
bool bGapping2 = false;
139
unsigned uColStart = 0;
140
bool bLeftTermGap = false;
141
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
143
bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
144
bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
145
if (!bGap1 || !bGap2)
149
uColStart = uColIndex;
154
unsigned uColEnd = uColCount - 1;
155
bool bRightTermGap = false;
156
for (int iColIndex = (int) uColCount - 1; iColIndex >= 0; --iColIndex)
158
bool bGap1 = msa1.IsGap(uSeqIndex1, iColIndex);
159
bool bGap2 = msa2.IsGap(uSeqIndex2, iColIndex);
160
if (!bGap1 || !bGap2)
163
bRightTermGap = true;
164
uColEnd = (unsigned) iColIndex;
170
Log("LeftTermGap=%d RightTermGap=%d\n", bLeftTermGap, bRightTermGap);
173
for (unsigned uColIndex = uColStart; uColIndex <= uColEnd; ++uColIndex)
175
bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
176
bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
186
Log("Gap open seq 1 col %d\n", uColIndex);
188
if (uColIndex == uColStart)
189
scoreGaps += TermGapScore(true);
191
scoreGaps += g_scoreGapOpen;
195
scoreGaps += g_scoreGapExtend;
204
Log("Gap open seq 2 col %d\n", uColIndex);
206
if (uColIndex == uColStart)
207
scoreGaps += TermGapScore(true);
209
scoreGaps += g_scoreGapOpen;
213
scoreGaps += g_scoreGapExtend;
221
if (bGapping1 || bGapping2)
223
scoreGaps -= g_scoreGapOpen;
224
scoreGaps += TermGapScore(true);
229
// The usual sum-of-pairs objective score: sum the score
230
// of the alignment of each pair of sequences.
231
SCORE ObjScoreSP(const MSA &msa, SCORE MatchScore[])
234
Log("==================ObjScoreSP==============\n");
238
g_SPScoreLetters = 0;
243
const unsigned uColCount = msa.GetColCount();
244
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
245
MatchScore[uColIndex] = 0;
248
const unsigned uSeqCount = msa.GetSeqCount();
249
SCORE scoreTotal = 0;
250
unsigned uPairCount = 0;
252
Log("Seq1 Seq2 wt1 wt2 Letters Gaps Unwt.Score Wt.Score Total\n");
253
Log("---- ---- ------ ------ ---------- ---------- ---------- ---------- ----------\n");
255
for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
257
const WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1);
258
for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount; ++uSeqIndex2)
260
const WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2);
261
const WEIGHT w = w1*w2;
263
SCORE scoreLetters = ScoreSeqPairLetters(msa, uSeqIndex1, msa, uSeqIndex2);
264
SCORE scoreGaps = ScoreSeqPairGaps(msa, uSeqIndex1, msa, uSeqIndex2);
265
SCORE scorePair = scoreLetters + scoreGaps;
268
scoreTotal += w*scorePair;
270
g_SPScoreLetters += w*scoreLetters;
271
g_SPScoreGaps += w*scoreGaps;
273
Log("%4d %4d %6.3f %6.3f %10.2f %10.2f %10.2f %10.2f %10.2f >%s >%s\n",
283
msa.GetSeqName(uSeqIndex1),
284
msa.GetSeqName(uSeqIndex2));
290
SCORE f = ObjScoreSPFast(msa);
291
Log("Fast = %.6g\n", f);
292
Log("Brute = %.6g\n", scoreTotal);
293
if (BTEq(f, scoreTotal))
296
Log("** DISAGREE **\n");
299
// return scoreTotal / uPairCount;
303
// Objective score defined as the dynamic programming score.
304
// Input is two alignments, which must be of the same length.
305
// Result is the same profile-profile score that is optimized
306
// by dynamic programming.
307
SCORE ObjScoreDP(const MSA &msa1, const MSA &msa2, SCORE MatchScore[])
309
const unsigned uColCount = msa1.GetColCount();
310
if (msa2.GetColCount() != uColCount)
311
Quit("ObjScoreDP, must be same length");
313
const unsigned uColCount1 = msa1.GetColCount();
314
const unsigned uColCount2 = msa2.GetColCount();
316
const ProfPos *PA = ProfileFromMSA(msa1);
317
const ProfPos *PB = ProfileFromMSA(msa2);
319
return ObjScoreDP_Profs(PA, PB, uColCount1, MatchScore);
322
SCORE ObjScoreDP_Profs(const ProfPos *PA, const ProfPos *PB, unsigned uColCount,
326
// Log("Profile 1:\n");
327
// ListProfile(PA, uColCount, &msa1);
329
// Log("Profile 2:\n");
330
// ListProfile(PB, uColCount, &msa2);
333
SCORE scoreTotal = 0;
335
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
337
const ProfPos &PPA = PA[uColIndex];
338
const ProfPos &PPB = PB[uColIndex];
341
SCORE scoreMatch = 0;
342
// If gapped column...
343
if (PPA.m_bAllGaps && PPB.m_bAllGaps)
345
else if (PPA.m_bAllGaps)
347
if (uColCount - 1 == uColIndex || !PA[uColIndex+1].m_bAllGaps)
348
scoreGap = PPB.m_scoreGapClose;
349
if (0 == uColIndex || !PA[uColIndex-1].m_bAllGaps)
350
scoreGap += PPB.m_scoreGapOpen;
352
// scoreGap = PPB.m_scoreGapExtend;
354
else if (PPB.m_bAllGaps)
356
if (uColCount - 1 == uColIndex || !PB[uColIndex+1].m_bAllGaps)
357
scoreGap = PPA.m_scoreGapClose;
358
if (0 == uColIndex || !PB[uColIndex-1].m_bAllGaps)
359
scoreGap += PPA.m_scoreGapOpen;
361
// scoreGap = PPA.m_scoreGapExtend;
364
scoreMatch = ScoreProfPos2(PPA, PPB);
367
MatchScore[uColIndex] = scoreMatch;
369
scoreTotal += scoreMatch + scoreGap;
371
extern bool g_bTracePPScore;
372
extern MSA *g_ptrPPScoreMSA1;
373
extern MSA *g_ptrPPScoreMSA2;
376
const MSA &msa1 = *g_ptrPPScoreMSA1;
377
const MSA &msa2 = *g_ptrPPScoreMSA2;
378
const unsigned uSeqCount1 = msa1.GetSeqCount();
379
const unsigned uSeqCount2 = msa2.GetSeqCount();
381
for (unsigned n = 0; n < uSeqCount1; ++n)
382
Log("%c", msa1.GetChar(n, uColIndex));
384
for (unsigned n = 0; n < uSeqCount2; ++n)
385
Log("%c", msa2.GetChar(n, uColIndex));
386
Log(" %10.3f", scoreMatch);
388
Log(" %10.3f", scoreGap);
399
// Objective score defined as the sum of profile-sequence
400
// scores for each sequence in the alignment. The profile
401
// is computed from the entire alignment, so this includes
402
// the score of each sequence against itself. This is to
403
// avoid recomputing the profile each time, so we reduce
404
// complexity but introduce a questionable approximation.
405
// The goal is to see if we can exploit the apparent
406
// improvement in performance of log-expectation score
407
// over the usual sum-of-pairs by optimizing this
408
// objective score in the iterative refinement stage.
409
SCORE ObjScorePS(const MSA &msa, SCORE MatchScore[])
411
if (g_PPScore != PPSCORE_LE)
412
Quit("FastScoreMSA_LASimple: LA");
414
const unsigned uSeqCount = msa.GetSeqCount();
415
const unsigned uColCount = msa.GetColCount();
417
const ProfPos *Prof = ProfileFromMSA(msa);
420
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
421
MatchScore[uColIndex] = 0;
423
SCORE scoreTotal = 0;
424
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
426
const WEIGHT weightSeq = msa.GetSeqWeight(uSeqIndex);
428
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
430
const ProfPos &PP = Prof[uColIndex];
431
if (msa.IsGap(uSeqIndex, uColIndex))
433
bool bOpen = (0 == uColIndex ||
434
!msa.IsGap(uSeqIndex, uColIndex - 1));
435
bool bClose = (uColCount - 1 == uColIndex ||
436
!msa.IsGap(uSeqIndex, uColIndex + 1));
439
scoreSeq += PP.m_scoreGapOpen;
441
scoreSeq += PP.m_scoreGapClose;
442
//if (!bOpen && !bClose)
443
// scoreSeq += PP.m_scoreGapExtend;
445
else if (msa.IsWildcard(uSeqIndex, uColIndex))
449
unsigned uLetter = msa.GetLetter(uSeqIndex, uColIndex);
450
const SCORE scoreMatch = PP.m_AAScores[uLetter];
452
MatchScore[uColIndex] += weightSeq*scoreMatch;
453
scoreSeq += scoreMatch;
456
scoreTotal += weightSeq*scoreSeq;
463
// The XP score is the sum of the score of each pair of
464
// sequences between two profiles which are aligned to
465
// each other. Notice that for two given profiles aligned
466
// in different ways, the difference in XP score must be
467
// the same as the difference in SP score because the
468
// score of a pair of sequences in one profile doesn't
469
// depend on the alignment.
470
SCORE ObjScoreXP(const MSA &msa1, const MSA &msa2)
472
const unsigned uColCount1 = msa1.GetColCount();
473
const unsigned uColCount2 = msa2.GetColCount();
474
if (uColCount1 != uColCount2)
475
Quit("ObjScoreXP, alignment lengths differ %u %u", uColCount1, uColCount2);
477
const unsigned uSeqCount1 = msa1.GetSeqCount();
478
const unsigned uSeqCount2 = msa2.GetSeqCount();
481
Log(" Score Weight Weight Total\n");
482
Log("---------- ------ ------ ----------\n");
485
SCORE scoreTotal = 0;
486
unsigned uPairCount = 0;
487
for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount1; ++uSeqIndex1)
489
const WEIGHT w1 = msa1.GetSeqWeight(uSeqIndex1);
490
for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqCount2; ++uSeqIndex2)
492
const WEIGHT w2 = msa2.GetSeqWeight(uSeqIndex2);
493
const WEIGHT w = w1*w2;
494
SCORE scoreLetters = ScoreSeqPairLetters(msa1, uSeqIndex1, msa2, uSeqIndex2);
495
SCORE scoreGaps = ScoreSeqPairGaps(msa1, uSeqIndex1, msa2, uSeqIndex2);
496
SCORE scorePair = scoreLetters + scoreGaps;
497
scoreTotal += w1*w2*scorePair;
500
Log("%10.2f %6.3f %6.3f %10.2f >%s >%s\n",
505
msa1.GetSeqName(uSeqIndex1),
506
msa2.GetSeqName(uSeqIndex2));
511
Quit("0 == uPairCount");
518
Log("XP=%g\n", scoreTotal);
520
// return scoreTotal / uPairCount;