4
// Integer constant with value 63. The name suggests an upper bound on the
// length of a sequence name.
// NOTE(review): whether 63 counts a terminating NUL is not visible here --
// confirm at the use sites.
const int MAX_SEQ_NAME = 63;
19
// Ways to create an MSA
20
// Non-const member taking the TextFile by non-const reference.
// NOTE(review): the input format accepted by FromFile is not visible from
// the declaration -- confirm in the definition.
void FromFile(TextFile &File);
21
// Same signature as FromFile; the name indicates FASTA-formatted input.
void FromFASTAFile(TextFile &File);
22
// Builds from a single Seq passed by const reference (the Seq itself is not
// modified through this parameter).
void FromSeq(const Seq &s);
24
// Output routines. Every one is a const member that takes the destination
// TextFile by non-const reference. The format is indicated only by the
// function name (FASTA, MSF, Aln, HTML, Phy sequential / interleaved).
// NOTE(review): the format written by plain ToFile is not visible here --
// confirm in the definition.
void ToFile(TextFile &File) const;
25
void ToFASTAFile(TextFile &File) const;
26
// ptrComment is optional and defaults to 0 (a null pointer).
// NOTE(review): presumably a comment string emitted into the file -- confirm.
void ToMSFFile(TextFile &File, const char *ptrComment = 0) const;
27
void ToAlnFile(TextFile &File) const;
28
void ToHTMLFile(TextFile &File) const;
29
void ToPhySequentialFile(TextFile &File) const;
30
void ToPhyInterleavedFile(TextFile &File) const;
32
// Sizing operations; both are non-const members.
// SetSize takes a sequence count and a column count, SetSeqCount only a
// sequence count.
// NOTE(review): whether existing contents are preserved or reallocated is
// not visible from the declarations -- confirm in the definitions.
void SetSize(unsigned uSeqCount, unsigned uColCount);
33
void SetSeqCount(unsigned uSeqCount);
34
// Read-only accessors (all const members).
// GetChar / GetLetter / GetLetterEx take a sequence index plus a second
// index; GetChar returns a char, the other two return unsigned.
// NOTE(review): how GetLetter and GetLetterEx differ is not visible here --
// check the definitions.
char GetChar(unsigned uSeqIndex, unsigned uIndex) const;
35
unsigned GetLetter(unsigned uSeqIndex, unsigned uIndex) const;
36
unsigned GetLetterEx(unsigned uSeqIndex, unsigned uIndex) const;
37
// Returns a pointer to const char for the given sequence index.
// NOTE(review): lifetime/ownership of the returned string is not visible
// here -- presumably owned by the MSA; confirm.
const char *GetSeqName(unsigned uSeqIndex) const;
38
// Index -> id lookup.
unsigned GetSeqId(unsigned uSeqIndex) const;
39
// Id -> index lookup, two overloads. This one returns the index directly.
// NOTE(review): behavior for an unknown id is not visible here -- confirm.
unsigned GetSeqIndex(unsigned uId) const;
40
// This overload returns bool and has an out-pointer for the index.
// NOTE(review): presumably the bool reports whether the id was found --
// confirm.
bool GetSeqIndex(unsigned uId, unsigned *ptruIndex) const;
41
// Per-column value returned as double.
// NOTE(review): presumably column occupancy; range/units not visible here.
double GetOcc(unsigned uColIndex) const;
42
// Per-column query (const member, returns void).
// By-value inputs: uColIndex and bNormalize. All remaining parameters are
// non-const FCOUNT arrays/pointers, so results can be written through them:
// fcCounts[], ptrfcGapStart, ptrfcGapEnd, fcGapExtend, ptrfOcc, and the four
// fcLL / fcLG / fcGL / fcGG pointers.
// NOTE(review): the required length of fcCounts[] and the exact meaning of
// the LL/LG/GL/GG values are not visible here -- confirm in the definition.
void GetFractionalWeightedCounts(unsigned uColIndex, bool bNormalize,
43
FCOUNT fcCounts[], FCOUNT *ptrfcGapStart, FCOUNT *ptrfcGapEnd,
44
FCOUNT *fcGapExtend, FCOUNT *ptrfOcc,
45
FCOUNT *fcLL, FCOUNT *fcLG, FCOUNT *fcGL, FCOUNT *fcGG) const;
46
// Boolean predicates (all const members).
// IsGap and IsWildcard test a single cell given (sequence index, column
// index).
bool IsGap(unsigned uSeqIndex, unsigned uColIndex) const;
47
bool IsWildcard(unsigned uSeqIndex, unsigned uColIndex) const;
48
// Column-level predicates taking only a column index.
// NOTE(review): presumably IsGapColumn means "every cell is a gap" and
// ColumnHasGap means "at least one cell is a gap" -- confirm in the
// definitions.
bool IsGapColumn(unsigned uColIndex) const;
49
bool ColumnHasGap(unsigned uColIndex) const;
50
// Sequence-level predicate taking only a sequence index.
bool IsGapSeq(unsigned uSeqIndex) const;
52
// Mutators (non-const members), each addressed by sequence index.
// SetChar additionally takes a column index and the char to store.
void SetChar(unsigned uSeqIndex, unsigned uColIndex, char c);
53
// szName is a pointer to const char (array-parameter syntax).
// NOTE(review): whether the name is copied or the pointer is retained is not
// visible here -- confirm in the definition.
void SetSeqName(unsigned uSeqIndex, const char szName[]);
54
void SetSeqId(unsigned uSeqIndex, unsigned uId);
56
// Const predicate on an unsigned letter value.
// NOTE(review): the legal range is not visible here -- confirm.
bool IsLegalLetter(unsigned uLetter) const;
57
// Const member; the Seq is passed by non-const reference, so it can serve as
// an output for the sequence at uSeqIndex.
void GetSeq(unsigned uSeqIndex, Seq &seq) const;
58
// Non-const member taking another MSA by const reference.
// NOTE(review): presumably replaces this object's contents with a copy of
// msa -- confirm.
void Copy(const MSA &msa);
59
// Per-column and whole-alignment values returned as double.
// NOTE(review): presumably conservation scores; scale not visible here.
double GetCons(unsigned uColIndex) const;
60
double GetAvgCons() const;
61
// Pairwise value for two sequence indexes, returned as double.
// NOTE(review): whether the result is a fraction or a percentage is not
// visible here -- confirm.
double GetPctIdentityPair(unsigned uSeqIndex1, unsigned uSeqIndex2) const;
62
// Name -> index lookup: returns bool and has an out-pointer for the index.
// NOTE(review): presumably the bool reports whether the name was found.
bool GetSeqIndex(const char *ptrSeqName, unsigned *ptruSeqIndex) const;
63
// Structural edits (non-const members).
// DeleteCol takes one column index; DeleteColumns takes a starting column
// index and a column count.
void DeleteCol(unsigned uColIndex);
63
void DeleteColumns(unsigned uColIndex, unsigned uColCount);
64
// Takes a destination index in this object and a source MSA (const
// reference) plus source index.
void CopySeq(unsigned uToSeqIndex, const MSA &msaFrom, unsigned uFromSeqIndex);
66
void DeleteSeq(unsigned uSeqIndex);
67
// void DeleteEmptyCols(bool bProgress = false);
68
// Const predicate on a column index.
// NOTE(review): the definition of "empty" (and how it differs from
// IsGapColumn) is not visible here -- confirm.
bool IsEmptyCol(unsigned uColIndex) const;
70
// Sequence-weight interface; values use the project type WEIGHT.
WEIGHT GetSeqWeight(unsigned uSeqIndex) const;
71
WEIGHT GetTotalSeqWeight() const;
72
// NOTE(review): SetSeqWeight and NormalizeWeights are declared const even
// though their names suggest they modify weights. Presumably the weight
// storage is mutable or reached through a pointer -- confirm in the
// definitions.
void SetSeqWeight(unsigned uSeqIndex, WEIGHT w) const;
73
void NormalizeWeights(WEIGHT wTotal) const;
74
// Const predicate with no arguments.
// NOTE(review): presumably reports whether weights have been assigned.
bool WeightsSet() const;
76
// Per-sequence unsigned value; the name indicates a GCG-style checksum.
// NOTE(review): the exact checksum algorithm is not visible here.
unsigned GetGCGCheckSum(unsigned uSeqIndex) const;
78
// Const member with no arguments returning the project type ALPHA.
// NOTE(review): presumably infers the alphabet from the stored characters --
// confirm in the definition.
ALPHA GuessAlpha() const;
81
// Per-column unsigned value.
// NOTE(review): presumably the number of distinct residue types in the
// column; treatment of gaps/wildcards is not visible here.
unsigned UniqueResidueTypes(unsigned uColIndex) const;
85
// Const member; Counts is a non-const NodeCounts reference, so it can serve
// as the output. Note the parameter is named uAlignedColIndex, not
// uColIndex.
void GetNodeCounts(unsigned uAlignedColIndex, NodeCounts &Counts) const;
86
// Const member with no arguments and no return value.
// NOTE(review): what happens on a failed validation is not visible here.
void ValidateBreakMatrices() const;
87
unsigned GetCharCount(unsigned uSeqIndex, unsigned uColIndex) const;
88
// Returns a pointer to const char for the given sequence index.
// NOTE(review): whether the buffer is NUL-terminated, and its lifetime, are
// not visible here -- confirm before relying on either.
const char *GetSeqBuffer(unsigned uSeqIndex) const;
89
// Maps an "aligned column index" to a "column index" (both unsigned).
// NOTE(review): the distinction between the two index spaces is not visible
// in this chunk.
unsigned AlignedColIndexToColIndex(unsigned uAlignedColIndex) const;
90
unsigned GetSeqLength(unsigned uSeqIndex) const;
91
// Pairwise query for two sequence indexes; results are delivered through the
// two out-pointers (a double and an unsigned count).
// NOTE(review): presumably pairwise identity and the number of positions
// compared -- confirm in the definition.
void GetPWID(unsigned uSeqIndex1, unsigned uSeqIndex2, double *ptrdPWID,
92
unsigned *ptruPosCount) const;
94
void GetPairMap(unsigned uSeqIndex1, unsigned uSeqIndex2, int iMap1[],
98
// Const member with no arguments and no return value.
// NOTE(review): presumably writes the weights to a log/output -- the
// destination is not visible here.
void ListWeights() const;
100
// Gap-info serialization pair using the project type DataBuffer: the "To"
// direction is const and takes a writable buffer; the "From" direction is
// non-const and takes a const buffer.
void GapInfoToDataBuffer(DataBuffer &Buffer) const;
101
void GapInfoFromDataBuffer(const DataBuffer &Buffer);
102
// Pairwise value for two sequence indexes, returned as double.
// NOTE(review): the residue grouping used is not visible here -- confirm.
double GetPctGroupIdentityPair(unsigned uSeqIndex1, unsigned uSeqIndex2) const;
108
unsigned GetSeqCount() const
112
unsigned GetColCount() const
117
// Static member: compares sequence uSeqIndex1 of a1 with sequence uSeqIndex2
// of a2 (both MSAs passed by const reference) and returns bool.
// NOTE(review): whether the comparison is case- or gap-sensitive is not
// visible here -- confirm.
static bool SeqsEq(const MSA &a1, unsigned uSeqIndex1, const MSA &a2,
118
unsigned uSeqIndex2);
120
// Static member taking an id count.
// NOTE(review): presumably sets the class-wide static m_uIdCount declared
// later in this chunk -- confirm in the definition.
static void SetIdCount(unsigned uIdCount);
123
// Friend declarations: these two free functions are granted access to the
// class's non-public members.
friend void SetMSAWeightsMuscle(MSA &msa);
124
friend void SetThreeWayWeightsMuscle(MSA &msa);
125
// Weighting-scheme setters, named after the scheme they apply.
// NOTE(review): the first four are declared const despite the "Set" prefix.
// Presumably they write mutable weight storage -- confirm.
void SetHenikoffWeightsPB() const;
126
void SetHenikoffWeights() const;
127
void SetGSCWeights() const;
128
void SetUniformWeights() const;
129
// Unlike the setters above, this one is non-const and takes a Tree by const
// reference.
void SetClustalWWeights(const Tree &tree);
132
// NOTE(review): the access specifier in effect for the declarations below is
// outside this chunk; presumably these are internal helpers -- confirm.
// Takes non-const char pointers for the sequence data and the label, plus an
// explicit length.
// NOTE(review): whether ownership of ptrSeq/ptrLabel is transferred or the
// data is copied is not visible here.
void AppendSeq(char *ptrSeq, unsigned uSeqLength, char *ptrLabel);
133
void ExpandCache(unsigned uSeqCount, unsigned uColCount);
134
// Const member; see the weighting-scheme declarations for the same
// const-with-mutation pattern.
void CalcWeights() const;
135
// Reads from szLine (const) and writes into szName; uBytes accompanies the
// output buffer.
// NOTE(review): presumably uBytes is the capacity of szName including the
// terminator -- confirm.
void GetNameFromFASTAAnnotationLine(const char szLine[],
136
char szName[], unsigned uBytes);
137
void CopyCol(unsigned uFromCol, unsigned uToCol);
138
// BLOSUM-style weighting helpers operating on the project types ClusterTree
// and ClusterNode. All are const members.
// NOTE(review): the meaning of the unsigned return values is not visible
// here.
unsigned CalcBLOSUMWeights(ClusterTree &BlosumCluster) const;
139
void SetBLOSUMSubtreeWeight(const ClusterNode *ptrNode, double dWeight) const;
140
unsigned SetBLOSUMNodeWeight(const ClusterNode *ptrNode, double dMinDist) const;
141
void SetSubtreeWeight2(const ClusterNode *ptrNode) const;
142
// Takes a non-const ClusterNode pointer, unlike the helpers above.
void SetSubtreeGSCWeight(ClusterNode *ptrNode) const;
144
// Per-column Henikoff weight helpers (const members).
void CalcHenikoffWeightsColPB(unsigned uColIndex) const;
145
void CalcHenikoffWeightsCol(unsigned uColIndex) const;
148
// Per-object dimensions.
unsigned m_uSeqCount;
149
unsigned m_uColCount;
150
// Cache dimensions, stored separately from the counts above.
// NOTE(review): presumably the allocated capacity, as opposed to the size in
// use -- confirm against ExpandCache.
unsigned m_uCacheSeqLength;
151
unsigned m_uCacheSeqCount;
155
// Static: one value shared by all MSA objects.
static unsigned m_uIdCount;
157
// Per-object lookup arrays between ids and sequence indexes (raw pointers).
// NOTE(review): allocation, length, and ownership are not visible in this
// chunk.
unsigned *m_IdToSeqIndex;
158
unsigned *m_SeqIndexToId;
163
// Takes the MSA by const reference and a SeqVect by non-const reference, so
// v can serve as the output.
// NOTE(review): whether v is cleared first, and whether gaps are stripped,
// is not visible here.
void SeqVectFromMSA(const MSA &msa, SeqVect &v);
164
// Modifies msa in place (non-const reference).
// NOTE(review): presumably removes columns that contain gaps; whether "any
// gap" or "all gaps" qualifies is not visible here -- confirm.
void DeleteGappedCols(MSA &msa);
165
void MSAFromColRange(const MSA &msaIn, unsigned uFromColIndex, unsigned uColCount,
167
// Both inputs are const references; the result is delivered through msaCat.
// NOTE(review): the direction of concatenation (columns vs. sequences) is
// not visible here -- confirm in the definition.
void MSACat(const MSA &msa1, const MSA &msa2, MSA &msaCat);
168
// In-place variant: msa1 is a non-const reference, msa2 is const.
void MSAAppend(MSA &msa1, const MSA &msa2);
169
void MSAFromSeqSubset(const MSA &msaIn, const unsigned uSeqIndexes[], unsigned uSeqCount,
171
// Equality checks on two MSAs passed by const reference; both return void.
// NOTE(review): how a mismatch is reported is not visible here -- presumably
// a fatal error, as the "Assert" prefix suggests; confirm.
void AssertMSAEq(const MSA &msa1, const MSA &msa2);
172
// Same signature; the name indicates letter case and gaps are disregarded.
void AssertMSAEqIgnoreCaseAndGaps(const MSA &msa1, const MSA &msa2);
173
void MSASubsetByIds(const MSA &msaIn, const unsigned Ids[], unsigned uIdCount,
175
// Free functions that take the MSA by non-const reference.
// SetMSAWeightsMuscle and SetThreeWayWeightsMuscle also appear as friend
// declarations earlier in this chunk; no friend declaration for
// SetClustalWWeightsMuscle is visible in this chunk.
// NOTE(review): the weighting scheme each one applies is not visible here.
void SetMSAWeightsMuscle(MSA &msa);
176
void SetClustalWWeightsMuscle(MSA &msa);
177
void SetThreeWayWeightsMuscle(MSA &msa);