///////////////////////////////////////////////////////////////// // SparseMatrix.h // // Sparse matrix computations ///////////////////////////////////////////////////////////////// #ifndef SPARSEMATRIX_H #define SPARSEMATRIX_H #include using namespace std; const float POSTERIOR_CUTOFF = 0.01; // minimum posterior probability // value that is maintained in the // sparse matrix representation typedef pair PIF; // Sparse matrix entry type // first --> column // second --> value ///////////////////////////////////////////////////////////////// // SparseMatrix // // Class for sparse matrix computations ///////////////////////////////////////////////////////////////// class SparseMatrix { int seq1Length, seq2Length; // dimensions of matrix VI rowSize; // rowSize[i] = # of cells in row i SafeVector data; // data values SafeVector::iterator> rowPtrs; // pointers to the beginning of each row VF gapPosteriors; // gap posteriors (sum of pairs) for first and second sequences int gapPostBase; // offset to begining of gapPosteriors ///////////////////////////////////////////////////////////////// // SparseMatrix::SparseMatrix() // // Private constructor. ///////////////////////////////////////////////////////////////// SparseMatrix (){} public: ///////////////////////////////////////////////////////////////// // SparseMatrix::SparseMatrix() // // Constructor. Builds a sparse matrix from a posterior matrix. // Note that the expected format for the posterior matrix is as // a (seq1Length+1) x (seq2Length+1) matrix where the 0th row // and 0th column are ignored (they should contain all zeroes). ///////////////////////////////////////////////////////////////// SparseMatrix (int seq1Length, int seq2Length, const VF &posterior) : seq1Length (seq1Length), seq2Length (seq2Length), gapPostBase ((seq1Length + 1) * (seq2Length + 1)) { int numCells = 0; assert (seq1Length > 0); assert (seq2Length > 0); // calculate memory required; count the number of cells in the // posterior matrix above the threshold VF::const_iterator postPtr = posterior.begin(); for (int i = 0; i <= seq1Length; i++){ for (int j = 0; j <= seq2Length; j++){ if (*(postPtr++) >= POSTERIOR_CUTOFF){ assert (i != 0 && j != 0); numCells++; } } } // allocate memory data.resize(numCells); rowSize.resize (seq1Length + 1); rowSize[0] = -1; rowPtrs.resize (seq1Length + 1); rowPtrs[0] = data.end(); gapPosteriors.resize(seq1Length + seq2Length + 2); // build sparse matrix for (int i = 0; i < seq2Length + 1; i++) gapPosteriors[seq1Length + i + 1] = 1; postPtr = posterior.begin() + seq2Length + 1; // note that we're skipping the first row here SafeVector::iterator dataPtr = data.begin(); for (int i = 1; i <= seq1Length; i++){ gapPosteriors[i] = 1; postPtr++; // and skipping the first column of each row rowPtrs[i] = dataPtr; for (int j = 1; j <= seq2Length; j++){ if (*postPtr >= POSTERIOR_CUTOFF){ dataPtr->first = j; dataPtr->second = *postPtr; gapPosteriors[i] -= *postPtr; gapPosteriors[seq1Length + j + 1] -= *postPtr; dataPtr++; if (gapPosteriors[i] < 1e-4) gapPosteriors[i] = 1e-4; if (gapPosteriors[seq1Length + j + 1] < 1e-4) gapPosteriors[seq1Length + j + 1] = 1e-4; } postPtr++; } rowSize[i] = dataPtr - rowPtrs[i]; } // for (int i = 0; i < seq1Length + seq2Length + 2; i++) // gapPosteriors[i] = *(postPtr++); } ///////////////////////////////////////////////////////////////// // SparseMatrix::GetRowPtr() // // Returns the pointer to a particular row in the sparse matrix. ///////////////////////////////////////////////////////////////// SafeVector::iterator GetRowPtr (int row) const { assert (row >= 1 && row <= seq1Length); return rowPtrs[row]; } ///////////////////////////////////////////////////////////////// // SparseMatrix::GetValue() // // Returns value at a particular row, column. ///////////////////////////////////////////////////////////////// float GetValue (int row, int col){ assert (row >= 1 && row <= seq1Length); assert (col >= 1 && col <= seq2Length); for (int i = 0; i < rowSize[row]; i++){ if (rowPtrs[row][i].first == col) return rowPtrs[row][i].second; } return 0; } ///////////////////////////////////////////////////////////////// // SparseMatrix::GetRowSize() // // Returns the number of entries in a particular row. ///////////////////////////////////////////////////////////////// int GetRowSize (int row) const { assert (row >= 1 && row <= seq1Length); return rowSize[row]; } ///////////////////////////////////////////////////////////////// // SparseMatrix::GetSeq1Length() // // Returns the first dimension of the matrix. ///////////////////////////////////////////////////////////////// int GetSeq1Length () const { return seq1Length; } ///////////////////////////////////////////////////////////////// // SparseMatrix::GetSeq2Length() // // Returns the second dimension of the matrix. ///////////////////////////////////////////////////////////////// int GetSeq2Length () const { return seq2Length; } ///////////////////////////////////////////////////////////////// // SparseMatrix::GetRowPtr // // Returns the pointer to a particular row in the sparse matrix. ///////////////////////////////////////////////////////////////// int GetNumCells () const { return data.size(); } ///////////////////////////////////////////////////////////////// // SparseMatrix::Print() // // Prints out a sparse matrix. ///////////////////////////////////////////////////////////////// void Print (ostream &outfile) const { outfile << "Match posteriors:" << endl; for (int i = 1; i <= seq1Length; i++){ outfile << " " << i << ":"; for (int j = 0; j < rowSize[i]; j++){ outfile << " (" << rowPtrs[i][j].first << "," << rowPtrs[i][j].second << ")"; } outfile << endl; } outfile << "Gap posteriors 0: "; for (int i = 1; i <= seq1Length; i++){ outfile << " (" << i << "," << gapPosteriors[i] << ")"; } outfile << endl << "Gap posteriors 1: "; for (int i = 1; i <= seq2Length; i++){ outfile << " (" << i << "," << gapPosteriors[i + seq1Length + 1] << ")"; } outfile << endl; } ///////////////////////////////////////////////////////////////// // SparseMatrix::ComputeTranspose() // // Returns a new sparse matrix containing the transpose of the // current matrix. ///////////////////////////////////////////////////////////////// SparseMatrix *ComputeTranspose () const { // create a new sparse matrix SparseMatrix *ret = new SparseMatrix(); int numCells = data.size(); ret->seq1Length = seq2Length; ret->seq2Length = seq1Length; // allocate memory ret->data.resize (numCells); ret->rowSize.resize (seq2Length + 1); ret->rowSize[0] = -1; ret->rowPtrs.resize (seq2Length + 1); ret->rowPtrs[0] = ret->data.end(); ret->gapPosteriors.resize(seq1Length + seq2Length + 2); // compute row sizes for (int i = 1; i <= seq2Length; i++) ret->rowSize[i] = 0; for (int i = 0; i < numCells; i++) ret->rowSize[data[i].first]++; // compute row ptrs for (int i = 1; i <= seq2Length; i++){ ret->rowPtrs[i] = (i == 1) ? ret->data.begin() : ret->rowPtrs[i-1] + ret->rowSize[i-1]; } // now fill in data SafeVector::iterator> currPtrs = ret->rowPtrs; for (int i = 1; i <= seq1Length; i++){ SafeVector::iterator row = rowPtrs[i]; for (int j = 0; j < rowSize[i]; j++){ currPtrs[row[j].first]->first = i; currPtrs[row[j].first]->second = row[j].second; currPtrs[row[j].first]++; } } for (int i = 0; i <= seq1Length; i++) { ret->gapPosteriors[i + seq2Length + 1] = gapPosteriors[i]; } for (int i = 0; i <= seq2Length; i++) { ret->gapPosteriors[i] = gapPosteriors[i + seq1Length + 1]; } return ret; } ///////////////////////////////////////////////////////////////// // SparseMatrix::GetPosterior() // // Return the posterior representation of the sparse matrix. ///////////////////////////////////////////////////////////////// VF *GetPosterior () const { // create a new posterior matrix VF *posteriorPtr = new VF((seq1Length+1) * (seq2Length+1) + seq1Length + seq2Length + 2,0); assert (posteriorPtr); VF &posterior = *posteriorPtr; // build the posterior matrix for (int i = 0; i < (seq1Length+1) * (seq2Length+1); i++) posterior[i] = 0; for (int i = 1; i <= seq1Length; i++){ VF::iterator postPtr = posterior.begin() + i * (seq2Length+1); for (int j = 0; j < rowSize[i]; j++){ postPtr[rowPtrs[i][j].first] = rowPtrs[i][j].second; } } for (int i = 0; i < seq1Length + seq2Length + 2; i++) posterior[gapPostBase + i] = gapPosteriors[i]; return posteriorPtr; } float GetGapPosterior(int seqNum, int position) const { assert(seqNum == 0 && position <= seq1Length || seqNum == 1 && position <= seq2Length); return gapPosteriors[position + seqNum * (seq1Length + 1)]; } }; #endif