13
13
#include "coverageBed.h"
16
BedCoverage::BedCoverage(string &bedAFile, string &bedBFile, bool sameStrand, bool diffStrand,
17
bool writeHistogram, bool bamInput, bool obeySplits,
18
bool eachBase, bool countsOnly) {
16
BedCoverage::BedCoverage(string &bedAFile, string &bedBFile,
17
bool sameStrand, bool diffStrand,
18
bool writeHistogram, bool bamInput,
19
bool obeySplits, bool eachBase,
20
23
_bedAFile = bedAFile;
21
24
_bedBFile = bedBFile;
58
61
if (_bedA->_status == BED_VALID) {
59
62
// process the BED entry as a single block
60
63
if (_obeySplits == false)
61
_bedB->countHits(a, _sameStrand, _diffStrand, _countsOnly);
62
// split the BED into discrete blocksand process each independently.
64
_bedB->countHits(a, _sameStrand,
65
_diffStrand, _countsOnly);
66
// split the BED into discrete blocksand process
67
// each independently.
64
69
bedVector bedBlocks;
65
70
GetBedBlocks(a, bedBlocks);
66
// use countSplitHits to avoid over-counting each split chunk
67
// as distinct read coverage.
68
_bedB->countSplitHits(bedBlocks, _sameStrand, _diffStrand, _countsOnly);
71
// use countSplitHits to avoid over-counting
72
// each split chunk as distinct read coverage.
73
_bedB->countSplitHits(bedBlocks, _sameStrand,
74
_diffStrand, _countsOnly);
85
91
// that we can easily compare "A" to it for overlaps
86
92
_bedB->loadBedCovFileIntoMap();
96
if (!reader.Open(bamFile)) {
97
cerr << "Failed to open BAM file " << bamFile << endl;
92
100
// get header & reference information
93
101
string header = reader.GetHeaderText();
94
102
RefVector refs = reader.GetReferenceData();
100
108
if (bam.IsMapped()) {
101
109
// treat the BAM alignment as a single "block"
102
110
if (_obeySplits == false) {
103
// construct a new BED entry from the current BAM alignment.
111
// construct a new BED entry from the current
105
a.chrom = refs.at(bam.RefID).RefName;
106
a.start = bam.Position;
107
a.end = bam.GetEndPosition(false, false);
109
if (bam.IsReverseStrand()) a.strand = "-";
117
a.chrom = refs.at(bam.RefID).RefName;
118
a.start = bam.Position;
119
a.end = bam.GetEndPosition(false, false);
121
if (bam.IsReverseStrand()) a.strand = "-";
111
_bedB->countHits(a, _sameStrand, _diffStrand, _countsOnly);
123
_bedB->countHits(a, _sameStrand,
124
_diffStrand, _countsOnly);
126
catch (out_of_range& oor)
129
<< " is said to be mapped (0x4 == false), "
130
<< "yet the chrom is missing. Skipping."
113
134
// split the BAM alignment into discrete blocks and
114
135
// look for overlaps only within each block.
116
137
// vec to store the discrete BED "blocks" from a
117
138
bedVector bedBlocks;
118
// since we are counting coverage, we do want to split blocks when a
119
// deletion (D) CIGAR op is encountered (hence the true for the last parm)
120
GetBamBlocks(bam, refs.at(bam.RefID).RefName, bedBlocks, false, true);
121
// use countSplitHits to avoid over-counting each split chunk
122
// as distinct read coverage.
123
_bedB->countSplitHits(bedBlocks, _sameStrand, _diffStrand, _countsOnly);
139
// since we are counting coverage, we do want
140
// to split blocks when a deletion (D) CIGAR op
141
// is encountered (hence the true for the last parm)
144
string chrom = refs.at(bam.RefID).RefName;
145
GetBamBlocks(bam, chrom, bedBlocks, false, true);
146
// use countSplitHits to avoid over-counting
147
// each split chunk as distinct read coverage.
148
_bedB->countSplitHits(bedBlocks, _sameStrand,
149
_diffStrand, _countsOnly);
151
catch (out_of_range& oor)
154
<< " is said to be mapped (0x4 == false), "
155
<< "yet the chrom is missing. Skipping."
163
197
void BedCoverage::ReportCoverage() {
165
map<unsigned int, unsigned int> allDepthHist;
166
unsigned int totalLength = 0;
199
map<unsigned long, unsigned long> allDepthHist;
200
unsigned long totalLength = 0;
168
202
// process each chromosome
169
203
masterBedCovMap::const_iterator chromItr = _bedB->bedCovMap.begin();
185
219
int depth = 0; // tracks the depth at the current base
187
221
// the start is either the first base in the feature OR
188
// the leftmost position of an overlapping feature. e.g. (s = start):
222
// the leftmost position of an overlapping feature.
190
225
// B s ------------
191
int start = min(bedItr->minOverlapStart, bedItr->start);
226
int start = min(bedItr->minOverlapStart, bedItr->start);
193
228
// track the number of bases in the feature covered by
194
229
// 0, 1, 2, ... n features in A
195
230
map<unsigned int, unsigned int> depthHist;
196
231
map<unsigned int, DEPTH>::const_iterator depthItr;
198
// compute the coverage observed at each base in the feature marching from start to end.
233
// compute the coverage observed at each base in
234
// the feature marching from start to end.
199
235
for (CHRPOS pos = start+1; pos <= bedItr->end; pos++)
201
// map pointer grabbing the starts and ends observed at this position
237
// map pointer grabbing the starts and
238
// ends observed at this position
202
239
depthItr = bedItr->depthMap.find(pos);
203
240
// increment coverage if starts observed at this position.
204
241
if (depthItr != bedItr->depthMap.end())
205
242
depth += depthItr->second.starts;
206
// update coverage assuming the current position is within the current B feature
243
// update coverage assuming the current position is
244
// within the current B feature
207
245
if ((pos > bedItr->start) && (pos <= bedItr->end)) {
208
246
if (depth == 0) zeroDepthCount++;
209
// update our histograms, assuming we are not reporting "per-base" coverage.
247
// update our histograms, assuming we are not
248
// reporting "per-base" coverage.
210
249
if (_eachBase == false) {
211
250
depthHist[depth]++;
212
251
allDepthHist[depth]++;
214
else if ((_eachBase == true) && (bedItr->zeroLength == false))
253
else if ((_eachBase == true) &&
254
(bedItr->zeroLength == false))
216
256
_bedB->reportBedTab(*bedItr);
217
257
printf("%d\t%d\n", pos-bedItr->start, depth);
247
287
// print a summary of the coverage
248
288
if (_writeHistogram == false) {
249
289
_bedB->reportBedTab(*bedItr);
250
printf("%d\t%d\t%d\t%0.7f\n", bedItr->count, nonZeroBases, length, fractCovered);
290
printf("%d\t%d\t%d\t%0.7f\n",
291
bedItr->count, nonZeroBases,
292
length, fractCovered);
253
295
// report the number of bases with coverage == x
255
297
// produce a histogram when not a zero length feature.
256
298
if (bedItr->zeroLength == false) {
257
map<unsigned int, unsigned int>::const_iterator histItr = depthHist.begin();
258
map<unsigned int, unsigned int>::const_iterator histEnd = depthHist.end();
299
map<unsigned int, unsigned int>::const_iterator
300
histItr = depthHist.begin();
301
map<unsigned int, unsigned int>::const_iterator
302
histEnd = depthHist.end();
259
303
for (; histItr != histEnd; ++histItr)
261
float fractAtThisDepth = (float) histItr->second / length;
305
float fractAtThisDepth = (float)
306
histItr->second / length;
262
308
_bedB->reportBedTab(*bedItr);
263
printf("%d\t%d\t%d\t%0.7f\n", histItr->first, histItr->second, length, fractAtThisDepth);
309
printf("%d\t%d\t%d\t%0.7f\n",
310
histItr->first, histItr->second,
311
length, fractAtThisDepth);
266
314
// special case when it is a zero length feauture.
268
316
_bedB->reportBedTab(*bedItr);
269
printf("%d\t%d\t%d\t%0.7f\n", bedItr->count, 0, 0, 1.0000000);
317
printf("%d\t%d\t%d\t%0.7f\n",
318
bedItr->count, 0, 0, 1.0000000);
276
325
// report a histogram of coverage among _all_
277
326
// features in B.
278
327
if (_writeHistogram == true) {
279
map<unsigned int, unsigned int>::const_iterator histItr = allDepthHist.begin();
280
map<unsigned int, unsigned int>::const_iterator histEnd = allDepthHist.end();
328
map<unsigned long, unsigned long>::const_iterator
329
histItr = allDepthHist.begin();
330
map<unsigned long, unsigned long>::const_iterator
331
histEnd = allDepthHist.end();
281
332
for (; histItr != histEnd; ++histItr) {
282
333
float fractAtThisDepth = (float) histItr->second / totalLength;
283
printf("all\t%d\t%d\t%d\t%0.7f\n", histItr->first, histItr->second, totalLength, fractAtThisDepth);
334
printf("all\t%lu\t%lu\t%lu\t%0.7f\n",
335
histItr->first, histItr->second,
336
totalLength, fractAtThisDepth);