~ubuntu-branches/ubuntu/raring/bedtools/raring

« back to all changes in this revision

Viewing changes to src/coverageBed/coverageBed.cpp

  • Committer: Package Import Robot
  • Author(s): Charles Plessy
  • Date: 2012-11-04 17:59:41 UTC
  • mfrom: (1.1.9)
  • Revision ID: package-import@ubuntu.com-20121104175941-hahqzy1w8uy650z0
Tags: 2.17.0-1
bb9012e Imported Upstream version 2.16.2.
9006b23 Imported Upstream version 2.17.0.
9112569 Documented BEDTools license as a whole.
325689c Removed Pre-Depends: dpkg (>= 1.15.6).
a781b14 Conforms with Policy 3.9.4.
84b1167 Use Debhelper 9. 
0bf572d Distribute the test suite.
422cd34 Bash completion for BEDTools.

Show diffs side-by-side

added added

removed removed

Lines of Context:
13
13
#include "coverageBed.h"
14
14
 
15
15
// build
16
 
BedCoverage::BedCoverage(string &bedAFile, string &bedBFile, bool sameStrand, bool diffStrand,
17
 
                         bool writeHistogram, bool bamInput, bool obeySplits, 
18
 
                         bool eachBase, bool countsOnly) {
 
16
BedCoverage::BedCoverage(string &bedAFile, string &bedBFile, 
 
17
                         bool sameStrand, bool diffStrand,
 
18
                         bool writeHistogram, bool bamInput, 
 
19
                         bool obeySplits, bool eachBase, 
 
20
                         bool countsOnly) 
 
21
{
19
22
 
20
23
    _bedAFile       = bedAFile;
21
24
    _bedBFile       = bedBFile;
58
61
        if (_bedA->_status == BED_VALID) {
59
62
            // process the BED entry as a single block
60
63
            if (_obeySplits == false)
61
 
                _bedB->countHits(a, _sameStrand, _diffStrand, _countsOnly);
62
 
            // split the BED into discrete blocksand process each independently.
 
64
                _bedB->countHits(a, _sameStrand, 
 
65
                                 _diffStrand, _countsOnly);
 
66
            // split the BED into discrete blocksand process 
 
67
            // each independently.
63
68
            else {
64
69
                bedVector bedBlocks;
65
70
                GetBedBlocks(a, bedBlocks);
66
 
                // use countSplitHits to avoid over-counting each split chunk
67
 
                // as distinct read coverage.
68
 
                _bedB->countSplitHits(bedBlocks, _sameStrand, _diffStrand, _countsOnly);
 
71
                // use countSplitHits to avoid over-counting 
 
72
                // each split chunk as distinct read coverage.
 
73
                _bedB->countSplitHits(bedBlocks, _sameStrand, 
 
74
                                      _diffStrand, _countsOnly);
69
75
            }
70
76
        }
71
77
    }
85
91
    // that we can easily compare "A" to it for overlaps
86
92
    _bedB->loadBedCovFileIntoMap();
87
93
 
88
 
    // open the BAM file
 
94
    // open the BAM file                
89
95
    BamReader reader;
90
 
    reader.Open(bamFile);
91
 
 
 
96
    if (!reader.Open(bamFile)) {
 
97
        cerr << "Failed to open BAM file " << bamFile << endl;
 
98
        exit(1);
 
99
    }
92
100
    // get header & reference information
93
101
    string header = reader.GetHeaderText();
94
102
    RefVector refs = reader.GetReferenceData();
100
108
        if (bam.IsMapped()) {
101
109
            // treat the BAM alignment as a single "block"
102
110
            if (_obeySplits == false) {
103
 
                // construct a new BED entry from the current BAM alignment.
 
111
                // construct a new BED entry from the current 
 
112
                // BAM alignment.
104
113
                BED a;
105
 
                a.chrom  = refs.at(bam.RefID).RefName;
106
 
                a.start  = bam.Position;
107
 
                a.end    = bam.GetEndPosition(false, false);
108
 
                a.strand = "+";
109
 
                if (bam.IsReverseStrand()) a.strand = "-";
 
114
                
 
115
                try 
 
116
                {
 
117
                    a.chrom  = refs.at(bam.RefID).RefName;
 
118
                    a.start  = bam.Position;
 
119
                    a.end    = bam.GetEndPosition(false, false);
 
120
                    a.strand = "+";
 
121
                    if (bam.IsReverseStrand()) a.strand = "-";
110
122
 
111
 
                _bedB->countHits(a, _sameStrand, _diffStrand, _countsOnly);
 
123
                    _bedB->countHits(a, _sameStrand, 
 
124
                                     _diffStrand, _countsOnly);
 
125
                }
 
126
                catch (out_of_range& oor) 
 
127
                {
 
128
                  cerr << bam.Name 
 
129
                       << " is said to be mapped (0x4 == false), "
 
130
                       << "yet the chrom is missing.  Skipping." 
 
131
                       << endl;
 
132
                }
112
133
            }
113
134
            // split the BAM alignment into discrete blocks and
114
135
            // look for overlaps only within each block.
115
136
            else {
116
137
                // vec to store the discrete BED "blocks" from a
117
138
                bedVector bedBlocks;
118
 
                // since we are counting coverage, we do want to split blocks when a
119
 
                // deletion (D) CIGAR op is encountered (hence the true for the last parm)
120
 
                GetBamBlocks(bam, refs.at(bam.RefID).RefName, bedBlocks, false, true);
121
 
                // use countSplitHits to avoid over-counting each split chunk
122
 
                // as distinct read coverage.
123
 
                _bedB->countSplitHits(bedBlocks, _sameStrand, _diffStrand, _countsOnly);
 
139
                // since we are counting coverage, we do want 
 
140
                // to split blocks when a deletion (D) CIGAR op 
 
141
                // is encountered (hence the true for the last parm)
 
142
                try 
 
143
                {
 
144
                    string chrom  = refs.at(bam.RefID).RefName;
 
145
                    GetBamBlocks(bam, chrom, bedBlocks, false, true);
 
146
                    // use countSplitHits to avoid over-counting 
 
147
                    // each split chunk as distinct read coverage.
 
148
                    _bedB->countSplitHits(bedBlocks, _sameStrand, 
 
149
                                          _diffStrand, _countsOnly);
 
150
                }
 
151
                catch (out_of_range& oor) 
 
152
                {
 
153
                  cerr << bam.Name 
 
154
                       << " is said to be mapped (0x4 == false), "
 
155
                       << "yet the chrom is missing.  Skipping." 
 
156
                       << endl;
 
157
                }
124
158
            }
125
159
        }
126
160
    }
162
196
 
163
197
void BedCoverage::ReportCoverage() {
164
198
 
165
 
    map<unsigned int, unsigned int> allDepthHist;
166
 
    unsigned int totalLength = 0;
 
199
    map<unsigned long, unsigned long> allDepthHist;
 
200
    unsigned long totalLength = 0;
167
201
 
168
202
    // process each chromosome
169
203
    masterBedCovMap::const_iterator chromItr = _bedB->bedCovMap.begin();
185
219
                int depth          = 0; // tracks the depth at the current base
186
220
 
187
221
                // the start is either the first base in the feature OR
188
 
                // the leftmost position of an overlapping feature. e.g. (s = start):
 
222
                // the leftmost position of an overlapping feature. 
 
223
                // e.g. (s = start):
189
224
                // A    ----------
190
225
                // B    s    ------------
191
 
                int start          = min(bedItr->minOverlapStart, bedItr->start);
 
226
                int start = min(bedItr->minOverlapStart, bedItr->start);
192
227
 
193
228
                // track the number of bases in the feature covered by
194
229
                // 0, 1, 2, ... n features in A
195
230
                map<unsigned int, unsigned int> depthHist;
196
231
                map<unsigned int, DEPTH>::const_iterator depthItr;
197
232
 
198
 
                // compute the coverage observed at each base in the feature marching from start to end.
 
233
                // compute the coverage observed at each base in 
 
234
                // the feature marching from start to end.
199
235
                for (CHRPOS pos = start+1; pos <= bedItr->end; pos++)
200
236
                {
201
 
                    // map pointer grabbing the starts and ends observed at this position
 
237
                    // map pointer grabbing the starts and 
 
238
                    // ends observed at this position
202
239
                    depthItr = bedItr->depthMap.find(pos);
203
240
                    // increment coverage if starts observed at this position.
204
241
                    if (depthItr != bedItr->depthMap.end())
205
242
                        depth += depthItr->second.starts;
206
 
                    // update coverage assuming the current position is within the current B feature
 
243
                    // update coverage assuming the current position is 
 
244
                    // within the current B feature
207
245
                    if ((pos > bedItr->start) && (pos <= bedItr->end)) {
208
246
                        if (depth == 0) zeroDepthCount++;
209
 
                        // update our histograms, assuming we are not reporting "per-base" coverage.
 
247
                        // update our histograms, assuming we are not 
 
248
                        // reporting "per-base" coverage.
210
249
                        if (_eachBase == false) {
211
250
                            depthHist[depth]++;
212
251
                            allDepthHist[depth]++;
213
252
                        }
214
 
                        else if ((_eachBase == true) && (bedItr->zeroLength == false))
 
253
                        else if ((_eachBase == true) && 
 
254
                                 (bedItr->zeroLength == false))
215
255
                        {
216
256
                            _bedB->reportBedTab(*bedItr);
217
257
                            printf("%d\t%d\n", pos-bedItr->start, depth);
247
287
                    // print a summary of the coverage
248
288
                    if (_writeHistogram == false) {
249
289
                        _bedB->reportBedTab(*bedItr);
250
 
                        printf("%d\t%d\t%d\t%0.7f\n", bedItr->count, nonZeroBases, length, fractCovered);
 
290
                        printf("%d\t%d\t%d\t%0.7f\n", 
 
291
                               bedItr->count, nonZeroBases, 
 
292
                               length, fractCovered);
251
293
                    }
252
294
                    // HISTOGRAM
253
295
                    // report the number of bases with coverage == x
254
296
                    else {
255
297
                        // produce a histogram when not a zero length feature.
256
298
                        if (bedItr->zeroLength == false) {
257
 
                            map<unsigned int, unsigned int>::const_iterator histItr = depthHist.begin();
258
 
                            map<unsigned int, unsigned int>::const_iterator histEnd = depthHist.end();
 
299
                            map<unsigned int, unsigned int>::const_iterator 
 
300
                                histItr = depthHist.begin();
 
301
                            map<unsigned int, unsigned int>::const_iterator 
 
302
                                histEnd = depthHist.end();
259
303
                            for (; histItr != histEnd; ++histItr)
260
304
                            {
261
 
                                float fractAtThisDepth = (float) histItr->second / length;
 
305
                                float fractAtThisDepth = (float) 
 
306
                                    histItr->second / length;
 
307
                                
262
308
                                _bedB->reportBedTab(*bedItr);
263
 
                                printf("%d\t%d\t%d\t%0.7f\n", histItr->first, histItr->second, length, fractAtThisDepth);
 
309
                                printf("%d\t%d\t%d\t%0.7f\n", 
 
310
                                       histItr->first, histItr->second, 
 
311
                                       length, fractAtThisDepth);
264
312
                            }
265
313
                        }
266
314
                        // special case when it is a zero length feauture.
267
315
                        else {
268
316
                            _bedB->reportBedTab(*bedItr);
269
 
                            printf("%d\t%d\t%d\t%0.7f\n", bedItr->count, 0, 0, 1.0000000);
 
317
                            printf("%d\t%d\t%d\t%0.7f\n", 
 
318
                                   bedItr->count, 0, 0, 1.0000000);
270
319
                        }
271
320
                    }
272
321
                }
276
325
    // report a histogram of coverage among _all_
277
326
    // features in B.
278
327
    if (_writeHistogram == true) {
279
 
        map<unsigned int, unsigned int>::const_iterator histItr = allDepthHist.begin();
280
 
        map<unsigned int, unsigned int>::const_iterator histEnd = allDepthHist.end();
 
328
        map<unsigned long, unsigned long>::const_iterator 
 
329
            histItr = allDepthHist.begin();
 
330
        map<unsigned long, unsigned long>::const_iterator 
 
331
            histEnd = allDepthHist.end();
281
332
        for (; histItr != histEnd; ++histItr) {
282
333
            float fractAtThisDepth = (float) histItr->second / totalLength;
283
 
            printf("all\t%d\t%d\t%d\t%0.7f\n", histItr->first, histItr->second, totalLength, fractAtThisDepth);
 
334
            printf("all\t%lu\t%lu\t%lu\t%0.7f\n", 
 
335
                   histItr->first, histItr->second, 
 
336
                   totalLength, fractAtThisDepth);
284
337
        }
285
338
    }
286
339
}