~ubuntu-branches/ubuntu/raring/bedtools/raring

« back to all changes in this revision

Viewing changes to src/intersectBed/intersectBed.cpp

  • Committer: Package Import Robot
  • Author(s): Charles Plessy
  • Date: 2012-11-04 17:59:41 UTC
  • mfrom: (1.1.9)
  • Revision ID: package-import@ubuntu.com-20121104175941-hahqzy1w8uy650z0
Tags: 2.17.0-1
bb9012e Imported Upstream version 2.16.2.
9006b23 Imported Upstream version 2.17.0.
9112569 Documented BEDTools license as a whole.
325689c Removed Pre-Depends: dpkg (>= 1.15.6).
a781b14 Conforms with Policy 3.9.4.
84b1167 Use Debhelper 9. 
0bf572d Distribute the test suite.
422cd34 Bash completion for BEDTools.

Show diffs side-by-side

added added

removed removed

Lines of Context:
52
52
/*
53
53
    Constructor
54
54
*/
55
 
BedIntersect::BedIntersect(string bedAFile, string bedBFile, bool anyHit,
56
 
                           bool writeA, bool writeB, bool writeOverlap, bool writeAllOverlap,
57
 
                           float overlapFraction, bool noHit, bool leftJoin, bool writeCount, bool sameStrand, bool diffStrand,
58
 
                           bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput, bool isUncompressedBam,
59
 
                           bool sortedInput, bool printHeader) {
60
 
 
 
55
BedIntersect::BedIntersect(string bedAFile, string bedBFile, 
 
56
                           bool anyHit, bool writeA, 
 
57
                           bool writeB, bool writeOverlap, 
 
58
                           bool writeAllOverlap, float overlapFraction, 
 
59
                           bool noHit, bool leftJoin, 
 
60
                           bool writeCount, bool sameStrand, 
 
61
                           bool diffStrand, bool reciprocal, 
 
62
                           bool obeySplits, bool bamInput, 
 
63
                           bool bamOutput, bool isUncompressedBam,
 
64
                           bool sortedInput, bool printHeader) 
 
65
{
61
66
    _bedAFile            = bedAFile;
62
67
    _bedBFile            = bedBFile;
63
68
    _anyHit              = anyHit;
110
115
}
111
116
 
112
117
 
113
 
bool BedIntersect::FindBlockedOverlaps(const BED &a, const vector<BED> &a_blocks, 
114
 
                                       const vector<BED> &hits, bool a_is_bam) {
 
118
bool BedIntersect::FindBlockedOverlaps(const BED &a, 
 
119
                                       const vector<BED> &a_blocks, 
 
120
                                       const vector<BED> &hits, 
 
121
                                       bool a_is_bam) 
 
122
{
115
123
    int a_footprint = GetTotalBlockLength(a_blocks);
116
124
    // container to store the set of raw hits 
117
125
    // that actually overlap the A blocks
152
160
            }
153
161
        }
154
162
        if (valid_hit) {
155
 
            // require sufficint overlap fraction (reciprocal or otherwise)
 
163
            // require sufficient overlap fraction (reciprocal or otherwise)
156
164
            // w.r.t to the "footprint" (i.e., the total length of each block)
157
 
            if ( ((float) total_overlap / (float) a_footprint) > _overlapFraction) {
158
 
                if (_reciprocal && ((float) total_overlap / (float) b_footprint) > _overlapFraction) {
 
165
            if ( ((float) total_overlap 
 
166
                   / 
 
167
                  (float) a_footprint) > _overlapFraction) 
 
168
            {
 
169
                if (_reciprocal && ((float) total_overlap / 
 
170
                                    (float) b_footprint) > _overlapFraction) 
 
171
                {
159
172
                    valid_hits.push_back(*hItr);
160
173
                }
161
174
                else if (!_reciprocal) {
172
185
}
173
186
 
174
187
 
175
 
void BedIntersect::ReportOverlapDetail(int overlapBases, const BED &a, const BED &b, CHRPOS s, CHRPOS e) {
 
188
void BedIntersect::ReportOverlapDetail(int overlapBases, const BED &a, 
 
189
                                       const BED &b, CHRPOS s, CHRPOS e) 
 
190
{
176
191
    // default. simple intersection only
177
192
    if (_writeA == false && _writeB == false && 
178
193
        _writeOverlap == false && _leftJoin == false) 
246
261
                // treat the BED as a single "block"
247
262
                if (_obeySplits == false)
248
263
                    FindOverlaps(a, hits);
249
 
                // split the BED12 into blocks and look for overlaps in each discrete block
 
264
                // split the BED12 into blocks and look for 
 
265
                // overlaps in each discrete block
250
266
                else {
251
 
                    // find the hits that overlap with the full span of the blocked BED
 
267
                    // find the hits that overlap with the 
 
268
                    // full span of the blocked BED
252
269
                    _bedB->allHits(a.chrom, a.start, a.end, a.strand,
253
270
                                   hits, _sameStrand, _diffStrand,
254
 
                                   _overlapFraction, _reciprocal);
 
271
                                   0.0, false);
255
272
                    // break a into discrete blocks, as we need to 
256
 
                    // measure overlap with the individual blocks, not the full span.
 
273
                    // measure overlap with the individual blocks, 
 
274
                    // not the full span.
257
275
                    bedVector a_blocks; 
258
276
                    GetBedBlocks(a, a_blocks);
259
277
                    // find the overlaps between the block in A and B 
270
288
        ChromSweep sweep = ChromSweep(_bedA, _bedB, 
271
289
                                      _sameStrand, _diffStrand, 
272
290
                                      _overlapFraction, _reciprocal,
273
 
                                      _printHeader);
 
291
                                      false, _printHeader);
274
292
 
275
293
        pair<BED, vector<BED> > hit_set;
276
294
        hit_set.second.reserve(10000);
277
295
        while (sweep.Next(hit_set)) {
278
 
            if (_obeySplits == false)
 
296
            if (_obeySplits == false) {
279
297
                processHits(hit_set.first, hit_set.second);
 
298
            }
280
299
            else {
281
300
                bedVector a_blocks; 
282
301
                GetBedBlocks(hit_set.first, a_blocks);
283
 
                FindBlockedOverlaps(hit_set.first, a_blocks, hit_set.second, false);
 
302
                FindBlockedOverlaps(hit_set.first, a_blocks, 
 
303
                                    hit_set.second, false);
284
304
            }
285
305
        }
286
306
    }
303
323
    // open the BAM file
304
324
    BamReader reader;
305
325
    BamWriter writer;
306
 
    reader.Open(bamFile);
 
326
    if (!reader.Open(bamFile)) {
 
327
        cerr << "Failed to open BAM file " << bamFile << endl;
 
328
        exit(1);
 
329
    }
 
330
    
307
331
    // get header & reference information
308
332
    string bamHeader  = reader.GetHeaderText();
309
333
    RefVector refs    = reader.GetReferenceData();
323
347
    // get each set of alignments for each pair.
324
348
    while (reader.GetNextAlignment(bam)) {
325
349
 
326
 
        // BAM IsMapped() is false
327
 
        if ((!bam.IsMapped()) && (_noHit == true))
328
 
            writer.SaveAlignment(bam);
329
 
        else if (!bam.IsMapped())
 
350
        // save an unaligned read if -v
 
351
        if (!bam.IsMapped()) {
 
352
            if ((_noHit == true) && (_bamOutput == true))
 
353
                writer.SaveAlignment(bam);
330
354
            continue;
331
 
 
 
355
        }   
332
356
        // break alignment into discrete blocks,
333
357
        bedVector bed_blocks;
334
358
        string chrom = refs.at(bam.RefID).RefName;
351
375
                           hits, _sameStrand, _diffStrand,
352
376
                           _overlapFraction, _reciprocal);
353
377
            // find the overlaps between the block in A and B
354
 
            overlapsFound = FindBlockedOverlaps(bed, bed_blocks, hits, _bamOutput);
 
378
            overlapsFound = 
 
379
                FindBlockedOverlaps(bed, bed_blocks, hits, _bamOutput);
355
380
        }
356
381
        else if ((_bamOutput == false) && (_obeySplits == false))
357
382
        {