~ubuntu-branches/ubuntu/raring/bedtools/raring

« back to all changes in this revision

Viewing changes to src/multiBamCov/multiBamCov.cpp

  • Committer: Package Import Robot
  • Author(s): Charles Plessy
  • Date: 2012-11-04 17:59:41 UTC
  • mfrom: (1.1.9)
  • Revision ID: package-import@ubuntu.com-20121104175941-hahqzy1w8uy650z0
Tags: 2.17.0-1
bb9012e Imported Upstream version 2.16.2.
9006b23 Imported Upstream version 2.17.0.
9112569 Documented BEDTools license as a whole.
325689c Removed Pre-Depends: dpkg (>= 1.15.6).
a781b14 Conforms with Policy 3.9.4.
84b1167 Use Debhelper 9. 
0bf572d Distribute the test suite.
422cd34 Bash completion for BEDTools.

Show diffs side-by-side

added added

removed removed

Lines of Context:
19
19
*/
20
20
MultiCovBam::MultiCovBam(const vector<string> &bam_files, const string bed_file, 
21
21
                         int minQual, bool properOnly,
22
 
                         bool keepDuplicates, bool keepFailedQC)
 
22
                         bool keepDuplicates, bool keepFailedQC,
 
23
                         bool obeySplits, bool sameStrand, 
 
24
                         bool diffStrand, float overlapFraction,
 
25
                         bool reciprocal)
23
26
:
24
27
_bam_files(bam_files),
25
28
_bed_file(bed_file),
26
29
_minQual(minQual),
27
30
_properOnly(properOnly),
28
31
_keepDuplicates(keepDuplicates),
29
 
_keepFailedQC(keepFailedQC)
 
32
_keepFailedQC(keepFailedQC),
 
33
_obeySplits(obeySplits),
 
34
_sameStrand(sameStrand),
 
35
_diffStrand(diffStrand),
 
36
_overlapFraction(overlapFraction),
 
37
_reciprocal(reciprocal)
30
38
{
31
39
        _bed = new BedFile(_bed_file);
32
40
    LoadBamFileMap();
41
49
 
42
50
 
43
51
 
 
52
bool MultiCovBam::FindBlockedOverlaps(const BED &a, const vector<BED> &a_blocks, 
 
53
                                      const BED &hit) {
 
54
 
 
55
    int a_footprint = GetTotalBlockLength(a_blocks);
 
56
    
 
57
    // 1. Break the raw hit into it's blocks
 
58
    //    and see of one of the hit blocks (inner loop)
 
59
    //    overlaps one of a's blocks (inner, inner loop)
 
60
    // 2. If so, mark the hit as valid and add it to the valid_set.
 
61
    //    Otherwise, the hit only overlapped the span of a and not
 
62
    //    the individual blocks.  Thus, it doesn't count.
 
63
 
 
64
    // break the hit into blocks
 
65
    bedVector hitBlocks;
 
66
    GetBedBlocks(hit, hitBlocks);
 
67
    int b_footprint = GetTotalBlockLength(hitBlocks);
 
68
    // test to see if there is a valid hit with one of the blocks
 
69
    bool valid_hit    = false;
 
70
    int tot_overlap = 0;
 
71
    bedVector::const_iterator hbItr = hitBlocks.begin();
 
72
    bedVector::const_iterator hbEnd = hitBlocks.end();
 
73
    for (; hbItr != hbEnd; ++hbItr) {
 
74
        // look for overlaps between this hit/block and each block in a
 
75
        bedVector::const_iterator a_blockItr = a_blocks.begin();
 
76
        bedVector::const_iterator a_blockEnd = a_blocks.end();
 
77
        for (; a_blockItr != a_blockEnd; ++a_blockItr) {
 
78
            int hs = max(a_blockItr->start, hbItr->start);
 
79
            int he = min(a_blockItr->end, hbItr->end);
 
80
            int overlap = he - hs;
 
81
            if (overlap > 0) {
 
82
                valid_hit = true;
 
83
                tot_overlap += overlap;
 
84
            }
 
85
        }
 
86
    }
 
87
    if (valid_hit)
 
88
    {
 
89
        // require sufficient overlap fraction (reciprocal or otherwise)
 
90
        // w.r.t to the "footprint" (i.e., the total length of each block)
 
91
        if ( ((float) tot_overlap / (float) a_footprint) > _overlapFraction) {
 
92
            if (_reciprocal && 
 
93
                ((float) tot_overlap / (float) b_footprint) > _overlapFraction) 
 
94
            {
 
95
                return true;
 
96
            }
 
97
            else if (!_reciprocal) {
 
98
                return true;
 
99
            }
 
100
        }
 
101
    }
 
102
    return false;
 
103
}
 
104
 
 
105
 
44
106
void MultiCovBam::CollectCoverage()
45
107
{
46
108
    BamMultiReader reader;
73
135
                    // set up a BamRegion to which to attempt to jump
74
136
                    BamRegion region(refId, (int)bed.start, refId, (int)bed.end);
75
137
                    
76
 
                    // everything checks out, just iterate through specified region, counting alignments
 
138
                    // everything checks out, just iterate through 
 
139
                    // specified region, counting alignments
77
140
                    if ( (refId != -1) && (reader.SetRegion(region)) ) {
78
141
                        BamAlignment al;
79
142
                        while ( reader.GetNextAlignment(al) )
80
143
                        {
 
144
                            string strand = "+";
 
145
                            if (al.IsReverseStrand() == true) strand = "-";
 
146
                            bool strands_are_same = (bed.strand == strand);
 
147
 
81
148
                            bool duplicate = al.IsDuplicate();
82
149
                            bool failedQC  = al.IsFailedQC();
83
150
                            if (_keepDuplicates) duplicate = false;
84
151
                            if (_keepFailedQC)    failedQC = false;
85
 
                            // map qual must exceed minimum
86
 
                            if ((al.MapQuality >= _minQual) && (!duplicate) && (!failedQC)) {
87
 
                                // ignore if not properly paired and we actually care.
88
 
                                if (_properOnly && !al.IsProperPair())
89
 
                                    continue;
90
 
 
91
 
                                // lookup the offset of the file name and tabulate 
92
 
                                //coverage for the appropriate file
93
 
                                counts[bamFileMap[al.Filename]]++;
 
152
 
 
153
                            // filters
 
154
                            if (
 
155
                                (_properOnly && !al.IsProperPair()) ||
 
156
                                (_sameStrand && !strands_are_same)  ||
 
157
                                (_diffStrand && strands_are_same)   || 
 
158
                                (al.MapQuality < _minQual)          ||
 
159
                                (duplicate)                         ||
 
160
                                (failedQC)
 
161
                            )
 
162
                            {
 
163
                                continue;
 
164
                            }
 
165
                            
 
166
 
 
167
                            if (_obeySplits == false) {
 
168
                                // enforce fractional overlap
 
169
                                int al_end = al.GetEndPosition(false, false);
 
170
                                CHRPOS s = max(al.Position, (int) bed.start);
 
171
                                CHRPOS e = min(al_end, (int) bed.end);
 
172
                                CHRPOS aLength = (bed.end - bed.start);
 
173
                                CHRPOS bLength = (al_end - al.Position);
 
174
                                int overlapBases = (e - s);
 
175
                                float aOverlap = 
 
176
                                    ( (float) overlapBases / (float) aLength );
 
177
                                float bOverlap = 
 
178
                                    ( (float) overlapBases / (float) bLength );
 
179
                                
 
180
                                if ( aOverlap >= _overlapFraction) 
 
181
                                {
 
182
                                    if (!_reciprocal)
 
183
                                        counts[bamFileMap[al.Filename]]++;
 
184
                                    else if (bOverlap >= _overlapFraction)
 
185
                                        counts[bamFileMap[al.Filename]]++;
 
186
                                }
 
187
                            }
 
188
                            else {
 
189
                                // break alignment into discrete blocks,
 
190
                                bedVector bed_blocks, hits;
 
191
                                GetBamBlocks(al, bed.chrom, 
 
192
                                             bed_blocks, false, true);
 
193
                                // find the overlaps b/w the block in A & B
 
194
                                bool overlapsFound = FindBlockedOverlaps(bed, 
 
195
                                                                 bed_blocks, 
 
196
                                                                 bed);
 
197
                                if (overlapsFound == true)
 
198
                                    counts[bamFileMap[al.Filename]]++;
94
199
                            }
95
200
                        }
96
201
                    }