20
20
MultiCovBam::MultiCovBam(const vector<string> &bam_files, const string bed_file,
21
21
int minQual, bool properOnly,
22
bool keepDuplicates, bool keepFailedQC)
22
bool keepDuplicates, bool keepFailedQC,
23
bool obeySplits, bool sameStrand,
24
bool diffStrand, float overlapFraction,
24
27
_bam_files(bam_files),
25
28
_bed_file(bed_file),
27
30
_properOnly(properOnly),
28
31
_keepDuplicates(keepDuplicates),
29
_keepFailedQC(keepFailedQC)
32
_keepFailedQC(keepFailedQC),
33
_obeySplits(obeySplits),
34
_sameStrand(sameStrand),
35
_diffStrand(diffStrand),
36
_overlapFraction(overlapFraction),
37
_reciprocal(reciprocal)
31
39
_bed = new BedFile(_bed_file);
52
bool MultiCovBam::FindBlockedOverlaps(const BED &a, const vector<BED> &a_blocks,
55
int a_footprint = GetTotalBlockLength(a_blocks);
57
// 1. Break the raw hit into it's blocks
58
// and see of one of the hit blocks (inner loop)
59
// overlaps one of a's blocks (inner, inner loop)
60
// 2. If so, mark the hit as valid and add it to the valid_set.
61
// Otherwise, the hit only overlapped the span of a and not
62
// the individual blocks. Thus, it doesn't count.
64
// break the hit into blocks
66
GetBedBlocks(hit, hitBlocks);
67
int b_footprint = GetTotalBlockLength(hitBlocks);
68
// test to see if there is a valid hit with one of the blocks
69
bool valid_hit = false;
71
bedVector::const_iterator hbItr = hitBlocks.begin();
72
bedVector::const_iterator hbEnd = hitBlocks.end();
73
for (; hbItr != hbEnd; ++hbItr) {
74
// look for overlaps between this hit/block and each block in a
75
bedVector::const_iterator a_blockItr = a_blocks.begin();
76
bedVector::const_iterator a_blockEnd = a_blocks.end();
77
for (; a_blockItr != a_blockEnd; ++a_blockItr) {
78
int hs = max(a_blockItr->start, hbItr->start);
79
int he = min(a_blockItr->end, hbItr->end);
80
int overlap = he - hs;
83
tot_overlap += overlap;
89
// require sufficient overlap fraction (reciprocal or otherwise)
90
// w.r.t to the "footprint" (i.e., the total length of each block)
91
if ( ((float) tot_overlap / (float) a_footprint) > _overlapFraction) {
93
((float) tot_overlap / (float) b_footprint) > _overlapFraction)
97
else if (!_reciprocal) {
44
106
void MultiCovBam::CollectCoverage()
46
108
BamMultiReader reader;
73
135
// set up a BamRegion to which to attempt to jump
74
136
BamRegion region(refId, (int)bed.start, refId, (int)bed.end);
76
// everything checks out, just iterate through specified region, counting alignments
138
// everything checks out, just iterate through
139
// specified region, counting alignments
77
140
if ( (refId != -1) && (reader.SetRegion(region)) ) {
79
142
while ( reader.GetNextAlignment(al) )
145
if (al.IsReverseStrand() == true) strand = "-";
146
bool strands_are_same = (bed.strand == strand);
81
148
bool duplicate = al.IsDuplicate();
82
149
bool failedQC = al.IsFailedQC();
83
150
if (_keepDuplicates) duplicate = false;
84
151
if (_keepFailedQC) failedQC = false;
85
// map qual must exceed minimum
86
if ((al.MapQuality >= _minQual) && (!duplicate) && (!failedQC)) {
87
// ignore if not properly paired and we actually care.
88
if (_properOnly && !al.IsProperPair())
91
// lookup the offset of the file name and tabulate
92
//coverage for the appropriate file
93
counts[bamFileMap[al.Filename]]++;
155
(_properOnly && !al.IsProperPair()) ||
156
(_sameStrand && !strands_are_same) ||
157
(_diffStrand && strands_are_same) ||
158
(al.MapQuality < _minQual) ||
167
if (_obeySplits == false) {
168
// enforce fractional overlap
169
int al_end = al.GetEndPosition(false, false);
170
CHRPOS s = max(al.Position, (int) bed.start);
171
CHRPOS e = min(al_end, (int) bed.end);
172
CHRPOS aLength = (bed.end - bed.start);
173
CHRPOS bLength = (al_end - al.Position);
174
int overlapBases = (e - s);
176
( (float) overlapBases / (float) aLength );
178
( (float) overlapBases / (float) bLength );
180
if ( aOverlap >= _overlapFraction)
183
counts[bamFileMap[al.Filename]]++;
184
else if (bOverlap >= _overlapFraction)
185
counts[bamFileMap[al.Filename]]++;
189
// break alignment into discrete blocks,
190
bedVector bed_blocks, hits;
191
GetBamBlocks(al, bed.chrom,
192
bed_blocks, false, true);
193
// find the overlaps b/w the block in A & B
194
bool overlapsFound = FindBlockedOverlaps(bed,
197
if (overlapsFound == true)
198
counts[bamFileMap[al.Filename]]++;