55
BedIntersect::BedIntersect(string bedAFile, string bedBFile, bool anyHit,
56
bool writeA, bool writeB, bool writeOverlap, bool writeAllOverlap,
57
float overlapFraction, bool noHit, bool leftJoin, bool writeCount, bool sameStrand, bool diffStrand,
58
bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput, bool isUncompressedBam,
59
bool sortedInput, bool printHeader) {
55
BedIntersect::BedIntersect(string bedAFile, string bedBFile,
56
bool anyHit, bool writeA,
57
bool writeB, bool writeOverlap,
58
bool writeAllOverlap, float overlapFraction,
59
bool noHit, bool leftJoin,
60
bool writeCount, bool sameStrand,
61
bool diffStrand, bool reciprocal,
62
bool obeySplits, bool bamInput,
63
bool bamOutput, bool isUncompressedBam,
64
bool sortedInput, bool printHeader)
61
66
_bedAFile = bedAFile;
62
67
_bedBFile = bedBFile;
113
bool BedIntersect::FindBlockedOverlaps(const BED &a, const vector<BED> &a_blocks,
114
const vector<BED> &hits, bool a_is_bam) {
118
bool BedIntersect::FindBlockedOverlaps(const BED &a,
119
const vector<BED> &a_blocks,
120
const vector<BED> &hits,
115
123
int a_footprint = GetTotalBlockLength(a_blocks);
116
124
// container to store the set of raw hits
117
125
// that actually overlap the A blocks
155
// require sufficint overlap fraction (reciprocal or otherwise)
163
// require sufficient overlap fraction (reciprocal or otherwise)
156
164
// w.r.t to the "footprint" (i.e., the total length of each block)
157
if ( ((float) total_overlap / (float) a_footprint) > _overlapFraction) {
158
if (_reciprocal && ((float) total_overlap / (float) b_footprint) > _overlapFraction) {
165
if ( ((float) total_overlap
167
(float) a_footprint) > _overlapFraction)
169
if (_reciprocal && ((float) total_overlap /
170
(float) b_footprint) > _overlapFraction)
159
172
valid_hits.push_back(*hItr);
161
174
else if (!_reciprocal) {
175
void BedIntersect::ReportOverlapDetail(int overlapBases, const BED &a, const BED &b, CHRPOS s, CHRPOS e) {
188
void BedIntersect::ReportOverlapDetail(int overlapBases, const BED &a,
189
const BED &b, CHRPOS s, CHRPOS e)
176
191
// default. simple intersection only
177
192
if (_writeA == false && _writeB == false &&
178
193
_writeOverlap == false && _leftJoin == false)
246
261
// treat the BED as a single "block"
247
262
if (_obeySplits == false)
248
263
FindOverlaps(a, hits);
249
// split the BED12 into blocks and look for overlaps in each discrete block
264
// split the BED12 into blocks and look for
265
// overlaps in each discrete block
251
// find the hits that overlap with the full span of the blocked BED
267
// find the hits that overlap with the
268
// full span of the blocked BED
252
269
_bedB->allHits(a.chrom, a.start, a.end, a.strand,
253
270
hits, _sameStrand, _diffStrand,
254
_overlapFraction, _reciprocal);
255
272
// break a into discrete blocks, as we need to
256
// measure overlap with the individual blocks, not the full span.
273
// measure overlap with the individual blocks,
274
// not the full span.
257
275
bedVector a_blocks;
258
276
GetBedBlocks(a, a_blocks);
259
277
// find the overlaps between the block in A and B
270
288
ChromSweep sweep = ChromSweep(_bedA, _bedB,
271
289
_sameStrand, _diffStrand,
272
290
_overlapFraction, _reciprocal,
291
false, _printHeader);
275
293
pair<BED, vector<BED> > hit_set;
276
294
hit_set.second.reserve(10000);
277
295
while (sweep.Next(hit_set)) {
278
if (_obeySplits == false)
296
if (_obeySplits == false) {
279
297
processHits(hit_set.first, hit_set.second);
281
300
bedVector a_blocks;
282
301
GetBedBlocks(hit_set.first, a_blocks);
283
FindBlockedOverlaps(hit_set.first, a_blocks, hit_set.second, false);
302
FindBlockedOverlaps(hit_set.first, a_blocks,
303
hit_set.second, false);
303
323
// open the BAM file
304
324
BamReader reader;
305
325
BamWriter writer;
306
reader.Open(bamFile);
326
if (!reader.Open(bamFile)) {
327
cerr << "Failed to open BAM file " << bamFile << endl;
307
331
// get header & reference information
308
332
string bamHeader = reader.GetHeaderText();
309
333
RefVector refs = reader.GetReferenceData();
323
347
// get each set of alignments for each pair.
324
348
while (reader.GetNextAlignment(bam)) {
326
// BAM IsMapped() is false
327
if ((!bam.IsMapped()) && (_noHit == true))
328
writer.SaveAlignment(bam);
329
else if (!bam.IsMapped())
350
// save an unaligned read if -v
351
if (!bam.IsMapped()) {
352
if ((_noHit == true) && (_bamOutput == true))
353
writer.SaveAlignment(bam);
332
356
// break alignment into discrete blocks,
333
357
bedVector bed_blocks;
334
358
string chrom = refs.at(bam.RefID).RefName;
351
375
hits, _sameStrand, _diffStrand,
352
376
_overlapFraction, _reciprocal);
353
377
// find the overlaps between the block in A and B
354
overlapsFound = FindBlockedOverlaps(bed, bed_blocks, hits, _bamOutput);
379
FindBlockedOverlaps(bed, bed_blocks, hits, _bamOutput);
356
381
else if ((_bamOutput == false) && (_obeySplits == false))