13
13
#include "shuffleBed.h"
16
BedShuffle::BedShuffle(string &bedFile, string &genomeFile, string &excludeFile, string &includeFile,
17
bool haveSeed, bool haveExclude, bool haveInclude, bool sameChrom,
18
float overlapFraction, int seed, bool chooseChrom) {
16
BedShuffle::BedShuffle(string &bedFile, string &genomeFile,
17
string &excludeFile, string &includeFile,
18
bool haveSeed, bool haveExclude,
19
bool haveInclude, bool sameChrom,
20
float overlapFraction, int seed,
21
bool chooseChrom, bool isBedpe) {
20
23
_bedFile = bedFile;
21
24
_genomeFile = genomeFile;
83
91
void BedShuffle::Shuffle() {
86
while (_bed->GetNextBed(bedEntry)) {
87
if (_bed->_status == BED_VALID) {
88
ChooseLocus(bedEntry);
89
_bed->reportBedNewLine(bedEntry);
97
void BedShuffle::ShuffleWithExclusions() {
101
while (_bed->GetNextBed(bedEntry)) {
102
if (_bed->_status == BED_VALID) {
103
// keep looking as long as the chosen
104
// locus happens to overlap with regions
105
// that the user wishes to exclude.
107
bool haveOverlap = false;
110
// choose a new locus
93
if (_isBedpe == false) {
96
while (_bed->GetNextBed(bedEntry)) {
97
if (_bed->_status == BED_VALID) {
111
98
ChooseLocus(bedEntry);
112
haveOverlap = _exclude->anyHits(bedEntry.chrom, bedEntry.start, bedEntry.end,
113
bedEntry.strand, false, false, _overlapFraction, false);
115
} while ((haveOverlap == true) && (tries <= MAX_TRIES));
118
if (tries > MAX_TRIES) {
119
cerr << "Error, line " << _bed->_lineNum << ": tried " << MAX_TRIES << " potential loci for entry, but could not avoid excluded regions. Ignoring entry and moving on." << endl;
122
99
_bed->reportBedNewLine(bedEntry);
106
int lineNum = 0; // current input line number
107
BedLineStatus status;
109
BEDPE bedEntry, nullBedPE;
111
while ((status = _bedpe->GetNextBedPE(bedEntry, lineNum)) != BED_INVALID)
113
if (status == BED_VALID) {
114
ChoosePairedLocus(bedEntry);
115
_bedpe->reportBedPENewLine(bedEntry);
117
bedEntry = nullBedPE;
125
void BedShuffle::ShuffleWithExclusions() {
127
if (_isBedpe == false) {
130
while (_bed->GetNextBed(bedEntry)) {
131
if (_bed->_status == BED_VALID) {
132
// keep looking as long as the chosen
133
// locus happens to overlap with regions
134
// that the user wishes to exclude.
136
bool haveOverlap = false;
139
// choose a new locus
140
ChooseLocus(bedEntry);
141
haveOverlap = _exclude->anyHits(bedEntry.chrom,
146
_overlapFraction, false);
148
} while ((haveOverlap == true) && (tries <= MAX_TRIES));
151
if (tries > MAX_TRIES) {
152
cerr << "Error, line " << _bed->_lineNum
153
<< ": tried " << MAX_TRIES
154
<< " potential loci for entry, but could not avoid "
155
<< "excluded regions. Ignoring entry and moving on."
158
_bed->reportBedNewLine(bedEntry);
166
int lineNum = 0; // current input line number
167
BedLineStatus status;
171
while ((status = _bedpe->GetNextBedPE(bedEntry, lineNum)) != BED_INVALID)
173
if (status == BED_VALID) {
174
// keep looking as long as the chosen
175
// locus happens to overlap with regions
176
// that the user wishes to exclude.
178
bool haveOverlap1 = false;
179
bool haveOverlap2 = false;
182
// choose a new locus
183
ChoosePairedLocus(bedEntry);
184
haveOverlap1 = _exclude->anyHits(bedEntry.chrom1,
189
_overlapFraction, false);
191
haveOverlap2 = _exclude->anyHits(bedEntry.chrom2,
196
_overlapFraction, false);
198
} while (((haveOverlap1 == true) || (haveOverlap2 == true))
199
&& (tries <= MAX_TRIES));
201
if (tries > MAX_TRIES) {
202
cerr << "Error, line " << _bed->_lineNum
203
<< ": tried " << MAX_TRIES
204
<< " potential loci for entry, but could not avoid "
205
<< "excluded regions. Ignoring entry and moving on."
209
_bedpe->reportBedPENewLine(bedEntry);
290
void BedShuffle::ChoosePairedLocus(BEDPE &b) {
292
CHRPOS foot1_len = b.end1 - b.start1;
293
CHRPOS foot2_len = b.end2 - b.start2;
294
CHRPOS length = b.end2 - b.start1;
296
if (b.chrom1 == b.chrom2)
301
uint32_t randStart = ((((long) rand()) << 31) | rand()) % _genomeSize;
302
pair<string, int> location = _genome->projectOnGenome(randStart);
303
b.chrom1 = location.first;
304
b.chrom2 = location.first;
305
b.start1 = location.second;
306
b.end1 = b.start1 + foot1_len;
307
b.end2 = b.start1 + length;
308
b.start2 = b.end2 - foot2_len;
309
chromSize = _genome->getChromSize(location.first);
310
} while ((b.end1 > chromSize) ||
311
(b.start2 > chromSize) ||
312
(b.end2 > chromSize));
313
// keep looking if we have exceeded the end of the chrom.
317
CHRPOS chromSize1, chromSize2;
320
uint32_t rand1Start = ((((long) rand()) << 31) | rand()) % _genomeSize;
321
uint32_t rand2Start = ((((long) rand()) << 31) | rand()) % _genomeSize;
322
pair<string, int> location1 = _genome->projectOnGenome(rand1Start);
323
pair<string, int> location2 = _genome->projectOnGenome(rand2Start);
325
b.chrom1 = location1.first;
326
b.chrom2 = location2.first;
327
b.start1 = location1.second;
328
b.start2 = location2.second;
330
b.end1 = b.start1 + foot1_len;
331
b.end2 = b.start2 + foot2_len;
332
chromSize1 = _genome->getChromSize(location1.first);
333
chromSize2 = _genome->getChromSize(location2.first);
335
} while ((b.end1 > chromSize1) ||
336
(b.end2 > chromSize2));
337
// keep looking if we have exceeded the end of the chrom.
201
342
void BedShuffle::ChooseLocusFromInclusionFile(BED &bedEntry) {
203
344
string chrom = bedEntry.chrom;