~ubuntu-branches/ubuntu/raring/bedtools/raring

« back to all changes in this revision

Viewing changes to src/shuffleBed/shuffleBed.cpp

  • Committer: Package Import Robot
  • Author(s): Charles Plessy
  • Date: 2012-11-04 17:59:41 UTC
  • mfrom: (1.1.9)
  • Revision ID: package-import@ubuntu.com-20121104175941-hahqzy1w8uy650z0
Tags: 2.17.0-1
bb9012e Imported Upstream version 2.16.2.
9006b23 Imported Upstream version 2.17.0.
9112569 Documented BEDTools license as a whole.
325689c Removed Pre-Depends: dpkg (>= 1.15.6).
a781b14 Conforms with Policy 3.9.4.
84b1167 Use Debhelper 9. 
0bf572d Distribute the test suite.
422cd34 Bash completion for BEDTools.

Show diffs side-by-side

added added

removed removed

Lines of Context:
13
13
#include "shuffleBed.h"
14
14
 
15
15
 
16
 
BedShuffle::BedShuffle(string &bedFile, string &genomeFile, string &excludeFile, string &includeFile, 
17
 
                       bool haveSeed, bool haveExclude, bool haveInclude, bool sameChrom, 
18
 
                       float overlapFraction, int seed, bool chooseChrom) {
 
16
BedShuffle::BedShuffle(string &bedFile, string &genomeFile, 
 
17
                       string &excludeFile, string &includeFile, 
 
18
                       bool haveSeed, bool haveExclude,
 
19
                       bool haveInclude, bool sameChrom, 
 
20
                       float overlapFraction, int seed,
 
21
                       bool chooseChrom, bool isBedpe) {
19
22
 
20
23
    _bedFile         = bedFile;
21
24
    _genomeFile      = genomeFile;
27
30
    _overlapFraction = overlapFraction;
28
31
    _haveSeed        = haveSeed;
29
32
    _chooseChrom     = chooseChrom;
 
33
    _isBedpe         = isBedpe;
30
34
 
31
35
 
32
36
    // use the supplied seed for the random
41
45
        _seed = (unsigned)time(0)+(unsigned)getpid();
42
46
        srand(_seed);
43
47
    }
 
48
    
 
49
    if (_isBedpe == false)
 
50
        _bed         = new BedFile(bedFile);
 
51
    else
 
52
        _bedpe       = new BedFilePE(bedFile);
44
53
 
45
 
    _bed         = new BedFile(bedFile);
46
54
    _genome      = new GenomeFile(genomeFile);
47
55
    _chroms      = _genome->getChromList();
48
56
    _numChroms   = _genome->getNumberOfChroms();
81
89
 
82
90
 
83
91
void BedShuffle::Shuffle() {
84
 
    BED bedEntry;
85
 
    _bed->Open();
86
 
    while (_bed->GetNextBed(bedEntry)) {
87
 
        if (_bed->_status == BED_VALID) {
88
 
            ChooseLocus(bedEntry);
89
 
            _bed->reportBedNewLine(bedEntry);
90
 
        }
91
 
    }
92
 
    _bed->Close();
93
 
}
94
 
 
95
 
 
96
 
 
97
 
void BedShuffle::ShuffleWithExclusions() {
98
 
 
99
 
    BED bedEntry;
100
 
    _bed->Open();
101
 
    while (_bed->GetNextBed(bedEntry)) {
102
 
        if (_bed->_status == BED_VALID) {
103
 
            // keep looking as long as the chosen
104
 
            // locus happens to overlap with regions
105
 
            // that the user wishes to exclude.
106
 
            int  tries = 0;
107
 
            bool haveOverlap = false;
108
 
            do 
109
 
            {
110
 
                // choose a new locus
 
92
    
 
93
    if (_isBedpe == false) {
 
94
        BED bedEntry;
 
95
        _bed->Open();
 
96
        while (_bed->GetNextBed(bedEntry)) {
 
97
            if (_bed->_status == BED_VALID) {
111
98
                ChooseLocus(bedEntry);
112
 
                haveOverlap = _exclude->anyHits(bedEntry.chrom, bedEntry.start, bedEntry.end,
113
 
                                                bedEntry.strand, false, false, _overlapFraction, false);
114
 
                tries++;
115
 
            } while ((haveOverlap == true) && (tries <= MAX_TRIES));
116
 
            
117
 
 
118
 
            if (tries > MAX_TRIES) {
119
 
                cerr << "Error, line " << _bed->_lineNum << ": tried " << MAX_TRIES << " potential loci for entry, but could not avoid excluded regions.  Ignoring entry and moving on." << endl;
120
 
            }
121
 
            else {
122
99
                _bed->reportBedNewLine(bedEntry);
123
100
            }
124
101
        }
125
 
    }
126
 
    _bed->Close();
 
102
        _bed->Close();
 
103
    }
 
104
    // BEDPE input
 
105
    else {
 
106
        int lineNum = 0;     // current input line number
 
107
        BedLineStatus status;
 
108
 
 
109
        BEDPE bedEntry, nullBedPE;
 
110
        _bedpe->Open();
 
111
        while ((status = _bedpe->GetNextBedPE(bedEntry, lineNum)) != BED_INVALID) 
 
112
        {
 
113
            if (status == BED_VALID) {
 
114
                ChoosePairedLocus(bedEntry);
 
115
                _bedpe->reportBedPENewLine(bedEntry);
 
116
            }
 
117
            bedEntry = nullBedPE;
 
118
        }
 
119
        _bedpe->Close();
 
120
    }
 
121
}
 
122
 
 
123
 
 
124
 
 
125
void BedShuffle::ShuffleWithExclusions() {
 
126
 
 
127
    if (_isBedpe == false) {
 
128
        BED bedEntry;
 
129
        _bed->Open();
 
130
        while (_bed->GetNextBed(bedEntry)) {
 
131
            if (_bed->_status == BED_VALID) {
 
132
                // keep looking as long as the chosen
 
133
                // locus happens to overlap with regions
 
134
                // that the user wishes to exclude.
 
135
                int  tries = 0;
 
136
                bool haveOverlap = false;
 
137
                do 
 
138
                {
 
139
                    // choose a new locus
 
140
                    ChooseLocus(bedEntry);
 
141
                    haveOverlap = _exclude->anyHits(bedEntry.chrom, 
 
142
                                                    bedEntry.start, 
 
143
                                                    bedEntry.end,
 
144
                                                    bedEntry.strand, 
 
145
                                                    false, false, 
 
146
                                                    _overlapFraction, false);
 
147
                    tries++;
 
148
                } while ((haveOverlap == true) && (tries <= MAX_TRIES));
 
149
            
 
150
 
 
151
                if (tries > MAX_TRIES) {
 
152
                    cerr << "Error, line " << _bed->_lineNum 
 
153
                         << ": tried " << MAX_TRIES 
 
154
                         << " potential loci for entry, but could not avoid "
 
155
                         << "excluded regions.  Ignoring entry and moving on." 
 
156
                         << endl;                }
 
157
                else {
 
158
                    _bed->reportBedNewLine(bedEntry);
 
159
                }
 
160
            }
 
161
        }
 
162
        _bed->Close();
 
163
    }
 
164
    // BEDPE input
 
165
    else {
 
166
        int lineNum = 0;     // current input line number
 
167
        BedLineStatus status;
 
168
 
 
169
        BEDPE bedEntry;
 
170
        _bedpe->Open();
 
171
        while ((status = _bedpe->GetNextBedPE(bedEntry, lineNum)) != BED_INVALID) 
 
172
        {
 
173
            if (status == BED_VALID) {
 
174
                // keep looking as long as the chosen
 
175
                // locus happens to overlap with regions
 
176
                // that the user wishes to exclude.
 
177
                int  tries = 0;
 
178
                bool haveOverlap1 = false;
 
179
                bool haveOverlap2 = false;
 
180
                do 
 
181
                {
 
182
                    // choose a new locus
 
183
                    ChoosePairedLocus(bedEntry);
 
184
                    haveOverlap1 = _exclude->anyHits(bedEntry.chrom1, 
 
185
                                                     bedEntry.start1, 
 
186
                                                     bedEntry.end1,
 
187
                                                     bedEntry.strand1, 
 
188
                                                     false, false, 
 
189
                                                     _overlapFraction, false);
 
190
 
 
191
                    haveOverlap2 = _exclude->anyHits(bedEntry.chrom2, 
 
192
                                                     bedEntry.start2, 
 
193
                                                     bedEntry.end2,
 
194
                                                     bedEntry.strand2, 
 
195
                                                     false, false, 
 
196
                                                     _overlapFraction, false);
 
197
                    tries++;
 
198
                } while (((haveOverlap1 == true) || (haveOverlap2 == true))
 
199
                        && (tries <= MAX_TRIES));
 
200
                
 
201
                if (tries > MAX_TRIES) {
 
202
                    cerr << "Error, line " << _bed->_lineNum 
 
203
                         << ": tried " << MAX_TRIES 
 
204
                         << " potential loci for entry, but could not avoid "
 
205
                         << "excluded regions.  Ignoring entry and moving on."
 
206
                         << endl;
 
207
                }
 
208
                else {
 
209
                    _bedpe->reportBedPENewLine(bedEntry);
 
210
                }
 
211
                
 
212
            }
 
213
        }
 
214
        _bedpe->Close();
 
215
    }
127
216
}
128
217
 
129
218
 
161
250
            // we need to combine two consective calls to rand()
162
251
            // because RAND_MAX is 2^31 (2147483648), whereas
163
252
            // mammalian genomes are obviously much larger.
164
 
            uint32_t randStart = ((rand() << 31) | rand()) % _genomeSize;
 
253
            uint32_t randStart = ((((long) rand()) << 31) | rand()) % _genomeSize;
165
254
            // use the above randomStart (e.g., for human 0..3.1billion) 
166
255
            // to identify the chrom and start on that chrom.
167
256
            pair<string, int> location = _genome->projectOnGenome(randStart);
198
287
}
199
288
 
200
289
 
 
290
void BedShuffle::ChoosePairedLocus(BEDPE &b) {
 
291
    
 
292
    CHRPOS foot1_len = b.end1 - b.start1;
 
293
    CHRPOS foot2_len = b.end2 - b.start2;
 
294
    CHRPOS length    = b.end2 - b.start1;
 
295
 
 
296
    if (b.chrom1 == b.chrom2)
 
297
    {
 
298
        CHRPOS chromSize;
 
299
        do 
 
300
        {
 
301
            uint32_t randStart = ((((long) rand()) << 31) | rand()) % _genomeSize;
 
302
            pair<string, int> location = _genome->projectOnGenome(randStart);
 
303
            b.chrom1  = location.first;
 
304
            b.chrom2  = location.first;
 
305
            b.start1  = location.second;
 
306
            b.end1    = b.start1 + foot1_len;
 
307
            b.end2    = b.start1 + length;
 
308
            b.start2  = b.end2 - foot2_len;
 
309
            chromSize      = _genome->getChromSize(location.first);
 
310
        } while ((b.end1 > chromSize) || 
 
311
                (b.start2 > chromSize) ||
 
312
                (b.end2 > chromSize));
 
313
        // keep looking if we have exceeded the end of the chrom.
 
314
    }
 
315
    else
 
316
    {
 
317
        CHRPOS chromSize1, chromSize2;
 
318
        do 
 
319
        {
 
320
            uint32_t rand1Start = ((((long) rand()) << 31) | rand()) % _genomeSize;
 
321
            uint32_t rand2Start = ((((long) rand()) << 31) | rand()) % _genomeSize;
 
322
            pair<string, int> location1 = _genome->projectOnGenome(rand1Start);
 
323
            pair<string, int> location2 = _genome->projectOnGenome(rand2Start);
 
324
            
 
325
            b.chrom1  = location1.first;
 
326
            b.chrom2  = location2.first;
 
327
            b.start1  = location1.second;
 
328
            b.start2  = location2.second;
 
329
            
 
330
            b.end1    = b.start1 + foot1_len;
 
331
            b.end2    = b.start2 + foot2_len;
 
332
            chromSize1      = _genome->getChromSize(location1.first);
 
333
            chromSize2      = _genome->getChromSize(location2.first);
 
334
            
 
335
        } while ((b.end1 > chromSize1) || 
 
336
                (b.end2 > chromSize2));
 
337
        // keep looking if we have exceeded the end of the chrom.
 
338
    }
 
339
}
 
340
 
 
341
 
201
342
void BedShuffle::ChooseLocusFromInclusionFile(BED &bedEntry) {
202
343
 
203
344
    string chrom    = bedEntry.chrom;
238
379
        ChooseLocusFromInclusionFile(bedEntry);
239
380
    }
240
381
}
241