3
This program was created at: Fri Apr 17 14:59:53 2015
4
This program was created by: Zev N. Kronenberg
7
Contact: zev.kronenber@gmail.com
9
Organization: Unviersity of Utah
16
Copyright (c) <2015> <Zev N. Kronenberg>
18
Permission is hereby granted, free of charge, to any person obtaining a copy
19
of this software and associated documentation files (the "Software"), to deal
20
in the Software without restriction, including without limitation the rights
21
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
22
copies of the Software, and to permit persons to whom the Software is
23
furnished to do so, subject to the following conditions:
25
The above copyright notice and this permission notice shall be included in
26
all copies or substantial portions of the Software.
28
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
31
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
32
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
33
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
51
#include "gpatInfo.hpp"
75
struct smoothedInputData{
84
static const char *optString = "x:y:u:f:n:s:";
88
std::map<std::string, int> FORMATS;
90
//------------------------------- SUBROUTINE --------------------------------
94
Function does : prints help
102
cerr << endl << endl;
103
cerr << "INFO: help" << endl;
104
cerr << "INFO: description:" << endl;
105
cerr << " permuteSmooth is a method for adding empirical p-values smoothed wcFst scores." << endl ;
107
cerr << "INFO: usage: permuteSmooth -s wcFst.smooth.txt -f wcFst.txt -n 5 -s 1 "<< endl;
109
cerr << "Required:" << endl;
110
cerr << " file: f -- argument: original wcFst data "<< endl;
111
cerr << " smoothed: s -- argument: smoothed wcFst data "<< endl;
112
cerr << " format: y -- argument: [swcFst, segwcFst] "<< endl;
113
cerr << "Optional:" << endl;
114
cerr << " number: n -- argument: the number of permutations to run for each value [1000]" << endl;
115
cerr << " success: u -- argument: stop permutations after \'s\' successes [1]" << endl;
116
cerr << " success: x -- argument: number of threads [1]" << endl;
118
cerr << "OUTPUT: permuteSmooth will append three additional columns:" << endl;
119
cerr << " 1. The number of successes " << endl;
120
cerr << " 2. The number of trials " << endl;
121
cerr << " 3. The empirical p-value " << endl;
128
//------------------------------- OPTIONS --------------------------------
129
int parseOpts(int argc, char** argv)
132
globalOpts.file = "NA";
135
globalOpts.npermutation = 1000;
137
opt = getopt(argc, argv, optString);
142
globalOpts.threads = atoi(optarg);
146
globalOpts.file = optarg;
151
globalOpts.format = optarg;
152
if(FORMATS.find(globalOpts.format) == FORMATS.end()){
153
std::cerr << "FATAL: format not supported: " << globalOpts.format;
158
if(globalOpts.format == "swcFst"){
159
globalOpts.chrIndex = 0;
160
globalOpts.nIndex = 3;
161
globalOpts.valueIndex = 4;
163
if(globalOpts.format == "segwcFst"){
164
globalOpts.chrIndex = 0;
165
globalOpts.valueIndex = 3;
166
globalOpts.nIndex = 5;
172
globalOpts.npermutation = atof(((string)optarg).c_str());
173
cerr << "INFO: permuteGPAT++ will do N permutations: " << globalOpts.npermutation << endl;
178
globalOpts.smoothed = optarg;
179
cerr << "INFO: smoothed file: " << globalOpts.smoothed << endl;
184
globalOpts.nsuc = atof(((string)optarg).c_str());
185
cerr << "INFO: permuteGPAT++ will stop permutations after N successes: " << globalOpts.nsuc << endl;
194
opt = getopt( argc, argv, optString );
200
//------------------------------- SUBROUTINE --------------------------------
202
Function input : data, vector to load
204
Function does : returns a contguous window
206
Function returns: bool
210
bool getContiguousWindow(vector<score *> & data,
211
vector<double> & load,
213
int r = rand() % data.size();
215
if(r+n >= data.size()){
220
if(data[r]->seqid != data[r+n]->seqid){
225
for(int i = r; i < r+n; i++){
226
load.push_back(data[i]->score);
232
//------------------------------- SUBROUTINE --------------------------------
234
double mean(vector<double> & data){
238
for(vector<double>::iterator it = data.begin(); it != data.end(); it++){
241
return sum / data.size();
246
//------------------------------- SUBROUTINE --------------------------------
248
Function input : score, n, data
250
Function does : permutes the score. requires that the window is contiguous
256
bool permute(double s, int n, vector<score *> & data,
257
double * nRep, double *nSuc, double * ePv){
260
*ePv = 1 / globalOpts.npermutation;
262
while(*nSuc < globalOpts.nsuc && *nRep < globalOpts.npermutation ){
266
std::vector<double> scores;
268
bool getWindow = false;
273
if(nfail > globalOpts.npermutation){
276
getWindow = getContiguousWindow(data, scores, n, &nfail);
279
double ns = mean(scores);
286
*ePv = *nSuc / *nRep;
292
//------------------------------- MAIN --------------------------------
297
int main( int argc, char** argv)
301
FORMATS["swcFst"] = 1;
302
FORMATS["segwcFst"] = 1;
304
globalOpts.threads = 1;
306
int parse = parseOpts(argc, argv);
308
omp_set_num_threads(globalOpts.threads);
311
if(globalOpts.file.compare("NA") == 0){
312
cerr << "FATAL: no file was provided" << endl;
316
if(globalOpts.format.empty()){
317
std::cerr << "FATAL: no format specified." << std::endl;
321
vector< score *> data;
323
ifstream wcDat (globalOpts.file.c_str());
329
while(getline(wcDat, line)){
330
vector<string> region = split(line, "\t");
331
// will change for other output
332
double value = atof(region[4].c_str());
334
if(globalOpts.format == "swcFst" || globalOpts.format == "segwcFst"){
336
if(region.size() != 5){
337
cerr << "FATAL: wrong number of columns in wcFst input" << endl;
347
sp->seqid = region[0] ;
348
sp->pos = atoi(region[1].c_str());
355
cerr << "FATAL: coult not open file: " << globalOpts.file << endl;
363
cerr << "INFO: raw values to permute against: " << data.size() << endl;
365
ifstream smoothedFile (globalOpts.smoothed.c_str());
367
vector<smoothedInputData*> sData;
369
if(smoothedFile.is_open()){
370
while(getline(smoothedFile, line)){
372
vector<string> region = split(line, "\t");
373
smoothedInputData * sp = new smoothedInputData;
376
sp->score = atof(region[globalOpts.valueIndex].c_str());
377
sp->n = atoi(region[globalOpts.nIndex].c_str());
385
smoothedFile.close();
387
cerr << "INFO: Number of smoothed windows to permute : " << sData.size() << endl;
389
#pragma omp parallel for schedule(dynamic, 20)
390
for(int i = 0; i < sData.size(); i++){
391
bool per = permute(sData[i]->score, sData[i]->n,
392
data, &sData[i]->nPer,
393
&sData[i]->nSuc, &sData[i]->ePv);
398
cout << sData[i]->line
399
<< "\t" << sData[i]->nSuc
400
<< "\t" << sData[i]->nPer
401
<< "\t" << sData[i]->ePv << endl;
402
omp_unset_lock(&lock);
406
cout << sData[i]->line
409
<< "\t" << "NA" << endl;
410
omp_unset_lock(&lock);
414
for(vector<score*>::iterator itz = data.begin();
415
itz != data.end(); itz++){
418
for(vector<smoothedInputData*>::iterator itz = sData.begin();
419
itz != sData.end(); itz++){