4
This program was created at: Tue Sep 8 21:05:23 2015
5
This program was created by: Zev N. Kronenberg
8
Contact: zev.kronenber@gmail.com
10
Organization: Unviersity of Utah
17
Copyright (c) <2015> <Zev N. Kronenberg>
19
Permission is hereby granted, free of charge, to any person obtaining a copy
20
of this software and associated documentation files (the "Software"), to deal
21
in the Software without restriction, including without limitation the rights
22
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
23
copies of the Software, and to permit persons to whom the Software is
24
furnished to do so, subject to the following conditions:
26
The above copyright notice and this permission notice shall be included in
27
all copies or substantial portions of the Software.
29
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
31
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
32
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
33
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
34
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
51
#include "gpatInfo.hpp"
60
static const char *optString = "hf:s:";
65
cerr << "INFO: help" << endl;
66
cerr << "INFO: description:" << endl;
67
cerr << " Creates genomic segments (bed file) for regions with high wcFst " << endl;
69
cerr << "Output : 8 columns : " << endl;
70
cerr << " 1. Seqid " << endl;
71
cerr << " 2. Start (zero based) " << endl;
72
cerr << " 3. End (zero based) " << endl;
73
cerr << " 4. Average Fst " << endl;
74
cerr << " 5. Average high Fst (Fst > -s) " << endl;
75
cerr << " 6. N Fst values in segment " << endl;
76
cerr << " 7. N high fst values in segment " << endl;
77
cerr << " 8. Segment length " << endl;
79
cerr << "INFO: usage: segmentFst -s 0.7 -f wcFst.output.txt " << endl;
81
cerr << "INFO: required: -f -- Output from wcFst " << endl;
82
cerr << "INFO: optional: -s -- High Fst cutoff [0.8] " << endl;
89
//------------------------------- OPTIONS --------------------------------
90
int parseOpts(int argc, char** argv)
93
globalOpts.file = "NA";
94
opt = getopt(argc, argv, optString);
100
globalOpts.cut = atof(op.c_str());
112
globalOpts.file = optarg;
120
opt = getopt( argc, argv, optString );
124
//------------------------------- SUBROUTINE --------------------------------
134
bool growWindow(vector<double> & values,
148
if(*end >= values.size()){
152
for(int index = *begin; index <= *end; index++){
153
if(values[index] > globalOpts.cut){
155
*hSum += values[index];
159
*lSum += values[index];
162
if((*nhigh)*2 < (*nlow)){
168
//------------------------------- SUBROUTINE --------------------------------
170
Function input : takes the sorted Fst scores, positions, and seqids
172
Function does : segments and prints bed
179
void process(vector<int> & pos, vector<double> & value, vector<string> & seqid)
183
// i is the index of the outter loop/aka variant sites.
184
// always start the seed with 9 SNPs the window grows to 10 in "growWindow"
185
for(int i = 9; i < value.size()-9; i++){
196
bool anyGroth = false;
198
while(growWindow(value, &begin, &end,
199
&nHigh, &nLow, &HighSum, &LowSum)){
202
// the current window had an extention
204
// reset the index beyond current window
210
if(end >= value.size()){
211
end = (value.size() - 1);
214
double avgFstHigh = HighSum / double(nHigh);
215
double avgFst = (HighSum + LowSum) / (double(nHigh)+double(nLow));
219
cout << seqid[begin] << "\t"
220
<< pos[begin] -1 << "\t"
221
<< pos[end] -1 << "\t"
223
<< avgFstHigh << "\t"
224
<< nHigh + nLow << "\t"
226
<< (pos[end] - pos[begin])
233
//------------------------------- MAIN --------------------------------
238
int main( int argc, char** argv)
240
globalOpts.cut = 0.8;
241
int parse = parseOpts(argc, argv);
246
vector<string> seqid;
248
vector<double> value;
250
if(globalOpts.file.empty()){
256
ifstream myfile (globalOpts.file);
257
if (myfile.is_open())
259
while ( getline (myfile,line) )
261
vector<string> lineDat = split(line, '\t');
262
if(lineDat.size() != 5){
263
cerr << "FATAL: not valid wcFst input" << endl;
266
if(last.compare(lineDat[0]) != 0){
268
process(pos, value, seqid);
275
if(atoi(lineDat[1].c_str()) < lastPos){
276
cerr << "FATAL: wcFst input must be sorted by position." << endl;
279
lastPos = atoi(lineDat[1].c_str());
280
seqid.push_back(lineDat[0]);
281
pos.push_back(atoi(lineDat[1].c_str()));
282
value.push_back(atof(lineDat[4].c_str()));
286
std::cerr << "INFO: about to segment: " << pos.size() << " scores." << std::endl;
287
process(pos, value, seqid);
291
cerr << "FATAL: could not open file: " << globalOpts.file << endl;