4
This program was created at: Tue Sep 8 21:05:23 2015
5
This program was created by: Zev N. Kronenberg
8
Contact: zev.kronenber@gmail.com
10
Organization: University of Utah
17
Copyright (c) <2015> <Zev N. Kronenberg>
19
Permission is hereby granted, free of charge, to any person obtaining a copy
20
of this software and associated documentation files (the "Software"), to deal
21
in the Software without restriction, including without limitation the rights
22
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
23
copies of the Software, and to permit persons to whom the Software is
24
furnished to do so, subject to the following conditions:
26
The above copyright notice and this permission notice shall be included in
27
all copies or substantial portions of the Software.
29
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
31
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
32
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
33
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
34
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
51
#include "gpatInfo.hpp"
74
static const char *optString = "hf:s:";
79
cerr << "INFO: help" << endl;
80
cerr << "INFO: description:" << endl;
81
cerr << " normalizes iHS or XP-EHH scores " << endl;
83
cerr << "Output : normalize-iHS adds one additional column to input (normalized score)." << endl;
85
cerr << "INFO: usage: normalizeHS -s 0.01 -f input.txt " << endl;
87
cerr << "INFO: required: -f -- Output from iHS or XPEHH " << endl;
88
cerr << "INFO: optional: -s -- Max AF diff for window [0.01]" << endl;
95
//------------------------------- OPTIONS --------------------------------
96
int parseOpts(int argc, char** argv)
99
opt = getopt(argc, argv, optString);
105
globalOpts.afDiff = atof(op.c_str());
117
globalOpts.file = optarg;
125
opt = getopt( argc, argv, optString );
130
bool sortAF(iHSdat * L, iHSdat * R){
138
//------------------------------- SUBROUTINE --------------------------------
140
Function input : vector of doubles, and the mean (mu) that deviations are measured from
142
Function does : calculates the sample variance of the data around mu (sum of squared deviations divided by n - 1)
144
Function returns: the variance, as a double
148
double var(vector<double> & data, double mu){
151
for(vector<double>::iterator it = data.begin(); it != data.end(); it++){
152
variance += pow((*it) - mu,2);
155
return variance / (data.size() - 1);
159
//------------------------------- SUBROUTINE --------------------------------
161
Function input : vector of doubles
163
Function does : computes the mean
165
Function returns: the mean
170
double windowAvg(std::vector<double> & rangeData){
175
for(std::vector<double>::iterator it = rangeData.begin(); it != rangeData.end(); it++){
186
//------------------------------- SUBROUTINE --------------------------------
188
Function input : vector of iHS data
190
Function does : normalizes iHS within a window of similar allele frequency: niHS = (iHS - window mean) / window sd
192
Function returns: nothing
196
void normalize(std::vector<iHSdat *> & data, int * pos){
198
std::vector<double> windat;
203
while((abs(data[start]->af - data[end]->af ) < globalOpts.afDiff)
204
&& end < data.size() -1 ){
208
for(int i = start; i <= end; i++){
209
windat.push_back(data[i]->iHS);
212
double avg = windowAvg(windat);
213
double sd = sqrt(var(windat, avg));
215
std::cerr << "start: " << data[start]->af << " "
216
<< "end: " << data[end]->af << " "
217
<< "n iHS scores: " << windat.size() << " "
218
<< "mean: " << avg << " "
219
<< "sd: " << sd << std::endl;
221
for(int i = start; i <= end; i++){
222
data[i]->niHS = (data[i]->iHS - avg) / (sd);
229
//------------------------------- MAIN --------------------------------
234
int main( int argc, char** argv)
236
globalOpts.afDiff = 0.01;
237
int parse = parseOpts(argc, argv);
239
if(globalOpts.file.empty()){
240
std::cerr << "FATAL: no file" << std::endl;
244
std::vector<iHSdat *> data;
247
ifstream myfile (globalOpts.file);
248
if (myfile.is_open())
250
while ( getline (myfile,line) ){
251
vector<string> lineDat = split(line, '\t');
253
iHSdat * tp = new iHSdat;
254
tp->seqid = lineDat[0];
255
tp->start = lineDat[1];
256
tp->af = atof(lineDat[2].c_str());
257
tp->ehhR = atof(lineDat[3].c_str());
258
tp->ehhA = atof(lineDat[4].c_str());
259
tp->iHS = atof(lineDat[5].c_str());
260
tp->F1 = lineDat[6].c_str();
261
tp->F2 = lineDat[7].c_str();
271
cerr << "FATAL: could not open file: " << globalOpts.file << endl;
276
std::cerr << "INFO: sorting " << data.size() << " scores by AF" << std::endl;
278
sort(data.begin(), data.end(), sortAF);
280
std::cerr << "INFO: finished sorting" << std::endl;
282
for(int i = 0; i < data.size() ; i++){
287
for(int i = 0; i < data.size(); i++){
288
std::cout << data[i]->seqid << "\t"
289
<< data[i]->start << "\t"
290
<< data[i]->af << "\t"
291
<< data[i]->ehhR << "\t"
292
<< data[i]->ehhA << "\t"
293
<< data[i]->iHS << "\t"
294
<< data[i]->niHS << "\t"
295
<< data[i]->F1 << "\t"
296
<< data[i]->F2 << std::endl;