41
PosteriorErrorProbabilityModel::PosteriorErrorProbabilityModel()
42
: DefaultParamHandler("PosteriorErrorProbabilityModel"), negative_prior_(0.5),max_incorrectly_(0),max_correctly_(0), smallest_score_(0)
44
defaults_.setValue("number_of_bins", 100, "Number of bins used for visualization. Only needed if each iteration step of the EM-Algorithm will be visualized", StringList::create("advanced"));
45
defaults_.setValue("output_plots","false","If true every step of the EM-algorithm will be written to a file as a gnuplot formula",StringList::create("advanced"));
46
defaults_.setValidStrings("output_plots",StringList::create("true,false"));
47
defaults_.setValue("output_name","", "if output_plots is on, the output files will be saved in the following manner: <output_name>scores.txt for the scores and <output_name> which contains each step of the EM-algorithm e.g. output_name = /usr/home/OMSSA123 then /usr/home/OMSSA123_scores.txt, /usr/home/OMSSA123 will be written. If no directory is specified, e.g. instead of '/usr/home/OMSSA123' just OMSSA123, the files will be written into the working directory.",StringList::create("advanced,output file"));
48
defaults_.setValue("incorrectly_assigned","Gumbel", "for 'Gumbel', the Gumbel distribution is used to plot incorrectly assigned sequences. For 'Gauss', the Gauss distribution is used.",StringList::create("advanced"));
49
defaults_.setValidStrings("incorrectly_assigned",StringList::create("Gumbel,Gauss"));
51
calc_incorrect_ = &PosteriorErrorProbabilityModel::getGumbel;
52
calc_correct_ = &PosteriorErrorProbabilityModel::getGauss;
53
getNegativeGnuplotFormula_ = &PosteriorErrorProbabilityModel::getGumbelGnuplotFormula;
54
getPositiveGnuplotFormula_ = &PosteriorErrorProbabilityModel::getGaussGnuplotFormula;
57
PosteriorErrorProbabilityModel::~PosteriorErrorProbabilityModel()
61
bool PosteriorErrorProbabilityModel::fit( std::vector<double>& search_engine_scores)
63
if(search_engine_scores.empty())
67
//-------------------------------------------------------------
68
// Initializing Parameters
69
//-------------------------------------------------------------
70
sort(search_engine_scores.begin(),search_engine_scores.end());
72
smallest_score_ = search_engine_scores[0];
73
vector<double> x_scores;
74
x_scores.resize(search_engine_scores.size());
75
std::vector< double>::iterator it = x_scores.begin();
76
for(std::vector< double>::iterator iti = search_engine_scores.begin(); iti < search_engine_scores.end(); ++it,++iti)
78
*it = *iti + fabs(smallest_score_) + 0.001;
80
negative_prior_ = 0.7;
81
if(param_.getValue("incorrectly_assigned") == "Gumbel")
83
incorrectly_assigned_fit_param_.x0 = gsl_stats_mean(&x_scores[0], 1, ceil(0.5* x_scores.size())) + x_scores[0];
84
incorrectly_assigned_fit_param_.sigma = gsl_stats_sd(&x_scores[0], 1, x_scores.size() - 1);//pow(gsl_stats_sd_with_fixed_mean(&probabilities[x_score_start], 1, probabilities.size() - x_score_start, gauss_fit_param_.x0),2);
85
incorrectly_assigned_fit_param_.A = 1 /sqrt(2*3.14159*pow(incorrectly_assigned_fit_param_.sigma,2));
86
//TODO: compute directly with gauss. Workaround:
87
calc_incorrect_ = &PosteriorErrorProbabilityModel::getGauss;
88
getNegativeGnuplotFormula_ = &PosteriorErrorProbabilityModel::getGumbelGnuplotFormula;
92
incorrectly_assigned_fit_param_.x0 = gsl_stats_mean(&x_scores[0], 1, ceil(0.5* x_scores.size())) + x_scores[0];
93
incorrectly_assigned_fit_param_.sigma = gsl_stats_sd(&x_scores[0], 1, x_scores.size() - 1);//pow(gsl_stats_sd_with_fixed_mean(&probabilities[x_score_start], 1, probabilities.size() - x_score_start, gauss_fit_param_.x0),2);
94
incorrectly_assigned_fit_param_.A = 1 /sqrt(2*3.14159*pow(incorrectly_assigned_fit_param_.sigma,2));
95
calc_incorrect_ = &PosteriorErrorProbabilityModel::getGauss;
96
getNegativeGnuplotFormula_ = &PosteriorErrorProbabilityModel::getGaussGnuplotFormula;
98
getPositiveGnuplotFormula_ = &PosteriorErrorProbabilityModel::getGaussGnuplotFormula;
99
calc_correct_ = &PosteriorErrorProbabilityModel::getGauss;
100
Size x_score_start = std::min(x_scores.size()-1, (Size) ceil(x_scores.size()*0.7)); // if only one score is present, ceil(...) will yield 1, which is an invalid index
101
correctly_assigned_fit_param_.x0 = gsl_stats_mean(&x_scores[x_score_start], 1, x_scores.size() - x_score_start) + x_scores[x_score_start];//(gauss_scores.begin()->getX() + (gauss_scores.end()-1)->getX())/2;
102
correctly_assigned_fit_param_.sigma = incorrectly_assigned_fit_param_.sigma;
103
correctly_assigned_fit_param_.A = 1.0 / sqrt(2*3.14159*pow(correctly_assigned_fit_param_.sigma,2));
105
DoubleReal maxlike(0);
106
vector<DoubleReal> incorrect_density;
107
vector<DoubleReal> correct_density;
109
fillDensities(x_scores,incorrect_density,correct_density);
112
maxlike = computeMaxLikelihood(incorrect_density,correct_density);
113
//-------------------------------------------------------------
114
// create files for output
115
//-------------------------------------------------------------
116
bool output_plots = param_.getValue("output_plots").toBool();
117
TextFile* file = NULL;
120
file = InitPlots(x_scores);
122
//-------------------------------------------------------------
123
// Estimate Parameters - EM algorithm
124
//-------------------------------------------------------------
125
bool stop_em_init = false;
129
DoubleReal one_minus_sum_posterior = one_minus_sum_post(incorrect_density,correct_density);
130
DoubleReal sum_posterior = sum_post(incorrect_density,correct_density);
133
DoubleReal sum_positive_x0 = sum_pos_x0(x_scores, incorrect_density,correct_density);
134
DoubleReal sum_negative_x0 = sum_neg_x0(x_scores, incorrect_density,correct_density);
136
DoubleReal positive_mean = sum_positive_x0/one_minus_sum_posterior;
137
DoubleReal negative_mean = sum_negative_x0/sum_posterior;
139
//new standard deviation
140
DoubleReal sum_positive_sigma = sum_pos_sigma(x_scores,incorrect_density,correct_density, positive_mean);
141
DoubleReal sum_negative_sigma = sum_neg_sigma(x_scores,incorrect_density,correct_density, negative_mean);
144
correctly_assigned_fit_param_.x0 = positive_mean;
145
if(sum_positive_sigma != 0 && one_minus_sum_posterior != 0)
147
correctly_assigned_fit_param_.sigma = sqrt(sum_positive_sigma/one_minus_sum_posterior);
148
correctly_assigned_fit_param_.A = 1/sqrt(2*3.14159*pow(correctly_assigned_fit_param_.sigma,2));
151
incorrectly_assigned_fit_param_.x0 = negative_mean;
152
if(sum_negative_sigma != 0 && sum_posterior != 0)
154
incorrectly_assigned_fit_param_.sigma = sqrt(sum_negative_sigma/sum_posterior);
155
incorrectly_assigned_fit_param_.A = 1/sqrt(2*3.14159*pow(incorrectly_assigned_fit_param_.sigma,2));
159
//compute new prior probabilities negative peptides
160
fillDensities(x_scores,incorrect_density,correct_density);
161
sum_posterior = sum_post(incorrect_density,correct_density);
162
negative_prior_ = sum_posterior/x_scores.size();
164
DoubleReal new_maxlike(computeMaxLikelihood(incorrect_density,correct_density));
165
if(boost::math::isnan(new_maxlike - maxlike))
168
//throw Exception::UnableToFit(__FILE__,__LINE__,__PRETTY_FUNCTION__,"UnableToFit-PosteriorErrorProbability","Could not fit mixture model to data");
170
if(fabs(new_maxlike - maxlike) < 0.001)
173
sum_posterior = sum_post(incorrect_density,correct_density);
174
negative_prior_ = sum_posterior/x_scores.size();
179
String formula1, formula2,formula3;
180
formula1 = ((this)->*(getNegativeGnuplotFormula_))(incorrectly_assigned_fit_param_)+ "* " + String(negative_prior_);//String(incorrectly_assigned_fit_param_.A) +" * exp(-(x - " + String(incorrectly_assigned_fit_param_.x0) + ") ** 2 / 2 / (" + String(incorrectly_assigned_fit_param_.sigma) + ") ** 2)"+ "*" + String(negative_prior_);
181
formula2 = ((this)->*(getPositiveGnuplotFormula_))(correctly_assigned_fit_param_)+ "* (1 - " + String(negative_prior_) + ")";//String(correctly_assigned_fit_param_.A) +" * exp(-(x - " + String(correctly_assigned_fit_param_.x0) + ") ** 2 / 2 / (" + String(correctly_assigned_fit_param_.sigma) + ") ** 2)"+ "* (1 - " + String(negative_prior_) + ")";
182
formula3 = getBothGnuplotFormula(incorrectly_assigned_fit_param_,correctly_assigned_fit_param_);
183
(*file)<<("plot \""+(String)param_.getValue("output_name") +"_scores.txt\" with boxes, " + formula1 + " , " + formula2 + " , " + formula3);
185
//update maximum likelihood
186
maxlike = new_maxlike;
187
}while(!stop_em_init);
188
//-------------------------------------------------------------
190
//-------------------------------------------------------------
192
if(param_.getValue("incorrectly_assigned") == "Gumbel")
194
calc_incorrect_ = &PosteriorErrorProbabilityModel::getGumbel;
196
max_incorrectly_ = ((this)->*(calc_incorrect_))(incorrectly_assigned_fit_param_.x0, incorrectly_assigned_fit_param_);
197
max_correctly_ = ((this)->*(calc_correct_))(correctly_assigned_fit_param_.x0, correctly_assigned_fit_param_ );
200
String formula1, formula2,formula3;
201
formula1 = ((this)->*(getNegativeGnuplotFormula_))(incorrectly_assigned_fit_param_)+ "*" + String(negative_prior_);//String(incorrectly_assigned_fit_param_.A) +" * exp(-(x - " + String(incorrectly_assigned_fit_param_.x0) + ") ** 2 / 2 / (" + String(incorrectly_assigned_fit_param_.sigma) + ") ** 2)"+ "*" + String(negative_prior_);
202
formula2 = ((this)->*(getPositiveGnuplotFormula_))(correctly_assigned_fit_param_)+ "* (1 - " + String(negative_prior_) + ")";// String(correctly_assigned_fit_param_.A) +" * exp(-(x - " + String(correctly_assigned_fit_param_.x0) + ") ** 2 / 2 / (" + String(correctly_assigned_fit_param_.sigma) + ") ** 2)"+ "* (1 - " + String(negative_prior_) + ")";
203
formula3 = getBothGnuplotFormula(incorrectly_assigned_fit_param_,correctly_assigned_fit_param_);
204
(*file)<<("plot \""+(String)param_.getValue("output_name") +"_scores.txt\" with boxes, " + formula1 + " , " + formula2 + " , " + formula3);
205
file->store((String)param_.getValue("output_name"));
211
bool PosteriorErrorProbabilityModel::fit( std::vector<double>& search_engine_scores, vector<double>& probabilities)
214
return_value = fit(search_engine_scores);
215
if(!return_value) return false;
216
probabilities.resize(search_engine_scores.size());
217
vector<double>::iterator probs =probabilities.begin();
218
for(vector<double>::iterator scores = search_engine_scores.begin(); scores != search_engine_scores.end(); ++scores, ++probs)
220
*probs = computeProbability(*scores);
225
void PosteriorErrorProbabilityModel::fillDensities(vector<double>& x_scores,vector<DoubleReal>& incorrect_density,vector<DoubleReal>& correct_density)
227
if(incorrect_density.size() != x_scores.size())
229
incorrect_density.resize(x_scores.size());
230
correct_density.resize(x_scores.size());
232
vector<DoubleReal>::iterator incorrect = incorrect_density.begin();
233
vector<DoubleReal>::iterator correct = correct_density.begin();
234
for(vector<double>::iterator scores = x_scores.begin(); scores != x_scores.end(); ++scores, ++incorrect, ++correct)
236
*incorrect = ((this)->*(calc_incorrect_))(*scores, incorrectly_assigned_fit_param_);
237
*correct = ((this)->*(calc_correct_))(*scores, correctly_assigned_fit_param_ );
241
DoubleReal PosteriorErrorProbabilityModel::computeMaxLikelihood(vector<DoubleReal>& incorrect_density, vector<DoubleReal>& correct_density)
243
DoubleReal maxlike(0);
244
vector<DoubleReal>::iterator incorrect = incorrect_density.begin();
245
for(vector<DoubleReal>::iterator correct = correct_density.begin(); correct < correct_density.end() ; ++correct, ++incorrect)
247
maxlike += log10(negative_prior_* (*incorrect) +(1-negative_prior_)* (*correct));
251
DoubleReal PosteriorErrorProbabilityModel::one_minus_sum_post(vector<DoubleReal>& incorrect_density, vector<DoubleReal>& correct_density)
253
DoubleReal one_min(0);
254
vector<DoubleReal>::iterator incorrect = incorrect_density.begin();
255
for(vector<DoubleReal>::iterator correct = correct_density.begin(); correct < correct_density.end() ; ++correct, ++incorrect)
257
one_min += 1 - ((negative_prior_* (*incorrect) )/((negative_prior_* (*incorrect) ) + (1-negative_prior_)* (*correct) ));
262
DoubleReal PosteriorErrorProbabilityModel::sum_post(vector<DoubleReal>& incorrect_density, vector<DoubleReal>& correct_density)
265
vector<DoubleReal>::iterator incorrect = incorrect_density.begin();
266
for(vector<DoubleReal>::iterator correct = correct_density.begin(); correct < correct_density.end() ; ++correct, ++incorrect)
268
post += ((negative_prior_* (*incorrect) )/((negative_prior_* (*incorrect) ) + (1-negative_prior_)* (*correct) ));
273
DoubleReal PosteriorErrorProbabilityModel::sum_pos_x0(vector<double>& x_scores, vector<DoubleReal>& incorrect_density, vector<DoubleReal>& correct_density)
275
DoubleReal pos_x0(0);
276
vector<double>::iterator the_x = x_scores.begin();
277
vector<DoubleReal>::iterator incorrect = incorrect_density.begin();
278
for(vector<DoubleReal>::iterator correct = correct_density.begin(); correct < correct_density.end() ; ++correct, ++incorrect,++the_x)
280
pos_x0 += ((1 - ((negative_prior_* (*incorrect) )/((negative_prior_* (*incorrect) ) + (1-negative_prior_)* (*correct) )))* (*the_x) );
285
DoubleReal PosteriorErrorProbabilityModel::sum_neg_x0(vector<double>& x_scores, vector<DoubleReal>& incorrect_density, vector<DoubleReal>& correct_density)
287
DoubleReal neg_x0(0);
288
vector<double>::iterator the_x = x_scores.begin();
289
vector<DoubleReal>::iterator correct = correct_density.begin();
290
for(vector<DoubleReal>::iterator incorrect = incorrect_density.begin(); incorrect < incorrect_density.end() ; ++correct, ++incorrect,++the_x)
292
neg_x0 += ((((negative_prior_* (*incorrect) )/((negative_prior_* (*incorrect) ) + (1-negative_prior_)* (*correct) )))* (*the_x) );
297
DoubleReal PosteriorErrorProbabilityModel::sum_pos_sigma(vector<double>& x_scores, vector<DoubleReal>& incorrect_density, vector<DoubleReal>& correct_density, DoubleReal positive_mean)
299
DoubleReal pos_sigma(0);
300
vector<double>::iterator the_x = x_scores.begin();
301
vector<DoubleReal>::iterator incorrect = incorrect_density.begin();
302
for(vector<DoubleReal>::iterator correct = correct_density.begin(); correct < correct_density.end() ; ++correct, ++incorrect,++the_x)
304
pos_sigma += ((1 - ((negative_prior_* (*incorrect) )/((negative_prior_* (*incorrect) ) + (1-negative_prior_)* (*correct) )))*pow( (*the_x) - positive_mean,2));
309
DoubleReal PosteriorErrorProbabilityModel::sum_neg_sigma(vector<double>& x_scores, vector<DoubleReal>& incorrect_density, vector<DoubleReal>& correct_density, DoubleReal positive_mean)
311
DoubleReal neg_sigma(0);
312
vector<double>::iterator the_x = x_scores.begin();
313
vector<DoubleReal>::iterator incorrect = incorrect_density.begin();
314
for(vector<DoubleReal>::iterator correct = correct_density.begin(); correct < correct_density.end() ; ++correct, ++incorrect,++the_x)
316
neg_sigma += ((((negative_prior_* (*incorrect) )/((negative_prior_* (*incorrect) ) + (1-negative_prior_)* (*correct) )))*pow( (*the_x) - positive_mean,2));
320
DoubleReal PosteriorErrorProbabilityModel::computeProbability(DoubleReal score)
322
score = score + fabs(smallest_score_) + 0.001;
325
//the score is smaller than the peak of incorreclty assigned sequences. To ensure that the probabilies wont rise again use the incorrectly assigend peak for computation
326
if(score < incorrectly_assigned_fit_param_.x0)
328
x_neg = max_incorrectly_;
329
x_pos = ((this)->*(calc_correct_))(score, correctly_assigned_fit_param_ );
331
//same as above. However, this time to ensure that probabilities wont drop again.
332
else if(score > correctly_assigned_fit_param_.x0)
334
x_neg = ((this)->*(calc_incorrect_))(score, incorrectly_assigned_fit_param_);
335
x_pos = max_correctly_;
337
//if its in between use the normal formula
340
x_neg = ((this)->*(calc_incorrect_))(score, incorrectly_assigned_fit_param_);
341
x_pos = ((this)->*(calc_correct_))(score, correctly_assigned_fit_param_ );
343
return (negative_prior_*x_neg)/((negative_prior_*x_neg) + (1-negative_prior_)*x_pos);
345
TextFile* PosteriorErrorProbabilityModel::InitPlots(vector<double> & x_scores)
347
TextFile* file = new TextFile;
349
std::vector<DPosition<2> > points;
350
Int number_of_bins = param_.getValue("number_of_bins");
351
points.resize(number_of_bins);
353
double dividing_score = (x_scores.back() -x_scores[0])/number_of_bins;
355
temp.setX(dividing_score/2);
359
double temp_divider = dividing_score;
360
for(std::vector< double>::iterator it = x_scores.begin(); it < x_scores.end(); ++it)
362
if(temp_divider - *it >= 0 && bin < number_of_bins - 1 )
364
points[bin].setY(points[bin].getY()+1);
366
else if(bin == number_of_bins - 1 )
368
points[bin].setY(points[bin].getY()+1);
372
temp.setX((temp_divider + temp_divider + dividing_score)/2);
376
temp_divider += dividing_score;
380
for(vector<DPosition<2> >::iterator it = points.begin(); it < points.end(); ++it)
382
it->setY( it->getY()/( x_scores.size() *dividing_score));
385
TextFile data_points;
386
for(vector<DPosition<2> >::iterator it = points.begin(); it < points.end() ; ++it)
388
String temp = it->getX();
393
data_points.store((String)param_.getValue("output_name") + "_scores.txt");
394
output = "set output \""+ (String)param_.getValue("output_name") +".ps\"";
395
(*file)<<"set terminal postscript color solid linewidth 2.0 rounded";
396
//(*file)<<"set style empty solid 0.5 border -1";
397
//(*file)<<"set style function lines";
398
(*file)<<"set xlabel \"discriminant score\"";
399
(*file)<<"set ylabel \"density\"";
400
//TODO: (*file)<<"set title ";
401
(*file)<<"set key off";
403
String formula1, formula2;
404
formula1 = ((this)->*(getNegativeGnuplotFormula_))(incorrectly_assigned_fit_param_)+ "* " + String(negative_prior_);//String(incorrectly_assigned_fit_param_.A) +" * exp(-(x - " + String(incorrectly_assigned_fit_param_.x0) + ") ** 2 / 2 / (" + String(incorrectly_assigned_fit_param_.sigma) + ") ** 2)"+ "*" + String(negative_prior_);
405
formula2 = ((this)->*(getPositiveGnuplotFormula_))(correctly_assigned_fit_param_)+ "* (1 - " + String(negative_prior_) + ")";//String(correctly_assigned_fit_param_.A) +" * exp(-(x - " + String(correctly_assigned_fit_param_.x0) + ") ** 2 / 2 / (" + String(correctly_assigned_fit_param_.sigma) + ") ** 2)"+ "* (1 - " + String(negative_prior_) + ")";
406
(*file)<< ("plot \""+(String)param_.getValue("output_name") +"_scores.txt\" with boxes, " + formula1 + " , " + formula2);
410
const String PosteriorErrorProbabilityModel::getGumbelGnuplotFormula(const GaussFitter::GaussFitResult& params) const
412
// build a formula with the fitted parameters for gnuplot
413
stringstream formula;
414
formula << "(1/" << params.sigma <<") * " << "exp(( "<< params.x0<< "- x)/"<< params.sigma <<") * exp(-exp(("<<params.x0<<" - x)/"<<params.sigma<<"))";
415
return formula.str();
418
const String PosteriorErrorProbabilityModel::getGaussGnuplotFormula(const GaussFitter::GaussFitResult& params) const
420
stringstream formula;
421
formula << params.A << " * exp(-(x - " << params.x0 << ") ** 2 / 2 / (" << params.sigma << ") ** 2)";
422
return formula.str();
425
const String PosteriorErrorProbabilityModel::getBothGnuplotFormula(const GaussFitter::GaussFitResult& incorrect,const GaussFitter::GaussFitResult& correct) const
427
stringstream formula;
428
formula << negative_prior_<<"*"<< ((this)->*(getNegativeGnuplotFormula_))(incorrect) <<" + (1-"<<negative_prior_<<")*"<< ((this)->*(getPositiveGnuplotFormula_))(correct);
429
return formula.str();
432
void PosteriorErrorProbabilityModel::plotTargetDecoyEstimation(vector<double> &target,vector<double> & decoy)
436
std::vector<DPosition<3> > points;
437
Int number_of_bins = param_.getValue("number_of_bins");
438
points.resize(number_of_bins);
441
sort(target.begin(),target.end());
442
sort(decoy.begin(),decoy.end());
444
double dividing_score = (max(target.back(),decoy.back())/*scores.back()*/ - min(target[0],decoy[0])/*scores[0]*/)/number_of_bins;
446
temp[0] = (dividing_score/2);
451
double temp_divider = dividing_score;
452
for(std::vector< double>::iterator it = target.begin(); it < target.end(); ++it)
454
*it = *it + fabs(smallest_score_) + 0.001;
455
if(temp_divider - *it >= 0 && bin < number_of_bins - 1 )
457
points[bin][1] = (points[bin][1]+1);
459
else if(bin == number_of_bins - 1 )
461
points[bin][1] = (points[bin][1]+1);
465
temp[0] = ((temp_divider + temp_divider + dividing_score)/2);
469
temp_divider += dividing_score;
474
temp_divider = dividing_score;
475
for(std::vector< double>::iterator it = decoy.begin(); it < decoy.end(); ++it)
477
*it = *it + fabs(smallest_score_) + 0.001;
478
if(temp_divider - *it >= 0 && bin < number_of_bins - 1 )
480
points[bin][2] = (points[bin][2]+1);
482
else if(bin == number_of_bins - 1 )
484
points[bin][2] = (points[bin][2]+1);
488
//temp[0] = ((temp_divider + temp_divider + dividing_score)/2);
492
temp_divider += dividing_score;
496
for(vector<DPosition<3> >::iterator it = points.begin(); it < points.end(); ++it)
498
// if((*it)[1] > (*it)[2])
499
// {(*it)[1] = (*it)[1] + (*it)[2];}
500
/* else{/(*it)[2] = (*it)[1] + (*it)[2];//}*/
502
(*it)[1] = ( (*it)[1]/( (decoy.size()+target.size()) *dividing_score));
503
(*it)[2] = ( (*it)[2]/( (decoy.size()+target.size()) *dividing_score));
506
TextFile data_points;
507
for(vector<DPosition<3> >::iterator it = points.begin(); it < points.end() ; ++it)
509
String temp = (*it)[0];
516
data_points.store( (String)param_.getValue("output_name") + "_target_decoy_scores.txt");
517
output = String("set output \"")+ (String)param_.getValue("output_name") + "_target_decoy.ps\"";
518
(file)<<"set terminal postscript color solid linewidth 2.0 rounded";
519
//(*file)<<"set style empty solid 0.5 border -1";
520
//(*file)<<"set style function lines";
521
(file)<<"set xlabel \"discriminant score\"";
522
(file)<<"set ylabel \"density\"";
523
//TODO: (*file)<<"set title ";
524
(file)<<"set key off";
526
String formula1, formula2;
527
formula1 = getGumbelGnuplotFormula(getIncorrectlyAssignedFitResult())+ "* " + String(getNegativePrior());//String(incorrectly_assigned_fit_param_.A) +" * exp(-(x - " + String(incorrectly_assigned_fit_param_.x0) + ") ** 2 / 2 / (" + String(incorrectly_assigned_fit_param_.sigma) + ") ** 2)"+ "*" + String(negative_prior_);
528
formula2 = getGaussGnuplotFormula(getCorrectlyAssignedFitResult())+ "* (1 - " + String(getNegativePrior()) + ")";//String(correctly_assigned_fit_param_.A) +" * exp(-(x - " + String(correctly_assigned_fit_param_.x0) + ") ** 2 / 2 / (" + String(correctly_assigned_fit_param_.sigma) + ") ** 2)"+ "* (1 - " + String(negative_prior_) + ")";
529
(file)<< ("plot \"" + (String)param_.getValue("output_name") + "_target_decoy_scores.txt\" using 1:3 with boxes fill solid 0.8 noborder, \"" + (String)param_.getValue("output_name") + "_target_decoy_scores.txt\" using 1:2 with boxes, " + formula1 + " , " + formula2);
530
file.store( (String)param_.getValue("output_name") + "_target_decoy");
48
PosteriorErrorProbabilityModel::PosteriorErrorProbabilityModel() :
49
DefaultParamHandler("PosteriorErrorProbabilityModel"), negative_prior_(0.5), max_incorrectly_(0), max_correctly_(0), smallest_score_(0)
51
defaults_.setValue("number_of_bins", 100, "Number of bins used for visualization. Only needed if each iteration step of the EM-Algorithm will be visualized", StringList::create("advanced"));
52
defaults_.setValue("output_plots", "false", "If true every step of the EM-algorithm will be written to a file as a gnuplot formula", StringList::create("advanced"));
53
defaults_.setValidStrings("output_plots", StringList::create("true,false"));
54
defaults_.setValue("output_name", "", "If output_plots is on, the output files will be saved in the following manner: <output_name>scores.txt for the scores and <output_name> which contains each step of the EM-algorithm e.g. output_name = /usr/home/OMSSA123 then /usr/home/OMSSA123_scores.txt, /usr/home/OMSSA123 will be written. If no directory is specified, e.g. instead of '/usr/home/OMSSA123' just OMSSA123, the files will be written into the working directory.", StringList::create("advanced,output file"));
55
defaults_.setValue("incorrectly_assigned", "Gumbel", "for 'Gumbel', the Gumbel distribution is used to plot incorrectly assigned sequences. For 'Gauss', the Gauss distribution is used.", StringList::create("advanced"));
56
defaults_.setValidStrings("incorrectly_assigned", StringList::create("Gumbel,Gauss"));
58
calc_incorrect_ = &PosteriorErrorProbabilityModel::getGumbel;
59
calc_correct_ = &PosteriorErrorProbabilityModel::getGauss;
60
getNegativeGnuplotFormula_ = &PosteriorErrorProbabilityModel::getGumbelGnuplotFormula;
61
getPositiveGnuplotFormula_ = &PosteriorErrorProbabilityModel::getGaussGnuplotFormula;
64
PosteriorErrorProbabilityModel::~PosteriorErrorProbabilityModel()
68
bool PosteriorErrorProbabilityModel::fit(std::vector<double> & search_engine_scores)
70
if (search_engine_scores.empty())
74
//-------------------------------------------------------------
75
// Initializing Parameters
76
//-------------------------------------------------------------
77
sort(search_engine_scores.begin(), search_engine_scores.end());
79
smallest_score_ = search_engine_scores[0];
80
vector<double> x_scores;
81
x_scores.resize(search_engine_scores.size());
82
std::vector<double>::iterator it = x_scores.begin();
83
for (std::vector<double>::iterator iti = search_engine_scores.begin(); iti < search_engine_scores.end(); ++it, ++iti)
85
*it = *iti + fabs(smallest_score_) + 0.001;
87
negative_prior_ = 0.7;
88
if (param_.getValue("incorrectly_assigned") == "Gumbel")
90
incorrectly_assigned_fit_param_.x0 = gsl_stats_mean(&x_scores[0], 1, ceil(0.5 * x_scores.size())) + x_scores[0];
91
incorrectly_assigned_fit_param_.sigma = gsl_stats_sd(&x_scores[0], 1, x_scores.size() - 1); //pow(gsl_stats_sd_with_fixed_mean(&probabilities[x_score_start], 1, probabilities.size() - x_score_start, gauss_fit_param_.x0),2);
92
incorrectly_assigned_fit_param_.A = 1 / sqrt(2 * 3.14159 * pow(incorrectly_assigned_fit_param_.sigma, 2));
93
//TODO: compute directly with gauss. Workaround:
94
calc_incorrect_ = &PosteriorErrorProbabilityModel::getGauss;
95
getNegativeGnuplotFormula_ = &PosteriorErrorProbabilityModel::getGumbelGnuplotFormula;
99
incorrectly_assigned_fit_param_.x0 = gsl_stats_mean(&x_scores[0], 1, ceil(0.5 * x_scores.size())) + x_scores[0];
100
incorrectly_assigned_fit_param_.sigma = gsl_stats_sd(&x_scores[0], 1, x_scores.size() - 1); //pow(gsl_stats_sd_with_fixed_mean(&probabilities[x_score_start], 1, probabilities.size() - x_score_start, gauss_fit_param_.x0),2);
101
incorrectly_assigned_fit_param_.A = 1 / sqrt(2 * 3.14159 * pow(incorrectly_assigned_fit_param_.sigma, 2));
102
calc_incorrect_ = &PosteriorErrorProbabilityModel::getGauss;
103
getNegativeGnuplotFormula_ = &PosteriorErrorProbabilityModel::getGaussGnuplotFormula;
105
getPositiveGnuplotFormula_ = &PosteriorErrorProbabilityModel::getGaussGnuplotFormula;
106
calc_correct_ = &PosteriorErrorProbabilityModel::getGauss;
107
Size x_score_start = std::min(x_scores.size() - 1, (Size) ceil(x_scores.size() * 0.7)); // if only one score is present, ceil(...) will yield 1, which is an invalid index
108
correctly_assigned_fit_param_.x0 = gsl_stats_mean(&x_scores[x_score_start], 1, x_scores.size() - x_score_start) + x_scores[x_score_start]; //(gauss_scores.begin()->getX() + (gauss_scores.end()-1)->getX())/2;
109
correctly_assigned_fit_param_.sigma = incorrectly_assigned_fit_param_.sigma;
110
correctly_assigned_fit_param_.A = 1.0 / sqrt(2 * 3.14159 * pow(correctly_assigned_fit_param_.sigma, 2));
112
DoubleReal maxlike(0);
113
vector<DoubleReal> incorrect_density;
114
vector<DoubleReal> correct_density;
116
fillDensities(x_scores, incorrect_density, correct_density);
119
maxlike = computeMaxLikelihood(incorrect_density, correct_density);
120
//-------------------------------------------------------------
121
// create files for output
122
//-------------------------------------------------------------
123
bool output_plots = param_.getValue("output_plots").toBool();
124
TextFile * file = NULL;
127
file = InitPlots(x_scores);
129
//-------------------------------------------------------------
130
// Estimate Parameters - EM algorithm
131
//-------------------------------------------------------------
132
bool stop_em_init = false;
136
DoubleReal one_minus_sum_posterior = one_minus_sum_post(incorrect_density, correct_density);
137
DoubleReal sum_posterior = sum_post(incorrect_density, correct_density);
140
DoubleReal sum_positive_x0 = sum_pos_x0(x_scores, incorrect_density, correct_density);
141
DoubleReal sum_negative_x0 = sum_neg_x0(x_scores, incorrect_density, correct_density);
143
DoubleReal positive_mean = sum_positive_x0 / one_minus_sum_posterior;
144
DoubleReal negative_mean = sum_negative_x0 / sum_posterior;
146
//new standard deviation
147
DoubleReal sum_positive_sigma = sum_pos_sigma(x_scores, incorrect_density, correct_density, positive_mean);
148
DoubleReal sum_negative_sigma = sum_neg_sigma(x_scores, incorrect_density, correct_density, negative_mean);
151
correctly_assigned_fit_param_.x0 = positive_mean;
152
if (sum_positive_sigma != 0 && one_minus_sum_posterior != 0)
154
correctly_assigned_fit_param_.sigma = sqrt(sum_positive_sigma / one_minus_sum_posterior);
155
correctly_assigned_fit_param_.A = 1 / sqrt(2 * 3.14159 * pow(correctly_assigned_fit_param_.sigma, 2));
158
incorrectly_assigned_fit_param_.x0 = negative_mean;
159
if (sum_negative_sigma != 0 && sum_posterior != 0)
161
incorrectly_assigned_fit_param_.sigma = sqrt(sum_negative_sigma / sum_posterior);
162
incorrectly_assigned_fit_param_.A = 1 / sqrt(2 * 3.14159 * pow(incorrectly_assigned_fit_param_.sigma, 2));
166
//compute new prior probabilities negative peptides
167
fillDensities(x_scores, incorrect_density, correct_density);
168
sum_posterior = sum_post(incorrect_density, correct_density);
169
negative_prior_ = sum_posterior / x_scores.size();
171
DoubleReal new_maxlike(computeMaxLikelihood(incorrect_density, correct_density));
172
if (boost::math::isnan(new_maxlike - maxlike))
175
//throw Exception::UnableToFit(__FILE__,__LINE__,__PRETTY_FUNCTION__,"UnableToFit-PosteriorErrorProbability","Could not fit mixture model to data");
177
if (fabs(new_maxlike - maxlike) < 0.001)
180
sum_posterior = sum_post(incorrect_density, correct_density);
181
negative_prior_ = sum_posterior / x_scores.size();
186
String formula1, formula2, formula3;
187
formula1 = ((this)->*(getNegativeGnuplotFormula_))(incorrectly_assigned_fit_param_) + "* " + String(negative_prior_); //String(incorrectly_assigned_fit_param_.A) +" * exp(-(x - " + String(incorrectly_assigned_fit_param_.x0) + ") ** 2 / 2 / (" + String(incorrectly_assigned_fit_param_.sigma) + ") ** 2)"+ "*" + String(negative_prior_);
188
formula2 = ((this)->*(getPositiveGnuplotFormula_))(correctly_assigned_fit_param_) + "* (1 - " + String(negative_prior_) + ")"; //String(correctly_assigned_fit_param_.A) +" * exp(-(x - " + String(correctly_assigned_fit_param_.x0) + ") ** 2 / 2 / (" + String(correctly_assigned_fit_param_.sigma) + ") ** 2)"+ "* (1 - " + String(negative_prior_) + ")";
189
formula3 = getBothGnuplotFormula(incorrectly_assigned_fit_param_, correctly_assigned_fit_param_);
190
(*file) << ("plot \"" + (String)param_.getValue("output_name") + "_scores.txt\" with boxes, " + formula1 + " , " + formula2 + " , " + formula3);
192
//update maximum likelihood
193
maxlike = new_maxlike;
195
while (!stop_em_init);
196
//-------------------------------------------------------------
198
//-------------------------------------------------------------
200
if (param_.getValue("incorrectly_assigned") == "Gumbel")
202
calc_incorrect_ = &PosteriorErrorProbabilityModel::getGumbel;
204
max_incorrectly_ = ((this)->*(calc_incorrect_))(incorrectly_assigned_fit_param_.x0, incorrectly_assigned_fit_param_);
205
max_correctly_ = ((this)->*(calc_correct_))(correctly_assigned_fit_param_.x0, correctly_assigned_fit_param_);
208
String formula1, formula2, formula3;
209
formula1 = ((this)->*(getNegativeGnuplotFormula_))(incorrectly_assigned_fit_param_) + "*" + String(negative_prior_); //String(incorrectly_assigned_fit_param_.A) +" * exp(-(x - " + String(incorrectly_assigned_fit_param_.x0) + ") ** 2 / 2 / (" + String(incorrectly_assigned_fit_param_.sigma) + ") ** 2)"+ "*" + String(negative_prior_);
210
formula2 = ((this)->*(getPositiveGnuplotFormula_))(correctly_assigned_fit_param_) + "* (1 - " + String(negative_prior_) + ")"; // String(correctly_assigned_fit_param_.A) +" * exp(-(x - " + String(correctly_assigned_fit_param_.x0) + ") ** 2 / 2 / (" + String(correctly_assigned_fit_param_.sigma) + ") ** 2)"+ "* (1 - " + String(negative_prior_) + ")";
211
formula3 = getBothGnuplotFormula(incorrectly_assigned_fit_param_, correctly_assigned_fit_param_);
212
(*file) << ("plot \"" + (String)param_.getValue("output_name") + "_scores.txt\" with boxes, " + formula1 + " , " + formula2 + " , " + formula3);
213
file->store((String)param_.getValue("output_name"));
219
bool PosteriorErrorProbabilityModel::fit(std::vector<double> & search_engine_scores, vector<double> & probabilities)
222
return_value = fit(search_engine_scores);
226
probabilities.resize(search_engine_scores.size());
227
vector<double>::iterator probs = probabilities.begin();
228
for (vector<double>::iterator scores = search_engine_scores.begin(); scores != search_engine_scores.end(); ++scores, ++probs)
230
*probs = computeProbability(*scores);
235
void PosteriorErrorProbabilityModel::fillDensities(vector<double> & x_scores, vector<DoubleReal> & incorrect_density, vector<DoubleReal> & correct_density)
237
if (incorrect_density.size() != x_scores.size())
239
incorrect_density.resize(x_scores.size());
240
correct_density.resize(x_scores.size());
242
vector<DoubleReal>::iterator incorrect = incorrect_density.begin();
243
vector<DoubleReal>::iterator correct = correct_density.begin();
244
for (vector<double>::iterator scores = x_scores.begin(); scores != x_scores.end(); ++scores, ++incorrect, ++correct)
246
*incorrect = ((this)->*(calc_incorrect_))(*scores, incorrectly_assigned_fit_param_);
247
*correct = ((this)->*(calc_correct_))(*scores, correctly_assigned_fit_param_);
251
DoubleReal PosteriorErrorProbabilityModel::computeMaxLikelihood(vector<DoubleReal> & incorrect_density, vector<DoubleReal> & correct_density)
253
DoubleReal maxlike(0);
254
vector<DoubleReal>::iterator incorrect = incorrect_density.begin();
255
for (vector<DoubleReal>::iterator correct = correct_density.begin(); correct < correct_density.end(); ++correct, ++incorrect)
257
maxlike += log10(negative_prior_ * (*incorrect) + (1 - negative_prior_) * (*correct));
262
DoubleReal PosteriorErrorProbabilityModel::one_minus_sum_post(vector<DoubleReal> & incorrect_density, vector<DoubleReal> & correct_density)
264
DoubleReal one_min(0);
265
vector<DoubleReal>::iterator incorrect = incorrect_density.begin();
266
for (vector<DoubleReal>::iterator correct = correct_density.begin(); correct < correct_density.end(); ++correct, ++incorrect)
268
one_min += 1 - ((negative_prior_ * (*incorrect)) / ((negative_prior_ * (*incorrect)) + (1 - negative_prior_) * (*correct)));
273
DoubleReal PosteriorErrorProbabilityModel::sum_post(vector<DoubleReal> & incorrect_density, vector<DoubleReal> & correct_density)
276
vector<DoubleReal>::iterator incorrect = incorrect_density.begin();
277
for (vector<DoubleReal>::iterator correct = correct_density.begin(); correct < correct_density.end(); ++correct, ++incorrect)
279
post += ((negative_prior_ * (*incorrect)) / ((negative_prior_ * (*incorrect)) + (1 - negative_prior_) * (*correct)));
284
DoubleReal PosteriorErrorProbabilityModel::sum_pos_x0(vector<double> & x_scores, vector<DoubleReal> & incorrect_density, vector<DoubleReal> & correct_density)
286
DoubleReal pos_x0(0);
287
vector<double>::iterator the_x = x_scores.begin();
288
vector<DoubleReal>::iterator incorrect = incorrect_density.begin();
289
for (vector<DoubleReal>::iterator correct = correct_density.begin(); correct < correct_density.end(); ++correct, ++incorrect, ++the_x)
291
pos_x0 += ((1 - ((negative_prior_ * (*incorrect)) / ((negative_prior_ * (*incorrect)) + (1 - negative_prior_) * (*correct)))) * (*the_x));
296
DoubleReal PosteriorErrorProbabilityModel::sum_neg_x0(vector<double> & x_scores, vector<DoubleReal> & incorrect_density, vector<DoubleReal> & correct_density)
298
DoubleReal neg_x0(0);
299
vector<double>::iterator the_x = x_scores.begin();
300
vector<DoubleReal>::iterator correct = correct_density.begin();
301
for (vector<DoubleReal>::iterator incorrect = incorrect_density.begin(); incorrect < incorrect_density.end(); ++correct, ++incorrect, ++the_x)
303
neg_x0 += ((((negative_prior_ * (*incorrect)) / ((negative_prior_ * (*incorrect)) + (1 - negative_prior_) * (*correct)))) * (*the_x));
308
DoubleReal PosteriorErrorProbabilityModel::sum_pos_sigma(vector<double> & x_scores, vector<DoubleReal> & incorrect_density, vector<DoubleReal> & correct_density, DoubleReal positive_mean)
310
DoubleReal pos_sigma(0);
311
vector<double>::iterator the_x = x_scores.begin();
312
vector<DoubleReal>::iterator incorrect = incorrect_density.begin();
313
for (vector<DoubleReal>::iterator correct = correct_density.begin(); correct < correct_density.end(); ++correct, ++incorrect, ++the_x)
315
pos_sigma += ((1 - ((negative_prior_ * (*incorrect)) / ((negative_prior_ * (*incorrect)) + (1 - negative_prior_) * (*correct)))) * pow((*the_x) - positive_mean, 2));
320
DoubleReal PosteriorErrorProbabilityModel::sum_neg_sigma(vector<double> & x_scores, vector<DoubleReal> & incorrect_density, vector<DoubleReal> & correct_density, DoubleReal positive_mean)
322
DoubleReal neg_sigma(0);
323
vector<double>::iterator the_x = x_scores.begin();
324
vector<DoubleReal>::iterator incorrect = incorrect_density.begin();
325
for (vector<DoubleReal>::iterator correct = correct_density.begin(); correct < correct_density.end(); ++correct, ++incorrect, ++the_x)
327
neg_sigma += ((((negative_prior_ * (*incorrect)) / ((negative_prior_ * (*incorrect)) + (1 - negative_prior_) * (*correct)))) * pow((*the_x) - positive_mean, 2));
332
DoubleReal PosteriorErrorProbabilityModel::computeProbability(DoubleReal score)
334
score = score + fabs(smallest_score_) + 0.001;
337
// the score is smaller than the peak of incorrectly assigned sequences. To ensure that the probabilities wont rise again use the incorrectly assigned peak for computation
338
if (score < incorrectly_assigned_fit_param_.x0)
340
x_neg = max_incorrectly_;
341
x_pos = ((this)->*(calc_correct_))(score, correctly_assigned_fit_param_);
343
// same as above. However, this time to ensure that probabilities wont drop again.
344
else if (score > correctly_assigned_fit_param_.x0)
346
x_neg = ((this)->*(calc_incorrect_))(score, incorrectly_assigned_fit_param_);
347
x_pos = max_correctly_;
349
// if its in between use the normal formula
352
x_neg = ((this)->*(calc_incorrect_))(score, incorrectly_assigned_fit_param_);
353
x_pos = ((this)->*(calc_correct_))(score, correctly_assigned_fit_param_);
355
return (negative_prior_ * x_neg) / ((negative_prior_ * x_neg) + (1 - negative_prior_) * x_pos);
358
TextFile * PosteriorErrorProbabilityModel::InitPlots(vector<double> & x_scores)
360
TextFile * file = new TextFile;
362
std::vector<DPosition<2> > points;
363
Int number_of_bins = param_.getValue("number_of_bins");
364
points.resize(number_of_bins);
366
double dividing_score = (x_scores.back() - x_scores[0]) / number_of_bins;
368
temp.setX(dividing_score / 2);
372
double temp_divider = dividing_score;
373
for (std::vector<double>::iterator it = x_scores.begin(); it < x_scores.end(); ++it)
375
if (temp_divider - *it >= 0 && bin < number_of_bins - 1)
377
points[bin].setY(points[bin].getY() + 1);
379
else if (bin == number_of_bins - 1)
381
points[bin].setY(points[bin].getY() + 1);
385
temp.setX((temp_divider + temp_divider + dividing_score) / 2);
389
temp_divider += dividing_score;
393
for (vector<DPosition<2> >::iterator it = points.begin(); it < points.end(); ++it)
395
it->setY(it->getY() / (x_scores.size() * dividing_score));
398
TextFile data_points;
399
for (vector<DPosition<2> >::iterator it = points.begin(); it < points.end(); ++it)
401
String temp = it->getX();
406
data_points.store((String)param_.getValue("output_name") + "_scores.txt");
407
output = "set output \"" + (String)param_.getValue("output_name") + ".ps\"";
408
(*file) << "set terminal postscript color solid linewidth 2.0 rounded";
409
//(*file)<<"set style empty solid 0.5 border -1";
410
//(*file)<<"set style function lines";
411
(*file) << "set xlabel \"discriminant score\"";
412
(*file) << "set ylabel \"density\"";
413
//TODO: (*file)<<"set title ";
414
(*file) << "set key off";
416
String formula1, formula2;
417
formula1 = ((this)->*(getNegativeGnuplotFormula_))(incorrectly_assigned_fit_param_) + "* " + String(negative_prior_); //String(incorrectly_assigned_fit_param_.A) +" * exp(-(x - " + String(incorrectly_assigned_fit_param_.x0) + ") ** 2 / 2 / (" + String(incorrectly_assigned_fit_param_.sigma) + ") ** 2)"+ "*" + String(negative_prior_);
418
formula2 = ((this)->*(getPositiveGnuplotFormula_))(correctly_assigned_fit_param_) + "* (1 - " + String(negative_prior_) + ")"; //String(correctly_assigned_fit_param_.A) +" * exp(-(x - " + String(correctly_assigned_fit_param_.x0) + ") ** 2 / 2 / (" + String(correctly_assigned_fit_param_.sigma) + ") ** 2)"+ "* (1 - " + String(negative_prior_) + ")";
419
(*file) << ("plot \"" + (String)param_.getValue("output_name") + "_scores.txt\" with boxes, " + formula1 + " , " + formula2);
423
const String PosteriorErrorProbabilityModel::getGumbelGnuplotFormula(const GaussFitter::GaussFitResult & params) const
425
// build a formula with the fitted parameters for gnuplot
426
stringstream formula;
427
formula << "(1/" << params.sigma << ") * " << "exp(( " << params.x0 << "- x)/" << params.sigma << ") * exp(-exp((" << params.x0 << " - x)/" << params.sigma << "))";
428
return formula.str();
431
const String PosteriorErrorProbabilityModel::getGaussGnuplotFormula(const GaussFitter::GaussFitResult & params) const
433
stringstream formula;
434
formula << params.A << " * exp(-(x - " << params.x0 << ") ** 2 / 2 / (" << params.sigma << ") ** 2)";
435
return formula.str();
438
const String PosteriorErrorProbabilityModel::getBothGnuplotFormula(const GaussFitter::GaussFitResult & incorrect, const GaussFitter::GaussFitResult & correct) const
440
stringstream formula;
441
formula << negative_prior_ << "*" << ((this)->*(getNegativeGnuplotFormula_))(incorrect) << " + (1-" << negative_prior_ << ")*" << ((this)->*(getPositiveGnuplotFormula_))(correct);
442
return formula.str();
445
void PosteriorErrorProbabilityModel::plotTargetDecoyEstimation(vector<double> & target, vector<double> & decoy)
449
std::vector<DPosition<3> > points;
450
Int number_of_bins = param_.getValue("number_of_bins");
451
points.resize(number_of_bins);
454
sort(target.begin(), target.end());
455
sort(decoy.begin(), decoy.end());
457
double dividing_score = (max(target.back(), decoy.back()) /*scores.back()*/ - min(target[0], decoy[0]) /*scores[0]*/) / number_of_bins;
459
temp[0] = (dividing_score / 2);
464
double temp_divider = dividing_score;
465
for (std::vector<double>::iterator it = target.begin(); it < target.end(); ++it)
467
*it = *it + fabs(smallest_score_) + 0.001;
468
if (temp_divider - *it >= 0 && bin < number_of_bins - 1)
470
points[bin][1] = (points[bin][1] + 1);
472
else if (bin == number_of_bins - 1)
474
points[bin][1] = (points[bin][1] + 1);
478
temp[0] = ((temp_divider + temp_divider + dividing_score) / 2);
482
temp_divider += dividing_score;
487
temp_divider = dividing_score;
488
for (std::vector<double>::iterator it = decoy.begin(); it < decoy.end(); ++it)
490
*it = *it + fabs(smallest_score_) + 0.001;
491
if (temp_divider - *it >= 0 && bin < number_of_bins - 1)
493
points[bin][2] = (points[bin][2] + 1);
495
else if (bin == number_of_bins - 1)
497
points[bin][2] = (points[bin][2] + 1);
501
// temp[0] = ((temp_divider + temp_divider + dividing_score)/2);
505
temp_divider += dividing_score;
509
for (vector<DPosition<3> >::iterator it = points.begin(); it < points.end(); ++it)
511
// if((*it)[1] > (*it)[2])
512
// {(*it)[1] = (*it)[1] + (*it)[2];}
513
/* else{/(*it)[2] = (*it)[1] + (*it)[2];//}*/
515
(*it)[1] = ((*it)[1] / ((decoy.size() + target.size()) * dividing_score));
516
(*it)[2] = ((*it)[2] / ((decoy.size() + target.size()) * dividing_score));
519
TextFile data_points;
520
for (vector<DPosition<3> >::iterator it = points.begin(); it < points.end(); ++it)
522
String temp = (*it)[0];
529
data_points.store((String)param_.getValue("output_name") + "_target_decoy_scores.txt");
530
output = String("set output \"") + (String)param_.getValue("output_name") + "_target_decoy.ps\"";
531
(file) << "set terminal postscript color solid linewidth 2.0 rounded";
532
//(*file)<<"set style empty solid 0.5 border -1";
533
//(*file)<<"set style function lines";
534
(file) << "set xlabel \"discriminant score\"";
535
(file) << "set ylabel \"density\"";
536
//TODO: (*file)<<"set title ";
537
(file) << "set key off";
539
String formula1, formula2;
540
formula1 = getGumbelGnuplotFormula(getIncorrectlyAssignedFitResult()) + "* " + String(getNegativePrior()); //String(incorrectly_assigned_fit_param_.A) +" * exp(-(x - " + String(incorrectly_assigned_fit_param_.x0) + ") ** 2 / 2 / (" + String(incorrectly_assigned_fit_param_.sigma) + ") ** 2)"+ "*" + String(negative_prior_);
541
formula2 = getGaussGnuplotFormula(getCorrectlyAssignedFitResult()) + "* (1 - " + String(getNegativePrior()) + ")"; //String(correctly_assigned_fit_param_.A) +" * exp(-(x - " + String(correctly_assigned_fit_param_.x0) + ") ** 2 / 2 / (" + String(correctly_assigned_fit_param_.sigma) + ") ** 2)"+ "* (1 - " + String(negative_prior_) + ")";
542
(file) << ("plot \"" + (String)param_.getValue("output_name") + "_target_decoy_scores.txt\" using 1:3 with boxes fill solid 0.8 noborder, \"" + (String)param_.getValue("output_name") + "_target_decoy_scores.txt\" using 1:2 with boxes, " + formula1 + " , " + formula2);
543
file.store((String)param_.getValue("output_name") + "_target_decoy");
534
547
} // namespace OpenMS