~ubuntu-branches/ubuntu/wily/openms/wily

« back to all changes in this revision

Viewing changes to source/MATH/STATISTICS/PosteriorErrorProbabilityModel.C

  • Committer: Package Import Robot
  • Author(s): Filippo Rusconi
  • Date: 2012-11-12 15:58:12 UTC
  • Revision ID: package-import@ubuntu.com-20121112155812-vr15wtg9b50cuesg
Tags: upstream-1.9.0
ImportĀ upstreamĀ versionĀ 1.9.0

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
// -*- mode: C++; tab-width: 2; -*-
 
2
// vi: set ts=2:
 
3
//
 
4
// --------------------------------------------------------------------------
 
5
//                   OpenMS Mass Spectrometry Framework
 
6
// --------------------------------------------------------------------------
 
7
//  Copyright (C) 2003-2009 -- Oliver Kohlbacher, Knut Reinert
 
8
//
 
9
//  This library is free software; you can redistribute it and/or
 
10
//  modify it under the terms of the GNU Lesser General Public
 
11
//  License as published by the Free Software Foundation; either
 
12
//  version 2.1 of the License, or (at your option) any later version.
 
13
//
 
14
//  This library is distributed in the hope that it will be useful,
 
15
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
 
16
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 
17
//  Lesser General Public License for more details.
 
18
//
 
19
//  You should have received a copy of the GNU Lesser General Public
 
20
//  License along with this library; if not, write to the Free Software
 
21
//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
22
//
 
23
// --------------------------------------------------------------------------
 
24
// $Maintainer: David Wojnar $
 
25
// $Authors: David Wojnar $
 
26
// --------------------------------------------------------------------------
 
27
//
 
28
#include <OpenMS/MATH/STATISTICS/PosteriorErrorProbabilityModel.h>
 
29
#include <OpenMS/FORMAT/TextFile.h>
 
30
#include <OpenMS/DATASTRUCTURES/String.h>
 
31
#include <algorithm>
 
32
#include <gsl/gsl_statistics.h>
 
33
#include <boost/math/special_functions/fpclassify.hpp>
 
34
 
 
35
using namespace std;
 
36
 
 
37
namespace OpenMS
 
38
{
 
39
        namespace Math
 
40
        {
 
41
                PosteriorErrorProbabilityModel::PosteriorErrorProbabilityModel()
 
42
                : DefaultParamHandler("PosteriorErrorProbabilityModel"), negative_prior_(0.5),max_incorrectly_(0),max_correctly_(0), smallest_score_(0)
 
43
        {
 
44
                        defaults_.setValue("number_of_bins", 100, "Number of bins used for visualization. Only needed if each iteration step of the EM-Algorithm will be visualized", StringList::create("advanced"));
 
45
                        defaults_.setValue("output_plots","false","If true every step of the EM-algorithm will be written to a file as a gnuplot formula",StringList::create("advanced"));
 
46
                        defaults_.setValidStrings("output_plots",StringList::create("true,false"));
 
47
                        defaults_.setValue("output_name","", "if output_plots is on, the output files will be saved in the following manner: <output_name>scores.txt for the scores and <output_name> which contains each step of the EM-algorithm e.g. output_name = /usr/home/OMSSA123 then /usr/home/OMSSA123_scores.txt, /usr/home/OMSSA123 will be written. If no directory is specified, e.g. instead of '/usr/home/OMSSA123' just OMSSA123, the files will be written into the working directory.",StringList::create("advanced,output file"));
 
48
                        defaults_.setValue("incorrectly_assigned","Gumbel", "for 'Gumbel', the Gumbel distribution is used to plot incorrectly assigned sequences. For 'Gauss', the Gauss distribution is used.",StringList::create("advanced"));
 
49
                        defaults_.setValidStrings("incorrectly_assigned",StringList::create("Gumbel,Gauss"));
 
50
                        defaultsToParam_();
 
51
                        calc_incorrect_ = &PosteriorErrorProbabilityModel::getGumbel;
 
52
                        calc_correct_ = &PosteriorErrorProbabilityModel::getGauss;
 
53
                        getNegativeGnuplotFormula_ = &PosteriorErrorProbabilityModel::getGumbelGnuplotFormula;
 
54
                        getPositiveGnuplotFormula_ = &PosteriorErrorProbabilityModel::getGaussGnuplotFormula;
 
55
                }
 
56
                
 
57
                PosteriorErrorProbabilityModel::~PosteriorErrorProbabilityModel()
 
58
                {
 
59
                }
 
60
                                
 
61
                bool PosteriorErrorProbabilityModel::fit( std::vector<double>& search_engine_scores)
 
62
                {       
 
63
                        if(search_engine_scores.empty())
 
64
                        {
 
65
                                return false;
 
66
                        }
 
67
                        //-------------------------------------------------------------
 
68
                        // Initializing Parameters
 
69
                        //-------------------------------------------------------------
 
70
                        sort(search_engine_scores.begin(),search_engine_scores.end());          
 
71
                        
 
72
                        smallest_score_ = search_engine_scores[0];
 
73
                        vector<double> x_scores;
 
74
                        x_scores.resize(search_engine_scores.size());
 
75
                        std::vector< double>::iterator it = x_scores.begin();
 
76
                        for(std::vector< double>::iterator iti = search_engine_scores.begin(); iti < search_engine_scores.end(); ++it,++iti)
 
77
                        {
 
78
                                *it = *iti + fabs(smallest_score_) + 0.001;
 
79
                        }
 
80
                        negative_prior_ = 0.7;
 
81
                        if(param_.getValue("incorrectly_assigned") == "Gumbel")
 
82
                        {
 
83
                                incorrectly_assigned_fit_param_.x0 = gsl_stats_mean(&x_scores[0], 1, ceil(0.5* x_scores.size())) + x_scores[0];
 
84
                                incorrectly_assigned_fit_param_.sigma = gsl_stats_sd(&x_scores[0], 1, x_scores.size() - 1);//pow(gsl_stats_sd_with_fixed_mean(&probabilities[x_score_start], 1, probabilities.size() - x_score_start, gauss_fit_param_.x0),2);
 
85
                                incorrectly_assigned_fit_param_.A = 1   /sqrt(2*3.14159*pow(incorrectly_assigned_fit_param_.sigma,2));  
 
86
                                //TODO: compute directly with gauss. Workaround:
 
87
                                calc_incorrect_ = &PosteriorErrorProbabilityModel::getGauss;
 
88
                                getNegativeGnuplotFormula_ = &PosteriorErrorProbabilityModel::getGumbelGnuplotFormula;
 
89
                        }
 
90
                        else
 
91
                        {
 
92
                                incorrectly_assigned_fit_param_.x0 = gsl_stats_mean(&x_scores[0], 1, ceil(0.5* x_scores.size())) + x_scores[0];
 
93
                                incorrectly_assigned_fit_param_.sigma = gsl_stats_sd(&x_scores[0], 1, x_scores.size() - 1);//pow(gsl_stats_sd_with_fixed_mean(&probabilities[x_score_start], 1, probabilities.size() - x_score_start, gauss_fit_param_.x0),2);
 
94
                                incorrectly_assigned_fit_param_.A = 1   /sqrt(2*3.14159*pow(incorrectly_assigned_fit_param_.sigma,2));          
 
95
                                calc_incorrect_ = &PosteriorErrorProbabilityModel::getGauss;
 
96
                                getNegativeGnuplotFormula_ = &PosteriorErrorProbabilityModel::getGaussGnuplotFormula;
 
97
                        }
 
98
                        getPositiveGnuplotFormula_ = &PosteriorErrorProbabilityModel::getGaussGnuplotFormula;
 
99
                        calc_correct_ = &PosteriorErrorProbabilityModel::getGauss;
 
100
                        Size x_score_start = std::min(x_scores.size()-1, (Size) ceil(x_scores.size()*0.7)); // if only one score is present, ceil(...) will yield 1, which is an invalid index
 
101
                        correctly_assigned_fit_param_.x0 = gsl_stats_mean(&x_scores[x_score_start], 1, x_scores.size() - x_score_start) + x_scores[x_score_start];//(gauss_scores.begin()->getX() + (gauss_scores.end()-1)->getX())/2; 
 
102
                        correctly_assigned_fit_param_.sigma = incorrectly_assigned_fit_param_.sigma;
 
103
                        correctly_assigned_fit_param_.A = 1.0   / sqrt(2*3.14159*pow(correctly_assigned_fit_param_.sigma,2));           
 
104
 
 
105
                        DoubleReal maxlike(0);
 
106
                        vector<DoubleReal> incorrect_density;
 
107
                        vector<DoubleReal> correct_density;
 
108
                        
 
109
                        fillDensities(x_scores,incorrect_density,correct_density);
 
110
                        
 
111
                        
 
112
                        maxlike = computeMaxLikelihood(incorrect_density,correct_density);
 
113
                        //-------------------------------------------------------------
 
114
                        // create files for output
 
115
                        //-------------------------------------------------------------
 
116
                        bool output_plots  = param_.getValue("output_plots").toBool();
 
117
                        TextFile* file = NULL;
 
118
                        if(output_plots)
 
119
                        {
 
120
                                file = InitPlots(x_scores);
 
121
                        }                       
 
122
                        //-------------------------------------------------------------
 
123
                        // Estimate Parameters - EM algorithm
 
124
                        //-------------------------------------------------------------
 
125
                        bool stop_em_init = false;
 
126
                        do
 
127
                        { 
 
128
                                //E-STEP
 
129
                                DoubleReal one_minus_sum_posterior = one_minus_sum_post(incorrect_density,correct_density);
 
130
                                DoubleReal sum_posterior = sum_post(incorrect_density,correct_density);
 
131
                                
 
132
                                //new mean                              
 
133
                                DoubleReal sum_positive_x0 = sum_pos_x0(x_scores, incorrect_density,correct_density);
 
134
                                DoubleReal sum_negative_x0 = sum_neg_x0(x_scores, incorrect_density,correct_density);
 
135
                                
 
136
                                DoubleReal positive_mean = sum_positive_x0/one_minus_sum_posterior; 
 
137
                                DoubleReal negative_mean = sum_negative_x0/sum_posterior; 
 
138
                                
 
139
                                //new standard deviation
 
140
                                DoubleReal sum_positive_sigma = sum_pos_sigma(x_scores,incorrect_density,correct_density, positive_mean);
 
141
                                DoubleReal sum_negative_sigma = sum_neg_sigma(x_scores,incorrect_density,correct_density, negative_mean);
 
142
                                
 
143
                                //update parameters
 
144
                                correctly_assigned_fit_param_.x0 = positive_mean;
 
145
                                if(sum_positive_sigma  != 0 && one_minus_sum_posterior != 0)
 
146
                                {
 
147
                                        correctly_assigned_fit_param_.sigma = sqrt(sum_positive_sigma/one_minus_sum_posterior); 
 
148
                                        correctly_assigned_fit_param_.A = 1/sqrt(2*3.14159*pow(correctly_assigned_fit_param_.sigma,2));
 
149
                                }
 
150
                                                
 
151
                                incorrectly_assigned_fit_param_.x0 = negative_mean;
 
152
                                if(sum_negative_sigma  != 0 && sum_posterior != 0)
 
153
                                {
 
154
                                        incorrectly_assigned_fit_param_.sigma = sqrt(sum_negative_sigma/sum_posterior);
 
155
                                        incorrectly_assigned_fit_param_.A = 1/sqrt(2*3.14159*pow(incorrectly_assigned_fit_param_.sigma,2));
 
156
                                }                               
 
157
 
 
158
                                        
 
159
                                //compute new prior probabilities negative peptides
 
160
                                fillDensities(x_scores,incorrect_density,correct_density);
 
161
                                sum_posterior = sum_post(incorrect_density,correct_density);
 
162
                                negative_prior_ = sum_posterior/x_scores.size();
 
163
                                
 
164
                                DoubleReal new_maxlike(computeMaxLikelihood(incorrect_density,correct_density));
 
165
        if(boost::math::isnan(new_maxlike - maxlike))
 
166
                                {
 
167
                                        return false;
 
168
                                        //throw Exception::UnableToFit(__FILE__,__LINE__,__PRETTY_FUNCTION__,"UnableToFit-PosteriorErrorProbability","Could not fit mixture model to data");                                    
 
169
                                }
 
170
        if(fabs(new_maxlike - maxlike) < 0.001)
 
171
        {
 
172
                stop_em_init = true;
 
173
                sum_posterior = sum_post(incorrect_density,correct_density);
 
174
                                        negative_prior_ = sum_posterior/x_scores.size();
 
175
                
 
176
        }
 
177
                                if(output_plots)
 
178
                                {
 
179
                                        String formula1, formula2,formula3;
 
180
                                        formula1 = ((this)->*(getNegativeGnuplotFormula_))(incorrectly_assigned_fit_param_)+ "* " + String(negative_prior_);//String(incorrectly_assigned_fit_param_.A) +" * exp(-(x - " + String(incorrectly_assigned_fit_param_.x0) + ") ** 2 / 2 / (" + String(incorrectly_assigned_fit_param_.sigma) + ") ** 2)"+ "*" + String(negative_prior_);
 
181
                                        formula2 = ((this)->*(getPositiveGnuplotFormula_))(correctly_assigned_fit_param_)+ "* (1 - " + String(negative_prior_) + ")";//String(correctly_assigned_fit_param_.A) +" * exp(-(x - " + String(correctly_assigned_fit_param_.x0) + ") ** 2 / 2 / (" + String(correctly_assigned_fit_param_.sigma) + ") ** 2)"+ "* (1 - " + String(negative_prior_) + ")";
 
182
                                        formula3 = getBothGnuplotFormula(incorrectly_assigned_fit_param_,correctly_assigned_fit_param_);
 
183
                                        (*file)<<("plot \""+(String)param_.getValue("output_name") +"_scores.txt\" with boxes, " + formula1 + " , " + formula2 + " , " + formula3);
 
184
                                }
 
185
                                //update maximum likelihood
 
186
                                maxlike = new_maxlike;                          
 
187
                        }while(!stop_em_init);
 
188
                        //-------------------------------------------------------------
 
189
                        // Finished fitting
 
190
                        //-------------------------------------------------------------                 
 
191
                        //!!Workaround:
 
192
                        if(param_.getValue("incorrectly_assigned") == "Gumbel")
 
193
                        {
 
194
                                calc_incorrect_ = &PosteriorErrorProbabilityModel::getGumbel;
 
195
                        }
 
196
                        max_incorrectly_ = ((this)->*(calc_incorrect_))(incorrectly_assigned_fit_param_.x0, incorrectly_assigned_fit_param_);
 
197
                        max_correctly_ = ((this)->*(calc_correct_))(correctly_assigned_fit_param_.x0, correctly_assigned_fit_param_ );
 
198
                        if(output_plots)
 
199
                        {
 
200
                                String formula1, formula2,formula3;
 
201
                                formula1 = ((this)->*(getNegativeGnuplotFormula_))(incorrectly_assigned_fit_param_)+ "*" + String(negative_prior_);//String(incorrectly_assigned_fit_param_.A) +" * exp(-(x - " + String(incorrectly_assigned_fit_param_.x0) + ") ** 2 / 2 / (" + String(incorrectly_assigned_fit_param_.sigma) + ") ** 2)"+ "*" + String(negative_prior_);
 
202
                                formula2 = ((this)->*(getPositiveGnuplotFormula_))(correctly_assigned_fit_param_)+ "* (1 - " + String(negative_prior_) + ")";// String(correctly_assigned_fit_param_.A) +" * exp(-(x - " + String(correctly_assigned_fit_param_.x0) + ") ** 2 / 2 / (" + String(correctly_assigned_fit_param_.sigma) + ") ** 2)"+ "* (1 - " + String(negative_prior_) + ")";
 
203
                                formula3 = getBothGnuplotFormula(incorrectly_assigned_fit_param_,correctly_assigned_fit_param_);
 
204
                                (*file)<<("plot \""+(String)param_.getValue("output_name") +"_scores.txt\" with boxes, " + formula1 + " , " + formula2 + " , " + formula3);
 
205
                                file->store((String)param_.getValue("output_name"));
 
206
                                delete file;
 
207
                        }
 
208
                        return true;
 
209
                }
 
210
                
 
211
                bool PosteriorErrorProbabilityModel::fit(  std::vector<double>& search_engine_scores, vector<double>& probabilities)
 
212
                {       
 
213
                        bool return_value;
 
214
                        return_value = fit(search_engine_scores);
 
215
                        if(!return_value) return false;
 
216
                        probabilities.resize(search_engine_scores.size());
 
217
                        vector<double>::iterator probs =probabilities.begin();
 
218
                        for(vector<double>::iterator scores = search_engine_scores.begin(); scores != search_engine_scores.end(); ++scores, ++probs)
 
219
                        {
 
220
                                *probs = computeProbability(*scores);
 
221
                        }
 
222
                        return true;
 
223
                }
 
224
                
 
225
                void PosteriorErrorProbabilityModel::fillDensities(vector<double>& x_scores,vector<DoubleReal>& incorrect_density,vector<DoubleReal>& correct_density)
 
226
                {
 
227
                        if(incorrect_density.size() != x_scores.size())
 
228
                        {
 
229
                                incorrect_density.resize(x_scores.size());
 
230
                                correct_density.resize(x_scores.size());
 
231
                        }
 
232
                        vector<DoubleReal>::iterator incorrect = incorrect_density.begin();
 
233
                        vector<DoubleReal>::iterator correct = correct_density.begin();
 
234
                        for(vector<double>::iterator scores = x_scores.begin(); scores != x_scores.end(); ++scores, ++incorrect, ++correct)
 
235
                        {
 
236
                                *incorrect = ((this)->*(calc_incorrect_))(*scores, incorrectly_assigned_fit_param_);
 
237
                                *correct = ((this)->*(calc_correct_))(*scores, correctly_assigned_fit_param_ );
 
238
                        }
 
239
                }
 
240
                
 
241
                DoubleReal PosteriorErrorProbabilityModel::computeMaxLikelihood(vector<DoubleReal>& incorrect_density, vector<DoubleReal>& correct_density)
 
242
                {
 
243
                        DoubleReal maxlike(0);
 
244
                        vector<DoubleReal>::iterator incorrect = incorrect_density.begin();
 
245
                        for(vector<DoubleReal>::iterator correct = correct_density.begin(); correct < correct_density.end() ; ++correct, ++incorrect)
 
246
                        {
 
247
                                maxlike += log10(negative_prior_* (*incorrect) +(1-negative_prior_)* (*correct));
 
248
                        }
 
249
                        return maxlike;
 
250
                }
 
251
                DoubleReal PosteriorErrorProbabilityModel::one_minus_sum_post(vector<DoubleReal>& incorrect_density, vector<DoubleReal>& correct_density)
 
252
                {
 
253
                        DoubleReal one_min(0);
 
254
                        vector<DoubleReal>::iterator incorrect = incorrect_density.begin();
 
255
                        for(vector<DoubleReal>::iterator correct = correct_density.begin(); correct < correct_density.end() ; ++correct, ++incorrect)
 
256
                        {
 
257
                                one_min +=  1  - ((negative_prior_* (*incorrect) )/((negative_prior_* (*incorrect) ) + (1-negative_prior_)* (*correct) ));
 
258
                        }
 
259
                        return one_min;         
 
260
                }
 
261
 
 
262
                DoubleReal PosteriorErrorProbabilityModel::sum_post(vector<DoubleReal>& incorrect_density, vector<DoubleReal>& correct_density)
 
263
                {
 
264
                        DoubleReal post(0);
 
265
                        vector<DoubleReal>::iterator incorrect = incorrect_density.begin();
 
266
                        for(vector<DoubleReal>::iterator correct = correct_density.begin(); correct < correct_density.end() ; ++correct, ++incorrect)
 
267
                        {
 
268
                                post += ((negative_prior_* (*incorrect) )/((negative_prior_* (*incorrect) ) + (1-negative_prior_)* (*correct) ));
 
269
                        }
 
270
                        return post;            
 
271
                }
 
272
                
 
273
                DoubleReal PosteriorErrorProbabilityModel::sum_pos_x0(vector<double>& x_scores, vector<DoubleReal>& incorrect_density, vector<DoubleReal>& correct_density)
 
274
                {
 
275
                        DoubleReal pos_x0(0);
 
276
                        vector<double>::iterator the_x = x_scores.begin();
 
277
                        vector<DoubleReal>::iterator incorrect = incorrect_density.begin();
 
278
                        for(vector<DoubleReal>::iterator correct = correct_density.begin(); correct < correct_density.end() ; ++correct, ++incorrect,++the_x)
 
279
                        {
 
280
                                pos_x0 += ((1  - ((negative_prior_* (*incorrect) )/((negative_prior_* (*incorrect) ) + (1-negative_prior_)* (*correct) )))* (*the_x) );
 
281
                        }
 
282
                        return pos_x0;  
 
283
                }
 
284
                
 
285
                DoubleReal PosteriorErrorProbabilityModel::sum_neg_x0(vector<double>& x_scores, vector<DoubleReal>& incorrect_density, vector<DoubleReal>& correct_density)
 
286
                {
 
287
                        DoubleReal neg_x0(0);
 
288
                        vector<double>::iterator the_x = x_scores.begin();
 
289
                        vector<DoubleReal>::iterator correct = correct_density.begin();
 
290
                        for(vector<DoubleReal>::iterator incorrect = incorrect_density.begin(); incorrect < incorrect_density.end() ; ++correct, ++incorrect,++the_x)
 
291
                        {
 
292
                                neg_x0 += ((((negative_prior_* (*incorrect) )/((negative_prior_* (*incorrect) ) + (1-negative_prior_)* (*correct) )))* (*the_x) );
 
293
                        }
 
294
                        return neg_x0;  
 
295
                }               
 
296
                
 
297
                DoubleReal PosteriorErrorProbabilityModel::sum_pos_sigma(vector<double>& x_scores, vector<DoubleReal>& incorrect_density, vector<DoubleReal>& correct_density, DoubleReal positive_mean)
 
298
                {
 
299
                        DoubleReal pos_sigma(0);
 
300
                        vector<double>::iterator the_x = x_scores.begin();
 
301
                        vector<DoubleReal>::iterator incorrect = incorrect_density.begin();
 
302
                        for(vector<DoubleReal>::iterator correct = correct_density.begin(); correct < correct_density.end() ; ++correct, ++incorrect,++the_x)
 
303
                        {
 
304
                                pos_sigma += ((1  - ((negative_prior_* (*incorrect) )/((negative_prior_* (*incorrect) ) + (1-negative_prior_)* (*correct) )))*pow( (*the_x) - positive_mean,2));
 
305
                        }
 
306
                        return pos_sigma;
 
307
                }
 
308
                
 
309
                DoubleReal PosteriorErrorProbabilityModel::sum_neg_sigma(vector<double>& x_scores, vector<DoubleReal>& incorrect_density, vector<DoubleReal>& correct_density, DoubleReal positive_mean)
 
310
                {
 
311
                        DoubleReal neg_sigma(0);
 
312
                        vector<double>::iterator the_x = x_scores.begin();
 
313
                        vector<DoubleReal>::iterator incorrect = incorrect_density.begin();
 
314
                        for(vector<DoubleReal>::iterator correct = correct_density.begin(); correct < correct_density.end() ; ++correct, ++incorrect,++the_x)
 
315
                        {
 
316
                                neg_sigma += ((((negative_prior_* (*incorrect) )/((negative_prior_* (*incorrect) ) + (1-negative_prior_)* (*correct) )))*pow( (*the_x) - positive_mean,2));
 
317
                        }
 
318
                        return neg_sigma;
 
319
                }
 
320
                DoubleReal PosteriorErrorProbabilityModel::computeProbability(DoubleReal score)
 
321
                {
 
322
                        score = score + fabs(smallest_score_) + 0.001;
 
323
                        DoubleReal x_neg;
 
324
                        DoubleReal x_pos;
 
325
                        //the score is smaller than the peak of incorreclty assigned sequences. To ensure that the probabilies wont rise again use the incorrectly assigend peak for computation
 
326
                        if(score < incorrectly_assigned_fit_param_.x0)
 
327
                        {
 
328
                                x_neg = max_incorrectly_;       
 
329
                                x_pos = ((this)->*(calc_correct_))(score, correctly_assigned_fit_param_ );
 
330
                        }
 
331
                        //same as above. However, this time to ensure that probabilities wont drop again.
 
332
                        else if(score > correctly_assigned_fit_param_.x0)
 
333
                        {
 
334
                                x_neg = ((this)->*(calc_incorrect_))(score, incorrectly_assigned_fit_param_);
 
335
                                x_pos = max_correctly_;
 
336
                        }
 
337
                        //if its in between use the normal formula
 
338
                        else
 
339
                        {
 
340
                                x_neg = ((this)->*(calc_incorrect_))(score, incorrectly_assigned_fit_param_);
 
341
                                x_pos = ((this)->*(calc_correct_))(score, correctly_assigned_fit_param_ );
 
342
                        }
 
343
                         return (negative_prior_*x_neg)/((negative_prior_*x_neg) + (1-negative_prior_)*x_pos);          
 
344
                }               
 
345
                TextFile* PosteriorErrorProbabilityModel::InitPlots(vector<double> & x_scores)
 
346
                {               
 
347
                                TextFile* file = new TextFile;
 
348
                                String output;
 
349
                                std::vector<DPosition<2> > points;
 
350
                                Int number_of_bins = param_.getValue("number_of_bins");
 
351
                                points.resize(number_of_bins);
 
352
                                DPosition<2> temp;
 
353
                                double dividing_score = (x_scores.back() -x_scores[0])/number_of_bins;
 
354
 
 
355
                                temp.setX(dividing_score/2);
 
356
                                temp.setY(0);
 
357
                                Int bin = 0;
 
358
                                points[bin] = temp;
 
359
                                double temp_divider = dividing_score;
 
360
                                for(std::vector< double>::iterator it = x_scores.begin(); it < x_scores.end(); ++it)
 
361
                                {
 
362
                                        if(temp_divider - *it >= 0 && bin < number_of_bins - 1 )
 
363
                                        {
 
364
                                                        points[bin].setY(points[bin].getY()+1);
 
365
                                        }
 
366
                                        else if(bin  == number_of_bins - 1 )
 
367
                                        {
 
368
                                                points[bin].setY(points[bin].getY()+1);
 
369
                                        }
 
370
                                        else
 
371
                                        {
 
372
                                                temp.setX((temp_divider + temp_divider + dividing_score)/2);
 
373
                                                temp.setY(1);
 
374
                                                ++bin;
 
375
                                                points[bin] = temp;
 
376
                                                temp_divider += dividing_score;
 
377
                                        }
 
378
                                }       
 
379
 
 
380
                                for(vector<DPosition<2> >::iterator it = points.begin(); it < points.end(); ++it)
 
381
                                {
 
382
                                        it->setY( it->getY()/( x_scores.size()  *dividing_score));
 
383
                                }
 
384
                                        
 
385
                                TextFile data_points;
 
386
                                for(vector<DPosition<2> >::iterator it = points.begin(); it < points.end() ; ++it)
 
387
                                {
 
388
                                        String temp  = it->getX();
 
389
                                        temp += "\t";
 
390
                                        temp += it->getY();
 
391
                                        data_points<<temp;
 
392
                                }
 
393
                                data_points.store((String)param_.getValue("output_name") + "_scores.txt");
 
394
                                output = "set output \""+ (String)param_.getValue("output_name") +".ps\"";
 
395
                                (*file)<<"set terminal postscript color solid linewidth 2.0 rounded";
 
396
                                //(*file)<<"set style empty solid 0.5 border -1";
 
397
                                //(*file)<<"set style function lines";
 
398
                                (*file)<<"set xlabel \"discriminant score\"";
 
399
                                (*file)<<"set ylabel \"density\"";
 
400
                                //TODO: (*file)<<"set title ";
 
401
                                (*file)<<"set key off";
 
402
                                (*file)<<(output);
 
403
                                String formula1, formula2;
 
404
                                formula1 = ((this)->*(getNegativeGnuplotFormula_))(incorrectly_assigned_fit_param_)+ "* " + String(negative_prior_);//String(incorrectly_assigned_fit_param_.A) +" * exp(-(x - " + String(incorrectly_assigned_fit_param_.x0) + ") ** 2 / 2 / (" + String(incorrectly_assigned_fit_param_.sigma) + ") ** 2)"+ "*" + String(negative_prior_);
 
405
                                formula2 = ((this)->*(getPositiveGnuplotFormula_))(correctly_assigned_fit_param_)+ "* (1 - " + String(negative_prior_) + ")";//String(correctly_assigned_fit_param_.A) +" * exp(-(x - " + String(correctly_assigned_fit_param_.x0) + ") ** 2 / 2 / (" + String(correctly_assigned_fit_param_.sigma) + ") ** 2)"+ "* (1 - " + String(negative_prior_) + ")";
 
406
                                (*file)<< ("plot \""+(String)param_.getValue("output_name") +"_scores.txt\" with boxes, " + formula1 + " , " + formula2);
 
407
                                return file;
 
408
                }
 
409
 
 
410
                const String PosteriorErrorProbabilityModel::getGumbelGnuplotFormula(const GaussFitter::GaussFitResult& params) const
 
411
                {
 
412
                        // build a formula with the fitted parameters for gnuplot
 
413
                        stringstream formula;
 
414
                        formula << "(1/" << params.sigma <<") * " << "exp(( "<< params.x0<< "- x)/"<< params.sigma <<") * exp(-exp(("<<params.x0<<" - x)/"<<params.sigma<<"))";
 
415
                        return  formula.str();
 
416
                }
 
417
                                
 
418
                const String PosteriorErrorProbabilityModel::getGaussGnuplotFormula(const GaussFitter::GaussFitResult& params) const
 
419
                {
 
420
                        stringstream formula;
 
421
                        formula << params.A << " * exp(-(x - " << params.x0 << ") ** 2 / 2 / (" << params.sigma << ") ** 2)";
 
422
                        return formula.str();
 
423
                }
 
424
                
 
425
                const String PosteriorErrorProbabilityModel::getBothGnuplotFormula(const GaussFitter::GaussFitResult& incorrect,const GaussFitter::GaussFitResult& correct) const
 
426
                {
 
427
                        stringstream formula;
 
428
                        formula << negative_prior_<<"*"<<  ((this)->*(getNegativeGnuplotFormula_))(incorrect) <<" + (1-"<<negative_prior_<<")*"<< ((this)->*(getPositiveGnuplotFormula_))(correct);
 
429
                        return formula.str();
 
430
                }
 
431
                
 
432
                void PosteriorErrorProbabilityModel::plotTargetDecoyEstimation(vector<double> &target,vector<double> & decoy)
 
433
                {
 
434
                                TextFile file;
 
435
                                String output;
 
436
                                std::vector<DPosition<3> > points;
 
437
                                Int number_of_bins = param_.getValue("number_of_bins");
 
438
                                points.resize(number_of_bins);
 
439
                                DPosition<3> temp;
 
440
 
 
441
                                sort(target.begin(),target.end());
 
442
                                sort(decoy.begin(),decoy.end());
 
443
                                
 
444
                                double dividing_score = (max(target.back(),decoy.back())/*scores.back()*/ - min(target[0],decoy[0])/*scores[0]*/)/number_of_bins;
 
445
                                
 
446
                                temp[0] = (dividing_score/2);
 
447
                                temp[1] = 0;
 
448
                                temp[2] = 0;
 
449
                                Int bin = 0;
 
450
                                points[bin] = temp;
 
451
                                double temp_divider = dividing_score;
 
452
                                for(std::vector< double>::iterator it = target.begin(); it < target.end(); ++it)
 
453
                                {
 
454
                                        *it = *it + fabs(smallest_score_) + 0.001;
 
455
                                        if(temp_divider - *it >= 0 && bin < number_of_bins - 1 )
 
456
                                        {
 
457
                                                        points[bin][1] = (points[bin][1]+1);
 
458
                                        }
 
459
                                        else if(bin  == number_of_bins - 1 )
 
460
                                        {
 
461
                                                points[bin][1] = (points[bin][1]+1);
 
462
                                        }
 
463
                                        else
 
464
                                        {
 
465
                                                temp[0] = ((temp_divider + temp_divider + dividing_score)/2);
 
466
                                                temp[1] = 1;
 
467
                                                ++bin;
 
468
                                                points[bin] = temp;
 
469
                                                temp_divider += dividing_score;
 
470
                                        }
 
471
                                }       
 
472
 
 
473
                                bin = 0;
 
474
                                temp_divider = dividing_score;
 
475
                                for(std::vector< double>::iterator it = decoy.begin(); it < decoy.end(); ++it)
 
476
                                {       
 
477
                                        *it = *it + fabs(smallest_score_) + 0.001;
 
478
                                        if(temp_divider - *it >= 0 && bin < number_of_bins - 1 )
 
479
                                        {
 
480
                                                        points[bin][2] = (points[bin][2]+1);
 
481
                                        }
 
482
                                        else if(bin  == number_of_bins - 1 )
 
483
                                        {
 
484
                                                points[bin][2] = (points[bin][2]+1);
 
485
                                        }
 
486
                                        else
 
487
                                        {
 
488
                                                //temp[0] = ((temp_divider + temp_divider + dividing_score)/2);
 
489
                                        //      temp[2] = 1;
 
490
                                                ++bin;
 
491
                                                points[bin][2] = 1;
 
492
                                                temp_divider += dividing_score;
 
493
                                        }
 
494
                                }       
 
495
 
 
496
                                for(vector<DPosition<3> >::iterator it = points.begin(); it < points.end(); ++it)
 
497
                                {
 
498
                                //      if((*it)[1] > (*it)[2])
 
499
                                //      {(*it)[1] = (*it)[1] + (*it)[2];}
 
500
                                /*      else{/(*it)[2] = (*it)[1] + (*it)[2];//}*/
 
501
                                        
 
502
                                        (*it)[1] = ( (*it)[1]/( (decoy.size()+target.size())  *dividing_score));
 
503
                                        (*it)[2] = ( (*it)[2]/( (decoy.size()+target.size())  *dividing_score));
 
504
                                }
 
505
                                        
 
506
                                TextFile data_points;
 
507
                                for(vector<DPosition<3> >::iterator it = points.begin(); it < points.end() ; ++it)
 
508
                                {
 
509
                                        String temp  = (*it)[0];
 
510
                                        temp += "\t";
 
511
                                        temp += (*it)[1];
 
512
                                        temp += "\t";
 
513
                                        temp += (*it)[2];
 
514
                                        data_points<<temp;
 
515
                                }
 
516
                                data_points.store( (String)param_.getValue("output_name") + "_target_decoy_scores.txt");
 
517
                                output = String("set output \"")+  (String)param_.getValue("output_name") + "_target_decoy.ps\"";
 
518
                                (file)<<"set terminal postscript color solid linewidth 2.0 rounded";
 
519
                                //(*file)<<"set style empty solid 0.5 border -1";
 
520
                                //(*file)<<"set style function lines";
 
521
                                (file)<<"set xlabel \"discriminant score\"";
 
522
                                (file)<<"set ylabel \"density\"";
 
523
                                //TODO: (*file)<<"set title ";
 
524
                                (file)<<"set key off";
 
525
                                (file)<<(output);
 
526
                                String formula1, formula2;
 
527
                                formula1 = getGumbelGnuplotFormula(getIncorrectlyAssignedFitResult())+ "* " + String(getNegativePrior());//String(incorrectly_assigned_fit_param_.A) +" * exp(-(x - " + String(incorrectly_assigned_fit_param_.x0) + ") ** 2 / 2 / (" + String(incorrectly_assigned_fit_param_.sigma) + ") ** 2)"+ "*" + String(negative_prior_);
 
528
                                formula2 = getGaussGnuplotFormula(getCorrectlyAssignedFitResult())+ "* (1 - " + String(getNegativePrior()) + ")";//String(correctly_assigned_fit_param_.A) +" * exp(-(x - " + String(correctly_assigned_fit_param_.x0) + ") ** 2 / 2 / (" + String(correctly_assigned_fit_param_.sigma) + ") ** 2)"+ "* (1 - " + String(negative_prior_) + ")";
 
529
                                (file)<< ("plot \"" + (String)param_.getValue("output_name") + "_target_decoy_scores.txt\"   using 1:3  with boxes fill solid 0.8 noborder, \"" + (String)param_.getValue("output_name") + "_target_decoy_scores.txt\"  using 1:2  with boxes, " + formula1 + " , " + formula2);
 
530
                                file.store( (String)param_.getValue("output_name") + "_target_decoy");
 
531
                        }               
 
532
                
 
533
        } //namespace Math
 
534
} // namespace OpenMS
 
535