~ubuntu-branches/ubuntu/wily/openms/wily

« back to all changes in this revision

Viewing changes to source/APPLICATIONS/TOPP/RTPredict.C

  • Committer: Package Import Robot
  • Author(s): Filippo Rusconi
  • Date: 2012-11-12 15:58:12 UTC
  • Revision ID: package-import@ubuntu.com-20121112155812-vr15wtg9b50cuesg
Tags: upstream-1.9.0
ImportĀ upstreamĀ versionĀ 1.9.0

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
// -*- mode: C++; tab-width: 2; -*-
 
2
// vi: set ts=2:
 
3
//
 
4
// --------------------------------------------------------------------------
 
5
//                   OpenMS Mass Spectrometry Framework
 
6
// --------------------------------------------------------------------------
 
7
//  Copyright (C) 2003-2011 -- Oliver Kohlbacher, Knut Reinert
 
8
//
 
9
//  This library is free software; you can redistribute it and/or
 
10
//  modify it under the terms of the GNU Lesser General Public
 
11
//  License as published by the Free Software Foundation; either
 
12
//  version 2.1 of the License, or (at your option) any later version.
 
13
//
 
14
//  This library is distributed in the hope that it will be useful,
 
15
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
 
16
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 
17
//  Lesser General Public License for more details.
 
18
//
 
19
//  You should have received a copy of the GNU Lesser General Public
 
20
//  License along with this library; if not, write to the Free Software
 
21
//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
22
//
 
23
// --------------------------------------------------------------------------
 
24
// $Maintainer: Mathias Walzer $
 
25
// $Authors: Nico Pfeifer, Mathias Walzer $
 
26
// --------------------------------------------------------------------------
 
27
 
 
28
#include <OpenMS/ANALYSIS/SVM/SVMWrapper.h>
 
29
#include <OpenMS/FORMAT/IdXMLFile.h>
 
30
#include <OpenMS/FORMAT/TextFile.h>
 
31
#include <OpenMS/FORMAT/LibSVMEncoder.h>
 
32
#include <OpenMS/METADATA/ProteinIdentification.h>
 
33
#include <OpenMS/APPLICATIONS/TOPPBase.h>
 
34
#include <OpenMS/MATH/STATISTICS/StatisticFunctions.h>
 
35
 
 
36
#include <map>
 
37
 
 
38
using namespace OpenMS;
 
39
using namespace std;
 
40
 
 
41
//-------------------------------------------------------------
 
42
//Doxygen docu
 
43
//-------------------------------------------------------------
 
44
 
 
45
/**
 
46
        @page TOPP_RTPredict RTPredict
 
47
 
 
48
        @brief This application is used to predict retention times
 
49
                                 for peptides or peptide separation.
 
50
 
 
51
  This methods and applications of this model are described
 
52
  in several publications:
 
53
 
 
54
  Nico Pfeifer, Andreas Leinenbach, Christian G. Huber and Oliver Kohlbacher
 
55
  Statistical learning of peptide retention behavior in chromatographic separations: A new kernel-based approach for computational proteomics.
 
56
  BMC Bioinformatics 2007, 8:468
 
57
 
 
58
  Nico Pfeifer, Andreas Leinenbach, Christian G. Huber and Oliver Kohlbacher
 
59
  Improving Peptide Identification in Proteome Analysis by a Two-Dimensional Retention Time Filtering Approach
 
60
  J. Proteome Res. 2009, 8(8):4109-15
 
61
 
 
62
 
 
63
        The input of this application
 
64
        is an svm model and an IdXML
 
65
        file with peptide identifications. The svm model file is specified
 
66
        by the <b>svm_model</b> parameter in the command line or the ini file.
 
67
        This file should have been produced by the @ref TOPP_RTModel application.
 
68
        <br>
 
69
        For retention time prediction the peptide sequences are extracted
 
70
        from the IdXML inputfile
 
71
        and passed to the svm. The svm then predicts retention times
 
72
        according to the trained model. The predicted retention times
 
73
        are stored as @code <userParam name="predicted_retention_time" value="<predicted retention time>" />
 
74
        @endcode inside the peptide entities in the IdXML output file.
 
75
 
 
76
        For separation prediction you have to specify two output file names.
 
77
        'out_id:positive' is the filename of the peptides which are predicted
 
78
        to be collected by the column and 'out_id:negative' is the file
 
79
        of the predicted flowthrough peptides.
 
80
 
 
81
  Retention time prediction and separation prediction cannot be combined!
 
82
 
 
83
        <B>The command line parameters of this tool are:</B>
 
84
        @verbinclude TOPP_RTPredict.cli
 
85
 
 
86
        @todo This need serious clean up! Combining certain input and output options will
 
87
          result in strange behaviour, especially when using text output/input.
 
88
*/
 
89
 
 
90
// We do not want this class to show up in the docu:
 
91
/// @cond TOPPCLASSES
 
92
 
 
93
class TOPPRTPredict
 
94
        : public TOPPBase
 
95
{
 
96
        public:
 
97
                TOPPRTPredict()
 
98
                        : TOPPBase("RTPredict","Predicts retention times for peptides using a model trained by RTModel.")
 
99
                {
 
100
 
 
101
                }
 
102
 
 
103
        protected:
 
104
                void registerOptionsAndFlags_()
 
105
                {
 
106
                        //input
 
107
                        registerInputFile_("in_id","<file>","","peptides with precursor information", false);
 
108
                        setValidFormats_("in_id",StringList::create("idXML"));
 
109
                        registerInputFile_("in_text","<file>", "","peptides as text-based file", false);
 
110
 
 
111
                        //output
 
112
                        registerTOPPSubsection_("out_id", "Output files in idXML format");
 
113
                        registerOutputFile_("out_id:file","<file>","","Output file with peptide RT prediction", false);
 
114
                        setValidFormats_("out_id:file",StringList::create("idXML"));
 
115
                        registerOutputFile_("out_id:positive","<file>","","Output file in IdXML format containing positive predictions (peptide separation prediction - requires negative file to be present as well)\n", false);
 
116
                        setValidFormats_("out_id:positive",StringList::create("idXML"));
 
117
                        registerOutputFile_("out_id:negative","<file>","","Output file in IdXML format containing negative predictions (peptide separation prediction - requires positive file to be present as well)\n", false);
 
118
                        setValidFormats_("out_id:negative",StringList::create("idXML"));
 
119
                        registerFlag_("out_id:rewrite_peptideidentification_rtmz", "rewrites each peptideidentification's rt and mz from prediction and calculation (according to the best hit)",true);
 
120
 
 
121
                        registerTOPPSubsection_("out_text","Output files in text format");
 
122
                        registerOutputFile_("out_text:file","<file>","","Output file with predicted RT values", false);
 
123
 
 
124
                        registerInputFile_("svm_model","<file>","","svm model in libsvm format (can be produced by RTModel)");
 
125
                        registerDoubleOption_("total_gradient_time","<time>",1.0,"the time (in seconds) of the gradient (peptide RT prediction)", false);
 
126
                        setMinFloat_("total_gradient_time", 0.00001);
 
127
                        registerIntOption_("max_number_of_peptides", "<int>",100000,"the maximum number of peptides considered at once (bigger number will lead to faster results but needs more memory).\n", false, true);
 
128
                }
 
129
 
 
130
                void loadStrings_(String filename, std::vector<String>& sequences)
 
131
                {
 
132
                        TextFile text_file(filename.c_str(), true);
 
133
                        TextFile::iterator it;
 
134
 
 
135
                        sequences.clear();
 
136
 
 
137
                        it = text_file.begin();
 
138
                        while(it != text_file.end())
 
139
                        {
 
140
                                sequences.push_back((*it).trim());
 
141
                                ++it;
 
142
                        }
 
143
                }
 
144
 
 
145
                void writeStringLabelLines_(String filename, map< String, DoubleReal > predicted_data)
 
146
                {
 
147
        ofstream os;
 
148
                                map< String, DoubleReal >::iterator it;
 
149
 
 
150
                                os.open(filename.c_str(), ofstream::out);
 
151
 
 
152
                                for(it = predicted_data.begin(); it != predicted_data.end(); ++it)
 
153
                                {
 
154
                                        os << it->first << " " << it->second << "\n";
 
155
                                }
 
156
                                os.flush();
 
157
                                os.close();
 
158
                }
 
159
 
 
160
                ExitCodes main_(int , const char**)
 
161
                {
 
162
                        IdXMLFile IdXML_file;
 
163
                        vector<ProteinIdentification> protein_identifications;
 
164
                        vector<PeptideIdentification> identifications;
 
165
                        vector< String > peptides;
 
166
                        vector< AASequence > modified_peptides;
 
167
                        vector< DoubleReal > training_retention_times;
 
168
                        vector<PeptideHit> temp_peptide_hits;
 
169
                        SVMWrapper svm;
 
170
                        LibSVMEncoder encoder;
 
171
                        String allowed_amino_acid_characters = "ACDEFGHIKLMNPQRSTVWY";
 
172
                        vector<DoubleReal> predicted_retention_times;
 
173
                        vector<DoubleReal> all_predicted_retention_times;
 
174
                        map< String, DoubleReal > predicted_data;
 
175
                        map< AASequence, DoubleReal > predicted_modified_data;
 
176
                        svm_problem* prediction_data = NULL;
 
177
                        SVMData training_samples;
 
178
                        SVMData prediction_samples;
 
179
                        UInt border_length = 0;
 
180
                        UInt k_mer_length = 0;
 
181
                        DoubleReal sigma = 0;
 
182
                        DoubleReal sigma_0 = 0;
 
183
                        DoubleReal sigma_max = 0;
 
184
                        String temp_string = "";
 
185
                        UInt maximum_length = 50;
 
186
                        pair<DoubleReal, DoubleReal> temp_point;
 
187
                        vector<Real> performance_retention_times;
 
188
                        String svmfile_name = "";
 
189
                        Real total_gradient_time = 1.f;
 
190
                        bool separation_prediction = false;
 
191
                        vector<PeptideIdentification> identifications_positive;
 
192
                        vector<PeptideIdentification> identifications_negative;
 
193
                        bool first_dim_rt = false;
 
194
                        Size number_of_peptides = 0;
 
195
                        Size max_number_of_peptides = getIntOption_("max_number_of_peptides");
 
196
 
 
197
                        //-------------------------------------------------------------
 
198
                        // parsing parameters
 
199
                        //-------------------------------------------------------------
 
200
 
 
201
                        String outputfile_name_positive = getStringOption_("out_id:positive");
 
202
                        String outputfile_name_negative = getStringOption_("out_id:negative");
 
203
                        // for separation prediction, we require both files to be present!
 
204
                        if (outputfile_name_positive != "" || outputfile_name_negative != "")
 
205
                        {
 
206
                                if (outputfile_name_positive != "" && outputfile_name_negative != "")
 
207
                                {
 
208
                                        separation_prediction = true;
 
209
                                }
 
210
                                else
 
211
                                {
 
212
                                        writeLog_("Both files for separation prediction required. Please specify the other one as well. Aborting!");
 
213
                                        return ILLEGAL_PARAMETERS;
 
214
                                }
 
215
                        }
 
216
 
 
217
                        // either or
 
218
                        String input_id = getStringOption_("in_id");
 
219
                        String input_text = getStringOption_("in_text");
 
220
                        if (input_text != "" && input_id != "")
 
221
                        {
 
222
                                writeLog_("Two input parameter files given, only one allowed! Use either -in_id:file or -in_text:file!");
 
223
                                return ILLEGAL_PARAMETERS;
 
224
                        }
 
225
                        else if (input_text == "" && input_id == "")
 
226
                        {
 
227
                                writeLog_("No input file given. Aborting...");
 
228
                                return ILLEGAL_PARAMETERS;
 
229
                        }
 
230
 
 
231
                        // OUTPUT
 
232
                        // (can use both)
 
233
                        String output_id = getStringOption_("out_id:file");
 
234
                        String output_text = getStringOption_("out_text:file");
 
235
                        if (output_text == "" && output_id == "" && !separation_prediction)
 
236
                        {
 
237
                                writeLog_("No output files given. Aborting...");
 
238
                                return ILLEGAL_PARAMETERS;
 
239
                        }
 
240
 
 
241
                        svmfile_name = getStringOption_("svm_model");
 
242
                        total_gradient_time = getDoubleOption_("total_gradient_time");
 
243
 
 
244
                        //-------------------------------------------------------------
 
245
                        // reading input
 
246
                        //-------------------------------------------------------------
 
247
 
 
248
                        svm.loadModel(svmfile_name);
 
249
 
 
250
                        if ((svm.getIntParameter(SVMWrapper::SVM_TYPE) == C_SVC || svm.getIntParameter(SVMWrapper::SVM_TYPE) == NU_SVC) && !separation_prediction)
 
251
                        {
 
252
                                        writeLog_("You cannot perform peptide separation prediction with a model trained for"
 
253
                                                                                + String("\npeptide retention time prediction. Aborting!"));
 
254
                                        return ILLEGAL_PARAMETERS;
 
255
                        }
 
256
                        if ((svm.getIntParameter(SVMWrapper::SVM_TYPE) != C_SVC && svm.getIntParameter(SVMWrapper::SVM_TYPE) != NU_SVC) && separation_prediction)
 
257
                        {
 
258
                                        writeLog_("You cannot perform peptide retention time prediction with a model trained for\n"
 
259
                                                                                + String("peptide separation prediction. Aborting!"));
 
260
                                        return ILLEGAL_PARAMETERS;
 
261
                        }
 
262
 
 
263
                        // Since the POBK is not included in the libsvm we have to load
 
264
                        // additional parameters from additional files.
 
265
                        if (svm.getIntParameter(SVMWrapper::KERNEL_TYPE) == SVMWrapper::OLIGO)
 
266
                        {
 
267
                                inputFileReadable_(svmfile_name + "_additional_parameters", "svm_model (derived)");
 
268
 
 
269
                                Param additional_parameters;
 
270
 
 
271
                                additional_parameters.load(svmfile_name + "_additional_parameters");
 
272
                                if (additional_parameters.exists("first_dim_rt")
 
273
                                                && additional_parameters.getValue("first_dim_rt") != DataValue::EMPTY)
 
274
                                {
 
275
                                        first_dim_rt = additional_parameters.getValue("first_dim_rt").toBool();
 
276
                                }
 
277
                                if (additional_parameters.getValue("kernel_type") != DataValue::EMPTY)
 
278
                                {
 
279
                                        svm.setParameter(SVMWrapper::KERNEL_TYPE, ((String) additional_parameters.getValue("kernel_type")).toInt());
 
280
                                }
 
281
 
 
282
                                if (additional_parameters.getValue("border_length") == DataValue::EMPTY
 
283
                                                && svm.getIntParameter(SVMWrapper::KERNEL_TYPE) == SVMWrapper::OLIGO)
 
284
                                {
 
285
                                        writeLog_("No border length saved in additional parameters file. Aborting!");
 
286
                                        cout << "No border length saved in additional parameters file. Aborting!" << endl;
 
287
                                        return ILLEGAL_PARAMETERS;
 
288
                                }
 
289
                                border_length = ((String)additional_parameters.getValue("border_length")).toInt();
 
290
                                if (additional_parameters.getValue("k_mer_length") == DataValue::EMPTY
 
291
                                                && svm.getIntParameter(SVMWrapper::KERNEL_TYPE) == SVMWrapper::OLIGO)
 
292
                                {
 
293
                                        writeLog_("No k-mer length saved in additional parameters file. Aborting!");
 
294
                                        cout << "No k-mer length saved in additional parameters file. Aborting!" << endl;
 
295
                                        return ILLEGAL_PARAMETERS;
 
296
                                }
 
297
                                k_mer_length = ((String)additional_parameters.getValue("k_mer_length")).toInt();
 
298
                                if (additional_parameters.getValue("sigma") == DataValue::EMPTY
 
299
                                                && svm.getIntParameter(SVMWrapper::KERNEL_TYPE) == SVMWrapper::OLIGO)
 
300
                                {
 
301
                                        writeLog_("No sigma saved in additional parameters file. Aborting!");
 
302
                                        cout << "No sigma saved in additional parameters file. Aborting!" << endl;
 
303
                                        return ILLEGAL_PARAMETERS;
 
304
                                }
 
305
                                sigma = ((String)additional_parameters.getValue("sigma")).toFloat();
 
306
                                if (!separation_prediction && additional_parameters.getValue("sigma_0") == DataValue::EMPTY)
 
307
                                {
 
308
                                        writeLog_("No sigma_0 saved in additional parameters file. Aborting!");
 
309
                                        cout << "No sigma_0 length saved in additional parameters file. Aborting!" << endl;
 
310
                                        return ILLEGAL_PARAMETERS;
 
311
                                }
 
312
                                if (!separation_prediction && additional_parameters.getValue("sigma_0") != DataValue::EMPTY)
 
313
                                {
 
314
                                        sigma_0 = additional_parameters.getValue("sigma_0");
 
315
                                }
 
316
                                if (!separation_prediction && additional_parameters.getValue("sigma_max") == DataValue::EMPTY)
 
317
                                {
 
318
                                        writeLog_("No sigma_max saved in additional parameters file. Aborting!");
 
319
                                        cout << "No sigma_max length saved in additional parameters file. Aborting!" << endl;
 
320
                                        return ILLEGAL_PARAMETERS;
 
321
                                }
 
322
                                if (!separation_prediction && additional_parameters.getValue("sigma_max") != DataValue::EMPTY)
 
323
                                {
 
324
                                        sigma_max = additional_parameters.getValue("sigma_max");
 
325
                                }
 
326
                        }
 
327
 
 
328
                        if ( input_text != "" )
 
329
                        {
 
330
                                loadStrings_(input_text, peptides);
 
331
                                if (svm.getIntParameter(SVMWrapper::KERNEL_TYPE) == SVMWrapper::OLIGO)
 
332
                                {
 
333
                                        for (Size i = 0; i < peptides.size(); ++i)
 
334
                                        {
 
335
                                                modified_peptides.push_back(AASequence(peptides[i]));
 
336
                                        }
 
337
                                        peptides.clear();
 
338
                                }
 
339
                        }
 
340
                        else
 
341
                        {
 
342
                                String document_id;
 
343
                                IdXML_file.load(input_id, protein_identifications, identifications, document_id);
 
344
                        }
 
345
 
 
346
                        //-------------------------------------------------------------
 
347
                        // calculations
 
348
                        //-------------------------------------------------------------
 
349
 
 
350
                        if (input_id != "")
 
351
                        {
 
352
                                for (Size i = 0; i < identifications.size(); i++)
 
353
                                {
 
354
                                        temp_peptide_hits = identifications[i].getHits();
 
355
                                        for (Size j = 0; j < temp_peptide_hits.size(); j++)
 
356
                                        {
 
357
                                                if (svm.getIntParameter(SVMWrapper::KERNEL_TYPE) == SVMWrapper::OLIGO)
 
358
                                                {
 
359
                                                        modified_peptides.push_back(temp_peptide_hits[j].getSequence());
 
360
                                                }
 
361
                                                else
 
362
                                                {
 
363
                                                        peptides.push_back(temp_peptide_hits[j].getSequence().toUnmodifiedString());
 
364
                                                }
 
365
                                        }
 
366
                                }
 
367
                        }
 
368
                        if (svm.getIntParameter(SVMWrapper::KERNEL_TYPE) == SVMWrapper::OLIGO)
 
369
                        {
 
370
                                number_of_peptides = modified_peptides.size();
 
371
                        }
 
372
                        else
 
373
                        {
 
374
                                number_of_peptides = peptides.size();
 
375
                        }
 
376
 
 
377
                        vector<DoubleReal> rts;
 
378
                        rts.resize(number_of_peptides, 0);
 
379
 
 
380
                        vector<String>::iterator it_from = peptides.begin();
 
381
                        vector<String>::iterator it_to = peptides.begin();
 
382
                        vector<AASequence>::iterator it_from_mod = modified_peptides.begin();
 
383
                        vector<AASequence>::iterator it_to_mod = modified_peptides.begin();
 
384
                        Size counter = 0;
 
385
                        while(counter < number_of_peptides)
 
386
                        {
 
387
                                vector<String> temp_peptides;
 
388
                                vector<AASequence> temp_modified_peptides;
 
389
                                vector<DoubleReal> temp_rts;
 
390
 
 
391
                                Size temp_counter = 0;
 
392
                                if (svm.getIntParameter(SVMWrapper::KERNEL_TYPE) != SVMWrapper::OLIGO)
 
393
                                {
 
394
                                        while(temp_counter <= max_number_of_peptides && it_to != peptides.end())
 
395
                                        {
 
396
                                                ++it_to;
 
397
                                                ++temp_counter;
 
398
                                        }
 
399
                                        temp_peptides.insert(temp_peptides.end(), it_from, it_to);
 
400
                                        //temp_peptides.insert(temp_peptides.end(), peptides.begin(), peptides.end());
 
401
                                        temp_rts.resize(temp_peptides.size(), 0);
 
402
 
 
403
                                        prediction_data =
 
404
                                                encoder.encodeLibSVMProblemWithCompositionAndLengthVectors(temp_peptides,
 
405
                                                                                                                                                                                                                                                                                                temp_rts,
 
406
                                                                                                                                                                                                                                                                                                allowed_amino_acid_characters,
 
407
                                                                                                                                                                                                                                                                                                maximum_length);
 
408
                                        it_from = it_to;
 
409
                                }
 
410
                                else if (svm.getIntParameter(SVMWrapper::KERNEL_TYPE) == SVMWrapper::OLIGO)
 
411
                                {
 
412
                                        while(temp_counter < max_number_of_peptides && it_to_mod != modified_peptides.end())
 
413
                                        {
 
414
                                                ++it_to_mod;
 
415
                                                ++temp_counter;
 
416
                                        }
 
417
                                        temp_modified_peptides.insert(temp_modified_peptides.end(), it_from_mod, it_to_mod);
 
418
                                        //                                      temp_modified_peptides.insert(temp_modified_peptides.end(), modified_peptides.begin(), modified_peptides.end());
 
419
                                        temp_rts.resize(temp_modified_peptides.size(), 0);
 
420
 
 
421
                                        encoder.encodeProblemWithOligoBorderVectors(temp_modified_peptides,
 
422
                                                                                                                                                                                                                        k_mer_length,
 
423
                                                                                                                                                                                                                        allowed_amino_acid_characters,
 
424
                                                                                                                                                                                                                        border_length,
 
425
                                                                                                                                                                                                                        prediction_samples.sequences);
 
426
                                        prediction_samples.labels = temp_rts;
 
427
                                        it_from_mod = it_to_mod;
 
428
                                }
 
429
                                counter += temp_counter;
 
430
 
 
431
                                if (svm.getIntParameter(SVMWrapper::KERNEL_TYPE) == SVMWrapper::OLIGO)
 
432
                                {
 
433
                                        inputFileReadable_((svmfile_name + "_samples").c_str(), "svm_model (derived)");
 
434
 
 
435
                                        training_samples.load(svmfile_name + "_samples");
 
436
                                        svm.setTrainingSample(training_samples);
 
437
 
 
438
                                        svm.setParameter(SVMWrapper::BORDER_LENGTH, (Int) border_length);
 
439
                                        svm.setParameter(SVMWrapper::SIGMA, sigma);
 
440
                                        svm.predict(prediction_samples, predicted_retention_times);
 
441
                                        prediction_samples.labels.clear();
 
442
                                        prediction_samples.sequences.clear();
 
443
                                }
 
444
                                else
 
445
                                {
 
446
                                        svm.predict(prediction_data, predicted_retention_times);
 
447
                                        LibSVMEncoder::destroyProblem(prediction_data);
 
448
                                }
 
449
                                for (Size i = 0; i < temp_counter; ++i)
 
450
                                {
 
451
                                        if (svm.getIntParameter(SVMWrapper::KERNEL_TYPE) == SVMWrapper::OLIGO && output_text=="")
 
452
                                        {
 
453
                                                predicted_modified_data.insert(make_pair(temp_modified_peptides[i],
 
454
                                                                                                                                                                                (predicted_retention_times[i] * total_gradient_time)));
 
455
                                        }
 
456
                                        else if (svm.getIntParameter(SVMWrapper::KERNEL_TYPE) != SVMWrapper::OLIGO)
 
457
                                        {
 
458
                                                predicted_data.insert(make_pair(temp_peptides[i],
 
459
                                                                                                                                                                                (predicted_retention_times[i] * total_gradient_time)));
 
460
                                        }
 
461
                                        else
 
462
                                        {
 
463
                                                predicted_data.insert(make_pair(temp_modified_peptides[i].toString(),
 
464
                                                                                                                                                                                (predicted_retention_times[i] * total_gradient_time)));
 
465
                                        }
 
466
                                }
 
467
                                all_predicted_retention_times.insert(all_predicted_retention_times.end(), predicted_retention_times.begin(), predicted_retention_times.end());
 
468
                                predicted_retention_times.clear();
 
469
                        }
 
470
 
 
471
                        if (input_id != "")
 
472
                        {
 
473
                                if (!separation_prediction)
 
474
                                {
 
475
                                        for (Size i = 0; i < identifications.size(); i++)
 
476
                                        {
 
477
                                                temp_peptide_hits = identifications[i].getHits();
 
478
 
 
479
                                                for (Size j = 0; j < temp_peptide_hits.size(); j++)
 
480
                                                {
 
481
                                                        DoubleReal temp_rt = 0.;
 
482
                                                        DoubleReal temp_p_value = 0.;
 
483
 
 
484
                                                        if (svm.getIntParameter(SVMWrapper::KERNEL_TYPE) == SVMWrapper::OLIGO)
 
485
                                                        {
 
486
                                                                temp_rt = predicted_modified_data[temp_peptide_hits[j].getSequence()];
 
487
                                                        }
 
488
                                                        else
 
489
                                                        {
 
490
                                                                temp_rt = predicted_data[temp_peptide_hits[j].getSequence().toUnmodifiedString()];
 
491
                                                        }
 
492
 
 
493
                                                        if (first_dim_rt)
 
494
                                                        {
 
495
                                                                temp_point.first = identifications[i].getMetaValue("first_dim_rt");
 
496
                                                        }
 
497
                                                        else
 
498
                                                        {
 
499
                                                                temp_point.first = 0;
 
500
                                                                if (identifications[i].metaValueExists("RT"))
 
501
                                                                {
 
502
                                                                        temp_point.first = identifications[i].getMetaValue("RT");
 
503
                                                                }
 
504
                                                        }
 
505
                                                        if (svm.getIntParameter(SVMWrapper::KERNEL_TYPE) == SVMWrapper::OLIGO)
 
506
                                                        {
 
507
                                                                temp_point.second = temp_rt;
 
508
                                                                temp_p_value = svm.getPValue(sigma_0, sigma_max, temp_point);
 
509
                                                        }
 
510
                                                        if (first_dim_rt)
 
511
                                                        {
 
512
                                                                if (svm.getIntParameter(SVMWrapper::KERNEL_TYPE) == SVMWrapper::OLIGO)
 
513
                                                                {
 
514
                                                                        temp_peptide_hits[j].setMetaValue("predicted_RT_p_value_first_dim",temp_p_value);
 
515
                                                                }
 
516
                                                                temp_peptide_hits[j].setMetaValue("predicted_RT_first_dim",temp_rt);
 
517
                                                                performance_retention_times.push_back(identifications[i].getMetaValue("first_dim_rt"));
 
518
                                                        }
 
519
                                                        else
 
520
                                                        {
 
521
                                                                if (identifications[i].metaValueExists("RT"))
 
522
                                                                {
 
523
                                                                        performance_retention_times.push_back(identifications[i].getMetaValue("RT"));
 
524
                                                                }
 
525
                                                                else
 
526
                                                                {
 
527
                                                                        performance_retention_times.push_back(0);
 
528
                                                                }
 
529
                                                                if (svm.getIntParameter(SVMWrapper::KERNEL_TYPE) == SVMWrapper::OLIGO)
 
530
                                                                {
 
531
                                                                        temp_peptide_hits[j].setMetaValue("predicted_RT_p_value",temp_p_value);
 
532
                                                                }
 
533
                                                                temp_peptide_hits[j].setMetaValue("predicted_RT",temp_rt);
 
534
                                                        }
 
535
                                                }
 
536
                                                identifications[i].setHits(temp_peptide_hits);
 
537
                                                if (getFlag_("out_id:rewrite_peptideidentification_rtmz"))
 
538
                                                {
 
539
                                                        identifications[i].sort();
 
540
                                                        Int charge = identifications[i].getHits().front().getCharge();
 
541
                                                        DoubleReal mz =  identifications[i].getHits().front().getSequence().getMonoWeight(Residue::Full, charge) / DoubleReal(charge);
 
542
                                                        DoubleReal rt =  identifications[i].getHits().front().getMetaValue("predicted_RT");
 
543
 
 
544
                                                        identifications[i].setMetaValue("RT",rt);
 
545
                                                        identifications[i].setMetaValue("MZ",mz);
 
546
                                                }
 
547
 
 
548
                                                identifications[i].setHits(temp_peptide_hits);
 
549
                                        }
 
550
                                }
 
551
                                else // separation prediction
 
552
                                {
 
553
                                        vector<PeptideHit> hits_positive;
 
554
                                        vector<PeptideHit> hits_negative;
 
555
 
 
556
                                        PeptideIdentification temp_identification;
 
557
 
 
558
                                        for (Size i = 0; i < identifications.size(); i++)
 
559
                                        {
 
560
                                                hits_negative.clear();
 
561
                                                hits_positive.clear();
 
562
 
 
563
                                                temp_peptide_hits = identifications[i].getHits();
 
564
                                                for(vector<PeptideHit>::iterator it = temp_peptide_hits.begin();
 
565
                                                                it != temp_peptide_hits.end();
 
566
                                                                ++it)
 
567
                                                {
 
568
                                                        if (svm.getIntParameter(SVMWrapper::KERNEL_TYPE) == SVMWrapper::OLIGO)
 
569
                                                        {
 
570
                                                                if (predicted_modified_data[it->getSequence()] > 0)
 
571
                                                                {
 
572
                                                                        hits_positive.push_back(*it);
 
573
                                                                }
 
574
                                                                else
 
575
                                                                {
 
576
                                                                        hits_negative.push_back(*it);
 
577
                                                                }
 
578
                                                        }
 
579
                                                        else
 
580
                                                        {
 
581
                                                                if (predicted_data[it->getSequence().toUnmodifiedString()] > 0)
 
582
                                                                {
 
583
                                                                        hits_positive.push_back(*it);
 
584
                                                                }
 
585
                                                                else
 
586
                                                                {
 
587
                                                                        hits_negative.push_back(*it);
 
588
                                                                }
 
589
                                                        }
 
590
                                                }
 
591
 
 
592
                                                if (identifications[i].metaValueExists("MZ"))
 
593
                                                {
 
594
                                                        temp_identification.setMetaValue("MZ",identifications[i].getMetaValue("MZ"));
 
595
                                                }
 
596
                                                if (identifications[i].metaValueExists("RT"))
 
597
                                                {
 
598
                                                        temp_identification.setMetaValue("RT",identifications[i].getMetaValue("RT"));
 
599
                                                }
 
600
 
 
601
                                                temp_identification = identifications[i];
 
602
                                                temp_identification.setHits(hits_positive);
 
603
                                                identifications_positive.push_back(temp_identification);
 
604
                                                temp_identification.setHits(hits_negative);
 
605
                                                identifications_negative.push_back(temp_identification);
 
606
                                        }
 
607
                                }
 
608
                        }
 
609
                        //-------------------------------------------------------------
 
610
                        // writing output
 
611
                        //-------------------------------------------------------------
 
612
 
 
613
                        if (separation_prediction)
 
614
                        {
 
615
                                IdXML_file.store(outputfile_name_positive,
 
616
                                                                                                 protein_identifications,
 
617
                                                                                                 identifications_positive);
 
618
                                IdXML_file.store(outputfile_name_negative,
 
619
                                                                                                 protein_identifications,
 
620
                                                                                                 identifications_negative);
 
621
                        }
 
622
                        else
 
623
                        {
 
624
                                if (output_text != "") // text
 
625
                                {
 
626
                                        writeStringLabelLines_(output_text, predicted_data);
 
627
                                }
 
628
                                if (output_id != "") // idXML
 
629
                                {
 
630
                                        IdXML_file.store(output_id,
 
631
                                                                                                         protein_identifications,
 
632
                                                                                                         identifications);
 
633
                                        writeDebug_("Linear correlation between predicted and measured rt is: "
 
634
                                                                                        + String(Math::pearsonCorrelationCoefficient(all_predicted_retention_times.begin(),
 
635
                                                                                                                                                                all_predicted_retention_times.end(),
 
636
                                                                                                                                                                performance_retention_times.begin(),
 
637
                                                                                                                                                                performance_retention_times.end())), 1);
 
638
                                        writeDebug_("MSE between predicted and measured rt is: "
 
639
                                                                                        + String(Math::meanSquareError(all_predicted_retention_times.begin(),
 
640
                                                                                                                                                                all_predicted_retention_times.end(),
 
641
                                                                                                                                                                performance_retention_times.begin(),
 
642
                                                                                                                                                                performance_retention_times.end())), 1);
 
643
 
 
644
                                }
 
645
                        }
 
646
                        return EXECUTION_OK;
 
647
                }
 
648
};
 
649
 
 
650
 
 
651
int main( int argc, const char** argv )
 
652
{
 
653
        TOPPRTPredict tool;
 
654
        return tool.main(argc,argv);
 
655
}
 
656
 
 
657
/// @endcond
 
658
 
 
659
 
 
660
 
 
661
 
 
662