~ubuntu-branches/ubuntu/wily/openms/wily

« back to all changes in this revision

Viewing changes to source/ANALYSIS/ID/PILISCrossValidation.C

  • Committer: Package Import Robot
  • Author(s): Filippo Rusconi
  • Date: 2012-11-12 15:58:12 UTC
  • Revision ID: package-import@ubuntu.com-20121112155812-vr15wtg9b50cuesg
Tags: upstream-1.9.0
ImportĀ upstreamĀ versionĀ 1.9.0

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
// -*- Mode: C++; tab-width: 2; -*-
 
2
// vi: set ts=2:
 
3
//
 
4
// --------------------------------------------------------------------------
 
5
//                   OpenMS Mass Spectrometry Framework
 
6
// --------------------------------------------------------------------------
 
7
//  Copyright (C) 2003-2011 -- Oliver Kohlbacher, Knut Reinert
 
8
//
 
9
//  This library is free software; you can redistribute it and/or
 
10
//  modify it under the terms of the GNU Lesser General Public
 
11
//  License as published by the Free Software Foundation; either
 
12
//  version 2.1 of the License, or (at your option) any later version.
 
13
//
 
14
//  This library is distributed in the hope that it will be useful,
 
15
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
 
16
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 
17
//  Lesser General Public License for more details.
 
18
//
 
19
//  You should have received a copy of the GNU Lesser General Public
 
20
//  License along with this library; if not, write to the Free Software
 
21
//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
22
//
 
23
// --------------------------------------------------------------------------
 
24
// $Maintainer: Andreas Bertsch $
 
25
// $Authors: Andreas Bertsch $
 
26
// --------------------------------------------------------------------------
 
27
 
 
28
#include <OpenMS/FILTERING/TRANSFORMERS/Normalizer.h>
 
29
#include <OpenMS/COMPARISON/SPECTRA/SpectrumAlignmentScore.h>
 
30
#include <OpenMS/CONCEPT/Factory.h>
 
31
#include <OpenMS/ANALYSIS/ID/PILISCrossValidation.h>
 
32
#include <OpenMS/CHEMISTRY/TheoreticalSpectrumGenerator.h>
 
33
#include <OpenMS/ANALYSIS/ID/PILISModel.h>
 
34
#include <OpenMS/ANALYSIS/MRM/MRMFragmentSelection.h>
 
35
 
 
36
using namespace std;
 
37
 
 
38
namespace OpenMS
 
39
{
 
40
        PILISCrossValidation::PILISCrossValidation()
 
41
                : DefaultParamHandler("PILISCrossValidation")
 
42
        {
 
43
                defaults_.setValue("nfold", 10, "Number of partitions to use for cross validation");
 
44
                defaults_.setValue("optimization_method", "tophit_against_all_others", "Scoring method used for optimization");
 
45
                defaults_.setValidStrings("optimization_method",StringList::create("tophit_against_all_others,only_top_hit,top_n_ions,top_n_ions_by"));
 
46
                defaults_.setValue("compare_function", "SpectrumAlignmentScore", "Spectra scoring function to use");
 
47
                defaults_.setValidStrings("compare_function", StringList::create("SpectrumAlignmentScore,ZhangSimilarityScore"));
 
48
                defaults_.setValue("num_top_peaks", 2, "Number of highest abundant peaks to consider with top_n_ion and top_n_ions_by optimization_methods");
 
49
                defaults_.setValue("min_intensity", 0.30, "Min relative intensity of highest abundant peaks to consider in top_n_ions_by");
 
50
                defaults_.setValue("fragment_mass_tolerance", 0.5, "Fragment mass tolerance, mainly used in compare function.");
 
51
                defaults_.setValue("normalize_to_TIC", "true", "Whether the spectra should be normalized to TIC before training, to max of one otherwise.");
 
52
                defaults_.setValidStrings("normalize_to_TIC", StringList::create("true,false"));
 
53
                defaultsToParam_();
 
54
        }
 
55
 
 
56
        PILISCrossValidation::PILISCrossValidation(const PILISCrossValidation& rhs)
 
57
                : DefaultParamHandler(rhs),
 
58
                        cv_options_(rhs.cv_options_)
 
59
        {
 
60
        }
 
61
 
 
62
        PILISCrossValidation::~PILISCrossValidation()
 
63
        {
 
64
        }
 
65
 
 
66
        PILISCrossValidation& PILISCrossValidation::operator = (const PILISCrossValidation& rhs)
 
67
        {
 
68
                if (this != &rhs)
 
69
                {
 
70
                        cv_options_ = rhs.cv_options_;
 
71
                }
 
72
                return *this;
 
73
        }
 
74
 
 
75
        void PILISCrossValidation::partition_(vector<vector<Peptide> >& parts, const vector<Peptide>& source)
 
76
        {
 
77
                Size nfold(param_.getValue("nfold"));
 
78
        parts = vector<vector<Peptide> >(nfold);
 
79
 
 
80
        Size count = 0;
 
81
        set<SignedSize> used_numbers;
 
82
        Size part = 0;
 
83
        while (count != source.size())
 
84
        {
 
85
        // get new random variable [0, source.size()]
 
86
        Size r = (Size)(DoubleReal(rand())/DoubleReal(RAND_MAX) * (source.size()));
 
87
 
 
88
        if (used_numbers.find(r) == used_numbers.end())
 
89
        {
 
90
        ++count;
 
91
        used_numbers.insert(r);
 
92
        parts[part++].push_back(source[r]);
 
93
 
 
94
        if (part == nfold)
 
95
        {
 
96
                part = 0;
 
97
        }
 
98
        }
 
99
        }
 
100
 
 
101
        return;
 
102
        }
 
103
 
 
104
// generate parameter permutations
 
105
void PILISCrossValidation::generateParameters_(const Param& param, const Map<String, Option>& options, vector<Param>& parameters)
 
106
{
 
107
        if (options.empty())
 
108
        {
 
109
                parameters.push_back(param);
 
110
                return;
 
111
        }
 
112
        for (Map<String, Option>::ConstIterator it = options.begin(); it != options.end(); ++it)
 
113
        {
 
114
                Map<String, Option> new_options = options;
 
115
                new_options.erase(new_options.find(it->first));
 
116
                Param new_param = param;
 
117
                
 
118
                if (it->second.type == Option::DOUBLE)
 
119
                {
 
120
                        DoubleReal dbl_min(it->second.dbl_min), dbl_max(it->second.dbl_max);
 
121
                        if (dbl_min > dbl_max)
 
122
                        {
 
123
                                cerr << "PILISCrossValidation: " << it->first << " min-value > max-value! (" << dbl_min << ", " << dbl_max << ")" << endl;
 
124
                        }
 
125
                        for (DoubleReal value = dbl_min; value <= dbl_max; value += it->second.dbl_stepsize)
 
126
                        {
 
127
                                new_param.setValue(it->first, value);
 
128
                                generateParameters_(new_param, new_options, parameters);
 
129
                        }
 
130
                        continue;
 
131
                }
 
132
                if (it->second.type == Option::INT)
 
133
                {
 
134
                        Int int_min(it->second.int_min), int_max(it->second.int_max);
 
135
                        if (int_min > int_max)
 
136
                        {
 
137
                                cerr << "PILISCrossValidation: " << it->first << " min-value > max-value! (" << int_min << ", " << int_max << ")" << endl;
 
138
                        }
 
139
                        for (Int value = int_min; value <= int_max; value += it->second.int_stepsize)
 
140
                        {
 
141
                                new_param.setValue(it->first, value);
 
142
                                generateParameters_(new_param, new_options, parameters);
 
143
                        }
 
144
                }
 
145
        }
 
146
        return;
 
147
}
 
148
 
 
149
 
 
150
        void PILISCrossValidation::apply(Param& PILIS_param, const PILISModel& base_model, const vector<Peptide>& peptides)
 
151
        {
 
152
                vector<vector<Peptide> > partitions;
 
153
                partition_(partitions, peptides);
 
154
 
 
155
                SpectrumAlignmentScore sa;
 
156
        Param sa_param(sa.getParameters());
 
157
        sa_param.setValue("tolerance", (DoubleReal)PILIS_param.getValue("fragment_mass_tolerance"));
 
158
        sa_param.setValue("use_linear_factor", "true");
 
159
        sa.setParameters(sa_param);
 
160
 
 
161
                Param model_param = base_model.getParameters();
 
162
                vector<Param> all_parameters;
 
163
                generateParameters_(model_param, cv_options_, all_parameters);
 
164
 
 
165
                // gnaaah
 
166
                vector<Param> all_new_parameters;
 
167
                for (vector<Param>::const_iterator it1 = all_parameters.begin(); it1 != all_parameters.end(); ++it1)
 
168
                {
 
169
                        bool found(false);
 
170
                        for (vector<Param>::const_iterator it2 = all_new_parameters.begin(); it2 != all_new_parameters.end(); ++it2)
 
171
                        {
 
172
                                if (*it1 == *it2)
 
173
                                {
 
174
                                        found = true;
 
175
                                }
 
176
                        }
 
177
                        if (!found)
 
178
                        {
 
179
                                all_new_parameters.push_back(*it1);
 
180
                        }
 
181
                }
 
182
                all_parameters = all_new_parameters;
 
183
                //cerr << all_parameters.size() << " parameters generated" << endl;
 
184
                for (Size i = 0; i != all_parameters.size(); ++i)
 
185
                {
 
186
                        for (Param::ParamIterator it = all_parameters[i].begin(); it != all_parameters[i].end(); ++it)
 
187
                        {
 
188
                                //cerr << i+1 << " " << it->name << " " << it->value.toString() << endl;
 
189
                        }
 
190
                }
 
191
 
 
192
                Normalizer to_one_normalizer, tic_normalizer;
 
193
                Param normalizer_param(to_one_normalizer.getParameters());
 
194
                normalizer_param.setValue("method", "to_one");
 
195
                to_one_normalizer.setParameters(normalizer_param);
 
196
                normalizer_param.setValue("method", "to_TIC");
 
197
                tic_normalizer.setParameters(normalizer_param);
 
198
 
 
199
                bool normalize_to_TIC(param_.getValue("normalize_to_TIC").toBool());
 
200
        
 
201
                // iterate over all parameter setting permutations
 
202
                vector<DoubleReal> scores;
 
203
                for (vector<Param>::const_iterator pait = all_parameters.begin(); pait != all_parameters.end(); ++pait)
 
204
                {
 
205
                        cerr << "Param #" << pait - all_parameters.begin() + 1 << "/" << all_parameters.size() << endl;
 
206
                        PILISModel model = base_model;
 
207
                        model.setParameters(*pait);
 
208
                        vector<DoubleReal> top_scores, non_top_scores;
 
209
                        vector<vector<RichPeakSpectrum> > exp_spectra;
 
210
                        vector<vector<vector<RichPeakSpectrum> > > sim_spectra;
 
211
        
 
212
                        // perform cross validation     
 
213
                        for (Size i = 0; i != partitions.size(); ++i)
 
214
                        {
 
215
                                cerr << "Partition #" << i + 1 << endl;
 
216
                                for (Size j = 0; j != partitions.size(); ++j)
 
217
                                {
 
218
                                        for (vector<Peptide>::const_iterator it = partitions[j].begin(); it != partitions[j].end(); ++it)
 
219
                {
 
220
                                                if (i != j)
 
221
                                                {
 
222
                                                        RichPeakSpectrum spec = it->spec;
 
223
                                        spec.setMSLevel(2);
 
224
 
 
225
                                                        if (normalize_to_TIC)
 
226
                                                        {
 
227
                                                tic_normalizer.filterSpectrum(spec);
 
228
                                                        }
 
229
                                                        else
 
230
                                                        {
 
231
                                                                to_one_normalizer.filterSpectrum(spec);
 
232
                                                        }
 
233
                                                        //cerr << "training with " << it->sequence << " " << it->charge << "...";
 
234
                                        model.train(it->spec, it->sequence, it->charge);
 
235
                                                        //cerr << "ended" << endl;
 
236
                                }
 
237
                                        }
 
238
                                }
 
239
 
 
240
                                vector<vector<RichPeakSpectrum> > sim_spectra_part;
 
241
                                vector<RichPeakSpectrum> exp_spectra_part;
 
242
                                // evaluate this setting
 
243
                                for (vector<Peptide>::const_iterator it = partitions[i].begin(); it != partitions[i].end(); ++it)
 
244
                                {
 
245
                                        RichPeakSpectrum exp_spec = it->spec;
 
246
                                        to_one_normalizer.filterSpectrum(exp_spec);
 
247
                                        exp_spec.sortByPosition();
 
248
                                        exp_spectra_part.push_back(exp_spec);
 
249
 
 
250
                                        //cerr << "Evalulating...(#peptides=" << it->hits.size() << ") ";
 
251
                                        vector<DoubleReal> new_scores;
 
252
                                        // for all hits
 
253
                                        vector<RichPeakSpectrum> sim_spectra_hits;
 
254
                                        for (vector<PeptideHit>::const_iterator pit = it->hits.begin(); pit != it->hits.end(); ++pit)
 
255
                                        {
 
256
                        RichPeakSpectrum sim_spec;
 
257
                                                //cerr << "simulate " << pit->getSequence() << " " << pit->getCharge() << "..";
 
258
                        model.getSpectrum(sim_spec, pit->getSequence(), pit->getCharge());
 
259
        
 
260
                                                Precursor prec;
 
261
                                                prec.setCharge(pit->getCharge());
 
262
                                                prec.setPosition((pit->getSequence().getMonoWeight() + (DoubleReal)pit->getCharge())/(DoubleReal)pit->getCharge());
 
263
                                                sim_spec.getPrecursors().push_back(prec);
 
264
 
 
265
                                                //sim_spec.getPrecursorPeak().setCharge(pit->getCharge());
 
266
                        //sim_spec.getPrecursorPeak().setPosition((pit->getSequence().getMonoWeight() + (DoubleReal)pit->getCharge())/(DoubleReal)pit->getCharge());
 
267
 
 
268
                        to_one_normalizer.filterSpectrum(sim_spec);
 
269
                                                sim_spec.sortByPosition();
 
270
                                                sim_spectra_hits.push_back(sim_spec);
 
271
                                        }
 
272
                                        //cerr << "ended" << endl;
 
273
                                        sim_spectra_part.push_back(sim_spectra_hits);
 
274
                                }
 
275
                                sim_spectra.push_back(sim_spectra_part);
 
276
                                exp_spectra.push_back(exp_spectra_part);
 
277
                        }
 
278
 
 
279
                        scores.push_back(scoreHits(sim_spectra, exp_spectra));
 
280
                }
 
281
 
 
282
                Size best_param_pos(0);
 
283
                DoubleReal max_score(0);
 
284
                Size pos(0);
 
285
                for (vector<DoubleReal>::const_iterator it = scores.begin(); it != scores.end(); ++it, ++pos)
 
286
                {
 
287
                        if (*it > max_score)
 
288
                        {
 
289
                                max_score = *it;
 
290
                                best_param_pos = pos;
 
291
                        }
 
292
                }
 
293
 
 
294
                
 
295
                cerr << "Best parameters (score=" << max_score << ")" << endl;
 
296
        for (Param::ParamIterator it = all_parameters[best_param_pos].begin(); it != all_parameters[best_param_pos].end(); ++it)
 
297
        {
 
298
                        cerr << " " << it->name << " " << it->value.toString() << endl;
 
299
        }
 
300
                PILIS_param = all_parameters[best_param_pos];
 
301
 
 
302
                return;
 
303
        }
 
304
 
 
305
        DoubleReal PILISCrossValidation::scoreHits(const vector<vector<vector<RichPeakSpectrum> > >& sim_spectra, const vector<vector<RichPeakSpectrum> >& exp_spectra)
 
306
        {
 
307
                String optimization_method = param_.getValue("optimization_method");
 
308
 
 
309
                if (optimization_method == "tophit_against_all_others")
 
310
                {
 
311
          // consider all against all
 
312
                        vector<DoubleReal> top_scores, non_top_scores;
 
313
                        for (Size i = 0; i != sim_spectra.size(); ++i)
 
314
                        {
 
315
                                for (Size j = 0; j != sim_spectra[i].size(); ++j)
 
316
                                {
 
317
                                        for (Size k = 0; k != sim_spectra[i][j].size(); ++k)
 
318
                                        {
 
319
                                                DoubleReal score = scoreSpectra_(sim_spectra[i][j][k], exp_spectra[i][j]);
 
320
                                                if (k == 0)
 
321
                                                {
 
322
                                                        top_scores.push_back(score);
 
323
                                                }
 
324
                                                else
 
325
                                                {
 
326
                                                        non_top_scores.push_back(score);
 
327
                                                }
 
328
                                        }
 
329
                                }
 
330
                        }
 
331
                        
 
332
        DoubleReal sum = 0;
 
333
        for (vector<DoubleReal>::const_iterator it1 = top_scores.begin(); it1 != top_scores.end(); ++it1)
 
334
        {
 
335
        for (vector<DoubleReal>::const_iterator it2 = non_top_scores.begin(); it2 != non_top_scores.end(); ++it2)
 
336
        {
 
337
                sum += *it1 - *it2;
 
338
        }
 
339
        }
 
340
        DoubleReal score = sum / (DoubleReal)(top_scores.size() * non_top_scores.size());
 
341
        cerr << "Avg. score-diff for param: " << score << endl;
 
342
                        return score;
 
343
                }
 
344
 
 
345
    if (optimization_method == "only_top_hit")
 
346
                {
 
347
        // consider only top-scores
 
348
                        vector<DoubleReal> top_scores;
 
349
                        for (Size i = 0; i != sim_spectra.size(); ++i)
 
350
                        {
 
351
                                for (Size j = 0; j != sim_spectra[i].size(); ++j)
 
352
                                {
 
353
          if ( !sim_spectra[i][j].empty())
 
354
                                        {
 
355
                                                DoubleReal score = scoreSpectra_(sim_spectra[i][j][0], exp_spectra[i][j]);
 
356
                                                top_scores.push_back(score);
 
357
                                        }
 
358
                                }
 
359
                        }
 
360
        DoubleReal sum(0);
 
361
        for (vector<DoubleReal>::const_iterator it = top_scores.begin(); it != top_scores.end(); ++it)
 
362
        {
 
363
        sum += *it;
 
364
                }
 
365
        DoubleReal score(sum / (DoubleReal)top_scores.size());
 
366
        cerr << "Avg. score for param: " << score << endl;
 
367
                        return score;
 
368
                }
 
369
 
 
370
                if (optimization_method == "top_n_ions")
 
371
                {
 
372
                        MRMFragmentSelection fragment_selection;
 
373
                        Size num_correct_topn(0), num_all(0);
 
374
                        for (Size i = 0; i != sim_spectra.size(); ++i)
 
375
                        {
 
376
                                for (Size j = 0; j != sim_spectra[i].size(); ++j)
 
377
                                {
 
378
                                        ++num_all;
 
379
          if ( !sim_spectra[i][j].empty() )
 
380
                                        {
 
381
                                                vector<RichPeak1D> exp_highest_peak, sim_highest_peak;
 
382
                                                fragment_selection.selectFragments(exp_highest_peak, exp_spectra[i][j]);
 
383
                                                fragment_selection.selectFragments(sim_highest_peak, sim_spectra[i][j][0]);
 
384
 
 
385
                                                DoubleReal fragment_mass_tolerance((DoubleReal)param_.getValue("fragment_mass_tolerance"));
 
386
                                                bool has_topn(false);
 
387
                                for (Size ii = 0; ii < exp_highest_peak.size(); ++ii)
 
388
                                {
 
389
                                for (Size jj = ii; jj < sim_highest_peak.size(); ++jj)
 
390
                                        {
 
391
                                        if (fabs(exp_highest_peak[ii].getMZ() - sim_highest_peak[jj].getMZ()) < fragment_mass_tolerance)
 
392
                                        {
 
393
                                        ++num_correct_topn;
 
394
                                        has_topn = true;
 
395
                                                                        break;
 
396
                                        }
 
397
                                }
 
398
                                if (has_topn)
 
399
                                {
 
400
                                        break;
 
401
                                                        }
 
402
                                                }
 
403
                }
 
404
                                }
 
405
        }
 
406
                        DoubleReal score((DoubleReal)num_correct_topn / (DoubleReal)num_all);
 
407
                        cerr << "Avg. score in top " << param_.getValue("num_top_peaks") << ": " << score << endl;
 
408
                        return score;
 
409
                }
 
410
 
 
411
                if (optimization_method == "top_n_ions_by")
 
412
                {
 
413
                        MRMFragmentSelection fragment_selection;
 
414
                        DoubleReal fragment_mass_tolerance((DoubleReal)param_.getValue("fragment_mass_tolerance"));
 
415
                        DoubleReal min_intensity((DoubleReal)param_.getValue("min_intensity"));
 
416
 
 
417
                        TheoreticalSpectrumGenerator tsg;
 
418
                        Param tsg_param(tsg.getParameters());
 
419
      tsg_param.setValue("add_metainfo", "true");
 
420
      tsg.setParameters(tsg_param);
 
421
 
 
422
      SpectrumAlignment aligner;
 
423
      Param aligner_param(aligner.getParameters());
 
424
      aligner_param.setValue("tolerance", fragment_mass_tolerance);
 
425
      aligner.setParameters(aligner_param);
 
426
 
 
427
                        Size num_correct_topn(0), num_all(0);
 
428
                        for (Size i = 0; i != sim_spectra.size(); ++i)
 
429
      {
 
430
        for (Size j = 0; j != sim_spectra[i].size(); ++j)
 
431
        {
 
432
                                        ++num_all;
 
433
          if ( !sim_spectra[i][j].empty() )
 
434
          {
 
435
            vector<RichPeak1D> sim_highest_peak;
 
436
                                                //cerr << "Peptide: " << exp_spectra[i][j].getPeptideIdentifications().begin()->getHits().begin()->getSequence() << " "
 
437
                                                //                      << exp_spectra[i][j].getPeptideIdentifications().begin()->getHits().begin()->getCharge() << endl;
 
438
                                                cerr << "Sim-highest peaks: (sim-spectrum size=" << sim_spectra[i][j][0].size() << ") ";
 
439
                                                /*
 
440
                                                for (Size p = 0; p != sim_spectra[i][j][0].size(); ++p)
 
441
                                                {
 
442
                                                        cerr << sim_spectra[i][j][0][p].getMZ() << " " << sim_spectra[i][j][0][p].getIntensity() << " " << sim_spectra[i][j][0][p].getMetaValue("IonName") << endl;
 
443
                                                }*/
 
444
            fragment_selection.selectFragments(sim_highest_peak, sim_spectra[i][j][0]);
 
445
                                                cerr << sim_highest_peak.size();
 
446
                                                cerr << endl;
 
447
 
 
448
                                                // normalize the exp_spectrum to the highest peaks which could be picked
 
449
                                                RichPeakSpectrum exp_spec(exp_spectra[i][j]);
 
450
            RichPeakSpectrum theo_spec;
 
451
            const AASequence& peptide(exp_spec.getPeptideIdentifications().begin()->getHits().begin()->getSequence());
 
452
            tsg.addPeaks(theo_spec, peptide, Residue::BIon, 1);
 
453
            //tsg.addPeaks(theo_spec, peptide, Residue::BIon, 2); // TODO check which charge states are allowed
 
454
            tsg.addPeaks(theo_spec, peptide, Residue::YIon, 1);
 
455
            //tsg.addPeaks(theo_spec, peptide, Residue::YIon, 2);
 
456
            theo_spec.sortByPosition();
 
457
            vector<pair<Size, Size> > alignment;
 
458
            aligner.getSpectrumAlignment(alignment, exp_spec, theo_spec);
 
459
                                                vector<RichPeak1D> exp_highest_peak;
 
460
 
 
461
            for (vector<pair<Size, Size> >::const_iterator it = alignment.begin(); it != alignment.end(); ++it)
 
462
            {
 
463
              exp_spec[it->first].setMetaValue("IonName", (String)theo_spec[it->second].getMetaValue("IonName"));
 
464
                                                        exp_highest_peak.push_back(exp_spec[it->first]);
 
465
            }
 
466
 
 
467
                                                //cerr << "Exp-highest peaks: ";
 
468
                                                fragment_selection.selectFragments(exp_highest_peak, exp_spec);
 
469
                                                //cerr << endl;
 
470
 
 
471
                                                // normalize the exp spectrum to max possible intensity to be selected
 
472
                                                DoubleReal max_exp_int(0);
 
473
                                                for (vector<RichPeak1D>::const_iterator it = exp_highest_peak.begin(); it != exp_highest_peak.end(); ++it)
 
474
                                                {
 
475
                                                        if (it->getIntensity() > max_exp_int)
 
476
                                                        {
 
477
                                                                max_exp_int = it->getIntensity();
 
478
                                                        }
 
479
                                                }
 
480
 
 
481
                                                for (vector<RichPeak1D>::iterator it = exp_highest_peak.begin(); it != exp_highest_peak.end(); ++it)
 
482
                                                {
 
483
                                                        it->setIntensity(it->getIntensity() / max_exp_int);
 
484
                                                }
 
485
 
 
486
                                                bool has_topn(false);
 
487
                                                for (Size ii = 0; ii < exp_highest_peak.size(); ++ii)
 
488
                                                {
 
489
                                                        for (Size jj = 0; jj < sim_highest_peak.size(); ++jj)
 
490
                                                        {
 
491
                                                                //cerr << ii << " " << jj << " " <<  exp_highest_peak[ii].getMZ() << " " << sim_highest_peak[jj].getMZ() << " " << exp_highest_peak[ii].getIntensity() << endl;
 
492
                                                                if (fabs(exp_highest_peak[ii].getMZ() - sim_highest_peak[jj].getMZ()) < fragment_mass_tolerance 
 
493
                                                                                && exp_highest_peak[ii].getIntensity() >= min_intensity)
 
494
                                                                {
 
495
                                                                        cerr << "Found: " << exp_spec.getPeptideIdentifications().begin()->getHits().begin()->getSequence()
 
496
                                                                                         << " " << exp_spec.getPeptideIdentifications().begin()->getHits().begin()->getCharge() 
 
497
                                                                                         << " exp m/z=" << exp_highest_peak[ii].getMZ()  << ", sim m/z=" << sim_highest_peak[jj].getMZ() 
 
498
                                                                                         << " exp int=" << exp_highest_peak[ii].getIntensity() <<  ", sim m/z=" << sim_highest_peak[jj].getIntensity() << endl;
 
499
                                                                        ++num_correct_topn;
 
500
                  has_topn = true;
 
501
                  break;
 
502
                }
 
503
              }
 
504
              if (has_topn)
 
505
              {
 
506
                break;
 
507
              }
 
508
                                                }
 
509
                                        }
 
510
                                }
 
511
                        }
 
512
                        DoubleReal score((DoubleReal)num_correct_topn / (DoubleReal)num_all);
 
513
      cerr << "Avg. score in top " << param_.getValue("num_top_peaks") << " with a least " << min_intensity * 100.0 << "% intensity: " << score << endl;
 
514
                        return score;
 
515
                }
 
516
 
 
517
                cerr << "PILISCrossValidation: unknown optimization_method: " << optimization_method << endl;
 
518
                return 0;
 
519
        }
 
520
 
 
521
        DoubleReal PILISCrossValidation::scoreSpectra_(const RichPeakSpectrum& spec1, const RichPeakSpectrum& spec2)
 
522
        {
 
523
                PeakSpectrum s1, s2;
 
524
                for (RichPeakSpectrum::ConstIterator piit = spec1.begin(); piit != spec1.end(); ++piit)
 
525
    {
 
526
        Peak1D p;
 
527
      p.setPosition(piit->getPosition()[0]);
 
528
      p.setIntensity(piit->getIntensity());
 
529
      s1.push_back(p);
 
530
    }
 
531
                for (RichPeakSpectrum::ConstIterator piit = spec2.begin(); piit != spec2.end(); ++piit)
 
532
                {
 
533
                        Peak1D p;
 
534
                        p.setPosition(piit->getPosition()[0]);
 
535
                        p.setIntensity(piit->getIntensity());
 
536
                        s2.push_back(p);
 
537
                }
 
538
                return (*pscf_)(s1, s2);
 
539
        }
 
540
 
 
541
        void PILISCrossValidation::updateMembers_()
 
542
        {
 
543
                pscf_ = Factory<PeakSpectrumCompareFunctor>::create(param_.getValue("compare_function"));
 
544
                Param compare_param(pscf_->getParameters());
 
545
                if (compare_param.exists("tolerance"))
 
546
                {
 
547
                        compare_param.setValue("tolerance", (DoubleReal)param_.getValue("fragment_mass_tolerance"));
 
548
                        pscf_->setParameters(compare_param);
 
549
                }
 
550
                return;         
 
551
        }
 
552
}
 
553
 
 
554