~ubuntu-branches/ubuntu/wily/openms/wily

« back to all changes in this revision

Viewing changes to source/ANALYSIS/ID/FalseDiscoveryRate.C

  • Committer: Package Import Robot
  • Author(s): Filippo Rusconi
  • Date: 2012-11-12 15:58:12 UTC
  • Revision ID: package-import@ubuntu.com-20121112155812-vr15wtg9b50cuesg
Tags: upstream-1.9.0
ImportĀ upstreamĀ versionĀ 1.9.0

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
// -*- mode: C++; tab-width: 2; -*-
 
2
// vi: set ts=2:
 
3
//
 
4
// --------------------------------------------------------------------------
 
5
//                   OpenMS Mass Spectrometry Framework
 
6
// --------------------------------------------------------------------------
 
7
//  Copyright (C) 2003-2011 -- Oliver Kohlbacher, Knut Reinert
 
8
//
 
9
//  This library is free software; you can redistribute it and/or
 
10
//  modify it under the terms of the GNU Lesser General Public
 
11
//  License as published by the Free Software Foundation; either
 
12
//  version 2.1 of the License, or (at your option) any later version.
 
13
// 
 
14
//  This library is distributed in the hope that it will be useful,
 
15
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
 
16
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 
17
//  Lesser General Public License for more details.
 
18
//
 
19
//  You should have received a copy of the GNU Lesser General Public
 
20
//  License along with this library; if not, write to the Free Software
 
21
//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
22
//
 
23
// --------------------------------------------------------------------------
 
24
// $Maintainer: Chris Bielow $
 
25
// $Authors: Andreas Bertsch, Chris Bielow $
 
26
// --------------------------------------------------------------------------
 
27
 
 
28
#include <OpenMS/ANALYSIS/ID/FalseDiscoveryRate.h>
 
29
#include <OpenMS/DATASTRUCTURES/StringList.h>
 
30
#include <OpenMS/CONCEPT/LogStream.h>
 
31
 
 
32
#include <algorithm>
 
33
 
 
34
#define FALSE_DISCOVERY_RATE_DEBUG
 
35
#undef  FALSE_DISCOVERY_RATE_DEBUG
 
36
 
 
37
using namespace std;
 
38
 
 
39
namespace OpenMS 
 
40
{
 
41
        FalseDiscoveryRate::FalseDiscoveryRate()
 
42
                : DefaultParamHandler("FalseDiscoveryRate")
 
43
        {               
 
44
                defaults_.setValue("q_value", "true", "If 'true', the q-values will be calculated instead of the FDRs");
 
45
                defaults_.setValidStrings("q_value", StringList::create("true,false"));
 
46
                defaults_.setValue("use_all_hits", "false", "If 'true' not only the first hit, but all are used (peptides only)");
 
47
                defaults_.setValidStrings("use_all_hits", StringList::create("true,false"));
 
48
                defaults_.setValue("split_charge_variants", "false", "If set to 'true' charge variants are treated separately (for peptides of combined target/decoy searches only).");
 
49
                defaults_.setValidStrings("split_charge_variants", StringList::create("true,false"));
 
50
                defaults_.setValue("treat_runs_separately", "false", "If set to 'true' different search runs are treated separately (for peptides of combined target/decoy searches only).");
 
51
                defaults_.setValidStrings("treat_runs_separately", StringList::create("true,false"));
 
52
                defaults_.setValue("decoy_string", "_rev", "String which is appended at the accession of the protein to indicate that it is a decoy protein (for proteins only).");
 
53
                defaults_.setValue("add_decoy_peptides","false", "If set to true, decoy peptides will be written to output file, too. The q-value is set to the closest target score.");
 
54
                defaults_.setValidStrings("add_decoy_peptides", StringList::create("true,false"));
 
55
                defaultsToParam_();
 
56
        }
 
57
 
 
58
        void FalseDiscoveryRate::apply(vector<PeptideIdentification>& ids)
 
59
        {
 
60
                bool q_value = param_.getValue("q_value").toBool();
 
61
                bool use_all_hits = param_.getValue("use_all_hits").toBool();
 
62
                bool treat_runs_separately = param_.getValue("treat_runs_separately").toBool();
 
63
                bool split_charge_variants = param_.getValue("split_charge_variants").toBool();
 
64
          bool add_decoy_peptides = param_.getValue("add_decoy_peptides").toBool();
 
65
#ifdef FALSE_DISCOVERY_RATE_DEBUG
 
66
                cerr << "Parameters: q_value=" << q_value << ", use_all_hits=" << use_all_hits << ", treat_runs_separately=" << treat_runs_separately << ", split_charge_variants=" << split_charge_variants << endl;
 
67
#endif
 
68
 
 
69
 
 
70
                if (ids.empty())
 
71
                {
 
72
      LOG_WARN << "No peptide identifications given to FalseDiscoveryRate! No calculation performed.\n";
 
73
                        return;
 
74
                }
 
75
 
 
76
    // first search for all identifiers and charge variants
 
77
    set<String> identifiers;
 
78
                set<SignedSize> charge_variants;
 
79
    for (vector<PeptideIdentification>::iterator it = ids.begin(); it != ids.end(); ++it)
 
80
    {
 
81
      identifiers.insert(it->getIdentifier());
 
82
                        it->assignRanks();
 
83
 
 
84
                        if (!use_all_hits)
 
85
                        {
 
86
                                vector<PeptideHit> hits = it->getHits();
 
87
                                hits.resize(1);
 
88
                                it->setHits(hits);
 
89
                        }
 
90
                
 
91
                        for (vector<PeptideHit>::const_iterator pit = it->getHits().begin(); pit != it->getHits().end(); ++pit)
 
92
                        {
 
93
                                charge_variants.insert(pit->getCharge());
 
94
                        }
 
95
    }
 
96
 
 
97
#ifdef FALSE_DISCOVERY_RATE_DEBUG
 
98
                cerr << "#id-runs: " << identifiers.size() << " ";
 
99
                for (set<String>::const_iterator it = identifiers.begin(); it != identifiers.end(); ++it)
 
100
                {
 
101
                        cerr << "," << *it;
 
102
                }
 
103
                cerr << endl;
 
104
 
 
105
 
 
106
                cerr << "#of charge states: " << charge_variants.size() << " ";
 
107
                for (set<SignedSize>::const_iterator it = charge_variants.begin(); it != charge_variants.end(); ++it)
 
108
                {
 
109
                        cerr << "," << *it;
 
110
                }
 
111
                cerr << endl;
 
112
#endif
 
113
 
 
114
                for (set<SignedSize>::const_iterator zit = charge_variants.begin(); zit != charge_variants.end(); ++zit)
 
115
                {
 
116
#ifdef FALSE_DISCOVERY_RATE_DEBUG
 
117
                        cerr << "Charge variant=" << *zit << endl;
 
118
#endif
 
119
 
 
120
                        // for all identifiers
 
121
                        for (set<String>::const_iterator iit = identifiers.begin(); iit != identifiers.end(); ++iit)
 
122
                        {
 
123
                                if (!treat_runs_separately && iit != identifiers.begin())
 
124
                                {
 
125
                                        continue;
 
126
                                }
 
127
 
 
128
#ifdef FALSE_DISCOVERY_RATE_DEBUG
 
129
                                cerr << "Id-run: " << *iit << endl;
 
130
#endif
 
131
                                // get the scores of all peptide hits
 
132
                                vector<DoubleReal> target_scores, decoy_scores;
 
133
                                for (vector<PeptideIdentification>::iterator it = ids.begin(); it != ids.end(); ++it)
 
134
                                {
 
135
                                        // if runs should be treated separately, the identifiers must be the same
 
136
                                        if (treat_runs_separately && it->getIdentifier() != *iit)
 
137
                                        {
 
138
                                                continue;
 
139
                                        }
 
140
 
 
141
                                        for (Size i = 0; i < it->getHits().size(); ++i)
 
142
                                        {
 
143
                                                if (split_charge_variants && it->getHits()[i].getCharge() != *zit)
 
144
                                                {
 
145
                                                        continue;
 
146
                                                }
 
147
 
 
148
                                                if (!it->getHits()[i].metaValueExists("target_decoy"))
 
149
                                                {
 
150
                                                        LOG_FATAL_ERROR << "Meta value 'target_decoy' does not exists, reindex the idXML file with 'PeptideIndexer' first (run-id='" << it->getIdentifier() << ", rank=" << i+1 << " of " << it->getHits().size() << ")!" << endl;
 
151
              throw Exception::MissingInformation(__FILE__,__LINE__,__PRETTY_FUNCTION__,"Meta value 'target_decoy' does not exist!");
 
152
                                                }
 
153
 
 
154
                                                String target_decoy(it->getHits()[i].getMetaValue("target_decoy"));
 
155
                                                if (target_decoy == "target")
 
156
                                                {
 
157
                                                        target_scores.push_back(it->getHits()[i].getScore());
 
158
                                                }
 
159
                                                else 
 
160
                                                {
 
161
                                                        if (target_decoy == "decoy" || target_decoy == "target+decoy")
 
162
                                                        {
 
163
                                                                decoy_scores.push_back(it->getHits()[i].getScore());
 
164
                                                        }
 
165
                                                        else
 
166
                                                        {
 
167
                                                                if (target_decoy != "")
 
168
                                                                {
 
169
                                                                        LOG_FATAL_ERROR << "Unknown value of meta value 'target_decoy': '" << target_decoy << "'!" << endl;
 
170
                                                                }
 
171
                                                        }
 
172
                                                }
 
173
                                        }
 
174
                                }
 
175
                
 
176
#ifdef FALSE_DISCOVERY_RATE_DEBUG
 
177
                                cerr << "#target-scores=" << target_scores.size() << ", #decoy-scores=" << decoy_scores.size() << endl;
 
178
#endif
 
179
 
 
180
                                // check decoy scores
 
181
                                if (decoy_scores.empty())
 
182
                                {
 
183
                                        String error_string = "FalseDiscoveryRate: #decoy sequences is zero! Setting all target sequences to q-value/FDR 0! ";
 
184
                                        if (split_charge_variants || treat_runs_separately)
 
185
                                        {
 
186
                                                error_string += "(";
 
187
                                                if (split_charge_variants)
 
188
                                                {
 
189
                                                        error_string += "charge_variant=" + String(*zit) + " ";
 
190
                                                }
 
191
                                                if (treat_runs_separately)
 
192
                                                {
 
193
                                                        error_string += "run-id=" + *iit;
 
194
                                                }
 
195
                                                error_string += ")";
 
196
                                        }
 
197
                                        LOG_ERROR << error_string;
 
198
                                }
 
199
 
 
200
                                // check target scores
 
201
                                if (target_scores.empty())
 
202
                                {       
 
203
                                        String error_string = "FalseDiscoveryRate: #target sequences is zero! Ignoring. ";
 
204
          if (split_charge_variants || treat_runs_separately)
 
205
          {
 
206
            error_string += "(";
 
207
            if (split_charge_variants)
 
208
            {
 
209
              error_string += "charge_variant=" + String(*zit) + " ";
 
210
            }
 
211
            if (treat_runs_separately)
 
212
            {
 
213
              error_string += "run-id=" + *iit;
 
214
            }
 
215
            error_string += ")";
 
216
          }
 
217
          LOG_ERROR << error_string;
 
218
                                }
 
219
 
 
220
                                if (target_scores.empty() || decoy_scores.empty())
 
221
                                {
 
222
                                        // no remove the the relevant entries, or put 'pseudo-scores' in
 
223
                for (vector<PeptideIdentification>::iterator it = ids.begin(); it != ids.end(); ++it)
 
224
              {
 
225
              // if runs should be treated separately, the identifiers must be the same
 
226
            if (treat_runs_separately && it->getIdentifier() != *iit)
 
227
                  {
 
228
                  continue;
 
229
                }
 
230
 
 
231
                                                vector<PeptideHit> hits(it->getHits()), new_hits;
 
232
                  for (Size i = 0; i < hits.size(); ++i)
 
233
                {
 
234
                if (split_charge_variants && hits[i].getCharge() != *zit)
 
235
              {
 
236
                                                                new_hits.push_back(hits[i]);
 
237
                      continue;
 
238
                  }
 
239
 
 
240
                    if (!hits[i].metaValueExists("target_decoy"))
 
241
                  {
 
242
                  LOG_FATAL_ERROR << "Meta value 'target_decoy' does not exists, reindex the idXML file with 'PeptideIndexer' first (run-id='" << it->getIdentifier() << ", rank=" << i+1 << " of " << hits.size() << ")!" << endl;
 
243
                throw Exception::MissingInformation(__FILE__,__LINE__,__PRETTY_FUNCTION__,"Meta value 'target_decoy' does not exist!");
 
244
                    }
 
245
 
 
246
                String target_decoy(hits[i].getMetaValue("target_decoy"));
 
247
                        if (target_decoy == "target")
 
248
                  {
 
249
                                                                // if it is a target hit, there are now decoys, fdr/q-value should be zero then
 
250
                                                                new_hits.push_back(hits[i]);
 
251
                                                                String score_type = it->getScoreType() + "_score";
 
252
                                                                new_hits.back().setMetaValue(score_type, new_hits.back().getScore());
 
253
                                                                new_hits.back().setScore(0);
 
254
                }
 
255
                else
 
256
                {
 
257
                if (target_decoy != "decoy" && target_decoy != "target+decoy")
 
258
                {
 
259
                  LOG_FATAL_ERROR << "Unknown value of meta value 'target_decoy': '" << target_decoy << "'!" << endl;
 
260
                }
 
261
                                                        }
 
262
                                                }
 
263
                                                it->setHits(new_hits);
 
264
                                        }
 
265
                                        continue;
 
266
                                }
 
267
 
 
268
                                // calculate fdr for the forward scores
 
269
                        bool higher_score_better(ids.begin()->isHigherScoreBetter());
 
270
                        Map<DoubleReal, DoubleReal> score_to_fdr;
 
271
                                calculateFDRs_(score_to_fdr, target_scores, decoy_scores, q_value, higher_score_better);
 
272
 
 
273
                                // annotate fdr
 
274
                        for (vector<PeptideIdentification>::iterator it = ids.begin(); it != ids.end(); ++it)
 
275
                        {
 
276
                  // if runs should be treated separately, the identifiers must be the same
 
277
                if (treat_runs_separately && it->getIdentifier() != *iit)
 
278
                {
 
279
                continue;
 
280
                }
 
281
 
 
282
                                String score_type = it->getScoreType() + "_score";
 
283
                vector<PeptideHit> hits;
 
284
                for (vector<PeptideHit>::const_iterator pit = it->getHits().begin(); pit != it->getHits().end(); ++pit)
 
285
                {
 
286
                                                PeptideHit hit = *pit;
 
287
 
 
288
                                                if (split_charge_variants && pit->getCharge() != *zit)
 
289
                                                {
 
290
                                                        hits.push_back(*pit);
 
291
                                                        continue;
 
292
                                                }
 
293
                                                if (hit.metaValueExists("target_decoy"))
 
294
                                                {
 
295
                                                        String meta_value = (String)hit.getMetaValue("target_decoy");
 
296
                                                        if ((meta_value == "decoy" || meta_value == "target+decoy") && !add_decoy_peptides)
 
297
                                                        {
 
298
                                                                continue;
 
299
                                                        }
 
300
                                                }
 
301
                        hit.setMetaValue(score_type, pit->getScore());
 
302
                        hit.setScore(score_to_fdr[pit->getScore()]);
 
303
                                                hits.push_back(hit);
 
304
                }
 
305
                it->setHits(hits);
 
306
                }
 
307
                        }
 
308
                        if (!split_charge_variants)
 
309
                        {
 
310
                                break;
 
311
                        }
 
312
                }
 
313
 
 
314
                // higher-score-better can be set now, calculations are finished
 
315
    for (vector<PeptideIdentification>::iterator it = ids.begin(); it != ids.end(); ++it)
 
316
    {
 
317
                        if (q_value)
 
318
                        {
 
319
                                if (it->getScoreType() != "q-value")
 
320
                                {
 
321
                                        it->setScoreType("q-value");
 
322
                                }
 
323
                        }
 
324
                        else
 
325
                        {
 
326
                                if (it->getScoreType() != "FDR")
 
327
                                {
 
328
                                        it->setScoreType("FDR");
 
329
                                }
 
330
                        }
 
331
      it->setHigherScoreBetter(false);
 
332
                        it->assignRanks();
 
333
                }
 
334
 
 
335
    return;
 
336
        }
 
337
 
 
338
        void FalseDiscoveryRate::apply(vector<PeptideIdentification>& fwd_ids, vector<PeptideIdentification>& rev_ids)
 
339
        {
 
340
                if (fwd_ids.empty() || rev_ids.empty())
 
341
                {
 
342
                        return;
 
343
                }
 
344
                vector<DoubleReal> target_scores, decoy_scores;
 
345
                // get the scores of all peptide hits
 
346
                for (vector<PeptideIdentification>::const_iterator it = fwd_ids.begin(); it != fwd_ids.end(); ++it)
 
347
                {
 
348
                        for (vector<PeptideHit>::const_iterator pit = it->getHits().begin(); pit != it->getHits().end(); ++pit)
 
349
                        {
 
350
                                target_scores.push_back(pit->getScore());
 
351
                        }
 
352
                }
 
353
 
 
354
                for (vector<PeptideIdentification>::const_iterator it = rev_ids.begin(); it != rev_ids.end(); ++it)
 
355
    {
 
356
      for (vector<PeptideHit>::const_iterator pit = it->getHits().begin(); pit != it->getHits().end(); ++pit)
 
357
      {
 
358
        decoy_scores.push_back(pit->getScore());
 
359
      }
 
360
    }
 
361
 
 
362
                bool q_value(param_.getValue("q_value").toBool());
 
363
                bool higher_score_better(fwd_ids.begin()->isHigherScoreBetter());
 
364
          bool add_decoy_peptides = param_.getValue("add_decoy_peptides").toBool();
 
365
                // calculate fdr for the forward scores
 
366
                Map<DoubleReal, DoubleReal> score_to_fdr;
 
367
                calculateFDRs_(score_to_fdr, target_scores, decoy_scores, q_value, higher_score_better);
 
368
 
 
369
                // annotate fdr 
 
370
                String score_type = fwd_ids.begin()->getScoreType() + "_score";
 
371
                for (vector<PeptideIdentification>::iterator it = fwd_ids.begin(); it != fwd_ids.end(); ++it)
 
372
                {
 
373
                        if (q_value)
 
374
                        {
 
375
                                it->setScoreType("q-value");
 
376
                        }
 
377
                        else
 
378
                        {
 
379
                                it->setScoreType("FDR");
 
380
                        }
 
381
                                
 
382
                        it->setHigherScoreBetter(false);
 
383
                        vector<PeptideHit> hits = it->getHits();
 
384
                        for (vector<PeptideHit>::iterator pit = hits.begin(); pit != hits.end(); ++pit)
 
385
                        {
 
386
#ifdef FALSE_DISCOVERY_RATE_DEBUG
 
387
                                cerr << pit->getScore() << " " << score_to_fdr[pit->getScore()] << endl;
 
388
#endif
 
389
                                pit->setMetaValue(score_type, pit->getScore());
 
390
                                pit->setScore(score_to_fdr[pit->getScore()]);
 
391
                        }
 
392
                        it->setHits(hits);
 
393
                }
 
394
                //write as well decoy peptides
 
395
                if(add_decoy_peptides)
 
396
                {
 
397
                        score_type = rev_ids.begin()->getScoreType() + "_score";
 
398
                        for (vector<PeptideIdentification>::iterator it = rev_ids.begin(); it != rev_ids.end(); ++it)
 
399
                        {
 
400
                                if (q_value)
 
401
                                {
 
402
                                        it->setScoreType("q-value");
 
403
                                }
 
404
                                else
 
405
                                {
 
406
                                        it->setScoreType("FDR");
 
407
                                }
 
408
                                        
 
409
                                it->setHigherScoreBetter(false);
 
410
                                vector<PeptideHit> hits = it->getHits();
 
411
                                for (vector<PeptideHit>::iterator pit = hits.begin(); pit != hits.end(); ++pit)
 
412
                                {
 
413
        #ifdef FALSE_DISCOVERY_RATE_DEBUG
 
414
                                        cerr << pit->getScore() << " " << score_to_fdr[pit->getScore()] << endl;
 
415
        #endif
 
416
                                        pit->setMetaValue(score_type, pit->getScore());
 
417
                                        pit->setScore(score_to_fdr[pit->getScore()]);
 
418
                                }
 
419
                                it->setHits(hits);
 
420
                        }               
 
421
                }
 
422
                
 
423
                return;
 
424
        }
 
425
 
 
426
        void FalseDiscoveryRate::apply(vector<ProteinIdentification>& ids)
 
427
        {
 
428
                if (ids.empty()) 
 
429
                {
 
430
      LOG_WARN << "No protein identifications given to FalseDiscoveryRate! No calculation performed.\n";
 
431
      return;
 
432
                }
 
433
 
 
434
                vector<DoubleReal> target_scores, decoy_scores;
 
435
                String decoy_string = (String)param_.getValue("decoy_string");
 
436
                for (vector<ProteinIdentification>::const_iterator it = ids.begin(); it != ids.end(); ++it)
 
437
                {
 
438
                        for (vector<ProteinHit>::const_iterator pit = it->getHits().begin(); pit != it->getHits().end(); ++pit)
 
439
                        {
 
440
                                if (pit->getAccession().hasSubstring(decoy_string))
 
441
                                {
 
442
                                        decoy_scores.push_back(pit->getScore());
 
443
                                }
 
444
                                else
 
445
                                {
 
446
                                        target_scores.push_back(pit->getScore());
 
447
                                }
 
448
                        }
 
449
                }
 
450
 
 
451
                bool q_value(param_.getValue("q_value").toBool());
 
452
    bool higher_score_better(ids.begin()->isHigherScoreBetter());
 
453
 
 
454
    // calculate fdr for the forward scores
 
455
    Map<DoubleReal, DoubleReal> score_to_fdr;
 
456
                calculateFDRs_(score_to_fdr, target_scores, decoy_scores, q_value, higher_score_better);
 
457
 
 
458
    // annotate fdr
 
459
    String score_type = ids.begin()->getScoreType() + "_score";
 
460
    for (vector<ProteinIdentification>::iterator it = ids.begin(); it != ids.end(); ++it)
 
461
    {
 
462
                        if (q_value)
 
463
                        {
 
464
                                it->setScoreType("q-value");
 
465
                        }
 
466
                        else
 
467
                        {
 
468
        it->setScoreType("FDR");
 
469
                        }
 
470
      it->setHigherScoreBetter(false);
 
471
      vector<ProteinHit> hits = it->getHits();
 
472
      for (vector<ProteinHit>::iterator pit = hits.begin(); pit != hits.end(); ++pit)
 
473
      {
 
474
        pit->setMetaValue(score_type, pit->getScore());
 
475
        pit->setScore(score_to_fdr[pit->getScore()]);
 
476
      }
 
477
      it->setHits(hits);
 
478
    }
 
479
 
 
480
    return;
 
481
  }
 
482
 
 
483
        void FalseDiscoveryRate::apply(vector<ProteinIdentification>& fwd_ids, vector<ProteinIdentification>& rev_ids)
 
484
        {
 
485
    if (fwd_ids.empty() || rev_ids.empty())
 
486
    {
 
487
      return;
 
488
    }
 
489
    vector<DoubleReal> target_scores, decoy_scores;
 
490
    // get the scores of all peptide hits
 
491
    for (vector<ProteinIdentification>::const_iterator it = fwd_ids.begin(); it != fwd_ids.end(); ++it)
 
492
    {
 
493
      for (vector<ProteinHit>::const_iterator pit = it->getHits().begin(); pit != it->getHits().end(); ++pit)
 
494
      {
 
495
        target_scores.push_back(pit->getScore());
 
496
      }
 
497
    }
 
498
    for (vector<ProteinIdentification>::const_iterator it = rev_ids.begin(); it != rev_ids.end(); ++it)
 
499
    {
 
500
      for (vector<ProteinHit>::const_iterator pit = it->getHits().begin(); pit != it->getHits().end(); ++pit)
 
501
      {
 
502
        decoy_scores.push_back(pit->getScore());
 
503
      }
 
504
    }
 
505
 
 
506
                bool q_value(param_.getValue("q_value").toBool());
 
507
    bool higher_score_better(fwd_ids.begin()->isHigherScoreBetter());
 
508
        // calculate fdr for the forward scores
 
509
    Map<DoubleReal, DoubleReal> score_to_fdr;
 
510
                calculateFDRs_(score_to_fdr, target_scores, decoy_scores, q_value, higher_score_better);
 
511
 
 
512
    // annotate fdr
 
513
    String score_type = fwd_ids.begin()->getScoreType() + "_score";
 
514
    for (vector<ProteinIdentification>::iterator it = fwd_ids.begin(); it != fwd_ids.end(); ++it)
 
515
    {
 
516
                        if (q_value)
 
517
                        {
 
518
                                it->setScoreType("q-value");
 
519
                        }
 
520
                        else
 
521
                        {
 
522
        it->setScoreType("FDR");
 
523
                        }
 
524
      it->setHigherScoreBetter(false);
 
525
      vector<ProteinHit> hits = it->getHits();
 
526
      for (vector<ProteinHit>::iterator pit = hits.begin(); pit != hits.end(); ++pit)
 
527
      {
 
528
        pit->setMetaValue(score_type, pit->getScore());
 
529
        pit->setScore(score_to_fdr[pit->getScore()]);
 
530
      }
 
531
      it->setHits(hits);
 
532
    }
 
533
 
 
534
                return;
 
535
        }
 
536
 
 
537
 
 
538
        void FalseDiscoveryRate::calculateFDRs_(Map<DoubleReal, DoubleReal>& score_to_fdr, vector<DoubleReal>& target_scores, vector<DoubleReal>& decoy_scores, bool q_value, bool higher_score_better)
 
539
        {
 
540
                Size number_of_target_scores = target_scores.size();
 
541
    // sort the scores
 
542
    if (higher_score_better && !q_value)
 
543
    {
 
544
      sort(target_scores.rbegin(), target_scores.rend());
 
545
      sort(decoy_scores.rbegin(), decoy_scores.rend());
 
546
    }
 
547
    else if (!higher_score_better && !q_value)
 
548
    {
 
549
      sort(target_scores.begin(), target_scores.end());
 
550
      sort(decoy_scores.begin(), decoy_scores.end());
 
551
    }
 
552
    else if (higher_score_better)
 
553
    {
 
554
      sort(target_scores.begin(), target_scores.end());
 
555
      sort(decoy_scores.rbegin(), decoy_scores.rend());
 
556
    }
 
557
    else
 
558
    {
 
559
      sort(target_scores.rbegin(), target_scores.rend());
 
560
      sort(decoy_scores.begin(), decoy_scores.end());
 
561
    }
 
562
 
 
563
    Size j = 0;
 
564
    DoubleReal minimal_fdr = 1.;
 
565
 
 
566
    if (q_value)
 
567
    {
 
568
      for (Size i = 0; i != target_scores.size(); ++i)
 
569
      {
 
570
        if (decoy_scores.empty())
 
571
        {
 
572
          // set FDR to 0 (done below automatically)
 
573
        }
 
574
        else if (i == 0 && j == 0)
 
575
        {
 
576
          while (j != decoy_scores.size()
 
577
                 &&((target_scores[i] <= decoy_scores[j] && higher_score_better) ||
 
578
                    (target_scores[i] >= decoy_scores[j] && !higher_score_better)))
 
579
          {
 
580
            ++j;
 
581
          }
 
582
        }
 
583
        else
 
584
        {
 
585
          if (j == decoy_scores.size())
 
586
          {
 
587
            j--;
 
588
          }
 
589
          while (j != 0
 
590
                 &&((target_scores[i] > decoy_scores[j] && higher_score_better) ||
 
591
                    (target_scores[i] < decoy_scores[j] && !higher_score_better)))
 
592
          {
 
593
            --j;
 
594
          }
 
595
          // Since j has to be equal to the number of fps above the threshold we add one
 
596
          if ((target_scores[i] <= decoy_scores[j] && higher_score_better)
 
597
              || (target_scores[i] >= decoy_scores[j] && !higher_score_better))
 
598
          {
 
599
            ++j;
 
600
          }
 
601
        }
 
602
 
 
603
#ifdef FALSE_DISCOVERY_RATE_DEBUG
 
604
        cerr << target_scores[i] << " " << decoy_scores[j] << " " << i << " " << j << " ";
 
605
#endif
 
606
 
 
607
        DoubleReal fdr = 0.;
 
608
 
 
609
        if (minimal_fdr >= (DoubleReal)j / (number_of_target_scores - i))
 
610
        {
 
611
          minimal_fdr = (DoubleReal)j / (number_of_target_scores - i);
 
612
        }
 
613
        fdr = minimal_fdr;
 
614
 
 
615
#ifdef FALSE_DISCOVERY_RATE_DEBUG
 
616
        cerr << fdr << endl;
 
617
#endif
 
618
        score_to_fdr[target_scores[i]] = fdr;
 
619
 
 
620
      }
 
621
    }
 
622
    else
 
623
    {
 
624
      for (Size i = 0; i != target_scores.size(); ++i)
 
625
      {
 
626
        while (j != decoy_scores.size() &&
 
627
               ((target_scores[i] <= decoy_scores[j] && higher_score_better) ||
 
628
               (target_scores[i] >= decoy_scores[j] && !higher_score_better)))
 
629
        {
 
630
          ++j;
 
631
        }
 
632
 
 
633
#ifdef FALSE_DISCOVERY_RATE_DEBUG
 
634
        cerr << target_scores[i] << " " << decoy_scores[j] << " " << i << " " << j << " ";
 
635
#endif
 
636
        DoubleReal fdr(0);
 
637
 
 
638
        fdr = (DoubleReal)j / (DoubleReal)(i + 1);
 
639
 
 
640
#ifdef FALSE_DISCOVERY_RATE_DEBUG
 
641
        cerr << fdr << endl;
 
642
#endif
 
643
        score_to_fdr[target_scores[i]] = fdr;
 
644
      }
 
645
    }
 
646
        
 
647
 
 
648
   // assign    q-value of decoy_score to closest target_score
 
649
   for (Size i = 0; i != decoy_scores.size(); ++i)
 
650
   {    
 
651
           Size closest_idx = 0;        
 
652
     for (Size j = 0; j != target_scores.size(); ++j)
 
653
     {
 
654
                         if (fabs(decoy_scores[i] - target_scores[j]) < fabs(decoy_scores[i] - target_scores[closest_idx]))
 
655
                         {
 
656
                           closest_idx = j;                              
 
657
                         }       
 
658
           }
 
659
                 score_to_fdr[decoy_scores[i]] = score_to_fdr[target_scores[closest_idx]];
 
660
         }
 
661
 
 
662
        }
 
663
 
 
664
} // namespace OpenMS