~ubuntu-branches/ubuntu/wily/openms/wily

« back to all changes in this revision

Viewing changes to source/APPLICATIONS/UTILS/FFEval.C

  • Committer: Package Import Robot
  • Author(s): Filippo Rusconi
  • Date: 2013-12-20 11:30:16 UTC
  • mfrom: (5.1.2 sid)
  • Revision ID: package-import@ubuntu.com-20131220113016-wre5g9bteeheq6he
Tags: 1.11.1-3
* remove version number from libbost development package names;
* ensure that AUTHORS is correctly shipped in all packages.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
// -*- mode: C++; tab-width: 2; -*-
2
 
// vi: set ts=2:
3
 
//
4
 
// --------------------------------------------------------------------------
5
 
//                   OpenMS Mass Spectrometry Framework
6
 
// --------------------------------------------------------------------------
7
 
//  Copyright (C) 2003-2011 -- Oliver Kohlbacher, Knut Reinert
8
 
//
9
 
//  This library is free software; you can redistribute it and/or
10
 
//  modify it under the terms of the GNU Lesser General Public
11
 
//  License as published by the Free Software Foundation; either
12
 
//  version 2.1 of the License, or (at your option) any later version.
13
 
//
14
 
//  This library is distributed in the hope that it will be useful,
15
 
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
 
//  Lesser General Public License for more details.
18
 
//
19
 
//  You should have received a copy of the GNU Lesser General Public
20
 
//  License along with this library; if not, write to the Free Software
21
 
//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
1
// --------------------------------------------------------------------------
 
2
//                   OpenMS -- Open-Source Mass Spectrometry
 
3
// --------------------------------------------------------------------------
 
4
// Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
 
5
// ETH Zurich, and Freie Universitaet Berlin 2002-2013.
 
6
//
 
7
// This software is released under a three-clause BSD license:
 
8
//  * Redistributions of source code must retain the above copyright
 
9
//    notice, this list of conditions and the following disclaimer.
 
10
//  * Redistributions in binary form must reproduce the above copyright
 
11
//    notice, this list of conditions and the following disclaimer in the
 
12
//    documentation and/or other materials provided with the distribution.
 
13
//  * Neither the name of any author or any participating institution
 
14
//    may be used to endorse or promote products derived from this software
 
15
//    without specific prior written permission.
 
16
// For a full list of authors, refer to the file AUTHORS.
 
17
// --------------------------------------------------------------------------
 
18
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 
19
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 
20
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 
21
// ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
 
22
// INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 
23
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 
24
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 
25
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 
26
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 
27
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 
28
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22
29
//
23
30
// --------------------------------------------------------------------------
24
31
// $Maintainer: $
39
46
//-------------------------------------------------------------
40
47
 
41
48
/**
42
 
        @page UTILS_FFEval FFEval
43
 
        
44
 
        @brief Evaluation tool for feature detection algorithms.
45
 
                
 
49
    @page UTILS_FFEval FFEval
 
50
 
 
51
    @brief Evaluation tool for feature detection algorithms.
 
52
 
46
53
 
47
54
  To plot the ROC curve you might use:
48
55
 
52
59
  lines(c(0,1),c(0,1))
53
60
@endcode
54
61
 
55
 
        <B>The command line parameters of this tool are:</B>
56
 
        @verbinclude UTILS_FFEval.cli
 
62
    <B>The command line parameters of this tool are:</B>
 
63
    @verbinclude UTILS_FFEval.cli
 
64
    <B>INI file documentation of this tool:</B>
 
65
    @htmlinclude UTILS_FFEval.html
57
66
*/
58
67
 
59
68
// We do not want this class to show up in the docu:
60
69
/// @cond TOPPCLASSES
61
70
 
62
 
class TOPPFFEval
63
 
        : public TOPPBase
 
71
class TOPPFFEval :
 
72
  public TOPPBase
64
73
{
65
 
 public:
66
 
        TOPPFFEval()
67
 
                : TOPPBase("FFEval","Evaluation tool for feature detection algorithms.",false)
68
 
        {
69
 
        }
70
 
        
71
 
 protected:
72
 
 
73
 
        void registerOptionsAndFlags_()
74
 
        {
75
 
                addText_("Input options");
76
 
                registerInputFile_("in","<file>","","Feature input file, which contains the data to be tested against the truth file.");
77
 
                setValidFormats_("in", StringList::create("featureXML"));
78
 
                registerInputFile_("truth","<file>","","Truth feature file that defines what features should be found.");
79
 
                setValidFormats_("truth", StringList::create("featureXML"));
80
 
                registerDoubleOption_("rt_tol","<double>",0.3,"Allowed tolerance of RT relative to average feature RT span.", false);
81
 
                setMinFloat_("rt_tol",0);
82
 
                registerDoubleOption_("rt_tol_abs","<double>",-1.0,"Allowed absolute tolerance of RT (overwrites 'rt_tol' if set above zero).", false);
83
 
                setMinFloat_("rt_tol_abs",-1);
84
 
                registerDoubleOption_("mz_tol","<double>",0.25,"Allowed tolerance in m/z (is divided by charge).", false);
85
 
                setMinFloat_("mz_tol",0);
86
 
                registerOutputFile_("out","<file>","","Feature output file. If given, an annotated input file is written.", false);
87
 
                setValidFormats_("out", StringList::create("featureXML"));
88
 
                registerInputFile_("abort_reasons","<file>","","Feature file containing seeds with abort reasons.",false);
89
 
                setValidFormats_("abort_reasons", StringList::create("featureXML"));
90
 
                registerOutputFile_("out_roc","<file>","","If given, a ROC curve file is created (ROC points based on intensity threshold)", false);
91
 
        }
92
 
        
93
 
        /// Counts the number of features with meta value @p name equal to @p value
94
 
        UInt count(const FeatureMap<>& map, const String& name, const String& value="")
95
 
        {
96
 
                UInt count =0;
97
 
                for (Size i=0; i<map.size(); ++i)
98
 
                {
99
 
                        if (map[i].metaValueExists(name))
100
 
                        {
101
 
                                if (value=="")
102
 
                                {
103
 
                                        ++count;
104
 
                                }
105
 
                                else
106
 
                                {
107
 
                                        if (map[i].getMetaValue(name).toString()==value)
108
 
                                        {
109
 
                                                ++count;
110
 
                                        }
111
 
                                }
112
 
                        } 
113
 
                }
114
 
                return count;
115
 
        }
116
 
        
117
 
        /// Returns the total number and percentage in parentheses
118
 
        String percentage(Size count, Size size)
119
 
        {
120
 
                return String(" (") + String::number(100.0*count/size,2) + "%)";
121
 
        }
122
 
 
123
 
        String fiveNumbers(vector<DoubleReal> a, UInt decimal_places)
124
 
        {
125
 
                sort(a.begin(),a.end());
126
 
                return String::number(a[0],decimal_places) + " " + String::number(a[a.size()/4],decimal_places) + " " + String::number(a[a.size()/2],decimal_places) + " " + String::number(a[(3*a.size())/4],decimal_places) + " " + String::number(a.back(),decimal_places);
127
 
        }
128
 
 
129
 
        ExitCodes main_(int , const char**)
130
 
        {
131
 
                //load data
132
 
                FeatureMap<> features_in,features_truth;
133
 
                FeatureXMLFile().load(getStringOption_("in"),features_in);
134
 
                features_in.sortByPosition();
135
 
                FeatureXMLFile().load(getStringOption_("truth"),features_truth);
136
 
                features_truth.sortByPosition();
137
 
                FeatureMap<> abort_reasons;
138
 
                if(getStringOption_("abort_reasons")!="")
139
 
                {
140
 
                        FeatureXMLFile().load(getStringOption_("abort_reasons"),abort_reasons);
141
 
                }
142
 
                DoubleReal mz_tol = getDoubleOption_("mz_tol");
143
 
                writeDebug_(String("Final MZ tolerance: ") + mz_tol, 1);
144
 
                
145
 
                //determine average RT tolerance:
146
 
                //median feature RT span times given factor
147
 
                vector<DoubleReal> rt_spans;
148
 
                for (Size t=0; t<features_in.size(); ++t)
149
 
                {
150
 
                        if (features_in[t].getConvexHulls().size()!=0)
151
 
                        {
152
 
                                rt_spans.push_back(features_in[t].getConvexHull().getBoundingBox().width());
153
 
                        }
154
 
                }
155
 
                //feature convex hulls are available => relative RT span
156
 
                DoubleReal rt_tol = getDoubleOption_("rt_tol_abs");
157
 
                if (rt_tol<0.0)
158
 
                {
159
 
      if ( !rt_spans.empty() )
160
 
                        {
161
 
                                sort(rt_spans.begin(), rt_spans.end());
162
 
                                rt_tol = getDoubleOption_("rt_tol")*rt_spans[rt_spans.size()/2];
163
 
                        }
 
74
public:
 
75
  TOPPFFEval() :
 
76
    TOPPBase("FFEval", "Evaluation tool for feature detection algorithms.", false)
 
77
  {
 
78
  }
 
79
 
 
80
protected:
 
81
 
 
82
  void registerOptionsAndFlags_()
 
83
  {
 
84
    registerInputFile_("in", "<file>", "", "Feature input file, which contains the data to be tested against the truth file.");
 
85
    setValidFormats_("in", StringList::create("featureXML"));
 
86
    registerInputFile_("truth", "<file>", "", "Truth feature file that defines what features should be found.");
 
87
    setValidFormats_("truth", StringList::create("featureXML"));
 
88
    registerDoubleOption_("rt_tol", "<double>", 0.3, "Allowed tolerance of RT relative to average feature RT span.", false);
 
89
    setMinFloat_("rt_tol", 0);
 
90
    registerDoubleOption_("rt_tol_abs", "<double>", -1.0, "Allowed absolute tolerance of RT (overwrites 'rt_tol' if set above zero).", false);
 
91
    setMinFloat_("rt_tol_abs", -1);
 
92
    registerDoubleOption_("mz_tol", "<double>", 0.25, "Allowed tolerance in m/z (is divided by charge).", false);
 
93
    setMinFloat_("mz_tol", 0);
 
94
    registerOutputFile_("out", "<file>", "", "Feature output file. If given, an annotated input file is written.", false);
 
95
    setValidFormats_("out", StringList::create("featureXML"));
 
96
    registerInputFile_("abort_reasons", "<file>", "", "Feature file containing seeds with abort reasons.", false);
 
97
    setValidFormats_("abort_reasons", StringList::create("featureXML"));
 
98
    registerOutputFile_("out_roc", "<file>", "", "If given, a ROC curve file is created (ROC points based on intensity threshold)", false);
 
99
    setValidFormats_("out_roc", StringList::create("csv"));
 
100
  }
 
101
 
 
102
  /// Counts the number of features with meta value @p name equal to @p value
 
103
  UInt count(const FeatureMap<> & map, const String & name, const String & value = "")
 
104
  {
 
105
    UInt count = 0;
 
106
    for (Size i = 0; i < map.size(); ++i)
 
107
    {
 
108
      if (map[i].metaValueExists(name))
 
109
      {
 
110
        if (value == "")
 
111
        {
 
112
          ++count;
 
113
        }
 
114
        else
 
115
        {
 
116
          if (map[i].getMetaValue(name).toString() == value)
 
117
          {
 
118
            ++count;
 
119
          }
 
120
        }
 
121
      }
 
122
    }
 
123
    return count;
 
124
  }
 
125
 
 
126
  /// Returns the total number and percentage in parentheses
 
127
  String percentage(Size count, Size size)
 
128
  {
 
129
    return String(" (") + String::number(100.0 * count / size, 2) + "%)";
 
130
  }
 
131
 
 
132
  String fiveNumbers(vector<DoubleReal> a, UInt decimal_places)
 
133
  {
 
134
    sort(a.begin(), a.end());
 
135
    return String::number(a[0], decimal_places) + " " + String::number(a[a.size() / 4], decimal_places) + " " + String::number(a[a.size() / 2], decimal_places) + " " + String::number(a[(3 * a.size()) / 4], decimal_places) + " " + String::number(a.back(), decimal_places);
 
136
  }
 
137
 
 
138
  ExitCodes main_(int, const char **)
 
139
  {
 
140
    //load data
 
141
    FeatureMap<> features_in, features_truth;
 
142
    FeatureXMLFile().load(getStringOption_("in"), features_in);
 
143
    features_in.sortByPosition();
 
144
    FeatureXMLFile().load(getStringOption_("truth"), features_truth);
 
145
    features_truth.sortByPosition();
 
146
    FeatureMap<> abort_reasons;
 
147
    if (getStringOption_("abort_reasons") != "")
 
148
    {
 
149
      FeatureXMLFile().load(getStringOption_("abort_reasons"), abort_reasons);
 
150
    }
 
151
    DoubleReal mz_tol = getDoubleOption_("mz_tol");
 
152
    writeDebug_(String("Final MZ tolerance: ") + mz_tol, 1);
 
153
 
 
154
    //determine average RT tolerance:
 
155
    //median feature RT span times given factor
 
156
    vector<DoubleReal> rt_spans;
 
157
    for (Size t = 0; t < features_in.size(); ++t)
 
158
    {
 
159
      if (features_in[t].getConvexHulls().size() != 0)
 
160
      {
 
161
        rt_spans.push_back(features_in[t].getConvexHull().getBoundingBox().width());
 
162
      }
 
163
    }
 
164
    //feature convex hulls are available => relative RT span
 
165
    DoubleReal rt_tol = getDoubleOption_("rt_tol_abs");
 
166
    if (rt_tol < 0.0)
 
167
    {
 
168
      if (!rt_spans.empty())
 
169
      {
 
170
        sort(rt_spans.begin(), rt_spans.end());
 
171
        rt_tol = getDoubleOption_("rt_tol") * rt_spans[rt_spans.size() / 2];
 
172
      }
164
173
      else if (features_in.empty())
165
174
      {
166
 
        // do nothing, rt_tol does not really matter, as we will not find a match anyway, but we want to have the stats 
 
175
        // do nothing, rt_tol does not really matter, as we will not find a match anyway, but we want to have the stats
167
176
        // at the end, so we do not abort
168
177
      }
169
 
                        else
170
 
                        {
171
 
                                writeLog_("Error: Input features do not have convex hulls. You have to set 'rt_tol_abs'!");
172
 
                                return ILLEGAL_PARAMETERS;
173
 
                        }
174
 
                }
175
 
                writeDebug_(String("Final RT tolerance: ") + rt_tol, 1);
176
 
                
177
 
                //general statistics
178
 
                std::vector<DoubleReal> ints_t;
179
 
                std::vector<DoubleReal> ints_i;
180
 
                std::vector<DoubleReal> ints_found;
181
 
                std::vector<DoubleReal> ints_missed;
182
 
                Map<String,UInt> abort_strings;
183
 
                
184
 
                for (Size m=0; m<features_truth.size(); ++m)
185
 
                {
186
 
                        Feature& f_t =  features_truth[m];
187
 
                        UInt match_count = 0;
188
 
                        bool correct_charge = false;
189
 
                        bool exact_centroid_match = false;
190
 
                        Size last_match_index = features_in.size()+1;
191
 
                        for (Size a=0; a<features_in.size(); ++a)
192
 
                        {
193
 
                                const Feature& f_i =  features_in[a];
194
 
                                //RT match
195
 
                                if (fabs(f_i.getRT()-f_t.getRT())<rt_tol)
196
 
                                {
197
 
                                        DoubleReal charge_mz_tol = mz_tol / f_t.getCharge();
198
 
                                        //Exact m/z match
199
 
                                        if (fabs(f_i.getMZ()-f_t.getMZ())<charge_mz_tol)
200
 
                                        {
201
 
                                                ++match_count;
202
 
                                                exact_centroid_match = true;
203
 
                                                if(f_i.getCharge()==f_t.getCharge()) correct_charge = true;
204
 
                                                last_match_index = a;
205
 
                                        }
206
 
                                        //Centroid is one trace off, but still contained in the convex hull
207
 
                                        else if (f_i.getConvexHull().getBoundingBox().encloses(f_t.getPosition())
208
 
                                                                         &&
209
 
                                                                         (
210
 
                                                                          fabs(f_i.getMZ()+1.0/f_t.getCharge()-f_t.getMZ())<charge_mz_tol
211
 
                                                                          ||
212
 
                                                                          fabs(f_i.getMZ()-1.0/f_t.getCharge()-f_t.getMZ())<charge_mz_tol
213
 
                                                                         )
214
 
                                                      )
215
 
                                        {
216
 
                                                ++match_count;
217
 
                                                last_match_index = a;
218
 
                                                if(f_i.getCharge()==f_t.getCharge()) correct_charge = true;
219
 
                                        }
220
 
                                }
221
 
                        }
222
 
                        
223
 
                        f_t.setMetaValue("matches",match_count);
224
 
                        if (match_count==1)
225
 
                        {
226
 
                                //flag matched feature with addition information
227
 
                                if (correct_charge)
228
 
                                {
229
 
                                        f_t.setMetaValue("correct_charge",String("true"));
230
 
                                        f_t.setMetaValue("intensity_ratio",features_in[last_match_index].getIntensity()/f_t.getIntensity());
231
 
                                        features_in[last_match_index].setMetaValue("correct_hit", "true"); //flag the feature for ROC curve
232
 
                                }
233
 
                                else
234
 
                                {
235
 
                                        f_t.setMetaValue("correct_charge",String("false"));
236
 
                                }
237
 
                                
238
 
                                if (exact_centroid_match)
239
 
                                {
240
 
                                        f_t.setMetaValue("exact_centroid_match",String("true"));
241
 
                                }
242
 
                                else
243
 
                                {
244
 
                                        f_t.setMetaValue("exact_centroid_match",String("false"));
245
 
                                }
246
 
                        }
247
 
                        //evaluation of correct features only
248
 
                        if (match_count==1 && correct_charge)
249
 
                        {
250
 
                                ints_t.push_back(f_t.getIntensity());
251
 
                                ints_i.push_back(features_in[last_match_index].getIntensity());
252
 
                                ints_found.push_back(f_t.getIntensity());
253
 
                        }
254
 
                        else
255
 
                        {
256
 
                                ints_missed.push_back(f_t.getIntensity());
257
 
                                
258
 
                                //look up the abort reason of the nearest seed
259
 
                                DoubleReal best_score_ab = 0;
260
 
                                String reason = "";
261
 
                                for (Size b=0; b<abort_reasons.size(); ++b)
262
 
                                {
263
 
                                        const Feature& f_ab =  abort_reasons[b];
264
 
                                        if ( fabs(f_ab.getRT() - f_t.getRT()) <= rt_tol
265
 
                                                && fabs(f_ab.getMZ() - f_t.getMZ()) <= mz_tol) 
266
 
                                        {
267
 
                                                DoubleReal score = (1.0-fabs(f_ab.getMZ()-f_t.getMZ())/mz_tol) * (1.0-fabs(f_ab.getRT()-f_t.getRT())/rt_tol);
268
 
                                                if (score > best_score_ab)
269
 
                                                {
270
 
                                                        best_score_ab = score;
271
 
                                                        reason = f_ab.getMetaValue("abort_reason");
272
 
                                                }
273
 
                                        }
274
 
                                }
275
 
                                if (reason=="")
276
 
                                {
277
 
                                        reason = "No seed found";
278
 
                                }
279
 
                                if (abort_strings.has(reason))
280
 
                                {
281
 
                                        abort_strings[reason]++;        
282
 
                                }
283
 
                                else
284
 
                                {
285
 
                                        abort_strings[reason] = 1;
286
 
                                }
287
 
                        }
288
 
                }
289
 
 
290
 
                //------------------------ general statistics ------------------------
291
 
                cout << endl;
292
 
                cout << "general information:" << endl;
293
 
                cout << "====================" << endl;
294
 
                cout << "input features: " << features_in.size() << endl;               
295
 
                cout << "truth features: " << features_truth.size() << endl;            
296
 
 
297
 
                //------------------------ matches ------------------------
298
 
                cout << endl;
299
 
                cout << "feature matching statistics:" << endl;
300
 
                cout << "============================" << endl;
301
 
                Size no_match = count(features_truth,"matches","0");
302
 
                cout << "no match: " << no_match << percentage(no_match,features_truth.size()) << endl;
303
 
                Size one_match = count(features_truth,"matches","1");
304
 
                cout << "one match: " << one_match << percentage(one_match,features_truth.size()) << endl;
305
 
                Size charge_match = count(features_truth,"correct_charge","true");
306
 
                cout << " - correct charge: " << charge_match << percentage(charge_match,features_truth.size()) << endl;
307
 
                Size centroid_match = count(features_truth,"exact_centroid_match","true");
308
 
                cout << " - exact centroid match: " << centroid_match << percentage(centroid_match,features_truth.size()) << endl;
309
 
                Size multi_match = features_truth.size() - count(features_truth,"matches","0") - count(features_truth,"matches","1");
310
 
                cout << "multiple matches: " << multi_match << percentage(multi_match,features_truth.size()) << endl;
311
 
                Size incorrect_match = multi_match + one_match - charge_match;
312
 
                cout << "incorrect matches: " << incorrect_match << percentage(incorrect_match,features_truth.size()) << endl;
313
 
                if (abort_reasons.size())
314
 
                {
315
 
                        cout << "reasons for unmatched features:" << endl;
316
 
                        for (Map<String,UInt>::iterator it=abort_strings.begin(); it!=abort_strings.end(); ++it)
317
 
                        {
318
 
                                cout << " - " << String(it->second).fillLeft(' ',4) << ": " << it->first << endl;
319
 
                        }
320
 
                }
321
 
                //------------------------ intensity ------------------------
322
 
                cout << endl;
323
 
                cout << "intensity statistics:" << endl;
324
 
                cout << "=====================" << endl;
325
 
                if (ints_i.empty())
326
 
                {
327
 
                        cout << "correlation of found features: nan" << endl;
328
 
                }
329
 
                else
330
 
                {
331
 
                        cout << "correlation of found features: " << pearsonCorrelationCoefficient(ints_i.begin(),ints_i.end(),ints_t.begin(),ints_t.end()) << endl;
332
 
                }
333
 
                if (ints_found.empty())
334
 
                {
335
 
                        cout << "intensity distribution of found: 0.0 0.0 0.0 0.0 0.0" << endl;
336
 
                }
337
 
                else
338
 
                {
339
 
                        cout << "intensity distribution of found: " << fiveNumbers(ints_found,1) << endl;
340
 
                }
341
 
                if (ints_missed.empty())
342
 
                {
343
 
                        cout << "intensity distribution of missed: 0.0 0.0 0.0 0.0 0.0" << endl;
344
 
                }
345
 
                else
346
 
                {
347
 
                        cout << "intensity distribution of missed: " << fiveNumbers(ints_missed,1) << endl;
348
 
                }
349
 
                
350
 
                //------------------------ charges ------------------------
351
 
                cout << endl;
352
 
                cout << "charge matches statistics:" << endl;
353
 
                cout << "===========================" << endl;
354
 
                Map<UInt,UInt> present_charges,found_charges;
355
 
                for (Size i=0;i<features_truth.size(); ++i)
356
 
                {
357
 
                        UInt charge = features_truth[i].getCharge();
358
 
                        present_charges[charge]++;
359
 
                        if (features_truth[i].getMetaValue("correct_charge").toString()=="true")
360
 
                        {
361
 
                                found_charges[charge]++;
362
 
                        }
363
 
                }
364
 
                for (Map<UInt,UInt>::const_iterator it=present_charges.begin(); it!=present_charges.end(); ++it)
365
 
                {
366
 
                        cout << "charge " << it->first << ": " << found_charges[it->first] << "/" << it->second << percentage(found_charges[it->first],it->second) << endl;
367
 
                }
368
 
                
369
 
                //write output
370
 
                if (getStringOption_("out")!="")
371
 
                {
372
 
                        FeatureXMLFile().store(getStringOption_("out"),features_truth);
373
 
                }
374
 
                
375
 
                //ROC curve
376
 
                if (getStringOption_("out_roc")!="")
377
 
                {
378
 
                        TextFile tf;
379
 
                        tf.push_back("false     correct FDR     TPR");
380
 
                        
381
 
                        features_in.sortByIntensity(true);
382
 
                        UInt f_correct = 0;
383
 
                        UInt f_false = 0;
384
 
                        DoubleReal found = features_in.size();
385
 
                        DoubleReal correct = features_truth.size();
386
 
                        for (Size i=0; i<features_in.size(); ++i)
387
 
                        {
388
 
                                if (features_in[i].metaValueExists("correct_hit"))
389
 
                                {
390
 
                                        ++f_correct;
391
 
                                }
392
 
                                else
393
 
                                {
394
 
                                        ++f_false;
395
 
                                }
396
 
                                tf.push_back(String(f_false) + "        " + f_correct + "       " + String::number(f_false/found,3) + " " + String::number(f_correct/correct,3));
397
 
                        }
398
 
                        tf.store(getStringOption_("out_roc"));
399
 
                }
400
 
                
401
 
                return EXECUTION_OK;
402
 
        }
 
178
      else
 
179
      {
 
180
        writeLog_("Error: Input features do not have convex hulls. You have to set 'rt_tol_abs'!");
 
181
        return ILLEGAL_PARAMETERS;
 
182
      }
 
183
    }
 
184
    writeDebug_(String("Final RT tolerance: ") + rt_tol, 1);
 
185
 
 
186
    //general statistics
 
187
    std::vector<DoubleReal> ints_t;
 
188
    std::vector<DoubleReal> ints_i;
 
189
    std::vector<DoubleReal> ints_found;
 
190
    std::vector<DoubleReal> ints_missed;
 
191
    Map<String, UInt> abort_strings;
 
192
 
 
193
    for (Size m = 0; m < features_truth.size(); ++m)
 
194
    {
 
195
      Feature & f_t =  features_truth[m];
 
196
      UInt match_count = 0;
 
197
      bool correct_charge = false;
 
198
      bool exact_centroid_match = false;
 
199
      Size last_match_index = features_in.size() + 1;
 
200
      for (Size a = 0; a < features_in.size(); ++a)
 
201
      {
 
202
        const Feature & f_i =  features_in[a];
 
203
        //RT match
 
204
        if (fabs(f_i.getRT() - f_t.getRT()) < rt_tol)
 
205
        {
 
206
          DoubleReal charge_mz_tol = mz_tol / f_t.getCharge();
 
207
          //Exact m/z match
 
208
          if (fabs(f_i.getMZ() - f_t.getMZ()) < charge_mz_tol)
 
209
          {
 
210
            ++match_count;
 
211
            exact_centroid_match = true;
 
212
            if (f_i.getCharge() == f_t.getCharge()) correct_charge = true;
 
213
            last_match_index = a;
 
214
          }
 
215
          //Centroid is one trace off, but still contained in the convex hull
 
216
          else if (f_i.getConvexHull().getBoundingBox().encloses(f_t.getPosition())
 
217
                  &&
 
218
                   (
 
219
                     fabs(f_i.getMZ() + 1.0 / f_t.getCharge() - f_t.getMZ()) < charge_mz_tol
 
220
                   ||
 
221
                     fabs(f_i.getMZ() - 1.0 / f_t.getCharge() - f_t.getMZ()) < charge_mz_tol
 
222
                   )
 
223
                   )
 
224
          {
 
225
            ++match_count;
 
226
            last_match_index = a;
 
227
            if (f_i.getCharge() == f_t.getCharge()) correct_charge = true;
 
228
          }
 
229
        }
 
230
      }
 
231
 
 
232
      f_t.setMetaValue("matches", match_count);
 
233
      if (match_count == 1)
 
234
      {
 
235
        //flag matched feature with addition information
 
236
        if (correct_charge)
 
237
        {
 
238
          f_t.setMetaValue("correct_charge", String("true"));
 
239
          f_t.setMetaValue("intensity_ratio", features_in[last_match_index].getIntensity() / f_t.getIntensity());
 
240
          features_in[last_match_index].setMetaValue("correct_hit", "true");           //flag the feature for ROC curve
 
241
        }
 
242
        else
 
243
        {
 
244
          f_t.setMetaValue("correct_charge", String("false"));
 
245
        }
 
246
 
 
247
        if (exact_centroid_match)
 
248
        {
 
249
          f_t.setMetaValue("exact_centroid_match", String("true"));
 
250
        }
 
251
        else
 
252
        {
 
253
          f_t.setMetaValue("exact_centroid_match", String("false"));
 
254
        }
 
255
      }
 
256
      //evaluation of correct features only
 
257
      if (match_count == 1 && correct_charge)
 
258
      {
 
259
        ints_t.push_back(f_t.getIntensity());
 
260
        ints_i.push_back(features_in[last_match_index].getIntensity());
 
261
        ints_found.push_back(f_t.getIntensity());
 
262
      }
 
263
      else
 
264
      {
 
265
        ints_missed.push_back(f_t.getIntensity());
 
266
 
 
267
        //look up the abort reason of the nearest seed
 
268
        DoubleReal best_score_ab = 0;
 
269
        String reason = "";
 
270
        for (Size b = 0; b < abort_reasons.size(); ++b)
 
271
        {
 
272
          const Feature & f_ab =  abort_reasons[b];
 
273
          if (fabs(f_ab.getRT() - f_t.getRT()) <= rt_tol
 
274
             && fabs(f_ab.getMZ() - f_t.getMZ()) <= mz_tol)
 
275
          {
 
276
            DoubleReal score = (1.0 - fabs(f_ab.getMZ() - f_t.getMZ()) / mz_tol) * (1.0 - fabs(f_ab.getRT() - f_t.getRT()) / rt_tol);
 
277
            if (score > best_score_ab)
 
278
            {
 
279
              best_score_ab = score;
 
280
              reason = f_ab.getMetaValue("abort_reason");
 
281
            }
 
282
          }
 
283
        }
 
284
        if (reason == "")
 
285
        {
 
286
          reason = "No seed found";
 
287
        }
 
288
        if (abort_strings.has(reason))
 
289
        {
 
290
          abort_strings[reason]++;
 
291
        }
 
292
        else
 
293
        {
 
294
          abort_strings[reason] = 1;
 
295
        }
 
296
      }
 
297
    }
 
298
 
 
299
    //------------------------ general statistics ------------------------
 
300
    cout << endl;
 
301
    cout << "general information:" << endl;
 
302
    cout << "====================" << endl;
 
303
    cout << "input features: " << features_in.size() << endl;
 
304
    cout << "truth features: " << features_truth.size() << endl;
 
305
 
 
306
    //------------------------ matches ------------------------
 
307
    cout << endl;
 
308
    cout << "feature matching statistics:" << endl;
 
309
    cout << "============================" << endl;
 
310
    Size no_match = count(features_truth, "matches", "0");
 
311
    cout << "no match: " << no_match << percentage(no_match, features_truth.size()) << endl;
 
312
    Size one_match = count(features_truth, "matches", "1");
 
313
    cout << "one match: " << one_match << percentage(one_match, features_truth.size()) << endl;
 
314
    Size charge_match = count(features_truth, "correct_charge", "true");
 
315
    cout << " - correct charge: " << charge_match << percentage(charge_match, features_truth.size()) << endl;
 
316
    Size centroid_match = count(features_truth, "exact_centroid_match", "true");
 
317
    cout << " - exact centroid match: " << centroid_match << percentage(centroid_match, features_truth.size()) << endl;
 
318
    Size multi_match = features_truth.size() - count(features_truth, "matches", "0") - count(features_truth, "matches", "1");
 
319
    cout << "multiple matches: " << multi_match << percentage(multi_match, features_truth.size()) << endl;
 
320
    Size incorrect_match = multi_match + one_match - charge_match;
 
321
    cout << "incorrect matches: " << incorrect_match << percentage(incorrect_match, features_truth.size()) << endl;
 
322
    if (abort_reasons.size())
 
323
    {
 
324
      cout << "reasons for unmatched features:" << endl;
 
325
      for (Map<String, UInt>::iterator it = abort_strings.begin(); it != abort_strings.end(); ++it)
 
326
      {
 
327
        cout << " - " << String(it->second).fillLeft(' ', 4) << ": " << it->first << endl;
 
328
      }
 
329
    }
 
330
    //------------------------ intensity ------------------------
 
331
    cout << endl;
 
332
    cout << "intensity statistics:" << endl;
 
333
    cout << "=====================" << endl;
 
334
    if (ints_i.empty())
 
335
    {
 
336
      cout << "correlation of found features: nan" << endl;
 
337
    }
 
338
    else
 
339
    {
 
340
      cout << "correlation of found features: " << pearsonCorrelationCoefficient(ints_i.begin(), ints_i.end(), ints_t.begin(), ints_t.end()) << endl;
 
341
    }
 
342
    if (ints_found.empty())
 
343
    {
 
344
      cout << "intensity distribution of found: 0.0 0.0 0.0 0.0 0.0" << endl;
 
345
    }
 
346
    else
 
347
    {
 
348
      cout << "intensity distribution of found: " << fiveNumbers(ints_found, 1) << endl;
 
349
    }
 
350
    if (ints_missed.empty())
 
351
    {
 
352
      cout << "intensity distribution of missed: 0.0 0.0 0.0 0.0 0.0" << endl;
 
353
    }
 
354
    else
 
355
    {
 
356
      cout << "intensity distribution of missed: " << fiveNumbers(ints_missed, 1) << endl;
 
357
    }
 
358
 
 
359
    //------------------------ charges ------------------------
 
360
    cout << endl;
 
361
    cout << "charge matches statistics:" << endl;
 
362
    cout << "===========================" << endl;
 
363
    Map<UInt, UInt> present_charges, found_charges;
 
364
    for (Size i = 0; i < features_truth.size(); ++i)
 
365
    {
 
366
      UInt charge = features_truth[i].getCharge();
 
367
      present_charges[charge]++;
 
368
      if (features_truth[i].getMetaValue("correct_charge").toString() == "true")
 
369
      {
 
370
        found_charges[charge]++;
 
371
      }
 
372
    }
 
373
    for (Map<UInt, UInt>::const_iterator it = present_charges.begin(); it != present_charges.end(); ++it)
 
374
    {
 
375
      cout << "charge " << it->first << ": " << found_charges[it->first] << "/" << it->second << percentage(found_charges[it->first], it->second) << endl;
 
376
    }
 
377
 
 
378
    //write output
 
379
    if (getStringOption_("out") != "")
 
380
    {
 
381
      FeatureXMLFile().store(getStringOption_("out"), features_truth);
 
382
    }
 
383
 
 
384
    //ROC curve
 
385
    if (getStringOption_("out_roc") != "")
 
386
    {
 
387
      TextFile tf;
 
388
      tf.push_back("false\tcorrect\tFDR\tTPR");
 
389
 
 
390
      features_in.sortByIntensity(true);
 
391
      UInt f_correct = 0;
 
392
      UInt f_false = 0;
 
393
      DoubleReal found = features_in.size();
 
394
      DoubleReal correct = features_truth.size();
 
395
      for (Size i = 0; i < features_in.size(); ++i)
 
396
      {
 
397
        if (features_in[i].metaValueExists("correct_hit"))
 
398
        {
 
399
          ++f_correct;
 
400
        }
 
401
        else
 
402
        {
 
403
          ++f_false;
 
404
        }
 
405
        tf.push_back(String(f_false) + "\t"+ f_correct + "\t"+ String::number(f_false / found, 3) + "\t"+ String::number(f_correct / correct, 3));
 
406
      }
 
407
      tf.store(getStringOption_("out_roc"));
 
408
    }
 
409
 
 
410
    return EXECUTION_OK;
 
411
  }
 
412
 
403
413
};
404
414
 
405
 
int main( int argc, const char** argv )
 
415
int main(int argc, const char ** argv)
406
416
{
407
 
        TOPPFFEval tool;
408
 
        return tool.main(argc,argv);
 
417
  TOPPFFEval tool;
 
418
  return tool.main(argc, argv);
409
419
}
410
420
 
411
421
/// @endcond
412
 
 
413
 
 
414