52
59
lines(c(0,1),c(0,1))
55
<B>The command line parameters of this tool are:</B>
56
@verbinclude UTILS_FFEval.cli
62
<B>The command line parameters of this tool are:</B>
63
@verbinclude UTILS_FFEval.cli
64
<B>INI file documentation of this tool:</B>
65
@htmlinclude UTILS_FFEval.html
59
68
// We do not want this class to show up in the docu:
60
69
/// @cond TOPPCLASSES
67
: TOPPBase("FFEval","Evaluation tool for feature detection algorithms.",false)
73
void registerOptionsAndFlags_()
75
addText_("Input options");
76
registerInputFile_("in","<file>","","Feature input file, which contains the data to be tested against the truth file.");
77
setValidFormats_("in", StringList::create("featureXML"));
78
registerInputFile_("truth","<file>","","Truth feature file that defines what features should be found.");
79
setValidFormats_("truth", StringList::create("featureXML"));
80
registerDoubleOption_("rt_tol","<double>",0.3,"Allowed tolerance of RT relative to average feature RT span.", false);
81
setMinFloat_("rt_tol",0);
82
registerDoubleOption_("rt_tol_abs","<double>",-1.0,"Allowed absolute tolerance of RT (overwrites 'rt_tol' if set above zero).", false);
83
setMinFloat_("rt_tol_abs",-1);
84
registerDoubleOption_("mz_tol","<double>",0.25,"Allowed tolerance in m/z (is divided by charge).", false);
85
setMinFloat_("mz_tol",0);
86
registerOutputFile_("out","<file>","","Feature output file. If given, an annotated input file is written.", false);
87
setValidFormats_("out", StringList::create("featureXML"));
88
registerInputFile_("abort_reasons","<file>","","Feature file containing seeds with abort reasons.",false);
89
setValidFormats_("abort_reasons", StringList::create("featureXML"));
90
registerOutputFile_("out_roc","<file>","","If given, a ROC curve file is created (ROC points based on intensity threshold)", false);
93
/// Counts the number of features with meta value @p name equal to @p value
94
UInt count(const FeatureMap<>& map, const String& name, const String& value="")
97
for (Size i=0; i<map.size(); ++i)
99
if (map[i].metaValueExists(name))
107
if (map[i].getMetaValue(name).toString()==value)
117
/// Returns the total number and percentage in parentheses
118
String percentage(Size count, Size size)
120
return String(" (") + String::number(100.0*count/size,2) + "%)";
123
String fiveNumbers(vector<DoubleReal> a, UInt decimal_places)
125
sort(a.begin(),a.end());
126
return String::number(a[0],decimal_places) + " " + String::number(a[a.size()/4],decimal_places) + " " + String::number(a[a.size()/2],decimal_places) + " " + String::number(a[(3*a.size())/4],decimal_places) + " " + String::number(a.back(),decimal_places);
129
ExitCodes main_(int , const char**)
132
FeatureMap<> features_in,features_truth;
133
FeatureXMLFile().load(getStringOption_("in"),features_in);
134
features_in.sortByPosition();
135
FeatureXMLFile().load(getStringOption_("truth"),features_truth);
136
features_truth.sortByPosition();
137
FeatureMap<> abort_reasons;
138
if(getStringOption_("abort_reasons")!="")
140
FeatureXMLFile().load(getStringOption_("abort_reasons"),abort_reasons);
142
DoubleReal mz_tol = getDoubleOption_("mz_tol");
143
writeDebug_(String("Final MZ tolerance: ") + mz_tol, 1);
145
//determine average RT tolerance:
146
//median feature RT span times given factor
147
vector<DoubleReal> rt_spans;
148
for (Size t=0; t<features_in.size(); ++t)
150
if (features_in[t].getConvexHulls().size()!=0)
152
rt_spans.push_back(features_in[t].getConvexHull().getBoundingBox().width());
155
//feature convex hulls are available => relative RT span
156
DoubleReal rt_tol = getDoubleOption_("rt_tol_abs");
159
if ( !rt_spans.empty() )
161
sort(rt_spans.begin(), rt_spans.end());
162
rt_tol = getDoubleOption_("rt_tol")*rt_spans[rt_spans.size()/2];
76
TOPPBase("FFEval", "Evaluation tool for feature detection algorithms.", false)
82
void registerOptionsAndFlags_()
84
registerInputFile_("in", "<file>", "", "Feature input file, which contains the data to be tested against the truth file.");
85
setValidFormats_("in", StringList::create("featureXML"));
86
registerInputFile_("truth", "<file>", "", "Truth feature file that defines what features should be found.");
87
setValidFormats_("truth", StringList::create("featureXML"));
88
registerDoubleOption_("rt_tol", "<double>", 0.3, "Allowed tolerance of RT relative to average feature RT span.", false);
89
setMinFloat_("rt_tol", 0);
90
registerDoubleOption_("rt_tol_abs", "<double>", -1.0, "Allowed absolute tolerance of RT (overwrites 'rt_tol' if set above zero).", false);
91
setMinFloat_("rt_tol_abs", -1);
92
registerDoubleOption_("mz_tol", "<double>", 0.25, "Allowed tolerance in m/z (is divided by charge).", false);
93
setMinFloat_("mz_tol", 0);
94
registerOutputFile_("out", "<file>", "", "Feature output file. If given, an annotated input file is written.", false);
95
setValidFormats_("out", StringList::create("featureXML"));
96
registerInputFile_("abort_reasons", "<file>", "", "Feature file containing seeds with abort reasons.", false);
97
setValidFormats_("abort_reasons", StringList::create("featureXML"));
98
registerOutputFile_("out_roc", "<file>", "", "If given, a ROC curve file is created (ROC points based on intensity threshold)", false);
99
setValidFormats_("out_roc", StringList::create("csv"));
102
/// Counts the number of features with meta value @p name equal to @p value
103
UInt count(const FeatureMap<> & map, const String & name, const String & value = "")
106
for (Size i = 0; i < map.size(); ++i)
108
if (map[i].metaValueExists(name))
116
if (map[i].getMetaValue(name).toString() == value)
126
/// Returns the total number and percentage in parentheses
127
String percentage(Size count, Size size)
129
return String(" (") + String::number(100.0 * count / size, 2) + "%)";
132
String fiveNumbers(vector<DoubleReal> a, UInt decimal_places)
134
sort(a.begin(), a.end());
135
return String::number(a[0], decimal_places) + " " + String::number(a[a.size() / 4], decimal_places) + " " + String::number(a[a.size() / 2], decimal_places) + " " + String::number(a[(3 * a.size()) / 4], decimal_places) + " " + String::number(a.back(), decimal_places);
138
ExitCodes main_(int, const char **)
141
FeatureMap<> features_in, features_truth;
142
FeatureXMLFile().load(getStringOption_("in"), features_in);
143
features_in.sortByPosition();
144
FeatureXMLFile().load(getStringOption_("truth"), features_truth);
145
features_truth.sortByPosition();
146
FeatureMap<> abort_reasons;
147
if (getStringOption_("abort_reasons") != "")
149
FeatureXMLFile().load(getStringOption_("abort_reasons"), abort_reasons);
151
DoubleReal mz_tol = getDoubleOption_("mz_tol");
152
writeDebug_(String("Final MZ tolerance: ") + mz_tol, 1);
154
//determine average RT tolerance:
155
//median feature RT span times given factor
156
vector<DoubleReal> rt_spans;
157
for (Size t = 0; t < features_in.size(); ++t)
159
if (features_in[t].getConvexHulls().size() != 0)
161
rt_spans.push_back(features_in[t].getConvexHull().getBoundingBox().width());
164
//feature convex hulls are available => relative RT span
165
DoubleReal rt_tol = getDoubleOption_("rt_tol_abs");
168
if (!rt_spans.empty())
170
sort(rt_spans.begin(), rt_spans.end());
171
rt_tol = getDoubleOption_("rt_tol") * rt_spans[rt_spans.size() / 2];
164
173
else if (features_in.empty())
166
// do nothing, rt_tol does not really matter, as we will not find a match anyway, but we want to have the stats
175
// do nothing, rt_tol does not really matter, as we will not find a match anyway, but we want to have the stats
167
176
// at the end, so we do not abort
171
writeLog_("Error: Input features do not have convex hulls. You have to set 'rt_tol_abs'!");
172
return ILLEGAL_PARAMETERS;
175
writeDebug_(String("Final RT tolerance: ") + rt_tol, 1);
178
std::vector<DoubleReal> ints_t;
179
std::vector<DoubleReal> ints_i;
180
std::vector<DoubleReal> ints_found;
181
std::vector<DoubleReal> ints_missed;
182
Map<String,UInt> abort_strings;
184
for (Size m=0; m<features_truth.size(); ++m)
186
Feature& f_t = features_truth[m];
187
UInt match_count = 0;
188
bool correct_charge = false;
189
bool exact_centroid_match = false;
190
Size last_match_index = features_in.size()+1;
191
for (Size a=0; a<features_in.size(); ++a)
193
const Feature& f_i = features_in[a];
195
if (fabs(f_i.getRT()-f_t.getRT())<rt_tol)
197
DoubleReal charge_mz_tol = mz_tol / f_t.getCharge();
199
if (fabs(f_i.getMZ()-f_t.getMZ())<charge_mz_tol)
202
exact_centroid_match = true;
203
if(f_i.getCharge()==f_t.getCharge()) correct_charge = true;
204
last_match_index = a;
206
//Centroid is one trace off, but still contained in the convex hull
207
else if (f_i.getConvexHull().getBoundingBox().encloses(f_t.getPosition())
210
fabs(f_i.getMZ()+1.0/f_t.getCharge()-f_t.getMZ())<charge_mz_tol
212
fabs(f_i.getMZ()-1.0/f_t.getCharge()-f_t.getMZ())<charge_mz_tol
217
last_match_index = a;
218
if(f_i.getCharge()==f_t.getCharge()) correct_charge = true;
223
f_t.setMetaValue("matches",match_count);
226
//flag matched feature with addition information
229
f_t.setMetaValue("correct_charge",String("true"));
230
f_t.setMetaValue("intensity_ratio",features_in[last_match_index].getIntensity()/f_t.getIntensity());
231
features_in[last_match_index].setMetaValue("correct_hit", "true"); //flag the feature for ROC curve
235
f_t.setMetaValue("correct_charge",String("false"));
238
if (exact_centroid_match)
240
f_t.setMetaValue("exact_centroid_match",String("true"));
244
f_t.setMetaValue("exact_centroid_match",String("false"));
247
//evaluation of correct features only
248
if (match_count==1 && correct_charge)
250
ints_t.push_back(f_t.getIntensity());
251
ints_i.push_back(features_in[last_match_index].getIntensity());
252
ints_found.push_back(f_t.getIntensity());
256
ints_missed.push_back(f_t.getIntensity());
258
//look up the abort reason of the nearest seed
259
DoubleReal best_score_ab = 0;
261
for (Size b=0; b<abort_reasons.size(); ++b)
263
const Feature& f_ab = abort_reasons[b];
264
if ( fabs(f_ab.getRT() - f_t.getRT()) <= rt_tol
265
&& fabs(f_ab.getMZ() - f_t.getMZ()) <= mz_tol)
267
DoubleReal score = (1.0-fabs(f_ab.getMZ()-f_t.getMZ())/mz_tol) * (1.0-fabs(f_ab.getRT()-f_t.getRT())/rt_tol);
268
if (score > best_score_ab)
270
best_score_ab = score;
271
reason = f_ab.getMetaValue("abort_reason");
277
reason = "No seed found";
279
if (abort_strings.has(reason))
281
abort_strings[reason]++;
285
abort_strings[reason] = 1;
290
//------------------------ general statistics ------------------------
292
cout << "general information:" << endl;
293
cout << "====================" << endl;
294
cout << "input features: " << features_in.size() << endl;
295
cout << "truth features: " << features_truth.size() << endl;
297
//------------------------ matches ------------------------
299
cout << "feature matching statistics:" << endl;
300
cout << "============================" << endl;
301
Size no_match = count(features_truth,"matches","0");
302
cout << "no match: " << no_match << percentage(no_match,features_truth.size()) << endl;
303
Size one_match = count(features_truth,"matches","1");
304
cout << "one match: " << one_match << percentage(one_match,features_truth.size()) << endl;
305
Size charge_match = count(features_truth,"correct_charge","true");
306
cout << " - correct charge: " << charge_match << percentage(charge_match,features_truth.size()) << endl;
307
Size centroid_match = count(features_truth,"exact_centroid_match","true");
308
cout << " - exact centroid match: " << centroid_match << percentage(centroid_match,features_truth.size()) << endl;
309
Size multi_match = features_truth.size() - count(features_truth,"matches","0") - count(features_truth,"matches","1");
310
cout << "multiple matches: " << multi_match << percentage(multi_match,features_truth.size()) << endl;
311
Size incorrect_match = multi_match + one_match - charge_match;
312
cout << "incorrect matches: " << incorrect_match << percentage(incorrect_match,features_truth.size()) << endl;
313
if (abort_reasons.size())
315
cout << "reasons for unmatched features:" << endl;
316
for (Map<String,UInt>::iterator it=abort_strings.begin(); it!=abort_strings.end(); ++it)
318
cout << " - " << String(it->second).fillLeft(' ',4) << ": " << it->first << endl;
321
//------------------------ intensity ------------------------
323
cout << "intensity statistics:" << endl;
324
cout << "=====================" << endl;
327
cout << "correlation of found features: nan" << endl;
331
cout << "correlation of found features: " << pearsonCorrelationCoefficient(ints_i.begin(),ints_i.end(),ints_t.begin(),ints_t.end()) << endl;
333
if (ints_found.empty())
335
cout << "intensity distribution of found: 0.0 0.0 0.0 0.0 0.0" << endl;
339
cout << "intensity distribution of found: " << fiveNumbers(ints_found,1) << endl;
341
if (ints_missed.empty())
343
cout << "intensity distribution of missed: 0.0 0.0 0.0 0.0 0.0" << endl;
347
cout << "intensity distribution of missed: " << fiveNumbers(ints_missed,1) << endl;
350
//------------------------ charges ------------------------
352
cout << "charge matches statistics:" << endl;
353
cout << "===========================" << endl;
354
Map<UInt,UInt> present_charges,found_charges;
355
for (Size i=0;i<features_truth.size(); ++i)
357
UInt charge = features_truth[i].getCharge();
358
present_charges[charge]++;
359
if (features_truth[i].getMetaValue("correct_charge").toString()=="true")
361
found_charges[charge]++;
364
for (Map<UInt,UInt>::const_iterator it=present_charges.begin(); it!=present_charges.end(); ++it)
366
cout << "charge " << it->first << ": " << found_charges[it->first] << "/" << it->second << percentage(found_charges[it->first],it->second) << endl;
370
if (getStringOption_("out")!="")
372
FeatureXMLFile().store(getStringOption_("out"),features_truth);
376
if (getStringOption_("out_roc")!="")
379
tf.push_back("false correct FDR TPR");
381
features_in.sortByIntensity(true);
384
DoubleReal found = features_in.size();
385
DoubleReal correct = features_truth.size();
386
for (Size i=0; i<features_in.size(); ++i)
388
if (features_in[i].metaValueExists("correct_hit"))
396
tf.push_back(String(f_false) + " " + f_correct + " " + String::number(f_false/found,3) + " " + String::number(f_correct/correct,3));
398
tf.store(getStringOption_("out_roc"));
180
writeLog_("Error: Input features do not have convex hulls. You have to set 'rt_tol_abs'!");
181
return ILLEGAL_PARAMETERS;
184
writeDebug_(String("Final RT tolerance: ") + rt_tol, 1);
187
std::vector<DoubleReal> ints_t;
188
std::vector<DoubleReal> ints_i;
189
std::vector<DoubleReal> ints_found;
190
std::vector<DoubleReal> ints_missed;
191
Map<String, UInt> abort_strings;
193
for (Size m = 0; m < features_truth.size(); ++m)
195
Feature & f_t = features_truth[m];
196
UInt match_count = 0;
197
bool correct_charge = false;
198
bool exact_centroid_match = false;
199
Size last_match_index = features_in.size() + 1;
200
for (Size a = 0; a < features_in.size(); ++a)
202
const Feature & f_i = features_in[a];
204
if (fabs(f_i.getRT() - f_t.getRT()) < rt_tol)
206
DoubleReal charge_mz_tol = mz_tol / f_t.getCharge();
208
if (fabs(f_i.getMZ() - f_t.getMZ()) < charge_mz_tol)
211
exact_centroid_match = true;
212
if (f_i.getCharge() == f_t.getCharge()) correct_charge = true;
213
last_match_index = a;
215
//Centroid is one trace off, but still contained in the convex hull
216
else if (f_i.getConvexHull().getBoundingBox().encloses(f_t.getPosition())
219
fabs(f_i.getMZ() + 1.0 / f_t.getCharge() - f_t.getMZ()) < charge_mz_tol
221
fabs(f_i.getMZ() - 1.0 / f_t.getCharge() - f_t.getMZ()) < charge_mz_tol
226
last_match_index = a;
227
if (f_i.getCharge() == f_t.getCharge()) correct_charge = true;
232
f_t.setMetaValue("matches", match_count);
233
if (match_count == 1)
235
//flag matched feature with addition information
238
f_t.setMetaValue("correct_charge", String("true"));
239
f_t.setMetaValue("intensity_ratio", features_in[last_match_index].getIntensity() / f_t.getIntensity());
240
features_in[last_match_index].setMetaValue("correct_hit", "true"); //flag the feature for ROC curve
244
f_t.setMetaValue("correct_charge", String("false"));
247
if (exact_centroid_match)
249
f_t.setMetaValue("exact_centroid_match", String("true"));
253
f_t.setMetaValue("exact_centroid_match", String("false"));
256
//evaluation of correct features only
257
if (match_count == 1 && correct_charge)
259
ints_t.push_back(f_t.getIntensity());
260
ints_i.push_back(features_in[last_match_index].getIntensity());
261
ints_found.push_back(f_t.getIntensity());
265
ints_missed.push_back(f_t.getIntensity());
267
//look up the abort reason of the nearest seed
268
DoubleReal best_score_ab = 0;
270
for (Size b = 0; b < abort_reasons.size(); ++b)
272
const Feature & f_ab = abort_reasons[b];
273
if (fabs(f_ab.getRT() - f_t.getRT()) <= rt_tol
274
&& fabs(f_ab.getMZ() - f_t.getMZ()) <= mz_tol)
276
DoubleReal score = (1.0 - fabs(f_ab.getMZ() - f_t.getMZ()) / mz_tol) * (1.0 - fabs(f_ab.getRT() - f_t.getRT()) / rt_tol);
277
if (score > best_score_ab)
279
best_score_ab = score;
280
reason = f_ab.getMetaValue("abort_reason");
286
reason = "No seed found";
288
if (abort_strings.has(reason))
290
abort_strings[reason]++;
294
abort_strings[reason] = 1;
299
//------------------------ general statistics ------------------------
301
cout << "general information:" << endl;
302
cout << "====================" << endl;
303
cout << "input features: " << features_in.size() << endl;
304
cout << "truth features: " << features_truth.size() << endl;
306
//------------------------ matches ------------------------
308
cout << "feature matching statistics:" << endl;
309
cout << "============================" << endl;
310
Size no_match = count(features_truth, "matches", "0");
311
cout << "no match: " << no_match << percentage(no_match, features_truth.size()) << endl;
312
Size one_match = count(features_truth, "matches", "1");
313
cout << "one match: " << one_match << percentage(one_match, features_truth.size()) << endl;
314
Size charge_match = count(features_truth, "correct_charge", "true");
315
cout << " - correct charge: " << charge_match << percentage(charge_match, features_truth.size()) << endl;
316
Size centroid_match = count(features_truth, "exact_centroid_match", "true");
317
cout << " - exact centroid match: " << centroid_match << percentage(centroid_match, features_truth.size()) << endl;
318
Size multi_match = features_truth.size() - count(features_truth, "matches", "0") - count(features_truth, "matches", "1");
319
cout << "multiple matches: " << multi_match << percentage(multi_match, features_truth.size()) << endl;
320
Size incorrect_match = multi_match + one_match - charge_match;
321
cout << "incorrect matches: " << incorrect_match << percentage(incorrect_match, features_truth.size()) << endl;
322
if (abort_reasons.size())
324
cout << "reasons for unmatched features:" << endl;
325
for (Map<String, UInt>::iterator it = abort_strings.begin(); it != abort_strings.end(); ++it)
327
cout << " - " << String(it->second).fillLeft(' ', 4) << ": " << it->first << endl;
330
//------------------------ intensity ------------------------
332
cout << "intensity statistics:" << endl;
333
cout << "=====================" << endl;
336
cout << "correlation of found features: nan" << endl;
340
cout << "correlation of found features: " << pearsonCorrelationCoefficient(ints_i.begin(), ints_i.end(), ints_t.begin(), ints_t.end()) << endl;
342
if (ints_found.empty())
344
cout << "intensity distribution of found: 0.0 0.0 0.0 0.0 0.0" << endl;
348
cout << "intensity distribution of found: " << fiveNumbers(ints_found, 1) << endl;
350
if (ints_missed.empty())
352
cout << "intensity distribution of missed: 0.0 0.0 0.0 0.0 0.0" << endl;
356
cout << "intensity distribution of missed: " << fiveNumbers(ints_missed, 1) << endl;
359
//------------------------ charges ------------------------
361
cout << "charge matches statistics:" << endl;
362
cout << "===========================" << endl;
363
Map<UInt, UInt> present_charges, found_charges;
364
for (Size i = 0; i < features_truth.size(); ++i)
366
UInt charge = features_truth[i].getCharge();
367
present_charges[charge]++;
368
if (features_truth[i].getMetaValue("correct_charge").toString() == "true")
370
found_charges[charge]++;
373
for (Map<UInt, UInt>::const_iterator it = present_charges.begin(); it != present_charges.end(); ++it)
375
cout << "charge " << it->first << ": " << found_charges[it->first] << "/" << it->second << percentage(found_charges[it->first], it->second) << endl;
379
if (getStringOption_("out") != "")
381
FeatureXMLFile().store(getStringOption_("out"), features_truth);
385
if (getStringOption_("out_roc") != "")
388
tf.push_back("false\tcorrect\tFDR\tTPR");
390
features_in.sortByIntensity(true);
393
DoubleReal found = features_in.size();
394
DoubleReal correct = features_truth.size();
395
for (Size i = 0; i < features_in.size(); ++i)
397
if (features_in[i].metaValueExists("correct_hit"))
405
tf.push_back(String(f_false) + "\t"+ f_correct + "\t"+ String::number(f_false / found, 3) + "\t"+ String::number(f_correct / correct, 3));
407
tf.store(getStringOption_("out_roc"));
405
int main( int argc, const char** argv )
415
int main(int argc, const char ** argv)
408
return tool.main(argc,argv);
418
return tool.main(argc, argv);