1
// -*- mode: C++; tab-width: 2; -*-
4
// --------------------------------------------------------------------------
5
// OpenMS Mass Spectrometry Framework
6
// --------------------------------------------------------------------------
7
// Copyright (C) 2003-2011 -- Oliver Kohlbacher, Knut Reinert
9
// This library is free software; you can redistribute it and/or
10
// modify it under the terms of the GNU Lesser General Public
11
// License as published by the Free Software Foundation; either
12
// version 2.1 of the License, or (at your option) any later version.
14
// This library is distributed in the hope that it will be useful,
15
// but WITHOUT ANY WARRANTY; without even the implied warranty of
16
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17
// Lesser General Public License for more details.
19
// You should have received a copy of the GNU Lesser General Public
20
// License along with this library; if not, write to the Free Software
21
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23
// --------------------------------------------------------------------------
24
// $Maintainer: Chris Bielow $
25
// $Authors: Clemens Groepl, Chris Bielow$
26
// --------------------------------------------------------------------------
28
#include <OpenMS/FORMAT/FeatureXMLFile.h>
29
#include <OpenMS/KERNEL/FeatureMap.h>
31
#include <OpenMS/APPLICATIONS/TOPPBase.h>
32
#include <OpenMS/MATH/STATISTICS/LinearRegression.h>
34
#include <OpenMS/SYSTEM/File.h>
39
#include <gsl/gsl_math.h>
41
using namespace OpenMS;
45
typedef Feature::CoordinateType CoordinateType;
47
//-------------------------------------------------------------
49
//-------------------------------------------------------------
52
@page TOPP_AdditiveSeries AdditiveSeries
54
@brief Computes an additive series to quantify a peptide in a set of samples.
58
<td ALIGN = "center" BGCOLOR="#EBEBEB"> pot. predecessor tools </td>
59
<td VALIGN="middle" ROWSPAN=3> \f$ \longrightarrow \f$ AdditiveSeries \f$ \longrightarrow \f$</td>
60
<td ALIGN = "center" BGCOLOR="#EBEBEB"> pot. successor tools </td>
63
<td VALIGN="middle" ALIGN = "center" ROWSPAN=1> @ref TOPP_FeatureFinderCentroided </td>
64
<td VALIGN="middle" ALIGN = "center" ROWSPAN=2> - </td>
67
<td VALIGN="middle" ALIGN = "center" ROWSPAN=1> @ref TOPP_IDMapper </td>
71
This module computes an additive series for an absolute
72
quantification of a peptide in a set of samples. The
73
output consists of a GNUplot script which can be used
74
to visualize the results and some XML output for further processing.
76
In this version, the application computes the additive
77
series as a ratio of the intensities of two different peptides.
78
One of these peptides serves as internal standard for
81
<B>The command line parameters of this tool are:</B>
82
@verbinclude TOPP_AdditiveSeries.cli
85
// We do not want this class to show up in the docu:
93
: TOPPBase("AdditiveSeries","Computes an additive series to quantify a peptide in a set of samples.")
98
void registerOptionsAndFlags_()
100
registerInputFileList_("in","<files>",StringList(),"input files separated by blanks",true);
101
setValidFormats_("in",StringList::create("featureXML"));
102
registerOutputFile_("out","<file>","","output XML file containg regression line and confidence interval");
103
registerDoubleOption_("mz_tolerance","<tol>",1.0, "Tolerance in m/z dimension",false);
104
registerDoubleOption_("rt_tolerance","<tol>",1.0, "Tolerance in RT dimension",false);
105
registerDoubleList_("concentrations","<concentrations>", DoubleList(), "Spiked concentrations");
107
addText_(" Feature/standard position:");
108
registerDoubleOption_("feature_rt","<rt>",std::numeric_limits<double>::quiet_NaN(), "RT position of the feature");
109
registerDoubleOption_("feature_mz","<mz>",std::numeric_limits<double>::quiet_NaN(), "m/z position of the feature");
110
registerDoubleOption_("standard_rt","<rt>",std::numeric_limits<double>::quiet_NaN(), "RT position of the standard");
111
registerDoubleOption_("standard_mz","<mz>",std::numeric_limits<double>::quiet_NaN(), "m/z position of the standard");
114
addText_(" GNUplot options:");
115
registerFlag_("write_gnuplot_output","Flag that activates the GNUplot output");
116
registerStringOption_("out_gp","<name>","","base file name (3 files with different extensions are created)",false);
117
registerStringOption_("mz_unit","<unit>","Thomson","the m/z unit of the plot",false);
118
registerStringOption_("rt_unit","<unit>","seconds","the RT unit of the plot",false);
122
// searches for a features with coordinates within the tolerance in this map
123
// NOTE: It might happen that there are several features at similar coordinates.
124
// In this case, the program cannot be sure which one is the correct. So we decided
125
// to use the one with the strongest intensity.
126
bool readMapFile_(String filename, vector<double>& intensities,
127
CoordinateType tol_mz, CoordinateType tol_rt,
128
DPosition<2> fpos1, DPosition<2> fpos2)
131
if (!File::exists(filename) )
133
cout << "File " << filename << " not found. " << endl;
137
cout << "Reading from " << filename << endl;
139
FeatureXMLFile map_file;
141
map_file.load(filename,map);
146
FeatureMap<>::iterator iter = map.begin();
147
while (iter!= map.end() )
150
// cout << "Read: " << *iter << endl;
152
if ( (iter->getRT() < fpos1[Feature::RT] + tol_rt) &&
153
(iter->getRT() > fpos1[Feature::RT] - tol_rt) &&
154
(iter->getMZ() < fpos1[Feature::MZ] + tol_mz) &&
155
(iter->getMZ() > fpos1[Feature::MZ] - tol_mz) )
157
// cout << "Found feature1 at " << endl;
158
// cout << iter->getRT() << " " << iter->getMZ() << " " << iter->getIntensity() << endl;
159
// feature at correct position found, save intensity
164
else if (feat1->getIntensity() < iter->getIntensity() )
168
// f1_sum += iter->getIntensity();
172
if ( (iter->getRT() < fpos2[Feature::RT] + tol_rt) &&
173
(iter->getRT() > fpos2[Feature::RT] - tol_rt) &&
174
(iter->getMZ() < fpos2[Feature::MZ] + tol_mz) &&
175
(iter->getMZ() > fpos2[Feature::MZ] - tol_mz) )
177
// cout << "Found feature2 at " << endl;
178
// cout << iter->getRT() << " " << iter->getMZ() << " " << iter->getIntensity() << endl;
184
else if (feat2->getIntensity() < iter->getIntensity() )
189
// f2_sum += iter->getIntensity();
195
if (feat1 != 0 && feat2 != 0) //(f1_sum != 0 && f2_sum != 0)
197
cout << "Feature 1: " << *feat1 << endl;
198
cout << "Feature 2: " << *feat2 << endl;
199
cout << "Intensity ratio : " << ( feat1->getIntensity() / feat2->getIntensity() ) << endl;
200
intensities.push_back( feat1->getIntensity() / feat2->getIntensity());
205
writeDebug_(String("Feature 1 was not found. "),1);
208
writeDebug_(String("Feature 2 was not found. "),1);
214
Computes the linear regression for a series of measurements, the
215
x-axis intercept of the regression line and its confidence interval, and
216
writes a couple of files from which a nice plot of all this can be
217
generated using the gnuplot program.
219
bool computeRegressionAndWriteGnuplotFiles_ ( vector<double>::const_iterator const conc_vec_begin,
220
vector<double>::const_iterator const conc_vec_end,
221
vector<double>::const_iterator const area_vec_begin,
222
double const confidence_p,
223
String const filename_prefix,
224
String const output_filename,
225
String const format = "",
226
bool const write_gnuplot = true
232
LinearRegression linreg;
233
linreg.computeRegression ( confidence_p, conc_vec_begin, conc_vec_end, area_vec_begin );
238
// the peak data goes here
239
String datafilename(filename_prefix);
240
datafilename+=String(".dat");
241
ofstream dataout(datafilename.c_str());
243
// the gnuplot commands go here
244
String commandfilename(filename_prefix);
245
commandfilename+=String(".cmd");
246
ofstream cmdout(commandfilename.c_str());
248
// the error bar for the x-axis intercept goes here
249
String errorbarfilename(filename_prefix);
250
errorbarfilename+=String(".err");
251
ofstream errout(errorbarfilename.c_str());
253
// writing the commands
255
"set ylabel \"ion count\"\n"
256
"set xlabel \"concentration\"\n"
257
"set key left Left reverse\n"
260
if ( ! format.empty() )
262
if ( format == "png" )
265
"set terminal png \n"
266
"set output \"" << filename_prefix << ".png\"\n" ;
268
else if ( format == "eps" )
271
"set terminal postscript eps \n"
272
"set output \"" << filename_prefix << ".eps\"\n" ;
277
"plot \"" << datafilename <<"\" w points ps 2 pt 1 lt 8 title \"data\" " // want data on first line of key
278
", " << linreg.getIntercept() << "+" << linreg.getSlope() << "*x lt 2 lw 3 title \"linear regression: "
279
<< linreg.getIntercept() << " + " << linreg.getSlope() << " * x\" "
280
", \"" << datafilename <<"\" w points ps 2 pt 1 lt 8 notitle " // draw data a second time, on top of reg. line
281
", \"" << errorbarfilename << "\" using ($1):(0) w points pt 13 ps 2 lt 1 title \"x-intercept: " << linreg.getXIntercept() << "\" "
282
", \"" << errorbarfilename << "\" w xerrorbars lw 3 lt 1 title \"95% interval: [ " << linreg.getLower() << ", " << linreg.getUpper() << " ]\"\n"
286
// writing the x-axis intercept error bar
287
errout << linreg.getXIntercept() << " 0 " << linreg.getLower() << " " << linreg.getUpper() << endl;
290
// writing the peak data points
291
vector<double>::const_iterator cit = conc_vec_begin;
292
vector<double>::const_iterator ait = area_vec_begin;
293
dataout.precision(writtenDigits<DoubleReal>());
294
for ( ;cit != conc_vec_end; ++cit, ++ait )
296
dataout << *cit << ' ' << *ait << '\n';
300
} // end if (write_gnuplot)
302
// write results to XML file
304
results.open(output_filename.c_str());
306
results << "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>" << endl;
307
results << "<results_additiveseries>" << endl;
308
results << "\t<slope>" << linreg.getSlope() << "</slope>" << endl;
309
results << "\t<intercept>" << linreg.getIntercept() << "</intercept>" << endl;
310
results << "\t<x_intercept>" << linreg.getXIntercept() << "</x_intercept>" << endl;
311
results << "\t<confidence_lowerlimit>" << linreg.getLower() << "</confidence_lowerlimit>" << endl;
312
results << "\t<confidence_upperlimit>" << linreg.getUpper() << "</confidence_upperlimit>" << endl;
313
results << "\t<pearson_squared>" << linreg.getRSquared() << "</pearson_squared>" << endl;
314
results << "\t<std_residuals>" << linreg.getStandDevRes() << "</std_residuals>" << endl;
315
results << "\t<t_statistic>" << linreg.getTValue() << "</t_statistic>" << endl;
316
results << "</results_additiveseries>" << endl;
329
ExitCodes main_(int , const char**)
331
//-------------------------------------------------------------
332
// parsing parameters
333
//-------------------------------------------------------------
334
Param const& add_param = getParam_();
335
writeDebug_("Used parameters", add_param, 3);
337
CoordinateType tol_mz = getDoubleOption_("mz_tolerance");
338
CoordinateType tol_rt = getDoubleOption_("rt_tolerance");
340
String out_f = getStringOption_("out");
342
if (add_param.getValue("feature_mz").isEmpty() || add_param.getValue("feature_rt").isEmpty() )
344
writeLog_("Feature coordinates not given. Aborting.");
345
return ILLEGAL_PARAMETERS;
347
DPosition<2> feat_pos1;
348
feat_pos1[Feature::MZ] = (CoordinateType) add_param.getValue("feature_mz");
349
feat_pos1[Feature::RT] = (CoordinateType) add_param.getValue("feature_rt");
351
if (add_param.getValue("standard_mz").isEmpty() || add_param.getValue("standard_rt").isEmpty() )
353
writeLog_("Standard coordinates not given. Aborting.");
354
return ILLEGAL_PARAMETERS;
356
DPosition<2> feat_pos2;
357
feat_pos2[Feature::MZ] = (CoordinateType) add_param.getValue("standard_mz");
358
feat_pos2[Feature::RT] = (CoordinateType) add_param.getValue("standard_rt");
360
writeDebug_(String("Setting tolerances to ") + tol_mz + " " + tol_rt,1);
362
// introduce a flag for each concentration. true => the corresponding feature was found
365
// fetching list of files
366
StringList files = getStringList_("in");
369
vector<DoubleReal> intensities;
370
vector<String>::const_iterator cit = files.begin();
371
while (cit != files.end())
373
if (readMapFile_(*cit,intensities,tol_mz,tol_rt,feat_pos1,feat_pos2) )
375
flags.push_back(true);
379
flags.push_back(false);
384
// read the spiked concentrations
385
DoubleList sp_concentrations = getDoubleList_("concentrations");
387
vector<double> sp_concentrations2;
388
for (Size i=0; i<sp_concentrations.size(); i++)
390
if (flags.at(i) == true )
392
sp_concentrations2.push_back( sp_concentrations.at(i) );
396
cout << "Found feature pairs: " << intensities.size() << endl;
397
cout << "Spiked concentrations: " << sp_concentrations.size() << endl;
399
if (intensities.empty() || sp_concentrations.empty() )
402
writeLog_("Did not find any data. Aborting!");
403
return ILLEGAL_PARAMETERS;
406
// set prefix of gnuplot output
407
String filename_prefix = getStringOption_("out_gp");;
408
if (getFlag_("write_gnuplot_output"))
410
writeDebug_(String("Writing gnuplot output"),1);
411
computeRegressionAndWriteGnuplotFiles_ (sp_concentrations2.begin(), sp_concentrations2.end(),
412
intensities.begin(), 0.95, filename_prefix, out_f, "eps", true);
416
writeDebug_(" No GNUplot output is written...",1);
417
computeRegressionAndWriteGnuplotFiles_ (sp_concentrations2.begin(), sp_concentrations2.end(),
418
intensities.begin(), 0.95, filename_prefix, out_f, "eps", false);
430
int main( int argc, const char** argv )
433
return tool.main(argc,argv);