~ubuntu-branches/ubuntu/wily/openms/wily

« back to all changes in this revision

Viewing changes to source/APPLICATIONS/TOPP/AdditiveSeries.C

  • Committer: Package Import Robot
  • Author(s): Filippo Rusconi
  • Date: 2012-11-12 15:58:12 UTC
  • Revision ID: package-import@ubuntu.com-20121112155812-vr15wtg9b50cuesg
Tags: upstream-1.9.0
ImportĀ upstreamĀ versionĀ 1.9.0

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
// -*- mode: C++; tab-width: 2; -*-
 
2
// vi: set ts=2:
 
3
//
 
4
// --------------------------------------------------------------------------
 
5
//                   OpenMS Mass Spectrometry Framework
 
6
// --------------------------------------------------------------------------
 
7
//  Copyright (C) 2003-2011 -- Oliver Kohlbacher, Knut Reinert
 
8
//
 
9
//  This library is free software; you can redistribute it and/or
 
10
//  modify it under the terms of the GNU Lesser General Public
 
11
//  License as published by the Free Software Foundation; either
 
12
//  version 2.1 of the License, or (at your option) any later version.
 
13
//
 
14
//  This library is distributed in the hope that it will be useful,
 
15
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
 
16
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 
17
//  Lesser General Public License for more details.
 
18
//
 
19
//  You should have received a copy of the GNU Lesser General Public
 
20
//  License along with this library; if not, write to the Free Software
 
21
//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
22
//
 
23
// --------------------------------------------------------------------------
 
24
// $Maintainer: Chris Bielow $
 
25
// $Authors: Clemens Groepl, Chris Bielow$
 
26
// --------------------------------------------------------------------------
 
27
 
 
28
#include <OpenMS/FORMAT/FeatureXMLFile.h>
 
29
#include <OpenMS/KERNEL/FeatureMap.h>
 
30
 
 
31
#include <OpenMS/APPLICATIONS/TOPPBase.h>
 
32
#include <OpenMS/MATH/STATISTICS/LinearRegression.h>
 
33
 
 
34
#include <OpenMS/SYSTEM/File.h>
 
35
 
 
36
#include <map>
 
37
#include <vector>
 
38
#include <algorithm>
 
39
#include <gsl/gsl_math.h>
 
40
 
 
41
using namespace OpenMS;
 
42
using namespace Math;
 
43
using namespace std;
 
44
 
 
45
typedef Feature::CoordinateType CoordinateType;
 
46
 
 
47
//-------------------------------------------------------------
 
48
//Doxygen docu
 
49
//-------------------------------------------------------------
 
50
 
 
51
/**
 
52
        @page TOPP_AdditiveSeries AdditiveSeries
 
53
 
 
54
        @brief Computes an additive series to quantify a peptide in a set of samples.
 
55
<CENTER>
 
56
        <table>
 
57
                <tr>
 
58
                        <td ALIGN = "center" BGCOLOR="#EBEBEB"> pot. predecessor tools </td>
 
59
                        <td VALIGN="middle" ROWSPAN=3> \f$ \longrightarrow \f$ AdditiveSeries \f$ \longrightarrow \f$</td>
 
60
                        <td ALIGN = "center" BGCOLOR="#EBEBEB"> pot. successor tools </td>
 
61
                </tr>
 
62
                <tr>
 
63
      <td VALIGN="middle" ALIGN = "center" ROWSPAN=1> @ref TOPP_FeatureFinderCentroided </td>
 
64
                        <td VALIGN="middle" ALIGN = "center" ROWSPAN=2> - </td>
 
65
                </tr>
 
66
                <tr>
 
67
                        <td VALIGN="middle" ALIGN = "center" ROWSPAN=1> @ref TOPP_IDMapper </td>
 
68
                </tr>
 
69
        </table>
 
70
</CENTER>
 
71
        This module computes an additive series for an absolute
 
72
        quantification of a peptide in a set of samples. The
 
73
        output consists of a GNUplot script which can be used
 
74
        to visualize the results and some XML output for further processing.
 
75
 
 
76
        In this version, the application computes the additive
 
77
        series as a ratio of the intensities of two different peptides.
 
78
        One of these peptides serves as internal standard for
 
79
        calibration.
 
80
 
 
81
        <B>The command line parameters of this tool are:</B>
 
82
        @verbinclude TOPP_AdditiveSeries.cli
 
83
*/
 
84
 
 
85
// We do not want this class to show up in the docu:
 
86
/// @cond TOPPCLASSES
 
87
 
 
88
class AdditiveSeries
 
89
        : public TOPPBase
 
90
{
 
91
 public:
 
92
        AdditiveSeries()
 
93
                : TOPPBase("AdditiveSeries","Computes an additive series to quantify a peptide in a set of samples.")
 
94
        {
 
95
        }
 
96
 
 
97
 protected:
 
98
        void registerOptionsAndFlags_()
 
99
        {
 
100
                registerInputFileList_("in","<files>",StringList(),"input files separated by blanks",true);
 
101
                setValidFormats_("in",StringList::create("featureXML"));
 
102
                registerOutputFile_("out","<file>","","output XML file containg regression line and confidence interval");
 
103
                registerDoubleOption_("mz_tolerance","<tol>",1.0, "Tolerance in m/z dimension",false);
 
104
                registerDoubleOption_("rt_tolerance","<tol>",1.0, "Tolerance in RT dimension",false);
 
105
    registerDoubleList_("concentrations","<concentrations>", DoubleList(), "Spiked concentrations");
 
106
                addEmptyLine_();
 
107
                addText_("  Feature/standard position:");
 
108
                registerDoubleOption_("feature_rt","<rt>",std::numeric_limits<double>::quiet_NaN(), "RT position of the feature");
 
109
                registerDoubleOption_("feature_mz","<mz>",std::numeric_limits<double>::quiet_NaN(), "m/z position of the feature");
 
110
                registerDoubleOption_("standard_rt","<rt>",std::numeric_limits<double>::quiet_NaN(), "RT position of the standard");
 
111
                registerDoubleOption_("standard_mz","<mz>",std::numeric_limits<double>::quiet_NaN(), "m/z position of the standard");
 
112
 
 
113
                addEmptyLine_();
 
114
                addText_("  GNUplot options:");
 
115
                registerFlag_("write_gnuplot_output","Flag that activates the GNUplot output");
 
116
                registerStringOption_("out_gp","<name>","","base file name (3 files with different extensions are created)",false);
 
117
                registerStringOption_("mz_unit","<unit>","Thomson","the m/z unit of the plot",false);
 
118
                registerStringOption_("rt_unit","<unit>","seconds","the RT unit of the plot",false);
 
119
  }
 
120
 
 
121
 
 
122
  // searches for a features with coordinates within the tolerance in this map
 
123
        // NOTE: It might happen that there are several features at similar coordinates.
 
124
        // In this case, the program cannot be sure which one is the correct. So we decided
 
125
        // to use the one with the strongest intensity.
 
126
        bool readMapFile_(String filename, vector<double>& intensities,
 
127
                                                                                CoordinateType tol_mz, CoordinateType tol_rt,
 
128
                                                                                DPosition<2> fpos1, DPosition<2> fpos2)
 
129
        {
 
130
 
 
131
                if (!File::exists(filename) )
 
132
                {
 
133
                        cout << "File " << filename << " not found. " << endl;
 
134
                        return false;
 
135
                }
 
136
 
 
137
                cout << "Reading from " << filename << endl;
 
138
 
 
139
                FeatureXMLFile map_file;
 
140
                FeatureMap<> map;
 
141
                map_file.load(filename,map);
 
142
 
 
143
                Feature* feat1 = 0;
 
144
                Feature* feat2 = 0;
 
145
 
 
146
                FeatureMap<>::iterator iter = map.begin();
 
147
                while (iter!= map.end() )
 
148
                {
 
149
 
 
150
//                      cout << "Read: " << *iter << endl;
 
151
 
 
152
                        if ( (iter->getRT() <  fpos1[Feature::RT] + tol_rt) &&
 
153
                                         (iter->getRT() >  fpos1[Feature::RT] - tol_rt) &&
 
154
                                         (iter->getMZ() <  fpos1[Feature::MZ] + tol_mz) &&
 
155
                                         (iter->getMZ() >  fpos1[Feature::MZ] - tol_mz) )
 
156
                        {
 
157
//                              cout << "Found feature1 at " << endl;
 
158
//                              cout << iter->getRT() << " " << iter->getMZ()  << " " << iter->getIntensity() <<  endl;
 
159
                                // feature at correct position found, save intensity
 
160
                                if (!feat1)
 
161
                                {
 
162
                                        feat1 = &(*iter);
 
163
                                }
 
164
                                else if (feat1->getIntensity() <  iter->getIntensity() )
 
165
                                {
 
166
                                        feat1 = &(*iter);
 
167
                                }
 
168
                                //                              f1_sum += iter->getIntensity();
 
169
 
 
170
                        }
 
171
 
 
172
                        if ( (iter->getRT() <  fpos2[Feature::RT] + tol_rt) &&
 
173
                                         (iter->getRT() >  fpos2[Feature::RT] - tol_rt) &&
 
174
                                         (iter->getMZ() <  fpos2[Feature::MZ] + tol_mz) &&
 
175
                                         (iter->getMZ() >  fpos2[Feature::MZ] - tol_mz) )
 
176
                        {
 
177
//                              cout << "Found feature2 at " << endl;
 
178
//                              cout << iter->getRT() << " " << iter->getMZ() << " " << iter->getIntensity() <<  endl;
 
179
                                // same as above
 
180
                                if (!feat2)
 
181
                                {
 
182
                                        feat2 = &(*iter);
 
183
                                }
 
184
                                else if (feat2->getIntensity() <  iter->getIntensity() )
 
185
                                {
 
186
                                        feat2 = &(*iter);
 
187
                                }
 
188
 
 
189
                                //                              f2_sum += iter->getIntensity();
 
190
                        }
 
191
 
 
192
                        ++iter;
 
193
                }       // end of while
 
194
 
 
195
                if (feat1 != 0 && feat2 != 0)  //(f1_sum != 0 && f2_sum != 0)
 
196
                {
 
197
                        cout << "Feature 1: " << *feat1 << endl;
 
198
                        cout << "Feature 2: " << *feat2 << endl;
 
199
                        cout << "Intensity ratio : " << ( feat1->getIntensity() / feat2->getIntensity() ) << endl;
 
200
                        intensities.push_back( feat1->getIntensity() / feat2->getIntensity());
 
201
 
 
202
                        return true;
 
203
                }
 
204
                if (!feat1)
 
205
                        writeDebug_(String("Feature 1 was not found. "),1);
 
206
 
 
207
                if (!feat2)
 
208
                        writeDebug_(String("Feature 2 was not found. "),1);
 
209
 
 
210
                return false;
 
211
        }
 
212
 
 
213
        /*
 
214
                 Computes the linear regression for a series of measurements, the
 
215
                 x-axis intercept of the regression line and its confidence interval, and
 
216
                 writes a couple of files from which a nice plot of all this can be
 
217
                 generated using the gnuplot program.
 
218
        */
 
219
        bool computeRegressionAndWriteGnuplotFiles_ ( vector<double>::const_iterator const conc_vec_begin,
 
220
                                                                                                                                                                                                vector<double>::const_iterator const conc_vec_end,
 
221
                                                                                                                                                                                                vector<double>::const_iterator const area_vec_begin,
 
222
                                                                                                                                                                                                double const confidence_p,
 
223
                                                                                                                                                                                                String const filename_prefix,
 
224
                                                                                                                                                                                                String const output_filename,
 
225
                                                                                                                                                                                                String const format = "",
 
226
                                                                                                                                                                                                bool const write_gnuplot = true
 
227
                                                                                                                                                                                        )
 
228
        {
 
229
 
 
230
                try
 
231
                {
 
232
                        LinearRegression linreg;
 
233
                        linreg.computeRegression ( confidence_p, conc_vec_begin, conc_vec_end, area_vec_begin );
 
234
 
 
235
                        if (write_gnuplot)
 
236
                        {
 
237
 
 
238
                                // the peak data goes here
 
239
                                String datafilename(filename_prefix);
 
240
                                datafilename+=String(".dat");
 
241
                                ofstream dataout(datafilename.c_str());
 
242
 
 
243
                                // the gnuplot commands go here
 
244
                                String commandfilename(filename_prefix);
 
245
                                commandfilename+=String(".cmd");
 
246
                                ofstream cmdout(commandfilename.c_str());
 
247
 
 
248
                                // the error bar for the x-axis intercept goes here
 
249
                                String errorbarfilename(filename_prefix);
 
250
                                errorbarfilename+=String(".err");
 
251
                                ofstream errout(errorbarfilename.c_str());
 
252
 
 
253
                                // writing the commands
 
254
                                cmdout <<
 
255
                                        "set ylabel \"ion count\"\n"
 
256
                                        "set xlabel \"concentration\"\n"
 
257
                                        "set key left Left reverse\n"
 
258
                                        ;
 
259
 
 
260
                                if ( ! format.empty() )
 
261
                                {
 
262
                                        if ( format == "png" )
 
263
                                        {
 
264
                                                cmdout <<
 
265
                                                        "set terminal png \n"
 
266
                                                        "set output \"" << filename_prefix << ".png\"\n" ;
 
267
                                        }
 
268
                                        else if ( format == "eps" )
 
269
                                        {
 
270
                                                cmdout <<
 
271
                                                        "set terminal postscript eps \n"
 
272
                                                        "set output \"" << filename_prefix << ".eps\"\n" ;
 
273
                                        }
 
274
 
 
275
                                }
 
276
                                cmdout <<
 
277
                                        "plot \""  << datafilename <<"\"  w points ps 2 pt 1 lt 8 title \"data\" " // want data on first line of key
 
278
                                        ",  " << linreg.getIntercept() << "+" <<  linreg.getSlope() << "*x lt 2 lw 3 title \"linear regression: "
 
279
                                                         << linreg.getIntercept() << " + " <<  linreg.getSlope() << " * x\" "
 
280
                                        ", \""  << datafilename <<"\"  w points ps 2 pt 1 lt 8 notitle " // draw data a second time, on top of reg. line
 
281
                                        ", \"" << errorbarfilename << "\"  using ($1):(0) w points pt 13 ps 2 lt 1 title \"x-intercept: " << linreg.getXIntercept() << "\" "
 
282
                                        ", \"" << errorbarfilename << "\"  w xerrorbars lw 3 lt 1 title \"95% interval: [ " << linreg.getLower() << ", " << linreg.getUpper() << " ]\"\n"
 
283
                                        ;
 
284
                                cmdout.close();
 
285
 
 
286
                                // writing the x-axis intercept error bar
 
287
                                errout << linreg.getXIntercept() << " 0 " << linreg.getLower() << " " << linreg.getUpper() << endl;
 
288
                                errout.close();
 
289
 
 
290
                                // writing the peak data points
 
291
                                vector<double>::const_iterator cit = conc_vec_begin;
 
292
                                vector<double>::const_iterator ait = area_vec_begin;
 
293
                                dataout.precision(writtenDigits<DoubleReal>());
 
294
                                for ( ;cit != conc_vec_end; ++cit, ++ait )
 
295
                                {
 
296
                                        dataout << *cit << ' ' << *ait << '\n';
 
297
                                }
 
298
                                dataout.close();
 
299
 
 
300
                        } // end if (write_gnuplot)
 
301
 
 
302
                        // write results to XML file
 
303
                        ofstream results;
 
304
                        results.open(output_filename.c_str());
 
305
 
 
306
                        results << "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>" << endl;
 
307
                        results << "<results_additiveseries>" << endl;
 
308
                        results << "\t<slope>" << linreg.getSlope() << "</slope>" << endl;
 
309
                        results << "\t<intercept>" << linreg.getIntercept() << "</intercept>" << endl;
 
310
                        results << "\t<x_intercept>" << linreg.getXIntercept() << "</x_intercept>" << endl;
 
311
                        results << "\t<confidence_lowerlimit>" << linreg.getLower() << "</confidence_lowerlimit>" << endl;
 
312
                        results << "\t<confidence_upperlimit>" << linreg.getUpper() << "</confidence_upperlimit>" << endl;
 
313
                        results << "\t<pearson_squared>" << linreg.getRSquared() << "</pearson_squared>" << endl;
 
314
                        results << "\t<std_residuals>" << linreg.getStandDevRes() << "</std_residuals>" << endl;
 
315
                        results << "\t<t_statistic>" << linreg.getTValue() << "</t_statistic>" << endl;
 
316
                        results << "</results_additiveseries>" << endl;
 
317
 
 
318
                        results.close();
 
319
                }
 
320
                catch (string& s)
 
321
                {
 
322
                        cout << s <<  endl;
 
323
                        return 1;
 
324
                }
 
325
 
 
326
                return 0;
 
327
        }
 
328
 
 
329
        ExitCodes main_(int , const char**)
 
330
        {
 
331
                //-------------------------------------------------------------
 
332
                // parsing parameters
 
333
                //-------------------------------------------------------------
 
334
                Param const& add_param =  getParam_();
 
335
                writeDebug_("Used parameters", add_param, 3);
 
336
 
 
337
                CoordinateType tol_mz = getDoubleOption_("mz_tolerance");
 
338
                CoordinateType tol_rt = getDoubleOption_("rt_tolerance");
 
339
 
 
340
                String out_f  = getStringOption_("out");
 
341
 
 
342
                if (add_param.getValue("feature_mz").isEmpty() || add_param.getValue("feature_rt").isEmpty() )
 
343
                {
 
344
                        writeLog_("Feature coordinates not given. Aborting.");
 
345
                        return ILLEGAL_PARAMETERS;
 
346
                }
 
347
                DPosition<2> feat_pos1;
 
348
                feat_pos1[Feature::MZ] = (CoordinateType) add_param.getValue("feature_mz");
 
349
                feat_pos1[Feature::RT] = (CoordinateType) add_param.getValue("feature_rt");
 
350
 
 
351
                if (add_param.getValue("standard_mz").isEmpty() || add_param.getValue("standard_rt").isEmpty() )
 
352
                {
 
353
                        writeLog_("Standard coordinates not given. Aborting.");
 
354
                        return ILLEGAL_PARAMETERS;
 
355
                }
 
356
                DPosition<2> feat_pos2;
 
357
                feat_pos2[Feature::MZ] = (CoordinateType) add_param.getValue("standard_mz");
 
358
                feat_pos2[Feature::RT] = (CoordinateType) add_param.getValue("standard_rt");
 
359
 
 
360
                writeDebug_(String("Setting tolerances to ") + tol_mz + " " + tol_rt,1);
 
361
 
 
362
                // introduce a flag for each concentration. true => the corresponding feature was found
 
363
                vector<bool> flags;
 
364
 
 
365
                // fetching list of files
 
366
                StringList files = getStringList_("in");
 
367
 
 
368
                // collect features
 
369
                vector<DoubleReal> intensities;
 
370
                vector<String>::const_iterator cit = files.begin();
 
371
                while (cit != files.end())
 
372
                {
 
373
                        if (readMapFile_(*cit,intensities,tol_mz,tol_rt,feat_pos1,feat_pos2) )
 
374
                        {
 
375
                                flags.push_back(true);
 
376
                        }
 
377
                        else
 
378
                        {
 
379
                                flags.push_back(false);
 
380
                        }
 
381
                        ++cit;
 
382
                }
 
383
 
 
384
                // read the spiked concentrations
 
385
    DoubleList sp_concentrations = getDoubleList_("concentrations");
 
386
 
 
387
                vector<double> sp_concentrations2;
 
388
                for (Size i=0; i<sp_concentrations.size(); i++)
 
389
                {
 
390
                        if (flags.at(i) == true )
 
391
                        {
 
392
                                sp_concentrations2.push_back( sp_concentrations.at(i) );
 
393
                        }
 
394
                }
 
395
 
 
396
                cout << "Found feature pairs: " <<  intensities.size() << endl;
 
397
                cout << "Spiked concentrations: " << sp_concentrations.size() << endl;
 
398
 
 
399
                if (intensities.empty() || sp_concentrations.empty() )
 
400
                {
 
401
 
 
402
                        writeLog_("Did not find any data. Aborting!");
 
403
                        return ILLEGAL_PARAMETERS;
 
404
                }
 
405
 
 
406
                // set prefix of gnuplot output
 
407
                String filename_prefix = getStringOption_("out_gp");;
 
408
                if (getFlag_("write_gnuplot_output"))
 
409
                {
 
410
                        writeDebug_(String("Writing gnuplot output"),1);
 
411
                        computeRegressionAndWriteGnuplotFiles_ (sp_concentrations2.begin(), sp_concentrations2.end(),
 
412
                                                                                                                                                                                        intensities.begin(), 0.95, filename_prefix, out_f, "eps", true);
 
413
                }
 
414
                else
 
415
                {
 
416
                        writeDebug_(" No GNUplot output is written...",1);
 
417
                        computeRegressionAndWriteGnuplotFiles_ (sp_concentrations2.begin(), sp_concentrations2.end(),
 
418
                                                                                                                                                                                        intensities.begin(), 0.95, filename_prefix, out_f, "eps", false);
 
419
                }
 
420
 
 
421
                return EXECUTION_OK;
 
422
        }
 
423
 
 
424
};
 
425
 
 
426
 
 
427
/// @endcond
 
428
 
 
429
 
 
430
int main( int argc, const char** argv )
 
431
{
 
432
    AdditiveSeries tool;
 
433
    return tool.main(argc,argv);
 
434
}
 
435
 
 
436