1
// --------------------------------------------------------------------------
2
// OpenMS -- Open-Source Mass Spectrometry
3
// --------------------------------------------------------------------------
4
// Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5
// ETH Zurich, and Freie Universitaet Berlin 2002-2013.
7
// This software is released under a three-clause BSD license:
8
// * Redistributions of source code must retain the above copyright
9
// notice, this list of conditions and the following disclaimer.
10
// * Redistributions in binary form must reproduce the above copyright
11
// notice, this list of conditions and the following disclaimer in the
12
// documentation and/or other materials provided with the distribution.
13
// * Neither the name of any author or any participating institution
14
// may be used to endorse or promote products derived from this software
15
// without specific prior written permission.
16
// For a full list of authors, refer to the file AUTHORS.
17
// --------------------------------------------------------------------------
18
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
// ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22
// INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
// --------------------------------------------------------------------------
31
// $Maintainer: Lars Nilse $
32
// $Authors: Lars Nilse, Steffen Sass, Holger Plattfaut, Bastian Blank $
33
// --------------------------------------------------------------------------
35
#ifndef OPENMS_FILTERING_DATAREDUCTION_SILACANALYZER_H
36
#define OPENMS_FILTERING_DATAREDUCTION_SILACANALYZER_H
39
#include <OpenMS/config.h>
40
// #include <OpenMS/APPLICATIONS/TOPPBase.h>
41
#include <OpenMS/CONCEPT/ProgressLogger.h>
42
#include <OpenMS/DATASTRUCTURES/DBoundingBox.h>
43
#include <OpenMS/KERNEL/MSExperiment.h>
44
#include <OpenMS/KERNEL/StandardTypes.h>
45
#include <OpenMS/KERNEL/ConsensusMap.h>
46
#include <OpenMS/KERNEL/FeatureMap.h>
47
#include <OpenMS/FORMAT/MzMLFile.h>
48
#include <OpenMS/FORMAT/ConsensusXMLFile.h>
49
#include <OpenMS/FORMAT/FeatureXMLFile.h>
50
#include <OpenMS/MATH/STATISTICS/LinearRegression.h>
51
#include <OpenMS/KERNEL/RangeUtils.h>
52
#include <OpenMS/KERNEL/ChromatogramTools.h>
53
#include <OpenMS/FORMAT/MzQuantMLFile.h>
54
#include <OpenMS/METADATA/MSQuantifications.h>
56
#include <OpenMS/FILTERING/DATAREDUCTION/SILACFilter.h>
57
#include <OpenMS/FILTERING/DATAREDUCTION/SILACFiltering.h>
58
#include <OpenMS/COMPARISON/CLUSTERING/SILACClustering.h>
59
#include <OpenMS/TRANSFORMATIONS/FEATUREFINDER/PeakWidthEstimator.h>
62
#include <boost/algorithm/string/split.hpp>
63
#include <boost/algorithm/string/classification.hpp>
77
* @brief Algorithm to use for SILACAnalysis
79
* Please initialize before usage using the initialize function.
80
* Next, one can estimate the peak width before filtering the data.
82
* In the final step, clusterData will generate the output data.
86
class OPENMS_DLLAPI SILACAnalyzer :
91
// input and output files
103
String selected_labels;
106
Int missed_cleavages;
107
UInt isotopes_per_peptide_min;
108
UInt isotopes_per_peptide_max;
110
// section "algorithm"
111
DoubleReal rt_threshold;
113
DoubleReal intensity_cutoff;
114
DoubleReal intensity_correlation;
115
DoubleReal model_deviation;
116
bool allow_missing_peaks;
119
std::vector<std::vector<String> > SILAClabels; // list of SILAC labels, e.g. selected_labels="[Lys4,Arg6][Lys8,Arg10]" => SILAClabels[0][1]="Arg6"
120
std::vector<std::vector<DoubleReal> > massShifts; // list of mass shifts
122
typedef SILACClustering Clustering;
124
MSQuantifications msq;
128
allow_missing_peaks(true)
133
* @brief Initializes the algorithm with parameters
135
* @param selected_labels_ Labels used for labelling the sample. For example, [Lys4,Arg6][Lys8,Arg10] describes a mixtures of three samples. One of them unlabelled, one labelled with Lys4 and Arg6 and a third one with Lys8 and Arg10. The used labels must be described in the label_identifiers parameter
136
* @param label_identifiers a map of labels and corresponding mass shits e.g. "Arg6" => 6.0201290268
141
String selected_labels_,
144
Int missed_cleavages_,
145
UInt isotopes_per_peptide_min_,
146
UInt isotopes_per_peptide_max_,
147
// section "algorithm"
148
DoubleReal rt_threshold_,
150
DoubleReal intensity_cutoff_,
151
DoubleReal intensity_correlation_,
152
DoubleReal model_deviation_,
153
bool allow_missing_peaks_,
155
std::map<String, DoubleReal> label_identifiers)
157
selected_labels = selected_labels_;
158
charge_min = charge_min_;
159
charge_max = charge_max_;
160
missed_cleavages = missed_cleavages_;
161
isotopes_per_peptide_min = isotopes_per_peptide_min_;
162
isotopes_per_peptide_max = isotopes_per_peptide_max_;
164
rt_threshold = rt_threshold_;
166
intensity_cutoff = intensity_cutoff_;
167
intensity_correlation = intensity_correlation_;
168
model_deviation = model_deviation_;
169
allow_missing_peaks = allow_missing_peaks_;
171
calculateLabelsAndMassShifts(label_identifiers);
175
* @brief Calculate the internal massShift and label datastructures from a
176
* map of identifiers and mass shifts (e.g. "Arg6" => 6.0201290268).
178
* Note that this part is necessary for the algorithm to work and this
179
* function is called from the initialize section. The algorithm has to be
180
* initialized _first_ with the list of actually used labels
181
* (selected_labels) using the initialize_sample call.
183
void calculateLabelsAndMassShifts(std::map<String, DoubleReal> label_identifiers);
185
void run_all(MSExperiment<Peak1D> & exp, ConsensusMap & out_map)
187
PeakWidthEstimator::Result peak_width;
188
std::vector<std::vector<SILACPattern> > data;
189
MSQuantifications msq;
190
std::vector<Clustering *> cluster_data;
192
peak_width = estimatePeakWidth(exp);
193
filterData(exp, peak_width, data);
194
clusterData(exp, peak_width, cluster_data, data);
196
// write output to consensus map
197
for (std::vector<Clustering *>::const_iterator it = cluster_data.begin(); it != cluster_data.end(); ++it)
199
generateClusterConsensusByCluster(out_map, **it);
204
* @brief Peak width estimation
206
PeakWidthEstimator::Result estimatePeakWidth(const MSExperiment<Peak1D> & exp);
211
void filterData(MSExperiment<Peak1D> & exp, const PeakWidthEstimator::Result & peak_width, std::vector<std::vector<SILACPattern> > & data);
216
void clusterData(const MSExperiment<> &, const PeakWidthEstimator::Result &, std::vector<Clustering *> &, std::vector<std::vector<SILACPattern> > & data);
220
* @brief Returns the list of SILAC labels, e.g.
221
* selected_labels="[Lys4,Arg6][Lys8,Arg10]" => SILAClabels[0][1]="Arg6"
223
std::vector<std::vector<String> > & getSILAClabels()
229
* @brief Returns the list of mass shifts calcualted
231
std::vector<std::vector<DoubleReal> > & getMassShifts()
239
* @brief Generate ConsensusMap from clustering result
241
void generateClusterConsensusByCluster(ConsensusMap &, const Clustering &) const;
244
* @brief Generate ConsensusMap from clustering result, one consensus per pattern
246
void generateClusterConsensusByPattern(ConsensusMap &, const Clustering &, UInt & cluster_id) const;
249
* @brief Generate debug output from clustering result
251
void generateClusterDebug(std::ostream & out, const Clustering & clustering, UInt & cluster_id) const;
255
* @brief Generate ConsensusMap from filter result
257
void generateFilterConsensusByPattern(ConsensusMap &, const std::vector<SILACPattern> &) const;
262
* @brief Generate a consensus entry from a pattern
264
ConsensusFeature generateSingleConsensusByPattern(const SILACPattern &) const;
270
* @brief Generate FeatureMap from clustering result
272
void generateClusterFeatureByCluster(FeatureMap<> &, const Clustering &) const;
275
* @brief Read filter result from ConsensusMap
277
void readFilterConsensusByPattern(ConsensusMap &, std::vector<std::vector<SILACPattern> > &);
279
static const String & selectColor(UInt nr);
282
* @brief Read consensusXML from file to ConsensusMap
284
void readConsensus(const String & filename, ConsensusMap & in) const
286
ConsensusXMLFile c_file;
287
c_file.load(filename, in);
291
* @brief Write consensusXML from ConsensusMap to file
293
void writeConsensus(const String & filename, ConsensusMap & out) const
295
out.sortByPosition();
296
out.applyMemberFunction(&UniqueIdInterface::setUniqueId);
297
out.setExperimentType("silac");
299
ConsensusXMLFile c_file;
300
c_file.store(filename, out);
304
* @brief Write MzQuantML from ConsensusMap to file
306
void writeMzQuantML(const String & filename, MSQuantifications & msq) const
308
//~ TODO apply above to ConsensusMap befor putting into Msq
309
//~ out.sortByPosition();
310
//~ out.applyMemberFunction(&UniqueIdInterface::setUniqueId);
311
//~ out.setExperimentType("SILAC");
314
file.store(filename, msq);
318
* @brief Write featureXML from FeatureMap to file
320
void writeFeatures(const String & filename, FeatureMap<> & out) const
322
out.sortByPosition();
323
out.applyMemberFunction(&UniqueIdInterface::setUniqueId);
325
FeatureXMLFile f_file;
326
f_file.store(filename, out);
332
#endif /* SILACANALYZER_H_ */