1
// -*- mode: C++; tab-width: 2; -*-
4
// --------------------------------------------------------------------------
5
// OpenMS Mass Spectrometry Framework
6
// --------------------------------------------------------------------------
7
// Copyright (C) 2003-2011 -- Oliver Kohlbacher, Knut Reinert
9
// This library is free software; you can redistribute it and/or
10
// modify it under the terms of the GNU Lesser General Public
11
// License as published by the Free Software Foundation; either
12
// version 2.1 of the License, or (at your option) any later version.
14
// This library is distributed in the hope that it will be useful,
15
// but WITHOUT ANY WARRANTY; without even the implied warranty of
16
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17
// Lesser General Public License for more details.
19
// You should have received a copy of the GNU Lesser General Public
20
// License along with this library; if not, write to the Free Software
21
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
1
// --------------------------------------------------------------------------
2
// OpenMS -- Open-Source Mass Spectrometry
3
// --------------------------------------------------------------------------
4
// Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5
// ETH Zurich, and Freie Universitaet Berlin 2002-2013.
7
// This software is released under a three-clause BSD license:
8
// * Redistributions of source code must retain the above copyright
9
// notice, this list of conditions and the following disclaimer.
10
// * Redistributions in binary form must reproduce the above copyright
11
// notice, this list of conditions and the following disclaimer in the
12
// documentation and/or other materials provided with the distribution.
13
// * Neither the name of any author or any participating institution
14
// may be used to endorse or promote products derived from this software
15
// without specific prior written permission.
16
// For a full list of authors, refer to the file AUTHORS.
17
// --------------------------------------------------------------------------
18
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
// ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22
// INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23
30
// --------------------------------------------------------------------------
24
31
// $Maintainer: Clemens Groepl $
65
73
V_("@@@ StablePairFinder::updateMembers_()");
67
75
second_nearest_gap_ = param_.getValue("second_nearest_gap");
68
use_IDs_ = String(param_.getValue("use_identifications")) == "true";
76
use_IDs_ = String(param_.getValue("use_identifications")) == "true";
71
79
void StablePairFinder::run(const std::vector<ConsensusMap>& input_maps,
72
ConsensusMap &result_map)
80
ConsensusMap& result_map)
74
82
// empty output destination:
75
83
result_map.clear(false);
78
86
if (input_maps.size() != 2)
80
88
throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__,
81
"exactly two input maps required");
89
"exactly two input maps required");
83
91
checkIds_(input_maps);
85
// set up the distance functor:
86
DoubleReal max_intensity = max(input_maps[0].getMaxInt(),
87
input_maps[1].getMaxInt());
88
Param distance_params = param_.copy("");
89
distance_params.remove("use_identifications");
90
distance_params.remove("second_nearest_gap");
91
FeatureDistance feature_distance(max_intensity, false);
92
feature_distance.setParameters(distance_params);
93
// set up the distance functor:
94
DoubleReal max_intensity = max(input_maps[0].getMaxInt(),
95
input_maps[1].getMaxInt());
96
Param distance_params = param_.copy("");
97
distance_params.remove("use_identifications");
98
distance_params.remove("second_nearest_gap");
99
FeatureDistance feature_distance(max_intensity, false);
100
feature_distance.setParameters(distance_params);
94
// keep track of pairing:
102
// keep track of pairing:
95
103
std::vector<bool> is_singleton[2];
96
104
is_singleton[0].resize(input_maps[0].size(), true);
97
105
is_singleton[1].resize(input_maps[1].size(), true);
99
typedef pair<DoubleReal, DoubleReal> DoublePair;
100
DoublePair init = make_pair(FeatureDistance::infinity,
101
FeatureDistance::infinity);
107
typedef pair<DoubleReal, DoubleReal> DoublePair;
108
DoublePair init = make_pair(FeatureDistance::infinity,
109
FeatureDistance::infinity);
103
// for every element in map 0:
104
// - index of nearest neighbor in map 1:
111
// for every element in map 0:
112
// - index of nearest neighbor in map 1:
105
113
vector<UInt> nn_index_0(input_maps[0].size(), UInt(-1));
106
// - distances to nearest and second-nearest neighbors in map 1:
114
// - distances to nearest and second-nearest neighbors in map 1:
107
115
vector<DoublePair> nn_distance_0(input_maps[0].size(), init);
109
// for every element in map 1:
110
// - index of nearest neighbor in map 0:
117
// for every element in map 1:
118
// - index of nearest neighbor in map 0:
111
119
vector<UInt> nn_index_1(input_maps[1].size(), UInt(-1));
112
// - distances to nearest and second-nearest neighbors in map 0:
120
// - distances to nearest and second-nearest neighbors in map 0:
113
121
vector<DoublePair> nn_distance_1(input_maps[1].size(), init);
115
123
// iterate over all feature pairs, find nearest neighbors:
124
// TODO: iterate over SENSIBLE RT (and m/z) window -- sort the maps beforehand
125
// to save a lot of processing time...
126
// Once done, remove the warning in the description of the 'use_idenfications' parameter
116
127
for (UInt fi0 = 0; fi0 < input_maps[0].size(); ++fi0)
118
const ConsensusFeature& feat0 = input_maps[0][fi0];
129
const ConsensusFeature& feat0 = input_maps[0][fi0];
120
131
for (UInt fi1 = 0; fi1 < input_maps[1].size(); ++fi1)
122
const ConsensusFeature& feat1 = input_maps[1][fi1];
133
const ConsensusFeature& feat1 = input_maps[1][fi1];
124
if (use_IDs_ && !compatibleIDs_(feat0, feat1)) // check peptide IDs
126
continue; // mismatch
135
if (use_IDs_ && !compatibleIDs_(feat0, feat1)) // check peptide IDs
137
continue; // mismatch
129
140
pair<bool, DoubleReal> result = feature_distance(feat0, feat1);
130
DoubleReal distance = result.second;
131
// we only care if distance constraints are satisfied for "best
132
// matches", not for second-best; this means that second-best distances
133
// can become smaller than best distances!
134
bool valid = result.first;
141
DoubleReal distance = result.second;
142
// we only care if distance constraints are satisfied for "best
143
// matches", not for second-best; this means that second-best distances
144
// can become smaller than best distances!
145
bool valid = result.first;
136
// update entries for map 0:
137
if (distance < nn_distance_0[fi0].second)
139
if (valid && (distance < nn_distance_0[fi0].first))
141
nn_distance_0[fi0].second = nn_distance_0[fi0].first;
142
nn_distance_0[fi0].first = distance;
143
nn_index_0[fi0] = fi1;
145
else nn_distance_0[fi0].second = distance;
147
// update entries for map 1:
148
if (distance < nn_distance_1[fi1].second)
150
if (valid && (distance < nn_distance_1[fi1].first))
152
nn_distance_1[fi1].second = nn_distance_1[fi1].first;
153
nn_distance_1[fi1].first = distance;
154
nn_index_1[fi1] = fi0;
156
else nn_distance_1[fi1].second = distance;
147
// update entries for map 0:
148
if (distance < nn_distance_0[fi0].second)
150
if (valid && (distance < nn_distance_0[fi0].first))
152
nn_distance_0[fi0].second = nn_distance_0[fi0].first;
153
nn_distance_0[fi0].first = distance;
154
nn_index_0[fi0] = fi1;
157
nn_distance_0[fi0].second = distance;
159
// update entries for map 1:
160
if (distance < nn_distance_1[fi1].second)
162
if (valid && (distance < nn_distance_1[fi1].first))
164
nn_distance_1[fi1].second = nn_distance_1[fi1].first;
165
nn_distance_1[fi1].first = distance;
166
nn_index_1[fi1] = fi0;
169
nn_distance_1[fi1].second = distance;
161
174
// if features from the two maps are nearest neighbors of each other, they
162
175
// can become a pair:
163
176
for (UInt fi0 = 0; fi0 < input_maps[0].size(); ++fi0)
165
UInt fi1 = nn_index_0[fi0]; // nearest neighbor of "fi0" in map 1
166
// cout << "index: " << fi0 << ", RT: " << input_maps[0][fi0].getRT()
167
// << ", MZ: " << input_maps[0][fi0].getMZ() << endl
168
// << "neighbor: " << fi1 << ", RT: " << input_maps[1][fi1].getRT()
169
// << ", MZ: " << input_maps[1][fi1].getMZ() << endl
170
// << "d(i,j): " << nn_distance_0[fi0].first << endl
171
// << "d2(i): " << nn_distance_0[fi0].second << endl
172
// << "d2(j): " << nn_distance_1[fi1].second << endl;
174
// criteria set by the parameters must be fulfilled:
175
if ((nn_distance_0[fi0].first < FeatureDistance::infinity) &&
176
(nn_distance_0[fi0].first * second_nearest_gap_ <= nn_distance_0[fi0].second))
178
// "fi0" satisfies constraints...
179
if ((nn_index_1[fi1] == fi0) &&
180
(nn_distance_1[fi1].first * second_nearest_gap_ <= nn_distance_1[fi1].second))
182
// ...nearest neighbor of "fi0" also satisfies constraints (yay!)
183
// cout << "match!" << endl;
184
result_map.push_back(ConsensusFeature());
185
ConsensusFeature& f = result_map.back();
187
f.insert(input_maps[0][fi0]);
188
f.getPeptideIdentifications().insert(f.getPeptideIdentifications().end(), input_maps[0][fi0].getPeptideIdentifications().begin(), input_maps[0][fi0].getPeptideIdentifications().end());
190
f.insert(input_maps[1][fi1]);
191
f.getPeptideIdentifications().insert(f.getPeptideIdentifications().end(), input_maps[1][fi1].getPeptideIdentifications().begin(), input_maps[1][fi1].getPeptideIdentifications().end());
193
f.computeConsensus();
194
DoubleReal quality = 1.0 - nn_distance_0[fi0].first;
195
DoubleReal quality0 = 1.0 - nn_distance_0[fi0].first * second_nearest_gap_ / nn_distance_0[fi0].second;
196
DoubleReal quality1 = 1.0 - nn_distance_1[fi1].first * second_nearest_gap_ / nn_distance_1[fi1].second;
197
quality = quality * quality0 * quality1; // TODO other formula?
199
// incorporate existing quality values:
200
Size size0 = max(input_maps[0][fi0].size(), size_t(1));
201
Size size1 = max(input_maps[1][fi1].size(), size_t(1));
202
// quality contribution from first map:
203
quality0 = input_maps[0][fi0].getQuality() * (size0 - 1);
204
// quality contribution from second map:
205
quality1 = input_maps[1][fi1].getQuality() * (size1 - 1);
206
f.setQuality((quality + quality0 + quality1) / (size0 + size1 - 1));
208
is_singleton[0][fi0] = false;
209
is_singleton[1][fi1] = false;
178
UInt fi1 = nn_index_0[fi0]; // nearest neighbor of "fi0" in map 1
179
// cout << "index: " << fi0 << ", RT: " << input_maps[0][fi0].getRT()
180
// << ", MZ: " << input_maps[0][fi0].getMZ() << endl
181
// << "neighbor: " << fi1 << ", RT: " << input_maps[1][fi1].getRT()
182
// << ", MZ: " << input_maps[1][fi1].getMZ() << endl
183
// << "d(i,j): " << nn_distance_0[fi0].first << endl
184
// << "d2(i): " << nn_distance_0[fi0].second << endl
185
// << "d2(j): " << nn_distance_1[fi1].second << endl;
187
// criteria set by the parameters must be fulfilled:
188
if ((nn_distance_0[fi0].first < FeatureDistance::infinity) &&
189
(nn_distance_0[fi0].first * second_nearest_gap_ <= nn_distance_0[fi0].second))
191
// "fi0" satisfies constraints...
192
if ((nn_index_1[fi1] == fi0) &&
193
(nn_distance_1[fi1].first * second_nearest_gap_ <= nn_distance_1[fi1].second))
195
// ...nearest neighbor of "fi0" also satisfies constraints (yay!)
196
// cout << "match!" << endl;
197
result_map.push_back(ConsensusFeature());
198
ConsensusFeature& f = result_map.back();
200
f.insert(input_maps[0][fi0]);
201
f.getPeptideIdentifications().insert(f.getPeptideIdentifications().end(),
202
input_maps[0][fi0].getPeptideIdentifications().begin(),
203
input_maps[0][fi0].getPeptideIdentifications().end());
205
f.insert(input_maps[1][fi1]);
206
f.getPeptideIdentifications().insert(f.getPeptideIdentifications().end(),
207
input_maps[1][fi1].getPeptideIdentifications().begin(),
208
input_maps[1][fi1].getPeptideIdentifications().end());
210
f.computeConsensus();
211
DoubleReal quality = 1.0 - nn_distance_0[fi0].first;
212
DoubleReal quality0 = 1.0 - nn_distance_0[fi0].first * second_nearest_gap_ / nn_distance_0[fi0].second;
213
DoubleReal quality1 = 1.0 - nn_distance_1[fi1].first * second_nearest_gap_ / nn_distance_1[fi1].second;
214
quality = quality * quality0 * quality1; // TODO other formula?
216
// incorporate existing quality values:
217
Size size0 = max(input_maps[0][fi0].size(), size_t(1));
218
Size size1 = max(input_maps[1][fi1].size(), size_t(1));
219
// quality contribution from first map:
220
quality0 = input_maps[0][fi0].getQuality() * (size0 - 1);
221
// quality contribution from second map:
222
quality1 = input_maps[1][fi1].getQuality() * (size1 - 1);
223
f.setQuality((quality + quality0 + quality1) / (size0 + size1 - 1));
225
is_singleton[0][fi0] = false;
226
is_singleton[1][fi1] = false;
214
231
// write out unmatched consensus features
215
for ( UInt input = 0; input <= 1; ++ input )
232
for (UInt input = 0; input <= 1; ++input)
217
for ( UInt index = 0; index < input_maps[input].size(); ++index )
234
for (UInt index = 0; index < input_maps[input].size(); ++index)
219
if ( is_singleton[input][index] )
236
if (is_singleton[input][index])
221
238
result_map.push_back(input_maps[input][index]);
222
if (result_map.back().size() < 2) // singleton consensus feature
224
result_map.back().setQuality(0.0);
239
if (result_map.back().size() < 2) // singleton consensus feature
241
result_map.back().setQuality(0.0);
230
247
// canonical ordering for checking the results, and the ids have no real meaning anyway
231
248
result_map.sortByMZ();
233
// protein IDs and unassigned peptide IDs are added to the result by the
234
// FeatureGroupingAlgorithm!
238
bool StablePairFinder::compatibleIDs_(const ConsensusFeature& feat1, const ConsensusFeature& feat2) const
240
vector<PeptideIdentification> pep1 = feat1.getPeptideIdentifications(),pep2 = feat2.getPeptideIdentifications();
241
// a feature without identifications always matches:
242
if (pep1.empty() || pep2.empty()) return true;
243
set<AASequence> best1, best2;
244
for (vector<PeptideIdentification>::iterator pep_it = pep1.begin(); pep_it != pep1.end(); ++pep_it)
246
if (pep_it->getHits().empty()) continue; // shouldn't be the case
248
best1.insert(pep_it->getHits()[0].getSequence());
250
for (vector<PeptideIdentification>::iterator pep_it = pep2.begin(); pep_it != pep2.end(); ++pep_it)
252
if (pep_it->getHits().empty()) continue; // shouldn't be the case
254
best2.insert(pep_it->getHits()[0].getSequence());
256
return (best1 == best2);
250
// protein IDs and unassigned peptide IDs are added to the result by the
251
// FeatureGroupingAlgorithm!
254
bool StablePairFinder::compatibleIDs_(const ConsensusFeature& feat1, const ConsensusFeature& feat2) const
256
// a feature without identifications always matches:
257
if (feat1.getPeptideIdentifications().empty() || feat2.getPeptideIdentifications().empty())
260
const vector<PeptideIdentification>& pep1 = feat1.getPeptideIdentifications();
261
const vector<PeptideIdentification>& pep2 = feat2.getPeptideIdentifications();
263
set<String> best1, best2;
264
for (vector<PeptideIdentification>::const_iterator pep_it = pep1.begin(); pep_it != pep1.end(); ++pep_it)
266
if (pep_it->getHits().empty())
267
continue; // shouldn't be the case
269
best1.insert(getBestHitSequence_(*pep_it).toString());
271
for (vector<PeptideIdentification>::const_iterator pep_it = pep2.begin(); pep_it != pep2.end(); ++pep_it)
273
if (pep_it->getHits().empty())
274
continue; // shouldn't be the case
276
best2.insert(getBestHitSequence_(*pep_it).toString());
278
return best1 == best2;
281
const AASequence& StablePairFinder::getBestHitSequence_(const PeptideIdentification& peptideIdentification) const
284
if (peptideIdentification.isHigherScoreBetter())
286
return std::min_element(peptideIdentification.getHits().begin(),
287
peptideIdentification.getHits().end(),
288
PeptideHit::ScoreMore()
293
return std::min_element(peptideIdentification.getHits().begin(),
294
peptideIdentification.getHits().end(),
295
PeptideHit::ScoreLess()