// -*- mode: C++; tab-width: 2; -*-
//
// --------------------------------------------------------------------------
// OpenMS Mass Spectrometry Framework
// --------------------------------------------------------------------------
// Copyright (C) 2003-2011 -- Oliver Kohlbacher, Knut Reinert
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// --------------------------------------------------------------------------
// $Maintainer: Nico Pfeifer $
// $Authors: Nico Pfeifer $
// --------------------------------------------------------------------------
#include <OpenMS/FILTERING/ID/IDFilter.h>
#include <OpenMS/CONCEPT/LogStream.h>

#include <algorithm>
#include <set>
#include <vector>

45
void IDFilter::filterIdentificationsUnique(const PeptideIdentification& identification,
46
PeptideIdentification& filtered_identification)
48
vector<PeptideHit> hits;
49
filtered_identification = identification;
50
vector<PeptideHit> temp_hits = identification.getHits();
52
for(vector<PeptideHit>::iterator it = temp_hits.begin();
53
it != temp_hits.end();
56
if (find(hits.begin(), hits.end(), *it) == hits.end())
61
filtered_identification.setHits(hits);
64
void IDFilter::filterIdentificationsByBestHits(const PeptideIdentification& identification,
65
PeptideIdentification& filtered_identification,
68
vector<PeptideHit> filtered_peptide_hits;
69
PeptideHit temp_peptide_hit;
70
vector<Size> new_peptide_indices;
72
filtered_identification = identification;
73
filtered_identification.setHits(vector<PeptideHit>());
75
if ( !identification.getHits().empty() )
77
Real optimal_value = identification.getHits()[0].getScore();
78
new_peptide_indices.push_back(0);
80
// searching for peptide(s) with maximal score
81
for (Size i = 1; i < identification.getHits().size(); i++)
83
Real temp_score = identification.getHits()[i].getScore();
84
bool new_leader = false;
85
if ( ( identification.isHigherScoreBetter() && (temp_score > optimal_value))
86
|| (!identification.isHigherScoreBetter() && (temp_score < optimal_value)) ) new_leader=true;
90
optimal_value = temp_score;
91
new_peptide_indices.clear();
92
new_peptide_indices.push_back(i);
94
else if (temp_score == optimal_value)
96
new_peptide_indices.push_back(i);
99
if (!strict || new_peptide_indices.size() == 1)
101
for (Size i = 0; i < new_peptide_indices.size(); i++)
103
filtered_peptide_hits.push_back(identification.getHits()[new_peptide_indices[i]]);
108
if ( !filtered_peptide_hits.empty() )
110
filtered_identification.setHits(filtered_peptide_hits);
111
filtered_identification.assignRanks();
115
void IDFilter::filterIdentificationsByLength(const PeptideIdentification& identification,
117
PeptideIdentification& filtered_identification)
119
vector<Size> new_peptide_indices;
120
vector<PeptideHit> filtered_peptide_hits;
122
filtered_identification = identification;
123
filtered_identification.setHits(vector<PeptideHit>());
125
const vector<PeptideHit>& temp_peptide_hits = identification.getHits();
127
for (Size i = 0; i < temp_peptide_hits.size(); i++)
129
if (temp_peptide_hits[i].getSequence().size() >= min_length)
131
new_peptide_indices.push_back(i);
135
for (Size i = 0; i < new_peptide_indices.size(); i++)
137
filtered_peptide_hits.push_back(identification.getHits()[new_peptide_indices[i]]);
139
if ( !filtered_peptide_hits.empty() )
141
filtered_identification.setHits(filtered_peptide_hits);
142
filtered_identification.assignRanks();
146
void IDFilter::filterIdentificationsByProteins(const PeptideIdentification& identification,
147
const vector< FASTAFile::FASTAEntry >& proteins,
148
PeptideIdentification& filtered_identification,
149
bool no_protein_identifiers)
151
// TODO: this is highly inefficient! the Protein-Index should be build once for all peptide-identifications instead of
152
// doing this once for every ID. Furthermore the index itself is inefficient (use seqan instead)
153
String protein_sequences;
154
String accession_sequences;
155
vector<PeptideHit> filtered_peptide_hits;
156
PeptideHit temp_peptide_hit;
158
filtered_identification = identification;
159
filtered_identification.setHits(vector<PeptideHit>());
161
for (Size i = 0; i < proteins.size(); i++)
163
if (proteins[i].identifier!="")
165
accession_sequences.append("*" + proteins[i].identifier);
167
if (proteins[i].sequence!="")
169
protein_sequences.append("*" + proteins[i].sequence);
172
accession_sequences.append("*");
173
protein_sequences.append("*");
175
for (Size i = 0; i < identification.getHits().size(); i++)
177
if (no_protein_identifiers || accession_sequences=="*")
178
{ // filter by sequence alone if no protein accesssions are available
179
if (protein_sequences.find(identification.getHits()[i].getSequence().toUnmodifiedString()) != String::npos)
181
filtered_peptide_hits.push_back(identification.getHits()[i]);
185
{ // filter by protein accessions
186
for(vector<String>::const_iterator ac_it = identification.getHits()[i].getProteinAccessions().begin();
187
ac_it != identification.getHits()[i].getProteinAccessions().end();
190
if (accession_sequences.find("*" + *ac_it) != String::npos)
192
filtered_peptide_hits.push_back(identification.getHits()[i]);
193
break; // we found a matching protein, the peptide is valid -> exit
199
filtered_identification.setHits(filtered_peptide_hits);
200
filtered_identification.assignRanks();
203
void IDFilter::filterIdentificationsByProteins(const ProteinIdentification& identification,
204
const vector< FASTAFile::FASTAEntry >& proteins,
205
ProteinIdentification& filtered_identification)
207
String protein_sequences;
208
String accession_sequences;
209
vector<ProteinHit> filtered_protein_hits;
210
ProteinHit temp_protein_hit;
212
filtered_identification=identification;
213
filtered_identification.setHits(vector<ProteinHit>());
215
for (Size i = 0; i < proteins.size(); i++)
217
accession_sequences.append("*" + proteins[i].identifier);
219
accession_sequences.append("*");
221
for (Size i = 0; i < identification.getHits().size(); i++)
223
if (accession_sequences.find("*" + identification.getHits()[i].getAccession()) != String::npos)
225
filtered_protein_hits.push_back(identification.getHits()[i]);
229
filtered_identification.setHits(filtered_protein_hits);
230
filtered_identification.assignRanks();
233
void IDFilter::filterIdentificationsByExclusionPeptides(const PeptideIdentification& identification,
234
const set<String>& peptides,
235
PeptideIdentification& filtered_identification)
237
String protein_sequences;
238
String accession_sequences;
239
vector<PeptideHit> filtered_peptide_hits;
240
PeptideHit temp_peptide_hit;
242
filtered_identification=identification;
243
filtered_identification.setHits(vector<PeptideHit>());
245
for (Size i = 0; i < identification.getHits().size(); i++)
247
if (find(peptides.begin(), peptides.end(), identification.getHits()[i].getSequence().toString()) == peptides.end())
249
filtered_peptide_hits.push_back(identification.getHits()[i]);
252
if ( !filtered_peptide_hits.empty() )
254
filtered_identification.setHits(filtered_peptide_hits);
255
filtered_identification.assignRanks();
259
void IDFilter::filterIdentificationsByRTFirstDimPValues(const PeptideIdentification& identification,
260
PeptideIdentification& filtered_identification,
263
DoubleReal border = 1 - p_value;
264
vector< Size > new_peptide_indices;
265
vector<PeptideHit> filtered_peptide_hits;
266
PeptideHit temp_peptide_hit;
268
filtered_identification=identification;
269
filtered_identification.setHits(vector<PeptideHit>());
271
Size missing_meta_value=0;
273
for ( Size i = 0; i < identification.getHits().size(); ++i )
275
if (identification.getHits()[i].metaValueExists("predicted_RT_p_value_first_dim"))
277
if ((DoubleReal)(identification.getHits()[i].getMetaValue("predicted_RT_p_value_first_dim")) <= border )
279
filtered_peptide_hits.push_back(identification.getHits()[i]);
282
else ++missing_meta_value;
284
if (missing_meta_value>0) LOG_WARN << "Filtering identifications by p-value did not work on " << missing_meta_value << " of " << identification.getHits().size() << " hits. Your data is missing a meta-value ('predicted_RT_p_value_first_dim') from RTPredict!\n";
286
if ( !filtered_peptide_hits.empty() )
288
filtered_identification.setHits(filtered_peptide_hits);
289
filtered_identification.assignRanks();
293
void IDFilter::filterIdentificationsByRTPValues(const PeptideIdentification& identification,
294
PeptideIdentification& filtered_identification,
297
DoubleReal border = 1 - p_value;
298
vector< Size > new_peptide_indices;
299
vector<PeptideHit> filtered_peptide_hits;
300
PeptideHit temp_peptide_hit;
302
filtered_identification=identification;
303
filtered_identification.setHits(vector<PeptideHit>());
305
Size missing_meta_value=0;
307
for (Size i = 0; i < identification.getHits().size(); i++)
309
if (identification.getHits()[i].metaValueExists("predicted_RT_p_value"))
311
if ((DoubleReal)(identification.getHits()[i].getMetaValue("predicted_RT_p_value")) <= border )
313
filtered_peptide_hits.push_back(identification.getHits()[i]);
316
else ++missing_meta_value;
318
if (missing_meta_value>0) LOG_WARN << "Filtering identifications by p-value did not work on " << missing_meta_value << " of " << identification.getHits().size() << " hits. Your data is missing a meta-value ('predicted_RT_p_value') from RTPredict!\n";
320
if ( !filtered_peptide_hits.empty() )
322
filtered_identification.setHits(filtered_peptide_hits);
323
filtered_identification.assignRanks();
327
void IDFilter::removeUnreferencedProteinHits(const ProteinIdentification& identification,
328
const vector<PeptideIdentification> peptide_identifications,
329
ProteinIdentification& filtered_identification)
331
vector<ProteinHit> filtered_protein_hits;
332
const vector<ProteinHit>& temp_protein_hits = identification.getHits();
333
vector<PeptideHit> temp_peptide_hits;
335
filtered_identification=identification;
336
filtered_identification.setHits(vector<ProteinHit>());
337
String identifier = identification.getIdentifier();
340
for (Size j = 0; j < temp_protein_hits.size(); ++j)
344
while(i < peptide_identifications.size() && !found)
346
if (identifier == peptide_identifications[i].getIdentifier())
348
temp_peptide_hits.clear();
349
peptide_identifications[i].getReferencingHits(temp_protein_hits[j].getAccession(), temp_peptide_hits);
350
if ( !temp_peptide_hits.empty() )
352
filtered_protein_hits.push_back(temp_protein_hits[j]);
359
filtered_identification.setHits(filtered_protein_hits);
362
} // namespace OpenMS