55
62
@brief Identifies peptide MS/MS spectra by spectral matching with a searchable spectral library.
60
<td ALIGN = "center" BGCOLOR="#EBEBEB"> pot. predecessor tools </td>
61
<td VALIGN="middle" ROWSPAN=2> \f$ \longrightarrow \f$ SpecLibSearcher \f$ \longrightarrow \f$</td>
62
<td ALIGN = "center" BGCOLOR="#EBEBEB"> pot. successor tools </td>
65
<td VALIGN="middle" ALIGN = "center" ROWSPAN=1> @ref UTILS_SpecLibCreator </td>
66
<td VALIGN="middle" ALIGN = "center" ROWSPAN=1> @ref TOPP_IDFilter or @n any protein/peptide processing tool</td>
67
<td ALIGN = "center" BGCOLOR="#EBEBEB"> pot. predecessor tools </td>
68
<td VALIGN="middle" ROWSPAN=2> \f$ \longrightarrow \f$ SpecLibSearcher \f$ \longrightarrow \f$</td>
69
<td ALIGN = "center" BGCOLOR="#EBEBEB"> pot. successor tools </td>
72
<td VALIGN="middle" ALIGN = "center" ROWSPAN=1> @ref UTILS_SpecLibCreator </td>
73
<td VALIGN="middle" ALIGN = "center" ROWSPAN=1> @ref TOPP_IDFilter or @n any protein/peptide processing tool</td>
71
@experimental This TOPP-tool is not well tested and not all features might be properly implemented and tested.
78
@experimental This TOPP-tool is not well tested and not all features might be properly implemented and tested.
73
<B>The command line parameters of this tool are:</B>
74
@verbinclude TOPP_SpecLibSearcher.cli
80
<B>The command line parameters of this tool are:</B>
81
@verbinclude TOPP_SpecLibSearcher.cli
82
<B>INI file documentation of this tool:</B>
83
@htmlinclude TOPP_SpecLibSearcher.html
77
86
// We do not want this class to show up in the docu:
78
87
/// @cond TOPPCLASSES
80
class TOPPSpecLibSearcher
85
: TOPPBase("SpecLibSearcher","Identifies peptide MS/MS spectra by spectral matching with a searchable spectral library.")
90
void registerOptionsAndFlags_()
92
registerInputFileList_("in","<files>",StringList::create(""),"Input files");
93
setValidFormats_("in",StringList::create("mzML"));
94
registerInputFile_("lib","<file>","","searchable spectral library(MSP format)");
95
registerOutputFileList_("out","<files>",StringList::create(""),"Output files. Have to be as many as input files");
96
setValidFormats_("out",StringList::create("idXML"));
97
registerDoubleOption_("precursor_mass_tolerance","<tolerance>",3,"Precursor mass tolerance, (Th)",false);
98
registerIntOption_("round_precursor_to_integer","<number>",10,"many precursor m/z multipling number lead to the same number; are packed in the same vector for faster search.Should be higher for high-resolution data",false,true);
99
// registerDoubleOption_("fragment_mass_tolerance","<tolerance>",0.3,"Fragment mass error",false);
101
// registerStringOption_("precursor_error_units", "<unit>", "Da", "parent monoisotopic mass error units", false);
102
// registerStringOption_("fragment_error_units", "<unit>", "Da", "fragment monoisotopic mass error units", false);
103
// vector<String> valid_strings;
104
// valid_strings.push_back("Da");
105
// setValidStrings_("precursor_error_units", valid_strings);
106
// setValidStrings_("fragment_error_units", valid_strings);
107
// registerIntOption_("min_precursor_charge", "<charge>", 1, "minimum precursor ion charge", false);
108
// registerIntOption_("max_precursor_charge", "<charge>", 3, "maximum precursor ion charge", false);
109
registerStringOption_("compare_function","<string>","ZhangSimilarityScore","function for similarity comparisson",false);
110
PeakSpectrumCompareFunctor::registerChildren();
111
setValidStrings_("compare_function",Factory<PeakSpectrumCompareFunctor>::registeredProducts());
112
registerIntOption_("top_hits","<number>",10,"save the first <number> top hits. For all type -1",false);
114
addText_("Filtering options. Most are especially useful when the query spectra are raw.");
115
registerIntOption_("min_peaks","<number>",5, "required mininum number of peaks for a query spectrum",false);
116
registerDoubleOption_("remove_peaks_below_threshold","<threshold>",2.01,"All peaks of a query spectrum with intensities below <threshold> will be zeroed.",false);
117
registerIntOption_("max_peaks","<number>",150,"Use only the top <number> of peaks.",false);
118
registerIntOption_("cut_peaks_below","<number>",1000,"Remove all peaks which are lower than 1/<number> of the highest peaks. Default equals all peaks which are lower than 0.001 of the maximum intensity peak",false);
120
vector<String> all_mods;
121
ModificationsDB::getInstance()->getAllSearchModifications(all_mods);
122
registerStringList_("fixed_modifications", "<mods>", StringList::create(""), "fixed modifications, specified using UniMod (www.unimod.org) terms, e.g. 'Carbamidomethyl (C)' or 'Oxidation (M)'", false);
123
setValidStrings_("fixed_modifications", all_mods);
125
registerStringList_("variable_modifications", "<mods>", StringList::create(""), "variable modifications, specified using UniMod (www.unimod.org) terms, e.g. 'Carbamidomethyl (C)' or 'Oxidation (M)'", false);
126
setValidStrings_("variable_modifications", all_mods);
131
ExitCodes main_(int , const char**)
133
//-------------------------------------------------------------
134
// parameter handling
135
//-------------------------------------------------------------
137
StringList in_spec = getStringList_("in");
138
StringList out = getStringList_("out");
139
String in_lib = getStringOption_("lib");
140
String compare_function = getStringOption_("compare_function");
141
Int precursor_mass_multiplier = getIntOption_("round_precursor_to_integer");
142
Real precursor_mass_tolerance = getDoubleOption_("precursor_mass_tolerance");
143
//Int min_precursor_charge = getIntOption_("min_precursor_charge");
144
//Int max_precursor_charge = getIntOption_("max_precursor_charge");
145
Real remove_peaks_below_threshold = getDoubleOption_("remove_peaks_below_threshold");
146
UInt min_peaks = getIntOption_("min_peaks");
147
UInt max_peaks= getIntOption_("max_peaks");
148
Int cut_peaks_below = getIntOption_("cut_peaks_below");
149
StringList fixed_modifications = getStringList_("fixed_modifications");
150
StringList variable_modifications = getStringList_("variable_modifications");
151
Int top_hits = getIntOption_("top_hits");
154
writeLog_("top_hits (should be >= -1 )");
155
return ILLEGAL_PARAMETERS;
157
//-------------------------------------------------------------
159
//-------------------------------------------------------------
160
if(out.size() != in_spec.size())
162
writeLog_("out (should be as many as input files)");
163
return ILLEGAL_PARAMETERS;
166
time_t prog_time = time(NULL);
167
MSPFile spectral_library;
168
RichPeakMap query, library;
169
//spectrum which will be identified
171
spectra.setLogType(log_type_);
173
time_t start_build_time = time(NULL);
174
//-------------------------------------------------------------
175
//building map for faster search
176
//-------------------------------------------------------------
178
//library containing already identified peptide spectra
179
vector< PeptideIdentification > ids;
180
spectral_library.load(in_lib,ids,library);
182
map<Size, vector<PeakSpectrum> > MSLibrary;
184
RichPeakMap::iterator s;
185
vector<PeptideIdentification>::iterator i;
186
ModificationsDB* mdb = ModificationsDB::getInstance();
187
for(s = library.begin(), i = ids.begin() ; s < library.end();++s,++i)
189
DoubleReal precursor_MZ = (*s).getPrecursors()[0].getMZ();
190
Size MZ_multi = (Size)precursor_MZ*precursor_mass_multiplier;
191
map<Size,vector<PeakSpectrum> >::iterator found;
192
found = MSLibrary.find(MZ_multi);
195
bool variable_modifications_ok = true;
196
bool fixed_modifications_ok = true;
197
const AASequence& aaseq= i->getHits()[0].getSequence();
198
//variable fixed modifications
199
if(!fixed_modifications.empty())
201
for(Size i = 0; i< aaseq.size(); ++i)
203
const Residue& mod = aaseq.getResidue(i);
204
for(Size s = 0; s < fixed_modifications.size();++s)
206
if(mod.getOneLetterCode() ==mdb->getModification(fixed_modifications[s]).getOrigin() && fixed_modifications[s] != mod.getModification())
208
fixed_modifications_ok = false;
214
//variable modifications
215
if(aaseq.isModified() && (!variable_modifications.empty()))
217
for(Size i = 0; i< aaseq.size(); ++i)
219
if(aaseq.isModified(i))
221
const Residue& mod = aaseq.getResidue(i);
222
for(Size s = 0; s < variable_modifications.size();++s)
224
if(mod.getOneLetterCode() ==mdb->getModification(variable_modifications[s]).getOrigin() && variable_modifications[s] != mod.getModification())
226
variable_modifications_ok = false;
233
if(variable_modifications_ok && fixed_modifications_ok)
235
PeptideIdentification& translocate_pid = *i;
236
librar.getPeptideIdentifications().push_back(translocate_pid);
237
librar.setPrecursors(s->getPrecursors());
238
//library entry transformation
239
for(UInt l = 0; l< s->size(); ++l)
242
if((*s)[l].getIntensity() > remove_peaks_below_threshold)
244
const String& info = (*s)[l].getMetaValue("MSPPeakInfo");
247
peak.setIntensity(sqrt(0.2*(*s)[l].getIntensity()));
251
peak.setIntensity(sqrt((*s)[l].getIntensity()));
254
peak.setMZ((*s)[l].getMZ());
255
peak.setPosition((*s)[l].getPosition());
256
librar.push_back(peak);
259
if(found != MSLibrary.end())
261
found->second.push_back(librar);
265
vector<PeakSpectrum> tmp;
266
tmp.push_back(librar);
267
MSLibrary.insert(make_pair(MZ_multi,tmp));
272
time_t end_build_time = time(NULL);
273
cout<<"Time needed for preprocessing data: "<<(end_build_time - start_build_time)<<"\n";
275
PeakSpectrumCompareFunctor* comparor = Factory<PeakSpectrumCompareFunctor>::create(compare_function);
276
//-------------------------------------------------------------
278
//-------------------------------------------------------------
280
StringList::iterator in, out_file;
281
for(in = in_spec.begin() , out_file = out.begin(); in < in_spec.end(); ++in,++out_file)
283
time_t start_time = time(NULL);
284
spectra.load(*in,query);
285
//Will hold valuable hits
286
vector<PeptideIdentification> peptide_ids;
287
vector<ProteinIdentification> protein_ids;
288
// Write parameters to ProteinIdentifcation
289
ProteinIdentification prot_id;
290
//Parameters of identificaion
291
prot_id.setIdentifier("test");
292
prot_id.setSearchEngineVersion("SpecLibSearcher");
293
prot_id.setDateTime(DateTime::now());
294
prot_id.setScoreType(compare_function);
295
ProteinIdentification::SearchParameters searchparam;
296
searchparam.precursor_tolerance = precursor_mass_tolerance;
297
prot_id.setSearchParameters(searchparam);
298
/***********SEARCH**********/
299
for(UInt j = 0; j < query.size(); ++j)
301
//Set identifier for each identifications
302
PeptideIdentification pid;
303
pid.setIdentifier("test");
304
pid.setScoreType(compare_function);
306
pr_hit.setAccession(j);
307
prot_id.insertHit(pr_hit);
308
//RichPeak1D to Peak1D transformation for the compare function query
311
query[j].sortByIntensity(true);
312
DoubleReal min_high_intensity = 0;
314
if(query[j].empty() || query[j].getMSLevel()!=2 )
318
if(query[j].getPrecursors().empty())
320
writeLog_("Warning MS2 spectrum without precursor information");
324
min_high_intensity = (1/cut_peaks_below)*query[j][0].getIntensity();
326
query[j].sortByPosition();
327
for(UInt k = 0; k < query[j].size() && k < max_peaks; ++k)
329
if(query[j][k].getIntensity() > remove_peaks_below_threshold && query[j][k].getIntensity() >= min_high_intensity )
332
peak.setIntensity(sqrt(query[j][k].getIntensity()));
333
peak.setMZ(query[j][k].getMZ());
334
peak.setPosition(query[j][k].getPosition());
335
quer.push_back(peak);
338
if(quer.size() >= min_peaks)
346
DoubleReal query_MZ = query[j].getPrecursors()[0].getMZ();
349
bool charge_one = false;
350
Int percent = (Int)Math::round((query[j].size()/100.0) * 3.0);
351
Int margin = (Int)Math::round((query[j].size()/100.0)* 1.0);
352
for(vector<RichPeak1D>::iterator peak = query[j].end()-1; percent >= 0; --peak , --percent)
354
if(peak->getMZ() < query_MZ)
363
Real min_MZ = (query_MZ - precursor_mass_tolerance) *precursor_mass_multiplier;
364
Real max_MZ = (query_MZ + precursor_mass_tolerance) *precursor_mass_multiplier;
365
for(Size mz = (Size)min_MZ; mz <= ((Size)max_MZ)+1; ++mz)
367
map<Size, vector<PeakSpectrum> >::iterator found;
368
found = MSLibrary.find(mz);
369
if(found != MSLibrary.end())
371
vector<PeakSpectrum>& library = found->second;
372
for(Size i = 0; i < library.size(); ++i)
374
Real this_MZ = library[i].getPrecursors()[0].getMZ()*precursor_mass_multiplier;
375
if(this_MZ >= min_MZ && max_MZ >= this_MZ && ((charge_one == true && library[i].getPeptideIdentifications()[0].getHits()[0].getCharge() == 1) ||charge_one == false ))
377
PeptideHit hit = library[i].getPeptideIdentifications()[0].getHits()[0];
378
PeakSpectrum& librar = library[i];
379
//Special treatment for SpectraST score as it computes a score based on the whole library
380
if(compare_function == "SpectraSTSimilarityScore")
382
SpectraSTSimilarityScore* sp= static_cast<SpectraSTSimilarityScore*>(comparor);
383
BinnedSpectrum quer_bin = sp->transform(quer);
384
BinnedSpectrum librar_bin = sp->transform(librar);
385
score = (*sp)(quer,librar);//(*sp)(quer_bin,librar_bin);
386
double dot_bias = sp->dot_bias(quer_bin,librar_bin,score);
387
hit.setMetaValue("DOTBIAS",dot_bias);
391
if(compare_function =="CompareFouriertransform")
393
CompareFouriertransform* ft = static_cast<CompareFouriertransform*>(comparor);
395
ft->transform(librar);
397
score = (*comparor)(quer,librar);
400
DataValue RT(library[i].getRT());
401
DataValue MZ(library[i].getPrecursors()[0].getMZ());
402
hit.setMetaValue("RT",RT);
403
hit.setMetaValue("MZ",MZ);
405
hit.addProteinAccession(pr_hit.getAccession());
412
pid.setHigherScoreBetter(true);
414
if(compare_function =="SpectraSTSimilarityScore")
416
if(!pid.empty() && !pid.getHits().empty())
418
vector<PeptideHit> final_hits;
419
final_hits.resize(pid.getHits().size());
420
SpectraSTSimilarityScore* sp= static_cast<SpectraSTSimilarityScore*>(comparor);
422
for( ; runner_up < pid.getHits().size() ; ++runner_up)
424
if(pid.getHits()[0].getSequence().toUnmodifiedString() != pid.getHits()[runner_up].getSequence().toUnmodifiedString()|| runner_up > 5)
429
double delta_D = sp->delta_D(pid.getHits()[0].getScore(), pid.getHits()[runner_up].getScore());
430
for(Size s = 0; s < pid.getHits().size();++s)
432
final_hits[s] = pid.getHits()[s];
433
final_hits[s].setMetaValue("delta D",delta_D);
434
final_hits[s].setMetaValue("dot product",pid.getHits()[s].getScore());
435
final_hits[s].setScore(sp->compute_F(pid.getHits()[s].getScore(),delta_D,pid.getHits()[s].getMetaValue("DOTBIAS")));
437
//final_hits[s].removeMetaValue("DOTBIAS");
439
pid.setHits(final_hits);
441
pid.setMetaValue("MZ",query[j].getPrecursors()[0].getMZ());
442
pid.setMetaValue("RT",query_MZ);
445
if(top_hits != -1 && (UInt)top_hits < pid.getHits().size())
447
vector<PeptideHit> hits;
448
hits.resize(top_hits);
449
for(Size i = 0; i < (UInt)top_hits; ++i)
451
hits[i] = pid.getHits()[i];
455
peptide_ids.push_back(pid);
457
protein_ids.push_back(prot_id);
458
//-------------------------------------------------------------
460
//-------------------------------------------------------------
461
IdXMLFile id_xml_file;
462
id_xml_file.store(*out_file,protein_ids,peptide_ids);
463
time_t end_time = time(NULL);
464
cout<<"Search time: "<<difftime(end_time,start_time)<<" seconds for "<<*in<<"\n";
466
time_t end_time = time(NULL);
467
cout<<"Total time: "<<difftime(end_time,prog_time)<<" secconds\n";
476
int main( int argc, const char** argv )
478
TOPPSpecLibSearcher tool;
479
return tool.main(argc,argv);
89
class TOPPSpecLibSearcher :
93
TOPPSpecLibSearcher() :
94
TOPPBase("SpecLibSearcher", "Identifies peptide MS/MS spectra by spectral matching with a searchable spectral library.")
99
void registerOptionsAndFlags_()
101
registerInputFileList_("in", "<files>", StringList::create(""), "Input files");
102
setValidFormats_("in", StringList::create("mzML"));
103
registerInputFile_("lib", "<file>", "", "searchable spectral library (MSP format)");
104
setValidFormats_("lib", StringList::create("msp"));
105
registerOutputFileList_("out", "<files>", StringList::create(""), "Output files. Have to be as many as input files");
106
setValidFormats_("out", StringList::create("idXML"));
107
registerDoubleOption_("precursor_mass_tolerance", "<tolerance>", 3, "Precursor mass tolerance, (Th)", false);
108
registerIntOption_("round_precursor_to_integer", "<number>", 10, "many precursor m/z multipling number lead to the same number; are packed in the same vector for faster search.Should be higher for high-resolution data", false, true);
109
// registerDoubleOption_("fragment_mass_tolerance","<tolerance>",0.3,"Fragment mass error",false);
111
// registerStringOption_("precursor_error_units", "<unit>", "Da", "parent monoisotopic mass error units", false);
112
// registerStringOption_("fragment_error_units", "<unit>", "Da", "fragment monoisotopic mass error units", false);
113
// vector<String> valid_strings;
114
// valid_strings.push_back("Da");
115
// setValidStrings_("precursor_error_units", valid_strings);
116
// setValidStrings_("fragment_error_units", valid_strings);
117
// registerIntOption_("min_precursor_charge", "<charge>", 1, "minimum precursor ion charge", false);
118
// registerIntOption_("max_precursor_charge", "<charge>", 3, "maximum precursor ion charge", false);
119
registerStringOption_("compare_function", "<string>", "ZhangSimilarityScore", "function for similarity comparisson", false);
120
PeakSpectrumCompareFunctor::registerChildren();
121
setValidStrings_("compare_function", Factory<PeakSpectrumCompareFunctor>::registeredProducts());
122
registerIntOption_("top_hits", "<number>", 10, "save the first <number> top hits. For all type -1", false);
125
registerTOPPSubsection_("filter", "Filtering options. Most are especially useful when the query spectra are raw.");
126
registerDoubleOption_("filter:remove_peaks_below_threshold", "<threshold>", 2.01, "All peaks of a query spectrum with intensities below <threshold> will be zeroed.", false);
127
registerIntOption_("filter:min_peaks", "<number>", 5, "required mininum number of peaks for a query spectrum", false);
128
registerIntOption_("filter:max_peaks", "<number>", 150, "Use only the top <number> of peaks.", false);
129
registerIntOption_("filter:cut_peaks_below", "<number>", 1000, "Remove all peaks which are lower than 1/<number> of the highest peaks. Default equals all peaks which are lower than 0.001 of the maximum intensity peak", false);
131
vector<String> all_mods;
132
ModificationsDB::getInstance()->getAllSearchModifications(all_mods);
133
registerStringList_("fixed_modifications", "<mods>", StringList::create(""), "fixed modifications, specified using UniMod (www.unimod.org) terms, e.g. 'Carbamidomethyl (C)' or 'Oxidation (M)'", false);
134
setValidStrings_("fixed_modifications", all_mods);
136
registerStringList_("variable_modifications", "<mods>", StringList::create(""), "variable modifications, specified using UniMod (www.unimod.org) terms, e.g. 'Carbamidomethyl (C)' or 'Oxidation (M)'", false);
137
setValidStrings_("variable_modifications", all_mods);
141
ExitCodes main_(int, const char **)
143
//-------------------------------------------------------------
144
// parameter handling
145
//-------------------------------------------------------------
147
StringList in_spec = getStringList_("in");
148
StringList out = getStringList_("out");
149
String in_lib = getStringOption_("lib");
150
String compare_function = getStringOption_("compare_function");
151
Int precursor_mass_multiplier = getIntOption_("round_precursor_to_integer");
152
Real precursor_mass_tolerance = getDoubleOption_("precursor_mass_tolerance");
153
//Int min_precursor_charge = getIntOption_("min_precursor_charge");
154
//Int max_precursor_charge = getIntOption_("max_precursor_charge");
155
Real remove_peaks_below_threshold = getDoubleOption_("filter:remove_peaks_below_threshold");
156
UInt min_peaks = getIntOption_("filter:min_peaks");
157
UInt max_peaks = getIntOption_("filter:max_peaks");
158
Int cut_peaks_below = getIntOption_("filter:cut_peaks_below");
159
StringList fixed_modifications = getStringList_("fixed_modifications");
160
StringList variable_modifications = getStringList_("variable_modifications");
161
Int top_hits = getIntOption_("top_hits");
164
writeLog_("top_hits (should be >= -1 )");
165
return ILLEGAL_PARAMETERS;
167
//-------------------------------------------------------------
169
//-------------------------------------------------------------
170
if (out.size() != in_spec.size())
172
writeLog_("out (should be as many as input files)");
173
return ILLEGAL_PARAMETERS;
176
time_t prog_time = time(NULL);
177
MSPFile spectral_library;
178
RichPeakMap query, library;
179
//spectrum which will be identified
181
spectra.setLogType(log_type_);
183
time_t start_build_time = time(NULL);
184
//-------------------------------------------------------------
185
//building map for faster search
186
//-------------------------------------------------------------
188
//library containing already identified peptide spectra
189
vector<PeptideIdentification> ids;
190
spectral_library.load(in_lib, ids, library);
192
map<Size, vector<PeakSpectrum> > MSLibrary;
194
RichPeakMap::iterator s;
195
vector<PeptideIdentification>::iterator i;
196
ModificationsDB * mdb = ModificationsDB::getInstance();
197
for (s = library.begin(), i = ids.begin(); s < library.end(); ++s, ++i)
199
DoubleReal precursor_MZ = (*s).getPrecursors()[0].getMZ();
200
Size MZ_multi = (Size)precursor_MZ * precursor_mass_multiplier;
201
map<Size, vector<PeakSpectrum> >::iterator found;
202
found = MSLibrary.find(MZ_multi);
205
bool variable_modifications_ok = true;
206
bool fixed_modifications_ok = true;
207
const AASequence & aaseq = i->getHits()[0].getSequence();
208
//variable fixed modifications
209
if (!fixed_modifications.empty())
211
for (Size i = 0; i < aaseq.size(); ++i)
213
const Residue & mod = aaseq.getResidue(i);
214
for (Size s = 0; s < fixed_modifications.size(); ++s)
216
if (mod.getOneLetterCode() == mdb->getModification(fixed_modifications[s]).getOrigin() && fixed_modifications[s] != mod.getModification())
218
fixed_modifications_ok = false;
224
//variable modifications
225
if (aaseq.isModified() && (!variable_modifications.empty()))
227
for (Size i = 0; i < aaseq.size(); ++i)
229
if (aaseq.isModified(i))
231
const Residue & mod = aaseq.getResidue(i);
232
for (Size s = 0; s < variable_modifications.size(); ++s)
234
if (mod.getOneLetterCode() == mdb->getModification(variable_modifications[s]).getOrigin() && variable_modifications[s] != mod.getModification())
236
variable_modifications_ok = false;
243
if (variable_modifications_ok && fixed_modifications_ok)
245
PeptideIdentification & translocate_pid = *i;
246
librar.getPeptideIdentifications().push_back(translocate_pid);
247
librar.setPrecursors(s->getPrecursors());
248
//library entry transformation
249
for (UInt l = 0; l < s->size(); ++l)
252
if ((*s)[l].getIntensity() > remove_peaks_below_threshold)
254
const String & info = (*s)[l].getMetaValue("MSPPeakInfo");
257
peak.setIntensity(sqrt(0.2 * (*s)[l].getIntensity()));
261
peak.setIntensity(sqrt((*s)[l].getIntensity()));
264
peak.setMZ((*s)[l].getMZ());
265
peak.setPosition((*s)[l].getPosition());
266
librar.push_back(peak);
269
if (found != MSLibrary.end())
271
found->second.push_back(librar);
275
vector<PeakSpectrum> tmp;
276
tmp.push_back(librar);
277
MSLibrary.insert(make_pair(MZ_multi, tmp));
282
time_t end_build_time = time(NULL);
283
cout << "Time needed for preprocessing data: " << (end_build_time - start_build_time) << "\n";
285
PeakSpectrumCompareFunctor * comparor = Factory<PeakSpectrumCompareFunctor>::create(compare_function);
286
//-------------------------------------------------------------
288
//-------------------------------------------------------------
290
StringList::iterator in, out_file;
291
for (in = in_spec.begin(), out_file = out.begin(); in < in_spec.end(); ++in, ++out_file)
293
time_t start_time = time(NULL);
294
spectra.load(*in, query);
295
//Will hold valuable hits
296
vector<PeptideIdentification> peptide_ids;
297
vector<ProteinIdentification> protein_ids;
298
// Write parameters to ProteinIdentifcation
299
ProteinIdentification prot_id;
300
//Parameters of identificaion
301
prot_id.setIdentifier("test");
302
prot_id.setSearchEngineVersion("SpecLibSearcher");
303
prot_id.setDateTime(DateTime::now());
304
prot_id.setScoreType(compare_function);
305
ProteinIdentification::SearchParameters searchparam;
306
searchparam.precursor_tolerance = precursor_mass_tolerance;
307
prot_id.setSearchParameters(searchparam);
308
/***********SEARCH**********/
309
for (UInt j = 0; j < query.size(); ++j)
311
//Set identifier for each identifications
312
PeptideIdentification pid;
313
pid.setIdentifier("test");
314
pid.setScoreType(compare_function);
316
pr_hit.setAccession(j);
317
prot_id.insertHit(pr_hit);
318
//RichPeak1D to Peak1D transformation for the compare function query
321
query[j].sortByIntensity(true);
322
DoubleReal min_high_intensity = 0;
324
if (query[j].empty() || query[j].getMSLevel() != 2)
328
if (query[j].getPrecursors().empty())
330
writeLog_("Warning MS2 spectrum without precursor information");
334
min_high_intensity = (1 / cut_peaks_below) * query[j][0].getIntensity();
336
query[j].sortByPosition();
337
for (UInt k = 0; k < query[j].size() && k < max_peaks; ++k)
339
if (query[j][k].getIntensity() > remove_peaks_below_threshold && query[j][k].getIntensity() >= min_high_intensity)
342
peak.setIntensity(sqrt(query[j][k].getIntensity()));
343
peak.setMZ(query[j][k].getMZ());
344
peak.setPosition(query[j][k].getPosition());
345
quer.push_back(peak);
348
if (quer.size() >= min_peaks)
356
DoubleReal query_MZ = query[j].getPrecursors()[0].getMZ();
359
bool charge_one = false;
360
Int percent = (Int) Math::round((query[j].size() / 100.0) * 3.0);
361
Int margin = (Int) Math::round((query[j].size() / 100.0) * 1.0);
362
for (vector<RichPeak1D>::iterator peak = query[j].end() - 1; percent >= 0; --peak, --percent)
364
if (peak->getMZ() < query_MZ)
369
if (percent > margin)
373
Real min_MZ = (query_MZ - precursor_mass_tolerance) * precursor_mass_multiplier;
374
Real max_MZ = (query_MZ + precursor_mass_tolerance) * precursor_mass_multiplier;
375
for (Size mz = (Size)min_MZ; mz <= ((Size)max_MZ) + 1; ++mz)
377
map<Size, vector<PeakSpectrum> >::iterator found;
378
found = MSLibrary.find(mz);
379
if (found != MSLibrary.end())
381
vector<PeakSpectrum> & library = found->second;
382
for (Size i = 0; i < library.size(); ++i)
384
Real this_MZ = library[i].getPrecursors()[0].getMZ() * precursor_mass_multiplier;
385
if (this_MZ >= min_MZ && max_MZ >= this_MZ && ((charge_one == true && library[i].getPeptideIdentifications()[0].getHits()[0].getCharge() == 1) || charge_one == false))
387
PeptideHit hit = library[i].getPeptideIdentifications()[0].getHits()[0];
388
PeakSpectrum & librar = library[i];
389
//Special treatment for SpectraST score as it computes a score based on the whole library
390
if (compare_function == "SpectraSTSimilarityScore")
392
SpectraSTSimilarityScore * sp = static_cast<SpectraSTSimilarityScore *>(comparor);
393
BinnedSpectrum quer_bin = sp->transform(quer);
394
BinnedSpectrum librar_bin = sp->transform(librar);
395
score = (*sp)(quer, librar); //(*sp)(quer_bin,librar_bin);
396
double dot_bias = sp->dot_bias(quer_bin, librar_bin, score);
397
hit.setMetaValue("DOTBIAS", dot_bias);
401
if (compare_function == "CompareFouriertransform")
403
CompareFouriertransform * ft = static_cast<CompareFouriertransform *>(comparor);
405
ft->transform(librar);
407
score = (*comparor)(quer, librar);
410
DataValue RT(library[i].getRT());
411
DataValue MZ(library[i].getPrecursors()[0].getMZ());
412
hit.setMetaValue("RT", RT);
413
hit.setMetaValue("MZ", MZ);
415
hit.addProteinAccession(pr_hit.getAccession());
422
pid.setHigherScoreBetter(true);
424
if (compare_function == "SpectraSTSimilarityScore")
426
if (!pid.empty() && !pid.getHits().empty())
428
vector<PeptideHit> final_hits;
429
final_hits.resize(pid.getHits().size());
430
SpectraSTSimilarityScore * sp = static_cast<SpectraSTSimilarityScore *>(comparor);
432
for (; runner_up < pid.getHits().size(); ++runner_up)
434
if (pid.getHits()[0].getSequence().toUnmodifiedString() != pid.getHits()[runner_up].getSequence().toUnmodifiedString() || runner_up > 5)
439
double delta_D = sp->delta_D(pid.getHits()[0].getScore(), pid.getHits()[runner_up].getScore());
440
for (Size s = 0; s < pid.getHits().size(); ++s)
442
final_hits[s] = pid.getHits()[s];
443
final_hits[s].setMetaValue("delta D", delta_D);
444
final_hits[s].setMetaValue("dot product", pid.getHits()[s].getScore());
445
final_hits[s].setScore(sp->compute_F(pid.getHits()[s].getScore(), delta_D, pid.getHits()[s].getMetaValue("DOTBIAS")));
447
//final_hits[s].removeMetaValue("DOTBIAS");
449
pid.setHits(final_hits);
451
pid.setMetaValue("MZ", query[j].getPrecursors()[0].getMZ());
452
pid.setMetaValue("RT", query_MZ);
455
if (top_hits != -1 && (UInt)top_hits < pid.getHits().size())
457
vector<PeptideHit> hits;
458
hits.resize(top_hits);
459
for (Size i = 0; i < (UInt)top_hits; ++i)
461
hits[i] = pid.getHits()[i];
465
peptide_ids.push_back(pid);
467
protein_ids.push_back(prot_id);
468
//-------------------------------------------------------------
470
//-------------------------------------------------------------
471
IdXMLFile id_xml_file;
472
id_xml_file.store(*out_file, protein_ids, peptide_ids);
473
time_t end_time = time(NULL);
474
cout << "Search time: " << difftime(end_time, start_time) << " seconds for " << *in << "\n";
476
time_t end_time = time(NULL);
477
cout << "Total time: " << difftime(end_time, prog_time) << " secconds\n";
486
int main(int argc, const char ** argv)
488
TOPPSpecLibSearcher tool;
489
return tool.main(argc, argv);