1
// -*- mode: C++; tab-width: 2; -*-
4
// --------------------------------------------------------------------------
5
// OpenMS Mass Spectrometry Framework
6
// --------------------------------------------------------------------------
7
// Copyright (C) 2003-2011 -- Oliver Kohlbacher, Knut Reinert
9
// This library is free software; you can redistribute it and/or
10
// modify it under the terms of the GNU Lesser General Public
11
// License as published by the Free Software Foundation; either
12
// version 2.1 of the License, or (at your option) any later version.
14
// This library is distributed in the hope that it will be useful,
15
// but WITHOUT ANY WARRANTY; without even the implied warranty of
16
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17
// Lesser General Public License for more details.
19
// You should have received a copy of the GNU Lesser General Public
20
// License along with this library; if not, write to the Free Software
21
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23
// --------------------------------------------------------------------------
24
// $Maintainer: Rene Hussong$
26
// --------------------------------------------------------------------------
28
#ifndef OPENMS_TRANSFORMATIONS_FEATUREFINDER_ISOTOPEWAVELETTRANSFORM_H
29
#define OPENMS_TRANSFORMATIONS_FEATUREFINDER_ISOTOPEWAVELETTRANSFORM_H
31
#include <OpenMS/TRANSFORMATIONS/FEATUREFINDER/IsotopeWaveletConstants.h>
32
#include <OpenMS/TRANSFORMATIONS/FEATUREFINDER/IsotopeWavelet.h>
33
#include <OpenMS/KERNEL/FeatureMap.h>
34
#include <OpenMS/KERNEL/MSExperiment.h>
35
#include <OpenMS/KERNEL/MSSpectrum.h>
36
#include <OpenMS/CONCEPT/Exception.h>
37
#include <OpenMS/MATH/STATISTICS/LinearRegression.h>
38
#include <OpenMS/DATASTRUCTURES/ConstRefVector.h>
41
#include <boost/math/special_functions/bessel.hpp>
48
#ifdef OPENMS_HAS_CUDA
50
#include <OpenMS/TRANSFORMATIONS/FEATUREFINDER/IsotopeWaveletCudaKernel.h>
53
// we are not yet sure if we really want to drag in cutil.h and the CUDA_SAFE_CALL definitions...
54
#ifndef CUDA_SAFE_CALL
55
#define CUDA_SAFE_CALL(call) call;
61
/** @brief An internally used class, subsuming several variables */
76
/** @brief A class implementing the isotope wavelet transform.
77
* If you just want to find features using the isotope wavelet, take a look at the FeatureFinderAlgorithmIsotopeWavelet class. Usually, you only
78
* have to consider the class at hand if you plan to change the basic implementation of the transform. */
79
template <typename PeakType>
80
class IsotopeWaveletTransform
85
/** @brief Internally used data structure. */
88
DoubleReal mz; //<The monoisotopic position
89
UInt c; //<Note, this is not the charge (it is charge-1!!!)
90
DoubleReal score; //<The associated score
91
DoubleReal intens; //<The transformed intensity at the monoisotopic mass
92
DoubleReal ref_intens;
93
DoubleReal RT; //<The elution time (not the scan index)
94
UInt RT_index; //<The elution time (map) index
95
UInt MZ_begin; //<Index
99
typedef std::multimap<UInt, BoxElement> Box; ///<Key: RT index, value: BoxElement
102
/** @brief Internally (only by GPUs) used data structure .
103
* It allows efficient data exchange between CPU and GPU and avoids unnecessary memory moves.
104
* The class is tailored on the isotope wavelet transform and is in general not applicable on similar - but different - situations. */
107
friend class IsotopeWaveletTransform;
111
/** Default constructor */
113
: reference_(NULL), trans_intens_(NULL)
117
/** Copy constructor */
118
TransSpectrum(const MSSpectrum<PeakType>* reference)
119
: reference_(reference)
121
trans_intens_ = new std::vector<float> (reference_->size(), 0.0);
125
virtual ~TransSpectrum()
127
delete (trans_intens_);
130
virtual void destroy ()
132
delete (trans_intens_);
138
/** Returns the RT value (not the index) of the associated scan. */
139
inline DoubleReal getRT () const
141
return (reference_->getRT());
144
/** Returns the mass-over-charge ratio at index @p i. */
145
inline DoubleReal getMZ (const UInt i) const
147
return ((*reference_)[i].getMZ());
150
/** Returns the reference (non-transformed) intensity at index @p i. */
151
inline DoubleReal getRefIntensity (const UInt i) const
153
return ((*reference_)[i].getIntensity());
156
/** Returns the transformed intensity at index @p i. */
157
inline DoubleReal getTransIntensity (const UInt i) const
159
return ((*trans_intens_)[i]);
162
/** Stores the intensity value @p i of the transform at position @p i. */
163
inline void setTransIntensity (const UInt i, const DoubleReal intens)
165
(*trans_intens_)[i] = intens;
168
/** Returns the size of spectra. */
169
inline Size size () const
171
return (trans_intens_->size());
174
/** Returns a pointer to the reference spectrum. */
175
inline const MSSpectrum<PeakType>* getRefSpectrum ()
180
/** Returns a pointer to the reference spectrum. */
181
inline const MSSpectrum<PeakType>* getRefSpectrum () const
186
/** Attention: iterations will only performed over the reference spectrum.
187
* You will have to use the "distance"-function in order to get the corresponding entry of the transform. */
188
inline typename MSSpectrum<PeakType>::const_iterator MZBegin (const DoubleReal mz) const
190
return (reference_->MZBegin(mz));
193
/** Attention: iterations will only performed over the reference spectrum.
194
* You will have to use the "distance"-function in order to get the corresponding entry of the transform. */
195
inline typename MSSpectrum<PeakType>::const_iterator MZEnd (const DoubleReal mz) const
197
return (reference_->MZEnd(mz));
200
/** Attention: iterations will only performed over the reference spectrum.
201
* You will have to use the "distance"-function in order to get the corresponding entry of the transform. */
202
inline typename MSSpectrum<PeakType>::const_iterator end () const
204
return (reference_->end());
207
/** Attention: iterations will only performed over the reference spectrum.
208
* You will have to use the "distance"-function in order to get the corresponding entry of the transform. */
209
inline typename MSSpectrum<PeakType>::const_iterator begin () const
211
return (reference_->begin());
217
const MSSpectrum<PeakType>* reference_; //<The reference spectrum
218
std::vector<float>* trans_intens_; //<The intensities of the transform
224
/** @brief Constructor.
226
* @param min_mz The smallest m/z value occurring in your map.
227
* @param max_mz The largest m/z value occurring in your map.
228
* @param max_charge The highest charge state you would like to consider. */
229
IsotopeWaveletTransform (const DoubleReal min_mz, const DoubleReal max_mz, const UInt max_charge, const Size max_scan_size=0, const bool use_cuda=false, const bool hr_data=false, const String intenstype="ref");
231
/** @brief Destructor. */
232
virtual ~IsotopeWaveletTransform () ;
235
/** @brief Computes the isotope wavelet transform of charge state @p c.
236
* @param c_trans The transform.
237
* @param c_ref The reference spectrum.
238
* @oaram c The charge state minus 1 (e.g. c=2 means charge state 3) at which you want to compute the transform. */
239
virtual void getTransform (MSSpectrum<PeakType>& c_trans, const MSSpectrum<PeakType>& c_ref, const UInt c);
241
/** @brief Computes the isotope wavelet transform of charge state @p c.
242
* @param c_trans The transform.
243
* @param c_ref The reference spectrum.
244
* @oaram c The charge state minus 1 (e.g. c=2 means charge state 3) at which you want to compute the transform. */
245
virtual void getTransformHighRes (MSSpectrum<PeakType>& c_trans, const MSSpectrum<PeakType>& c_ref, const UInt c);
247
/** @brief Given an isotope wavelet transformed spectrum @p candidates, this function assigns to every significant
248
* pattern its corresponding charge state and a score indicating the reliability of the prediction. The result of this
249
* process is stored internally. Important: Before calling this function, apply updateRanges() to the original map.
251
* @param candidates A isotope wavelet transformed spectrum. Entry "number i" in this vector must correspond to the
252
* charge-"(i-1)"-transform of its mass signal. (This is exactly the output of the function @see getTransforms.)
253
* @param ref The reference scan (the untransformed raw data) corresponding to @p candidates.
254
* @param c The corrsponding charge state minus 1 (e.g. c=2 means charge state 3)
255
* @param scan_index The index of the scan (w.r.t. to some map) currently under consideration.
256
* @param ampl_cutoff The thresholding parameter. This parameter is the only (and hence a really important)
257
* parameter of the isotope wavelet transform. On the basis of @p ampl_cutoff the program tries to distinguish between
258
* noise and signal. Please note that it is not a "simple" hard thresholding parameter in the sense of drawing a virtual
259
* line in the spectrum, which is then used as a guillotine cut. Maybe you should play around a bit with this parameter to
260
* get a feeling about its range. For peptide mass fingerprints on small data sets (like single MALDI-scans e.g.), it
261
* makes sense to start @p ampl_cutoff=0 or even @p ampl_cutoff=-1,
262
* indicating no thresholding at all. Note that also ampl_cutoff=0 triggers (a moderate) thresholding based on the
263
* average intensity in the wavelet transform.
264
* @param check_PPMs If enabled, the algorithm will check each monoisotopic mass candidate for its plausibility
265
* by computing the ppm difference between this mass and the averagine model. */
266
virtual void identifyCharge (const MSSpectrum<PeakType>& candidates, const MSSpectrum<PeakType>& ref, const UInt scan_index, const UInt c,
267
const DoubleReal ampl_cutoff, const bool check_PPMs) ;
269
virtual void initializeScan (const MSSpectrum<PeakType>& c_ref, const UInt c=0);
271
#ifdef OPENMS_HAS_CUDA
272
/** @brief Sets up all necessary arrays with correct boundaries and 'worst-case' sizes.
273
* @param scan The scan under consideration. */
274
virtual int initializeScanCuda (const MSSpectrum<PeakType>& scan, const UInt c=0);
276
/** @brief Clean up. */
277
virtual void finalizeScanCuda ();
279
/** @brief Computes The isotope wavelet transform of charge state (@p c+1) on a CUDA compatible GPU.
280
* @param c_trans Contains the reference spectrum (already by call) as well as the transformed intensities.
281
* @param c The charge state minus 1 (e.g. c=2 means charge state 3)*/
282
virtual void getTransformCuda (TransSpectrum &c_trans, const UInt c);
284
/** @brief Essentially the same as its namesake CPU-version, but on a CUDA compatible GPU device.
285
* @param candidates A isotope wavelet transformed spectrum. Entry "number i" in this vector must correspond to the
286
* charge-"(i-1)"-transform of its mass signal. (This is exactly the output of the function @see getTransforms.)
287
* @param c The corrsponding charge state minus 1 (e.g. c=2 means charge state 3)
288
* @param scan_index The index of the scan (w.r.t. to some map) currently under consideration.
289
* @param ampl_cutoff The thresholding parameter. This parameter is the only (and hence a really important)
290
* parameter of the isotope wavelet transform. On the basis of @p ampl_cutoff the program tries to distinguish between
291
* noise and signal. Please note that it is not a "simple" hard thresholding parameter in the sense of drawing a virtual
292
* line in the spectrum, which is then used as a guillotine cut. Maybe you should play around a bit with this parameter to
293
* get a feeling about its range. For peptide mass fingerprints on small data sets (like single MALDI-scans e.g.), it
294
* makes sense to start @p ampl_cutoff=0 or even @p ampl_cutoff=-1,
295
* indicating no thresholding at all. Note that also ampl_cutoff=0 triggers (a moderate) thresholding based on the
296
* average intensity in the wavelet transform.
297
* * @param check_PPMs If enabled, the algorithm will check each monoisotopic mass candidate for its plausibility
298
* by computing the ppm difference between this mass and the averagine model. */
299
virtual void identifyChargeCuda (const TransSpectrum& candidates, const UInt scan_index, const UInt c,
300
const DoubleReal ampl_cutoff, const bool check_PPMs);
302
/** Sorts the associated spectrum @p by increasing intensities.
303
* @param sorted The spectrum to be sorted. */
304
virtual int sortCuda (MSSpectrum<PeakType>& sorted);
308
/** @brief A function keeping track of currently open and closed sweep line boxes.
309
* This function is used by the isotope wavelet feature finder and must be called for each processed scan.
310
* @param map The original map containing the data set to be analyzed.
311
* @param scan_index The index of the scan currently under consideration w.r.t. its MS map.
312
* This information is necessary to sweep across the map after each scan has been evaluated.
313
* @param RT_votes_cutoff See the IsotopeWaveletFF class. */
314
void updateBoxStates (const MSExperiment<PeakType>& map, const Size scan_index, const UInt RT_interleave,
315
const UInt RT_votes_cutoff, const Int front_bound=-1, const Int end_bound=-1) ;
318
void mergeFeatures (IsotopeWaveletTransform<PeakType>* later_iwt, const UInt RT_interleave, const UInt RT_votes_cutoff);
321
/** @brief Filters the candidates further more and maps the internally used data structures to the OpenMS framework.
322
* @param map The original map containing the data set to be analyzed.
323
* @param max_charge The maximal charge state under consideration.
324
* @param RT_votes_cutoff See the IsotopeWaveletFF class.*/
325
FeatureMap<Feature> mapSeeds2Features (const MSExperiment<PeakType>& map, const UInt RT_votes_cutoff) ;
327
/** @brief Returns the closed boxes. */
328
virtual std::multimap<DoubleReal, Box> getClosedBoxes ()
329
{ return (closed_boxes_); };
332
/** @brief Computes a linear (intensity) interpolation.
333
* @param left_iter The point left to the query.
334
* @param mz_pos The query point.
335
* @param right_iter The point right to the query. */
336
inline DoubleReal getLinearInterpolation (const typename MSSpectrum<PeakType>::const_iterator& left_iter, const DoubleReal mz_pos, const typename MSSpectrum<PeakType>::const_iterator& right_iter)
338
return (left_iter->getIntensity() + (right_iter->getIntensity() - left_iter->getIntensity())/(right_iter->getMZ() - left_iter->getMZ()) * (mz_pos-left_iter->getMZ()));
341
/** @brief Computes a linear (intensity) interpolation.
342
* @param mz_a The m/z value of the point left to the query.
343
* @param mz_a The intensity value of the point left to the query.
344
* @param mz_pos The query point.
345
* @param mz_b The m/z value of the point right to the query.
346
* @param intens_b The intensity value of the point left to the query. */
347
inline DoubleReal getLinearInterpolation (const DoubleReal mz_a, const DoubleReal intens_a, const DoubleReal mz_pos, const DoubleReal mz_b, const DoubleReal intens_b)
349
return (intens_a + (intens_b - intens_a)/(mz_b - mz_a) * (mz_pos-mz_a));
352
inline DoubleReal getSigma () const
357
inline void setSigma (const DoubleReal sigma)
362
virtual void computeMinSpacing (const MSSpectrum<PeakType>& c_ref);
364
inline DoubleReal getMinSpacing () const
366
return (min_spacing_);
369
inline Size getMaxScanSize () const
371
return (max_scan_size_);
378
/** @brief Default Constructor.
379
* @note Provided just for inheritance reasons. You should always use the other constructor. */
380
IsotopeWaveletTransform () ;
383
inline void sampleTheCMarrWavelet_ (const MSSpectrum<PeakType>& scan, const Int wavelet_length, const Int mz_index, const UInt charge);
386
/** @brief Given a candidate for an isotopic pattern, this function computes the corresponding score
387
* @param candidate A isotope wavelet transformed spectrum.
388
* @param peak_cutoff The number of peaks we will consider for the isotopic pattern.
389
* @param seed_mz The predicted position of the monoisotopic peak.
390
* @param c The charge state minus 1 (e.g. c=2 means charge state 3) for which the score should be determined.
391
* @param ampl_cutoff The threshold. */
392
virtual DoubleReal scoreThis_ (const TransSpectrum& candidate, const UInt peak_cutoff,
393
const DoubleReal seed_mz, const UInt c, const DoubleReal ampl_cutoff) ;
395
/** @brief Given a candidate for an isotopic pattern, this function computes the corresponding score
396
* @param candidate A isotope wavelet transformed spectrum.
397
* @param peak_cutoff The number of peaks we will consider for the isotopic pattern.
398
* @param seed_mz The predicted position of the monoisotopic peak.
399
* @param c The charge state minus 1 (e.g. c=2 means charge state 3) for which the score should be determined.
400
* @param ampl_cutoff The threshold. */
401
virtual DoubleReal scoreThis_ (const MSSpectrum<PeakType>& candidate, const UInt peak_cutoff,
402
const DoubleReal seed_mz, const UInt c, const DoubleReal ampl_cutoff) ;
405
/** @brief A ugly but necessary function to handle "off-by-1-Dalton predictions" due to idiosyncrasies of the data set
406
* (in comparison to the averagine model)
407
* @param candidate The wavelet transformed spectrum containing the candidate.
408
* @param ref The original spectrum containing the candidate.
409
* @param seed_mz The m/z position of the candidate pattern.
410
* @param c The predicted charge state minus 1 (e.g. c=2 means charge state 3) of the candidate.
411
* @param scan_index The index of the scan under consideration (w.r.t. the original map). */
412
virtual bool checkPositionForPlausibility_ (const TransSpectrum& candidate, const MSSpectrum<PeakType>& ref, const DoubleReal seed_mz,
413
const UInt c, const UInt scan_index, const bool check_PPMs, const DoubleReal transintens, const DoubleReal prev_score) ;
415
/** @brief A ugly but necessary function to handle "off-by-1-Dalton predictions" due to idiosyncrasies of the data set
416
* (in comparison to the averagine model)
417
* @param candidate The wavelet transformed spectrum containing the candidate.
418
* @param ref The original spectrum containing the candidate.
419
* @param seed_mz The m/z position of the candidate pattern.
420
* @param c The predicted charge state minus 1 (e.g. c=2 means charge state 3) of the candidate.
421
* @param scan_index The index of the scan under consideration (w.r.t. the original map). */
422
virtual bool checkPositionForPlausibility_ (const MSSpectrum<PeakType>& candidate, const MSSpectrum<PeakType>& ref, const DoubleReal seed_mz,
423
const UInt c, const UInt scan_index, const bool check_PPMs, const DoubleReal transintens, const DoubleReal prev_score) ;
425
virtual std::pair<DoubleReal, DoubleReal> checkPPMTheoModel_ (const MSSpectrum<PeakType>& ref, const DoubleReal c_mz, const UInt c) ;
428
/** @brief Computes the average (transformed) intensity (neglecting negative values) of @p scan. */
429
inline DoubleReal getAvIntens_ (const TransSpectrum& scan);
430
/** @brief Computes the average intensity (neglecting negative values) of @p scan. */
431
inline DoubleReal getAvIntens_ (const MSSpectrum<PeakType>& scan);
433
/** @brief Computes the standard deviation (neglecting negative values) of the (transformed) intensities of @p scan. */
434
inline DoubleReal getSdIntens_ (const TransSpectrum& scan, const DoubleReal mean) ;
435
/** @brief Computes the standard deviation (neglecting negative values) of the intensities of @p scan. */
436
inline DoubleReal getSdIntens_ (const MSSpectrum<PeakType>& scan, const DoubleReal mean) ;
438
/** @brief Inserts a potential isotopic pattern into an open box or - if no such box exists - creates a new one.
439
* @param mz The position of the pattern.
440
* @param scan The index of the scan, we are currently analyzing (w.r.t. the data map).
441
* This information is necessary for the post-processing (sweep lining).
442
* @param charge The estimated charge state minus 1 (e.g. c=2 means charge state 3) of the pattern.
443
* @param score The pattern's score.
444
* @param intens The intensity at the monoisotopic peak.
445
* @param rt The retention time of the scan (similar to @p scan, but here: no index, but the real value).
446
* @param MZ_begin The starting index of the pattern (m/z) w.r.t. the current scan.
447
* @param MZ_end The end index (w.r.t. the monoisotopic position!) of the pattern (m/z) w.r.t. the current scan. */
448
virtual void push2Box_ (const DoubleReal mz, const UInt scan, UInt c, const DoubleReal score,
449
const DoubleReal intens, const DoubleReal rt, const UInt MZ_begin, const UInt MZ_end, const DoubleReal ref_intens) ;
451
/** @brief Essentially the same function as @see push2Box_.
452
* In contrast to @see push2Box this function stores its candidates only temporarily. In particular, this
453
* function is only used within a single scan transform. After the wavelet transform is computed on
454
* that scan, all candidates are pushed by this function and finally clustered together by @see clusterSeeds_.
455
* Afterwards, a final push by @see push2Box_ is performed storing the clustered candidates.
457
* @param mz The position of the pattern.
458
* @param scan The index of the scan, we are currently analyzing (w.r.t. the data map).
459
* This information is necessary for the post-processing (sweep lining).
460
* @param charge The estimated charge state minus 1 (e.g. c=2 means charge state 3) of the pattern.
461
* @param score The pattern's score.
462
* @param intens The intensity at the monoisotopic peak.
463
* @param rt The retention time of the scan (similar to @p scan, but here: no index, but the real value).
464
* @param MZ_begin The starting index of the pattern (m/z) w.r.t. the current scan.
465
* @param MZ_end The end index (w.r.t. the monoisotopic position!) of the pattern (m/z) w.r.t. the current scan.*/
466
virtual void push2TmpBox_ (const DoubleReal mz, const UInt scan, UInt charge, const DoubleReal score,
467
const DoubleReal intens, const DoubleReal rt, const UInt MZ_begin, const UInt MZ_end) ;
469
/** @brief Computes the average MZ spacing of @p scan in the range @p start_index to @p end_index.
471
* @param scan The scan we are interested in.
472
* @param start_index An optional starting position (index) w.r.t. @p scan.
473
* @param end_index An optional final position (index) w.r.t. @p scan.*/
474
inline DoubleReal getAvMZSpacing_ (const MSSpectrum<PeakType>& scan);//, Int start_index=0, Int end_index=-1) ;
477
/** @brief Clusters the seeds stored by push2TmpBox_.
478
* @param candidates A isotope wavelet transformed spectrum.
479
* @param ref The corresponding original spectrum (w.r.t. @p candidates).
480
* @param scan_index The index of the scan under consideration (w.r.t. the original map). */
481
void clusterSeeds_ (const TransSpectrum& candidates, const MSSpectrum<PeakType>& ref,
482
const UInt scan_index, const UInt c, const bool check_PPMs) ;
484
/** @brief Clusters the seeds stored by push2TmpBox_.
485
* @param candidates A isotope wavelet transformed spectrum.
486
* @param ref The corresponding original spectrum (w.r.t. @p candidates).
487
* @param scan_index The index of the scan under consideration (w.r.t. the original map). */
488
virtual void clusterSeeds_ (const MSSpectrum<PeakType>& candidates, const MSSpectrum<PeakType>& ref,
489
const UInt scan_index, const UInt c, const bool check_PPMs) ;
492
/** @brief A currently still necessary function that extends the box @p box in order to capture also
493
* signals whose isotopic pattern is nearly diminishing
494
* @param map The experimental map.
495
* @param box The box to be extended. */
496
void extendBox_ (const MSExperiment<PeakType>& map, const Box box);
498
/** @brief Returns the monoisotopic mass (with corresponding decimal values) we would expect at @p c_mass.
499
* @param c_mass The mass for which we would like to know the averagine decimal places. */
500
inline DoubleReal peptideMassRule_ (const DoubleReal c_mass) const
502
DoubleReal correction_fac = c_mass / Constants::PEPTIDE_MASS_RULE_BOUND;
503
DoubleReal old_frac_mass = c_mass - (Int)(c_mass);
504
DoubleReal new_mass = ((Int)(c_mass))* (1.+Constants::PEPTIDE_MASS_RULE_FACTOR)-(Int)(correction_fac);
505
DoubleReal new_frac_mass = new_mass - (Int)(new_mass);
507
if (new_frac_mass-old_frac_mass > 0.5)
512
if (new_frac_mass-old_frac_mass < -0.5)
520
/** @brief Returns the parts-per-million deviation of the masses.
521
* @param mass_a The first mass.
522
* @param mass_b The second mass. */
523
inline DoubleReal getPPMs_ (const DoubleReal mass_a, const DoubleReal mass_b) const
525
return (fabs(mass_a-mass_b)/(0.5*(mass_a+mass_b))*1e6);
529
//internally used data structures for the sweep line algorithm
530
std::multimap<DoubleReal, Box> open_boxes_, closed_boxes_, end_boxes_, front_boxes_; //DoubleReal = average m/z position
531
std::vector<std::multimap<DoubleReal, Box> >* tmp_boxes_; //for each charge we need a separate container
533
DoubleReal av_MZ_spacing_, sigma_;
534
std::vector<DoubleReal> c_mzs_, c_spacings_, psi_, prod_, xs_;
535
std::vector<DoubleReal> interpol_xs_, interpol_ys_;
538
UInt max_num_peaks_per_pattern_, max_charge_, data_length_;
541
Int from_max_to_left_, from_max_to_right_;
542
std::vector<int> indices_;
544
MSSpectrum<PeakType> c_sorted_candidate_;
545
DoubleReal min_spacing_, max_mz_cutoff_;
546
std::vector<float> scores_, zeros_;
548
#ifdef OPENMS_HAS_CUDA
551
UInt largest_array_size_, overall_size_, block_size_, to_load_, to_compute_;
553
void* cuda_device_intens_;
554
void* cuda_device_pos_;
555
void* cuda_device_trans_intens_;
556
void* cuda_device_fwd2_;
557
void* cuda_device_posindices_sorted_;
558
void* cuda_device_trans_intens_sorted_;
559
void* cuda_device_scores_;
560
std::vector<float> cuda_positions_, cuda_intensities_;
561
dim3 dimGrid_, dimBlock_;
566
bool myCudaComparator (const cudaHelp& a, const cudaHelp& b);
568
template <typename PeakType>
569
bool intensityComparator (const PeakType& a, const PeakType& b)
571
return (a.getIntensity() > b.getIntensity());
574
template <typename PeakType>
575
bool intensityAscendingComparator (const PeakType& a, const PeakType& b)
577
return (a.getIntensity() < b.getIntensity());
580
template <typename PeakType>
581
bool intensityPointerComparator (PeakType* a, PeakType* b)
583
return (a->getIntensity() > b->getIntensity());
586
template <typename PeakType>
587
bool positionComparator (const PeakType& a, const PeakType& b)
589
return (a.getMZ() < b.getMZ());
592
template <typename PeakType>
593
IsotopeWaveletTransform<PeakType>::IsotopeWaveletTransform ()
595
tmp_boxes_ = new std::vector<std::multimap<DoubleReal, Box> > (1);
599
max_num_peaks_per_pattern_ = 3;
602
#ifdef OPENMS_HAS_CUDA
603
largest_array_size_ = 0;
610
template <typename PeakType>
611
IsotopeWaveletTransform<PeakType>::IsotopeWaveletTransform (const DoubleReal min_mz, const DoubleReal max_mz, const UInt max_charge, const Size max_scan_size, const bool use_cuda, const bool hr_data, String intenstype)
613
max_charge_ = max_charge;
614
max_scan_size_ = max_scan_size;
616
intenstype_ = intenstype;
617
tmp_boxes_ = new std::vector<std::multimap<DoubleReal, Box> > (max_charge);
618
if (max_scan_size <= 0) //only important for the CPU
620
IsotopeWavelet::init (max_mz, max_charge);
624
max_mz_cutoff_ = IsotopeWavelet::getMzPeakCutOffAtMonoPos(max_mz, max_charge);
625
max_num_peaks_per_pattern_= IsotopeWavelet::getNumPeakCutOff(max_mz, max_charge);
627
#ifdef OPENMS_HAS_CUDA
628
if (use_cuda) //only important for the GPU
632
max_scan_size_ = max_scan_size*(4*(max_mz_cutoff_-1)-1);
634
largest_array_size_ = pow(2, ceil(log(max_scan_size_ + Constants::CUDA_EXTENDED_BLOCK_SIZE_MAX)/log(2.0)));
636
cuda_positions_.reserve(largest_array_size_);
637
cuda_intensities_.reserve(largest_array_size_);
638
indices_.resize (largest_array_size_);
639
for (UInt q=0; q<largest_array_size_; ++q)
644
h_data_ = (float*) malloc (largest_array_size_*sizeof(float));
645
h_pos_ = (int*) malloc (largest_array_size_*sizeof(int));
651
Int size_estimate ((Int)ceil(max_scan_size_/(max_mz-min_mz)));
652
Int to_reserve ((Int)ceil(size_estimate*max_num_peaks_per_pattern_*Constants::IW_NEUTRON_MASS));
653
psi_.reserve (to_reserve); //The wavelet
654
prod_.reserve (to_reserve);
655
xs_.reserve (to_reserve);
656
interpol_xs_.resize(Constants::DEFAULT_NUM_OF_INTERPOLATION_POINTS);
657
interpol_ys_.resize(Constants::DEFAULT_NUM_OF_INTERPOLATION_POINTS);
662
Int size_estimate ((Int)ceil(max_scan_size_/(max_mz-min_mz)));
663
Int to_reserve ((Int)ceil(size_estimate*max_num_peaks_per_pattern_*Constants::IW_NEUTRON_MASS));
664
psi_.reserve (to_reserve); //The wavelet
665
prod_.reserve (to_reserve);
666
xs_.reserve (to_reserve);
667
interpol_xs_.resize(Constants::DEFAULT_NUM_OF_INTERPOLATION_POINTS);
668
interpol_ys_.resize(Constants::DEFAULT_NUM_OF_INTERPOLATION_POINTS);
674
template <typename PeakType>
675
IsotopeWaveletTransform<PeakType>::~IsotopeWaveletTransform ()
677
#ifdef OPENMS_HAS_CUDA
678
if (h_data_ != NULL) free (h_data_);
679
if (h_pos_ != NULL) free (h_pos_);
689
template <typename PeakType>
690
void IsotopeWaveletTransform<PeakType>::getTransform (MSSpectrum<PeakType>& c_trans, const MSSpectrum<PeakType>& c_ref, const UInt c)
692
Int spec_size ((Int)c_ref.size());
693
//in the very unlikely case that size_t will not fit to int anymore this will be a problem of course
694
//for the sake of simplicity (we need here a signed int) we do not cast at every following comparison individually
696
DoubleReal value, T_boundary_left, T_boundary_right, old, c_diff, current, old_pos, my_local_MZ, my_local_lambda, origin, c_mz;
698
for(Int my_local_pos=0; my_local_pos<spec_size; ++my_local_pos)
700
value=0; T_boundary_left = 0, T_boundary_right = IsotopeWavelet::getMzPeakCutOffAtMonoPos(c_ref[my_local_pos].getMZ(), charge)/(DoubleReal)charge;
701
old=0; old_pos=(my_local_pos-from_max_to_left_-1>=0) ? c_ref[my_local_pos-from_max_to_left_-1].getMZ() : c_ref[0].getMZ()-min_spacing_;
702
my_local_MZ = c_ref[my_local_pos].getMZ(); my_local_lambda = IsotopeWavelet::getLambdaL(my_local_MZ*charge);
704
origin = -my_local_MZ+Constants::IW_QUARTER_NEUTRON_MASS/(DoubleReal)charge;
706
for (Int current_conv_pos = std::max(0, my_local_pos-from_max_to_left_); c_diff < T_boundary_right; ++current_conv_pos)
708
if (current_conv_pos >= spec_size)
710
value += 0.5*old*min_spacing_;
714
c_mz = c_ref[current_conv_pos].getMZ();
715
c_diff = c_mz + origin;
717
//Attention! The +1. has nothing to do with the charge, it is caused by the wavelet's formula (tz1).
718
current = c_diff > T_boundary_left && c_diff <= T_boundary_right ? IsotopeWavelet::getValueByLambda (my_local_lambda, c_diff*charge+1.)*c_ref[current_conv_pos].getIntensity() : 0;
720
value += 0.5*(current + old)*(c_mz-old_pos);
728
c_trans[my_local_pos].setIntensity (value);
733
template <typename PeakType>
734
void IsotopeWaveletTransform<PeakType>::getTransformHighRes (MSSpectrum<PeakType>& c_trans, const MSSpectrum<PeakType>& c_ref, const UInt c)
736
Int spec_size ((Int)c_ref.size());
737
//in the very unlikely case that size_t will not fit to int anymore this will be a problem of course
738
//for the sake of simplicity (we need here a signed int) we do not cast at every following comparison individually
740
DoubleReal value, T_boundary_left, T_boundary_right, c_diff, current, my_local_MZ, my_local_lambda, origin, c_mz;
742
for(Int my_local_pos=0; my_local_pos<spec_size; ++my_local_pos)
744
value=0; T_boundary_left = 0, T_boundary_right = IsotopeWavelet::getMzPeakCutOffAtMonoPos(c_ref[my_local_pos].getMZ(), charge)/(DoubleReal)charge;
747
my_local_MZ = c_ref[my_local_pos].getMZ(); my_local_lambda = IsotopeWavelet::getLambdaL(my_local_MZ*charge);
749
origin = -my_local_MZ+Constants::IW_QUARTER_NEUTRON_MASS/(DoubleReal)charge;
751
for (Int current_conv_pos = std::max(0, my_local_pos-from_max_to_left_); c_diff < T_boundary_right; ++current_conv_pos)
753
if (current_conv_pos >= spec_size)
758
c_mz = c_ref[current_conv_pos].getMZ();
759
c_diff = c_mz + origin;
761
//Attention! The +1. has nothing to do with the charge, it is caused by the wavelet's formula (tz1).
762
current = c_diff > T_boundary_left && c_diff <= T_boundary_right ? IsotopeWavelet::getValueByLambda (my_local_lambda, c_diff*charge+1.)*c_ref[current_conv_pos].getIntensity() : 0;
767
c_trans[my_local_pos].setIntensity (value);
772
template <typename PeakType>
773
void IsotopeWaveletTransform<PeakType>::initializeScan (const MSSpectrum<PeakType>& c_ref, const UInt c)
775
data_length_ = (UInt) c_ref.size();
776
computeMinSpacing(c_ref);
777
Int wavelet_length=0, quarter_length=0;
779
if(hr_data_) //We have to check this separately, because the simply estimation for LowRes data is destroyed by large gaps
782
typename MSSpectrum<PeakType>::const_iterator start_iter, end_iter;
783
for (UInt i=0; i<data_length_; ++i)
785
c_mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos(c_ref[i].getMZ(), c+1);
786
start_iter = c_ref.MZEnd(c_ref[i].getMZ());
787
end_iter = c_ref.MZBegin(c_ref[i].getMZ()+c_mz_cutoff);
788
wavelet_length = std::max ((SignedSize) wavelet_length, distance(start_iter, end_iter)+1);
789
end_iter = c_ref.MZEnd(c_ref[i].getMZ()-Constants::IW_QUARTER_NEUTRON_MASS/DoubleReal(c+1.));
790
quarter_length = std::max ((SignedSize) quarter_length, distance(end_iter, start_iter)+1);
796
max_mz_cutoff_ = IsotopeWavelet::getMzPeakCutOffAtMonoPos(c_ref[data_length_-1].getMZ(), max_charge_);
797
wavelet_length = (UInt) ceil(max_mz_cutoff_/min_spacing_);
802
if (wavelet_length > (Int) c_ref.size())
804
std::cout << "Warning: the extremal length of the wavelet is larger (" << wavelet_length << ") than the number of data points ("<< c_ref.size() << "). This might (!) severely affect the transform."<< std::endl;
805
std::cout << "Minimal spacing: " << min_spacing_ << std::endl;
806
std::cout << "Warning/Error generated at scan with RT " << c_ref.getRT() << "." << std::endl;
809
Int max_index = (UInt) (Constants::IW_QUARTER_NEUTRON_MASS/min_spacing_);
810
from_max_to_left_ = max_index;
811
from_max_to_right_ = wavelet_length-1-from_max_to_left_;
815
template <typename PeakType>
816
void IsotopeWaveletTransform<PeakType>::computeMinSpacing (const MSSpectrum<PeakType>& c_ref)
818
min_spacing_ = INT_MAX;
819
for (UInt c_conv_pos=1; c_conv_pos<c_ref.size(); ++c_conv_pos)
821
min_spacing_ = std::min (min_spacing_, c_ref[c_conv_pos].getMZ()-c_ref[c_conv_pos-1].getMZ());
826
#ifdef OPENMS_HAS_CUDA
827
template <typename PeakType>
828
void IsotopeWaveletTransform<PeakType>::finalizeScanCuda ()
830
(cudaFree(cuda_device_pos_));
831
(cudaFree(cuda_device_intens_));
832
(cudaFree(cuda_device_trans_intens_));
833
(cudaFree(cuda_device_fwd2_));
834
(cudaFree(cuda_device_trans_intens_sorted_));
835
(cudaFree(cuda_device_posindices_sorted_));
836
(cudaFree(cuda_device_scores_));
840
template <typename PeakType>
841
int IsotopeWaveletTransform<PeakType>::initializeScanCuda (const MSSpectrum<PeakType>& scan, const UInt c)
843
data_length_ = scan.size();
845
std::vector<float> pre_positions (data_length_), pre_intensities (data_length_);
847
min_spacing_=INT_MAX;
848
pre_positions[0] = scan[0].getMZ();
849
pre_intensities[0] = scan[0].getIntensity();
851
for (UInt i=1; i<data_length_; ++i)
853
pre_positions[i] = scan[i].getMZ();
854
c_spacing = pre_positions[i]-pre_positions[i-1];
855
if (c_spacing < min_spacing_)
857
min_spacing_ = c_spacing;
859
pre_intensities[i] = scan[i].getIntensity();
861
if (min_spacing_ == INT_MAX) //spectrum consists of a single data point
863
std::cout << "Scan consits of a single point. Unable to compute transform." << std::endl;
864
return (Constants::CUDA_INIT_FAIL);
867
//Estimating the wave_length ...
868
UInt wavelet_length=0, quarter_length=0;
869
if(hr_data_) //We have to check this separately, because the simply estimation for LowRes data is destroyed by large gaps
872
typename MSSpectrum<PeakType>::const_iterator start_iter, end_iter;
873
for (UInt i=0; i<data_length_; ++i)
875
c_mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos(scan[i].getMZ(), c+1);
876
start_iter = scan.MZEnd(scan[i].getMZ());
877
end_iter = scan.MZBegin(scan[i].getMZ()+c_mz_cutoff);
878
wavelet_length = std::max ((long int) wavelet_length, distance(start_iter, end_iter)+1);
879
end_iter = scan.MZEnd(scan[i].getMZ()-Constants::IW_QUARTER_NEUTRON_MASS/DoubleReal(c+1.));
880
quarter_length = std::max ((long int) quarter_length, distance(end_iter, start_iter)+1);
886
max_mz_cutoff_ = IsotopeWavelet::getMzPeakCutOffAtMonoPos(scan[data_length_-1].getMZ(), max_charge_);
887
wavelet_length = (UInt) ceil(max_mz_cutoff_/min_spacing_);
891
if (wavelet_length > data_length_ || wavelet_length == 1) //==1, because of 'ceil'
893
std::cout << "Warning: the extremal length of the wavelet is larger (" << wavelet_length << ") than the number of data points ("<< data_length_ << "). This might (!) severely affect the transform."<< std::endl;
894
std::cout << "Minimal spacing: " << min_spacing_ << std::endl;
895
std::cout << "Warning/Error generated at scan with RT " << scan.getRT() << "." << std::endl;
901
max_index = quarter_length;
905
max_index = (UInt) (Constants::IW_QUARTER_NEUTRON_MASS/min_spacing_);
907
from_max_to_left_ = max_index;
908
from_max_to_right_ = wavelet_length-1-from_max_to_left_;
910
Int problem_size = Constants::CUDA_BLOCK_SIZE_MAX;
911
to_load_ = problem_size + from_max_to_left_ + from_max_to_right_;
913
UInt missing_points = problem_size - (data_length_ % problem_size);
914
overall_size_ = wavelet_length-1+data_length_+missing_points;
916
num_elements_ = overall_size_;
917
Int dev_num_elements = 1, tmp = overall_size_ >> 1;
919
//Get power of 2 elements (necessary for the sorting algorithm)
922
dev_num_elements <<= 1;
926
if (num_elements_ > dev_num_elements)
928
dev_num_elements <<= 1;
931
if (dev_num_elements < Constants::CUDA_MIN_SORT_SIZE)
933
dev_num_elements = Constants::CUDA_MIN_SORT_SIZE;
936
overall_size_ = dev_num_elements;
938
cuda_intensities_.resize (overall_size_, 0); cuda_positions_.resize (overall_size_, 0);
939
//Pad the values to the left; the positions should not matter if the values are zero
940
float first_pos = pre_positions[0];
941
for (Int i=0; i<from_max_to_left_; ++i)
943
cuda_positions_[i] = first_pos-(from_max_to_left_-i)*min_spacing_;
946
for (UInt i=0; i<data_length_; ++i)
948
cuda_positions_[from_max_to_left_+i] = pre_positions[i];
949
cuda_intensities_[from_max_to_left_+i] = pre_intensities[i];
952
float last_pos = pre_positions[pre_positions.size()-1];
953
for (UInt i=0; i<missing_points + from_max_to_right_ + dev_num_elements - num_elements_; ++i)
955
cuda_positions_[from_max_to_left_+data_length_+i] = last_pos + (i+1)*min_spacing_;
958
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
959
std::stringstream name; name << "cuda_input_" << scan.getRT() << ".out\0";
960
std::fstream outfile(name.str().c_str(), std::ios::out);
961
for (size_t i=0; i<overall_size_; ++i)
962
outfile << cuda_positions_[i] << " " << cuda_intensities_[i] << std::endl;
967
dimBlock_ = dim3 (Constants::CUDA_BLOCK_SIZE_MAX);
968
to_compute_ = problem_size;
969
dimGrid_ = dim3 ((data_length_+missing_points)/problem_size);
971
(cudaMalloc(&cuda_device_posindices_sorted_, overall_size_*sizeof(int)));
972
(cudaMalloc(&cuda_device_pos_, overall_size_*sizeof(float)));
973
(cudaMemcpy(cuda_device_pos_, &(cuda_positions_[0]), overall_size_*sizeof(float), cudaMemcpyHostToDevice));
974
(cudaMalloc(&cuda_device_intens_, overall_size_*sizeof(float)));
975
(cudaMemcpy(cuda_device_intens_, &(cuda_intensities_[0]), overall_size_*sizeof(float), cudaMemcpyHostToDevice));
976
(cudaMalloc(&cuda_device_trans_intens_, overall_size_*sizeof(float)));
977
(cudaMalloc(&cuda_device_fwd2_, overall_size_*sizeof(float)));
978
(cudaMalloc(&cuda_device_trans_intens_sorted_, overall_size_*sizeof(float)));
980
c_sorted_candidate_.resize (overall_size_);
981
scores_.resize(data_length_);
982
zeros_.resize(overall_size_);
983
memset (&zeros_[0], 0., overall_size_*sizeof(float));
985
(cudaMalloc(&cuda_device_scores_, overall_size_*sizeof(float)));
987
return (Constants::CUDA_INIT_SUCCESS);
991
template <typename PeakType>
992
void IsotopeWaveletTransform<PeakType>::getTransformCuda (TransSpectrum &c_trans, const UInt c)
994
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
995
std::cout << "res in vector" << std::endl;
996
std::vector<float> res (overall_size_, 0);
998
(cudaMemcpy(cuda_device_trans_intens_, &zeros_[0], overall_size_*sizeof(float), cudaMemcpyHostToDevice));
1000
(cudaMemcpy(cuda_device_fwd2_, &zeros_[0], overall_size_*sizeof(float), cudaMemcpyHostToDevice));
1001
getExternalCudaTransforms (dimGrid_, dimBlock_, (float*)cuda_device_pos_, (float*)cuda_device_intens_, from_max_to_left_, from_max_to_right_, (float*)cuda_device_trans_intens_,
1002
c+1, to_load_, to_compute_, data_length_, (float*)cuda_device_fwd2_, hr_data_);
1004
(cudaMemcpy(cuda_device_trans_intens_sorted_, cuda_device_fwd2_, overall_size_*sizeof(float), cudaMemcpyDeviceToDevice));
1006
(cudaMemcpy(&((*c_trans.trans_intens_)[0]), (float*)cuda_device_trans_intens_+from_max_to_left_, data_length_*sizeof(float), cudaMemcpyDeviceToHost));
1007
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1008
(cudaMemcpy(&(res[0]), (float*)cuda_device_trans_intens_+from_max_to_left_, data_length_*sizeof(float), cudaMemcpyDeviceToHost));
1009
for (UInt i=0; i<data_length_; ++i)
1011
c_trans.setTransIntensity (i, res[i]);
1017
template <typename PeakType>
1018
int IsotopeWaveletTransform<PeakType>::sortCuda (MSSpectrum<PeakType>& sorted)
1020
(cudaMemcpy(cuda_device_posindices_sorted_, &indices_[0], overall_size_*sizeof(int), cudaMemcpyHostToDevice));
1021
Int gpu_index = sortOnDevice((float*)cuda_device_trans_intens_sorted_, (int*) cuda_device_posindices_sorted_, overall_size_, 0);
1023
if (gpu_index < 0) //i.e., there is no positive intensity value at all
1028
(cudaMemcpy(h_data_, (float*)cuda_device_trans_intens_sorted_+gpu_index, sizeof(float) * (overall_size_-gpu_index), cudaMemcpyDeviceToHost));
1029
(cudaMemcpy(h_pos_, (int*)cuda_device_posindices_sorted_+gpu_index, sizeof(int) * (overall_size_-gpu_index), cudaMemcpyDeviceToHost));
1031
for (UInt i=0; i<(overall_size_-gpu_index); ++i)
1033
sorted[i].setIntensity (h_data_[i]);
1034
sorted[i].setMZ (cuda_positions_[h_pos_[i]]);
1041
template <typename PeakType>
1042
void IsotopeWaveletTransform<PeakType>::identifyChargeCuda (const TransSpectrum& candidates,
1043
const UInt scan_index, const UInt c, const DoubleReal ampl_cutoff, const bool check_PPMs)
1045
const MSSpectrum<PeakType>& ref (*candidates.getRefSpectrum());
1046
UInt index, MZ_start, MZ_end;
1047
typename MSSpectrum<PeakType>::iterator iter, bound_iter;
1048
typename MSSpectrum<PeakType>::const_iterator iter_start, iter_end, iter_p, iter2, seed_iter;
1049
DoubleReal mz_cutoff, seed_mz, c_av_intens=0, c_score=0, c_sd_intens=0, threshold=0, help_mz;
1051
Int gpu_index = sortCuda (c_sorted_candidate_), c_index;
1052
if (gpu_index < 0) //the transform produced non-exploitable data
1057
std::vector<UInt> processed (data_length_, 0);
1058
if (ampl_cutoff < 0)
1064
c_av_intens = getAvIntens_ (candidates);
1065
c_sd_intens = getSdIntens_ (candidates, c_av_intens);
1066
threshold=ampl_cutoff*c_sd_intens + c_av_intens;
1069
Int num_of_scores = overall_size_-gpu_index;
1071
(cudaMemcpy(cuda_device_scores_, &zeros_[0], num_of_scores*sizeof(float), cudaMemcpyHostToDevice));
1073
scoreOnDevice ((int*)cuda_device_posindices_sorted_, (float*)cuda_device_trans_intens_, (float*)cuda_device_pos_, (float*)cuda_device_scores_,
1074
c, num_of_scores, overall_size_, max_num_peaks_per_pattern_, threshold);
1076
(cudaMemcpy(&scores_[0], cuda_device_scores_, num_of_scores*sizeof(float), cudaMemcpyDeviceToHost));
1078
std::vector<float>::iterator score_iter;
1079
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1080
std::stringstream stream;
1081
stream << "sorted_gpu_" << candidates.getRT() << "_" << c+1 << ".trans\0";
1082
std::ofstream ofile (stream.str().c_str());
1083
for (c_index = overall_size_-gpu_index-1, score_iter = scores_.begin()+num_of_scores-1; c_index >= 0; --c_index, --score_iter)
1085
ofile << c_sorted_candidate_[c_index].getMZ() << "\t" << c_sorted_candidate_[c_index].getIntensity() << "\t" << *score_iter << std::endl;
1090
for (c_index = overall_size_-gpu_index-1, score_iter = scores_.begin()+num_of_scores-1; c_index >= 0; --c_index, --score_iter)
1092
seed_mz = c_sorted_candidate_[c_index].getMZ();
1094
//We can replace the following two lines ...
1095
//seed_iter = ref.MZBegin(seed_mz);
1096
//index = distance(ref.begin(), seed_iter);
1098
index = h_pos_[c_index]-from_max_to_left_;
1099
seed_iter = ref.begin()+index;
1101
if (seed_iter == ref.end() || processed[distance(ref.begin(), seed_iter)] || index <= 0)
1106
mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos(seed_mz, c+1);
1107
//Mark the region as processed
1108
//Do not move this further down, since we have to mark this as processed in any case,
1109
//even when score <=0; otherwise we would look around the maximum's position unless
1110
//any significant point is found
1111
iter_start =ref.MZBegin(ref.begin(), seed_mz-Constants::IW_QUARTER_NEUTRON_MASS/(c+1.), seed_iter);
1112
iter_end = ref.MZEnd(seed_iter, seed_mz+mz_cutoff/(c+1.), ref.end());
1114
if (iter_end == ref.end())
1119
MZ_start = distance (ref.begin(), iter_start);
1120
MZ_end = distance (ref.begin(), iter_end);
1122
memset(&(processed[MZ_start]), 1, sizeof(UInt)*(MZ_end-MZ_start+1));
1124
c_score = *score_iter;
1126
if (c_score <= 0 && c_score != -1000)
1131
//Push the seed into its corresponding box (or create a new one, if necessary)
1132
//Do ***NOT*** move this further down!
1134
push2TmpBox_ (seed_mz, scan_index, c, c_score, c_sorted_candidate_[c_index].getIntensity(), ref.getRT(), MZ_start, MZ_end);
1136
//Push neighboring peaks to compute finally a derivative over the isotope pattern envelope
1137
help_mz = seed_mz - Constants::IW_NEUTRON_MASS/(c+1.);
1138
iter2 = candidates.MZBegin (help_mz);
1140
if (iter2 == candidates.end() || iter2 == candidates.begin())
1145
if (fabs(iter2->getMZ()-seed_mz) > 0.5*Constants::IW_NEUTRON_MASS/(c+1.))
1146
//In the other case, we are too close to the peak, leading to incorrect derivatives.
1148
if (iter2 != candidates.end())
1150
UInt dist = distance(candidates.begin(), iter2);
1151
push2TmpBox_ (iter2->getMZ(), scan_index, c, 0,
1152
getLinearInterpolation ((iter2-1)->getMZ(), candidates.getTransIntensity(dist-1), help_mz, iter2->getMZ(), candidates.getTransIntensity(dist)),
1153
candidates.getRT(), MZ_start, MZ_end);
1157
help_mz = seed_mz + Constants::IW_NEUTRON_MASS/(c+1.);
1158
iter2 = candidates.MZBegin (help_mz);
1160
if (iter2 == candidates.end() || iter2 == candidates.begin())
1165
if (fabs(iter2->getMZ()-seed_mz) > 0.5*Constants::IW_NEUTRON_MASS/(c+1.))
1166
//In the other case, we are too close to the peak, leading to incorrect derivatives.
1168
if (iter2 != candidates.end())
1170
UInt dist = distance(candidates.begin(), iter2);
1171
push2TmpBox_ (iter2->getMZ(), scan_index, c, 0,
1172
getLinearInterpolation ((iter2-1)->getMZ(), candidates.getTransIntensity(dist-1), help_mz, iter2->getMZ(), candidates.getTransIntensity(dist)),
1173
candidates.getRT(), MZ_start, MZ_end);
1178
clusterSeeds_(candidates, ref, scan_index, c, check_PPMs);
1183
template <typename PeakType>
1184
void IsotopeWaveletTransform<PeakType>::identifyCharge (const MSSpectrum<PeakType>& candidates,
1185
const MSSpectrum<PeakType>& ref, const UInt scan_index, const UInt c, const DoubleReal ampl_cutoff, const bool check_PPMs)
1187
Size scan_size (candidates.size());
1188
typename ConstRefVector<MSSpectrum<PeakType> >::iterator iter;
1189
typename MSSpectrum<PeakType>::const_iterator iter_start, iter_end, iter_p, seed_iter, iter2;
1190
DoubleReal mz_cutoff, seed_mz, c_av_intens=0, c_score=0, c_sd_intens=0, threshold=0, help_mz, share, share_pos, bwd, fwd;
1191
UInt MZ_start, MZ_end;
1193
MSSpectrum<PeakType> diffed (candidates);
1194
diffed[0].setIntensity(0); diffed[scan_size-1].setIntensity(0);
1196
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1197
std::stringstream stream;
1198
stream << "diffed_" << ref.getRT() << "_" << c+1 << ".trans\0";
1199
std::ofstream ofile (stream.str().c_str());
1202
if (!hr_data_)//LowRes data
1204
for (UInt i=0; i<scan_size-2; ++i)
1206
share = candidates[i+1].getIntensity(), share_pos = candidates[i+1].getMZ();
1207
bwd = (share-candidates[i].getIntensity())/(share_pos-candidates[i].getMZ());
1208
fwd = (candidates[i+2].getIntensity()-share)/(candidates[i+2].getMZ()-share_pos);
1210
if (!(bwd>=0 && fwd<=0) || share > ref[i+1].getIntensity())
1212
diffed[i+1].setIntensity(0);
1215
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1216
ofile << diffed[i+1].getMZ() << "\t" << diffed[i+1].getIntensity() << std::endl;
1222
for (UInt i=0; i<scan_size-2; ++i)
1224
share = candidates[i+1].getIntensity(), share_pos = candidates[i+1].getMZ();
1225
bwd = (share-candidates[i].getIntensity())/(share_pos-candidates[i].getMZ());
1226
fwd = (candidates[i+2].getIntensity()-share)/(candidates[i+2].getMZ()-share_pos);
1228
if (!(bwd>=0 && fwd<=0))
1230
diffed[i+1].setIntensity(0);
1233
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1234
ofile << diffed[i+1].getMZ() << "\t" << diffed[i+1].getIntensity() << std::endl;
1238
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1242
ConstRefVector<MSSpectrum<PeakType> > c_sorted_candidate_ (diffed.begin(), diffed.end());
1244
//Sort the transform in descending order according to the intensities present in the transform
1245
c_sorted_candidate_.sortByIntensity();
1247
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1248
std::stringstream stream2;
1249
stream2 << "sorted_cpu_" << candidates.getRT() << "_" << c+1 << ".trans\0";
1250
std::ofstream ofile2 (stream2.str().c_str());
1251
for (iter=c_sorted_candidate_.end()-1; iter != c_sorted_candidate_.begin(); --iter)
1253
ofile2 << iter->getMZ() << "\t" << iter->getIntensity() << std::endl;
1258
std::vector<UInt> processed (scan_size, 0);
1260
if (ampl_cutoff < 0)
1266
c_av_intens = getAvIntens_ (candidates);
1267
c_sd_intens = getSdIntens_ (candidates, c_av_intens);
1268
threshold=ampl_cutoff*c_sd_intens + c_av_intens;
1271
for (iter=c_sorted_candidate_.end()-1; iter != c_sorted_candidate_.begin(); --iter)
1273
if (iter->getIntensity() <= 0)
1278
seed_mz = iter->getMZ();
1279
seed_iter = ref.MZBegin(seed_mz);
1281
if (seed_iter == ref.end() || processed[distance(ref.begin(), seed_iter)])
1286
mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos (seed_mz, c+1);
1287
//Mark the region as processed
1288
//Do not move this further down, since we have to mark this as processed in any case,
1289
//even when score <=0; otherwise we would look around the maximum's position unless
1290
//any significant point is found
1291
iter_start = ref.MZBegin(ref.begin(), seed_mz-Constants::IW_QUARTER_NEUTRON_MASS/(c+1.), seed_iter);
1292
iter_end = ref.MZEnd(seed_iter, seed_mz+mz_cutoff/(c+1.), ref.end());
1293
if (iter_end == ref.end())
1298
MZ_start = distance (ref.begin(), iter_start);
1299
MZ_end = distance (ref.begin(), iter_end);
1301
memset (&(processed[MZ_start]), 1, sizeof(UInt)*(MZ_end-MZ_start+1));
1303
c_score = scoreThis_ (candidates, IsotopeWavelet::getNumPeakCutOff(seed_mz*(c+1.)), seed_mz, c, threshold);
1305
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1306
if (trunc(seed_mz) == 874)
1307
std::cout << seed_mz << "\t" << c_score << std::endl;
1310
if (c_score <= 0 && c_score != -1000)
1315
//Push the seed into its corresponding box (or create a new one, if necessary)
1316
//Do ***NOT*** move this further down!
1317
push2TmpBox_ (seed_mz, scan_index, c, c_score, iter->getIntensity(), ref.getRT(), MZ_start, MZ_end);
1319
help_mz = seed_mz - Constants::IW_NEUTRON_MASS/(c+1.);
1320
iter2 = candidates.MZBegin (help_mz);
1321
if (iter2 == candidates.end() || iter2 == candidates.begin())
1326
if (fabs(iter2->getMZ()-seed_mz) > 0.5*Constants::IW_NEUTRON_MASS/(c+1.))
1327
//In the other case, we are too close to the peak, leading to incorrect derivatives.
1329
if (iter2 != candidates.end())
1331
push2TmpBox_ (iter2->getMZ(), scan_index, c, 0, getLinearInterpolation(iter2-1, help_mz, iter2), ref.getRT(), MZ_start, MZ_end);
1335
help_mz = seed_mz + Constants::IW_NEUTRON_MASS/(c+1.);
1336
iter2 = candidates.MZBegin (help_mz);
1337
if (iter2 == candidates.end()|| iter2 == candidates.begin())
1342
if (fabs(iter2->getMZ()-seed_mz) > 0.5*Constants::IW_NEUTRON_MASS/(c+1.))
1343
//In the other case, we are too close to the peak, leading to incorrect derivatives.
1345
if (iter2 != candidates.end())
1347
push2TmpBox_ (iter2->getMZ(), scan_index, c, 0, getLinearInterpolation(iter2-1, help_mz, iter2), ref.getRT(), MZ_start, MZ_end);
1352
clusterSeeds_(candidates, ref, scan_index, c, check_PPMs);
1355
#if defined(OPENMS_HAS_TBB) && defined (OPENMS_HAS_CUDA)
1356
template <typename PeakType>
1357
void IsotopeWaveletTransform<PeakType>::mergeFeatures (IsotopeWaveletTransform<PeakType>* later_iwt, const UInt RT_interleave, const UInt RT_votes_cutoff)
1359
typename std::multimap<DoubleReal, Box>::iterator front_iter, end_iter, best_match, help_iter;
1361
//First of all do the trivial part of the merge
1362
for (end_iter=later_iwt->closed_boxes_.begin(); end_iter!=later_iwt->closed_boxes_.end(); ++end_iter)
1364
closed_boxes_.insert (*end_iter);
1367
typename std::multimap<DoubleReal, Box>& end_container (this->end_boxes_);
1368
typename std::multimap<DoubleReal, Box>& front_container (later_iwt->front_boxes_);
1370
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1371
std::cout << "FontBox: " << front_container.size() << std::endl;
1372
for (front_iter=front_container.begin(); front_iter != front_container.end(); ++front_iter)
1373
std::cout << front_iter->first << "\t" << front_iter->second.size() << std::endl;
1375
std::cout << "EndBox: " << end_container.size() << std::endl;
1376
for (front_iter=end_container.begin(); front_iter != end_container.end(); ++front_iter)
1377
std::cout << front_iter->first << "\t" << front_iter->second.size() << std::endl;
1380
typename std::multimap<UInt, BoxElement>::iterator biter;
1382
DoubleReal best_dist, c_dist; UInt c;
1383
//Now, try to find matching boxes for the rest
1384
for (front_iter=front_container.begin(); front_iter != front_container.end(); )
1386
best_match = end_container.end(); best_dist = INT_MAX;
1387
//This is everything else than efficient, but both containers should be very small in size
1388
for (end_iter=end_container.begin(); end_iter != end_container.end(); ++end_iter)
1391
for (biter=front_iter->second.begin(); biter != front_iter->second.end(); ++biter)
1393
c=std::max (c, biter->second.c);
1396
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1397
std::cout << "Trying to match: " << end_iter->first << " to front " << front_iter->first << std::endl;
1400
c_dist = fabs(end_iter->first - front_iter->first);
1401
if (c_dist < Constants::IW_HALF_NEUTRON_MASS/(c+1.) && c_dist < best_dist)
1403
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1404
std::cout << "best match " << front_iter->second.begin()->first << "\t" << (--(end_iter->second.end()))->first << std::endl;
1406
if ((front_iter->second.begin()->first - (--(end_iter->second.end()))->first) <= RT_interleave+1)
1407
//otherwise, there are too many blank scans in between
1409
best_match = end_iter;
1414
if (best_match == end_container.end()) //No matching pair found
1416
if (front_iter->second.size() >= RT_votes_cutoff)
1418
closed_boxes_.insert (*front_iter);
1419
//extendBox_ (map, front_iter->second);
1423
else //That's the funny part
1425
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1426
std::cout << "Merging the boxes: " << front_iter->first << "\t" << best_match->first << std::endl;
1429
front_iter->second.insert (best_match->second.begin(), best_match->second.end());
1430
Box replacement (front_iter->second);
1432
//We cannot divide both m/z by 2, since we already inserted some m/zs whose weight would be lowered.
1433
DoubleReal c_mz = front_iter->first * (front_iter->second.size()-best_match->second.size()) + best_match->first*best_match->second.size();
1434
c_mz /= ((DoubleReal) (front_iter->second.size()));
1436
help_iter = front_iter;
1438
std::pair<DoubleReal, std::multimap<UInt, BoxElement> > help3 (c_mz, replacement);
1439
closed_boxes_.insert (help3);
1440
//extendBox_ (map, help3.second);
1441
front_container.erase (front_iter);
1442
end_container.erase (best_match);
1443
front_iter = help_iter;
1447
//Merge the rest in end_container
1448
for (end_iter=end_container.begin(); end_iter != end_container.end(); ++end_iter)
1450
if (end_iter->second.size() >= RT_votes_cutoff)
1452
closed_boxes_.insert (*end_iter);
1453
//extendBox_ (map, end_iter->second);
1460
template<typename PeakType>
1461
DoubleReal IsotopeWaveletTransform<PeakType>::scoreThis_ (const MSSpectrum<PeakType>& candidate,
1462
const UInt peak_cutoff, const DoubleReal seed_mz, const UInt c, const DoubleReal ampl_cutoff)
1464
DoubleReal c_score=0, c_val;
1465
typename MSSpectrum<PeakType>::const_iterator c_left_iter2, c_right_iter2;
1466
Int signal_size ((Int)candidate.size());
1467
//in the very unlikely case that size_t will not fit to int anymore this will be a problem of course
1468
//for the sake of simplicity (we need here a signed int) we do not cast at every following comparison individually
1470
//p_h_ind indicates if we are looking for a whole or a peak
1471
Int p_h_ind=1, end=4*(peak_cutoff-1) -1; //4 times and not 2 times, since we move by 0.5 m/z entities
1473
std::vector<DoubleReal> positions (end);
1474
for (Int i=0; i<end; ++i)
1476
positions[i] = seed_mz-((peak_cutoff-1)*Constants::IW_NEUTRON_MASS-(i+1)*Constants::IW_HALF_NEUTRON_MASS)/((DoubleReal)c+1);
1479
DoubleReal l_score=0, mid_val=0;
1480
Int start_index = distance(candidate.begin(), candidate.MZBegin (positions[0]))-1;
1481
for (Int v=1; v<=end; ++v, ++p_h_ind)
1485
if (start_index < signal_size-1) ++start_index;
1487
} while (candidate[start_index].getMZ() < positions[v-1]);
1489
if (start_index <= 0 || start_index>=signal_size-1) //unable to interpolate
1494
c_left_iter2 = candidate.begin()+start_index-1;
1495
c_right_iter2 = c_left_iter2+1;
1497
c_val = c_left_iter2->getIntensity() + (c_right_iter2->getIntensity() - c_left_iter2->getIntensity())/(c_right_iter2->getMZ() - c_left_iter2->getMZ()) * (positions[v-1]-c_left_iter2->getMZ());
1499
if (v == (int)(ceil(end/2.)))
1505
if (p_h_ind%2 == 1) //I.e. a whole
1508
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1509
if (trunc(seed_mz) == 874) std::cout << -c_val << std::endl;
1515
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1516
if (trunc(seed_mz) == 874) std::cout << c_val << std::endl;
1521
start_index = distance(candidate.begin(), c_left_iter2);
1524
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1525
std::ofstream ofile_score ("scores.dat", ios::app);
1526
std::ofstream ofile_check_score ("check_scores.dat", ios::app);
1527
ofile_score.close();
1528
ofile_check_score.close();
1531
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1532
if (trunc(seed_mz) == 874)
1533
std::cout << "final_score: " << seed_mz << "\t" << c_score << "\t l_score: " << l_score << "\t" << c_score-l_score-mid_val << "\t" << c_score-mid_val << "\t" << ampl_cutoff << std::endl;
1536
if (c_score-mid_val <= 0)
1541
if (c_score-mid_val <= ampl_cutoff)
1546
if (l_score <=0 || c_score-l_score-mid_val <= 0)
1556
template<typename PeakType>
1557
DoubleReal IsotopeWaveletTransform<PeakType>::scoreThis_ (const TransSpectrum& candidate,
1558
const UInt peak_cutoff, const DoubleReal seed_mz, const UInt c, const DoubleReal ampl_cutoff)
1560
DoubleReal c_score=0, c_val;
1561
typename MSSpectrum<PeakType>::const_iterator c_left_iter2, c_right_iter2;
1562
Int signal_size((Int)candidate.size());
1564
//p_h_ind indicates if we are looking for a whole or a peak
1565
Int p_h_ind=1, end=4*(peak_cutoff-1) -1; //4 times and not 2 times, since we move by 0.5 m/z entities
1567
std::vector<DoubleReal> positions (end);
1568
for (Int i=0; i<end; ++i)
1570
positions[i] = seed_mz-((peak_cutoff-1)*Constants::IW_NEUTRON_MASS-(i+1)*Constants::IW_HALF_NEUTRON_MASS)/((DoubleReal)c+1);
1573
DoubleReal l_score=0, mid_val=0;
1574
Int start_index = distance(candidate.begin(), candidate.MZBegin (positions[0]))-1;
1575
for (Int v=1; v<=end; ++v, ++p_h_ind)
1579
if (start_index < signal_size-1) ++start_index;
1581
} while (candidate.getMZ(start_index) < positions[v-1]);
1583
if (start_index <= 0 || start_index>=signal_size-1) //unable to interpolate
1588
c_left_iter2 = candidate.begin()+start_index-1;
1589
c_right_iter2 = c_left_iter2+1;
1591
c_val = candidate.getTransIntensity(start_index-1) + (candidate.getTransIntensity(start_index)-candidate.getTransIntensity(start_index-1))/(c_right_iter2->getMZ()-c_left_iter2->getMZ()) * (positions[v-1]-c_left_iter2->getMZ());
1592
if (v == (int)(ceil(end/2.)))
1598
if (p_h_ind%2 == 1) //I.e. a whole
1607
start_index = distance(candidate.begin(), c_left_iter2);
1610
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1611
std::ofstream ofile_score ("scores.dat", ios::app);
1612
std::ofstream ofile_check_score ("check_scores.dat", ios::app);
1613
ofile_score << c_check_point << "\t" << c_score << std::endl;
1614
ofile_score.close();
1615
ofile_check_score.close();
1618
if (l_score <=0 || c_score-l_score-mid_val <= 0 || c_score-mid_val <= ampl_cutoff)
1627
template <typename PeakType>
1628
void IsotopeWaveletTransform<PeakType>::clusterSeeds_ (const MSSpectrum<PeakType>& candidate,
1629
const MSSpectrum<PeakType>& ref, const UInt scan_index, const UInt c, const bool check_PPMs)
1631
typename std::multimap<DoubleReal, Box>::iterator iter;
1632
typename Box::iterator box_iter;
1633
std::vector<BoxElement> final_box;
1634
DoubleReal c_mz, av_score=0, av_mz=0, av_intens=0, av_abs_intens=0, count=0;
1635
DoubleReal virtual_av_mz=0, virtual_av_intens=0, virtual_av_abs_intens=0, virtual_count=0;
1637
typename std::pair<DoubleReal, DoubleReal> c_extend;
1638
for (iter=tmp_boxes_->at(c).begin(); iter!=tmp_boxes_->at(c).end(); ++iter)
1641
Box& c_box = iter->second;
1642
av_score=0, av_mz=0, av_intens=0, av_abs_intens=0, count=0;
1643
virtual_av_mz=0, virtual_av_intens=0, virtual_av_abs_intens=0, virtual_count=0;
1645
//Now, let's get the RT boundaries for the box
1646
for (box_iter=c_box.begin(); box_iter!=c_box.end(); ++box_iter)
1648
if (box_iter->second.score == 0) //virtual helping point
1650
if (count != 0) continue; //it is in any way not pure virtual
1652
c_mz = box_iter->second.mz;
1653
virtual_av_intens += box_iter->second.intens;
1654
virtual_av_abs_intens += fabs(box_iter->second.intens);
1655
virtual_av_mz += c_mz*fabs(box_iter->second.intens);
1660
c_mz = box_iter->second.mz;
1661
av_score += box_iter->second.score;
1662
av_intens += box_iter->second.intens;
1663
av_abs_intens += fabs(box_iter->second.intens);
1664
av_mz += c_mz*fabs(box_iter->second.intens);
1669
if (count == 0) //pure virtual helping box
1671
av_intens = virtual_av_intens/virtual_count;
1673
av_mz = virtual_av_mz/virtual_av_abs_intens;
1679
av_mz /= av_abs_intens;
1682
BoxElement c_box_element;
1683
c_box_element.mz = av_mz;
1684
c_box_element.c = c;
1685
c_box_element.score = av_score;
1686
c_box_element.intens = av_intens;
1688
c_box_element.RT=c_box.begin()->second.RT;
1689
final_box.push_back(c_box_element);
1692
Size num_o_feature = final_box.size();
1693
if (num_o_feature == 0)
1695
tmp_boxes_->at(c).clear();
1699
//Computing the derivatives
1700
std::vector<DoubleReal> bwd_diffs(num_o_feature, 0);
1703
for (Size i=1; i<num_o_feature; ++i)
1705
bwd_diffs[i] = (final_box[i].intens-final_box[i-1].intens)/(final_box[i].mz-final_box[i-1].mz);
1708
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1709
std::ofstream ofile_bwd ("bwd_cpu.dat");
1710
for (Size i=0; i<num_o_feature; ++i)
1712
ofile_bwd << final_box[i].mz << "\t" << bwd_diffs[i] << std::endl;
1718
for (Size i=0; i<num_o_feature-1; ++i)
1720
while (i<num_o_feature-2)
1722
if(final_box[i].score>0 || final_box[i].score == -1000) //this has been an helping point
1727
if (bwd_diffs[i]>0 && bwd_diffs[i+1]<0)
1729
checkPositionForPlausibility_ (candidate, ref, final_box[i].mz, final_box[i].c, scan_index, check_PPMs, final_box[i].intens, final_box[i].score);
1734
tmp_boxes_->at(c).clear();
1738
template <typename PeakType>
1739
DoubleReal IsotopeWaveletTransform<PeakType>::getAvIntens_ (const MSSpectrum<PeakType>& scan)
1741
DoubleReal av_intens=0;
1742
for (UInt i=0; i<scan.size(); ++i)
1744
if (scan[i].getIntensity() >= 0)
1746
av_intens += scan[i].getIntensity();
1749
return (av_intens / (double)scan.size());
1753
template <typename PeakType>
1754
DoubleReal IsotopeWaveletTransform<PeakType>::getSdIntens_ (const MSSpectrum<PeakType>& scan, const DoubleReal mean)
1756
DoubleReal res=0, intens;
1757
for (UInt i=0; i<scan.size(); ++i)
1759
if (scan[i].getIntensity() >= 0)
1761
intens = scan[i].getIntensity();
1762
res += (intens-mean)*(intens-mean);
1765
return (sqrt(res/(double)(scan.size()-1)));
1769
template <typename PeakType>
1770
DoubleReal IsotopeWaveletTransform<PeakType>::getAvMZSpacing_ (const MSSpectrum<PeakType>& scan)//, Int start_index, Int end_index)
1772
std::vector<DoubleReal> diffs (scan.size()-1, 0);
1773
for (UInt i=0; i<scan.size()-1; ++i)
1775
diffs[i]= scan[i+1].getMZ() - scan[i].getMZ();
1778
sort(diffs.begin(), diffs.end());
1779
DoubleReal av_MZ_spacing=0;
1780
for (UInt i=0; i<diffs.size()/2; ++i)
1782
av_MZ_spacing += diffs[i];
1785
return (av_MZ_spacing / (diffs.size()/2));
1789
template <typename PeakType>
1790
DoubleReal IsotopeWaveletTransform<PeakType>::getAvIntens_ (const TransSpectrum& scan)
1792
DoubleReal av_intens=0;
1793
for (UInt i=0; i<scan.size(); ++i)
1795
if (scan.getTransIntensity(i) >= 0)
1797
av_intens += scan.getTransIntensity(i);
1800
return (av_intens / (double)scan.size());
1804
template <typename PeakType>
1805
DoubleReal IsotopeWaveletTransform<PeakType>::getSdIntens_ (const TransSpectrum& scan, const DoubleReal mean)
1807
DoubleReal res=0, intens;
1808
for (UInt i=0; i<scan.size(); ++i)
1810
if (scan.getTransIntensity(i) >= 0)
1812
intens = scan.getTransIntensity(i);
1813
res += (intens-mean)*(intens-mean);
1816
return (sqrt(res/(double)(scan.size()-1)));
1821
template <typename PeakType>
1822
void IsotopeWaveletTransform<PeakType>::push2Box_ (const DoubleReal mz, const UInt scan, UInt c,
1823
const DoubleReal score, const DoubleReal intens, const DoubleReal rt, const UInt MZ_begin, const UInt MZ_end, DoubleReal ref_intens)
1825
const DoubleReal dist_constraint (Constants::IW_HALF_NEUTRON_MASS/(DoubleReal)max_charge_);
1827
typename std::multimap<DoubleReal, Box>::iterator upper_iter (open_boxes_.upper_bound(mz));
1828
typename std::multimap<DoubleReal, Box>::iterator lower_iter ( open_boxes_.lower_bound(mz));
1830
if (lower_iter != open_boxes_.end())
1832
//Ugly, but necessary due to the implementation of STL lower_bound
1833
if (mz != lower_iter->first && lower_iter != open_boxes_.begin())
1839
typename std::multimap<DoubleReal, Box>::iterator insert_iter;
1840
bool create_new_box=true;
1841
if (lower_iter == open_boxes_.end()) //I.e. there is no open Box for that mz position
1843
//There is another special case to be considered here:
1844
//Assume that the current box contains only a single element that is (slightly) smaller than the new mz value,
1845
//then the lower bound for the new mz value is box.end and this would usually force a new entry
1846
if (!open_boxes_.empty())
1848
if (fabs((--lower_iter)->first - mz) < dist_constraint) //matching box
1850
create_new_box=false;
1851
insert_iter = lower_iter;
1856
create_new_box=true;
1861
if (upper_iter == open_boxes_.end() && fabs(lower_iter->first - mz) < dist_constraint) //Found matching Box
1863
insert_iter = lower_iter;
1864
create_new_box=false;
1868
create_new_box=true;
1873
if (upper_iter != open_boxes_.end() && lower_iter != open_boxes_.end())
1875
//Here is the question if you should figure out the smallest charge .... and then
1877
//Figure out which entry is closer to m/z
1878
DoubleReal dist_lower = fabs(lower_iter->first - mz);
1879
DoubleReal dist_upper = fabs(upper_iter->first - mz);
1880
dist_lower = (dist_lower < dist_constraint) ? dist_lower : INT_MAX;
1881
dist_upper = (dist_upper < dist_constraint) ? dist_upper : INT_MAX;
1883
if (dist_lower>=dist_constraint && dist_upper>=dist_constraint) // they are both too far away
1885
create_new_box=true;
1889
insert_iter = (dist_lower < dist_upper) ? lower_iter : upper_iter;
1890
create_new_box=false;
1895
element.c = c; element.mz = mz; element.score = score; element.RT = rt; element.intens=intens; element.ref_intens=ref_intens;
1896
element.RT_index = scan; element.MZ_begin = MZ_begin; element.MZ_end = MZ_end;
1899
if (create_new_box == false)
1901
std::pair<UInt, BoxElement> help2 (scan, element);
1902
insert_iter->second.insert (help2);
1904
//Unfortunately, we need to change the m/z key to the average of all keys inserted in that box.
1905
Box replacement (insert_iter->second);
1907
//We cannot divide both m/z by 2, since we already inserted some m/zs whose weight would be lowered.
1908
//Also note that we already inserted the new entry, leading to size-1.
1909
DoubleReal c_mz = insert_iter->first * (insert_iter->second.size()-1) + mz;
1910
c_mz /= ((DoubleReal) insert_iter->second.size());
1912
//Now let's remove the old and insert the new one
1913
open_boxes_.erase (insert_iter);
1914
std::pair<DoubleReal, std::multimap<UInt, BoxElement> > help3 (c_mz, replacement);
1915
open_boxes_.insert (help3);
1919
std::pair<UInt, BoxElement> help2 (scan, element);
1920
std::multimap<UInt, BoxElement> help3;
1921
help3.insert (help2);
1922
std::pair<DoubleReal, std::multimap<UInt, BoxElement> > help4 (mz, help3);
1923
open_boxes_.insert (help4);
1928
template <typename PeakType>
1929
void IsotopeWaveletTransform<PeakType>::push2TmpBox_ (const DoubleReal mz, const UInt scan, UInt c,
1930
const DoubleReal score, const DoubleReal intens, const DoubleReal rt, const UInt MZ_begin, const UInt MZ_end)
1932
const DoubleReal dist_constraint (Constants::IW_HALF_NEUTRON_MASS/(DoubleReal)max_charge_);
1934
std::multimap<DoubleReal, Box>& tmp_box (tmp_boxes_->at(c));
1935
typename std::multimap<DoubleReal, Box>::iterator upper_iter (tmp_box.upper_bound(mz));
1936
typename std::multimap<DoubleReal, Box>::iterator lower_iter (tmp_box.lower_bound(mz));
1938
if (lower_iter != tmp_box.end())
1940
//Ugly, but necessary due to the implementation of STL lower_bound
1941
if (mz != lower_iter->first && lower_iter != tmp_box.begin())
1947
typename std::multimap<DoubleReal, Box>::iterator insert_iter;
1948
bool create_new_box=true;
1949
if (lower_iter == tmp_box.end()) //I.e. there is no tmp Box for that mz position
1951
//There is another special case to be considered here:
1952
//Assume that the current box contains only a single element that is (slightly) smaller than the new mz value,
1953
//then the lower bound for the new mz value is box.end and this would usually force a new entry
1954
if (!tmp_box.empty())
1956
if (fabs((--lower_iter)->first - mz) < dist_constraint) //matching box
1958
create_new_box=false;
1959
insert_iter = lower_iter;
1964
create_new_box=true;
1969
if (upper_iter == tmp_box.end() && fabs(lower_iter->first - mz) < dist_constraint) //Found matching Box
1971
insert_iter = lower_iter;
1972
create_new_box=false;
1976
create_new_box=true;
1981
if (upper_iter != tmp_box.end() && lower_iter != tmp_box.end())
1983
//Figure out which entry is closer to m/z
1984
DoubleReal dist_lower = fabs(lower_iter->first - mz);
1985
DoubleReal dist_upper = fabs(upper_iter->first - mz);
1986
dist_lower = (dist_lower < dist_constraint) ? dist_lower : INT_MAX;
1987
dist_upper = (dist_upper < dist_constraint) ? dist_upper : INT_MAX;
1989
if (dist_lower>=dist_constraint && dist_upper>=dist_constraint) // they are both too far away
1991
create_new_box=true;
1995
insert_iter = (dist_lower < dist_upper) ? lower_iter : upper_iter;
1996
create_new_box=false;
2001
element.c = c; element.mz = mz; element.score = score; element.RT = rt; element.intens=intens; element.ref_intens=-1000;
2002
element.RT_index = scan; element.MZ_begin = MZ_begin; element.MZ_end = MZ_end;
2004
if (create_new_box == false)
2006
std::pair<UInt, BoxElement> help2 (scan, element);
2007
insert_iter->second.insert (help2);
2009
//Unfortunately, we need to change the m/z key to the average of all keys inserted in that box.
2010
Box replacement (insert_iter->second);
2012
//We cannot divide both m/z by 2, since we already inserted some m/zs whose weight would be lowered.
2013
//Also note that we already inserted the new entry, leading to size-1.
2014
DoubleReal c_mz = insert_iter->first * (insert_iter->second.size()-1) + mz;
2015
c_mz /= ((DoubleReal) insert_iter->second.size());
2017
//Now let's remove the old and insert the new one
2018
tmp_box.erase (insert_iter);
2019
std::pair<DoubleReal, std::multimap<UInt, BoxElement> > help3 (c_mz, replacement);
2020
tmp_box.insert (help3);
2024
std::pair<UInt, BoxElement> help2 (scan, element);
2025
std::multimap<UInt, BoxElement> help3;
2026
help3.insert (help2);
2028
std::pair<DoubleReal, std::multimap<UInt, BoxElement> > help4 (mz, help3);
2029
tmp_box.insert (help4);
2034
template <typename PeakType>
2035
void IsotopeWaveletTransform<PeakType>::updateBoxStates (const MSExperiment<PeakType>& map, const Size scan_index, const UInt RT_interleave,
2036
const UInt RT_votes_cutoff, const Int front_bound, const Int end_bound)
2038
typename std::multimap<DoubleReal, Box>::iterator iter, iter2;
2040
if ((Int)scan_index == end_bound && end_bound != (Int)map.size()-1)
2042
for (iter=open_boxes_.begin(); iter!=open_boxes_.end(); ++iter)
2044
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2045
std::cout << "LOW THREAD insert in end_box " << iter->first << std::endl;
2046
typename Box::iterator dings;
2047
for (dings=iter->second.begin(); dings != iter->second.end(); ++dings)
2048
std::cout << map[dings->first].getRT() << "\t" << dings->second.c+1 << std::endl;
2050
end_boxes_.insert (*iter);
2052
open_boxes_.clear();
2056
for (iter=open_boxes_.begin(); iter!=open_boxes_.end(); )
2058
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2059
if (front_bound > 0)
2061
std::cout << "HIGH THREAD open box. " << iter->first << "\t current scan index " << scan_index << "\t" << ((iter->second.begin()))->first << "\t of last scan " << map.size()-1 << "\t" << front_bound << std::endl;
2065
//For each Box we need to figure out, if and when the last RT value has been inserted
2066
UInt lastScan = (--(iter->second.end()))->first;
2067
if (scan_index - lastScan > RT_interleave+1 || scan_index == map.size()-1) //I.e. close the box!
2069
if (iter->second.begin()->first -front_bound <= RT_interleave+1 && front_bound > 0)
2073
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2074
std::cout << "HIGH THREAD insert in front_box " << iter->first << std::endl;
2076
front_boxes_.insert (*iter);
2077
open_boxes_.erase (iter);
2084
//Please do **NOT** simplify the upcoming lines.
2085
//The 'obvious' overhead is necessary since the object represented by iter might be erased
2086
//by push2Box which might be called by extendBox_.
2087
if (iter->second.size() >= RT_votes_cutoff)
2089
//extendBox_ (map, iter->second);
2091
closed_boxes_.insert (*(--iter));
2093
open_boxes_.erase (iter);
2105
template <typename PeakType>
2106
void IsotopeWaveletTransform<PeakType>::extendBox_ (const MSExperiment<PeakType>& map, const Box box)
2108
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2109
std::cout << "**** CHECKING FOR BOX EXTENSIONS ****" << std::endl;
2112
//Determining the elution profile
2113
typename Box::const_iterator iter;
2114
std::vector<DoubleReal> elution_profile (box.size());
2116
for (iter=box.begin(); iter != box.end(); ++iter, ++index)
2118
for (Size i=iter->second.MZ_begin; i!= iter->second.MZ_end; ++i)
2120
elution_profile[index] += map[iter->second.RT_index][i].getIntensity();
2122
elution_profile[index] /= iter->second.MZ_end-iter->second.MZ_begin+1.;
2126
Int max_index=INT_MIN;
2127
for (Size i=0; i<elution_profile.size(); ++i)
2129
if (elution_profile[i] > max)
2132
max = elution_profile[i];
2136
Int max_extension = (Int)(elution_profile.size()) - 2*max_index;
2138
DoubleReal av_elution=0;
2139
for (Size i=0; i<elution_profile.size(); ++i)
2141
av_elution += elution_profile[i];
2143
av_elution /= (DoubleReal)elution_profile.size();
2145
DoubleReal sd_elution=0;
2146
for (Size i=0; i<elution_profile.size(); ++i)
2148
sd_elution += (av_elution-elution_profile[i])*(av_elution-elution_profile[i]);
2150
sd_elution /= (DoubleReal)(elution_profile.size()-1);
2151
sd_elution = sqrt(sd_elution);
2153
//Determine average m/z monoisotopic pos
2155
for (iter=box.begin(); iter != box.end(); ++iter, ++index)
2157
av_mz += iter->second.mz;
2158
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2159
std::cout << iter->second.RT << "\t" << iter->second.mz << "\t" << iter->second.c+1 << std::endl;
2162
av_mz /= (DoubleReal)box.size();
2166
if ((Int)(box.begin()->second.RT_index)-1 < 0)
2171
UInt pre_index = box.begin()->second.RT_index-1;
2172
typename MSSpectrum<PeakType>::const_iterator c_iter = map[pre_index].MZBegin(av_mz);
2173
DoubleReal pre_elution=0;
2175
DoubleReal mz_start = map[pre_index+1][box.begin()->second.MZ_begin].getMZ();
2176
DoubleReal mz_end = map[pre_index+1][box.begin()->second.MZ_end].getMZ();
2178
typename MSSpectrum<PeakType>::const_iterator mz_start_iter = map[pre_index].MZBegin(mz_start), mz_end_iter = map[pre_index].MZBegin(mz_end);
2179
for (typename MSSpectrum<PeakType>::const_iterator mz_iter=mz_start_iter; mz_iter != mz_end_iter; ++mz_iter)
2181
pre_elution += mz_iter->getIntensity();
2185
//Do we need to extend at all?
2186
if (pre_elution <= av_elution-2*sd_elution)
2191
Int c_index = max_extension;
2192
Int first_index = box.begin()->second.RT_index;
2193
for (Int i=1; i<max_extension; ++i)
2195
c_index = first_index-i;
2201
//CHECK Majority vote for charge???????????????
2202
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2203
std::cout << box.begin()->second.RT << "\t" << av_mz << "\t" << box.begin()->second.c+1 << "\t" << " extending the box " << std::endl;
2206
push2Box_ (av_mz, c_index, box.begin()->second.c, box.begin()->second.score, c_iter->getIntensity(),
2207
map[c_index].getRT(), box.begin()->second.MZ_begin, box.begin()->second.MZ_end);
2213
template <typename PeakType>
2214
void IsotopeWaveletTransform<PeakType>::clusterSeeds_ (const TransSpectrum& candidates,
2215
const MSSpectrum<PeakType>& ref, const UInt scan_index, const UInt c, const bool check_PPMs)
2217
typename std::multimap<DoubleReal, Box>::iterator iter;
2218
typename Box::iterator box_iter;
2219
std::vector<BoxElement> final_box;
2220
DoubleReal c_mz, av_score=0, av_mz=0, av_intens=0, av_abs_intens=0, count=0;
2221
DoubleReal virtual_av_mz=0, virtual_av_intens=0, virtual_av_abs_intens=0, virtual_count=0;
2223
typename std::pair<DoubleReal, DoubleReal> c_extend;
2224
for (iter=tmp_boxes_->at(c).begin(); iter!=tmp_boxes_->at(c).end(); ++iter)
2226
Box& c_box = iter->second;
2227
av_score=0, av_mz=0, av_intens=0, av_abs_intens=0, count=0;
2228
virtual_av_mz=0, virtual_av_intens=0, virtual_av_abs_intens=0, virtual_count=0;
2230
for (box_iter=c_box.begin(); box_iter!=c_box.end(); ++box_iter)
2232
if (box_iter->second.score == 0) //virtual helping point
2234
if (count != 0) continue; //it is in any way not pure virtual
2236
c_mz = box_iter->second.mz;
2237
virtual_av_intens += box_iter->second.intens;
2238
virtual_av_abs_intens += fabs(box_iter->second.intens);
2239
virtual_av_mz += c_mz*fabs(box_iter->second.intens);
2244
c_mz = box_iter->second.mz;
2245
av_score += box_iter->second.score;
2246
av_intens += box_iter->second.intens;
2247
av_abs_intens += fabs(box_iter->second.intens);
2248
av_mz += c_mz*fabs(box_iter->second.intens);
2253
if (count == 0) //pure virtual helping box
2255
av_intens = virtual_av_intens/virtual_count;
2257
av_mz = virtual_av_mz/virtual_av_abs_intens;
2263
av_mz /= av_abs_intens;
2266
BoxElement c_box_element;
2267
c_box_element.mz = av_mz;
2268
c_box_element.c = c;
2269
c_box_element.score = av_score;
2270
c_box_element.intens = av_intens;
2272
c_box_element.RT=c_box.begin()->second.RT;
2274
final_box.push_back(c_box_element);
2277
UInt num_o_feature = final_box.size();
2278
if (num_o_feature == 0)
2280
tmp_boxes_->at(c).clear();
2284
//Computing the derivatives
2285
std::vector<DoubleReal> bwd_diffs(num_o_feature, 0);
2288
for (UInt i=1; i<num_o_feature; ++i)
2290
bwd_diffs[i] = (final_box[i].intens-final_box[i-1].intens)/(final_box[i].mz-final_box[i-1].mz);
2293
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2294
std::ofstream ofile_bwd ("bwd_gpu.dat");
2295
for (UInt i=0; i<num_o_feature; ++i)
2297
ofile_bwd << final_box[i].mz << "\t" << bwd_diffs[i] << std::endl;
2302
for (UInt i=0; i<num_o_feature-1; ++i)
2304
while (i<num_o_feature-2)
2306
if(final_box[i].score>0 || final_box[i].score == -1000) //this has been an helping point
2311
if (bwd_diffs[i]>0 && bwd_diffs[i+1]<0)
2313
checkPositionForPlausibility_ (candidates, ref, final_box[i].mz, final_box[i].c, scan_index, check_PPMs, final_box[i].intens, final_box[i].score);
2318
tmp_boxes_->at(c).clear();
2323
template <typename PeakType>
2324
FeatureMap<Feature> IsotopeWaveletTransform<PeakType>::mapSeeds2Features (const MSExperiment<PeakType>& map, const UInt RT_votes_cutoff)
2326
FeatureMap<Feature> feature_map;
2327
typename std::multimap<DoubleReal, Box>::iterator iter;
2328
typename Box::iterator box_iter;
2329
UInt best_charge_index; DoubleReal best_charge_score, c_mz, c_RT; UInt c_charge;
2330
DoubleReal av_intens=0, av_ref_intens=0, av_score=0, av_mz=0, av_RT=0, mz_cutoff, sum_of_ref_intenses_g;
2333
typename std::pair<DoubleReal, DoubleReal> c_extend;
2334
for (iter=closed_boxes_.begin(); iter!=closed_boxes_.end(); ++iter)
2336
sum_of_ref_intenses_g=0;
2337
Box& c_box = iter->second;
2338
std::vector<DoubleReal> charge_votes (max_charge_, 0), charge_binary_votes (max_charge_, 0);
2341
//Let's first determine the charge
2342
//Therefor, we can use two types of votes: qualitative ones (charge_binary_votes) or quantitative ones (charge_votes)
2343
for (box_iter=c_box.begin(); box_iter!=c_box.end(); ++box_iter)
2345
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2346
if (trunc(box_iter->second.mz) == 874)
2347
std::cout << box_iter->second.c << "\t" << box_iter->second.intens << "\t" << box_iter->second.score << std::endl;
2350
if (box_iter->second.score == -1000)
2356
charge_votes[box_iter->second.c] += box_iter->second.intens;//score; Do not use score, can get problematic for charge state 2 vs 4
2357
++charge_binary_votes[box_iter->second.c];
2365
//... determining the best fitting charge
2366
best_charge_index=0; best_charge_score=0;
2367
for (UInt i=0; i<max_charge_; ++i)
2369
if (charge_votes[i] > best_charge_score)
2371
best_charge_index = i;
2372
best_charge_score = charge_votes[i];
2376
//Pattern found in too few RT scan
2377
if (charge_binary_votes[best_charge_index] < RT_votes_cutoff && RT_votes_cutoff <= map.size())
2382
c_charge = best_charge_index + 1; //that's the finally predicted charge state for the pattern
2384
av_intens=0, av_ref_intens=0, av_score=0, av_mz=0, av_RT=0;
2385
//Now, let's get the RT boundaries for the box
2386
std::vector<DPosition<2> > point_set;
2387
DoubleReal sum_of_ref_intenses_l;
2388
for (box_iter=c_box.begin(); box_iter!=c_box.end(); ++box_iter)
2390
sum_of_ref_intenses_l=0;
2391
c_mz = box_iter->second.mz;
2392
c_RT = box_iter->second.RT;
2394
mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos (c_mz, c_charge);
2396
point_set.push_back (DPosition<2> (c_RT, c_mz - Constants::IW_QUARTER_NEUTRON_MASS/(DoubleReal)c_charge));
2397
//-1 since we are already at the first peak and +0.75, since this includes the last peak of the wavelet as a whole
2398
point_set.push_back (DPosition<2> (c_RT, c_mz + mz_cutoff/(DoubleReal)c_charge));
2400
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2401
std::cout << "Intenstype: " << intenstype_ << std::endl;
2403
if (intenstype_ == "ref")
2405
//Find monoisotopic max
2406
const MSSpectrum<PeakType> & c_spec (map[box_iter->second.RT_index]);
2407
//'Correct' possible shift
2408
for (unsigned int i=0; i<mz_cutoff; ++i)
2410
typename MSSpectrum<PeakType>::const_iterator h_iter=c_spec.MZBegin(c_mz+i*Constants::IW_NEUTRON_MASS/c_charge + Constants::IW_QUARTER_NEUTRON_MASS/(DoubleReal)c_charge), hc_iter = c_spec.MZBegin(c_mz+i*Constants::IW_NEUTRON_MASS/c_charge);
2412
hc_iter = c_spec.MZBegin(c_mz+i*Constants::IW_NEUTRON_MASS/c_charge);
2414
while (h_iter != c_spec.begin())
2417
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2418
if (trunc(c_mz)==874)
2420
std::cout << "cmz: " << c_mz+i*Constants::IW_NEUTRON_MASS/c_charge << "\t" << hc_iter->getMZ() << "\t" << hc_iter->getIntensity() << "\t" << h_iter->getMZ() << "\t" << h_iter->getIntensity() << std::endl;
2425
if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
2430
if (c_mz+i*Constants::IW_NEUTRON_MASS/c_charge - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS/(DoubleReal)c_charge)
2435
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2436
if (trunc(c_mz)==874)
2438
std::cout << "c_mz: " << c_mz+i*Constants::IW_NEUTRON_MASS/c_charge << "\t" << hc_iter->getMZ() << "\t" << hc_iter->getIntensity() << "\t" << i*Constants::IW_NEUTRON_MASS/c_charge << "\t";
2441
sum_of_ref_intenses_l += hc_iter->getIntensity();
2442
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2443
if (trunc(c_mz)==874)
2445
std::cout << sum_of_ref_intenses_l << "********" << std::endl;
2451
if (best_charge_index == box_iter->second.c)
2453
av_score += box_iter->second.score;
2454
av_intens += box_iter->second.intens;
2455
av_ref_intens += box_iter->second.ref_intens;
2456
sum_of_ref_intenses_g += sum_of_ref_intenses_l;
2457
av_mz += c_mz*box_iter->second.intens;
2463
av_ref_intens /= (DoubleReal)charge_binary_votes[best_charge_index];
2464
av_score /= (DoubleReal)charge_binary_votes[best_charge_index];
2465
av_RT /= (DoubleReal)c_box.size();
2467
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2468
if (trunc(av_mz) == 874)
2469
std::cout << av_mz << "\t" << best_charge_index << "\t" << best_charge_score << std::endl;
2473
ConvexHull2D c_conv_hull;
2474
c_conv_hull.addPoints (point_set);
2475
c_feature.setCharge (c_charge);
2476
c_feature.setConvexHulls (std::vector<ConvexHull2D> (1, c_conv_hull));
2478
//This makes the intensity value independent of the m/z (the lambda) value (Skellam distribution)
2479
if (intenstype_ == "corrected")
2481
DoubleReal lambda = IsotopeWavelet::getLambdaL(av_mz*c_charge);
2482
av_intens /= exp(-2*lambda) * boost::math::cyl_bessel_i(0, 2*lambda);
2484
if (intenstype_== "ref")
2486
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2487
if (trunc(c_mz)==874)
2489
std::cout << sum_of_ref_intenses_g << "####" << std::endl;
2493
av_intens = sum_of_ref_intenses_g;
2496
c_feature.setMZ (av_mz);
2497
c_feature.setIntensity (av_intens);
2498
c_feature.setRT (av_RT);
2499
c_feature.setOverallQuality (av_score);
2500
feature_map.push_back (c_feature);
2503
return (feature_map);
2507
template <typename PeakType>
2508
bool IsotopeWaveletTransform<PeakType>::checkPositionForPlausibility_ (const MSSpectrum<PeakType>& candidate,
2509
const MSSpectrum<PeakType>& ref, const DoubleReal seed_mz, const UInt c, const UInt scan_index, const bool check_PPMs, const DoubleReal transintens, const DoubleReal prev_score)
2511
typename MSSpectrum<PeakType>::const_iterator iter, ref_iter;
2513
peak_cutoff = IsotopeWavelet::getNumPeakCutOff (seed_mz, c+1);
2515
iter = candidate.MZBegin(seed_mz);
2516
//we can ignore those cases
2517
if (iter==candidate.begin() || iter==candidate.end())
2522
std::pair<DoubleReal, DoubleReal> reals;
2523
ref_iter = ref.MZBegin(seed_mz);
2524
//Correct the position
2525
DoubleReal real_mz, real_intens;
2528
reals = checkPPMTheoModel_ (ref, iter->getMZ(), c);
2529
real_mz = reals.first, real_intens = reals.second;
2530
//if (real_mz <= 0 || real_intens <= 0)
2532
typename MSSpectrum<PeakType>::const_iterator h_iter=ref_iter, hc_iter=ref_iter;
2533
while (h_iter != ref.begin())
2536
if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
2545
if (seed_mz - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS/(c+1.))
2550
reals = checkPPMTheoModel_ (ref, h_iter->getMZ(), c);
2551
real_mz = reals.first, real_intens = reals.second;
2552
if (real_mz <= 0 || real_intens <= 0)
2556
real_mz = h_iter->getMZ();
2557
real_intens = h_iter->getIntensity();
2562
reals = std::pair<DoubleReal, DoubleReal> (seed_mz, ref_iter->getIntensity());
2563
real_mz = reals.first, real_intens = reals.second;
2565
if (real_mz <= 0 || real_intens <= 0)
2567
typename MSSpectrum<PeakType>::const_iterator h_iter=ref_iter, hc_iter=ref_iter;
2568
while (h_iter != ref.begin())
2571
if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
2580
if (seed_mz - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS/(c+1.))
2585
real_mz = h_iter->getMZ(), real_intens = h_iter->getIntensity();
2586
if (real_mz <= 0 || real_intens <= 0)
2590
real_mz = h_iter->getMZ();
2591
real_intens = h_iter->getIntensity();
2595
DoubleReal c_score = scoreThis_ (candidate, peak_cutoff, real_mz, c, 0);
2602
DoubleReal mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos (real_mz, c+1);
2603
typename MSSpectrum<PeakType>::const_iterator real_l_MZ_iter = ref.MZBegin(real_mz-Constants::IW_QUARTER_NEUTRON_MASS/(c+1.));
2604
typename MSSpectrum<PeakType>::const_iterator real_r_MZ_iter = ref.MZBegin(real_l_MZ_iter, real_mz+mz_cutoff/(c+1.), ref.end());
2605
if (real_r_MZ_iter == ref.end())
2611
UInt real_mz_begin = distance (ref.begin(), real_l_MZ_iter);
2612
UInt real_mz_end = distance (ref.begin(), real_r_MZ_iter);
2614
if (prev_score == -1000)
2616
push2Box_ (real_mz, scan_index, c, prev_score, transintens, ref.getRT(), real_mz_begin, real_mz_end, real_intens);
2620
push2Box_ (real_mz, scan_index, c, c_score, transintens, ref.getRT(), real_mz_begin, real_mz_end, real_intens);
2625
template <typename PeakType>
2626
bool IsotopeWaveletTransform<PeakType>::checkPositionForPlausibility_ (const TransSpectrum& candidate,
2627
const MSSpectrum<PeakType>& ref, const DoubleReal seed_mz, const UInt c, const UInt scan_index, const bool check_PPMs, const DoubleReal transintens, const DoubleReal prev_score)
2629
typename MSSpectrum<PeakType>::const_iterator iter, ref_iter;
2631
peak_cutoff = IsotopeWavelet::getNumPeakCutOff (seed_mz, c+1);
2633
iter = candidate.MZBegin(seed_mz);
2634
//we can ignore those cases
2635
if (iter==candidate.begin() || iter==candidate.end())
2640
std::pair<DoubleReal, DoubleReal> reals;
2641
ref_iter = ref.MZBegin(seed_mz);
2642
//Correct the position
2643
DoubleReal real_mz, real_intens;
2646
reals = checkPPMTheoModel_ (ref, iter->getMZ(), c);
2647
real_mz = reals.first, real_intens = reals.second;
2648
//if (real_mz <= 0 || real_intens <= 0)
2650
typename MSSpectrum<PeakType>::const_iterator h_iter=ref_iter, hc_iter=ref_iter;
2651
while (h_iter != ref.begin())
2654
if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
2663
if (seed_mz - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS/(c+1.))
2669
reals = checkPPMTheoModel_ (ref, h_iter->getMZ(), c);
2670
real_mz = reals.first, real_intens = reals.second;
2672
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2673
std::cout << "Plausibility check old_mz: " << iter->getMZ() << "\t" << real_mz << std::endl;
2676
if (real_mz <= 0 || real_intens <= 0)
2680
real_mz = h_iter->getMZ();
2681
real_intens = h_iter->getIntensity();
2686
reals = std::pair<DoubleReal, DoubleReal> (seed_mz, ref_iter->getIntensity());
2687
real_mz = reals.first, real_intens = reals.second;
2689
if (real_mz <= 0 || real_intens <= 0)
2691
typename MSSpectrum<PeakType>::const_iterator h_iter=ref_iter, hc_iter=ref_iter;
2692
while (h_iter != ref.begin())
2695
if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
2704
if (seed_mz - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS/(c+1.))
2709
real_mz = h_iter->getMZ(), real_intens = h_iter->getIntensity();
2710
if (real_mz <= 0 || real_intens <= 0)
2714
real_mz = h_iter->getMZ();
2715
real_intens = h_iter->getIntensity();
2719
DoubleReal c_score = scoreThis_ (candidate, peak_cutoff, real_mz, c, 0);
2726
DoubleReal mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos (real_mz, c+1);
2727
typename MSSpectrum<PeakType>::const_iterator real_l_MZ_iter = ref.MZBegin(real_mz-Constants::IW_QUARTER_NEUTRON_MASS/(c+1.));
2728
typename MSSpectrum<PeakType>::const_iterator real_r_MZ_iter = ref.MZBegin(real_l_MZ_iter, real_mz+mz_cutoff/(c+1.), ref.end());
2729
if (real_r_MZ_iter == ref.end())
2735
UInt real_mz_begin = distance (ref.begin(), real_l_MZ_iter);
2736
UInt real_mz_end = distance (ref.begin(), real_r_MZ_iter);
2738
if (prev_score == -1000)
2740
push2Box_ (real_mz, scan_index, c, prev_score, transintens, ref.getRT(), real_mz_begin, real_mz_end, real_intens);
2744
push2Box_ (real_mz, scan_index, c, c_score, transintens, ref.getRT(), real_mz_begin, real_mz_end, real_intens);
2751
template <typename PeakType>
2752
std::pair<DoubleReal, DoubleReal> IsotopeWaveletTransform<PeakType>::checkPPMTheoModel_ (const MSSpectrum<PeakType>& ref, const DoubleReal c_mz, const UInt c)
2754
DoubleReal mass = c_mz*(c+1)-Constants::IW_PROTON_MASS*(c);
2755
DoubleReal ppms = getPPMs_(peptideMassRule_(mass), mass);
2756
if (ppms >= Constants::PEPTIDE_MASS_RULE_THEO_PPM_BOUND)
2758
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2759
std::cout << ::std::setprecision(8) << std::fixed << c_mz << "\t =(" << "ISO_WAVE" << ")> " << "REJECT \t" << ppms << " (rule: " << peptideMassRule_(mass) << " got: " << mass << ")" << std::endl;
2761
return (std::pair<DoubleReal, DoubleReal> (-1,-1));
2764
#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2765
std::cout << ::std::setprecision(8) << std::fixed << c_mz << "\t =(" << "ISO_WAVE" << ")> " << "ACCEPT \t" << ppms << " (rule: " << peptideMassRule_(mass) << " got: " << mass << ")" << std::endl;
2767
return (std::pair<DoubleReal, DoubleReal> (c_mz, ref.MZBegin(c_mz)->getIntensity()));