1
// -*- mode: C++; tab-width: 2; -*-
4
// --------------------------------------------------------------------------
5
// OpenMS Mass Spectrometry Framework
6
// --------------------------------------------------------------------------
7
// Copyright (C) 2003-2011 -- Oliver Kohlbacher, Knut Reinert
9
// This library is free software; you can redistribute it and/or
10
// modify it under the terms of the GNU Lesser General Public
11
// License as published by the Free Software Foundation; either
12
// version 2.1 of the License, or (at your option) any later version.
14
// This library is distributed in the hope that it will be useful,
15
// but WITHOUT ANY WARRANTY; without even the implied warranty of
16
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17
// Lesser General Public License for more details.
19
// You should have received a copy of the GNU Lesser General Public
20
// License along with this library; if not, write to the Free Software
21
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
1
// --------------------------------------------------------------------------
2
// OpenMS -- Open-Source Mass Spectrometry
3
// --------------------------------------------------------------------------
4
// Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5
// ETH Zurich, and Freie Universitaet Berlin 2002-2013.
7
// This software is released under a three-clause BSD license:
8
// * Redistributions of source code must retain the above copyright
9
// notice, this list of conditions and the following disclaimer.
10
// * Redistributions in binary form must reproduce the above copyright
11
// notice, this list of conditions and the following disclaimer in the
12
// documentation and/or other materials provided with the distribution.
13
// * Neither the name of any author or any participating institution
14
// may be used to endorse or promote products derived from this software
15
// without specific prior written permission.
16
// For a full list of authors, refer to the file AUTHORS.
17
// --------------------------------------------------------------------------
18
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
// ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22
// INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23
30
// --------------------------------------------------------------------------
24
31
// $Maintainer: Sandro Andreotti $
76
81
defaults_.setValidStrings("add_y2_ions", StringList::create("true,false"));
78
83
defaults_.setValue("svm:svc_type", 0, "Type of the SVC: 0=C_SVC 1=NU_SVC");
79
defaults_.setMinInt("svm:svc_type",0);
80
defaults_.setMaxInt("svm:svc_type",1);
84
defaults_.setMinInt("svm:svc_type", 0);
85
defaults_.setMaxInt("svm:svc_type", 1);
82
87
defaults_.setValue("svm:svr_type", 1, "Type of the SVR: 0=EPSILON_SVR 1=NU_SVR");
83
defaults_.setMinInt("svm:svr_type",0);
84
defaults_.setMaxInt("svm:svr_type",1);
88
defaults_.setMinInt("svm:svr_type", 0);
89
defaults_.setMaxInt("svm:svr_type", 1);
86
91
defaults_.setValue("svm:svc:kernel_type", 2, "Type of the kernel: 0=LINEAR 1=POLY 2=RBF 3=SIGMOID");
87
defaults_.setMinInt("svm:svc:kernel_type",0);
88
defaults_.setMaxInt("svm:svc:kernel_type",3);
92
defaults_.setMinInt("svm:svc:kernel_type", 0);
93
defaults_.setMaxInt("svm:svc:kernel_type", 3);
90
95
defaults_.setValue("svm:svr:kernel_type", 2, "Type of the kernel: 0=LINEAR 1=POLY 2=RBF 3=SIGMOID");
91
defaults_.setMinInt("svm:svr:kernel_type",0);
92
defaults_.setMaxInt("svm:svr:kernel_type",3);
96
defaults_.setMinInt("svm:svr:kernel_type", 0);
97
defaults_.setMaxInt("svm:svr:kernel_type", 3);
95
100
defaults_.setValue("svm:svc:degree", 3, "For POLY");
96
defaults_.setMinInt("svm:svc:degree",1);
101
defaults_.setMinInt("svm:svc:degree", 1);
98
103
defaults_.setValue("svm:svr:degree", 3, "For POLY");
99
defaults_.setMinInt("svm:svr:degree",1);
104
defaults_.setMinInt("svm:svr:degree", 1);
101
106
defaults_.setValue("svm:svc:gamma", 0.0, "For POLY/RBF/SIGMOID");
102
defaults_.setMinFloat("svm:svc:gamma",0.0);
107
defaults_.setMinFloat("svm:svc:gamma", 0.0);
104
109
defaults_.setValue("svm:svr:gamma", 0.0, "For POLY/RBF/SIGMOID");
105
defaults_.setMinFloat("svm:svr:gamma",0.0);
110
defaults_.setMinFloat("svm:svr:gamma", 0.0);
107
112
//defaults_.setValue("svm:svc:coef0", 0.0, "For POLY/SIGMOID");
108
113
//defaults_.setMinFloat("svm:svc:coef0",0.0);
149
154
defaults_.setValue("svm:additive_cv", "false", "Additive step size (if false multiplicative)");
150
155
defaults_.setValidStrings("svm:additive_cv", StringList::create("true,false"));
152
defaults_.setValue("svm:svc:degree_start",1,"starting point of degree");
157
defaults_.setValue("svm:svc:degree_start", 1, "starting point of degree");
153
158
defaults_.setMinInt("svm:svc:degree_start", 1);
154
defaults_.setValue("svm:svc:degree_step_size",2,"step size point of degree");
155
defaults_.setValue("svm:svc:degree_stop",4,"stopping point of degree");
159
defaults_.setValue("svm:svc:degree_step_size", 2, "step size point of degree");
160
defaults_.setValue("svm:svc:degree_stop", 4, "stopping point of degree");
157
defaults_.setValue("svm:svc:gamma_start",0.00001,"starting point of gamma");
162
defaults_.setValue("svm:svc:gamma_start", 0.00001, "starting point of gamma");
158
163
defaults_.setMinFloat("svm:svc:gamma_start", 0.0);
159
164
defaults_.setMaxFloat("svm:svc:gamma_start", 1.0);
160
defaults_.setValue("svm:svc:gamma_step_size",100,"step size point of gamma");
161
defaults_.setValue("svm:svc:gamma_stop",0.1,"stopping point of gamma");
163
defaults_.setValue("svm:svc:c_start",0.1,"starting point of c");
164
defaults_.setValue("svm:svc:c_step_size",100,"step size of c");
165
defaults_.setValue("svm:svc:c_stop",1000,"stopping point of c");
167
defaults_.setValue("svm:svc:nu_start",0.3,"starting point of nu");
165
defaults_.setValue("svm:svc:gamma_step_size", 100, "step size point of gamma");
166
defaults_.setValue("svm:svc:gamma_stop", 0.1, "stopping point of gamma");
168
defaults_.setValue("svm:svc:c_start", 0.1, "starting point of c");
169
defaults_.setValue("svm:svc:c_step_size", 100, "step size of c");
170
defaults_.setValue("svm:svc:c_stop", 1000, "stopping point of c");
172
defaults_.setValue("svm:svc:nu_start", 0.3, "starting point of nu");
168
173
defaults_.setMinFloat("svm:svc:nu_start", 0);
169
174
defaults_.setMaxFloat("svm:svc:nu_start", 1);
170
defaults_.setValue("svm:svc:nu_step_size",2,"step size of nu");
171
defaults_.setValue("svm:svc:nu_stop",0.6,"stopping point of nu");
175
defaults_.setValue("svm:svc:nu_step_size", 2, "step size of nu");
176
defaults_.setValue("svm:svc:nu_stop", 0.6, "stopping point of nu");
172
177
defaults_.setMinFloat("svm:svc:nu_stop", 0);
173
178
defaults_.setMaxFloat("svm:svc:nu_stop", 1);
175
defaults_.setValue("svm:svr:degree_start",1,"starting point of degree");
180
defaults_.setValue("svm:svr:degree_start", 1, "starting point of degree");
176
181
defaults_.setMinInt("svm:svr:degree_start", 1);
177
defaults_.setValue("svm:svr:degree_step_size",2,"step size point of degree");
178
defaults_.setValue("svm:svr:degree_stop",4,"stopping point of degree");
182
defaults_.setValue("svm:svr:degree_step_size", 2, "step size point of degree");
183
defaults_.setValue("svm:svr:degree_stop", 4, "stopping point of degree");
180
defaults_.setValue("svm:svr:gamma_start",0.00001,"starting point of gamma");
185
defaults_.setValue("svm:svr:gamma_start", 0.00001, "starting point of gamma");
181
186
defaults_.setMinFloat("svm:svr:gamma_start", 0.0);
182
187
defaults_.setMaxFloat("svm:svr:gamma_start", 1.0);
183
defaults_.setValue("svm:svr:gamma_step_size",100,"step size point of gamma");
184
defaults_.setValue("svm:svr:gamma_stop",0.1,"stopping point of gamma");
186
defaults_.setValue("svm:svr:p_start",0.00001,"starting point of p");
187
defaults_.setValue("svm:svr:p_step_size",100,"step size point of p");
188
defaults_.setValue("svm:svr:p_stop",0.1,"stopping point of p");
190
defaults_.setValue("svm:svr:c_start",0.1,"starting point of c");
191
defaults_.setValue("svm:svr:c_step_size",100,"step size of c");
192
defaults_.setValue("svm:svr:c_stop",1000,"stopping point of c");
194
defaults_.setValue("svm:svr:nu_start",0.3,"starting point of nu");
188
defaults_.setValue("svm:svr:gamma_step_size", 100, "step size point of gamma");
189
defaults_.setValue("svm:svr:gamma_stop", 0.1, "stopping point of gamma");
191
defaults_.setValue("svm:svr:p_start", 0.00001, "starting point of p");
192
defaults_.setValue("svm:svr:p_step_size", 100, "step size point of p");
193
defaults_.setValue("svm:svr:p_stop", 0.1, "stopping point of p");
195
defaults_.setValue("svm:svr:c_start", 0.1, "starting point of c");
196
defaults_.setValue("svm:svr:c_step_size", 100, "step size of c");
197
defaults_.setValue("svm:svr:c_stop", 1000, "stopping point of c");
199
defaults_.setValue("svm:svr:nu_start", 0.3, "starting point of nu");
195
200
defaults_.setMinFloat("svm:svr:nu_start", 0);
196
201
defaults_.setMaxFloat("svm:svr:nu_start", 1);
197
defaults_.setValue("svm:svr:nu_step_size",2,"step size of nu");
198
defaults_.setValue("svm:svr:nu_stop",0.6,"stopping point of nu");
202
defaults_.setValue("svm:svr:nu_step_size", 2, "step size of nu");
203
defaults_.setValue("svm:svr:nu_stop", 0.6, "stopping point of nu");
199
204
defaults_.setMinFloat("svm:svr:nu_stop", 0);
200
205
defaults_.setMaxFloat("svm:svr:nu_stop", 1);
526
530
info_outfile.push_back("<PrimaryTypes>");
528
532
//count number of usable spectra
529
Size usable_spectra=spectra.size();
530
std::vector<bool>is_spectrum_usable(spectra.size(), true);
533
Size usable_spectra = spectra.size();
534
std::vector<bool> is_spectrum_usable(spectra.size(), true);
532
536
for (Size index = 0; index < spectra.size(); ++index)
534
538
//test whether annotation for spectrum match. If theoretical and empirical mass differ too much skip this spectrum
535
Int empirical_charge=spectra[index].getPrecursors()[0].getCharge();
536
DoubleReal empirical_parent_mass=spectra[index].getPrecursors()[0].getMZ();
537
DoubleReal theoretical_parent_mass=annotations[index].getMonoWeight(Residue::Full, empirical_charge)/empirical_charge;
539
Int empirical_charge = spectra[index].getPrecursors()[0].getCharge();
540
DoubleReal empirical_parent_mass = spectra[index].getPrecursors()[0].getMZ();
541
DoubleReal theoretical_parent_mass = annotations[index].getMonoWeight(Residue::Full, empirical_charge) / empirical_charge;
539
if (abs(empirical_parent_mass-theoretical_parent_mass) > parent_tolerance)
543
if (abs(empirical_parent_mass - theoretical_parent_mass) > parent_tolerance)
541
545
is_spectrum_usable[index] = false;
542
546
--usable_spectra;
543
std::cerr << "skipping spectrum " << index <<" due to parent mass missmatch"<<std::endl;
547
std::cerr << "skipping spectrum " << index << " due to parent mass missmatch" << std::endl;
545
549
else if (precursor_charge != empirical_charge)
547
551
is_spectrum_usable[index] = false;
548
552
--usable_spectra;
549
std::cerr << "skipping spectrum " << index <<" due to wrong precursor charge"<<std::endl;
553
std::cerr << "skipping spectrum " << index << " due to wrong precursor charge" << std::endl;
553
557
//only required for generatrion of descriptors
554
SvmTheoreticalSpectrumGenerator spec_gen;
558
SvmTheoreticalSpectrumGenerator spec_gen;
556
//Amino acid sequences used for obtaining prefix and suffix mass
560
//Amino acid sequences used for obtaining prefix and suffix mass
557
561
AASequence prefix, suffix;
559
563
DescriptorSet tmp_desc;
560
Size num_features = spec_gen.generateDescriptorSet_(annotations[0],0,ion_types[0], 1, tmp_desc);
564
Size num_features = spec_gen.generateDescriptorSet_(annotations[0], 0, ion_types[0], 1, tmp_desc);
562
566
//use number of features to adapt gamma parameter if set to 0 (same is used in svm-train.c)
563
if(wrap_reg.getDoubleParameter(SVMWrapper::GAMMA) == 0. )
567
if (wrap_reg.getDoubleParameter(SVMWrapper::GAMMA) == 0.)
565
wrap_reg.setParameter(SVMWrapper::GAMMA, 1.0/num_features);
569
wrap_reg.setParameter(SVMWrapper::GAMMA, 1.0 / num_features);
567
if(wrap_class.getDoubleParameter(SVMWrapper::GAMMA) == 0. )
571
if (wrap_class.getDoubleParameter(SVMWrapper::GAMMA) == 0.)
569
wrap_class.setParameter(SVMWrapper::GAMMA, 1.0/num_features);
573
wrap_class.setParameter(SVMWrapper::GAMMA, 1.0 / num_features);
572
576
//vectors to store the minimum and maximum value appearing in the training data for each feature (required for scaling)
573
577
ObservedIntensMap observed_intensities;
574
std::map<Size, std::vector<DescriptorSet> >training_input;
575
std::map<Size, std::vector<double> >training_output;
578
std::map<Size, std::vector<DescriptorSet> > training_input;
579
std::map<Size, std::vector<double> > training_output;
577
581
Size spec_index = 0;
579
583
//run over all input spectra
580
for (PeakMap::const_iterator map_it = spectra.begin(); map_it < spectra.end(); map_it+=x, spec_index+=x)
584
for (PeakMap::const_iterator map_it = spectra.begin(); map_it < spectra.end(); map_it += x, spec_index += x)
582
586
if (!is_spectrum_usable[spec_index])
587
591
Size precursor_charge = map_it->getPrecursors()[0].getCharge();
588
592
PeakSpectrum input_spec_norm(*map_it);
641
645
DescriptorSet descriptors;
642
spec_gen.generateDescriptorSet_(annotations[spec_index],frag_pos-1,ion_types[type_nr], precursor_charge, descriptors);
646
spec_gen.generateDescriptorSet_(annotations[spec_index], frag_pos - 1, ion_types[type_nr], precursor_charge, descriptors);
644
648
training_input[type_nr].push_back(descriptors);
645
training_output[type_nr].push_back(observed_peak_intensity);
649
training_output[type_nr].push_back(observed_peak_intensity);
647
}//end of running over all types
648
}//end of running over all spectra
651
} //end of running over all types
652
} //end of running over all spectra
650
//scale the input data
651
std::vector<double>min_features;
654
//scale the input data
655
std::vector<double> min_features;
652
656
min_features.reserve(num_features);
653
std::vector<double>max_features;
657
std::vector<double> max_features;
654
658
max_features.reserve(num_features);
656
660
min_features.assign(num_features, std::numeric_limits<double>::infinity());
657
661
max_features.assign(num_features, -1 * std::numeric_limits<double>::infinity());
662
666
for (Size type_nr = 0; type_nr < ion_types.size(); ++type_nr)
664
668
//first compute the minimal and maximal entries for each feature
665
669
std::vector<svm_node>::iterator it;
666
for(Size train_row=0; train_row<training_input[type_nr].size(); ++train_row)
670
for (Size train_row = 0; train_row < training_input[type_nr].size(); ++train_row)
669
for(it = training_input[type_nr][train_row].descriptors.begin(); it != training_input[type_nr][train_row].descriptors.end(); ++it)
673
for (it = training_input[type_nr][train_row].descriptors.begin(); it != training_input[type_nr][train_row].descriptors.end(); ++it)
671
while( (Int) index < it->index-1)
675
while ((Int) index < it->index - 1)
673
677
min_features[index] = std::min(min_features[index], 0.0);
674
max_features[index] = std::max(max_features[index],0.0);
678
max_features[index] = std::max(max_features[index], 0.0);
677
681
min_features[index] = std::min(min_features[index], it->value);
696
700
for (Size type_nr = 0; type_nr < ion_types.size(); ++type_nr)
698
if(!is_primary[type_nr])
702
if (!is_primary[type_nr])
703
707
//with the minimal and maximal now scale
704
for(Size train_row=0; train_row<training_input[type_nr].size(); ++train_row)
708
for (Size train_row = 0; train_row < training_input[type_nr].size(); ++train_row)
706
spec_gen.scaleDescriptorSet_(training_input[type_nr][train_row],lower, upper);
710
spec_gen.scaleDescriptorSet_(training_input[type_nr][train_row], lower, upper);
712
716
// Now 2 Steps - first we train a SVM classifier to predict abundance or miss of a peak.
713
717
// In second step we train a SVM regression model to predict intensities. This svm is trained
714
// only with rows for abundant peaks
718
// only with rows for abundant peaks
716
720
//Within this loop the SVR and SVC are trained for the primary ion types
717
721
for (Size type_nr = 0; type_nr < ion_types.size(); ++type_nr)
719
if(!is_primary[type_nr])
723
if (!is_primary[type_nr])
724
728
String svm_model_file_class = filename + "_" + Residue::getResidueTypeName(ion_types[type_nr].residue) + "_" +
725
ion_types[type_nr].loss.getString() + "_" + ion_types[type_nr].charge + "_class.svm";
729
ion_types[type_nr].loss.getString() + "_" + ion_types[type_nr].charge + "_class.svm";
726
730
String svm_model_file_reg = filename + "_" + Residue::getResidueTypeName(ion_types[type_nr].residue) + "_" +
727
ion_types[type_nr].loss.getString() + "_" + ion_types[type_nr].charge + "_reg.svm";
731
ion_types[type_nr].loss.getString() + "_" + ion_types[type_nr].charge + "_reg.svm";
730
734
//------------------------------------------------------------------------------------------
731
735
//----------------------------------Training of SVR-Model-----------------------------
732
736
//------------------------------------------------------------------------------------------
734
std::vector<DescriptorSet>training_input_reg;
735
std::vector<double>training_output_reg;
738
std::vector<DescriptorSet> training_input_reg;
739
std::vector<double> training_output_reg;
736
740
training_input_reg.reserve(training_input[type_nr].size());
737
741
training_output_reg.reserve(training_output[type_nr].size());
739
for(Size i = 0; i < training_output[type_nr].size(); ++i)
743
for (Size i = 0; i < training_output[type_nr].size(); ++i)
741
745
training_input_reg.push_back(training_input[type_nr][i]);
742
training_output_reg.push_back(std::max(0.0,training_output[type_nr][i]));
746
training_output_reg.push_back(std::max(0.0, training_output[type_nr][i]));
747
751
writeTrainingFile_(training_input_reg, training_output_reg, String("Training_") + Residue::getResidueTypeName(ion_types[type_nr].residue) + "_" +
748
ion_types[type_nr].loss.getString() + "_" + ion_types[type_nr].charge + "_reg.dat");
752
ion_types[type_nr].loss.getString() + "_" + ion_types[type_nr].charge + "_reg.dat");
752
756
//std::vector<double> predictions_reg(training_input_reg.size(), 0);
753
svm_node ** input_training_reg = new svm_node * [training_input_reg.size()];
757
svm_node ** input_training_reg = new svm_node *[training_input_reg.size()];
754
758
double * output_training_reg = &training_output_reg[0];
755
for(Size i = 0; i < training_input_reg.size(); ++i)
759
for (Size i = 0; i < training_input_reg.size(); ++i)
757
761
input_training_reg[i] = &(training_input_reg[i].descriptors[0]);
801
805
//------------------------------------------------------------------------------------------
804
for(Size i=0; i<training_output[type_nr].size(); ++i)
808
for (Size i = 0; i < training_output[type_nr].size(); ++i)
806
training_output[type_nr][i] = training_output[type_nr][i] != -1 ? 1:0 ; //if peak was abundant class 1 else 0
810
training_output[type_nr][i] = training_output[type_nr][i] != -1 ? 1 : 0; //if peak was abundant class 1 else 0
809
//create balanced set
813
//create balanced set
812
std::vector<std::vector<Size> >training_data_by_class(2);
816
std::vector<std::vector<Size> > training_data_by_class(2);
813
817
std::vector<double>::const_iterator it;
814
std::vector<DescriptorSet>tmp_training;
815
std::vector<double>tmp_output;
816
for(it = training_output[type_nr].begin(); it != training_output[type_nr].end(); ++it)
818
std::vector<DescriptorSet> tmp_training;
819
std::vector<double> tmp_output;
820
for (it = training_output[type_nr].begin(); it != training_output[type_nr].end(); ++it)
818
training_data_by_class[*it].push_back(it-training_output[type_nr].begin());
822
training_data_by_class[*it].push_back(it - training_output[type_nr].begin());
820
Size min_size=std::min(training_data_by_class[0].size(),training_data_by_class[1].size());
821
std::random_shuffle(training_data_by_class[0].begin(),training_data_by_class[0].end() );
822
std::random_shuffle(training_data_by_class[1].begin(),training_data_by_class[1].end() );
824
Size min_size = std::min(training_data_by_class[0].size(), training_data_by_class[1].size());
825
std::random_shuffle(training_data_by_class[0].begin(), training_data_by_class[0].end());
826
std::random_shuffle(training_data_by_class[1].begin(), training_data_by_class[1].end());
824
for(Size num = 0; num < min_size; ++num)
828
for (Size num = 0; num < min_size; ++num)
826
for(Size intens = 0; intens < 2; ++intens)
830
for (Size intens = 0; intens < 2; ++intens)
828
832
tmp_training.push_back(training_input[type_nr][training_data_by_class[intens][num]]);
829
833
tmp_output.push_back(intens);
942
945
info_outfile.store(info_outfile_name);
946
948
//like in Cong Zhou paper
947
void SvmTheoreticalSpectrumGeneratorTrainer::normalizeIntensity(PeakSpectrum &S) const
949
void SvmTheoreticalSpectrumGeneratorTrainer::normalizeIntensity(PeakSpectrum & S) const
950
Param larg_param=n_larg.getParameters();
951
larg_param.setValue("n", (Int)(S.size()*0.8));
952
Param larg_param = n_larg.getParameters();
953
larg_param.setValue("n", (Int)(S.size() * 0.8));
952
954
n_larg.setParameters(larg_param);
953
955
n_larg.filterPeakSpectrum(S);
954
956
S.sortByPosition();
957
Param norm_param=norm.getParameters();
959
Param norm_param = norm.getParameters();
958
960
norm_param.setValue("method", "to_TIC");
959
961
norm.setParameters(norm_param);
960
962
norm.filterPeakSpectrum(S);
962
DoubleReal min_intens= std::numeric_limits<double>::infinity();
964
DoubleReal min_intens = std::numeric_limits<double>::infinity();
963
965
DoubleReal max_intens = -1 * std::numeric_limits<double>::infinity();
965
std::vector<DoubleReal>intensities(S.size());
966
for(Size i=0; i<S.size(); ++i)
967
std::vector<DoubleReal> intensities(S.size());
968
for (Size i = 0; i < S.size(); ++i)
968
970
//std::cerr<<"before norm: "<<S[i].getIntensity()<<std::endl;
969
if(S[i].getIntensity()>0)
971
if (S[i].getIntensity() > 0)
971
intensities[i]=log(S[i].getIntensity() * 100);
972
max_intens=std::max(max_intens, intensities[i]);
973
min_intens=std::min(min_intens, intensities[i]);
973
intensities[i] = log(S[i].getIntensity() * 100);
974
max_intens = std::max(max_intens, intensities[i]);
975
min_intens = std::min(min_intens, intensities[i]);
977
979
DoubleReal lower = 0;
978
980
DoubleReal upper = 1;
979
981
//normalize intensities to one
980
for(Size i=0; i<S.size(); ++i)
982
for (Size i = 0; i < S.size(); ++i)
982
if(S[i].getIntensity()>0)
984
if (S[i].getIntensity() > 0)
984
DoubleReal intens = lower + (upper-lower) *
985
(intensities[i]-min_intens)/
986
(max_intens-min_intens);
986
DoubleReal intens = lower + (upper - lower) *
987
(intensities[i] - min_intens) /
988
(max_intens - min_intens);
987
989
S[i].setIntensity(intens);
998
void SvmTheoreticalSpectrumGeneratorTrainer::trainSecondaryTypes_(TextFile &info_outfile,
999
void SvmTheoreticalSpectrumGeneratorTrainer::trainSecondaryTypes_(TextFile & info_outfile,
999
1000
Size number_of_regions,
1000
1001
Size number_of_intensity_levels,
1001
ObservedIntensMap &observed_intensities,
1002
const std::vector<IonType> &ion_types,
1003
const std::vector<bool> &is_primary
1002
ObservedIntensMap & observed_intensities,
1003
const std::vector<IonType> & ion_types,
1004
const std::vector<bool> & is_primary
1006
std::vector<DoubleReal>tmp;
1007
for(Size region = 0; region < number_of_regions; ++ region)
1007
std::vector<DoubleReal> tmp;
1008
for (Size region = 0; region < number_of_regions; ++region)
1009
//we start by binning the intensities. We select the bin boarders such that
1010
//the intensities of the primary ions are equally split
1011
std::vector<DoubleReal> &observed_b=observed_intensities[std::make_pair(IonType(Residue::BIon), region)];
1010
//we start by binning the intensities. We select the bin boarders such that
1011
//the intensities of the primary ions are equally split
1012
std::vector<DoubleReal> & observed_b = observed_intensities[std::make_pair(IonType(Residue::BIon), region)];
1012
1013
tmp.insert(tmp.end(), observed_b.begin(), observed_b.end());
1013
std::vector<DoubleReal> &observed_y=observed_intensities[std::make_pair(IonType(Residue::YIon), region)];
1014
tmp.insert(tmp.end(),observed_y.begin(), observed_y.end());
1014
std::vector<DoubleReal> & observed_y = observed_intensities[std::make_pair(IonType(Residue::YIon), region)];
1015
tmp.insert(tmp.end(), observed_y.begin(), observed_y.end());
1017
std::vector<DoubleReal>bin_boarders(number_of_intensity_levels);
1018
std::vector<DoubleReal>bin_values(number_of_intensity_levels);
1018
std::vector<DoubleReal> bin_boarders(number_of_intensity_levels);
1019
std::vector<DoubleReal> bin_values(number_of_intensity_levels);
1019
1020
std::sort(tmp.begin(), tmp.end());
1021
1022
//find the first nonzero
1022
Size first_non_zero=0;
1023
while(first_non_zero<tmp.size()&& tmp[first_non_zero]==0)
1023
Size first_non_zero = 0;
1024
while (first_non_zero < tmp.size() && tmp[first_non_zero] == 0)
1025
1026
++first_non_zero;
1028
Size non_zero_size = tmp.size()-first_non_zero;
1029
Size non_zero_size = tmp.size() - first_non_zero;
1029
1030
Size prev_index = 0;
1030
for(Size i=1; i<number_of_intensity_levels; ++i)
1031
for (Size i = 1; i < number_of_intensity_levels; ++i)
1032
Size index = i* (DoubleReal) non_zero_size/(number_of_intensity_levels-1) + first_non_zero;
1033
Size index = i * (DoubleReal) non_zero_size / (number_of_intensity_levels - 1) + first_non_zero;
1033
1034
DoubleReal count = 0;
1034
for(Size j = prev_index; j < index; ++j)
1035
for (Size j = prev_index; j < index; ++j)
1036
1037
count += tmp[j];
1038
bin_boarders[i-1] = tmp[index-1];
1039
bin_values[i-1] = count/(index-prev_index);
1039
bin_boarders[i - 1] = tmp[index - 1];
1040
bin_values[i - 1] = count / (index - prev_index);
1040
1041
prev_index = index;
1043
1044
info_outfile.push_back("<IntensityBinBoarders>");
1044
for(Size i = 0; i < number_of_intensity_levels-1; ++i)
1045
for (Size i = 0; i < number_of_intensity_levels - 1; ++i)
1046
1047
info_outfile.push_back(bin_boarders[i]);
1048
1049
info_outfile.push_back("</IntensityBinBoarders>");
1049
1050
info_outfile.push_back("<IntensityBinValues>");
1050
for(Size i = 0; i < number_of_intensity_levels-1; ++i)
1051
for (Size i = 0; i < number_of_intensity_levels - 1; ++i)
1052
1053
info_outfile.push_back(bin_values[i]);
1054
1055
info_outfile.push_back("</IntensityBinValues>");
1056
1057
//use the boarder values to bin the entries
1057
for(Size region = 0; region < number_of_regions; ++ region)
1058
for (Size region = 0; region < number_of_regions; ++region)
1059
for(Size i=0; i<ion_types.size(); ++i)
1060
for (Size i = 0; i < ion_types.size(); ++i)
1061
1062
std::vector<DoubleReal> & intensities = observed_intensities[std::make_pair(ion_types[i], region)];
1062
for(Size j=0; j<intensities.size(); ++j)
1063
for (Size j = 0; j < intensities.size(); ++j)
1064
1065
DoubleReal intens = intensities[j];
1065
if(intens == 0.0|| intens == -1.0)
1066
if (intens == 0.0 || intens == -1.0)
1069
while(k<number_of_intensity_levels-1 && intens>bin_boarders[k-1] )
1070
while (k<number_of_intensity_levels - 1 && intens> bin_boarders[k - 1])
1077
std::map<std::pair<IonType,Size>, std::vector<std::vector<DoubleReal> > >joint_counts;
1078
std::map<std::pair<IonType,Size>, std::vector<DoubleReal> >background_counts;
1078
std::map<std::pair<IonType, Size>, std::vector<std::vector<DoubleReal> > > joint_counts;
1079
std::map<std::pair<IonType, Size>, std::vector<DoubleReal> > background_counts;
1080
1081
//count joint appearences of primary and secondary peaks
1081
for(Size i=0; i<ion_types.size(); ++i)
1082
for (Size i = 0; i < ion_types.size(); ++i)
1083
1084
const IonType & type = ion_types[i];
1087
IonType primary_type=IonType(Residue::BIon);
1088
IonType primary_type = IonType(Residue::BIon);
1089
if(type.residue == Residue::YIon ||
1090
type.residue == Residue::XIon ||
1091
type.residue == Residue::ZIon)
1090
if (type.residue == Residue::YIon ||
1091
type.residue == Residue::XIon ||
1092
type.residue == Residue::ZIon)
1093
primary_type=IonType(Residue::YIon);
1094
primary_type = IonType(Residue::YIon);
1095
for(Size region = 0; region < number_of_regions; ++region)
1096
for (Size region = 0; region < number_of_regions; ++region)
1097
joint_counts[std::make_pair(type, region)].assign(number_of_intensity_levels, std::vector<DoubleReal>(number_of_intensity_levels,1));
1098
joint_counts[std::make_pair(type, region)].assign(number_of_intensity_levels, std::vector<DoubleReal>(number_of_intensity_levels, 1));
1098
1099
background_counts[std::make_pair(type, region)].assign(number_of_intensity_levels, number_of_intensity_levels);
1099
const std::vector<DoubleReal> &secondary = observed_intensities[std::make_pair(type, region)];
1100
const std::vector<DoubleReal> &primary = observed_intensities[std::make_pair(primary_type, region)];
1100
const std::vector<DoubleReal> & secondary = observed_intensities[std::make_pair(type, region)];
1101
const std::vector<DoubleReal> & primary = observed_intensities[std::make_pair(primary_type, region)];
1102
for(Size j=0; j<primary.size(); ++j)
1103
for (Size j = 0; j < primary.size(); ++j)
1104
if(secondary[j]!=-1.0)
1105
if (secondary[j] != -1.0)
1106
1107
++joint_counts[std::make_pair(type, region)][(Size)secondary[j]][(Size)primary[j]];
1107
1108
++background_counts[std::make_pair(type, region)][(Size)primary[j]];