43
50
#include <iostream>
49
/// Data structure used in SVMWrapper
52
std::vector< std::vector< std::pair<Int, DoubleReal> > > sequences;
53
std::vector< DoubleReal > labels;
56
: sequences(std::vector< std::vector< std::pair<Int, DoubleReal> > >()),
57
labels(std::vector< DoubleReal >())
61
SVMData(std::vector< std::vector< std::pair<Int, DoubleReal> > >& seqs, std::vector< DoubleReal >& lbls)
67
bool operator == (const SVMData& rhs) const
69
return sequences == rhs.sequences
70
&& labels == rhs.labels;
73
bool store(const String& filename) const
75
std::ofstream output_file(filename.c_str());
77
// checking if file is writable
78
if (!File::writable(filename) || sequences.size() != labels.size())
83
// writing feature vectors
84
for (Size i = 0; i < sequences.size(); i++)
86
output_file << labels[i] << " ";
87
for (Size j = 0; j < sequences[i].size(); ++j)
89
output_file << sequences[i][j].second << ":" << sequences[i][j].first << " " ;
91
output_file << std::endl;
99
bool load(const String& filename)
102
std::vector<String> parts;
103
std::vector<String> temp_parts;
105
if (!File::exists(filename))
109
if (!File::readable(filename))
113
if (File::empty(filename))
118
TextFile text_file(filename.c_str(), true);
119
TextFile::iterator it;
121
it = text_file.begin();
123
sequences.resize(text_file.size(), std::vector< std::pair<Int, DoubleReal> >());
124
labels.resize(text_file.size(), 0.);
125
while(counter < text_file.size()&& it != text_file.end())
127
it->split(' ', parts);
128
labels[counter] = parts[0].trim().toFloat();
129
sequences[counter].resize(parts.size(), std::pair<Int, DoubleReal>());
130
for (Size j = 1; j < parts.size(); ++j)
132
parts[j].split(':', temp_parts);
133
if (temp_parts.size() < 2)
137
sequences[counter][j - 1].second = temp_parts[0].trim().toFloat();
138
sequences[counter][j - 1].first = temp_parts[1].trim().toInt();
56
/// Data structure used in SVMWrapper
59
std::vector<std::vector<std::pair<Int, DoubleReal> > > sequences;
60
std::vector<DoubleReal> labels;
63
sequences(std::vector<std::vector<std::pair<Int, DoubleReal> > >()),
64
labels(std::vector<DoubleReal>())
68
SVMData(std::vector<std::vector<std::pair<Int, DoubleReal> > > & seqs, std::vector<DoubleReal> & lbls) :
74
bool operator==(const SVMData & rhs) const
76
return sequences == rhs.sequences
77
&& labels == rhs.labels;
80
bool store(const String & filename) const
82
std::ofstream output_file(filename.c_str());
84
// checking if file is writable
85
if (!File::writable(filename) || sequences.size() != labels.size())
90
// writing feature vectors
91
for (Size i = 0; i < sequences.size(); i++)
93
output_file << labels[i] << " ";
94
for (Size j = 0; j < sequences[i].size(); ++j)
96
output_file << sequences[i][j].second << ":" << sequences[i][j].first << " ";
98
output_file << std::endl;
106
bool load(const String & filename)
109
std::vector<String> parts;
110
std::vector<String> temp_parts;
112
if (!File::exists(filename))
116
if (!File::readable(filename))
120
if (File::empty(filename))
125
TextFile text_file(filename.c_str(), true);
126
TextFile::iterator it;
128
it = text_file.begin();
130
sequences.resize(text_file.size(), std::vector<std::pair<Int, DoubleReal> >());
131
labels.resize(text_file.size(), 0.);
132
while (counter < text_file.size() && it != text_file.end())
134
it->split(' ', parts);
135
labels[counter] = parts[0].trim().toFloat();
136
sequences[counter].resize(parts.size(), std::pair<Int, DoubleReal>());
137
for (Size j = 1; j < parts.size(); ++j)
139
parts[j].split(':', temp_parts);
140
if (temp_parts.size() < 2)
144
sequences[counter][j - 1].second = temp_parts[0].trim().toFloat();
145
sequences[counter][j - 1].first = temp_parts[1].trim().toInt();
149
156
@brief Serves as a wrapper for the libsvm
151
158
This class can be used for svm predictions. You can either perform classification or regression and
152
choose certain kernel functions and additional parameters. Furthermore the models can be saved and
153
loaded, and we also support a new kernel function that was specially designed for learning with
154
small sequences of different lengths.
159
choose certain kernel functions and additional parameters. Furthermore the models can be saved and
160
loaded, and we also support a new kernel function that was specially designed for learning with
161
small sequences of different lengths.
156
class OPENMS_DLLAPI SVMWrapper
157
: public ProgressLogger
162
@brief Parameters for the svm to be set from outside
164
This type is used to specify the kind of parameter that
165
is to be set or retrieved by the set/getParameter methods.
167
enum SVM_parameter_type
169
SVM_TYPE, ///< the svm type can be NU_SVR or EPSILON_SVR
170
KERNEL_TYPE, ///< the kernel type
171
DEGREE, ///< the degree for the polynomial- kernel
172
C, ///< the C parameter of the svm
173
NU, ///< the nu parameter for nu-SVR
174
P, ///< the epsilon parameter for epsilon-SVR
175
GAMMA, ///< the gamma parameter of the POLY, RBF and SIGMOID kernel
188
/// standard constructor
192
virtual ~SVMWrapper();
195
@brief You can set the parameters of the svm:
197
KERNEL_TYPE: can be LINEAR for the linear kernel
198
RBF for the rbf kernel
199
POLY for the polynomial kernel
200
SIGMOID for the sigmoid kernel
201
DEGREE: the degree for the polynomial- kernel and the
202
locality- improved kernel
204
C: the C parameter of the svm
206
void setParameter(SVM_parameter_type type, Int value);
209
@brief sets the double parameters of the svm
212
void setParameter(SVM_parameter_type type, DoubleReal value);
215
@brief trains the svm
217
The svm is trained with the data stored in the 'svm_problem' structure.
219
Int train(struct svm_problem* problem);
222
@brief trains the svm
224
The svm is trained with the data stored in the 'SVMData' structure.
226
Int train(SVMData& problem);
229
@brief saves the svm model
231
The model of the trained svm is saved into 'modelFilename'. Throws an exception if
232
the model cannot be saved.
234
@exception Exception::UnableToCreateFile
236
void saveModel(std::string modelFilename) const;
239
@brief loads the model
241
The svm- model is loaded. After this, the svm is ready for
244
void loadModel(std::string modelFilename);
247
@brief predicts the labels using the trained model
249
The prediction process is started and the results are stored in 'predicted_labels'.
252
void predict(struct svm_problem* problem, std::vector<DoubleReal>& predicted_labels);
255
@brief predicts the labels using the trained model
257
The prediction process is started and the results are stored in 'results'.
260
void predict(const SVMData& problem, std::vector<DoubleReal>& results);
263
@brief You can get the actual int- parameters of the svm
265
KERNEL_TYPE: can be LINEAR for the linear kernel
266
RBF for the rbf kernel
267
POLY for the polynomial kernel
268
SIGMOID for the sigmoid kernel
270
DEGREE: the degree for the polynomial- kernel and the
271
locality- improved kernel
273
SVM_TYPE: the SVM type of the svm: can be NU_SVR or EPSILON_SVR
275
Int getIntParameter(SVM_parameter_type type);
278
@brief You can get the actual double- parameters of the svm
280
C: the C parameter of the svm
281
P: the P parameter of the svm (sets the epsilon in
283
NU: the nu parameter in nu-SVR
284
GAMMA: for POLY, RBF and SIGMOID
286
DoubleReal getDoubleParameter(SVM_parameter_type type);
289
@brief You can create 'number' equally sized random partitions
291
This function creates 'number' equally sized random partitions and stores them in 'partitions'.
294
static void createRandomPartitions(svm_problem* problem, Size number, std::vector<svm_problem*>& partitions);
297
@brief You can create 'number' equally sized random partitions
299
This function creates 'number' equally sized random partitions and stores them in 'problems'.
302
static void createRandomPartitions(const SVMData& problem,
304
std::vector<SVMData>& problems);
306
@brief You can merge partitions excluding the partition with index 'except'
309
static svm_problem* mergePartitions(const std::vector<svm_problem*>& problems, Size except);
312
@brief You can merge partitions excluding the partition with index 'except'
315
static void mergePartitions(const std::vector<SVMData>& problems,
317
SVMData& merged_problem);
320
@brief predicts the labels using the trained model
322
The prediction process is started and the results are stored in 'predicted_rts'.
325
void predict(const std::vector<svm_node*>& vectors, std::vector<DoubleReal>& predicted_rts);
328
@brief Stores the stored labels of the encoded SVM data at 'labels'
331
static void getLabels(svm_problem* problem, std::vector<DoubleReal>& labels);
334
@brief Performs a CV for the data given by 'problem'
337
DoubleReal performCrossValidation(svm_problem* problem_ul,
338
const SVMData& problem_l,
339
const bool is_labeled,
340
const std::map<SVM_parameter_type, DoubleReal>& start_values_map,
341
const std::map<SVM_parameter_type, DoubleReal>& step_sizes_map,
342
const std::map<SVM_parameter_type, DoubleReal>& end_values_map,
343
Size number_of_partitions,
345
std::map<SVM_parameter_type, DoubleReal>& best_parameters,
346
bool additive_step_sizes = true,
348
String performances_file_name = "performances.txt",
349
bool mcc_as_performance_measure = false);
353
@brief Returns the probability parameter sigma of the fitted laplace model.
355
The libsvm is used to fit a laplace model to the prediction values by performing
356
an internal cv using the training set if setParameter(PROBABILITY, 1) was invoked
357
before using train. See your libsvm documentation for more details.
358
The model parameter sigma is returned by this method. If no model was fitted during
359
training zero is returned.
361
DoubleReal getSVRProbability();
364
@brief returns the value of the oligo kernel for sequences 'x' and 'y'
366
This function computes the kernel value of the oligo kernel,
367
which was introduced by Meinicke et al. in 2004. 'x' and
368
'y' are encoded by encodeOligo and 'gauss_table' has to be
369
constructed by calculateGaussTable.
371
'max_distance' can be used to speed up the computation
372
even further by restricting the maximum distance between a k_mer at
373
position i in sequence 'x' and a k_mer at position j
374
in sequence 'y'. If i - j > 'max_distance' the value is not
375
added to the kernel value. This approximation is switched
376
off by default (max_distance < 0).
378
static DoubleReal kernelOligo(const std::vector< std::pair<int, double> >& x,
379
const std::vector< std::pair<int, double> >& y,
380
const std::vector<double>& gauss_table,
381
int max_distance = -1);
384
@brief calculates the oligo kernel value for the encoded sequences 'x' and 'y'
386
This kernel function calculates the oligo kernel value [Meinicke 04] for
387
the sequences 'x' and 'y' that had been encoded by the encodeOligoBorder... function
388
of the LibSVMEncoder class.
390
static DoubleReal kernelOligo(const svm_node* x, const svm_node* y, const std::vector<DoubleReal>& gauss_table, DoubleReal sigma_square = 0, Size max_distance = 50);
393
@brief calculates the significance borders of the error model and stores them in 'borders'
396
void getSignificanceBorders(svm_problem* data, std::pair<DoubleReal, DoubleReal>& borders, DoubleReal confidence = 0.95, Size number_of_runs = 5, Size number_of_partitions = 5, DoubleReal step_size = 0.01, Size max_iterations = 1000000);
399
@brief calculates the significance borders of the error model and stores them in 'sigmas'
402
void getSignificanceBorders(const SVMData& data,
403
std::pair<DoubleReal, DoubleReal>& sigmas,
404
DoubleReal confidence = 0.95,
405
Size number_of_runs = 5,
406
Size number_of_partitions = 5,
407
DoubleReal step_size = 0.01,
408
Size max_iterations = 1000000);
411
@brief calculates a p-value for a given data point using the model parameters
413
Uses the model parameters to calculate the p-value for 'point' which has the data
414
entries: measured, predicted retention time.
417
DoubleReal getPValue(DoubleReal sigma1, DoubleReal sigma2, std::pair<DoubleReal, DoubleReal> point);
420
@brief stores the prediction values for the encoded data in 'decision_values'
422
This function can be used to get the prediction values of the data if a model
423
is already trained by the train() method. For regression the result is the same
424
as for the method predict. For classification this function returns the distance from
425
the separating hyperplane. For multiclass classification the decision_values vector
429
void getDecisionValues(svm_problem* data, std::vector<DoubleReal>& decision_values);
432
@brief Scales the data such that every column is scaled to [-1, 1].
434
Scales the x[][].value values of the svm_problem* structure. If the second
435
parameter is omitted, the data is scaled to [-1, 1]. Otherwise the data is scaled to [0, max_scale_value]
437
void scaleData(svm_problem* data, Int max_scale_value = -1);
439
static void calculateGaussTable(Size border_length, DoubleReal sigma, std::vector<DoubleReal>& gauss_table);
442
@brief computes the kernel matrix using the actual svm parameters and the given data
444
This function can be used to compute a kernel matrix. 'problem1' and 'problem2'
445
are used together with the oligo kernel function (could be extended if you
446
want to use your own kernel functions).
449
svm_problem* computeKernelMatrix(svm_problem* problem1, svm_problem* problem2);
452
@brief computes the kernel matrix using the actual svm parameters and the given data
454
This function can be used to compute a kernel matrix. 'problem1' and 'problem2'
455
are used together with the oligo kernel function (could be extended if you
456
want to use your own kernel functions).
459
svm_problem* computeKernelMatrix(const SVMData& problem1, const SVMData& problem2);
462
@brief This is used for being able to perform predictions with non libsvm standard kernels
465
void setTrainingSample(svm_problem* training_sample);
468
@brief This is used for being able to perform predictions with non libsvm standard kernels
471
void setTrainingSample(SVMData& training_sample);
474
@brief This function fills probabilities with the probability estimates for the first class.
476
The libSVM function svm_predict_probability is called to get probability estimates
477
for the positive class. Since this is only used for binary classification it is sufficient
478
for every test example to report the probability of the test example belonging to the positive
479
class. Probability estimates have to be turned on during training (svm.setParameter(PROBABILITY, 1)),
480
otherwise this method will fill the 'probabilities' vector with -1s.
482
void getSVCProbabilities(struct svm_problem* problem, std::vector<DoubleReal>& probabilities, std::vector<DoubleReal>& prediction_labels);
485
@brief Sets weights for the classes in C_SVC (see libsvm documentation for further details)
488
void setWeights(const std::vector<Int>& weight_labels, const std::vector<DoubleReal>& weights);
492
@brief find next grid search parameter combination
494
The current grid cell is given in @p actual_values.
495
The result is returned in @p actual_values.
498
bool nextGrid_(const std::vector<DoubleReal>& start_values,
499
const std::vector<DoubleReal>& step_sizes,
500
const std::vector<DoubleReal>& end_values,
501
const bool additive_step_sizes,
502
std::vector<DoubleReal>& actual_values);
504
Size getNumberOfEnclosedPoints_(DoubleReal m1, DoubleReal m2, const std::vector<std::pair<DoubleReal, DoubleReal> >& points);
507
@brief Initializes the svm with standard parameters
510
void initParameters_();
513
@brief This function is passed to lib svm for output control
515
The intention is to discard the output, as we don't need it.
518
static void printToVoid_(const char * /*s*/);
520
svm_parameter* param_; // the parameters for the svm
521
svm_model* model_; // the learnt svm discriminant
522
DoubleReal sigma_; // for the oligo kernel (amount of positional smearing)
523
std::vector<DoubleReal> sigmas_; // for the combined oligo kernel (amount of positional smearing)
524
std::vector<DoubleReal> gauss_table_; // lookup table for fast computation of the oligo kernel
525
std::vector<std::vector<DoubleReal> > gauss_tables_; // lookup table for fast computation of the combined oligo kernel
526
Size kernel_type_; // the actual kernel type
527
Size border_length_; // the border length
528
svm_problem* training_set_; // the training set
529
svm_problem* training_problem_; // the training set
530
SVMData training_data_; // the training set (different encoding)
163
class OPENMS_DLLAPI SVMWrapper :
164
public ProgressLogger
169
@brief Parameters for the svm to be set from outside
171
This type is used to specify the kind of parameter that
172
is to be set or retrieved by the set/getParameter methods.
174
enum SVM_parameter_type
176
SVM_TYPE, ///< the svm type can be NU_SVR or EPSILON_SVR
177
KERNEL_TYPE, ///< the kernel type
178
DEGREE, ///< the degree for the polynomial- kernel
179
C, ///< the C parameter of the svm
180
NU, ///< the nu parameter for nu-SVR
181
P, ///< the epsilon parameter for epsilon-SVR
182
GAMMA, ///< the gamma parameter of the POLY, RBF and SIGMOID kernel
195
/// standard constructor
199
virtual ~SVMWrapper();
202
@brief You can set the parameters of the svm:
204
KERNEL_TYPE: can be LINEAR for the linear kernel
205
RBF for the rbf kernel
206
POLY for the polynomial kernel
207
SIGMOID for the sigmoid kernel
208
DEGREE: the degree for the polynomial- kernel and the
209
locality- improved kernel
211
C: the C parameter of the svm
213
void setParameter(SVM_parameter_type type, Int value);
216
@brief sets the double parameters of the svm
219
void setParameter(SVM_parameter_type type, DoubleReal value);
222
@brief trains the svm
224
The svm is trained with the data stored in the 'svm_problem' structure.
226
Int train(struct svm_problem * problem);
229
@brief trains the svm
231
The svm is trained with the data stored in the 'SVMData' structure.
233
Int train(SVMData & problem);
236
@brief saves the svm model
238
The model of the trained svm is saved into 'modelFilename'. Throws an exception if
239
the model cannot be saved.
241
@exception Exception::UnableToCreateFile
243
void saveModel(std::string modelFilename) const;
246
@brief loads the model
248
The svm- model is loaded. After this, the svm is ready for
251
void loadModel(std::string modelFilename);
254
@brief predicts the labels using the trained model
256
The prediction process is started and the results are stored in 'predicted_labels'.
259
void predict(struct svm_problem * problem, std::vector<DoubleReal> & predicted_labels);
262
@brief predicts the labels using the trained model
264
The prediction process is started and the results are stored in 'results'.
267
void predict(const SVMData & problem, std::vector<DoubleReal> & results);
270
@brief You can get the actual int- parameters of the svm
272
KERNEL_TYPE: can be LINEAR for the linear kernel
273
RBF for the rbf kernel
274
POLY for the polynomial kernel
275
SIGMOID for the sigmoid kernel
277
DEGREE: the degree for the polynomial- kernel and the
278
locality- improved kernel
280
SVM_TYPE: the SVM type of the svm: can be NU_SVR or EPSILON_SVR
282
Int getIntParameter(SVM_parameter_type type);
285
@brief You can get the actual double- parameters of the svm
287
C: the C parameter of the svm
288
P: the P parameter of the svm (sets the epsilon in
290
NU: the nu parameter in nu-SVR
291
GAMMA: for POLY, RBF and SIGMOID
293
DoubleReal getDoubleParameter(SVM_parameter_type type);
296
@brief You can create 'number' equally sized random partitions
298
This function creates 'number' equally sized random partitions and stores them in 'partitions'.
301
static void createRandomPartitions(svm_problem * problem, Size number, std::vector<svm_problem *> & partitions);
304
@brief You can create 'number' equally sized random partitions
306
This function creates 'number' equally sized random partitions and stores them in 'problems'.
309
static void createRandomPartitions(const SVMData & problem,
311
std::vector<SVMData> & problems);
313
@brief You can merge partitions excluding the partition with index 'except'
316
static svm_problem * mergePartitions(const std::vector<svm_problem *> & problems, Size except);
319
@brief You can merge partitions excluding the partition with index 'except'
322
static void mergePartitions(const std::vector<SVMData> & problems,
324
SVMData & merged_problem);
327
@brief predicts the labels using the trained model
329
The prediction process is started and the results are stored in 'predicted_rts'.
332
void predict(const std::vector<svm_node *> & vectors, std::vector<DoubleReal> & predicted_rts);
335
@brief Stores the stored labels of the encoded SVM data at 'labels'
338
static void getLabels(svm_problem * problem, std::vector<DoubleReal> & labels);
341
@brief Performs a CV for the data given by 'problem'
344
DoubleReal performCrossValidation(svm_problem * problem_ul,
345
const SVMData & problem_l,
346
const bool is_labeled,
347
const std::map<SVM_parameter_type, DoubleReal> & start_values_map,
348
const std::map<SVM_parameter_type, DoubleReal> & step_sizes_map,
349
const std::map<SVM_parameter_type, DoubleReal> & end_values_map,
350
Size number_of_partitions,
352
std::map<SVM_parameter_type, DoubleReal> & best_parameters,
353
bool additive_step_sizes = true,
355
String performances_file_name = "performances.txt",
356
bool mcc_as_performance_measure = false);
360
@brief Returns the probability parameter sigma of the fitted laplace model.
362
The libsvm is used to fit a laplace model to the prediction values by performing
363
an internal cv using the training set if setParameter(PROBABILITY, 1) was invoked
364
before using train. See your libsvm documentation for more details.
365
The model parameter sigma is returned by this method. If no model was fitted during
366
training zero is returned.
368
DoubleReal getSVRProbability();
371
@brief returns the value of the oligo kernel for sequences 'x' and 'y'
373
This function computes the kernel value of the oligo kernel,
374
which was introduced by Meinicke et al. in 2004. 'x' and
375
'y' are encoded by encodeOligo and 'gauss_table' has to be
376
constructed by calculateGaussTable.
378
'max_distance' can be used to speed up the computation
379
even further by restricting the maximum distance between a k_mer at
380
position i in sequence 'x' and a k_mer at position j
381
in sequence 'y'. If i - j > 'max_distance' the value is not
382
added to the kernel value. This approximation is switched
383
off by default (max_distance < 0).
385
static DoubleReal kernelOligo(const std::vector<std::pair<int, double> > & x,
386
const std::vector<std::pair<int, double> > & y,
387
const std::vector<double> & gauss_table,
388
int max_distance = -1);
391
@brief calculates the oligo kernel value for the encoded sequences 'x' and 'y'
393
This kernel function calculates the oligo kernel value [Meinicke 04] for
394
the sequences 'x' and 'y' that had been encoded by the encodeOligoBorder... function
395
of the LibSVMEncoder class.
397
static DoubleReal kernelOligo(const svm_node * x, const svm_node * y, const std::vector<DoubleReal> & gauss_table, DoubleReal sigma_square = 0, Size max_distance = 50);
400
@brief calculates the significance borders of the error model and stores them in 'borders'
403
void getSignificanceBorders(svm_problem * data, std::pair<DoubleReal, DoubleReal> & borders, DoubleReal confidence = 0.95, Size number_of_runs = 5, Size number_of_partitions = 5, DoubleReal step_size = 0.01, Size max_iterations = 1000000);
406
@brief calculates the significance borders of the error model and stores them in 'sigmas'
409
void getSignificanceBorders(const SVMData & data,
410
std::pair<DoubleReal, DoubleReal> & sigmas,
411
DoubleReal confidence = 0.95,
412
Size number_of_runs = 5,
413
Size number_of_partitions = 5,
414
DoubleReal step_size = 0.01,
415
Size max_iterations = 1000000);
418
@brief calculates a p-value for a given data point using the model parameters
420
Uses the model parameters to calculate the p-value for 'point' which has the data
421
entries: measured, predicted retention time.
424
DoubleReal getPValue(DoubleReal sigma1, DoubleReal sigma2, std::pair<DoubleReal, DoubleReal> point);
427
@brief stores the prediction values for the encoded data in 'decision_values'
429
This function can be used to get the prediction values of the data if a model
430
is already trained by the train() method. For regression the result is the same
431
as for the method predict. For classification this function returns the distance from
432
the separating hyperplane. For multiclass classification the decision_values vector
436
void getDecisionValues(svm_problem * data, std::vector<DoubleReal> & decision_values);
439
@brief Scales the data such that every column is scaled to [-1, 1].
441
Scales the x[][].value values of the svm_problem* structure. If the second
442
parameter is omitted, the data is scaled to [-1, 1]. Otherwise the data is scaled to [0, max_scale_value]
444
void scaleData(svm_problem * data, Int max_scale_value = -1);
446
static void calculateGaussTable(Size border_length, DoubleReal sigma, std::vector<DoubleReal> & gauss_table);
449
@brief computes the kernel matrix using the actual svm parameters and the given data
451
This function can be used to compute a kernel matrix. 'problem1' and 'problem2'
452
are used together with the oligo kernel function (could be extended if you
453
want to use your own kernel functions).
456
svm_problem * computeKernelMatrix(svm_problem * problem1, svm_problem * problem2);
459
@brief computes the kernel matrix using the actual svm parameters and the given data
461
This function can be used to compute a kernel matrix. 'problem1' and 'problem2'
462
are used together with the oligo kernel function (could be extended if you
463
want to use your own kernel functions).
466
svm_problem * computeKernelMatrix(const SVMData & problem1, const SVMData & problem2);
469
@brief This is used for being able to perform predictions with non libsvm standard kernels
472
void setTrainingSample(svm_problem * training_sample);
475
@brief This is used for being able to perform predictions with non libsvm standard kernels
478
void setTrainingSample(SVMData & training_sample);
481
@brief This function fills probabilities with the probability estimates for the first class.
483
The libSVM function svm_predict_probability is called to get probability estimates
484
for the positive class. Since this is only used for binary classification it is sufficient
485
for every test example to report the probability of the test example belonging to the positive
486
class. Probability estimates have to be turned on during training (svm.setParameter(PROBABILITY, 1)),
487
otherwise this method will fill the 'probabilities' vector with -1s.
489
void getSVCProbabilities(struct svm_problem * problem, std::vector<DoubleReal> & probabilities, std::vector<DoubleReal> & prediction_labels);
492
@brief Sets weights for the classes in C_SVC (see libsvm documentation for further details)
495
void setWeights(const std::vector<Int> & weight_labels, const std::vector<DoubleReal> & weights);
499
@brief find next grid search parameter combination
501
The current grid cell is given in @p actual_values.
502
The result is returned in @p actual_values.
505
bool nextGrid_(const std::vector<DoubleReal> & start_values,
506
const std::vector<DoubleReal> & step_sizes,
507
const std::vector<DoubleReal> & end_values,
508
const bool additive_step_sizes,
509
std::vector<DoubleReal> & actual_values);
511
Size getNumberOfEnclosedPoints_(DoubleReal m1, DoubleReal m2, const std::vector<std::pair<DoubleReal, DoubleReal> > & points);
514
@brief Initializes the svm with standard parameters
517
void initParameters_();
520
@brief This function is passed to lib svm for output control
522
The intention is to discard the output, as we don't need it.
525
static void printToVoid_(const char * /*s*/);
527
svm_parameter * param_; // the parameters for the svm
528
svm_model * model_; // the learnt svm discriminant
529
DoubleReal sigma_; // for the oligo kernel (amount of positional smearing)
530
std::vector<DoubleReal> sigmas_; // for the combined oligo kernel (amount of positional smearing)
531
std::vector<DoubleReal> gauss_table_; // lookup table for fast computation of the oligo kernel
532
std::vector<std::vector<DoubleReal> > gauss_tables_; // lookup table for fast computation of the combined oligo kernel
533
Size kernel_type_; // the actual kernel type
534
Size border_length_; // the border length
535
svm_problem * training_set_; // the training set
536
svm_problem * training_problem_; // the training set
537
SVMData training_data_; // the training set (different encoding)
534
541
} // namespace OpenMS
536
543
#endif // OPENMS_ANALYSIS_SVM_SVMWRAPPER_H