/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * Copyright (C) 2002 Sylvain Roy
 */
23
package weka.classifiers.functions;
25
import weka.classifiers.Classifier;
26
import weka.classifiers.functions.supportVector.Kernel;
27
import weka.classifiers.functions.supportVector.PolyKernel;
28
import weka.classifiers.functions.supportVector.SMOset;
29
import weka.core.Capabilities;
30
import weka.core.Instance;
31
import weka.core.Instances;
32
import weka.core.Option;
33
import weka.core.OptionHandler;
34
import weka.core.SelectedTag;
36
import weka.core.TechnicalInformation;
37
import weka.core.TechnicalInformationHandler;
38
import weka.core.Utils;
39
import weka.core.WeightedInstancesHandler;
40
import weka.core.Capabilities.Capability;
41
import weka.core.TechnicalInformation.Field;
42
import weka.core.TechnicalInformation.Type;
43
import weka.filters.Filter;
44
import weka.filters.unsupervised.attribute.NominalToBinary;
45
import weka.filters.unsupervised.attribute.Normalize;
46
import weka.filters.unsupervised.attribute.ReplaceMissingValues;
47
import weka.filters.unsupervised.attribute.Standardize;
49
import java.util.Enumeration;
50
import java.util.Vector;
53
<!-- globalinfo-start -->
54
* Implements Alex Smola and Bernhard Scholkopf's sequential minimal optimization algorithm for training a support vector regression model. This implementation globally replaces all missing values and transforms nominal attributes into binary ones. It also normalizes all attributes by default. (Note that the coefficients in the output are based on the normalized/standardized data, not the original data.)<br/>
56
* For more information on the SMO algorithm, see<br/>
58
* Alex J. Smola, Bernhard Schoelkopf: A Tutorial on Support Vector Regression. In NeuroCOLT2 Technical Report Series, 1998.<br/>
60
* S.K. Shevade, S.S. Keerthi, C. Bhattacharyya, K.R.K. Murthy (1999). Improvements to SMO Algorithm for SVM Regression. Control Division Dept of Mechanical and Production Engineering, National University of Singapore.
62
<!-- globalinfo-end -->
64
<!-- technical-bibtex-start -->
67
* @incollection{Smola1998,
68
* author = {Alex J. Smola and Bernhard Schoelkopf},
69
* booktitle = {NeuroCOLT2 Technical Report Series},
70
* note = {NC2-TR-1998-030},
71
* title = {A Tutorial on Support Vector Regression},
75
* @techreport{Shevade1999,
76
* address = {Control Division Dept of Mechanical and Production Engineering, National University of Singapore},
77
* author = {S.K. Shevade and S.S. Keerthi and C. Bhattacharyya and K.R.K. Murthy},
78
* institution = {National University of Singapore},
79
* note = {Technical Report CD-99-16},
80
* title = {Improvements to SMO Algorithm for SVM Regression},
85
<!-- technical-bibtex-end -->
87
<!-- options-start -->
88
* Valid options are: <p/>
91
* If set, classifier is run in debug mode and
92
* may output additional info to the console</pre>
95
* Turns off all checks - use with caution!
96
* Turning them off assumes that data is purely numeric, doesn't
97
* contain any missing values, and has a nominal class. Turning them
98
* off also means that no header information will be stored if the
99
* machine is linear. Finally, it also assumes that no instance has
100
* a weight equal to 0.
101
* (default: checks on)</pre>
103
* <pre> -S <double>
104
* The amount up to which deviations are
105
* tolerated (epsilon). (default 1e-3)</pre>
107
* <pre> -C <double>
108
* The complexity constant C. (default 1)</pre>
111
* Whether to 0=normalize/1=standardize/2=neither. (default 0=normalize)</pre>
113
* <pre> -T <double>
114
* The tolerance parameter. (default 1.0e-3)</pre>
116
* <pre> -P <double>
117
* The epsilon for round-off error. (default 1.0e-12)</pre>
119
* <pre> -K <classname and parameters>
121
* (default: weka.classifiers.functions.supportVector.PolyKernel)</pre>
124
* Options specific to kernel weka.classifiers.functions.supportVector.PolyKernel:
128
* Enables debugging output (if available) to be printed.
129
* (default: off)</pre>
132
* Turns off all checks - use with caution!
133
* (default: checks on)</pre>
135
* <pre> -C <num>
136
* The size of the cache (a prime number), 0 for full cache and
138
* (default: 250007)</pre>
140
* <pre> -E <num>
141
* The Exponent to use.
142
* (default: 1.0)</pre>
145
* Use lower-order terms.
146
* (default: no)</pre>
150
* @author Sylvain Roy (sro33@student.canterbury.ac.nz)
151
* @version $Revision: 1.13 $
155
implements OptionHandler, WeightedInstancesHandler, TechnicalInformationHandler {
157
/** for serialization */
158
static final long serialVersionUID = 5783729368717679645L;
160
/** Kernel to use **/
161
protected Kernel m_kernel = new PolyKernel();
163
/** The class index from the training data */
164
protected int m_classIndex = -1;
166
/** The filter used to make attributes numeric. */
167
protected NominalToBinary m_NominalToBinary;
169
/** normalize data */
170
public static final int FILTER_NORMALIZE = 0;
171
/** standardize data */
172
public static final int FILTER_STANDARDIZE = 1;
174
public static final int FILTER_NONE = 2;
175
/** The filter to apply to the training data */
176
public static final Tag [] TAGS_FILTER = {
177
new Tag(FILTER_NORMALIZE, "Normalize training data"),
178
new Tag(FILTER_STANDARDIZE, "Standardize training data"),
179
new Tag(FILTER_NONE, "No normalization/standardization"),
182
/** The filter used to standardize/normalize all values. */
183
protected Filter m_Filter = null;
185
/** Whether to normalize/standardize/neither */
186
protected int m_filterType = FILTER_NORMALIZE;
188
/** The filter used to get rid of missing values. */
189
protected ReplaceMissingValues m_Missing;
191
/** Turn off all checks and conversions? Turning them off assumes
192
that data is purely numeric, doesn't contain any missing values,
193
and has a numeric class. Turning them off also means that
194
no header information will be stored if the machine is linear.
195
Finally, it also assumes that no instance has a weight equal to 0.*/
196
protected boolean m_checksTurnedOff = false;
198
/** The training data. */
199
protected Instances m_data;
201
/** The complexity parameter */
202
protected double m_C = 1.0;
204
/** The Lagrange multipliers */
205
protected double[] m_alpha;
206
protected double[] m_alpha_;
208
/** The thresholds. */
209
protected double m_b, m_bLow, m_bUp;
211
/** The indices for m_bLow and m_bUp */
212
protected int m_iLow, m_iUp;
214
/** Weight vector for linear machine. */
215
protected double[] m_weights;
217
/** The current set of errors for all non-bound examples. */
218
protected double[] m_fcache;
220
/* The four different sets used by the algorithm. */
221
/** {i: 0 < m_alpha[i] < C || 0 < m_alpha_[i] < C} */
222
protected SMOset m_I0;
223
/** {i: m_class[i] = 0, m_alpha_[i] = 0} */
224
protected SMOset m_I1;
225
/** {i: m_class[i] = 0, m_alpha_[i] = C} */
226
protected SMOset m_I2;
227
/** {i: m_class[i] = C, m_alpha_[i] = 0} */
228
protected SMOset m_I3;
230
/** The parameter epsilon */
231
protected double m_epsilon = 1e-3;
233
/** The parameter tol */
234
protected double m_tol = 1.0e-3;
236
/** The parameter eps */
237
protected double m_eps = 1.0e-12;
239
/** Precision constant for updating sets */
240
protected static double m_Del = 1e-10;
242
/** The parameters of the linear transforamtion realized
243
* by the filter on the class attribute */
244
protected double m_Alin;
245
protected double m_Blin;
247
/** Variables to hold weight vector in sparse form.
248
(To reduce storage requirements.) */
249
protected double[] m_sparseWeights;
250
protected int[] m_sparseIndices;
252
/** whether the kernel is a linear one */
253
protected boolean m_KernelIsLinear = false;
256
* Returns a string describing classifier
257
* @return a description suitable for
258
* displaying in the explorer/experimenter gui
260
public String globalInfo() {
262
return "Implements Alex Smola and Bernhard Scholkopf's sequential minimal "
263
+ "optimization algorithm for training a support vector regression model. "
264
+ "This implementation globally replaces all missing values and "
265
+ "transforms nominal attributes into binary ones. It also "
266
+ "normalizes all attributes by default. (Note that the coefficients "
267
+ "in the output are based on the normalized/standardized data, not the "
268
+ "original data.)\n\n"
269
+ "For more information on the SMO algorithm, see\n\n"
270
+ getTechnicalInformation().toString();
274
* Returns an instance of a TechnicalInformation object, containing
275
* detailed information about the technical background of this class,
276
* e.g., paper reference or book this class is based on.
278
* @return the technical information about this class
280
public TechnicalInformation getTechnicalInformation() {
281
TechnicalInformation result;
282
TechnicalInformation additional;
284
result = new TechnicalInformation(Type.INCOLLECTION);
285
result.setValue(Field.AUTHOR, "Alex J. Smola and Bernhard Schoelkopf");
286
result.setValue(Field.YEAR, "1998");
287
result.setValue(Field.TITLE, "A Tutorial on Support Vector Regression");
288
result.setValue(Field.BOOKTITLE, "NeuroCOLT2 Technical Report Series");
289
result.setValue(Field.NOTE, "NC2-TR-1998-030");
291
additional = result.add(Type.TECHREPORT);
292
additional.setValue(Field.AUTHOR, "S.K. Shevade and S.S. Keerthi and C. Bhattacharyya and K.R.K. Murthy");
293
additional.setValue(Field.YEAR, "1999");
294
additional.setValue(Field.TITLE, "Improvements to SMO Algorithm for SVM Regression");
295
additional.setValue(Field.INSTITUTION, "National University of Singapore");
296
additional.setValue(Field.ADDRESS, "Control Division Dept of Mechanical and Production Engineering, National University of Singapore");
297
additional.setValue(Field.NOTE, "Technical Report CD-99-16");
303
* Returns default capabilities of the classifier.
305
* @return the capabilities of this classifier
307
public Capabilities getCapabilities() {
308
Capabilities result = getKernel().getCapabilities();
309
result.setOwner(this);
312
result.enableAllAttributeDependencies();
313
// with NominalToBinary we can also handle nominal attributes, but only
314
// if the kernel can handle numeric attributes
315
if (result.handles(Capability.NUMERIC_ATTRIBUTES))
316
result.enable(Capability.NOMINAL_ATTRIBUTES);
317
result.enable(Capability.MISSING_VALUES);
320
result.disableAllClasses();
321
result.disableAllClassDependencies();
322
result.enable(Capability.NUMERIC_CLASS);
323
result.enable(Capability.DATE_CLASS);
324
result.enable(Capability.MISSING_CLASS_VALUES);
330
* Method for building the classifier.
332
* @param insts the set of training instances
333
* @throws Exception if the classifier can't be built successfully
335
public void buildClassifier(Instances insts) throws Exception {
337
/* check the set of training instances */
339
if (!m_checksTurnedOff) {
340
// can classifier handle the data?
341
getCapabilities().testWithFail(insts);
343
// remove instances with missing class
344
insts = new Instances(insts);
345
insts.deleteWithMissingClass();
347
/* Removes all the instances with weight equal to 0.
348
MUST be done since condition (6) of Shevade's paper
349
is made with the assertion Ci > 0 (See equation (1a). */
350
Instances data = new Instances(insts, insts.numInstances());
351
for(int i = 0; i < insts.numInstances(); i++){
352
if(insts.instance(i).weight() > 0)
353
data.add(insts.instance(i));
355
if (data.numInstances() == 0) {
356
throw new Exception("No training instances left after removing " +
357
"instances with weight 0!");
362
if (!m_checksTurnedOff) {
363
m_Missing = new ReplaceMissingValues();
364
m_Missing.setInputFormat(insts);
365
insts = Filter.useFilter(insts, m_Missing);
370
if (getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES)) {
371
boolean onlyNumeric = true;
372
if (!m_checksTurnedOff) {
373
for (int i = 0; i < insts.numAttributes(); i++) {
374
if (i != insts.classIndex()) {
375
if (!insts.attribute(i).isNumeric()) {
384
m_NominalToBinary = new NominalToBinary();
385
m_NominalToBinary.setInputFormat(insts);
386
insts = Filter.useFilter(insts, m_NominalToBinary);
389
m_NominalToBinary = null;
393
m_NominalToBinary = null;
396
m_classIndex = insts.classIndex();
397
if (m_filterType == FILTER_STANDARDIZE) {
398
m_Filter = new Standardize();
399
((Standardize)m_Filter).setIgnoreClass(true);
400
m_Filter.setInputFormat(insts);
401
insts = Filter.useFilter(insts, m_Filter);
402
} else if (m_filterType == FILTER_NORMALIZE) {
403
m_Filter = new Normalize();
404
((Normalize)m_Filter).setIgnoreClass(true);
405
m_Filter.setInputFormat(insts);
406
insts = Filter.useFilter(insts, m_Filter);
413
// determine which linear transformation has been
414
// applied to the class by the filter
415
if (m_Filter != null) {
416
Instance witness = (Instance)insts.instance(0).copy();
417
witness.setValue(m_classIndex, 0);
418
m_Filter.input(witness);
419
m_Filter.batchFinished();
420
Instance res = m_Filter.output();
421
m_Blin = res.value(m_classIndex);
422
witness.setValue(m_classIndex, 1);
423
m_Filter.input(witness);
424
m_Filter.batchFinished();
425
res = m_Filter.output();
426
m_Alin = res.value(m_classIndex) - m_Blin;
433
m_kernel.buildKernel(m_data);
434
m_KernelIsLinear = (m_kernel instanceof PolyKernel) && (((PolyKernel) m_kernel).getExponent() == 1.0);
436
// If machine is linear, reserve space for weights
437
if (m_KernelIsLinear) {
438
m_weights = new double[m_data.numAttributes()];
444
m_fcache = new double[m_data.numInstances()];
447
m_I0 = new SMOset(m_data.numInstances());
448
m_I1 = new SMOset(m_data.numInstances());
449
m_I2 = new SMOset(m_data.numInstances());
450
m_I3 = new SMOset(m_data.numInstances());
453
/* MAIN ROUTINE FOR MODIFICATION 1 */
454
// Follows the specification of the first modification of Shevade's paper
456
// Initialize alpha array to zero
457
m_alpha = new double[m_data.numInstances()];
458
m_alpha_ = new double[m_data.numInstances()];
460
// set I1 to contain all the examples
461
for(int i = 0; i < m_data.numInstances(); i++){
465
// choose any example i from the training set : i = 0
466
m_bUp = m_data.instance(0).classValue() + m_epsilon;
467
m_bLow = m_data.instance(0).classValue() - m_epsilon;
471
boolean examineAll = true;
472
while(numChanged > 0 || examineAll){
475
// loop over all the example
476
for(int I = 0; I < m_alpha.length; I++){
477
numChanged += examineExample(I);
481
for (int I = m_I0.getNext(-1); I != -1; I = m_I0.getNext(I)) {
482
numChanged += examineExample(I);
483
if(m_bUp > m_bLow - 2 * m_tol){
491
else if (numChanged == 0)
495
/* END OF MAIN ROUTINE FOR MODIFICATION 1 */
498
// checkOptimality();
503
m_b = (m_bLow + m_bUp) / 2.0;
506
m_kernel.clean(); m_fcache = null;
507
m_I0 = m_I1 = m_I2 = m_I3 = null;
509
// If machine is linear, delete training data
510
// and store weight vector in sparse format
511
if (m_KernelIsLinear) {
513
// compute weight vector
514
for(int j = 0; j < m_weights.length; j++){
517
for(int k = 0; k < m_alpha.length; k++){
518
for(int j = 0; j < m_weights.length; j++){
519
if(j == m_data.classIndex())
521
m_weights[j] += (m_alpha[k] - m_alpha_[k]) * m_data.instance(k).value(j);
525
// Convert weight vector
526
double[] sparseWeights = new double[m_weights.length];
527
int[] sparseIndices = new int[m_weights.length];
529
for (int ii = 0; ii < m_weights.length; ii++) {
530
if (m_weights[ii] != 0.0) {
531
sparseWeights[counter] = m_weights[ii];
532
sparseIndices[counter] = ii;
536
m_sparseWeights = new double[counter];
537
m_sparseIndices = new int[counter];
538
System.arraycopy(sparseWeights, 0, m_sparseWeights, 0, counter);
539
System.arraycopy(sparseIndices, 0, m_sparseIndices, 0, counter);
541
// Clean out training data
542
if (!m_checksTurnedOff) {
543
m_data = new Instances(m_data, 0);
548
// Clean out weight vector
551
// We don't need the alphas in the linear case
558
* Examines instance. (As defined in Shevade's paper.)
560
* @param i2 index of instance to examine
561
* @return true if examination was successfull
562
* @throws Exception if something goes wrong
564
protected int examineExample(int i2) throws Exception{
566
// Lagrange multipliers for i2
567
double alpha2 = m_alpha[i2];
568
double alpha2_ = m_alpha_[i2];
571
if(m_I0.contains(i2)){
574
// compute F2 = F_i2 and set f-cache[i2] = F2
575
F2 = m_data.instance(i2).classValue();
576
for(int j = 0; j < m_alpha.length; j++){
577
F2 -= (m_alpha[j] - m_alpha_[j]) * m_kernel.eval(i2, j, m_data.instance(i2));
581
// Update (b_low, i_low) or (b_up, i_up) using (F2, i2)...
582
if(m_I1.contains(i2)){
583
if(F2 + m_epsilon < m_bUp){
584
m_bUp = F2 + m_epsilon;
586
} else if (F2 - m_epsilon > m_bLow){
587
m_bLow = F2 - m_epsilon;
590
} else if(m_I2.contains(i2) && (F2+m_epsilon > m_bLow)){
591
m_bLow = F2 + m_epsilon;
593
} else if(m_I3.contains(i2) && (F2-m_epsilon < m_bUp)){
594
m_bUp = F2 - m_epsilon;
599
// check optimality using current b_low and b_up and, if
600
// violated, find an index i1 to do joint optimization with i2...
601
boolean optimality = true;
604
// case 1 : i2 is in I_0a
605
if(m_I0.contains(i2) && 0 < alpha2 && alpha2 < m_C * m_data.instance(i2).weight()){
606
if(m_bLow-(F2-m_epsilon) > 2 * m_tol){
609
// for i2 in I_0a choose the better i1
610
if((F2 - m_epsilon) - m_bUp > m_bLow - (F2 - m_epsilon)){
613
}else if((F2 - m_epsilon)-m_bUp > 2 * m_tol){
616
// for i2 in I_0a choose the better i1...
617
if(m_bLow-(F2-m_epsilon) > (F2-m_epsilon)-m_bUp){
623
// case 2 : i2 is in I_0b
624
else if(m_I0.contains(i2) && 0 < alpha2_ && alpha2_ < m_C * m_data.instance(i2).weight()){
625
if(m_bLow-(F2+m_epsilon) > 2 * m_tol){
628
// for i2 in I_0b choose the better i1
629
if((F2 + m_epsilon) - m_bUp > m_bLow - (F2 + m_epsilon)){
632
}else if((F2 + m_epsilon)-m_bUp > 2 * m_tol){
635
// for i2 in I_0b choose the better i1...
636
if(m_bLow-(F2+m_epsilon) > (F2+m_epsilon)-m_bUp){
642
// case 3 : i2 is in I_1
643
else if(m_I1.contains(i2)){
644
if(m_bLow-(F2+m_epsilon) > 2 * m_tol){
647
// for i2 in I_1 choose the better i1
648
if((F2 + m_epsilon) - m_bUp > m_bLow - (F2 + m_epsilon)){
651
} else if((F2 - m_epsilon)-m_bUp > 2 * m_tol){
654
// for i2 in I_1 choose the better i1...
655
if(m_bLow-(F2-m_epsilon) > (F2-m_epsilon)-m_bUp){
661
// case 4 : i2 is in I_2
662
else if(m_I2.contains(i2)){
663
if((F2+m_epsilon)-m_bUp > 2 * m_tol){
669
// case 5 : i2 is in I_3
670
else if(m_I3.contains(i2)){
671
if(m_bLow-(F2-m_epsilon) > 2 * m_tol){
678
throw new RuntimeException("Inconsistent state ! I0, I1, I2 and I3 " +
679
"must cover the whole set of indices.");
686
if(takeStep(i1, i2)){
695
* Method solving for the Lagrange multipliers for
696
* two instances. (As defined in Shevade's paper.)
698
* @param i1 index of the first instance
699
* @param i2 index of the second instance
700
* @return true if multipliers could be found
701
* @throws Exception if something goes wrong
703
protected boolean takeStep(int i1, int i2) throws Exception{
709
double alpha1 = m_alpha[i1];
710
double alpha1_ = m_alpha_[i1];
711
double alpha2 = m_alpha[i2];
712
double alpha2_ = m_alpha_[i2];
714
double F1 = m_fcache[i1];
715
double F2 = m_fcache[i2];
717
double k11 = m_kernel.eval(i1, i1, m_data.instance(i1));
718
double k12 = m_kernel.eval(i1, i2, m_data.instance(i1));
719
double k22 = m_kernel.eval(i2, i2, m_data.instance(i2));
720
double eta = -2*k12+k11+k22;
721
double gamma = alpha1 - alpha1_ + alpha2 - alpha2_;
723
// In case of numerical instabilies eta might be sligthly less than 0.
724
// (Theoretically, it cannot happen with a kernel which respects Mercer's condition.)
728
boolean case1 = false, case2 = false, case3 = false, case4 = false, finished = false;
729
double deltaphi = F1 - F2;
731
boolean changed = false;
736
// this loop is passed at most three times
737
// Case variables needed to avoid attempting small changes twices
740
(alpha1 > 0 || (alpha1_ == 0 && deltaphi > 0)) &&
741
(alpha2 > 0 || (alpha2_ == 0 && deltaphi < 0)) ){
743
// compute L, H (wrt alpha1, alpha2)
744
L = java.lang.Math.max(0, gamma - m_C * m_data.instance(i1).weight());
745
H = java.lang.Math.min(m_C * m_data.instance(i2).weight(), gamma);
749
a2 = alpha2 - (deltaphi / eta);
751
else if(a2 < L) a2 = L;
753
double Lobj = -L*deltaphi;
754
double Hobj = -H*deltaphi;
760
a1 = alpha1 - (a2 - alpha2);
761
// update alpha1, alpha2 if change is larger than some eps
762
if(java.lang.Math.abs(a1-alpha1) > m_eps ||
763
java.lang.Math.abs(a2-alpha2) > m_eps){
774
(alpha1 > 0 || (alpha1_ == 0 && deltaphi > 2 * m_epsilon)) &&
775
(alpha2_ > 0 || (alpha2 == 0 && deltaphi > 2 * m_epsilon)) ){
777
// compute L, H (wrt alpha1, alpha2_)
778
L = java.lang.Math.max(0, -gamma);
779
H = java.lang.Math.min(m_C * m_data.instance(i2).weight(), -gamma + m_C*m_data.instance(i1).weight());
783
a2 = alpha2_ + ((deltaphi - 2*m_epsilon) / eta);
785
else if(a2 < L) a2 = L;
787
double Lobj = L*(-2*m_epsilon + deltaphi);
788
double Hobj = H*(-2*m_epsilon + deltaphi);
794
a1 = alpha1 + (a2 - alpha2_);
795
// update alpha1, alpha2_ if change is larger than some eps
796
if(java.lang.Math.abs(a1-alpha1) > m_eps ||
797
java.lang.Math.abs(a2-alpha2_) > m_eps){
808
(alpha1_ > 0 || (alpha1 == 0 && deltaphi < -2 * m_epsilon)) &&
809
(alpha2 > 0 || (alpha2_ == 0 && deltaphi < -2 * m_epsilon)) ){
811
// compute L, H (wrt alpha1_, alpha2)
812
L = java.lang.Math.max(0, gamma);
813
H = java.lang.Math.min(m_C * m_data.instance(i2).weight(), m_C * m_data.instance(i1).weight() + gamma);
817
a2 = alpha2 - ((deltaphi + 2*m_epsilon) / eta);
819
else if(a2 < L) a2 = L;
821
double Lobj = -L*(2*m_epsilon + deltaphi);
822
double Hobj = -H*(2*m_epsilon + deltaphi);
828
a1 = alpha1_ + (a2 - alpha2);
829
// update alpha1_, alpha2 if change is larger than some eps
830
if(java.lang.Math.abs(a1-alpha1_) > m_eps ||
831
java.lang.Math.abs(a2-alpha2) > m_eps){
842
(alpha1_ > 0 || (alpha1 == 0 && deltaphi < 0)) &&
843
(alpha2_ > 0 || (alpha2 == 0 && deltaphi > 0)) ){
845
// compute L, H (wrt alpha1_, alpha2_)
846
L = java.lang.Math.max(0, -gamma - m_C * m_data.instance(i1).weight());
847
H = java.lang.Math.min(m_C * m_data.instance(i2).weight(), -gamma);
851
a2 = alpha2_ + deltaphi / eta;
853
else if(a2 < L) a2 = L;
855
double Lobj = L*deltaphi;
856
double Hobj = H*deltaphi;
862
a1 = alpha1_ - (a2 - alpha2_);
863
// update alpha1_, alpha2_ if change is larger than some eps
864
if(java.lang.Math.abs(a1-alpha1_) > m_eps ||
865
java.lang.Math.abs(a2-alpha2_) > m_eps){
879
deltaphi += eta * ((alpha2 - alpha2_) - (m_alpha[i2] - m_alpha_[i2]));
884
// update f-cache[i] for i in I_0 using new Lagrange multipliers
885
for (int i = m_I0.getNext(-1); i != -1; i = m_I0.getNext(i)) {
886
if (i != i1 && i != i2){
888
((m_alpha[i1] - m_alpha_[i1]) - (alpha1 - alpha1_)) * m_kernel.eval(i1, i, m_data.instance(i1)) +
889
((m_alpha[i2] - m_alpha_[i2]) - (alpha2 - alpha2_)) * m_kernel.eval(i2, i, m_data.instance(i2));
893
// update f-cache[i1] and f-cache[i2]
895
((m_alpha[i1] - m_alpha_[i1]) - (alpha1 - alpha1_)) * k11 +
896
((m_alpha[i2] - m_alpha_[i2]) - (alpha2 - alpha2_)) * k12;
898
((m_alpha[i1] - m_alpha_[i1]) - (alpha1 - alpha1_)) * k12 +
899
((m_alpha[i2] - m_alpha_[i2]) - (alpha2 - alpha2_)) * k22;
901
// to prevent precision problems
902
if(alpha1 > m_C * m_data.instance(i1).weight() - m_Del * m_C * m_data.instance(i1).weight()){
903
alpha1 = m_C * m_data.instance(i1).weight();
904
} else if (alpha1 <= m_Del * m_C * m_data.instance(i1).weight()){
907
if(alpha1_ > m_C * m_data.instance(i1).weight() - m_Del * m_C * m_data.instance(i1).weight()){
908
alpha1_ = m_C * m_data.instance(i1).weight();
909
} else if (alpha1_ <= m_Del * m_C * m_data.instance(i1).weight()){
912
if(alpha2 > m_C * m_data.instance(i2).weight() - m_Del * m_C * m_data.instance(i2).weight()){
913
alpha2 = m_C * m_data.instance(i2).weight();
914
} else if (alpha2 <= m_Del * m_C * m_data.instance(i2).weight()){
917
if(alpha2_ > m_C * m_data.instance(i2).weight() - m_Del * m_C * m_data.instance(i2).weight()){
918
alpha2_ = m_C * m_data.instance(i2).weight();
919
} else if (alpha2_ <= m_Del * m_C * m_data.instance(i2).weight()){
923
// Store the changes in alpha, alpha' array
924
m_alpha[i1] = alpha1;
925
m_alpha_[i1] = alpha1_;
926
m_alpha[i2] = alpha2;
927
m_alpha_[i2] = alpha2_;
929
// update I_0, I_1, I_2, I_3
930
if((0 < alpha1 && alpha1 < m_C * m_data.instance(i1).weight()) ||
931
(0 < alpha1_ && alpha1_ < m_C * m_data.instance(i1).weight())){
936
if((alpha1 == 0 && alpha1_ == 0)){
941
if((alpha1 == 0 && alpha1_ == m_C * m_data.instance(i1).weight())){
946
if((alpha1 == m_C * m_data.instance(i1).weight() && alpha1_ == 0)){
951
if((0 < alpha2 && alpha2 < m_C * m_data.instance(i2).weight()) ||
952
(0 < alpha2_ && alpha2_ < m_C * m_data.instance(i2).weight())){
957
if((alpha2 == 0 && alpha2_ == 0)){
962
if((alpha2 == 0 && alpha2_ == m_C * m_data.instance(i2).weight())){
967
if((alpha2 == m_C * m_data.instance(i2).weight() && alpha2_ == 0)){
977
// Compute (i_low, b_low) and (i_up, b_up) by applying the conditions
978
// mentionned above, using only i1, i2 and indices in I_0
979
m_bLow = -Double.MAX_VALUE; m_bUp = Double.MAX_VALUE;
980
m_iLow =-1; m_iUp = -1;
981
for (int i = m_I0.getNext(-1); i != -1; i = m_I0.getNext(i)) {
982
if(0 < m_alpha_[i] && m_alpha_[i] < m_C * m_data.instance(i).weight() &&
983
(m_fcache[i] + m_epsilon > m_bLow)){
984
m_bLow = m_fcache[i] + m_epsilon; m_iLow = i;
985
} else if(0 < m_alpha[i] && m_alpha[i] < m_C * m_data.instance(i).weight() &&
986
(m_fcache[i] - m_epsilon > m_bLow)){
987
m_bLow = m_fcache[i] - m_epsilon; m_iLow = i;
989
if(0 < m_alpha[i] && m_alpha[i] < m_C * m_data.instance(i).weight() &&
990
(m_fcache[i] - m_epsilon < m_bUp)){
991
m_bUp = m_fcache[i] - m_epsilon; m_iUp = i;
992
} else if(0 < m_alpha_[i] && m_alpha_[i] < m_C * m_data.instance(i).weight() &&
993
(m_fcache[i] + m_epsilon < m_bUp)){
994
m_bUp = m_fcache[i] + m_epsilon; m_iUp = i;
998
if(!m_I0.contains(i1)){
999
if(m_I2.contains(i1) &&
1000
(m_fcache[i1] + m_epsilon > m_bLow)){
1001
m_bLow = m_fcache[i1] + m_epsilon; m_iLow = i1;
1002
} else if (m_I1.contains(i1) &&
1003
(m_fcache[i1] - m_epsilon > m_bLow)){
1004
m_bLow = m_fcache[i1] - m_epsilon; m_iLow = i1;
1006
if(m_I3.contains(i1) &&
1007
(m_fcache[i1] - m_epsilon < m_bUp)){
1008
m_bUp = m_fcache[i1] - m_epsilon; m_iUp = i1;
1009
} else if (m_I1.contains(i1) &&
1010
(m_fcache[i1] + m_epsilon < m_bUp)){
1011
m_bUp = m_fcache[i1] + m_epsilon; m_iUp = i1;
1015
if(!m_I0.contains(i2)){
1016
if(m_I2.contains(i2) &&
1017
(m_fcache[i2] + m_epsilon > m_bLow)){
1018
m_bLow = m_fcache[i2] + m_epsilon; m_iLow = i2;
1019
} else if (m_I1.contains(i2) &&
1020
(m_fcache[i2] - m_epsilon > m_bLow)){
1021
m_bLow = m_fcache[i2] - m_epsilon; m_iLow = i2;
1023
if(m_I3.contains(i2) &&
1024
(m_fcache[i2] - m_epsilon < m_bUp)){
1025
m_bUp = m_fcache[i2] - m_epsilon; m_iUp = i2;
1026
} else if (m_I1.contains(i2) &&
1027
(m_fcache[i2] + m_epsilon < m_bUp)){
1028
m_bUp = m_fcache[i2] + m_epsilon; m_iUp = i2;
1031
if(m_iLow == -1 || m_iUp == -1){
1032
throw new RuntimeException("Fatal error! The program failled to "
1033
+ "initialize i_Low, iUp.");
1043
* Classifies a given instance.
1045
* @param inst the instance to be classified
1046
* @return the classification
1047
* @throws Exception if instance could not be classified
1050
public double classifyInstance(Instance inst) throws Exception {
1053
if (!m_checksTurnedOff) {
1054
m_Missing.input(inst);
1055
m_Missing.batchFinished();
1056
inst = m_Missing.output();
1059
if (m_NominalToBinary != null) {
1060
m_NominalToBinary.input(inst);
1061
m_NominalToBinary.batchFinished();
1062
inst = m_NominalToBinary.output();
1065
if (m_Filter != null) {
1066
m_Filter.input(inst);
1067
m_Filter.batchFinished();
1068
inst = m_Filter.output();
1073
double result = m_b;
1075
// Is the machine linear?
1076
if (m_KernelIsLinear) {
1078
// Is weight vector stored in sparse format?
1079
if (m_sparseWeights == null) {
1080
int n1 = inst.numValues();
1081
for (int p = 0; p < n1; p++) {
1082
if (inst.index(p) != m_classIndex) {
1083
result += m_weights[inst.index(p)] * inst.valueSparse(p);
1087
int n1 = inst.numValues(); int n2 = m_sparseWeights.length;
1088
for (int p1 = 0, p2 = 0; p1 < n1 && p2 < n2;) {
1089
int ind1 = inst.index(p1);
1090
int ind2 = m_sparseIndices[p2];
1092
if (ind1 != m_classIndex) {
1093
result += inst.valueSparse(p1) * m_sparseWeights[p2];
1096
} else if (ind1 > ind2) {
1104
for (int i = 0; i < m_alpha.length; i++) {
1105
result += (m_alpha[i] - m_alpha_[i]) * m_kernel.eval(-1, i, inst);
1109
// apply the inverse transformation
1110
// (due to the normalization/standardization)
1111
return (result - m_Blin) / m_Alin;
1116
* Returns an enumeration describing the available options.
1118
* @return an enumeration of all the available options.
1120
public Enumeration listOptions() {
1122
Vector result = new Vector();
1124
Enumeration enm = super.listOptions();
1125
while (enm.hasMoreElements())
1126
result.addElement(enm.nextElement());
1128
result.addElement(new Option(
1129
"\tTurns off all checks - use with caution!\n"
1130
+ "\tTurning them off assumes that data is purely numeric, doesn't\n"
1131
+ "\tcontain any missing values, and has a nominal class. Turning them\n"
1132
+ "\toff also means that no header information will be stored if the\n"
1133
+ "\tmachine is linear. Finally, it also assumes that no instance has\n"
1134
+ "\ta weight equal to 0.\n"
1135
+ "\t(default: checks on)",
1136
"no-checks", 0, "-no-checks"));
1138
result.addElement(new Option(
1139
"\tThe amount up to which deviations are\n"
1140
+ "\ttolerated (epsilon). (default 1e-3)",
1141
"S", 1, "-S <double>"));
1143
result.addElement(new Option(
1144
"\tThe complexity constant C. (default 1)",
1145
"C", 1, "-C <double>"));
1147
result.addElement(new Option(
1148
"\tWhether to 0=normalize/1=standardize/2=neither. " +
1149
"(default 0=normalize)",
1152
result.addElement(new Option(
1153
"\tThe tolerance parameter. " +
1155
"T", 1, "-T <double>"));
1157
result.addElement(new Option(
1158
"\tThe epsilon for round-off error. " +
1159
"(default 1.0e-12)",
1160
"P", 1, "-P <double>"));
1162
result.addElement(new Option(
1163
"\tThe Kernel to use.\n"
1164
+ "\t(default: weka.classifiers.functions.supportVector.PolyKernel)",
1165
"K", 1, "-K <classname and parameters>"));
1167
result.addElement(new Option(
1169
"", 0, "\nOptions specific to kernel "
1170
+ getKernel().getClass().getName() + ":"));
1172
enm = ((OptionHandler) getKernel()).listOptions();
1173
while (enm.hasMoreElements())
1174
result.addElement(enm.nextElement());
1176
return result.elements();
1181
* Parses a given list of options. <p/>
1183
<!-- options-end -->
1184
<!-- options-end -->
1186
* @param options the list of options as an array of strings
1187
* @throws Exception if an option is not supported
1189
public void setOptions(String[] options) throws Exception {
1191
String[] tmpOptions;
1193
setChecksTurnedOff(Utils.getFlag("no-checks", options));
1195
tmpStr = Utils.getOption('S', options);
1196
if (tmpStr.length() != 0)
1197
setEpsilon(Double.parseDouble(tmpStr));
1201
tmpStr = Utils.getOption('C', options);
1202
if (tmpStr.length() != 0)
1203
setC(Double.parseDouble(tmpStr));
1207
tmpStr = Utils.getOption('T', options);
1208
if (tmpStr.length() != 0)
1209
setToleranceParameter(Double.parseDouble(tmpStr));
1211
setToleranceParameter(1.0e-3);
1213
tmpStr = Utils.getOption('P', options);
1214
if (tmpStr.length() != 0)
1215
setEps(Double.parseDouble(tmpStr));
1219
tmpStr = Utils.getOption('N', options);
1220
if (tmpStr.length() != 0)
1221
setFilterType(new SelectedTag(Integer.parseInt(tmpStr), TAGS_FILTER));
1223
setFilterType(new SelectedTag(FILTER_NORMALIZE, TAGS_FILTER));
1225
tmpStr = Utils.getOption('K', options);
1226
tmpOptions = Utils.splitOptions(tmpStr);
1227
if (tmpOptions.length != 0) {
1228
tmpStr = tmpOptions[0];
1230
setKernel(Kernel.forName(tmpStr, tmpOptions));
1233
super.setOptions(options);
1237
* Gets the current settings of the classifier.
1239
* @return an array of strings suitable for passing to setOptions
1241
public String[] getOptions() {
1246
result = new Vector();
1247
options = super.getOptions();
1248
for (i = 0; i < options.length; i++)
1249
result.add(options[i]);
1251
if (getChecksTurnedOff())
1252
result.add("-no-checks");
1255
result.add("" + getEpsilon());
1258
result.add("" + getC());
1261
result.add("" + getToleranceParameter());
1264
result.add("" + getEps());
1267
result.add("" + m_filterType);
1270
result.add("" + getKernel().getClass().getName() + " " + Utils.joinOptions(getKernel().getOptions()));
1272
return (String[]) result.toArray(new String[result.size()]);
1276
* Disables or enables the checks (which could be time-consuming). Use with
1279
* @param value if true turns off all checks
1281
public void setChecksTurnedOff(boolean value) {
1289
* Returns whether the checks are turned off or not.
1291
* @return true if the checks are turned off
1293
public boolean getChecksTurnedOff() {
1294
return m_checksTurnedOff;
1298
* Returns the tip text for this property
1300
* @return tip text for this property suitable for
1301
* displaying in the explorer/experimenter gui
1303
public String checksTurnedOffTipText() {
1304
return "Turns time-consuming checks off - use with caution.";
1308
* Returns the tip text for this property
1310
* @return tip text for this property suitable for
1311
* displaying in the explorer/experimenter gui
1313
public String kernelTipText() {
1314
return "The kernel to use.";
1318
* Gets the kernel to use.
1320
* @return the kernel
1322
public Kernel getKernel() {
1327
* Sets the kernel to use.
1329
* @param value the kernel
1331
public void setKernel(Kernel value) {
1336
* Returns the tip text for this property
1337
* @return tip text for this property suitable for
1338
* displaying in the explorer/experimenter gui
1340
public String filterTypeTipText() {
1341
return "Determines how/if the data will be transformed.";
1345
* Gets how the training data will be transformed. Will be one of
1346
* FILTER_NORMALIZE, FILTER_STANDARDIZE, FILTER_NONE.
1348
* @return the filtering mode
1350
public SelectedTag getFilterType() {
1352
return new SelectedTag(m_filterType, TAGS_FILTER);
1356
* Sets how the training data will be transformed. Should be one of
1357
* FILTER_NORMALIZE, FILTER_STANDARDIZE, FILTER_NONE.
1359
* @param newType the new filtering mode
1361
public void setFilterType(SelectedTag newType) {
1363
if (newType.getTags() == TAGS_FILTER) {
1364
m_filterType = newType.getSelectedTag().getID();
1369
* Returns the tip text for this property
1370
* @return tip text for this property suitable for
1371
* displaying in the explorer/experimenter gui
1373
public String cTipText() {
1374
return "The complexity parameter C.";
1378
* Get the value of C.
1380
* @return Value of C.
1382
public double getC() {
1388
* Set the value of C.
1390
* @param v Value to assign to C.
1392
public void setC(double v) {
1398
* Returns the tip text for this property
1399
* @return tip text for this property suitable for
1400
* displaying in the explorer/experimenter gui
1402
public String toleranceParameterTipText() {
1403
return "The tolerance parameter (shouldn't be changed).";
1407
* Get the value of tolerance parameter.
1408
* @return Value of tolerance parameter.
1410
public double getToleranceParameter() {
1416
* Set the value of tolerance parameter.
1417
* @param v Value to assign to tolerance parameter.
1419
public void setToleranceParameter(double v) {
1425
* Returns the tip text for this property
1426
* @return tip text for this property suitable for
1427
* displaying in the explorer/experimenter gui
1429
public String epsTipText() {
1430
return "The epsilon for round-off error (shouldn't be changed).";
1434
* Get the value of eps.
1435
* @return Value of eps.
1437
public double getEps() {
1443
* Set the value of eps.
1444
* @param v Value to assign to epsilon.
1446
public void setEps(double v) {
1452
* Returns the tip text for this property
1453
* @return tip text for this property suitable for
1454
* displaying in the explorer/experimenter gui
1456
public String epsilonTipText() {
1457
return "The amount up to which deviations are tolerated. "
1458
+ "Watch out, the value of epsilon is used with the (normalized/standardized) "
1463
* Get the value of epsilon.
1464
* @return Value of epsilon.
1466
public double getEpsilon() {
1472
* Set the value of epsilon.
1473
* @param v Value to assign to epsilon.
1475
public void setEpsilon(double v) {
1482
* Turns off checks for missing values, etc. Use with caution.
1484
public void turnChecksOff() {
1486
m_checksTurnedOff = true;
1491
* Turns on checks for missing values, etc.
1493
public void turnChecksOn() {
1495
m_checksTurnedOff = false;
1500
* Prints out the classifier.
1502
* @return a description of the classifier as a string
1504
public String toString() {
1506
StringBuffer text = new StringBuffer();
1509
if ((m_alpha == null) && (m_sparseWeights == null)) {
1510
return "SMOreg : No model built yet.";
1513
text.append("SMOreg\n\n");
1514
text.append("Kernel used:\n " + m_kernel.toString() + "\n\n");
1516
// display the linear transformation
1518
if (m_filterType == FILTER_STANDARDIZE) {
1519
//text.append("LINEAR TRANSFORMATION APPLIED : \n");
1520
trans = "(standardized) ";
1521
//text.append(trans + m_data.classAttribute().name() + " = " +
1522
// m_Alin + " * " + m_data.classAttribute().name() + " + " + m_Blin + "\n\n");
1523
} else if (m_filterType == FILTER_NORMALIZE) {
1524
//text.append("LINEAR TRANSFORMATION APPLIED : \n");
1525
trans = "(normalized) ";
1526
//text.append(trans + m_data.classAttribute().name() + " = " +
1527
// m_Alin + " * " + m_data.classAttribute().name() + " + " + m_Blin + "\n\n");
1530
// If machine linear, print weight vector
1531
if (m_KernelIsLinear) {
1532
text.append("Machine Linear: showing attribute weights, ");
1533
text.append("not support vectors.\n");
1535
// We can assume that the weight vector is stored in sparse
1536
// format because the classifier has been built
1537
text.append(trans + m_data.classAttribute().name() + " =\n");
1538
for (int i = 0; i < m_sparseWeights.length; i++) {
1539
if (m_sparseIndices[i] != (int)m_classIndex) {
1545
text.append(Utils.doubleToString(m_sparseWeights[i], 12, 4) +
1547
if (m_filterType == FILTER_STANDARDIZE) {
1548
text.append("(standardized) ");
1549
} else if (m_filterType == FILTER_NORMALIZE) {
1550
text.append("(normalized) ");
1552
if (!m_checksTurnedOff) {
1553
text.append(m_data.attribute(m_sparseIndices[i]).name()+"\n");
1555
text.append("attribute with index " +
1556
m_sparseIndices[i] +"\n");
1562
text.append("Support Vector Expansion :\n");
1563
text.append(trans + m_data.classAttribute().name() + " =\n");
1565
for (int i = 0; i < m_alpha.length; i++) {
1566
double val = m_alpha[i] - m_alpha_[i];
1567
if (java.lang.Math.abs(val) < 1e-4)
1574
text.append(Utils.doubleToString(val, 12, 4)
1575
+ " * K[X(" + i + "), X]\n");
1580
text.append(" + " + Utils.doubleToString(m_b, 12, 4));
1582
text.append(" - " + Utils.doubleToString(-m_b, 12, 4));
1584
if (!m_KernelIsLinear) {
1585
text.append("\n\nNumber of support vectors: " + printed);
1588
int numCacheHits = -1;
1589
if(m_kernel != null)
1591
numEval = m_kernel.numEvals();
1592
numCacheHits = m_kernel.numCacheHits();
1594
text.append("\n\nNumber of kernel evaluations: " + numEval);
1595
if (numCacheHits >= 0 && numEval > 0)
1597
double hitRatio = 1 - numEval/(numCacheHits+numEval);
1598
text.append(" (" + Utils.doubleToString(hitRatio*100, 7, 3) + "% cached)");
1600
} catch (Exception e) {
1601
return "Can't print the classifier.";
1604
return text.toString();
1608
* Main method for testing this class.
1610
* @param argv the commandline options
1612
public static void main(String[] argv) {
1613
runClassifier(new SMOreg(), argv);
1620
* Debuggage function.
1621
* Compute the value of the objective function.
1623
* @return the value of the objective function
1624
* @throws Exception if computation fails
1626
protected double objFun() throws Exception {
1629
double t = 0, t2 = 0;
1630
for(int i = 0; i < m_alpha.length; i++){
1631
for(int j = 0; j < m_alpha.length; j++){
1632
t += (m_alpha[i] - m_alpha_[i]) * (m_alpha[j] - m_alpha_[j]) * m_kernel.eval(i,j,m_data.instance(i));
1634
t2 += m_data.instance(i).classValue() * (m_alpha[i] - m_alpha_[i]) - m_epsilon * (m_alpha[i] + m_alpha_[i]);
1636
res += -0.5 * t + t2;
1642
* Debuggage function.
1643
* Compute the value of the objective function.
1651
* @throws Exception if something goes wrong
1653
protected double objFun(int i1, int i2,
1654
double alpha1, double alpha1_,
1655
double alpha2, double alpha2_) throws Exception {
1658
double t = 0, t2 = 0;
1659
for(int i = 0; i < m_alpha.length; i++){
1663
alphai = alpha1; alphai_ = alpha1_;
1665
alphai = alpha2; alphai_ = alpha2_;
1667
alphai = m_alpha[i]; alphai_ = m_alpha_[i];
1669
for(int j = 0; j < m_alpha.length; j++){
1673
alphaj = alpha1; alphaj_ = alpha1_;
1675
alphaj = alpha2; alphaj_ = alpha2_;
1677
alphaj = m_alpha[j]; alphaj_ = m_alpha_[j];
1679
t += (alphai - alphai_) * (alphaj - alphaj_) * m_kernel.eval(i,j,m_data.instance(i));
1681
t2 += m_data.instance(i).classValue() * (alphai - alphai_) - m_epsilon * (alphai + alphai_);
1683
res += -0.5 * t + t2;
1690
* Debuggage function.
1691
* Check that the set I0, I1, I2 and I3 cover the whole set of index
1692
* and that no attribute appears in two different sets.
1694
* @throws Exception if check fails
1696
protected void checkSets() throws Exception{
1698
boolean[] test = new boolean[m_data.numInstances()];
1699
for (int i = m_I0.getNext(-1); i != -1; i = m_I0.getNext(i)) {
1701
throw new Exception("Fatal error! indice " + i + " appears in two different sets.");
1705
if( !((0 < m_alpha[i] && m_alpha[i] < m_C * m_data.instance(i).weight()) ||
1706
(0 < m_alpha_[i] && m_alpha_[i] < m_C * m_data.instance(i).weight())) ){
1707
throw new Exception("Warning! I0 contains an incorrect indice.");
1710
for (int i = m_I1.getNext(-1); i != -1; i = m_I1.getNext(i)) {
1712
throw new Exception("Fatal error! indice " + i + " appears in two different sets.");
1716
if( !( m_alpha[i] == 0 && m_alpha_[i] == 0) ){
1717
throw new Exception("Fatal error! I1 contains an incorrect indice.");
1720
for (int i = m_I2.getNext(-1); i != -1; i = m_I2.getNext(i)) {
1722
throw new Exception("Fatal error! indice " + i + " appears in two different sets.");
1726
if( !(m_alpha[i] == 0 && m_alpha_[i] == m_C * m_data.instance(i).weight()) ){
1727
throw new Exception("Fatal error! I2 contains an incorrect indice.");
1730
for (int i = m_I3.getNext(-1); i != -1; i = m_I3.getNext(i)) {
1732
throw new Exception("Fatal error! indice " + i + " appears in two different sets.");
1736
if( !(m_alpha_[i] == 0 && m_alpha[i] == m_C * m_data.instance(i).weight()) ){
1737
throw new Exception("Fatal error! I3 contains an incorrect indice.");
1740
for (int i = 0; i < test.length; i++){
1742
throw new Exception("Fatal error! indice " + i + " doesn't belong to any set.");
1749
* Debuggage function <br/>
1750
* Checks that : <br/>
1751
* alpha*alpha_=0 <br/>
1752
* sum(alpha[i] - alpha_[i]) = 0
1754
* @throws Exception if check fails
1756
protected void checkAlphas() throws Exception{
1759
for(int i = 0; i < m_alpha.length; i++){
1760
if(!(0 == m_alpha[i] || m_alpha_[i] == 0)){
1761
throw new Exception("Fatal error! Inconsistent alphas!");
1763
sum += (m_alpha[i] - m_alpha_[i]);
1766
throw new Exception("Fatal error! Inconsistent alphas' sum = " + sum);
1773
* Debuggage function.
1774
* Display the current status of the program.
1775
* @param i1 the first current indice
1776
* @param i2 the second current indice
1778
* @throws Exception if printing of current status fails
1780
protected void displayStat(int i1, int i2) throws Exception {
1782
System.err.println("\n-------- Status : ---------");
1783
System.err.println("\n i, alpha, alpha'\n");
1784
for(int i = 0; i < m_alpha.length; i++){
1785
double result = (m_bLow + m_bUp)/2.0;
1786
for (int j = 0; j < m_alpha.length; j++) {
1787
result += (m_alpha[j] - m_alpha_[j]) * m_kernel.eval(i, j, m_data.instance(i));
1789
System.err.print(" " + i + ": (" + m_alpha[i] + ", " + m_alpha_[i] +
1790
"), " + (m_data.instance(i).classValue() - m_epsilon) + " <= " +
1791
result + " <= " + (m_data.instance(i).classValue() + m_epsilon));
1793
System.err.print(" <-- i1");
1796
System.err.print(" <-- i2");
1798
System.err.println();
1800
System.err.println("bLow = " + m_bLow + " bUp = " + m_bUp);
1801
System.err.println("---------------------------\n");
1806
* Debuggage function
1807
* Compute and display bLow, lUp and so on...
1809
* @throws Exception if display fails
1811
protected void displayB() throws Exception {
1813
//double bUp = Double.NEGATIVE_INFINITY;
1814
//double bLow = Double.POSITIVE_INFINITY;
1815
//int iUp = -1, iLow = -1;
1816
for(int i = 0; i < m_data.numInstances(); i++){
1817
double Fi = m_data.instance(i).classValue();
1818
for(int j = 0; j < m_alpha.length; j++){
1819
Fi -= (m_alpha[j] - m_alpha_[j]) * m_kernel.eval(i, j, m_data.instance(i));
1821
System.err.print("(" + m_alpha[i] + ", " + m_alpha_[i] + ") : ");
1822
System.err.print((Fi - m_epsilon) + ", " + (Fi + m_epsilon));
1823
double fim = Fi - m_epsilon, fip = Fi + m_epsilon;
1825
if (m_I0.contains(i)){
1826
if ( 0 < m_alpha[i] && m_alpha[i] < m_C * m_data.instance(i).weight()){
1827
s += "(in I0a) bUp = min(bUp, " + fim + ") bLow = max(bLow, " + fim + ")";
1829
if ( 0 < m_alpha_[i] && m_alpha_[i] < m_C * m_data.instance(i).weight()){
1830
s += "(in I0a) bUp = min(bUp, " + fip + ") bLow = max(bLow, " + fip + ")";
1833
if (m_I1.contains(i)){
1834
s += "(in I1) bUp = min(bUp, " + fip + ") bLow = max(bLow, " + fim + ")";
1836
if (m_I2.contains(i)){
1837
s += "(in I2) bLow = max(bLow, " + fip + ")";
1839
if (m_I3.contains(i)){
1840
s += "(in I3) bUp = min(bUp, " + fim + ")";
1842
System.err.println(" " + s + " {" + (m_alpha[i]-1) + ", " + (m_alpha_[i]-1) + "}");
1844
System.err.println("\n\n");
1852
* Debuggage function.
1853
* Checks if the equations (6), (8a), (8b), (8c), (8d) hold.
1854
* (Refers to "Improvements to SMO Algorithm for SVM Regression".)
1855
* Prints warnings for each equation which doesn't hold.
1857
* @throws Exception if check fails
1859
protected void checkOptimality() throws Exception {
1861
double bUp = Double.POSITIVE_INFINITY;
1862
double bLow = Double.NEGATIVE_INFINITY;
1863
int iUp = -1, iLow = -1;
1865
for(int i = 0; i < m_data.numInstances(); i++){
1867
double Fi = m_data.instance(i).classValue();
1868
for(int j = 0; j < m_alpha.length; j++){
1869
Fi -= (m_alpha[j] - m_alpha_[j]) * m_kernel.eval(i, j, m_data.instance(i));
1872
double fitilde = 0, fibarre = 0;
1873
if(m_I0.contains(i) && 0 < m_alpha[i] && m_alpha[i] < m_C * m_data.instance(i).weight()){
1874
fitilde = Fi - m_epsilon;
1875
fibarre = Fi - m_epsilon;
1877
if(m_I0.contains(i) && 0 < m_alpha_[i] && m_alpha_[i] < m_C * m_data.instance(i).weight()){
1878
fitilde = Fi + m_epsilon;
1879
fibarre = Fi + m_epsilon;
1881
if(m_I1.contains(i)){
1882
fitilde = Fi - m_epsilon;
1883
fibarre = Fi + m_epsilon;
1885
if(m_I2.contains(i)){
1886
fitilde = Fi + m_epsilon;
1887
fibarre = Double.POSITIVE_INFINITY;
1889
if(m_I3.contains(i)){
1890
fitilde = Double.NEGATIVE_INFINITY;
1891
fibarre = Fi - m_epsilon;
1902
if(!(bLow <= bUp + 2 * m_tol)){
1903
System.err.println("Warning! Optimality not reached : inequation (6) doesn't hold!");
1906
boolean noPb = true;
1907
for(int i = 0; i < m_data.numInstances(); i++){
1908
double Fi = m_data.instance(i).classValue();
1909
for(int j = 0; j < m_alpha.length; j++){
1910
Fi -= (m_alpha[j] - m_alpha_[j]) * m_kernel.eval(i, j, m_data.instance(i));
1912
double Ei = Fi - ((m_bUp + m_bLow) / 2.0);
1913
if((m_alpha[i] > 0) && !(Ei >= m_epsilon - m_tol)){
1914
System.err.println("Warning! Optimality not reached : inequation (8a) doesn't hold for " + i);
1917
if((m_alpha[i] < m_C * m_data.instance(i).weight()) && !(Ei <= m_epsilon + m_tol)){
1918
System.err.println("Warning! Optimality not reached : inequation (8b) doesn't hold for " + i);
1921
if((m_alpha_[i] > 0) && !(Ei <= -m_epsilon + m_tol)){
1922
System.err.println("Warning! Optimality not reached : inequation (8c) doesn't hold for " + i);
1925
if((m_alpha_[i] < m_C * m_data.instance(i).weight()) && !(Ei >= -m_epsilon - m_tol)){
1926
System.err.println("Warning! Optimality not reached : inequation (8d) doesn't hold for " + i);
1931
System.err.println();
1932
//displayStat(-1,-1);