~ubuntu-branches/ubuntu/raring/python-scipy/raring-proposed

« back to all changes in this revision

Viewing changes to Lib/sandbox/svm/libsvm-2.82/svm.cpp

  • Committer: Bazaar Package Importer
  • Author(s): Matthias Klose
  • Date: 2007-01-07 14:12:12 UTC
  • mfrom: (1.1.1 upstream)
  • Revision ID: james.westby@ubuntu.com-20070107141212-mm0ebkh5b37hcpzn
* Remove build dependency on python-numpy-dev.
* python-scipy: Depend on python-numpy instead of python-numpy-dev.
* Package builds on other archs than i386. Closes: #402783.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
#include <math.h>
 
2
#include <stdio.h>
 
3
#include <stdlib.h>
 
4
#include <ctype.h>
 
5
#include <float.h>
 
6
#include <string.h>
 
7
#include <stdarg.h>
 
8
#include "svm.h"
 
9
typedef float Qfloat;
 
10
typedef signed char schar;
 
11
#ifndef min
 
12
template <class T> inline T min(T x,T y) { return (x<y)?x:y; }
 
13
#endif
 
14
#ifndef max
 
15
template <class T> inline T max(T x,T y) { return (x>y)?x:y; }
 
16
#endif
 
17
template <class T> inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
 
18
template <class S, class T> inline void clone(T*& dst, S* src, int n)
 
19
{
 
20
        dst = new T[n];
 
21
        memcpy((void *)dst,(void *)src,sizeof(T)*n);
 
22
}
 
23
inline double powi(double base, int times)
 
24
{
 
25
        double tmp = base, ret = 1.0;
 
26
 
 
27
        for(int t=times; t>0; t/=2)
 
28
        {
 
29
                if(t%2==1) ret*=tmp;
 
30
                tmp = tmp * tmp;
 
31
        }
 
32
        return ret;
 
33
}
 
34
#define INF HUGE_VAL
 
35
#define TAU 1e-12
 
36
#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
 
37
#if 1
 
38
void info(char *fmt,...)
 
39
{
 
40
        va_list ap;
 
41
        va_start(ap,fmt);
 
42
        vprintf(fmt,ap);
 
43
        va_end(ap);
 
44
}
 
45
void info_flush()
 
46
{
 
47
        fflush(stdout);
 
48
}
 
49
#else
 
50
void info(char *fmt,...) {}
 
51
void info_flush() {}
 
52
#endif
 
53
 
 
54
//
 
55
// Kernel Cache
 
56
//
 
57
// l is the number of total data items
 
58
// size is the cache size limit in bytes
 
59
//
 
60
class Cache
 
61
{
 
62
public:
 
63
        Cache(int l,int size);
 
64
        ~Cache();
 
65
 
 
66
        // request data [0,len)
 
67
        // return some position p where [p,len) need to be filled
 
68
        // (p >= len if nothing needs to be filled)
 
69
        int get_data(const int index, Qfloat **data, int len);
 
70
        void swap_index(int i, int j);  // future_option
 
71
private:
 
72
        int l;
 
73
        int size;
 
74
        struct head_t
 
75
        {
 
76
                head_t *prev, *next;    // a cicular list
 
77
                Qfloat *data;
 
78
                int len;                // data[0,len) is cached in this entry
 
79
        };
 
80
 
 
81
        head_t *head;
 
82
        head_t lru_head;
 
83
        void lru_delete(head_t *h);
 
84
        void lru_insert(head_t *h);
 
85
};
 
86
 
 
87
Cache::Cache(int l_,int size_):l(l_),size(size_)
 
88
{
 
89
        head = (head_t *)calloc(l,sizeof(head_t));      // initialized to 0
 
90
        size /= sizeof(Qfloat);
 
91
        size -= l * sizeof(head_t) / sizeof(Qfloat);
 
92
        size = max(size, 2*l);  // cache must be large enough for two columns
 
93
        lru_head.next = lru_head.prev = &lru_head;
 
94
}
 
95
 
 
96
Cache::~Cache()
 
97
{
 
98
        for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
 
99
                free(h->data);
 
100
        free(head);
 
101
}
 
102
 
 
103
void Cache::lru_delete(head_t *h)
 
104
{
 
105
        // delete from current location
 
106
        h->prev->next = h->next;
 
107
        h->next->prev = h->prev;
 
108
}
 
109
 
 
110
void Cache::lru_insert(head_t *h)
 
111
{
 
112
        // insert to last position
 
113
        h->next = &lru_head;
 
114
        h->prev = lru_head.prev;
 
115
        h->prev->next = h;
 
116
        h->next->prev = h;
 
117
}
 
118
 
 
119
int Cache::get_data(const int index, Qfloat **data, int len)
 
120
{
 
121
        head_t *h = &head[index];
 
122
        if(h->len) lru_delete(h);
 
123
        int more = len - h->len;
 
124
 
 
125
        if(more > 0)
 
126
        {
 
127
                // free old space
 
128
                while(size < more)
 
129
                {
 
130
                        head_t *old = lru_head.next;
 
131
                        lru_delete(old);
 
132
                        free(old->data);
 
133
                        size += old->len;
 
134
                        old->data = 0;
 
135
                        old->len = 0;
 
136
                }
 
137
 
 
138
                // allocate new space
 
139
                h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
 
140
                size -= more;
 
141
                swap(h->len,len);
 
142
        }
 
143
 
 
144
        lru_insert(h);
 
145
        *data = h->data;
 
146
        return len;
 
147
}
 
148
 
 
149
void Cache::swap_index(int i, int j)
 
150
{
 
151
        if(i==j) return;
 
152
 
 
153
        if(head[i].len) lru_delete(&head[i]);
 
154
        if(head[j].len) lru_delete(&head[j]);
 
155
        swap(head[i].data,head[j].data);
 
156
        swap(head[i].len,head[j].len);
 
157
        if(head[i].len) lru_insert(&head[i]);
 
158
        if(head[j].len) lru_insert(&head[j]);
 
159
 
 
160
        if(i>j) swap(i,j);
 
161
        for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
 
162
        {
 
163
                if(h->len > i)
 
164
                {
 
165
                        if(h->len > j)
 
166
                                swap(h->data[i],h->data[j]);
 
167
                        else
 
168
                        {
 
169
                                // give up
 
170
                                lru_delete(h);
 
171
                                free(h->data);
 
172
                                size += h->len;
 
173
                                h->data = 0;
 
174
                                h->len = 0;
 
175
                        }
 
176
                }
 
177
        }
 
178
}
 
179
 
 
180
//
 
181
// Kernel evaluation
 
182
//
 
183
// the static method k_function is for doing single kernel evaluation
 
184
// the constructor of Kernel prepares to calculate the l*l kernel matrix
 
185
// the member function get_Q is for getting one column from the Q Matrix
 
186
//
 
187
class QMatrix {
 
188
public:
 
189
        virtual Qfloat *get_Q(int column, int len) const = 0;
 
190
        virtual Qfloat *get_QD() const = 0;
 
191
        virtual void swap_index(int i, int j) const = 0;
 
192
        virtual ~QMatrix() {}
 
193
};
 
194
 
 
195
class Kernel: public QMatrix {
 
196
public:
 
197
        Kernel(int l, svm_node * const * x, const svm_parameter& param);
 
198
        virtual ~Kernel();
 
199
 
 
200
        static double k_function(const svm_node *x, const svm_node *y,
 
201
                                 const svm_parameter& param);
 
202
        virtual Qfloat *get_Q(int column, int len) const = 0;
 
203
        virtual Qfloat *get_QD() const = 0;
 
204
        virtual void swap_index(int i, int j) const     // no so const...
 
205
        {
 
206
                swap(x[i],x[j]);
 
207
                if(x_square) swap(x_square[i],x_square[j]);
 
208
        }
 
209
protected:
 
210
 
 
211
        double (Kernel::*kernel_function)(int i, int j) const;
 
212
 
 
213
private:
 
214
        const svm_node **x;
 
215
        double *x_square;
 
216
 
 
217
        // svm_parameter
 
218
        const int kernel_type;
 
219
        const int degree;
 
220
        const double gamma;
 
221
        const double coef0;
 
222
 
 
223
        static double dot(const svm_node *px, const svm_node *py);
 
224
        double kernel_linear(int i, int j) const
 
225
        {
 
226
                return dot(x[i],x[j]);
 
227
        }
 
228
        double kernel_poly(int i, int j) const
 
229
        {
 
230
                return powi(gamma*dot(x[i],x[j])+coef0,degree);
 
231
        }
 
232
        double kernel_rbf(int i, int j) const
 
233
        {
 
234
                return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
 
235
        }
 
236
        double kernel_sigmoid(int i, int j) const
 
237
        {
 
238
                return tanh(gamma*dot(x[i],x[j])+coef0);
 
239
        }
 
240
        double kernel_precomputed(int i, int j) const
 
241
        {
 
242
                return x[i][(int)(x[j][0].value)].value;
 
243
        }
 
244
};
 
245
 
 
246
Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
 
247
:kernel_type(param.kernel_type), degree(param.degree),
 
248
 gamma(param.gamma), coef0(param.coef0)
 
249
{
 
250
        switch(kernel_type)
 
251
        {
 
252
                case LINEAR:
 
253
                        kernel_function = &Kernel::kernel_linear;
 
254
                        break;
 
255
                case POLY:
 
256
                        kernel_function = &Kernel::kernel_poly;
 
257
                        break;
 
258
                case RBF:
 
259
                        kernel_function = &Kernel::kernel_rbf;
 
260
                        break;
 
261
                case SIGMOID:
 
262
                        kernel_function = &Kernel::kernel_sigmoid;
 
263
                case PRECOMPUTED:
 
264
                        kernel_function = &Kernel::kernel_precomputed;
 
265
                        break;
 
266
        }
 
267
 
 
268
        clone(x,x_,l);
 
269
 
 
270
        if(kernel_type == RBF)
 
271
        {
 
272
                x_square = new double[l];
 
273
                for(int i=0;i<l;i++)
 
274
                        x_square[i] = dot(x[i],x[i]);
 
275
        }
 
276
        else
 
277
                x_square = 0;
 
278
}
 
279
 
 
280
Kernel::~Kernel()
 
281
{
 
282
        delete[] x;
 
283
        delete[] x_square;
 
284
}
 
285
 
 
286
double Kernel::dot(const svm_node *px, const svm_node *py)
 
287
{
 
288
        double sum = 0;
 
289
        while(px->index != -1 && py->index != -1)
 
290
        {
 
291
                if(px->index == py->index)
 
292
                {
 
293
                        sum += px->value * py->value;
 
294
                        ++px;
 
295
                        ++py;
 
296
                }
 
297
                else
 
298
                {
 
299
                        if(px->index > py->index)
 
300
                                ++py;
 
301
                        else
 
302
                                ++px;
 
303
                }                       
 
304
        }
 
305
        return sum;
 
306
}
 
307
 
 
308
double Kernel::k_function(const svm_node *x, const svm_node *y,
 
309
                          const svm_parameter& param)
 
310
{
 
311
        switch(param.kernel_type)
 
312
        {
 
313
                case LINEAR:
 
314
                        return dot(x,y);
 
315
                case POLY:
 
316
                        return powi(param.gamma*dot(x,y)+param.coef0,param.degree);
 
317
                case RBF:
 
318
                {
 
319
                        double sum = 0;
 
320
                        while(x->index != -1 && y->index !=-1)
 
321
                        {
 
322
                                if(x->index == y->index)
 
323
                                {
 
324
                                        double d = x->value - y->value;
 
325
                                        sum += d*d;
 
326
                                        ++x;
 
327
                                        ++y;
 
328
                                }
 
329
                                else
 
330
                                {
 
331
                                        if(x->index > y->index)
 
332
                                        {       
 
333
                                                sum += y->value * y->value;
 
334
                                                ++y;
 
335
                                        }
 
336
                                        else
 
337
                                        {
 
338
                                                sum += x->value * x->value;
 
339
                                                ++x;
 
340
                                        }
 
341
                                }
 
342
                        }
 
343
 
 
344
                        while(x->index != -1)
 
345
                        {
 
346
                                sum += x->value * x->value;
 
347
                                ++x;
 
348
                        }
 
349
 
 
350
                        while(y->index != -1)
 
351
                        {
 
352
                                sum += y->value * y->value;
 
353
                                ++y;
 
354
                        }
 
355
                        
 
356
                        return exp(-param.gamma*sum);
 
357
                }
 
358
                case SIGMOID:
 
359
                        return tanh(param.gamma*dot(x,y)+param.coef0);
 
360
                case PRECOMPUTED:  //x: test (validation), y: SV
 
361
                        return x[(int)(y->value)].value;
 
362
                default:
 
363
                        return 0;       /* Unreachable */
 
364
        }
 
365
}
 
366
 
 
367
// Generalized SMO+SVMlight algorithm
 
368
// Solves:
 
369
//
 
370
//      min 0.5(\alpha^T Q \alpha) + b^T \alpha
 
371
//
 
372
//              y^T \alpha = \delta
 
373
//              y_i = +1 or -1
 
374
//              0 <= alpha_i <= Cp for y_i = 1
 
375
//              0 <= alpha_i <= Cn for y_i = -1
 
376
//
 
377
// Given:
 
378
//
 
379
//      Q, b, y, Cp, Cn, and an initial feasible point \alpha
 
380
//      l is the size of vectors and matrices
 
381
//      eps is the stopping criterion
 
382
//
 
383
// solution will be put in \alpha, objective value will be put in obj
 
384
//
 
385
class Solver {
 
386
public:
 
387
        Solver() {};
 
388
        virtual ~Solver() {};
 
389
 
 
390
        struct SolutionInfo {
 
391
                double obj;
 
392
                double rho;
 
393
                double upper_bound_p;
 
394
                double upper_bound_n;
 
395
                double r;       // for Solver_NU
 
396
        };
 
397
 
 
398
        void Solve(int l, const QMatrix& Q, const double *b_, const schar *y_,
 
399
                   double *alpha_, double Cp, double Cn, double eps,
 
400
                   SolutionInfo* si, int shrinking);
 
401
protected:
 
402
        int active_size;
 
403
        schar *y;
 
404
        double *G;              // gradient of objective function
 
405
        enum { LOWER_BOUND, UPPER_BOUND, FREE };
 
406
        char *alpha_status;     // LOWER_BOUND, UPPER_BOUND, FREE
 
407
        double *alpha;
 
408
        const QMatrix *Q;
 
409
        const Qfloat *QD;
 
410
        double eps;
 
411
        double Cp,Cn;
 
412
        double *b;
 
413
        int *active_set;
 
414
        double *G_bar;          // gradient, if we treat free variables as 0
 
415
        int l;
 
416
        bool unshrinked;        // XXX
 
417
 
 
418
        double get_C(int i)
 
419
        {
 
420
                return (y[i] > 0)? Cp : Cn;
 
421
        }
 
422
        void update_alpha_status(int i)
 
423
        {
 
424
                if(alpha[i] >= get_C(i))
 
425
                        alpha_status[i] = UPPER_BOUND;
 
426
                else if(alpha[i] <= 0)
 
427
                        alpha_status[i] = LOWER_BOUND;
 
428
                else alpha_status[i] = FREE;
 
429
        }
 
430
        bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
 
431
        bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
 
432
        bool is_free(int i) { return alpha_status[i] == FREE; }
 
433
        void swap_index(int i, int j);
 
434
        void reconstruct_gradient();
 
435
        virtual int select_working_set(int &i, int &j);
 
436
        virtual int max_violating_pair(int &i, int &j);
 
437
        virtual double calculate_rho();
 
438
        virtual void do_shrinking();
 
439
};
 
440
 
 
441
void Solver::swap_index(int i, int j)
 
442
{
 
443
        Q->swap_index(i,j);
 
444
        swap(y[i],y[j]);
 
445
        swap(G[i],G[j]);
 
446
        swap(alpha_status[i],alpha_status[j]);
 
447
        swap(alpha[i],alpha[j]);
 
448
        swap(b[i],b[j]);
 
449
        swap(active_set[i],active_set[j]);
 
450
        swap(G_bar[i],G_bar[j]);
 
451
}
 
452
 
 
453
void Solver::reconstruct_gradient()
 
454
{
 
455
        // reconstruct inactive elements of G from G_bar and free variables
 
456
 
 
457
        if(active_size == l) return;
 
458
 
 
459
        int i;
 
460
        for(i=active_size;i<l;i++)
 
461
                G[i] = G_bar[i] + b[i];
 
462
        
 
463
        for(i=0;i<active_size;i++)
 
464
                if(is_free(i))
 
465
                {
 
466
                        const Qfloat *Q_i = Q->get_Q(i,l);
 
467
                        double alpha_i = alpha[i];
 
468
                        for(int j=active_size;j<l;j++)
 
469
                                G[j] += alpha_i * Q_i[j];
 
470
                }
 
471
}
 
472
 
 
473
void Solver::Solve(int l, const QMatrix& Q, const double *b_, const schar *y_,
 
474
                   double *alpha_, double Cp, double Cn, double eps,
 
475
                   SolutionInfo* si, int shrinking)
 
476
{
 
477
        this->l = l;
 
478
        this->Q = &Q;
 
479
        QD=Q.get_QD();
 
480
        clone(b, b_,l);
 
481
        clone(y, y_,l);
 
482
        clone(alpha,alpha_,l);
 
483
        this->Cp = Cp;
 
484
        this->Cn = Cn;
 
485
        this->eps = eps;
 
486
        unshrinked = false;
 
487
 
 
488
        // initialize alpha_status
 
489
        {
 
490
                alpha_status = new char[l];
 
491
                for(int i=0;i<l;i++)
 
492
                        update_alpha_status(i);
 
493
        }
 
494
 
 
495
        // initialize active set (for shrinking)
 
496
        {
 
497
                active_set = new int[l];
 
498
                for(int i=0;i<l;i++)
 
499
                        active_set[i] = i;
 
500
                active_size = l;
 
501
        }
 
502
 
 
503
        // initialize gradient
 
504
        {
 
505
                G = new double[l];
 
506
                G_bar = new double[l];
 
507
                int i;
 
508
                for(i=0;i<l;i++)
 
509
                {
 
510
                        G[i] = b[i];
 
511
                        G_bar[i] = 0;
 
512
                }
 
513
                for(i=0;i<l;i++)
 
514
                        if(!is_lower_bound(i))
 
515
                        {
 
516
                                const Qfloat *Q_i = Q.get_Q(i,l);
 
517
                                double alpha_i = alpha[i];
 
518
                                int j;
 
519
                                for(j=0;j<l;j++)
 
520
                                        G[j] += alpha_i*Q_i[j];
 
521
                                if(is_upper_bound(i))
 
522
                                        for(j=0;j<l;j++)
 
523
                                                G_bar[j] += get_C(i) * Q_i[j];
 
524
                        }
 
525
        }
 
526
 
 
527
        // optimization step
 
528
 
 
529
        int iter = 0;
 
530
        int counter = min(l,1000)+1;
 
531
 
 
532
        while(1)
 
533
        {
 
534
                // show progress and do shrinking
 
535
 
 
536
                if(--counter == 0)
 
537
                {
 
538
                        counter = min(l,1000);
 
539
                        if(shrinking) do_shrinking();
 
540
                        info("."); info_flush();
 
541
                }
 
542
 
 
543
                int i,j;
 
544
                if(select_working_set(i,j)!=0)
 
545
                {
 
546
                        // reconstruct the whole gradient
 
547
                        reconstruct_gradient();
 
548
                        // reset active set size and check
 
549
                        active_size = l;
 
550
                        info("*"); info_flush();
 
551
                        if(select_working_set(i,j)!=0)
 
552
                                break;
 
553
                        else
 
554
                                counter = 1;    // do shrinking next iteration
 
555
                }
 
556
                
 
557
                ++iter;
 
558
 
 
559
                // update alpha[i] and alpha[j], handle bounds carefully
 
560
                
 
561
                const Qfloat *Q_i = Q.get_Q(i,active_size);
 
562
                const Qfloat *Q_j = Q.get_Q(j,active_size);
 
563
 
 
564
                double C_i = get_C(i);
 
565
                double C_j = get_C(j);
 
566
 
 
567
                double old_alpha_i = alpha[i];
 
568
                double old_alpha_j = alpha[j];
 
569
 
 
570
                if(y[i]!=y[j])
 
571
                {
 
572
                        double quad_coef = Q_i[i]+Q_j[j]+2*Q_i[j];
 
573
                        if (quad_coef <= 0)
 
574
                                quad_coef = TAU;
 
575
                        double delta = (-G[i]-G[j])/quad_coef;
 
576
                        double diff = alpha[i] - alpha[j];
 
577
                        alpha[i] += delta;
 
578
                        alpha[j] += delta;
 
579
                        
 
580
                        if(diff > 0)
 
581
                        {
 
582
                                if(alpha[j] < 0)
 
583
                                {
 
584
                                        alpha[j] = 0;
 
585
                                        alpha[i] = diff;
 
586
                                }
 
587
                        }
 
588
                        else
 
589
                        {
 
590
                                if(alpha[i] < 0)
 
591
                                {
 
592
                                        alpha[i] = 0;
 
593
                                        alpha[j] = -diff;
 
594
                                }
 
595
                        }
 
596
                        if(diff > C_i - C_j)
 
597
                        {
 
598
                                if(alpha[i] > C_i)
 
599
                                {
 
600
                                        alpha[i] = C_i;
 
601
                                        alpha[j] = C_i - diff;
 
602
                                }
 
603
                        }
 
604
                        else
 
605
                        {
 
606
                                if(alpha[j] > C_j)
 
607
                                {
 
608
                                        alpha[j] = C_j;
 
609
                                        alpha[i] = C_j + diff;
 
610
                                }
 
611
                        }
 
612
                }
 
613
                else
 
614
                {
 
615
                        double quad_coef = Q_i[i]+Q_j[j]-2*Q_i[j];
 
616
                        if (quad_coef <= 0)
 
617
                                quad_coef = TAU;
 
618
                        double delta = (G[i]-G[j])/quad_coef;
 
619
                        double sum = alpha[i] + alpha[j];
 
620
                        alpha[i] -= delta;
 
621
                        alpha[j] += delta;
 
622
 
 
623
                        if(sum > C_i)
 
624
                        {
 
625
                                if(alpha[i] > C_i)
 
626
                                {
 
627
                                        alpha[i] = C_i;
 
628
                                        alpha[j] = sum - C_i;
 
629
                                }
 
630
                        }
 
631
                        else
 
632
                        {
 
633
                                if(alpha[j] < 0)
 
634
                                {
 
635
                                        alpha[j] = 0;
 
636
                                        alpha[i] = sum;
 
637
                                }
 
638
                        }
 
639
                        if(sum > C_j)
 
640
                        {
 
641
                                if(alpha[j] > C_j)
 
642
                                {
 
643
                                        alpha[j] = C_j;
 
644
                                        alpha[i] = sum - C_j;
 
645
                                }
 
646
                        }
 
647
                        else
 
648
                        {
 
649
                                if(alpha[i] < 0)
 
650
                                {
 
651
                                        alpha[i] = 0;
 
652
                                        alpha[j] = sum;
 
653
                                }
 
654
                        }
 
655
                }
 
656
 
 
657
                // update G
 
658
 
 
659
                double delta_alpha_i = alpha[i] - old_alpha_i;
 
660
                double delta_alpha_j = alpha[j] - old_alpha_j;
 
661
                
 
662
                for(int k=0;k<active_size;k++)
 
663
                {
 
664
                        G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
 
665
                }
 
666
 
 
667
                // update alpha_status and G_bar
 
668
 
 
669
                {
 
670
                        bool ui = is_upper_bound(i);
 
671
                        bool uj = is_upper_bound(j);
 
672
                        update_alpha_status(i);
 
673
                        update_alpha_status(j);
 
674
                        int k;
 
675
                        if(ui != is_upper_bound(i))
 
676
                        {
 
677
                                Q_i = Q.get_Q(i,l);
 
678
                                if(ui)
 
679
                                        for(k=0;k<l;k++)
 
680
                                                G_bar[k] -= C_i * Q_i[k];
 
681
                                else
 
682
                                        for(k=0;k<l;k++)
 
683
                                                G_bar[k] += C_i * Q_i[k];
 
684
                        }
 
685
 
 
686
                        if(uj != is_upper_bound(j))
 
687
                        {
 
688
                                Q_j = Q.get_Q(j,l);
 
689
                                if(uj)
 
690
                                        for(k=0;k<l;k++)
 
691
                                                G_bar[k] -= C_j * Q_j[k];
 
692
                                else
 
693
                                        for(k=0;k<l;k++)
 
694
                                                G_bar[k] += C_j * Q_j[k];
 
695
                        }
 
696
                }
 
697
        }
 
698
 
 
699
        // calculate rho
 
700
 
 
701
        si->rho = calculate_rho();
 
702
 
 
703
        // calculate objective value
 
704
        {
 
705
                double v = 0;
 
706
                int i;
 
707
                for(i=0;i<l;i++)
 
708
                        v += alpha[i] * (G[i] + b[i]);
 
709
 
 
710
                si->obj = v/2;
 
711
        }
 
712
 
 
713
        // put back the solution
 
714
        {
 
715
                for(int i=0;i<l;i++)
 
716
                        alpha_[active_set[i]] = alpha[i];
 
717
        }
 
718
 
 
719
        // juggle everything back
 
720
        /*{
 
721
                for(int i=0;i<l;i++)
 
722
                        while(active_set[i] != i)
 
723
                                swap_index(i,active_set[i]);
 
724
                                // or Q.swap_index(i,active_set[i]);
 
725
        }*/
 
726
 
 
727
        si->upper_bound_p = Cp;
 
728
        si->upper_bound_n = Cn;
 
729
 
 
730
        info("\noptimization finished, #iter = %d\n",iter);
 
731
 
 
732
        delete[] b;
 
733
        delete[] y;
 
734
        delete[] alpha;
 
735
        delete[] alpha_status;
 
736
        delete[] active_set;
 
737
        delete[] G;
 
738
        delete[] G_bar;
 
739
}
 
740
 
 
741
// return 1 if already optimal, return 0 otherwise
 
742
int Solver::select_working_set(int &out_i, int &out_j)
 
743
{
 
744
        // return i,j such that
 
745
        // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
 
746
        // j: minimizes the decrease of obj value
 
747
        //    (if quadratic coefficeint <= 0, replace it with tau)
 
748
        //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
 
749
        
 
750
        double Gmax = -INF;
 
751
        double Gmax2 = -INF;
 
752
        int Gmax_idx = -1;
 
753
        int Gmin_idx = -1;
 
754
        double obj_diff_min = INF;
 
755
 
 
756
        for(int t=0;t<active_size;t++)
 
757
                if(y[t]==+1)    
 
758
                {
 
759
                        if(!is_upper_bound(t))
 
760
                                if(-G[t] >= Gmax)
 
761
                                {
 
762
                                        Gmax = -G[t];
 
763
                                        Gmax_idx = t;
 
764
                                }
 
765
                }
 
766
                else
 
767
                {
 
768
                        if(!is_lower_bound(t))
 
769
                                if(G[t] >= Gmax)
 
770
                                {
 
771
                                        Gmax = G[t];
 
772
                                        Gmax_idx = t;
 
773
                                }
 
774
                }
 
775
 
 
776
        int i = Gmax_idx;
 
777
        const Qfloat *Q_i = NULL;
 
778
        if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1
 
779
                Q_i = Q->get_Q(i,active_size);
 
780
 
 
781
        for(int j=0;j<active_size;j++)
 
782
        {
 
783
                if(y[j]==+1)
 
784
                {
 
785
                        if (!is_lower_bound(j))
 
786
                        {
 
787
                                double grad_diff=Gmax+G[j];
 
788
                                if (G[j] >= Gmax2)
 
789
                                        Gmax2 = G[j];
 
790
                                if (grad_diff > 0)
 
791
                                {
 
792
                                        double obj_diff; 
 
793
                                        double quad_coef=Q_i[i]+QD[j]-2*y[i]*Q_i[j];
 
794
                                        if (quad_coef > 0)
 
795
                                                obj_diff = -(grad_diff*grad_diff)/quad_coef;
 
796
                                        else
 
797
                                                obj_diff = -(grad_diff*grad_diff)/TAU;
 
798
 
 
799
                                        if (obj_diff <= obj_diff_min)
 
800
                                        {
 
801
                                                Gmin_idx=j;
 
802
                                                obj_diff_min = obj_diff;
 
803
                                        }
 
804
                                }
 
805
                        }
 
806
                }
 
807
                else
 
808
                {
 
809
                        if (!is_upper_bound(j))
 
810
                        {
 
811
                                double grad_diff= Gmax-G[j];
 
812
                                if (-G[j] >= Gmax2)
 
813
                                        Gmax2 = -G[j];
 
814
                                if (grad_diff > 0)
 
815
                                {
 
816
                                        double obj_diff; 
 
817
                                        double quad_coef=Q_i[i]+QD[j]+2*y[i]*Q_i[j];
 
818
                                        if (quad_coef > 0)
 
819
                                                obj_diff = -(grad_diff*grad_diff)/quad_coef;
 
820
                                        else
 
821
                                                obj_diff = -(grad_diff*grad_diff)/TAU;
 
822
 
 
823
                                        if (obj_diff <= obj_diff_min)
 
824
                                        {
 
825
                                                Gmin_idx=j;
 
826
                                                obj_diff_min = obj_diff;
 
827
                                        }
 
828
                                }
 
829
                        }
 
830
                }
 
831
        }
 
832
 
 
833
        if(Gmax+Gmax2 < eps)
 
834
                return 1;
 
835
 
 
836
        out_i = Gmax_idx;
 
837
        out_j = Gmin_idx;
 
838
        return 0;
 
839
}
 
840
 
 
841
// return 1 if already optimal, return 0 otherwise
 
842
int Solver::max_violating_pair(int &out_i, int &out_j)
 
843
{
 
844
        // return i,j: maximal violating pair
 
845
 
 
846
        double Gmax1 = -INF;            // max { -y_i * grad(f)_i | i in I_up(\alpha) }
 
847
        int Gmax1_idx = -1;
 
848
 
 
849
        double Gmax2 = -INF;            // max { y_i * grad(f)_i | i in I_low(\alpha) }
 
850
        int Gmax2_idx = -1;
 
851
 
 
852
        for(int i=0;i<active_size;i++)
 
853
        {
 
854
                if(y[i]==+1)    // y = +1
 
855
                {
 
856
                        if(!is_upper_bound(i))  // d = +1
 
857
                        {
 
858
                                if(-G[i] >= Gmax1)
 
859
                                {
 
860
                                        Gmax1 = -G[i];
 
861
                                        Gmax1_idx = i;
 
862
                                }
 
863
                        }
 
864
                        if(!is_lower_bound(i))  // d = -1
 
865
                        {
 
866
                                if(G[i] >= Gmax2)
 
867
                                {
 
868
                                        Gmax2 = G[i];
 
869
                                        Gmax2_idx = i;
 
870
                                }
 
871
                        }
 
872
                }
 
873
                else            // y = -1
 
874
                {
 
875
                        if(!is_upper_bound(i))  // d = +1
 
876
                        {
 
877
                                if(-G[i] >= Gmax2)
 
878
                                {
 
879
                                        Gmax2 = -G[i];
 
880
                                        Gmax2_idx = i;
 
881
                                }
 
882
                        }
 
883
                        if(!is_lower_bound(i))  // d = -1
 
884
                        {
 
885
                                if(G[i] >= Gmax1)
 
886
                                {
 
887
                                        Gmax1 = G[i];
 
888
                                        Gmax1_idx = i;
 
889
                                }
 
890
                        }
 
891
                }
 
892
        }
 
893
 
 
894
        if(Gmax1+Gmax2 < eps)
 
895
                return 1;
 
896
 
 
897
        out_i = Gmax1_idx;
 
898
        out_j = Gmax2_idx;
 
899
        return 0;
 
900
}
 
901
 
 
902
void Solver::do_shrinking()
 
903
{
 
904
        int i,j,k;
 
905
        if(max_violating_pair(i,j)!=0) return;
 
906
        double Gm1 = -y[j]*G[j];
 
907
        double Gm2 = y[i]*G[i];
 
908
 
 
909
        // shrink
 
910
        
 
911
        for(k=0;k<active_size;k++)
 
912
        {
 
913
                if(is_lower_bound(k))
 
914
                {
 
915
                        if(y[k]==+1)
 
916
                        {
 
917
                                if(-G[k] >= Gm1) continue;
 
918
                        }
 
919
                        else    if(-G[k] >= Gm2) continue;
 
920
                }
 
921
                else if(is_upper_bound(k))
 
922
                {
 
923
                        if(y[k]==+1)
 
924
                        {
 
925
                                if(G[k] >= Gm2) continue;
 
926
                        }
 
927
                        else    if(G[k] >= Gm1) continue;
 
928
                }
 
929
                else continue;
 
930
 
 
931
                --active_size;
 
932
                swap_index(k,active_size);
 
933
                --k;    // look at the newcomer
 
934
        }
 
935
 
 
936
        // unshrink, check all variables again before final iterations
 
937
 
 
938
        if(unshrinked || -(Gm1 + Gm2) > eps*10) return;
 
939
        
 
940
        unshrinked = true;
 
941
        reconstruct_gradient();
 
942
 
 
943
        for(k=l-1;k>=active_size;k--)
 
944
        {
 
945
                if(is_lower_bound(k))
 
946
                {
 
947
                        if(y[k]==+1)
 
948
                        {
 
949
                                if(-G[k] < Gm1) continue;
 
950
                        }
 
951
                        else    if(-G[k] < Gm2) continue;
 
952
                }
 
953
                else if(is_upper_bound(k))
 
954
                {
 
955
                        if(y[k]==+1)
 
956
                        {
 
957
                                if(G[k] < Gm2) continue;
 
958
                        }
 
959
                        else    if(G[k] < Gm1) continue;
 
960
                }
 
961
                else continue;
 
962
 
 
963
                swap_index(k,active_size);
 
964
                active_size++;
 
965
                ++k;    // look at the newcomer
 
966
        }
 
967
}
 
968
 
 
969
double Solver::calculate_rho()
 
970
{
 
971
        double r;
 
972
        int nr_free = 0;
 
973
        double ub = INF, lb = -INF, sum_free = 0;
 
974
        for(int i=0;i<active_size;i++)
 
975
        {
 
976
                double yG = y[i]*G[i];
 
977
 
 
978
                if(is_lower_bound(i))
 
979
                {
 
980
                        if(y[i] > 0)
 
981
                                ub = min(ub,yG);
 
982
                        else
 
983
                                lb = max(lb,yG);
 
984
                }
 
985
                else if(is_upper_bound(i))
 
986
                {
 
987
                        if(y[i] < 0)
 
988
                                ub = min(ub,yG);
 
989
                        else
 
990
                                lb = max(lb,yG);
 
991
                }
 
992
                else
 
993
                {
 
994
                        ++nr_free;
 
995
                        sum_free += yG;
 
996
                }
 
997
        }
 
998
 
 
999
        if(nr_free>0)
 
1000
                r = sum_free/nr_free;
 
1001
        else
 
1002
                r = (ub+lb)/2;
 
1003
 
 
1004
        return r;
 
1005
}
 
1006
 
 
1007
//
 
1008
// Solver for nu-svm classification and regression
 
1009
//
 
1010
// additional constraint: e^T \alpha = constant
 
1011
//
 
1012
class Solver_NU : public Solver
 
1013
{
 
1014
public:
 
1015
        Solver_NU() {}
 
1016
        void Solve(int l, const QMatrix& Q, const double *b, const schar *y,
 
1017
                   double *alpha, double Cp, double Cn, double eps,
 
1018
                   SolutionInfo* si, int shrinking)
 
1019
        {
 
1020
                this->si = si;
 
1021
                Solver::Solve(l,Q,b,y,alpha,Cp,Cn,eps,si,shrinking);
 
1022
        }
 
1023
private:
 
1024
        SolutionInfo *si;
 
1025
        int select_working_set(int &i, int &j);
 
1026
        double calculate_rho();
 
1027
        void do_shrinking();
 
1028
};
 
1029
 
 
1030
// return 1 if already optimal, return 0 otherwise
 
1031
int Solver_NU::select_working_set(int &out_i, int &out_j)
 
1032
{
 
1033
        // return i,j such that y_i = y_j and
 
1034
        // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
 
1035
        // j: minimizes the decrease of obj value
 
1036
        //    (if quadratic coefficeint <= 0, replace it with tau)
 
1037
        //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
 
1038
 
 
1039
        double Gmaxp = -INF;
 
1040
        double Gmaxp2 = -INF;
 
1041
        int Gmaxp_idx = -1;
 
1042
 
 
1043
        double Gmaxn = -INF;
 
1044
        double Gmaxn2 = -INF;
 
1045
        int Gmaxn_idx = -1;
 
1046
 
 
1047
        int Gmin_idx = -1;
 
1048
        double obj_diff_min = INF;
 
1049
 
 
1050
        for(int t=0;t<active_size;t++)
 
1051
                if(y[t]==+1)
 
1052
                {
 
1053
                        if(!is_upper_bound(t))
 
1054
                                if(-G[t] >= Gmaxp)
 
1055
                                {
 
1056
                                        Gmaxp = -G[t];
 
1057
                                        Gmaxp_idx = t;
 
1058
                                }
 
1059
                }
 
1060
                else
 
1061
                {
 
1062
                        if(!is_lower_bound(t))
 
1063
                                if(G[t] >= Gmaxn)
 
1064
                                {
 
1065
                                        Gmaxn = G[t];
 
1066
                                        Gmaxn_idx = t;
 
1067
                                }
 
1068
                }
 
1069
 
 
1070
        int ip = Gmaxp_idx;
 
1071
        int in = Gmaxn_idx;
 
1072
        const Qfloat *Q_ip = NULL;
 
1073
        const Qfloat *Q_in = NULL;
 
1074
        if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
 
1075
                Q_ip = Q->get_Q(ip,active_size);
 
1076
        if(in != -1)
 
1077
                Q_in = Q->get_Q(in,active_size);
 
1078
 
 
1079
        for(int j=0;j<active_size;j++)
 
1080
        {
 
1081
                if(y[j]==+1)
 
1082
                {
 
1083
                        if (!is_lower_bound(j)) 
 
1084
                        {
 
1085
                                double grad_diff=Gmaxp+G[j];
 
1086
                                if (G[j] >= Gmaxp2)
 
1087
                                        Gmaxp2 = G[j];
 
1088
                                if (grad_diff > 0)
 
1089
                                {
 
1090
                                        double obj_diff; 
 
1091
                                        double quad_coef = Q_ip[ip]+QD[j]-2*Q_ip[j];
 
1092
                                        if (quad_coef > 0)
 
1093
                                                obj_diff = -(grad_diff*grad_diff)/quad_coef;
 
1094
                                        else
 
1095
                                                obj_diff = -(grad_diff*grad_diff)/TAU;
 
1096
 
 
1097
                                        if (obj_diff <= obj_diff_min)
 
1098
                                        {
 
1099
                                                Gmin_idx=j;
 
1100
                                                obj_diff_min = obj_diff;
 
1101
                                        }
 
1102
                                }
 
1103
                        }
 
1104
                }
 
1105
                else
 
1106
                {
 
1107
                        if (!is_upper_bound(j))
 
1108
                        {
 
1109
                                double grad_diff=Gmaxn-G[j];
 
1110
                                if (-G[j] >= Gmaxn2)
 
1111
                                        Gmaxn2 = -G[j];
 
1112
                                if (grad_diff > 0)
 
1113
                                {
 
1114
                                        double obj_diff; 
 
1115
                                        double quad_coef = Q_in[in]+QD[j]-2*Q_in[j];
 
1116
                                        if (quad_coef > 0)
 
1117
                                                obj_diff = -(grad_diff*grad_diff)/quad_coef;
 
1118
                                        else
 
1119
                                                obj_diff = -(grad_diff*grad_diff)/TAU;
 
1120
 
 
1121
                                        if (obj_diff <= obj_diff_min)
 
1122
                                        {
 
1123
                                                Gmin_idx=j;
 
1124
                                                obj_diff_min = obj_diff;
 
1125
                                        }
 
1126
                                }
 
1127
                        }
 
1128
                }
 
1129
        }
 
1130
 
 
1131
        if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps)
 
1132
                return 1;
 
1133
 
 
1134
        if (y[Gmin_idx] == +1)
 
1135
                out_i = Gmaxp_idx;
 
1136
        else
 
1137
                out_i = Gmaxn_idx;
 
1138
        out_j = Gmin_idx;
 
1139
 
 
1140
        return 0;
 
1141
}
 
1142
 
 
1143
void Solver_NU::do_shrinking()
 
1144
{
 
1145
        double Gmax1 = -INF;    // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
 
1146
        double Gmax2 = -INF;    // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
 
1147
        double Gmax3 = -INF;    // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
 
1148
        double Gmax4 = -INF;    // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }
 
1149
 
 
1150
        // find maximal violating pair first
 
1151
        int k;
 
1152
        for(k=0;k<active_size;k++)
 
1153
        {
 
1154
                if(!is_upper_bound(k))
 
1155
                {
 
1156
                        if(y[k]==+1)
 
1157
                        {
 
1158
                                if(-G[k] > Gmax1) Gmax1 = -G[k];
 
1159
                        }
 
1160
                        else    if(-G[k] > Gmax3) Gmax3 = -G[k];
 
1161
                }
 
1162
                if(!is_lower_bound(k))
 
1163
                {
 
1164
                        if(y[k]==+1)
 
1165
                        {       
 
1166
                                if(G[k] > Gmax2) Gmax2 = G[k];
 
1167
                        }
 
1168
                        else    if(G[k] > Gmax4) Gmax4 = G[k];
 
1169
                }
 
1170
        }
 
1171
 
 
1172
        // shrinking
 
1173
 
 
1174
        double Gm1 = -Gmax2;
 
1175
        double Gm2 = -Gmax1;
 
1176
        double Gm3 = -Gmax4;
 
1177
        double Gm4 = -Gmax3;
 
1178
 
 
1179
        for(k=0;k<active_size;k++)
 
1180
        {
 
1181
                if(is_lower_bound(k))
 
1182
                {
 
1183
                        if(y[k]==+1)
 
1184
                        {
 
1185
                                if(-G[k] >= Gm1) continue;
 
1186
                        }
 
1187
                        else    if(-G[k] >= Gm3) continue;
 
1188
                }
 
1189
                else if(is_upper_bound(k))
 
1190
                {
 
1191
                        if(y[k]==+1)
 
1192
                        {
 
1193
                                if(G[k] >= Gm2) continue;
 
1194
                        }
 
1195
                        else    if(G[k] >= Gm4) continue;
 
1196
                }
 
1197
                else continue;
 
1198
 
 
1199
                --active_size;
 
1200
                swap_index(k,active_size);
 
1201
                --k;    // look at the newcomer
 
1202
        }
 
1203
 
 
1204
        // unshrink, check all variables again before final iterations
 
1205
 
 
1206
        if(unshrinked || max(-(Gm1+Gm2),-(Gm3+Gm4)) > eps*10) return;
 
1207
        
 
1208
        unshrinked = true;
 
1209
        reconstruct_gradient();
 
1210
 
 
1211
        for(k=l-1;k>=active_size;k--)
 
1212
        {
 
1213
                if(is_lower_bound(k))
 
1214
                {
 
1215
                        if(y[k]==+1)
 
1216
                        {
 
1217
                                if(-G[k] < Gm1) continue;
 
1218
                        }
 
1219
                        else    if(-G[k] < Gm3) continue;
 
1220
                }
 
1221
                else if(is_upper_bound(k))
 
1222
                {
 
1223
                        if(y[k]==+1)
 
1224
                        {
 
1225
                                if(G[k] < Gm2) continue;
 
1226
                        }
 
1227
                        else    if(G[k] < Gm4) continue;
 
1228
                }
 
1229
                else continue;
 
1230
 
 
1231
                swap_index(k,active_size);
 
1232
                active_size++;
 
1233
                ++k;    // look at the newcomer
 
1234
        }
 
1235
}
 
1236
 
 
1237
double Solver_NU::calculate_rho()
 
1238
{
 
1239
        int nr_free1 = 0,nr_free2 = 0;
 
1240
        double ub1 = INF, ub2 = INF;
 
1241
        double lb1 = -INF, lb2 = -INF;
 
1242
        double sum_free1 = 0, sum_free2 = 0;
 
1243
 
 
1244
        for(int i=0;i<active_size;i++)
 
1245
        {
 
1246
                if(y[i]==+1)
 
1247
                {
 
1248
                        if(is_lower_bound(i))
 
1249
                                ub1 = min(ub1,G[i]);
 
1250
                        else if(is_upper_bound(i))
 
1251
                                lb1 = max(lb1,G[i]);
 
1252
                        else
 
1253
                        {
 
1254
                                ++nr_free1;
 
1255
                                sum_free1 += G[i];
 
1256
                        }
 
1257
                }
 
1258
                else
 
1259
                {
 
1260
                        if(is_lower_bound(i))
 
1261
                                ub2 = min(ub2,G[i]);
 
1262
                        else if(is_upper_bound(i))
 
1263
                                lb2 = max(lb2,G[i]);
 
1264
                        else
 
1265
                        {
 
1266
                                ++nr_free2;
 
1267
                                sum_free2 += G[i];
 
1268
                        }
 
1269
                }
 
1270
        }
 
1271
 
 
1272
        double r1,r2;
 
1273
        if(nr_free1 > 0)
 
1274
                r1 = sum_free1/nr_free1;
 
1275
        else
 
1276
                r1 = (ub1+lb1)/2;
 
1277
        
 
1278
        if(nr_free2 > 0)
 
1279
                r2 = sum_free2/nr_free2;
 
1280
        else
 
1281
                r2 = (ub2+lb2)/2;
 
1282
        
 
1283
        si->r = (r1+r2)/2;
 
1284
        return (r1-r2)/2;
 
1285
}
 
1286
 
 
1287
//
 
1288
// Q matrices for various formulations
 
1289
//
 
1290
class SVC_Q: public Kernel
 
1291
 
1292
public:
 
1293
        SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
 
1294
        :Kernel(prob.l, prob.x, param)
 
1295
        {
 
1296
                clone(y,y_,prob.l);
 
1297
                cache = new Cache(prob.l,(int)(param.cache_size*(1<<20)));
 
1298
                QD = new Qfloat[prob.l];
 
1299
                for(int i=0;i<prob.l;i++)
 
1300
                        QD[i]= (Qfloat)(this->*kernel_function)(i,i);
 
1301
        }
 
1302
        
 
1303
        Qfloat *get_Q(int i, int len) const
 
1304
        {
 
1305
                Qfloat *data;
 
1306
                int start;
 
1307
                if((start = cache->get_data(i,&data,len)) < len)
 
1308
                {
 
1309
                        for(int j=start;j<len;j++)
 
1310
                                data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
 
1311
                }
 
1312
                return data;
 
1313
        }
 
1314
 
 
1315
        Qfloat *get_QD() const
 
1316
        {
 
1317
                return QD;
 
1318
        }
 
1319
 
 
1320
        void swap_index(int i, int j) const
 
1321
        {
 
1322
                cache->swap_index(i,j);
 
1323
                Kernel::swap_index(i,j);
 
1324
                swap(y[i],y[j]);
 
1325
                swap(QD[i],QD[j]);
 
1326
        }
 
1327
 
 
1328
        ~SVC_Q()
 
1329
        {
 
1330
                delete[] y;
 
1331
                delete cache;
 
1332
                delete[] QD;
 
1333
        }
 
1334
private:
 
1335
        schar *y;
 
1336
        Cache *cache;
 
1337
        Qfloat *QD;
 
1338
};
 
1339
 
 
1340
class ONE_CLASS_Q: public Kernel
 
1341
{
 
1342
public:
 
1343
        ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
 
1344
        :Kernel(prob.l, prob.x, param)
 
1345
        {
 
1346
                cache = new Cache(prob.l,(int)(param.cache_size*(1<<20)));
 
1347
                QD = new Qfloat[prob.l];
 
1348
                for(int i=0;i<prob.l;i++)
 
1349
                        QD[i]= (Qfloat)(this->*kernel_function)(i,i);
 
1350
        }
 
1351
        
 
1352
        Qfloat *get_Q(int i, int len) const
 
1353
        {
 
1354
                Qfloat *data;
 
1355
                int start;
 
1356
                if((start = cache->get_data(i,&data,len)) < len)
 
1357
                {
 
1358
                        for(int j=start;j<len;j++)
 
1359
                                data[j] = (Qfloat)(this->*kernel_function)(i,j);
 
1360
                }
 
1361
                return data;
 
1362
        }
 
1363
 
 
1364
        Qfloat *get_QD() const
 
1365
        {
 
1366
                return QD;
 
1367
        }
 
1368
 
 
1369
        void swap_index(int i, int j) const
 
1370
        {
 
1371
                cache->swap_index(i,j);
 
1372
                Kernel::swap_index(i,j);
 
1373
                swap(QD[i],QD[j]);
 
1374
        }
 
1375
 
 
1376
        ~ONE_CLASS_Q()
 
1377
        {
 
1378
                delete cache;
 
1379
                delete[] QD;
 
1380
        }
 
1381
private:
 
1382
        Cache *cache;
 
1383
        Qfloat *QD;
 
1384
};
 
1385
 
 
1386
class SVR_Q: public Kernel
 
1387
 
1388
public:
 
1389
        SVR_Q(const svm_problem& prob, const svm_parameter& param)
 
1390
        :Kernel(prob.l, prob.x, param)
 
1391
        {
 
1392
                l = prob.l;
 
1393
                cache = new Cache(l,(int)(param.cache_size*(1<<20)));
 
1394
                QD = new Qfloat[2*l];
 
1395
                sign = new schar[2*l];
 
1396
                index = new int[2*l];
 
1397
                for(int k=0;k<l;k++)
 
1398
                {
 
1399
                        sign[k] = 1;
 
1400
                        sign[k+l] = -1;
 
1401
                        index[k] = k;
 
1402
                        index[k+l] = k;
 
1403
                        QD[k]= (Qfloat)(this->*kernel_function)(k,k);
 
1404
                        QD[k+l]=QD[k];
 
1405
                }
 
1406
                buffer[0] = new Qfloat[2*l];
 
1407
                buffer[1] = new Qfloat[2*l];
 
1408
                next_buffer = 0;
 
1409
        }
 
1410
 
 
1411
        void swap_index(int i, int j) const
 
1412
        {
 
1413
                swap(sign[i],sign[j]);
 
1414
                swap(index[i],index[j]);
 
1415
                swap(QD[i],QD[j]);
 
1416
        }
 
1417
        
 
1418
        Qfloat *get_Q(int i, int len) const
 
1419
        {
 
1420
                Qfloat *data;
 
1421
                int real_i = index[i];
 
1422
                if(cache->get_data(real_i,&data,l) < l)
 
1423
                {
 
1424
                        for(int j=0;j<l;j++)
 
1425
                                data[j] = (Qfloat)(this->*kernel_function)(real_i,j);
 
1426
                }
 
1427
 
 
1428
                // reorder and copy
 
1429
                Qfloat *buf = buffer[next_buffer];
 
1430
                next_buffer = 1 - next_buffer;
 
1431
                schar si = sign[i];
 
1432
                for(int j=0;j<len;j++)
 
1433
                        buf[j] = si * sign[j] * data[index[j]];
 
1434
                return buf;
 
1435
        }
 
1436
 
 
1437
        Qfloat *get_QD() const
 
1438
        {
 
1439
                return QD;
 
1440
        }
 
1441
 
 
1442
        ~SVR_Q()
 
1443
        {
 
1444
                delete cache;
 
1445
                delete[] sign;
 
1446
                delete[] index;
 
1447
                delete[] buffer[0];
 
1448
                delete[] buffer[1];
 
1449
                delete[] QD;
 
1450
        }
 
1451
private:
 
1452
        int l;
 
1453
        Cache *cache;
 
1454
        schar *sign;
 
1455
        int *index;
 
1456
        mutable int next_buffer;
 
1457
        Qfloat *buffer[2];
 
1458
        Qfloat *QD;
 
1459
};
 
1460
 
 
1461
//
 
1462
// construct and solve various formulations
 
1463
//
 
1464
static void solve_c_svc(
 
1465
        const svm_problem *prob, const svm_parameter* param,
 
1466
        double *alpha, Solver::SolutionInfo* si, double Cp, double Cn)
 
1467
{
 
1468
        int l = prob->l;
 
1469
        double *minus_ones = new double[l];
 
1470
        schar *y = new schar[l];
 
1471
 
 
1472
        int i;
 
1473
 
 
1474
        for(i=0;i<l;i++)
 
1475
        {
 
1476
                alpha[i] = 0;
 
1477
                minus_ones[i] = -1;
 
1478
                if(prob->y[i] > 0) y[i] = +1; else y[i]=-1;
 
1479
        }
 
1480
 
 
1481
        Solver s;
 
1482
        s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
 
1483
                alpha, Cp, Cn, param->eps, si, param->shrinking);
 
1484
 
 
1485
        double sum_alpha=0;
 
1486
        for(i=0;i<l;i++)
 
1487
                sum_alpha += alpha[i];
 
1488
 
 
1489
        if (Cp==Cn)
 
1490
                info("nu = %f\n", sum_alpha/(Cp*prob->l));
 
1491
 
 
1492
        for(i=0;i<l;i++)
 
1493
                alpha[i] *= y[i];
 
1494
 
 
1495
        delete[] minus_ones;
 
1496
        delete[] y;
 
1497
}
 
1498
 
 
1499
static void solve_nu_svc(
 
1500
        const svm_problem *prob, const svm_parameter *param,
 
1501
        double *alpha, Solver::SolutionInfo* si)
 
1502
{
 
1503
        int i;
 
1504
        int l = prob->l;
 
1505
        double nu = param->nu;
 
1506
 
 
1507
        schar *y = new schar[l];
 
1508
 
 
1509
        for(i=0;i<l;i++)
 
1510
                if(prob->y[i]>0)
 
1511
                        y[i] = +1;
 
1512
                else
 
1513
                        y[i] = -1;
 
1514
 
 
1515
        double sum_pos = nu*l/2;
 
1516
        double sum_neg = nu*l/2;
 
1517
 
 
1518
        for(i=0;i<l;i++)
 
1519
                if(y[i] == +1)
 
1520
                {
 
1521
                        alpha[i] = min(1.0,sum_pos);
 
1522
                        sum_pos -= alpha[i];
 
1523
                }
 
1524
                else
 
1525
                {
 
1526
                        alpha[i] = min(1.0,sum_neg);
 
1527
                        sum_neg -= alpha[i];
 
1528
                }
 
1529
 
 
1530
        double *zeros = new double[l];
 
1531
 
 
1532
        for(i=0;i<l;i++)
 
1533
                zeros[i] = 0;
 
1534
 
 
1535
        Solver_NU s;
 
1536
        s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
 
1537
                alpha, 1.0, 1.0, param->eps, si,  param->shrinking);
 
1538
        double r = si->r;
 
1539
 
 
1540
        info("C = %f\n",1/r);
 
1541
 
 
1542
        for(i=0;i<l;i++)
 
1543
                alpha[i] *= y[i]/r;
 
1544
 
 
1545
        si->rho /= r;
 
1546
        si->obj /= (r*r);
 
1547
        si->upper_bound_p = 1/r;
 
1548
        si->upper_bound_n = 1/r;
 
1549
 
 
1550
        delete[] y;
 
1551
        delete[] zeros;
 
1552
}
 
1553
 
 
1554
static void solve_one_class(
 
1555
        const svm_problem *prob, const svm_parameter *param,
 
1556
        double *alpha, Solver::SolutionInfo* si)
 
1557
{
 
1558
        int l = prob->l;
 
1559
        double *zeros = new double[l];
 
1560
        schar *ones = new schar[l];
 
1561
        int i;
 
1562
 
 
1563
        int n = (int)(param->nu*prob->l);       // # of alpha's at upper bound
 
1564
 
 
1565
        for(i=0;i<n;i++)
 
1566
                alpha[i] = 1;
 
1567
        if(n<prob->l)
 
1568
                alpha[n] = param->nu * prob->l - n;
 
1569
        for(i=n+1;i<l;i++)
 
1570
                alpha[i] = 0;
 
1571
 
 
1572
        for(i=0;i<l;i++)
 
1573
        {
 
1574
                zeros[i] = 0;
 
1575
                ones[i] = 1;
 
1576
        }
 
1577
 
 
1578
        Solver s;
 
1579
        s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
 
1580
                alpha, 1.0, 1.0, param->eps, si, param->shrinking);
 
1581
 
 
1582
        delete[] zeros;
 
1583
        delete[] ones;
 
1584
}
 
1585
 
 
1586
static void solve_epsilon_svr(
 
1587
        const svm_problem *prob, const svm_parameter *param,
 
1588
        double *alpha, Solver::SolutionInfo* si)
 
1589
{
 
1590
        int l = prob->l;
 
1591
        double *alpha2 = new double[2*l];
 
1592
        double *linear_term = new double[2*l];
 
1593
        schar *y = new schar[2*l];
 
1594
        int i;
 
1595
 
 
1596
        for(i=0;i<l;i++)
 
1597
        {
 
1598
                alpha2[i] = 0;
 
1599
                linear_term[i] = param->p - prob->y[i];
 
1600
                y[i] = 1;
 
1601
 
 
1602
                alpha2[i+l] = 0;
 
1603
                linear_term[i+l] = param->p + prob->y[i];
 
1604
                y[i+l] = -1;
 
1605
        }
 
1606
 
 
1607
        Solver s;
 
1608
        s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
 
1609
                alpha2, param->C, param->C, param->eps, si, param->shrinking);
 
1610
 
 
1611
        double sum_alpha = 0;
 
1612
        for(i=0;i<l;i++)
 
1613
        {
 
1614
                alpha[i] = alpha2[i] - alpha2[i+l];
 
1615
                sum_alpha += fabs(alpha[i]);
 
1616
        }
 
1617
        info("nu = %f\n",sum_alpha/(param->C*l));
 
1618
 
 
1619
        delete[] alpha2;
 
1620
        delete[] linear_term;
 
1621
        delete[] y;
 
1622
}
 
1623
 
 
1624
static void solve_nu_svr(
 
1625
        const svm_problem *prob, const svm_parameter *param,
 
1626
        double *alpha, Solver::SolutionInfo* si)
 
1627
{
 
1628
        int l = prob->l;
 
1629
        double C = param->C;
 
1630
        double *alpha2 = new double[2*l];
 
1631
        double *linear_term = new double[2*l];
 
1632
        schar *y = new schar[2*l];
 
1633
        int i;
 
1634
 
 
1635
        double sum = C * param->nu * l / 2;
 
1636
        for(i=0;i<l;i++)
 
1637
        {
 
1638
                alpha2[i] = alpha2[i+l] = min(sum,C);
 
1639
                sum -= alpha2[i];
 
1640
 
 
1641
                linear_term[i] = - prob->y[i];
 
1642
                y[i] = 1;
 
1643
 
 
1644
                linear_term[i+l] = prob->y[i];
 
1645
                y[i+l] = -1;
 
1646
        }
 
1647
 
 
1648
        Solver_NU s;
 
1649
        s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
 
1650
                alpha2, C, C, param->eps, si, param->shrinking);
 
1651
 
 
1652
        info("epsilon = %f\n",-si->r);
 
1653
 
 
1654
        for(i=0;i<l;i++)
 
1655
                alpha[i] = alpha2[i] - alpha2[i+l];
 
1656
 
 
1657
        delete[] alpha2;
 
1658
        delete[] linear_term;
 
1659
        delete[] y;
 
1660
}
 
1661
 
 
1662
//
 
1663
// decision_function
 
1664
//
 
1665
struct decision_function
 
1666
{
 
1667
        double *alpha;
 
1668
        double rho;     
 
1669
};
 
1670
 
 
1671
decision_function svm_train_one(
 
1672
        const svm_problem *prob, const svm_parameter *param,
 
1673
        double Cp, double Cn)
 
1674
{
 
1675
        double *alpha = Malloc(double,prob->l);
 
1676
        Solver::SolutionInfo si;
 
1677
        switch(param->svm_type)
 
1678
        {
 
1679
                case C_SVC:
 
1680
                        solve_c_svc(prob,param,alpha,&si,Cp,Cn);
 
1681
                        break;
 
1682
                case NU_SVC:
 
1683
                        solve_nu_svc(prob,param,alpha,&si);
 
1684
                        break;
 
1685
                case ONE_CLASS:
 
1686
                        solve_one_class(prob,param,alpha,&si);
 
1687
                        break;
 
1688
                case EPSILON_SVR:
 
1689
                        solve_epsilon_svr(prob,param,alpha,&si);
 
1690
                        break;
 
1691
                case NU_SVR:
 
1692
                        solve_nu_svr(prob,param,alpha,&si);
 
1693
                        break;
 
1694
        }
 
1695
 
 
1696
        info("obj = %f, rho = %f\n",si.obj,si.rho);
 
1697
 
 
1698
        // output SVs
 
1699
 
 
1700
        int nSV = 0;
 
1701
        int nBSV = 0;
 
1702
        for(int i=0;i<prob->l;i++)
 
1703
        {
 
1704
                if(fabs(alpha[i]) > 0)
 
1705
                {
 
1706
                        ++nSV;
 
1707
                        if(prob->y[i] > 0)
 
1708
                        {
 
1709
                                if(fabs(alpha[i]) >= si.upper_bound_p)
 
1710
                                        ++nBSV;
 
1711
                        }
 
1712
                        else
 
1713
                        {
 
1714
                                if(fabs(alpha[i]) >= si.upper_bound_n)
 
1715
                                        ++nBSV;
 
1716
                        }
 
1717
                }
 
1718
        }
 
1719
 
 
1720
        info("nSV = %d, nBSV = %d\n",nSV,nBSV);
 
1721
 
 
1722
        decision_function f;
 
1723
        f.alpha = alpha;
 
1724
        f.rho = si.rho;
 
1725
        return f;
 
1726
}
 
1727
 
 
1728
//
 
1729
// svm_model
 
1730
//
 
1731
struct svm_model
 
1732
{
 
1733
        svm_parameter param;    // parameter
 
1734
        int nr_class;           // number of classes, = 2 in regression/one class svm
 
1735
        int l;                  // total #SV
 
1736
        svm_node **SV;          // SVs (SV[l])
 
1737
        double **sv_coef;       // coefficients for SVs in decision functions (sv_coef[n-1][l])
 
1738
        double *rho;            // constants in decision functions (rho[n*(n-1)/2])
 
1739
        double *probA;          // pariwise probability information
 
1740
        double *probB;
 
1741
 
 
1742
        // for classification only
 
1743
 
 
1744
        int *label;             // label of each class (label[n])
 
1745
        int *nSV;               // number of SVs for each class (nSV[n])
 
1746
                                // nSV[0] + nSV[1] + ... + nSV[n-1] = l
 
1747
        // XXX
 
1748
        int free_sv;            // 1 if svm_model is created by svm_load_model
 
1749
                                // 0 if svm_model is created by svm_train
 
1750
};
 
1751
 
 
1752
// Platt's binary SVM Probablistic Output: an improvement from Lin et al.
 
1753
void sigmoid_train(
 
1754
        int l, const double *dec_values, const double *labels, 
 
1755
        double& A, double& B)
 
1756
{
 
1757
        double prior1=0, prior0 = 0;
 
1758
        int i;
 
1759
 
 
1760
        for (i=0;i<l;i++)
 
1761
                if (labels[i] > 0) prior1+=1;
 
1762
                else prior0+=1;
 
1763
        
 
1764
        int max_iter=100;       // Maximal number of iterations
 
1765
        double min_step=1e-10;  // Minimal step taken in line search
 
1766
        double sigma=1e-3;      // For numerically strict PD of Hessian
 
1767
        double eps=1e-5;
 
1768
        double hiTarget=(prior1+1.0)/(prior1+2.0);
 
1769
        double loTarget=1/(prior0+2.0);
 
1770
        double *t=Malloc(double,l);
 
1771
        double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
 
1772
        double newA,newB,newf,d1,d2;
 
1773
        int iter; 
 
1774
        
 
1775
        // Initial Point and Initial Fun Value
 
1776
        A=0.0; B=log((prior0+1.0)/(prior1+1.0));
 
1777
        double fval = 0.0;
 
1778
 
 
1779
        for (i=0;i<l;i++)
 
1780
        {
 
1781
                if (labels[i]>0) t[i]=hiTarget;
 
1782
                else t[i]=loTarget;
 
1783
                fApB = dec_values[i]*A+B;
 
1784
                if (fApB>=0)
 
1785
                        fval += t[i]*fApB + log(1+exp(-fApB));
 
1786
                else
 
1787
                        fval += (t[i] - 1)*fApB +log(1+exp(fApB));
 
1788
        }
 
1789
        for (iter=0;iter<max_iter;iter++)
 
1790
        {
 
1791
                // Update Gradient and Hessian (use H' = H + sigma I)
 
1792
                h11=sigma; // numerically ensures strict PD
 
1793
                h22=sigma;
 
1794
                h21=0.0;g1=0.0;g2=0.0;
 
1795
                for (i=0;i<l;i++)
 
1796
                {
 
1797
                        fApB = dec_values[i]*A+B;
 
1798
                        if (fApB >= 0)
 
1799
                        {
 
1800
                                p=exp(-fApB)/(1.0+exp(-fApB));
 
1801
                                q=1.0/(1.0+exp(-fApB));
 
1802
                        }
 
1803
                        else
 
1804
                        {
 
1805
                                p=1.0/(1.0+exp(fApB));
 
1806
                                q=exp(fApB)/(1.0+exp(fApB));
 
1807
                        }
 
1808
                        d2=p*q;
 
1809
                        h11+=dec_values[i]*dec_values[i]*d2;
 
1810
                        h22+=d2;
 
1811
                        h21+=dec_values[i]*d2;
 
1812
                        d1=t[i]-p;
 
1813
                        g1+=dec_values[i]*d1;
 
1814
                        g2+=d1;
 
1815
                }
 
1816
 
 
1817
                // Stopping Criteria
 
1818
                if (fabs(g1)<eps && fabs(g2)<eps)
 
1819
                        break;
 
1820
 
 
1821
                // Finding Newton direction: -inv(H') * g
 
1822
                det=h11*h22-h21*h21;
 
1823
                dA=-(h22*g1 - h21 * g2) / det;
 
1824
                dB=-(-h21*g1+ h11 * g2) / det;
 
1825
                gd=g1*dA+g2*dB;
 
1826
 
 
1827
 
 
1828
                stepsize = 1;           // Line Search
 
1829
                while (stepsize >= min_step)
 
1830
                {
 
1831
                        newA = A + stepsize * dA;
 
1832
                        newB = B + stepsize * dB;
 
1833
 
 
1834
                        // New function value
 
1835
                        newf = 0.0;
 
1836
                        for (i=0;i<l;i++)
 
1837
                        {
 
1838
                                fApB = dec_values[i]*newA+newB;
 
1839
                                if (fApB >= 0)
 
1840
                                        newf += t[i]*fApB + log(1+exp(-fApB));
 
1841
                                else
 
1842
                                        newf += (t[i] - 1)*fApB +log(1+exp(fApB));
 
1843
                        }
 
1844
                        // Check sufficient decrease
 
1845
                        if (newf<fval+0.0001*stepsize*gd)
 
1846
                        {
 
1847
                                A=newA;B=newB;fval=newf;
 
1848
                                break;
 
1849
                        }
 
1850
                        else
 
1851
                                stepsize = stepsize / 2.0;
 
1852
                }
 
1853
 
 
1854
                if (stepsize < min_step)
 
1855
                {
 
1856
                        info("Line search fails in two-class probability estimates\n");
 
1857
                        break;
 
1858
                }
 
1859
        }
 
1860
 
 
1861
        if (iter>=max_iter)
 
1862
                info("Reaching maximal iterations in two-class probability estimates\n");
 
1863
        free(t);
 
1864
}
 
1865
 
 
1866
double sigmoid_predict(double decision_value, double A, double B)
 
1867
{
 
1868
        double fApB = decision_value*A+B;
 
1869
        if (fApB >= 0)
 
1870
                return exp(-fApB)/(1.0+exp(-fApB));
 
1871
        else
 
1872
                return 1.0/(1+exp(fApB)) ;
 
1873
}
 
1874
 
 
1875
// Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
 
1876
void multiclass_probability(int k, double **r, double *p)
 
1877
{
 
1878
        int t,j;
 
1879
        int iter = 0, max_iter=max(100,k);
 
1880
        double **Q=Malloc(double *,k);
 
1881
        double *Qp=Malloc(double,k);
 
1882
        double pQp, eps=0.005/k;
 
1883
        
 
1884
        for (t=0;t<k;t++)
 
1885
        {
 
1886
                p[t]=1.0/k;  // Valid if k = 1
 
1887
                Q[t]=Malloc(double,k);
 
1888
                Q[t][t]=0;
 
1889
                for (j=0;j<t;j++)
 
1890
                {
 
1891
                        Q[t][t]+=r[j][t]*r[j][t];
 
1892
                        Q[t][j]=Q[j][t];
 
1893
                }
 
1894
                for (j=t+1;j<k;j++)
 
1895
                {
 
1896
                        Q[t][t]+=r[j][t]*r[j][t];
 
1897
                        Q[t][j]=-r[j][t]*r[t][j];
 
1898
                }
 
1899
        }
 
1900
        for (iter=0;iter<max_iter;iter++)
 
1901
        {
 
1902
                // stopping condition, recalculate QP,pQP for numerical accuracy
 
1903
                pQp=0;
 
1904
                for (t=0;t<k;t++)
 
1905
                {
 
1906
                        Qp[t]=0;
 
1907
                        for (j=0;j<k;j++)
 
1908
                                Qp[t]+=Q[t][j]*p[j];
 
1909
                        pQp+=p[t]*Qp[t];
 
1910
                }
 
1911
                double max_error=0;
 
1912
                for (t=0;t<k;t++)
 
1913
                {
 
1914
                        double error=fabs(Qp[t]-pQp);
 
1915
                        if (error>max_error)
 
1916
                                max_error=error;
 
1917
                }
 
1918
                if (max_error<eps) break;
 
1919
                
 
1920
                for (t=0;t<k;t++)
 
1921
                {
 
1922
                        double diff=(-Qp[t]+pQp)/Q[t][t];
 
1923
                        p[t]+=diff;
 
1924
                        pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
 
1925
                        for (j=0;j<k;j++)
 
1926
                        {
 
1927
                                Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
 
1928
                                p[j]/=(1+diff);
 
1929
                        }
 
1930
                }
 
1931
        }
 
1932
        if (iter>=max_iter)
 
1933
                info("Exceeds max_iter in multiclass_prob\n");
 
1934
        for(t=0;t<k;t++) free(Q[t]);
 
1935
        free(Q);
 
1936
        free(Qp);
 
1937
}
 
1938
 
 
1939
// Cross-validation decision values for probability estimates
 
1940
void svm_binary_svc_probability(
 
1941
        const svm_problem *prob, const svm_parameter *param,
 
1942
        double Cp, double Cn, double& probA, double& probB)
 
1943
{
 
1944
        int i;
 
1945
        int nr_fold = 5;
 
1946
        int *perm = Malloc(int,prob->l);
 
1947
        double *dec_values = Malloc(double,prob->l);
 
1948
 
 
1949
        // random shuffle
 
1950
        for(i=0;i<prob->l;i++) perm[i]=i;
 
1951
        for(i=0;i<prob->l;i++)
 
1952
        {
 
1953
                int j = i+rand()%(prob->l-i);
 
1954
                swap(perm[i],perm[j]);
 
1955
        }
 
1956
        for(i=0;i<nr_fold;i++)
 
1957
        {
 
1958
                int begin = i*prob->l/nr_fold;
 
1959
                int end = (i+1)*prob->l/nr_fold;
 
1960
                int j,k;
 
1961
                struct svm_problem subprob;
 
1962
 
 
1963
                subprob.l = prob->l-(end-begin);
 
1964
                subprob.x = Malloc(struct svm_node*,subprob.l);
 
1965
                subprob.y = Malloc(double,subprob.l);
 
1966
                        
 
1967
                k=0;
 
1968
                for(j=0;j<begin;j++)
 
1969
                {
 
1970
                        subprob.x[k] = prob->x[perm[j]];
 
1971
                        subprob.y[k] = prob->y[perm[j]];
 
1972
                        ++k;
 
1973
                }
 
1974
                for(j=end;j<prob->l;j++)
 
1975
                {
 
1976
                        subprob.x[k] = prob->x[perm[j]];
 
1977
                        subprob.y[k] = prob->y[perm[j]];
 
1978
                        ++k;
 
1979
                }
 
1980
                int p_count=0,n_count=0;
 
1981
                for(j=0;j<k;j++)
 
1982
                        if(subprob.y[j]>0)
 
1983
                                p_count++;
 
1984
                        else
 
1985
                                n_count++;
 
1986
 
 
1987
                if(p_count==0 && n_count==0)
 
1988
                        for(j=begin;j<end;j++)
 
1989
                                dec_values[perm[j]] = 0;
 
1990
                else if(p_count > 0 && n_count == 0)
 
1991
                        for(j=begin;j<end;j++)
 
1992
                                dec_values[perm[j]] = 1;
 
1993
                else if(p_count == 0 && n_count > 0)
 
1994
                        for(j=begin;j<end;j++)
 
1995
                                dec_values[perm[j]] = -1;
 
1996
                else
 
1997
                {
 
1998
                        svm_parameter subparam = *param;
 
1999
                        subparam.probability=0;
 
2000
                        subparam.C=1.0;
 
2001
                        subparam.nr_weight=2;
 
2002
                        subparam.weight_label = Malloc(int,2);
 
2003
                        subparam.weight = Malloc(double,2);
 
2004
                        subparam.weight_label[0]=+1;
 
2005
                        subparam.weight_label[1]=-1;
 
2006
                        subparam.weight[0]=Cp;
 
2007
                        subparam.weight[1]=Cn;
 
2008
                        struct svm_model *submodel = svm_train(&subprob,&subparam);
 
2009
                        for(j=begin;j<end;j++)
 
2010
                        {
 
2011
                                svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]])); 
 
2012
                                // ensure +1 -1 order; reason not using CV subroutine
 
2013
                                dec_values[perm[j]] *= submodel->label[0];
 
2014
                        }               
 
2015
                        svm_destroy_model(submodel);
 
2016
                        svm_destroy_param(&subparam);
 
2017
                        free(subprob.x);
 
2018
                        free(subprob.y);
 
2019
                }
 
2020
        }               
 
2021
        sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
 
2022
        free(dec_values);
 
2023
        free(perm);
 
2024
}
 
2025
 
 
2026
// Return parameter of a Laplace distribution 
 
2027
double svm_svr_probability(
 
2028
        const svm_problem *prob, const svm_parameter *param)
 
2029
{
 
2030
        int i;
 
2031
        int nr_fold = 5;
 
2032
        double *ymv = Malloc(double,prob->l);
 
2033
        double mae = 0;
 
2034
 
 
2035
        svm_parameter newparam = *param;
 
2036
        newparam.probability = 0;
 
2037
        svm_cross_validation(prob,&newparam,nr_fold,ymv);
 
2038
        for(i=0;i<prob->l;i++)
 
2039
        {
 
2040
                ymv[i]=prob->y[i]-ymv[i];
 
2041
                mae += fabs(ymv[i]);
 
2042
        }               
 
2043
        mae /= prob->l;
 
2044
        double std=sqrt(2*mae*mae);
 
2045
        int count=0;
 
2046
        mae=0;
 
2047
        for(i=0;i<prob->l;i++)
 
2048
                if (fabs(ymv[i]) > 5*std) 
 
2049
                        count=count+1;
 
2050
                else 
 
2051
                        mae+=fabs(ymv[i]);
 
2052
        mae /= (prob->l-count);
 
2053
        info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
 
2054
        free(ymv);
 
2055
        return mae;
 
2056
}
 
2057
 
 
2058
 
 
2059
// label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
 
2060
// perm, length l, must be allocated before calling this subroutine
 
2061
void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
 
2062
{
 
2063
        int l = prob->l;
 
2064
        int max_nr_class = 16;
 
2065
        int nr_class = 0;
 
2066
        int *label = Malloc(int,max_nr_class);
 
2067
        int *count = Malloc(int,max_nr_class);
 
2068
        int *data_label = Malloc(int,l);        
 
2069
        int i;
 
2070
 
 
2071
        for(i=0;i<l;i++)
 
2072
        {
 
2073
                int this_label = (int)prob->y[i];
 
2074
                int j;
 
2075
                for(j=0;j<nr_class;j++)
 
2076
                {
 
2077
                        if(this_label == label[j])
 
2078
                        {
 
2079
                                ++count[j];
 
2080
                                break;
 
2081
                        }
 
2082
                }
 
2083
                data_label[i] = j;
 
2084
                if(j == nr_class)
 
2085
                {
 
2086
                        if(nr_class == max_nr_class)
 
2087
                        {
 
2088
                                max_nr_class *= 2;
 
2089
                                label = (int *)realloc(label,max_nr_class*sizeof(int));
 
2090
                                count = (int *)realloc(count,max_nr_class*sizeof(int));
 
2091
                        }
 
2092
                        label[nr_class] = this_label;
 
2093
                        count[nr_class] = 1;
 
2094
                        ++nr_class;
 
2095
                }
 
2096
        }
 
2097
 
 
2098
        int *start = Malloc(int,nr_class);
 
2099
        start[0] = 0;
 
2100
        for(i=1;i<nr_class;i++)
 
2101
                start[i] = start[i-1]+count[i-1];
 
2102
        for(i=0;i<l;i++)
 
2103
        {
 
2104
                perm[start[data_label[i]]] = i;
 
2105
                ++start[data_label[i]];
 
2106
        }
 
2107
        start[0] = 0;
 
2108
        for(i=1;i<nr_class;i++)
 
2109
                start[i] = start[i-1]+count[i-1];
 
2110
 
 
2111
        *nr_class_ret = nr_class;
 
2112
        *label_ret = label;
 
2113
        *start_ret = start;
 
2114
        *count_ret = count;
 
2115
        free(data_label);
 
2116
}
 
2117
 
 
2118
//
 
2119
// Interface functions
 
2120
//
 
2121
svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
 
2122
{
 
2123
        svm_model *model = Malloc(svm_model,1);
 
2124
        model->param = *param;
 
2125
        model->free_sv = 0;     // XXX
 
2126
 
 
2127
        if(param->svm_type == ONE_CLASS ||
 
2128
           param->svm_type == EPSILON_SVR ||
 
2129
           param->svm_type == NU_SVR)
 
2130
        {
 
2131
                // regression or one-class-svm
 
2132
                model->nr_class = 2;
 
2133
                model->label = NULL;
 
2134
                model->nSV = NULL;
 
2135
                model->probA = NULL; model->probB = NULL;
 
2136
                model->sv_coef = Malloc(double *,1);
 
2137
 
 
2138
                if(param->probability && 
 
2139
                   (param->svm_type == EPSILON_SVR ||
 
2140
                    param->svm_type == NU_SVR))
 
2141
                {
 
2142
                        model->probA = Malloc(double,1);
 
2143
                        model->probA[0] = svm_svr_probability(prob,param);
 
2144
                }
 
2145
 
 
2146
                decision_function f = svm_train_one(prob,param,0,0);
 
2147
                model->rho = Malloc(double,1);
 
2148
                model->rho[0] = f.rho;
 
2149
 
 
2150
                int nSV = 0;
 
2151
                int i;
 
2152
                for(i=0;i<prob->l;i++)
 
2153
                        if(fabs(f.alpha[i]) > 0) ++nSV;
 
2154
                model->l = nSV;
 
2155
                model->SV = Malloc(svm_node *,nSV);
 
2156
                model->sv_coef[0] = Malloc(double,nSV);
 
2157
                int j = 0;
 
2158
                for(i=0;i<prob->l;i++)
 
2159
                        if(fabs(f.alpha[i]) > 0)
 
2160
                        {
 
2161
                                model->SV[j] = prob->x[i];
 
2162
                                model->sv_coef[0][j] = f.alpha[i];
 
2163
                                ++j;
 
2164
                        }               
 
2165
 
 
2166
                free(f.alpha);
 
2167
        }
 
2168
        else
 
2169
        {
 
2170
                // classification
 
2171
                int l = prob->l;
 
2172
                int nr_class;
 
2173
                int *label = NULL;
 
2174
                int *start = NULL;
 
2175
                int *count = NULL;
 
2176
                int *perm = Malloc(int,l);
 
2177
 
 
2178
                // group training data of the same class
 
2179
                svm_group_classes(prob,&nr_class,&label,&start,&count,perm);            
 
2180
                svm_node **x = Malloc(svm_node *,l);
 
2181
                int i;
 
2182
                for(i=0;i<l;i++)
 
2183
                        x[i] = prob->x[perm[i]];
 
2184
 
 
2185
                // calculate weighted C
 
2186
 
 
2187
                double *weighted_C = Malloc(double, nr_class);
 
2188
                for(i=0;i<nr_class;i++)
 
2189
                        weighted_C[i] = param->C;
 
2190
                for(i=0;i<param->nr_weight;i++)
 
2191
                {       
 
2192
                        int j;
 
2193
                        for(j=0;j<nr_class;j++)
 
2194
                                if(param->weight_label[i] == label[j])
 
2195
                                        break;
 
2196
                        if(j == nr_class)
 
2197
                                fprintf(stderr,"warning: class label %d specified in weight is not found\n", param->weight_label[i]);
 
2198
                        else
 
2199
                                weighted_C[j] *= param->weight[i];
 
2200
                }
 
2201
 
 
2202
                // train k*(k-1)/2 models
 
2203
                
 
2204
                bool *nonzero = Malloc(bool,l);
 
2205
                for(i=0;i<l;i++)
 
2206
                        nonzero[i] = false;
 
2207
                decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);
 
2208
 
 
2209
                double *probA=NULL,*probB=NULL;
 
2210
                if (param->probability)
 
2211
                {
 
2212
                        probA=Malloc(double,nr_class*(nr_class-1)/2);
 
2213
                        probB=Malloc(double,nr_class*(nr_class-1)/2);
 
2214
                }
 
2215
 
 
2216
                int p = 0;
 
2217
                for(i=0;i<nr_class;i++)
 
2218
                        for(int j=i+1;j<nr_class;j++)
 
2219
                        {
 
2220
                                svm_problem sub_prob;
 
2221
                                int si = start[i], sj = start[j];
 
2222
                                int ci = count[i], cj = count[j];
 
2223
                                sub_prob.l = ci+cj;
 
2224
                                sub_prob.x = Malloc(svm_node *,sub_prob.l);
 
2225
                                sub_prob.y = Malloc(double,sub_prob.l);
 
2226
                                int k;
 
2227
                                for(k=0;k<ci;k++)
 
2228
                                {
 
2229
                                        sub_prob.x[k] = x[si+k];
 
2230
                                        sub_prob.y[k] = +1;
 
2231
                                }
 
2232
                                for(k=0;k<cj;k++)
 
2233
                                {
 
2234
                                        sub_prob.x[ci+k] = x[sj+k];
 
2235
                                        sub_prob.y[ci+k] = -1;
 
2236
                                }
 
2237
 
 
2238
                                if(param->probability)
 
2239
                                        svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);
 
2240
 
 
2241
                                f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
 
2242
                                for(k=0;k<ci;k++)
 
2243
                                        if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
 
2244
                                                nonzero[si+k] = true;
 
2245
                                for(k=0;k<cj;k++)
 
2246
                                        if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
 
2247
                                                nonzero[sj+k] = true;
 
2248
                                free(sub_prob.x);
 
2249
                                free(sub_prob.y);
 
2250
                                ++p;
 
2251
                        }
 
2252
 
 
2253
                // build output
 
2254
 
 
2255
                model->nr_class = nr_class;
 
2256
                
 
2257
                model->label = Malloc(int,nr_class);
 
2258
                for(i=0;i<nr_class;i++)
 
2259
                        model->label[i] = label[i];
 
2260
                
 
2261
                model->rho = Malloc(double,nr_class*(nr_class-1)/2);
 
2262
                for(i=0;i<nr_class*(nr_class-1)/2;i++)
 
2263
                        model->rho[i] = f[i].rho;
 
2264
 
 
2265
                if(param->probability)
 
2266
                {
 
2267
                        model->probA = Malloc(double,nr_class*(nr_class-1)/2);
 
2268
                        model->probB = Malloc(double,nr_class*(nr_class-1)/2);
 
2269
                        for(i=0;i<nr_class*(nr_class-1)/2;i++)
 
2270
                        {
 
2271
                                model->probA[i] = probA[i];
 
2272
                                model->probB[i] = probB[i];
 
2273
                        }
 
2274
                }
 
2275
                else
 
2276
                {
 
2277
                        model->probA=NULL;
 
2278
                        model->probB=NULL;
 
2279
                }
 
2280
 
 
2281
                int total_sv = 0;
 
2282
                int *nz_count = Malloc(int,nr_class);
 
2283
                model->nSV = Malloc(int,nr_class);
 
2284
                for(i=0;i<nr_class;i++)
 
2285
                {
 
2286
                        int nSV = 0;
 
2287
                        for(int j=0;j<count[i];j++)
 
2288
                                if(nonzero[start[i]+j])
 
2289
                                {       
 
2290
                                        ++nSV;
 
2291
                                        ++total_sv;
 
2292
                                }
 
2293
                        model->nSV[i] = nSV;
 
2294
                        nz_count[i] = nSV;
 
2295
                }
 
2296
                
 
2297
                info("Total nSV = %d\n",total_sv);
 
2298
 
 
2299
                model->l = total_sv;
 
2300
                model->SV = Malloc(svm_node *,total_sv);
 
2301
                p = 0;
 
2302
                for(i=0;i<l;i++)
 
2303
                        if(nonzero[i]) model->SV[p++] = x[i];
 
2304
 
 
2305
                int *nz_start = Malloc(int,nr_class);
 
2306
                nz_start[0] = 0;
 
2307
                for(i=1;i<nr_class;i++)
 
2308
                        nz_start[i] = nz_start[i-1]+nz_count[i-1];
 
2309
 
 
2310
                model->sv_coef = Malloc(double *,nr_class-1);
 
2311
                for(i=0;i<nr_class-1;i++)
 
2312
                        model->sv_coef[i] = Malloc(double,total_sv);
 
2313
 
 
2314
                p = 0;
 
2315
                for(i=0;i<nr_class;i++)
 
2316
                        for(int j=i+1;j<nr_class;j++)
 
2317
                        {
 
2318
                                // classifier (i,j): coefficients with
 
2319
                                // i are in sv_coef[j-1][nz_start[i]...],
 
2320
                                // j are in sv_coef[i][nz_start[j]...]
 
2321
 
 
2322
                                int si = start[i];
 
2323
                                int sj = start[j];
 
2324
                                int ci = count[i];
 
2325
                                int cj = count[j];
 
2326
                                
 
2327
                                int q = nz_start[i];
 
2328
                                int k;
 
2329
                                for(k=0;k<ci;k++)
 
2330
                                        if(nonzero[si+k])
 
2331
                                                model->sv_coef[j-1][q++] = f[p].alpha[k];
 
2332
                                q = nz_start[j];
 
2333
                                for(k=0;k<cj;k++)
 
2334
                                        if(nonzero[sj+k])
 
2335
                                                model->sv_coef[i][q++] = f[p].alpha[ci+k];
 
2336
                                ++p;
 
2337
                        }
 
2338
                
 
2339
                free(label);
 
2340
                free(probA);
 
2341
                free(probB);
 
2342
                free(count);
 
2343
                free(perm);
 
2344
                free(start);
 
2345
                free(x);
 
2346
                free(weighted_C);
 
2347
                free(nonzero);
 
2348
                for(i=0;i<nr_class*(nr_class-1)/2;i++)
 
2349
                        free(f[i].alpha);
 
2350
                free(f);
 
2351
                free(nz_count);
 
2352
                free(nz_start);
 
2353
        }
 
2354
        return model;
 
2355
}
 
2356
 
 
2357
// Stratified cross validation
 
2358
void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
 
2359
{
 
2360
        int i;
 
2361
        int *fold_start = Malloc(int,nr_fold+1);
 
2362
        int l = prob->l;
 
2363
        int *perm = Malloc(int,l);
 
2364
        int nr_class;
 
2365
 
 
2366
        // stratified cv may not give leave-one-out rate
 
2367
        // Each class to l folds -> some folds may have zero elements
 
2368
        if((param->svm_type == C_SVC ||
 
2369
            param->svm_type == NU_SVC) && nr_fold < l)
 
2370
        {
 
2371
                int *start = NULL;
 
2372
                int *label = NULL;
 
2373
                int *count = NULL;
 
2374
                svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
 
2375
 
 
2376
                // random shuffle and then data grouped by fold using the array perm
 
2377
                int *fold_count = Malloc(int,nr_fold);
 
2378
                int c;
 
2379
                int *index = Malloc(int,l);
 
2380
                for(i=0;i<l;i++)
 
2381
                        index[i]=perm[i];
 
2382
                for (c=0; c<nr_class; c++) 
 
2383
                        for(i=0;i<count[c];i++)
 
2384
                        {
 
2385
                                int j = i+rand()%(count[c]-i);
 
2386
                                swap(index[start[c]+j],index[start[c]+i]);
 
2387
                        }
 
2388
                for(i=0;i<nr_fold;i++)
 
2389
                {
 
2390
                        fold_count[i] = 0;
 
2391
                        for (c=0; c<nr_class;c++)
 
2392
                                fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
 
2393
                }
 
2394
                fold_start[0]=0;
 
2395
                for (i=1;i<=nr_fold;i++)
 
2396
                        fold_start[i] = fold_start[i-1]+fold_count[i-1];
 
2397
                for (c=0; c<nr_class;c++)
 
2398
                        for(i=0;i<nr_fold;i++)
 
2399
                        {
 
2400
                                int begin = start[c]+i*count[c]/nr_fold;
 
2401
                                int end = start[c]+(i+1)*count[c]/nr_fold;
 
2402
                                for(int j=begin;j<end;j++)
 
2403
                                {
 
2404
                                        perm[fold_start[i]] = index[j];
 
2405
                                        fold_start[i]++;
 
2406
                                }
 
2407
                        }
 
2408
                fold_start[0]=0;
 
2409
                for (i=1;i<=nr_fold;i++)
 
2410
                        fold_start[i] = fold_start[i-1]+fold_count[i-1];
 
2411
                free(start);    
 
2412
                free(label);
 
2413
                free(count);    
 
2414
                free(index);
 
2415
                free(fold_count);
 
2416
        }
 
2417
        else
 
2418
        {
 
2419
                for(i=0;i<l;i++) perm[i]=i;
 
2420
                for(i=0;i<l;i++)
 
2421
                {
 
2422
                        int j = i+rand()%(l-i);
 
2423
                        swap(perm[i],perm[j]);
 
2424
                }
 
2425
                for(i=0;i<=nr_fold;i++)
 
2426
                        fold_start[i]=i*l/nr_fold;
 
2427
        }
 
2428
 
 
2429
        for(i=0;i<nr_fold;i++)
 
2430
        {
 
2431
                int begin = fold_start[i];
 
2432
                int end = fold_start[i+1];
 
2433
                int j,k;
 
2434
                struct svm_problem subprob;
 
2435
 
 
2436
                subprob.l = l-(end-begin);
 
2437
                subprob.x = Malloc(struct svm_node*,subprob.l);
 
2438
                subprob.y = Malloc(double,subprob.l);
 
2439
                        
 
2440
                k=0;
 
2441
                for(j=0;j<begin;j++)
 
2442
                {
 
2443
                        subprob.x[k] = prob->x[perm[j]];
 
2444
                        subprob.y[k] = prob->y[perm[j]];
 
2445
                        ++k;
 
2446
                }
 
2447
                for(j=end;j<l;j++)
 
2448
                {
 
2449
                        subprob.x[k] = prob->x[perm[j]];
 
2450
                        subprob.y[k] = prob->y[perm[j]];
 
2451
                        ++k;
 
2452
                }
 
2453
                struct svm_model *submodel = svm_train(&subprob,param);
 
2454
                if(param->probability && 
 
2455
                   (param->svm_type == C_SVC || param->svm_type == NU_SVC))
 
2456
                {
 
2457
                        double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
 
2458
                        for(j=begin;j<end;j++)
 
2459
                                target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
 
2460
                        free(prob_estimates);                   
 
2461
                }
 
2462
                else
 
2463
                        for(j=begin;j<end;j++)
 
2464
                                target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
 
2465
                svm_destroy_model(submodel);
 
2466
                free(subprob.x);
 
2467
                free(subprob.y);
 
2468
        }               
 
2469
        free(fold_start);
 
2470
        free(perm);     
 
2471
}
 
2472
 
 
2473
 
 
2474
int svm_get_svm_type(const svm_model *model)
 
2475
{
 
2476
        return model->param.svm_type;
 
2477
}
 
2478
 
 
2479
int svm_get_nr_class(const svm_model *model)
 
2480
{
 
2481
        return model->nr_class;
 
2482
}
 
2483
 
 
2484
void svm_get_labels(const svm_model *model, int* label)
 
2485
{
 
2486
        if (model->label != NULL)
 
2487
                for(int i=0;i<model->nr_class;i++)
 
2488
                        label[i] = model->label[i];
 
2489
}
 
2490
 
 
2491
double svm_get_svr_probability(const svm_model *model)
 
2492
{
 
2493
        if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
 
2494
            model->probA!=NULL)
 
2495
                return model->probA[0];
 
2496
        else
 
2497
        {
 
2498
                info("Model doesn't contain information for SVR probability inference\n");
 
2499
                return 0;
 
2500
        }
 
2501
}
 
2502
 
 
2503
void svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
 
2504
{
 
2505
        if(model->param.svm_type == ONE_CLASS ||
 
2506
           model->param.svm_type == EPSILON_SVR ||
 
2507
           model->param.svm_type == NU_SVR)
 
2508
        {
 
2509
                double *sv_coef = model->sv_coef[0];
 
2510
                double sum = 0;
 
2511
                for(int i=0;i<model->l;i++)
 
2512
                        sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
 
2513
                sum -= model->rho[0];
 
2514
                *dec_values = sum;
 
2515
        }
 
2516
        else
 
2517
        {
 
2518
                int i;
 
2519
                int nr_class = model->nr_class;
 
2520
                int l = model->l;
 
2521
                
 
2522
                double *kvalue = Malloc(double,l);
 
2523
                for(i=0;i<l;i++)
 
2524
                        kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
 
2525
 
 
2526
                int *start = Malloc(int,nr_class);
 
2527
                start[0] = 0;
 
2528
                for(i=1;i<nr_class;i++)
 
2529
                        start[i] = start[i-1]+model->nSV[i-1];
 
2530
 
 
2531
                int p=0;
 
2532
                int pos=0;
 
2533
                for(i=0;i<nr_class;i++)
 
2534
                        for(int j=i+1;j<nr_class;j++)
 
2535
                        {
 
2536
                                double sum = 0;
 
2537
                                int si = start[i];
 
2538
                                int sj = start[j];
 
2539
                                int ci = model->nSV[i];
 
2540
                                int cj = model->nSV[j];
 
2541
                                
 
2542
                                int k;
 
2543
                                double *coef1 = model->sv_coef[j-1];
 
2544
                                double *coef2 = model->sv_coef[i];
 
2545
                                for(k=0;k<ci;k++)
 
2546
                                        sum += coef1[si+k] * kvalue[si+k];
 
2547
                                for(k=0;k<cj;k++)
 
2548
                                        sum += coef2[sj+k] * kvalue[sj+k];
 
2549
                                sum -= model->rho[p++];
 
2550
                                dec_values[pos++] = sum;
 
2551
                        }
 
2552
 
 
2553
                free(kvalue);
 
2554
                free(start);
 
2555
        }
 
2556
}
 
2557
 
 
2558
double svm_predict(const svm_model *model, const svm_node *x)
 
2559
{
 
2560
        if(model->param.svm_type == ONE_CLASS ||
 
2561
           model->param.svm_type == EPSILON_SVR ||
 
2562
           model->param.svm_type == NU_SVR)
 
2563
        {
 
2564
                double res;
 
2565
                svm_predict_values(model, x, &res);
 
2566
                
 
2567
                if(model->param.svm_type == ONE_CLASS)
 
2568
                        return (res>0)?1:-1;
 
2569
                else
 
2570
                        return res;
 
2571
        }
 
2572
        else
 
2573
        {
 
2574
                int i;
 
2575
                int nr_class = model->nr_class;
 
2576
                double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
 
2577
                svm_predict_values(model, x, dec_values);
 
2578
 
 
2579
                int *vote = Malloc(int,nr_class);
 
2580
                for(i=0;i<nr_class;i++)
 
2581
                        vote[i] = 0;
 
2582
                int pos=0;
 
2583
                for(i=0;i<nr_class;i++)
 
2584
                        for(int j=i+1;j<nr_class;j++)
 
2585
                        {
 
2586
                                if(dec_values[pos++] > 0)
 
2587
                                        ++vote[i];
 
2588
                                else
 
2589
                                        ++vote[j];
 
2590
                        }
 
2591
 
 
2592
                int vote_max_idx = 0;
 
2593
                for(i=1;i<nr_class;i++)
 
2594
                        if(vote[i] > vote[vote_max_idx])
 
2595
                                vote_max_idx = i;
 
2596
                free(vote);
 
2597
                free(dec_values);
 
2598
                return model->label[vote_max_idx];
 
2599
        }
 
2600
}
 
2601
 
 
2602
double svm_predict_probability(
 
2603
        const svm_model *model, const svm_node *x, double *prob_estimates)
 
2604
{
 
2605
        if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
 
2606
            model->probA!=NULL && model->probB!=NULL)
 
2607
        {
 
2608
                int i;
 
2609
                int nr_class = model->nr_class;
 
2610
                double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
 
2611
                svm_predict_values(model, x, dec_values);
 
2612
 
 
2613
                double min_prob=1e-7;
 
2614
                double **pairwise_prob=Malloc(double *,nr_class);
 
2615
                for(i=0;i<nr_class;i++)
 
2616
                        pairwise_prob[i]=Malloc(double,nr_class);
 
2617
                int k=0;
 
2618
                for(i=0;i<nr_class;i++)
 
2619
                        for(int j=i+1;j<nr_class;j++)
 
2620
                        {
 
2621
                                pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
 
2622
                                pairwise_prob[j][i]=1-pairwise_prob[i][j];
 
2623
                                k++;
 
2624
                        }
 
2625
                multiclass_probability(nr_class,pairwise_prob,prob_estimates);
 
2626
 
 
2627
                int prob_max_idx = 0;
 
2628
                for(i=1;i<nr_class;i++)
 
2629
                        if(prob_estimates[i] > prob_estimates[prob_max_idx])
 
2630
                                prob_max_idx = i;
 
2631
                for(i=0;i<nr_class;i++)
 
2632
                        free(pairwise_prob[i]);
 
2633
                free(dec_values);
 
2634
                free(pairwise_prob);         
 
2635
                return model->label[prob_max_idx];
 
2636
        }
 
2637
        else 
 
2638
                return svm_predict(model, x);
 
2639
}
 
2640
 
 
2641
const char *svm_type_table[] =
 
2642
{
 
2643
        "c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
 
2644
};
 
2645
 
 
2646
const char *kernel_type_table[]=
 
2647
{
 
2648
        "linear","polynomial","rbf","sigmoid","precomputed",NULL
 
2649
};
 
2650
 
 
2651
int svm_save_model(const char *model_file_name, const svm_model *model)
 
2652
{
 
2653
        FILE *fp = fopen(model_file_name,"w");
 
2654
        if(fp==NULL) return -1;
 
2655
 
 
2656
        const svm_parameter& param = model->param;
 
2657
 
 
2658
        fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
 
2659
        fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);
 
2660
 
 
2661
        if(param.kernel_type == POLY)
 
2662
                fprintf(fp,"degree %d\n", param.degree);
 
2663
 
 
2664
        if(param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID)
 
2665
                fprintf(fp,"gamma %g\n", param.gamma);
 
2666
 
 
2667
        if(param.kernel_type == POLY || param.kernel_type == SIGMOID)
 
2668
                fprintf(fp,"coef0 %g\n", param.coef0);
 
2669
 
 
2670
        int nr_class = model->nr_class;
 
2671
        int l = model->l;
 
2672
        fprintf(fp, "nr_class %d\n", nr_class);
 
2673
        fprintf(fp, "total_sv %d\n",l);
 
2674
        
 
2675
        {
 
2676
                fprintf(fp, "rho");
 
2677
                for(int i=0;i<nr_class*(nr_class-1)/2;i++)
 
2678
                        fprintf(fp," %g",model->rho[i]);
 
2679
                fprintf(fp, "\n");
 
2680
        }
 
2681
        
 
2682
        if(model->label)
 
2683
        {
 
2684
                fprintf(fp, "label");
 
2685
                for(int i=0;i<nr_class;i++)
 
2686
                        fprintf(fp," %d",model->label[i]);
 
2687
                fprintf(fp, "\n");
 
2688
        }
 
2689
 
 
2690
        if(model->probA) // regression has probA only
 
2691
        {
 
2692
                fprintf(fp, "probA");
 
2693
                for(int i=0;i<nr_class*(nr_class-1)/2;i++)
 
2694
                        fprintf(fp," %g",model->probA[i]);
 
2695
                fprintf(fp, "\n");
 
2696
        }
 
2697
        if(model->probB)
 
2698
        {
 
2699
                fprintf(fp, "probB");
 
2700
                for(int i=0;i<nr_class*(nr_class-1)/2;i++)
 
2701
                        fprintf(fp," %g",model->probB[i]);
 
2702
                fprintf(fp, "\n");
 
2703
        }
 
2704
 
 
2705
        if(model->nSV)
 
2706
        {
 
2707
                fprintf(fp, "nr_sv");
 
2708
                for(int i=0;i<nr_class;i++)
 
2709
                        fprintf(fp," %d",model->nSV[i]);
 
2710
                fprintf(fp, "\n");
 
2711
        }
 
2712
 
 
2713
        fprintf(fp, "SV\n");
 
2714
        const double * const *sv_coef = model->sv_coef;
 
2715
        const svm_node * const *SV = model->SV;
 
2716
 
 
2717
        for(int i=0;i<l;i++)
 
2718
        {
 
2719
                for(int j=0;j<nr_class-1;j++)
 
2720
                        fprintf(fp, "%.16g ",sv_coef[j][i]);
 
2721
 
 
2722
                const svm_node *p = SV[i];
 
2723
 
 
2724
                if(param.kernel_type == PRECOMPUTED)
 
2725
                        fprintf(fp,"0:%d ",(int)(p->value));
 
2726
                else
 
2727
                        while(p->index != -1)
 
2728
                        {
 
2729
                                fprintf(fp,"%d:%.8g ",p->index,p->value);
 
2730
                                p++;
 
2731
                        }
 
2732
                fprintf(fp, "\n");
 
2733
        }
 
2734
 
 
2735
        fclose(fp);
 
2736
        return 0;
 
2737
}
 
2738
 
 
2739
svm_model *svm_load_model(const char *model_file_name)
 
2740
{
 
2741
        FILE *fp = fopen(model_file_name,"rb");
 
2742
        if(fp==NULL) return NULL;
 
2743
        
 
2744
        // read parameters
 
2745
 
 
2746
        svm_model *model = Malloc(svm_model,1);
 
2747
        svm_parameter& param = model->param;
 
2748
        model->rho = NULL;
 
2749
        model->probA = NULL;
 
2750
        model->probB = NULL;
 
2751
        model->label = NULL;
 
2752
        model->nSV = NULL;
 
2753
 
 
2754
        char cmd[81];
 
2755
        while(1)
 
2756
        {
 
2757
                fscanf(fp,"%80s",cmd);
 
2758
 
 
2759
                if(strcmp(cmd,"svm_type")==0)
 
2760
                {
 
2761
                        fscanf(fp,"%80s",cmd);
 
2762
                        int i;
 
2763
                        for(i=0;svm_type_table[i];i++)
 
2764
                        {
 
2765
                                if(strcmp(svm_type_table[i],cmd)==0)
 
2766
                                {
 
2767
                                        param.svm_type=i;
 
2768
                                        break;
 
2769
                                }
 
2770
                        }
 
2771
                        if(svm_type_table[i] == NULL)
 
2772
                        {
 
2773
                                fprintf(stderr,"unknown svm type.\n");
 
2774
                                free(model->rho);
 
2775
                                free(model->label);
 
2776
                                free(model->nSV);
 
2777
                                free(model);
 
2778
                                return NULL;
 
2779
                        }
 
2780
                }
 
2781
                else if(strcmp(cmd,"kernel_type")==0)
 
2782
                {               
 
2783
                        fscanf(fp,"%80s",cmd);
 
2784
                        int i;
 
2785
                        for(i=0;kernel_type_table[i];i++)
 
2786
                        {
 
2787
                                if(strcmp(kernel_type_table[i],cmd)==0)
 
2788
                                {
 
2789
                                        param.kernel_type=i;
 
2790
                                        break;
 
2791
                                }
 
2792
                        }
 
2793
                        if(kernel_type_table[i] == NULL)
 
2794
                        {
 
2795
                                fprintf(stderr,"unknown kernel function.\n");
 
2796
                                free(model->rho);
 
2797
                                free(model->label);
 
2798
                                free(model->nSV);
 
2799
                                free(model);
 
2800
                                return NULL;
 
2801
                        }
 
2802
                }
 
2803
                else if(strcmp(cmd,"degree")==0)
 
2804
                        fscanf(fp,"%d",&param.degree);
 
2805
                else if(strcmp(cmd,"gamma")==0)
 
2806
                        fscanf(fp,"%lf",&param.gamma);
 
2807
                else if(strcmp(cmd,"coef0")==0)
 
2808
                        fscanf(fp,"%lf",&param.coef0);
 
2809
                else if(strcmp(cmd,"nr_class")==0)
 
2810
                        fscanf(fp,"%d",&model->nr_class);
 
2811
                else if(strcmp(cmd,"total_sv")==0)
 
2812
                        fscanf(fp,"%d",&model->l);
 
2813
                else if(strcmp(cmd,"rho")==0)
 
2814
                {
 
2815
                        int n = model->nr_class * (model->nr_class-1)/2;
 
2816
                        model->rho = Malloc(double,n);
 
2817
                        for(int i=0;i<n;i++)
 
2818
                                fscanf(fp,"%lf",&model->rho[i]);
 
2819
                }
 
2820
                else if(strcmp(cmd,"label")==0)
 
2821
                {
 
2822
                        int n = model->nr_class;
 
2823
                        model->label = Malloc(int,n);
 
2824
                        for(int i=0;i<n;i++)
 
2825
                                fscanf(fp,"%d",&model->label[i]);
 
2826
                }
 
2827
                else if(strcmp(cmd,"probA")==0)
 
2828
                {
 
2829
                        int n = model->nr_class * (model->nr_class-1)/2;
 
2830
                        model->probA = Malloc(double,n);
 
2831
                        for(int i=0;i<n;i++)
 
2832
                                fscanf(fp,"%lf",&model->probA[i]);
 
2833
                }
 
2834
                else if(strcmp(cmd,"probB")==0)
 
2835
                {
 
2836
                        int n = model->nr_class * (model->nr_class-1)/2;
 
2837
                        model->probB = Malloc(double,n);
 
2838
                        for(int i=0;i<n;i++)
 
2839
                                fscanf(fp,"%lf",&model->probB[i]);
 
2840
                }
 
2841
                else if(strcmp(cmd,"nr_sv")==0)
 
2842
                {
 
2843
                        int n = model->nr_class;
 
2844
                        model->nSV = Malloc(int,n);
 
2845
                        for(int i=0;i<n;i++)
 
2846
                                fscanf(fp,"%d",&model->nSV[i]);
 
2847
                }
 
2848
                else if(strcmp(cmd,"SV")==0)
 
2849
                {
 
2850
                        while(1)
 
2851
                        {
 
2852
                                int c = getc(fp);
 
2853
                                if(c==EOF || c=='\n') break;    
 
2854
                        }
 
2855
                        break;
 
2856
                }
 
2857
                else
 
2858
                {
 
2859
                        fprintf(stderr,"unknown text in model file\n");
 
2860
                        free(model->rho);
 
2861
                        free(model->label);
 
2862
                        free(model->nSV);
 
2863
                        free(model);
 
2864
                        return NULL;
 
2865
                }
 
2866
        }
 
2867
 
 
2868
        // read sv_coef and SV
 
2869
 
 
2870
        int elements = 0;
 
2871
        long pos = ftell(fp);
 
2872
 
 
2873
        while(1)
 
2874
        {
 
2875
                int c = fgetc(fp);
 
2876
                switch(c)
 
2877
                {
 
2878
                        case '\n':
 
2879
                                // count the '-1' element
 
2880
                        case ':':
 
2881
                                ++elements;
 
2882
                                break;
 
2883
                        case EOF:
 
2884
                                goto out;
 
2885
                        default:
 
2886
                                ;
 
2887
                }
 
2888
        }
 
2889
out:
 
2890
        fseek(fp,pos,SEEK_SET);
 
2891
 
 
2892
        int m = model->nr_class - 1;
 
2893
        int l = model->l;
 
2894
        model->sv_coef = Malloc(double *,m);
 
2895
        int i;
 
2896
        for(i=0;i<m;i++)
 
2897
                model->sv_coef[i] = Malloc(double,l);
 
2898
        model->SV = Malloc(svm_node*,l);
 
2899
        svm_node *x_space=NULL;
 
2900
        if(l>0) x_space = Malloc(svm_node,elements);
 
2901
 
 
2902
        int j=0;
 
2903
        for(i=0;i<l;i++)
 
2904
        {
 
2905
                model->SV[i] = &x_space[j];
 
2906
                for(int k=0;k<m;k++)
 
2907
                        fscanf(fp,"%lf",&model->sv_coef[k][i]);
 
2908
                while(1)
 
2909
                {
 
2910
                        int c;
 
2911
                        do {
 
2912
                                c = getc(fp);
 
2913
                                if(c=='\n') goto out2;
 
2914
                        } while(isspace(c));
 
2915
                        ungetc(c,fp);
 
2916
                        fscanf(fp,"%d:%lf",&(x_space[j].index),&(x_space[j].value));
 
2917
                        ++j;
 
2918
                }       
 
2919
out2:
 
2920
                x_space[j++].index = -1;
 
2921
        }
 
2922
 
 
2923
        fclose(fp);
 
2924
 
 
2925
        model->free_sv = 1;     // XXX
 
2926
        return model;
 
2927
}
 
2928
 
 
2929
void svm_destroy_model(svm_model* model)
 
2930
{
 
2931
        if(model->free_sv && model->l > 0)
 
2932
                free((void *)(model->SV[0]));
 
2933
        for(int i=0;i<model->nr_class-1;i++)
 
2934
                free(model->sv_coef[i]);
 
2935
        free(model->SV);
 
2936
        free(model->sv_coef);
 
2937
        free(model->rho);
 
2938
        free(model->label);
 
2939
        free(model->probA);
 
2940
        free(model->probB);
 
2941
        free(model->nSV);
 
2942
        free(model);
 
2943
}
 
2944
 
 
2945
void svm_destroy_param(svm_parameter* param)
 
2946
{
 
2947
        free(param->weight_label);
 
2948
        free(param->weight);
 
2949
}
 
2950
 
 
2951
const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
 
2952
{
 
2953
        // svm_type
 
2954
 
 
2955
        int svm_type = param->svm_type;
 
2956
        if(svm_type != C_SVC &&
 
2957
           svm_type != NU_SVC &&
 
2958
           svm_type != ONE_CLASS &&
 
2959
           svm_type != EPSILON_SVR &&
 
2960
           svm_type != NU_SVR)
 
2961
                return "unknown svm type";
 
2962
        
 
2963
        // kernel_type, degree
 
2964
        
 
2965
        int kernel_type = param->kernel_type;
 
2966
        if(kernel_type != LINEAR &&
 
2967
           kernel_type != POLY &&
 
2968
           kernel_type != RBF &&
 
2969
           kernel_type != SIGMOID &&
 
2970
           kernel_type != PRECOMPUTED)
 
2971
                return "unknown kernel type";
 
2972
 
 
2973
        if(param->degree < 0)
 
2974
                return "degree of polynomial kernel < 0";
 
2975
 
 
2976
        // cache_size,eps,C,nu,p,shrinking
 
2977
 
 
2978
        if(param->cache_size <= 0)
 
2979
                return "cache_size <= 0";
 
2980
 
 
2981
        if(param->eps <= 0)
 
2982
                return "eps <= 0";
 
2983
 
 
2984
        if(svm_type == C_SVC ||
 
2985
           svm_type == EPSILON_SVR ||
 
2986
           svm_type == NU_SVR)
 
2987
                if(param->C <= 0)
 
2988
                        return "C <= 0";
 
2989
 
 
2990
        if(svm_type == NU_SVC ||
 
2991
           svm_type == ONE_CLASS ||
 
2992
           svm_type == NU_SVR)
 
2993
                if(param->nu <= 0 || param->nu > 1)
 
2994
                        return "nu <= 0 or nu > 1";
 
2995
 
 
2996
        if(svm_type == EPSILON_SVR)
 
2997
                if(param->p < 0)
 
2998
                        return "p < 0";
 
2999
 
 
3000
        if(param->shrinking != 0 &&
 
3001
           param->shrinking != 1)
 
3002
                return "shrinking != 0 and shrinking != 1";
 
3003
 
 
3004
        if(param->probability != 0 &&
 
3005
           param->probability != 1)
 
3006
                return "probability != 0 and probability != 1";
 
3007
 
 
3008
        if(param->probability == 1 &&
 
3009
           svm_type == ONE_CLASS)
 
3010
                return "one-class SVM probability output not supported yet";
 
3011
 
 
3012
 
 
3013
        // check whether nu-svc is feasible
 
3014
        
 
3015
        if(svm_type == NU_SVC)
 
3016
        {
 
3017
                int l = prob->l;
 
3018
                int max_nr_class = 16;
 
3019
                int nr_class = 0;
 
3020
                int *label = Malloc(int,max_nr_class);
 
3021
                int *count = Malloc(int,max_nr_class);
 
3022
 
 
3023
                int i;
 
3024
                for(i=0;i<l;i++)
 
3025
                {
 
3026
                        int this_label = (int)prob->y[i];
 
3027
                        int j;
 
3028
                        for(j=0;j<nr_class;j++)
 
3029
                                if(this_label == label[j])
 
3030
                                {
 
3031
                                        ++count[j];
 
3032
                                        break;
 
3033
                                }
 
3034
                        if(j == nr_class)
 
3035
                        {
 
3036
                                if(nr_class == max_nr_class)
 
3037
                                {
 
3038
                                        max_nr_class *= 2;
 
3039
                                        label = (int *)realloc(label,max_nr_class*sizeof(int));
 
3040
                                        count = (int *)realloc(count,max_nr_class*sizeof(int));
 
3041
                                }
 
3042
                                label[nr_class] = this_label;
 
3043
                                count[nr_class] = 1;
 
3044
                                ++nr_class;
 
3045
                        }
 
3046
                }
 
3047
        
 
3048
                for(i=0;i<nr_class;i++)
 
3049
                {
 
3050
                        int n1 = count[i];
 
3051
                        for(int j=i+1;j<nr_class;j++)
 
3052
                        {
 
3053
                                int n2 = count[j];
 
3054
                                if(param->nu*(n1+n2)/2 > min(n1,n2))
 
3055
                                {
 
3056
                                        free(label);
 
3057
                                        free(count);
 
3058
                                        return "specified nu is infeasible";
 
3059
                                }
 
3060
                        }
 
3061
                }
 
3062
                free(label);
 
3063
                free(count);
 
3064
        }
 
3065
 
 
3066
        return NULL;
 
3067
}
 
3068
 
 
3069
int svm_check_probability_model(const svm_model *model)
 
3070
{
 
3071
        return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
 
3072
                model->probA!=NULL && model->probB!=NULL) ||
 
3073
                ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
 
3074
                 model->probA!=NULL);
 
3075
}