SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
shogun_libsvm.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2000-2009 Chih-Chung Chang and Chih-Jen Lin
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  * notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  * notice, this list of conditions and the following disclaimer in the
14  * documentation and/or other materials provided with the distribution.
15  *
16  * 3. Neither name of copyright holders nor the names of its contributors
17  * may be used to endorse or promote products derived from this software
18  * without specific prior written permission.
19  *
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
25  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Shogun specific adjustments (w) 2006-2009 Soeren Sonnenburg
34  */
35 
36 #ifndef DOXYGEN_SHOULD_SKIP_THIS
37 
38 #include <shogun/lib/memory.h>
40 #include <shogun/kernel/Kernel.h>
41 #include <shogun/io/SGIO.h>
42 #include <shogun/lib/Time.h>
43 #include <shogun/lib/Signal.h>
44 #include <shogun/lib/common.h>
46 
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <ctype.h>
50 #include <float.h>
51 #include <string.h>
52 #include <stdarg.h>
53 
54 #ifdef HAVE_PTHREAD
55 #include <pthread.h>
56 #endif
57 
58 namespace shogun
59 {
60 
61 typedef KERNELCACHE_ELEM Qfloat;
62 typedef float64_t schar;
63 
// Allocate a fresh array of n elements of T and bit-copy src into it.
// NOTE: this is a raw memcpy of sizeof(T)*n bytes from an S* — it is only
// safe when S and T have identical size/layout (true for the POD-style
// uses in this file); no constructors are run.
template <class S, class T> inline void clone(T*& dst, S* src, int32_t n)
{
	dst = SG_MALLOC(T, n);
	memcpy((void *)dst,(void *)src,sizeof(T)*n);
}
69 #define INF HUGE_VAL
70 #define TAU 1e-12
71 
72 class QMatrix;
73 class SVC_QMC;
74 
75 //
76 // Kernel Cache
77 //
78 // l is the number of total data items
79 // size is the cache size limit in bytes
80 //
// LRU cache of kernel-matrix columns.  Each of the l items owns one cache
// slot; columns are evicted least-recently-used when the byte budget is
// exceeded.
class Cache
{
public:
	// l:    number of total data items (one slot per column)
	// size: cache size limit in bytes
	Cache(int32_t l, int64_t size);
	~Cache();

	// request data [0,len)
	// return some position p where [p,len) need to be filled
	// (p >= len if nothing needs to be filled)
	int32_t get_data(const int32_t index, Qfloat **data, int32_t len);
	void swap_index(int32_t i, int32_t j); // future_option

private:
	int32_t l;     // number of columns / slots
	int64_t size;  // remaining budget, counted in Qfloat elements after ctor
	struct head_t
	{
		head_t *prev, *next; // a circular list
		Qfloat *data;        // cached column values (owned)
		int32_t len; // data[0,len) is cached in this entry
	};

	head_t *head;     // one slot per column, indexed by column
	head_t lru_head;  // sentinel of the circular LRU list (next = oldest)
	void lru_delete(head_t *h);
	void lru_insert(head_t *h);
};
108 
// l_:    number of columns
// size_: budget in bytes; converted below into a budget counted in Qfloat
//        elements, minus the footprint of the slot headers, and clamped so
//        at least two full columns always fit (get_data needs two live
//        columns at once).
Cache::Cache(int32_t l_, int64_t size_):l(l_),size(size_)
{
	// calloc is essential: slots must start with data=NULL, len=0 and
	// NULL prev/next so get_data/lru handling sees them as empty
	head = (head_t *)calloc(l,sizeof(head_t)); // initialized to 0
	size /= sizeof(Qfloat);
	size -= l * sizeof(head_t) / sizeof(Qfloat);
	size = CMath::max(size, (int64_t) 2*l); // cache must be large enough for two columns
	lru_head.next = lru_head.prev = &lru_head; // empty circular list
}
117 
118 Cache::~Cache()
119 {
120  for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
121  SG_FREE(h->data);
122  SG_FREE(head);
123 }
124 
125 void Cache::lru_delete(head_t *h)
126 {
127  // delete from current location
128  h->prev->next = h->next;
129  h->next->prev = h->prev;
130 }
131 
132 void Cache::lru_insert(head_t *h)
133 {
134  // insert to last position
135  h->next = &lru_head;
136  h->prev = lru_head.prev;
137  h->prev->next = h;
138  h->next->prev = h;
139 }
140 
// Hand out the cache buffer for column `index`, grown to hold `len`
// Qfloats.  On return *data points at the buffer; the returned value is
// the first position the CALLER must still fill: [0,ret) is valid cached
// data, [ret,len) has to be computed (ret >= len means fully cached).
int32_t Cache::get_data(const int32_t index, Qfloat **data, int32_t len)
{
	head_t *h = &head[index];
	if(h->len) lru_delete(h);	// take off the ring while we resize it
	int32_t more = len - h->len;

	if(more > 0)
	{
		// free old space: evict least-recently-used columns until the
		// budget (in Qfloat elements) can absorb the growth
		while(size < more)
		{
			head_t *old = lru_head.next;	// oldest entry
			lru_delete(old);
			SG_FREE(old->data);
			size += old->len;
			old->data = 0;
			old->len = 0;
		}

		// allocate new space (existing prefix is preserved by realloc)
		h->data = SG_REALLOC(Qfloat, h->data, len);
		size -= more;
		// h->len becomes the new length; `len` now holds the OLD length,
		// which is exactly the fill-start position returned below
		CMath::swap(h->len,len);
	}

	lru_insert(h);	// mark as most recently used
	*data = h->data;
	return len;
}
170 
// Keep the cache consistent with a permutation of examples i and j:
// swap the two columns' slots, and inside every other cached column swap
// the entries at positions i and j.  Columns that cover i but not j
// cannot be patched and are simply evicted.
void Cache::swap_index(int32_t i, int32_t j)
{
	if(i==j) return;

	// swap the columns themselves (slot contents trade places)
	if(head[i].len) lru_delete(&head[i]);
	if(head[j].len) lru_delete(&head[j]);
	CMath::swap(head[i].data,head[j].data);
	CMath::swap(head[i].len,head[j].len);
	if(head[i].len) lru_insert(&head[i]);
	if(head[j].len) lru_insert(&head[j]);

	// fix up row entries i/j in all remaining cached columns
	if(i>j) CMath::swap(i,j);	// ensure i < j below
	for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
	{
		if(h->len > i)
		{
			if(h->len > j)
				CMath::swap(h->data[i],h->data[j]);
			else
			{
				// give up: column covers i but not j, cannot be patched
				lru_delete(h);
				SG_FREE(h->data);
				size += h->len;
				h->data = 0;
				h->len = 0;
			}
		}
	}
}
201 
202 //
203 // Kernel evaluation
204 //
205 // the static method k_function is for doing single kernel evaluation
206 // the constructor of Kernel prepares to calculate the l*l kernel matrix
207 // the member function get_Q is for getting one column from the Q Matrix
208 //
// Abstract interface to the (possibly cached) Q matrix used by the solver.
class QMatrix {
public:
	// Pointer to column `column`, with at least the first `len` entries valid.
	virtual Qfloat *get_Q(int32_t column, int32_t len) const = 0;
	// Pointer to the diagonal of Q.
	virtual Qfloat *get_QD() const = 0;
	// Permute examples i and j consistently in all internal state.
	virtual void swap_index(int32_t i, int32_t j) const = 0;
	virtual ~QMatrix() {}

	// training time budget in seconds; checked by the solver loop
	// (values <= 0 disable the limit)
	float64_t max_train_time;
};
218 
219 class LibSVMKernel;
220 
221 // helper struct for threaded processing
// helper struct for threaded processing: one worker's slice of a Q column
struct Q_THREAD_PARAM
{
	int32_t i;             // fixed column index whose entries are computed
	int32_t start;         // first j (inclusive) this worker fills
	int32_t end;           // last j (exclusive)
	Qfloat* data;          // output buffer, indexed by j
	float64_t* y;          // labels (two-class scaling) or NULL
	const LibSVMKernel* q; // kernel used for the evaluations
};
231 
232 extern Parallel* sg_parallel;
233 
// Base class for concrete Q matrices: wraps a shogun CKernel and provides
// (optionally multi-threaded) computation of one Q column at a time.
class LibSVMKernel: public QMatrix {
public:
	LibSVMKernel(int32_t l, svm_node * const * x, const svm_parameter& param);
	virtual ~LibSVMKernel();

	virtual Qfloat *get_Q(int32_t column, int32_t len) const = 0;
	virtual Qfloat *get_QD() const = 0;
	virtual void swap_index(int32_t i, int32_t j) const // not so const: mutates x / x_square
	{
		CMath::swap(x[i],x[j]);
		if(x_square) CMath::swap(x_square[i],x_square[j]);
	}

	// Worker entry point: fill data[j] for j in [start,end) of column i.
	static void* compute_Q_parallel_helper(void* p)
	{
		Q_THREAD_PARAM* params= (Q_THREAD_PARAM*) p;
		int32_t i=params->i;
		int32_t start=params->start;
		int32_t end=params->end;
		float64_t* y=params->y;
		Qfloat* data=params->data;
		const LibSVMKernel* q=params->q;

		if (y) // two class: Q_ij = y_i y_j K(i,j)
		{
			for(int32_t j=start;j<end;j++)
				data[j] = (Qfloat) y[i]*y[j]*q->kernel_function(i,j);
		}
		else // one class, eps svr: Q_ij = K(i,j)
		{
			for(int32_t j=start;j<end;j++)
				data[j] = (Qfloat) q->kernel_function(i,j);
		}

		return NULL;
	}

	// Fill column i of Q into `data` for indices [start,len); with lab
	// non-NULL entries are scaled by y_i*y_j.  Work is split over
	// sg_parallel->get_num_threads() pthreads when more than one is set.
	void compute_Q_parallel(Qfloat* data, float64_t* lab, int32_t i, int32_t start, int32_t len) const
	{
		int32_t num_threads=sg_parallel->get_num_threads();
		if (num_threads < 2)
		{
			Q_THREAD_PARAM params;
			params.i=i;
			params.start=start;
			params.end=len;
			params.y=lab;
			params.data=data;
			params.q=this;
			compute_Q_parallel_helper((void*) &params);
		}
		else
		{
#ifdef HAVE_PTHREAD
			// NOTE(review): the worker chunks below run from 0, not from
			// `start` — entries [0,start) already present in the cache get
			// recomputed.  Presumably harmless (kernel values are
			// deterministic) but wasted work; confirm before changing.
			int32_t total_num=(len-start);
			pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
			Q_THREAD_PARAM* params = SG_MALLOC(Q_THREAD_PARAM, num_threads);
			int32_t step= total_num/num_threads;

			int32_t t;

			num_threads--;	// this thread takes the last chunk itself
			for (t=0; t<num_threads; t++)
			{
				params[t].i=i;
				params[t].start=t*step;
				params[t].end=(t+1)*step;
				params[t].y=lab;
				params[t].data=data;
				params[t].q=this;

				int code=pthread_create(&threads[t], NULL,
						compute_Q_parallel_helper, (void*)&params[t]);

				if (code != 0)
				{
					// fall back: run remaining work on this thread
					SG_SWARNING("Thread creation failed (thread %d of %d) "
							"with error:'%s'\n",t, num_threads, strerror(code));
					num_threads=t;
					break;
				}
			}

			// last (or only remaining) chunk runs on the calling thread
			params[t].i=i;
			params[t].start=t*step;
			params[t].end=len;
			params[t].y=lab;
			params[t].data=data;
			params[t].q=this;
			compute_Q_parallel_helper(&params[t]);

			for (t=0; t<num_threads; t++)
			{
				if (pthread_join(threads[t], NULL) != 0)
					SG_SWARNING("pthread_join of thread %d/%d failed\n", t, num_threads);
			}

			SG_FREE(params);
			SG_FREE(threads);
#endif /* HAVE_PTHREAD */
			// NOTE(review): without HAVE_PTHREAD this branch computes
			// nothing — presumably num_threads is always 1 in such
			// builds; verify against Parallel's configuration.
		}
	}

	// Single kernel evaluation between examples i and j, delegated to the
	// wrapped shogun kernel via the stored example indices.
	inline float64_t kernel_function(int32_t i, int32_t j) const
	{
		return kernel->kernel(x[i]->index,x[j]->index);
	}

private:
	CKernel* kernel;         // borrowed from svm_parameter, not owned
	const svm_node **x;      // cloned pointer array (owned), permuted by swap_index
	float64_t *x_square;     // optional squared norms; never allocated here

	// svm_parameter (kept for reference; kernel evaluation goes through `kernel`)
	const int32_t kernel_type;
	const int32_t degree;
	const float64_t gamma;
	const float64_t coef0;
};
353 
// Keeps a shallow clone of the example pointer array (freed in the
// destructor); the CKernel itself is borrowed from `param` and must
// outlive this object.
LibSVMKernel::LibSVMKernel(int32_t l, svm_node * const * x_, const svm_parameter& param)
: kernel_type(param.kernel_type), degree(param.degree),
 gamma(param.gamma), coef0(param.coef0)
{
	clone(x,x_,l);
	x_square = 0;	// stays NULL unless a subclass fills it; swap_index checks
	kernel=param.kernel;
	max_train_time=param.max_train_time;
}
363 
// Frees the cloned pointer array (not the svm_nodes it points to) and the
// optional squared-norm cache; x_square may still be NULL here, which
// SG_FREE is expected to tolerate (consistent with its use elsewhere in
// this file).
LibSVMKernel::~LibSVMKernel()
{
	SG_FREE(x);
	SG_FREE(x_square);
}
369 
370 // Generalized SMO+SVMlight algorithm
371 // Solves:
372 //
373 // min 0.5(\alpha^T Q \alpha) + b^T \alpha
374 //
375 // y^T \alpha = \delta
376 // y_i = +1 or -1
377 // 0 <= alpha_i <= Cp for y_i = 1
378 // 0 <= alpha_i <= Cn for y_i = -1
379 //
380 // Given:
381 //
382 // Q, p, y, Cp, Cn, and an initial feasible point \alpha
383 // l is the size of vectors and matrices
384 // eps is the stopping tolerance
385 //
386 // solution will be put in \alpha, objective value will be put in obj
387 //
388 class Solver {
389 public:
390  Solver() {};
391  virtual ~Solver() {};
392 
393  struct SolutionInfo {
394  float64_t obj;
395  float64_t rho;
396  float64_t upper_bound_p;
397  float64_t upper_bound_n;
398  float64_t r; // for Solver_NU
399  };
400 
401  void Solve(
402  int32_t l, const QMatrix& Q, const float64_t *p_, const schar *y_,
403  float64_t *alpha_, float64_t Cp, float64_t Cn, float64_t eps,
404  SolutionInfo* si, int32_t shrinking, bool use_bias);
405 
406 protected:
407  int32_t active_size;
408  schar *y;
409  float64_t *G; // gradient of objective function
410  enum { LOWER_BOUND, UPPER_BOUND, FREE };
411  char *alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE
412  float64_t *alpha;
413  const QMatrix *Q;
414  const Qfloat *QD;
415  float64_t eps;
416  float64_t Cp,Cn;
417  float64_t *p;
418  int32_t *active_set;
419  float64_t *G_bar; // gradient, if we treat free variables as 0
420  int32_t l;
421  bool unshrink; // XXX
422 
423  float64_t get_C(int32_t i)
424  {
425  return (y[i] > 0)? Cp : Cn;
426  }
427  void update_alpha_status(int32_t i)
428  {
429  if(alpha[i] >= get_C(i))
430  alpha_status[i] = UPPER_BOUND;
431  else if(alpha[i] <= 0)
432  alpha_status[i] = LOWER_BOUND;
433  else alpha_status[i] = FREE;
434  }
435  bool is_upper_bound(int32_t i) { return alpha_status[i] == UPPER_BOUND; }
436  bool is_lower_bound(int32_t i) { return alpha_status[i] == LOWER_BOUND; }
437  bool is_free(int32_t i) { return alpha_status[i] == FREE; }
438  void swap_index(int32_t i, int32_t j);
439  void reconstruct_gradient();
440  virtual int32_t select_working_set(int32_t &i, int32_t &j, float64_t &gap);
441  virtual float64_t calculate_rho();
442  virtual void do_shrinking();
443 private:
444  bool be_shrunk(int32_t i, float64_t Gmax1, float64_t Gmax2);
445 };
446 
// Exchange examples i and j in every parallel array the solver keeps —
// and in the Q matrix / kernel itself — so shrinking can compact the
// active examples to the front while active_set remembers the original
// order.
void Solver::swap_index(int32_t i, int32_t j)
{
	Q->swap_index(i,j);
	CMath::swap(y[i],y[j]);
	CMath::swap(G[i],G[j]);
	CMath::swap(alpha_status[i],alpha_status[j]);
	CMath::swap(alpha[i],alpha[j]);
	CMath::swap(p[i],p[j]);
	CMath::swap(active_set[i],active_set[j]);
	CMath::swap(G_bar[i],G_bar[j]);
}
458 
// Rebuild G for the shrunk (inactive) part [active_size, l).  G_bar
// already accounts for upper-bounded alphas, so only the contribution of
// FREE variables has to be added back.
void Solver::reconstruct_gradient()
{
	// reconstruct inactive elements of G from G_bar and free variables

	if(active_size == l) return;

	int32_t i,j;
	int32_t nr_free = 0;

	// start from the "all free variables are 0" gradient
	for(j=active_size;j<l;j++)
		G[j] = G_bar[j] + p[j];

	for(j=0;j<active_size;j++)
		if(is_free(j))
			nr_free++;

	// two equivalent loop orders; pick the one that touches fewer kernel
	// entries given how many free variables there are
	if (nr_free*l > 2*active_size*(l-active_size))
	{
		for(i=active_size;i<l;i++)
		{
			const Qfloat *Q_i = Q->get_Q(i,active_size);
			for(j=0;j<active_size;j++)
				if(is_free(j))
					G[i] += alpha[j] * Q_i[j];
		}
	}
	else
	{
		for(i=0;i<active_size;i++)
			if(is_free(i))
			{
				const Qfloat *Q_i = Q->get_Q(i,l);
				float64_t alpha_i = alpha[i];
				for(j=active_size;j<l;j++)
					G[j] += alpha_i * Q_i[j];
			}
	}
}
497 
498 void Solver::Solve(
499  int32_t p_l, const QMatrix& p_Q, const float64_t *p_p,
500  const schar *p_y, float64_t *p_alpha, float64_t p_Cp, float64_t p_Cn,
501  float64_t p_eps, SolutionInfo* p_si, int32_t shrinking, bool use_bias)
502 {
503  this->l = p_l;
504  this->Q = &p_Q;
505  QD=Q->get_QD();
506  clone(p, p_p,l);
507  clone(y, p_y,l);
508  clone(alpha,p_alpha,l);
509  this->Cp = p_Cp;
510  this->Cn = p_Cn;
511  this->eps = p_eps;
512  unshrink = false;
513 
514  // initialize alpha_status
515  {
516  alpha_status = SG_MALLOC(char, l);
517  for(int32_t i=0;i<l;i++)
518  update_alpha_status(i);
519  }
520 
521  // initialize active set (for shrinking)
522  {
523  active_set = SG_MALLOC(int32_t, l);
524  for(int32_t i=0;i<l;i++)
525  active_set[i] = i;
526  active_size = l;
527  }
528 
529  // initialize gradient
531  CTime start_time;
532  {
533  G = SG_MALLOC(float64_t, l);
534  G_bar = SG_MALLOC(float64_t, l);
535  int32_t i;
536  for(i=0;i<l;i++)
537  {
538  G[i] = p_p[i];
539  G_bar[i] = 0;
540  }
541  SG_SINFO("Computing gradient for initial set of non-zero alphas\n");
542  //CMath::display_vector(alpha, l, "alphas");
543  for(i=0;i<l && !CSignal::cancel_computations(); i++)
544  {
545  if(!is_lower_bound(i))
546  {
547  const Qfloat *Q_i = Q->get_Q(i,l);
548  float64_t alpha_i = alpha[i];
549  int32_t j;
550  for(j=0;j<l;j++)
551  G[j] += alpha_i*Q_i[j];
552  if(is_upper_bound(i))
553  for(j=0;j<l;j++)
554  G_bar[j] += get_C(i) * Q_i[j];
555  }
556  SG_SPROGRESS(i, 0, l);
557  }
558  SG_SDONE();
559  }
560 
561  // optimization step
562 
563  int32_t iter = 0;
564  int32_t counter = CMath::min(l,1000)+1;
565 
567  {
568  if (Q->max_train_time > 0 && start_time.cur_time_diff() > Q->max_train_time)
569  break;
570 
571  // show progress and do shrinking
572 
573  if(--counter == 0)
574  {
575  counter = CMath::min(l,1000);
576  if(shrinking) do_shrinking();
577  //SG_SINFO(".");
578  }
579 
580  int32_t i,j;
581  float64_t gap;
582  if(select_working_set(i,j, gap)!=0)
583  {
584  // reconstruct the whole gradient
585  reconstruct_gradient();
586  // reset active set size and check
587  active_size = l;
588  //SG_SINFO("*");
589  if(select_working_set(i,j, gap)!=0)
590  break;
591  else
592  counter = 1; // do shrinking next iteration
593  }
594 
595  SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6);
596 
597  ++iter;
598 
599  // update alpha[i] and alpha[j], handle bounds carefully
600 
601  const Qfloat *Q_i = Q->get_Q(i,active_size);
602  const Qfloat *Q_j = Q->get_Q(j,active_size);
603 
604  float64_t C_i = get_C(i);
605  float64_t C_j = get_C(j);
606 
607  float64_t old_alpha_i = alpha[i];
608  float64_t old_alpha_j = alpha[j];
609 
610  if (!use_bias)
611  {
612  double pi=G[i]-Q_i[i]*alpha[i]-Q_i[j]*alpha[j];
613  double pj=G[j]-Q_i[j]*alpha[i]-Q_j[j]*alpha[j];
614  double det=Q_i[i]*Q_j[j]-Q_i[j]*Q_i[j];
615  double alpha_i=-(Q_j[j]*pi-Q_i[j]*pj)/det;
616  alpha_i=CMath::min(C_i,CMath::max(0.0,alpha_i));
617  double alpha_j=-(-Q_i[j]*pi+Q_i[i]*pj)/det;
618  alpha_j=CMath::min(C_j,CMath::max(0.0,alpha_j));
619 
620  if (alpha_i==0 || alpha_i == C_i)
621  alpha_j=CMath::min(C_j,CMath::max(0.0,-(pj+Q_i[j]*alpha_i)/Q_j[j]));
622  if (alpha_j==0 || alpha_j == C_j)
623  alpha_i=CMath::min(C_i,CMath::max(0.0,-(pi+Q_i[j]*alpha_j)/Q_i[i]));
624 
625  alpha[i]=alpha_i; alpha[j]=alpha_j;
626  }
627  else
628  {
629  if(y[i]!=y[j])
630  {
631  float64_t quad_coef = Q_i[i]+Q_j[j]+2*Q_i[j];
632  if (quad_coef <= 0)
633  quad_coef = TAU;
634  float64_t delta = (-G[i]-G[j])/quad_coef;
635  float64_t diff = alpha[i] - alpha[j];
636  alpha[i] += delta;
637  alpha[j] += delta;
638 
639  if(diff > 0)
640  {
641  if(alpha[j] < 0)
642  {
643  alpha[j] = 0;
644  alpha[i] = diff;
645  }
646  }
647  else
648  {
649  if(alpha[i] < 0)
650  {
651  alpha[i] = 0;
652  alpha[j] = -diff;
653  }
654  }
655  if(diff > C_i - C_j)
656  {
657  if(alpha[i] > C_i)
658  {
659  alpha[i] = C_i;
660  alpha[j] = C_i - diff;
661  }
662  }
663  else
664  {
665  if(alpha[j] > C_j)
666  {
667  alpha[j] = C_j;
668  alpha[i] = C_j + diff;
669  }
670  }
671  }
672  else
673  {
674  float64_t quad_coef = Q_i[i]+Q_j[j]-2*Q_i[j];
675  if (quad_coef <= 0)
676  quad_coef = TAU;
677  float64_t delta = (G[i]-G[j])/quad_coef;
678  float64_t sum = alpha[i] + alpha[j];
679  alpha[i] -= delta;
680  alpha[j] += delta;
681 
682  if(sum > C_i)
683  {
684  if(alpha[i] > C_i)
685  {
686  alpha[i] = C_i;
687  alpha[j] = sum - C_i;
688  }
689  }
690  else
691  {
692  if(alpha[j] < 0)
693  {
694  alpha[j] = 0;
695  alpha[i] = sum;
696  }
697  }
698  if(sum > C_j)
699  {
700  if(alpha[j] > C_j)
701  {
702  alpha[j] = C_j;
703  alpha[i] = sum - C_j;
704  }
705  }
706  else
707  {
708  if(alpha[i] < 0)
709  {
710  alpha[i] = 0;
711  alpha[j] = sum;
712  }
713  }
714  }
715  }
716 
717  // update G
718 
719  float64_t delta_alpha_i = alpha[i] - old_alpha_i;
720  float64_t delta_alpha_j = alpha[j] - old_alpha_j;
721 
722  for(int32_t k=0;k<active_size;k++)
723  {
724  G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
725  }
726 
727  // update alpha_status and G_bar
728 
729  {
730  bool ui = is_upper_bound(i);
731  bool uj = is_upper_bound(j);
732  update_alpha_status(i);
733  update_alpha_status(j);
734  int32_t k;
735  if(ui != is_upper_bound(i))
736  {
737  Q_i = Q->get_Q(i,l);
738  if(ui)
739  for(k=0;k<l;k++)
740  G_bar[k] -= C_i * Q_i[k];
741  else
742  for(k=0;k<l;k++)
743  G_bar[k] += C_i * Q_i[k];
744  }
745 
746  if(uj != is_upper_bound(j))
747  {
748  Q_j = Q->get_Q(j,l);
749  if(uj)
750  for(k=0;k<l;k++)
751  G_bar[k] -= C_j * Q_j[k];
752  else
753  for(k=0;k<l;k++)
754  G_bar[k] += C_j * Q_j[k];
755  }
756  }
757 
758 #ifdef MCSVM_DEBUG
759  // calculate objective value
760  {
761  float64_t v = 0;
762  for(i=0;i<l;i++)
763  v += alpha[i] * (G[i] + p[i]);
764 
765  p_si->obj = v/2;
766 
767  float64_t primal=0;
768  //float64_t gap=100000;
769  SG_SPRINT("dual obj=%f primal obf=%f gap=%f\n", v/2, primal, gap);
770  }
771 #endif
772  }
773 
774  // calculate rho
775 
776  if (!use_bias)
777  p_si->rho = 0;
778  else
779  p_si->rho = calculate_rho();
780 
781  // calculate objective value
782  {
783  float64_t v = 0;
784  int32_t i;
785  for(i=0;i<l;i++)
786  v += alpha[i] * (G[i] + p[i]);
787 
788  p_si->obj = v/2;
789  }
790 
791  // put back the solution
792  {
793  for(int32_t i=0;i<l;i++)
794  p_alpha[active_set[i]] = alpha[i];
795  }
796 
797  p_si->upper_bound_p = Cp;
798  p_si->upper_bound_n = Cn;
799 
800  SG_SINFO("\noptimization finished, #iter = %d\n",iter);
801 
802  SG_FREE(p);
803  SG_FREE(y);
804  SG_FREE(alpha);
805  SG_FREE(alpha_status);
806  SG_FREE(active_set);
807  SG_FREE(G);
808  SG_FREE(G_bar);
809 }
810 
811 // return 1 if already optimal, return 0 otherwise
// Second-order working-set selection (WSS2): pick i with the maximal KKT
// violation on the I_up side, then j giving the largest estimated
// objective decrease.  Returns 1 when the duality gap is below eps
// (already optimal on the active set), 0 otherwise.
int32_t Solver::select_working_set(
	int32_t &out_i, int32_t &out_j, float64_t &gap)
{
	// return i,j such that
	// i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
	// j: minimizes the decrease of obj value
	//    (if quadratic coefficient <= 0, replace it with tau)
	//    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)

	float64_t Gmax = -INF;     // max violation over I_up
	float64_t Gmax2 = -INF;    // max violation over I_low
	int32_t Gmax_idx = -1;
	int32_t Gmin_idx = -1;
	float64_t obj_diff_min = INF;

	// first pass: choose i
	for(int32_t t=0;t<active_size;t++)
		if(y[t]==+1)
		{
			if(!is_upper_bound(t))
				if(-G[t] >= Gmax)
				{
					Gmax = -G[t];
					Gmax_idx = t;
				}
		}
		else
		{
			if(!is_lower_bound(t))
				if(G[t] >= Gmax)
				{
					Gmax = G[t];
					Gmax_idx = t;
				}
		}

	int32_t i = Gmax_idx;
	const Qfloat *Q_i = NULL;
	if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1
		Q_i = Q->get_Q(i,active_size);

	// second pass: choose j minimizing the second-order estimate
	// -(grad_diff)^2 / quad_coef, while tracking Gmax2 for the gap
	for(int32_t j=0;j<active_size;j++)
	{
		if(y[j]==+1)
		{
			if (!is_lower_bound(j))
			{
				float64_t grad_diff=Gmax+G[j];
				if (G[j] >= Gmax2)
					Gmax2 = G[j];
				if (grad_diff > 0)
				{
					float64_t obj_diff;
					float64_t quad_coef=Q_i[i]+QD[j]-2.0*y[i]*Q_i[j];
					if (quad_coef > 0)
						obj_diff = -(grad_diff*grad_diff)/quad_coef;
					else
						obj_diff = -(grad_diff*grad_diff)/TAU;	// non-PSD guard

					if (obj_diff <= obj_diff_min)
					{
						Gmin_idx=j;
						obj_diff_min = obj_diff;
					}
				}
			}
		}
		else
		{
			if (!is_upper_bound(j))
			{
				float64_t grad_diff= Gmax-G[j];
				if (-G[j] >= Gmax2)
					Gmax2 = -G[j];
				if (grad_diff > 0)
				{
					float64_t obj_diff;
					float64_t quad_coef=Q_i[i]+QD[j]+2.0*y[i]*Q_i[j];
					if (quad_coef > 0)
						obj_diff = -(grad_diff*grad_diff)/quad_coef;
					else
						obj_diff = -(grad_diff*grad_diff)/TAU;

					if (obj_diff <= obj_diff_min)
					{
						Gmin_idx=j;
						obj_diff_min = obj_diff;
					}
				}
			}
		}
	}

	// Gmax+Gmax2 is the maximal KKT violation (duality gap estimate)
	gap=Gmax+Gmax2;
	if(gap < eps)
		return 1;

	out_i = Gmax_idx;
	out_j = Gmin_idx;
	return 0;
}
912 
913 bool Solver::be_shrunk(int32_t i, float64_t Gmax1, float64_t Gmax2)
914 {
915  if(is_upper_bound(i))
916  {
917  if(y[i]==+1)
918  return(-G[i] > Gmax1);
919  else
920  return(-G[i] > Gmax2);
921  }
922  else if(is_lower_bound(i))
923  {
924  if(y[i]==+1)
925  return(G[i] > Gmax2);
926  else
927  return(G[i] > Gmax1);
928  }
929  else
930  return(false);
931 }
932 
933 
// Shrink the active set: move variables that are stuck at a bound (per
// be_shrunk) past active_size so later iterations skip them.  Once the
// problem is nearly optimal, unshrink everything one time to avoid
// wrongly shrunk variables.
void Solver::do_shrinking()
{
	int32_t i;
	float64_t Gmax1 = -INF; // max { -y_i * grad(f)_i | i in I_up(\alpha) }
	float64_t Gmax2 = -INF; // max { y_i * grad(f)_i | i in I_low(\alpha) }

	// find maximal violating pair first
	for(i=0;i<active_size;i++)
	{
		if(y[i]==+1)
		{
			if(!is_upper_bound(i))
			{
				if(-G[i] >= Gmax1)
					Gmax1 = -G[i];
			}
			if(!is_lower_bound(i))
			{
				if(G[i] >= Gmax2)
					Gmax2 = G[i];
			}
		}
		else
		{
			if(!is_upper_bound(i))
			{
				if(-G[i] >= Gmax2)
					Gmax2 = -G[i];
			}
			if(!is_lower_bound(i))
			{
				if(G[i] >= Gmax1)
					Gmax1 = G[i];
			}
		}
	}

	// close to optimal: reactivate everything once (unshrink is one-shot)
	if(unshrink == false && Gmax1 + Gmax2 <= eps*10)
	{
		unshrink = true;
		reconstruct_gradient();
		active_size = l;
	}

	// compact: swap each shrinkable i with a non-shrinkable element from
	// the tail, decreasing active_size as we go
	for(i=0;i<active_size;i++)
		if (be_shrunk(i, Gmax1, Gmax2))
		{
			active_size--;
			while (active_size > i)
			{
				if (!be_shrunk(active_size, Gmax1, Gmax2))
				{
					swap_index(i,active_size);
					break;
				}
				active_size--;
			}
		}
}
993 
// Compute the bias term rho from the KKT conditions: free support vectors
// satisfy y_i*G_i = rho exactly, so average over them; if none are free,
// take the midpoint of the interval bracketed by the bounded variables.
float64_t Solver::calculate_rho()
{
	float64_t r;
	int32_t nr_free = 0;
	float64_t ub = INF, lb = -INF, sum_free = 0;
	for(int32_t i=0;i<active_size;i++)
	{
		float64_t yG = y[i]*G[i];

		if(is_upper_bound(i))
		{
			// upper-bounded variables bracket rho from one side,
			// depending on the label
			if(y[i]==-1)
				ub = CMath::min(ub,yG);
			else
				lb = CMath::max(lb,yG);
		}
		else if(is_lower_bound(i))
		{
			if(y[i]==+1)
				ub = CMath::min(ub,yG);
			else
				lb = CMath::max(lb,yG);
		}
		else
		{
			// free variable: contributes an exact equation for rho
			++nr_free;
			sum_free += yG;
		}
	}

	if(nr_free>0)
		r = sum_free/nr_free;
	else
		r = (ub+lb)/2;

	return r;
}
1031 
1032 
1033 //
1034 //Solve with individually weighted examples
1035 //
class WeightedSolver : public Solver
{

public:

	// cost_vec: per-example regularization constants C_i, indexed like the
	// training examples; borrowed (not owned) and must outlive the solver.
	WeightedSolver(float64_t* cost_vec)
	{

		this->Cs = cost_vec;

	}

	// Per-example upper bound replacing the global Cp/Cn.
	// NOTE(review): this only takes effect inside Solver::Solve if the
	// base-class get_C is itself declared virtual — otherwise this
	// definition HIDES rather than overrides it and the weights are
	// silently ignored; verify the base declaration.
	virtual float64_t get_C(int32_t i)
	{

		return Cs[i];
	}

protected:

	// borrowed array of per-example costs
	float64_t* Cs;

};
1059 
1060 
1061 //
1062 // Solver for nu-svm classification and regression
1063 //
1064 // additional constraint: e^T \alpha = constant
1065 //
class Solver_NU : public Solver
{
public:
	Solver_NU() {}
	// Same contract as Solver::Solve; additionally p_si->r is filled by
	// this class's calculate_rho().
	void Solve(
		int32_t p_l, const QMatrix& p_Q, const float64_t *p_p,
		const schar *p_y, float64_t* p_alpha, float64_t p_Cp, float64_t p_Cn,
		float64_t p_eps, SolutionInfo* p_si, int32_t shrinking, bool use_bias)
	{
		this->si = p_si;
		Solver::Solve(p_l,p_Q,p_p,p_y,p_alpha,p_Cp,p_Cn,p_eps,p_si,
			shrinking,use_bias);
	}
private:
	SolutionInfo *si;	// kept so calculate_rho() can store r
	int32_t select_working_set(int32_t &i, int32_t &j, float64_t &gap);
	float64_t calculate_rho();
	bool be_shrunk(
		int32_t i, float64_t Gmax1, float64_t Gmax2, float64_t Gmax3,
		float64_t Gmax4);
	void do_shrinking();
};
1088 
1089 // return 1 if already optimal, return 0 otherwise
// nu-SVM working-set selection: like Solver::select_working_set but the
// pair must share the same label (the two equality constraints are kept
// separately feasible), so violations are tracked per label class.
int32_t Solver_NU::select_working_set(
	int32_t &out_i, int32_t &out_j, float64_t &gap)
{
	// return i,j such that y_i = y_j and
	// i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
	// j: minimizes the decrease of obj value
	//    (if quadratic coefficient <= 0, replace it with tau)
	//    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)

	float64_t Gmaxp = -INF;   // max violation, y=+1, I_up side
	float64_t Gmaxp2 = -INF;  // max violation, y=+1, I_low side
	int32_t Gmaxp_idx = -1;

	float64_t Gmaxn = -INF;   // max violation, y=-1, I_up side
	float64_t Gmaxn2 = -INF;  // max violation, y=-1, I_low side
	int32_t Gmaxn_idx = -1;

	int32_t Gmin_idx = -1;
	float64_t obj_diff_min = INF;

	// first pass: candidate i for each label class
	for(int32_t t=0;t<active_size;t++)
		if(y[t]==+1)
		{
			if(!is_upper_bound(t))
				if(-G[t] >= Gmaxp)
				{
					Gmaxp = -G[t];
					Gmaxp_idx = t;
				}
		}
		else
		{
			if(!is_lower_bound(t))
				if(G[t] >= Gmaxn)
				{
					Gmaxn = G[t];
					Gmaxn_idx = t;
				}
		}

	int32_t ip = Gmaxp_idx;
	int32_t in = Gmaxn_idx;
	const Qfloat *Q_ip = NULL;
	const Qfloat *Q_in = NULL;
	if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
		Q_ip = Q->get_Q(ip,active_size);
	if(in != -1)
		Q_in = Q->get_Q(in,active_size);

	// second pass: j minimizing the second-order objective estimate
	// within its own label class
	for(int32_t j=0;j<active_size;j++)
	{
		if(y[j]==+1)
		{
			if (!is_lower_bound(j))
			{
				float64_t grad_diff=Gmaxp+G[j];
				if (G[j] >= Gmaxp2)
					Gmaxp2 = G[j];
				if (grad_diff > 0)
				{
					float64_t obj_diff;
					float64_t quad_coef = Q_ip[ip]+QD[j]-2*Q_ip[j];
					if (quad_coef > 0)
						obj_diff = -(grad_diff*grad_diff)/quad_coef;
					else
						obj_diff = -(grad_diff*grad_diff)/TAU;	// non-PSD guard

					if (obj_diff <= obj_diff_min)
					{
						Gmin_idx=j;
						obj_diff_min = obj_diff;
					}
				}
			}
		}
		else
		{
			if (!is_upper_bound(j))
			{
				float64_t grad_diff=Gmaxn-G[j];
				if (-G[j] >= Gmaxn2)
					Gmaxn2 = -G[j];
				if (grad_diff > 0)
				{
					float64_t obj_diff;
					float64_t quad_coef = Q_in[in]+QD[j]-2*Q_in[j];
					if (quad_coef > 0)
						obj_diff = -(grad_diff*grad_diff)/quad_coef;
					else
						obj_diff = -(grad_diff*grad_diff)/TAU;

					if (obj_diff <= obj_diff_min)
					{
						Gmin_idx=j;
						obj_diff_min = obj_diff;
					}
				}
			}
		}
	}

	// worst violation over both label classes
	gap=CMath::max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2);
	if(gap < eps)
		return 1;

	// pair i with the selected j from the same class (gap >= eps > 0
	// guarantees some j with positive grad_diff was recorded above)
	if (y[Gmin_idx] == +1)
		out_i = Gmaxp_idx;
	else
		out_i = Gmaxn_idx;
	out_j = Gmin_idx;

	return 0;
}
1203 
1204 bool Solver_NU::be_shrunk(
1205  int32_t i, float64_t Gmax1, float64_t Gmax2, float64_t Gmax3,
1206  float64_t Gmax4)
1207 {
1208  if(is_upper_bound(i))
1209  {
1210  if(y[i]==+1)
1211  return(-G[i] > Gmax1);
1212  else
1213  return(-G[i] > Gmax4);
1214  }
1215  else if(is_lower_bound(i))
1216  {
1217  if(y[i]==+1)
1218  return(G[i] > Gmax2);
1219  else
1220  return(G[i] > Gmax3);
1221  }
1222  else
1223  return(false);
1224 }
1225 
// nu-SVM shrinking: same compaction scheme as Solver::do_shrinking, with
// four violation maxima (I_up/I_low for each label class).
void Solver_NU::do_shrinking()
{
	float64_t Gmax1 = -INF; // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
	float64_t Gmax2 = -INF; // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
	float64_t Gmax3 = -INF; // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
	float64_t Gmax4 = -INF; // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }

	// find maximal violating pair first
	int32_t i;
	for(i=0;i<active_size;i++)
	{
		if(!is_upper_bound(i))
		{
			if(y[i]==+1)
			{
				if(-G[i] > Gmax1) Gmax1 = -G[i];
			}
			else	if(-G[i] > Gmax4) Gmax4 = -G[i];
		}
		if(!is_lower_bound(i))
		{
			if(y[i]==+1)
			{
				if(G[i] > Gmax2) Gmax2 = G[i];
			}
			else	if(G[i] > Gmax3) Gmax3 = G[i];
		}
	}

	// close to optimal: reactivate everything once (unshrink is one-shot)
	if(unshrink == false && CMath::max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10)
	{
		unshrink = true;
		reconstruct_gradient();
		active_size = l;
	}

	// compact shrinkable variables past active_size
	for(i=0;i<active_size;i++)
		if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4))
		{
			active_size--;
			while (active_size > i)
			{
				if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
				{
					swap_index(i,active_size);
					break;
				}
				active_size--;
			}
		}
}
1277 
// nu-SVM rho: the two label classes yield separate bias-like quantities
// r1 (y=+1) and r2 (y=-1), each averaged over free SVs or bracketed by
// bounded ones.  Returns (r1-r2)/2 as rho and stores (r1+r2)/2 into
// si->r for rescaling the nu solution.
float64_t Solver_NU::calculate_rho()
{
	int32_t nr_free1 = 0,nr_free2 = 0;
	float64_t ub1 = INF, ub2 = INF;
	float64_t lb1 = -INF, lb2 = -INF;
	float64_t sum_free1 = 0, sum_free2 = 0;

	for(int32_t i=0; i<active_size; i++)
	{
		if(y[i]==+1)
		{
			if(is_upper_bound(i))
				lb1 = CMath::max(lb1,G[i]);
			else if(is_lower_bound(i))
				ub1 = CMath::min(ub1,G[i]);
			else
			{
				++nr_free1;
				sum_free1 += G[i];
			}
		}
		else
		{
			if(is_upper_bound(i))
				lb2 = CMath::max(lb2,G[i]);
			else if(is_lower_bound(i))
				ub2 = CMath::min(ub2,G[i]);
			else
			{
				++nr_free2;
				sum_free2 += G[i];
			}
		}
	}

	// exact average over free SVs if any, otherwise interval midpoint
	float64_t r1,r2;
	if(nr_free1 > 0)
		r1 = sum_free1/nr_free1;
	else
		r1 = (ub1+lb1)/2;

	if(nr_free2 > 0)
		r2 = sum_free2/nr_free2;
	else
		r2 = (ub2+lb2)/2;

	si->r = (r1+r2)/2;
	return (r1-r2)/2;
}
1327 
// Q matrix for multi-class SVC: entries are scaled kernel values,
// Q_ij = factor*(nr_class-1)*K(i,j) for same-label pairs and
// Q_ij = -factor*K(i,j) otherwise; columns are served from an LRU cache.
class SVC_QMC: public LibSVMKernel
{
public:
	SVC_QMC(const svm_problem& prob, const svm_parameter& param, const schar *y_, int32_t n_class, float64_t fac)
	:LibSVMKernel(prob.l, prob.x, param)
	{
		nr_class=n_class;
		factor=fac;
		clone(y,y_,prob.l);
		// param.cache_size is in megabytes
		cache = new Cache(prob.l,(int64_t)(param.cache_size*(1l<<20)));
		QD = SG_MALLOC(Qfloat, prob.l);
		for(int32_t i=0;i<prob.l;i++)
		{
			// diagonal: same-label case of the scaling above
			QD[i]= factor*(nr_class-1)*kernel_function(i,i);
		}
	}

	// Column i of Q, computing (and scaling) only the uncached tail.
	Qfloat *get_Q(int32_t i, int32_t len) const
	{
		Qfloat *data;
		int32_t start;
		if((start = cache->get_data(i,&data,len)) < len)
		{
			compute_Q_parallel(data, NULL, i, start, len);

			for(int32_t j=start;j<len;j++)
			{
				if (y[i]==y[j])
					data[j] *= (factor*(nr_class-1));
				else
					data[j] *= (-factor);
			}
		}
		return data;
	}

	// Undo the class-dependent scaling, recovering factor*K(i,j)'s sign-free value.
	inline Qfloat get_orig_Qij(Qfloat Q, int32_t i, int32_t j)
	{
		if (y[i]==y[j])
			return Q/(nr_class-1);
		else
			return -Q;
	}

	Qfloat *get_QD() const
	{
		return QD;
	}

	// Keep cache, kernel, labels and diagonal consistent under permutation.
	void swap_index(int32_t i, int32_t j) const
	{
		cache->swap_index(i,j);
		LibSVMKernel::swap_index(i,j);
		CMath::swap(y[i],y[j]);
		CMath::swap(QD[i],QD[j]);
	}

	~SVC_QMC()
	{
		SG_FREE(y);
		delete cache;
		SG_FREE(QD);
	}
private:
	float64_t factor;
	// note: number of classes stored as float64_t; it is only used in
	// floating-point arithmetic/comparisons above
	float64_t nr_class;
	schar *y;
	Cache *cache;
	Qfloat *QD;
};
1398 
1399 //
1400 // Solver for nu-svm classification and regression
1401 //
1402 // additional constraint: e^T \alpha = constant
1403 //
class Solver_NUMC : public Solver
{
public:
	/** @param n_class number of classes
	 * @param svm_nu nu parameter of the multiclass nu-SVM
	 */
	Solver_NUMC(int32_t n_class, float64_t svm_nu)
	{
		nr_class=n_class;
		nu=svm_nu;
	}

	/** run the generic SMO solver; the solution info pointer is kept so
	 * that compute_primal() can access it afterwards */
	void Solve(
		int32_t p_l, const QMatrix& p_Q, const float64_t *p_p,
		const schar *p_y, float64_t* p_alpha, float64_t p_Cp, float64_t p_Cn,
		float64_t p_eps, SolutionInfo* p_si, int32_t shrinking, bool use_bias)
	{
		this->si = p_si;
		Solver::Solve(p_l,p_Q,p_p,p_y,p_alpha,p_Cp,p_Cn,p_eps,p_si,shrinking, use_bias);
	}
	/** compute primal objective, per-class biases and w_c norms from the
	 * dual solution (see the overrides below: shrinking is disabled) */
	float64_t compute_primal(const schar* p_y, float64_t* p_alpha, float64_t* biases,float64_t* normwcw);

private:
	// solution info of the most recent Solve() call
	SolutionInfo *si;
	int32_t select_working_set(int32_t &i, int32_t &j, float64_t &gap);
	float64_t calculate_rho();
	bool be_shrunk(
		int32_t i, float64_t Gmax1, float64_t Gmax2, float64_t Gmax3,
		float64_t Gmax4);
	void do_shrinking();

private:
	int32_t nr_class;
	float64_t nu;
};
1436 
1437 float64_t Solver_NUMC::compute_primal(const schar* p_y, float64_t* p_alpha, float64_t* biases, float64_t* normwcw)
1438 {
1439  clone(y, p_y,l);
1440  clone(alpha,p_alpha,l);
1441 
1442  alpha_status = SG_MALLOC(char, l);
1443  for(int32_t i=0;i<l;i++)
1444  update_alpha_status(i);
1445 
1446  float64_t* class_count = SG_MALLOC(float64_t, nr_class);
1447  float64_t* outputs = SG_MALLOC(float64_t, l);
1448 
1449  for (int32_t i=0; i<nr_class; i++)
1450  {
1451  class_count[i]=0;
1452  biases[i+1]=0;
1453  }
1454 
1455  for (int32_t i=0; i<active_size; i++)
1456  {
1457  update_alpha_status(i);
1458  if(!is_upper_bound(i) && !is_lower_bound(i))
1459  class_count[(int32_t) y[i]]++;
1460  }
1461 
1462  //CMath::display_vector(class_count, nr_class, "class_count");
1463 
1464  float64_t mu=((float64_t) nr_class)/(nu*l);
1465  //SG_SPRINT("nr_class=%d, l=%d, active_size=%d, nu=%f, mu=%f\n", nr_class, l, active_size, nu, mu);
1466 
1467  float64_t rho=0;
1468  float64_t quad=0;
1469  float64_t* zero_counts = SG_MALLOC(float64_t, nr_class);
1470  float64_t normwc_const = 0;
1471 
1472  for (int32_t i=0; i<nr_class; i++)
1473  {
1474  zero_counts[i]=-INF;
1475  normwcw[i]=0;
1476  }
1477 
1478  for (int32_t i=0; i<active_size; i++)
1479  {
1480  float64_t sum_free=0;
1481  float64_t sum_atbound=0;
1482  float64_t sum_zero_count=0;
1483 
1484  Qfloat* Q_i = Q->get_Q(i,active_size);
1485  outputs[i]=0;
1486 
1487  for (int j=0; j<active_size; j++)
1488  {
1489  quad+= alpha[i]*alpha[j]*Q_i[j];
1490  float64_t tmp= alpha[j]*Q_i[j]/mu;
1491 
1492  if(!is_upper_bound(i) && !is_lower_bound(i))
1493  sum_free+=tmp;
1494  else
1495  sum_atbound+=tmp;
1496 
1497  if (class_count[(int32_t) y[i]] == 0 && y[j]==y[i])
1498  sum_zero_count+= tmp;
1499 
1500  SVC_QMC* QMC=(SVC_QMC*) Q;
1501  float64_t norm_tmp=alpha[i]*alpha[j]*QMC->get_orig_Qij(Q_i[j], i, j);
1502  if (y[i]==y[j])
1503  normwcw[(int32_t) y[i]]+=norm_tmp;
1504 
1505  normwcw[(int32_t) y[i]]-=2.0/nr_class*norm_tmp;
1506  normwc_const+=norm_tmp;
1507  }
1508 
1509  if (class_count[(int32_t) y[i]] == 0)
1510  {
1511  if (zero_counts[(int32_t) y[i]]<sum_zero_count)
1512  zero_counts[(int32_t) y[i]]=sum_zero_count;
1513  }
1514 
1515  biases[(int32_t) y[i]+1]-=sum_free;
1516  if (class_count[(int32_t) y[i]] != 0.0)
1517  rho+=sum_free/class_count[(int32_t) y[i]];
1518  outputs[i]+=sum_free+sum_atbound;
1519  }
1520 
1521  for (int32_t i=0; i<nr_class; i++)
1522  {
1523  if (class_count[i] == 0.0)
1524  rho+=zero_counts[i];
1525 
1526  normwcw[i]+=normwc_const/CMath::sq(nr_class);
1527  normwcw[i]=CMath::sqrt(normwcw[i]);
1528  }
1529 
1530  SGVector<float64_t>::display_vector(normwcw, nr_class, "normwcw");
1531 
1532  rho/=nr_class;
1533 
1534  SG_SPRINT("rho=%f\n", rho);
1535 
1536  float64_t sumb=0;
1537  for (int32_t i=0; i<nr_class; i++)
1538  {
1539  if (class_count[i] != 0.0)
1540  biases[i+1]=biases[i+1]/class_count[i]+rho;
1541  else
1542  biases[i+1]+=rho-zero_counts[i];
1543 
1544  SG_SPRINT("biases=%f\n", biases[i+1]);
1545 
1546  sumb+=biases[i+1];
1547  }
1548  SG_SPRINT("sumb=%f\n", sumb);
1549 
1550  SG_FREE(zero_counts);
1551 
1552  for (int32_t i=0; i<l; i++)
1553  outputs[i]+=biases[(int32_t) y[i]+1];
1554 
1555  biases[0]=rho;
1556 
1557  //CMath::display_vector(outputs, l, "outputs");
1558 
1559 
1560  float64_t xi=0;
1561  for (int32_t i=0; i<active_size; i++)
1562  {
1563  if (is_lower_bound(i))
1564  continue;
1565  xi+=rho-outputs[i];
1566  }
1567 
1568  //SG_SPRINT("xi=%f\n", xi);
1569 
1570  //SG_SPRINT("quad=%f Cp=%f xi*mu=%f\n", quad, nr_class*rho, xi*mu);
1571 
1572  float64_t primal=0.5*quad- nr_class*rho+xi*mu;
1573 
1574  //SG_SPRINT("primal=%10.10f\n", primal);
1575 
1576  SG_FREE(y);
1577  SG_FREE(alpha);
1578  SG_FREE(alpha_status);
1579 
1580  return primal;
1581 }
1582 
1583 
1584 // return 1 if already optimal, return 0 otherwise
1585 int32_t Solver_NUMC::select_working_set(
1586  int32_t &out_i, int32_t &out_j, float64_t &gap)
1587 {
1588  // return i,j such that y_i = y_j and
1589  // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
1590  // j: minimizes the decrease of obj value
1591  // (if quadratic coefficient <= 0, replace it with tau)
1592  // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
1593 
1594  int32_t retval=0;
1595  float64_t best_gap=0;
1596  int32_t best_out_i=-1;
1597  int32_t best_out_j=-1;
1598 
1599  float64_t* Gmaxp = SG_MALLOC(float64_t, nr_class);
1600  float64_t* Gmaxp2 = SG_MALLOC(float64_t, nr_class);
1601  int32_t* Gmaxp_idx = SG_MALLOC(int32_t, nr_class);
1602 
1603  int32_t* Gmin_idx = SG_MALLOC(int32_t, nr_class);
1604  float64_t* obj_diff_min = SG_MALLOC(float64_t, nr_class);
1605 
1606  for (int32_t i=0; i<nr_class; i++)
1607  {
1608  Gmaxp[i]=-INF;
1609  Gmaxp2[i]=-INF;
1610  Gmaxp_idx[i]=-1;
1611  Gmin_idx[i]=-1;
1612  obj_diff_min[i]=INF;
1613  }
1614 
1615  for(int32_t t=0;t<active_size;t++)
1616  {
1617  int32_t cidx=y[t];
1618  if(!is_upper_bound(t))
1619  {
1620  if(-G[t] >= Gmaxp[cidx])
1621  {
1622  Gmaxp[cidx] = -G[t];
1623  Gmaxp_idx[cidx] = t;
1624  }
1625  }
1626  }
1627 
1628  for(int32_t j=0;j<active_size;j++)
1629  {
1630  int32_t cidx=y[j];
1631  int32_t ip = Gmaxp_idx[cidx];
1632  const Qfloat *Q_ip = NULL;
1633  if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
1634  Q_ip = Q->get_Q(ip,active_size);
1635 
1636  if (!is_lower_bound(j))
1637  {
1638  float64_t grad_diff=Gmaxp[cidx]+G[j];
1639  if (G[j] >= Gmaxp2[cidx])
1640  Gmaxp2[cidx] = G[j];
1641  if (grad_diff > 0)
1642  {
1643  float64_t obj_diff;
1644  float64_t quad_coef = Q_ip[ip]+QD[j]-2*Q_ip[j];
1645  if (quad_coef > 0)
1646  obj_diff = -(grad_diff*grad_diff)/quad_coef;
1647  else
1648  obj_diff = -(grad_diff*grad_diff)/TAU;
1649 
1650  if (obj_diff <= obj_diff_min[cidx])
1651  {
1652  Gmin_idx[cidx]=j;
1653  obj_diff_min[cidx] = obj_diff;
1654  }
1655  }
1656  }
1657 
1658  gap=Gmaxp[cidx]+Gmaxp2[cidx];
1659  if (gap>=best_gap && Gmin_idx[cidx]>=0 &&
1660  Gmaxp_idx[cidx]>=0 && Gmin_idx[cidx]<active_size)
1661  {
1662  out_i = Gmaxp_idx[cidx];
1663  out_j = Gmin_idx[cidx];
1664 
1665  best_gap=gap;
1666  best_out_i=out_i;
1667  best_out_j=out_j;
1668  }
1669  }
1670 
1671  gap=best_gap;
1672  out_i=best_out_i;
1673  out_j=best_out_j;
1674 
1675  SG_SDEBUG("i=%d j=%d best_gap=%f y_i=%f y_j=%f\n", out_i, out_j, gap, y[out_i], y[out_j]);
1676 
1677 
1678  if(gap < eps)
1679  retval=1;
1680 
1681  SG_FREE(Gmaxp);
1682  SG_FREE(Gmaxp2);
1683  SG_FREE(Gmaxp_idx);
1684  SG_FREE(Gmin_idx);
1685  SG_FREE(obj_diff_min);
1686 
1687  return retval;
1688 }
1689 
bool Solver_NUMC::be_shrunk(
	int32_t i, float64_t Gmax1, float64_t Gmax2, float64_t Gmax3,
	float64_t Gmax4)
{
	// shrinking is disabled for the multiclass nu solver: no variable is
	// ever removed from the active set
	return false;
}
1696 
void Solver_NUMC::do_shrinking()
{
	// intentionally a no-op: the multiclass nu solver always optimizes
	// over the full variable set
}
1700 
float64_t Solver_NUMC::calculate_rho()
{
	// the generic solver rho is unused by the multiclass formulation;
	// rho and the per-class biases are computed in compute_primal()
	return 0;
}
1705 
1706 
1707 //
1708 // Q matrices for various formulations
1709 //
1710 class SVC_Q: public LibSVMKernel
1711 {
1712 public:
1713  SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
1714  :LibSVMKernel(prob.l, prob.x, param)
1715  {
1716  clone(y,y_,prob.l);
1717  cache = new Cache(prob.l,(int64_t)(param.cache_size*(1l<<20)));
1718  QD = SG_MALLOC(Qfloat, prob.l);
1719  for(int32_t i=0;i<prob.l;i++)
1720  QD[i]= (Qfloat)kernel_function(i,i);
1721  }
1722 
1723  Qfloat *get_Q(int32_t i, int32_t len) const
1724  {
1725  Qfloat *data;
1726  int32_t start;
1727  if((start = cache->get_data(i,&data,len)) < len)
1728  compute_Q_parallel(data, y, i, start, len);
1729 
1730  return data;
1731  }
1732 
1733  Qfloat *get_QD() const
1734  {
1735  return QD;
1736  }
1737 
1738  void swap_index(int32_t i, int32_t j) const
1739  {
1740  cache->swap_index(i,j);
1741  LibSVMKernel::swap_index(i,j);
1742  CMath::swap(y[i],y[j]);
1743  CMath::swap(QD[i],QD[j]);
1744  }
1745 
1746  ~SVC_Q()
1747  {
1748  SG_FREE(y);
1749  delete cache;
1750  SG_FREE(QD);
1751  }
1752 private:
1753  schar *y;
1754  Cache *cache;
1755  Qfloat *QD;
1756 };
1757 
1758 
1759 class ONE_CLASS_Q: public LibSVMKernel
1760 {
1761 public:
1762  ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
1763  :LibSVMKernel(prob.l, prob.x, param)
1764  {
1765  cache = new Cache(prob.l,(int64_t)(param.cache_size*(1l<<20)));
1766  QD = SG_MALLOC(Qfloat, prob.l);
1767  for(int32_t i=0;i<prob.l;i++)
1768  QD[i]= (Qfloat)kernel_function(i,i);
1769  }
1770 
1771  Qfloat *get_Q(int32_t i, int32_t len) const
1772  {
1773  Qfloat *data;
1774  int32_t start;
1775  if((start = cache->get_data(i,&data,len)) < len)
1776  compute_Q_parallel(data, NULL, i, start, len);
1777 
1778  return data;
1779  }
1780 
1781  Qfloat *get_QD() const
1782  {
1783  return QD;
1784  }
1785 
1786  void swap_index(int32_t i, int32_t j) const
1787  {
1788  cache->swap_index(i,j);
1789  LibSVMKernel::swap_index(i,j);
1790  CMath::swap(QD[i],QD[j]);
1791  }
1792 
1793  ~ONE_CLASS_Q()
1794  {
1795  delete cache;
1796  SG_FREE(QD);
1797  }
1798 private:
1799  Cache *cache;
1800  Qfloat *QD;
1801 };
1802 
/** Q matrix for epsilon/nu-SVR. The regression dual has 2*l variables:
 * entries [0,l) start with sign +1, entries [l,2l) with sign -1; both halves
 * map back to the same training example via index[]. */
class SVR_Q: public LibSVMKernel
{
public:
	SVR_Q(const svm_problem& prob, const svm_parameter& param)
	:LibSVMKernel(prob.l, prob.x, param)
	{
		l = prob.l;
		cache = new Cache(l,(int64_t)(param.cache_size*(1l<<20)));
		QD = SG_MALLOC(Qfloat, 2*l);
		sign = SG_MALLOC(schar, 2*l);
		index = SG_MALLOC(int32_t, 2*l);
		for(int32_t k=0;k<l;k++)
		{
			sign[k] = 1;
			sign[k+l] = -1;
			index[k] = k;
			index[k+l] = k;
			QD[k]= (Qfloat)kernel_function(k,k);
			QD[k+l]=QD[k];
		}
		// double buffering: get_Q reorders the cached row into one of two
		// alternating buffers, so two columns can be live at once
		buffer[0] = SG_MALLOC(Qfloat, 2*l);
		buffer[1] = SG_MALLOC(Qfloat, 2*l);
		next_buffer = 0;
	}

	// only the bookkeeping arrays are swapped; the cache is keyed by
	// original example indices, which stay untouched
	void swap_index(int32_t i, int32_t j) const
	{
		CMath::swap(sign[i],sign[j]);
		CMath::swap(index[i],index[j]);
		CMath::swap(QD[i],QD[j]);
	}

	Qfloat *get_Q(int32_t i, int32_t len) const
	{
		Qfloat *data;
		int32_t real_i = index[i];
		// cache holds plain kernel rows over the l original examples
		if(cache->get_data(real_i,&data,l) < l)
			compute_Q_parallel(data, NULL, real_i, 0, l);

		// reorder and copy: apply the signs and the index permutation
		Qfloat *buf = buffer[next_buffer];
		next_buffer = 1 - next_buffer;
		schar si = sign[i];
		for(int32_t j=0;j<len;j++)
			buf[j] = si * sign[j] * data[index[j]];
		return buf;
	}

	Qfloat *get_QD() const
	{
		return QD;
	}

	~SVR_Q()
	{
		delete cache;
		SG_FREE(sign);
		SG_FREE(index);
		SG_FREE(buffer[0]);
		SG_FREE(buffer[1]);
		SG_FREE(QD);
	}

private:
	int32_t l;	// number of original examples (dual size is 2*l)
	Cache *cache;
	schar *sign;	// +1 for the first half, -1 for the mirrored half
	int32_t *index;	// maps dual variable -> original example index
	mutable int32_t next_buffer;
	Qfloat *buffer[2];
	Qfloat *QD;
};
1875 
1876 //
1877 // construct and solve various formulations
1878 //
1879 static void solve_c_svc(
1880  const svm_problem *prob, const svm_parameter* param,
1881  float64_t *alpha, Solver::SolutionInfo* si, float64_t Cp, float64_t Cn)
1882 {
1883  int32_t l = prob->l;
1884  schar *y = SG_MALLOC(schar, l);
1885 
1886  int32_t i;
1887 
1888  for(i=0;i<l;i++)
1889  {
1890  alpha[i] = 0;
1891  if(prob->y[i] > 0) y[i] = +1; else y[i]=-1;
1892  }
1893 
1894  Solver s;
1895  s.Solve(l, SVC_Q(*prob,*param,y), prob->pv, y,
1896  alpha, Cp, Cn, param->eps, si, param->shrinking, param->use_bias);
1897 
1898  float64_t sum_alpha=0;
1899  for(i=0;i<l;i++)
1900  sum_alpha += alpha[i];
1901 
1902  if (Cp==Cn)
1903  SG_SINFO("nu = %f\n", sum_alpha/(param->C*prob->l));
1904 
1905  for(i=0;i<l;i++)
1906  alpha[i] *= y[i];
1907 
1908  SG_FREE(y);
1909 }
1910 
1911 
1912 //two weighted datasets
1913 void solve_c_svc_weighted(
1914  const svm_problem *prob, const svm_parameter* param,
1915  float64_t *alpha, Solver::SolutionInfo* si, float64_t Cp, float64_t Cn)
1916 {
1917  int l = prob->l;
1918  float64_t *minus_ones = SG_MALLOC(float64_t, l);
1919  schar *y = SG_MALLOC(schar, l);
1920 
1921  int i;
1922 
1923  for(i=0;i<l;i++)
1924  {
1925  alpha[i] = 0;
1926  minus_ones[i] = -1;
1927  if(prob->y[i] > 0) y[i] = +1; else y[i]=-1;
1928  }
1929 
1930  WeightedSolver s = WeightedSolver(prob->C);
1931  s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
1932  alpha, Cp, Cn, param->eps, si, param->shrinking, param->use_bias);
1933 
1934  float64_t sum_alpha=0;
1935  for(i=0;i<l;i++)
1936  sum_alpha += alpha[i];
1937 
1938  //if (Cp==Cn)
1939  // SG_SINFO("nu = %f\n", sum_alpha/(prob->C*prob->l));
1940 
1941  for(i=0;i<l;i++)
1942  alpha[i] *= y[i];
1943 
1944  SG_FREE(minus_ones);
1945  SG_FREE(y);
1946 }
1947 
1948 static void solve_nu_svc(
1949  const svm_problem *prob, const svm_parameter *param,
1950  float64_t *alpha, Solver::SolutionInfo* si)
1951 {
1952  int32_t i;
1953  int32_t l = prob->l;
1954  float64_t nu = param->nu;
1955 
1956  schar *y = SG_MALLOC(schar, l);
1957 
1958  for(i=0;i<l;i++)
1959  if(prob->y[i]>0)
1960  y[i] = +1;
1961  else
1962  y[i] = -1;
1963 
1964  float64_t sum_pos = nu*l/2;
1965  float64_t sum_neg = nu*l/2;
1966 
1967  for(i=0;i<l;i++)
1968  if(y[i] == +1)
1969  {
1970  alpha[i] = CMath::min(1.0,sum_pos);
1971  sum_pos -= alpha[i];
1972  }
1973  else
1974  {
1975  alpha[i] = CMath::min(1.0,sum_neg);
1976  sum_neg -= alpha[i];
1977  }
1978 
1979  float64_t *zeros = SG_MALLOC(float64_t, l);
1980 
1981  for(i=0;i<l;i++)
1982  zeros[i] = 0;
1983 
1984  Solver_NU s;
1985  s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
1986  alpha, 1.0, 1.0, param->eps, si, param->shrinking, param->use_bias);
1987  float64_t r = si->r;
1988 
1989  SG_SINFO("C = %f\n",1/r);
1990 
1991  for(i=0;i<l;i++)
1992  alpha[i] *= y[i]/r;
1993 
1994  si->rho /= r;
1995  si->obj /= (r*r);
1996  si->upper_bound_p = 1/r;
1997  si->upper_bound_n = 1/r;
1998 
1999  SG_FREE(y);
2000  SG_FREE(zeros);
2001 }
2002 
2003 static void solve_nu_multiclass_svc(const svm_problem *prob,
2004  const svm_parameter *param, Solver::SolutionInfo* si, svm_model* model)
2005 {
2006  int32_t l = prob->l;
2007  float64_t nu = param->nu;
2008 
2009  float64_t *alpha = SG_MALLOC(float64_t, prob->l);
2010  schar *y = SG_MALLOC(schar, l);
2011 
2012  for(int32_t i=0;i<l;i++)
2013  {
2014  alpha[i] = 0;
2015  y[i]=prob->y[i];
2016  }
2017 
2018  int32_t nr_class=param->nr_class;
2019  float64_t* sum_class = SG_MALLOC(float64_t, nr_class);
2020 
2021  for (int32_t j=0; j<nr_class; j++)
2022  sum_class[j] = nu*l/nr_class;
2023 
2024  for(int32_t i=0;i<l;i++)
2025  {
2026  alpha[i] = CMath::min(1.0,sum_class[int32_t(y[i])]);
2027  sum_class[int32_t(y[i])] -= alpha[i];
2028  }
2029  SG_FREE(sum_class);
2030 
2031 
2032  float64_t *zeros = SG_MALLOC(float64_t, l);
2033 
2034  for (int32_t i=0;i<l;i++)
2035  zeros[i] = 0;
2036 
2037  Solver_NUMC s(nr_class, nu);
2038  SVC_QMC Q(*prob,*param,y, nr_class, ((float64_t) nr_class)/CMath::sq(nu*l));
2039 
2040  s.Solve(l, Q, zeros, y,
2041  alpha, 1.0, 1.0, param->eps, si, param->shrinking, param->use_bias);
2042 
2043 
2044  int32_t* class_sv_count=SG_MALLOC(int32_t, nr_class);
2045 
2046  for (int32_t i=0; i<nr_class; i++)
2047  class_sv_count[i]=0;
2048 
2049  for (int32_t i=0; i<l; i++)
2050  {
2051  if (CMath::abs(alpha[i]) > 0)
2052  class_sv_count[(int32_t) y[i]]++;
2053  }
2054 
2055  model->l=l;
2056  // rho[0]= rho in mcsvm paper, rho[1]...rho[nr_class] is bias in mcsvm paper
2057  model->rho = SG_MALLOC(float64_t, nr_class+1);
2058  model->nr_class = nr_class;
2059  model->label = NULL;
2060  model->SV = SG_MALLOC(svm_node*,nr_class);
2061  model->nSV = SG_MALLOC(int32_t, nr_class);
2062  model->sv_coef = SG_MALLOC(float64_t *,nr_class);
2063  model->normwcw = SG_MALLOC(float64_t,nr_class);
2064 
2065  for (int32_t i=0; i<nr_class+1; i++)
2066  model->rho[i]=0;
2067 
2068  model->objective = si->obj;
2069 
2070  if (param->use_bias)
2071  {
2072  SG_SDEBUG("Computing biases and primal objective\n");
2073  float64_t primal = s.compute_primal(y, alpha, model->rho, model->normwcw);
2074  SG_SINFO("Primal = %10.10f\n", primal);
2075  }
2076 
2077  for (int32_t i=0; i<nr_class; i++)
2078  {
2079  model->nSV[i]=class_sv_count[i];
2080  model->SV[i] = SG_MALLOC(svm_node,class_sv_count[i]);
2081  model->sv_coef[i] = SG_MALLOC(float64_t,class_sv_count[i]);
2082  class_sv_count[i]=0;
2083  }
2084 
2085  for (int32_t i=0;i<prob->l;i++)
2086  {
2087  if(fabs(alpha[i]) > 0)
2088  {
2089  model->SV[(int32_t) y[i]][class_sv_count[(int32_t) y[i]]].index = prob->x[i]->index;
2090  model->sv_coef[(int32_t) y[i]][class_sv_count[(int32_t) y[i]]] = alpha[i];
2091  class_sv_count[(int32_t) y[i]]++;
2092  }
2093  }
2094 
2095  SG_FREE(y);
2096  SG_FREE(zeros);
2097  SG_FREE(alpha);
2098 }
2099 
2100 static void solve_one_class(
2101  const svm_problem *prob, const svm_parameter *param,
2102  float64_t *alpha, Solver::SolutionInfo* si)
2103 {
2104  int32_t l = prob->l;
2105  float64_t *zeros = SG_MALLOC(float64_t, l);
2106  schar *ones = SG_MALLOC(schar, l);
2107  int32_t i;
2108 
2109  int32_t n = (int32_t)(param->nu*prob->l); // # of alpha's at upper bound
2110 
2111  for(i=0;i<n;i++)
2112  alpha[i] = 1;
2113  if(n<prob->l)
2114  alpha[n] = param->nu * prob->l - n;
2115  for(i=n+1;i<l;i++)
2116  alpha[i] = 0;
2117 
2118  for(i=0;i<l;i++)
2119  {
2120  zeros[i] = 0;
2121  ones[i] = 1;
2122  }
2123 
2124  Solver s;
2125  s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
2126  alpha, 1.0, 1.0, param->eps, si, param->shrinking, param->use_bias);
2127 
2128  SG_FREE(zeros);
2129  SG_FREE(ones);
2130 }
2131 
2132 static void solve_epsilon_svr(
2133  const svm_problem *prob, const svm_parameter *param,
2134  float64_t *alpha, Solver::SolutionInfo* si)
2135 {
2136  int32_t l = prob->l;
2137  float64_t *alpha2 = SG_MALLOC(float64_t, 2*l);
2138  float64_t *linear_term = SG_MALLOC(float64_t, 2*l);
2139  schar *y = SG_MALLOC(schar, 2*l);
2140  int32_t i;
2141 
2142  for(i=0;i<l;i++)
2143  {
2144  alpha2[i] = 0;
2145  linear_term[i] = param->p - prob->y[i];
2146  y[i] = 1;
2147 
2148  alpha2[i+l] = 0;
2149  linear_term[i+l] = param->p + prob->y[i];
2150  y[i+l] = -1;
2151  }
2152 
2153  Solver s;
2154  s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
2155  alpha2, param->C, param->C, param->eps, si, param->shrinking, param->use_bias);
2156 
2157  float64_t sum_alpha = 0;
2158  for(i=0;i<l;i++)
2159  {
2160  alpha[i] = alpha2[i] - alpha2[i+l];
2161  sum_alpha += fabs(alpha[i]);
2162  }
2163  SG_SINFO("nu = %f\n",sum_alpha/(param->C*l));
2164 
2165  SG_FREE(alpha2);
2166  SG_FREE(linear_term);
2167  SG_FREE(y);
2168 }
2169 
2170 static void solve_nu_svr(
2171  const svm_problem *prob, const svm_parameter *param,
2172  float64_t *alpha, Solver::SolutionInfo* si)
2173 {
2174  int32_t l = prob->l;
2175  float64_t C = param->C;
2176  float64_t *alpha2 = SG_MALLOC(float64_t, 2*l);
2177  float64_t *linear_term = SG_MALLOC(float64_t, 2*l);
2178  schar *y = SG_MALLOC(schar, 2*l);
2179  int32_t i;
2180 
2181  float64_t sum = C * param->nu * l / 2;
2182  for(i=0;i<l;i++)
2183  {
2184  alpha2[i] = alpha2[i+l] = CMath::min(sum,C);
2185  sum -= alpha2[i];
2186 
2187  linear_term[i] = - prob->y[i];
2188  y[i] = 1;
2189 
2190  linear_term[i+l] = prob->y[i];
2191  y[i+l] = -1;
2192  }
2193 
2194  Solver_NU s;
2195  s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
2196  alpha2, C, C, param->eps, si, param->shrinking, param->use_bias);
2197 
2198  SG_SINFO("epsilon = %f\n",-si->r);
2199 
2200  for(i=0;i<l;i++)
2201  alpha[i] = alpha2[i] - alpha2[i+l];
2202 
2203  SG_FREE(alpha2);
2204  SG_FREE(linear_term);
2205  SG_FREE(y);
2206 }
2207 
2208 //
2209 // decision_function
2210 //
struct decision_function
{
	float64_t *alpha;	// SG_MALLOC'ed coefficients (alpha_i * y_i); ownership passes to the caller
	float64_t rho;		// bias term of the decision function
	float64_t objective;	// dual objective value at the solution
};
2217 
/** train a single SVM (two-class, one-class or regression) by dispatching
 * on param->svm_type; returns the resulting decision function (the caller
 * owns f.alpha). Cp/Cn are the per-class penalties for C_SVC. */
decision_function svm_train_one(
	const svm_problem *prob, const svm_parameter *param,
	float64_t Cp, float64_t Cn)
{
	float64_t *alpha = SG_MALLOC(float64_t, prob->l);
	Solver::SolutionInfo si;
	switch(param->svm_type)
	{
		case C_SVC:
			solve_c_svc(prob,param,alpha,&si,Cp,Cn);
			break;
		case NU_SVC:
			solve_nu_svc(prob,param,alpha,&si);
			break;
		case ONE_CLASS:
			solve_one_class(prob,param,alpha,&si);
			break;
		case EPSILON_SVR:
			solve_epsilon_svr(prob,param,alpha,&si);
			break;
		case NU_SVR:
			solve_nu_svr(prob,param,alpha,&si);
			break;
	}

	SG_SINFO("obj = %.16f, rho = %.16f\n",si.obj,si.rho);

	// output SVs
	if (param->svm_type != ONE_CLASS)
	{
		// count support vectors (nonzero alpha) and bounded support
		// vectors (alpha at the class-specific upper bound)
		int32_t nSV = 0;
		int32_t nBSV = 0;
		for(int32_t i=0;i<prob->l;i++)
		{
			if(fabs(alpha[i]) > 0)
			{
				++nSV;
				if(prob->y[i] > 0)
				{
					if(fabs(alpha[i]) >= si.upper_bound_p)
						++nBSV;
				}
				else
				{
					if(fabs(alpha[i]) >= si.upper_bound_n)
						++nBSV;
				}
			}
		}
		SG_SINFO("nSV = %d, nBSV = %d\n",nSV,nBSV);
	}

	decision_function f;
	f.alpha = alpha;
	f.rho = si.rho;
	f.objective=si.obj;
	return f;
}
2276 
2277 //
2278 // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
2279 // perm, length l, must be allocated before calling this subroutine
/** group training data by class.
 * Outputs (all SG_MALLOC'ed, caller frees): nr_class_ret, the distinct
 * labels (label_ret), the begin offset of each class in the permuted order
 * (start_ret), the per-class counts (count_ret). perm (length l, allocated
 * by the caller) receives indices into the original data such that examples
 * of the same class become contiguous. */
void svm_group_classes(
	const svm_problem *prob, int32_t *nr_class_ret, int32_t **label_ret,
	int32_t **start_ret, int32_t **count_ret, int32_t *perm)
{
	int32_t l = prob->l;
	int32_t max_nr_class = 16;	// initial capacity; doubled on demand
	int32_t nr_class = 0;
	int32_t *label = SG_MALLOC(int32_t, max_nr_class);
	int32_t *count = SG_MALLOC(int32_t, max_nr_class);
	int32_t *data_label = SG_MALLOC(int32_t, l);	// class index of each example
	int32_t i;

	for(i=0;i<l;i++)
	{
		int32_t this_label=(int32_t) prob->y[i];
		int32_t j;
		// linear scan over the labels seen so far
		for(j=0;j<nr_class;j++)
		{
			if(this_label == label[j])
			{
				++count[j];
				break;
			}
		}
		data_label[i] = j;
		// label not seen before: append (growing the arrays if needed)
		if(j == nr_class)
		{
			if(nr_class == max_nr_class)
			{
				max_nr_class *= 2;
				label=SG_REALLOC(int32_t, label,max_nr_class);
				count=SG_REALLOC(int32_t, count,max_nr_class);
			}
			label[nr_class] = this_label;
			count[nr_class] = 1;
			++nr_class;
		}
	}

	// first pass computes class begin offsets; filling perm advances them,
	// so they are recomputed afterwards
	int32_t *start = SG_MALLOC(int32_t, nr_class);
	start[0] = 0;
	for(i=1;i<nr_class;i++)
		start[i] = start[i-1]+count[i-1];
	for(i=0;i<l;i++)
	{
		perm[start[data_label[i]]] = i;
		++start[data_label[i]];
	}
	start[0] = 0;
	for(i=1;i<nr_class;i++)
		start[i] = start[i-1]+count[i-1];

	*nr_class_ret = nr_class;
	*label_ret = label;
	*start_ret = start;
	*count_ret = count;
	SG_FREE(data_label);
}
2338 
2339 //
2340 // Interface functions
2341 //
/** train an SVM model. Three paths: (1) one-class/regression trains a
 * single machine; (2) NU_MULTICLASS_SVC trains one joint multiclass
 * machine; (3) classification trains all k*(k-1)/2 pairwise machines and
 * assembles their SVs/coefficients into one model. Caller owns the
 * returned model (see svm_destroy_model). */
svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
{
	svm_model *model = SG_MALLOC(svm_model,1);
	model->param = *param;
	model->free_sv = 0;	// XXX

	if(param->svm_type == ONE_CLASS ||
	   param->svm_type == EPSILON_SVR ||
	   param->svm_type == NU_SVR)
	{
		SG_SINFO("training one class svm or doing epsilon sv regression\n");

		// regression or one-class-svm
		model->nr_class = 2;
		model->label = NULL;
		model->nSV = NULL;
		model->sv_coef = SG_MALLOC(float64_t *,1);
		decision_function f = svm_train_one(prob,param,0,0);
		model->rho = SG_MALLOC(float64_t, 1);
		model->rho[0] = f.rho;
		model->objective = f.objective;

		// keep only the support vectors (nonzero alpha)
		int32_t nSV = 0;
		int32_t i;
		for(i=0;i<prob->l;i++)
			if(fabs(f.alpha[i]) > 0) ++nSV;
		model->l = nSV;
		model->SV = SG_MALLOC(svm_node *,nSV);
		model->sv_coef[0] = SG_MALLOC(float64_t, nSV);
		int32_t j = 0;
		for(i=0;i<prob->l;i++)
			if(fabs(f.alpha[i]) > 0)
			{
				model->SV[j] = prob->x[i];
				model->sv_coef[0][j] = f.alpha[i];
				++j;
			}

		SG_FREE(f.alpha);
	}
	else if(param->svm_type == NU_MULTICLASS_SVC)
	{
		// the multiclass solver fills the model directly
		Solver::SolutionInfo si;
		solve_nu_multiclass_svc(prob,param,&si,model);
		SG_SINFO("obj = %.16f, rho = %.16f\n",si.obj,si.rho);
	}
	else
	{
		// classification
		int32_t l = prob->l;
		int32_t nr_class;
		int32_t *label = NULL;
		int32_t *start = NULL;
		int32_t *count = NULL;
		int32_t *perm = SG_MALLOC(int32_t, l);

		// group training data of the same class
		svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
		svm_node **x = SG_MALLOC(svm_node *,l);
		float64_t *C = SG_MALLOC(float64_t,l);
		float64_t *pv = SG_MALLOC(float64_t,l);


		// reorder examples, per-example C and linear term into class order
		int32_t i;
		for(i=0;i<l;i++) {
			x[i] = prob->x[perm[i]];
			C[i] = prob->C[perm[i]];

			if (prob->pv)
			{
				pv[i] = prob->pv[perm[i]];
			}
			else
			{
				//no custom linear term is set
				pv[i] = -1.0;
			}

		}


		// calculate weighted C
		float64_t *weighted_C = SG_MALLOC(float64_t, nr_class);
		for(i=0;i<nr_class;i++)
			weighted_C[i] = param->C;
		for(i=0;i<param->nr_weight;i++)
		{
			int32_t j;
			for(j=0;j<nr_class;j++)
				if(param->weight_label[i] == label[j])
					break;
			if(j == nr_class)
				SG_SWARNING("warning: class label %d specified in weight is not found\n", param->weight_label[i]);
			else
				weighted_C[j] *= param->weight[i];
		}

		// train k*(k-1)/2 models

		bool *nonzero = SG_MALLOC(bool,l);	// true if example is an SV in any pairwise machine
		for(i=0;i<l;i++)
			nonzero[i] = false;
		decision_function *f = SG_MALLOC(decision_function,nr_class*(nr_class-1)/2);

		int32_t p = 0;
		for(i=0;i<nr_class;i++)
			for(int32_t j=i+1;j<nr_class;j++)
			{
				// build the two-class subproblem for classes (i,j):
				// class i examples get label +1, class j examples -1
				svm_problem sub_prob;
				int32_t si = start[i], sj = start[j];
				int32_t ci = count[i], cj = count[j];
				sub_prob.l = ci+cj;
				sub_prob.x = SG_MALLOC(svm_node *,sub_prob.l);
				sub_prob.y = SG_MALLOC(float64_t,sub_prob.l+1); //dirty hack to surpress valgrind err
				sub_prob.C = SG_MALLOC(float64_t,sub_prob.l+1);
				sub_prob.pv = SG_MALLOC(float64_t,sub_prob.l+1);

				int32_t k;
				for(k=0;k<ci;k++)
				{
					sub_prob.x[k] = x[si+k];
					sub_prob.y[k] = +1;
					sub_prob.C[k] = C[si+k];
					sub_prob.pv[k] = pv[si+k];

				}
				for(k=0;k<cj;k++)
				{
					sub_prob.x[ci+k] = x[sj+k];
					sub_prob.y[ci+k] = -1;
					sub_prob.C[ci+k] = C[sj+k];
					sub_prob.pv[ci+k] = pv[sj+k];
				}
				sub_prob.y[sub_prob.l]=-1; //dirty hack to surpress valgrind err
				sub_prob.C[sub_prob.l]=-1;
				sub_prob.pv[sub_prob.l]=-1;

				f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
				// mark examples that became SVs of this pairwise machine
				for(k=0;k<ci;k++)
					if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
						nonzero[si+k] = true;
				for(k=0;k<cj;k++)
					if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
						nonzero[sj+k] = true;
				SG_FREE(sub_prob.x);
				SG_FREE(sub_prob.y);
				SG_FREE(sub_prob.C);
				SG_FREE(sub_prob.pv);
				++p;
			}

		// build output

		model->objective = f[0].objective;
		model->nr_class = nr_class;

		model->label = SG_MALLOC(int32_t, nr_class);
		for(i=0;i<nr_class;i++)
			model->label[i] = label[i];

		model->rho = SG_MALLOC(float64_t, nr_class*(nr_class-1)/2);
		for(i=0;i<nr_class*(nr_class-1)/2;i++)
			model->rho[i] = f[i].rho;

		// count SVs per class and in total
		int32_t total_sv = 0;
		int32_t *nz_count = SG_MALLOC(int32_t, nr_class);
		model->nSV = SG_MALLOC(int32_t, nr_class);
		for(i=0;i<nr_class;i++)
		{
			int32_t nSV = 0;
			for(int32_t j=0;j<count[i];j++)
				if(nonzero[start[i]+j])
				{
					++nSV;
					++total_sv;
				}
			model->nSV[i] = nSV;
			nz_count[i] = nSV;
		}

		SG_SINFO("Total nSV = %d\n",total_sv);

		model->l = total_sv;
		model->SV = SG_MALLOC(svm_node *,total_sv);
		p = 0;
		for(i=0;i<l;i++)
			if(nonzero[i]) model->SV[p++] = x[i];

		// begin offset of each class inside the compacted SV list
		int32_t *nz_start = SG_MALLOC(int32_t, nr_class);
		nz_start[0] = 0;
		for(i=1;i<nr_class;i++)
			nz_start[i] = nz_start[i-1]+nz_count[i-1];

		model->sv_coef = SG_MALLOC(float64_t *,nr_class-1);
		for(i=0;i<nr_class-1;i++)
			model->sv_coef[i] = SG_MALLOC(float64_t, total_sv);

		p = 0;
		for(i=0;i<nr_class;i++)
			for(int32_t j=i+1;j<nr_class;j++)
			{
				// classifier (i,j): coefficients with
				// i are in sv_coef[j-1][nz_start[i]...],
				// j are in sv_coef[i][nz_start[j]...]

				int32_t si = start[i];
				int32_t sj = start[j];
				int32_t ci = count[i];
				int32_t cj = count[j];

				int32_t q = nz_start[i];
				int32_t k;
				for(k=0;k<ci;k++)
					if(nonzero[si+k])
						model->sv_coef[j-1][q++] = f[p].alpha[k];
				q = nz_start[j];
				for(k=0;k<cj;k++)
					if(nonzero[sj+k])
						model->sv_coef[i][q++] = f[p].alpha[ci+k];
				++p;
			}

		SG_FREE(label);
		SG_FREE(count);
		SG_FREE(perm);
		SG_FREE(start);
		SG_FREE(x);
		SG_FREE(C);
		SG_FREE(pv);
		SG_FREE(weighted_C);
		SG_FREE(nonzero);
		for(i=0;i<nr_class*(nr_class-1)/2;i++)
			SG_FREE(f[i].alpha);
		SG_FREE(f);
		SG_FREE(nz_count);
		SG_FREE(nz_start);
	}
	return model;
}
2581 
2582 void svm_destroy_model(svm_model* model)
2583 {
2584  if(model->free_sv && model->l > 0)
2585  SG_FREE((void *)(model->SV[0]));
2586  for(int32_t i=0;i<model->nr_class-1;i++)
2587  SG_FREE(model->sv_coef[i]);
2588  SG_FREE(model->SV);
2589  SG_FREE(model->sv_coef);
2590  SG_FREE(model->rho);
2591  SG_FREE(model->label);
2592  SG_FREE(model->nSV);
2593  SG_FREE(model);
2594 }
2595 
2596 void svm_destroy_param(svm_parameter* param)
2597 {
2598  SG_FREE(param->weight_label);
2599  SG_FREE(param->weight);
2600 }
2601 
2602 const char *svm_check_parameter(
2603  const svm_problem *prob, const svm_parameter *param)
2604 {
2605  // svm_type
2606 
2607  int32_t svm_type = param->svm_type;
2608  if(svm_type != C_SVC &&
2609  svm_type != NU_SVC &&
2610  svm_type != ONE_CLASS &&
2611  svm_type != EPSILON_SVR &&
2612  svm_type != NU_SVR &&
2613  svm_type != NU_MULTICLASS_SVC)
2614  return "unknown svm type";
2615 
2616  // kernel_type, degree
2617 
2618  int32_t kernel_type = param->kernel_type;
2619  if(kernel_type != LINEAR &&
2620  kernel_type != POLY &&
2621  kernel_type != RBF &&
2622  kernel_type != SIGMOID &&
2623  kernel_type != PRECOMPUTED)
2624  return "unknown kernel type";
2625 
2626  if(param->degree < 0)
2627  return "degree of polynomial kernel < 0";
2628 
2629  // cache_size,eps,C,nu,p,shrinking
2630 
2631  if(param->cache_size <= 0)
2632  return "cache_size <= 0";
2633 
2634  if(param->eps <= 0)
2635  return "eps <= 0";
2636 
2637  if(svm_type == C_SVC ||
2638  svm_type == EPSILON_SVR ||
2639  svm_type == NU_SVR)
2640  if(param->C <= 0)
2641  return "C <= 0";
2642 
2643  if(svm_type == NU_SVC ||
2644  svm_type == ONE_CLASS ||
2645  svm_type == NU_SVR)
2646  if(param->nu <= 0 || param->nu > 1)
2647  return "nu <= 0 or nu > 1";
2648 
2649  if(svm_type == EPSILON_SVR)
2650  if(param->p < 0)
2651  return "p < 0";
2652 
2653  if(param->shrinking != 0 &&
2654  param->shrinking != 1)
2655  return "shrinking != 0 and shrinking != 1";
2656 
2657 
2658  // check whether nu-svc is feasible
2659 
2660  if(svm_type == NU_SVC)
2661  {
2662  int32_t l = prob->l;
2663  int32_t max_nr_class = 16;
2664  int32_t nr_class = 0;
2665  int32_t *label = SG_MALLOC(int32_t, max_nr_class);
2666  int32_t *count = SG_MALLOC(int32_t, max_nr_class);
2667 
2668  int32_t i;
2669  for(i=0;i<l;i++)
2670  {
2671  int32_t this_label = (int32_t) prob->y[i];
2672  int32_t j;
2673  for(j=0;j<nr_class;j++)
2674  if(this_label == label[j])
2675  {
2676  ++count[j];
2677  break;
2678  }
2679  if(j == nr_class)
2680  {
2681  if(nr_class == max_nr_class)
2682  {
2683  max_nr_class *= 2;
2684  label=SG_REALLOC(int32_t, label, max_nr_class);
2685  count=SG_REALLOC(int32_t, count, max_nr_class);
2686  }
2687  label[nr_class] = this_label;
2688  count[nr_class] = 1;
2689  ++nr_class;
2690  }
2691  }
2692 
2693  for(i=0;i<nr_class;i++)
2694  {
2695  int32_t n1 = count[i];
2696  for(int32_t j=i+1;j<nr_class;j++)
2697  {
2698  int32_t n2 = count[j];
2699  if(param->nu*(n1+n2)/2 > CMath::min(n1,n2))
2700  {
2701  SG_FREE(label);
2702  SG_FREE(count);
2703  return "specified nu is infeasible";
2704  }
2705  }
2706  }
2707  SG_FREE(label);
2708  SG_FREE(count);
2709  }
2710 
2711  return NULL;
2712 }
2713 }
2714 #endif // DOXYGEN_SHOULD_SKIP_THIS

SHOGUN Machine Learning Toolbox - Documentation