SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
shogun_liblinear.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2007-2009 The LIBLINEAR Project.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  * notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  * notice, this list of conditions and the following disclaimer in the
14  * documentation and/or other materials provided with the distribution.
15  *
16  * 3. Neither name of copyright holders nor the names of its contributors
17  * may be used to endorse or promote products derived from this software
18  * without specific prior written permission.
19  *
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
25  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 #include <shogun/lib/config.h>
34 #ifndef DOXYGEN_SHOULD_SKIP_THIS
35 #ifdef HAVE_LAPACK
36 #include <math.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <stdarg.h>
41 
45 #include <shogun/lib/Time.h>
46 #include <shogun/lib/Signal.h>
47 
48 using namespace shogun;
49 
50 l2r_lr_fun::l2r_lr_fun(const problem *p, float64_t* Cs)
51 {
52  int l=p->l;
53 
54  this->m_prob = p;
55 
56  z = SG_MALLOC(double, l);
57  D = SG_MALLOC(double, l);
58  C = Cs;
59 }
60 
61 l2r_lr_fun::~l2r_lr_fun()
62 {
63  SG_FREE(z);
64  SG_FREE(D);
65 }
66 
67 
68 double l2r_lr_fun::fun(double *w)
69 {
70  int i;
71  double f=0;
72  float64_t *y=m_prob->y;
73  int l=m_prob->l;
74  int32_t n=m_prob->n;
75 
76  Xv(w, z);
77  for(i=0;i<l;i++)
78  {
79  double yz = y[i]*z[i];
80  if (yz >= 0)
81  f += C[i]*log(1 + exp(-yz));
82  else
83  f += C[i]*(-yz+log(1 + exp(yz)));
84  }
85  f += 0.5 *SGVector<float64_t>::dot(w,w,n);
86 
87  return(f);
88 }
89 
90 void l2r_lr_fun::grad(double *w, double *g)
91 {
92  int i;
93  float64_t *y=m_prob->y;
94  int l=m_prob->l;
95  int w_size=get_nr_variable();
96 
97  for(i=0;i<l;i++)
98  {
99  z[i] = 1/(1 + exp(-y[i]*z[i]));
100  D[i] = z[i]*(1-z[i]);
101  z[i] = C[i]*(z[i]-1)*y[i];
102  }
103  XTv(z, g);
104 
105  for(i=0;i<w_size;i++)
106  g[i] = w[i] + g[i];
107 }
108 
int l2r_lr_fun::get_nr_variable()
{
	// Number of optimization variables = feature dimension n
	// (includes the bias slot when the problem uses one).
	return m_prob->n;
}
113 
114 void l2r_lr_fun::Hv(double *s, double *Hs)
115 {
116  int i;
117  int l=m_prob->l;
118  int w_size=get_nr_variable();
119  double *wa = SG_MALLOC(double, l);
120 
121  Xv(s, wa);
122  for(i=0;i<l;i++)
123  wa[i] = C[i]*D[i]*wa[i];
124 
125  XTv(wa, Hs);
126  for(i=0;i<w_size;i++)
127  Hs[i] = s[i] + Hs[i];
128  SG_FREE(wa);
129 }
130 
131 void l2r_lr_fun::Xv(double *v, double *res_Xv)
132 {
133  int32_t l=m_prob->l;
134  int32_t n=m_prob->n;
135  float64_t bias=0;
136 
137  if (m_prob->use_bias)
138  {
139  n--;
140  bias=v[n];
141  }
142 
143  m_prob->x->dense_dot_range(res_Xv, 0, l, NULL, v, n, bias);
144 }
145 
146 void l2r_lr_fun::XTv(double *v, double *res_XTv)
147 {
148  int l=m_prob->l;
149  int32_t n=m_prob->n;
150 
151  memset(res_XTv, 0, sizeof(double)*m_prob->n);
152 
153  if (m_prob->use_bias)
154  n--;
155 
156  for (int32_t i=0;i<l;i++)
157  {
158  m_prob->x->add_to_dense_vec(v[i], i, res_XTv, n);
159 
160  if (m_prob->use_bias)
161  res_XTv[n]+=v[i];
162  }
163 }
164 
165 l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *p, double* Cs)
166 {
167  int l=p->l;
168 
169  this->m_prob = p;
170 
171  z = SG_MALLOC(double, l);
172  D = SG_MALLOC(double, l);
173  I = SG_MALLOC(int, l);
174  C=Cs;
175 
176 }
177 
178 l2r_l2_svc_fun::~l2r_l2_svc_fun()
179 {
180  SG_FREE(z);
181  SG_FREE(D);
182  SG_FREE(I);
183 }
184 
185 double l2r_l2_svc_fun::fun(double *w)
186 {
187  int i;
188  double f=0;
189  float64_t *y=m_prob->y;
190  int l=m_prob->l;
191  int w_size=get_nr_variable();
192 
193  Xv(w, z);
194  for(i=0;i<l;i++)
195  {
196  z[i] = y[i]*z[i];
197  double d = 1-z[i];
198  if (d > 0)
199  f += C[i]*d*d;
200  }
201  f += 0.5*SGVector<float64_t>::dot(w, w, w_size);
202 
203  return(f);
204 }
205 
206 void l2r_l2_svc_fun::grad(double *w, double *g)
207 {
208  int i;
209  float64_t *y=m_prob->y;
210  int l=m_prob->l;
211  int w_size=get_nr_variable();
212 
213  sizeI = 0;
214  for (i=0;i<l;i++)
215  if (z[i] < 1)
216  {
217  z[sizeI] = C[i]*y[i]*(z[i]-1);
218  I[sizeI] = i;
219  sizeI++;
220  }
221  subXTv(z, g);
222 
223  for(i=0;i<w_size;i++)
224  g[i] = w[i] + 2*g[i];
225 }
226 
int l2r_l2_svc_fun::get_nr_variable()
{
	// Number of optimization variables = feature dimension n
	// (includes the bias slot when the problem uses one).
	return m_prob->n;
}
231 
232 void l2r_l2_svc_fun::Hv(double *s, double *Hs)
233 {
234  int i;
235  int l=m_prob->l;
236  int w_size=get_nr_variable();
237  double *wa = SG_MALLOC(double, l);
238 
239  subXv(s, wa);
240  for(i=0;i<sizeI;i++)
241  wa[i] = C[I[i]]*wa[i];
242 
243  subXTv(wa, Hs);
244  for(i=0;i<w_size;i++)
245  Hs[i] = s[i] + 2*Hs[i];
246  SG_FREE(wa);
247 }
248 
249 void l2r_l2_svc_fun::Xv(double *v, double *res_Xv)
250 {
251  int32_t l=m_prob->l;
252  int32_t n=m_prob->n;
253  float64_t bias=0;
254 
255  if (m_prob->use_bias)
256  {
257  n--;
258  bias=v[n];
259  }
260 
261  m_prob->x->dense_dot_range(res_Xv, 0, l, NULL, v, n, bias);
262 }
263 
264 void l2r_l2_svc_fun::subXv(double *v, double *res_Xv)
265 {
266  int32_t n=m_prob->n;
267  float64_t bias=0;
268 
269  if (m_prob->use_bias)
270  {
271  n--;
272  bias=v[n];
273  }
274 
275  m_prob->x->dense_dot_range_subset(I, sizeI, res_Xv, NULL, v, n, bias);
276 
277  /*for (int32_t i=0;i<sizeI;i++)
278  {
279  res_Xv[i]=m_prob->x->dense_dot(I[i], v, n);
280 
281  if (m_prob->use_bias)
282  res_Xv[i]+=bias;
283  }*/
284 }
285 
286 void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
287 {
288  int32_t n=m_prob->n;
289 
290  if (m_prob->use_bias)
291  n--;
292 
293  memset(XTv, 0, sizeof(float64_t)*m_prob->n);
294  for (int32_t i=0;i<sizeI;i++)
295  {
296  m_prob->x->add_to_dense_vec(v[i], I[i], XTv, n);
297 
298  if (m_prob->use_bias)
299  XTv[n]+=v[i];
300  }
301 }
302 
l2r_l2_svr_fun::l2r_l2_svr_fun(const problem *prob, double *Cs, double p):
	l2r_l2_svc_fun(prob, Cs)
{
	// p is the epsilon-insensitive tube width of the SVR loss
	m_p = p;
}
308 
309 double l2r_l2_svr_fun::fun(double *w)
310 {
311  int i;
312  double f=0;
313  double *y=m_prob->y;
314  int l=m_prob->l;
315  int w_size=get_nr_variable();
316  double d;
317 
318  Xv(w, z);
319 
320  for(i=0;i<w_size;i++)
321  f += w[i]*w[i];
322  f /= 2;
323  for(i=0;i<l;i++)
324  {
325  d = z[i] - y[i];
326  if(d < -m_p)
327  f += C[i]*(d+m_p)*(d+m_p);
328  else if(d > m_p)
329  f += C[i]*(d-m_p)*(d-m_p);
330  }
331 
332  return(f);
333 }
334 
335 void l2r_l2_svr_fun::grad(double *w, double *g)
336 {
337  int i;
338  double *y=m_prob->y;
339  int l=m_prob->l;
340  int w_size=get_nr_variable();
341  double d;
342 
343  sizeI = 0;
344  for(i=0;i<l;i++)
345  {
346  d = z[i] - y[i];
347 
348  // generate index set I
349  if(d < -m_p)
350  {
351  z[sizeI] = C[i]*(d+m_p);
352  I[sizeI] = i;
353  sizeI++;
354  }
355  else if(d > m_p)
356  {
357  z[sizeI] = C[i]*(d-m_p);
358  I[sizeI] = i;
359  sizeI++;
360  }
361 
362  }
363  subXTv(z, g);
364 
365  for(i=0;i<w_size;i++)
366  g[i] = w[i] + 2*g[i];
367 }
368 
369 
370 // A coordinate descent algorithm for
371 // multi-class support vector machines by Crammer and Singer
372 //
373 // min_{\alpha} 0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i alpha^m_i
374 // s.t. \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i
375 //
376 // where e^m_i = 0 if y_i = m,
377 // e^m_i = 1 if y_i != m,
378 // C^m_i = C if m = y_i,
379 // C^m_i = 0 if m != y_i,
380 // and w_m(\alpha) = \sum_i \alpha^m_i x_i
381 //
382 // Given:
383 // x, y, C
384 // eps is the stopping tolerance
385 //
386 // solution will be put in w
387 
388 #define GETI(i) (prob->y[i])
389 // To support weights for instances, use GETI(i) (i)
390 
Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *p, int n_class,
	double *weighted_C, double *w0_reg,
	double epsilon, int max_it, double max_time,
	mcsvm_state* given_state)
{
	// Cache problem dimensions and solver parameters; no allocation
	// happens here — buffers live in given_state and are allocated
	// lazily inside solve(). None of the pointers are owned.
	this->w_size = p->n;         // number of variables per class
	this->l = p->l;              // number of training examples
	this->nr_class = n_class;
	this->eps = epsilon;         // stopping tolerance
	this->max_iter = max_it;
	this->prob = p;
	this->C = weighted_C;        // per-class upper bounds
	this->w0 = w0_reg;           // optional extra regularizer term, may be NULL
	this->max_train_time = max_time; // 0.0 means no time limit
	this->state = given_state;   // resumable solver state
}
407 
Solver_MCSVM_CS::~Solver_MCSVM_CS()
{
	// Nothing to release: all buffers belong to the caller-owned mcsvm_state.
}
411 
int compare_double(const void *a, const void *b)
{
	// qsort() comparator yielding DESCENDING order of doubles.
	const double lhs = *(const double *)a;
	const double rhs = *(const double *)b;

	if (lhs > rhs)
		return -1;
	return (lhs < rhs) ? 1 : 0;
}
420 
421 void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new)
422 {
423  int r;
424  double *D=SGVector<float64_t>::clone_vector(state->B, active_i);
425 
426  if(yi < active_i)
427  D[yi] += A_i*C_yi;
428  qsort(D, active_i, sizeof(double), compare_double);
429 
430  double beta = D[0] - A_i*C_yi;
431  for(r=1;r<active_i && beta<r*D[r];r++)
432  beta += D[r];
433 
434  beta /= r;
435  for(r=0;r<active_i;r++)
436  {
437  if(r == yi)
438  alpha_new[r] = CMath::min(C_yi, (beta-state->B[r])/A_i);
439  else
440  alpha_new[r] = CMath::min((double)0, (beta - state->B[r])/A_i);
441  }
442  SG_FREE(D);
443 }
444 
445 bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG)
446 {
447  double bound = 0;
448  if(m == yi)
449  bound = C[int32_t(GETI(i))];
450  if(alpha_i == bound && state->G[m] < minG)
451  return true;
452  return false;
453 }
454 
455 void Solver_MCSVM_CS::solve()
456 {
457  int i, m, s, k;
458  int iter = 0;
459  double *w,*B,*G,*alpha,*alpha_new,*QD,*d_val;
460  int *index,*d_ind,*alpha_index,*y_index,*active_size_i;
461 
462  if (!state->allocated)
463  {
464  state->w = SG_CALLOC(double, nr_class*w_size);
465  state->B = SG_CALLOC(double, nr_class);
466  state->G = SG_CALLOC(double, nr_class);
467  state->alpha = SG_CALLOC(double, l*nr_class);
468  state->alpha_new = SG_CALLOC(double, nr_class);
469  state->index = SG_CALLOC(int, l);
470  state->QD = SG_CALLOC(double, l);
471  state->d_ind = SG_CALLOC(int, nr_class);
472  state->d_val = SG_CALLOC(double, nr_class);
473  state->alpha_index = SG_CALLOC(int, nr_class*l);
474  state->y_index = SG_CALLOC(int, l);
475  state->active_size_i = SG_CALLOC(int, l);
476  state->allocated = true;
477  }
478  w = state->w;
479  B = state->B;
480  G = state->G;
481  alpha = state->alpha;
482  alpha_new = state->alpha_new;
483  index = state->index;
484  QD = state->QD;
485  d_ind = state->d_ind;
486  d_val = state->d_val;
487  alpha_index = state->alpha_index;
488  y_index = state->y_index;
489  active_size_i = state->active_size_i;
490 
491  double* tx = SG_MALLOC(double, w_size);
492  int dim = prob->x->get_dim_feature_space();
493 
494  int active_size = l;
495  double eps_shrink = CMath::max(10.0*eps, 1.0); // stopping tolerance for shrinking
496  bool start_from_all = true;
497  CTime start_time;
498  // initial
499  if (!state->inited)
500  {
501  for(i=0;i<l;i++)
502  {
503  for(m=0;m<nr_class;m++)
504  alpha_index[i*nr_class+m] = m;
505 
506  QD[i] = prob->x->dot(i, prob->x,i);
507  if (prob->use_bias)
508  QD[i] += 1.0;
509 
510  active_size_i[i] = nr_class;
511  y_index[i] = prob->y[i];
512  index[i] = i;
513  }
514  state->inited = true;
515  }
516 
517  while(iter < max_iter && !CSignal::cancel_computations())
518  {
519  double stopping = -CMath::INFTY;
520  for(i=0;i<active_size;i++)
521  {
522  int j = i+rand()%(active_size-i);
523  CMath::swap(index[i], index[j]);
524  }
525  for(s=0;s<active_size;s++)
526  {
527  i = index[s];
528  double Ai = QD[i];
529  double *alpha_i = &alpha[i*nr_class];
530  int *alpha_index_i = &alpha_index[i*nr_class];
531 
532  if(Ai > 0)
533  {
534  for(m=0;m<active_size_i[i];m++)
535  G[m] = 1;
536  if(y_index[i] < active_size_i[i])
537  G[y_index[i]] = 0;
538 
539  memset(tx,0,dim*sizeof(double));
540  prob->x->add_to_dense_vec(1.0,i,tx,dim);
541  for (k=0; k<dim; k++)
542  {
543  if (tx[k]==0.0)
544  continue;
545 
546  double* w_i = &w[k*nr_class];
547  for (m=0; m<active_size_i[i]; m++)
548  G[m] += w_i[alpha_index_i[m]]*tx[k];
549  }
550 
551  // experimental
552  // ***
553  if (prob->use_bias)
554  {
555  double *w_i = &w[(w_size-1)*nr_class];
556  for(m=0; m<active_size_i[i]; m++)
557  G[m] += w_i[alpha_index_i[m]];
558  }
559  if (w0)
560  {
561  for (k=0; k<dim; k++)
562  {
563  double *w0_i = &w0[k*nr_class];
564  for(m=0; m<active_size_i[i]; m++)
565  G[m] += w0_i[alpha_index_i[m]];
566  }
567  }
568  // ***
569 
570  double minG = CMath::INFTY;
571  double maxG = -CMath::INFTY;
572  for(m=0;m<active_size_i[i];m++)
573  {
574  if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG)
575  minG = G[m];
576  if(G[m] > maxG)
577  maxG = G[m];
578  }
579  if(y_index[i] < active_size_i[i])
580  if(alpha_i[int32_t(prob->y[i])] < C[int32_t(GETI(i))] && G[y_index[i]] < minG)
581  minG = G[y_index[i]];
582 
583  for(m=0;m<active_size_i[i];m++)
584  {
585  if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG))
586  {
587  active_size_i[i]--;
588  while(active_size_i[i]>m)
589  {
590  if(!be_shrunk(i, active_size_i[i], y_index[i],
591  alpha_i[alpha_index_i[active_size_i[i]]], minG))
592  {
593  CMath::swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]);
594  CMath::swap(G[m], G[active_size_i[i]]);
595  if(y_index[i] == active_size_i[i])
596  y_index[i] = m;
597  else if(y_index[i] == m)
598  y_index[i] = active_size_i[i];
599  break;
600  }
601  active_size_i[i]--;
602  }
603  }
604  }
605 
606  if(active_size_i[i] <= 1)
607  {
608  active_size--;
609  CMath::swap(index[s], index[active_size]);
610  s--;
611  continue;
612  }
613 
614  if(maxG-minG <= 1e-12)
615  continue;
616  else
617  stopping = CMath::CMath::max(maxG - minG, stopping);
618 
619  for(m=0;m<active_size_i[i];m++)
620  B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]] ;
621 
622  solve_sub_problem(Ai, y_index[i], C[int32_t(GETI(i))], active_size_i[i], alpha_new);
623  int nz_d = 0;
624  for(m=0;m<active_size_i[i];m++)
625  {
626  double d = alpha_new[m] - alpha_i[alpha_index_i[m]];
627  alpha_i[alpha_index_i[m]] = alpha_new[m];
628  if(fabs(d) >= 1e-12)
629  {
630  d_ind[nz_d] = alpha_index_i[m];
631  d_val[nz_d] = d;
632  nz_d++;
633  }
634  }
635 
636  memset(tx,0,dim*sizeof(double));
637  prob->x->add_to_dense_vec(1.0,i,tx,dim);
638  for (k=0; k<dim; k++)
639  {
640  if (tx[k]==0.0)
641  continue;
642 
643  double* w_i = &w[k*nr_class];
644  for (m=0; m<nz_d; m++)
645  w_i[d_ind[m]] += d_val[m]*tx[k];
646  }
647  // experimental
648  // ***
649  if (prob->use_bias)
650  {
651  double *w_i = &w[(w_size-1)*nr_class];
652  for(m=0;m<nz_d;m++)
653  w_i[d_ind[m]] += d_val[m];
654  }
655  // ***
656  }
657  }
658 
659  iter++;
660  /*
661  if(iter % 10 == 0)
662  {
663  SG_SINFO(".");
664  }
665  */
666 
667  if(stopping < eps_shrink)
668  {
669  if(stopping < eps && start_from_all == true)
670  break;
671  else
672  {
673  active_size = l;
674  for(i=0;i<l;i++)
675  active_size_i[i] = nr_class;
676  //SG_SINFO("*");
677  eps_shrink = CMath::max(eps_shrink/2, eps);
678  start_from_all = true;
679  }
680  }
681  else
682  start_from_all = false;
683 
684  if (max_train_time!=0.0 && max_train_time < start_time.cur_time_diff())
685  break;
686  }
687 
688  SG_SINFO("\noptimization finished, #iter = %d\n",iter);
689  if (iter >= max_iter)
690  SG_SINFO("Warning: reaching max number of iterations\n");
691 
692  SG_FREE(tx);
693 }
694 
695 //
696 // Interface functions
697 //
698 
void destroy_model(struct model *model_)
{
	// Free the owned member arrays first, then the model struct itself.
	SG_FREE(model_->w);
	SG_FREE(model_->label);
	SG_FREE(model_);
}
705 
void destroy_param(parameter* param)
{
	// Frees only the arrays inside param; the parameter struct itself
	// is owned by the caller.
	SG_FREE(param->weight_label);
	SG_FREE(param->weight);
}
711 #endif //HAVE_LAPACK
712 #endif // DOXYGEN_SHOULD_SKIP_THIS

SHOGUN Machine Learning Toolbox - Documentation