SHOGUN  4.1.0
 全部  命名空间 文件 函数 变量 类型定义 枚举 枚举值 友元 宏定义  
shogun_liblinear.cpp
浏览该文件的文档.
1 /*
2  * Copyright (c) 2007-2009 The LIBLINEAR Project.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  * notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  * notice, this list of conditions and the following disclaimer in the
14  * documentation and/or other materials provided with the distribution.
15  *
16  * 3. Neither name of copyright holders nor the names of its contributors
17  * may be used to endorse or promote products derived from this software
18  * without specific prior written permission.
19  *
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
25  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 #include <shogun/lib/config.h>
34 #ifndef DOXYGEN_SHOULD_SKIP_THIS
35 #include <math.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <stdarg.h>
40 
44 #include <shogun/lib/Time.h>
45 #include <shogun/lib/Signal.h>
46 
47 using namespace shogun;
48 
49 l2r_lr_fun::l2r_lr_fun(const liblinear_problem *p, float64_t* Cs)
50 {
51  int l=p->l;
52 
53  this->m_prob = p;
54 
55  z = SG_MALLOC(double, l);
56  D = SG_MALLOC(double, l);
57  C = Cs;
58 }
59 
60 l2r_lr_fun::~l2r_lr_fun()
61 {
62  SG_FREE(z);
63  SG_FREE(D);
64 }
65 
66 
67 double l2r_lr_fun::fun(double *w)
68 {
69  int i;
70  double f=0;
71  float64_t *y=m_prob->y;
72  int l=m_prob->l;
73  int32_t n=m_prob->n;
74 
75  Xv(w, z);
76  for(i=0;i<l;i++)
77  {
78  double yz = y[i]*z[i];
79  if (yz >= 0)
80  f += C[i]*log(1 + exp(-yz));
81  else
82  f += C[i]*(-yz+log(1 + exp(yz)));
83  }
84  f += 0.5 *CMath::dot(w,w,n);
85 
86  return(f);
87 }
88 
89 void l2r_lr_fun::grad(double *w, double *g)
90 {
91  int i;
92  float64_t *y=m_prob->y;
93  int l=m_prob->l;
94  int w_size=get_nr_variable();
95 
96  for(i=0;i<l;i++)
97  {
98  z[i] = 1/(1 + exp(-y[i]*z[i]));
99  D[i] = z[i]*(1-z[i]);
100  z[i] = C[i]*(z[i]-1)*y[i];
101  }
102  XTv(z, g);
103 
104  for(i=0;i<w_size;i++)
105  g[i] = w[i] + g[i];
106 }
107 
108 int l2r_lr_fun::get_nr_variable()
109 {
110  return m_prob->n;
111 }
112 
113 void l2r_lr_fun::Hv(double *s, double *Hs)
114 {
115  int i;
116  int l=m_prob->l;
117  int w_size=get_nr_variable();
118  double *wa = SG_MALLOC(double, l);
119 
120  Xv(s, wa);
121  for(i=0;i<l;i++)
122  wa[i] = C[i]*D[i]*wa[i];
123 
124  XTv(wa, Hs);
125  for(i=0;i<w_size;i++)
126  Hs[i] = s[i] + Hs[i];
127  SG_FREE(wa);
128 }
129 
130 void l2r_lr_fun::Xv(double *v, double *res_Xv)
131 {
132  int32_t l=m_prob->l;
133  int32_t n=m_prob->n;
134  float64_t bias=0;
135 
136  if (m_prob->use_bias)
137  {
138  n--;
139  bias=v[n];
140  }
141 
142  m_prob->x->dense_dot_range(res_Xv, 0, l, NULL, v, n, bias);
143 }
144 
145 void l2r_lr_fun::XTv(double *v, double *res_XTv)
146 {
147  int l=m_prob->l;
148  int32_t n=m_prob->n;
149 
150  memset(res_XTv, 0, sizeof(double)*m_prob->n);
151 
152  if (m_prob->use_bias)
153  n--;
154 
155  for (int32_t i=0;i<l;i++)
156  {
157  m_prob->x->add_to_dense_vec(v[i], i, res_XTv, n);
158 
159  if (m_prob->use_bias)
160  res_XTv[n]+=v[i];
161  }
162 }
163 
164 l2r_l2_svc_fun::l2r_l2_svc_fun(const liblinear_problem *p, double* Cs)
165 {
166  int l=p->l;
167 
168  this->m_prob = p;
169 
170  z = SG_MALLOC(double, l);
171  D = SG_MALLOC(double, l);
172  I = SG_MALLOC(int, l);
173  C=Cs;
174 
175 }
176 
177 l2r_l2_svc_fun::~l2r_l2_svc_fun()
178 {
179  SG_FREE(z);
180  SG_FREE(D);
181  SG_FREE(I);
182 }
183 
184 double l2r_l2_svc_fun::fun(double *w)
185 {
186  int i;
187  double f=0;
188  float64_t *y=m_prob->y;
189  int l=m_prob->l;
190  int w_size=get_nr_variable();
191 
192  Xv(w, z);
193  for(i=0;i<l;i++)
194  {
195  z[i] = y[i]*z[i];
196  double d = 1-z[i];
197  if (d > 0)
198  f += C[i]*d*d;
199  }
200  f += 0.5*CMath::dot(w, w, w_size);
201 
202  return(f);
203 }
204 
205 void l2r_l2_svc_fun::grad(double *w, double *g)
206 {
207  int i;
208  float64_t *y=m_prob->y;
209  int l=m_prob->l;
210  int w_size=get_nr_variable();
211 
212  sizeI = 0;
213  for (i=0;i<l;i++)
214  if (z[i] < 1)
215  {
216  z[sizeI] = C[i]*y[i]*(z[i]-1);
217  I[sizeI] = i;
218  sizeI++;
219  }
220  subXTv(z, g);
221 
222  for(i=0;i<w_size;i++)
223  g[i] = w[i] + 2*g[i];
224 }
225 
226 int l2r_l2_svc_fun::get_nr_variable()
227 {
228  return m_prob->n;
229 }
230 
231 void l2r_l2_svc_fun::Hv(double *s, double *Hs)
232 {
233  int i;
234  int l=m_prob->l;
235  int w_size=get_nr_variable();
236  double *wa = SG_MALLOC(double, l);
237 
238  subXv(s, wa);
239  for(i=0;i<sizeI;i++)
240  wa[i] = C[I[i]]*wa[i];
241 
242  subXTv(wa, Hs);
243  for(i=0;i<w_size;i++)
244  Hs[i] = s[i] + 2*Hs[i];
245  SG_FREE(wa);
246 }
247 
248 void l2r_l2_svc_fun::Xv(double *v, double *res_Xv)
249 {
250  int32_t l=m_prob->l;
251  int32_t n=m_prob->n;
252  float64_t bias=0;
253 
254  if (m_prob->use_bias)
255  {
256  n--;
257  bias=v[n];
258  }
259 
260  m_prob->x->dense_dot_range(res_Xv, 0, l, NULL, v, n, bias);
261 }
262 
263 void l2r_l2_svc_fun::subXv(double *v, double *res_Xv)
264 {
265  int32_t n=m_prob->n;
266  float64_t bias=0;
267 
268  if (m_prob->use_bias)
269  {
270  n--;
271  bias=v[n];
272  }
273 
274  m_prob->x->dense_dot_range_subset(I, sizeI, res_Xv, NULL, v, n, bias);
275 
276  /*for (int32_t i=0;i<sizeI;i++)
277  {
278  res_Xv[i]=m_prob->x->dense_dot(I[i], v, n);
279 
280  if (m_prob->use_bias)
281  res_Xv[i]+=bias;
282  }*/
283 }
284 
285 void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
286 {
287  int32_t n=m_prob->n;
288 
289  if (m_prob->use_bias)
290  n--;
291 
292  memset(XTv, 0, sizeof(float64_t)*m_prob->n);
293  for (int32_t i=0;i<sizeI;i++)
294  {
295  m_prob->x->add_to_dense_vec(v[i], I[i], XTv, n);
296 
297  if (m_prob->use_bias)
298  XTv[n]+=v[i];
299  }
300 }
301 
302 l2r_l2_svr_fun::l2r_l2_svr_fun(const liblinear_problem *prob, double *Cs, double p):
303  l2r_l2_svc_fun(prob, Cs)
304 {
305  m_p = p;
306 }
307 
308 double l2r_l2_svr_fun::fun(double *w)
309 {
310  int i;
311  double f=0;
312  double *y=m_prob->y;
313  int l=m_prob->l;
314  int w_size=get_nr_variable();
315  double d;
316 
317  Xv(w, z);
318 
319  for(i=0;i<w_size;i++)
320  f += w[i]*w[i];
321  f /= 2;
322  for(i=0;i<l;i++)
323  {
324  d = z[i] - y[i];
325  if(d < -m_p)
326  f += C[i]*(d+m_p)*(d+m_p);
327  else if(d > m_p)
328  f += C[i]*(d-m_p)*(d-m_p);
329  }
330 
331  return(f);
332 }
333 
334 void l2r_l2_svr_fun::grad(double *w, double *g)
335 {
336  int i;
337  double *y=m_prob->y;
338  int l=m_prob->l;
339  int w_size=get_nr_variable();
340  double d;
341 
342  sizeI = 0;
343  for(i=0;i<l;i++)
344  {
345  d = z[i] - y[i];
346 
347  // generate index set I
348  if(d < -m_p)
349  {
350  z[sizeI] = C[i]*(d+m_p);
351  I[sizeI] = i;
352  sizeI++;
353  }
354  else if(d > m_p)
355  {
356  z[sizeI] = C[i]*(d-m_p);
357  I[sizeI] = i;
358  sizeI++;
359  }
360 
361  }
362  subXTv(z, g);
363 
364  for(i=0;i<w_size;i++)
365  g[i] = w[i] + 2*g[i];
366 }
367 
368 
369 // A coordinate descent algorithm for
370 // multi-class support vector machines by Crammer and Singer
371 //
372 // min_{\alpha} 0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i alpha^m_i
373 // s.t. \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i
374 //
375 // where e^m_i = 0 if y_i = m,
376 // e^m_i = 1 if y_i != m,
377 // C^m_i = C if m = y_i,
378 // C^m_i = 0 if m != y_i,
379 // and w_m(\alpha) = \sum_i \alpha^m_i x_i
380 //
381 // Given:
382 // x, y, C
383 // eps is the stopping tolerance
384 //
385 // solution will be put in w
386 
387 #define GETI(i) (prob->y[i])
388 // To support weights for instances, use GETI(i) (i)
389 
390 Solver_MCSVM_CS::Solver_MCSVM_CS(const liblinear_problem *p, int n_class,
391  double *weighted_C, double *w0_reg,
392  double epsilon, int max_it, double max_time,
393  mcsvm_state* given_state)
394 {
395  this->w_size = p->n;
396  this->l = p->l;
397  this->nr_class = n_class;
398  this->eps = epsilon;
399  this->max_iter = max_it;
400  this->prob = p;
401  this->C = weighted_C;
402  this->w0 = w0_reg;
403  this->max_train_time = max_time;
404  this->state = given_state;
405 }
406 
407 Solver_MCSVM_CS::~Solver_MCSVM_CS()
408 {
409 }
410 
411 int compare_double(const void *a, const void *b)
412 {
413  if(*(double *)a > *(double *)b)
414  return -1;
415  if(*(double *)a < *(double *)b)
416  return 1;
417  return 0;
418 }
419 
420 void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new)
421 {
422  int r;
423  double *D=SGVector<float64_t>::clone_vector(state->B, active_i);
424 
425  if(yi < active_i)
426  D[yi] += A_i*C_yi;
427  qsort(D, active_i, sizeof(double), compare_double);
428 
429  double beta = D[0] - A_i*C_yi;
430  for(r=1;r<active_i && beta<r*D[r];r++)
431  beta += D[r];
432 
433  beta /= r;
434  for(r=0;r<active_i;r++)
435  {
436  if(r == yi)
437  alpha_new[r] = CMath::min(C_yi, (beta-state->B[r])/A_i);
438  else
439  alpha_new[r] = CMath::min((double)0, (beta - state->B[r])/A_i);
440  }
441  SG_FREE(D);
442 }
443 
444 bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG)
445 {
446  double bound = 0;
447  if(m == yi)
448  bound = C[int32_t(GETI(i))];
449  if(alpha_i == bound && state->G[m] < minG)
450  return true;
451  return false;
452 }
453 
454 void Solver_MCSVM_CS::solve()
455 {
456  int i, m, s, k;
457  int iter = 0;
458  double *w,*B,*G,*alpha,*alpha_new,*QD,*d_val;
459  int *index,*d_ind,*alpha_index,*y_index,*active_size_i;
460 
461  if (!state->allocated)
462  {
463  state->w = SG_CALLOC(double, nr_class*w_size);
464  state->B = SG_CALLOC(double, nr_class);
465  state->G = SG_CALLOC(double, nr_class);
466  state->alpha = SG_CALLOC(double, l*nr_class);
467  state->alpha_new = SG_CALLOC(double, nr_class);
468  state->index = SG_CALLOC(int, l);
469  state->QD = SG_CALLOC(double, l);
470  state->d_ind = SG_CALLOC(int, nr_class);
471  state->d_val = SG_CALLOC(double, nr_class);
472  state->alpha_index = SG_CALLOC(int, nr_class*l);
473  state->y_index = SG_CALLOC(int, l);
474  state->active_size_i = SG_CALLOC(int, l);
475  state->allocated = true;
476  }
477  w = state->w;
478  B = state->B;
479  G = state->G;
480  alpha = state->alpha;
481  alpha_new = state->alpha_new;
482  index = state->index;
483  QD = state->QD;
484  d_ind = state->d_ind;
485  d_val = state->d_val;
486  alpha_index = state->alpha_index;
487  y_index = state->y_index;
488  active_size_i = state->active_size_i;
489 
490  double* tx = SG_MALLOC(double, w_size);
491  int dim = prob->x->get_dim_feature_space();
492 
493  int active_size = l;
494  double eps_shrink = CMath::max(10.0*eps, 1.0); // stopping tolerance for shrinking
495  bool start_from_all = true;
496  CTime start_time;
497  // initial
498  if (!state->inited)
499  {
500  for(i=0;i<l;i++)
501  {
502  for(m=0;m<nr_class;m++)
503  alpha_index[i*nr_class+m] = m;
504 
505  QD[i] = prob->x->dot(i, prob->x,i);
506  if (prob->use_bias)
507  QD[i] += 1.0;
508 
509  active_size_i[i] = nr_class;
510  y_index[i] = prob->y[i];
511  index[i] = i;
512  }
513  state->inited = true;
514  }
515 
516  while(iter < max_iter && !CSignal::cancel_computations())
517  {
518  double stopping = -CMath::INFTY;
519  for(i=0;i<active_size;i++)
520  {
521  int j = CMath::random(i, active_size-1);
522  CMath::swap(index[i], index[j]);
523  }
524  for(s=0;s<active_size;s++)
525  {
526  i = index[s];
527  double Ai = QD[i];
528  double *alpha_i = &alpha[i*nr_class];
529  int *alpha_index_i = &alpha_index[i*nr_class];
530 
531  if(Ai > 0)
532  {
533  for(m=0;m<active_size_i[i];m++)
534  G[m] = 1;
535  if(y_index[i] < active_size_i[i])
536  G[y_index[i]] = 0;
537 
538  memset(tx,0,dim*sizeof(double));
539  prob->x->add_to_dense_vec(1.0,i,tx,dim);
540  for (k=0; k<dim; k++)
541  {
542  if (tx[k]==0.0)
543  continue;
544 
545  double* w_i = &w[k*nr_class];
546  for (m=0; m<active_size_i[i]; m++)
547  G[m] += w_i[alpha_index_i[m]]*tx[k];
548  }
549 
550  // experimental
551  // ***
552  if (prob->use_bias)
553  {
554  double *w_i = &w[(w_size-1)*nr_class];
555  for(m=0; m<active_size_i[i]; m++)
556  G[m] += w_i[alpha_index_i[m]];
557  }
558  if (w0)
559  {
560  for (k=0; k<dim; k++)
561  {
562  double *w0_i = &w0[k*nr_class];
563  for(m=0; m<active_size_i[i]; m++)
564  G[m] += w0_i[alpha_index_i[m]];
565  }
566  }
567  // ***
568 
569  double minG = CMath::INFTY;
570  double maxG = -CMath::INFTY;
571  for(m=0;m<active_size_i[i];m++)
572  {
573  if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG)
574  minG = G[m];
575  if(G[m] > maxG)
576  maxG = G[m];
577  }
578  if(y_index[i] < active_size_i[i])
579  if(alpha_i[int32_t(prob->y[i])] < C[int32_t(GETI(i))] && G[y_index[i]] < minG)
580  minG = G[y_index[i]];
581 
582  for(m=0;m<active_size_i[i];m++)
583  {
584  if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG))
585  {
586  active_size_i[i]--;
587  while(active_size_i[i]>m)
588  {
589  if(!be_shrunk(i, active_size_i[i], y_index[i],
590  alpha_i[alpha_index_i[active_size_i[i]]], minG))
591  {
592  CMath::swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]);
593  CMath::swap(G[m], G[active_size_i[i]]);
594  if(y_index[i] == active_size_i[i])
595  y_index[i] = m;
596  else if(y_index[i] == m)
597  y_index[i] = active_size_i[i];
598  break;
599  }
600  active_size_i[i]--;
601  }
602  }
603  }
604 
605  if(active_size_i[i] <= 1)
606  {
607  active_size--;
608  CMath::swap(index[s], index[active_size]);
609  s--;
610  continue;
611  }
612 
613  if(maxG-minG <= 1e-12)
614  continue;
615  else
616  stopping = CMath::CMath::max(maxG - minG, stopping);
617 
618  for(m=0;m<active_size_i[i];m++)
619  B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]] ;
620 
621  solve_sub_problem(Ai, y_index[i], C[int32_t(GETI(i))], active_size_i[i], alpha_new);
622  int nz_d = 0;
623  for(m=0;m<active_size_i[i];m++)
624  {
625  double d = alpha_new[m] - alpha_i[alpha_index_i[m]];
626  alpha_i[alpha_index_i[m]] = alpha_new[m];
627  if(fabs(d) >= 1e-12)
628  {
629  d_ind[nz_d] = alpha_index_i[m];
630  d_val[nz_d] = d;
631  nz_d++;
632  }
633  }
634 
635  memset(tx,0,dim*sizeof(double));
636  prob->x->add_to_dense_vec(1.0,i,tx,dim);
637  for (k=0; k<dim; k++)
638  {
639  if (tx[k]==0.0)
640  continue;
641 
642  double* w_i = &w[k*nr_class];
643  for (m=0; m<nz_d; m++)
644  w_i[d_ind[m]] += d_val[m]*tx[k];
645  }
646  // experimental
647  // ***
648  if (prob->use_bias)
649  {
650  double *w_i = &w[(w_size-1)*nr_class];
651  for(m=0;m<nz_d;m++)
652  w_i[d_ind[m]] += d_val[m];
653  }
654  // ***
655  }
656  }
657 
658  iter++;
659  /*
660  if(iter % 10 == 0)
661  {
662  SG_SINFO(".")
663  }
664  */
665 
666  if(stopping < eps_shrink)
667  {
668  if(stopping < eps && start_from_all == true)
669  break;
670  else
671  {
672  active_size = l;
673  for(i=0;i<l;i++)
674  active_size_i[i] = nr_class;
675  //SG_SINFO("*")
676  eps_shrink = CMath::max(eps_shrink/2, eps);
677  start_from_all = true;
678  }
679  }
680  else
681  start_from_all = false;
682 
683  if (max_train_time!=0.0 && max_train_time < start_time.cur_time_diff())
684  break;
685  }
686 
687  SG_SINFO("\noptimization finished, #iter = %d\n",iter)
688  if (iter >= max_iter)
689  SG_SINFO("Warning: reaching max number of iterations\n")
690 
691  SG_FREE(tx);
692 }
693 
694 //
695 // Interface functions
696 //
697 
698 void destroy_model(struct liblinear_model *model_)
699 {
700  SG_FREE(model_->w);
701  SG_FREE(model_->label);
702  SG_FREE(model_);
703 }
704 
705 void destroy_param(liblinear_parameter* param)
706 {
707  SG_FREE(param->weight_label);
708  SG_FREE(param->weight);
709 }
710 #endif // DOXYGEN_SHOULD_SKIP_THIS
Class Time that implements a stopwatch based on either cpu time or wall clock time.
Definition: Time.h:47
static const float64_t INFTY
infinity
Definition: Math.h:2048
static uint64_t random()
Definition: Math.h:1019
static T * clone_vector(const T *vec, int32_t len)
Definition: SGVector.cpp:215
static const float64_t epsilon
Definition: libbmrm.cpp:25
float64_t cur_time_diff(bool verbose=false)
Definition: Time.cpp:68
#define GETI(i)
Definition: LibLinear.cpp:1170
double float64_t
Definition: common.h:50
static T max(T a, T b)
Definition: Math.h:168
static float64_t dot(const bool *v1, const bool *v2, int32_t n)
Compute dot product between v1 and v2 (blas optimized)
Definition: Math.h:627
static bool cancel_computations()
Definition: Signal.h:86
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
static T min(T a, T b)
Definition: Math.h:157
#define SG_SINFO(...)
Definition: SGIO.h:173
static void swap(T &a, T &b)
Definition: Math.h:438
Matrix::Scalar max(Matrix m)
Definition: Redux.h:66

SHOGUN 机器学习工具包 - 项目文档