SHOGUN 4.2.0
LibLinear.cpp
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2007-2010 Soeren Sonnenburg
 * Copyright (c) 2007-2009 The LIBLINEAR Project.
 * Copyright (C) 2007-2010 Fraunhofer Institute FIRST and Max-Planck-Society
 */
#include <shogun/lib/config.h>

#include <shogun/io/SGIO.h>
#include <shogun/lib/Signal.h>
#include <shogun/lib/Time.h>
#include <shogun/base/Parameter.h>
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/optimization/liblinear/tron.h>
#include <shogun/features/DotFeatures.h>
#include <shogun/labels/BinaryLabels.h>

using namespace shogun;

CLibLinear::CLibLinear()
: CLinearMachine()
{
	init();
}

CLibLinear::CLibLinear(LIBLINEAR_SOLVER_TYPE l)
: CLinearMachine()
{
	init();
	set_liblinear_solver_type(l);
}

CLibLinear::CLibLinear(
	float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearMachine()
{
	init();
	C1=C;
	C2=C;
	use_bias=true;

	set_features(traindat);
	set_labels(trainlab);
	init_linear_term();
}

void CLibLinear::init()
{
	liblinear_solver_type=L2R_L1LOSS_SVC_DUAL;
	use_bias=false;
	C1=1;
	C2=1;
	set_max_iterations();
	epsilon=1e-5;

	set_compute_bias(false);

	SG_ADD(&C1, "C1", "C Cost constant 1.", MS_AVAILABLE);
	SG_ADD(&C2, "C2", "C Cost constant 2.", MS_AVAILABLE);
	SG_ADD(&use_bias, "use_bias", "Indicates if bias is used.",
			MS_NOT_AVAILABLE);
	SG_ADD(&epsilon, "epsilon", "Convergence precision.", MS_NOT_AVAILABLE);
	SG_ADD(&max_iterations, "max_iterations", "Max number of iterations.",
			MS_NOT_AVAILABLE);
	SG_ADD(&m_linear_term, "linear_term", "Linear Term", MS_NOT_AVAILABLE);
	SG_ADD((machine_int_t*) &liblinear_solver_type, "liblinear_solver_type",
			"Type of LibLinear solver.", MS_NOT_AVAILABLE);
}

CLibLinear::~CLibLinear()
{
}

bool CLibLinear::train_machine(CFeatures* data)
{
	CSignal::clear_cancel();
	ASSERT(m_labels)
	ASSERT(m_labels->get_label_type() == LT_BINARY)

	if (data)
	{
		if (!data->has_property(FP_DOT))
			SG_ERROR("Specified features are not of type CDotFeatures\n")

		set_features((CDotFeatures*) data);
	}
	ASSERT(features)

	int32_t num_train_labels=m_labels->get_num_labels();
	int32_t num_feat=features->get_dim_feature_space();
	int32_t num_vec=features->get_num_vectors();

	if (liblinear_solver_type == L1R_L2LOSS_SVC ||
			(liblinear_solver_type == L1R_LR))
	{
		if (num_feat!=num_train_labels)
		{
			SG_ERROR("L1 methods require the data to be transposed: "
					"number of features %d does not match number of "
					"training labels %d\n",
					num_feat, num_train_labels);
		}
		CMath::swap(num_feat, num_vec);
	}
	else
	{
		if (num_vec!=num_train_labels)
		{
			SG_ERROR("number of vectors %d does not match "
					"number of training labels %d\n",
					num_vec, num_train_labels);
		}
	}
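	// Note: the L1R_* solvers expect the transposed feature matrix (one
	// "vector" per feature); a caller can obtain it e.g. via
	// CDenseFeatures<float64_t>::get_transposed() from the wider Shogun API.
	// The swap above then restores num_feat/num_vec to their usual meaning.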
	if (use_bias)
		w=SGVector<float64_t>(SG_MALLOC(float64_t, num_feat+1), num_feat);
	else
		w=SGVector<float64_t>(num_feat);

	liblinear_problem prob;
	if (use_bias)
	{
		if (liblinear_solver_type != L2R_LR_DUAL)
			prob.n=w.vlen+1;
		else
			prob.n=w.vlen;
		memset(w.vector, 0, sizeof(float64_t)*(w.vlen+1));
	}
	else
	{
		prob.n=w.vlen;
		memset(w.vector, 0, sizeof(float64_t)*(w.vlen+0));
	}
	prob.l=num_vec;
	prob.x=features;
	prob.y=SG_MALLOC(double, prob.l);
	float64_t* Cs=SG_MALLOC(double, prob.l);
	prob.use_bias=use_bias;
	double Cp=C1;
	double Cn=C2;

	for (int32_t i=0; i<prob.l; i++)
	{
		prob.y[i]=((CBinaryLabels*) m_labels)->get_int_label(i);
		if (prob.y[i] == +1)
			Cs[i]=C1;
		else if (prob.y[i] == -1)
			Cs[i]=C2;
		else
			SG_ERROR("labels should be +1/-1 only\n")
	}

	int pos = 0;
	int neg = 0;
	for(int i=0;i<prob.l;i++)
	{
		if(prob.y[i]==+1)
			pos++;
	}
	neg = prob.l - pos;

	SG_INFO("%d training points %d dims\n", prob.l, prob.n)

	function *fun_obj=NULL;
	switch (liblinear_solver_type)
	{
		case L2R_LR:
		{
			fun_obj=new l2r_lr_fun(&prob, Cs);
			CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations);
			SG_DEBUG("starting L2R_LR training via tron\n")
			tron_obj.tron(w.vector, m_max_train_time);
			SG_DEBUG("done with tron\n")
			delete fun_obj;
			break;
		}
		case L2R_L2LOSS_SVC:
		{
			fun_obj=new l2r_l2_svc_fun(&prob, Cs);
			CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations);
			tron_obj.tron(w.vector, m_max_train_time);
			delete fun_obj;
			break;
		}
		case L2R_L2LOSS_SVC_DUAL:
			solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
			break;
		case L2R_L1LOSS_SVC_DUAL:
			solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
			break;
		case L1R_L2LOSS_SVC:
		{
			//ASSUME FEATURES ARE TRANSPOSED ALREADY
			solve_l1r_l2_svc(&prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn);
			break;
		}
		case L1R_LR:
		{
			//ASSUME FEATURES ARE TRANSPOSED ALREADY
			solve_l1r_lr(&prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn);
			break;
		}
		case L2R_LR_DUAL:
		{
			solve_l2r_lr_dual(&prob, epsilon, Cp, Cn);
			break;
		}
		default:
			SG_ERROR("Error: unknown solver_type\n")
			break;
	}

	if (use_bias)
		set_bias(w[w.vlen]);
	else
		set_bias(0);

	SG_FREE(prob.y);
	SG_FREE(Cs);

	return true;
}
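
// Usage sketch (illustrative only; CDenseFeatures and SGMatrix come from the
// wider Shogun API, not this file):
//
//   SGMatrix<float64_t> mat(num_feat, num_vec);       // filled elsewhere
//   CDenseFeatures<float64_t>* feats = new CDenseFeatures<float64_t>(mat);
//   CBinaryLabels* labs = new CBinaryLabels(lab_vec);  // entries +1/-1
//   CLibLinear* ll = new CLibLinear(1.0, feats, labs);
//   ll->set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL);
//   ll->train();   // dispatches to one of the solvers below
//   SG_UNREF(ll);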

// A coordinate descent algorithm for
// L1-loss and L2-loss SVM dual problems
//
//  min_\alpha  0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
//    s.t.      0 <= alpha_i <= upper_bound_i,
//
//  where Qij = yi yj xi^T xj and
//  D is a diagonal matrix
//
// In L1-SVM case:
// 	upper_bound_i = Cp if y_i = 1
// 	upper_bound_i = Cn if y_i = -1
// 	D_ii = 0
// In L2-SVM case:
// 	upper_bound_i = INF
// 	D_ii = 1/(2*Cp) if y_i = 1
// 	D_ii = 1/(2*Cn) if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

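// Coordinate update used below: with w = \sum_j y_j alpha_j x_j, the partial
// derivative of the dual objective is
//   G_i = y_i w^T x_i - 1 + D_ii alpha_i
// (the -1 is replaced by a custom linear term if one is set), and the exact
// minimizer along coordinate i, projected back onto [0, C_i], is
//   alpha_i <- min(max(alpha_i - G_i/(Q+D)_ii, 0), C_i),
// which is the alpha[i] update in the inner loop. GETI maps y[i] in {-1,+1}
// to {0,2} so that diag[] and upper_bound[] hold per-class constants.
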
void CLibLinear::solve_l2r_l1l2_svc(
	const liblinear_problem *prob, double eps, double Cp, double Cn, LIBLINEAR_SOLVER_TYPE st)
{
	int l = prob->l;
	int w_size = prob->n;
	int i, s, iter = 0;
	double C, d, G;
	double *QD = SG_MALLOC(double, l);
	int *index = SG_MALLOC(int, l);
	double *alpha = SG_MALLOC(double, l);
	int32_t *y = SG_MALLOC(int32_t, l);
	int active_size = l;

	// PG: projected gradient, for shrinking and stopping
	double PG;
	double PGmax_old = CMath::INFTY;
	double PGmin_old = -CMath::INFTY;
	double PGmax_new, PGmin_new;

	// default solver_type: L2R_L2LOSS_SVC_DUAL
	double diag[3] = {0.5/Cn, 0, 0.5/Cp};
	double upper_bound[3] = {CMath::INFTY, 0, CMath::INFTY};
	if(st == L2R_L1LOSS_SVC_DUAL)
	{
		diag[0] = 0;
		diag[2] = 0;
		upper_bound[0] = Cn;
		upper_bound[2] = Cp;
	}

	int n = prob->n;

	if (prob->use_bias)
		n--;

	for(i=0; i<w_size; i++)
		w[i] = 0;

	for(i=0; i<l; i++)
	{
		alpha[i] = 0;
		if(prob->y[i] > 0)
		{
			y[i] = +1;
		}
		else
		{
			y[i] = -1;
		}
		QD[i] = diag[GETI(i)];

		QD[i] += prob->x->dot(i, prob->x,i);
		index[i] = i;
	}

	CTime start_time;
	while (iter < max_iterations && !CSignal::cancel_computations())
	{
		if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
			break;

		PGmax_new = -CMath::INFTY;
		PGmin_new = CMath::INFTY;

		for (i=0; i<active_size; i++)
		{
			int j = CMath::random(i, active_size-1);
			CMath::swap(index[i], index[j]);
		}

		for (s=0;s<active_size;s++)
		{
			i = index[s];
			int32_t yi = y[i];

			G = prob->x->dense_dot(i, w.vector, n);
			if (prob->use_bias)
				G+=w.vector[n];

			if (m_linear_term.vector)
				G = G*yi + m_linear_term.vector[i];
			else
				G = G*yi-1;

			C = upper_bound[GETI(i)];
			G += alpha[i]*diag[GETI(i)];

			PG = 0;
			if (alpha[i] == 0)
			{
				if (G > PGmax_old)
				{
					active_size--;
					CMath::swap(index[s], index[active_size]);
					s--;
					continue;
				}
				else if (G < 0)
					PG = G;
			}
			else if (alpha[i] == C)
			{
				if (G < PGmin_old)
				{
					active_size--;
					CMath::swap(index[s], index[active_size]);
					s--;
					continue;
				}
				else if (G > 0)
					PG = G;
			}
			else
				PG = G;

			PGmax_new = CMath::max(PGmax_new, PG);
			PGmin_new = CMath::min(PGmin_new, PG);

			if(fabs(PG) > 1.0e-12)
			{
				double alpha_old = alpha[i];
				alpha[i] = CMath::min(CMath::max(alpha[i] - G/QD[i], 0.0), C);
				d = (alpha[i] - alpha_old)*yi;

				prob->x->add_to_dense_vec(d, i, w.vector, n);

				if (prob->use_bias)
					w.vector[n]+=d;
			}
		}

		iter++;
		float64_t gap=PGmax_new - PGmin_new;
		SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6)

		if(gap <= eps)
		{
			if(active_size == l)
				break;
			else
			{
				active_size = l;
				PGmax_old = CMath::INFTY;
				PGmin_old = -CMath::INFTY;
				continue;
			}
		}
		PGmax_old = PGmax_new;
		PGmin_old = PGmin_new;
		if (PGmax_old <= 0)
			PGmax_old = CMath::INFTY;
		if (PGmin_old >= 0)
			PGmin_old = -CMath::INFTY;
	}

	SG_DONE()
	SG_INFO("optimization finished, #iter = %d\n",iter)
	if (iter >= max_iterations)
	{
		SG_WARNING("reaching max number of iterations\nUsing -s 2 may be faster "
				"(also see liblinear FAQ)\n\n");
	}

	// calculate objective value

	double v = 0;
	int nSV = 0;
	for(i=0; i<w_size; i++)
		v += w.vector[i]*w.vector[i];
	for(i=0; i<l; i++)
	{
		v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
		if(alpha[i] > 0)
			++nSV;
	}
	SG_INFO("Objective value = %lf\n",v/2)
	SG_INFO("nSV = %d\n",nSV)

	SG_FREE(QD);
	SG_FREE(alpha);
	SG_FREE(y);
	SG_FREE(index);
}

// A coordinate descent algorithm for
// L1-regularized L2-loss support vector classification
//
//  min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

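// Coordinate step used below: for feature j, the one-variable problem
//   min_d |w_j + d| + L(w + d e_j)
// is approximated to second order with G = L'(0) and H = max(L''(0), 1e-12);
// the soft-thresholded Newton direction is
//   d = -(G+1)/H   if G+1 <= H w_j,
//   d = -(G-1)/H   if G-1 >= H w_j,
//   d = -w_j       otherwise,
// followed by a backtracking line search that halves d until the
// sufficient-decrease condition cond <= 0 holds. b[i] = 1 - y_i w^T x_i
// caches the margins, so a step on feature j only touches the examples
// where x_ij != 0.
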
void CLibLinear::solve_l1r_l2_svc(
	liblinear_problem *prob_col, double eps, double Cp, double Cn)
{
	int l = prob_col->l;
	int w_size = prob_col->n;
	int j, s, iter = 0;
	int active_size = w_size;
	int max_num_linesearch = 20;

	double sigma = 0.01;
	double d, G_loss, G, H;
	double Gmax_old = CMath::INFTY;
	double Gmax_new;
	double Gmax_init=0;
	double d_old, d_diff;
	double loss_old=0, loss_new;
	double appxcond, cond;

	int *index = SG_MALLOC(int, w_size);
	int32_t *y = SG_MALLOC(int32_t, l);
	double *b = SG_MALLOC(double, l); // b = 1-ywTx
	double *xj_sq = SG_MALLOC(double, w_size);

	CDotFeatures* x = (CDotFeatures*) prob_col->x;
	void* iterator;
	int32_t ind;
	float64_t val;

	double C[3] = {Cn,0,Cp};

	int n = prob_col->n;
	if (prob_col->use_bias)
		n--;

	for(j=0; j<l; j++)
	{
		b[j] = 1;
		if(prob_col->y[j] > 0)
			y[j] = 1;
		else
			y[j] = -1;
	}

	for(j=0; j<w_size; j++)
	{
		w.vector[j] = 0;
		index[j] = j;
		xj_sq[j] = 0;

		if (use_bias && j==n)
		{
			for (ind=0; ind<l; ind++)
				xj_sq[n] += C[GETI(ind)];
		}
		else
		{
			iterator=x->get_feature_iterator(j);
			while (x->get_next_feature(ind, val, iterator))
				xj_sq[j] += C[GETI(ind)]*val*val;
			x->free_feature_iterator(iterator);
		}
	}

	CTime start_time;
	while (iter < max_iterations && !CSignal::cancel_computations())
	{
		if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
			break;

		Gmax_new = 0;

		for(j=0; j<active_size; j++)
		{
			int i = CMath::random(j, active_size-1);
			CMath::swap(index[i], index[j]);
		}

		for(s=0; s<active_size; s++)
		{
			j = index[s];
			G_loss = 0;
			H = 0;

			if (use_bias && j==n)
			{
				for (ind=0; ind<l; ind++)
				{
					if(b[ind] > 0)
					{
						double tmp = C[GETI(ind)]*y[ind];
						G_loss -= tmp*b[ind];
						H += tmp*y[ind];
					}
				}
			}
			else
			{
				iterator=x->get_feature_iterator(j);

				while (x->get_next_feature(ind, val, iterator))
				{
					if(b[ind] > 0)
					{
						double tmp = C[GETI(ind)]*val*y[ind];
						G_loss -= tmp*b[ind];
						H += tmp*val*y[ind];
					}
				}
				x->free_feature_iterator(iterator);
			}

			G_loss *= 2;

			G = G_loss;
			H *= 2;
			H = CMath::max(H, 1e-12);

			double Gp = G+1;
			double Gn = G-1;
			double violation = 0;
			if(w.vector[j] == 0)
			{
				if(Gp < 0)
					violation = -Gp;
				else if(Gn > 0)
					violation = Gn;
				else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
				{
					active_size--;
					CMath::swap(index[s], index[active_size]);
					s--;
					continue;
				}
			}
			else if(w.vector[j] > 0)
				violation = fabs(Gp);
			else
				violation = fabs(Gn);

			Gmax_new = CMath::max(Gmax_new, violation);

			// obtain Newton direction d
			if(Gp <= H*w.vector[j])
				d = -Gp/H;
			else if(Gn >= H*w.vector[j])
				d = -Gn/H;
			else
				d = -w.vector[j];

			if(fabs(d) < 1.0e-12)
				continue;

			double delta = fabs(w.vector[j]+d)-fabs(w.vector[j]) + G*d;
			d_old = 0;
			int num_linesearch;
			for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
			{
				d_diff = d_old - d;
				cond = fabs(w.vector[j]+d)-fabs(w.vector[j]) - sigma*delta;

				appxcond = xj_sq[j]*d*d + G_loss*d + cond;
				if(appxcond <= 0)
				{
					if (use_bias && j==n)
					{
						for (ind=0; ind<l; ind++)
							b[ind] += d_diff*y[ind];
						break;
					}
					else
					{
						iterator=x->get_feature_iterator(j);
						while (x->get_next_feature(ind, val, iterator))
							b[ind] += d_diff*val*y[ind];

						x->free_feature_iterator(iterator);
						break;
					}
				}

				if(num_linesearch == 0)
				{
					loss_old = 0;
					loss_new = 0;

					if (use_bias && j==n)
					{
						for (ind=0; ind<l; ind++)
						{
							if(b[ind] > 0)
								loss_old += C[GETI(ind)]*b[ind]*b[ind];
							double b_new = b[ind] + d_diff*y[ind];
							b[ind] = b_new;
							if(b_new > 0)
								loss_new += C[GETI(ind)]*b_new*b_new;
						}
					}
					else
					{
						iterator=x->get_feature_iterator(j);
						while (x->get_next_feature(ind, val, iterator))
						{
							if(b[ind] > 0)
								loss_old += C[GETI(ind)]*b[ind]*b[ind];
							double b_new = b[ind] + d_diff*val*y[ind];
							b[ind] = b_new;
							if(b_new > 0)
								loss_new += C[GETI(ind)]*b_new*b_new;
						}
						x->free_feature_iterator(iterator);
					}
				}
				else
				{
					loss_new = 0;
					if (use_bias && j==n)
					{
						for (ind=0; ind<l; ind++)
						{
							double b_new = b[ind] + d_diff*y[ind];
							b[ind] = b_new;
							if(b_new > 0)
								loss_new += C[GETI(ind)]*b_new*b_new;
						}
					}
					else
					{
						iterator=x->get_feature_iterator(j);
						while (x->get_next_feature(ind, val, iterator))
						{
							double b_new = b[ind] + d_diff*val*y[ind];
							b[ind] = b_new;
							if(b_new > 0)
								loss_new += C[GETI(ind)]*b_new*b_new;
						}
						x->free_feature_iterator(iterator);
					}
				}

				cond = cond + loss_new - loss_old;
				if(cond <= 0)
					break;
				else
				{
					d_old = d;
					d *= 0.5;
					delta *= 0.5;
				}
			}

			w.vector[j] += d;

			// recompute b[] if line search takes too many steps
			if(num_linesearch >= max_num_linesearch)
			{
				SG_INFO("#")
				for(int i=0; i<l; i++)
					b[i] = 1;

				for(int i=0; i<n; i++)
				{
					if(w.vector[i]==0)
						continue;

					iterator=x->get_feature_iterator(i);
					while (x->get_next_feature(ind, val, iterator))
						b[ind] -= w.vector[i]*val*y[ind];
					x->free_feature_iterator(iterator);
				}

				if (use_bias && w.vector[n])
				{
					for (ind=0; ind<l; ind++)
						b[ind] -= w.vector[n]*y[ind];
				}
			}
		}

		if(iter == 0)
			Gmax_init = Gmax_new;
		iter++;

		SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new),
				-CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);

		if(Gmax_new <= eps*Gmax_init)
		{
			if(active_size == w_size)
				break;
			else
			{
				active_size = w_size;
				Gmax_old = CMath::INFTY;
				continue;
			}
		}

		Gmax_old = Gmax_new;
	}

	SG_DONE()
	SG_INFO("optimization finished, #iter = %d\n", iter)
	if(iter >= max_iterations)
		SG_WARNING("reaching max number of iterations\n")

	// calculate objective value

	double v = 0;
	int nnz = 0;
	for(j=0; j<w_size; j++)
	{
		if(w.vector[j] != 0)
		{
			v += fabs(w.vector[j]);
			nnz++;
		}
	}
	for(j=0; j<l; j++)
		if(b[j] > 0)
			v += C[GETI(j)]*b[j]*b[j];

	SG_INFO("Objective value = %lf\n", v)
	SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size)

	SG_FREE(index);
	SG_FREE(y);
	SG_FREE(b);
	SG_FREE(xj_sq);
}

// A coordinate descent algorithm for
// L1-regularized logistic regression problems
//
//  min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

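// Same coordinate-wise Newton scheme as solve_l1r_l2_svc above, applied to
// the logistic loss. exp_wTx[i] caches exp(w^T x_i), so for feature j the
// loss derivatives reduce to sums over the nonzeros of that feature (sum1,
// sum2 and H in the inner loop), and the line search can evaluate the new
// loss incrementally via exp_wTx_new[]. Steps are clamped to |d| <= 10
// before the line search to keep the exponentials from overflowing.
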
void CLibLinear::solve_l1r_lr(
	const liblinear_problem *prob_col, double eps,
	double Cp, double Cn)
{
	int l = prob_col->l;
	int w_size = prob_col->n;
	int j, s, iter = 0;
	int active_size = w_size;
	int max_num_linesearch = 20;

	double x_min = 0;
	double sigma = 0.01;
	double d, G, H;
	double Gmax_old = CMath::INFTY;
	double Gmax_new;
	double Gmax_init=0;
	double sum1, appxcond1;
	double sum2, appxcond2;
	double cond;

	int *index = SG_MALLOC(int, w_size);
	int32_t *y = SG_MALLOC(int32_t, l);
	double *exp_wTx = SG_MALLOC(double, l);
	double *exp_wTx_new = SG_MALLOC(double, l);
	double *xj_max = SG_MALLOC(double, w_size);
	double *C_sum = SG_MALLOC(double, w_size);
	double *xjneg_sum = SG_MALLOC(double, w_size);
	double *xjpos_sum = SG_MALLOC(double, w_size);

	CDotFeatures* x = prob_col->x;
	void* iterator;
	int ind;
	double val;

	double C[3] = {Cn,0,Cp};

	int n = prob_col->n;
	if (prob_col->use_bias)
		n--;

	for(j=0; j<l; j++)
	{
		exp_wTx[j] = 1;
		if(prob_col->y[j] > 0)
			y[j] = 1;
		else
			y[j] = -1;
	}
	for(j=0; j<w_size; j++)
	{
		w.vector[j] = 0;
		index[j] = j;
		xj_max[j] = 0;
		C_sum[j] = 0;
		xjneg_sum[j] = 0;
		xjpos_sum[j] = 0;

		if (use_bias && j==n)
		{
			for (ind=0; ind<l; ind++)
			{
				x_min = CMath::min(x_min, 1.0);
				xj_max[j] = CMath::max(xj_max[j], 1.0);
				C_sum[j] += C[GETI(ind)];
				if(y[ind] == -1)
					xjneg_sum[j] += C[GETI(ind)];
				else
					xjpos_sum[j] += C[GETI(ind)];
			}
		}
		else
		{
			iterator=x->get_feature_iterator(j);
			while (x->get_next_feature(ind, val, iterator))
			{
				x_min = CMath::min(x_min, val);
				xj_max[j] = CMath::max(xj_max[j], val);
				C_sum[j] += C[GETI(ind)];
				if(y[ind] == -1)
					xjneg_sum[j] += C[GETI(ind)]*val;
				else
					xjpos_sum[j] += C[GETI(ind)]*val;
			}
			x->free_feature_iterator(iterator);
		}
	}

	CTime start_time;
	while (iter < max_iterations && !CSignal::cancel_computations())
	{
		if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
			break;

		Gmax_new = 0;

		for(j=0; j<active_size; j++)
		{
			int i = CMath::random(j, active_size-1);
			CMath::swap(index[i], index[j]);
		}

		for(s=0; s<active_size; s++)
		{
			j = index[s];
			sum1 = 0;
			sum2 = 0;
			H = 0;

			if (use_bias && j==n)
			{
				for (ind=0; ind<l; ind++)
				{
					double exp_wTxind = exp_wTx[ind];
					double tmp1 = 1.0/(1+exp_wTxind);
					double tmp2 = C[GETI(ind)]*tmp1;
					double tmp3 = tmp2*exp_wTxind;
					sum2 += tmp2;
					sum1 += tmp3;
					H += tmp1*tmp3;
				}
			}
			else
			{
				iterator=x->get_feature_iterator(j);
				while (x->get_next_feature(ind, val, iterator))
				{
					double exp_wTxind = exp_wTx[ind];
					double tmp1 = val/(1+exp_wTxind);
					double tmp2 = C[GETI(ind)]*tmp1;
					double tmp3 = tmp2*exp_wTxind;
					sum2 += tmp2;
					sum1 += tmp3;
					H += tmp1*tmp3;
				}
				x->free_feature_iterator(iterator);
			}

			G = -sum2 + xjneg_sum[j];

			double Gp = G+1;
			double Gn = G-1;
			double violation = 0;
			if(w.vector[j] == 0)
			{
				if(Gp < 0)
					violation = -Gp;
				else if(Gn > 0)
					violation = Gn;
				else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
				{
					active_size--;
					CMath::swap(index[s], index[active_size]);
					s--;
					continue;
				}
			}
			else if(w.vector[j] > 0)
				violation = fabs(Gp);
			else
				violation = fabs(Gn);

			Gmax_new = CMath::max(Gmax_new, violation);

			// obtain Newton direction d
			if(Gp <= H*w.vector[j])
				d = -Gp/H;
			else if(Gn >= H*w.vector[j])
				d = -Gn/H;
			else
				d = -w.vector[j];

			if(fabs(d) < 1.0e-12)
				continue;

			d = CMath::min(CMath::max(d,-10.0),10.0);

			double delta = fabs(w.vector[j]+d)-fabs(w.vector[j]) + G*d;
			int num_linesearch;
			for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
			{
				cond = fabs(w.vector[j]+d)-fabs(w.vector[j]) - sigma*delta;

				if(x_min >= 0)
				{
					double tmp = exp(d*xj_max[j]);
					appxcond1 = log(1+sum1*(tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond - d*xjpos_sum[j];
					appxcond2 = log(1+sum2*(1/tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond + d*xjneg_sum[j];
					if(CMath::min(appxcond1,appxcond2) <= 0)
					{
						if (use_bias && j==n)
						{
							for (ind=0; ind<l; ind++)
								exp_wTx[ind] *= exp(d);
						}
						else
						{
							iterator=x->get_feature_iterator(j);
							while (x->get_next_feature(ind, val, iterator))
								exp_wTx[ind] *= exp(d*val);
							x->free_feature_iterator(iterator);
						}
						break;
					}
				}

				cond += d*xjneg_sum[j];

				int i = 0;

				if (use_bias && j==n)
				{
					for (ind=0; ind<l; ind++)
					{
						double exp_dx = exp(d);
						exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
						cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
						i++;
					}
				}
				else
				{
					iterator=x->get_feature_iterator(j);
					while (x->get_next_feature(ind, val, iterator))
					{
						double exp_dx = exp(d*val);
						exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
						cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
						i++;
					}
					x->free_feature_iterator(iterator);
				}

				if(cond <= 0)
				{
					i = 0;
					if (use_bias && j==n)
					{
						for (ind=0; ind<l; ind++)
						{
							exp_wTx[ind] = exp_wTx_new[i];
							i++;
						}
					}
					else
					{
						iterator=x->get_feature_iterator(j);
						while (x->get_next_feature(ind, val, iterator))
						{
							exp_wTx[ind] = exp_wTx_new[i];
							i++;
						}
						x->free_feature_iterator(iterator);
					}
					break;
				}
				else
				{
					d *= 0.5;
					delta *= 0.5;
				}
			}

			w.vector[j] += d;

			// recompute exp_wTx[] if line search takes too many steps
			if(num_linesearch >= max_num_linesearch)
			{
				SG_INFO("#")
				for(int i=0; i<l; i++)
					exp_wTx[i] = 0;

				for(int i=0; i<w_size; i++)
				{
					if(w.vector[i]==0) continue;

					if (use_bias && i==n)
					{
						for (ind=0; ind<l; ind++)
							exp_wTx[ind] += w.vector[i];
					}
					else
					{
						iterator=x->get_feature_iterator(i);
						while (x->get_next_feature(ind, val, iterator))
							exp_wTx[ind] += w.vector[i]*val;
						x->free_feature_iterator(iterator);
					}
				}

				for(int i=0; i<l; i++)
					exp_wTx[i] = exp(exp_wTx[i]);
			}
		}

		if(iter == 0)
			Gmax_init = Gmax_new;
		iter++;
		SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6)

		if(Gmax_new <= eps*Gmax_init)
		{
			if(active_size == w_size)
				break;
			else
			{
				active_size = w_size;
				Gmax_old = CMath::INFTY;
				continue;
			}
		}

		Gmax_old = Gmax_new;
	}

	SG_DONE()
	SG_INFO("optimization finished, #iter = %d\n", iter)
	if(iter >= max_iterations)
		SG_WARNING("reaching max number of iterations\n")

	// calculate objective value

	double v = 0;
	int nnz = 0;
	for(j=0; j<w_size; j++)
		if(w.vector[j] != 0)
		{
			v += fabs(w.vector[j]);
			nnz++;
		}
	for(j=0; j<l; j++)
		if(y[j] == 1)
			v += C[GETI(j)]*log(1+1/exp_wTx[j]);
		else
			v += C[GETI(j)]*log(1+exp_wTx[j]);

	SG_INFO("Objective value = %lf\n", v)
	SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size)

	SG_FREE(index);
	SG_FREE(y);
	SG_FREE(exp_wTx);
	SG_FREE(exp_wTx_new);
	SG_FREE(xj_max);
	SG_FREE(C_sum);
	SG_FREE(xjneg_sum);
	SG_FREE(xjpos_sum);
}

// A coordinate descent algorithm for
// the dual of L2-regularized logistic regression problems
//
//  min_\alpha  0.5(\alpha^T Q \alpha) + \sum \alpha_i log (\alpha_i)
//              + (upper_bound_i - \alpha_i) log (upper_bound_i - \alpha_i),
//    s.t.      0 <= \alpha_i <= upper_bound_i,
//
//  where Qij = yi yj xi^T xj and
//  upper_bound_i = Cp if y_i = 1
//  upper_bound_i = Cn if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Algorithm 5 of Yu et al., MLJ 2010

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

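// Inner solver used below: for each example i, the two variables
// alpha[2*i] = alpha_i and alpha[2*i+1] = C_i - alpha_i are updated by a
// damped Newton method on the one-dimensional function
//   g(z) = z log z + (C-z) log(C-z) + 0.5 a (z - alpha_old)^2
//          + sign * b (z - alpha_old),
// with a = x_i^T x_i and b = y_i w^T x_i; storing both z and C_i - z keeps
// the logarithms well-defined near the bounds. The inner tolerance innereps
// is tightened as the outer iterates converge.
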
void CLibLinear::solve_l2r_lr_dual(const liblinear_problem *prob, double eps, double Cp, double Cn)
{
	int l = prob->l;
	int w_size = prob->n;
	int i, s, iter = 0;
	double *xTx = new double[l];
	int max_iter = 1000;
	int *index = new int[l];
	double *alpha = new double[2*l]; // store alpha and C - alpha
	int32_t *y = new int32_t[l];
	int max_inner_iter = 100; // for inner Newton
	double innereps = 1e-2;
	double innereps_min = CMath::min(1e-8, eps);
	double upper_bound[3] = {Cn, 0, Cp};
	double Gmax_init = 0;

	for(i=0; i<l; i++)
	{
		if(prob->y[i] > 0)
		{
			y[i] = +1;
		}
		else
		{
			y[i] = -1;
		}
	}

	// Initial alpha can be set here. Note that
	// 0 < alpha[i] < upper_bound[GETI(i)]
	// alpha[2*i] + alpha[2*i+1] = upper_bound[GETI(i)]
	for(i=0; i<l; i++)
	{
		alpha[2*i] = CMath::min(0.001*upper_bound[GETI(i)], 1e-8);
		alpha[2*i+1] = upper_bound[GETI(i)] - alpha[2*i];
	}

	for(i=0; i<w_size; i++)
		w[i] = 0;
	for(i=0; i<l; i++)
	{
		xTx[i] = prob->x->dot(i, prob->x,i);
		prob->x->add_to_dense_vec(y[i]*alpha[2*i], i, w.vector, w_size);

		if (prob->use_bias)
		{
			w.vector[w_size]+=y[i]*alpha[2*i];
			xTx[i]+=1;
		}
		index[i] = i;
	}

	while (iter < max_iter)
	{
		for (i=0; i<l; i++)
		{
			int j = CMath::random(i, l-1);
			CMath::swap(index[i], index[j]);
		}
		int newton_iter = 0;
		double Gmax = 0;
		for (s=0; s<l; s++)
		{
			i = index[s];
			int32_t yi = y[i];
			double C = upper_bound[GETI(i)];
			double ywTx = 0, xisq = xTx[i];

			ywTx = prob->x->dense_dot(i, w.vector, w_size);
			if (prob->use_bias)
				ywTx+=w.vector[w_size];

			ywTx *= y[i];
			double a = xisq, b = ywTx;

			// Decide to minimize g_1(z) or g_2(z)
			int ind1 = 2*i, ind2 = 2*i+1, sign = 1;
			if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0)
			{
				ind1 = 2*i+1;
				ind2 = 2*i;
				sign = -1;
			}

			// g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old)
			double alpha_old = alpha[ind1];
			double z = alpha_old;
			if(C - z < 0.5 * C)
				z = 0.1*z;
			double gp = a*(z-alpha_old)+sign*b+CMath::log(z/(C-z));
			Gmax = CMath::max(Gmax, CMath::abs(gp));

			// Newton method on the sub-problem
			const double eta = 0.1; // xi in the paper
			int inner_iter = 0;
			while (inner_iter <= max_inner_iter)
			{
				if(fabs(gp) < innereps)
					break;
				double gpp = a + C/(C-z)/z;
				double tmpz = z - gp/gpp;
				if(tmpz <= 0)
					z *= eta;
				else // tmpz in (0, C)
					z = tmpz;
				gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
				newton_iter++;
				inner_iter++;
			}

			if(inner_iter > 0) // update w
			{
				alpha[ind1] = z;
				alpha[ind2] = C-z;

				prob->x->add_to_dense_vec(sign*(z-alpha_old)*yi, i, w.vector, w_size);

				if (prob->use_bias)
					w.vector[w_size]+=sign*(z-alpha_old)*yi;
			}
		}

		if(iter == 0)
			Gmax_init = Gmax;
		iter++;

		SG_SABS_PROGRESS(Gmax, -CMath::log10(Gmax), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6)

		if(Gmax < eps)
			break;

		if(newton_iter <= l/10)
			innereps = CMath::max(innereps_min, 0.1*innereps);
	}

	SG_DONE()
	SG_INFO("optimization finished, #iter = %d\n",iter)
	if (iter >= max_iter)
		SG_WARNING("reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n")

	// calculate objective value

	double v = 0;
	for(i=0; i<w_size; i++)
		v += w[i] * w[i];
	v *= 0.5;
	for(i=0; i<l; i++)
		v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1])
			- upper_bound[GETI(i)] * log(upper_bound[GETI(i)]);
	SG_INFO("Objective value = %lf\n", v)

	delete [] xTx;
	delete [] alpha;
	delete [] y;
	delete [] index;
}

void CLibLinear::set_linear_term(const SGVector<float64_t> linear_term)
{
	if (!m_labels)
		SG_ERROR("Please assign labels first!\n")

	int32_t num_labels=m_labels->get_num_labels();

	if (num_labels!=linear_term.vlen)
	{
		SG_ERROR("Number of labels (%d) does not match number"
				" of entries (%d) in linear term \n", num_labels,
				linear_term.vlen);
	}

	m_linear_term=linear_term;
}
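
// Usage sketch (illustrative): the linear term replaces the constant -1 in
// the dual objective of solve_l2r_l1l2_svc, one entry per training example:
//
//   SGVector<float64_t> lin(num_vec);
//   SGVector<float64_t>::fill_vector(lin.vector, lin.vlen, -1.0); // default
//   ll->set_linear_term(lin);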

SGVector<float64_t> CLibLinear::get_linear_term()
{
	if (!m_linear_term.vlen || !m_linear_term.vector)
		SG_ERROR("Please assign linear term first!\n")

	return m_linear_term;
}

void CLibLinear::init_linear_term()
{
	if (!m_labels)
		SG_ERROR("Please assign labels first!\n")

	m_linear_term=SGVector<float64_t>(m_labels->get_num_labels());
	SGVector<float64_t>::fill_vector(m_linear_term.vector, m_linear_term.vlen, -1.0);
}