LibLinear.cpp
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2007-2010 Soeren Sonnenburg
 * Copyright (c) 2007-2009 The LIBLINEAR Project.
 * Copyright (C) 2007-2010 Fraunhofer Institute FIRST and Max-Planck-Society
 */
#include <shogun/lib/config.h>

#include <shogun/io/SGIO.h>
#include <shogun/lib/Signal.h>
#include <shogun/lib/Time.h>
#include <shogun/base/Parameter.h>
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/optimization/liblinear/tron.h>
#include <shogun/features/DotFeatures.h>
#include <shogun/labels/BinaryLabels.h>

using namespace shogun;

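// Minimal usage sketch (not part of this file; `feats` and `labels` are
// illustrative placeholders for an existing CDotFeatures* and
// CBinaryLabels*):
//
//     CLibLinear* lin = new CLibLinear(1.0, feats, labels);
//     lin->set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL);
//     lin->train();
//     CBinaryLabels* out = lin->apply_binary(feats);
//     SG_UNREF(lin);
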
CLibLinear::CLibLinear()
: CLinearMachine()
{
    init();
}

CLibLinear::CLibLinear(LIBLINEAR_SOLVER_TYPE l)
: CLinearMachine()
{
    init();
    liblinear_solver_type=l;
}

CLibLinear::CLibLinear(
    float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearMachine()
{
    init();
    C1=C;
    C2=C;
    use_bias=true;

    set_features(traindat);
    set_labels(trainlab);
    init_linear_term();
}

void CLibLinear::init()
{
    liblinear_solver_type=L2R_L1LOSS_SVC_DUAL;
    use_bias=false;
    C1=1;
    C2=1;
    set_max_iterations();
    epsilon=1e-5;

    SG_ADD(&C1, "C1", "C Cost constant 1.", MS_AVAILABLE);
    SG_ADD(&C2, "C2", "C Cost constant 2.", MS_AVAILABLE);
    SG_ADD(&use_bias, "use_bias", "Indicates if bias is used.",
            MS_NOT_AVAILABLE);
    SG_ADD(&epsilon, "epsilon", "Convergence precision.", MS_NOT_AVAILABLE);
    SG_ADD(&max_iterations, "max_iterations", "Max number of iterations.",
            MS_NOT_AVAILABLE);
    SG_ADD(&m_linear_term, "linear_term", "Linear Term", MS_NOT_AVAILABLE);
    SG_ADD((machine_int_t*) &liblinear_solver_type, "liblinear_solver_type",
            "Type of LibLinear solver.", MS_NOT_AVAILABLE);
}

CLibLinear::~CLibLinear()
{
}

bool CLibLinear::train_machine(CFeatures* data)
{
    CSignal::clear_cancel();
    ASSERT(m_labels)
    ASSERT(m_labels->get_label_type() == LT_BINARY)

    if (data)
    {
        if (!data->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n")

        set_features((CDotFeatures*) data);
    }
    ASSERT(features)

    int32_t num_train_labels=m_labels->get_num_labels();
    int32_t num_feat=features->get_dim_feature_space();
    int32_t num_vec=features->get_num_vectors();

    if (liblinear_solver_type == L1R_L2LOSS_SVC ||
            (liblinear_solver_type == L1R_LR))
    {
        if (num_feat!=num_train_labels)
        {
            SG_ERROR("L1 methods require the data to be transposed: "
                    "number of features %d does not match number of "
                    "training labels %d\n",
                    num_feat, num_train_labels);
        }
        CMath::swap(num_feat, num_vec);
    }
    else
    {
        if (num_vec!=num_train_labels)
        {
            SG_ERROR("number of vectors %d does not match "
                    "number of training labels %d\n",
                    num_vec, num_train_labels);
        }
    }
    // with a bias, one extra element is allocated while vlen stays at
    // num_feat, so the bias ends up stored at w[w.vlen]
    if (use_bias)
        w=SGVector<float64_t>(SG_MALLOC(float64_t, num_feat+1), num_feat);
    else
        w=SGVector<float64_t>(num_feat);

    liblinear_problem prob;
    if (use_bias)
    {
        prob.n=w.vlen+1;
        memset(w.vector, 0, sizeof(float64_t)*(w.vlen+1));
    }
    else
    {
        prob.n=w.vlen;
        memset(w.vector, 0, sizeof(float64_t)*w.vlen);
    }
    prob.l=num_vec;
    prob.x=features;
    prob.y=SG_MALLOC(double, prob.l);
    float64_t* Cs=SG_MALLOC(double, prob.l);
    prob.use_bias=use_bias;
    double Cp=C1;
    double Cn=C2;

    for (int32_t i=0; i<prob.l; i++)
    {
        prob.y[i]=((CBinaryLabels*) m_labels)->get_int_label(i);
        if (prob.y[i] == +1)
            Cs[i]=C1;
        else if (prob.y[i] == -1)
            Cs[i]=C2;
        else
            SG_ERROR("labels should be +1/-1 only\n")
    }

    int pos = 0;
    int neg = 0;
    for (int i=0; i<prob.l; i++)
    {
        if (prob.y[i]==+1)
            pos++;
    }
    neg = prob.l - pos;

    SG_INFO("%d training points %d dims\n", prob.l, prob.n)

    function *fun_obj=NULL;
    switch (liblinear_solver_type)
    {
        case L2R_LR:
        {
            fun_obj=new l2r_lr_fun(&prob, Cs);
            CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations);
            SG_DEBUG("starting L2R_LR training via tron\n")
            tron_obj.tron(w.vector, m_max_train_time);
            SG_DEBUG("done with tron\n")
            delete fun_obj;
            break;
        }
        case L2R_L2LOSS_SVC:
        {
            fun_obj=new l2r_l2_svc_fun(&prob, Cs);
            CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations);
            tron_obj.tron(w.vector, m_max_train_time);
            delete fun_obj;
            break;
        }
        case L2R_L2LOSS_SVC_DUAL:
            solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
            break;
        case L2R_L1LOSS_SVC_DUAL:
            solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
            break;
        case L1R_L2LOSS_SVC:
        {
            // assumes features are transposed already
            solve_l1r_l2_svc(&prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn);
            break;
        }
        case L1R_LR:
        {
            // assumes features are transposed already
            solve_l1r_lr(&prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn);
            break;
        }
        case L2R_LR_DUAL:
        {
            solve_l2r_lr_dual(&prob, epsilon, Cp, Cn);
            break;
        }
        default:
            SG_ERROR("Error: unknown solver_type\n")
            break;
    }

    if (use_bias)
        set_bias(w[w.vlen]);
    else
        set_bias(0);

    SG_FREE(prob.y);
    SG_FREE(Cs);

    return true;
}
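
// Note on the L1 solvers above: they expect the feature matrix transposed
// (one training example per column). A hedged sketch, with `feats` again a
// placeholder for a CDotFeatures* holding transposed data:
//
//     CLibLinear* lin = new CLibLinear();
//     lin->set_liblinear_solver_type(L1R_LR);
//     lin->set_features(feats);   // transposed: dimension = #examples
//     lin->set_labels(labels);
//     lin->train();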

// A coordinate descent algorithm for
// L1-loss and L2-loss SVM dual problems
//
//  min_\alpha  0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
//    s.t.      0 <= \alpha_i <= upper_bound_i,
//
// where Qij = yi yj xi^T xj and
// D is a diagonal matrix
//
// In L1-SVM case:
//     upper_bound_i = Cp if y_i = 1
//     upper_bound_i = Cn if y_i = -1
//     D_ii = 0
// In L2-SVM case:
//     upper_bound_i = INF
//     D_ii = 1/(2*Cp) if y_i = 1
//     D_ii = 1/(2*Cn) if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w

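// Each coordinate step solves its one-variable subproblem in closed form:
// with G = y_i w^T x_i - 1 + D_ii alpha_i (the i-th partial derivative)
// and QD_i = Q_ii + D_ii the second derivative, the unconstrained minimizer
// alpha_i - G/QD_i is clipped back onto the box [0, C]:
//
//     alpha_i <- min(max(alpha_i - G/QD_i, 0), C)
//
// which is exactly the CMath::min/CMath::max update applied below.
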
#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

void CLibLinear::solve_l2r_l1l2_svc(
    const liblinear_problem *prob, double eps, double Cp, double Cn, LIBLINEAR_SOLVER_TYPE st)
{
    int l = prob->l;
    int w_size = prob->n;
    int i, s, iter = 0;
    double C, d, G;
    double *QD = SG_MALLOC(double, l);
    int *index = SG_MALLOC(int, l);
    double *alpha = SG_MALLOC(double, l);
    int32_t *y = SG_MALLOC(int32_t, l);
    int active_size = l;

    // PG: projected gradient, for shrinking and stopping
    double PG;
    double PGmax_old = CMath::INFTY;
    double PGmin_old = -CMath::INFTY;
    double PGmax_new, PGmin_new;

    // default solver_type: L2R_L2LOSS_SVC_DUAL
    double diag[3] = {0.5/Cn, 0, 0.5/Cp};
    double upper_bound[3] = {CMath::INFTY, 0, CMath::INFTY};
    if (st == L2R_L1LOSS_SVC_DUAL)
    {
        diag[0] = 0;
        diag[2] = 0;
        upper_bound[0] = Cn;
        upper_bound[2] = Cp;
    }

    int n = prob->n;

    if (prob->use_bias)
        n--;

    for (i=0; i<w_size; i++)
        w[i] = 0;

    for (i=0; i<l; i++)
    {
        alpha[i] = 0;
        if (prob->y[i] > 0)
        {
            y[i] = +1;
        }
        else
        {
            y[i] = -1;
        }
        QD[i] = diag[GETI(i)];

        QD[i] += prob->x->dot(i, prob->x, i);
        index[i] = i;
    }

    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
            break;

        PGmax_new = -CMath::INFTY;
        PGmin_new = CMath::INFTY;

        for (i=0; i<active_size; i++)
        {
            int j = CMath::random(i, active_size-1);
            CMath::swap(index[i], index[j]);
        }

        for (s=0; s<active_size; s++)
        {
            i = index[s];
            int32_t yi = y[i];

            G = prob->x->dense_dot(i, w.vector, n);
            if (prob->use_bias)
                G += w.vector[n];

            if (m_linear_term.vector)
                G = G*yi + m_linear_term.vector[i];
            else
                G = G*yi - 1;

            C = upper_bound[GETI(i)];
            G += alpha[i]*diag[GETI(i)];

            PG = 0;
            if (alpha[i] == 0)
            {
                if (G > PGmax_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
                else if (G < 0)
                    PG = G;
            }
            else if (alpha[i] == C)
            {
                if (G < PGmin_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
                else if (G > 0)
                    PG = G;
            }
            else
                PG = G;

            PGmax_new = CMath::max(PGmax_new, PG);
            PGmin_new = CMath::min(PGmin_new, PG);

            if (fabs(PG) > 1.0e-12)
            {
                double alpha_old = alpha[i];
                alpha[i] = CMath::min(CMath::max(alpha[i] - G/QD[i], 0.0), C);
                d = (alpha[i] - alpha_old)*yi;

                prob->x->add_to_dense_vec(d, i, w.vector, n);

                if (prob->use_bias)
                    w.vector[n] += d;
            }
        }

        iter++;
        float64_t gap=PGmax_new - PGmin_new;
        SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6)

        if (gap <= eps)
        {
            if (active_size == l)
                break;
            else
            {
                active_size = l;
                PGmax_old = CMath::INFTY;
                PGmin_old = -CMath::INFTY;
                continue;
            }
        }
        PGmax_old = PGmax_new;
        PGmin_old = PGmin_new;
        if (PGmax_old <= 0)
            PGmax_old = CMath::INFTY;
        if (PGmin_old >= 0)
            PGmin_old = -CMath::INFTY;
    }

    SG_DONE()
    SG_INFO("optimization finished, #iter = %d\n", iter)
    if (iter >= max_iterations)
    {
        SG_WARNING("reaching max number of iterations\n"
                "Using -s 2 may be faster (also see liblinear FAQ)\n\n");
    }

    // calculate objective value

    double v = 0;
    int nSV = 0;
    for (i=0; i<w_size; i++)
        v += w.vector[i]*w.vector[i];
    for (i=0; i<l; i++)
    {
        v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
        if (alpha[i] > 0)
            ++nSV;
    }
    SG_INFO("Objective value = %lf\n", v/2)
    SG_INFO("nSV = %d\n", nSV)

    SG_FREE(QD);
    SG_FREE(alpha);
    SG_FREE(y);
    SG_FREE(index);
}

// A coordinate descent algorithm for
// L1-regularized L2-loss support vector classification
//
//  min_w \sum |w_j| + C \sum max(0, 1 - y_i w^T x_i)^2,
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w

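// For each coordinate j the solver forms a second-order approximation of
// the smooth loss part (gradient G, curvature H) and minimizes
// |w_j + d| + G*d + 0.5*H*d^2 in closed form, the usual soft-threshold
// Newton step:
//
//     d = -(G+1)/H   if G+1 <= H*w_j
//     d = -(G-1)/H   if G-1 >= H*w_j
//     d = -w_j       otherwise
//
// followed by a backtracking line search on the exact objective.
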
#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

void CLibLinear::solve_l1r_l2_svc(
    liblinear_problem *prob_col, double eps, double Cp, double Cn)
{
    int l = prob_col->l;
    int w_size = prob_col->n;
    int j, s, iter = 0;
    int active_size = w_size;
    int max_num_linesearch = 20;

    double sigma = 0.01;
    double d, G_loss, G, H;
    double Gmax_old = CMath::INFTY;
    double Gmax_new;
    double Gmax_init=0;
    double d_old, d_diff;
    double loss_old=0, loss_new;
    double appxcond, cond;

    int *index = SG_MALLOC(int, w_size);
    int32_t *y = SG_MALLOC(int32_t, l);
    double *b = SG_MALLOC(double, l); // b = 1-ywTx
    double *xj_sq = SG_MALLOC(double, w_size);

    CDotFeatures* x = (CDotFeatures*) prob_col->x;
    void* iterator;
    int32_t ind;
    float64_t val;

    double C[3] = {Cn, 0, Cp};

    int n = prob_col->n;
    if (prob_col->use_bias)
        n--;

    for (j=0; j<l; j++)
    {
        b[j] = 1;
        if (prob_col->y[j] > 0)
            y[j] = 1;
        else
            y[j] = -1;
    }

    for (j=0; j<w_size; j++)
    {
        w.vector[j] = 0;
        index[j] = j;
        xj_sq[j] = 0;

        if (use_bias && j==n)
        {
            for (ind=0; ind<l; ind++)
                xj_sq[n] += C[GETI(ind)];
        }
        else
        {
            iterator=x->get_feature_iterator(j);
            while (x->get_next_feature(ind, val, iterator))
                xj_sq[j] += C[GETI(ind)]*val*val;
            x->free_feature_iterator(iterator);
        }
    }

    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
            break;

        Gmax_new = 0;

        for (j=0; j<active_size; j++)
        {
            int i = CMath::random(j, active_size-1);
            CMath::swap(index[i], index[j]);
        }

        for (s=0; s<active_size; s++)
        {
            j = index[s];
            G_loss = 0;
            H = 0;

            if (use_bias && j==n)
            {
                for (ind=0; ind<l; ind++)
                {
                    if (b[ind] > 0)
                    {
                        double tmp = C[GETI(ind)]*y[ind];
                        G_loss -= tmp*b[ind];
                        H += tmp*y[ind];
                    }
                }
            }
            else
            {
                iterator=x->get_feature_iterator(j);

                while (x->get_next_feature(ind, val, iterator))
                {
                    if (b[ind] > 0)
                    {
                        double tmp = C[GETI(ind)]*val*y[ind];
                        G_loss -= tmp*b[ind];
                        H += tmp*val*y[ind];
                    }
                }
                x->free_feature_iterator(iterator);
            }

            G_loss *= 2;

            G = G_loss;
            H *= 2;
            H = CMath::max(H, 1e-12);

            double Gp = G+1;
            double Gn = G-1;
            double violation = 0;
            if (w.vector[j] == 0)
            {
                if (Gp < 0)
                    violation = -Gp;
                else if (Gn > 0)
                    violation = Gn;
                else if (Gp>Gmax_old/l && Gn<-Gmax_old/l)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
            }
            else if (w.vector[j] > 0)
                violation = fabs(Gp);
            else
                violation = fabs(Gn);

            Gmax_new = CMath::max(Gmax_new, violation);

            // obtain Newton direction d
            if (Gp <= H*w.vector[j])
                d = -Gp/H;
            else if (Gn >= H*w.vector[j])
                d = -Gn/H;
            else
                d = -w.vector[j];

            if (fabs(d) < 1.0e-12)
                continue;

            double delta = fabs(w.vector[j]+d)-fabs(w.vector[j]) + G*d;
            d_old = 0;
            int num_linesearch;
            for (num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
            {
                d_diff = d_old - d;
                cond = fabs(w.vector[j]+d)-fabs(w.vector[j]) - sigma*delta;

                appxcond = xj_sq[j]*d*d + G_loss*d + cond;
                if (appxcond <= 0)
                {
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                            b[ind] += d_diff*y[ind];
                        break;
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                            b[ind] += d_diff*val*y[ind];

                        x->free_feature_iterator(iterator);
                        break;
                    }
                }

                if (num_linesearch == 0)
                {
                    loss_old = 0;
                    loss_new = 0;

                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            if (b[ind] > 0)
                                loss_old += C[GETI(ind)]*b[ind]*b[ind];
                            double b_new = b[ind] + d_diff*y[ind];
                            b[ind] = b_new;
                            if (b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            if (b[ind] > 0)
                                loss_old += C[GETI(ind)]*b[ind]*b[ind];
                            double b_new = b[ind] + d_diff*val*y[ind];
                            b[ind] = b_new;
                            if (b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                        x->free_feature_iterator(iterator);
                    }
                }
                else
                {
                    loss_new = 0;
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            double b_new = b[ind] + d_diff*y[ind];
                            b[ind] = b_new;
                            if (b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            double b_new = b[ind] + d_diff*val*y[ind];
                            b[ind] = b_new;
                            if (b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                        x->free_feature_iterator(iterator);
                    }
                }

                cond = cond + loss_new - loss_old;
                if (cond <= 0)
                    break;
                else
                {
                    d_old = d;
                    d *= 0.5;
                    delta *= 0.5;
                }
            }

            w.vector[j] += d;

            // recompute b[] if line search takes too many steps
            if (num_linesearch >= max_num_linesearch)
            {
                SG_INFO("#")
                for (int i=0; i<l; i++)
                    b[i] = 1;

                for (int i=0; i<n; i++)
                {
                    if (w.vector[i]==0)
                        continue;

                    iterator=x->get_feature_iterator(i);
                    while (x->get_next_feature(ind, val, iterator))
                        b[ind] -= w.vector[i]*val*y[ind];
                    x->free_feature_iterator(iterator);
                }

                if (use_bias && w.vector[n])
                {
                    for (ind=0; ind<l; ind++)
                        b[ind] -= w.vector[n]*y[ind];
                }
            }
        }

        if (iter == 0)
            Gmax_init = Gmax_new;
        iter++;

        SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new),
                -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);

        if (Gmax_new <= eps*Gmax_init)
        {
            if (active_size == w_size)
                break;
            else
            {
                active_size = w_size;
                Gmax_old = CMath::INFTY;
                continue;
            }
        }

        Gmax_old = Gmax_new;
    }

    SG_DONE()
    SG_INFO("optimization finished, #iter = %d\n", iter)
    if (iter >= max_iterations)
        SG_WARNING("reaching max number of iterations\n")

    // calculate objective value

    double v = 0;
    int nnz = 0;
    for (j=0; j<w_size; j++)
    {
        if (w.vector[j] != 0)
        {
            v += fabs(w.vector[j]);
            nnz++;
        }
    }
    for (j=0; j<l; j++)
        if (b[j] > 0)
            v += C[GETI(j)]*b[j]*b[j];

    SG_INFO("Objective value = %lf\n", v)
    SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size)

    SG_FREE(index);
    SG_FREE(y);
    SG_FREE(b);
    SG_FREE(xj_sq);
}

// A coordinate descent algorithm for
// L1-regularized logistic regression problems
//
//  min_w \sum |w_j| + C \sum log(1 + exp(-y_i w^T x_i)),
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
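//
// The solver keeps exp_wTx[i] = exp(w^T x_i) up to date after every
// accepted step, so the gradient and (diagonal) Hessian contributions for
// a coordinate can be accumulated with a single pass over that feature's
// nonzeros via the feature iterator, without recomputing w^T x_i.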

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

void CLibLinear::solve_l1r_lr(
    const liblinear_problem *prob_col, double eps,
    double Cp, double Cn)
{
    int l = prob_col->l;
    int w_size = prob_col->n;
    int j, s, iter = 0;
    int active_size = w_size;
    int max_num_linesearch = 20;

    double x_min = 0;
    double sigma = 0.01;
    double d, G, H;
    double Gmax_old = CMath::INFTY;
    double Gmax_new;
    double Gmax_init=0;
    double sum1, appxcond1;
    double sum2, appxcond2;
    double cond;

    int *index = SG_MALLOC(int, w_size);
    int32_t *y = SG_MALLOC(int32_t, l);
    double *exp_wTx = SG_MALLOC(double, l);
    double *exp_wTx_new = SG_MALLOC(double, l);
    double *xj_max = SG_MALLOC(double, w_size);
    double *C_sum = SG_MALLOC(double, w_size);
    double *xjneg_sum = SG_MALLOC(double, w_size);
    double *xjpos_sum = SG_MALLOC(double, w_size);

    CDotFeatures* x = prob_col->x;
    void* iterator;
    int ind;
    double val;

    double C[3] = {Cn, 0, Cp};

    int n = prob_col->n;
    if (prob_col->use_bias)
        n--;

    for (j=0; j<l; j++)
    {
        exp_wTx[j] = 1;
        if (prob_col->y[j] > 0)
            y[j] = 1;
        else
            y[j] = -1;
    }
    for (j=0; j<w_size; j++)
    {
        w.vector[j] = 0;
        index[j] = j;
        xj_max[j] = 0;
        C_sum[j] = 0;
        xjneg_sum[j] = 0;
        xjpos_sum[j] = 0;

        if (use_bias && j==n)
        {
            for (ind=0; ind<l; ind++)
            {
                x_min = CMath::min(x_min, 1.0);
                xj_max[j] = CMath::max(xj_max[j], 1.0);
                C_sum[j] += C[GETI(ind)];
                if (y[ind] == -1)
                    xjneg_sum[j] += C[GETI(ind)];
                else
                    xjpos_sum[j] += C[GETI(ind)];
            }
        }
        else
        {
            iterator=x->get_feature_iterator(j);
            while (x->get_next_feature(ind, val, iterator))
            {
                x_min = CMath::min(x_min, val);
                xj_max[j] = CMath::max(xj_max[j], val);
                C_sum[j] += C[GETI(ind)];
                if (y[ind] == -1)
                    xjneg_sum[j] += C[GETI(ind)]*val;
                else
                    xjpos_sum[j] += C[GETI(ind)]*val;
            }
            x->free_feature_iterator(iterator);
        }
    }

    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
            break;

        Gmax_new = 0;

        for (j=0; j<active_size; j++)
        {
            int i = CMath::random(j, active_size-1);
            CMath::swap(index[i], index[j]);
        }

        for (s=0; s<active_size; s++)
        {
            j = index[s];
            sum1 = 0;
            sum2 = 0;
            H = 0;

            if (use_bias && j==n)
            {
                for (ind=0; ind<l; ind++)
                {
                    double exp_wTxind = exp_wTx[ind];
                    double tmp1 = 1.0/(1+exp_wTxind);
                    double tmp2 = C[GETI(ind)]*tmp1;
                    double tmp3 = tmp2*exp_wTxind;
                    sum2 += tmp2;
                    sum1 += tmp3;
                    H += tmp1*tmp3;
                }
            }
            else
            {
                iterator=x->get_feature_iterator(j);
                while (x->get_next_feature(ind, val, iterator))
                {
                    double exp_wTxind = exp_wTx[ind];
                    double tmp1 = val/(1+exp_wTxind);
                    double tmp2 = C[GETI(ind)]*tmp1;
                    double tmp3 = tmp2*exp_wTxind;
                    sum2 += tmp2;
                    sum1 += tmp3;
                    H += tmp1*tmp3;
                }
                x->free_feature_iterator(iterator);
            }

            G = -sum2 + xjneg_sum[j];

            double Gp = G+1;
            double Gn = G-1;
            double violation = 0;
            if (w.vector[j] == 0)
            {
                if (Gp < 0)
                    violation = -Gp;
                else if (Gn > 0)
                    violation = Gn;
                else if (Gp>Gmax_old/l && Gn<-Gmax_old/l)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
            }
            else if (w.vector[j] > 0)
                violation = fabs(Gp);
            else
                violation = fabs(Gn);

            Gmax_new = CMath::max(Gmax_new, violation);

            // obtain Newton direction d
            if (Gp <= H*w.vector[j])
                d = -Gp/H;
            else if (Gn >= H*w.vector[j])
                d = -Gn/H;
            else
                d = -w.vector[j];

            if (fabs(d) < 1.0e-12)
                continue;

            d = CMath::min(CMath::max(d, -10.0), 10.0);

            double delta = fabs(w.vector[j]+d)-fabs(w.vector[j]) + G*d;
            int num_linesearch;
            for (num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
            {
                cond = fabs(w.vector[j]+d)-fabs(w.vector[j]) - sigma*delta;

                if (x_min >= 0)
                {
                    double tmp = exp(d*xj_max[j]);
                    appxcond1 = log(1+sum1*(tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond - d*xjpos_sum[j];
                    appxcond2 = log(1+sum2*(1/tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond + d*xjneg_sum[j];
                    if (CMath::min(appxcond1, appxcond2) <= 0)
                    {
                        if (use_bias && j==n)
                        {
                            for (ind=0; ind<l; ind++)
                                exp_wTx[ind] *= exp(d);
                        }
                        else
                        {
                            iterator=x->get_feature_iterator(j);
                            while (x->get_next_feature(ind, val, iterator))
                                exp_wTx[ind] *= exp(d*val);
                            x->free_feature_iterator(iterator);
                        }
                        break;
                    }
                }

                cond += d*xjneg_sum[j];

                int i = 0;

                if (use_bias && j==n)
                {
                    for (ind=0; ind<l; ind++)
                    {
                        double exp_dx = exp(d);
                        exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
                        cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
                        i++;
                    }
                }
                else
                {
                    iterator=x->get_feature_iterator(j);
                    while (x->get_next_feature(ind, val, iterator))
                    {
                        double exp_dx = exp(d*val);
                        exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
                        cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
                        i++;
                    }
                    x->free_feature_iterator(iterator);
                }

                if (cond <= 0)
                {
                    i = 0;
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            exp_wTx[ind] = exp_wTx_new[i];
                            i++;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            exp_wTx[ind] = exp_wTx_new[i];
                            i++;
                        }
                        x->free_feature_iterator(iterator);
                    }
                    break;
                }
                else
                {
                    d *= 0.5;
                    delta *= 0.5;
                }
            }

            w.vector[j] += d;

            // recompute exp_wTx[] if line search takes too many steps
            if (num_linesearch >= max_num_linesearch)
            {
                SG_INFO("#")
                for (int i=0; i<l; i++)
                    exp_wTx[i] = 0;

                for (int i=0; i<w_size; i++)
                {
                    if (w.vector[i]==0)
                        continue;

                    if (use_bias && i==n)
                    {
                        for (ind=0; ind<l; ind++)
                            exp_wTx[ind] += w.vector[i];
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(i);
                        while (x->get_next_feature(ind, val, iterator))
                            exp_wTx[ind] += w.vector[i]*val;
                        x->free_feature_iterator(iterator);
                    }
                }

                for (int i=0; i<l; i++)
                    exp_wTx[i] = exp(exp_wTx[i]);
            }
        }

        if (iter == 0)
            Gmax_init = Gmax_new;
        iter++;
        SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6)

        if (Gmax_new <= eps*Gmax_init)
        {
            if (active_size == w_size)
                break;
            else
            {
                active_size = w_size;
                Gmax_old = CMath::INFTY;
                continue;
            }
        }

        Gmax_old = Gmax_new;
    }

    SG_DONE()
    SG_INFO("optimization finished, #iter = %d\n", iter)
    if (iter >= max_iterations)
        SG_WARNING("reaching max number of iterations\n")

    // calculate objective value

    double v = 0;
    int nnz = 0;
    for (j=0; j<w_size; j++)
    {
        if (w.vector[j] != 0)
        {
            v += fabs(w.vector[j]);
            nnz++;
        }
    }
    for (j=0; j<l; j++)
    {
        if (y[j] == 1)
            v += C[GETI(j)]*log(1+1/exp_wTx[j]);
        else
            v += C[GETI(j)]*log(1+exp_wTx[j]);
    }

    SG_INFO("Objective value = %lf\n", v)
    SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size)

    SG_FREE(index);
    SG_FREE(y);
    SG_FREE(exp_wTx);
    SG_FREE(exp_wTx_new);
    SG_FREE(xj_max);
    SG_FREE(C_sum);
    SG_FREE(xjneg_sum);
    SG_FREE(xjpos_sum);
}

// A coordinate descent algorithm for
// the dual of L2-regularized logistic regression problems
//
//  min_\alpha  0.5(\alpha^T Q \alpha) + \sum \alpha_i log(\alpha_i)
//              + (upper_bound_i - \alpha_i) log(upper_bound_i - \alpha_i),
//    s.t.      0 <= \alpha_i <= upper_bound_i,
//
// where Qij = yi yj xi^T xj and
// upper_bound_i = Cp if y_i = 1
// upper_bound_i = Cn if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Algorithm 5 of Yu et al., MLJ 2010
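//
// Each dual variable is stored as the pair (alpha, C - alpha) in
// alpha[2*i] and alpha[2*i+1]; every coordinate step then solves the
// strictly convex one-dimensional subproblem
//
//     g_t(z) = z log(z) + (C-z) log(C-z)
//              + 0.5*a*(z - alpha_old)^2 + sign*b*(z - alpha_old)
//
// over 0 < z < C with a few inner Newton iterations (see below).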

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

void CLibLinear::solve_l2r_lr_dual(const liblinear_problem *prob, double eps, double Cp, double Cn)
{
    int l = prob->l;
    int w_size = prob->n;
    int i, s, iter = 0;
    double *xTx = new double[l];
    int max_iter = 1000;
    int *index = new int[l];
    double *alpha = new double[2*l]; // store alpha and C - alpha
    int32_t *y = SG_MALLOC(int32_t, l);
    int max_inner_iter = 100; // for inner Newton
    double innereps = 1e-2;
    double innereps_min = CMath::min(1e-8, eps);
    double upper_bound[3] = {Cn, 0, Cp};
    double Gmax_init = 0;

    for (i=0; i<l; i++)
    {
        if (prob->y[i] > 0)
        {
            y[i] = +1;
        }
        else
        {
            y[i] = -1;
        }
    }

    // Initial alpha can be set here. Note that
    // 0 < alpha[i] < upper_bound[GETI(i)]
    // alpha[2*i] + alpha[2*i+1] = upper_bound[GETI(i)]
    for (i=0; i<l; i++)
    {
        alpha[2*i] = CMath::min(0.001*upper_bound[GETI(i)], 1e-8);
        alpha[2*i+1] = upper_bound[GETI(i)] - alpha[2*i];
    }

    for (i=0; i<w_size; i++)
        w[i] = 0;
    for (i=0; i<l; i++)
    {
        xTx[i] = prob->x->dot(i, prob->x, i);
        prob->x->add_to_dense_vec(y[i]*alpha[2*i], i, w.vector, w_size);

        if (prob->use_bias)
        {
            w.vector[w_size] += y[i]*alpha[2*i];
            xTx[i] += 1;
        }
        index[i] = i;
    }

    while (iter < max_iter)
    {
        for (i=0; i<l; i++)
        {
            int j = CMath::random(i, l-1);
            CMath::swap(index[i], index[j]);
        }
        int newton_iter = 0;
        double Gmax = 0;
        for (s=0; s<l; s++)
        {
            i = index[s];
            int32_t yi = y[i];
            double C = upper_bound[GETI(i)];
            double ywTx = 0, xisq = xTx[i];

            ywTx = prob->x->dense_dot(i, w.vector, w_size);
            if (prob->use_bias)
                ywTx += w.vector[w_size];

            ywTx *= y[i];
            double a = xisq, b = ywTx;

            // Decide to minimize g_1(z) or g_2(z)
            int ind1 = 2*i, ind2 = 2*i+1, sign = 1;
            if (0.5*a*(alpha[ind2]-alpha[ind1])+b < 0)
            {
                ind1 = 2*i+1;
                ind2 = 2*i;
                sign = -1;
            }

            // g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old)
            double alpha_old = alpha[ind1];
            double z = alpha_old;
            if (C - z < 0.5 * C)
                z = 0.1*z;
            double gp = a*(z-alpha_old)+sign*b+CMath::log(z/(C-z));
            Gmax = CMath::max(Gmax, CMath::abs(gp));

            // Newton method on the sub-problem
            const double eta = 0.1; // xi in the paper
            int inner_iter = 0;
            while (inner_iter <= max_inner_iter)
            {
                if (fabs(gp) < innereps)
                    break;
                double gpp = a + C/(C-z)/z;
                double tmpz = z - gp/gpp;
                if (tmpz <= 0)
                    z *= eta;
                else // tmpz in (0, C)
                    z = tmpz;
                gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
                newton_iter++;
                inner_iter++;
            }

            if (inner_iter > 0) // update w
            {
                alpha[ind1] = z;
                alpha[ind2] = C-z;

                prob->x->add_to_dense_vec(sign*(z-alpha_old)*yi, i, w.vector, w_size);

                if (prob->use_bias)
                    w.vector[w_size] += sign*(z-alpha_old)*yi;
            }
        }

        if (iter == 0)
            Gmax_init = Gmax;
        iter++;

        SG_SABS_PROGRESS(Gmax, -CMath::log10(Gmax), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6)

        if (Gmax < eps)
            break;

        if (newton_iter <= l/10)
            innereps = CMath::max(innereps_min, 0.1*innereps);
    }

    SG_DONE()
    SG_INFO("optimization finished, #iter = %d\n", iter)
    if (iter >= max_iter)
        SG_WARNING("reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n")

    // calculate objective value

    double v = 0;
    for (i=0; i<w_size; i++)
        v += w[i] * w[i];
    v *= 0.5;
    for (i=0; i<l; i++)
        v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1])
            - upper_bound[GETI(i)] * log(upper_bound[GETI(i)]);
    SG_INFO("Objective value = %lf\n", v)

    delete[] xTx;
    delete[] alpha;
    SG_FREE(y); // y was allocated with SG_MALLOC, so release it with SG_FREE
    delete[] index;
}


void CLibLinear::set_linear_term(const SGVector<float64_t> linear_term)
{
    if (!m_labels)
        SG_ERROR("Please assign labels first!\n")

    int32_t num_labels=m_labels->get_num_labels();

    if (num_labels!=linear_term.vlen)
    {
        SG_ERROR("Number of labels (%d) does not match number of "
                "entries (%d) in the linear term\n", num_labels,
                linear_term.vlen);
    }

    m_linear_term=linear_term;
}

SGVector<float64_t> CLibLinear::get_linear_term()
{
    if (!m_linear_term.vlen || !m_linear_term.vector)
        SG_ERROR("Please assign linear term first!\n")

    return m_linear_term;
}

void CLibLinear::init_linear_term()
{
    if (!m_labels)
        SG_ERROR("Please assign labels first!\n")

    m_linear_term=SGVector<float64_t>(m_labels->get_num_labels());
    m_linear_term.zero();
}
