LibLinear.cpp
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2007-2010 Soeren Sonnenburg
 * Copyright (c) 2007-2009 The LIBLINEAR Project.
 * Copyright (C) 2007-2010 Fraunhofer Institute FIRST and Max-Planck-Society
 */
#include <shogun/lib/config.h>

#ifdef HAVE_LAPACK
#include <shogun/io/SGIO.h>
#include <shogun/lib/Signal.h>
#include <shogun/lib/Time.h>
#include <shogun/base/Parameter.h>
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/optimization/liblinear/tron.h>
#include <shogun/features/DotFeatures.h>
#include <shogun/labels/BinaryLabels.h>

using namespace shogun;

CLibLinear::CLibLinear()
: CLinearMachine()
{
    init();
}

CLibLinear::CLibLinear(LIBLINEAR_SOLVER_TYPE l)
: CLinearMachine()
{
    init();
    liblinear_solver_type=l;
}

CLibLinear::CLibLinear(
    float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearMachine()
{
    init();
    C1=C;
    C2=C;
    use_bias=true;

    set_features(traindat);
    set_labels(trainlab);
    init_linear_term();
}

void CLibLinear::init()
{
    liblinear_solver_type=L2R_L1LOSS_SVC_DUAL;
    use_bias=false;
    C1=1;
    C2=1;
    set_max_iterations();
    epsilon=1e-5;

    SG_ADD(&C1, "C1", "C Cost constant 1.", MS_AVAILABLE);
    SG_ADD(&C2, "C2", "C Cost constant 2.", MS_AVAILABLE);
    SG_ADD(&use_bias, "use_bias", "Indicates if bias is used.",
            MS_NOT_AVAILABLE);
    SG_ADD(&epsilon, "epsilon", "Convergence precision.", MS_NOT_AVAILABLE);
    SG_ADD(&max_iterations, "max_iterations", "Max number of iterations.",
            MS_NOT_AVAILABLE);
    SG_ADD(&m_linear_term, "linear_term", "Linear Term", MS_NOT_AVAILABLE);
    SG_ADD((machine_int_t*) &liblinear_solver_type, "liblinear_solver_type",
            "Type of LibLinear solver.", MS_NOT_AVAILABLE);
}

CLibLinear::~CLibLinear()
{
}

bool CLibLinear::train_machine(CFeatures* data)
{
    CSignal::clear_cancel();
    ASSERT(m_labels);

    if (data)
    {
        if (!data->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n");

        set_features((CDotFeatures*) data);
    }
    ASSERT(features);

    int32_t num_train_labels=m_labels->get_num_labels();
    int32_t num_feat=features->get_dim_feature_space();
    int32_t num_vec=features->get_num_vectors();

    if (liblinear_solver_type == L1R_L2LOSS_SVC ||
            (liblinear_solver_type == L1R_LR) )
    {
        if (num_feat!=num_train_labels)
        {
            SG_ERROR("L1 methods require the data to be transposed: "
                    "number of features %d does not match number of "
                    "training labels %d\n",
                    num_feat, num_train_labels);
        }
        CMath::swap(num_feat, num_vec);
    }
    else
    {
        if (num_vec!=num_train_labels)
        {
            SG_ERROR("number of vectors %d does not match "
                    "number of training labels %d\n",
                    num_vec, num_train_labels);
        }
    }
    // with a bias, one extra weight slot is allocated past vlen so that
    // w[w.vlen] can hold the bias term after training
    if (use_bias)
        w=SGVector<float64_t>(SG_MALLOC(float64_t, num_feat+1), num_feat);
    else
        w=SGVector<float64_t>(num_feat);

    problem prob;
    if (use_bias)
    {
        prob.n=w.vlen+1;
        memset(w.vector, 0, sizeof(float64_t)*(w.vlen+1));
    }
    else
    {
        prob.n=w.vlen;
        memset(w.vector, 0, sizeof(float64_t)*(w.vlen+0));
    }
    prob.l=num_vec;
    prob.x=features;
    prob.y=SG_MALLOC(double, prob.l);
    float64_t* Cs=SG_MALLOC(double, prob.l);
    prob.use_bias=use_bias;
    double Cp=C1;
    double Cn=C2;

    for (int32_t i=0; i<prob.l; i++)
    {
        prob.y[i]=((CBinaryLabels*) m_labels)->get_int_label(i);
        if (prob.y[i] == +1)
            Cs[i]=C1;
        else if (prob.y[i] == -1)
            Cs[i]=C2;
        else
            SG_ERROR("labels should be +1/-1 only\n");
    }

    int pos = 0;
    int neg = 0;
    for (int i=0; i<prob.l; i++)
    {
        if (prob.y[i]==+1)
            pos++;
    }
    neg = prob.l - pos;

    SG_INFO("%d training points %d dims\n", prob.l, prob.n);

    function *fun_obj=NULL;
    switch (liblinear_solver_type)
    {
        case L2R_LR:
        {
            fun_obj=new l2r_lr_fun(&prob, Cs);
            CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations);
            SG_DEBUG("starting L2R_LR training via tron\n");
            tron_obj.tron(w.vector, m_max_train_time);
            SG_DEBUG("done with tron\n");
            delete fun_obj;
            break;
        }
        case L2R_L2LOSS_SVC:
        {
            fun_obj=new l2r_l2_svc_fun(&prob, Cs);
            CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations);
            tron_obj.tron(w.vector, m_max_train_time);
            delete fun_obj;
            break;
        }
        case L2R_L2LOSS_SVC_DUAL:
            solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
            break;
        case L2R_L1LOSS_SVC_DUAL:
            solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
            break;
        case L1R_L2LOSS_SVC:
        {
            // ASSUME FEATURES ARE TRANSPOSED ALREADY
            solve_l1r_l2_svc(&prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn);
            break;
        }
        case L1R_LR:
        {
            // ASSUME FEATURES ARE TRANSPOSED ALREADY
            solve_l1r_lr(&prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn);
            break;
        }
        case L2R_LR_DUAL:
        {
            solve_l2r_lr_dual(&prob, epsilon, Cp, Cn);
            break;
        }
        default:
            SG_ERROR("Error: unknown solver_type\n");
            break;
    }

    if (use_bias)
        set_bias(w[w.vlen]);
    else
        set_bias(0);

    SG_FREE(prob.y);
    SG_FREE(Cs);

    return true;
}
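
// Usage note (illustrative, not part of the original file): a minimal
// training sketch, assuming the Shogun 2.x API surrounding this class
// (CDenseFeatures, CBinaryLabels, and the CLibLinear setters whose
// parameters are registered in init() above).
//
//   CDenseFeatures<float64_t>* feats=new CDenseFeatures<float64_t>(mat);
//   CBinaryLabels* labels=new CBinaryLabels(lab);        // entries +1/-1
//   CLibLinear* ll=new CLibLinear(1.0, feats, labels);   // C1=C2=1.0, bias on
//   ll->set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL);  // pick a solver
//   ll->set_epsilon(1e-5);                               // stopping tolerance
//   ll->train();                                         // calls train_machine()
//   SGVector<float64_t> w_trained=ll->get_w();           // trained weights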

// A coordinate descent algorithm for
// L1-loss and L2-loss SVM dual problems
//
//  min_\alpha  0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
//    s.t.      0 <= alpha_i <= upper_bound_i,
//
//  where Qij = yi yj xi^T xj and
//  D is a diagonal matrix
//
// In L1-SVM case:
//     upper_bound_i = Cp if y_i = 1
//     upper_bound_i = Cn if y_i = -1
//     D_ii = 0
// In L2-SVM case:
//     upper_bound_i = INF
//     D_ii = 1/(2*Cp) if y_i = 1
//     D_ii = 1/(2*Cn) if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w

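// Worked update (a restatement of the loop below, not in the original
// comment): with QD[i] = x_i^T x_i + D_ii, the coordinate-wise
// projected-Newton step for variable alpha_i is
//
//   G       = y_i w^T x_i - 1 + D_ii alpha_i        (partial gradient)
//   alpha_i <- min(max(alpha_i - G/QD[i], 0), C)    (clipped to the box)
//   w       <- w + (alpha_i_new - alpha_i_old) y_i x_i
//
// Shrinking drops variables whose projected gradient PG indicates they are
// pinned at a bound, and the loop stops once max(PG) - min(PG) <= eps.
// GETI(i) maps y[i] in {-1,+1} to indices {0,2} of diag[]/upper_bound[],
// which is why those arrays have three entries with the middle one unused.
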
#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

void CLibLinear::solve_l2r_l1l2_svc(
    const problem *prob, double eps, double Cp, double Cn, LIBLINEAR_SOLVER_TYPE st)
{
    int l = prob->l;
    int w_size = prob->n;
    int i, s, iter = 0;
    double C, d, G;
    double *QD = SG_MALLOC(double, l);
    int *index = SG_MALLOC(int, l);
    double *alpha = SG_MALLOC(double, l);
    int32_t *y = SG_MALLOC(int32_t, l);
    int active_size = l;

    // PG: projected gradient, for shrinking and stopping
    double PG;
    double PGmax_old = CMath::INFTY;
    double PGmin_old = -CMath::INFTY;
    double PGmax_new, PGmin_new;

    // default solver_type: L2R_L2LOSS_SVC_DUAL
    double diag[3] = {0.5/Cn, 0, 0.5/Cp};
    double upper_bound[3] = {CMath::INFTY, 0, CMath::INFTY};
    if(st == L2R_L1LOSS_SVC_DUAL)
    {
        diag[0] = 0;
        diag[2] = 0;
        upper_bound[0] = Cn;
        upper_bound[2] = Cp;
    }

    int n = prob->n;

    if (prob->use_bias)
        n--;

    for(i=0; i<w_size; i++)
        w[i] = 0;

    for(i=0; i<l; i++)
    {
        alpha[i] = 0;
        if(prob->y[i] > 0)
        {
            y[i] = +1;
        }
        else
        {
            y[i] = -1;
        }
        QD[i] = diag[GETI(i)];
        QD[i] += prob->x->dot(i, prob->x, i);
        index[i] = i;
    }

    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
            break;

        PGmax_new = -CMath::INFTY;
        PGmin_new = CMath::INFTY;

        for (i=0; i<active_size; i++)
        {
            int j = i+rand()%(active_size-i);
            CMath::swap(index[i], index[j]);
        }

        for (s=0; s<active_size; s++)
        {
            i = index[s];
            int32_t yi = y[i];

            G = prob->x->dense_dot(i, w.vector, n);
            if (prob->use_bias)
                G+=w.vector[n];

            if (m_linear_term.vector)
                G = G*yi + m_linear_term.vector[i];
            else
                G = G*yi-1;

            C = upper_bound[GETI(i)];
            G += alpha[i]*diag[GETI(i)];

            PG = 0;
            if (alpha[i] == 0)
            {
                if (G > PGmax_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
                else if (G < 0)
                    PG = G;
            }
            else if (alpha[i] == C)
            {
                if (G < PGmin_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
                else if (G > 0)
                    PG = G;
            }
            else
                PG = G;

            PGmax_new = CMath::max(PGmax_new, PG);
            PGmin_new = CMath::min(PGmin_new, PG);

            if(fabs(PG) > 1.0e-12)
            {
                double alpha_old = alpha[i];
                alpha[i] = CMath::min(CMath::max(alpha[i] - G/QD[i], 0.0), C);
                d = (alpha[i] - alpha_old)*yi;

                prob->x->add_to_dense_vec(d, i, w.vector, n);

                if (prob->use_bias)
                    w.vector[n]+=d;
            }
        }

        iter++;
        float64_t gap=PGmax_new - PGmin_new;
        SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6);

        if(gap <= eps)
        {
            if(active_size == l)
                break;
            else
            {
                active_size = l;
                PGmax_old = CMath::INFTY;
                PGmin_old = -CMath::INFTY;
                continue;
            }
        }
        PGmax_old = PGmax_new;
        PGmin_old = PGmin_new;
        if (PGmax_old <= 0)
            PGmax_old = CMath::INFTY;
        if (PGmin_old >= 0)
            PGmin_old = -CMath::INFTY;
    }

    SG_DONE();
    SG_INFO("optimization finished, #iter = %d\n", iter);
    if (iter >= max_iterations)
    {
        SG_WARNING("reaching max number of iterations\n"
                "Using -s 2 may be faster (also see liblinear FAQ)\n\n");
    }

    // calculate objective value

    double v = 0;
    int nSV = 0;
    for(i=0; i<w_size; i++)
        v += w.vector[i]*w.vector[i];
    for(i=0; i<l; i++)
    {
        v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
        if(alpha[i] > 0)
            ++nSV;
    }
    SG_INFO("Objective value = %lf\n", v/2);
    SG_INFO("nSV = %d\n", nSV);

    SG_FREE(QD);
    SG_FREE(alpha);
    SG_FREE(y);
    SG_FREE(index);
}

// A coordinate descent algorithm for
// L1-regularized L2-loss support vector classification
//
//  min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w

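// Worked step (a restatement of the loop below, not in the original
// comment): for coordinate j, with b_i = 1 - y_i w^T x_i kept up to date,
// only the active loss terms (b_i > 0) contribute:
//
//   G = 2 * sum_{i: b_i>0} -C_i y_i x_ij b_i    (gradient of the loss)
//   H = 2 * sum_{i: b_i>0}  C_i x_ij^2          (Hessian, floored at 1e-12)
//
// The L1 term |w_j| gives the soft-threshold style Newton direction
//   d = -(G+1)/H  if G+1 <= H*w_j,   d = -(G-1)/H  if G-1 >= H*w_j,
//   d = -w_j      otherwise,
// followed by a backtracking line search (halving d) until the
// sufficient-decrease condition cond <= 0 holds.
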
#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

void CLibLinear::solve_l1r_l2_svc(
    problem *prob_col, double eps, double Cp, double Cn)
{
    int l = prob_col->l;
    int w_size = prob_col->n;
    int j, s, iter = 0;
    int active_size = w_size;
    int max_num_linesearch = 20;

    double sigma = 0.01;
    double d, G_loss, G, H;
    double Gmax_old = CMath::INFTY;
    double Gmax_new;
    double Gmax_init = 0;
    double d_old, d_diff;
    double loss_old = 0, loss_new;
    double appxcond, cond;

    int *index = SG_MALLOC(int, w_size);
    int32_t *y = SG_MALLOC(int32_t, l);
    double *b = SG_MALLOC(double, l); // b = 1-ywTx
    double *xj_sq = SG_MALLOC(double, w_size);

    CDotFeatures* x = (CDotFeatures*) prob_col->x;
    void* iterator;
    int32_t ind;
    float64_t val;

    double C[3] = {Cn, 0, Cp};

    int n = prob_col->n;
    if (prob_col->use_bias)
        n--;

    for(j=0; j<l; j++)
    {
        b[j] = 1;
        if(prob_col->y[j] > 0)
            y[j] = 1;
        else
            y[j] = -1;
    }

    for(j=0; j<w_size; j++)
    {
        w.vector[j] = 0;
        index[j] = j;
        xj_sq[j] = 0;

        if (use_bias && j==n)
        {
            for (ind=0; ind<l; ind++)
                xj_sq[n] += C[GETI(ind)];
        }
        else
        {
            iterator=x->get_feature_iterator(j);
            while (x->get_next_feature(ind, val, iterator))
                xj_sq[j] += C[GETI(ind)]*val*val;
            x->free_feature_iterator(iterator);
        }
    }

    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
            break;

        Gmax_new = 0;

        for(j=0; j<active_size; j++)
        {
            int i = j+rand()%(active_size-j);
            CMath::swap(index[i], index[j]);
        }

        for(s=0; s<active_size; s++)
        {
            j = index[s];
            G_loss = 0;
            H = 0;

            if (use_bias && j==n)
            {
                for (ind=0; ind<l; ind++)
                {
                    if(b[ind] > 0)
                    {
                        double tmp = C[GETI(ind)]*y[ind];
                        G_loss -= tmp*b[ind];
                        H += tmp*y[ind];
                    }
                }
            }
            else
            {
                iterator=x->get_feature_iterator(j);

                while (x->get_next_feature(ind, val, iterator))
                {
                    if(b[ind] > 0)
                    {
                        double tmp = C[GETI(ind)]*val*y[ind];
                        G_loss -= tmp*b[ind];
                        H += tmp*val*y[ind];
                    }
                }
                x->free_feature_iterator(iterator);
            }

            G_loss *= 2;

            G = G_loss;
            H *= 2;
            H = CMath::max(H, 1e-12);

            double Gp = G+1;
            double Gn = G-1;
            double violation = 0;
            if(w.vector[j] == 0)
            {
                if(Gp < 0)
                    violation = -Gp;
                else if(Gn > 0)
                    violation = Gn;
                else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
            }
            else if(w.vector[j] > 0)
                violation = fabs(Gp);
            else
                violation = fabs(Gn);

            Gmax_new = CMath::max(Gmax_new, violation);

            // obtain Newton direction d
            if(Gp <= H*w.vector[j])
                d = -Gp/H;
            else if(Gn >= H*w.vector[j])
                d = -Gn/H;
            else
                d = -w.vector[j];

            if(fabs(d) < 1.0e-12)
                continue;

            double delta = fabs(w.vector[j]+d)-fabs(w.vector[j]) + G*d;
            d_old = 0;
            int num_linesearch;
            for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
            {
                d_diff = d_old - d;
                cond = fabs(w.vector[j]+d)-fabs(w.vector[j]) - sigma*delta;

                appxcond = xj_sq[j]*d*d + G_loss*d + cond;
                if(appxcond <= 0)
                {
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                            b[ind] += d_diff*y[ind];
                        break;
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                            b[ind] += d_diff*val*y[ind];

                        x->free_feature_iterator(iterator);
                        break;
                    }
                }

                if(num_linesearch == 0)
                {
                    loss_old = 0;
                    loss_new = 0;

                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            if(b[ind] > 0)
                                loss_old += C[GETI(ind)]*b[ind]*b[ind];
                            double b_new = b[ind] + d_diff*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            if(b[ind] > 0)
                                loss_old += C[GETI(ind)]*b[ind]*b[ind];
                            double b_new = b[ind] + d_diff*val*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                        x->free_feature_iterator(iterator);
                    }
                }
                else
                {
                    loss_new = 0;
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            double b_new = b[ind] + d_diff*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            double b_new = b[ind] + d_diff*val*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                        x->free_feature_iterator(iterator);
                    }
                }

                cond = cond + loss_new - loss_old;
                if(cond <= 0)
                    break;
                else
                {
                    d_old = d;
                    d *= 0.5;
                    delta *= 0.5;
                }
            }

            w.vector[j] += d;

            // recompute b[] if line search takes too many steps
            if(num_linesearch >= max_num_linesearch)
            {
                SG_INFO("#");
                for(int i=0; i<l; i++)
                    b[i] = 1;

                for(int i=0; i<n; i++)
                {
                    if(w.vector[i]==0)
                        continue;

                    iterator=x->get_feature_iterator(i);
                    while (x->get_next_feature(ind, val, iterator))
                        b[ind] -= w.vector[i]*val*y[ind];
                    x->free_feature_iterator(iterator);
                }

                if (use_bias && w.vector[n])
                {
                    for (ind=0; ind<l; ind++)
                        b[ind] -= w.vector[n]*y[ind];
                }
            }
        }

        if(iter == 0)
            Gmax_init = Gmax_new;
        iter++;

        SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new),
                -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);

        if(Gmax_new <= eps*Gmax_init)
        {
            if(active_size == w_size)
                break;
            else
            {
                active_size = w_size;
                Gmax_old = CMath::INFTY;
                continue;
            }
        }

        Gmax_old = Gmax_new;
    }

    SG_DONE();
    SG_INFO("optimization finished, #iter = %d\n", iter);
    if(iter >= max_iterations)
        SG_WARNING("\nWARNING: reaching max number of iterations\n");

    // calculate objective value

    double v = 0;
    int nnz = 0;
    for(j=0; j<w_size; j++)
    {
        if(w.vector[j] != 0)
        {
            v += fabs(w.vector[j]);
            nnz++;
        }
    }
    for(j=0; j<l; j++)
        if(b[j] > 0)
            v += C[GETI(j)]*b[j]*b[j];

    SG_INFO("Objective value = %lf\n", v);
    SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size);

    SG_FREE(index);
    SG_FREE(y);
    SG_FREE(b);
    SG_FREE(xj_sq);
}

// A coordinate descent algorithm for
// L1-regularized logistic regression problems
//
//  min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w

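// Worked gradient (a restatement of the loop below, not in the original
// comment): keeping exp_wTx[i] = exp(w^T x_i) cached, coordinate j uses
//
//   sum2 = sum_i C_i x_ij / (1 + exp_wTx[i])
//   G    = -sum2 + xjneg_sum[j]
//        = sum_i -C_i y_i x_ij / (1 + exp(y_i w^T x_i))       (loss gradient)
//   H    = sum_i C_i x_ij^2 exp_wTx[i] / (1 + exp_wTx[i])^2   (loss Hessian)
//
// with the same soft-threshold Newton direction and backtracking line
// search as in the L1-regularized L2-loss solver above. When all feature
// values are non-negative (x_min >= 0), the cheaper upper bounds
// appxcond1/appxcond2 can accept a step without recomputing the loss.
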
#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

void CLibLinear::solve_l1r_lr(
    const problem *prob_col, double eps,
    double Cp, double Cn)
{
    int l = prob_col->l;
    int w_size = prob_col->n;
    int j, s, iter = 0;
    int active_size = w_size;
    int max_num_linesearch = 20;

    double x_min = 0;
    double sigma = 0.01;
    double d, G, H;
    double Gmax_old = CMath::INFTY;
    double Gmax_new;
    double Gmax_init = 0;
    double sum1, appxcond1;
    double sum2, appxcond2;
    double cond;

    int *index = SG_MALLOC(int, w_size);
    int32_t *y = SG_MALLOC(int32_t, l);
    double *exp_wTx = SG_MALLOC(double, l);
    double *exp_wTx_new = SG_MALLOC(double, l);
    double *xj_max = SG_MALLOC(double, w_size);
    double *C_sum = SG_MALLOC(double, w_size);
    double *xjneg_sum = SG_MALLOC(double, w_size);
    double *xjpos_sum = SG_MALLOC(double, w_size);

    CDotFeatures* x = prob_col->x;
    void* iterator;
    int ind;
    double val;

    double C[3] = {Cn, 0, Cp};

    int n = prob_col->n;
    if (prob_col->use_bias)
        n--;

    for(j=0; j<l; j++)
    {
        exp_wTx[j] = 1;
        if(prob_col->y[j] > 0)
            y[j] = 1;
        else
            y[j] = -1;
    }
    for(j=0; j<w_size; j++)
    {
        w.vector[j] = 0;
        index[j] = j;
        xj_max[j] = 0;
        C_sum[j] = 0;
        xjneg_sum[j] = 0;
        xjpos_sum[j] = 0;

        if (use_bias && j==n)
        {
            for (ind=0; ind<l; ind++)
            {
                x_min = CMath::min(x_min, 1.0);
                xj_max[j] = CMath::max(xj_max[j], 1.0);
                C_sum[j] += C[GETI(ind)];
                if(y[ind] == -1)
                    xjneg_sum[j] += C[GETI(ind)];
                else
                    xjpos_sum[j] += C[GETI(ind)];
            }
        }
        else
        {
            iterator=x->get_feature_iterator(j);
            while (x->get_next_feature(ind, val, iterator))
            {
                x_min = CMath::min(x_min, val);
                xj_max[j] = CMath::max(xj_max[j], val);
                C_sum[j] += C[GETI(ind)];
                if(y[ind] == -1)
                    xjneg_sum[j] += C[GETI(ind)]*val;
                else
                    xjpos_sum[j] += C[GETI(ind)]*val;
            }
            x->free_feature_iterator(iterator);
        }
    }

    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
            break;

        Gmax_new = 0;

        for(j=0; j<active_size; j++)
        {
            int i = j+rand()%(active_size-j);
            CMath::swap(index[i], index[j]);
        }

        for(s=0; s<active_size; s++)
        {
            j = index[s];
            sum1 = 0;
            sum2 = 0;
            H = 0;

            if (use_bias && j==n)
            {
                for (ind=0; ind<l; ind++)
                {
                    double exp_wTxind = exp_wTx[ind];
                    double tmp1 = 1.0/(1+exp_wTxind);
                    double tmp2 = C[GETI(ind)]*tmp1;
                    double tmp3 = tmp2*exp_wTxind;
                    sum2 += tmp2;
                    sum1 += tmp3;
                    H += tmp1*tmp3;
                }
            }
            else
            {
                iterator=x->get_feature_iterator(j);
                while (x->get_next_feature(ind, val, iterator))
                {
                    double exp_wTxind = exp_wTx[ind];
                    double tmp1 = val/(1+exp_wTxind);
                    double tmp2 = C[GETI(ind)]*tmp1;
                    double tmp3 = tmp2*exp_wTxind;
                    sum2 += tmp2;
                    sum1 += tmp3;
                    H += tmp1*tmp3;
                }
                x->free_feature_iterator(iterator);
            }

            G = -sum2 + xjneg_sum[j];

            double Gp = G+1;
            double Gn = G-1;
            double violation = 0;
            if(w.vector[j] == 0)
            {
                if(Gp < 0)
                    violation = -Gp;
                else if(Gn > 0)
                    violation = Gn;
                else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
            }
            else if(w.vector[j] > 0)
                violation = fabs(Gp);
            else
                violation = fabs(Gn);

            Gmax_new = CMath::max(Gmax_new, violation);

            // obtain Newton direction d
            if(Gp <= H*w.vector[j])
                d = -Gp/H;
            else if(Gn >= H*w.vector[j])
                d = -Gn/H;
            else
                d = -w.vector[j];

            if(fabs(d) < 1.0e-12)
                continue;

            d = CMath::min(CMath::max(d, -10.0), 10.0);

            double delta = fabs(w.vector[j]+d)-fabs(w.vector[j]) + G*d;
            int num_linesearch;
            for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
            {
                cond = fabs(w.vector[j]+d)-fabs(w.vector[j]) - sigma*delta;

                if(x_min >= 0)
                {
                    double tmp = exp(d*xj_max[j]);
                    appxcond1 = log(1+sum1*(tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond - d*xjpos_sum[j];
                    appxcond2 = log(1+sum2*(1/tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond + d*xjneg_sum[j];
                    if(CMath::min(appxcond1,appxcond2) <= 0)
                    {
                        if (use_bias && j==n)
                        {
                            for (ind=0; ind<l; ind++)
                                exp_wTx[ind] *= exp(d);
                        }
                        else
                        {
                            iterator=x->get_feature_iterator(j);
                            while (x->get_next_feature(ind, val, iterator))
                                exp_wTx[ind] *= exp(d*val);
                            x->free_feature_iterator(iterator);
                        }
                        break;
                    }
                }

                cond += d*xjneg_sum[j];

                int i = 0;

                if (use_bias && j==n)
                {
                    for (ind=0; ind<l; ind++)
                    {
                        double exp_dx = exp(d);
                        exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
                        cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
                        i++;
                    }
                }
                else
                {
                    iterator=x->get_feature_iterator(j);
                    while (x->get_next_feature(ind, val, iterator))
                    {
                        double exp_dx = exp(d*val);
                        exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
                        cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
                        i++;
                    }
                    x->free_feature_iterator(iterator);
                }

                if(cond <= 0)
                {
                    i = 0;
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            exp_wTx[ind] = exp_wTx_new[i];
                            i++;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            exp_wTx[ind] = exp_wTx_new[i];
                            i++;
                        }
                        x->free_feature_iterator(iterator);
                    }
                    break;
                }
                else
                {
                    d *= 0.5;
                    delta *= 0.5;
                }
            }

            w.vector[j] += d;

            // recompute exp_wTx[] if line search takes too many steps
            if(num_linesearch >= max_num_linesearch)
            {
                SG_INFO("#");
                for(int i=0; i<l; i++)
                    exp_wTx[i] = 0;

                for(int i=0; i<w_size; i++)
                {
                    if(w.vector[i]==0) continue;

                    if (use_bias && i==n)
                    {
                        for (ind=0; ind<l; ind++)
                            exp_wTx[ind] += w.vector[i];
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(i);
                        while (x->get_next_feature(ind, val, iterator))
                            exp_wTx[ind] += w.vector[i]*val;
                        x->free_feature_iterator(iterator);
                    }
                }

                for(int i=0; i<l; i++)
                    exp_wTx[i] = exp(exp_wTx[i]);
            }
        }

        if(iter == 0)
            Gmax_init = Gmax_new;
        iter++;
        SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);

        if(Gmax_new <= eps*Gmax_init)
        {
            if(active_size == w_size)
                break;
            else
            {
                active_size = w_size;
                Gmax_old = CMath::INFTY;
                continue;
            }
        }

        Gmax_old = Gmax_new;
    }

    SG_DONE();
    SG_INFO("optimization finished, #iter = %d\n", iter);
    if(iter >= max_iterations)
        SG_WARNING("\nWARNING: reaching max number of iterations\n");

    // calculate objective value

    double v = 0;
    int nnz = 0;
    for(j=0; j<w_size; j++)
        if(w.vector[j] != 0)
        {
            v += fabs(w.vector[j]);
            nnz++;
        }
    for(j=0; j<l; j++)
        if(y[j] == 1)
            v += C[GETI(j)]*log(1+1/exp_wTx[j]);
        else
            v += C[GETI(j)]*log(1+exp_wTx[j]);

    SG_INFO("Objective value = %lf\n", v);
    SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size);

    SG_FREE(index);
    SG_FREE(y);
    SG_FREE(exp_wTx);
    SG_FREE(exp_wTx_new);
    SG_FREE(xj_max);
    SG_FREE(C_sum);
    SG_FREE(xjneg_sum);
    SG_FREE(xjpos_sum);
}

// A coordinate descent algorithm for
// the dual of L2-regularized logistic regression problems
//
//  min_\alpha 0.5(\alpha^T Q \alpha) + \sum \alpha_i log (\alpha_i)
//             + (upper_bound_i - \alpha_i) log (upper_bound_i - \alpha_i),
//    s.t.     0 <= \alpha_i <= upper_bound_i,
//
//  where Qij = yi yj xi^T xj and
//  upper_bound_i = Cp if y_i = 1
//  upper_bound_i = Cn if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Algorithm 5 of Yu et al., MLJ 2010

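// Sub-problem note (a restatement of the loop below, not in the original
// comment): each outer pass solves, for one pair (alpha_i, C - alpha_i),
// the one-dimensional problem
//
//   min_z g(z) = z*log(z) + (C-z)*log(C-z)
//              + 0.5*a*(z - alpha_old)^2 + sign*b*(z - alpha_old),
//
// with a = x_i^T x_i and b = y_i w^T x_i, by a guarded Newton iteration:
// z <- z - g'(z)/g''(z), where g'(z) = a*(z-alpha_old) + sign*b + log(z/(C-z))
// and g''(z) = a + C/((C-z)*z), falling back to z <- eta*z whenever the
// Newton step leaves (0, C). The inner tolerance innereps is tightened as
// the outer iterates converge.
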
#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

void CLibLinear::solve_l2r_lr_dual(const problem *prob, double eps, double Cp, double Cn)
{
    int l = prob->l;
    int w_size = prob->n;
    int i, s, iter = 0;
    double *xTx = new double[l];
    int max_iter = 1000;
    int *index = new int[l];
    double *alpha = new double[2*l]; // store alpha and C - alpha
    int32_t *y = SG_MALLOC(int32_t, l);
    int max_inner_iter = 100; // for inner Newton
    double innereps = 1e-2;
    double innereps_min = CMath::min(1e-8, eps);
    double upper_bound[3] = {Cn, 0, Cp};
    double Gmax_init = 0;

    for(i=0; i<l; i++)
    {
        if(prob->y[i] > 0)
        {
            y[i] = +1;
        }
        else
        {
            y[i] = -1;
        }
    }

    // Initial alpha can be set here. Note that
    // 0 < alpha[i] < upper_bound[GETI(i)]
    // alpha[2*i] + alpha[2*i+1] = upper_bound[GETI(i)]
    for(i=0; i<l; i++)
    {
        alpha[2*i] = CMath::min(0.001*upper_bound[GETI(i)], 1e-8);
        alpha[2*i+1] = upper_bound[GETI(i)] - alpha[2*i];
    }

    for(i=0; i<w_size; i++)
        w[i] = 0;
    for(i=0; i<l; i++)
    {
        xTx[i] = prob->x->dot(i, prob->x, i);
        prob->x->add_to_dense_vec(y[i]*alpha[2*i], i, w.vector, w_size);

        if (prob->use_bias)
        {
            w.vector[w_size]+=y[i]*alpha[2*i];
            xTx[i]+=1;
        }
        index[i] = i;
    }

    while (iter < max_iter)
    {
        for (i=0; i<l; i++)
        {
            int j = i+rand()%(l-i);
            CMath::swap(index[i], index[j]);
        }
        int newton_iter = 0;
        double Gmax = 0;
        for (s=0; s<l; s++)
        {
            i = index[s];
            int32_t yi = y[i];
            double C = upper_bound[GETI(i)];
            double ywTx = 0, xisq = xTx[i];

            ywTx = prob->x->dense_dot(i, w.vector, w_size);
            if (prob->use_bias)
                ywTx+=w.vector[w_size];

            ywTx *= y[i];
            double a = xisq, b = ywTx;

            // Decide to minimize g_1(z) or g_2(z)
            int ind1 = 2*i, ind2 = 2*i+1, sign = 1;
            if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0)
            {
                ind1 = 2*i+1;
                ind2 = 2*i;
                sign = -1;
            }

            // g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old)
            double alpha_old = alpha[ind1];
            double z = alpha_old;
            if(C - z < 0.5 * C)
                z = 0.1*z;
            double gp = a*(z-alpha_old)+sign*b+CMath::log(z/(C-z));
            Gmax = CMath::max(Gmax, CMath::abs(gp));

            // Newton method on the sub-problem
            const double eta = 0.1; // xi in the paper
            int inner_iter = 0;
            while (inner_iter <= max_inner_iter)
            {
                if(fabs(gp) < innereps)
                    break;
                double gpp = a + C/(C-z)/z;
                double tmpz = z - gp/gpp;
                if(tmpz <= 0)
                    z *= eta;
                else // tmpz in (0, C)
                    z = tmpz;
                gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
                newton_iter++;
                inner_iter++;
            }

            if(inner_iter > 0) // update w
            {
                alpha[ind1] = z;
                alpha[ind2] = C-z;

                prob->x->add_to_dense_vec(sign*(z-alpha_old)*yi, i, w.vector, w_size);

                if (prob->use_bias)
                    w.vector[w_size]+=sign*(z-alpha_old)*yi;
            }
        }

        // record the initial gradient norm before incrementing iter,
        // so the progress display has a valid reference point
        if(iter == 0)
            Gmax_init = Gmax;
        iter++;

        SG_SABS_PROGRESS(Gmax, -CMath::log10(Gmax), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);

        if(Gmax < eps)
            break;

        if(newton_iter <= l/10)
            innereps = CMath::max(innereps_min, 0.1*innereps);
    }

    SG_DONE();
    SG_INFO("optimization finished, #iter = %d\n", iter);
    if (iter >= max_iter)
        SG_WARNING("reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n");

    // calculate objective value

    double v = 0;
    for(i=0; i<w_size; i++)
        v += w[i] * w[i];
    v *= 0.5;
    for(i=0; i<l; i++)
        v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1])
            - upper_bound[GETI(i)] * log(upper_bound[GETI(i)]);
    SG_INFO("Objective value = %lf\n", v);

    delete [] xTx;
    delete [] alpha;
    SG_FREE(y); // allocated with SG_MALLOC, so freed with SG_FREE
    delete [] index;
}


void CLibLinear::set_linear_term(const SGVector<float64_t> linear_term)
{
    if (!m_labels)
        SG_ERROR("Please assign labels first!\n");

    int32_t num_labels=m_labels->get_num_labels();

    if (num_labels!=linear_term.vlen)
    {
        SG_ERROR("Number of labels (%d) does not match number"
                " of entries (%d) in linear term \n", num_labels,
                linear_term.vlen);
    }

    m_linear_term=linear_term;
}

SGVector<float64_t> CLibLinear::get_linear_term()
{
    if (!m_linear_term.vlen || !m_linear_term.vector)
        SG_ERROR("Please assign linear term first!\n");

    return m_linear_term;
}

void CLibLinear::init_linear_term()
{
    if (!m_labels)
        SG_ERROR("Please assign labels first!\n");

    // default linear term is -1, i.e. the standard SVM dual objective
    m_linear_term=SGVector<float64_t>(m_labels->get_num_labels());
    SGVector<float64_t>::fill_vector(m_linear_term.vector, m_linear_term.vlen, -1.0);
}

#endif //HAVE_LAPACK
