LibLinearRegression.cpp

/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Copyright (C) 2012 Soeren Sonnenburg
 */

#include <shogun/lib/config.h>

#ifdef HAVE_LAPACK
#include <shogun/regression/svr/LibLinearRegression.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/mathematics/Math.h>
#include <shogun/lib/Signal.h>

using namespace shogun;

CLibLinearRegression::CLibLinearRegression() :
	CLinearMachine()
{
	init_defaults();
}

CLibLinearRegression::CLibLinearRegression(float64_t C, CDotFeatures* feats, CLabels* labs) :
	CLinearMachine()
{
	init_defaults();
	set_C(C);
	set_features(feats);
	set_labels(labs);
}

void CLibLinearRegression::init_defaults()
{
	set_C(1.0);
	set_epsilon(1e-2);
	set_max_iter(10000);
	set_use_bias(false);
	m_liblinear_regression_type=L2R_L2LOSS_SVR_DUAL;
}
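
/* A minimal usage sketch (not part of the original file) showing how this
 * class is typically driven: construct it with a regularization constant,
 * features and labels, then train and apply.  CDenseFeatures,
 * CRegressionLabels, train() and apply_regression() are assumed to be the
 * surrounding Shogun 2.x API; X and targets stand for data the caller
 * already has.
 *
 *   CDenseFeatures<float64_t>* feats=new CDenseFeatures<float64_t>(X);
 *   CRegressionLabels* labs=new CRegressionLabels(targets);
 *
 *   CLibLinearRegression* svr=new CLibLinearRegression(1.0, feats, labs);
 *   svr->set_epsilon(1e-3);
 *   svr->train();
 *
 *   CRegressionLabels* out=svr->apply_regression(feats);
 *   SG_UNREF(out);
 *   SG_UNREF(svr);
 */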

void CLibLinearRegression::register_parameters()
{
	SG_ADD(&m_C, "m_C", "regularization constant", MS_AVAILABLE);
	SG_ADD(&m_epsilon, "m_epsilon", "tolerance epsilon", MS_NOT_AVAILABLE);
	SG_ADD(&m_tube_epsilon, "m_tube_epsilon", "svr tube epsilon", MS_AVAILABLE);
	SG_ADD(&m_max_iter, "m_max_iter", "max number of iterations", MS_NOT_AVAILABLE);
	SG_ADD(&m_use_bias, "m_use_bias", "indicates whether bias should be used", MS_NOT_AVAILABLE);
}

CLibLinearRegression::~CLibLinearRegression()
{
}

bool CLibLinearRegression::train_machine(CFeatures* data)
{
	if (data)
		set_features((CDotFeatures*)data);

	ASSERT(features);
	ASSERT(m_labels);

	int32_t num_train_labels=m_labels->get_num_labels();
	int32_t num_feat=features->get_dim_feature_space();
	int32_t num_vec=features->get_num_vectors();

	if (num_vec!=num_train_labels)
	{
		SG_ERROR("number of vectors %d does not match "
				"number of training labels %d\n",
				num_vec, num_train_labels);
	}

	// with a bias term, one extra entry beyond w.vlen is allocated to hold it
	if (m_use_bias)
		w=SGVector<float64_t>(SG_MALLOC(float64_t, num_feat+1), num_feat);
	else
		w=SGVector<float64_t>(num_feat);

	problem prob;
	if (m_use_bias)
	{
		prob.n=w.vlen+1;
		memset(w.vector, 0, sizeof(float64_t)*(w.vlen+1));
	}
	else
	{
		prob.n=w.vlen;
		memset(w.vector, 0, sizeof(float64_t)*w.vlen);
	}
	prob.l=num_vec;
	prob.x=features;
	prob.use_bias=m_use_bias;
	prob.y=SG_MALLOC(float64_t, prob.l);

	// copy the training targets; prob.y is read by all solvers below
	for (int32_t i=0; i<prob.l; i++)
		prob.y[i]=((CRegressionLabels*)m_labels)->get_label(i);

	switch (m_liblinear_regression_type)
	{
		case L2R_L2LOSS_SVR:
		{
			double* Cs = SG_MALLOC(double, prob.l);
			for(int i = 0; i < prob.l; i++)
				Cs[i] = m_C;

			function *fun_obj=new l2r_l2_svr_fun(&prob, Cs, m_tube_epsilon);
			CTron tron_obj(fun_obj, m_epsilon);
			tron_obj.tron(w.vector, m_max_train_time);
			delete fun_obj;
			SG_FREE(Cs);
			break;
		}
		case L2R_L1LOSS_SVR_DUAL:
			solve_l2r_l1l2_svr(&prob);
			break;
		case L2R_L2LOSS_SVR_DUAL:
			solve_l2r_l1l2_svr(&prob);
			break;
		default:
			SG_ERROR("Error: unknown regression type\n");
			break;
	}

	SG_FREE(prob.y);

	return true;
}

// A coordinate descent algorithm for the
// L1-loss and L2-loss epsilon-SVR dual problem
//
//  min_\beta  0.5 \beta^T (Q + diag(lambda)) \beta + p \sum_{i=1}^l |\beta_i| - \sum_{i=1}^l y_i \beta_i,
//    s.t.     -upper_bound_i <= \beta_i <= upper_bound_i,
//
// where Q_ij = x_i^T x_j and diag(lambda) is the diagonal regularization matrix
//
// In the L1-loss SVR case:
//  upper_bound_i = C
//  lambda_i = 0
// In the L2-loss SVR case:
//  upper_bound_i = INF
//  lambda_i = 1/(2*C)
//
// Given: x, y, p, C
// eps is the stopping tolerance
//
// The solution will be put in w
//
// See Algorithm 4 of Ho and Lin, 2012

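// Sketch of the per-coordinate update used below (derived from the problem
// statement above; not part of the original file): with w = \sum_j \beta_j x_j,
// the smooth part of the objective restricted to \beta_i has gradient and
// curvature
//
//   G = x_i^T w + lambda_i \beta_i - y_i,      H = Q_ii + lambda_i,
//
// and minimizing the quadratic model G*d + 0.5*H*d^2 + p*|\beta_i + d| gives
// the soft-thresholded Newton direction
//
//   d = -(G+p)/H   if G+p < H*\beta_i
//   d = -(G-p)/H   if G-p > H*\beta_i
//   d = -\beta_i   otherwise (the coordinate is set to 0),
//
// after which \beta_i + d is clipped to [-upper_bound_i, upper_bound_i].
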
#undef GETI
#define GETI(i) (0)
// To support per-instance weights, define GETI(i) as (i)

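// A sketch of that weighted variant (not part of the original file): lambda
// and upper_bound would become length-l arrays filled from per-instance costs
// C_i (hypothetical here), e.g.
//
//   for (i=0; i<l; i++)
//   {
//       lambda[i]      = 0.5/C_i;   // L2-loss case (upper_bound[i] stays INF)
//       upper_bound[i] = C_i;       // L1-loss case (lambda[i] stays 0)
//   }
//
// so that lambda[GETI(i)] and upper_bound[GETI(i)] select instance i's own values.
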
void CLibLinearRegression::solve_l2r_l1l2_svr(const problem *prob)
{
	int l = prob->l;
	double C = m_C;
	double p = m_tube_epsilon;
	int w_size = prob->n;
	double eps = m_epsilon;
	int i, s, iter = 0;
	int max_iter = 1000;
	int active_size = l;
	int *index = new int[l];

	double d, G, H;
	double Gmax_old = CMath::INFTY;
	double Gmax_new, Gnorm1_new;
	double Gnorm1_init = 0.0;
	double *beta = new double[l];
	double *QD = new double[l];
	double *y = prob->y;

	// L2R_L2LOSS_SVR_DUAL
	double lambda[1], upper_bound[1];
	lambda[0] = 0.5/C;
	upper_bound[0] = CMath::INFTY;

	if (m_liblinear_regression_type == L2R_L1LOSS_SVR_DUAL)
	{
		lambda[0] = 0;
		upper_bound[0] = C;
	}

	// Initial beta can be set here. Note that
	// -upper_bound <= beta[i] <= upper_bound
	for(i=0; i<l; i++)
		beta[i] = 0;

	for(i=0; i<w_size; i++)
		w[i] = 0;
	for(i=0; i<l; i++)
	{
		QD[i] = prob->x->dot(i, prob->x, i);
		prob->x->add_to_dense_vec(beta[i], i, w.vector, w_size);

		if (prob->use_bias)
			w.vector[w_size]+=beta[i];

		index[i] = i;
	}

	while(iter < max_iter)
	{
		Gmax_new = 0;
		Gnorm1_new = 0;

		for(i=0; i<active_size; i++)
		{
			int j = i+rand()%(active_size-i);
			CMath::swap(index[i], index[j]);
		}

		for(s=0; s<active_size; s++)
		{
			i = index[s];
			G = -y[i] + lambda[GETI(i)]*beta[i];
			H = QD[i] + lambda[GETI(i)];

			G += prob->x->dense_dot(i, w.vector, w_size);
			if (prob->use_bias)
				G+=w.vector[w_size];

			double Gp = G+p;
			double Gn = G-p;
			double violation = 0;
			if(beta[i] == 0)
			{
				if(Gp < 0)
					violation = -Gp;
				else if(Gn > 0)
					violation = Gn;
				else if(Gp>Gmax_old && Gn<-Gmax_old)
				{
					active_size--;
					CMath::swap(index[s], index[active_size]);
					s--;
					continue;
				}
			}
			else if(beta[i] >= upper_bound[GETI(i)])
			{
				if(Gp > 0)
					violation = Gp;
				else if(Gp < -Gmax_old)
				{
					active_size--;
					CMath::swap(index[s], index[active_size]);
					s--;
					continue;
				}
			}
			else if(beta[i] <= -upper_bound[GETI(i)])
			{
				if(Gn < 0)
					violation = -Gn;
				else if(Gn > Gmax_old)
				{
					active_size--;
					CMath::swap(index[s], index[active_size]);
					s--;
					continue;
				}
			}
			else if(beta[i] > 0)
				violation = fabs(Gp);
			else
				violation = fabs(Gn);

			Gmax_new = CMath::max(Gmax_new, violation);
			Gnorm1_new += violation;

			// obtain Newton direction d
			if(Gp < H*beta[i])
				d = -Gp/H;
			else if(Gn > H*beta[i])
				d = -Gn/H;
			else
				d = -beta[i];

			if(fabs(d) < 1.0e-12)
				continue;

			double beta_old = beta[i];
			beta[i] = CMath::min(CMath::max(beta[i]+d, -upper_bound[GETI(i)]), upper_bound[GETI(i)]);
			d = beta[i]-beta_old;

			if(d != 0)
			{
				prob->x->add_to_dense_vec(d, i, w.vector, w_size);

				if (prob->use_bias)
					w.vector[w_size]+=d;
			}
		}

		if(iter == 0)
			Gnorm1_init = Gnorm1_new;
		iter++;

		SG_SABS_PROGRESS(Gnorm1_new, -CMath::log10(Gnorm1_new), -CMath::log10(eps*Gnorm1_init), -CMath::log10(Gnorm1_init), 6);

		if(Gnorm1_new <= eps*Gnorm1_init)
		{
			if(active_size == l)
				break;
			else
			{
				active_size = l;
				Gmax_old = CMath::INFTY;
				continue;
			}
		}

		Gmax_old = Gmax_new;
	}

	SG_DONE();
	SG_INFO("\noptimization finished, #iter = %d\n", iter);
	if(iter >= max_iter)
		SG_INFO("\nWARNING: reaching max number of iterations\nUsing -s 11 may be faster\n\n");

	// calculate objective value
	double v = 0;
	int nSV = 0;
	for(i=0; i<w_size; i++)
		v += w[i]*w[i];
	v = 0.5*v;
	for(i=0; i<l; i++)
	{
		v += p*fabs(beta[i]) - y[i]*beta[i] + 0.5*lambda[GETI(i)]*beta[i]*beta[i];
		if(beta[i] != 0)
			nSV++;
	}

	SG_INFO("Objective value = %lf\n", v);
	SG_INFO("nSV = %d\n", nSV);

	delete [] beta;
	delete [] QD;
	delete [] index;
}

#endif /* HAVE_LAPACK */
