SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
NewtonSVM.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2012 Harshit Syal
8  * Copyright (C) 2012 Harshit Syal
9  */
10 #include <shogun/lib/config.h>
11 
12 #ifdef HAVE_LAPACK
17 #include <shogun/labels/Labels.h>
20 
21 //#define DEBUG_NEWTON
22 //#define V_NEWTON
23 using namespace shogun;
24 
26 : CLinearMachine(), C(1), use_bias(true)
27 {
28 }
29 
30 CNewtonSVM::CNewtonSVM(float64_t c, CDotFeatures* traindat, CLabels* trainlab, int32_t itr)
32 {
33  lambda=1/c;
34  num_iter=itr;
35  prec=1e-6;
36  num_iter=20;
37  use_bias=true;
38  C=c;
39  set_features(traindat);
40  set_labels(trainlab);
41 }
42 
43 
45 {
46 }
47 
48 
50 {
53 
54  if (data)
55  {
56  if (!data->has_property(FP_DOT))
57  SG_ERROR("Specified features are not of type CDotFeatures\n")
58  set_features((CDotFeatures*) data);
59  }
60 
62 
63  SGVector<float64_t> train_labels=((CBinaryLabels*) m_labels)->get_labels();
64  int32_t num_feat=features->get_dim_feature_space();
65  int32_t num_vec=features->get_num_vectors();
66 
67  //Assigning dimensions for whole class scope
68  x_n=num_vec;
69  x_d=num_feat;
70 
71  ASSERT(num_vec==train_labels.vlen)
72 
73  float64_t* weights = SG_CALLOC(float64_t, x_d+1);
74  float64_t* out=SG_MALLOC(float64_t, x_n);
76 
77  int32_t *sv=SG_MALLOC(int32_t, x_n), size_sv=0, iter=0;
78  float64_t obj, *grad=SG_MALLOC(float64_t, x_d+1);
79  float64_t t;
80 
81  while(1)
82  {
83  iter++;
84 
85  if (iter>num_iter)
86  {
87  SG_PRINT("Maximum number of Newton steps reached. Try larger lambda")
88  break;
89  }
90 
91  obj_fun_linear(weights, out, &obj, sv, &size_sv, grad);
92 
93 #ifdef DEBUG_NEWTON
94  SG_PRINT("fun linear passed !\n")
95  SG_PRINT("Obj =%f\n", obj)
96  SG_PRINT("Grad=\n")
97 
98  for (int32_t i=0; i<x_d+1; i++)
99  SG_PRINT("grad[%d]=%.16g\n", i, grad[i])
100  SG_PRINT("SV=\n")
101 
102  for (int32_t i=0; i<size_sv; i++)
103  SG_PRINT("sv[%d]=%d\n", i, sv[i])
104 #endif
105 
107  float64_t* Xsv = SG_MALLOC(float64_t, x_d*size_sv);
108  for (int32_t k=0; k<size_sv; k++)
109  {
111  for (int32_t j=0; j<x_d; j++)
112  Xsv[k*x_d+j]=sgv.vector[j];
113  }
114  int32_t tx=x_d;
115  int32_t ty=size_sv;
117 
118 #ifdef DEBUG_NEWTON
119  SGMatrix<float64_t>::display_matrix(Xsv, x_d, size_sv);
120 #endif
121 
122  float64_t* lcrossdiag=SG_MALLOC(float64_t, (x_d+1)*(x_d+1));
123  float64_t* vector=SG_MALLOC(float64_t, x_d+1);
124 
125  for (int32_t i=0; i<x_d; i++)
126  vector[i]=lambda;
127 
128  vector[x_d]=0;
129 
130  SGMatrix<float64_t>::create_diagonal_matrix(lcrossdiag, vector, x_d+1);
131  float64_t* Xsv2=SG_MALLOC(float64_t, x_d*x_d);
132  cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, x_d, x_d, size_sv,
133  1.0, Xsv, size_sv, Xsv, size_sv, 0.0, Xsv2, x_d);
134  float64_t* sum=SG_CALLOC(float64_t, x_d);
135 
136  for (int32_t j=0; j<x_d; j++)
137  {
138  for (int32_t i=0; i<size_sv; i++)
139  sum[j]+=Xsv[i+j*size_sv];
140  }
141 
142  float64_t* Xsv2sum=SG_MALLOC(float64_t, (x_d+1)*(x_d+1));
143 
144  for (int32_t i=0; i<x_d; i++)
145  {
146  for (int32_t j=0; j<x_d; j++)
147  Xsv2sum[j*(x_d+1)+i]=Xsv2[j*x_d+i];
148 
149  Xsv2sum[x_d*(x_d+1)+i]=sum[i];
150  }
151 
152  for (int32_t j=0; j<x_d; j++)
153  Xsv2sum[j*(x_d+1)+x_d]=sum[j];
154 
155  Xsv2sum[x_d*(x_d+1)+x_d]=size_sv;
156  float64_t* identity_matrix=SG_MALLOC(float64_t, (x_d+1)*(x_d+1));
157 
158  SGVector<float64_t>::fill_vector(vector, x_d+1, 1.0);
159 
160  SGMatrix<float64_t>::create_diagonal_matrix(identity_matrix, vector, x_d+1);
161  cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, x_d+1, x_d+1,
162  x_d+1, 1.0, lcrossdiag, x_d+1, identity_matrix, x_d+1, 1.0,
163  Xsv2sum, x_d+1);
164 
165  float64_t* inverse=SG_MALLOC(float64_t, (x_d+1)*(x_d+1));
166  int32_t r=x_d+1;
167  SGMatrix<float64_t>::pinv(Xsv2sum, r, r, inverse);
168 
169  float64_t* step=SG_MALLOC(float64_t, r);
170  float64_t* s2=SG_MALLOC(float64_t, r);
171  cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, r, 1, r, 1.0,
172  inverse, r, grad, r, 0.0, s2, r);
173 
174  for (int32_t i=0; i<r; i++)
175  step[i]=-s2[i];
176 
177  line_search_linear(weights, step, out, &t);
178 
179 #ifdef DEBUG_NEWTON
180  SG_PRINT("t=%f\n\n", t)
181 
182  for (int32_t i=0; i<x_n; i++)
183  SG_PRINT("out[%d]=%.16g\n", i, out[i])
184 
185  for (int32_t i=0; i<x_d+1; i++)
186  SG_PRINT("weights[%d]=%.16g\n", i, weights[i])
187 #endif
188 
190  float64_t newton_decrement;
191  cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 1, 1, r, -0.5,
192  step, r, grad, r, 0.0, &newton_decrement, 1);
193 #ifdef V_NEWTON
194  SG_PRINT("Itr=%d, Obj=%f, No of sv=%d, Newton dec=%0.3f, line search=%0.3f\n\n",
195  iter, obj, size_sv, newton_decrement, t);
196 #endif
197 
198  SG_FREE(Xsv);
199  SG_FREE(vector);
200  SG_FREE(lcrossdiag);
201  SG_FREE(Xsv2);
202  SG_FREE(Xsv2sum);
203  SG_FREE(identity_matrix);
204  SG_FREE(inverse);
205  SG_FREE(step);
206  SG_FREE(s2);
207 
208  if (newton_decrement*2<prec*obj)
209  break;
210  }
211 
212 #ifdef V_NEWTON
213  SG_PRINT("FINAL W AND BIAS Vector=\n\n")
214  CMath::display_matrix(weights, x_d+1, 1);
215 #endif
216 
217  set_w(SGVector<float64_t>(weights, x_d));
218  set_bias(weights[x_d]);
219 
220  SG_FREE(sv);
221  SG_FREE(grad);
222  SG_FREE(out);
223 
224  return true;
225 
226 
227 }
228 
229 void CNewtonSVM::line_search_linear(float64_t* weights, float64_t* d, float64_t*
230  out, float64_t* tx)
231 {
232  SGVector<float64_t> Y=((CBinaryLabels*) m_labels)->get_labels();
233  float64_t* outz=SG_MALLOC(float64_t, x_n);
234  float64_t* temp1=SG_MALLOC(float64_t, x_n);
235  float64_t* temp1forout=SG_MALLOC(float64_t, x_n);
236  float64_t* outzsv=SG_MALLOC(float64_t, x_n);
237  float64_t* Ysv=SG_MALLOC(float64_t, x_n);
238  float64_t* Xsv=SG_MALLOC(float64_t, x_n);
239  float64_t* temp2=SG_MALLOC(float64_t, x_d);
240  float64_t t=0.0;
241  float64_t* Xd=SG_MALLOC(float64_t, x_n);
242 
243  for (int32_t i=0; i<x_n; i++)
244  Xd[i]=features->dense_dot(i, d, x_d);
245 
247 
248 #ifdef DEBUG_NEWTON
249  CMath::display_vector(d, x_d+1, "Weight vector");
250 
251  for (int32_t i=0; i<x_d+1; i++)
252  SG_SPRINT("Xd[%d]=%.18g\n", i, Xd[i])
253 
254  CMath::display_vector(Xd, x_n, "XD vector=");
255 #endif
256 
257  float64_t wd;
258  cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 1, 1, x_d, lambda,
259  weights, x_d, d, x_d, 0.0, &wd, 1);
260  float64_t tempg, dd;
261  cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 1, 1, x_d, lambda, d,
262  x_d, d, x_d, 0.0, &dd, 1);
263 
264  float64_t g, h;
265  int32_t sv_len=0, *sv=SG_MALLOC(int32_t, x_n);
266 
267  do
268  {
269  SGVector<float64_t>::vector_multiply(temp1, Y.vector, Xd, x_n);
270  memcpy(temp1forout, temp1, sizeof(float64_t)*x_n);
271  SGVector<float64_t>::scale_vector(t, temp1forout, x_n);
272  SGVector<float64_t>::add(outz, 1.0, out, -1.0, temp1forout, x_n);
273 
274  // Calculation of sv
275  sv_len=0;
276 
277  for (int32_t i=0; i<x_n; i++)
278  {
279  if (outz[i]>0)
280  sv[sv_len++]=i;
281  }
282 
283  //Calculation of gradient 'g'
284  for (int32_t i=0; i<sv_len; i++)
285  {
286  outzsv[i]=outz[sv[i]];
287  Ysv[i]=Y.vector[sv[i]];
288  Xsv[i]=Xd[sv[i]];
289  }
290 
291  memset(temp1, 0, sizeof(float64_t)*sv_len);
292  SGVector<float64_t>::vector_multiply(temp1, outzsv, Ysv, sv_len);
293  tempg=SGVector<float64_t>::dot(temp1, Xsv, sv_len);
294  g=wd+(t*dd);
295  g-=tempg;
296 
297  // Calculation of second derivative 'h'
298  cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 1, 1, sv_len, 1.0,
299  Xsv, sv_len, Xsv, sv_len, 0.0, &h, 1);
300  h+=dd;
301 
302  // Calculation of 1D Newton step 'd'
303  t-=g/h;
304 
305  if (((g*g)/h)<1e-10)
306  break;
307 
308  } while(1);
309 
310  for (int32_t i=0; i<x_n; i++)
311  out[i]=outz[i];
312  *tx=t;
313 
314  SG_FREE(sv);
315  SG_FREE(temp1);
316  SG_FREE(temp2);
317  SG_FREE(temp1forout);
318  SG_FREE(outz);
319  SG_FREE(outzsv);
320  SG_FREE(Ysv);
321  SG_FREE(Xsv);
322  SG_FREE(Xd);
323 }
324 
325 void CNewtonSVM::obj_fun_linear(float64_t* weights, float64_t* out,
326  float64_t* obj, int32_t* sv, int32_t* numsv, float64_t* grad)
327 {
328  SGVector<float64_t> v=((CBinaryLabels*) m_labels)->get_labels();
329 
330  for (int32_t i=0; i<x_n; i++)
331  {
332  if (out[i]<0)
333  out[i]=0;
334  }
335 
336 #ifdef DEBUG_NEWTON
337  for (int32_t i=0; i<x_n; i++)
338  SG_SPRINT("out[%d]=%.16g\n", i, out[i])
339 #endif
340 
341  //create copy of w0
342  float64_t* w0=SG_MALLOC(float64_t, x_d+1);
343  memcpy(w0, weights, sizeof(float64_t)*(x_d));
344  w0[x_d]=0; //do not penalize b
345 
346  //create copy of out
347  float64_t* out1=SG_MALLOC(float64_t, x_n);
348 
349  //compute steps for obj
350  SGVector<float64_t>::vector_multiply(out1, out, out, x_n);
351  float64_t p1=SGVector<float64_t>::sum(out1, x_n)/2;
352  float64_t C1;
353  float64_t* w0copy=SG_MALLOC(float64_t, x_d+1);
354  memcpy(w0copy, w0, sizeof(float64_t)*(x_d+1));
355  SGVector<float64_t>::scale_vector(0.5, w0copy, x_d+1);
356  cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 1, 1, x_d+1, lambda,
357  w0, x_d+1, w0copy, x_d+1, 0.0, &C1, 1);
358  *obj=p1+C1;
360  float64_t* temp=SG_CALLOC(float64_t, x_n); //temp = out.*Y
361  SGVector<float64_t>::vector_multiply(temp, out, v.vector, x_n);
362  float64_t* temp1=SG_CALLOC(float64_t, x_d);
364 
365  for (int32_t i=0; i<x_n; i++)
366  {
367  features->add_to_dense_vec(temp[i], i, temp1, x_d);
368 #ifdef DEBUG_NEWTON
369  SG_SPRINT("\ntemp[%d]=%f", i, temp[i])
370  CMath::display_vector(vec.vector, x_d, "vector");
371  CMath::display_vector(temp1, x_d, "debuging");
372 #endif
373  }
374  float64_t* p2=SG_MALLOC(float64_t, x_d+1);
375 
376  for (int32_t i=0; i<x_d; i++)
377  p2[i]=temp1[i];
378 
379  p2[x_d]=SGVector<float64_t>::sum(temp, x_n);
380  SGVector<float64_t>::add(grad, 1.0, w0, -1.0, p2, x_d+1);
381  int32_t sv_len=0;
382 
383  for (int32_t i=0; i<x_n; i++)
384  {
385  if (out[i]>0)
386  sv[sv_len++]=i;
387  }
388 
389  *numsv=sv_len;
390 
391  SG_FREE(w0);
392  SG_FREE(w0copy);
393  SG_FREE(out1);
394  SG_FREE(temp);
395  SG_FREE(temp1);
396  SG_FREE(p2);
397 }
398 #endif //HAVE_LAPACK

SHOGUN Machine Learning Toolbox - Documentation