SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
SVM.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
9  */
10 
11 #include <shogun/lib/common.h>
12 #include <shogun/io/SGIO.h>
13 #include <shogun/base/Parallel.h>
14 #include <shogun/base/Parameter.h>
15 
19 
20 #include <string.h>
21 
22 #ifdef HAVE_PTHREAD
23 #include <pthread.h>
24 #endif
25 
26 using namespace shogun;
27 
// Constructs an SVM and forwards num_sv to set_defaults(), which registers the
// parameters, assigns default values and, when num_sv > 0, calls
// create_new_model(num_sv).
// NOTE(review): the listing numbering jumps from 28 to 30, so one line between
// the signature and the body (possibly a member-initializer list) is missing
// from this extract -- confirm against the real file.
28 CSVM::CSVM(int32_t num_sv)
30 {
31  set_defaults(num_sv);
32 }
33 
// NOTE(review): the signature of this definition is missing from the listing
// (numbering skips 34-35). From the names used below it presumably is a
// constructor taking a regularization constant C, a kernel k and labels lab --
// confirm against the real file.
36 {
37  set_defaults();
// The same value C is passed for both arguments of set_C().
38  set_C(C,C);
39  set_labels(lab);
40  set_kernel(k);
41 }
42 
// NOTE(review): the signature of this definition is missing from the listing
// (numbering skips 43); presumably this is the destructor -- confirm.
44 {
// Give up this object's reference to the MKL object (mkl may be NULL; it is
// initialised to NULL in set_defaults()).
45  SG_UNREF(mkl);
46 }
47 
// Registers the SVM's members with SG_ADD, assigns their default values and,
// if num_sv > 0, allocates a model with that many support vectors via
// create_new_model().
//
// num_sv: number of support vectors to allocate; values <= 0 allocate nothing.
48 void CSVM::set_defaults(int32_t num_sv)
49 {
// Parameter registration: (pointer, name, description, model-selection flag).
50  SG_ADD(&C1, "C1", "", MS_AVAILABLE);
51  SG_ADD(&C2, "C2", "", MS_AVAILABLE);
52  SG_ADD(&svm_loaded, "svm_loaded", "SVM is loaded.", MS_NOT_AVAILABLE);
53  SG_ADD(&epsilon, "epsilon", "", MS_AVAILABLE);
54  SG_ADD(&tube_epsilon, "tube_epsilon",
55  "Tube epsilon for support vector regression.", MS_AVAILABLE);
56  SG_ADD(&nu, "nu", "", MS_AVAILABLE);
57  SG_ADD(&objective, "objective", "", MS_NOT_AVAILABLE);
58  SG_ADD(&qpsize, "qpsize", "", MS_NOT_AVAILABLE);
// NOTE(review): the next three SG_ADD calls are truncated in this listing --
// their continuation lines (numbering skips 60, 62 and 64) are missing, so the
// final argument and closing ");" of each call are not shown here.
59  SG_ADD(&use_shrinking, "use_shrinking", "Shrinking shall be used.",
61  SG_ADD((CSGObject**) &mkl, "mkl", "MKL object that svm optimizers need.",
63  SG_ADD(&m_linear_term, "linear_term", "Linear term in qp.",
65 
// No callback and no MKL object until the corresponding setter is called.
66  callback=NULL;
67  mkl=NULL;
68 
// Set by load() to the outcome of parsing a model file.
69  svm_loaded=false;
70 
71  epsilon=1e-5;
72  tube_epsilon=1e-2;
73 
74  nu=0.5;
75  C1=1;
76  C2=1;
77 
78  objective=0;
79 
80  qpsize=41;
81  use_bias=true;
82  use_shrinking=true;
// NOTE(review): the listing numbering skips 83 here; one statement may be
// missing from this extract.
84  use_linadd=true;
85 
// Only allocate support-vector storage when a positive count was requested.
86  if (num_sv>0)
87  create_new_model(num_sv);
88 }
89 
90 bool CSVM::load(FILE* modelfl)
91 {
92  bool result=true;
93  char char_buffer[1024];
94  int32_t int_buffer;
95  float64_t double_buffer;
96  int32_t line_number=1;
97 
99 
100  if (fscanf(modelfl,"%4s\n", char_buffer)==EOF)
101  {
102  result=false;
103  SG_ERROR("error in svm file, line nr:%d\n", line_number)
104  }
105  else
106  {
107  char_buffer[4]='\0';
108  if (strcmp("%SVM", char_buffer)!=0)
109  {
110  result=false;
111  SG_ERROR("error in svm file, line nr:%d\n", line_number)
112  }
113  line_number++;
114  }
115 
116  int_buffer=0;
117  if (fscanf(modelfl," numsv=%d; \n", &int_buffer) != 1)
118  {
119  result=false;
120  SG_ERROR("error in svm file, line nr:%d\n", line_number)
121  }
122 
123  if (!feof(modelfl))
124  line_number++;
125 
126  SG_INFO("loading %ld support vectors\n",int_buffer)
127  create_new_model(int_buffer);
128 
129  if (fscanf(modelfl," kernel='%s'; \n", char_buffer) != 1)
130  {
131  result=false;
132  SG_ERROR("error in svm file, line nr:%d\n", line_number)
133  }
134 
135  if (!feof(modelfl))
136  line_number++;
137 
138  double_buffer=0;
139 
140  if (fscanf(modelfl," b=%lf; \n", &double_buffer) != 1)
141  {
142  result=false;
143  SG_ERROR("error in svm file, line nr:%d\n", line_number)
144  }
145 
146  if (!feof(modelfl))
147  line_number++;
148 
149  set_bias(double_buffer);
150 
151  if (fscanf(modelfl,"%8s\n", char_buffer) == EOF)
152  {
153  result=false;
154  SG_ERROR("error in svm file, line nr:%d\n", line_number)
155  }
156  else
157  {
158  char_buffer[9]='\0';
159  if (strcmp("alphas=[", char_buffer)!=0)
160  {
161  result=false;
162  SG_ERROR("error in svm file, line nr:%d\n", line_number)
163  }
164  line_number++;
165  }
166 
167  for (int32_t i=0; i<get_num_support_vectors(); i++)
168  {
169  double_buffer=0;
170  int_buffer=0;
171 
172  if (fscanf(modelfl," \[%lf,%d]; \n", &double_buffer, &int_buffer) != 2)
173  {
174  result=false;
175  SG_ERROR("error in svm file, line nr:%d\n", line_number)
176  }
177 
178  if (!feof(modelfl))
179  line_number++;
180 
181  set_support_vector(i, int_buffer);
182  set_alpha(i, double_buffer);
183  }
184 
185  if (fscanf(modelfl,"%2s", char_buffer) == EOF)
186  {
187  result=false;
188  SG_ERROR("error in svm file, line nr:%d\n", line_number)
189  }
190  else
191  {
192  char_buffer[3]='\0';
193  if (strcmp("];", char_buffer)!=0)
194  {
195  result=false;
196  SG_ERROR("error in svm file, line nr:%d\n", line_number)
197  }
198  line_number++;
199  }
200 
201  svm_loaded=result;
203  return result;
204 }
205 
// Writes the model to modelfl in the text layout that load() parses:
// a "%SVM" tag, "numsv=<n>;", "kernel='<name>';", "b=<bias>;", then an
// "alphas=[" ... "];" list with one "[alpha,index];" entry per support vector.
//
// modelfl: stream the model is written to with fprintf.
// Returns true unconditionally; the fprintf return values are not checked.
206 bool CSVM::save(FILE* modelfl)
207 {
// NOTE(review): the listing numbering skips 208 here; a line may be missing.
209 
// The kernel name is written below, so a kernel must be set.
210  if (!kernel)
211  SG_ERROR("Kernel not defined!\n")
212 
213  SG_INFO("Writing model file...")
214  fprintf(modelfl,"%%SVM\n");
215  fprintf(modelfl,"numsv=%d;\n", get_num_support_vectors());
216  fprintf(modelfl,"kernel='%s';\n", kernel->get_name());
217  fprintf(modelfl,"b=%+10.16e;\n",get_bias());
218 
// "\[" is not a standard escape sequence; the intended character is '['.
219  fprintf(modelfl, "alphas=\[\n");
220 
221  for(int32_t i=0; i<get_num_support_vectors(); i++)
// NOTE(review): the argument list of this fprintf (listing line 223) is
// missing from this extract; the format expects a floating-point value and an
// integer, matching the "[%lf,%d];" entries that load() reads.
222  fprintf(modelfl,"\t[%+10.16e,%d];\n",
224 
225  fprintf(modelfl, "];\n");
226 
227  SG_DONE()
// NOTE(review): the listing numbering skips 228 here; a line may be missing.
229  return true ;
230 }
231 
// NOTE(review): the first line of this definition's signature is missing from
// the listing (numbering skips 232). Only the tail of a function-pointer
// parameter type is visible below; the body uses parameters named m and cb.
233  (CMKL* mkl, const float64_t* sumw, const float64_t suma))
234 {
// Reference the new object before releasing the old one, then store it.
235  SG_REF(m);
236  SG_UNREF(mkl);
237  mkl=m;
238 
239  callback=cb;
240 }
241 
// NOTE(review): the signature of this definition is missing from the listing
// (numbering skips 242).
//
// Recomputes the member `objective` over the current support vectors as
//   sum_i ( -alpha_i * label(sv_i) + 0.5 * sum_j alpha_i * alpha_j * k(sv_i, sv_j) )
// and returns it. Requires both m_labels and kernel to be set; otherwise
// SG_ERROR is raised.
243 {
244  int32_t n=get_num_support_vectors();
245 
246  if (m_labels && kernel)
247  {
248  objective=0;
249  for (int32_t i=0; i<n; i++)
250  {
// ii / jj are the indices stored for support vectors i / j.
251  int32_t ii=get_support_vector(i);
// Linear part: labels are read through a cast to CBinaryLabels.
252  objective-=get_alpha(i)*((CBinaryLabels*) m_labels)->get_label(ii);
253 
// Quadratic part: full double loop over all support-vector pairs.
254  for (int32_t j=0; j<n; j++)
255  {
256  int32_t jj=get_support_vector(j);
257  objective+=0.5*get_alpha(i)*get_alpha(j)*kernel->kernel(ii,jj);
258  }
259  }
260  }
261  else
262  SG_ERROR("cannot compute objective, labels or kernel not set\n")
263 
264  return objective;
265 }
266 
// NOTE(review): the signature of this definition is missing from the listing
// (numbering skips 267).
//
// Returns regularizer + loss over the current support vectors, where both
// terms are accumulated with a negative sign:
//   regularizer = -0.5 * sum_i sum_j alpha_i * alpha_j * k(sv_i, sv_j)
//   loss        = -sum_i w_i * max(0, 1 - label(sv_i) * apply_one(sv_i))
// Requires both m_labels and kernel to be set; otherwise SG_ERROR is raised.
268 {
269  int32_t n=get_num_support_vectors();
270  float64_t regularizer=0;
271  float64_t loss=0;
272 
273 
274 
275  if (m_labels && kernel)
276  {
// Use C2 when it is positive, otherwise fall back to C1 for both weights.
277  float64_t C2_tmp=C1;
278  if(C2>0)
279  {
280  C2_tmp=C2;
281  }
282 
283  for (int32_t i=0; i<n; i++)
284  {
285  int32_t ii=get_support_vector(i);
286  for (int32_t j=0; j<n; j++)
287  {
288  int32_t jj=get_support_vector(j);
289  regularizer-=0.5*get_alpha(i)*get_alpha(j)*kernel->kernel(ii,jj);
290  }
291 
// Weight w_i = C1*(1-y)/2 + C2_tmp*(1+y)/2 with y = label(sv_i); for labels
// in {-1,+1} (assumed, labels are cast to CBinaryLabels) this selects C1 for
// y = -1 and C2_tmp for y = +1. The weight multiplies the hinge loss.
292  loss-=(C1*(-((CBinaryLabels*) m_labels)->get_label(ii)+1)/2.0 + C2_tmp*(((CBinaryLabels*) m_labels)->get_label(ii)+1)/2.0 )*CMath::max(0.0, 1.0-((CBinaryLabels*) m_labels)->get_label(ii)*apply_one(ii));
293  }
294 
295  }
296  else
297  SG_ERROR("cannot compute objective, labels or kernel not set\n")
298 
299  return regularizer+loss;
300 }
301 
// NOTE(review): the signature of this definition is missing from the listing
// (numbering skips 302).
//
// Returns a newly allocated copy of m_linear_term's vlen entries, or NULL when
// the linear term is empty. The buffer comes from SG_MALLOC and is not kept by
// this object; presumably the caller is responsible for freeing it -- confirm.
303 {
304  if (m_linear_term.vlen==0)
305  return NULL;
306  float64_t* a = SG_MALLOC(float64_t, m_linear_term.vlen);
307 
308  memcpy(a, m_linear_term.vector,
309  m_linear_term.vlen*sizeof(float64_t));
310 
311  return a;
312 }
313 
315 {
316  ASSERT(linear_term.vector)
317 
318  if (!m_labels)
319  SG_ERROR("Please assign labels first!\n")
320 
321  int32_t num_labels=m_labels->get_num_labels();
322 
323  if (num_labels != linear_term.vlen)
324  {
325  SG_ERROR("Number of labels (%d) does not match number"
326  "of entries (%d) in linear term \n", num_labels, linear_term.vlen);
327  }
328 
329  m_linear_term=linear_term;
330 }
331 
// NOTE(review): the signature of this definition is missing from the listing
// (numbering skips 332).
//
// Returns the stored linear term m_linear_term.
333 {
334  return m_linear_term;
335 }

SHOGUN Machine Learning Toolbox - Documentation