SHOGUN  4.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
SVM.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
9  */
10 
11 #include <shogun/lib/common.h>
12 #include <shogun/io/SGIO.h>
13 #include <shogun/base/Parallel.h>
14 #include <shogun/base/Parameter.h>
15 
19 
20 #include <string.h>
21 
22 #ifdef HAVE_PTHREAD
23 #include <pthread.h>
24 #endif
25 
26 using namespace shogun;
27 
28 CSVM::CSVM(int32_t num_sv)
30 {
31  set_defaults(num_sv);
32 }
33 
// NOTE(review): the signature line(s) of this definition (listing lines
// 34-35) are missing from this extract. The body reads `C`, `k` and `lab`,
// which look like the parameters of a second CSVM constructor -- TODO confirm
// name, parameter order and types against the upstream file.
//
// Resets all members via set_defaults(), then uses the same value C for both
// arguments of set_C() and attaches the given labels and kernel.
36 {
37  set_defaults();
38  set_C(C,C);
39  set_labels(lab);
40  set_kernel(k);
41 }
42 
44 {
45  SG_UNREF(mkl);
46 }
47 
// Registers the SVM's members through SG_ADD and then assigns each of them a
// default value. When num_sv is positive a model with that many support
// vectors is created via create_new_model().
//
// NOTE(review): the embedded listing numbers jump 59 -> 61 -> 63 -> 65 and
// 82 -> 84. The closing argument lines of the last three SG_ADD calls and one
// statement after `use_shrinking=true;` appear to have been lost in
// extraction -- restore them from the upstream file before compiling.
48 void CSVM::set_defaults(int32_t num_sv)
49 {
// Parameter registration.
50  SG_ADD(&C1, "C1", "", MS_AVAILABLE);
51  SG_ADD(&C2, "C2", "", MS_AVAILABLE);
52  SG_ADD(&svm_loaded, "svm_loaded", "SVM is loaded.", MS_NOT_AVAILABLE);
53  SG_ADD(&epsilon, "epsilon", "", MS_AVAILABLE);
54  SG_ADD(&tube_epsilon, "tube_epsilon",
55  "Tube epsilon for support vector regression.", MS_AVAILABLE);
56  SG_ADD(&nu, "nu", "", MS_AVAILABLE);
57  SG_ADD(&objective, "objective", "", MS_NOT_AVAILABLE);
58  SG_ADD(&qpsize, "qpsize", "", MS_NOT_AVAILABLE);
59  SG_ADD(&use_shrinking, "use_shrinking", "Shrinking shall be used.",
61  SG_ADD((CSGObject**) &mkl, "mkl", "MKL object that svm optimizers need.",
63  SG_ADD(&m_linear_term, "linear_term", "Linear term in qp.",
65 
// No MKL object and no callback are attached by default.
66  callback=NULL;
67  mkl=NULL;
68 
69  svm_loaded=false;
70 
71  epsilon=1e-5;
72  tube_epsilon=1e-2;
73 
74  nu=0.5;
75  C1=1;
76  C2=1;
77 
78  objective=0;
79 
80  qpsize=41;
81  use_bias=true;
82  use_shrinking=true;
84  use_linadd=true;
85 
// Only allocate a model when a positive size was requested.
86  if (num_sv>0)
87  create_new_model(num_sv);
88 }
89 
90 bool CSVM::load(FILE* modelfl)
91 {
92  bool result=true;
93  char char_buffer[1024];
94  int32_t int_buffer;
95  float64_t double_buffer;
96  int32_t line_number=1;
97 
99 
100  if (fscanf(modelfl,"%4s\n", char_buffer)==EOF)
101  {
102  result=false;
103  SG_ERROR("error in svm file, line nr:%d\n", line_number)
104  }
105  else
106  {
107  char_buffer[4]='\0';
108  if (strcmp("%SVM", char_buffer)!=0)
109  {
110  result=false;
111  SG_ERROR("error in svm file, line nr:%d\n", line_number)
112  }
113  line_number++;
114  }
115 
116  int_buffer=0;
117  if (fscanf(modelfl," numsv=%d; \n", &int_buffer) != 1)
118  {
119  result=false;
120  SG_ERROR("error in svm file, line nr:%d\n", line_number)
121  }
122 
123  if (!feof(modelfl))
124  line_number++;
125 
126  SG_INFO("loading %ld support vectors\n",int_buffer)
127  create_new_model(int_buffer);
128 
129  if (fscanf(modelfl," kernel='%s'; \n", char_buffer) != 1)
130  {
131  result=false;
132  SG_ERROR("error in svm file, line nr:%d\n", line_number)
133  }
134 
135  if (!feof(modelfl))
136  line_number++;
137 
138  double_buffer=0;
139 
140  if (fscanf(modelfl," b=%lf; \n", &double_buffer) != 1)
141  {
142  result=false;
143  SG_ERROR("error in svm file, line nr:%d\n", line_number)
144  }
145 
146  if (!feof(modelfl))
147  line_number++;
148 
149  set_bias(double_buffer);
150 
151  if (fscanf(modelfl,"%8s\n", char_buffer) == EOF)
152  {
153  result=false;
154  SG_ERROR("error in svm file, line nr:%d\n", line_number)
155  }
156  else
157  {
158  char_buffer[9]='\0';
159  if (strcmp("alphas=[", char_buffer)!=0)
160  {
161  result=false;
162  SG_ERROR("error in svm file, line nr:%d\n", line_number)
163  }
164  line_number++;
165  }
166 
167  for (int32_t i=0; i<get_num_support_vectors(); i++)
168  {
169  double_buffer=0;
170  int_buffer=0;
171 
172  if (fscanf(modelfl," \[%lf,%d]; \n", &double_buffer, &int_buffer) != 2)
173  {
174  result=false;
175  SG_ERROR("error in svm file, line nr:%d\n", line_number)
176  }
177 
178  if (!feof(modelfl))
179  line_number++;
180 
181  set_support_vector(i, int_buffer);
182  set_alpha(i, double_buffer);
183  }
184 
185  if (fscanf(modelfl,"%2s", char_buffer) == EOF)
186  {
187  result=false;
188  SG_ERROR("error in svm file, line nr:%d\n", line_number)
189  }
190  else
191  {
192  char_buffer[3]='\0';
193  if (strcmp("];", char_buffer)!=0)
194  {
195  result=false;
196  SG_ERROR("error in svm file, line nr:%d\n", line_number)
197  }
198  line_number++;
199  }
200 
201  svm_loaded=result;
203  return result;
204 }
205 
206 bool CSVM::save(FILE* modelfl)
207 {
209 
210  if (!kernel)
211  SG_ERROR("Kernel not defined!\n")
212 
213  SG_INFO("Writing model file...")
214  fprintf(modelfl,"%%SVM\n");
215  fprintf(modelfl,"numsv=%d;\n", get_num_support_vectors());
216  fprintf(modelfl,"kernel='%s';\n", kernel->get_name());
217  fprintf(modelfl,"b=%+10.16e;\n",get_bias());
218 
219  fprintf(modelfl, "alphas=\[\n");
220 
221  for(int32_t i=0; i<get_num_support_vectors(); i++)
222  fprintf(modelfl,"\t[%+10.16e,%d];\n",
224 
225  fprintf(modelfl, "];\n");
226 
227  SG_DONE()
229  return true ;
230 }
231 
233  (CMKL* mkl, const float64_t* sumw, const float64_t suma))
234 {
235  SG_REF(m);
236  SG_UNREF(mkl);
237  mkl=m;
238 
239  callback=cb;
240 }
241 
243 {
244  int32_t n=get_num_support_vectors();
245 
246  if (m_labels && kernel)
247  {
248  objective=0;
249  for (int32_t i=0; i<n; i++)
250  {
251  int32_t ii=get_support_vector(i);
252  objective-=get_alpha(i)*((CBinaryLabels*) m_labels)->get_label(ii);
253 
254  for (int32_t j=0; j<n; j++)
255  {
256  int32_t jj=get_support_vector(j);
257  objective+=0.5*get_alpha(i)*get_alpha(j)*kernel->kernel(ii,jj);
258  }
259  }
260  }
261  else
262  SG_ERROR("cannot compute objective, labels or kernel not set\n")
263 
264  return objective;
265 }
266 
268 {
269  int32_t n=get_num_support_vectors();
270  float64_t regularizer=0;
271  float64_t loss=0;
272 
273 
274 
275  if (m_labels && kernel)
276  {
277  float64_t C2_tmp=C1;
278  if(C2>0)
279  {
280  C2_tmp=C2;
281  }
282 
283  for (int32_t i=0; i<n; i++)
284  {
285  int32_t ii=get_support_vector(i);
286  for (int32_t j=0; j<n; j++)
287  {
288  int32_t jj=get_support_vector(j);
289  regularizer-=0.5*get_alpha(i)*get_alpha(j)*kernel->kernel(ii,jj);
290  }
291 
292  loss-=(C1*(-((CBinaryLabels*) m_labels)->get_label(ii)+1)/2.0 + C2_tmp*(((CBinaryLabels*) m_labels)->get_label(ii)+1)/2.0 )*CMath::max(0.0, 1.0-((CBinaryLabels*) m_labels)->get_label(ii)*apply_one(ii));
293  }
294 
295  }
296  else
297  SG_ERROR("cannot compute objective, labels or kernel not set\n")
298 
299  return regularizer+loss;
300 }
301 
303 {
304  if (m_linear_term.vlen==0)
305  return NULL;
306  float64_t* a = SG_MALLOC(float64_t, m_linear_term.vlen);
307 
308  memcpy(a, m_linear_term.vector,
309  m_linear_term.vlen*sizeof(float64_t));
310 
311  return a;
312 }
313 
315 {
316  ASSERT(linear_term.vector)
317 
318  if (!m_labels)
319  SG_ERROR("Please assign labels first!\n")
320 
321  int32_t num_labels=m_labels->get_num_labels();
322 
323  if (num_labels != linear_term.vlen)
324  {
325  SG_ERROR("Number of labels (%d) does not match number"
326  "of entries (%d) in linear term \n", num_labels, linear_term.vlen);
327  }
328 
329  m_linear_term=linear_term;
330 }
331 
333 {
334  return m_linear_term;
335 }
virtual float64_t apply_one(int32_t num)
virtual const char * get_name() const =0
bool use_shrinking
Definition: SVM.h:265
#define SG_INFO(...)
Definition: SGIO.h:118
#define SG_RESET_LOCALE
Definition: SGIO.h:86
#define SG_DONE()
Definition: SGIO.h:157
float64_t compute_svm_primal_objective()
Definition: SVM.cpp:267
int32_t qpsize
Definition: SVM.h:263
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:43
virtual int32_t get_num_labels() const =0
bool(* callback)(CMKL *mkl, const float64_t *sumw, const float64_t suma)
Definition: SVM.h:269
virtual ~CSVM()
Definition: SVM.cpp:43
CLabels * m_labels
Definition: Machine.h:361
#define SG_ERROR(...)
Definition: SGIO.h:129
void set_callback_function(CMKL *m, bool(*cb)(CMKL *mkl, const float64_t *sumw, const float64_t suma))
Definition: SVM.cpp:232
float64_t kernel(int32_t idx_a, int32_t idx_b)
Definition: Kernel.h:206
CMKL * mkl
Definition: SVM.h:272
virtual void set_linear_term(const SGVector< float64_t > linear_term)
Definition: SVM.cpp:314
A generic KernelMachine interface.
Definition: KernelMachine.h:51
float64_t epsilon
Definition: SVM.h:251
#define SG_REF(x)
Definition: SGObject.h:51
#define SG_SET_LOCALE_C
Definition: SGIO.h:85
virtual float64_t * get_linear_term_array()
Definition: SVM.cpp:302
void set_defaults(int32_t num_sv=0)
Definition: SVM.cpp:48
SGVector< float64_t > m_linear_term
Definition: SVM.h:246
float64_t compute_svm_dual_objective()
Definition: SVM.cpp:242
index_t vlen
Definition: SGVector.h:494
#define ASSERT(x)
Definition: SGIO.h:201
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:112
void set_bias(float64_t bias)
float64_t C2
Definition: SVM.h:259
double float64_t
Definition: common.h:50
bool set_alpha(int32_t idx, float64_t val)
virtual SGVector< float64_t > get_linear_term()
Definition: SVM.cpp:332
float64_t get_alpha(int32_t idx)
float64_t C1
Definition: SVM.h:257
static T max(T a, T b)
Definition: Math.h:168
bool set_support_vector(int32_t idx, int32_t val)
Multiple Kernel Learning.
Definition: MKL.h:95
int32_t get_support_vector(int32_t idx)
float64_t objective
Definition: SVM.h:261
float64_t tube_epsilon
Definition: SVM.h:253
bool load(FILE *svm_file)
Definition: SVM.cpp:90
#define SG_UNREF(x)
Definition: SGObject.h:52
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
CSVM(int32_t num_sv=0)
Definition: SVM.cpp:28
bool save(FILE *svm_file)
Definition: SVM.cpp:206
The Kernel base class.
Definition: Kernel.h:158
Binary Labels for binary classification.
Definition: BinaryLabels.h:37
bool svm_loaded
Definition: SVM.h:249
void set_kernel(CKernel *k)
#define SG_ADD(...)
Definition: SGObject.h:81
float64_t nu
Definition: SVM.h:255
virtual void set_labels(CLabels *lab)
Definition: Machine.cpp:65
void set_C(float64_t c_neg, float64_t c_pos)
Definition: SVM.h:118
bool create_new_model(int32_t num)

SHOGUN Machine Learning Toolbox - Documentation