00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include "lib/common.h"
00012 #include "lib/io.h"
00013 #include "base/Parallel.h"
00014 #include "base/Parameter.h"
00015
00016 #include "classifier/svm/SVM.h"
00017 #include "classifier/mkl/MKL.h"
00018
00019 #include <string.h>
00020
00021 #ifndef WIN32
00022 #include <pthread.h>
00023 #endif
00024
00025 using namespace shogun;
00026
00027 CSVM::CSVM(int32_t num_sv)
00028 : CKernelMachine()
00029 {
00030 set_defaults(num_sv);
00031 }
00032
00033 CSVM::CSVM(float64_t C, CKernel* k, CLabels* lab)
00034 : CKernelMachine()
00035 {
00036 set_defaults();
00037 set_C(C,C);
00038 set_labels(lab);
00039 set_kernel(k);
00040 }
00041
00042 CSVM::~CSVM()
00043 {
00044 delete[] m_linear_term;
00045 SG_UNREF(mkl);
00046 }
00047
00048 void CSVM::set_defaults(int32_t num_sv)
00049 {
00050 m_parameters->add(&C1, "C1");
00051 m_parameters->add(&C2, "C2");
00052 m_parameters->add(&svm_loaded, "svm_loaded",
00053 "SVM is loaded.");
00054 m_parameters->add(&epsilon, "epsilon");
00055 m_parameters->add(&tube_epsilon, "tube_epsilon",
00056 "Tube epsilon for support vector regression.");
00057 m_parameters->add(&nu, "nu");
00058 m_parameters->add(&objective, "objective");
00059 m_parameters->add(&qpsize, "qpsize");
00060 m_parameters->add(&use_shrinking, "use_shrinking",
00061 "Shrinking shall be used.");
00062 m_parameters->add((CSGObject**) &mkl, "mkl",
00063 "MKL object that svm optimizers need.");
00064 m_parameters->add_vector(&m_linear_term, &m_linear_term_len,
00065 "linear_term",
00066 "Linear term in qp.");
00067
00068 callback=NULL;
00069 mkl=NULL;
00070
00071 svm_loaded=false;
00072
00073 epsilon=1e-5;
00074 tube_epsilon=1e-2;
00075
00076 nu=0.5;
00077 C1=1;
00078 C2=1;
00079
00080 objective=0;
00081
00082 qpsize=41;
00083 use_bias=true;
00084 use_shrinking=true;
00085 use_batch_computation=true;
00086 use_linadd=true;
00087
00088 m_linear_term = NULL;
00089 m_linear_term_len = 0;
00090
00091 if (num_sv>0)
00092 create_new_model(num_sv);
00093 }
00094
00095 bool CSVM::load(FILE* modelfl)
00096 {
00097 bool result=true;
00098 char char_buffer[1024];
00099 int32_t int_buffer;
00100 float64_t double_buffer;
00101 int32_t line_number=1;
00102
00103 SG_SET_LOCALE_C;
00104
00105 if (fscanf(modelfl,"%4s\n", char_buffer)==EOF)
00106 {
00107 result=false;
00108 SG_ERROR( "error in svm file, line nr:%d\n", line_number);
00109 }
00110 else
00111 {
00112 char_buffer[4]='\0';
00113 if (strcmp("%SVM", char_buffer)!=0)
00114 {
00115 result=false;
00116 SG_ERROR( "error in svm file, line nr:%d\n", line_number);
00117 }
00118 line_number++;
00119 }
00120
00121 int_buffer=0;
00122 if (fscanf(modelfl," numsv=%d; \n", &int_buffer) != 1)
00123 {
00124 result=false;
00125 SG_ERROR( "error in svm file, line nr:%d\n", line_number);
00126 }
00127
00128 if (!feof(modelfl))
00129 line_number++;
00130
00131 SG_INFO( "loading %ld support vectors\n",int_buffer);
00132 create_new_model(int_buffer);
00133
00134 if (fscanf(modelfl," kernel='%s'; \n", char_buffer) != 1)
00135 {
00136 result=false;
00137 SG_ERROR( "error in svm file, line nr:%d\n", line_number);
00138 }
00139
00140 if (!feof(modelfl))
00141 line_number++;
00142
00143 double_buffer=0;
00144
00145 if (fscanf(modelfl," b=%lf; \n", &double_buffer) != 1)
00146 {
00147 result=false;
00148 SG_ERROR( "error in svm file, line nr:%d\n", line_number);
00149 }
00150
00151 if (!feof(modelfl))
00152 line_number++;
00153
00154 set_bias(double_buffer);
00155
00156 if (fscanf(modelfl,"%8s\n", char_buffer) == EOF)
00157 {
00158 result=false;
00159 SG_ERROR( "error in svm file, line nr:%d\n", line_number);
00160 }
00161 else
00162 {
00163 char_buffer[9]='\0';
00164 if (strcmp("alphas=[", char_buffer)!=0)
00165 {
00166 result=false;
00167 SG_ERROR( "error in svm file, line nr:%d\n", line_number);
00168 }
00169 line_number++;
00170 }
00171
00172 for (int32_t i=0; i<get_num_support_vectors(); i++)
00173 {
00174 double_buffer=0;
00175 int_buffer=0;
00176
00177 if (fscanf(modelfl," \[%lf,%d]; \n", &double_buffer, &int_buffer) != 2)
00178 {
00179 result=false;
00180 SG_ERROR( "error in svm file, line nr:%d\n", line_number);
00181 }
00182
00183 if (!feof(modelfl))
00184 line_number++;
00185
00186 set_support_vector(i, int_buffer);
00187 set_alpha(i, double_buffer);
00188 }
00189
00190 if (fscanf(modelfl,"%2s", char_buffer) == EOF)
00191 {
00192 result=false;
00193 SG_ERROR( "error in svm file, line nr:%d\n", line_number);
00194 }
00195 else
00196 {
00197 char_buffer[3]='\0';
00198 if (strcmp("];", char_buffer)!=0)
00199 {
00200 result=false;
00201 SG_ERROR( "error in svm file, line nr:%d\n", line_number);
00202 }
00203 line_number++;
00204 }
00205
00206 svm_loaded=result;
00207 SG_RESET_LOCALE;
00208 return result;
00209 }
00210
00211 bool CSVM::save(FILE* modelfl)
00212 {
00213 SG_SET_LOCALE_C;
00214
00215 if (!kernel)
00216 SG_ERROR("Kernel not defined!\n");
00217
00218 SG_INFO( "Writing model file...");
00219 fprintf(modelfl,"%%SVM\n");
00220 fprintf(modelfl,"numsv=%d;\n", get_num_support_vectors());
00221 fprintf(modelfl,"kernel='%s';\n", kernel->get_name());
00222 fprintf(modelfl,"b=%+10.16e;\n",get_bias());
00223
00224 fprintf(modelfl, "alphas=\[\n");
00225
00226 for(int32_t i=0; i<get_num_support_vectors(); i++)
00227 fprintf(modelfl,"\t[%+10.16e,%d];\n",
00228 CSVM::get_alpha(i), get_support_vector(i));
00229
00230 fprintf(modelfl, "];\n");
00231
00232 SG_DONE();
00233 SG_RESET_LOCALE;
00234 return true ;
00235 }
00236
00237 void CSVM::set_callback_function(CMKL* m, bool (*cb)
00238 (CMKL* mkl, const float64_t* sumw, const float64_t suma))
00239 {
00240 SG_UNREF(mkl);
00241 mkl=m;
00242 SG_REF(mkl);
00243
00244 callback=cb;
00245 }
00246
00247 float64_t CSVM::compute_svm_dual_objective()
00248 {
00249 int32_t n=get_num_support_vectors();
00250
00251 if (labels && kernel)
00252 {
00253 objective=0;
00254 for (int32_t i=0; i<n; i++)
00255 {
00256 int32_t ii=get_support_vector(i);
00257 objective-=get_alpha(i)*labels->get_label(ii);
00258
00259 for (int32_t j=0; j<n; j++)
00260 {
00261 int32_t jj=get_support_vector(j);
00262 objective+=0.5*get_alpha(i)*get_alpha(j)*kernel->kernel(ii,jj);
00263 }
00264 }
00265 }
00266 else
00267 SG_ERROR( "cannot compute objective, labels or kernel not set\n");
00268
00269 return objective;
00270 }
00271
00272 float64_t CSVM::compute_svm_primal_objective()
00273 {
00274 int32_t n=get_num_support_vectors();
00275 float64_t regularizer=0;
00276 float64_t loss=0;
00277
00278 if (labels && kernel)
00279 {
00280 for (int32_t i=0; i<n; i++)
00281 {
00282 int32_t ii=get_support_vector(i);
00283 for (int32_t j=0; j<n; j++)
00284 {
00285 int32_t jj=get_support_vector(j);
00286 regularizer-=0.5*get_alpha(i)*get_alpha(j)*kernel->kernel(ii,jj);
00287 }
00288
00289 loss-=C1*CMath::max(0.0, 1.0-get_label(ii)*classify_example(ii));
00290 }
00291 }
00292 else
00293 SG_ERROR( "cannot compute objective, labels or kernel not set\n");
00294
00295 return regularizer+loss;
00296 }
00297
00298 float64_t* CSVM::get_linear_term_array()
00299 {
00300 if (m_linear_term_len == 0)
00301 return NULL;
00302
00303 float64_t* a = new float64_t[m_linear_term_len];
00304 memcpy(a, m_linear_term, m_linear_term_len*sizeof (float64_t));
00305
00306 return a;
00307 }
00308
00309 void CSVM::set_linear_term(float64_t* linear_term, index_t len)
00310 {
00311 ASSERT(linear_term);
00312
00313 if (!labels)
00314 SG_ERROR("Please assign labels first!\n");
00315
00316 int32_t num_labels=labels->get_num_labels();
00317
00318 if (num_labels != len)
00319 {
00320 SG_ERROR("Number of labels (%d) does not match number"
00321 "of entries (%d) in linear term \n", num_labels, len);
00322 }
00323
00324 delete[] m_linear_term;
00325
00326 m_linear_term_len = len;
00327 m_linear_term = new float64_t[len];
00328 memcpy(m_linear_term, linear_term, len*sizeof (float64_t));
00329 }
00330
00331 float64_t* CSVM::get_linear_term_ptr(index_t* y)
00332 {
00333 if (y == NULL)
00334 return NULL;
00335
00336 *y = m_linear_term_len;
00337 return m_linear_term;
00338 }