SVM.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #include "lib/common.h"
00012 #include "lib/io.h"
00013 #include "base/Parallel.h"
00014 #include "base/Parameter.h"
00015 
00016 #include "classifier/svm/SVM.h"
00017 #include "classifier/mkl/MKL.h"
00018 
00019 #include <string.h>
00020 
00021 #ifndef WIN32
00022 #include <pthread.h>
00023 #endif
00024 
00025 using namespace shogun;
00026 
00027 CSVM::CSVM(int32_t num_sv)
00028 : CKernelMachine()
00029 {
00030     set_defaults(num_sv);
00031 }
00032 
00033 CSVM::CSVM(float64_t C, CKernel* k, CLabels* lab)
00034 : CKernelMachine()
00035 {
00036     set_defaults();
00037     set_C(C,C);
00038     set_labels(lab);
00039     set_kernel(k);
00040 }
00041 
00042 CSVM::~CSVM()
00043 {
00044     delete[] m_linear_term;
00045     SG_UNREF(mkl);
00046 }
00047 
00048 void CSVM::set_defaults(int32_t num_sv)
00049 {
00050     m_parameters->add(&C1, "C1");
00051     m_parameters->add(&C2, "C2");
00052     m_parameters->add(&svm_loaded, "svm_loaded",
00053                       "SVM is loaded.");
00054     m_parameters->add(&epsilon, "epsilon");
00055     m_parameters->add(&tube_epsilon, "tube_epsilon",
00056                       "Tube epsilon for support vector regression.");
00057     m_parameters->add(&nu, "nu");
00058     m_parameters->add(&objective, "objective");
00059     m_parameters->add(&qpsize, "qpsize");
00060     m_parameters->add(&use_shrinking, "use_shrinking",
00061                       "Shrinking shall be used.");
00062     m_parameters->add((CSGObject**) &mkl, "mkl",
00063                       "MKL object that svm optimizers need.");
00064     m_parameters->add_vector(&m_linear_term, &m_linear_term_len,
00065                              "linear_term",
00066                              "Linear term in qp.");
00067 
00068     callback=NULL;
00069     mkl=NULL;
00070 
00071     svm_loaded=false;
00072 
00073     epsilon=1e-5;
00074     tube_epsilon=1e-2;
00075 
00076     nu=0.5;
00077     C1=1;
00078     C2=1;
00079 
00080     objective=0;
00081 
00082     qpsize=41;
00083     use_bias=true;
00084     use_shrinking=true;
00085     use_batch_computation=true;
00086     use_linadd=true;
00087 
00088     m_linear_term = NULL;
00089     m_linear_term_len = 0;
00090 
00091     if (num_sv>0)
00092         create_new_model(num_sv);
00093 }
00094 
00095 bool CSVM::load(FILE* modelfl)
00096 {
00097     bool result=true;
00098     char char_buffer[1024];
00099     int32_t int_buffer;
00100     float64_t double_buffer;
00101     int32_t line_number=1;
00102 
00103     SG_SET_LOCALE_C;
00104 
00105     if (fscanf(modelfl,"%4s\n", char_buffer)==EOF)
00106     {
00107         result=false;
00108         SG_ERROR( "error in svm file, line nr:%d\n", line_number);
00109     }
00110     else
00111     {
00112         char_buffer[4]='\0';
00113         if (strcmp("%SVM", char_buffer)!=0)
00114         {
00115             result=false;
00116             SG_ERROR( "error in svm file, line nr:%d\n", line_number);
00117         }
00118         line_number++;
00119     }
00120 
00121     int_buffer=0;
00122     if (fscanf(modelfl," numsv=%d; \n", &int_buffer) != 1)
00123     {
00124         result=false;
00125         SG_ERROR( "error in svm file, line nr:%d\n", line_number);
00126     }
00127 
00128     if (!feof(modelfl))
00129         line_number++;
00130 
00131     SG_INFO( "loading %ld support vectors\n",int_buffer);
00132     create_new_model(int_buffer);
00133 
00134     if (fscanf(modelfl," kernel='%s'; \n", char_buffer) != 1)
00135     {
00136         result=false;
00137         SG_ERROR( "error in svm file, line nr:%d\n", line_number);
00138     }
00139 
00140     if (!feof(modelfl))
00141         line_number++;
00142 
00143     double_buffer=0;
00144 
00145     if (fscanf(modelfl," b=%lf; \n", &double_buffer) != 1)
00146     {
00147         result=false;
00148         SG_ERROR( "error in svm file, line nr:%d\n", line_number);
00149     }
00150 
00151     if (!feof(modelfl))
00152         line_number++;
00153 
00154     set_bias(double_buffer);
00155 
00156     if (fscanf(modelfl,"%8s\n", char_buffer) == EOF)
00157     {
00158         result=false;
00159         SG_ERROR( "error in svm file, line nr:%d\n", line_number);
00160     }
00161     else
00162     {
00163         char_buffer[9]='\0';
00164         if (strcmp("alphas=[", char_buffer)!=0)
00165         {
00166             result=false;
00167             SG_ERROR( "error in svm file, line nr:%d\n", line_number);
00168         }
00169         line_number++;
00170     }
00171 
00172     for (int32_t i=0; i<get_num_support_vectors(); i++)
00173     {
00174         double_buffer=0;
00175         int_buffer=0;
00176 
00177         if (fscanf(modelfl," \[%lf,%d]; \n", &double_buffer, &int_buffer) != 2)
00178         {
00179             result=false;
00180             SG_ERROR( "error in svm file, line nr:%d\n", line_number);
00181         }
00182 
00183         if (!feof(modelfl))
00184             line_number++;
00185 
00186         set_support_vector(i, int_buffer);
00187         set_alpha(i, double_buffer);
00188     }
00189 
00190     if (fscanf(modelfl,"%2s", char_buffer) == EOF)
00191     {
00192         result=false;
00193         SG_ERROR( "error in svm file, line nr:%d\n", line_number);
00194     }
00195     else
00196     {
00197         char_buffer[3]='\0';
00198         if (strcmp("];", char_buffer)!=0)
00199         {
00200             result=false;
00201             SG_ERROR( "error in svm file, line nr:%d\n", line_number);
00202         }
00203         line_number++;
00204     }
00205 
00206     svm_loaded=result;
00207     SG_RESET_LOCALE;
00208     return result;
00209 }
00210 
00211 bool CSVM::save(FILE* modelfl)
00212 {
00213     SG_SET_LOCALE_C;
00214 
00215     if (!kernel)
00216         SG_ERROR("Kernel not defined!\n");
00217 
00218     SG_INFO( "Writing model file...");
00219     fprintf(modelfl,"%%SVM\n");
00220     fprintf(modelfl,"numsv=%d;\n", get_num_support_vectors());
00221     fprintf(modelfl,"kernel='%s';\n", kernel->get_name());
00222     fprintf(modelfl,"b=%+10.16e;\n",get_bias());
00223 
00224     fprintf(modelfl, "alphas=\[\n");
00225 
00226     for(int32_t i=0; i<get_num_support_vectors(); i++)
00227         fprintf(modelfl,"\t[%+10.16e,%d];\n",
00228                 CSVM::get_alpha(i), get_support_vector(i));
00229 
00230     fprintf(modelfl, "];\n");
00231 
00232     SG_DONE();
00233     SG_RESET_LOCALE;
00234     return true ;
00235 }
00236 
00237 void CSVM::set_callback_function(CMKL* m, bool (*cb)
00238         (CMKL* mkl, const float64_t* sumw, const float64_t suma))
00239 {
00240     SG_UNREF(mkl);
00241     mkl=m;
00242     SG_REF(mkl);
00243 
00244     callback=cb;
00245 }
00246 
00247 float64_t CSVM::compute_svm_dual_objective()
00248 {
00249     int32_t n=get_num_support_vectors();
00250 
00251     if (labels && kernel)
00252     {
00253         objective=0;
00254         for (int32_t i=0; i<n; i++)
00255         {
00256             int32_t ii=get_support_vector(i);
00257             objective-=get_alpha(i)*labels->get_label(ii);
00258 
00259             for (int32_t j=0; j<n; j++)
00260             {
00261                 int32_t jj=get_support_vector(j);
00262                 objective+=0.5*get_alpha(i)*get_alpha(j)*kernel->kernel(ii,jj);
00263             }
00264         }
00265     }
00266     else
00267         SG_ERROR( "cannot compute objective, labels or kernel not set\n");
00268 
00269     return objective;
00270 }
00271 
00272 float64_t CSVM::compute_svm_primal_objective()
00273 {
00274     int32_t n=get_num_support_vectors();
00275     float64_t regularizer=0;
00276     float64_t loss=0;
00277 
00278     if (labels && kernel)
00279     {
00280         for (int32_t i=0; i<n; i++)
00281         {
00282             int32_t ii=get_support_vector(i);
00283             for (int32_t j=0; j<n; j++)
00284             {
00285                 int32_t jj=get_support_vector(j);
00286                 regularizer-=0.5*get_alpha(i)*get_alpha(j)*kernel->kernel(ii,jj);
00287             }
00288 
00289             loss-=C1*CMath::max(0.0, 1.0-get_label(ii)*classify_example(ii));
00290         }
00291     }
00292     else
00293         SG_ERROR( "cannot compute objective, labels or kernel not set\n");
00294 
00295     return regularizer+loss;
00296 }
00297 
00298 float64_t* CSVM::get_linear_term_array()
00299 {
00300     if (m_linear_term_len == 0)
00301         return NULL;
00302 
00303     float64_t* a = new float64_t[m_linear_term_len];
00304     memcpy(a, m_linear_term, m_linear_term_len*sizeof (float64_t));
00305 
00306     return a;
00307 }
00308 
00309 void CSVM::set_linear_term(float64_t* linear_term, index_t len)
00310 {
00311     ASSERT(linear_term);
00312 
00313     if (!labels)
00314         SG_ERROR("Please assign labels first!\n");
00315 
00316     int32_t num_labels=labels->get_num_labels();
00317 
00318     if (num_labels != len)
00319     {
00320         SG_ERROR("Number of labels (%d) does not match number"
00321                 "of entries (%d) in linear term \n", num_labels, len);
00322     }
00323 
00324     delete[] m_linear_term;
00325 
00326     m_linear_term_len = len;
00327     m_linear_term = new float64_t[len];
00328     memcpy(m_linear_term, linear_term, len*sizeof (float64_t));
00329 }
00330 
00331 float64_t* CSVM::get_linear_term_ptr(index_t* y)
00332 {
00333     if (y == NULL)
00334         return NULL;
00335 
00336     *y = m_linear_term_len;
00337     return m_linear_term;
00338 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation