SVMOcas.cpp

/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2007-2008 Vojtech Franc
 * Written (W) 2007-2009 Soeren Sonnenburg
 * Copyright (C) 2007-2009 Fraunhofer Institute FIRST and Max-Planck-Society
 */

#include <shogun/features/Labels.h>
#include <shogun/mathematics/Math.h>
#include <shogun/lib/Time.h>
#include <shogun/base/Parameter.h>
#include <shogun/base/Parallel.h>
#include <shogun/machine/LinearMachine.h>
#include <shogun/classifier/svm/SVMOcas.h>
#include <shogun/features/DotFeatures.h>

using namespace shogun;

CSVMOcas::CSVMOcas()
: CLinearMachine()
{
    init();
}

CSVMOcas::CSVMOcas(E_SVM_TYPE type)
: CLinearMachine()
{
    init();
    method=type;
}

CSVMOcas::CSVMOcas(
    float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearMachine()
{
    init();
    C1=C;
    C2=C;

    set_features(traindat);
    set_labels(trainlab);
}

CSVMOcas::~CSVMOcas()
{
}
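
/* Usage sketch (illustrative; not part of the original file). The feature and
 * label classes, setter names, and constructor signatures below are assumed
 * to match the Shogun release this file ships with (CSimpleFeatures, CLabels,
 * set_epsilon, set_bias_enabled); treat this as a sketch, not verified code.
 *
 *   #include <shogun/base/init.h>
 *   #include <shogun/features/SimpleFeatures.h>
 *   #include <shogun/classifier/svm/SVMOcas.h>
 *
 *   using namespace shogun;
 *
 *   init_shogun();
 *
 *   // toy data: 2 dimensions, 4 examples (columns), labels in {-1,+1}
 *   int32_t num_feat=2, num_vec=4;
 *   SGMatrix<float64_t> X(num_feat, num_vec);
 *   SGVector<float64_t> y(num_vec);
 *   for (int32_t i=0; i<num_vec; i++)
 *   {
 *       y.vector[i] = (i<num_vec/2) ? -1.0 : +1.0;
 *       X.matrix[i*num_feat+0] = y.vector[i];      // column-major storage
 *       X.matrix[i*num_feat+1] = 0.5*y.vector[i];
 *   }
 *
 *   CSimpleFeatures<float64_t>* feats = new CSimpleFeatures<float64_t>(X);
 *   CLabels* labs = new CLabels(y);
 *
 *   CSVMOcas* ocas = new CSVMOcas(1.0, feats, labs); // C=1.0
 *   ocas->set_epsilon(1e-2);
 *   ocas->set_bias_enabled(true);
 *   ocas->train();
 *
 *   SG_UNREF(ocas);
 *   exit_shogun();
 */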

bool CSVMOcas::train_machine(CFeatures* data)
{
    SG_INFO("C=%f, epsilon=%f, bufsize=%d\n", get_C1(), get_epsilon(), bufsize);
    SG_DEBUG("use_bias = %i\n", get_bias_enabled());

    ASSERT(labels);
    if (data)
    {
        if (!data->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n");
        set_features((CDotFeatures*) data);
    }
    ASSERT(features);
    ASSERT(labels->is_two_class_labeling());

    lab=labels->get_labels();
    w_dim=features->get_dim_feature_space();
    int32_t num_vec=features->get_num_vectors();

    if (num_vec!=lab.vlen || num_vec<=0)
        SG_ERROR("num_vec=%d num_train_labels=%d\n", num_vec, lab.vlen);

    SG_FREE(w);
    w=SG_MALLOC(float64_t, w_dim);
    memset(w, 0, w_dim*sizeof(float64_t));

    SG_FREE(old_w);
    old_w=SG_MALLOC(float64_t, w_dim);
    memset(old_w, 0, w_dim*sizeof(float64_t));
    bias=0;
    old_bias=0;

    tmp_a_buf=SG_MALLOC(float64_t, w_dim);
    cp_value=SG_MALLOC(float64_t*, bufsize);
    memset(cp_value, 0, sizeof(float64_t*)*bufsize);
    cp_index=SG_MALLOC(uint32_t*, bufsize);
    memset(cp_index, 0, sizeof(uint32_t*)*bufsize);
    cp_nz_dims=SG_MALLOC(uint32_t, bufsize);
    cp_bias=SG_MALLOC(float64_t, bufsize);
    memset(cp_bias, 0, sizeof(float64_t)*bufsize);

    float64_t TolAbs=0;
    float64_t QPBound=0;
    int32_t Method=0;
    if (method == SVM_OCAS)
        Method = 1;
    ocas_return_value_T result = svm_ocas_solver( get_C1(), num_vec, get_epsilon(),
            TolAbs, QPBound, get_max_train_time(), bufsize, Method,
            &CSVMOcas::compute_W,
            &CSVMOcas::update_W,
            &CSVMOcas::add_new_cut,
            &CSVMOcas::compute_output,
            &CSVMOcas::sort,
            &CSVMOcas::print,
            this);

    SG_INFO("Ocas converged after %d iterations\n"
            "==================================\n"
            "timing statistics:\n"
            "output_time: %f s\n"
            "sort_time: %f s\n"
            "add_time: %f s\n"
            "w_time: %f s\n"
            "solver_time: %f s\n"
            "ocas_time: %f s\n\n", result.nIter, result.output_time, result.sort_time,
            result.add_time, result.w_time, result.qp_solver_time, result.ocas_time);

    SG_FREE(tmp_a_buf);

    uint32_t num_cut_planes = result.nCutPlanes;

    SG_DEBUG("num_cut_planes=%d\n", num_cut_planes);
    for (uint32_t i=0; i<num_cut_planes; i++)
    {
        SG_DEBUG("cp_value[%d]=%p\n", i, cp_value[i]);
        SG_FREE(cp_value[i]);
        SG_DEBUG("cp_index[%d]=%p\n", i, cp_index[i]);
        SG_FREE(cp_index[i]);
    }

    SG_FREE(cp_value);
    cp_value=NULL;
    SG_FREE(cp_index);
    cp_index=NULL;
    SG_FREE(cp_nz_dims);
    cp_nz_dims=NULL;
    SG_FREE(cp_bias);
    cp_bias=NULL;

    lab.free_vector();

    SG_FREE(old_w);
    old_w=NULL;

    return true;
}

/*----------------------------------------------------------------------------------
  sq_norm_W = sparse_update_W( t ) does the following:

  W = oldW*(1-t) + t*W;
  sq_norm_W = W'*W;

  ---------------------------------------------------------------------------------*/
float64_t CSVMOcas::update_W( float64_t t, void* ptr )
{
  float64_t sq_norm_W = 0;
  CSVMOcas* o = (CSVMOcas*) ptr;
  uint32_t nDim = (uint32_t) o->w_dim;
  float64_t* W=o->w;
  float64_t* oldW=o->old_w;

  for(uint32_t j=0; j <nDim; j++)
  {
      W[j] = oldW[j]*(1-t) + t*W[j];
      sq_norm_W += W[j]*W[j];
  }
  o->bias=o->old_bias*(1-t) + t*o->bias;
  sq_norm_W += CMath::sq(o->bias);

  return( sq_norm_W );
}
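
/* Illustration (comment added for clarity): the solver calls update_W() with
 * the step size t from its line search, so e.g. t=0.25 leaves W as
 * 0.75*oldW + 0.25*W, a convex combination of the previous and the candidate
 * solution; the returned value is its squared norm including the bias term.
 */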

/*----------------------------------------------------------------------------------
  sparse_add_new_cut( new_col_H, new_cut, cut_length, nSel ) does the following:

    new_a = sum(data_X(:,find(new_cut ~=0 )),2);
    new_col_H = [sparse_A(:,1:nSel)'*new_a ; new_a'*new_a];
    sparse_A(:,nSel+1) = new_a;

  ---------------------------------------------------------------------------------*/
int CSVMOcas::add_new_cut(
    float64_t *new_col_H, uint32_t *new_cut, uint32_t cut_length,
    uint32_t nSel, void* ptr)
{
    CSVMOcas* o = (CSVMOcas*) ptr;
    CDotFeatures* f = o->features;
    uint32_t nDim=(uint32_t) o->w_dim;
    float64_t* y = o->lab.vector;

    float64_t** c_val = o->cp_value;
    uint32_t** c_idx = o->cp_index;
    uint32_t* c_nzd = o->cp_nz_dims;
    float64_t* c_bias = o->cp_bias;

    float64_t sq_norm_a;
    uint32_t i, j, nz_dims;

    /* temporary vector */
    float64_t* new_a = o->tmp_a_buf;
    memset(new_a, 0, sizeof(float64_t)*nDim);

    for(i=0; i < cut_length; i++)
    {
        f->add_to_dense_vec(y[new_cut[i]], new_cut[i], new_a, nDim);

        if (o->use_bias)
            c_bias[nSel]+=y[new_cut[i]];
    }

    /* compute new_a'*new_a and count the number of non-zero dimensions */
    nz_dims = 0;
    sq_norm_a = CMath::sq(c_bias[nSel]);
    for(j=0; j < nDim; j++ ) {
        if(new_a[j] != 0) {
            nz_dims++;
            sq_norm_a += new_a[j]*new_a[j];
        }
    }

    /* sparsify new_a and insert it into the last column of sparse_A */
    c_nzd[nSel] = nz_dims;
    c_idx[nSel]=NULL;
    c_val[nSel]=NULL;

    if(nz_dims > 0)
    {
        c_idx[nSel]=SG_MALLOC(uint32_t, nz_dims);
        c_val[nSel]=SG_MALLOC(float64_t, nz_dims);

        uint32_t idx=0;
        for(j=0; j < nDim; j++ )
        {
            if(new_a[j] != 0)
            {
                c_idx[nSel][idx] = j;
                c_val[nSel][idx++] = new_a[j];
            }
        }
    }

    new_col_H[nSel] = sq_norm_a;

    for(i=0; i < nSel; i++)
    {
        float64_t tmp = c_bias[nSel]*c_bias[i];
        for(j=0; j < c_nzd[i]; j++)
            tmp += new_a[c_idx[i][j]]*c_val[i][j];

        new_col_H[i] = tmp;
    }
    //CMath::display_vector(new_col_H, nSel+1, "new_col_H");
    //CMath::display_vector((int32_t*) c_idx[nSel], (int32_t) nz_dims, "c_idx");
    //CMath::display_vector((float64_t*) c_val[nSel], nz_dims, "c_val");
    return 0;
}
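
/* Storage illustration (comment added for clarity): a dense cutting plane
 * new_a = [0.0, 2.5, 0.0, -1.0] with accumulated bias 0.5 is stored for
 * column nSel as
 *   cp_nz_dims[nSel] = 2
 *   cp_index[nSel]   = {1, 3}
 *   cp_value[nSel]   = {2.5, -1.0}
 *   cp_bias[nSel]    = 0.5
 * so the dot products computed above and in compute_W() only touch the
 * non-zero dimensions of each cutting plane.
 */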

int CSVMOcas::sort(float64_t* vals, float64_t* data, uint32_t size)
{
    CMath::qsort_index(vals, data, size);
    return 0;
}

/*----------------------------------------------------------------------
  sparse_compute_output( output ) does the following:

  output = y.*(data_X'*W + bias);
  ----------------------------------------------------------------------*/
int CSVMOcas::compute_output(float64_t *output, void* ptr)
{
    CSVMOcas* o = (CSVMOcas*) ptr;
    CDotFeatures* f=o->features;
    int32_t nData=f->get_num_vectors();

    float64_t* y = o->lab.vector;

    f->dense_dot_range(output, 0, nData, y, o->w, o->w_dim, 0.0);

    for (int32_t i=0; i<nData; i++)
        output[i]+=y[i]*o->bias;
    //CMath::display_vector(o->w, o->w_dim, "w");
    //CMath::display_vector(output, nData, "out");
    return 0;
}

/*----------------------------------------------------------------------
  sq_norm_W = compute_W( alpha, nSel ) does the following:

  oldW = W;
  W = sparse_A(:,1:nSel)*alpha;
  sq_norm_W = W'*W;
  dp_WoldW = W'*oldW;

  ----------------------------------------------------------------------*/
void CSVMOcas::compute_W(
    float64_t *sq_norm_W, float64_t *dp_WoldW, float64_t *alpha, uint32_t nSel,
    void* ptr )
{
    CSVMOcas* o = (CSVMOcas*) ptr;
    uint32_t nDim= (uint32_t) o->w_dim;
    CMath::swap(o->w, o->old_w);
    float64_t* W=o->w;
    float64_t* oldW=o->old_w;
    memset(W, 0, sizeof(float64_t)*nDim);
    float64_t old_bias=o->bias;
    float64_t bias=0;

    float64_t** c_val = o->cp_value;
    uint32_t** c_idx = o->cp_index;
    uint32_t* c_nzd = o->cp_nz_dims;
    float64_t* c_bias = o->cp_bias;

    for(uint32_t i=0; i<nSel; i++)
    {
        uint32_t nz_dims = c_nzd[i];

        if(nz_dims > 0 && alpha[i] > 0)
        {
            for(uint32_t j=0; j < nz_dims; j++)
                W[c_idx[i][j]] += alpha[i]*c_val[i][j];
        }
        bias += c_bias[i]*alpha[i];
    }

    *sq_norm_W = CMath::dot(W,W, nDim) + CMath::sq(bias);
    *dp_WoldW = CMath::dot(W,oldW, nDim) + bias*old_bias;
    //SG_PRINT("nSel=%d sq_norm_W=%f dp_WoldW=%f\n", nSel, *sq_norm_W, *dp_WoldW);

    o->bias = bias;
    o->old_bias = old_bias;
}

void CSVMOcas::init()
{
    use_bias=true;
    bufsize=3000;
    C1=1;
    C2=1;

    epsilon=1e-3;
    method=SVM_OCAS;
    w=NULL;
    old_w=NULL;
    tmp_a_buf=NULL;
    lab.destroy_vector();
    cp_value=NULL;
    cp_index=NULL;
    cp_nz_dims=NULL;
    cp_bias=NULL;

    m_parameters->add(&C1, "C1",  "Cost constant 1.");
    m_parameters->add(&C2, "C2",  "Cost constant 2.");
    m_parameters->add(&use_bias, "use_bias",
            "Indicates if bias is used.");
    m_parameters->add(&epsilon, "epsilon", "Convergence precision.");
    m_parameters->add(&bufsize, "bufsize", "Maximum number of cutting planes.");
    m_parameters->add((machine_int_t*) &method, "method",
            "SVMOcas solver type.");
}
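
/* Serialization sketch (illustrative; not part of the original file): the
 * parameters registered above make the model storable via Shogun's
 * serializable-file interface. The file class and constructor signature are
 * assumed here, so treat this as a sketch.
 *
 *   #include <shogun/io/SerializableAsciiFile.h>
 *
 *   // 'ocas' is a trained CSVMOcas instance, e.g. from the usage sketch above
 *   CSerializableAsciiFile* outfile =
 *       new CSerializableAsciiFile("svmocas_model.asc", 'w');
 *   ocas->save_serializable(outfile);   // writes C1, C2, epsilon, bufsize, ...
 *   SG_UNREF(outfile);
 */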