SHOGUN: NewtonSVM.cpp Source File

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2012 Harshit Syal
00008  * Copyright (C) 2012 Harshit Syal
00009  */
00010 #include <shogun/lib/config.h>
00011 
00012 #ifdef HAVE_LAPACK
00013 #include <shogun/classifier/svm/NewtonSVM.h>
00014 #include <shogun/mathematics/Math.h>
00015 #include <shogun/machine/LinearMachine.h>
00016 #include <shogun/features/DotFeatures.h>
00017 #include <shogun/labels/Labels.h>
00018 #include <shogun/labels/BinaryLabels.h>
00019 #include <shogun/mathematics/lapack.h>
00020 
00021 //#define DEBUG_NEWTON
00022 //#define V_NEWTON
00023 using namespace shogun;
00024 
00025 CNewtonSVM::CNewtonSVM()
00026 : CLinearMachine(), C(1), use_bias(true)
00027 {
00028 }
00029 
00030 CNewtonSVM::CNewtonSVM(float64_t c, CDotFeatures* traindat, CLabels* trainlab, int32_t itr)
00031 : CLinearMachine()
00032 {
00033     lambda=1/c;
00034     num_iter=itr;
00035     prec=1e-6;
00036     num_iter=20;
00037     use_bias=true;
00038     C=c;
00039     set_features(traindat);
00040     set_labels(trainlab);
00041 }
00042 
00043 
00044 CNewtonSVM::~CNewtonSVM()
00045 {
00046 }
00047 
00048 
00049 bool CNewtonSVM::train_machine(CFeatures* data)
00050 {
00051     ASSERT(m_labels);
00052     ASSERT(m_labels->get_label_type() == LT_BINARY);
00053 
00054     if (data)
00055     {
00056         if (!data->has_property(FP_DOT))
00057             SG_ERROR("Specified features are not of type CDotFeatures\n");
00058         set_features((CDotFeatures*) data);
00059     }
00060 
00061     ASSERT(features);
00062 
00063     SGVector<float64_t> train_labels=((CBinaryLabels*) m_labels)->get_labels();
00064     int32_t num_feat=features->get_dim_feature_space();
00065     int32_t num_vec=features->get_num_vectors();
00066 
00067     //Assigning dimensions for whole class scope
00068     x_n=num_vec;
00069     x_d=num_feat;
00070 
00071     ASSERT(num_vec==train_labels.vlen);
00072 
00073     float64_t* weights = SG_CALLOC(float64_t, x_d+1);
00074     float64_t* out=SG_MALLOC(float64_t, x_n);
00075     SGVector<float64_t>::fill_vector(out, x_n, 1.0);
00076 
00077     int32_t *sv=SG_MALLOC(int32_t, x_n), size_sv=0, iter=0;
00078     float64_t obj, *grad=SG_MALLOC(float64_t, x_d+1);
00079     float64_t t;
00080 
00081     while(1)
00082     {
00083         iter++;
00084 
00085         if (iter>num_iter)
00086         {
00087             SG_PRINT("Maximum number of Newton steps reached. Try larger lambda");
00088             break;
00089         }
00090 
00091         obj_fun_linear(weights, out, &obj, sv, &size_sv, grad);
00092 
00093 #ifdef DEBUG_NEWTON
00094         SG_PRINT("fun linear passed !\n");
00095         SG_PRINT("Obj =%f\n", obj);
00096         SG_PRINT("Grad=\n");
00097 
00098         for (int32_t i=0; i<x_d+1; i++)
00099             SG_PRINT("grad[%d]=%.16g\n", i, grad[i]);
00100         SG_PRINT("SV=\n");
00101 
00102         for (int32_t i=0; i<size_sv; i++)
00103             SG_PRINT("sv[%d]=%d\n", i, sv[i]);
00104 #endif
00105 
00106         SGVector<float64_t> sgv;
00107         float64_t* Xsv = SG_MALLOC(float64_t, x_d*size_sv);
00108         for (int32_t k=0; k<size_sv; k++)
00109         {
00110             sgv=features->get_computed_dot_feature_vector(sv[k]);
00111             for (int32_t j=0; j<x_d; j++)
00112                 Xsv[k*x_d+j]=sgv.vector[j];
00113         }
00114         int32_t tx=x_d;
00115         int32_t ty=size_sv;
00116         SGMatrix<float64_t>::transpose_matrix(Xsv, tx, ty);
00117 
00118 #ifdef DEBUG_NEWTON
00119         SGMatrix<float64_t>::display_matrix(Xsv, x_d, size_sv);
00120 #endif
00121 
00122         float64_t* lcrossdiag=SG_MALLOC(float64_t, (x_d+1)*(x_d+1));
00123         float64_t* vector=SG_MALLOC(float64_t, x_d+1);
00124 
00125         for (int32_t i=0; i<x_d; i++)
00126             vector[i]=lambda;
00127 
00128         vector[x_d]=0;
00129 
00130         SGMatrix<float64_t>::create_diagonal_matrix(lcrossdiag, vector, x_d+1);
00131         float64_t* Xsv2=SG_MALLOC(float64_t, x_d*x_d);
00132         cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, x_d, x_d, size_sv,
00133                 1.0, Xsv, size_sv, Xsv, size_sv, 0.0, Xsv2, x_d);
00134         float64_t* sum=SG_CALLOC(float64_t, x_d);
00135 
00136         for (int32_t j=0; j<x_d; j++)
00137         {
00138             for (int32_t i=0; i<size_sv; i++)
00139                 sum[j]+=Xsv[i+j*size_sv];
00140         }
00141 
00142         float64_t* Xsv2sum=SG_MALLOC(float64_t, (x_d+1)*(x_d+1));
00143 
00144         for (int32_t i=0; i<x_d; i++)
00145         {
00146             for (int32_t j=0; j<x_d; j++)
00147                 Xsv2sum[j*(x_d+1)+i]=Xsv2[j*x_d+i];
00148 
00149             Xsv2sum[x_d*(x_d+1)+i]=sum[i];
00150         }
00151 
00152         for (int32_t j=0; j<x_d; j++)
00153             Xsv2sum[j*(x_d+1)+x_d]=sum[j];
00154 
00155         Xsv2sum[x_d*(x_d+1)+x_d]=size_sv;
00156         float64_t* identity_matrix=SG_MALLOC(float64_t, (x_d+1)*(x_d+1));
00157 
00158         SGVector<float64_t>::fill_vector(vector, x_d+1, 1.0);
00159 
00160         SGMatrix<float64_t>::create_diagonal_matrix(identity_matrix, vector, x_d+1);
00161         cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, x_d+1, x_d+1,
00162                 x_d+1, 1.0, lcrossdiag, x_d+1, identity_matrix, x_d+1, 1.0,
00163                 Xsv2sum, x_d+1);
00164 
00165         float64_t* inverse=SG_MALLOC(float64_t, (x_d+1)*(x_d+1));
00166         int32_t r=x_d+1;
00167         SGMatrix<float64_t>::pinv(Xsv2sum, r, r, inverse);
00168 
00169         float64_t* step=SG_MALLOC(float64_t, r);
00170         float64_t* s2=SG_MALLOC(float64_t, r);
00171         cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, r, 1, r, 1.0,
00172                 inverse, r, grad, r, 0.0, s2, r);
00173 
00174         for (int32_t i=0; i<r; i++)
00175             step[i]=-s2[i];
00176 
00177         line_search_linear(weights, step, out, &t);
00178 
00179 #ifdef DEBUG_NEWTON
00180         SG_PRINT("t=%f\n\n", t);
00181 
00182         for (int32_t i=0; i<x_n; i++)
00183             SG_PRINT("out[%d]=%.16g\n", i, out[i]);
00184 
00185         for (int32_t i=0; i<x_d+1; i++)
00186             SG_PRINT("weights[%d]=%.16g\n", i, weights[i]);
00187 #endif
00188 
00189         SGVector<float64_t>::vec1_plus_scalar_times_vec2(weights, t, step, r);
00190         float64_t newton_decrement;
00191         cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 1, 1, r, -0.5,
00192                 step, r, grad, r, 0.0, &newton_decrement, 1);
00193 #ifdef V_NEWTON
00194         SG_PRINT("Itr=%d, Obj=%f, No of sv=%d, Newton dec=%0.3f, line search=%0.3f\n\n",
00195                 iter, obj, size_sv, newton_decrement, t);
00196 #endif
00197 
00198         SG_FREE(Xsv);
00199         SG_FREE(vector);
00200         SG_FREE(lcrossdiag);
00201         SG_FREE(Xsv2);
00202         SG_FREE(Xsv2sum);
00203         SG_FREE(identity_matrix);
00204         SG_FREE(inverse);
00205         SG_FREE(step);
00206         SG_FREE(s2);
00207 
00208         if (newton_decrement*2<prec*obj)
00209             break;
00210     }
00211 
00212 #ifdef V_NEWTON
00213     SG_PRINT("FINAL W AND BIAS Vector=\n\n");
00214     CMath::display_matrix(weights, x_d+1, 1);
00215 #endif
00216 
00217     set_w(SGVector<float64_t>(weights, x_d));
00218     set_bias(weights[x_d]);
00219 
00220     SG_FREE(sv);
00221     SG_FREE(grad);
00222     SG_FREE(out);
00223 
00224     return true;
00225 
00226 
00227 }
00228 
00229 void CNewtonSVM::line_search_linear(float64_t* weights, float64_t* d, float64_t*
00230         out, float64_t* tx)
00231 {
00232     SGVector<float64_t> Y=((CBinaryLabels*) m_labels)->get_labels();
00233     float64_t* outz=SG_MALLOC(float64_t, x_n);
00234     float64_t* temp1=SG_MALLOC(float64_t, x_n);
00235     float64_t* temp1forout=SG_MALLOC(float64_t, x_n);
00236     float64_t* outzsv=SG_MALLOC(float64_t, x_n);
00237     float64_t* Ysv=SG_MALLOC(float64_t, x_n);
00238     float64_t* Xsv=SG_MALLOC(float64_t, x_n);
00239     float64_t* temp2=SG_MALLOC(float64_t, x_d);
00240     float64_t t=0.0;
00241     float64_t* Xd=SG_MALLOC(float64_t, x_n);
00242 
00243     for (int32_t i=0; i<x_n; i++)
00244         Xd[i]=features->dense_dot(i, d, x_d);
00245 
00246     SGVector<float64_t>::add_scalar(d[x_d], Xd, x_n);
00247 
00248 #ifdef DEBUG_NEWTON
00249     CMath::display_vector(d, x_d+1, "Weight vector");
00250 
00251     for (int32_t i=0; i<x_d+1; i++)
00252         SG_SPRINT("Xd[%d]=%.18g\n", i, Xd[i]);
00253 
00254     CMath::display_vector(Xd, x_n, "XD vector=");
00255 #endif
00256 
00257     float64_t wd;
00258     cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 1, 1, x_d, lambda,
00259             weights, x_d, d, x_d, 0.0, &wd, 1);
00260     float64_t tempg, dd;
00261     cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 1, 1, x_d, lambda, d,
00262             x_d, d, x_d, 0.0, &dd, 1);
00263 
00264     float64_t g, h;
00265     int32_t sv_len=0, *sv=SG_MALLOC(int32_t, x_n);
00266 
00267     do
00268     {
00269         SGVector<float64_t>::vector_multiply(temp1, Y.vector, Xd, x_n);
00270         memcpy(temp1forout, temp1, sizeof(float64_t)*x_n);
00271         SGVector<float64_t>::scale_vector(t, temp1forout, x_n);
00272         SGVector<float64_t>::add(outz, 1.0, out, -1.0, temp1forout, x_n);
00273 
00274         // Calculation of sv
00275         sv_len=0;
00276 
00277         for (int32_t i=0; i<x_n; i++)
00278         {
00279             if (outz[i]>0)
00280                 sv[sv_len++]=i;
00281         }
00282 
00283         //Calculation of gradient 'g'
00284         for (int32_t i=0; i<sv_len; i++)
00285         {
00286             outzsv[i]=outz[sv[i]];
00287             Ysv[i]=Y.vector[sv[i]];
00288             Xsv[i]=Xd[sv[i]];
00289         }
00290 
00291         memset(temp1, 0, sizeof(float64_t)*sv_len);
00292         SGVector<float64_t>::vector_multiply(temp1, outzsv, Ysv, sv_len);
00293         tempg=SGVector<float64_t>::dot(temp1, Xsv, sv_len);
00294         g=wd+(t*dd);
00295         g-=tempg;
00296 
00297         // Calculation of second derivative 'h'
00298         cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 1, 1, sv_len, 1.0,
00299                 Xsv, sv_len, Xsv, sv_len, 0.0, &h, 1);
00300         h+=dd;
00301 
00302         // Calculation of 1D Newton step 'd'
00303         t-=g/h;
00304 
00305         if (((g*g)/h)<1e-10)
00306             break;
00307 
00308     } while(1);
00309 
00310     for (int32_t i=0; i<x_n; i++)
00311         out[i]=outz[i];
00312     *tx=t;
00313 
00314     SG_FREE(sv);
00315     SG_FREE(temp1);
00316     SG_FREE(temp2);
00317     SG_FREE(temp1forout);
00318     SG_FREE(outz);
00319     SG_FREE(outzsv);
00320     SG_FREE(Ysv);
00321     SG_FREE(Xsv);
00322     SG_FREE(Xd);
00323 }
00324 
00325 void CNewtonSVM::obj_fun_linear(float64_t* weights, float64_t* out,
00326         float64_t* obj, int32_t* sv, int32_t* numsv, float64_t* grad)
00327 {
00328     SGVector<float64_t> v=((CBinaryLabels*) m_labels)->get_labels();
00329 
00330     for (int32_t i=0; i<x_n; i++)
00331     {
00332         if (out[i]<0)
00333             out[i]=0;
00334     }
00335 
00336 #ifdef DEBUG_NEWTON
00337     for (int32_t i=0; i<x_n; i++)
00338         SG_SPRINT("out[%d]=%.16g\n", i, out[i]);
00339 #endif
00340 
00341     //create copy of w0
00342     float64_t* w0=SG_MALLOC(float64_t, x_d+1);
00343     memcpy(w0, weights, sizeof(float64_t)*(x_d));
00344     w0[x_d]=0; //do not penalize b
00345 
00346     //create copy of out
00347     float64_t* out1=SG_MALLOC(float64_t, x_n);
00348 
00349     //compute steps for obj
00350     SGVector<float64_t>::vector_multiply(out1, out, out, x_n);
00351     float64_t p1=SGVector<float64_t>::sum(out1, x_n)/2;
00352     float64_t C1;
00353     float64_t* w0copy=SG_MALLOC(float64_t, x_d+1);
00354     memcpy(w0copy, w0, sizeof(float64_t)*(x_d+1));
00355     SGVector<float64_t>::scale_vector(0.5, w0copy, x_d+1);
00356     cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 1, 1, x_d+1, lambda,
00357             w0, x_d+1, w0copy, x_d+1, 0.0, &C1, 1);
00358     *obj=p1+C1;
00359     SGVector<float64_t>::scale_vector(lambda, w0, x_d);
00360     float64_t* temp=SG_CALLOC(float64_t, x_n); //temp = out.*Y
00361     SGVector<float64_t>::vector_multiply(temp, out, v.vector, x_n);
00362     float64_t* temp1=SG_CALLOC(float64_t, x_d);
00363     SGVector<float64_t> vec;
00364 
00365     for (int32_t i=0; i<x_n; i++)
00366     {
00367         features->add_to_dense_vec(temp[i], i, temp1, x_d);
00368 #ifdef DEBUG_NEWTON
00369         SG_SPRINT("\ntemp[%d]=%f", i, temp[i]);
00370         CMath::display_vector(vec.vector, x_d, "vector");
00371         CMath::display_vector(temp1, x_d, "debuging");
00372 #endif
00373     }
00374     float64_t* p2=SG_MALLOC(float64_t, x_d+1);
00375 
00376     for (int32_t i=0; i<x_d; i++)
00377         p2[i]=temp1[i];
00378 
00379     p2[x_d]=SGVector<float64_t>::sum(temp, x_n);
00380     SGVector<float64_t>::add(grad, 1.0, w0, -1.0, p2, x_d+1);
00381     int32_t sv_len=0;
00382 
00383     for (int32_t i=0; i<x_n; i++)
00384     {
00385         if (out[i]>0)
00386             sv[sv_len++]=i;
00387     }
00388 
00389     *numsv=sv_len;
00390 
00391     SG_FREE(w0);
00392     SG_FREE(w0copy);
00393     SG_FREE(out1);
00394     SG_FREE(temp);
00395     SG_FREE(temp1);
00396     SG_FREE(p2);
00397 }
00398 #endif //HAVE_LAPACK