00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include <shogun/lib/config.h>
00011
00012 #ifdef HAVE_LAPACK
00013 #include <shogun/classifier/svm/NewtonSVM.h>
00014 #include <shogun/mathematics/Math.h>
00015 #include <shogun/machine/LinearMachine.h>
00016 #include <shogun/features/DotFeatures.h>
00017 #include <shogun/labels/Labels.h>
00018 #include <shogun/labels/BinaryLabels.h>
00019 #include <shogun/mathematics/lapack.h>
00020
00021
00022
00023 using namespace shogun;
00024
00025 CNewtonSVM::CNewtonSVM()
00026 : CLinearMachine(), C(1), use_bias(true)
00027 {
00028 }
00029
00030 CNewtonSVM::CNewtonSVM(float64_t c, CDotFeatures* traindat, CLabels* trainlab, int32_t itr)
00031 : CLinearMachine()
00032 {
00033 lambda=1/c;
00034 num_iter=itr;
00035 prec=1e-6;
00036 num_iter=20;
00037 use_bias=true;
00038 C=c;
00039 set_features(traindat);
00040 set_labels(trainlab);
00041 }
00042
00043
00044 CNewtonSVM::~CNewtonSVM()
00045 {
00046 }
00047
00048
00049 bool CNewtonSVM::train_machine(CFeatures* data)
00050 {
00051 ASSERT(m_labels);
00052 ASSERT(m_labels->get_label_type() == LT_BINARY);
00053
00054 if (data)
00055 {
00056 if (!data->has_property(FP_DOT))
00057 SG_ERROR("Specified features are not of type CDotFeatures\n");
00058 set_features((CDotFeatures*) data);
00059 }
00060
00061 ASSERT(features);
00062
00063 SGVector<float64_t> train_labels=((CBinaryLabels*) m_labels)->get_labels();
00064 int32_t num_feat=features->get_dim_feature_space();
00065 int32_t num_vec=features->get_num_vectors();
00066
00067
00068 x_n=num_vec;
00069 x_d=num_feat;
00070
00071 ASSERT(num_vec==train_labels.vlen);
00072
00073 float64_t* weights = SG_CALLOC(float64_t, x_d+1);
00074 float64_t* out=SG_MALLOC(float64_t, x_n);
00075 SGVector<float64_t>::fill_vector(out, x_n, 1.0);
00076
00077 int32_t *sv=SG_MALLOC(int32_t, x_n), size_sv=0, iter=0;
00078 float64_t obj, *grad=SG_MALLOC(float64_t, x_d+1);
00079 float64_t t;
00080
00081 while(1)
00082 {
00083 iter++;
00084
00085 if (iter>num_iter)
00086 {
00087 SG_PRINT("Maximum number of Newton steps reached. Try larger lambda");
00088 break;
00089 }
00090
00091 obj_fun_linear(weights, out, &obj, sv, &size_sv, grad);
00092
00093 #ifdef DEBUG_NEWTON
00094 SG_PRINT("fun linear passed !\n");
00095 SG_PRINT("Obj =%f\n", obj);
00096 SG_PRINT("Grad=\n");
00097
00098 for (int32_t i=0; i<x_d+1; i++)
00099 SG_PRINT("grad[%d]=%.16g\n", i, grad[i]);
00100 SG_PRINT("SV=\n");
00101
00102 for (int32_t i=0; i<size_sv; i++)
00103 SG_PRINT("sv[%d]=%d\n", i, sv[i]);
00104 #endif
00105
00106 SGVector<float64_t> sgv;
00107 float64_t* Xsv = SG_MALLOC(float64_t, x_d*size_sv);
00108 for (int32_t k=0; k<size_sv; k++)
00109 {
00110 sgv=features->get_computed_dot_feature_vector(sv[k]);
00111 for (int32_t j=0; j<x_d; j++)
00112 Xsv[k*x_d+j]=sgv.vector[j];
00113 }
00114 int32_t tx=x_d;
00115 int32_t ty=size_sv;
00116 SGMatrix<float64_t>::transpose_matrix(Xsv, tx, ty);
00117
00118 #ifdef DEBUG_NEWTON
00119 SGMatrix<float64_t>::display_matrix(Xsv, x_d, size_sv);
00120 #endif
00121
00122 float64_t* lcrossdiag=SG_MALLOC(float64_t, (x_d+1)*(x_d+1));
00123 float64_t* vector=SG_MALLOC(float64_t, x_d+1);
00124
00125 for (int32_t i=0; i<x_d; i++)
00126 vector[i]=lambda;
00127
00128 vector[x_d]=0;
00129
00130 SGMatrix<float64_t>::create_diagonal_matrix(lcrossdiag, vector, x_d+1);
00131 float64_t* Xsv2=SG_MALLOC(float64_t, x_d*x_d);
00132 cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, x_d, x_d, size_sv,
00133 1.0, Xsv, size_sv, Xsv, size_sv, 0.0, Xsv2, x_d);
00134 float64_t* sum=SG_CALLOC(float64_t, x_d);
00135
00136 for (int32_t j=0; j<x_d; j++)
00137 {
00138 for (int32_t i=0; i<size_sv; i++)
00139 sum[j]+=Xsv[i+j*size_sv];
00140 }
00141
00142 float64_t* Xsv2sum=SG_MALLOC(float64_t, (x_d+1)*(x_d+1));
00143
00144 for (int32_t i=0; i<x_d; i++)
00145 {
00146 for (int32_t j=0; j<x_d; j++)
00147 Xsv2sum[j*(x_d+1)+i]=Xsv2[j*x_d+i];
00148
00149 Xsv2sum[x_d*(x_d+1)+i]=sum[i];
00150 }
00151
00152 for (int32_t j=0; j<x_d; j++)
00153 Xsv2sum[j*(x_d+1)+x_d]=sum[j];
00154
00155 Xsv2sum[x_d*(x_d+1)+x_d]=size_sv;
00156 float64_t* identity_matrix=SG_MALLOC(float64_t, (x_d+1)*(x_d+1));
00157
00158 SGVector<float64_t>::fill_vector(vector, x_d+1, 1.0);
00159
00160 SGMatrix<float64_t>::create_diagonal_matrix(identity_matrix, vector, x_d+1);
00161 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, x_d+1, x_d+1,
00162 x_d+1, 1.0, lcrossdiag, x_d+1, identity_matrix, x_d+1, 1.0,
00163 Xsv2sum, x_d+1);
00164
00165 float64_t* inverse=SG_MALLOC(float64_t, (x_d+1)*(x_d+1));
00166 int32_t r=x_d+1;
00167 SGMatrix<float64_t>::pinv(Xsv2sum, r, r, inverse);
00168
00169 float64_t* step=SG_MALLOC(float64_t, r);
00170 float64_t* s2=SG_MALLOC(float64_t, r);
00171 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, r, 1, r, 1.0,
00172 inverse, r, grad, r, 0.0, s2, r);
00173
00174 for (int32_t i=0; i<r; i++)
00175 step[i]=-s2[i];
00176
00177 line_search_linear(weights, step, out, &t);
00178
00179 #ifdef DEBUG_NEWTON
00180 SG_PRINT("t=%f\n\n", t);
00181
00182 for (int32_t i=0; i<x_n; i++)
00183 SG_PRINT("out[%d]=%.16g\n", i, out[i]);
00184
00185 for (int32_t i=0; i<x_d+1; i++)
00186 SG_PRINT("weights[%d]=%.16g\n", i, weights[i]);
00187 #endif
00188
00189 SGVector<float64_t>::vec1_plus_scalar_times_vec2(weights, t, step, r);
00190 float64_t newton_decrement;
00191 cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 1, 1, r, -0.5,
00192 step, r, grad, r, 0.0, &newton_decrement, 1);
00193 #ifdef V_NEWTON
00194 SG_PRINT("Itr=%d, Obj=%f, No of sv=%d, Newton dec=%0.3f, line search=%0.3f\n\n",
00195 iter, obj, size_sv, newton_decrement, t);
00196 #endif
00197
00198 SG_FREE(Xsv);
00199 SG_FREE(vector);
00200 SG_FREE(lcrossdiag);
00201 SG_FREE(Xsv2);
00202 SG_FREE(Xsv2sum);
00203 SG_FREE(identity_matrix);
00204 SG_FREE(inverse);
00205 SG_FREE(step);
00206 SG_FREE(s2);
00207
00208 if (newton_decrement*2<prec*obj)
00209 break;
00210 }
00211
00212 #ifdef V_NEWTON
00213 SG_PRINT("FINAL W AND BIAS Vector=\n\n");
00214 CMath::display_matrix(weights, x_d+1, 1);
00215 #endif
00216
00217 set_w(SGVector<float64_t>(weights, x_d));
00218 set_bias(weights[x_d]);
00219
00220 SG_FREE(sv);
00221 SG_FREE(grad);
00222 SG_FREE(out);
00223
00224 return true;
00225
00226
00227 }
00228
00229 void CNewtonSVM::line_search_linear(float64_t* weights, float64_t* d, float64_t*
00230 out, float64_t* tx)
00231 {
00232 SGVector<float64_t> Y=((CBinaryLabels*) m_labels)->get_labels();
00233 float64_t* outz=SG_MALLOC(float64_t, x_n);
00234 float64_t* temp1=SG_MALLOC(float64_t, x_n);
00235 float64_t* temp1forout=SG_MALLOC(float64_t, x_n);
00236 float64_t* outzsv=SG_MALLOC(float64_t, x_n);
00237 float64_t* Ysv=SG_MALLOC(float64_t, x_n);
00238 float64_t* Xsv=SG_MALLOC(float64_t, x_n);
00239 float64_t* temp2=SG_MALLOC(float64_t, x_d);
00240 float64_t t=0.0;
00241 float64_t* Xd=SG_MALLOC(float64_t, x_n);
00242
00243 for (int32_t i=0; i<x_n; i++)
00244 Xd[i]=features->dense_dot(i, d, x_d);
00245
00246 SGVector<float64_t>::add_scalar(d[x_d], Xd, x_n);
00247
00248 #ifdef DEBUG_NEWTON
00249 CMath::display_vector(d, x_d+1, "Weight vector");
00250
00251 for (int32_t i=0; i<x_d+1; i++)
00252 SG_SPRINT("Xd[%d]=%.18g\n", i, Xd[i]);
00253
00254 CMath::display_vector(Xd, x_n, "XD vector=");
00255 #endif
00256
00257 float64_t wd;
00258 cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 1, 1, x_d, lambda,
00259 weights, x_d, d, x_d, 0.0, &wd, 1);
00260 float64_t tempg, dd;
00261 cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 1, 1, x_d, lambda, d,
00262 x_d, d, x_d, 0.0, &dd, 1);
00263
00264 float64_t g, h;
00265 int32_t sv_len=0, *sv=SG_MALLOC(int32_t, x_n);
00266
00267 do
00268 {
00269 SGVector<float64_t>::vector_multiply(temp1, Y.vector, Xd, x_n);
00270 memcpy(temp1forout, temp1, sizeof(float64_t)*x_n);
00271 SGVector<float64_t>::scale_vector(t, temp1forout, x_n);
00272 SGVector<float64_t>::add(outz, 1.0, out, -1.0, temp1forout, x_n);
00273
00274
00275 sv_len=0;
00276
00277 for (int32_t i=0; i<x_n; i++)
00278 {
00279 if (outz[i]>0)
00280 sv[sv_len++]=i;
00281 }
00282
00283
00284 for (int32_t i=0; i<sv_len; i++)
00285 {
00286 outzsv[i]=outz[sv[i]];
00287 Ysv[i]=Y.vector[sv[i]];
00288 Xsv[i]=Xd[sv[i]];
00289 }
00290
00291 memset(temp1, 0, sizeof(float64_t)*sv_len);
00292 SGVector<float64_t>::vector_multiply(temp1, outzsv, Ysv, sv_len);
00293 tempg=SGVector<float64_t>::dot(temp1, Xsv, sv_len);
00294 g=wd+(t*dd);
00295 g-=tempg;
00296
00297
00298 cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 1, 1, sv_len, 1.0,
00299 Xsv, sv_len, Xsv, sv_len, 0.0, &h, 1);
00300 h+=dd;
00301
00302
00303 t-=g/h;
00304
00305 if (((g*g)/h)<1e-10)
00306 break;
00307
00308 } while(1);
00309
00310 for (int32_t i=0; i<x_n; i++)
00311 out[i]=outz[i];
00312 *tx=t;
00313
00314 SG_FREE(sv);
00315 SG_FREE(temp1);
00316 SG_FREE(temp2);
00317 SG_FREE(temp1forout);
00318 SG_FREE(outz);
00319 SG_FREE(outzsv);
00320 SG_FREE(Ysv);
00321 SG_FREE(Xsv);
00322 SG_FREE(Xd);
00323 }
00324
00325 void CNewtonSVM::obj_fun_linear(float64_t* weights, float64_t* out,
00326 float64_t* obj, int32_t* sv, int32_t* numsv, float64_t* grad)
00327 {
00328 SGVector<float64_t> v=((CBinaryLabels*) m_labels)->get_labels();
00329
00330 for (int32_t i=0; i<x_n; i++)
00331 {
00332 if (out[i]<0)
00333 out[i]=0;
00334 }
00335
00336 #ifdef DEBUG_NEWTON
00337 for (int32_t i=0; i<x_n; i++)
00338 SG_SPRINT("out[%d]=%.16g\n", i, out[i]);
00339 #endif
00340
00341
00342 float64_t* w0=SG_MALLOC(float64_t, x_d+1);
00343 memcpy(w0, weights, sizeof(float64_t)*(x_d));
00344 w0[x_d]=0;
00345
00346
00347 float64_t* out1=SG_MALLOC(float64_t, x_n);
00348
00349
00350 SGVector<float64_t>::vector_multiply(out1, out, out, x_n);
00351 float64_t p1=SGVector<float64_t>::sum(out1, x_n)/2;
00352 float64_t C1;
00353 float64_t* w0copy=SG_MALLOC(float64_t, x_d+1);
00354 memcpy(w0copy, w0, sizeof(float64_t)*(x_d+1));
00355 SGVector<float64_t>::scale_vector(0.5, w0copy, x_d+1);
00356 cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 1, 1, x_d+1, lambda,
00357 w0, x_d+1, w0copy, x_d+1, 0.0, &C1, 1);
00358 *obj=p1+C1;
00359 SGVector<float64_t>::scale_vector(lambda, w0, x_d);
00360 float64_t* temp=SG_CALLOC(float64_t, x_n);
00361 SGVector<float64_t>::vector_multiply(temp, out, v.vector, x_n);
00362 float64_t* temp1=SG_CALLOC(float64_t, x_d);
00363 SGVector<float64_t> vec;
00364
00365 for (int32_t i=0; i<x_n; i++)
00366 {
00367 features->add_to_dense_vec(temp[i], i, temp1, x_d);
00368 #ifdef DEBUG_NEWTON
00369 SG_SPRINT("\ntemp[%d]=%f", i, temp[i]);
00370 CMath::display_vector(vec.vector, x_d, "vector");
00371 CMath::display_vector(temp1, x_d, "debuging");
00372 #endif
00373 }
00374 float64_t* p2=SG_MALLOC(float64_t, x_d+1);
00375
00376 for (int32_t i=0; i<x_d; i++)
00377 p2[i]=temp1[i];
00378
00379 p2[x_d]=SGVector<float64_t>::sum(temp, x_n);
00380 SGVector<float64_t>::add(grad, 1.0, w0, -1.0, p2, x_d+1);
00381 int32_t sv_len=0;
00382
00383 for (int32_t i=0; i<x_n; i++)
00384 {
00385 if (out[i]>0)
00386 sv[sv_len++]=i;
00387 }
00388
00389 *numsv=sv_len;
00390
00391 SG_FREE(w0);
00392 SG_FREE(w0copy);
00393 SG_FREE(out1);
00394 SG_FREE(temp);
00395 SG_FREE(temp1);
00396 SG_FREE(p2);
00397 }
00398 #endif //HAVE_LAPACK