SubGradientSVM.cpp

/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2007-2009 Soeren Sonnenburg
 * Written (W) 2007-2008 Vojtech Franc
 * Copyright (C) 2007-2009 Fraunhofer Institute FIRST and Max-Planck-Society
 */

#include <shogun/lib/config.h>
#include <shogun/mathematics/Math.h>
#include <shogun/lib/Signal.h>
#include <shogun/lib/Time.h>
#include <shogun/machine/LinearMachine.h>
#include <shogun/classifier/svm/SubGradientSVM.h>
#include <shogun/classifier/svm/QPBSVMLib.h>
#include <shogun/features/DotFeatures.h>
#include <shogun/labels/Labels.h>
#include <shogun/labels/BinaryLabels.h>

#undef DEBUG_SUBGRADIENTSVM

using namespace shogun;

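/* Linear two-class SVM trained in the primal by subgradient descent: each
 * iteration splits the training examples into an active set (inside the
 * margin or misclassified) and a bound set (on the margin), picks the
 * minimum-norm subgradient via a small QP over the bound set, and takes an
 * exact line-search step. */
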
CSubGradientSVM::CSubGradientSVM()
: CLinearMachine(), C1(1), C2(1), epsilon(1e-5), qpsize(42),
    qpsize_max(2000), use_bias(false), delta_active(0), delta_bound(0)
{
}

CSubGradientSVM::CSubGradientSVM(
    float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearMachine(), C1(C), C2(C), epsilon(1e-5), qpsize(42),
    qpsize_max(2000), use_bias(false), delta_active(0), delta_bound(0)
{
    set_features(traindat);
    set_labels(trainlab);
}

CSubGradientSVM::~CSubGradientSVM()
{
}

/*
int32_t CSubGradientSVM::find_active(int32_t num_feat, int32_t num_vec, int32_t& num_active, int32_t& num_bound)
{
    int32_t delta_active=0;
    num_active=0;
    num_bound=0;

    for (int32_t i=0; i<num_vec; i++)
    {
        active[i]=0;

        //within margin/wrong side
        if (proj[i] < 1-work_epsilon)
        {
            idx_active[num_active++]=i;
            active[i]=1;
        }

        //on margin
        if (CMath::abs(proj[i]-1) <= work_epsilon)
        {
            idx_bound[num_bound++]=i;
            active[i]=2;
        }

        if (active[i]!=old_active[i])
            delta_active++;
    }

    return delta_active;
}
*/

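/* Split examples into sets by their margin projection proj[i]=y_i*(w.x_i+b):
 * active[i]=1 if proj[i] < 1-autoselected_epsilon (inside margin/wrong side),
 * active[i]=2 if |proj[i]-1| <= autoselected_epsilon (on the margin).
 * When the active set is stable, work_epsilon is halved (down to epsilon),
 * and autoselected_epsilon is re-chosen from the sorted |proj-1| values so
 * that roughly qpsize (and at most qpsize_max) examples lie on the margin.
 * Returns the number of status changes; 0 means converged. */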
int32_t CSubGradientSVM::find_active(
    int32_t num_feat, int32_t num_vec, int32_t& num_active, int32_t& num_bound)
{
    delta_bound=0;
    delta_active=0;
    num_active=0;
    num_bound=0;

    for (int32_t i=0; i<num_vec; i++)
    {
        active[i]=0;

        //within margin/wrong side
        if (proj[i] < 1-autoselected_epsilon)
        {
            idx_active[num_active++]=i;
            active[i]=1;
        }

        //on margin
        if (CMath::abs(proj[i]-1) <= autoselected_epsilon)
        {
            idx_bound[num_bound++]=i;
            active[i]=2;
        }

        if (active[i]!=old_active[i])
            delta_active++;

        if (active[i]==2 && old_active[i]==2)
            delta_bound++;
    }

    if (delta_active==0 && work_epsilon<=epsilon) //we converged
        return 0;
    else if (delta_active==0) //let's decrease work_epsilon
    {
        work_epsilon=CMath::min(work_epsilon/2, autoselected_epsilon);
        work_epsilon=CMath::max(work_epsilon, epsilon);
        num_bound=qpsize;
    }

    delta_bound=0;
    delta_active=0;
    num_active=0;
    num_bound=0;

    for (int32_t i=0; i<num_vec; i++)
    {
        tmp_proj[i]=CMath::abs(proj[i]-1);
        tmp_proj_idx[i]=i;
    }

    CMath::qsort_index(tmp_proj, tmp_proj_idx, num_vec);

    autoselected_epsilon=tmp_proj[CMath::min(qpsize,num_vec-1)];

#ifdef DEBUG_SUBGRADIENTSVM
    //SG_PRINT("autoseleps: %15.15f\n", autoselected_epsilon);
#endif

    if (autoselected_epsilon>work_epsilon)
        autoselected_epsilon=work_epsilon;

    if (autoselected_epsilon<epsilon)
    {
        autoselected_epsilon=epsilon;

        int32_t i=0;
        while (i < num_vec && tmp_proj[i] <= autoselected_epsilon)
            i++;

        //SG_PRINT("lower bound on epsilon requires %d variables in qp\n", i);

        if (i>=qpsize_max && autoselected_epsilon>epsilon) //qpsize limit
        {
            SG_INFO("qpsize limit (%d) reached\n", qpsize_max);
            int32_t num_in_qp=i;
            while (--i>=0 && num_in_qp>=qpsize_max)
            {
                if (tmp_proj[i] < autoselected_epsilon)
                {
                    autoselected_epsilon=tmp_proj[i];
                    num_in_qp--;
                }
            }

            //SG_PRINT("new qpsize will be %d, autoeps:%15.15f\n", num_in_qp, autoselected_epsilon);
        }
    }

    for (int32_t i=0; i<num_vec; i++)
    {
        active[i]=0;

        //within margin/wrong side
        if (proj[i] < 1-autoselected_epsilon)
        {
            idx_active[num_active++]=i;
            active[i]=1;
        }

        //on margin
        if (CMath::abs(proj[i]-1) <= autoselected_epsilon)
        {
            idx_bound[num_bound++]=i;
            active[i]=2;
        }

        if (active[i]!=old_active[i])
            delta_active++;

        if (active[i]==2 && old_active[i]==2)
            delta_bound++;
    }

    //SG_PRINT("delta_bound: %d of %d (%02.2f)\n", delta_bound, num_bound, 100.0*delta_bound/num_bound);
    return delta_active;
}

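/* Incrementally maintain sum_CXy_active = C1*sum_{i active} y_i*x_i (and,
 * with bias enabled, sum_Cy_active = C1*sum_{i active} y_i) as examples
 * enter or leave the active set, then remember the current status in
 * old_active. */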
void CSubGradientSVM::update_active(int32_t num_feat, int32_t num_vec)
{
    for (int32_t i=0; i<num_vec; i++)
    {
        if (active[i]==1 && old_active[i]!=1)
        {
            features->add_to_dense_vec(C1*((CBinaryLabels*) m_labels)->get_label(i), i, sum_CXy_active, num_feat);
            if (use_bias)
                sum_Cy_active+=C1*((CBinaryLabels*) m_labels)->get_label(i);
        }
        else if (old_active[i]==1 && active[i]!=1)
        {
            features->add_to_dense_vec(-C1*((CBinaryLabels*) m_labels)->get_label(i), i, sum_CXy_active, num_feat);
            if (use_bias)
                sum_Cy_active-=C1*((CBinaryLabels*) m_labels)->get_label(i);
        }
    }

    CMath::swap(active,old_active);
}

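/* Exact line search along -grad_w/-grad_b: the objective restricted to the
 * ray is piecewise quadratic in the step size alpha, with breakpoints
 * ("hinge points") where an example's hinge loss switches on or off. Walk
 * the sorted breakpoints while the directional derivative
 * 2*A_zero*alpha + B_zero + sum_B stays negative, then linearly interpolate
 * the zero crossing between the last two values. */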
float64_t CSubGradientSVM::line_search(int32_t num_feat, int32_t num_vec)
{
    float64_t sum_B = 0;
    float64_t A_zero = 0.5*SGVector<float64_t>::dot(grad_w, grad_w, num_feat);
    float64_t B_zero = -SGVector<float64_t>::dot(w.vector, grad_w, num_feat);

    int32_t num_hinge=0;

    for (int32_t i=0; i<num_vec; i++)
    {
        float64_t p=((CBinaryLabels*) m_labels)->get_label(i)*(features->dense_dot(i, grad_w, num_feat)+grad_b);
        grad_proj[i]=p;
        if (p!=0)
        {
            hinge_point[num_hinge]=(proj[i]-1)/p;
            hinge_idx[num_hinge]=i;
            num_hinge++;

            if (p<0)
                sum_B+=p;
        }
    }
    sum_B*=C1;

    CMath::qsort_index(hinge_point, hinge_idx, num_hinge);

    float64_t alpha = hinge_point[0];
    float64_t grad_val = 2*A_zero*alpha + B_zero + sum_B;

    //CMath::display_vector(grad_w, num_feat, "grad_w");
    //CMath::display_vector(grad_proj, num_vec, "grad_proj");
    //CMath::display_vector(hinge_point, num_vec, "hinge_point");
    //SG_PRINT("A_zero=%f\n", A_zero);
    //SG_PRINT("B_zero=%f\n", B_zero);
    //SG_PRINT("sum_B=%f\n", sum_B);
    //SG_PRINT("alpha=%f\n", alpha);
    //SG_PRINT("grad_val=%f\n", grad_val);

    float64_t old_grad_val = grad_val;
    float64_t old_alpha = alpha;

    for (int32_t i=1; i < num_hinge && grad_val < 0; i++)
    {
        alpha = hinge_point[i];
        grad_val = 2*A_zero*alpha + B_zero + sum_B;

        if (grad_val > 0)
        {
            ASSERT(old_grad_val-grad_val != 0);
            float64_t gamma = -grad_val/(old_grad_val-grad_val);
            alpha = old_alpha*gamma + (1-gamma)*alpha;
        }
        else
        {
            old_grad_val = grad_val;
            old_alpha = alpha;

            sum_B = sum_B + CMath::abs(C1*grad_proj[hinge_idx[i]]);
            grad_val = 2*A_zero*alpha + B_zero + sum_B;
        }
    }

    return alpha;
}

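/* Compute the minimum-norm subgradient. Examples exactly on the margin make
 * the hinge loss non-differentiable; their subgradient weights beta in [0,1]
 * are found by a small box-constrained QP (or chosen at random if the QP
 * would exceed qpsize_max), giving
 *   grad_w = w - sum_CXy_active - C1*sum_i beta[i]*y_i*x_i
 * (and grad_b accordingly). Returns the directional derivative used to
 * detect lack of progress. */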
float64_t CSubGradientSVM::compute_min_subgradient(
    int32_t num_feat, int32_t num_vec, int32_t num_active, int32_t num_bound)
{
    float64_t dir_deriv=0;

    if (num_bound > 0)
    {
        CTime t2;
        SGVector<float64_t>::add(v, 1.0, w.vector, -1.0, sum_CXy_active, num_feat);

        if (num_bound>=qpsize_max && num_it_noimprovement!=10) // if the QP gets too large, just choose a random beta
        {
            //SG_PRINT("qpsize too large (%d>=%d) choosing random subgradient/beta\n", num_bound, qpsize_max);
            for (int32_t i=0; i<num_bound; i++)
                beta[i]=CMath::random(0.0,1.0);
        }
        else
        {
            memset(beta, 0, sizeof(float64_t)*num_bound);

            float64_t bias_const=0;

            if (use_bias)
                bias_const=1;

            for (int32_t i=0; i<num_bound; i++)
            {
                for (int32_t j=i; j<num_bound; j++)
                {
                    Z[i*num_bound+j]= 2.0*C1*C1*((CBinaryLabels*) m_labels)->get_label(idx_bound[i])*((CBinaryLabels*) m_labels)->get_label(idx_bound[j])*
                        (features->dot(idx_bound[i], features, idx_bound[j]) + bias_const);

                    Z[j*num_bound+i]=Z[i*num_bound+j];
                }

                Zv[i]=-2.0*C1*((CBinaryLabels*) m_labels)->get_label(idx_bound[i])*
                    (features->dense_dot(idx_bound[i], v, num_feat)-sum_Cy_active);
            }

            //CMath::display_matrix(Z, num_bound, num_bound, "Z");
            //CMath::display_vector(Zv, num_bound, "Zv");
            t2.stop();
#ifdef DEBUG_SUBGRADIENTSVM
            t2.time_diff_sec(true);
#endif

            CTime t;
            CQPBSVMLib solver(Z,num_bound, Zv,num_bound, 1.0);
            //solver.set_solver(QPB_SOLVER_GRADDESC);
            //solver.set_solver(QPB_SOLVER_GS);
#ifdef USE_CPLEX
            solver.set_solver(QPB_SOLVER_CPLEX);
#else
            solver.set_solver(QPB_SOLVER_SCAS);
#endif

            solver.solve_qp(beta, num_bound);

            t.stop();
#ifdef DEBUG_SUBGRADIENTSVM
            tim+=t.time_diff_sec(true);
#else
            tim+=t.time_diff_sec(false);
#endif

            //CMath::display_vector(beta, num_bound, "beta gs");
            //solver.set_solver(QPB_SOLVER_CPLEX);
            //solver.solve_qp(beta, num_bound);
            //CMath::display_vector(beta, num_bound, "beta cplex");

            //CMath::display_vector(grad_w, num_feat, "grad_w");
            //SG_PRINT("grad_b:%f\n", grad_b);
        }

        SGVector<float64_t>::add(grad_w, 1.0, w.vector, -1.0, sum_CXy_active, num_feat);
        grad_b = -sum_Cy_active;

        for (int32_t i=0; i<num_bound; i++)
        {
            features->add_to_dense_vec(-C1*beta[i]*((CBinaryLabels*) m_labels)->get_label(idx_bound[i]), idx_bound[i], grad_w, num_feat);
            if (use_bias)
                grad_b -= C1 * ((CBinaryLabels*) m_labels)->get_label(idx_bound[i])*beta[i];
        }

        dir_deriv = SGVector<float64_t>::dot(grad_w, v, num_feat) - grad_b*sum_Cy_active;
        for (int32_t i=0; i<num_bound; i++)
        {
            float64_t val= C1*((CBinaryLabels*) m_labels)->get_label(idx_bound[i])*(features->dense_dot(idx_bound[i], grad_w, num_feat)+grad_b);
            dir_deriv += CMath::max(0.0, val);
        }
    }
    else
    {
        SGVector<float64_t>::add(grad_w, 1.0, w.vector, -1.0, sum_CXy_active, num_feat);
        grad_b = -sum_Cy_active;

        dir_deriv = SGVector<float64_t>::dot(grad_w, grad_w, num_feat) + grad_b*grad_b;
    }

    return dir_deriv;
}

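/* Primal objective: 0.5*||w||^2 + C1*sum_i max(0, 1-proj[i]). */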
float64_t CSubGradientSVM::compute_objective(int32_t num_feat, int32_t num_vec)
{
    float64_t result= 0.5 * SGVector<float64_t>::dot(w.vector,w.vector, num_feat);

    for (int32_t i=0; i<num_vec; i++)
    {
        if (proj[i]<1.0)
            result += C1 * (1.0-proj[i]);
    }

    return result;
}

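/* Recompute proj[i]=y_i*(w.x_i + bias) for all examples from scratch. */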
void CSubGradientSVM::compute_projection(int32_t num_feat, int32_t num_vec)
{
    for (int32_t i=0; i<num_vec; i++)
        proj[i]=((CBinaryLabels*) m_labels)->get_label(i)*(features->dense_dot(i, w.vector, num_feat)+bias);
}

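/* Update the projections in O(num_vec) after the step w -= alpha*grad_w,
 * bias -= alpha*grad_b, using grad_proj[i]=y_i*(grad_w.x_i+grad_b) cached by
 * line_search(): proj[i] -= alpha*grad_proj[i]. */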
void CSubGradientSVM::update_projection(float64_t alpha, int32_t num_vec)
{
    SGVector<float64_t>::vec1_plus_scalar_times_vec2(proj,-alpha, grad_proj, num_vec);
}

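/* Allocate and zero all working buffers: per-feature buffers (grad_w,
 * sum_CXy_active, v, old_v), per-example buffers (proj, active sets, hinge
 * points) and QP buffers sized by qpsize_limit (Z, Zv, beta and their old_
 * counterparts). */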
void CSubGradientSVM::init(int32_t num_vec, int32_t num_feat)
{
    // allocate normal vector and bias, initialized to 0
    w=SGVector<float64_t>(num_feat);
    w.zero();
    //CMath::random_vector(w, num_feat, -1.0, 1.0);
    bias=0;
    num_it_noimprovement=0;
    grad_b=0;
    qpsize_limit=5000;

    grad_w=SG_MALLOC(float64_t, num_feat);
    memset(grad_w,0,sizeof(float64_t)*num_feat);

    sum_CXy_active=SG_MALLOC(float64_t, num_feat);
    memset(sum_CXy_active,0,sizeof(float64_t)*num_feat);

    v=SG_MALLOC(float64_t, num_feat);
    memset(v,0,sizeof(float64_t)*num_feat);

    old_v=SG_MALLOC(float64_t, num_feat);
    memset(old_v,0,sizeof(float64_t)*num_feat);

    sum_Cy_active=0;

    proj= SG_MALLOC(float64_t, num_vec);
    memset(proj,0,sizeof(float64_t)*num_vec);

    tmp_proj=SG_MALLOC(float64_t, num_vec);
    memset(tmp_proj,0,sizeof(float64_t)*num_vec);

    tmp_proj_idx= SG_MALLOC(int32_t, num_vec);
    memset(tmp_proj_idx,0,sizeof(int32_t)*num_vec);

    grad_proj= SG_MALLOC(float64_t, num_vec);
    memset(grad_proj,0,sizeof(float64_t)*num_vec);

    hinge_point= SG_MALLOC(float64_t, num_vec);
    memset(hinge_point,0,sizeof(float64_t)*num_vec);

    hinge_idx= SG_MALLOC(int32_t, num_vec);
    memset(hinge_idx,0,sizeof(int32_t)*num_vec);

    active=SG_MALLOC(uint8_t, num_vec);
    memset(active,0,sizeof(uint8_t)*num_vec);

    old_active=SG_MALLOC(uint8_t, num_vec);
    memset(old_active,0,sizeof(uint8_t)*num_vec);

    idx_bound=SG_MALLOC(int32_t, num_vec);
    memset(idx_bound,0,sizeof(int32_t)*num_vec);

    idx_active=SG_MALLOC(int32_t, num_vec);
    memset(idx_active,0,sizeof(int32_t)*num_vec);

    Z=SG_MALLOC(float64_t, qpsize_limit*qpsize_limit);
    memset(Z,0,sizeof(float64_t)*qpsize_limit*qpsize_limit);

    Zv=SG_MALLOC(float64_t, qpsize_limit);
    memset(Zv,0,sizeof(float64_t)*qpsize_limit);

    beta=SG_MALLOC(float64_t, qpsize_limit);
    memset(beta,0,sizeof(float64_t)*qpsize_limit);

    old_Z=SG_MALLOC(float64_t, qpsize_limit*qpsize_limit);
    memset(old_Z,0,sizeof(float64_t)*qpsize_limit*qpsize_limit);

    old_Zv=SG_MALLOC(float64_t, qpsize_limit);
    memset(old_Zv,0,sizeof(float64_t)*qpsize_limit);

    old_beta=SG_MALLOC(float64_t, qpsize_limit);
    memset(old_beta,0,sizeof(float64_t)*qpsize_limit);
}

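/* Free all working buffers allocated in init(). */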
void CSubGradientSVM::cleanup()
{
    SG_FREE(hinge_idx);
    SG_FREE(hinge_point);
    SG_FREE(grad_proj);
    SG_FREE(proj);
    SG_FREE(tmp_proj);
    SG_FREE(tmp_proj_idx);
    SG_FREE(active);
    SG_FREE(old_active);
    SG_FREE(idx_bound);
    SG_FREE(idx_active);
    SG_FREE(sum_CXy_active);
    SG_FREE(grad_w);
    SG_FREE(v);
    SG_FREE(Z);
    SG_FREE(Zv);
    SG_FREE(beta);
    SG_FREE(old_v);
    SG_FREE(old_Z);
    SG_FREE(old_Zv);
    SG_FREE(old_beta);

    hinge_idx=NULL;
    hinge_point=NULL;
    grad_proj=NULL;
    proj=NULL;
    tmp_proj=NULL;
    tmp_proj_idx=NULL;
    active=NULL;
    old_active=NULL;
    idx_bound=NULL;
    idx_active=NULL;
    sum_CXy_active=NULL;
    grad_w=NULL;
    v=NULL;
    Z=NULL;
    Zv=NULL;
    beta=NULL;
    old_v=NULL;
    old_Z=NULL;
    old_Zv=NULL;
    old_beta=NULL;
}

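/* Train by subgradient descent: starting from w=0, b=0, each iteration
 * (1) re-partitions the examples with find_active(), (2) updates the cached
 * active-set sums, (3) picks the minimum-norm subgradient (QP over the
 * margin examples), (4) takes an exact line-search step along it, and
 * (5) updates the projections. Stops once work_epsilon has reached epsilon,
 * the active set is stable and alpha*||grad||^2 is tiny, or when
 * max_train_time is exceeded. */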
bool CSubGradientSVM::train_machine(CFeatures* data)
{
    tim=0;
    SG_INFO("C=%f epsilon=%f\n", C1, epsilon);
    ASSERT(m_labels);

    if (data)
    {
        if (!data->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n");
        set_features((CDotFeatures*) data);
    }
    ASSERT(get_features());

    int32_t num_iterations=0;
    int32_t num_train_labels=m_labels->get_num_labels();
    int32_t num_feat=features->get_dim_feature_space();
    int32_t num_vec=features->get_num_vectors();

    ASSERT(num_vec==num_train_labels);

    init(num_vec, num_feat);

    int32_t num_active=0;
    int32_t num_bound=0;
    float64_t alpha=0;
    float64_t dir_deriv=0;
    float64_t obj=0;
    delta_active=num_vec;
    last_it_noimprovement=-1;

    work_epsilon=0.99;
    autoselected_epsilon=work_epsilon;

    compute_projection(num_feat, num_vec);

    CTime time;
#ifdef DEBUG_SUBGRADIENTSVM
    float64_t loop_time=0;
#endif
    while (!(CSignal::cancel_computations()))
    {
        CTime t;
        delta_active=find_active(num_feat, num_vec, num_active, num_bound);

        update_active(num_feat, num_vec);

#ifdef DEBUG_SUBGRADIENTSVM
        SG_PRINT("==================================================\niteration: %d ", num_iterations);
        obj=compute_objective(num_feat, num_vec);
        SG_PRINT("objective:%.10f alpha: %.10f dir_deriv: %f num_bound: %d num_active: %d work_eps: %10.10f eps: %10.10f auto_eps: %10.10f time:%f\n",
                obj, alpha, dir_deriv, num_bound, num_active, work_epsilon, epsilon, autoselected_epsilon, loop_time);
#else
        SG_ABS_PROGRESS(work_epsilon, -CMath::log10(work_epsilon), -CMath::log10(0.99999999), -CMath::log10(epsilon), 6);
#endif
        //CMath::display_vector(w, w_dim, "w");
        //SG_PRINT("bias: %f\n", bias);
        //CMath::display_vector(proj, num_vec, "proj");
        //CMath::display_vector(idx_active, num_active, "idx_active");
        //SG_PRINT("num_active: %d\n", num_active);
        //CMath::display_vector(idx_bound, num_bound, "idx_bound");
        //SG_PRINT("num_bound: %d\n", num_bound);
        //CMath::display_vector(sum_CXy_active, num_feat, "sum_CXy_active");
        //SG_PRINT("sum_Cy_active: %f\n", sum_Cy_active);
        //CMath::display_vector(grad_w, num_feat, "grad_w");
        //SG_PRINT("grad_b:%f\n", grad_b);

        dir_deriv=compute_min_subgradient(num_feat, num_vec, num_active, num_bound);

        alpha=line_search(num_feat, num_vec);

        if (num_it_noimprovement==10 || num_bound<qpsize_max)
        {
            float64_t norm_grad=SGVector<float64_t>::dot(grad_w, grad_w, num_feat) +
                grad_b*grad_b;

#ifdef DEBUG_SUBGRADIENTSVM
            SG_PRINT("CHECKING OPTIMALITY CONDITIONS: "
                    "work_epsilon: %10.10f delta_active:%d alpha: %10.10f norm_grad: %10.10f a*norm_grad:%10.16f\n",
                    work_epsilon, delta_active, alpha, norm_grad, CMath::abs(alpha*norm_grad));
#else
            SG_ABS_PROGRESS(work_epsilon, -CMath::log10(work_epsilon), -CMath::log10(0.99999999), -CMath::log10(epsilon), 6);
#endif

            if (work_epsilon<=epsilon && delta_active==0 && CMath::abs(alpha*norm_grad)<1e-6)
                break;
            else
                num_it_noimprovement=0;
        }

        if ((dir_deriv<0 || alpha==0) && (work_epsilon<=epsilon && delta_active==0))
        {
            if (last_it_noimprovement==num_iterations-1)
            {
                SG_PRINT("no improvement...\n");
                num_it_noimprovement++;
            }
            else
                num_it_noimprovement=0;

            last_it_noimprovement=num_iterations;
        }

        SGVector<float64_t>::vec1_plus_scalar_times_vec2(w.vector, -alpha, grad_w, num_feat);
        bias-=alpha*grad_b;

        update_projection(alpha, num_vec);
        //compute_projection(num_feat, num_vec);
        //CMath::display_vector(w.vector, w_dim, "w");
        //SG_PRINT("bias: %f\n", bias);
        //CMath::display_vector(proj, num_vec, "proj");

        t.stop();
#ifdef DEBUG_SUBGRADIENTSVM
        loop_time=t.time_diff_sec();
#endif
        num_iterations++;

        if (get_max_train_time()>0 && time.cur_time_diff()>get_max_train_time())
            break;
    }
    SG_DONE();

    SG_INFO("converged after %d iterations\n", num_iterations);

    obj=compute_objective(num_feat, num_vec);
    SG_INFO("objective: %f alpha: %f dir_deriv: %f num_bound: %d num_active: %d\n",
            obj, alpha, dir_deriv, num_bound, num_active);

#ifdef DEBUG_SUBGRADIENTSVM
    CMath::display_vector(w.vector, w.vlen, "w");
    SG_PRINT("bias: %f\n", bias);
#endif
    SG_INFO("solver time:%f s\n", tim);

    cleanup();

    return true;
}
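
/* Example usage -- a minimal sketch, assuming Shogun's legacy C++ API from
 * the same era as this file (init_shogun_with_defaults, CDenseFeatures,
 * CBinaryLabels); the toy data below is made up for illustration:
 *
 *   #include <shogun/base/init.h>
 *   #include <shogun/features/DenseFeatures.h>
 *   #include <shogun/labels/BinaryLabels.h>
 *   #include <shogun/classifier/svm/SubGradientSVM.h>
 *
 *   using namespace shogun;
 *
 *   int main()
 *   {
 *       init_shogun_with_defaults();
 *
 *       // 4 two-dimensional examples, stored column-wise (dim x num_vec)
 *       SGMatrix<float64_t> X(2, 4);
 *       const float64_t data[8]={-1,-1, -1,1, 1,-1, 1,1};
 *       for (int32_t i=0; i<8; i++)
 *           X.matrix[i]=data[i];
 *
 *       SGVector<float64_t> y(4);
 *       y[0]=-1; y[1]=-1; y[2]=1; y[3]=1;
 *
 *       CDenseFeatures<float64_t>* feats=new CDenseFeatures<float64_t>(X);
 *       CBinaryLabels* labels=new CBinaryLabels(y);
 *
 *       // C=1.0; train() ends up in CSubGradientSVM::train_machine() above
 *       CSubGradientSVM* svm=new CSubGradientSVM(1.0, feats, labels);
 *       svm->train();
 *
 *       CBinaryLabels* out=svm->apply_binary(feats);
 *       SG_UNREF(out);
 *       SG_UNREF(svm);
 *
 *       exit_shogun();
 *       return 0;
 *   }
 */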