00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include <shogun/lib/config.h>
00013 #include <shogun/mathematics/Math.h>
00014 #include <shogun/lib/Signal.h>
00015 #include <shogun/lib/Time.h>
00016 #include <shogun/machine/LinearMachine.h>
00017 #include <shogun/classifier/svm/SubGradientSVM.h>
00018 #include <shogun/classifier/svm/QPBSVMLib.h>
00019 #include <shogun/features/DotFeatures.h>
00020 #include <shogun/features/Labels.h>
00021
00022 #undef DEBUG_SUBGRADIENTSVM
00023
00024 using namespace shogun;
00025
00026 CSubGradientSVM::CSubGradientSVM()
00027 : CLinearMachine(), C1(1), C2(1), epsilon(1e-5), qpsize(42),
00028 qpsize_max(2000), use_bias(false), delta_active(0), delta_bound(0)
00029 {
00030 }
00031
00032 CSubGradientSVM::CSubGradientSVM(
00033 float64_t C, CDotFeatures* traindat, CLabels* trainlab)
00034 : CLinearMachine(), C1(C), C2(C), epsilon(1e-5), qpsize(42),
00035 qpsize_max(2000), use_bias(false), delta_active(0), delta_bound(0)
00036 {
00037 set_features(traindat);
00038 set_labels(trainlab);
00039 }
00040
00041
00042 CSubGradientSVM::~CSubGradientSVM()
00043 {
00044 }
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079 int32_t CSubGradientSVM::find_active(
00080 int32_t num_feat, int32_t num_vec, int32_t& num_active, int32_t& num_bound)
00081 {
00082 delta_bound=0;
00083 delta_active=0;
00084 num_active=0;
00085 num_bound=0;
00086
00087 for (int32_t i=0; i<num_vec; i++)
00088 {
00089 active[i]=0;
00090
00091
00092 if (proj[i] < 1-autoselected_epsilon)
00093 {
00094 idx_active[num_active++]=i;
00095 active[i]=1;
00096 }
00097
00098
00099 if (CMath::abs(proj[i]-1) <= autoselected_epsilon)
00100 {
00101 idx_bound[num_bound++]=i;
00102 active[i]=2;
00103 }
00104
00105 if (active[i]!=old_active[i])
00106 delta_active++;
00107
00108 if (active[i]==2 && old_active[i]==2)
00109 delta_bound++;
00110 }
00111
00112
00113 if (delta_active==0 && work_epsilon<=epsilon)
00114 return 0;
00115 else if (delta_active==0)
00116 {
00117 work_epsilon=CMath::min(work_epsilon/2, autoselected_epsilon);
00118 work_epsilon=CMath::max(work_epsilon, epsilon);
00119 num_bound=qpsize;
00120 }
00121
00122 delta_bound=0;
00123 delta_active=0;
00124 num_active=0;
00125 num_bound=0;
00126
00127 for (int32_t i=0; i<num_vec; i++)
00128 {
00129 tmp_proj[i]=CMath::abs(proj[i]-1);
00130 tmp_proj_idx[i]=i;
00131 }
00132
00133 CMath::qsort_index(tmp_proj, tmp_proj_idx, num_vec);
00134
00135 autoselected_epsilon=tmp_proj[CMath::min(qpsize,num_vec-1)];
00136
00137 #ifdef DEBUG_SUBGRADIENTSVM
00138
00139 #endif
00140
00141 if (autoselected_epsilon>work_epsilon)
00142 autoselected_epsilon=work_epsilon;
00143
00144 if (autoselected_epsilon<epsilon)
00145 {
00146 autoselected_epsilon=epsilon;
00147
00148 int32_t i=0;
00149 while (i < num_vec && tmp_proj[i] <= autoselected_epsilon)
00150 i++;
00151
00152
00153
00154 if (i>=qpsize_max && autoselected_epsilon>epsilon)
00155 {
00156 SG_INFO("qpsize limit (%d) reached\n", qpsize_max);
00157 int32_t num_in_qp=i;
00158 while (--i>=0 && num_in_qp>=qpsize_max)
00159 {
00160 if (tmp_proj[i] < autoselected_epsilon)
00161 {
00162 autoselected_epsilon=tmp_proj[i];
00163 num_in_qp--;
00164 }
00165 }
00166
00167
00168 }
00169 }
00170
00171 for (int32_t i=0; i<num_vec; i++)
00172 {
00173 active[i]=0;
00174
00175
00176 if (proj[i] < 1-autoselected_epsilon)
00177 {
00178 idx_active[num_active++]=i;
00179 active[i]=1;
00180 }
00181
00182
00183 if (CMath::abs(proj[i]-1) <= autoselected_epsilon)
00184 {
00185 idx_bound[num_bound++]=i;
00186 active[i]=2;
00187 }
00188
00189 if (active[i]!=old_active[i])
00190 delta_active++;
00191
00192 if (active[i]==2 && old_active[i]==2)
00193 delta_bound++;
00194 }
00195
00196
00197 return delta_active;
00198 }
00199
00200
00201 void CSubGradientSVM::update_active(int32_t num_feat, int32_t num_vec)
00202 {
00203 for (int32_t i=0; i<num_vec; i++)
00204 {
00205 if (active[i]==1 && old_active[i]!=1)
00206 {
00207 features->add_to_dense_vec(C1*get_label(i), i, sum_CXy_active, num_feat);
00208 if (use_bias)
00209 sum_Cy_active+=C1*get_label(i);
00210 }
00211 else if (old_active[i]==1 && active[i]!=1)
00212 {
00213 features->add_to_dense_vec(-C1*get_label(i), i, sum_CXy_active, num_feat);
00214 if (use_bias)
00215 sum_Cy_active-=C1*get_label(i);
00216 }
00217 }
00218
00219 CMath::swap(active,old_active);
00220 }
00221
00222 float64_t CSubGradientSVM::line_search(int32_t num_feat, int32_t num_vec)
00223 {
00224 float64_t sum_B = 0;
00225 float64_t A_zero = 0.5*CMath::dot(grad_w, grad_w, num_feat);
00226 float64_t B_zero = -CMath::dot(w, grad_w, num_feat);
00227
00228 int32_t num_hinge=0;
00229
00230 for (int32_t i=0; i<num_vec; i++)
00231 {
00232 float64_t p=get_label(i)*(features->dense_dot(i, grad_w, num_feat)+grad_b);
00233 grad_proj[i]=p;
00234 if (p!=0)
00235 {
00236 hinge_point[num_hinge]=(proj[i]-1)/p;
00237 hinge_idx[num_hinge]=i;
00238 num_hinge++;
00239
00240 if (p<0)
00241 sum_B+=p;
00242 }
00243 }
00244 sum_B*=C1;
00245
00246 CMath::qsort_index(hinge_point, hinge_idx, num_hinge);
00247
00248
00249 float64_t alpha = hinge_point[0];
00250 float64_t grad_val = 2*A_zero*alpha + B_zero + sum_B;
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260
00261 float64_t old_grad_val = grad_val;
00262 float64_t old_alpha = alpha;
00263
00264 for (int32_t i=1; i < num_hinge && grad_val < 0; i++)
00265 {
00266 alpha = hinge_point[i];
00267 grad_val = 2*A_zero*alpha + B_zero + sum_B;
00268
00269 if (grad_val > 0)
00270 {
00271 ASSERT(old_grad_val-grad_val != 0);
00272 float64_t gamma = -grad_val/(old_grad_val-grad_val);
00273 alpha = old_alpha*gamma + (1-gamma)*alpha;
00274 }
00275 else
00276 {
00277 old_grad_val = grad_val;
00278 old_alpha = alpha;
00279
00280 sum_B = sum_B + CMath::abs(C1*grad_proj[hinge_idx[i]]);
00281 grad_val = 2*A_zero*alpha + B_zero + sum_B;
00282 }
00283 }
00284
00285 return alpha;
00286 }
00287
00288 float64_t CSubGradientSVM::compute_min_subgradient(
00289 int32_t num_feat, int32_t num_vec, int32_t num_active, int32_t num_bound)
00290 {
00291 float64_t dir_deriv=0;
00292
00293 if (num_bound > 0)
00294 {
00295
00296 CTime t2;
00297 CMath::add(v, 1.0, w, -1.0, sum_CXy_active, num_feat);
00298
00299 if (num_bound>=qpsize_max && num_it_noimprovement!=10)
00300 {
00301
00302 for (int32_t i=0; i<num_bound; i++)
00303 beta[i]=CMath::random(0.0,1.0);
00304 }
00305 else
00306 {
00307 memset(beta, 0, sizeof(float64_t)*num_bound);
00308
00309 float64_t bias_const=0;
00310
00311 if (use_bias)
00312 bias_const=1;
00313
00314 for (int32_t i=0; i<num_bound; i++)
00315 {
00316 for (int32_t j=i; j<num_bound; j++)
00317 {
00318 Z[i*num_bound+j]= 2.0*C1*C1*get_label(idx_bound[i])*get_label(idx_bound[j])*
00319 (features->dot(idx_bound[i], features, idx_bound[j]) + bias_const);
00320
00321 Z[j*num_bound+i]=Z[i*num_bound+j];
00322
00323 }
00324
00325 Zv[i]=-2.0*C1*get_label(idx_bound[i])*
00326 (features->dense_dot(idx_bound[i], v, num_feat)-sum_Cy_active);
00327 }
00328
00329
00330
00331 t2.stop();
00332 #ifdef DEBUG_SUBGRADIENTSVM
00333 t2.time_diff_sec(true);
00334 #endif
00335
00336 CTime t;
00337 CQPBSVMLib solver(Z,num_bound, Zv,num_bound, 1.0);
00338
00339
00340 #ifdef USE_CPLEX
00341 solver.set_solver(QPB_SOLVER_CPLEX);
00342 #else
00343 solver.set_solver(QPB_SOLVER_SCAS);
00344 #endif
00345
00346 solver.solve_qp(beta, num_bound);
00347
00348 t.stop();
00349 #ifdef DEBUG_SUBGRADIENTSVM
00350 tim+=t.time_diff_sec(true);
00351 #else
00352 tim+=t.time_diff_sec(false);
00353 #endif
00354
00355
00356
00357
00358
00359
00360
00361
00362 }
00363
00364 CMath::add(grad_w, 1.0, w, -1.0, sum_CXy_active, num_feat);
00365 grad_b = -sum_Cy_active;
00366
00367 for (int32_t i=0; i<num_bound; i++)
00368 {
00369 features->add_to_dense_vec(-C1*beta[i]*get_label(idx_bound[i]), idx_bound[i], grad_w, num_feat);
00370 if (use_bias)
00371 grad_b -= C1 * get_label(idx_bound[i])*beta[i];
00372 }
00373
00374 dir_deriv = CMath::dot(grad_w, v, num_feat) - grad_b*sum_Cy_active;
00375 for (int32_t i=0; i<num_bound; i++)
00376 {
00377 float64_t val= C1*get_label(idx_bound[i])*(features->dense_dot(idx_bound[i], grad_w, num_feat)+grad_b);
00378 dir_deriv += CMath::max(0.0, val);
00379 }
00380 }
00381 else
00382 {
00383 CMath::add(grad_w, 1.0, w, -1.0, sum_CXy_active, num_feat);
00384 grad_b = -sum_Cy_active;
00385
00386 dir_deriv = CMath::dot(grad_w, grad_w, num_feat)+ grad_b*grad_b;
00387 }
00388
00389 return dir_deriv;
00390 }
00391
00392 float64_t CSubGradientSVM::compute_objective(int32_t num_feat, int32_t num_vec)
00393 {
00394 float64_t result= 0.5 * CMath::dot(w,w, num_feat);
00395
00396 for (int32_t i=0; i<num_vec; i++)
00397 {
00398 if (proj[i]<1.0)
00399 result += C1 * (1.0-proj[i]);
00400 }
00401
00402 return result;
00403 }
00404
00405 void CSubGradientSVM::compute_projection(int32_t num_feat, int32_t num_vec)
00406 {
00407 for (int32_t i=0; i<num_vec; i++)
00408 proj[i]=get_label(i)*(features->dense_dot(i, w, num_feat)+bias);
00409 }
00410
00411 void CSubGradientSVM::update_projection(float64_t alpha, int32_t num_vec)
00412 {
00413 CMath::vec1_plus_scalar_times_vec2(proj,-alpha, grad_proj, num_vec);
00414 }
00415
00416 void CSubGradientSVM::init(int32_t num_vec, int32_t num_feat)
00417 {
00418
00419 SG_FREE(w);
00420 w=SG_MALLOC(float64_t, num_feat);
00421 w_dim=num_feat;
00422 memset(w,0,sizeof(float64_t)*num_feat);
00423
00424 bias=0;
00425 num_it_noimprovement=0;
00426 grad_b=0;
00427 qpsize_limit=5000;
00428
00429 grad_w=SG_MALLOC(float64_t, num_feat);
00430 memset(grad_w,0,sizeof(float64_t)*num_feat);
00431
00432 sum_CXy_active=SG_MALLOC(float64_t, num_feat);
00433 memset(sum_CXy_active,0,sizeof(float64_t)*num_feat);
00434
00435 v=SG_MALLOC(float64_t, num_feat);
00436 memset(v,0,sizeof(float64_t)*num_feat);
00437
00438 old_v=SG_MALLOC(float64_t, num_feat);
00439 memset(old_v,0,sizeof(float64_t)*num_feat);
00440
00441 sum_Cy_active=0;
00442
00443 proj= SG_MALLOC(float64_t, num_vec);
00444 memset(proj,0,sizeof(float64_t)*num_vec);
00445
00446 tmp_proj=SG_MALLOC(float64_t, num_vec);
00447 memset(proj,0,sizeof(float64_t)*num_vec);
00448
00449 tmp_proj_idx= SG_MALLOC(int32_t, num_vec);
00450 memset(tmp_proj_idx,0,sizeof(int32_t)*num_vec);
00451
00452 grad_proj= SG_MALLOC(float64_t, num_vec);
00453 memset(grad_proj,0,sizeof(float64_t)*num_vec);
00454
00455 hinge_point= SG_MALLOC(float64_t, num_vec);
00456 memset(hinge_point,0,sizeof(float64_t)*num_vec);
00457
00458 hinge_idx= SG_MALLOC(int32_t, num_vec);
00459 memset(hinge_idx,0,sizeof(int32_t)*num_vec);
00460
00461 active=SG_MALLOC(uint8_t, num_vec);
00462 memset(active,0,sizeof(uint8_t)*num_vec);
00463
00464 old_active=SG_MALLOC(uint8_t, num_vec);
00465 memset(old_active,0,sizeof(uint8_t)*num_vec);
00466
00467 idx_bound=SG_MALLOC(int32_t, num_vec);
00468 memset(idx_bound,0,sizeof(int32_t)*num_vec);
00469
00470 idx_active=SG_MALLOC(int32_t, num_vec);
00471 memset(idx_active,0,sizeof(int32_t)*num_vec);
00472
00473 Z=SG_MALLOC(float64_t, qpsize_limit*qpsize_limit);
00474 memset(Z,0,sizeof(float64_t)*qpsize_limit*qpsize_limit);
00475
00476 Zv=SG_MALLOC(float64_t, qpsize_limit);
00477 memset(Zv,0,sizeof(float64_t)*qpsize_limit);
00478
00479 beta=SG_MALLOC(float64_t, qpsize_limit);
00480 memset(beta,0,sizeof(float64_t)*qpsize_limit);
00481
00482 old_Z=SG_MALLOC(float64_t, qpsize_limit*qpsize_limit);
00483 memset(old_Z,0,sizeof(float64_t)*qpsize_limit*qpsize_limit);
00484
00485 old_Zv=SG_MALLOC(float64_t, qpsize_limit);
00486 memset(old_Zv,0,sizeof(float64_t)*qpsize_limit);
00487
00488 old_beta=SG_MALLOC(float64_t, qpsize_limit);
00489 memset(old_beta,0,sizeof(float64_t)*qpsize_limit);
00490
00491 }
00492
00493 void CSubGradientSVM::cleanup()
00494 {
00495 SG_FREE(hinge_idx);
00496 SG_FREE(hinge_point);
00497 SG_FREE(grad_proj);
00498 SG_FREE(proj);
00499 SG_FREE(tmp_proj);
00500 SG_FREE(tmp_proj_idx);
00501 SG_FREE(active);
00502 SG_FREE(old_active);
00503 SG_FREE(idx_bound);
00504 SG_FREE(idx_active);
00505 SG_FREE(sum_CXy_active);
00506 SG_FREE(grad_w);
00507 SG_FREE(v);
00508 SG_FREE(Z);
00509 SG_FREE(Zv);
00510 SG_FREE(beta);
00511 SG_FREE(old_v);
00512 SG_FREE(old_Z);
00513 SG_FREE(old_Zv);
00514 SG_FREE(old_beta);
00515
00516 hinge_idx=NULL;
00517 proj=NULL;
00518 active=NULL;
00519 old_active=NULL;
00520 idx_bound=NULL;
00521 idx_active=NULL;
00522 sum_CXy_active=NULL;
00523 grad_w=NULL;
00524 v=NULL;
00525 Z=NULL;
00526 Zv=NULL;
00527 beta=NULL;
00528 }
00529
00530 bool CSubGradientSVM::train_machine(CFeatures* data)
00531 {
00532 tim=0;
00533 SG_INFO("C=%f epsilon=%f\n", C1, epsilon);
00534 ASSERT(labels);
00535
00536 if (data)
00537 {
00538 if (!data->has_property(FP_DOT))
00539 SG_ERROR("Specified features are not of type CDotFeatures\n");
00540 set_features((CDotFeatures*) data);
00541 }
00542 ASSERT(get_features());
00543
00544 int32_t num_iterations=0;
00545 int32_t num_train_labels=labels->get_num_labels();
00546 int32_t num_feat=features->get_dim_feature_space();
00547 int32_t num_vec=features->get_num_vectors();
00548
00549 ASSERT(num_vec==num_train_labels);
00550
00551 init(num_vec, num_feat);
00552
00553 int32_t num_active=0;
00554 int32_t num_bound=0;
00555 float64_t alpha=0;
00556 float64_t dir_deriv=0;
00557 float64_t obj=0;
00558 delta_active=num_vec;
00559 last_it_noimprovement=-1;
00560
00561 work_epsilon=0.99;
00562 autoselected_epsilon=work_epsilon;
00563
00564 compute_projection(num_feat, num_vec);
00565
00566 CTime time;
00567 #ifdef DEBUG_SUBGRADIENTSVM
00568 float64_t loop_time=0;
00569 #endif
00570 while (!(CSignal::cancel_computations()))
00571 {
00572 CTime t;
00573 delta_active=find_active(num_feat, num_vec, num_active, num_bound);
00574
00575 update_active(num_feat, num_vec);
00576
00577 #ifdef DEBUG_SUBGRADIENTSVM
00578 SG_PRINT("==================================================\niteration: %d ", num_iterations);
00579 obj=compute_objective(num_feat, num_vec);
00580 SG_PRINT("objective:%.10f alpha: %.10f dir_deriv: %f num_bound: %d num_active: %d work_eps: %10.10f eps: %10.10f auto_eps: %10.10f time:%f\n",
00581 obj, alpha, dir_deriv, num_bound, num_active, work_epsilon, epsilon, autoselected_epsilon, loop_time);
00582 #else
00583 SG_ABS_PROGRESS(work_epsilon, -CMath::log10(work_epsilon), -CMath::log10(0.99999999), -CMath::log10(epsilon), 6);
00584 #endif
00585
00586
00587
00588
00589
00590
00591
00592
00593
00594
00595
00596
00597 dir_deriv=compute_min_subgradient(num_feat, num_vec, num_active, num_bound);
00598
00599 alpha=line_search(num_feat, num_vec);
00600
00601 if (num_it_noimprovement==10 || num_bound<qpsize_max)
00602 {
00603 float64_t norm_grad=CMath::dot(grad_w, grad_w, num_feat) +
00604 grad_b*grad_b;
00605
00606 #ifdef DEBUG_SUBGRADIENTSVM
00607 SG_PRINT("CHECKING OPTIMALITY CONDITIONS: "
00608 "work_epsilon: %10.10f delta_active:%d alpha: %10.10f norm_grad: %10.10f a*norm_grad:%10.16f\n",
00609 work_epsilon, delta_active, alpha, norm_grad, CMath::abs(alpha*norm_grad));
00610 #else
00611 SG_ABS_PROGRESS(work_epsilon, -CMath::log10(work_epsilon), -CMath::log10(0.99999999), -CMath::log10(epsilon), 6);
00612 #endif
00613
00614 if (work_epsilon<=epsilon && delta_active==0 && CMath::abs(alpha*norm_grad)<1e-6)
00615 break;
00616 else
00617 num_it_noimprovement=0;
00618 }
00619
00620 if ((dir_deriv<0 || alpha==0) && (work_epsilon<=epsilon && delta_active==0))
00621 {
00622 if (last_it_noimprovement==num_iterations-1)
00623 {
00624 SG_PRINT("no improvement...\n");
00625 num_it_noimprovement++;
00626 }
00627 else
00628 num_it_noimprovement=0;
00629
00630 last_it_noimprovement=num_iterations;
00631 }
00632
00633 CMath::vec1_plus_scalar_times_vec2(w, -alpha, grad_w, num_feat);
00634 bias-=alpha*grad_b;
00635
00636 update_projection(alpha, num_vec);
00637
00638
00639
00640
00641
00642 t.stop();
00643 #ifdef DEBUG_SUBGRADIENTSVM
00644 loop_time=t.time_diff_sec();
00645 #endif
00646 num_iterations++;
00647
00648 if (get_max_train_time()>0 && time.cur_time_diff()>get_max_train_time())
00649 break;
00650 }
00651 SG_DONE();
00652
00653 SG_INFO("converged after %d iterations\n", num_iterations);
00654
00655 obj=compute_objective(num_feat, num_vec);
00656 SG_INFO("objective: %f alpha: %f dir_deriv: %f num_bound: %d num_active: %d\n",
00657 obj, alpha, dir_deriv, num_bound, num_active);
00658
00659 #ifdef DEBUG_SUBGRADIENTSVM
00660 CMath::display_vector(w, w_dim, "w");
00661 SG_PRINT("bias: %f\n", bias);
00662 #endif
00663 SG_INFO("solver time:%f s\n", tim);
00664
00665 cleanup();
00666
00667 return true;
00668 }