00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include <shogun/lib/config.h>
00013 #include <shogun/mathematics/Math.h>
00014 #include <shogun/lib/Signal.h>
00015 #include <shogun/lib/Time.h>
00016 #include <shogun/machine/LinearMachine.h>
00017 #include <shogun/classifier/svm/SubGradientSVM.h>
00018 #include <shogun/classifier/svm/QPBSVMLib.h>
00019 #include <shogun/features/DotFeatures.h>
00020 #include <shogun/labels/Labels.h>
00021 #include <shogun/labels/BinaryLabels.h>
00022
00023 #undef DEBUG_SUBGRADIENTSVM
00024
00025 using namespace shogun;
00026
/** Default constructor.
 * Regularization C1=C2=1, stopping tolerance epsilon=1e-5, QP working-set
 * size 42 (capped at 2000), no bias term. Features/labels must be set
 * before training. */
CSubGradientSVM::CSubGradientSVM()
: CLinearMachine(), C1(1), C2(1), epsilon(1e-5), qpsize(42),
	qpsize_max(2000), use_bias(false), delta_active(0), delta_bound(0)
{
}
00032
/** Constructor with training data.
 * @param C regularization constant (applied as both C1 and C2)
 * @param traindat training features (dot-product capable)
 * @param trainlab binary training labels */
CSubGradientSVM::CSubGradientSVM(
	float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearMachine(), C1(C), C2(C), epsilon(1e-5), qpsize(42),
	qpsize_max(2000), use_bias(false), delta_active(0), delta_bound(0)
{
	set_features(traindat);
	set_labels(trainlab);
}
00041
00042
/** Destructor. Training buffers are released by cleanup() at the end of
 * train_machine(), so nothing is owned here. */
CSubGradientSVM::~CSubGradientSVM()
{
}
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
/** Partition the training vectors into active sets based on the current
 * projections proj[i] = y_i*(<w,x_i>+b):
 *   active[i]=1 : margin violator (proj < 1 - eps) — contributes to the gradient
 *   active[i]=2 : at the hinge boundary (|proj - 1| <= eps) — enters the QP
 *   active[i]=0 : otherwise inactive
 * If no status changed under the current autoselected_epsilon, the working
 * tolerance is tightened and epsilon is re-selected from the qpsize-th
 * smallest |proj-1| so roughly qpsize vectors sit at the boundary, then the
 * partition is recomputed.
 * @return number of vectors whose status changed (0 once converged at the
 *         final tolerance work_epsilon <= epsilon) */
int32_t CSubGradientSVM::find_active(
	int32_t num_feat, int32_t num_vec, int32_t& num_active, int32_t& num_bound)
{
	delta_bound=0;
	delta_active=0;
	num_active=0;
	num_bound=0;

	// first pass: classify with the current autoselected_epsilon
	for (int32_t i=0; i<num_vec; i++)
	{
		active[i]=0;

		// margin violator
		if (proj[i] < 1-autoselected_epsilon)
		{
			idx_active[num_active++]=i;
			active[i]=1;
		}

		// on the boundary of the hinge
		if (CMath::abs(proj[i]-1) <= autoselected_epsilon)
		{
			idx_bound[num_bound++]=i;
			active[i]=2;
		}

		if (active[i]!=old_active[i])
			delta_active++;

		// NOTE(review): this counts vectors that REMAIN at the bound, not ones
		// that changed — confirm this is the intended meaning of delta_bound.
		if (active[i]==2 && old_active[i]==2)
			delta_bound++;
	}

	// converged: nothing changed and we already run at the final tolerance
	if (delta_active==0 && work_epsilon<=epsilon)
		return 0;
	else if (delta_active==0)
	{
		// stalled at the current tolerance: tighten work_epsilon (never below
		// the user-supplied epsilon) and fall through to re-selection below
		work_epsilon=CMath::min(work_epsilon/2, autoselected_epsilon);
		work_epsilon=CMath::max(work_epsilon, epsilon);
		num_bound=qpsize;
	}

	delta_bound=0;
	delta_active=0;
	num_active=0;
	num_bound=0;

	// sort the distances to the margin to auto-select a new epsilon
	for (int32_t i=0; i<num_vec; i++)
	{
		tmp_proj[i]=CMath::abs(proj[i]-1);
		tmp_proj_idx[i]=i;
	}

	CMath::qsort_index(tmp_proj, tmp_proj_idx, num_vec);

	// choose eps so that about qpsize vectors land on the boundary
	autoselected_epsilon=tmp_proj[CMath::min(qpsize,num_vec-1)];

#ifdef DEBUG_SUBGRADIENTSVM

#endif

	if (autoselected_epsilon>work_epsilon)
		autoselected_epsilon=work_epsilon;

	if (autoselected_epsilon<epsilon)
	{
		autoselected_epsilon=epsilon;

		int32_t i=0;
		while (i < num_vec && tmp_proj[i] <= autoselected_epsilon)
			i++;

		// too many vectors would enter the QP: shrink eps until the bound
		// set fits below qpsize_max
		if (i>=qpsize_max && autoselected_epsilon>epsilon)
		{
			SG_INFO("qpsize limit (%d) reached\n", qpsize_max);
			int32_t num_in_qp=i;
			while (--i>=0 && num_in_qp>=qpsize_max)
			{
				if (tmp_proj[i] < autoselected_epsilon)
				{
					autoselected_epsilon=tmp_proj[i];
					num_in_qp--;
				}
			}
		}
	}

	// second pass: re-classify with the freshly selected epsilon
	for (int32_t i=0; i<num_vec; i++)
	{
		active[i]=0;

		// margin violator
		if (proj[i] < 1-autoselected_epsilon)
		{
			idx_active[num_active++]=i;
			active[i]=1;
		}

		// on the boundary of the hinge
		if (CMath::abs(proj[i]-1) <= autoselected_epsilon)
		{
			idx_bound[num_bound++]=i;
			active[i]=2;
		}

		if (active[i]!=old_active[i])
			delta_active++;

		if (active[i]==2 && old_active[i]==2)
			delta_bound++;
	}

	return delta_active;
}
00200
00201
00202 void CSubGradientSVM::update_active(int32_t num_feat, int32_t num_vec)
00203 {
00204 for (int32_t i=0; i<num_vec; i++)
00205 {
00206 if (active[i]==1 && old_active[i]!=1)
00207 {
00208 features->add_to_dense_vec(C1*((CBinaryLabels*) m_labels)->get_label(i), i, sum_CXy_active, num_feat);
00209 if (use_bias)
00210 sum_Cy_active+=C1*((CBinaryLabels*) m_labels)->get_label(i);
00211 }
00212 else if (old_active[i]==1 && active[i]!=1)
00213 {
00214 features->add_to_dense_vec(-C1*((CBinaryLabels*) m_labels)->get_label(i), i, sum_CXy_active, num_feat);
00215 if (use_bias)
00216 sum_Cy_active-=C1*((CBinaryLabels*) m_labels)->get_label(i);
00217 }
00218 }
00219
00220 CMath::swap(active,old_active);
00221 }
00222
/** Exact line search along the descent direction (-grad_w, -grad_b).
 * The objective restricted to the line is piecewise quadratic; its kinks
 * ("hinge points") are the step sizes at which a training vector crosses
 * the margin. Walk the sorted hinge points while the directional derivative
 * stays negative; on the first sign change, linearly interpolate the zero
 * crossing. Also fills grad_proj[i] = y_i*<grad,x_i> for update_projection().
 * @return chosen step size alpha */
float64_t CSubGradientSVM::line_search(int32_t num_feat, int32_t num_vec)
{
	float64_t sum_B = 0;
	float64_t A_zero = 0.5*SGVector<float64_t>::dot(grad_w, grad_w, num_feat);
	float64_t B_zero = -SGVector<float64_t>::dot(w.vector, grad_w, num_feat);

	int32_t num_hinge=0;

	for (int32_t i=0; i<num_vec; i++)
	{
		// p: rate of change of vector i's projection along the direction
		float64_t p=((CBinaryLabels*) m_labels)->get_label(i)*(features->dense_dot(i, grad_w, num_feat)+grad_b);
		grad_proj[i]=p;
		if (p!=0)
		{
			// step at which vector i hits the margin (proj == 1)
			hinge_point[num_hinge]=(proj[i]-1)/p;
			hinge_idx[num_hinge]=i;
			num_hinge++;

			if (p<0)
				sum_B+=p;
		}
	}
	sum_B*=C1;

	CMath::qsort_index(hinge_point, hinge_idx, num_hinge);

	// NOTE(review): if num_hinge==0 this reads the zero-initialized buffer
	// (effectively alpha=0) — confirm a zero step is the intended fallback.
	float64_t alpha = hinge_point[0];
	float64_t grad_val = 2*A_zero*alpha + B_zero + sum_B;

	float64_t old_grad_val = grad_val;
	float64_t old_alpha = alpha;

	for (int32_t i=1; i < num_hinge && grad_val < 0; i++)
	{
		alpha = hinge_point[i];
		grad_val = 2*A_zero*alpha + B_zero + sum_B;

		if (grad_val > 0)
		{
			// derivative changed sign: interpolate its zero between the
			// previous and current hinge point
			ASSERT(old_grad_val-grad_val != 0);
			float64_t gamma = -grad_val/(old_grad_val-grad_val);
			alpha = old_alpha*gamma + (1-gamma)*alpha;
		}
		else
		{
			old_grad_val = grad_val;
			old_alpha = alpha;

			// vector hinge_idx[i] crossed the margin: its hinge term now
			// contributes with flipped sign to the slope
			sum_B = sum_B + CMath::abs(C1*grad_proj[hinge_idx[i]]);
			grad_val = 2*A_zero*alpha + B_zero + sum_B;
		}
	}

	return alpha;
}
00288
/** Compute the minimum-norm subgradient of the primal objective.
 * The subdifferential is parameterized by beta in [0,1] for each vector at
 * the hinge boundary; the betas minimizing the subgradient norm are found by
 * solving a small box-constrained QP (Z beta = Zv) over the bound set, then
 * grad_w = w - sum_CXy_active - sum_i beta_i*C1*y_i*x_i  (grad_b likewise).
 * When the bound set is too large for the QP, random betas are used instead.
 * @return directional derivative along the resulting direction (used as a
 *         stopping/progress criterion by train_machine) */
float64_t CSubGradientSVM::compute_min_subgradient(
	int32_t num_feat, int32_t num_vec, int32_t num_active, int32_t num_bound)
{
	float64_t dir_deriv=0;

	if (num_bound > 0)
	{
		CTime t2;
		// v = w - sum_CXy_active : the beta-independent part of the subgradient
		SGVector<float64_t>::add(v, 1.0, w.vector, -1.0, sum_CXy_active, num_feat);

		if (num_bound>=qpsize_max && num_it_noimprovement!=10)
		{
			// QP too large: fall back to random betas in [0,1]
			for (int32_t i=0; i<num_bound; i++)
				beta[i]=CMath::random(0.0,1.0);
		}
		else
		{
			memset(beta, 0, sizeof(float64_t)*num_bound);

			float64_t bias_const=0;

			if (use_bias)
				bias_const=1;

			// assemble the (symmetric) QP matrix Z and linear term Zv over
			// the bound vectors
			for (int32_t i=0; i<num_bound; i++)
			{
				for (int32_t j=i; j<num_bound; j++)
				{
					Z[i*num_bound+j]= 2.0*C1*C1*((CBinaryLabels*) m_labels)->get_label(idx_bound[i])*((CBinaryLabels*) m_labels)->get_label(idx_bound[j])*
						(features->dot(idx_bound[i], features, idx_bound[j]) + bias_const);

					Z[j*num_bound+i]=Z[i*num_bound+j];

				}

				Zv[i]=-2.0*C1*((CBinaryLabels*) m_labels)->get_label(idx_bound[i])*
					(features->dense_dot(idx_bound[i], v, num_feat)-sum_Cy_active);
			}

			t2.stop();
#ifdef DEBUG_SUBGRADIENTSVM
			t2.time_diff_sec(true);
#endif

			CTime t;
			CQPBSVMLib solver(Z,num_bound, Zv,num_bound, 1.0);

#ifdef USE_CPLEX
			solver.set_solver(QPB_SOLVER_CPLEX);
#else
			solver.set_solver(QPB_SOLVER_SCAS);
#endif

			solver.solve_qp(beta, num_bound);

			t.stop();
#ifdef DEBUG_SUBGRADIENTSVM
			tim+=t.time_diff_sec(true);
#else
			tim+=t.time_diff_sec(false);
#endif
		}

		// grad = (w - sum_CXy_active) minus the beta-weighted bound vectors
		SGVector<float64_t>::add(grad_w, 1.0, w.vector, -1.0, sum_CXy_active, num_feat);
		grad_b = -sum_Cy_active;

		for (int32_t i=0; i<num_bound; i++)
		{
			features->add_to_dense_vec(-C1*beta[i]*((CBinaryLabels*) m_labels)->get_label(idx_bound[i]), idx_bound[i], grad_w, num_feat);
			if (use_bias)
				grad_b -= C1 * ((CBinaryLabels*) m_labels)->get_label(idx_bound[i])*beta[i];
		}

		dir_deriv = SGVector<float64_t>::dot(grad_w, v, num_feat) - grad_b*sum_Cy_active;
		for (int32_t i=0; i<num_bound; i++)
		{
			float64_t val= C1*((CBinaryLabels*) m_labels)->get_label(idx_bound[i])*(features->dense_dot(idx_bound[i], grad_w, num_feat)+grad_b);
			dir_deriv += CMath::max(0.0, val);
		}
	}
	else
	{
		// no bound vectors: the subgradient is unique
		SGVector<float64_t>::add(grad_w, 1.0, w.vector, -1.0, sum_CXy_active, num_feat);
		grad_b = -sum_Cy_active;

		dir_deriv = SGVector<float64_t>::dot(grad_w, grad_w, num_feat)+ grad_b*grad_b;
	}

	return dir_deriv;
}
00392
00393 float64_t CSubGradientSVM::compute_objective(int32_t num_feat, int32_t num_vec)
00394 {
00395 float64_t result= 0.5 * SGVector<float64_t>::dot(w.vector,w.vector, num_feat);
00396
00397 for (int32_t i=0; i<num_vec; i++)
00398 {
00399 if (proj[i]<1.0)
00400 result += C1 * (1.0-proj[i]);
00401 }
00402
00403 return result;
00404 }
00405
00406 void CSubGradientSVM::compute_projection(int32_t num_feat, int32_t num_vec)
00407 {
00408 for (int32_t i=0; i<num_vec; i++)
00409 proj[i]=((CBinaryLabels*) m_labels)->get_label(i)*(features->dense_dot(i, w.vector, num_feat)+bias);
00410 }
00411
/** Incrementally update the projections after the step w -= alpha*grad_w:
 * proj[i] += -alpha*grad_proj[i], where grad_proj was filled by line_search().
 * (Relies on vec1_plus_scalar_times_vec2 doing vec1 += scalar*vec2 in place —
 * NOTE(review): confirm against the SGVector implementation.) */
void CSubGradientSVM::update_projection(float64_t alpha, int32_t num_vec)
{
	SGVector<float64_t>::vec1_plus_scalar_times_vec2(proj,-alpha, grad_proj, num_vec);
}
00416
00417 void CSubGradientSVM::init(int32_t num_vec, int32_t num_feat)
00418 {
00419
00420 w=SGVector<float64_t>(num_feat);
00421 w.zero();
00422
00423 bias=0;
00424 num_it_noimprovement=0;
00425 grad_b=0;
00426 qpsize_limit=5000;
00427
00428 grad_w=SG_MALLOC(float64_t, num_feat);
00429 memset(grad_w,0,sizeof(float64_t)*num_feat);
00430
00431 sum_CXy_active=SG_MALLOC(float64_t, num_feat);
00432 memset(sum_CXy_active,0,sizeof(float64_t)*num_feat);
00433
00434 v=SG_MALLOC(float64_t, num_feat);
00435 memset(v,0,sizeof(float64_t)*num_feat);
00436
00437 old_v=SG_MALLOC(float64_t, num_feat);
00438 memset(old_v,0,sizeof(float64_t)*num_feat);
00439
00440 sum_Cy_active=0;
00441
00442 proj= SG_MALLOC(float64_t, num_vec);
00443 memset(proj,0,sizeof(float64_t)*num_vec);
00444
00445 tmp_proj=SG_MALLOC(float64_t, num_vec);
00446 memset(proj,0,sizeof(float64_t)*num_vec);
00447
00448 tmp_proj_idx= SG_MALLOC(int32_t, num_vec);
00449 memset(tmp_proj_idx,0,sizeof(int32_t)*num_vec);
00450
00451 grad_proj= SG_MALLOC(float64_t, num_vec);
00452 memset(grad_proj,0,sizeof(float64_t)*num_vec);
00453
00454 hinge_point= SG_MALLOC(float64_t, num_vec);
00455 memset(hinge_point,0,sizeof(float64_t)*num_vec);
00456
00457 hinge_idx= SG_MALLOC(int32_t, num_vec);
00458 memset(hinge_idx,0,sizeof(int32_t)*num_vec);
00459
00460 active=SG_MALLOC(uint8_t, num_vec);
00461 memset(active,0,sizeof(uint8_t)*num_vec);
00462
00463 old_active=SG_MALLOC(uint8_t, num_vec);
00464 memset(old_active,0,sizeof(uint8_t)*num_vec);
00465
00466 idx_bound=SG_MALLOC(int32_t, num_vec);
00467 memset(idx_bound,0,sizeof(int32_t)*num_vec);
00468
00469 idx_active=SG_MALLOC(int32_t, num_vec);
00470 memset(idx_active,0,sizeof(int32_t)*num_vec);
00471
00472 Z=SG_MALLOC(float64_t, qpsize_limit*qpsize_limit);
00473 memset(Z,0,sizeof(float64_t)*qpsize_limit*qpsize_limit);
00474
00475 Zv=SG_MALLOC(float64_t, qpsize_limit);
00476 memset(Zv,0,sizeof(float64_t)*qpsize_limit);
00477
00478 beta=SG_MALLOC(float64_t, qpsize_limit);
00479 memset(beta,0,sizeof(float64_t)*qpsize_limit);
00480
00481 old_Z=SG_MALLOC(float64_t, qpsize_limit*qpsize_limit);
00482 memset(old_Z,0,sizeof(float64_t)*qpsize_limit*qpsize_limit);
00483
00484 old_Zv=SG_MALLOC(float64_t, qpsize_limit);
00485 memset(old_Zv,0,sizeof(float64_t)*qpsize_limit);
00486
00487 old_beta=SG_MALLOC(float64_t, qpsize_limit);
00488 memset(old_beta,0,sizeof(float64_t)*qpsize_limit);
00489
00490 }
00491
00492 void CSubGradientSVM::cleanup()
00493 {
00494 SG_FREE(hinge_idx);
00495 SG_FREE(hinge_point);
00496 SG_FREE(grad_proj);
00497 SG_FREE(proj);
00498 SG_FREE(tmp_proj);
00499 SG_FREE(tmp_proj_idx);
00500 SG_FREE(active);
00501 SG_FREE(old_active);
00502 SG_FREE(idx_bound);
00503 SG_FREE(idx_active);
00504 SG_FREE(sum_CXy_active);
00505 SG_FREE(grad_w);
00506 SG_FREE(v);
00507 SG_FREE(Z);
00508 SG_FREE(Zv);
00509 SG_FREE(beta);
00510 SG_FREE(old_v);
00511 SG_FREE(old_Z);
00512 SG_FREE(old_Zv);
00513 SG_FREE(old_beta);
00514
00515 hinge_idx=NULL;
00516 proj=NULL;
00517 active=NULL;
00518 old_active=NULL;
00519 idx_bound=NULL;
00520 idx_active=NULL;
00521 sum_CXy_active=NULL;
00522 grad_w=NULL;
00523 v=NULL;
00524 Z=NULL;
00525 Zv=NULL;
00526 beta=NULL;
00527 }
00528
/** Train the linear SVM by projected subgradient descent with an active-set
 * strategy: each iteration (1) re-partitions the vectors via find_active(),
 * (2) incrementally updates the gradient accumulators, (3) finds the
 * minimum-norm subgradient (small QP over the bound set), (4) performs an
 * exact line search and takes the step, updating w, bias and the cached
 * projections. Stops on convergence (no active-set change at final
 * tolerance and vanishing step), user cancellation, or max training time.
 * @param data optional features to train on (must support dot products)
 * @return true on completion */
bool CSubGradientSVM::train_machine(CFeatures* data)
{
	tim=0;
	SG_INFO("C=%f epsilon=%f\n", C1, epsilon);
	ASSERT(m_labels);

	if (data)
	{
		if (!data->has_property(FP_DOT))
			SG_ERROR("Specified features are not of type CDotFeatures\n");
		set_features((CDotFeatures*) data);
	}
	ASSERT(get_features());

	int32_t num_iterations=0;
	int32_t num_train_labels=m_labels->get_num_labels();
	int32_t num_feat=features->get_dim_feature_space();
	int32_t num_vec=features->get_num_vectors();

	ASSERT(num_vec==num_train_labels);

	// allocate all per-run buffers (released via cleanup() below)
	init(num_vec, num_feat);

	int32_t num_active=0;
	int32_t num_bound=0;
	float64_t alpha=0;
	float64_t dir_deriv=0;
	float64_t obj=0;
	delta_active=num_vec;
	last_it_noimprovement=-1;

	// start with a loose tolerance; find_active() tightens it over time
	work_epsilon=0.99;
	autoselected_epsilon=work_epsilon;

	compute_projection(num_feat, num_vec);

	CTime time;
#ifdef DEBUG_SUBGRADIENTSVM
	float64_t loop_time=0;
#endif
	while (!(CSignal::cancel_computations()))
	{
		CTime t;
		delta_active=find_active(num_feat, num_vec, num_active, num_bound);

		update_active(num_feat, num_vec);

#ifdef DEBUG_SUBGRADIENTSVM
		SG_PRINT("==================================================\niteration: %d ", num_iterations);
		obj=compute_objective(num_feat, num_vec);
		SG_PRINT("objective:%.10f alpha: %.10f dir_deriv: %f num_bound: %d num_active: %d work_eps: %10.10f eps: %10.10f auto_eps: %10.10f time:%f\n",
				obj, alpha, dir_deriv, num_bound, num_active, work_epsilon, epsilon, autoselected_epsilon, loop_time);
#else
		SG_ABS_PROGRESS(work_epsilon, -CMath::log10(work_epsilon), -CMath::log10(0.99999999), -CMath::log10(epsilon), 6);
#endif

		dir_deriv=compute_min_subgradient(num_feat, num_vec, num_active, num_bound);

		alpha=line_search(num_feat, num_vec);

		// optimality check: only meaningful when the QP was actually solved
		// (small bound set) or we repeatedly failed to improve
		if (num_it_noimprovement==10 || num_bound<qpsize_max)
		{
			float64_t norm_grad=SGVector<float64_t>::dot(grad_w, grad_w, num_feat) +
				grad_b*grad_b;

#ifdef DEBUG_SUBGRADIENTSVM
			SG_PRINT("CHECKING OPTIMALITY CONDITIONS: "
					"work_epsilon: %10.10f delta_active:%d alpha: %10.10f norm_grad: %10.10f a*norm_grad:%10.16f\n",
					work_epsilon, delta_active, alpha, norm_grad, CMath::abs(alpha*norm_grad));
#else
			SG_ABS_PROGRESS(work_epsilon, -CMath::log10(work_epsilon), -CMath::log10(0.99999999), -CMath::log10(epsilon), 6);
#endif

			// converged: final tolerance reached, stable active set, tiny step
			if (work_epsilon<=epsilon && delta_active==0 && CMath::abs(alpha*norm_grad)<1e-6)
				break;
			else
				num_it_noimprovement=0;
		}

		// track consecutive iterations without progress
		if ((dir_deriv<0 || alpha==0) && (work_epsilon<=epsilon && delta_active==0))
		{
			if (last_it_noimprovement==num_iterations-1)
			{
				SG_PRINT("no improvement...\n");
				num_it_noimprovement++;
			}
			else
				num_it_noimprovement=0;

			last_it_noimprovement=num_iterations;
		}

		// take the step and keep the cached projections in sync
		SGVector<float64_t>::vec1_plus_scalar_times_vec2(w.vector, -alpha, grad_w, num_feat);
		bias-=alpha*grad_b;

		update_projection(alpha, num_vec);

		t.stop();
#ifdef DEBUG_SUBGRADIENTSVM
		loop_time=t.time_diff_sec();
#endif
		num_iterations++;

		if (get_max_train_time()>0 && time.cur_time_diff()>get_max_train_time())
			break;
	}
	SG_DONE();

	SG_INFO("converged after %d iterations\n", num_iterations);

	obj=compute_objective(num_feat, num_vec);
	SG_INFO("objective: %f alpha: %f dir_deriv: %f num_bound: %d num_active: %d\n",
			obj, alpha, dir_deriv, num_bound, num_active);

#ifdef DEBUG_SUBGRADIENTSVM
	CMath::display_vector(w.vector, w.vlen, "w");
	SG_PRINT("bias: %f\n", bias);
#endif
	SG_INFO("solver time:%f s\n", tim);

	cleanup();

	return true;
}