00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include <shogun/lib/config.h>
00013
00014 #ifdef USE_CPLEX
00015
00016 #include <shogun/mathematics/Math.h>
00017 #include <shogun/lib/Signal.h>
00018 #include <shogun/lib/Time.h>
00019 #include <shogun/machine/LinearMachine.h>
00020 #include <shogun/classifier/SubGradientLPM.h>
00021 #include <shogun/classifier/svm/QPBSVMLib.h>
00022 #include <shogun/features/DotFeatures.h>
00023 #include <shogun/labels/Labels.h>
00024
00025 using namespace shogun;
00026
00027 #define DEBUG_SUBGRADIENTLPM
00028
00029 CSubGradientLPM::CSubGradientLPM()
00030 : CLinearMachine(), C1(1), C2(1), epsilon(1e-5), qpsize(42),
00031 qpsize_max(2000), use_bias(false), delta_active(0), delta_bound(0)
00032 {
00033 }
00034
00035 CSubGradientLPM::CSubGradientLPM(
00036 float64_t C, CDotFeatures* traindat, CLabels* trainlab)
00037 : CLinearMachine(), C1(C), C2(C), epsilon(1e-5), qpsize(42),
00038 qpsize_max(2000), use_bias(false), delta_active(0), delta_bound(0)
00039 {
00040 CLinearMachine::features=traindat;
00041 CLinearMachine::m_labels=trainlab;
00042 }
00043
00044
00045 CSubGradientLPM::~CSubGradientLPM()
00046 {
00047 cleanup();
00048 }
00049
00050 int32_t CSubGradientLPM::find_active(
00051 int32_t num_feat, int32_t num_vec, int32_t& num_active, int32_t& num_bound)
00052 {
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079 delta_bound=0;
00080 delta_active=0;
00081 num_active=0;
00082 num_bound=0;
00083
00084 for (int32_t i=0; i<num_vec; i++)
00085 {
00086 active[i]=0;
00087
00088
00089 if (proj[i] < 1-autoselected_epsilon)
00090 {
00091 idx_active[num_active++]=i;
00092 active[i]=1;
00093 }
00094
00095
00096 if (CMath::abs(proj[i]-1) <= autoselected_epsilon)
00097 {
00098 idx_bound[num_bound++]=i;
00099 active[i]=2;
00100 }
00101
00102 if (active[i]!=old_active[i])
00103 delta_active++;
00104
00105 if (active[i]==2 && old_active[i]==2)
00106 delta_bound++;
00107 }
00108
00109
00110 if (delta_active==0 && work_epsilon<=epsilon)
00111 return 0;
00112 else if (delta_active==0)
00113 {
00114 work_epsilon=CMath::min(work_epsilon/2, autoselected_epsilon);
00115 work_epsilon=CMath::max(work_epsilon, epsilon);
00116 num_bound=qpsize;
00117 }
00118
00119 delta_bound=0;
00120 delta_active=0;
00121 num_active=0;
00122 num_bound=0;
00123
00124 for (int32_t i=0; i<num_vec; i++)
00125 {
00126 tmp_proj[i]=CMath::abs(proj[i]-1);
00127 tmp_proj_idx[i]=i;
00128 }
00129
00130 CMath::qsort_index(tmp_proj, tmp_proj_idx, num_vec);
00131
00132 autoselected_epsilon=tmp_proj[CMath::min(qpsize,num_vec)];
00133
00134 #ifdef DEBUG_SUBGRADIENTSVM
00135
00136 #endif
00137
00138 if (autoselected_epsilon>work_epsilon)
00139 autoselected_epsilon=work_epsilon;
00140
00141 if (autoselected_epsilon<epsilon)
00142 {
00143 autoselected_epsilon=epsilon;
00144
00145 int32_t i=0;
00146 while (i < num_vec && tmp_proj[i] <= autoselected_epsilon)
00147 i++;
00148
00149
00150
00151 if (i>=qpsize_max && autoselected_epsilon>epsilon)
00152 {
00153 SG_PRINT("qpsize limit (%d) reached\n", qpsize_max);
00154 int32_t num_in_qp=i;
00155 while (--i>=0 && num_in_qp>=qpsize_max)
00156 {
00157 if (tmp_proj[i] < autoselected_epsilon)
00158 {
00159 autoselected_epsilon=tmp_proj[i];
00160 num_in_qp--;
00161 }
00162 }
00163
00164
00165 }
00166 }
00167
00168 for (int32_t i=0; i<num_vec; i++)
00169 {
00170 active[i]=0;
00171
00172
00173 if (proj[i] < 1-autoselected_epsilon)
00174 {
00175 idx_active[num_active++]=i;
00176 active[i]=1;
00177 }
00178
00179
00180 if (CMath::abs(proj[i]-1) <= autoselected_epsilon)
00181 {
00182 idx_bound[num_bound++]=i;
00183 active[i]=2;
00184 }
00185
00186 if (active[i]!=old_active[i])
00187 delta_active++;
00188
00189 if (active[i]==2 && old_active[i]==2)
00190 delta_bound++;
00191 }
00192
00193 pos_idx=0;
00194 neg_idx=0;
00195 zero_idx=0;
00196
00197 for (int32_t i=0; i<num_feat; i++)
00198 {
00199 if (w[i]>work_epsilon)
00200 {
00201 w_pos[pos_idx++]=i;
00202 grad_w[i]=1;
00203 }
00204 else if (w[i]<-work_epsilon)
00205 {
00206 w_neg[neg_idx++]=i;
00207 grad_w[i]=-1;
00208 }
00209
00210 if (CMath::abs(w[i])<=work_epsilon)
00211 {
00212 w_zero[zero_idx++]=i;
00213 grad_w[i]=-1;
00214 }
00215 }
00216
00217 return delta_active;
00218 }
00219
00220
00221 void CSubGradientLPM::update_active(int32_t num_feat, int32_t num_vec)
00222 {
00223 for (int32_t i=0; i<num_vec; i++)
00224 {
00225 int32_t lab = ((CBinaryLabels*) m_labels)->get_int_label(i);
00226 if (active[i]==1 && old_active[i]!=1)
00227 {
00228 features->add_to_dense_vec(C1*lab, i, sum_CXy_active, num_feat);
00229 if (use_bias)
00230 sum_Cy_active+=C1*lab;
00231 }
00232 else if (old_active[i]==1 && active[i]!=1)
00233 {
00234 features->add_to_dense_vec(-C1*lab, i, sum_CXy_active, num_feat);
00235 if (use_bias)
00236 sum_Cy_active-=C1*lab;
00237 }
00238 }
00239
00240 CMath::swap(active,old_active);
00241 }
00242
00243 float64_t CSubGradientLPM::line_search(int32_t num_feat, int32_t num_vec)
00244 {
00245 int32_t num_hinge=0;
00246 float64_t alpha=0;
00247 float64_t sgrad=0;
00248
00249 float64_t* A=SG_MALLOC(float64_t, num_feat+num_vec);
00250 float64_t* B=SG_MALLOC(float64_t, num_feat+num_vec);
00251 float64_t* C=SG_MALLOC(float64_t, num_feat+num_vec);
00252 float64_t* D=SG_MALLOC(float64_t, num_feat+num_vec);
00253
00254 for (int32_t i=0; i<num_feat+num_vec; i++)
00255 {
00256 if (i<num_feat)
00257 {
00258 A[i]=-grad_w[i];
00259 B[i]=w[i];
00260 C[i]=+grad_w[i];
00261 D[i]=-w[i];
00262 }
00263 else
00264 {
00265 float64_t p=get_label(i-num_feat)*(features->dense_dot(i-num_feat, grad_w, num_feat)+grad_b);
00266 grad_proj[i-num_feat]=p;
00267
00268 A[i]=0;
00269 B[i]=0;
00270 C[i]=C1*p;
00271 D[i]=C1*(1-proj[i-num_feat]);
00272 }
00273
00274 if (A[i]==C[i] && B[i]>D[i])
00275 sgrad+=A[i]+C[i];
00276 else if (A[i]==C[i] && B[i]==D[i])
00277 sgrad+=CMath::max(A[i],C[i]);
00278 else if (A[i]!=C[i])
00279 {
00280 hinge_point[num_hinge]=(D[i]-B[i])/(A[i]-C[i]);
00281 hinge_idx[num_hinge]=i;
00282 num_hinge++;
00283
00284 if (A[i]>C[i])
00285 sgrad+=C[i];
00286 if (A[i]<C[i])
00287 sgrad+=A[i];
00288 }
00289 }
00290
00291
00292
00293
00294
00295
00296
00297
00298
00299
00300 CMath::qsort_index(hinge_point, hinge_idx, num_hinge);
00301
00302
00303
00304 int32_t i=-1;
00305 while (i < num_hinge-1 && sgrad < 0)
00306 {
00307 i+=1;
00308
00309 if (A[hinge_idx[i]] > C[hinge_idx[i]])
00310 sgrad += A[hinge_idx[i]] - C[hinge_idx[i]];
00311 else
00312 sgrad += C[hinge_idx[i]] - A[hinge_idx[i]];
00313 }
00314
00315 alpha = hinge_point[i];
00316
00317 SG_FREE(D);
00318 SG_FREE(C);
00319 SG_FREE(B);
00320 SG_FREE(A);
00321
00322
00323 return alpha;
00324 }
00325
00326 float64_t CSubGradientLPM::compute_min_subgradient(
00327 int32_t num_feat, int32_t num_vec, int32_t num_active, int32_t num_bound)
00328 {
00329 float64_t dir_deriv=0;
00330 solver->init(E_QP);
00331
00332 if (zero_idx+num_bound > 0)
00333 {
00334
00335
00336 CMath::add(grad_w, 1.0, grad_w, -1.0, sum_CXy_active, num_feat);
00337 grad_w[num_feat]= -sum_Cy_active;
00338
00339 grad_b = -sum_Cy_active;
00340
00341
00342
00343
00344
00345
00346 solver->setup_subgradientlpm_QP(C1, m_labels, (CSparseFeatures<float64_t>*) features, idx_bound, num_bound,
00347 w_zero, zero_idx,
00348 grad_w, num_feat+1,
00349 use_bias);
00350
00351 solver->optimize(beta);
00352
00353
00354
00355
00356 dir_deriv = CMath::dot(beta, grad_w, num_feat);
00357 dir_deriv-=beta[num_feat]*sum_Cy_active;
00358
00359 for (int32_t i=0; i<num_bound; i++)
00360 {
00361 float64_t val= C1*get_label(idx_bound[i])*(features->dense_dot(idx_bound[i], beta, num_feat)+ beta[num_feat]);
00362 dir_deriv += CMath::max(0.0, val);
00363 }
00364
00365 for (int32_t i=0; i<num_feat; i++)
00366 grad_w[i]=beta[i];
00367
00368 if (use_bias)
00369 grad_b=beta[num_feat];
00370
00371
00372
00373
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383
00384
00385
00386
00387
00388
00389
00390
00391
00392
00393
00394 }
00395 else
00396 {
00397 CMath::add(grad_w, 1.0, w, -1.0, sum_CXy_active, num_feat);
00398 grad_b = -sum_Cy_active;
00399
00400 dir_deriv = CMath::dot(grad_w, grad_w, num_feat)+ grad_b*grad_b;
00401 }
00402
00403 solver->cleanup();
00404
00405
00406
00407
00408
00409 return dir_deriv;
00410 }
00411
00412 float64_t CSubGradientLPM::compute_objective(int32_t num_feat, int32_t num_vec)
00413 {
00414 float64_t result= CMath::sum_abs(w, num_feat);
00415
00416 for (int32_t i=0; i<num_vec; i++)
00417 {
00418 if (proj[i]<1.0)
00419 result += C1 * (1.0-proj[i]);
00420 }
00421
00422 return result;
00423 }
00424
00425 void CSubGradientLPM::compute_projection(int32_t num_feat, int32_t num_vec)
00426 {
00427 for (int32_t i=0; i<num_vec; i++)
00428 proj[i]=get_label(i)*(features->dense_dot(i, w, num_feat) + bias);
00429 }
00430
00431 void CSubGradientLPM::update_projection(float64_t alpha, int32_t num_vec)
00432 {
00433 CMath::vec1_plus_scalar_times_vec2(proj,-alpha, grad_proj, num_vec);
00434 }
00435
00436 void CSubGradientLPM::init(int32_t num_vec, int32_t num_feat)
00437 {
00438
00439 SG_FREE(w);
00440 w=SG_MALLOC(float64_t, num_feat);
00441 w_dim=num_feat;
00442 for (int32_t i=0; i<num_feat; i++)
00443 w[i]=1.0;
00444
00445 bias=0;
00446 num_it_noimprovement=0;
00447 grad_b=0;
00448
00449 w_pos=SG_MALLOC(int32_t, num_feat);
00450 memset(w_pos,0,sizeof(int32_t)*num_feat);
00451
00452 w_zero=SG_MALLOC(int32_t, num_feat);
00453 memset(w_zero,0,sizeof(int32_t)*num_feat);
00454
00455 w_neg=SG_MALLOC(int32_t, num_feat);
00456 memset(w_neg,0,sizeof(int32_t)*num_feat);
00457
00458 grad_w=SG_MALLOC(float64_t, num_feat+1);
00459 memset(grad_w,0,sizeof(float64_t)*(num_feat+1));
00460
00461 sum_CXy_active=SG_MALLOC(float64_t, num_feat);
00462 memset(sum_CXy_active,0,sizeof(float64_t)*num_feat);
00463
00464 sum_Cy_active=0;
00465
00466 proj=SG_MALLOC(float64_t, num_vec);
00467 memset(proj,0,sizeof(float64_t)*num_vec);
00468
00469 tmp_proj=SG_MALLOC(float64_t, num_vec);
00470 memset(proj,0,sizeof(float64_t)*num_vec);
00471
00472 tmp_proj_idx=SG_MALLOC(int32_t, num_vec);
00473 memset(tmp_proj_idx,0,sizeof(int32_t)*num_vec);
00474
00475 grad_proj=SG_MALLOC(float64_t, num_vec);
00476 memset(grad_proj,0,sizeof(float64_t)*num_vec);
00477
00478 hinge_point=SG_MALLOC(float64_t, num_vec+num_feat);
00479 memset(hinge_point,0,sizeof(float64_t)*(num_vec+num_feat));
00480
00481 hinge_idx=SG_MALLOC(int32_t, num_vec+num_feat);
00482 memset(hinge_idx,0,sizeof(int32_t)*(num_vec+num_feat));
00483
00484 active=SG_MALLOC(uint8_t, num_vec);
00485 memset(active,0,sizeof(uint8_t)*num_vec);
00486
00487 old_active=SG_MALLOC(uint8_t, num_vec);
00488 memset(old_active,0,sizeof(uint8_t)*num_vec);
00489
00490 idx_bound=SG_MALLOC(int32_t, num_vec);
00491 memset(idx_bound,0,sizeof(int32_t)*num_vec);
00492
00493 idx_active=SG_MALLOC(int32_t, num_vec);
00494 memset(idx_active,0,sizeof(int32_t)*num_vec);
00495
00496 beta=SG_MALLOC(float64_t, num_feat+1+num_feat+num_vec);
00497 memset(beta,0,sizeof(float64_t)*num_feat+1+num_feat+num_vec);
00498
00499 solver=new CCplex();
00500 }
00501
00502 void CSubGradientLPM::cleanup()
00503 {
00504 SG_FREE(hinge_idx);
00505 SG_FREE(hinge_point);
00506 SG_FREE(grad_proj);
00507 SG_FREE(proj);
00508 SG_FREE(tmp_proj);
00509 SG_FREE(tmp_proj_idx);
00510 SG_FREE(active);
00511 SG_FREE(old_active);
00512 SG_FREE(idx_bound);
00513 SG_FREE(idx_active);
00514 SG_FREE(sum_CXy_active);
00515 SG_FREE(w_pos);
00516 SG_FREE(w_zero);
00517 SG_FREE(w_neg);
00518 SG_FREE(grad_w);
00519 SG_FREE(beta);
00520
00521 hinge_idx=NULL;
00522 hinge_point=NULL;
00523 grad_proj=NULL;
00524 proj=NULL;
00525 tmp_proj=NULL;
00526 tmp_proj_idx=NULL;
00527 active=NULL;
00528 old_active=NULL;
00529 idx_bound=NULL;
00530 idx_active=NULL;
00531 sum_CXy_active=NULL;
00532 w_pos=NULL;
00533 w_zero=NULL;
00534 w_neg=NULL;
00535 grad_w=NULL;
00536 beta=NULL;
00537
00538 delete solver;
00539 solver=NULL;
00540 }
00541
00542 bool CSubGradientLPM::train_machine(CFeatures* data)
00543 {
00544 lpmtim=0;
00545 SG_INFO("C=%f epsilon=%f\n", C1, epsilon);
00546 ASSERT(m_labels);
00547 if (data)
00548 {
00549 if (!data->has_property(FP_DOT))
00550 SG_ERROR("Specified features are not of type CDotFeatures\n");
00551 set_features((CDotFeatures*) data);
00552 }
00553 ASSERT(features);
00554
00555 int32_t num_iterations=0;
00556 int32_t num_train_labels=m_labels->get_num_labels();
00557 int32_t num_feat=features->get_dim_feature_space();
00558 int32_t num_vec=features->get_num_vectors();
00559
00560 ASSERT(num_vec==num_train_labels);
00561
00562 init(num_vec, num_feat);
00563
00564 int32_t num_active=0;
00565 int32_t num_bound=0;
00566 float64_t alpha=0;
00567 float64_t dir_deriv=0;
00568 float64_t obj=0;
00569 delta_active=num_vec;
00570 last_it_noimprovement=-1;
00571
00572 work_epsilon=0.99;
00573 autoselected_epsilon=work_epsilon;
00574
00575 compute_projection(num_feat, num_vec);
00576
00577 CTime time;
00578 float64_t loop_time=0;
00579 while (!(CSignal::cancel_computations()))
00580 {
00581 CTime t;
00582 delta_active=find_active(num_feat, num_vec, num_active, num_bound);
00583
00584 update_active(num_feat, num_vec);
00585
00586 #ifdef DEBUG_SUBGRADIENTLPM
00587 SG_PRINT("==================================================\niteration: %d ", num_iterations);
00588 obj=compute_objective(num_feat, num_vec);
00589 SG_PRINT("objective:%.10f alpha: %.10f dir_deriv: %f num_bound: %d num_active: %d work_eps: %10.10f eps: %10.10f auto_eps: %10.10f time:%f\n",
00590 obj, alpha, dir_deriv, num_bound, num_active, work_epsilon, epsilon, autoselected_epsilon, loop_time);
00591 #else
00592 SG_ABS_PROGRESS(work_epsilon, -CMath::log10(work_epsilon), -CMath::log10(0.99999999), -CMath::log10(epsilon), 6);
00593 #endif
00594
00595
00596
00597
00598
00599
00600
00601
00602
00603
00604
00605
00606 dir_deriv=compute_min_subgradient(num_feat, num_vec, num_active, num_bound);
00607
00608 alpha=line_search(num_feat, num_vec);
00609
00610 if (num_it_noimprovement==10 || num_bound<qpsize_max)
00611 {
00612 float64_t norm_grad=CMath::dot(grad_w, grad_w, num_feat) +
00613 grad_b*grad_b;
00614
00615 SG_PRINT("CHECKING OPTIMALITY CONDITIONS: "
00616 "work_epsilon: %10.10f delta_active:%d alpha: %10.10f norm_grad: %10.10f a*norm_grad:%10.16f\n",
00617 work_epsilon, delta_active, alpha, norm_grad, CMath::abs(alpha*norm_grad));
00618
00619 if (work_epsilon<=epsilon && delta_active==0 && CMath::abs(alpha*norm_grad)<1e-6)
00620 break;
00621 else
00622 num_it_noimprovement=0;
00623 }
00624
00625
00626 if ((dir_deriv<0 || alpha==0) && (work_epsilon<=epsilon && delta_active==0))
00627 {
00628 if (last_it_noimprovement==num_iterations-1)
00629 {
00630 SG_PRINT("no improvement...\n");
00631 num_it_noimprovement++;
00632 }
00633 else
00634 num_it_noimprovement=0;
00635
00636 last_it_noimprovement=num_iterations;
00637 }
00638
00639 CMath::vec1_plus_scalar_times_vec2(w, -alpha, grad_w, num_feat);
00640 bias-=alpha*grad_b;
00641
00642 update_projection(alpha, num_vec);
00643
00644 t.stop();
00645 loop_time=t.time_diff_sec();
00646 num_iterations++;
00647
00648 if (get_max_train_time()>0 && time.cur_time_diff()>get_max_train_time())
00649 break;
00650 }
00651
00652 SG_INFO("converged after %d iterations\n", num_iterations);
00653
00654 obj=compute_objective(num_feat, num_vec);
00655 SG_INFO("objective: %f alpha: %f dir_deriv: %f num_bound: %d num_active: %d\n",
00656 obj, alpha, dir_deriv, num_bound, num_active);
00657
00658 #ifdef DEBUG_SUBGRADIENTLPM
00659 CMath::display_vector(w, w_dim, "w");
00660 SG_PRINT("bias: %f\n", bias);
00661 #endif
00662 SG_PRINT("solver time:%f s\n", lpmtim);
00663
00664 cleanup();
00665
00666 return true;
00667 }
00668 #endif //USE_CPLEX