00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include <shogun/lib/config.h>
00024
00025 #ifdef USE_SVMLIGHT
00026
00027 #include <shogun/io/SGIO.h>
00028 #include <shogun/lib/Signal.h>
00029 #include <shogun/mathematics/Math.h>
00030 #include <shogun/lib/Time.h>
00031 #include <shogun/mathematics/lapack.h>
00032
00033 #include <shogun/features/SimpleFeatures.h>
00034 #include <shogun/classifier/svm/SVMLight.h>
00035 #include <shogun/classifier/svm/pr_loqo.h>
00036
00037 #include <shogun/kernel/Kernel.h>
00038 #include <shogun/machine/KernelMachine.h>
00039 #include <shogun/kernel/CombinedKernel.h>
00040
00041 #include <unistd.h>
00042
00043 #include <shogun/base/Parallel.h>
00044
00045 #ifdef HAVE_PTHREAD
00046 #include <pthread.h>
00047 #endif
00048
00049 using namespace shogun;
00050
00051 #ifndef DOXYGEN_SHOULD_SKIP_THIS
/** Per-thread work descriptor, presumably consumed by the linadd variant of
 * reactivate_inactive_examples() (defined later in this file — TODO confirm).
 * Each worker handles the half-open index range [start, end). */
struct S_THREAD_PARAM_REACTIVATE_LINADD
{
	/** kernel used for the recomputation */
	CKernel* kernel;
	/** linear component to update */
	float64_t* lin;
	/** linear component snapshot from before shrinking (per field name) */
	float64_t* last_lin;
	/** active-example flags */
	int32_t* active;
	/** document indices */
	int32_t* docs;
	/** first index handled by this thread (inclusive) */
	int32_t start;
	/** one past the last index handled by this thread (exclusive) */
	int32_t end;
};
00062
/** Per-thread work descriptor for update_linear_component_linadd_helper():
 * the worker adds kernel->compute_optimized(docs[j]) to lin[j] for every
 * j = active2dnum[jj], jj in [start, end), stopping at the -1 terminator. */
struct S_THREAD_PARAM
{
	/** linear component array being updated */
	float64_t * lin ;
	/** per-subkernel weight buffer (MKL) — not read by the linadd helper */
	float64_t* W;
	/** half-open range [start, end) of positions in active2dnum */
	int32_t start, end;
	/** -1 terminated list of active example indices */
	int32_t * active2dnum ;
	/** document indices */
	int32_t * docs ;
	/** kernel whose compute_optimized() is evaluated */
	CKernel* kernel ;
};
00072
/** Per-thread work descriptor, presumably for the non-linadd ("vanilla")
 * variant of reactivate_inactive_examples() (not visible in this chunk —
 * TODO confirm against its definition). Range handled is [start, end). */
struct S_THREAD_PARAM_REACTIVATE_VANILLA
{
	/** kernel used for the recomputation */
	CKernel* kernel;
	/** linear component to update */
	float64_t* lin;
	/** scratch row buffer for kernel values */
	float64_t* aicache;
	/** current alphas */
	float64_t* a;
	/** alphas from before shrinking */
	float64_t* a_old;
	/** -1 terminated list of examples whose alpha changed */
	int32_t* changed2dnum;
	/** -1 terminated list of inactive examples */
	int32_t* inactive2dnum;
	/** +1/-1 labels */
	int32_t* label;
	/** first index handled by this thread (inclusive) */
	int32_t start;
	/** one past the last index handled by this thread (exclusive) */
	int32_t end;
};
00086
/** Per-thread work descriptor for compute_kernel_helper(): the worker fills
 * Kval[jj] = svmlight->compute_kernel(KI[jj], KJ[jj]) for jj in [start, end). */
struct S_THREAD_PARAM_KERNEL
{
	/** output array of kernel values */
	float64_t *Kval ;
	/** row/column document-index pairs to evaluate */
	int32_t *KI, *KJ ;
	/** half-open range [start, end) of entries handled by this thread */
	int32_t start, end;
	/** object providing compute_kernel() */
	CSVMLight* svmlight;
};
00094
00095 #endif // DOXYGEN_SHOULD_SKIP_THIS
00096
00097 void* CSVMLight::update_linear_component_linadd_helper(void* p)
00098 {
00099 S_THREAD_PARAM* params = (S_THREAD_PARAM*) p;
00100
00101 int32_t jj=0, j=0 ;
00102
00103 for (jj=params->start;(jj<params->end) && (j=params->active2dnum[jj])>=0;jj++)
00104 params->lin[j]+=params->kernel->compute_optimized(params->docs[j]);
00105
00106 return NULL ;
00107 }
00108
00109 void* CSVMLight::compute_kernel_helper(void* p)
00110 {
00111 S_THREAD_PARAM_KERNEL* params = (S_THREAD_PARAM_KERNEL*) p;
00112
00113 int32_t jj=0 ;
00114 for (jj=params->start;jj<params->end;jj++)
00115 params->Kval[jj]=params->svmlight->compute_kernel(params->KI[jj], params->KJ[jj]) ;
00116
00117 return NULL ;
00118 }
00119
/** Default constructor: initializes members and leaves the kernel unset. */
CSVMLight::CSVMLight()
: CSVM()
{
	init();
	set_kernel(NULL);
}
00126
/** Constructor.
 * @param C regularization constant (passed to CSVM)
 * @param k kernel
 * @param lab training labels
 */
CSVMLight::CSVMLight(float64_t C, CKernel* k, CLabels* lab)
: CSVM(C, k, lab)
{
	init();
}
00132
/** Shared constructor body: sets optimizer defaults and allocates the
 * SVMlight MODEL / LEARN_PARM blocks (their arrays stay NULL until
 * svm_learn() sizes them). */
void CSVMLight::init()
{
	// QP-solver scratch buffers, allocated lazily elsewhere.
	primal=NULL;
	dual=NULL;
	init_margin=0.15;
	init_iter=500;
	precision_violations=0;
	model_b=0;
	verbosity=1;
	opt_precision=DEF_PRECISION;

	// MKL per-subkernel buffer; allocated in svm_learn() when needed.
	W=NULL;
	model=SG_MALLOC(MODEL, 1);
	learn_parm=SG_MALLOC(LEARN_PARM, 1);
	model->supvec=NULL;
	model->alpha=NULL;
	model->index=NULL;

	mymaxdiff=1 ;
	mkl_converged=false;
}
00157
/** Destructor: releases the model arrays, the model/learn_parm blocks, the
 * MKL buffer and the QP-solver scratch buffers. Pointers may still be NULL
 * here — presumably SG_FREE tolerates NULL like free(); TODO confirm. */
CSVMLight::~CSVMLight()
{
	SG_FREE(model->supvec);
	SG_FREE(model->alpha);
	SG_FREE(model->index);
	SG_FREE(model);
	SG_FREE(learn_parm);

	// MKL subkernel-weight buffer
	SG_FREE(W);

	// QP-solver scratch (see init(): may never have been allocated)
	SG_FREE(dual);
	SG_FREE(primal);
}
00174
/** Train the SVM: configures the SVMlight LEARN_PARM block from the machine's
 * settings, validates kernel/labels, runs svm_learn() and copies the result
 * (support vectors, alphas, bias) into the CSVM base state.
 *
 * @param data optional features; when given, re-initializes the kernel on them
 * @return true on completion (errors abort via SG_ERROR/ASSERT)
 */
bool CSVMLight::train_machine(CFeatures* data)
{
	// Reset optimizer state for a fresh run.
	mkl_converged=false;
	verbosity=1 ;
	init_margin=0.15;
	init_iter=500;
	precision_violations=0;
	opt_precision=DEF_PRECISION;

	// Fill the SVMlight parameter block from this machine's configuration.
	strcpy (learn_parm->predfile, "");
	learn_parm->biased_hyperplane= get_bias_enabled() ? 1 : 0;
	learn_parm->sharedslack=0;
	learn_parm->remove_inconsistent=0;
	learn_parm->skip_final_opt_check=0;
	learn_parm->svm_maxqpsize=get_qpsize();
	learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize-1;
	learn_parm->maxiter=100000;
	learn_parm->svm_iter_to_shrink=100;
	learn_parm->svm_c=C1;
	learn_parm->transduction_posratio=0.33;
	learn_parm->svm_costratio=C2/C1; // asymmetric cost for positive examples
	learn_parm->svm_costratio_unlab=1.0;
	learn_parm->svm_unlabbound=1E-5;
	learn_parm->epsilon_crit=epsilon;
	learn_parm->epsilon_a=1E-15;
	learn_parm->compute_loo=0;
	learn_parm->rho=1.0;
	learn_parm->xa_depth=0;

	if (!kernel)
		SG_ERROR( "SVM_light can not proceed without kernel!\n");

	if (data)
	{
		if (labels->get_num_labels() != data->get_num_vectors())
			SG_ERROR("Number of training vectors does not match number of labels\n");
		kernel->init(data, data);
	}

	if (!kernel->has_features())
		SG_ERROR( "SVM_light can not proceed without initialized kernel!\n");

	ASSERT(labels && labels->get_num_labels());
	ASSERT(labels->is_two_class_labeling());
	ASSERT(kernel->get_num_vec_lhs()==labels->get_num_labels());

	// Drop any normal vector left over from a previous linadd-optimized run.
	if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
		kernel->clear_normal() ;

	SG_DEBUG( "threads = %i\n", parallel->get_num_threads()) ;
	SG_DEBUG( "qpsize = %i\n", learn_parm->svm_maxqpsize) ;
	SG_DEBUG( "epsilon = %1.1e\n", learn_parm->epsilon_crit) ;
	SG_DEBUG( "kernel->has_property(KP_LINADD) = %i\n", kernel->has_property(KP_LINADD)) ;
	SG_DEBUG( "kernel->has_property(KP_KERNCOMBINATION) = %i\n", kernel->has_property(KP_KERNCOMBINATION)) ;
	SG_DEBUG( "kernel->has_property(KP_BATCHEVALUATION) = %i\n", kernel->has_property(KP_BATCHEVALUATION)) ;
	SG_DEBUG( "kernel->get_optimization_type() = %s\n", kernel->get_optimization_type()==FASTBUTMEMHUNGRY ? "FASTBUTMEMHUNGRY" : "SLOWBUTMEMEFFICIENT" ) ;
	SG_DEBUG( "get_solver_type() = %i\n", get_solver_type());
	SG_DEBUG( "get_linadd_enabled() = %i\n", get_linadd_enabled()) ;
	SG_DEBUG( "get_batch_computation_enabled() = %i\n", get_batch_computation_enabled()) ;
	SG_DEBUG( "kernel->get_num_subkernels() = %i\n", kernel->get_num_subkernels()) ;

	// No cache for custom kernels (already a stored matrix) or when the
	// linadd shortcut avoids recomputing kernel rows altogether.
	use_kernel_cache = !((kernel->get_kernel_type() == K_CUSTOM) ||
			(get_linadd_enabled() && kernel->has_property(KP_LINADD)));

	SG_DEBUG( "use_kernel_cache = %i\n", use_kernel_cache) ;

	if (kernel->get_kernel_type() == K_COMBINED)
	{
		CKernel* kn = ((CCombinedKernel*)kernel)->get_first_kernel();

		while (kn)
		{
			// (Re-)create each subkernel's cache at its configured size.
			kn->resize_kernel_cache(kn->get_cache_size());
			SG_UNREF(kn);
			kn = ((CCombinedKernel*) kernel)->get_next_kernel();
		}
	}

	kernel->resize_kernel_cache(kernel->get_cache_size());

	// Run the SVMlight optimizer; the result lands in 'model'.
	svm_learn();

	// model->supvec/alpha are 1-based (element 0 is reserved); copy them
	// into the 0-based CSVM representation. Bias sign convention differs.
	create_new_model(model->sv_num-1);
	set_bias(-model->b);
	for (int32_t i=0; i<model->sv_num-1; i++)
	{
		set_alpha(i, model->alpha[i+1]);
		set_support_vector(i, model->supvec[i+1]);
	}

	// Tear down linadd state and the kernel cache.
	if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
	{
		kernel->clear_normal() ;
		kernel->delete_optimization() ;
	}

	if (use_kernel_cache)
		kernel->kernel_cache_cleanup();

	return true ;
}
00283
00284 int32_t CSVMLight::get_runtime()
00285 {
00286 clock_t start;
00287 start = clock();
00288 return((int32_t)((float64_t)start*100.0/(float64_t)CLOCKS_PER_SEC));
00289 }
00290
00291
/** Core SVMlight training driver: allocates all per-example working arrays,
 * initializes per-example costs from the labels, optionally warm-starts from
 * a previous solution (m_alpha), runs optimize_to_convergence() and reports
 * statistics. All temporaries are freed before returning; the solution stays
 * in 'model'. */
void CSVMLight::svm_learn()
{
	int32_t *inconsistent, i;
	int32_t misclassified,upsupvecnum;
	float64_t maxdiff, *lin, *c, *a;
	int32_t iterations;
	int32_t trainpos=0, trainneg=0 ;
	ASSERT(labels);
	SGVector<int32_t> lab=labels->get_int_labels();
	int32_t totdoc=lab.vlen;
	ASSERT(lab.vector && lab.vlen);
	// Work on a private copy: labels get clipped to +/-1 below.
	int32_t* label=CMath::clone_vector(lab.vector, lab.vlen);
	lab.free_vector();

	int32_t* docs=SG_MALLOC(int32_t, totdoc);
	SG_FREE(W);
	W=NULL;
	count = 0 ;

	// MKL: one totdoc-sized slab of linear-term contributions per subkernel.
	if (kernel->has_property(KP_KERNCOMBINATION) && callback)
	{
		W = SG_MALLOC(float64_t, totdoc*kernel->get_num_subkernels());
		for (i=0; i<totdoc*kernel->get_num_subkernels(); i++)
			W[i]=0;
	}

	// Documents are identified by their index.
	for (i=0; i<totdoc; i++)
		docs[i]=i;

	float64_t *xi_fullset; // allocated below but unused in this code path
	float64_t *a_fullset;  // allocated below but unused in this code path
	TIMING timing_profile;
	SHRINK_STATE shrink_state;

	timing_profile.time_kernel=0;
	timing_profile.time_opti=0;
	timing_profile.time_shrink=0;
	timing_profile.time_update=0;
	timing_profile.time_model=0;
	timing_profile.time_check=0;
	timing_profile.time_select=0;

	// Clamp the number of new variables per QP to a sane range.
	if((learn_parm->svm_newvarsinqp < 2)
			|| (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
		learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
	}

	init_shrink_state(&shrink_state,totdoc,(int32_t)MAXSHRINK);

	inconsistent = SG_MALLOC(int32_t, totdoc);
	c = SG_MALLOC(float64_t, totdoc);
	a = SG_MALLOC(float64_t, totdoc);
	a_fullset = SG_MALLOC(float64_t, totdoc);
	xi_fullset = SG_MALLOC(float64_t, totdoc);
	lin = SG_MALLOC(float64_t, totdoc);
	// Linear term of the dual: user-supplied, or the standard -1 vector.
	if (m_linear_term.vlen>0)
		learn_parm->eps=get_linear_term_array();
	else
	{
		learn_parm->eps=SG_MALLOC(float64_t, totdoc);
		CMath::fill_vector(learn_parm->eps, totdoc, -1.0);
	}

	learn_parm->svm_cost = SG_MALLOC(float64_t, totdoc);

	// model arrays are 1-based and sized totdoc+2 (element 0 reserved).
	SG_FREE(model->supvec);
	SG_FREE(model->alpha);
	SG_FREE(model->index);
	model->supvec = SG_MALLOC(int32_t, totdoc+2);
	model->alpha = SG_MALLOC(float64_t, totdoc+2);
	model->index = SG_MALLOC(int32_t, totdoc+2);

	model->at_upper_bound=0;
	model->b=0;
	model->supvec[0]=0;
	model->alpha[0]=0;
	model->totdoc=totdoc;

	model->kernel=kernel;

	model->sv_num=1;
	model->loo_error=-1;
	model->loo_recall=-1;
	model->loo_precision=-1;
	model->xa_error=-1;
	model->xa_recall=-1;
	model->xa_precision=-1;

	// Per-example upper bounds: C (scaled by costratio for positives);
	// zero-labeled examples get cost 0, i.e. they are effectively ignored.
	for (i=0;i<totdoc;i++) {
		inconsistent[i]=0;
		c[i]=0;
		a[i]=0;
		lin[i]=0;

		if(label[i] > 0) {
			learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
				fabs((float64_t)label[i]);
			label[i]=1;
			trainpos++;
		}
		else if(label[i] < 0) {
			learn_parm->svm_cost[i]=learn_parm->svm_c*fabs((float64_t)label[i]);
			label[i]=-1;
			trainneg++;
		}
		else {
			learn_parm->svm_cost[i]=0;
		}
	}

	SG_DEBUG( "alpha:%d num_sv:%d\n", m_alpha.vector, get_num_support_vectors());

	// Warm start: seed alphas from a previous solution, clip them to
	// [0, svm_cost], prime the kernel cache and recompute lin[].
	if(m_alpha.vector && get_num_support_vectors()) {
		if(verbosity>=1) {
			SG_INFO( "Computing starting state...");
		}

		float64_t* alpha = SG_MALLOC(float64_t, totdoc);

		for (i=0; i<totdoc; i++)
			alpha[i]=0;

		for (i=0; i<get_num_support_vectors(); i++)
			alpha[get_support_vector(i)]=get_alpha(i);

		int32_t* index = SG_MALLOC(int32_t, totdoc);
		int32_t* index2dnum = SG_MALLOC(int32_t, totdoc+11);
		float64_t* aicache = SG_MALLOC(float64_t, totdoc);
		for (i=0;i<totdoc;i++) {
			index[i]=1; // full index: every example active
			alpha[i]=fabs(alpha[i]);
			if(alpha[i]<0) alpha[i]=0;
			if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];
		}

		if (use_kernel_cache)
		{
			// Cache free (0<alpha<C) support vectors first, then bounded
			// ones, while cache space lasts.
			if (callback &&
					(!((CCombinedKernel*)kernel)->get_append_subkernel_weights())
			   )
			{
				CCombinedKernel* k = (CCombinedKernel*) kernel;
				CKernel* kn = k->get_first_kernel();

				while (kn)
				{
					for (i=0;i<totdoc;i++)
						if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
								&& (kn->kernel_cache_space_available()))
							kn->cache_kernel_row(i);

					for (i=0;i<totdoc;i++)
						if((alpha[i]==learn_parm->svm_cost[i])
								&& (kn->kernel_cache_space_available()))
							kn->cache_kernel_row(i);

					SG_UNREF(kn);
					kn = k->get_next_kernel();
				}
			}
			else
			{
				for (i=0;i<totdoc;i++)
					if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
							&& (kernel->kernel_cache_space_available()))
						kernel->cache_kernel_row(i);

				for (i=0;i<totdoc;i++)
					if((alpha[i]==learn_parm->svm_cost[i])
							&& (kernel->kernel_cache_space_available()))
						kernel->cache_kernel_row(i);
			}
		}
		// Recompute lin[] and the model from the seeded alphas (a[] is
		// still all-zero here, so the full alpha difference is applied).
		(void)compute_index(index,totdoc,index2dnum);
		update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc,
				lin,aicache,NULL);
		(void)calculate_svm_model(docs,label,lin,alpha,a,c,
				index2dnum,index2dnum);
		for (i=0;i<totdoc;i++) {
			a[i]=alpha[i];
		}

		SG_FREE(index);
		SG_FREE(index2dnum);
		SG_FREE(aicache);
		SG_FREE(alpha);

		if(verbosity>=1)
			SG_DONE();
	}
	SG_DEBUG( "%d totdoc %d pos %d neg\n", totdoc, trainpos, trainneg);
	SG_DEBUG( "Optimizing...\n");

	// Main optimization loop.
	iterations=optimize_to_convergence(docs,label,totdoc,
			&shrink_state,inconsistent,a,lin,
			c,&timing_profile,
			&maxdiff,(int32_t)-1,
			(int32_t)1);

	// Final statistics: training error, objective, SVs at the upper bound.
	if(verbosity>=1) {
		if(verbosity==1)
		{
			SG_DONE();
			SG_DEBUG("(%ld iterations)", iterations);
		}

		misclassified=0;
		for (i=0;(i<totdoc);i++) {
			if((lin[i]-model->b)*(float64_t)label[i] <= 0.0)
				misclassified++;
		}

		SG_INFO( "Optimization finished (%ld misclassified, maxdiff=%.8f).\n",
				misclassified,maxdiff);

		SG_INFO( "obj = %.16f, rho = %.16f\n",get_objective(),model->b);
		if (maxdiff>epsilon)
			SG_WARNING( "maximum violation (%f) exceeds svm_epsilon (%f) due to numerical difficulties\n", maxdiff, epsilon);

		upsupvecnum=0;
		for (i=1;i<model->sv_num;i++)
		{
			// alpha within epsilon_a of the cost bound counts as "at bound"
			if(fabs(model->alpha[i]) >=
					(learn_parm->svm_cost[model->supvec[i]]-
					 learn_parm->epsilon_a))
				upsupvecnum++;
		}
		SG_INFO( "Number of SV: %ld (including %ld at upper bound)\n",
				model->sv_num-1,upsupvecnum);
	}

	shrink_state_cleanup(&shrink_state);
	SG_FREE(label);
	SG_FREE(inconsistent);
	SG_FREE(c);
	SG_FREE(a);
	SG_FREE(a_fullset);
	SG_FREE(xi_fullset);
	SG_FREE(lin);
	SG_FREE(learn_parm->eps);
	SG_FREE(learn_parm->svm_cost);
	SG_FREE(docs);
}
00539
/** SVMlight main loop: repeatedly selects a working set, solves the QP
 * subproblem, updates the linear component and model, checks optimality,
 * and shrinks/reactivates variables until the KKT conditions hold (or a
 * watchdog / time limit fires).
 *
 * @param docs      document indices
 * @param label     +1/-1 labels
 * @param totdoc    number of examples
 * @param shrink_state  active/inactive bookkeeping
 * @param inconsistent  flags for examples excluded as inconsistent
 * @param a         in/out alphas
 * @param lin       in/out linear component of the gradient
 * @param c         per-example targets
 * @param timing_profile accumulated per-phase runtimes
 * @param maxdiff   out: largest KKT violation at termination
 * @param heldout   example excluded from the working set (-1 for none)
 * @param retrain   nonzero to force optimization; 2 triggers a special reset
 * @return number of iterations performed
 */
int32_t CSVMLight::optimize_to_convergence(int32_t* docs, int32_t* label, int32_t totdoc,
			     SHRINK_STATE *shrink_state,
			     int32_t *inconsistent,
			     float64_t *a, float64_t *lin, float64_t *c,
			     TIMING *timing_profile, float64_t *maxdiff,
			     int32_t heldout, int32_t retrain)
{
	int32_t *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink;
	int32_t inconsistentnum,choosenum,already_chosen=0,iteration;
	int32_t misclassified,supvecnum=0,*active2dnum,inactivenum;
	int32_t *working2dnum,*selexam;
	int32_t activenum;
	float64_t criterion, eq;
	float64_t *a_old;
	int32_t t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; // phase timestamps
	float64_t epsilon_crit_org;
	float64_t bestmaxdiff;
	float64_t worstmaxdiff;
	int32_t bestmaxdiffiter,terminate;
	bool reactivated=false;

	float64_t *selcrit;  // selection criteria scratch
	float64_t *aicache;  // one kernel row scratch
	QP qp;               // QP subproblem buffers

	// Start with a loose epsilon in linadd mode; it is tightened towards
	// epsilon_crit_org below as the solution converges.
	epsilon_crit_org=learn_parm->epsilon_crit;
	if(kernel->has_property(KP_LINADD) && get_linadd_enabled()) {
		learn_parm->epsilon_crit=2.0;

	}
	learn_parm->epsilon_shrink=2;
	(*maxdiff)=1;

	SG_DEBUG("totdoc:%d\n",totdoc);
	chosen = SG_MALLOC(int32_t, totdoc);
	last_suboptimal_at =SG_MALLOC(int32_t, totdoc);
	key =SG_MALLOC(int32_t, totdoc+11);
	selcrit =SG_MALLOC(float64_t, totdoc);
	selexam =SG_MALLOC(int32_t, totdoc);
	a_old =SG_MALLOC(float64_t, totdoc);
	aicache =SG_MALLOC(float64_t, totdoc);
	working2dnum =SG_MALLOC(int32_t, totdoc+11);
	active2dnum =SG_MALLOC(int32_t, totdoc+11);
	qp.opt_ce =SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
	qp.opt_ce0 =SG_MALLOC(float64_t, 1);
	qp.opt_g =SG_MALLOC(float64_t, learn_parm->svm_maxqpsize*learn_parm->svm_maxqpsize);
	qp.opt_g0 =SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
	qp.opt_xinit =SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
	qp.opt_low=SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
	qp.opt_up=SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);

	choosenum=0;
	inconsistentnum=0;
	if(!retrain) retrain=1;
	iteration=1;
	bestmaxdiffiter=1;
	bestmaxdiff=999999999;
	worstmaxdiff=1e-10;
	terminate=0;

	kernel->set_time(iteration); // LRU timestamp for the kernel cache

	for (i=0;i<totdoc;i++) {
		chosen[i]=0;
		a_old[i]=a[i];
		last_suboptimal_at[i]=1;
		if(inconsistent[i])
			inconsistentnum++;
	}
	activenum=compute_index(shrink_state->active,totdoc,active2dnum);
	inactivenum=totdoc-activenum;
	clear_index(working2dnum);

	CTime start_time;
	mkl_converged=false;

	// NOTE(review): the minimum-iteration guard differs between branches
	// (100 on CYGWIN vs 3 elsewhere) — looks intentional but confirm.
#ifdef CYGWIN
	for (;((iteration<100 || (!mkl_converged && callback) ) || (retrain && (!terminate))); iteration++){
#else
	CSignal::clear_cancel();
	for (;((!CSignal::cancel_computations()) && ((iteration<3 || (!mkl_converged && callback) ) || (retrain && (!terminate)))); iteration++){
#endif

		if(use_kernel_cache)
			kernel->set_time(iteration); // LRU timestamp

		if(verbosity>=2) t0=get_runtime();
		if(verbosity>=3) {
			SG_DEBUG( "\nSelecting working set... ");
		}

		if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize)
			learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;

		// Age out variables from the previous working set; keep the rest.
		i=0;
		for (jj=0;(j=working2dnum[jj])>=0;jj++) {
			if((chosen[j]>=(learn_parm->svm_maxqpsize/
					CMath::min(learn_parm->svm_maxqpsize,
						   learn_parm->svm_newvarsinqp)))
			   || (inconsistent[j])
			   || (j == heldout)) {
				chosen[j]=0;
				choosenum--;
			}
			else {
				chosen[j]++;
				working2dnum[i++]=j;
			}
		}
		working2dnum[i]=-1;

		if(retrain == 2) {
			// Special reset: zero alphas of inconsistent/heldout examples
			// and repair the equality constraint before re-optimizing.
			choosenum=0;
			for (jj=0;(j=working2dnum[jj])>=0;jj++) {
				chosen[j]=0;
			}
			clear_index(working2dnum);
			for (i=0;i<totdoc;i++) {
				if((inconsistent[i] || (heldout==i)) && (a[i] != 0.0)) {
					chosen[i]=99999;
					choosenum++;
					a[i]=0;
				}
			}
			if(learn_parm->biased_hyperplane) {
				eq=0;
				for (i=0;i<totdoc;i++) {
					eq+=a[i]*label[i];
				}
				// Reduce alphas until sum_i a_i*y_i is (nearly) zero again.
				for (i=0;(i<totdoc) && (fabs(eq) > learn_parm->epsilon_a);i++) {
					if((eq*label[i] > 0) && (a[i] > 0)) {
						chosen[i]=88888;
						choosenum++;
						if((eq*label[i]) > a[i]) {
							eq-=(a[i]*label[i]);
							a[i]=0;
						}
						else {
							a[i]-=(eq*label[i]);
							eq=0;
						}
					}
				}
			}
			compute_index(chosen,totdoc,working2dnum);
		}
		else
		{
			if(iteration % 101)
			{
				// Steepest-gradient working-set selection; half of the
				// slots may first be filled preferring cached rows.
				already_chosen=0;
				if(CMath::min(learn_parm->svm_newvarsinqp, learn_parm->svm_maxqpsize-choosenum)>=4 &&
						(!(kernel->has_property(KP_LINADD) && get_linadd_enabled())))
				{
					already_chosen=select_next_qp_subproblem_grad(
							label,a,lin,c,totdoc,
							(int32_t)(CMath::min(learn_parm->svm_maxqpsize-choosenum,
									learn_parm->svm_newvarsinqp)/2),
							inconsistent,active2dnum,
							working2dnum,selcrit,selexam,1,
							key,chosen);
					choosenum+=already_chosen;
				}
				choosenum+=select_next_qp_subproblem_grad(
						label,a,lin,c,totdoc,
						CMath::min(learn_parm->svm_maxqpsize-choosenum,
								learn_parm->svm_newvarsinqp-already_chosen),
						inconsistent,active2dnum,
						working2dnum,selcrit,selexam,0,key,
						chosen);
			}
			else {
				// Every 101st iteration: randomized selection, presumably
				// to escape cycling caused by numerical inaccuracies.
				choosenum+=select_next_qp_subproblem_rand(
						label,a,lin,c,totdoc,
						CMath::min(learn_parm->svm_maxqpsize-choosenum,
								learn_parm->svm_newvarsinqp),
						inconsistent,active2dnum,
						working2dnum,selcrit,selexam,key,
						chosen,iteration);
			}
		}

		if(verbosity>=2) {
			SG_INFO( " %ld vectors chosen\n",choosenum);
		}

		if(verbosity>=2) t1=get_runtime();

		if (use_kernel_cache)
		{
			// Prefetch kernel rows of the working set (per-subkernel for
			// MKL over a combined kernel without appended weights).
			if ( callback &&
					(!((CCombinedKernel*) kernel)->get_append_subkernel_weights())
			   )
			{
				CCombinedKernel* k = (CCombinedKernel*) kernel;
				CKernel* kn = k->get_first_kernel();

				while (kn)
				{
					kn->cache_multiple_kernel_rows(working2dnum, choosenum);
					SG_UNREF(kn);
					kn = k->get_next_kernel() ;
				}
			}
			else
				kernel->cache_multiple_kernel_rows(working2dnum, choosenum);
		}

		if(verbosity>=2) t2=get_runtime();

		if(retrain != 2) {
			// Solve the QP subproblem over the working set.
			optimize_svm(docs,label,inconsistent,0.0,chosen,active2dnum,
					totdoc,working2dnum,choosenum,a,lin,c,
					aicache,&qp,&epsilon_crit_org);
		}

		if(verbosity>=2) t3=get_runtime();
		update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc,
				lin,aicache,c);

		if(verbosity>=2) t4=get_runtime();
		supvecnum=calculate_svm_model(docs,label,lin,a,a_old,c,working2dnum,active2dnum);

		if(verbosity>=2) t5=get_runtime();

		// Remember the alphas used for the incremental lin[] update.
		for (jj=0;(i=working2dnum[jj])>=0;jj++) {
			a_old[i]=a[i];
		}

		retrain=check_optimality(label,a,lin,c,totdoc,
				maxdiff,epsilon_crit_org,&misclassified,
				inconsistent,active2dnum,last_suboptimal_at,
				iteration);

		if(verbosity>=2) {
			t6=get_runtime();
			timing_profile->time_select+=t1-t0;
			timing_profile->time_kernel+=t2-t1;
			timing_profile->time_opti+=t3-t2;
			timing_profile->time_update+=t4-t3;
			timing_profile->time_model+=t5-t4;
			timing_profile->time_check+=t6-t5;
		}

		// Watchdog: terminate if no new best violation for maxiter rounds.
		if((*maxdiff) < bestmaxdiff) {
			bestmaxdiff=(*maxdiff);
			bestmaxdiffiter=iteration;
		}
		if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) {
			terminate=1;
			retrain=0;
			SG_WARNING( "Relaxing KT-Conditions due to slow progress! Terminating!\n");
		}

		noshrink= (get_shrinking_enabled()) ? 0 : 1;

		// Converged on the active set: re-check the shrunken variables.
		if ((!callback) && (!retrain) && (inactivenum>0) &&
				((!learn_parm->skip_final_opt_check) || (kernel->has_property(KP_LINADD) && get_linadd_enabled())))
		{
			t1=get_runtime();
			SG_DEBUG( "reactivating inactive examples\n");

			reactivate_inactive_examples(label,a,shrink_state,lin,c,totdoc,
					iteration,inconsistent,
					docs,aicache,
					maxdiff);
			reactivated=true;
			SG_DEBUG("done reactivating inactive examples (maxdiff:%8f eps_crit:%8f orig_eps:%8f)\n", *maxdiff, learn_parm->epsilon_crit, epsilon_crit_org);

			activenum=compute_index(shrink_state->active,totdoc,active2dnum);
			inactivenum=totdoc-activenum;

			// Reset the watchdog after reactivation.
			bestmaxdiff=(*maxdiff);
			bestmaxdiffiter=iteration;

			noshrink=1;
			retrain=0;
			if((*maxdiff) > learn_parm->epsilon_crit)
			{
				SG_INFO( "restarting optimization as we are - due to shrinking - deviating too much (maxdiff(%f) > eps(%f))\n", *maxdiff, learn_parm->epsilon_crit);
				retrain=1;
			}
			timing_profile->time_shrink+=get_runtime()-t1;
			if (((verbosity>=1) && (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())))
					|| (verbosity>=2)) {
				SG_DONE();
				SG_DEBUG("Number of inactive variables = %ld\n", inactivenum);
			}
		}

		// Tighten the working precision towards the requested epsilon.
		if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff)))
			learn_parm->epsilon_crit=(*maxdiff);
		if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) {
			learn_parm->epsilon_crit/=4.0;
			retrain=1;
			noshrink=1;
		}
		if(learn_parm->epsilon_crit<epsilon_crit_org)
			learn_parm->epsilon_crit=epsilon_crit_org;

		if(verbosity>=2) {
			SG_INFO( " => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n",
					supvecnum,model->at_upper_bound,(*maxdiff));

		}
		mymaxdiff=*maxdiff ;

		// Shrink every 10th iteration (skipped while MKL callback is set).
		if (((iteration % 10) == 0) && (!noshrink) && !callback)
		{
			activenum=shrink_problem(shrink_state,active2dnum,last_suboptimal_at,iteration,totdoc,
					CMath::max((int32_t)(activenum/10),
						CMath::max((int32_t)(totdoc/500),(int32_t) 100)),
					a,inconsistent, c, lin, label);

			inactivenum=totdoc-activenum;

			if (use_kernel_cache && (supvecnum>kernel->get_max_elems_cache())
					&& ((kernel->get_activenum_cache()-activenum)>CMath::max((int32_t)(activenum/10),(int32_t) 500))) {

				kernel->kernel_cache_shrink(totdoc, CMath::min((int32_t) (kernel->get_activenum_cache()-activenum),
						(int32_t) (kernel->get_activenum_cache()-supvecnum)),
						shrink_state->active);
			}
		}

		if (bestmaxdiff>worstmaxdiff)
			worstmaxdiff=bestmaxdiff;

		SG_ABS_PROGRESS(bestmaxdiff, -CMath::log10(bestmaxdiff), -CMath::log10(worstmaxdiff), -CMath::log10(epsilon), 6);

		// Hard wall-clock training time limit.
		if (max_train_time > 0 && start_time.cur_time_diff() > max_train_time) {
			terminate = 1;
			retrain = 0;
		}
	}

	SG_DEBUG( "inactive:%d\n", inactivenum);

	// Final reactivation if we left the loop with shrunken variables.
	if (inactivenum && !reactivated && !callback)
	{
		SG_DEBUG( "reactivating inactive examples\n");
		reactivate_inactive_examples(label,a,shrink_state,lin,c,totdoc,
				iteration,inconsistent,
				docs,aicache,
				maxdiff);
		SG_DEBUG( "done reactivating inactive examples\n");

		activenum=compute_index(shrink_state->active,totdoc,active2dnum);
		inactivenum=totdoc-activenum;

		bestmaxdiff=(*maxdiff);
		bestmaxdiffiter=iteration;
	}

	criterion=compute_objective_function(a,lin,c,learn_parm->eps,label,totdoc);
	CSVM::set_objective(criterion);

	SG_FREE(chosen);
	SG_FREE(last_suboptimal_at);
	SG_FREE(key);
	SG_FREE(selcrit);
	SG_FREE(selexam);
	SG_FREE(a_old);
	SG_FREE(aicache);
	SG_FREE(working2dnum);
	SG_FREE(active2dnum);
	SG_FREE(qp.opt_ce);
	SG_FREE(qp.opt_ce0);
	SG_FREE(qp.opt_g);
	SG_FREE(qp.opt_g0);
	SG_FREE(qp.opt_xinit);
	SG_FREE(qp.opt_low);
	SG_FREE(qp.opt_up);

	learn_parm->epsilon_crit=epsilon_crit_org;

	return(iteration);
}
00945
00946 float64_t CSVMLight::compute_objective_function(
00947 float64_t *a, float64_t *lin, float64_t *c, float64_t* eps, int32_t *label,
00948 int32_t totdoc)
00949
00950
00951 {
00952
00953 float64_t criterion=0;
00954
00955 for (int32_t i=0;i<totdoc;i++)
00956 criterion=criterion+(eps[i]-(float64_t)label[i]*c[i])*a[i]+0.5*a[i]*label[i]*lin[i];
00957
00958 return(criterion);
00959 }
00960
00961
/** Resets a -1 terminated index list to the empty list. */
void CSVMLight::clear_index(int32_t *index)

{
	index[0]=-1;
}
00967
00968 void CSVMLight::add_to_index(int32_t *index, int32_t elem)
00969
00970 {
00971 register int32_t i;
00972 for (i=0;index[i] != -1;i++);
00973 index[i]=elem;
00974 index[i+1]=-1;
00975 }
00976
00977 int32_t CSVMLight::compute_index(
00978 int32_t *binfeature, int32_t range, int32_t *index)
00979
00980 {
00981 register int32_t i,ii;
00982
00983 ii=0;
00984 for (i=0;i<range;i++) {
00985 if(binfeature[i]) {
00986 index[ii]=i;
00987 ii++;
00988 }
00989 }
00990 for (i=0;i<4;i++) {
00991 index[ii+i]=-1;
00992 }
00993 return(ii);
00994 }
00995
00996
00997 void CSVMLight::optimize_svm(
00998 int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
00999 float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t totdoc,
01000 int32_t *working2dnum, int32_t varnum, float64_t *a, float64_t *lin,
01001 float64_t *c, float64_t *aicache, QP *qp, float64_t *epsilon_crit_target)
01002
01003 {
01004 int32_t i;
01005 float64_t *a_v;
01006
01007
01008
01009
01010
01011
01012 compute_matrices_for_optimization(docs,label,
01013 exclude_from_eq_const,eq_target,chosen,
01014 active2dnum,working2dnum,a,lin,c,
01015 varnum,totdoc,aicache,qp);
01016
01017 if(verbosity>=3) {
01018 SG_DEBUG( "Running optimizer...");
01019 }
01020
01021 a_v=optimize_qp(qp,epsilon_crit_target,
01022 learn_parm->svm_maxqpsize,
01023 &(model->b),
01024 learn_parm->svm_maxqpsize);
01025
01026 if(verbosity>=3) {
01027 SG_DONE();
01028 }
01029
01030 for (i=0;i<varnum;i++)
01031 a[working2dnum[i]]=a_v[i];
01032 }
01033
/** Parallel variant of compute_matrices_for_optimization(): builds the QP
 * subproblem matrices, evaluating the varnum*(varnum+1)/2 required kernel
 * values with num_threads-1 pthreads (the main thread takes the remainder).
 * Falls back to the serial version when only one thread is configured.
 *
 * NOTE(review): if HAVE_PTHREAD is not defined but num_threads > 1, neither
 * branch runs and qp is left unfilled — presumably num_threads is forced to
 * 1 in such builds; confirm.
 */
void CSVMLight::compute_matrices_for_optimization_parallel(
	int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
	float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t *key,
	float64_t *a, float64_t *lin, float64_t *c, int32_t varnum, int32_t totdoc,
	float64_t *aicache, QP *qp)
{
	if (parallel->get_num_threads()<=1)
	{
		compute_matrices_for_optimization(docs, label, exclude_from_eq_const, eq_target,
										   chosen, active2dnum, key, a, lin, c,
										   varnum, totdoc, aicache, qp) ;
	}
#ifdef HAVE_PTHREAD
	else
	{
		register int32_t ki,kj,i,j;
		register float64_t kernel_temp;

		qp->opt_n=varnum;
		// Constant of the equality constraint: contributions of all SVs
		// outside the working set (model arrays are 1-based).
		qp->opt_ce0[0]=-eq_target;
		for (j=1;j<model->sv_num;j++) {
			if((!chosen[model->supvec[j]])
					&& (!exclude_from_eq_const[(model->supvec[j])])) {
				qp->opt_ce0[0]+=model->alpha[j];
			}
		}
		if(learn_parm->biased_hyperplane)
			qp->opt_m=1;
		else
			qp->opt_m=0; // equality constraint ignored for unbiased case

		// Initialize the linear part from the current gradient.
		for (i=0;i<varnum;i++) {
			qp->opt_g0[i]=lin[key[i]];
		}

		ASSERT(parallel->get_num_threads()>1);
		// Enumerate the upper triangle (incl. diagonal) of index pairs.
		int32_t *KI=SG_MALLOC(int32_t, varnum*varnum);
		int32_t *KJ=SG_MALLOC(int32_t, varnum*varnum);
		int32_t Knum=0 ;
		float64_t *Kval = SG_MALLOC(float64_t, varnum*(varnum+1)/2);
		for (i=0;i<varnum;i++) {
			ki=key[i];
			KI[Knum]=docs[ki] ;
			KJ[Knum]=docs[ki] ;
			Knum++ ;
			for (j=i+1;j<varnum;j++)
			{
				kj=key[j];
				KI[Knum]=docs[ki] ;
				KJ[Knum]=docs[kj] ;
				Knum++ ;
			}
		}
		ASSERT(Knum<=varnum*(varnum+1)/2);

		// Split the Knum evaluations: workers take [t*step,(t+1)*step),
		// the main thread computes the remaining tail below.
		pthread_t* threads = SG_MALLOC(pthread_t, parallel->get_num_threads()-1);
		S_THREAD_PARAM_KERNEL* params = SG_MALLOC(S_THREAD_PARAM_KERNEL, parallel->get_num_threads()-1);
		int32_t step= Knum/parallel->get_num_threads();

		for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
		{
			params[t].svmlight = this;
			params[t].start = t*step;
			params[t].end = (t+1)*step;
			params[t].KI=KI ;
			params[t].KJ=KJ ;
			params[t].Kval=Kval ;
			pthread_create(&threads[t], NULL, CSVMLight::compute_kernel_helper, (void*)&params[t]);
		}
		for (i=params[parallel->get_num_threads()-2].end; i<Knum; i++)
			Kval[i]=compute_kernel(KI[i],KJ[i]) ;

		for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
			pthread_join(threads[t], NULL);

		SG_FREE(params);
		SG_FREE(threads);

		// Consume Kval in the same order it was produced.
		Knum=0 ;
		for (i=0;i<varnum;i++) {
			ki=key[i];

			// Equality-constraint row and box bounds for this variable.
			qp->opt_ce[i]=label[ki];
			qp->opt_low[i]=0;
			qp->opt_up[i]=learn_parm->svm_cost[ki];

			kernel_temp=Kval[Knum] ;
			Knum++ ;
			// Subtract the working set's own contribution from the gradient.
			qp->opt_g0[i]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
			// Quadratic part (diagonal).
			qp->opt_g[varnum*i+i]=kernel_temp;

			for (j=i+1;j<varnum;j++) {
				kj=key[j];
				kernel_temp=Kval[Knum] ;
				Knum++ ;

				qp->opt_g0[i]-=(kernel_temp*a[kj]*(float64_t)label[kj]);
				qp->opt_g0[j]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
				// Quadratic part (symmetric off-diagonal).
				qp->opt_g[varnum*i+j]=(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp;
				qp->opt_g[varnum*j+i]=qp->opt_g[varnum*i+j];
			}

			if(verbosity>=3) {
				if(i % 20 == 0) {
					SG_DEBUG( "%ld..",i);
				}
			}
		}

		SG_FREE(KI);
		SG_FREE(KJ);
		SG_FREE(Kval);

		for (i=0;i<varnum;i++) {
			// Start the QP solver from the current (feasible) alphas.
			qp->opt_xinit[i]=a[key[i]];
			// Final linear term, folded with label sign and linear term eps.
			qp->opt_g0[i]=(learn_parm->eps[key[i]]-(float64_t)label[key[i]]*c[key[i]])+qp->opt_g0[i]*(float64_t)label[key[i]];
		}

		if(verbosity>=3) {
			SG_DONE();
		}
	}
#endif
}
01165
/** Set up the quadratic subproblem for the currently selected working set.
 *
 * Fills the QP structure with the equality-constraint data (opt_ce, opt_ce0),
 * the box bounds (opt_low, opt_up), the Hessian of the working-set variables
 * (opt_g, with labels folded in), the linear part (opt_g0) and the warm-start
 * point (opt_xinit).
 *
 * @param docs document/example indices
 * @param label +1/-1 label per example
 * @param exclude_from_eq_const examples excluded from the equality constraint
 * @param eq_target right-hand side of the equality constraint
 * @param chosen 1 for examples in the working set
 * @param active2dnum compressed index of active examples (unused here)
 * @param key maps working-set positions to example indices
 * @param a current alphas
 * @param lin cached linear term lin[i] = sum_j a_j y_j K(i,j)
 * @param c per-example cost/target offsets
 * @param varnum number of variables in the working set
 * @param totdoc total number of examples (unused here)
 * @param aicache kernel row cache (unused here)
 * @param qp QP subproblem to fill (output)
 */
void CSVMLight::compute_matrices_for_optimization(
	int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
	float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t *key,
	float64_t *a, float64_t *lin, float64_t *c, int32_t varnum, int32_t totdoc,
	float64_t *aicache, QP *qp)
{
	register int32_t ki,kj,i,j;
	register float64_t kernel_temp;

	qp->opt_n=varnum;
	// Accumulate the contribution of all fixed (non-chosen, non-excluded)
	// support vectors to the right-hand side of the equality constraint.
	qp->opt_ce0[0]=-eq_target;
	for (j=1;j<model->sv_num;j++) {
		if((!chosen[model->supvec[j]])
				&& (!exclude_from_eq_const[(model->supvec[j])])) {
			qp->opt_ce0[0]+=model->alpha[j];
		}
	}
	// With a bias term there is one equality constraint, otherwise none.
	if(learn_parm->biased_hyperplane)
		qp->opt_m=1;
	else
		qp->opt_m=0;

	// Start the linear part from the cached lin[] values; the loop below
	// subtracts the working-set variables' own contributions.
	for (i=0;i<varnum;i++) {
		qp->opt_g0[i]=lin[key[i]];
	}

	for (i=0;i<varnum;i++) {
		ki=key[i];

		// Equality-constraint coefficient and box bounds for variable i.
		qp->opt_ce[i]=label[ki];
		qp->opt_low[i]=0;
		qp->opt_up[i]=learn_parm->svm_cost[ki];

		kernel_temp=compute_kernel(docs[ki], docs[ki]);
		// Remove variable i's own contribution from the linear part.
		qp->opt_g0[i]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
		// Diagonal Hessian entry (label[ki]^2 == 1, so no label factor).
		qp->opt_g[varnum*i+i]=kernel_temp;

		for (j=i+1;j<varnum;j++) {
			kj=key[j];
			kernel_temp=compute_kernel(docs[ki], docs[kj]);

			// Remove the mutual contributions of variables i and j.
			qp->opt_g0[i]-=(kernel_temp*a[kj]*(float64_t)label[kj]);
			qp->opt_g0[j]-=(kernel_temp*a[ki]*(float64_t)label[ki]);

			// Off-diagonal Hessian entries (symmetric matrix).
			qp->opt_g[varnum*i+j]=(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp;
			qp->opt_g[varnum*j+i]=qp->opt_g[varnum*i+j];
		}

		if(verbosity>=3) {
			if(i % 20 == 0) {
				SG_DEBUG( "%ld..",i);
			}
		}
	}

	for (i=0;i<varnum;i++) {
		// Warm-start the solver from the current alphas.
		qp->opt_xinit[i]=a[key[i]];
		// Final linear term: epsilon/cost offset plus the label-corrected rest.
		qp->opt_g0[i]=(learn_parm->eps[key[i]]-(float64_t)label[key[i]]*c[key[i]]) + qp->opt_g0[i]*(float64_t)label[key[i]];
	}

	if(verbosity>=3) {
		SG_DONE();
	}
}
01237
01238
/** Update the explicit SVM model (support vectors, alphas, bias) after a
 * working-set optimization step and compute the threshold b.
 *
 * @param docs document indices
 * @param label +1/-1 labels
 * @param lin cached linear term per example
 * @param a new alphas
 * @param a_old alphas before the optimization step
 * @param c per-example cost/target offsets
 * @param working2dnum compressed list of working-set examples (-1 terminated)
 * @param active2dnum compressed list of active examples (-1 terminated)
 * @return number of support vectors in the updated model
 */
int32_t CSVMLight::calculate_svm_model(
	int32_t* docs, int32_t *label, float64_t *lin, float64_t *a,
	float64_t *a_old, float64_t *c, int32_t *working2dnum, int32_t *active2dnum)
{
	int32_t i,ii,pos,b_calculated=0,first_low,first_high;
	float64_t ex_c,b_temp,b_low,b_high;

	if(verbosity>=3) {
		SG_DEBUG( "Calculating model...");
	}

	// Without a bias the threshold is fixed at zero.
	if(!learn_parm->biased_hyperplane) {
		model->b=0;
		b_calculated=1;
	}

	for (ii=0;(i=working2dnum[ii])>=0;ii++) {
		if((a_old[i]>0) && (a[i]==0)) { // example dropped out of the model
			pos=model->index[i];
			model->index[i]=-1;
			(model->sv_num)--;
			// Fill the hole with the last support vector.
			model->supvec[pos]=model->supvec[model->sv_num];
			model->alpha[pos]=model->alpha[model->sv_num];
			model->index[model->supvec[pos]]=pos;
		}
		else if((a_old[i]==0) && (a[i]>0)) { // example became a support vector
			model->supvec[model->sv_num]=docs[i];
			model->alpha[model->sv_num]=a[i]*(float64_t)label[i];
			model->index[i]=model->sv_num;
			(model->sv_num)++;
		}
		else if(a_old[i]==a[i]) { // nothing to do
		}
		else {  // still a support vector, just update its alpha
			model->alpha[model->index[i]]=a[i]*(float64_t)label[i];
		}

		// Keep the count of alphas sitting at the upper bound C in sync.
		ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
		if((a_old[i]>=ex_c) && (a[i]<ex_c)) {
			(model->at_upper_bound)--;
		}
		else if((a_old[i]<ex_c) && (a[i]>=ex_c)) {
			(model->at_upper_bound)++;
		}

		// A free (strictly-between-bounds) SV determines b directly via its
		// KKT condition.
		if((!b_calculated)
				&& (a[i]>learn_parm->epsilon_a) && (a[i]<ex_c)) {
			model->b=((float64_t)label[i]*learn_parm->eps[i]-c[i]+lin[i]);
			b_calculated=1;
		}
	}

	// If every support vector is at the upper bound, b is not uniquely
	// determined; take the middle of the interval [b_low,b_high] allowed by
	// the KKT conditions of the active examples.
	if(learn_parm->biased_hyperplane
			&& (!b_calculated)
			&& (model->sv_num-1 == model->at_upper_bound)) {
		first_low=1;
		first_high=1;
		b_low=0;
		b_high=0;
		for (ii=0;(i=active2dnum[ii])>=0;ii++) {
			ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
			if(a[i]<ex_c) {
				if(label[i]>0) {
					b_temp=-(learn_parm->eps[i]-c[i]+lin[i]);
					if((b_temp>b_low) || (first_low)) {
						b_low=b_temp;
						first_low=0;
					}
				}
				else {
					b_temp=-(-learn_parm->eps[i]-c[i]+lin[i]);
					if((b_temp<b_high) || (first_high)) {
						b_high=b_temp;
						first_high=0;
					}
				}
			}
			else {
				if(label[i]<0) {
					b_temp=-(-learn_parm->eps[i]-c[i]+lin[i]);
					if((b_temp>b_low) || (first_low)) {
						b_low=b_temp;
						first_low=0;
					}
				}
				else {
					b_temp=-(learn_parm->eps[i]-c[i]+lin[i]);
					if((b_temp<b_high) || (first_high)) {
						b_high=b_temp;
						first_high=0;
					}
				}
			}
		}
		if(first_high) {
			model->b=-b_low;
		}
		else if(first_low) {
			model->b=-b_high;
		}
		else {
			model->b=-(b_high+b_low)/2.0;  // midpoint of the admissible range
		}
	}

	if(verbosity>=3) {
		SG_DONE();
	}

	// Element 0 of supvec/alpha is unused, hence the -1.
	return(model->sv_num-1);
}
01355
/** Check the KKT optimality conditions on all active examples.
 *
 * Updates *maxdiff (largest KKT violation), *misclassified, and the
 * last_suboptimal_at timestamps used by the shrinking heuristic.
 *
 * @return 1 if another training iteration is required, 0 otherwise
 */
int32_t CSVMLight::check_optimality(
	int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc,
	float64_t *maxdiff, float64_t epsilon_crit_org, int32_t *misclassified,
	int32_t *inconsistent, int32_t *active2dnum, int32_t *last_suboptimal_at,
	int32_t iteration)
{
	int32_t i,ii,retrain;
	float64_t dist,ex_c,target;

	// Adapt the shrink tolerance to the current precision: exact for linadd
	// kernels, otherwise a running average blended with the last maxdiff.
	if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
		learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org;
	else
		learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3;
	retrain=0;
	(*maxdiff)=0;
	(*misclassified)=0;
	for (ii=0;(i=active2dnum[ii])>=0;ii++) {
		if((!inconsistent[i]) && label[i]) {
			// Signed 'distance' of example i from the hyperplane.
			dist=(lin[i]-model->b)*(float64_t)label[i];
			target=-(learn_parm->eps[i]-(float64_t)label[i]*c[i]);
			ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
			if(dist <= 0) {
				(*misclassified)++;
			}
			// Record the largest KKT violation: non-zero alphas must not
			// exceed the target margin, non-bounded ones must not fall below.
			if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
				if((dist-target)>(*maxdiff))
					(*maxdiff)=dist-target;
			}
			else if((a[i]<ex_c) && (dist < target)) {
				if((target-dist)>(*maxdiff))
					(*maxdiff)=target-dist;
			}

			// Timestamp variables that are (possibly) still suboptimal so
			// shrinking only removes long-term optimal-at-bound variables:
			// free variables always count, bounded ones only while within
			// epsilon_shrink of violating their KKT condition.
			if((a[i]>(learn_parm->epsilon_a))
					&& (a[i]<ex_c)) {
				last_suboptimal_at[i]=iteration;  // not at a bound
			}
			else if((a[i]<=(learn_parm->epsilon_a))
					&& (dist < (target+learn_parm->epsilon_shrink))) {
				last_suboptimal_at[i]=iteration;  // at lower bound, not clearly optimal
			}
			else if((a[i]>=ex_c)
					&& (dist > (target-learn_parm->epsilon_shrink))) {
				last_suboptimal_at[i]=iteration;  // at upper bound, not clearly optimal
			}
		}
	}

	// Retrain while the largest KKT violation exceeds the stop criterion.
	if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) {
		retrain=1;
	}
	return(retrain);
}
01416
/** Keep the linear term lin[i] = sum_j a_j y_j K(i,j) consistent after the
 * alphas of the working set changed.
 *
 * Dispatches to the fastest applicable strategy: the kernel's linadd
 * optimization (optionally multi-threaded), the MKL callback variants, or
 * plain kernel-row updates.
 *
 * @param docs document indices
 * @param label +1/-1 labels
 * @param active2dnum compressed list of active examples (-1 terminated)
 * @param a new alphas
 * @param a_old alphas before the optimization step
 * @param working2dnum compressed list of working-set examples (-1 terminated)
 * @param totdoc total number of examples
 * @param lin linear term cache to update (in/out)
 * @param aicache scratch buffer for one kernel row
 * @param c per-example cost/target offsets (unused here)
 */
void CSVMLight::update_linear_component(
	int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
	float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
	float64_t *aicache, float64_t* c)
{
	register int32_t i=0,ii=0,j=0,jj=0;

	if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
	{
		if (callback)
		{
			// MKL is active: delegate to the MKL + linadd variant.
			update_linear_component_mkl_linadd(docs, label, active2dnum, a,
					a_old, working2dnum, totdoc, lin, aicache);
		}
		else
		{
			kernel->clear_normal();

			// Fold the alpha deltas of the working set into the kernel's
			// normal vector.
			int32_t num_working=0;
			for (ii=0;(i=working2dnum[ii])>=0;ii++) {
				if(a[i] != a_old[i]) {
					kernel->add_to_normal(docs[i], (a[i]-a_old[i])*(float64_t)label[i]);
					num_working++;
				}
			}

			if (num_working>0)
			{
				if (parallel->get_num_threads() < 2)
				{
					for (jj=0;(j=active2dnum[jj])>=0;jj++) {
						lin[j]+=kernel->compute_optimized(docs[j]);
					}
				}
#ifdef HAVE_PTHREAD
				else
				{
					int32_t num_elem = 0 ;
					for (jj=0;(j=active2dnum[jj])>=0;jj++) num_elem++ ;

					pthread_t* threads = SG_MALLOC(pthread_t, parallel->get_num_threads()-1);
					S_THREAD_PARAM* params = SG_MALLOC(S_THREAD_PARAM, parallel->get_num_threads()-1);
					int32_t start = 0 ;
					int32_t step = num_elem/parallel->get_num_threads();
					int32_t end = step ;

					// Spawn workers on equal-size slices of the active set.
					for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
					{
						params[t].kernel = kernel ;
						params[t].lin = lin ;
						params[t].docs = docs ;
						params[t].active2dnum=active2dnum ;
						params[t].start = start ;
						params[t].end = end ;
						start=end ;
						end+=step ;
						pthread_create(&threads[t], NULL, update_linear_component_linadd_helper, (void*)&params[t]) ;
					}

					// The calling thread handles the remainder slice.
					for (jj=params[parallel->get_num_threads()-2].end;(j=active2dnum[jj])>=0;jj++) {
						lin[j]+=kernel->compute_optimized(docs[j]);
					}
					void* ret;
					for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
						pthread_join(threads[t], &ret) ;

					SG_FREE(params);
					SG_FREE(threads);
				}
#endif
			}
		}
	}
	else
	{
		if (callback)
		{
			// MKL without linadd support.
			update_linear_component_mkl(docs, label, active2dnum,
					a, a_old, working2dnum, totdoc, lin, aicache);
		}
		else {
			// Generic path: add each changed alpha's kernel row to lin[].
			for (jj=0;(i=working2dnum[jj])>=0;jj++) {
				if(a[i] != a_old[i]) {
					kernel->get_kernel_row(i,active2dnum,aicache);
					for (ii=0;(j=active2dnum[ii])>=0;ii++)
						lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
				}
			}
		}
	}
}
01512
01513
/** MKL variant of update_linear_component(): maintains the per-subkernel
 * output matrix W (W[j*num_kernels+n] accumulates subkernel n's contribution
 * for example j), then invokes the MKL callback to re-optimize the subkernel
 * weights and recompute lin[].
 * NOTE(review): assumes W was allocated elsewhere with num x num_kernels
 * entries -- confirm against the training setup.
 */
void CSVMLight::update_linear_component_mkl(
	int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
	float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
	float64_t *aicache)
{
	int32_t num = kernel->get_num_vec_rhs();
	int32_t num_weights = -1;
	int32_t num_kernels = kernel->get_num_subkernels() ;
	const float64_t* old_beta = kernel->get_subkernel_weights(num_weights);
	ASSERT(num_weights==num_kernels);

	if ((kernel->get_kernel_type()==K_COMBINED) &&
			(!((CCombinedKernel*)kernel)->get_append_subkernel_weights()))
	{
		// Combined kernel: update W directly from each subkernel's rows.
		CCombinedKernel* k = (CCombinedKernel*) kernel;
		CKernel* kn = k->get_first_kernel() ;
		int32_t n = 0, i, j ;

		while (kn!=NULL)
		{
			for (i=0;i<num;i++)
			{
				if(a[i] != a_old[i])
				{
					kn->get_kernel_row(i,NULL,aicache, true);
					for (j=0;j<num;j++)
						W[j*num_kernels+n]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
				}
			}

			SG_UNREF(kn);
			kn = k->get_next_kernel();
			n++ ;
		}
	}
	else
	{
		// Generic kernel: probe one subkernel at a time by setting its
		// weight to 1 and all others to 0, then restore the weights.
		float64_t* w_backup = SG_MALLOC(float64_t, num_kernels);
		float64_t* w1 = SG_MALLOC(float64_t, num_kernels);

		// Back up the current weights before probing.
		for (int32_t i=0; i<num_kernels; i++)
		{
			w_backup[i] = old_beta[i] ;
			w1[i]=0.0 ;
		}
		for (int32_t n=0; n<num_kernels; n++)
		{
			w1[n]=1.0 ;
			kernel->set_subkernel_weights(w1, num_weights) ;

			for (int32_t i=0;i<num;i++)
			{
				if(a[i] != a_old[i])
				{
					for (int32_t j=0;j<num;j++)
						W[j*num_kernels+n]+=(a[i]-a_old[i])*compute_kernel(i,j)*(float64_t)label[i];
				}
			}
			w1[n]=0.0 ;
		}

		// Restore the original subkernel weights.
		kernel->set_subkernel_weights(w_backup,num_weights) ;

		SG_FREE(w_backup);
		SG_FREE(w1);
	}

	call_mkl_callback(a, label, lin);
}
01586
01587
/** MKL + linadd variant of update_linear_component(): temporarily sets all
 * subkernel weights to 1 so compute_by_subkernel() yields raw per-subkernel
 * outputs, rebuilds the kernel's normal vector from the alpha deltas, fills
 * the W matrix (optionally multi-threaded), restores the weights and
 * triggers the MKL callback to update lin[].
 */
void CSVMLight::update_linear_component_mkl_linadd(
	int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
	float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
	float64_t *aicache)
{
	int32_t num = kernel->get_num_vec_rhs();
	int32_t num_weights = -1;
	int32_t num_kernels = kernel->get_num_subkernels() ;
	const float64_t* old_beta = kernel->get_subkernel_weights(num_weights);
	ASSERT(num_weights==num_kernels);

	float64_t* w_backup = SG_MALLOC(float64_t, num_kernels);
	float64_t* w1 = SG_MALLOC(float64_t, num_kernels);

	// Back up the weights and switch all of them to 1.
	for (int32_t i=0; i<num_kernels; i++)
	{
		w_backup[i] = old_beta[i] ;
		w1[i]=1.0 ;
	}

	kernel->set_subkernel_weights(w1, num_weights) ;

	// Fold the alpha deltas of the working set into the normal vector.
	kernel->clear_normal();
	for (int32_t ii=0, i=0;(i=working2dnum[ii])>=0;ii++) {
		if(a[i] != a_old[i]) {
			kernel->add_to_normal(docs[i], (a[i]-a_old[i])*(float64_t)label[i]);
		}
	}

	if (parallel->get_num_threads() < 2)
	{
		// Single-threaded: fill W row by row.
		for (int32_t i=0; i<num; i++)
			kernel->compute_by_subkernel(i,&W[i*num_kernels]);
	}
#ifdef HAVE_PTHREAD
	else
	{
		pthread_t* threads = SG_MALLOC(pthread_t, parallel->get_num_threads()-1);
		S_THREAD_PARAM* params = SG_MALLOC(S_THREAD_PARAM, parallel->get_num_threads()-1);
		int32_t step= num/parallel->get_num_threads();

		// Workers fill equal-size slices of W ...
		for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
		{
			params[t].kernel = kernel;
			params[t].W = W;
			params[t].start = t*step;
			params[t].end = (t+1)*step;
			pthread_create(&threads[t], NULL, CSVMLight::update_linear_component_mkl_linadd_helper, (void*)&params[t]);
		}

		// ... while the calling thread handles the remainder slice.
		for (int32_t i=params[parallel->get_num_threads()-2].end; i<num; i++)
			kernel->compute_by_subkernel(i,&W[i*num_kernels]);

		for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
			pthread_join(threads[t], NULL);

		SG_FREE(params);
		SG_FREE(threads);
	}
#endif

	// Restore the original subkernel weights.
	kernel->set_subkernel_weights(w_backup,num_weights);

	SG_FREE(w_backup);
	SG_FREE(w1);

	call_mkl_callback(a, label, lin);
}
01664
01665 void* CSVMLight::update_linear_component_mkl_linadd_helper(void* p)
01666 {
01667 S_THREAD_PARAM* params = (S_THREAD_PARAM*) p;
01668
01669 int32_t num_kernels=params->kernel->get_num_subkernels();
01670
01671
01672 for (int32_t i=params->start; i<params->end; i++)
01673 params->kernel->compute_by_subkernel(i,&(params->W[i*num_kernels]));
01674
01675 return NULL ;
01676 }
01677
/** Compute the MKL objective inputs (per-subkernel quadratic terms sumw and
 * the alpha sum suma), hand them to the registered MKL callback so it can
 * re-optimize the subkernel weights, then recompute lin[] from the updated
 * weights.
 */
void CSVMLight::call_mkl_callback(float64_t* a, int32_t* label, float64_t* lin)
{
	int32_t num = kernel->get_num_vec_rhs();
	int32_t num_kernels = kernel->get_num_subkernels() ;

	int nk = (int) num_kernels; // for the external BLAS calls

	float64_t suma=0;
	float64_t* sumw=SG_MALLOC(float64_t, num_kernels);
#ifdef HAVE_LAPACK
	// sumw = 0.5 * W * (a .* y), computed via BLAS.
	double* alphay = SG_MALLOC(double, num);

	for (int32_t i=0; i<num; i++)
	{
		alphay[i]=a[i]*label[i];
		suma+=a[i];
	}

	for (int32_t i=0; i<num_kernels; i++)
		sumw[i]=0;

	cblas_dgemv(CblasColMajor, CblasNoTrans, num_kernels, (int) num, 0.5, (double*) W,
		num_kernels, alphay, 1, 1.0, (double*) sumw, 1);

	SG_FREE(alphay);
#else
	// Portable fallback: same computation without BLAS.
	for (int32_t i=0; i<num; i++)
		suma += a[i];

	for (int32_t d=0; d<num_kernels; d++)
	{
		sumw[d]=0;
		for (int32_t i=0; i<num; i++)
			sumw[d] += a[i]*(0.5*label[i]*W[i*num_kernels+d]);
	}
#endif

	// Let the MKL solver update the subkernel weights.
	if (callback)
		mkl_converged=callback(mkl, sumw, suma);

	const float64_t* new_beta = kernel->get_subkernel_weights(num_kernels);

	// Recompute lin[] = W^T * new_beta using the updated weights.
#ifdef HAVE_LAPACK
	cblas_dgemv(CblasColMajor, CblasTrans, nk, (int) num, 1.0, (double*) W,
		nk, (double*) new_beta, 1, 0.0, (double*) lin, 1);
#else
	for (int32_t i=0; i<num; i++)
		lin[i]=0 ;
	for (int32_t d=0; d<num_kernels; d++)
		if (new_beta[d]!=0)
			for (int32_t i=0; i<num; i++)
				lin[i] += new_beta[d]*W[i*num_kernels+d] ;
#endif

	SG_FREE(sumw);
}
01736
01737
01738
01739
/** Select a new working set of at most qp_size variables using the
 * steepest-feasible-descent heuristic: the first half is chosen among
 * variables that may move in direction s=-y (largest criterion first), the
 * second half among variables that may move in direction s=+y.
 *
 * @param cache_only if set (and the kernel cache is used), restrict the
 *        candidates to rows already present in the kernel cache
 * @return number of variables appended to working2dnum
 */
int32_t CSVMLight::select_next_qp_subproblem_grad(
	int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc,
	int32_t qp_size, int32_t *inconsistent, int32_t *active2dnum,
	int32_t *working2dnum, float64_t *selcrit, int32_t *select,
	int32_t cache_only, int32_t *key, int32_t *chosen)
{
	int32_t choosenum,i,j,k,activedoc,inum,valid;
	float64_t s;

	for (inum=0;working2dnum[inum]>=0;inum++); // find end of the index list
	choosenum=0;
	activedoc=0;
	// First pass: candidates whose alpha may feasibly move with s = -y.
	for (i=0;(j=active2dnum[i])>=0;i++) {
		s=-label[j];
		if(cache_only)
		{
			if (use_kernel_cache)
				valid=(kernel->kernel_cache_check(j));
			else
				valid = 1 ;
		}
		else
			valid=1;
		// Feasible: not blocked at a bound in direction s, not already
		// chosen, labeled, and not marked inconsistent.
		if(valid
				&& (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
				&& (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
					&& (s>0)))
				&& (!chosen[j])
				&& (label[j])
				&& (!inconsistent[j]))
		{
			selcrit[activedoc]=(float64_t)label[j]*(learn_parm->eps[j]-(float64_t)label[j]*c[j]+(float64_t)label[j]*lin[j]);
			key[activedoc]=j;
			activedoc++;
		}
	}
	select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
	for (k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
		i=key[select[k]];
		chosen[i]=1;
		working2dnum[inum+choosenum]=i;
		choosenum+=1;
		// Touch the row so it is not evicted from the kernel cache.
		if (use_kernel_cache)
			kernel->kernel_cache_touch(i);
	}

	// Second pass: candidates whose alpha may feasibly move with s = +y.
	activedoc=0;
	for (i=0;(j=active2dnum[i])>=0;i++) {
		s=label[j];
		if(cache_only)
		{
			if (use_kernel_cache)
				valid=(kernel->kernel_cache_check(j));
			else
				valid = 1 ;
		}
		else
			valid=1;
		if(valid
				&& (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
				&& (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
					&& (s>0)))
				&& (!chosen[j])
				&& (label[j])
				&& (!inconsistent[j]))
		{
			// Negated criterion: we want the steepest descent in the other direction.
			selcrit[activedoc]=-(float64_t)label[j]*(learn_parm->eps[j]-(float64_t)label[j]*c[j]+(float64_t)label[j]*lin[j]);
			key[activedoc]=j;
			activedoc++;
		}
	}
	select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
	for (k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
		i=key[select[k]];
		chosen[i]=1;
		working2dnum[inum+choosenum]=i;
		choosenum+=1;
		if (use_kernel_cache)
			kernel->kernel_cache_touch(i);
	}
	working2dnum[inum+choosenum]=-1; // terminate the extended index list
	return(choosenum);
}
01831
/** Select a working set of at most qp_size variables pseudo-randomly (used
 * to escape cycling of the gradient-based selection). Candidates must still
 * be feasible in the respective direction; the 'random' criterion
 * (j+iteration) % totdoc deterministically rotates the preference.
 *
 * @return number of variables appended to working2dnum
 */
int32_t CSVMLight::select_next_qp_subproblem_rand(
	int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc,
	int32_t qp_size, int32_t *inconsistent, int32_t *active2dnum,
	int32_t *working2dnum, float64_t *selcrit, int32_t *select, int32_t *key,
	int32_t *chosen, int32_t iteration)
{
	int32_t choosenum,i,j,k,activedoc,inum;
	float64_t s;

	for (inum=0;working2dnum[inum]>=0;inum++); // find end of the index list
	choosenum=0;
	activedoc=0;
	// First pass: candidates feasible in direction s = -y.
	for (i=0;(j=active2dnum[i])>=0;i++) {
		s=-label[j];
		if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
				&& (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
					&& (s>0)))
				&& (!inconsistent[j])
				&& (label[j])
				&& (!chosen[j])) {
			selcrit[activedoc]=(j+iteration) % totdoc;
			key[activedoc]=j;
			activedoc++;
		}
	}
	select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
	for (k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
		i=key[select[k]];
		chosen[i]=1;
		working2dnum[inum+choosenum]=i;
		choosenum+=1;
		// Touch the row so it is not evicted from the kernel cache.
		if (use_kernel_cache)
			kernel->kernel_cache_touch(i);
	}

	// Second pass: candidates feasible in direction s = +y.
	activedoc=0;
	for (i=0;(j=active2dnum[i])>=0;i++) {
		s=label[j];
		if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
				&& (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
					&& (s>0)))
				&& (!inconsistent[j])
				&& (label[j])
				&& (!chosen[j])) {
			selcrit[activedoc]=(j+iteration) % totdoc;
			key[activedoc]=j;
			activedoc++;
		}
	}
	select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
	for (k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
		i=key[select[k]];
		chosen[i]=1;
		working2dnum[inum+choosenum]=i;
		choosenum+=1;
		if (use_kernel_cache)
			kernel->kernel_cache_touch(i);
	}
	working2dnum[inum+choosenum]=-1; // terminate the extended index list
	return(choosenum);
}
01899
01900
01901
01902 void CSVMLight::select_top_n(
01903 float64_t *selcrit, int32_t range, int32_t* select, int32_t n)
01904 {
01905 register int32_t i,j;
01906
01907 for (i=0;(i<n) && (i<range);i++) {
01908 for (j=i;j>=0;j--) {
01909 if((j>0) && (selcrit[select[j-1]]<selcrit[i])){
01910 select[j]=select[j-1];
01911 }
01912 else {
01913 select[j]=i;
01914 j=-1;
01915 }
01916 }
01917 }
01918 if(n>0) {
01919 for (i=n;i<range;i++) {
01920 if(selcrit[i]>selcrit[select[n-1]]) {
01921 for (j=n-1;j>=0;j--) {
01922 if((j>0) && (selcrit[select[j-1]]<selcrit[i])) {
01923 select[j]=select[j-1];
01924 }
01925 else {
01926 select[j]=i;
01927 j=-1;
01928 }
01929 }
01930 }
01931 }
01932 }
01933 }
01934
01935
01936
01937
01938 void CSVMLight::init_shrink_state(
01939 SHRINK_STATE *shrink_state, int32_t totdoc, int32_t maxhistory)
01940 {
01941 int32_t i;
01942
01943 shrink_state->deactnum=0;
01944 shrink_state->active = SG_MALLOC(int32_t, totdoc);
01945 shrink_state->inactive_since = SG_MALLOC(int32_t, totdoc);
01946 shrink_state->a_history = SG_MALLOC(float64_t*, maxhistory);
01947 shrink_state->maxhistory=maxhistory;
01948 shrink_state->last_lin = SG_MALLOC(float64_t, totdoc);
01949 shrink_state->last_a = SG_MALLOC(float64_t, totdoc);
01950
01951 for (i=0;i<totdoc;i++) {
01952 shrink_state->active[i]=1;
01953 shrink_state->inactive_since[i]=0;
01954 shrink_state->last_a[i]=0;
01955 shrink_state->last_lin[i]=0;
01956 }
01957 }
01958
01959 void CSVMLight::shrink_state_cleanup(SHRINK_STATE *shrink_state)
01960 {
01961 SG_FREE(shrink_state->active);
01962 SG_FREE(shrink_state->inactive_since);
01963 if(shrink_state->deactnum > 0)
01964 SG_FREE((shrink_state->a_history[shrink_state->deactnum-1]));
01965 SG_FREE((shrink_state->a_history));
01966 SG_FREE((shrink_state->last_a));
01967 SG_FREE((shrink_state->last_lin));
01968 }
01969
/** Shrink the problem: deactivate variables that have been at a bound and
 * (likely) optimal for more than svm_iter_to_shrink iterations, or that are
 * marked inconsistent -- but only if at least minshrink of them qualify and
 * snapshot space remains.
 *
 * @return number of variables remaining active
 */
int32_t CSVMLight::shrink_problem(
	SHRINK_STATE *shrink_state, int32_t *active2dnum,
	int32_t *last_suboptimal_at, int32_t iteration, int32_t totdoc,
	int32_t minshrink, float64_t *a, int32_t *inconsistent, float64_t* c,
	float64_t* lin, int* label)
{
	int32_t i,ii,change,activenum,lastiter;
	float64_t *a_old=NULL;

	// Count active variables and how many of them qualify for deactivation.
	activenum=0;
	change=0;
	for (ii=0;active2dnum[ii]>=0;ii++) {
		i=active2dnum[ii];
		activenum++;
		lastiter=last_suboptimal_at[i];

		if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink)
				|| (inconsistent[i])) {
			change++;
		}
	}
	// Only shrink when it pays off and a history slot is still available.
	if((change>=minshrink)
			&& (shrink_state->deactnum<shrink_state->maxhistory)) {

		if(verbosity>=2) {
			SG_INFO( " Shrinking...");
		}

		// Without linadd, snapshot the alphas so lin[] can be reconstructed
		// when shrunk variables are later reactivated.
		if (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())) {
			a_old=SG_MALLOC(float64_t, totdoc);
			shrink_state->a_history[shrink_state->deactnum]=a_old;
			for (i=0;i<totdoc;i++) {
				a_old[i]=a[i];
			}
		}
		// Deactivate the qualifying variables and stamp when it happened.
		for (ii=0;active2dnum[ii]>=0;ii++) {
			i=active2dnum[ii];
			lastiter=last_suboptimal_at[i];
			if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink)
					|| (inconsistent[i])) {
				shrink_state->active[i]=0;
				shrink_state->inactive_since[i]=shrink_state->deactnum;
			}
		}
		activenum=compute_index(shrink_state->active,totdoc,active2dnum);
		shrink_state->deactnum++;
		// With linadd, lin[] can be recomputed directly, so no history is kept.
		if(kernel->has_property(KP_LINADD) && get_linadd_enabled())
			shrink_state->deactnum=0;

		if(verbosity>=2) {
			SG_DONE();
			SG_DEBUG("Number of inactive variables = %ld\n", totdoc-activenum);
		}
	}
	return(activenum);
}
02030
02031 void* CSVMLight::reactivate_inactive_examples_linadd_helper(void* p)
02032 {
02033 S_THREAD_PARAM_REACTIVATE_LINADD* params = (S_THREAD_PARAM_REACTIVATE_LINADD*) p;
02034
02035 CKernel* k = params->kernel;
02036 float64_t* lin = params->lin;
02037 float64_t* last_lin = params->last_lin;
02038 int32_t* active = params->active;
02039 int32_t* docs = params->docs;
02040 int32_t start = params->start;
02041 int32_t end = params->end;
02042
02043 for (int32_t i=start;i<end;i++)
02044 {
02045 if (!active[i])
02046 lin[i] = last_lin[i]+k->compute_optimized(docs[i]);
02047
02048 last_lin[i]=lin[i];
02049 }
02050
02051 return NULL;
02052 }
02053
02054 void* CSVMLight::reactivate_inactive_examples_vanilla_helper(void* p)
02055 {
02056 S_THREAD_PARAM_REACTIVATE_VANILLA* params = (S_THREAD_PARAM_REACTIVATE_VANILLA*) p;
02057 ASSERT(params);
02058 ASSERT(params->kernel);
02059 ASSERT(params->lin);
02060 ASSERT(params->aicache);
02061 ASSERT(params->a);
02062 ASSERT(params->a_old);
02063 ASSERT(params->changed2dnum);
02064 ASSERT(params->inactive2dnum);
02065 ASSERT(params->label);
02066
02067 CKernel* k = params->kernel;
02068 float64_t* lin = params->lin;
02069 float64_t* aicache = params->aicache;
02070 float64_t* a= params->a;
02071 float64_t* a_old = params->a_old;
02072 int32_t* changed2dnum = params->changed2dnum;
02073 int32_t* inactive2dnum = params->inactive2dnum;
02074 int32_t* label = params->label;
02075 int32_t start = params->start;
02076 int32_t end = params->end;
02077
02078 for (int32_t ii=start;ii<end;ii++)
02079 {
02080 int32_t i=changed2dnum[ii];
02081 int32_t j=0;
02082 ASSERT(i>=0);
02083
02084 k->get_kernel_row(i,inactive2dnum,aicache);
02085 for (int32_t jj=0;(j=inactive2dnum[jj])>=0;jj++)
02086 lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
02087 }
02088 return NULL;
02089 }
02090
02091 void CSVMLight::reactivate_inactive_examples(
02092 int32_t* label, float64_t *a, SHRINK_STATE *shrink_state, float64_t *lin,
02093 float64_t *c, int32_t totdoc, int32_t iteration, int32_t *inconsistent,
02094 int32_t* docs, float64_t *aicache, float64_t *maxdiff)
02095
02096
02097
02098 {
02099 register int32_t i,j,ii,jj,t,*changed2dnum,*inactive2dnum;
02100 int32_t *changed,*inactive;
02101 register float64_t *a_old,dist;
02102 float64_t ex_c,target;
02103
02104 if (kernel->has_property(KP_LINADD) && get_linadd_enabled()) {
02105 a_old=shrink_state->last_a;
02106
02107 if (!use_batch_computation || !kernel->has_property(KP_BATCHEVALUATION))
02108 {
02109 SG_DEBUG( " clear normal - linadd\n");
02110 kernel->clear_normal();
02111
02112 int32_t num_modified=0;
02113 for (i=0;i<totdoc;i++) {
02114 if(a[i] != a_old[i]) {
02115 kernel->add_to_normal(docs[i], ((a[i]-a_old[i])*(float64_t)label[i]));
02116 a_old[i]=a[i];
02117 num_modified++;
02118 }
02119 }
02120
02121 if (num_modified>0)
02122 {
02123 int32_t num_threads=parallel->get_num_threads();
02124 ASSERT(num_threads>0);
02125 if (num_threads < 2)
02126 {
02127 S_THREAD_PARAM_REACTIVATE_LINADD params;
02128 params.kernel=kernel;
02129 params.lin=lin;
02130 params.last_lin=shrink_state->last_lin;
02131 params.docs=docs;
02132 params.active=shrink_state->active;
02133 params.start=0;
02134 params.end=totdoc;
02135 reactivate_inactive_examples_linadd_helper((void*) ¶ms);
02136 }
02137 #ifdef HAVE_PTHREAD
02138 else
02139 {
02140 pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
02141 S_THREAD_PARAM_REACTIVATE_LINADD* params = SG_MALLOC(S_THREAD_PARAM_REACTIVATE_LINADD, num_threads);
02142 int32_t step= totdoc/num_threads;
02143
02144 for (t=0; t<num_threads-1; t++)
02145 {
02146 params[t].kernel=kernel;
02147 params[t].lin=lin;
02148 params[t].last_lin=shrink_state->last_lin;
02149 params[t].docs=docs;
02150 params[t].active=shrink_state->active;
02151 params[t].start = t*step;
02152 params[t].end = (t+1)*step;
02153 pthread_create(&threads[t], NULL, CSVMLight::reactivate_inactive_examples_linadd_helper, (void*)¶ms[t]);
02154 }
02155
02156 params[t].kernel=kernel;
02157 params[t].lin=lin;
02158 params[t].last_lin=shrink_state->last_lin;
02159 params[t].docs=docs;
02160 params[t].active=shrink_state->active;
02161 params[t].start = t*step;
02162 params[t].end = totdoc;
02163 reactivate_inactive_examples_linadd_helper((void*) ¶ms[t]);
02164
02165 for (t=0; t<num_threads-1; t++)
02166 pthread_join(threads[t], NULL);
02167
02168 SG_FREE(threads);
02169 SG_FREE(params);
02170 }
02171 #endif
02172
02173 }
02174 }
02175 else
02176 {
02177 float64_t *alphas = SG_MALLOC(float64_t, totdoc);
02178 int32_t *idx = SG_MALLOC(int32_t, totdoc);
02179 int32_t num_suppvec=0 ;
02180
02181 for (i=0; i<totdoc; i++)
02182 {
02183 if(a[i] != a_old[i])
02184 {
02185 alphas[num_suppvec] = (a[i]-a_old[i])*(float64_t)label[i];
02186 idx[num_suppvec] = i ;
02187 a_old[i] = a[i] ;
02188 num_suppvec++ ;
02189 }
02190 }
02191
02192 if (num_suppvec>0)
02193 {
02194 int32_t num_inactive=0;
02195 int32_t* inactive_idx=SG_MALLOC(int32_t, totdoc);
02196
02197 j=0;
02198 for (i=0;i<totdoc;i++)
02199 {
02200 if(!shrink_state->active[i])
02201 {
02202 inactive_idx[j++] = i;
02203 num_inactive++;
02204 }
02205 }
02206
02207 if (num_inactive>0)
02208 {
02209 float64_t* dest = SG_MALLOC(float64_t, num_inactive);
02210 memset(dest, 0, sizeof(float64_t)*num_inactive);
02211
02212 kernel->compute_batch(num_inactive, inactive_idx, dest, num_suppvec, idx, alphas);
02213
02214 j=0;
02215 for (i=0;i<totdoc;i++) {
02216 if(!shrink_state->active[i]) {
02217 lin[i] = shrink_state->last_lin[i] + dest[j++] ;
02218 }
02219 shrink_state->last_lin[i]=lin[i];
02220 }
02221
02222 SG_FREE(dest);
02223 }
02224 else
02225 {
02226 for (i=0;i<totdoc;i++)
02227 shrink_state->last_lin[i]=lin[i];
02228 }
02229 SG_FREE(inactive_idx);
02230 }
02231 SG_FREE(alphas);
02232 SG_FREE(idx);
02233 }
02234
02235 kernel->delete_optimization();
02236 }
02237 else
02238 {
02239 changed=SG_MALLOC(int32_t, totdoc);
02240 changed2dnum=SG_MALLOC(int32_t, totdoc+11);
02241 inactive=SG_MALLOC(int32_t, totdoc);
02242 inactive2dnum=SG_MALLOC(int32_t, totdoc+11);
02243 for (t=shrink_state->deactnum-1;(t>=0) && shrink_state->a_history[t];t--)
02244 {
02245 if(verbosity>=2) {
02246 SG_INFO( "%ld..",t);
02247 }
02248 a_old=shrink_state->a_history[t];
02249 for (i=0;i<totdoc;i++) {
02250 inactive[i]=((!shrink_state->active[i])
02251 && (shrink_state->inactive_since[i] == t));
02252 changed[i]= (a[i] != a_old[i]);
02253 }
02254 compute_index(inactive,totdoc,inactive2dnum);
02255 compute_index(changed,totdoc,changed2dnum);
02256
02257
02258 int32_t num_threads=parallel->get_num_threads();
02259 ASSERT(num_threads>0);
02260
02261 if (num_threads < 2)
02262 {
02263 for (ii=0;(i=changed2dnum[ii])>=0;ii++) {
02264 kernel->get_kernel_row(i,inactive2dnum,aicache);
02265 for (jj=0;(j=inactive2dnum[jj])>=0;jj++)
02266 lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
02267 }
02268 }
02269 #ifdef HAVE_PTHREAD
02270 else
02271 {
02272
02273 int32_t num_changed=0;
02274 for (ii=0;changed2dnum[ii]>=0;ii++)
02275 num_changed++;
02276
02277 if (num_changed>0)
02278 {
02279 pthread_t* threads= SG_MALLOC(pthread_t, num_threads-1);
02280 S_THREAD_PARAM_REACTIVATE_VANILLA* params = SG_MALLOC(S_THREAD_PARAM_REACTIVATE_VANILLA, num_threads);
02281 int32_t step= num_changed/num_threads;
02282
02283
02284 float64_t* tmp_lin=SG_MALLOC(float64_t, totdoc*num_threads);
02285 memset(tmp_lin, 0, sizeof(float64_t)*((size_t) totdoc)*num_threads);
02286 float64_t* tmp_aicache=SG_MALLOC(float64_t, totdoc*num_threads);
02287 memset(tmp_aicache, 0, sizeof(float64_t)*((size_t) totdoc)*num_threads);
02288
02289 int32_t thr;
02290 for (thr=0; thr<num_threads-1; thr++)
02291 {
02292 params[thr].kernel=kernel;
02293 params[thr].lin=&tmp_lin[thr*totdoc];
02294 params[thr].aicache=&tmp_aicache[thr*totdoc];
02295 params[thr].a=a;
02296 params[thr].a_old=a_old;
02297 params[thr].changed2dnum=changed2dnum;
02298 params[thr].inactive2dnum=inactive2dnum;
02299 params[thr].label=label;
02300 params[thr].start = thr*step;
02301 params[thr].end = (thr+1)*step;
02302 pthread_create(&threads[thr], NULL, CSVMLight::reactivate_inactive_examples_vanilla_helper, (void*)¶ms[thr]);
02303 }
02304
02305 params[thr].kernel=kernel;
02306 params[thr].lin=&tmp_lin[thr*totdoc];
02307 params[thr].aicache=&tmp_aicache[thr*totdoc];
02308 params[thr].a=a;
02309 params[thr].a_old=a_old;
02310 params[thr].changed2dnum=changed2dnum;
02311 params[thr].inactive2dnum=inactive2dnum;
02312 params[thr].label=label;
02313 params[thr].start = thr*step;
02314 params[thr].end = num_changed;
02315 reactivate_inactive_examples_vanilla_helper((void*) ¶ms[thr]);
02316
02317 for (jj=0;(j=inactive2dnum[jj])>=0;jj++)
02318 lin[j]+=tmp_lin[totdoc*thr+j];
02319
02320 for (thr=0; thr<num_threads-1; thr++)
02321 {
02322 pthread_join(threads[thr], NULL);
02323
02324
02325 for (jj=0;(j=inactive2dnum[jj])>=0;jj++)
02326 lin[j]+=tmp_lin[totdoc*thr+j];
02327 }
02328
02329 SG_FREE(tmp_lin);
02330 SG_FREE(tmp_aicache);
02331 SG_FREE(threads);
02332 SG_FREE(params);
02333 }
02334 }
02335 #endif
02336 }
02337 SG_FREE(changed);
02338 SG_FREE(changed2dnum);
02339 SG_FREE(inactive);
02340 SG_FREE(inactive2dnum);
02341 }
02342
02343 (*maxdiff)=0;
02344 for (i=0;i<totdoc;i++) {
02345 shrink_state->inactive_since[i]=shrink_state->deactnum-1;
02346 if(!inconsistent[i]) {
02347 dist=(lin[i]-model->b)*(float64_t)label[i];
02348 target=-(learn_parm->eps[i]-(float64_t)label[i]*c[i]);
02349 ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
02350 if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
02351 if((dist-target)>(*maxdiff))
02352 (*maxdiff)=dist-target;
02353 }
02354 else if((a[i]<ex_c) && (dist < target)) {
02355 if((target-dist)>(*maxdiff))
02356 (*maxdiff)=target-dist;
02357 }
02358 if((a[i]>(0+learn_parm->epsilon_a))
02359 && (a[i]<ex_c)) {
02360 shrink_state->active[i]=1;
02361 }
02362 else if((a[i]<=(0+learn_parm->epsilon_a)) && (dist < (target+learn_parm->epsilon_shrink))) {
02363 shrink_state->active[i]=1;
02364 }
02365 else if((a[i]>=ex_c)
02366 && (dist > (target-learn_parm->epsilon_shrink))) {
02367 shrink_state->active[i]=1;
02368 }
02369 else if(learn_parm->sharedslack) {
02370 shrink_state->active[i]=1;
02371 }
02372 }
02373 }
02374
02375 if (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())) {
02376 for (i=0;i<totdoc;i++) {
02377 (shrink_state->a_history[shrink_state->deactnum-1])[i]=a[i];
02378 }
02379 for (t=shrink_state->deactnum-2;(t>=0) && shrink_state->a_history[t];t--) {
02380 SG_FREE(shrink_state->a_history[t]);
02381 shrink_state->a_history[t]=0;
02382 }
02383 }
02384 }
02385
02386
02387
02388
/* Solve the current QP subproblem with the PR_LOQO interior-point optimizer.
 *
 * Minimizes 0.5*x'Gx + g0'x subject to the (optional) equality constraint
 * ce'x = -ce0 and box constraints low <= x <= up, retrying with
 * progressively relaxed precision/margin settings until PR_LOQO reports an
 * optimal solution.  The solution is then cleaned up: variables numerically
 * at a bound are clipped exactly to it, and the threshold b is extracted
 * from the dual of the equality constraint.
 *
 * @param qp            subproblem data (G, g0, equality/box constraints,
 *                      starting point opt_xinit); NOTE: qp->opt_ce0[0] is
 *                      negated in place as a side effect (PR_LOQO's sign
 *                      convention for the equality constant)
 * @param epsilon_crit  in/out KT tolerance; relaxed (*=10) after too many
 *                      precision violations
 * @param nx            maximum working-set size, used to size the lazily
 *                      allocated primal/dual buffers
 * @param threshold     out: the computed bias term b
 * @param svm_maxqpsize in/out: shrunk by one when the solver produced NaNs,
 *                      so future subproblems are smaller and better behaved
 * @return pointer to the optimized alphas (member buffer `primal`), or
 *         qp->opt_xinit unchanged if PR_LOQO never converged
 */
float64_t* CSVMLight::optimize_qp(
		QP *qp, float64_t *epsilon_crit, int32_t nx, float64_t *threshold,
		int32_t& svm_maxqpsize)
{
	register int32_t i, j, result;
	float64_t margin, obj_before, obj_after;
	float64_t sigdig, dist, epsilon_loqo;
	int32_t iter;

	/* Lazily allocate solver work buffers once and reuse them across calls.
	 * NOTE(review): not freed here — presumably released in the destructor;
	 * confirm against the rest of the class. */
	if(!primal) { /* allocate memory at first call */
		primal=SG_MALLOC(float64_t, nx*3);
		dual=SG_MALLOC(float64_t, nx*2+1);
	}

	/* Objective value at the starting point: 0.5*x'Gx + g0'x, computed via
	 * the diagonal plus the strict upper triangle (G is symmetric, so each
	 * off-diagonal pair contributes once without the 0.5 factor). */
	obj_before=0;
	for(i=0;i<qp->opt_n;i++) {
		obj_before+=(qp->opt_g0[i]*qp->opt_xinit[i]);
		obj_before+=(0.5*qp->opt_xinit[i]*qp->opt_xinit[i]*qp->opt_g[i*qp->opt_n+i]);
		for(j=0;j<i;j++) {
			obj_before+=(qp->opt_xinit[j]*qp->opt_xinit[i]*qp->opt_g[j*qp->opt_n+i]);
		}
	}

	result=STILL_RUNNING;
	/* PR_LOQO expects the equality-constraint constant with opposite sign. */
	qp->opt_ce0[0]*=(-1.0);

	/* Repeatedly call PR_LOQO, backing off to more conservative settings
	 * (larger margin, lower precision, more iterations) whenever it fails.
	 * The loop ends when an optimal solution is found or margin ~ 1. */
	for(margin=init_margin,iter=init_iter;
		(margin<=0.9999999) && (result!=OPTIMAL_SOLUTION);) {

		opt_precision=CMath::max(opt_precision, DEF_PRECISION);
		sigdig=-log10(opt_precision); /* requested significant digits */

		result=pr_loqo((int32_t)qp->opt_n,(int32_t)qp->opt_m,
				(float64_t *)qp->opt_g0,(float64_t *)qp->opt_g,
				(float64_t *)qp->opt_ce,(float64_t *)qp->opt_ce0,
				(float64_t *)qp->opt_low,(float64_t *)qp->opt_up,
				(float64_t *)primal,(float64_t *)dual,
				(int32_t)(verbosity-2),
				(float64_t)sigdig,(int32_t)iter,
				(float64_t)margin,(float64_t)(qp->opt_up[0])/4.0,(int32_t)0);

		if(CMath::is_nan(dual[0])) { /* numerical trouble inside PR_LOQO? */
			if(verbosity>=2) {
				SG_SDEBUG("Restarting PR_LOQO with more conservative parameters.\n");
			}
			/* Remember a more conservative starting margin for future
			 * calls (moves init_margin 4/5 of the way toward 1). */
			if(init_margin<0.80) {
				init_margin=(4.0*margin+1.0)/5.0;
			}
			margin=(margin+1.0)/2.0;
			(opt_precision)*=10.0; /* relax precision for the retry */
			if(verbosity>=2) {
				SG_SDEBUG("Reducing precision of PR_LOQO.\n");
			}
		}
		else if(result!=OPTIMAL_SOLUTION) {
			/* Converging but not done: grant more iterations (also for
			 * future calls via init_iter) and relax precision. */
			iter+=2000;
			init_iter+=10;
			(opt_precision)*=10.0;
			if(verbosity>=2) {
				SG_SDEBUG("Reducing precision of PR_LOQO due to (%ld).\n",result);
			}
		}
	}

	/* The dual of the equality constraint (if present) is the bias b. */
	if(qp->opt_m)
		model_b=dual[0];
	else
		model_b=0;

	/* Determine a tolerance epsilon_loqo for clipping alphas to the box
	 * bounds: if a variable is off a bound but its KT "distance" says it
	 * should be at that bound, widen the tolerance enough to capture it. */
	epsilon_loqo=1E-10;
	for(i=0;i<qp->opt_n;i++) {
		/* dist = gradient of the full objective at x for example i,
		 * shifted by +1 so the KT conditions compare against 1.0
		 * (symmetric G accessed via both triangles below). */
		dist=-model_b*qp->opt_ce[i];
		dist+=(qp->opt_g0[i]+1.0);
		for(j=0;j<i;j++) {
			dist+=(primal[j]*qp->opt_g[j*qp->opt_n+i]);
		}
		for(j=i;j<qp->opt_n;j++) {
			dist+=(primal[j]*qp->opt_g[i*qp->opt_n+j]);
		}
		/* Variable should be at the upper bound but is not. */
		if((primal[i]<(qp->opt_up[i]-epsilon_loqo)) && (dist < (1.0-(*epsilon_crit)))) {
			epsilon_loqo=(qp->opt_up[i]-primal[i])*2.0;
		}
		/* Variable should be at zero but is not. */
		else if((primal[i]>(0+epsilon_loqo)) && (dist > (1.0+(*epsilon_crit)))) {
			epsilon_loqo=primal[i]*2.0;
		}
	}

	/* Snap near-bound variables exactly onto their bounds. */
	for(i=0;i<qp->opt_n;i++) {
		if(primal[i]<=(0+epsilon_loqo)) { /* clip alpha to zero */
			primal[i]=0;
		}
		else if(primal[i]>=(qp->opt_up[i]-epsilon_loqo)) { /* clip alpha to C */
			primal[i]=qp->opt_up[i];
		}
	}

	/* Objective value at the (clipped) solution — same triangular scheme
	 * as obj_before. */
	obj_after=0;
	for(i=0;i<qp->opt_n;i++) {
		obj_after+=(qp->opt_g0[i]*primal[i]);
		obj_after+=(0.5*primal[i]*primal[i]*qp->opt_g[i*qp->opt_n+i]);
		for(j=0;j<i;j++) {
			obj_after+=(primal[j]*primal[i]*qp->opt_g[j*qp->opt_n+i]);
		}
	}

	/* Numerical breakdown: fall back to the starting point and shrink the
	 * maximum subproblem size so future QPs are easier to solve. */
	if(CMath::is_nan(obj_after) || CMath::is_nan(model_b)) {
		for(i=0;i<qp->opt_n;i++) {
			primal[i]=qp->opt_xinit[i];
		}
		model_b=0;
		if(svm_maxqpsize>2) {
			svm_maxqpsize--; /* decrease size of qp-subproblems */
		}
	}

	/* The objective must decrease; if it did not, demand more precision
	 * from PR_LOQO next time and count the violation. */
	if(obj_after >= obj_before) {
		(opt_precision)/=100.0;
		precision_violations++;
		if(verbosity>=2) {
			SG_SDEBUG("Increasing Precision of PR_LOQO.\n");
		}
	}

	/* Persistent lack of precision: relax the outer KT tolerance instead. */
	if(precision_violations > 500) {
		(*epsilon_crit)*=10.0;
		precision_violations=0;
		SG_SINFO("Relaxing epsilon on KT-Conditions.\n");
	}

	(*threshold)=model_b;

	if(result!=OPTIMAL_SOLUTION) {
		/* NOTE(review): SG_SERROR likely raises/aborts, making the return
		 * unreachable in practice — confirm against SGIO semantics. */
		SG_SERROR("PR_LOQO did not converge.\n");
		return(qp->opt_xinit);
	}
	else {
		return(primal);
	}
}
02534
02535
02536 #endif //USE_SVMLIGHT