#include <shogun/lib/config.h>

#ifdef USE_SVMLIGHT

#include <shogun/io/SGIO.h>
#include <shogun/lib/Signal.h>
#include <shogun/mathematics/Math.h>
#include <shogun/lib/Time.h>
#include <shogun/mathematics/lapack.h>

#include <shogun/classifier/svm/SVMLight.h>
#include <shogun/lib/external/pr_loqo.h>

#include <shogun/kernel/Kernel.h>
#include <shogun/machine/KernelMachine.h>
#include <shogun/kernel/CombinedKernel.h>

#include <unistd.h>

#include <shogun/base/Parallel.h>
#include <shogun/labels/BinaryLabels.h>

#ifdef HAVE_PTHREAD
#include <pthread.h>
#endif

using namespace shogun;

#ifndef DOXYGEN_SHOULD_SKIP_THIS
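/* Parameter bundles passed to the pthread worker routines below. Each
 * worker processes the half-open index range [start, end) on shared,
 * pre-allocated arrays, so no locking is needed. */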
struct S_THREAD_PARAM_REACTIVATE_LINADD
{
    CKernel* kernel;
    float64_t* lin;
    float64_t* last_lin;
    int32_t* active;
    int32_t* docs;
    int32_t start;
    int32_t end;
};

struct S_THREAD_PARAM
{
    float64_t* lin;
    float64_t* W;
    int32_t start, end;
    int32_t* active2dnum;
    int32_t* docs;
    CKernel* kernel;
};

struct S_THREAD_PARAM_REACTIVATE_VANILLA
{
    CKernel* kernel;
    float64_t* lin;
    float64_t* aicache;
    float64_t* a;
    float64_t* a_old;
    int32_t* changed2dnum;
    int32_t* inactive2dnum;
    int32_t* label;
    int32_t start;
    int32_t end;
};

struct S_THREAD_PARAM_KERNEL
{
    float64_t* Kval;
    int32_t *KI, *KJ;
    int32_t start, end;
    CSVMLight* svmlight;
};

#endif // DOXYGEN_SHOULD_SKIP_THIS

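/* Thread entry point: for each active document in [start, end), add the
 * kernel's precomputed ("linadd") contribution to the linear term lin[]. */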
void* CSVMLight::update_linear_component_linadd_helper(void* p)
{
    S_THREAD_PARAM* params = (S_THREAD_PARAM*) p;

    int32_t jj=0, j=0;

    for (jj=params->start; (jj<params->end) && (j=params->active2dnum[jj])>=0; jj++)
        params->lin[j] += params->kernel->compute_optimized(params->docs[j]);

    return NULL;
}

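/* Thread entry point: evaluate the kernel entries k(KI[jj], KJ[jj]) for
 * the index pairs in [start, end) and store them in Kval[]. */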
void* CSVMLight::compute_kernel_helper(void* p)
{
    S_THREAD_PARAM_KERNEL* params = (S_THREAD_PARAM_KERNEL*) p;

    int32_t jj=0;
    for (jj=params->start; jj<params->end; jj++)
        params->Kval[jj] = params->svmlight->compute_kernel(params->KI[jj], params->KJ[jj]);

    return NULL;
}

CSVMLight::CSVMLight()
: CSVM()
{
    init();
    set_kernel(NULL);
}

CSVMLight::CSVMLight(float64_t C, CKernel* k, CLabels* lab)
: CSVM(C, k, lab)
{
    init();
}
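
/* A minimal usage sketch (hypothetical setup code; any shogun kernel and
 * binary label combination works the same way):
 *
 *   CKernel* kernel = new CGaussianKernel(feats, feats, 2.0);
 *   CLabels* labels = new CBinaryLabels(label_vector);
 *   CSVMLight* svm = new CSVMLight(1.0, kernel, labels);
 *   svm->train();
 *   CLabels* predictions = svm->apply(test_feats);
 */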
00132
00133 void CSVMLight::init()
00134 {
00135
00136 primal=NULL;
00137 dual=NULL;
00138 init_margin=0.15;
00139 init_iter=500;
00140 precision_violations=0;
00141 model_b=0;
00142 verbosity=1;
00143 opt_precision=DEF_PRECISION;
00144
00145
00146 W=NULL;
00147 model=SG_MALLOC(MODEL, 1);
00148 learn_parm=SG_MALLOC(LEARN_PARM, 1);
00149 model->supvec=NULL;
00150 model->alpha=NULL;
00151 model->index=NULL;
00152
00153
00154 mymaxdiff=1 ;
00155 mkl_converged=false;
00156 }
00157
00158 CSVMLight::~CSVMLight()
00159 {
00160
00161 SG_FREE(model->supvec);
00162 SG_FREE(model->alpha);
00163 SG_FREE(model->index);
00164 SG_FREE(model);
00165 SG_FREE(learn_parm);
00166
00167
00168 SG_FREE(W);
00169
00170
00171 SG_FREE(dual);
00172 SG_FREE(primal);
00173 }
00174
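/* Trains the SVM: initializes LEARN_PARM from the current settings,
 * initializes the kernel on the given features (if any), runs
 * svm_learn() and copies the resulting support vectors, alphas and
 * bias into the CSVM base class. */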
bool CSVMLight::train_machine(CFeatures* data)
{
    mkl_converged=false;
    verbosity=1;
    init_margin=0.15;
    init_iter=500;
    precision_violations=0;
    opt_precision=DEF_PRECISION;

    strcpy(learn_parm->predfile, "");
    learn_parm->biased_hyperplane = get_bias_enabled() ? 1 : 0;
    learn_parm->sharedslack=0;
    learn_parm->remove_inconsistent=0;
    learn_parm->skip_final_opt_check=0;
    learn_parm->svm_maxqpsize=get_qpsize();
    learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize-1;
    learn_parm->maxiter=100000;
    learn_parm->svm_iter_to_shrink=100;
    learn_parm->svm_c=C1;
    learn_parm->transduction_posratio=0.33;
    learn_parm->svm_costratio=C2/C1;
    learn_parm->svm_costratio_unlab=1.0;
    learn_parm->svm_unlabbound=1E-5;
    learn_parm->epsilon_crit=epsilon;
    learn_parm->epsilon_a=1E-15;
    learn_parm->compute_loo=0;
    learn_parm->rho=1.0;
    learn_parm->xa_depth=0;

    if (!kernel)
        SG_ERROR("SVM_light cannot proceed without a kernel!\n");

    if (data)
    {
        if (m_labels->get_num_labels() != data->get_num_vectors())
        {
            SG_ERROR("%s::train_machine(): Number of training vectors (%d) does"
                    " not match number of labels (%d)\n", get_name(),
                    data->get_num_vectors(), m_labels->get_num_labels());
        }
        kernel->init(data, data);
    }

    if (!kernel->has_features())
        SG_ERROR("SVM_light cannot proceed without an initialized kernel!\n");
00221
00222 ASSERT(m_labels && m_labels->get_num_labels());
00223 ASSERT(m_labels->get_label_type() == LT_BINARY);
00224 ASSERT(kernel->get_num_vec_lhs()==m_labels->get_num_labels());
00225
00226
00227 if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
00228 kernel->clear_normal() ;
00229
00230
00231 SG_DEBUG( "threads = %i\n", parallel->get_num_threads()) ;
00232 SG_DEBUG( "qpsize = %i\n", learn_parm->svm_maxqpsize) ;
00233 SG_DEBUG( "epsilon = %1.1e\n", learn_parm->epsilon_crit) ;
00234 SG_DEBUG( "kernel->has_property(KP_LINADD) = %i\n", kernel->has_property(KP_LINADD)) ;
00235 SG_DEBUG( "kernel->has_property(KP_KERNCOMBINATION) = %i\n", kernel->has_property(KP_KERNCOMBINATION)) ;
00236 SG_DEBUG( "kernel->has_property(KP_BATCHEVALUATION) = %i\n", kernel->has_property(KP_BATCHEVALUATION)) ;
00237 SG_DEBUG( "kernel->get_optimization_type() = %s\n", kernel->get_optimization_type()==FASTBUTMEMHUNGRY ? "FASTBUTMEMHUNGRY" : "SLOWBUTMEMEFFICIENT" ) ;
00238 SG_DEBUG( "get_solver_type() = %i\n", get_solver_type());
00239 SG_DEBUG( "get_linadd_enabled() = %i\n", get_linadd_enabled()) ;
00240 SG_DEBUG( "get_batch_computation_enabled() = %i\n", get_batch_computation_enabled()) ;
00241 SG_DEBUG( "kernel->get_num_subkernels() = %i\n", kernel->get_num_subkernels()) ;
00242
00243 use_kernel_cache = !((kernel->get_kernel_type() == K_CUSTOM) ||
00244 (get_linadd_enabled() && kernel->has_property(KP_LINADD)));
00245
00246 SG_DEBUG( "use_kernel_cache = %i\n", use_kernel_cache) ;
00247
00248 if (kernel->get_kernel_type() == K_COMBINED)
00249 {
00250 CKernel* kn = ((CCombinedKernel*)kernel)->get_first_kernel();
00251
00252 while (kn)
00253 {
00254
00255 kn->resize_kernel_cache(kn->get_cache_size());
00256 SG_UNREF(kn);
00257 kn = ((CCombinedKernel*) kernel)->get_next_kernel();
00258 }
00259 }
00260
00261 kernel->resize_kernel_cache(kernel->get_cache_size());
00262
00263
00264 svm_learn();
00265
00266
00267 create_new_model(model->sv_num-1);
00268 set_bias(-model->b);
00269 for (int32_t i=0; i<model->sv_num-1; i++)
00270 {
00271 set_alpha(i, model->alpha[i+1]);
00272 set_support_vector(i, model->supvec[i+1]);
00273 }
00274
00275
00276 if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
00277 {
00278 kernel->clear_normal() ;
00279 kernel->delete_optimization() ;
00280 }
00281
00282 if (use_kernel_cache)
00283 kernel->kernel_cache_cleanup();
00284
00285 return true ;
00286 }
00287
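/* Returns the consumed CPU time in hundredths of a second
 * (clock() scaled by 100/CLOCKS_PER_SEC). */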
int32_t CSVMLight::get_runtime()
{
    clock_t start;
    start = clock();
    return (int32_t)((float64_t)start*100.0/(float64_t)CLOCKS_PER_SEC);
}

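/* Main SVMlight training driver: clones the binary labels, allocates the
 * per-example working arrays (costs, linear term, alphas), optionally
 * warm-starts from a previously trained model and then calls
 * optimize_to_convergence(). */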
void CSVMLight::svm_learn()
{
    int32_t *inconsistent, i;
    int32_t misclassified, upsupvecnum;
    float64_t maxdiff, *lin, *c, *a;
    int32_t iterations;
    int32_t trainpos=0, trainneg=0;
    ASSERT(m_labels);
    SGVector<int32_t> lab = ((CBinaryLabels*) m_labels)->get_int_labels();
    int32_t totdoc = lab.vlen;
    ASSERT(lab.vector && lab.vlen);
    int32_t* label = SGVector<int32_t>::clone_vector(lab.vector, lab.vlen);

    int32_t* docs = SG_MALLOC(int32_t, totdoc);
    SG_FREE(W);
    W=NULL;
    count=0;

    if (kernel->has_property(KP_KERNCOMBINATION) && callback)
    {
        W = SG_MALLOC(float64_t, totdoc*kernel->get_num_subkernels());
        for (i=0; i<totdoc*kernel->get_num_subkernels(); i++)
            W[i]=0;
    }

    for (i=0; i<totdoc; i++)
        docs[i]=i;

    float64_t* xi_fullset;
    float64_t* a_fullset;
    TIMING timing_profile;
    SHRINK_STATE shrink_state;

    timing_profile.time_kernel=0;
    timing_profile.time_opti=0;
    timing_profile.time_shrink=0;
    timing_profile.time_update=0;
    timing_profile.time_model=0;
    timing_profile.time_check=0;
    timing_profile.time_select=0;

    if ((learn_parm->svm_newvarsinqp < 2)
            || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
        learn_parm->svm_newvarsinqp = learn_parm->svm_maxqpsize;
    }

    init_shrink_state(&shrink_state, totdoc, (int32_t) MAXSHRINK);

    inconsistent = SG_MALLOC(int32_t, totdoc);
    c = SG_MALLOC(float64_t, totdoc);
    a = SG_MALLOC(float64_t, totdoc);
    a_fullset = SG_MALLOC(float64_t, totdoc);
    xi_fullset = SG_MALLOC(float64_t, totdoc);
    lin = SG_MALLOC(float64_t, totdoc);
    if (m_linear_term.vlen > 0)
        learn_parm->eps = get_linear_term_array();
    else
    {
        learn_parm->eps = SG_MALLOC(float64_t, totdoc);
        SGVector<float64_t>::fill_vector(learn_parm->eps, totdoc, -1.0);
    }

    learn_parm->svm_cost = SG_MALLOC(float64_t, totdoc);

    SG_FREE(model->supvec);
    SG_FREE(model->alpha);
    SG_FREE(model->index);
    model->supvec = SG_MALLOC(int32_t, totdoc+2);
    model->alpha = SG_MALLOC(float64_t, totdoc+2);
    model->index = SG_MALLOC(int32_t, totdoc+2);

    model->at_upper_bound=0;
    model->b=0;
    model->supvec[0]=0;
    model->alpha[0]=0;
    model->totdoc=totdoc;

    model->kernel=kernel;

    model->sv_num=1;
    model->loo_error=-1;
    model->loo_recall=-1;
    model->loo_precision=-1;
    model->xa_error=-1;
    model->xa_recall=-1;
    model->xa_precision=-1;

    for (i=0; i<totdoc; i++) {
        inconsistent[i]=0;
        c[i]=0;
        a[i]=0;
        lin[i]=0;

        if (label[i] > 0) {
            learn_parm->svm_cost[i] = learn_parm->svm_c*learn_parm->svm_costratio*
                fabs((float64_t)label[i]);
            label[i]=1;
            trainpos++;
        }
        else if (label[i] < 0) {
            learn_parm->svm_cost[i] = learn_parm->svm_c*fabs((float64_t)label[i]);
            label[i]=-1;
            trainneg++;
        }
        else {
            learn_parm->svm_cost[i]=0;
        }
    }

    SG_DEBUG("alpha:%p num_sv:%d\n", m_alpha.vector, get_num_support_vectors());

    if (m_alpha.vector && get_num_support_vectors()) {
        if (verbosity>=1) {
            SG_INFO("Computing starting state...");
        }

        float64_t* alpha = SG_MALLOC(float64_t, totdoc);

        for (i=0; i<totdoc; i++)
            alpha[i]=0;

        for (i=0; i<get_num_support_vectors(); i++)
            alpha[get_support_vector(i)]=get_alpha(i);

        int32_t* index = SG_MALLOC(int32_t, totdoc);
        int32_t* index2dnum = SG_MALLOC(int32_t, totdoc+11);
        float64_t* aicache = SG_MALLOC(float64_t, totdoc);
        for (i=0; i<totdoc; i++) {
            index[i]=1;
            alpha[i]=fabs(alpha[i]);
            if (alpha[i]<0) alpha[i]=0;
            if (alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];
        }

        if (use_kernel_cache)
        {
            if (callback &&
                    (!((CCombinedKernel*) kernel)->get_append_subkernel_weights())
               )
            {
                CCombinedKernel* k = (CCombinedKernel*) kernel;
                CKernel* kn = k->get_first_kernel();

                while (kn)
                {
                    for (i=0; i<totdoc; i++)
                        if ((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
                                && (kn->kernel_cache_space_available()))
                            kn->cache_kernel_row(i);

                    for (i=0; i<totdoc; i++)
                        if ((alpha[i]==learn_parm->svm_cost[i])
                                && (kn->kernel_cache_space_available()))
                            kn->cache_kernel_row(i);

                    SG_UNREF(kn);
                    kn = k->get_next_kernel();
                }
            }
            else
            {
                for (i=0; i<totdoc; i++)
                    if ((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
                            && (kernel->kernel_cache_space_available()))
                        kernel->cache_kernel_row(i);

                for (i=0; i<totdoc; i++)
                    if ((alpha[i]==learn_parm->svm_cost[i])
                            && (kernel->kernel_cache_space_available()))
                        kernel->cache_kernel_row(i);
            }
        }
        compute_index(index, totdoc, index2dnum);
        update_linear_component(docs, label, index2dnum, alpha, a, index2dnum, totdoc,
                lin, aicache, NULL);
        calculate_svm_model(docs, label, lin, alpha, a, c,
                index2dnum, index2dnum);
        for (i=0; i<totdoc; i++) {
            a[i] = alpha[i];
        }

        SG_FREE(index);
        SG_FREE(index2dnum);
        SG_FREE(aicache);
        SG_FREE(alpha);

        if (verbosity>=1)
            SG_DONE();
    }
    SG_DEBUG("%d totdoc %d pos %d neg\n", totdoc, trainpos, trainneg);
    SG_DEBUG("Optimizing...\n");

    iterations = optimize_to_convergence(docs, label, totdoc,
            &shrink_state, inconsistent, a, lin,
            c, &timing_profile,
            &maxdiff, (int32_t)-1,
            (int32_t)1);

    if (verbosity>=1) {
        if (verbosity==1)
        {
            SG_DONE();
            SG_DEBUG("(%d iterations)", iterations);
        }

        misclassified=0;
        for (i=0; (i<totdoc); i++) {
            if ((lin[i]-model->b)*(float64_t)label[i] <= 0.0)
                misclassified++;
        }

        SG_INFO("Optimization finished (%d misclassified, maxdiff=%.8f).\n",
                misclassified, maxdiff);

        SG_INFO("obj = %.16f, rho = %.16f\n", get_objective(), model->b);
        if (maxdiff>epsilon)
            SG_WARNING("maximum violation (%f) exceeds svm_epsilon (%f) due to numerical difficulties\n", maxdiff, epsilon);

        upsupvecnum=0;
        for (i=1; i<model->sv_num; i++)
        {
            if (fabs(model->alpha[i]) >=
                    (learn_parm->svm_cost[model->supvec[i]]-
                     learn_parm->epsilon_a))
                upsupvecnum++;
        }
        SG_INFO("Number of SV: %d (including %d at upper bound)\n",
                model->sv_num-1, upsupvecnum);
    }

    shrink_state_cleanup(&shrink_state);
    SG_FREE(label);
    SG_FREE(inconsistent);
    SG_FREE(c);
    SG_FREE(a);
    SG_FREE(a_fullset);
    SG_FREE(xi_fullset);
    SG_FREE(lin);
    SG_FREE(learn_parm->eps);
    SG_FREE(learn_parm->svm_cost);
    SG_FREE(docs);
}

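/* Core chunking loop of SVMlight: repeatedly select a working set, solve
 * the reduced QP, update the linear term and the model, check the KKT
 * conditions and shrink the problem, until no retraining is needed or an
 * iteration/time limit is hit. Returns the number of iterations. */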
int32_t CSVMLight::optimize_to_convergence(int32_t* docs, int32_t* label, int32_t totdoc,
        SHRINK_STATE *shrink_state,
        int32_t *inconsistent,
        float64_t *a, float64_t *lin, float64_t *c,
        TIMING *timing_profile, float64_t *maxdiff,
        int32_t heldout, int32_t retrain)
/* docs:            example indices (docs[i]==i for training)       */
/* label:           +1/-1 training labels                           */
/* totdoc:          number of examples                              */
/* shrink_state:    bookkeeping for (un)shrinking the active set    */
/* inconsistent:    flags examples excluded from the optimization   */
/* a:               alphas, updated in place                        */
/* lin:             cached linear term sum_j a_j y_j k(i,j)         */
/* c:               per-example constraint offsets                  */
/* timing_profile:  accumulates per-phase runtimes                  */
/* maxdiff:         out: largest KKT violation seen                 */
/* heldout:         index of a held-out example, or -1              */
/* retrain:         nonzero forces (re)optimization                 */
{
    int32_t *chosen, *key, i, j, jj, *last_suboptimal_at, noshrink;
    int32_t inconsistentnum, choosenum, already_chosen=0, iteration;
    int32_t misclassified, supvecnum=0, *active2dnum, inactivenum;
    int32_t *working2dnum, *selexam;
    int32_t activenum;
    float64_t criterion, eq;
    float64_t *a_old;
    int32_t t0=0, t1=0, t2=0, t3=0, t4=0, t5=0, t6=0;
    float64_t epsilon_crit_org;
    float64_t bestmaxdiff;
    float64_t worstmaxdiff;
    int32_t bestmaxdiffiter, terminate;
    bool reactivated=false;

    float64_t *selcrit;
    float64_t *aicache;
    QP qp;

    epsilon_crit_org = learn_parm->epsilon_crit;
    if (kernel->has_property(KP_LINADD) && get_linadd_enabled()) {
        learn_parm->epsilon_crit=2.0;
    }
    learn_parm->epsilon_shrink=2;
    (*maxdiff)=1;

    SG_DEBUG("totdoc:%d\n", totdoc);
    chosen = SG_MALLOC(int32_t, totdoc);
    last_suboptimal_at = SG_MALLOC(int32_t, totdoc);
    key = SG_MALLOC(int32_t, totdoc+11);
    selcrit = SG_MALLOC(float64_t, totdoc);
    selexam = SG_MALLOC(int32_t, totdoc);
    a_old = SG_MALLOC(float64_t, totdoc);
    aicache = SG_MALLOC(float64_t, totdoc);
    working2dnum = SG_MALLOC(int32_t, totdoc+11);
    active2dnum = SG_MALLOC(int32_t, totdoc+11);
    qp.opt_ce = SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
    qp.opt_ce0 = SG_MALLOC(float64_t, 1);
    qp.opt_g = SG_MALLOC(float64_t, learn_parm->svm_maxqpsize*learn_parm->svm_maxqpsize);
    qp.opt_g0 = SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
    qp.opt_xinit = SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
    qp.opt_low = SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
    qp.opt_up = SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);

    choosenum=0;
    inconsistentnum=0;
    if (!retrain) retrain=1;
    iteration=1;
    bestmaxdiffiter=1;
    bestmaxdiff=999999999;
    worstmaxdiff=1e-10;
    terminate=0;

    kernel->set_time(iteration);

    for (i=0; i<totdoc; i++) {
        chosen[i]=0;
        a_old[i]=a[i];
        last_suboptimal_at[i]=1;
        if (inconsistent[i])
            inconsistentnum++;
    }
    activenum = compute_index(shrink_state->active, totdoc, active2dnum);
    inactivenum = totdoc-activenum;
    clear_index(working2dnum);

    CTime start_time;
    mkl_converged=false;

#ifdef CYGWIN
    for (; ((iteration<100 || (!mkl_converged && callback)) || (retrain && (!terminate))); iteration++) {
#else
    CSignal::clear_cancel();
    for (; ((!CSignal::cancel_computations()) && ((iteration<3 || (!mkl_converged && callback)) || (retrain && (!terminate)))); iteration++) {
#endif

        if (use_kernel_cache)
            kernel->set_time(iteration);

        if (verbosity>=2) t0=get_runtime();
        if (verbosity>=3) {
            SG_DEBUG("\nSelecting working set... ");
        }

        if (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)
            learn_parm->svm_newvarsinqp = learn_parm->svm_maxqpsize;

        i=0;
        for (jj=0; (j=working2dnum[jj])>=0; jj++) {
            if ((chosen[j]>=(learn_parm->svm_maxqpsize/
                            CMath::min(learn_parm->svm_maxqpsize,
                                learn_parm->svm_newvarsinqp)))
                    || (inconsistent[j])
                    || (j == heldout)) {
                chosen[j]=0;
                choosenum--;
            }
            else {
                chosen[j]++;
                working2dnum[i++]=j;
            }
        }
        working2dnum[i]=-1;

        if (retrain == 2) {
            choosenum=0;
            for (jj=0; (j=working2dnum[jj])>=0; jj++) {
                chosen[j]=0;
            }
            clear_index(working2dnum);
            for (i=0; i<totdoc; i++) {
                if ((inconsistent[i] || (heldout==i)) && (a[i] != 0.0)) {
                    chosen[i]=99999;
                    choosenum++;
                    a[i]=0;
                }
            }
            if (learn_parm->biased_hyperplane) {
                eq=0;
                for (i=0; i<totdoc; i++) {
                    eq += a[i]*label[i];
                }
                for (i=0; (i<totdoc) && (fabs(eq) > learn_parm->epsilon_a); i++) {
                    if ((eq*label[i] > 0) && (a[i] > 0)) {
                        chosen[i]=88888;
                        choosenum++;
                        if ((eq*label[i]) > a[i]) {
                            eq -= (a[i]*label[i]);
                            a[i]=0;
                        }
                        else {
                            a[i] -= (eq*label[i]);
                            eq=0;
                        }
                    }
                }
            }
            compute_index(chosen, totdoc, working2dnum);
        }
        else
        {
            if (iteration % 101)
            {
                already_chosen=0;
                if (CMath::min(learn_parm->svm_newvarsinqp, learn_parm->svm_maxqpsize-choosenum)>=4 &&
                        (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())))
                {
                    already_chosen = select_next_qp_subproblem_grad(
                            label, a, lin, c, totdoc,
                            (int32_t)(CMath::min(learn_parm->svm_maxqpsize-choosenum,
                                    learn_parm->svm_newvarsinqp)/2),
                            inconsistent, active2dnum,
                            working2dnum, selcrit, selexam, 1,
                            key, chosen);
                    choosenum += already_chosen;
                }
                choosenum += select_next_qp_subproblem_grad(
                        label, a, lin, c, totdoc,
                        CMath::min(learn_parm->svm_maxqpsize-choosenum,
                            learn_parm->svm_newvarsinqp-already_chosen),
                        inconsistent, active2dnum,
                        working2dnum, selcrit, selexam, 0, key,
                        chosen);
            }
            else {
                choosenum += select_next_qp_subproblem_rand(
                        label, a, lin, c, totdoc,
                        CMath::min(learn_parm->svm_maxqpsize-choosenum,
                            learn_parm->svm_newvarsinqp),
                        inconsistent, active2dnum,
                        working2dnum, selcrit, selexam, key,
                        chosen, iteration);
            }
        }

        if (verbosity>=2) {
            SG_INFO(" %d vectors chosen\n", choosenum);
        }

        if (verbosity>=2) t1=get_runtime();

        if (use_kernel_cache)
        {
            if (callback &&
                    (!((CCombinedKernel*) kernel)->get_append_subkernel_weights())
               )
            {
                CCombinedKernel* k = (CCombinedKernel*) kernel;
                CKernel* kn = k->get_first_kernel();

                while (kn)
                {
                    kn->cache_multiple_kernel_rows(working2dnum, choosenum);
                    SG_UNREF(kn);
                    kn = k->get_next_kernel();
                }
            }
            else
                kernel->cache_multiple_kernel_rows(working2dnum, choosenum);
        }

        if (verbosity>=2) t2=get_runtime();

        if (retrain != 2) {
            optimize_svm(docs, label, inconsistent, 0.0, chosen, active2dnum,
                    totdoc, working2dnum, choosenum, a, lin, c,
                    aicache, &qp, &epsilon_crit_org);
        }

        if (verbosity>=2) t3=get_runtime();
        update_linear_component(docs, label, active2dnum, a, a_old, working2dnum, totdoc,
                lin, aicache, c);

        if (verbosity>=2) t4=get_runtime();
        supvecnum = calculate_svm_model(docs, label, lin, a, a_old, c, working2dnum, active2dnum);

        if (verbosity>=2) t5=get_runtime();

        for (jj=0; (i=working2dnum[jj])>=0; jj++) {
            a_old[i]=a[i];
        }

        retrain = check_optimality(label, a, lin, c, totdoc,
                maxdiff, epsilon_crit_org, &misclassified,
                inconsistent, active2dnum, last_suboptimal_at,
                iteration);

        if (verbosity>=2) {
            t6=get_runtime();
            timing_profile->time_select += t1-t0;
            timing_profile->time_kernel += t2-t1;
            timing_profile->time_opti += t3-t2;
            timing_profile->time_update += t4-t3;
            timing_profile->time_model += t5-t4;
            timing_profile->time_check += t6-t5;
        }

        if ((*maxdiff) < bestmaxdiff) {
            bestmaxdiff=(*maxdiff);
            bestmaxdiffiter=iteration;
        }
        if (iteration > (bestmaxdiffiter+learn_parm->maxiter)) {
            terminate=1;
            retrain=0;
            SG_WARNING("Relaxing KT-Conditions due to slow progress! Terminating!\n");
            SG_DEBUG("(iteration: %d, bestmaxdiffiter: %d, learn_parm->maxiter: %d)\n", iteration, bestmaxdiffiter, learn_parm->maxiter);
        }

        noshrink = (get_shrinking_enabled()) ? 0 : 1;

        if ((!callback) && (!retrain) && (inactivenum>0) &&
                ((!learn_parm->skip_final_opt_check) || (kernel->has_property(KP_LINADD) && get_linadd_enabled())))
        {
            t1=get_runtime();
            SG_DEBUG("reactivating inactive examples\n");

            reactivate_inactive_examples(label, a, shrink_state, lin, c, totdoc,
                    iteration, inconsistent,
                    docs, aicache,
                    maxdiff);
            reactivated=true;
            SG_DEBUG("done reactivating inactive examples (maxdiff:%8f eps_crit:%8f orig_eps:%8f)\n", *maxdiff, learn_parm->epsilon_crit, epsilon_crit_org);

            activenum = compute_index(shrink_state->active, totdoc, active2dnum);
            inactivenum = totdoc-activenum;

            bestmaxdiff=(*maxdiff);
            bestmaxdiffiter=iteration;

            noshrink=1;
            retrain=0;
            if ((*maxdiff) > learn_parm->epsilon_crit)
            {
                SG_INFO("restarting optimization as we are - due to shrinking - deviating too much (maxdiff(%f) > eps(%f))\n", *maxdiff, learn_parm->epsilon_crit);
                retrain=1;
            }
            timing_profile->time_shrink += get_runtime()-t1;
            if (((verbosity>=1) && (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())))
                    || (verbosity>=2)) {
                SG_DONE();
                SG_DEBUG("Number of inactive variables = %d\n", inactivenum);
            }
        }

        if ((!retrain) && (learn_parm->epsilon_crit>(*maxdiff)))
            learn_parm->epsilon_crit=(*maxdiff);
        if ((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) {
            learn_parm->epsilon_crit /= 4.0;
            retrain=1;
            noshrink=1;
        }
        if (learn_parm->epsilon_crit<epsilon_crit_org)
            learn_parm->epsilon_crit=epsilon_crit_org;

        if (verbosity>=2) {
            SG_INFO(" => (%d SV (incl. %d SV at u-bound), max violation=%.5f)\n",
                    supvecnum, model->at_upper_bound, (*maxdiff));
00875
00876 }
00877 mymaxdiff=*maxdiff ;
00878
00879
00880 if (((iteration % 10) == 0) && (!noshrink) && !callback)
00881 {
00882 activenum=shrink_problem(shrink_state,active2dnum,last_suboptimal_at,iteration,totdoc,
00883 CMath::max((int32_t)(activenum/10),
00884 CMath::max((int32_t)(totdoc/500),(int32_t) 100)),
00885 a,inconsistent, c, lin, label);
00886
00887 inactivenum=totdoc-activenum;
00888
00889 if (use_kernel_cache && (supvecnum>kernel->get_max_elems_cache())
00890 && ((kernel->get_activenum_cache()-activenum)>CMath::max((int32_t)(activenum/10),(int32_t) 500))) {
00891
00892 kernel->kernel_cache_shrink(totdoc, CMath::min((int32_t) (kernel->get_activenum_cache()-activenum),
00893 (int32_t) (kernel->get_activenum_cache()-supvecnum)),
00894 shrink_state->active);
00895 }
00896 }
00897
00898 if (bestmaxdiff>worstmaxdiff)
00899 worstmaxdiff=bestmaxdiff;
00900
00901 SG_ABS_PROGRESS(bestmaxdiff, -CMath::log10(bestmaxdiff), -CMath::log10(worstmaxdiff), -CMath::log10(epsilon), 6);
00902
00903
00904 if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time) {
00905 terminate = 1;
00906 retrain = 0;
00907 }
00908
00909 }
00910
00911 SG_DEBUG( "inactive:%d\n", inactivenum);
00912
00913 if (inactivenum && !reactivated && !callback)
00914 {
00915 SG_DEBUG( "reactivating inactive examples\n");
00916 reactivate_inactive_examples(label,a,shrink_state,lin,c,totdoc,
00917 iteration,inconsistent,
00918 docs,aicache,
00919 maxdiff);
00920 SG_DEBUG( "done reactivating inactive examples\n");
00921
00922 activenum=compute_index(shrink_state->active,totdoc,active2dnum);
00923 inactivenum=totdoc-activenum;
00924
00925 bestmaxdiff=(*maxdiff);
00926 bestmaxdiffiter=iteration;
00927 }
00928
00929
00930 criterion=compute_objective_function(a,lin,c,learn_parm->eps,label,totdoc);
00931 CSVM::set_objective(criterion);
00932
00933 SG_FREE(chosen);
00934 SG_FREE(last_suboptimal_at);
00935 SG_FREE(key);
00936 SG_FREE(selcrit);
00937 SG_FREE(selexam);
00938 SG_FREE(a_old);
00939 SG_FREE(aicache);
00940 SG_FREE(working2dnum);
00941 SG_FREE(active2dnum);
00942 SG_FREE(qp.opt_ce);
00943 SG_FREE(qp.opt_ce0);
00944 SG_FREE(qp.opt_g);
00945 SG_FREE(qp.opt_g0);
00946 SG_FREE(qp.opt_xinit);
00947 SG_FREE(qp.opt_low);
00948 SG_FREE(qp.opt_up);
00949
00950 learn_parm->epsilon_crit=epsilon_crit_org;
00951
00952 return(iteration);
00953 }
00954
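/* Dual objective over the active variables:
 * sum_i (eps[i] - y_i c[i]) a_i + 0.5 a_i y_i lin[i],
 * where lin[i] = sum_j a_j y_j k(i,j). */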
float64_t CSVMLight::compute_objective_function(
        float64_t *a, float64_t *lin, float64_t *c, float64_t* eps, int32_t *label,
        int32_t totdoc)
{
    float64_t criterion=0;

    for (int32_t i=0; i<totdoc; i++)
        criterion += (eps[i]-(float64_t)label[i]*c[i])*a[i] + 0.5*a[i]*label[i]*lin[i];

    return criterion;
}

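/* The *2dnum arrays below are -1 terminated lists of document indices.
 * compute_index() builds such a list from a 0/1 membership vector and
 * returns its length. */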
void CSVMLight::clear_index(int32_t *index)
{
    index[0]=-1;
}

void CSVMLight::add_to_index(int32_t *index, int32_t elem)
{
    register int32_t i;
    for (i=0; index[i] != -1; i++);
    index[i]=elem;
    index[i+1]=-1;
}

int32_t CSVMLight::compute_index(
        int32_t *binfeature, int32_t range, int32_t *index)
{
    register int32_t i, ii;

    ii=0;
    for (i=0; i<range; i++) {
        if (binfeature[i]) {
            index[ii]=i;
            ii++;
        }
    }
    for (i=0; i<4; i++) {
        index[ii+i]=-1;
    }
    return ii;
}

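/* Sets up the reduced QP over the varnum variables in working2dnum,
 * solves it with the internal QP solver and writes the new alphas back
 * into a[]. */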
void CSVMLight::optimize_svm(
        int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
        float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t totdoc,
        int32_t *working2dnum, int32_t varnum, float64_t *a, float64_t *lin,
        float64_t *c, float64_t *aicache, QP *qp, float64_t *epsilon_crit_target)
{
    int32_t i;
    float64_t *a_v;

    compute_matrices_for_optimization(docs, label,
            exclude_from_eq_const, eq_target, chosen,
            active2dnum, working2dnum, a, lin, c,
            varnum, totdoc, aicache, qp);

    if (verbosity>=3) {
        SG_DEBUG("Running optimizer...");
    }

    a_v = optimize_qp(qp, epsilon_crit_target,
            learn_parm->svm_maxqpsize,
            &(model->b),
            learn_parm->svm_maxqpsize);

    if (verbosity>=3) {
        SG_DONE();
    }

    for (i=0; i<varnum; i++)
        a[working2dnum[i]]=a_v[i];
}

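/* Same as compute_matrices_for_optimization(), but evaluates the
 * varnum*(varnum+1)/2 required kernel entries in parallel with pthreads
 * before assembling the QP matrices. */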
void CSVMLight::compute_matrices_for_optimization_parallel(
        int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
        float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t *key,
        float64_t *a, float64_t *lin, float64_t *c, int32_t varnum, int32_t totdoc,
        float64_t *aicache, QP *qp)
{
    if (parallel->get_num_threads()<=1)
    {
        compute_matrices_for_optimization(docs, label, exclude_from_eq_const, eq_target,
                chosen, active2dnum, key, a, lin, c,
                varnum, totdoc, aicache, qp);
    }
#ifdef HAVE_PTHREAD
    else
    {
        register int32_t ki, kj, i, j;
        register float64_t kernel_temp;

        qp->opt_n=varnum;
        qp->opt_ce0[0]=-eq_target;
        for (j=1; j<model->sv_num; j++) {
            if ((!chosen[model->supvec[j]])
                    && (!exclude_from_eq_const[(model->supvec[j])])) {
                qp->opt_ce0[0] += model->alpha[j];
            }
        }
        if (learn_parm->biased_hyperplane)
            qp->opt_m=1;
        else
            qp->opt_m=0;

        for (i=0; i<varnum; i++) {
            qp->opt_g0[i]=lin[key[i]];
        }

        ASSERT(parallel->get_num_threads()>1);
        int32_t *KI=SG_MALLOC(int32_t, varnum*varnum);
        int32_t *KJ=SG_MALLOC(int32_t, varnum*varnum);
        int32_t Knum=0;
        float64_t *Kval = SG_MALLOC(float64_t, varnum*(varnum+1)/2);
        for (i=0; i<varnum; i++) {
            ki=key[i];
            KI[Knum]=docs[ki];
            KJ[Knum]=docs[ki];
            Knum++;
            for (j=i+1; j<varnum; j++)
            {
                kj=key[j];
                KI[Knum]=docs[ki];
                KJ[Knum]=docs[kj];
                Knum++;
            }
        }
        ASSERT(Knum<=varnum*(varnum+1)/2);

        pthread_t* threads = SG_MALLOC(pthread_t, parallel->get_num_threads()-1);
        S_THREAD_PARAM_KERNEL* params = SG_MALLOC(S_THREAD_PARAM_KERNEL, parallel->get_num_threads()-1);
        int32_t step = Knum/parallel->get_num_threads();

        for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
        {
            params[t].svmlight = this;
            params[t].start = t*step;
            params[t].end = (t+1)*step;
            params[t].KI=KI;
            params[t].KJ=KJ;
            params[t].Kval=Kval;
            pthread_create(&threads[t], NULL, CSVMLight::compute_kernel_helper, (void*)&params[t]);
        }
        for (i=params[parallel->get_num_threads()-2].end; i<Knum; i++)
            Kval[i]=compute_kernel(KI[i], KJ[i]);

        for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
            pthread_join(threads[t], NULL);

        SG_FREE(params);
        SG_FREE(threads);

        Knum=0;
        for (i=0; i<varnum; i++) {
            ki=key[i];

            qp->opt_ce[i]=label[ki];
            qp->opt_low[i]=0;
            qp->opt_up[i]=learn_parm->svm_cost[ki];

            kernel_temp=Kval[Knum];
            Knum++;

            qp->opt_g0[i] -= (kernel_temp*a[ki]*(float64_t)label[ki]);

            qp->opt_g[varnum*i+i]=kernel_temp;

            for (j=i+1; j<varnum; j++) {
                kj=key[j];
                kernel_temp=Kval[Knum];
                Knum++;

                qp->opt_g0[i] -= (kernel_temp*a[kj]*(float64_t)label[kj]);
                qp->opt_g0[j] -= (kernel_temp*a[ki]*(float64_t)label[ki]);

                qp->opt_g[varnum*i+j]=(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp;
                qp->opt_g[varnum*j+i]=qp->opt_g[varnum*i+j];
            }

            if (verbosity>=3) {
                if (i % 20 == 0) {
                    SG_DEBUG("%d..", i);
                }
            }
        }

        SG_FREE(KI);
        SG_FREE(KJ);
        SG_FREE(Kval);

        for (i=0; i<varnum; i++) {
            qp->opt_xinit[i]=a[key[i]];

            qp->opt_g0[i]=(learn_parm->eps[key[i]]-(float64_t)label[key[i]]*c[key[i]])+qp->opt_g0[i]*(float64_t)label[key[i]];
        }

        if (verbosity>=3) {
            SG_DONE();
        }
    }
#endif
}

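/* Assembles the QP subproblem for the working set: linear part opt_g0,
 * Hessian opt_g[i*varnum+j] = y_i y_j k(i,j), box constraints
 * [0, svm_cost] and the equality-constraint offset contributed by the
 * fixed (non-chosen) alphas. */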
void CSVMLight::compute_matrices_for_optimization(
        int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
        float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t *key,
        float64_t *a, float64_t *lin, float64_t *c, int32_t varnum, int32_t totdoc,
        float64_t *aicache, QP *qp)
{
    register int32_t ki, kj, i, j;
    register float64_t kernel_temp;

    qp->opt_n=varnum;
    qp->opt_ce0[0]=-eq_target;
    for (j=1; j<model->sv_num; j++) {
        if ((!chosen[model->supvec[j]])
                && (!exclude_from_eq_const[(model->supvec[j])])) {
            qp->opt_ce0[0] += model->alpha[j];
        }
    }
    if (learn_parm->biased_hyperplane)
        qp->opt_m=1;
    else
        qp->opt_m=0;

    for (i=0; i<varnum; i++) {
        qp->opt_g0[i]=lin[key[i]];
    }

    for (i=0; i<varnum; i++) {
        ki=key[i];

        qp->opt_ce[i]=label[ki];
        qp->opt_low[i]=0;
        qp->opt_up[i]=learn_parm->svm_cost[ki];

        kernel_temp=compute_kernel(docs[ki], docs[ki]);

        qp->opt_g0[i] -= (kernel_temp*a[ki]*(float64_t)label[ki]);

        qp->opt_g[varnum*i+i]=kernel_temp;

        for (j=i+1; j<varnum; j++) {
            kj=key[j];
            kernel_temp=compute_kernel(docs[ki], docs[kj]);

            qp->opt_g0[i] -= (kernel_temp*a[kj]*(float64_t)label[kj]);
            qp->opt_g0[j] -= (kernel_temp*a[ki]*(float64_t)label[ki]);

            qp->opt_g[varnum*i+j]=(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp;
            qp->opt_g[varnum*j+i]=qp->opt_g[varnum*i+j];
        }

        if (verbosity>=3) {
            if (i % 20 == 0) {
                SG_DEBUG("%d..", i);
            }
        }
    }

    for (i=0; i<varnum; i++) {
        qp->opt_xinit[i]=a[key[i]];

        qp->opt_g0[i]=(learn_parm->eps[key[i]]-(float64_t)label[key[i]]*c[key[i]]) + qp->opt_g0[i]*(float64_t)label[key[i]];
    }

    if (verbosity>=3) {
        SG_DONE();
    }
}

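/* Updates the support-vector bookkeeping (supvec/alpha/index) from the
 * new alphas and recomputes the bias b; returns the number of support
 * vectors. */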
int32_t CSVMLight::calculate_svm_model(
        int32_t* docs, int32_t *label, float64_t *lin, float64_t *a,
        float64_t *a_old, float64_t *c, int32_t *working2dnum, int32_t *active2dnum)
{
    int32_t i, ii, pos, b_calculated=0, first_low, first_high;
    float64_t ex_c, b_temp, b_low, b_high;

    if (verbosity>=3) {
        SG_DEBUG("Calculating model...");
    }

    if (!learn_parm->biased_hyperplane) {
        model->b=0;
        b_calculated=1;
    }

    for (ii=0; (i=working2dnum[ii])>=0; ii++) {
        if ((a_old[i]>0) && (a[i]==0)) {
            pos=model->index[i];
            model->index[i]=-1;
            (model->sv_num)--;
            model->supvec[pos]=model->supvec[model->sv_num];
            model->alpha[pos]=model->alpha[model->sv_num];
            model->index[model->supvec[pos]]=pos;
        }
        else if ((a_old[i]==0) && (a[i]>0)) {
            model->supvec[model->sv_num]=docs[i];
            model->alpha[model->sv_num]=a[i]*(float64_t)label[i];
            model->index[i]=model->sv_num;
            (model->sv_num)++;
        }
        else if (a_old[i]==a[i]) {
        }
        else {
            model->alpha[model->index[i]]=a[i]*(float64_t)label[i];
        }

        ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
        if ((a_old[i]>=ex_c) && (a[i]<ex_c)) {
            (model->at_upper_bound)--;
        }
        else if ((a_old[i]<ex_c) && (a[i]>=ex_c)) {
            (model->at_upper_bound)++;
        }

        if ((!b_calculated)
                && (a[i]>learn_parm->epsilon_a) && (a[i]<ex_c)) {
            model->b=((float64_t)label[i]*learn_parm->eps[i]-c[i]+lin[i]);
            b_calculated=1;
        }
    }

    if (learn_parm->biased_hyperplane
            && (!b_calculated)
            && (model->sv_num-1 == model->at_upper_bound)) {
        first_low=1;
        first_high=1;
        b_low=0;
        b_high=0;
        for (ii=0; (i=active2dnum[ii])>=0; ii++) {
            ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
            if (a[i]<ex_c) {
                if (label[i]>0) {
                    b_temp=-(learn_parm->eps[i]-c[i]+lin[i]);
                    if ((b_temp>b_low) || (first_low)) {
                        b_low=b_temp;
                        first_low=0;
                    }
                }
                else {
                    b_temp=-(-learn_parm->eps[i]-c[i]+lin[i]);
                    if ((b_temp<b_high) || (first_high)) {
                        b_high=b_temp;
                        first_high=0;
                    }
                }
            }
            else {
                if (label[i]<0) {
                    b_temp=-(-learn_parm->eps[i]-c[i]+lin[i]);
                    if ((b_temp>b_low) || (first_low)) {
                        b_low=b_temp;
                        first_low=0;
                    }
                }
                else {
                    b_temp=-(learn_parm->eps[i]-c[i]+lin[i]);
                    if ((b_temp<b_high) || (first_high)) {
                        b_high=b_temp;
                        first_high=0;
                    }
                }
            }
        }
        if (first_high) {
            model->b=-b_low;
        }
        else if (first_low) {
            model->b=-b_high;
        }
        else {
            model->b=-(b_high+b_low)/2.0;
        }
    }

    if (verbosity>=3) {
        SG_DONE();
    }

    return model->sv_num-1;
}

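/* Checks the KKT conditions on all active examples, records the largest
 * violation in *maxdiff and marks examples that are still suboptimal for
 * the shrinking heuristic; returns nonzero if another pass is needed. */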
int32_t CSVMLight::check_optimality(
        int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc,
        float64_t *maxdiff, float64_t epsilon_crit_org, int32_t *misclassified,
        int32_t *inconsistent, int32_t *active2dnum, int32_t *last_suboptimal_at,
        int32_t iteration)
{
    int32_t i, ii, retrain;
    float64_t dist, ex_c, target;

    if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
        learn_parm->epsilon_shrink = -learn_parm->epsilon_crit+epsilon_crit_org;
    else
        learn_parm->epsilon_shrink = learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3;
    retrain=0;
    (*maxdiff)=0;
    (*misclassified)=0;
    for (ii=0; (i=active2dnum[ii])>=0; ii++) {
        if ((!inconsistent[i]) && label[i]) {
            dist=(lin[i]-model->b)*(float64_t)label[i];

            target=-(learn_parm->eps[i]-(float64_t)label[i]*c[i]);
            ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
            if (dist <= 0) {
                (*misclassified)++;
            }
            if ((a[i]>learn_parm->epsilon_a) && (dist > target)) {
                if ((dist-target)>(*maxdiff))
                    (*maxdiff)=dist-target;
            }
            else if ((a[i]<ex_c) && (dist < target)) {
                if ((target-dist)>(*maxdiff))
                    (*maxdiff)=target-dist;
            }

            if ((a[i]>(learn_parm->epsilon_a))
                    && (a[i]<ex_c)) {
                last_suboptimal_at[i]=iteration;
            }
            else if ((a[i]<=(learn_parm->epsilon_a))
                    && (dist < (target+learn_parm->epsilon_shrink))) {
                last_suboptimal_at[i]=iteration;
            }
            else if ((a[i]>=ex_c)
                    && (dist > (target-learn_parm->epsilon_shrink))) {
                last_suboptimal_at[i]=iteration;
            }
        }
    }

    if ((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) {
        retrain=1;
    }
    return retrain;
}

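/* Propagates the alpha changes of the working set into the linear term
 * lin[]: via kernel->add_to_normal()/compute_optimized() when linadd is
 * available, via explicit kernel rows otherwise, and via the MKL
 * variants when a multiple-kernel-learning callback is registered. */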
void CSVMLight::update_linear_component(
        int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
        float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
        float64_t *aicache, float64_t* c)
{
    register int32_t i=0, ii=0, j=0, jj=0;

    if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
    {
        if (callback)
        {
            update_linear_component_mkl_linadd(docs, label, active2dnum, a,
                    a_old, working2dnum, totdoc, lin, aicache);
        }
        else
        {
            kernel->clear_normal();

            int32_t num_working=0;
            for (ii=0; (i=working2dnum[ii])>=0; ii++) {
                if (a[i] != a_old[i]) {
                    kernel->add_to_normal(docs[i], (a[i]-a_old[i])*(float64_t)label[i]);
                    num_working++;
                }
            }

            if (num_working>0)
            {
                if (parallel->get_num_threads() < 2)
                {
                    for (jj=0; (j=active2dnum[jj])>=0; jj++) {
                        lin[j] += kernel->compute_optimized(docs[j]);
                    }
                }
#ifdef HAVE_PTHREAD
                else
                {
                    int32_t num_elem=0;
                    for (jj=0; (j=active2dnum[jj])>=0; jj++) num_elem++;

                    pthread_t* threads = SG_MALLOC(pthread_t, parallel->get_num_threads()-1);
                    S_THREAD_PARAM* params = SG_MALLOC(S_THREAD_PARAM, parallel->get_num_threads()-1);
                    int32_t start=0;
                    int32_t step = num_elem/parallel->get_num_threads();
                    int32_t end=step;

                    for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
                    {
                        params[t].kernel = kernel;
                        params[t].lin = lin;
                        params[t].docs = docs;
                        params[t].active2dnum = active2dnum;
                        params[t].start = start;
                        params[t].end = end;
                        start=end;
                        end+=step;
                        pthread_create(&threads[t], NULL, update_linear_component_linadd_helper, (void*)&params[t]);
                    }

                    for (jj=params[parallel->get_num_threads()-2].end; (j=active2dnum[jj])>=0; jj++) {
                        lin[j] += kernel->compute_optimized(docs[j]);
                    }
                    void* ret;
                    for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
                        pthread_join(threads[t], &ret);

                    SG_FREE(params);
                    SG_FREE(threads);
                }
#endif
            }
        }
    }
    else
    {
        if (callback)
        {
            update_linear_component_mkl(docs, label, active2dnum,
                    a, a_old, working2dnum, totdoc, lin, aicache);
        }
        else {
            for (jj=0; (i=working2dnum[jj])>=0; jj++) {
                if (a[i] != a_old[i]) {
                    kernel->get_kernel_row(i, active2dnum, aicache);
                    for (ii=0; (j=active2dnum[ii])>=0; ii++)
                        lin[j] += (a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
                }
            }
        }
    }
}

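/* MKL variant: maintains the per-subkernel outputs
 * W[j*num_kernels+n] = sum_i a_i y_i k_n(i,j) for all examples j, then
 * hands the aggregated statistics to the MKL callback via
 * call_mkl_callback(). */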
void CSVMLight::update_linear_component_mkl(
        int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
        float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
        float64_t *aicache)
{
    int32_t num = kernel->get_num_vec_rhs();
    int32_t num_weights = -1;
    int32_t num_kernels = kernel->get_num_subkernels();
    const float64_t* old_beta = kernel->get_subkernel_weights(num_weights);
    ASSERT(num_weights==num_kernels);

    if ((kernel->get_kernel_type()==K_COMBINED) &&
            (!((CCombinedKernel*)kernel)->get_append_subkernel_weights()))
    {
        CCombinedKernel* k = (CCombinedKernel*) kernel;
        CKernel* kn = k->get_first_kernel();
        int32_t n=0, i, j;

        while (kn!=NULL)
        {
            for (i=0; i<num; i++)
            {
                if (a[i] != a_old[i])
                {
                    kn->get_kernel_row(i, NULL, aicache, true);
                    for (j=0; j<num; j++)
                        W[j*num_kernels+n] += (a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
                }
            }

            SG_UNREF(kn);
            kn = k->get_next_kernel();
            n++;
        }
    }
    else
    {
        float64_t* w_backup = SG_MALLOC(float64_t, num_kernels);
        float64_t* w1 = SG_MALLOC(float64_t, num_kernels);

        for (int32_t i=0; i<num_kernels; i++)
        {
            w_backup[i]=old_beta[i];
            w1[i]=0.0;
        }
        for (int32_t n=0; n<num_kernels; n++)
        {
            w1[n]=1.0;
            kernel->set_subkernel_weights(SGVector<float64_t>(w1, num_weights));

            for (int32_t i=0; i<num; i++)
            {
                if (a[i] != a_old[i])
                {
                    for (int32_t j=0; j<num; j++)
                        W[j*num_kernels+n] += (a[i]-a_old[i])*compute_kernel(i,j)*(float64_t)label[i];
                }
            }
            w1[n]=0.0;
        }

        kernel->set_subkernel_weights(SGVector<float64_t>(w_backup, num_weights));

        SG_FREE(w_backup);
        SG_FREE(w1);
    }

    call_mkl_callback(a, label, lin);
}

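/* MKL + linadd variant: temporarily sets all subkernel weights to one,
 * accumulates the alpha updates into the kernel's normal vector and
 * reads the per-subkernel outputs back via compute_by_subkernel(). */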
void CSVMLight::update_linear_component_mkl_linadd(
        int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
        float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
        float64_t *aicache)
{
    int32_t num = kernel->get_num_vec_rhs();
    int32_t num_weights = -1;
    int32_t num_kernels = kernel->get_num_subkernels();
    const float64_t* old_beta = kernel->get_subkernel_weights(num_weights);
    ASSERT(num_weights==num_kernels);

    float64_t* w_backup = SG_MALLOC(float64_t, num_kernels);
    float64_t* w1 = SG_MALLOC(float64_t, num_kernels);

    for (int32_t i=0; i<num_kernels; i++)
    {
        w_backup[i]=old_beta[i];
        w1[i]=1.0;
    }

    kernel->set_subkernel_weights(SGVector<float64_t>(w1, num_weights));

    kernel->clear_normal();
    for (int32_t ii=0, i=0; (i=working2dnum[ii])>=0; ii++) {
        if (a[i] != a_old[i]) {
            kernel->add_to_normal(docs[i], (a[i]-a_old[i])*(float64_t)label[i]);
        }
    }

    if (parallel->get_num_threads() < 2)
    {
        for (int32_t i=0; i<num; i++)
            kernel->compute_by_subkernel(i, &W[i*num_kernels]);
    }
#ifdef HAVE_PTHREAD
    else
    {
        pthread_t* threads = SG_MALLOC(pthread_t, parallel->get_num_threads()-1);
        S_THREAD_PARAM* params = SG_MALLOC(S_THREAD_PARAM, parallel->get_num_threads()-1);
        int32_t step = num/parallel->get_num_threads();

        for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
        {
            params[t].kernel = kernel;
            params[t].W = W;
            params[t].start = t*step;
            params[t].end = (t+1)*step;
            pthread_create(&threads[t], NULL, CSVMLight::update_linear_component_mkl_linadd_helper, (void*)&params[t]);
        }

        for (int32_t i=params[parallel->get_num_threads()-2].end; i<num; i++)
            kernel->compute_by_subkernel(i, &W[i*num_kernels]);

        for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
            pthread_join(threads[t], NULL);

        SG_FREE(params);
        SG_FREE(threads);
    }
#endif

    kernel->set_subkernel_weights(SGVector<float64_t>(w_backup, num_weights));

    call_mkl_callback(a, label, lin);
}

void* CSVMLight::update_linear_component_mkl_linadd_helper(void* p)
{
    S_THREAD_PARAM* params = (S_THREAD_PARAM*) p;

    int32_t num_kernels = params->kernel->get_num_subkernels();

    for (int32_t i=params->start; i<params->end; i++)
        params->kernel->compute_by_subkernel(i, &(params->W[i*num_kernels]));

    return NULL;
}

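/* Computes suma = sum_i a_i and the per-kernel objective terms
 * sumw[n] = 0.5 * sum_i a_i y_i W[i*num_kernels+n], invokes the MKL
 * callback to update the subkernel weights beta, and recomputes
 * lin[i] = sum_n beta_n W[i*num_kernels+n] under the new weights. */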
void CSVMLight::call_mkl_callback(float64_t* a, int32_t* label, float64_t* lin)
{
    int32_t num = kernel->get_num_vec_rhs();
    int32_t num_kernels = kernel->get_num_subkernels();

    float64_t suma=0;
    float64_t* sumw=SG_MALLOC(float64_t, num_kernels);
#ifdef HAVE_LAPACK
    int nk = (int) num_kernels;
    double* alphay = SG_MALLOC(double, num);

    for (int32_t i=0; i<num; i++)
    {
        alphay[i]=a[i]*label[i];
        suma+=a[i];
    }

    for (int32_t i=0; i<num_kernels; i++)
        sumw[i]=0;

    cblas_dgemv(CblasColMajor, CblasNoTrans, num_kernels, (int) num, 0.5, (double*) W,
            num_kernels, alphay, 1, 1.0, (double*) sumw, 1);

    SG_FREE(alphay);
#else
    for (int32_t i=0; i<num; i++)
        suma += a[i];

    for (int32_t d=0; d<num_kernels; d++)
    {
        sumw[d]=0;
        for (int32_t i=0; i<num; i++)
            sumw[d] += a[i]*(0.5*label[i]*W[i*num_kernels+d]);
    }
#endif

    if (callback)
        mkl_converged=callback(mkl, sumw, suma);

    const float64_t* new_beta = kernel->get_subkernel_weights(num_kernels);

#ifdef HAVE_LAPACK
    cblas_dgemv(CblasColMajor, CblasTrans, nk, (int) num, 1.0, (double*) W,
            nk, (double*) new_beta, 1, 0.0, (double*) lin, 1);
#else
    for (int32_t i=0; i<num; i++)
        lin[i]=0;
    for (int32_t d=0; d<num_kernels; d++)
        if (new_beta[d]!=0)
            for (int32_t i=0; i<num; i++)
                lin[i] += new_beta[d]*W[i*num_kernels+d];
#endif

    SG_FREE(sumw);
}

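/* Working-set selection by steepest feasible descent: picks up to
 * qp_size variables, half from each search direction, with the largest
 * selection criterion, skipping bounded, already chosen and inconsistent
 * examples (and, if cache_only is set, examples without a cached kernel
 * row). */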
int32_t CSVMLight::select_next_qp_subproblem_grad(
        int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc,
        int32_t qp_size, int32_t *inconsistent, int32_t *active2dnum,
        int32_t *working2dnum, float64_t *selcrit, int32_t *select,
        int32_t cache_only, int32_t *key, int32_t *chosen)
{
    int32_t choosenum, i, j, k, activedoc, inum, valid;
    float64_t s;

    for (inum=0; working2dnum[inum]>=0; inum++);
    choosenum=0;
    activedoc=0;
    for (i=0; (j=active2dnum[i])>=0; i++) {
        s=-label[j];
        if (cache_only)
        {
            if (use_kernel_cache)
                valid=(kernel->kernel_cache_check(j));
            else
                valid=1;
        }
        else
            valid=1;
        if (valid
                && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
                && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
                        && (s>0)))
                && (!chosen[j])
                && (label[j])
                && (!inconsistent[j]))
        {
            selcrit[activedoc]=(float64_t)label[j]*(learn_parm->eps[j]-(float64_t)label[j]*c[j]+(float64_t)label[j]*lin[j]);
            key[activedoc]=j;
            activedoc++;
        }
    }
    select_top_n(selcrit, activedoc, select, (int32_t)(qp_size/2));
    for (k=0; (choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc); k++) {
        i=key[select[k]];
        chosen[i]=1;
        working2dnum[inum+choosenum]=i;
        choosenum+=1;
        if (use_kernel_cache)
            kernel->kernel_cache_touch(i);
    }

    activedoc=0;
    for (i=0; (j=active2dnum[i])>=0; i++) {
        s=label[j];
        if (cache_only)
        {
            if (use_kernel_cache)
                valid=(kernel->kernel_cache_check(j));
            else
                valid=1;
        }
        else
            valid=1;
        if (valid
                && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
                && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
                        && (s>0)))
                && (!chosen[j])
                && (label[j])
                && (!inconsistent[j]))
        {
            selcrit[activedoc]=-(float64_t)label[j]*(learn_parm->eps[j]-(float64_t)label[j]*c[j]+(float64_t)label[j]*lin[j]);
            key[activedoc]=j;
            activedoc++;
        }
    }
    select_top_n(selcrit, activedoc, select, (int32_t)(qp_size/2));
    for (k=0; (choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc); k++) {
        i=key[select[k]];
        chosen[i]=1;
        working2dnum[inum+choosenum]=i;
        choosenum+=1;
        if (use_kernel_cache)
            kernel->kernel_cache_touch(i);
    }
    working2dnum[inum+choosenum]=-1;
    return choosenum;
}

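/* Fallback working-set selection used every 101st iteration: picks
 * feasible variables in a rotating, pseudo-random order to avoid
 * cycling on the same working set. */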
int32_t CSVMLight::select_next_qp_subproblem_rand(
        int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc,
        int32_t qp_size, int32_t *inconsistent, int32_t *active2dnum,
        int32_t *working2dnum, float64_t *selcrit, int32_t *select, int32_t *key,
        int32_t *chosen, int32_t iteration)
{
    int32_t choosenum, i, j, k, activedoc, inum;
    float64_t s;

    for (inum=0; working2dnum[inum]>=0; inum++);
    choosenum=0;
    activedoc=0;
    for (i=0; (j=active2dnum[i])>=0; i++) {
        s=-label[j];
        if ((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
                && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
                        && (s>0)))
                && (!inconsistent[j])
                && (label[j])
                && (!chosen[j])) {
            selcrit[activedoc]=(j+iteration) % totdoc;
            key[activedoc]=j;
            activedoc++;
        }
    }
    select_top_n(selcrit, activedoc, select, (int32_t)(qp_size/2));
    for (k=0; (choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc); k++) {
        i=key[select[k]];
        chosen[i]=1;
        working2dnum[inum+choosenum]=i;
        choosenum+=1;
        if (use_kernel_cache)
            kernel->kernel_cache_touch(i);
    }

    activedoc=0;
    for (i=0; (j=active2dnum[i])>=0; i++) {
        s=label[j];
        if ((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
                && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
                        && (s>0)))
                && (!inconsistent[j])
                && (label[j])
                && (!chosen[j])) {
            selcrit[activedoc]=(j+iteration) % totdoc;
            key[activedoc]=j;
            activedoc++;
        }
    }
    select_top_n(selcrit, activedoc, select, (int32_t)(qp_size/2));
    for (k=0; (choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc); k++) {
        i=key[select[k]];
        chosen[i]=1;
        working2dnum[inum+choosenum]=i;
        choosenum+=1;
        if (use_kernel_cache)
            kernel->kernel_cache_touch(i);
    }
    working2dnum[inum+choosenum]=-1;
    return choosenum;
}

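/* Partial selection sort: writes the indices of the n largest selcrit
 * values within [0, range) into select[], in decreasing order. */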
void CSVMLight::select_top_n(
        float64_t *selcrit, int32_t range, int32_t* select, int32_t n)
{
    register int32_t i, j;

    for (i=0; (i<n) && (i<range); i++) {
        for (j=i; j>=0; j--) {
            if ((j>0) && (selcrit[select[j-1]]<selcrit[i])) {
                select[j]=select[j-1];
            }
            else {
                select[j]=i;
                j=-1;
            }
        }
    }
    if (n>0) {
        for (i=n; i<range; i++) {
            if (selcrit[i]>selcrit[select[n-1]]) {
                for (j=n-1; j>=0; j--) {
                    if ((j>0) && (selcrit[select[j-1]]<selcrit[i])) {
                        select[j]=select[j-1];
                    }
                    else {
                        select[j]=i;
                        j=-1;
                    }
                }
            }
        }
    }
}

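/* Allocates and initializes the shrink state: all examples start out
 * active, with room for maxhistory snapshots of the alphas. */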
void CSVMLight::init_shrink_state(
        SHRINK_STATE *shrink_state, int32_t totdoc, int32_t maxhistory)
{
    int32_t i;

    shrink_state->deactnum=0;
    shrink_state->active = SG_MALLOC(int32_t, totdoc);
    shrink_state->inactive_since = SG_MALLOC(int32_t, totdoc);
    shrink_state->a_history = SG_MALLOC(float64_t*, maxhistory);
    shrink_state->maxhistory=maxhistory;
    shrink_state->last_lin = SG_MALLOC(float64_t, totdoc);
    shrink_state->last_a = SG_MALLOC(float64_t, totdoc);

    for (i=0; i<totdoc; i++) {
        shrink_state->active[i]=1;
        shrink_state->inactive_since[i]=0;
        shrink_state->last_a[i]=0;
        shrink_state->last_lin[i]=0;
    }
}

void CSVMLight::shrink_state_cleanup(SHRINK_STATE *shrink_state)
{
    SG_FREE(shrink_state->active);
    SG_FREE(shrink_state->inactive_since);
    if (shrink_state->deactnum > 0)
        SG_FREE((shrink_state->a_history[shrink_state->deactnum-1]));
    SG_FREE((shrink_state->a_history));
    SG_FREE((shrink_state->last_a));
    SG_FREE((shrink_state->last_lin));
}

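/* Deactivates examples that have been optimal for more than
 * svm_iter_to_shrink iterations (or are inconsistent), snapshotting the
 * alphas so lin[] can be reconstructed on reactivation; returns the new
 * number of active examples. */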
int32_t CSVMLight::shrink_problem(
        SHRINK_STATE *shrink_state, int32_t *active2dnum,
        int32_t *last_suboptimal_at, int32_t iteration, int32_t totdoc,
        int32_t minshrink, float64_t *a, int32_t *inconsistent, float64_t* c,
        float64_t* lin, int* label)
{
    int32_t i, ii, change, activenum, lastiter;
    float64_t *a_old=NULL;

    activenum=0;
    change=0;
    for (ii=0; active2dnum[ii]>=0; ii++) {
        i=active2dnum[ii];
        activenum++;
        lastiter=last_suboptimal_at[i];

        if (((iteration-lastiter) > learn_parm->svm_iter_to_shrink)
                || (inconsistent[i])) {
            change++;
        }
    }
    if ((change>=minshrink)
            && (shrink_state->deactnum<shrink_state->maxhistory)) {

        if (verbosity>=2) {
            SG_INFO(" Shrinking...");
        }

        if (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())) {
            a_old=SG_MALLOC(float64_t, totdoc);
            shrink_state->a_history[shrink_state->deactnum]=a_old;
            for (i=0; i<totdoc; i++) {
                a_old[i]=a[i];
            }
        }
        for (ii=0; active2dnum[ii]>=0; ii++) {
            i=active2dnum[ii];
            lastiter=last_suboptimal_at[i];
            if (((iteration-lastiter) > learn_parm->svm_iter_to_shrink)
                    || (inconsistent[i])) {
                shrink_state->active[i]=0;
                shrink_state->inactive_since[i]=shrink_state->deactnum;
            }
        }
        activenum = compute_index(shrink_state->active, totdoc, active2dnum);
        shrink_state->deactnum++;
        if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
            shrink_state->deactnum=0;

        if (verbosity>=2) {
            SG_DONE();
            SG_DEBUG("Number of inactive variables = %d\n", totdoc-activenum);
        }
    }
    return activenum;
}

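/* Thread entry points for reactivate_inactive_examples(): the linadd
 * variant recomputes lin[] for inactive examples from the kernel's
 * normal vector, while the vanilla variant replays the alpha changes of
 * changed examples against the inactive ones using full kernel rows. */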
void* CSVMLight::reactivate_inactive_examples_linadd_helper(void* p)
{
	S_THREAD_PARAM_REACTIVATE_LINADD* params = (S_THREAD_PARAM_REACTIVATE_LINADD*) p;

	CKernel* k = params->kernel;
	float64_t* lin = params->lin;
	float64_t* last_lin = params->last_lin;
	int32_t* active = params->active;
	int32_t* docs = params->docs;
	int32_t start = params->start;
	int32_t end = params->end;

	for (int32_t i=start;i<end;i++)
	{
		if (!active[i])
			lin[i] = last_lin[i]+k->compute_optimized(docs[i]);

		last_lin[i]=lin[i];
	}

	return NULL;
}

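/* Thread worker for reactivation without linadd: for every example whose
 * alpha changed, fetch its kernel row against the inactive examples and
 * accumulate the resulting corrections into the linear term. */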
void* CSVMLight::reactivate_inactive_examples_vanilla_helper(void* p)
{
	S_THREAD_PARAM_REACTIVATE_VANILLA* params = (S_THREAD_PARAM_REACTIVATE_VANILLA*) p;
	ASSERT(params);
	ASSERT(params->kernel);
	ASSERT(params->lin);
	ASSERT(params->aicache);
	ASSERT(params->a);
	ASSERT(params->a_old);
	ASSERT(params->changed2dnum);
	ASSERT(params->inactive2dnum);
	ASSERT(params->label);

	CKernel* k = params->kernel;
	float64_t* lin = params->lin;
	float64_t* aicache = params->aicache;
	float64_t* a = params->a;
	float64_t* a_old = params->a_old;
	int32_t* changed2dnum = params->changed2dnum;
	int32_t* inactive2dnum = params->inactive2dnum;
	int32_t* label = params->label;
	int32_t start = params->start;
	int32_t end = params->end;

	for (int32_t ii=start;ii<end;ii++)
	{
		int32_t i=changed2dnum[ii];
		int32_t j=0;
		ASSERT(i>=0);

		k->get_kernel_row(i,inactive2dnum,aicache);
		for (int32_t jj=0;(j=inactive2dnum[jj])>=0;jj++)
			lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
	}
	return NULL;
}

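/* Bring all shrunk examples back into the optimization: recompute their
 * linear terms (via the optimized kernel, batch evaluation, or explicit
 * kernel rows, depending on the kernel's capabilities), re-check the KKT
 * conditions to update *maxdiff, and mark the examples active again. */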
void CSVMLight::reactivate_inactive_examples(
	int32_t* label, float64_t *a, SHRINK_STATE *shrink_state, float64_t *lin,
	float64_t *c, int32_t totdoc, int32_t iteration, int32_t *inconsistent,
	int32_t* docs, float64_t *aicache, float64_t *maxdiff)
{
	register int32_t i,j,ii,jj,t,*changed2dnum,*inactive2dnum;
	int32_t *changed,*inactive;
	register float64_t *a_old,dist;
	float64_t ex_c,target;

	if (kernel->has_property(KP_LINADD) && get_linadd_enabled()) {
		a_old=shrink_state->last_a;

		if (!use_batch_computation || !kernel->has_property(KP_BATCHEVALUATION))
		{
			SG_DEBUG(" clear normal - linadd\n");
			kernel->clear_normal();

			/* fold all alpha changes since the last sync into the kernel's
			 * normal vector */
			int32_t num_modified=0;
			for (i=0;i<totdoc;i++) {
				if(a[i] != a_old[i]) {
					kernel->add_to_normal(docs[i], ((a[i]-a_old[i])*(float64_t)label[i]));
					a_old[i]=a[i];
					num_modified++;
				}
			}

			if (num_modified>0)
			{
				int32_t num_threads=parallel->get_num_threads();
				ASSERT(num_threads>0);
				if (num_threads < 2)
				{
					S_THREAD_PARAM_REACTIVATE_LINADD params;
					params.kernel=kernel;
					params.lin=lin;
					params.last_lin=shrink_state->last_lin;
					params.docs=docs;
					params.active=shrink_state->active;
					params.start=0;
					params.end=totdoc;
					reactivate_inactive_examples_linadd_helper((void*) &params);
				}
#ifdef HAVE_PTHREAD
				else
				{
					pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
					S_THREAD_PARAM_REACTIVATE_LINADD* params = SG_MALLOC(S_THREAD_PARAM_REACTIVATE_LINADD, num_threads);
					int32_t step= totdoc/num_threads;

					for (t=0; t<num_threads-1; t++)
					{
						params[t].kernel=kernel;
						params[t].lin=lin;
						params[t].last_lin=shrink_state->last_lin;
						params[t].docs=docs;
						params[t].active=shrink_state->active;
						params[t].start = t*step;
						params[t].end = (t+1)*step;
						pthread_create(&threads[t], NULL, CSVMLight::reactivate_inactive_examples_linadd_helper, (void*)&params[t]);
					}

					/* the calling thread processes the remainder chunk */
					params[t].kernel=kernel;
					params[t].lin=lin;
					params[t].last_lin=shrink_state->last_lin;
					params[t].docs=docs;
					params[t].active=shrink_state->active;
					params[t].start = t*step;
					params[t].end = totdoc;
					reactivate_inactive_examples_linadd_helper((void*) &params[t]);

					for (t=0; t<num_threads-1; t++)
						pthread_join(threads[t], NULL);

					SG_FREE(threads);
					SG_FREE(params);
				}
#endif
			}
		}
		else
		{
			/* batch evaluation: collect the changed alphas as a weight
			 * vector over their documents */
			float64_t *alphas = SG_MALLOC(float64_t, totdoc);
			int32_t *idx = SG_MALLOC(int32_t, totdoc);
			int32_t num_suppvec=0;

			for (i=0; i<totdoc; i++)
			{
				if(a[i] != a_old[i])
				{
					alphas[num_suppvec] = (a[i]-a_old[i])*(float64_t)label[i];
					idx[num_suppvec] = i;
					a_old[i] = a[i];
					num_suppvec++;
				}
			}

			if (num_suppvec>0)
			{
				int32_t num_inactive=0;
				int32_t* inactive_idx=SG_MALLOC(int32_t, totdoc);

				j=0;
				for (i=0;i<totdoc;i++)
				{
					if(!shrink_state->active[i])
					{
						inactive_idx[j++] = i;
						num_inactive++;
					}
				}

				if (num_inactive>0)
				{
					/* evaluate the expansion on all inactive examples in one
					 * batch and add the corrections to the cached linear terms */
					float64_t* dest = SG_MALLOC(float64_t, num_inactive);
					memset(dest, 0, sizeof(float64_t)*num_inactive);

					kernel->compute_batch(num_inactive, inactive_idx, dest, num_suppvec, idx, alphas);

					j=0;
					for (i=0;i<totdoc;i++) {
						if(!shrink_state->active[i]) {
							lin[i] = shrink_state->last_lin[i] + dest[j++];
						}
						shrink_state->last_lin[i]=lin[i];
					}

					SG_FREE(dest);
				}
				else
				{
					for (i=0;i<totdoc;i++)
						shrink_state->last_lin[i]=lin[i];
				}
				SG_FREE(inactive_idx);
			}
			SG_FREE(alphas);
			SG_FREE(idx);
		}

		kernel->delete_optimization();
	}
	else
	{
		/* no linadd: replay the alpha history snapshot by snapshot and
		 * correct the linear term of the examples that were shrunk at each
		 * stage */
		changed=SG_MALLOC(int32_t, totdoc);
		changed2dnum=SG_MALLOC(int32_t, totdoc+11);
		inactive=SG_MALLOC(int32_t, totdoc);
		inactive2dnum=SG_MALLOC(int32_t, totdoc+11);
		for (t=shrink_state->deactnum-1;(t>=0) && shrink_state->a_history[t];t--)
		{
			if(verbosity>=2) {
				SG_INFO("%d..",t);
			}
			a_old=shrink_state->a_history[t];
			for (i=0;i<totdoc;i++) {
				inactive[i]=((!shrink_state->active[i])
					&& (shrink_state->inactive_since[i] == t));
				changed[i]= (a[i] != a_old[i]);
			}
			compute_index(inactive,totdoc,inactive2dnum);
			compute_index(changed,totdoc,changed2dnum);

			/* this path currently runs single-threaded (num_threads is fixed
			 * to 1 here); the pthread code below is therefore unreachable */
			int32_t num_threads=1;
			ASSERT(num_threads>0);

			if (num_threads < 2)
			{
				for (ii=0;(i=changed2dnum[ii])>=0;ii++) {
					kernel->get_kernel_row(i,inactive2dnum,aicache);
					for (jj=0;(j=inactive2dnum[jj])>=0;jj++)
						lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
				}
			}
#ifdef HAVE_PTHREAD
			else
			{
				int32_t num_changed=0;
				for (ii=0;changed2dnum[ii]>=0;ii++)
					num_changed++;

				if (num_changed>0)
				{
					pthread_t* threads= SG_MALLOC(pthread_t, num_threads-1);
					S_THREAD_PARAM_REACTIVATE_VANILLA* params = SG_MALLOC(S_THREAD_PARAM_REACTIVATE_VANILLA, num_threads);
					int32_t step= num_changed/num_threads;

					/* each thread accumulates into its own scratch copy of
					 * lin, which is summed up after the join */
					float64_t* tmp_lin=SG_MALLOC(float64_t, totdoc*num_threads);
					memset(tmp_lin, 0, sizeof(float64_t)*((size_t) totdoc)*num_threads);
					float64_t* tmp_aicache=SG_MALLOC(float64_t, totdoc*num_threads);
					memset(tmp_aicache, 0, sizeof(float64_t)*((size_t) totdoc)*num_threads);

					int32_t thr;
					for (thr=0; thr<num_threads-1; thr++)
					{
						params[thr].kernel=kernel;
						params[thr].lin=&tmp_lin[thr*totdoc];
						params[thr].aicache=&tmp_aicache[thr*totdoc];
						params[thr].a=a;
						params[thr].a_old=a_old;
						params[thr].changed2dnum=changed2dnum;
						params[thr].inactive2dnum=inactive2dnum;
						params[thr].label=label;
						params[thr].start = thr*step;
						params[thr].end = (thr+1)*step;
						pthread_create(&threads[thr], NULL, CSVMLight::reactivate_inactive_examples_vanilla_helper, (void*)&params[thr]);
					}

					/* the calling thread processes the remainder chunk */
					params[thr].kernel=kernel;
					params[thr].lin=&tmp_lin[thr*totdoc];
					params[thr].aicache=&tmp_aicache[thr*totdoc];
					params[thr].a=a;
					params[thr].a_old=a_old;
					params[thr].changed2dnum=changed2dnum;
					params[thr].inactive2dnum=inactive2dnum;
					params[thr].label=label;
					params[thr].start = thr*step;
					params[thr].end = num_changed;
					reactivate_inactive_examples_vanilla_helper((void*) &params[thr]);

					for (jj=0;(j=inactive2dnum[jj])>=0;jj++)
						lin[j]+=tmp_lin[totdoc*thr+j];

					for (thr=0; thr<num_threads-1; thr++)
					{
						pthread_join(threads[thr], NULL);

						for (jj=0;(j=inactive2dnum[jj])>=0;jj++)
							lin[j]+=tmp_lin[totdoc*thr+j];
					}

					SG_FREE(tmp_lin);
					SG_FREE(tmp_aicache);
					SG_FREE(threads);
					SG_FREE(params);
				}
			}
#endif
		}
		SG_FREE(changed);
		SG_FREE(changed2dnum);
		SG_FREE(inactive);
		SG_FREE(inactive2dnum);
	}

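	/* re-check the KKT conditions on the refreshed linear terms, record the
	 * largest violation in *maxdiff, and reactivate every example that is
	 * not provably optimal at its current bound */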
	(*maxdiff)=0;
	for (i=0;i<totdoc;i++) {
		shrink_state->inactive_since[i]=shrink_state->deactnum-1;
		if(!inconsistent[i]) {
			dist=(lin[i]-model->b)*(float64_t)label[i];
			target=-(learn_parm->eps[i]-(float64_t)label[i]*c[i]);
			ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
			if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
				if((dist-target)>(*maxdiff))
					(*maxdiff)=dist-target;
			}
			else if((a[i]<ex_c) && (dist < target)) {
				if((target-dist)>(*maxdiff))
					(*maxdiff)=target-dist;
			}
			if((a[i]>(0+learn_parm->epsilon_a))
				&& (a[i]<ex_c)) {
				shrink_state->active[i]=1;
			}
			else if((a[i]<=(0+learn_parm->epsilon_a)) && (dist < (target+learn_parm->epsilon_shrink))) {
				shrink_state->active[i]=1;
			}
			else if((a[i]>=ex_c)
				&& (dist > (target-learn_parm->epsilon_shrink))) {
				shrink_state->active[i]=1;
			}
			else if(learn_parm->sharedslack) { /* with shared slacks, all examples must stay active */
				shrink_state->active[i]=1;
			}
		}
	}

	if (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())) {
		/* update the most recent alpha snapshot and drop all older ones */
		for (i=0;i<totdoc;i++) {
			(shrink_state->a_history[shrink_state->deactnum-1])[i]=a[i];
		}
		for (t=shrink_state->deactnum-2;(t>=0) && shrink_state->a_history[t];t--) {
			SG_FREE(shrink_state->a_history[t]);
			shrink_state->a_history[t]=0;
		}
	}
}
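/* Solve the current subproblem with the interior-point solver pr_loqo,
 * retrying with a widened margin and relaxed precision until it reports an
 * optimal solution. A sketch of the subproblem in the notation of the QP
 * struct (the standard SVMLight formulation; opt_g is only accessed in its
 * upper triangle):
 *
 *   minimize    opt_g0'*x + 0.5*x'*opt_g*x
 *   subject to  opt_ce'*x + opt_ce0 = 0,  opt_low <= x <= opt_up
 *
 * Returns the primal solution, or opt_xinit if the solver failed. */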
float64_t* CSVMLight::optimize_qp(
	QP *qp, float64_t *epsilon_crit, int32_t nx, float64_t *threshold,
	int32_t& svm_maxqpsize)
{
	register int32_t i, j, result;
	float64_t margin, obj_before, obj_after;
	float64_t sigdig, dist, epsilon_loqo;
	int32_t iter;

	if(!primal) { /* allocate working space for pr_loqo */
		primal=SG_MALLOC(float64_t, nx*3);
		dual=SG_MALLOC(float64_t, nx*2+1);
	}

	/* objective value at the starting point, using the upper triangle of
	 * the Hessian */
	obj_before=0;
	for(i=0;i<qp->opt_n;i++) {
		obj_before+=(qp->opt_g0[i]*qp->opt_xinit[i]);
		obj_before+=(0.5*qp->opt_xinit[i]*qp->opt_xinit[i]*qp->opt_g[i*qp->opt_n+i]);
		for(j=0;j<i;j++) {
			obj_before+=(qp->opt_xinit[j]*qp->opt_xinit[i]*qp->opt_g[j*qp->opt_n+i]);
		}
	}

	result=STILL_RUNNING;
	qp->opt_ce0[0]*=(-1.0);

	/* re-run pr_loqo with increasingly conservative settings until it
	 * reports an optimal solution */
	for(margin=init_margin,iter=init_iter;
		(margin<=0.9999999) && (result!=OPTIMAL_SOLUTION);) {

		opt_precision=CMath::max(opt_precision, DEF_PRECISION);
		sigdig=-log10(opt_precision);

		result=pr_loqo((int32_t)qp->opt_n,(int32_t)qp->opt_m,
			(float64_t *)qp->opt_g0,(float64_t *)qp->opt_g,
			(float64_t *)qp->opt_ce,(float64_t *)qp->opt_ce0,
			(float64_t *)qp->opt_low,(float64_t *)qp->opt_up,
			(float64_t *)primal,(float64_t *)dual,
			(int32_t)(verbosity-2),
			(float64_t)sigdig,(int32_t)iter,
			(float64_t)margin,(float64_t)(qp->opt_up[0])/4.0,(int32_t)0);

		if(CMath::is_nan(dual[0])) { /* numerical trouble in pr_loqo */
			if(verbosity>=2) {
				SG_SDEBUG("Restarting PR_LOQO with more conservative parameters.\n");
			}
			if(init_margin<0.80) { /* become more conservative in general */
				init_margin=(4.0*margin+1.0)/5.0;
			}
			margin=(margin+1.0)/2.0;
			(opt_precision)*=10.0;
			if(verbosity>=2) {
				SG_SDEBUG("Reducing precision of PR_LOQO.\n");
			}
		}
		else if(result!=OPTIMAL_SOLUTION) {
			iter+=2000;
			init_iter+=10;
			(opt_precision)*=10.0;
			if(verbosity>=2) {
				SG_SDEBUG("Reducing precision of PR_LOQO due to (%d).\n",result);
			}
		}
	}

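	/* take the threshold from the dual solution (when an equality
	 * constraint is present), then clip primal variables that ended up
	 * within epsilon_loqo of a bound exactly onto that bound */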
	if(qp->opt_m)
		model_b=dual[0];
	else
		model_b=0;

	epsilon_loqo=1E-10;
	for(i=0;i<qp->opt_n;i++) {
		dist=-model_b*qp->opt_ce[i];
		dist+=(qp->opt_g0[i]+1.0);
		for(j=0;j<i;j++) {
			dist+=(primal[j]*qp->opt_g[j*qp->opt_n+i]);
		}
		for(j=i;j<qp->opt_n;j++) {
			dist+=(primal[j]*qp->opt_g[i*qp->opt_n+j]);
		}

		if((primal[i]<(qp->opt_up[i]-epsilon_loqo)) && (dist < (1.0-(*epsilon_crit)))) {
			epsilon_loqo=(qp->opt_up[i]-primal[i])*2.0;
		}
		else if((primal[i]>(0+epsilon_loqo)) && (dist > (1.0+(*epsilon_crit)))) {
			epsilon_loqo=primal[i]*2.0;
		}
	}

	/* clip alphas near the box bounds onto the bounds */
	for(i=0;i<qp->opt_n;i++) {
		if(primal[i]<=(0+epsilon_loqo)) {
			primal[i]=0;
		}
		else if(primal[i]>=(qp->opt_up[i]-epsilon_loqo)) {
			primal[i]=qp->opt_up[i];
		}
	}

	/* objective value after optimization */
	obj_after=0;
	for(i=0;i<qp->opt_n;i++) {
		obj_after+=(qp->opt_g0[i]*primal[i]);
		obj_after+=(0.5*primal[i]*primal[i]*qp->opt_g[i*qp->opt_n+i]);
		for(j=0;j<i;j++) {
			obj_after+=(primal[j]*primal[i]*qp->opt_g[j*qp->opt_n+i]);
		}
	}

	/* if the solution is invalid, fall back to the starting point and
	 * reduce the size of future subproblems */
	if(CMath::is_nan(obj_after) || CMath::is_nan(model_b)) {
		for(i=0;i<qp->opt_n;i++) {
			primal[i]=qp->opt_xinit[i];
		}
		model_b=0;
		if(svm_maxqpsize>2) {
			svm_maxqpsize--;
		}
	}

	/* the solver should have decreased the objective; if not, demand more
	 * precision next time */
	if(obj_after >= obj_before) {
		(opt_precision)/=100.0;
		precision_violations++;
		if(verbosity>=2) {
			SG_SDEBUG("Increasing Precision of PR_LOQO.\n");
		}
	}

	/* too many precision problems: relax the stopping criterion instead */
	if(precision_violations > 5000) {
		(*epsilon_crit)*=10.0;
		precision_violations=0;
		SG_SINFO("Relaxing epsilon on KT-Conditions.\n");
	}

	(*threshold)=model_b;

	if(result!=OPTIMAL_SOLUTION) {
		SG_SERROR("PR_LOQO did not converge.\n");
		return(qp->opt_xinit);
	}
	else {
		return(primal);
	}
}

#endif //USE_SVMLIGHT