00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include <shogun/lib/config.h>
00012
00013 #ifdef USE_SVMLIGHT
00014
00015 #include <shogun/io/SGIO.h>
00016 #include <shogun/mathematics/lapack.h>
00017 #include <shogun/lib/Signal.h>
00018 #include <shogun/mathematics/Math.h>
00019 #include <shogun/regression/svr/SVRLight.h>
00020 #include <shogun/machine/KernelMachine.h>
00021 #include <shogun/kernel/CombinedKernel.h>
00022 #include <shogun/labels/RegressionLabels.h>
00023
00024 #include <unistd.h>
00025
00026 #ifdef USE_CPLEX
00027 extern "C" {
00028 #include <ilcplex/cplex.h>
00029 }
00030 #endif
00031
00032 #include <shogun/base/Parallel.h>
00033
00034 #ifdef HAVE_PTHREAD
00035 #include <pthread.h>
00036 #endif
00037
00038 using namespace shogun;
00039
00040 #ifndef DOXYGEN_SHOULD_SKIP_THIS
/** Work description for one thread of the parallel linadd update of the
 * linear term (see CSVRLight::update_linear_component_linadd_helper).
 * Threads write disjoint entries of lin, so no locking is needed.
 */
struct S_THREAD_PARAM
{
	/** linear term array shared by all threads */
	float64_t* lin;
	/** range of positions in active2dnum handled by this thread:
	 * start inclusive, end exclusive */
	int32_t start, end;
	/** -1 terminated list of active example indices */
	int32_t* active2dnum;
	/** document index per (doubled) example */
	int32_t* docs;
	/** kernel used to evaluate the optimized inner products */
	CKernel* kernel;
	/** number of original (un-doubled) training vectors, needed to map
	 * doubled indices back via regression_fix_index2 */
	int32_t num_vectors;
};
00050 #endif // DOXYGEN_SHOULD_SKIP_THIS
00051
/** Constructor.
 *
 * @param C regularization constant
 * @param eps width of the epsilon-insensitive tube
 * @param k kernel
 * @param lab training labels (regression targets)
 */
CSVRLight::CSVRLight(float64_t C, float64_t eps, CKernel* k, CLabels* lab)
: CSVMLight(C, k, lab)
{
	set_tube_epsilon(eps);
}
00057
/** Default constructor; all setup is delegated to CSVMLight. */
CSVRLight::CSVRLight()
: CSVMLight()
{
}
00062
/** Destructor; kernel, labels and model are released by the base classes. */
CSVRLight::~CSVRLight()
{
}
00067
/** @return classifier type CT_SVRLIGHT */
EMachineType CSVRLight::get_classifier_type()
{
	return CT_SVRLIGHT;
}
00072
/** Train the SVR-light machine.
 *
 * Configures the SVMlight learn parameters, validates that a kernel and
 * labels are present, optionally (re)initializes the kernel with the
 * given data, runs svr_learn() and copies the resulting model (alphas,
 * support vectors, bias) into this machine.
 *
 * @param data optional training features; if given, the number of
 *        vectors must match the number of labels
 * @return true on success (failures are reported via SG_ERROR)
 */
bool CSVRLight::train_machine(CFeatures* data)
{
	// fixed solver settings; most learn_parm fields mirror SVMlight defaults
	verbosity=1;
	init_margin=0.15;
	init_iter=500;
	precision_violations=0;
	opt_precision=DEF_PRECISION;

	strcpy (learn_parm->predfile, "");
	learn_parm->biased_hyperplane=1;
	learn_parm->sharedslack=0;
	learn_parm->remove_inconsistent=0;
	learn_parm->skip_final_opt_check=1;
	learn_parm->svm_maxqpsize=get_qpsize();
	learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize-1;
	learn_parm->maxiter=100000;
	learn_parm->svm_iter_to_shrink=100;
	learn_parm->svm_c=get_C1();
	learn_parm->transduction_posratio=0.33;
	// asymmetric cost: C2/C1 scales the cost of the +1 (upper-tube) copies
	learn_parm->svm_costratio=get_C2()/get_C1();
	learn_parm->svm_costratio_unlab=1.0;
	learn_parm->svm_unlabbound=1E-5;
	learn_parm->epsilon_crit=epsilon;
	learn_parm->epsilon_a=1E-15;
	learn_parm->compute_loo=0;
	learn_parm->rho=1.0;
	learn_parm->xa_depth=0;

	if (!kernel)
	{
		SG_ERROR( "SVR_light can not proceed without kernel!\n");
		return false ;
	}

	if (!m_labels)
	{
		SG_ERROR( "SVR_light can not proceed without labels!\n");
		return false;
	}

	if (data)
	{
		if (m_labels->get_num_labels() != data->get_num_vectors())
			SG_ERROR("Number of training vectors does not match number of labels\n");
		kernel->init(data, data);
	}

	// reset any normal vector left over from a previous linadd training run
	if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
		kernel->clear_normal();

	SG_DEBUG( "qpsize = %i\n", learn_parm->svm_maxqpsize) ;
	SG_DEBUG( "epsilon = %1.1e\n", learn_parm->epsilon_crit) ;
	SG_DEBUG( "kernel->has_property(KP_LINADD) = %i\n", kernel->has_property(KP_LINADD)) ;
	SG_DEBUG( "kernel->has_property(KP_KERNCOMBINATION) = %i\n", kernel->has_property(KP_KERNCOMBINATION)) ;
	SG_DEBUG( "get_linadd_enabled() = %i\n", get_linadd_enabled()) ;
	SG_DEBUG( "kernel->get_num_subkernels() = %i\n", kernel->get_num_subkernels()) ;

	// the kernel cache is pointless for custom kernels (already a matrix)
	// and for linadd-optimized kernels (row computation is cheap)
	use_kernel_cache = !((kernel->get_kernel_type() == K_CUSTOM) ||
			(get_linadd_enabled() && kernel->has_property(KP_LINADD)));

	SG_DEBUG( "use_kernel_cache = %i\n", use_kernel_cache) ;

	// run the actual optimization on the doubled regression problem
	svr_learn();

	// brutally copy over the model; entry 0 of model arrays is unused
	create_new_model(model->sv_num-1);
	set_bias(-model->b);
	for (int32_t i=0; i<model->sv_num-1; i++)
	{
		set_alpha(i, model->alpha[i+1]);
		set_support_vector(i, model->supvec[i+1]);
	}

	if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
		kernel->clear_normal() ;

	return true ;
}
00154
00155 void CSVRLight::svr_learn()
00156 {
00157 int32_t *inconsistent, i, j;
00158 int32_t upsupvecnum;
00159 float64_t maxdiff, *lin, *c, *a;
00160 int32_t iterations;
00161 float64_t *xi_fullset;
00162 float64_t *a_fullset;
00163 TIMING timing_profile;
00164 SHRINK_STATE shrink_state;
00165 int32_t* label;
00166 int32_t* docs;
00167
00168 ASSERT(m_labels);
00169 int32_t totdoc=m_labels->get_num_labels();
00170 num_vectors=totdoc;
00171
00172
00173 docs=SG_MALLOC(int32_t, 2*totdoc);
00174 label=SG_MALLOC(int32_t, 2*totdoc);
00175 c = SG_MALLOC(float64_t, 2*totdoc);
00176
00177 for(i=0;i<totdoc;i++) {
00178 docs[i]=i;
00179 j=2*totdoc-1-i;
00180 label[i]=+1;
00181 c[i]=((CRegressionLabels*) m_labels)->get_label(i);
00182 docs[j]=j;
00183 label[j]=-1;
00184 c[j]=((CRegressionLabels*) m_labels)->get_label(i);
00185 }
00186 totdoc*=2;
00187
00188
00189 kernel->resize_kernel_cache( kernel->get_cache_size(), true);
00190
00191 if (kernel->get_kernel_type() == K_COMBINED)
00192 {
00193 CCombinedKernel* k = (CCombinedKernel*) kernel;
00194 CKernel* kn = k->get_first_kernel();
00195
00196 while (kn)
00197 {
00198 kn->resize_kernel_cache( kernel->get_cache_size(), true);
00199 SG_UNREF(kn);
00200 kn = k->get_next_kernel();
00201 }
00202 }
00203
00204 timing_profile.time_kernel=0;
00205 timing_profile.time_opti=0;
00206 timing_profile.time_shrink=0;
00207 timing_profile.time_update=0;
00208 timing_profile.time_model=0;
00209 timing_profile.time_check=0;
00210 timing_profile.time_select=0;
00211
00212 SG_FREE(W);
00213 W=NULL;
00214
00215 if (kernel->has_property(KP_KERNCOMBINATION) && callback)
00216 {
00217 W = SG_MALLOC(float64_t, totdoc*kernel->get_num_subkernels());
00218 for (i=0; i<totdoc*kernel->get_num_subkernels(); i++)
00219 W[i]=0;
00220 }
00221
00222
00223 if((learn_parm->svm_newvarsinqp < 2)
00224 || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
00225 learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
00226 }
00227
00228 init_shrink_state(&shrink_state,totdoc,(int32_t)MAXSHRINK);
00229
00230 inconsistent = SG_MALLOC(int32_t, totdoc);
00231 a = SG_MALLOC(float64_t, totdoc);
00232 a_fullset = SG_MALLOC(float64_t, totdoc);
00233 xi_fullset = SG_MALLOC(float64_t, totdoc);
00234 lin = SG_MALLOC(float64_t, totdoc);
00235 learn_parm->svm_cost = SG_MALLOC(float64_t, totdoc);
00236 if (m_linear_term.vlen>0)
00237 learn_parm->eps=get_linear_term_array();
00238 else
00239 {
00240 learn_parm->eps=SG_MALLOC(float64_t, totdoc);
00241 SGVector<float64_t>::fill_vector(learn_parm->eps, totdoc, tube_epsilon);
00242 }
00243
00244 SG_FREE(model->supvec);
00245 SG_FREE(model->alpha);
00246 SG_FREE(model->index);
00247 model->supvec = SG_MALLOC(int32_t, totdoc+2);
00248 model->alpha = SG_MALLOC(float64_t, totdoc+2);
00249 model->index = SG_MALLOC(int32_t, totdoc+2);
00250
00251 model->at_upper_bound=0;
00252 model->b=0;
00253 model->supvec[0]=0;
00254 model->alpha[0]=0;
00255 model->totdoc=totdoc;
00256
00257 model->kernel=kernel;
00258
00259 model->sv_num=1;
00260 model->loo_error=-1;
00261 model->loo_recall=-1;
00262 model->loo_precision=-1;
00263 model->xa_error=-1;
00264 model->xa_recall=-1;
00265 model->xa_precision=-1;
00266
00267 for(i=0;i<totdoc;i++) {
00268 inconsistent[i]=0;
00269 a[i]=0;
00270 lin[i]=0;
00271
00272 if(label[i] > 0) {
00273 learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
00274 fabs((float64_t)label[i]);
00275 }
00276 else if(label[i] < 0) {
00277 learn_parm->svm_cost[i]=learn_parm->svm_c*fabs((float64_t)label[i]);
00278 }
00279 else
00280 ASSERT(false);
00281 }
00282
00283 if(verbosity==1) {
00284 SG_DEBUG( "Optimizing...\n");
00285 }
00286
00287
00288 SG_DEBUG( "num_train: %d\n", totdoc);
00289 iterations=optimize_to_convergence(docs,label,totdoc,
00290 &shrink_state,inconsistent,a,lin,
00291 c,&timing_profile,
00292 &maxdiff,(int32_t)-1,
00293 (int32_t)1);
00294
00295
00296 if(verbosity>=1) {
00297 SG_DONE();
00298 SG_INFO("(%ld iterations)\n",iterations);
00299 SG_INFO( "Optimization finished (maxdiff=%.8f).\n",maxdiff);
00300 SG_INFO( "obj = %.16f, rho = %.16f\n",get_objective(),model->b);
00301
00302 upsupvecnum=0;
00303
00304 SG_DEBUG( "num sv: %d\n", model->sv_num);
00305 for(i=1;i<model->sv_num;i++)
00306 {
00307 if(fabs(model->alpha[i]) >=
00308 (learn_parm->svm_cost[model->supvec[i]]-
00309 learn_parm->epsilon_a))
00310 upsupvecnum++;
00311 }
00312 SG_INFO( "Number of SV: %ld (including %ld at upper bound)\n",
00313 model->sv_num-1,upsupvecnum);
00314 }
00315
00316
00317
00318 for(i=1;i<model->sv_num;i++) {
00319 j=model->supvec[i];
00320 if(j >= (totdoc/2)) {
00321 j=totdoc-j-1;
00322 }
00323 model->supvec[i]=j;
00324 }
00325
00326 shrink_state_cleanup(&shrink_state);
00327 SG_FREE(label);
00328 SG_FREE(inconsistent);
00329 SG_FREE(c);
00330 SG_FREE(a);
00331 SG_FREE(a_fullset);
00332 SG_FREE(xi_fullset);
00333 SG_FREE(lin);
00334 SG_FREE(learn_parm->svm_cost);
00335 SG_FREE(docs);
00336 }
00337
00338 float64_t CSVRLight::compute_objective_function(
00339 float64_t *a, float64_t *lin, float64_t *c, float64_t* eps, int32_t *label,
00340 int32_t totdoc)
00341 {
00342
00343 float64_t criterion=0;
00344
00345 for(int32_t i=0;i<totdoc;i++)
00346 criterion+=(eps[i]-(float64_t)label[i]*c[i])*a[i]+0.5*a[i]*label[i]*lin[i];
00347
00348
00349
00350
00351
00352
00353
00354
00355
00356
00357
00358
00359 return(criterion);
00360 }
00361
00362 void* CSVRLight::update_linear_component_linadd_helper(void *params_)
00363 {
00364 S_THREAD_PARAM * params = (S_THREAD_PARAM*) params_ ;
00365
00366 int32_t jj=0, j=0 ;
00367
00368 for(jj=params->start;(jj<params->end) && (j=params->active2dnum[jj])>=0;jj++)
00369 params->lin[j]+=params->kernel->compute_optimized(CSVRLight::regression_fix_index2(params->docs[j], params->num_vectors));
00370
00371 return NULL ;
00372 }
00373
00374 int32_t CSVRLight::regression_fix_index(int32_t i)
00375 {
00376 if (i>=num_vectors)
00377 i=2*num_vectors-1-i;
00378
00379 return i;
00380 }
00381
00382 int32_t CSVRLight::regression_fix_index2(
00383 int32_t i, int32_t num_vectors)
00384 {
00385 if (i>=num_vectors)
00386 i=2*num_vectors-1-i;
00387
00388 return i;
00389 }
00390
00391 float64_t CSVRLight::compute_kernel(int32_t i, int32_t j)
00392 {
00393 i=regression_fix_index(i);
00394 j=regression_fix_index(j);
00395 return kernel->kernel(i, j);
00396 }
00397
/** Keep the linear part of the gradient (lin) in sync after the alphas of
 * the working set changed.
 *
 * Four paths:
 *  - LINADD + MKL callback: delegate to update_linear_component_mkl_linadd
 *  - LINADD only: fold the alpha deltas into the kernel's normal vector and
 *    re-evaluate compute_optimized() for all active examples, optionally
 *    split over pthreads
 *  - MKL callback only: delegate to update_linear_component_mkl
 *  - plain: explicit kernel rows for every changed working-set example
 *
 * @param docs document indices
 * @param label +1/-1 tube-side labels
 * @param active2dnum -1 terminated list of active examples
 * @param a current alphas
 * @param a_old alphas before the QP step
 * @param working2dnum -1 terminated list of working-set examples
 * @param totdoc number of (doubled) examples
 * @param lin linear term to update (in/out)
 * @param aicache scratch buffer for one kernel row
 * @param c regression targets (unused on most paths)
 */
void CSVRLight::update_linear_component(
	int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
	float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
	float64_t *aicache, float64_t* c)
{
	register int32_t i=0,ii=0,j=0,jj=0;

	if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
	{
		if (callback)
		{
			update_linear_component_mkl_linadd(docs, label, active2dnum, a, a_old, working2dnum,
					totdoc, lin, aicache, c) ;
		}
		else
		{
			kernel->clear_normal();

			// add the alpha deltas of the working set to the normal vector
			int32_t num_working=0;
			for(ii=0;(i=working2dnum[ii])>=0;ii++) {
				if(a[i] != a_old[i]) {
					kernel->add_to_normal(regression_fix_index(docs[i]), (a[i]-a_old[i])*(float64_t)label[i]);
					num_working++;
				}
			}

			if (num_working>0)
			{
				if (parallel->get_num_threads() < 2)
				{
					for(jj=0;(j=active2dnum[jj])>=0;jj++) {
						lin[j]+=kernel->compute_optimized(regression_fix_index(docs[j]));
					}
				}
#ifdef HAVE_PTHREAD
				else
				{
					int32_t num_elem = 0 ;
					for(jj=0;(j=active2dnum[jj])>=0;jj++) num_elem++ ;

					// num_threads-1 workers each take `step` consecutive
					// positions of active2dnum; the main thread finishes
					// the remainder below
					pthread_t* threads = SG_MALLOC(pthread_t, parallel->get_num_threads()-1);
					S_THREAD_PARAM* params = SG_MALLOC(S_THREAD_PARAM, parallel->get_num_threads()-1);
					int32_t start = 0 ;
					int32_t step = num_elem/parallel->get_num_threads() ;
					int32_t end = step ;

					for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
					{
						params[t].kernel = kernel ;
						params[t].lin = lin ;
						params[t].docs = docs ;
						params[t].active2dnum=active2dnum ;
						params[t].start = start ;
						params[t].end = end ;
						params[t].num_vectors=num_vectors ;

						start=end ;
						end+=step ;
						pthread_create(&threads[t], NULL, update_linear_component_linadd_helper, (void*)&params[t]) ;
					}

					// main thread processes the tail after the last worker slice
					for(jj=params[parallel->get_num_threads()-2].end;(j=active2dnum[jj])>=0;jj++) {
						lin[j]+=kernel->compute_optimized(regression_fix_index(docs[j]));
					}
					void* ret;
					for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
						pthread_join(threads[t], &ret) ;

					SG_FREE(params);
					SG_FREE(threads);
				}
#endif
			}
		}
	}
	else
	{
		if (callback)
		{
			update_linear_component_mkl(docs, label, active2dnum,
					a, a_old, working2dnum, totdoc, lin, aicache, c) ;
		}
		else {
			// plain path: one kernel row per changed working-set example
			for(jj=0;(i=working2dnum[jj])>=0;jj++) {
				if(a[i] != a_old[i]) {
					kernel->get_kernel_row(i,active2dnum,aicache);
					for(ii=0;(j=active2dnum[ii])>=0;ii++)
						lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
				}
			}
		}
	}
}
00495
/** MKL variant of the linear-term update: accumulate the per-subkernel
 * contributions W[j*num_kernels+n] += delta_alpha_i * k_n(i,j) * y_i for
 * every changed alpha, then hand W over to the MKL solver via
 * call_mkl_callback() (which also recomputes lin from the new weights).
 *
 * For a combined kernel with per-kernel weights, each subkernel's full
 * row is fetched directly; otherwise subkernels are singled out by
 * temporarily setting one weight to 1 and all others to 0.
 *
 * @param docs document indices (unused here)
 * @param label +1/-1 tube-side labels
 * @param active2dnum -1 terminated active list (unused here)
 * @param a current alphas
 * @param a_old alphas before the QP step
 * @param working2dnum working set (unused here; all examples scanned)
 * @param totdoc number of (doubled) examples
 * @param lin linear term, rewritten by call_mkl_callback()
 * @param aicache scratch buffer for one kernel row
 * @param c regression targets, forwarded to call_mkl_callback()
 */
void CSVRLight::update_linear_component_mkl(
	int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
	float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
	float64_t *aicache, float64_t* c)
{
	int32_t num = totdoc;
	int32_t num_weights = -1;
	int32_t num_kernels = kernel->get_num_subkernels() ;
	const float64_t* old_beta = kernel->get_subkernel_weights(num_weights);

	ASSERT(num_weights==num_kernels);

	if ((kernel->get_kernel_type()==K_COMBINED) &&
			(!((CCombinedKernel*)kernel)->get_append_subkernel_weights()))
	{
		CCombinedKernel* k = (CCombinedKernel*) kernel;
		CKernel* kn = k->get_first_kernel() ;
		int32_t n = 0, i, j ;

		while (kn!=NULL)
		{
			for(i=0;i<num;i++)
			{
				if(a[i] != a_old[i])
				{
					// full row (NULL active list), bypassing the cache
					kn->get_kernel_row(i,NULL,aicache, true);
					// NOTE(review): the row index i is used unmapped while
					// the column j is mapped through regression_fix_index --
					// confirm this asymmetry matches the doubled-example
					// layout of the subkernel rows
					for(j=0;j<num;j++)
						W[j*num_kernels+n]+=(a[i]-a_old[i])*aicache[regression_fix_index(j)]*(float64_t)label[i];
				}
			}
			SG_UNREF(kn);
			kn = k->get_next_kernel();
			n++ ;
		}
	}
	else
	{
		// single out each subkernel by a one-hot weight vector
		float64_t* w_backup = SG_MALLOC(float64_t, num_kernels);
		float64_t* w1 = SG_MALLOC(float64_t, num_kernels);

		for (int32_t i=0; i<num_kernels; i++)
		{
			w_backup[i] = old_beta[i] ;
			w1[i]=0.0 ;
		}
		for (int32_t n=0; n<num_kernels; n++)
		{
			w1[n]=1.0 ;
			kernel->set_subkernel_weights(SGVector<float64_t>(w1, num_weights)) ;

			for(int32_t i=0;i<num;i++)
			{
				if(a[i] != a_old[i])
				{
					for(int32_t j=0;j<num;j++)
						W[j*num_kernels+n]+=(a[i]-a_old[i])*compute_kernel(i,j)*(float64_t)label[i];
				}
			}
			w1[n]=0.0 ;
		}

		// restore the original subkernel weights
		kernel->set_subkernel_weights(SGVector<float64_t>(w_backup,num_weights));

		SG_FREE(w_backup);
		SG_FREE(w1);
	}

	call_mkl_callback(a, label, lin, c, totdoc);
}
00567
00568
/** MKL + LINADD variant of the linear-term update: temporarily set all
 * subkernel weights to 1, fold the alpha deltas of the working set into
 * the kernel's normal vector, then let compute_by_subkernel() fill one
 * row of W (per-subkernel outputs) per example before restoring the
 * weights and invoking the MKL callback.
 *
 * @param docs document indices
 * @param label +1/-1 tube-side labels
 * @param active2dnum -1 terminated active list (unused here)
 * @param a current alphas
 * @param a_old alphas before the QP step
 * @param working2dnum -1 terminated working-set list
 * @param totdoc number of (doubled) examples
 * @param lin linear term, rewritten by call_mkl_callback()
 * @param aicache unused on this path
 * @param c regression targets, forwarded to call_mkl_callback()
 */
void CSVRLight::update_linear_component_mkl_linadd(
	int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
	float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
	float64_t *aicache, float64_t* c)
{
	// Backup the subkernel weights and set them all to one so each
	// subkernel contributes unscaled to compute_by_subkernel().
	int32_t num_weights = -1;
	int32_t num_kernels = kernel->get_num_subkernels() ;
	const float64_t* old_beta = kernel->get_subkernel_weights(num_weights);

	ASSERT(num_weights==num_kernels);

	float64_t* w_backup=SG_MALLOC(float64_t, num_kernels);
	float64_t* w1=SG_MALLOC(float64_t, num_kernels);

	for (int32_t i=0; i<num_kernels; i++)
	{
		w_backup[i] = old_beta[i] ;
		w1[i]=1.0 ;
	}

	kernel->set_subkernel_weights(SGVector<float64_t>(w1, num_weights));

	// fold the alpha deltas of the working set into the normal vector
	kernel->clear_normal();
	for(int32_t ii=0, i=0;(i=working2dnum[ii])>=0;ii++) {
		if(a[i] != a_old[i]) {
			kernel->add_to_normal(regression_fix_index(docs[i]), (a[i]-a_old[i])*(float64_t)label[i]);
		}
	}

	// NOTE(review): only the first num_vectors (un-doubled) rows of W are
	// refreshed although W was sized for totdoc rows -- confirm the MKL
	// callback only consumes the un-doubled part on this path
	for (int32_t i=0; i<num_vectors; i++)
		kernel->compute_by_subkernel(i,&W[i*num_kernels]) ;

	// restore the original subkernel weights
	kernel->set_subkernel_weights(SGVector<float64_t>(w_backup,num_weights));

	call_mkl_callback(a, label, lin, c, totdoc);
}
00611
/** Invoke the MKL callback with the current per-subkernel objective
 * contributions and rebuild lin from the updated subkernel weights.
 *
 * sumw[d] = 0.5 * sum_i a[i]*y_i*W[i*num_kernels+d] (quadratic part per
 * subkernel) and sumalpha = -sum_i a[i]*(eps[i]-y_i*c[i]) (linear part)
 * are handed to the callback; afterwards lin = W * new_beta.  Both
 * matrix products use cblas_dgemv when LAPACK is available and plain
 * loops otherwise.
 *
 * @param a current alphas
 * @param label +1/-1 tube-side labels
 * @param lin linear term, overwritten with W * new_beta
 * @param c regression targets
 * @param totdoc number of (doubled) examples
 */
void CSVRLight::call_mkl_callback(float64_t* a, int32_t* label, float64_t* lin, float64_t* c, int32_t totdoc)
{
	int32_t num = totdoc;
	int32_t num_kernels = kernel->get_num_subkernels() ;
	float64_t sumalpha = 0;
	float64_t* sumw=SG_MALLOC(float64_t, num_kernels);

	for (int32_t i=0; i<num; i++)
		sumalpha-=a[i]*(learn_parm->eps[i]-label[i]*c[i]);

#ifdef HAVE_LAPACK
	int nk = (int) num_kernels;
	double* alphay = SG_MALLOC(double, num);
	for (int32_t i=0; i<num; i++)
		alphay[i]=a[i]*label[i];

	for (int32_t i=0; i<num_kernels; i++)
		sumw[i]=0;

	// sumw = 0.5 * W * alphay  (W stored column-major with leading dim nk)
	cblas_dgemv(CblasColMajor, CblasNoTrans, nk, (int) num, 0.5, (double*) W,
			nk, (double*) alphay, 1, 1.0, (double*) sumw, 1);

	SG_FREE(alphay);
#else
	for (int32_t d=0; d<num_kernels; d++)
	{
		sumw[d]=0;
		for(int32_t i=0; i<num; i++)
			sumw[d] += 0.5*a[i]*label[i]*W[i*num_kernels+d];
	}
#endif

	if (callback)
		mkl_converged=callback(mkl, sumw, sumalpha);

	const float64_t* new_beta = kernel->get_subkernel_weights(num_kernels);

	// update lin = W^T * new_beta with the weights chosen by the callback
#ifdef HAVE_LAPACK
	cblas_dgemv(CblasColMajor, CblasTrans, nk, (int) num, 1.0, (double*) W,
			nk, (double*) new_beta, 1, 0.0, (double*) lin, 1);
#else
	for(int32_t i=0; i<num; i++)
		lin[i]=0 ;
	for (int32_t d=0; d<num_kernels; d++)
		if (new_beta[d]!=0)
			for(int32_t i=0; i<num; i++)
				lin[i] += new_beta[d]*W[i*num_kernels+d] ;
#endif

	SG_FREE(sumw);
}
00665
00666
00667 void CSVRLight::reactivate_inactive_examples(
00668 int32_t* label, float64_t *a, SHRINK_STATE *shrink_state, float64_t *lin,
00669 float64_t *c, int32_t totdoc, int32_t iteration, int32_t *inconsistent,
00670 int32_t* docs, float64_t *aicache, float64_t *maxdiff)
00671
00672
00673
00674 {
00675 register int32_t i=0,j,ii=0,jj,t,*changed2dnum,*inactive2dnum;
00676 int32_t *changed,*inactive;
00677 register float64_t *a_old,dist;
00678 float64_t ex_c,target;
00679
00680 if (kernel->has_property(KP_LINADD) && get_linadd_enabled()) {
00681 a_old=shrink_state->last_a;
00682
00683 kernel->clear_normal();
00684 int32_t num_modified=0;
00685 for(i=0;i<totdoc;i++) {
00686 if(a[i] != a_old[i]) {
00687 kernel->add_to_normal(regression_fix_index(docs[i]), ((a[i]-a_old[i])*(float64_t)label[i]));
00688 a_old[i]=a[i];
00689 num_modified++;
00690 }
00691 }
00692
00693 if (num_modified>0)
00694 {
00695 for(i=0;i<totdoc;i++) {
00696 if(!shrink_state->active[i]) {
00697 lin[i]=shrink_state->last_lin[i]+kernel->compute_optimized(regression_fix_index(docs[i]));
00698 }
00699 shrink_state->last_lin[i]=lin[i];
00700 }
00701 }
00702 }
00703 else
00704 {
00705 changed=SG_MALLOC(int32_t, totdoc);
00706 changed2dnum=SG_MALLOC(int32_t, totdoc+11);
00707 inactive=SG_MALLOC(int32_t, totdoc);
00708 inactive2dnum=SG_MALLOC(int32_t, totdoc+11);
00709 for(t=shrink_state->deactnum-1;(t>=0) && shrink_state->a_history[t];t--) {
00710 if(verbosity>=2) {
00711 SG_INFO( "%ld..",t);
00712 }
00713 a_old=shrink_state->a_history[t];
00714 for(i=0;i<totdoc;i++) {
00715 inactive[i]=((!shrink_state->active[i])
00716 && (shrink_state->inactive_since[i] == t));
00717 changed[i]= (a[i] != a_old[i]);
00718 }
00719 compute_index(inactive,totdoc,inactive2dnum);
00720 compute_index(changed,totdoc,changed2dnum);
00721
00722 for(ii=0;(i=changed2dnum[ii])>=0;ii++) {
00723 CKernelMachine::kernel->get_kernel_row(i,inactive2dnum,aicache);
00724 for(jj=0;(j=inactive2dnum[jj])>=0;jj++)
00725 lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
00726 }
00727 }
00728 SG_FREE(changed);
00729 SG_FREE(changed2dnum);
00730 SG_FREE(inactive);
00731 SG_FREE(inactive2dnum);
00732 }
00733
00734 (*maxdiff)=0;
00735 for(i=0;i<totdoc;i++) {
00736 shrink_state->inactive_since[i]=shrink_state->deactnum-1;
00737 if(!inconsistent[i]) {
00738 dist=(lin[i]-model->b)*(float64_t)label[i];
00739 target=-(learn_parm->eps[i]-(float64_t)label[i]*c[i]);
00740 ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
00741 if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
00742 if((dist-target)>(*maxdiff))
00743 (*maxdiff)=dist-target;
00744 }
00745 else if((a[i]<ex_c) && (dist < target)) {
00746 if((target-dist)>(*maxdiff))
00747 (*maxdiff)=target-dist;
00748 }
00749 if((a[i]>(0+learn_parm->epsilon_a))
00750 && (a[i]<ex_c)) {
00751 shrink_state->active[i]=1;
00752 }
00753 else if((a[i]<=(0+learn_parm->epsilon_a)) && (dist < (target+learn_parm->epsilon_shrink))) {
00754 shrink_state->active[i]=1;
00755 }
00756 else if((a[i]>=ex_c)
00757 && (dist > (target-learn_parm->epsilon_shrink))) {
00758 shrink_state->active[i]=1;
00759 }
00760 else if(learn_parm->sharedslack) {
00761 shrink_state->active[i]=1;
00762 }
00763 }
00764 }
00765 if (use_kernel_cache) {
00766 for(i=0;i<totdoc;i++) {
00767 (shrink_state->a_history[shrink_state->deactnum-1])[i]=a[i];
00768 }
00769 for(t=shrink_state->deactnum-2;(t>=0) && shrink_state->a_history[t];t--) {
00770 SG_FREE(shrink_state->a_history[t]);
00771 shrink_state->a_history[t]=0;
00772 }
00773 }
00774 }
00775 #endif //USE_SVMLIGHT