SVMLight.cpp

/***********************************************************************/
/*                                                                     */
/*   SVMLight.cpp                                                      */
/*                                                                     */
/*   Author: Thorsten Joachims                                         */
/*   Date: 19.07.99                                                    */
/*                                                                     */
/*   Copyright (c) 1999  Universitaet Dortmund - All rights reserved   */
/*                                                                     */
/*   This software is available for non-commercial use only. It must   */
/*   not be modified and distributed without prior permission of the   */
/*   author. The author is not responsible for implications from the   */
/*   use of this software.                                             */
/*                                                                     */
/*   THIS INCLUDES THE FOLLOWING ADDITIONS                             */
/*   Generic Kernel Interfacing: Soeren Sonnenburg                     */
/*   Parallelizations: Soeren Sonnenburg                               */
/*   Multiple Kernel Learning: Gunnar Raetsch, Soeren Sonnenburg,      */
/*                          Alexander Zien, Marius Kloft, Chen Guohua  */
/*   Linadd Speedup: Gunnar Raetsch, Soeren Sonnenburg                 */
/*                                                                     */
/***********************************************************************/
#include <shogun/lib/config.h>

#ifdef USE_SVMLIGHT

#include <shogun/io/SGIO.h>
#include <shogun/lib/Signal.h>
#include <shogun/mathematics/Math.h>
#include <shogun/lib/Time.h>
#include <shogun/mathematics/lapack.h>

#include <shogun/classifier/svm/SVMLight.h>
#include <shogun/lib/external/pr_loqo.h>

#include <shogun/kernel/Kernel.h>
#include <shogun/machine/KernelMachine.h>
#include <shogun/kernel/CombinedKernel.h>

#include <unistd.h>

#include <shogun/base/Parallel.h>
#include <shogun/labels/BinaryLabels.h>

#ifdef HAVE_PTHREAD
#include <pthread.h>
#endif
using namespace shogun;

#ifndef DOXYGEN_SHOULD_SKIP_THIS
struct S_THREAD_PARAM_REACTIVATE_LINADD
{
    CKernel* kernel;
    float64_t* lin;
    float64_t* last_lin;
    int32_t* active;
    int32_t* docs;
    int32_t start;
    int32_t end;
};

struct S_THREAD_PARAM
{
    float64_t* lin;
    float64_t* W;
    int32_t start, end;
    int32_t* active2dnum;
    int32_t* docs;
    CKernel* kernel;
};

struct S_THREAD_PARAM_REACTIVATE_VANILLA
{
    CKernel* kernel;
    float64_t* lin;
    float64_t* aicache;
    float64_t* a;
    float64_t* a_old;
    int32_t* changed2dnum;
    int32_t* inactive2dnum;
    int32_t* label;
    int32_t start;
    int32_t end;
};

struct S_THREAD_PARAM_KERNEL
{
    float64_t* Kval;
    int32_t *KI, *KJ;
    int32_t start, end;
    CSVMLight* svmlight;
};

#endif // DOXYGEN_SHOULD_SKIP_THIS

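/* The thread helpers below follow the fork/join pattern used throughout
   this file: the caller splits an index range into equal chunks, spawns
   num_threads-1 pthreads over [start,end) slices, processes the remaining
   chunk itself, and then joins. */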
void* CSVMLight::update_linear_component_linadd_helper(void* p)
{
    S_THREAD_PARAM* params = (S_THREAD_PARAM*) p;

    int32_t jj=0, j=0 ;

    for (jj=params->start;(jj<params->end) && (j=params->active2dnum[jj])>=0;jj++)
        params->lin[j]+=params->kernel->compute_optimized(params->docs[j]);

    return NULL ;
}

void* CSVMLight::compute_kernel_helper(void* p)
{
    S_THREAD_PARAM_KERNEL* params = (S_THREAD_PARAM_KERNEL*) p;

    int32_t jj=0 ;
    for (jj=params->start;jj<params->end;jj++)
        params->Kval[jj]=params->svmlight->compute_kernel(params->KI[jj], params->KJ[jj]) ;

    return NULL ;
}

CSVMLight::CSVMLight()
: CSVM()
{
    init();
    set_kernel(NULL);
}

CSVMLight::CSVMLight(float64_t C, CKernel* k, CLabels* lab)
: CSVM(C, k, lab)
{
    init();
}
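
/* Usage sketch (illustrative only, not part of the original file; the
   feature/label objects are hypothetical and the calls reflect the Shogun
   legacy API as far as can be assumed from this file):

     CBinaryLabels* lab=new CBinaryLabels(labels_vec);      // +/-1 labels
     CGaussianKernel* kern=new CGaussianKernel(feats, feats, 2.0);
     CSVMLight* svm=new CSVMLight(1.0, kern, lab);          // C=1.0
     svm->train();                    // invokes train_machine()/svm_learn()
     CBinaryLabels* out=svm->apply_binary(test_feats);
*/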

void CSVMLight::init()
{
    //optimizer settings
    primal=NULL;
    dual=NULL;
    init_margin=0.15;
    init_iter=500;
    precision_violations=0;
    model_b=0;
    verbosity=1;
    opt_precision=DEF_PRECISION;

    // svm variables
    W=NULL;
    model=SG_MALLOC(MODEL, 1);
    learn_parm=SG_MALLOC(LEARN_PARM, 1);
    model->supvec=NULL;
    model->alpha=NULL;
    model->index=NULL;

    // MKL stuff
    mymaxdiff=1 ;
    mkl_converged=false;
}

CSVMLight::~CSVMLight()
{
  SG_FREE(model->supvec);
  SG_FREE(model->alpha);
  SG_FREE(model->index);
  SG_FREE(model);
  SG_FREE(learn_parm);

  // MKL stuff
  SG_FREE(W);

  // optimizer variables
  SG_FREE(dual);
  SG_FREE(primal);
}

bool CSVMLight::train_machine(CFeatures* data)
{
    //certain setup params
    mkl_converged=false;
    verbosity=1 ;
    init_margin=0.15;
    init_iter=500;
    precision_violations=0;
    opt_precision=DEF_PRECISION;

    strcpy (learn_parm->predfile, "");
    learn_parm->biased_hyperplane= get_bias_enabled() ? 1 : 0;
    learn_parm->sharedslack=0;
    learn_parm->remove_inconsistent=0;
    learn_parm->skip_final_opt_check=0;
    learn_parm->svm_maxqpsize=get_qpsize();
    learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize-1;
    learn_parm->maxiter=100000;
    learn_parm->svm_iter_to_shrink=100;
    learn_parm->svm_c=C1;
    learn_parm->transduction_posratio=0.33;
    learn_parm->svm_costratio=C2/C1;
    learn_parm->svm_costratio_unlab=1.0;
    learn_parm->svm_unlabbound=1E-5;
    learn_parm->epsilon_crit=epsilon; // GU: better decrease it ... ??
    learn_parm->epsilon_a=1E-15;
    learn_parm->compute_loo=0;
    learn_parm->rho=1.0;
    learn_parm->xa_depth=0;

    if (!kernel)
        SG_ERROR( "SVM_light cannot proceed without kernel!\n");

    if (data)
    {
        if (m_labels->get_num_labels() != data->get_num_vectors())
        {
            SG_ERROR("%s::train_machine(): Number of training vectors (%d) does"
                    " not match number of labels (%d)\n", get_name(),
                    data->get_num_vectors(), m_labels->get_num_labels());
        }
        kernel->init(data, data);
    }

    if (!kernel->has_features())
        SG_ERROR( "SVM_light cannot proceed without initialized kernel!\n");

    ASSERT(m_labels && m_labels->get_num_labels());
    ASSERT(m_labels->get_label_type() == LT_BINARY);
    ASSERT(kernel->get_num_vec_lhs()==m_labels->get_num_labels());

    // in case of LINADD enabled kernels cleanup!
    if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
        kernel->clear_normal() ;

    // output some info
    SG_DEBUG( "threads = %i\n", parallel->get_num_threads()) ;
    SG_DEBUG( "qpsize = %i\n", learn_parm->svm_maxqpsize) ;
    SG_DEBUG( "epsilon = %1.1e\n", learn_parm->epsilon_crit) ;
    SG_DEBUG( "kernel->has_property(KP_LINADD) = %i\n", kernel->has_property(KP_LINADD)) ;
    SG_DEBUG( "kernel->has_property(KP_KERNCOMBINATION) = %i\n", kernel->has_property(KP_KERNCOMBINATION)) ;
    SG_DEBUG( "kernel->has_property(KP_BATCHEVALUATION) = %i\n", kernel->has_property(KP_BATCHEVALUATION)) ;
    SG_DEBUG( "kernel->get_optimization_type() = %s\n", kernel->get_optimization_type()==FASTBUTMEMHUNGRY ? "FASTBUTMEMHUNGRY" : "SLOWBUTMEMEFFICIENT" ) ;
    SG_DEBUG( "get_solver_type() = %i\n", get_solver_type());
    SG_DEBUG( "get_linadd_enabled() = %i\n", get_linadd_enabled()) ;
    SG_DEBUG( "get_batch_computation_enabled() = %i\n", get_batch_computation_enabled()) ;
    SG_DEBUG( "kernel->get_num_subkernels() = %i\n", kernel->get_num_subkernels()) ;

    use_kernel_cache = !((kernel->get_kernel_type() == K_CUSTOM) ||
                         (get_linadd_enabled() && kernel->has_property(KP_LINADD)));
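
    /* The kernel cache is pointless when kernel values are cheap to obtain:
       a custom kernel already stores the full matrix, and with linadd the
       kernel is evaluated against an explicit normal vector instead of
       against cached rows. */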

    SG_DEBUG( "use_kernel_cache = %i\n", use_kernel_cache) ;

    if (kernel->get_kernel_type() == K_COMBINED)
    {
        CKernel* kn = ((CCombinedKernel*)kernel)->get_first_kernel();

        while (kn)
        {
            // allocate kernel cache but clean up beforehand
            kn->resize_kernel_cache(kn->get_cache_size());
            SG_UNREF(kn);
            kn = ((CCombinedKernel*) kernel)->get_next_kernel();
        }
    }

    kernel->resize_kernel_cache(kernel->get_cache_size());

    // train the svm
    svm_learn();

    // brain damaged svm light workaround: model element 0 is reserved,
    // so alphas/support vectors are copied with a +1 offset
    create_new_model(model->sv_num-1);
    set_bias(-model->b);
    for (int32_t i=0; i<model->sv_num-1; i++)
    {
        set_alpha(i, model->alpha[i+1]);
        set_support_vector(i, model->supvec[i+1]);
    }

    // in case of LINADD enabled kernels cleanup!
    if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
    {
        kernel->clear_normal() ;
        kernel->delete_optimization() ;
    }

    if (use_kernel_cache)
        kernel->kernel_cache_cleanup();

    return true ;
}

int32_t CSVMLight::get_runtime()
{
  clock_t start;
  start = clock();
  return((int32_t)((float64_t)start*100.0/(float64_t)CLOCKS_PER_SEC));
}

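/* svm_learn() drives SVMlight's decomposition training: it sets up the
   per-example costs and work buffers, optionally warm-starts from
   previously stored alphas, and then calls optimize_to_convergence(),
   which repeatedly selects a small working set, solves the induced QP
   subproblem, updates the gradient ("lin"), and shrinks variables that
   stay at their bounds. */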

void CSVMLight::svm_learn()
{
    int32_t *inconsistent, i;
    int32_t misclassified,upsupvecnum;
    float64_t maxdiff, *lin, *c, *a;
    int32_t iterations;
    int32_t trainpos=0, trainneg=0 ;
    ASSERT(m_labels);
    SGVector<int32_t> lab=((CBinaryLabels*) m_labels)->get_int_labels();
    int32_t totdoc=lab.vlen;
    ASSERT(lab.vector && lab.vlen);
    int32_t* label=SGVector<int32_t>::clone_vector(lab.vector, lab.vlen);

    int32_t* docs=SG_MALLOC(int32_t, totdoc);
    SG_FREE(W);
    W=NULL;
    count = 0 ;

    if (kernel->has_property(KP_KERNCOMBINATION) && callback)
    {
        W = SG_MALLOC(float64_t, totdoc*kernel->get_num_subkernels());
        for (i=0; i<totdoc*kernel->get_num_subkernels(); i++)
            W[i]=0;
    }

    for (i=0; i<totdoc; i++)
        docs[i]=i;

    float64_t *xi_fullset; /* buffer for storing xi on full sample in loo */
    float64_t *a_fullset;  /* buffer for storing alpha on full sample in loo */
    TIMING timing_profile;
    SHRINK_STATE shrink_state;

    timing_profile.time_kernel=0;
    timing_profile.time_opti=0;
    timing_profile.time_shrink=0;
    timing_profile.time_update=0;
    timing_profile.time_model=0;
    timing_profile.time_check=0;
    timing_profile.time_select=0;

    /* make sure -n value is reasonable */
    if((learn_parm->svm_newvarsinqp < 2)
            || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
        learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
    }

    init_shrink_state(&shrink_state,totdoc,(int32_t)MAXSHRINK);

    inconsistent = SG_MALLOC(int32_t, totdoc);
    c = SG_MALLOC(float64_t, totdoc);
    a = SG_MALLOC(float64_t, totdoc);
    a_fullset = SG_MALLOC(float64_t, totdoc);
    xi_fullset = SG_MALLOC(float64_t, totdoc);
    lin = SG_MALLOC(float64_t, totdoc);
    if (m_linear_term.vlen>0)
        learn_parm->eps=get_linear_term_array();
    else
    {
        learn_parm->eps=SG_MALLOC(float64_t, totdoc);      /* equivalent regression epsilon for classification */
        SGVector<float64_t>::fill_vector(learn_parm->eps, totdoc, -1.0);
    }

    learn_parm->svm_cost = SG_MALLOC(float64_t, totdoc);

    SG_FREE(model->supvec);
    SG_FREE(model->alpha);
    SG_FREE(model->index);
    model->supvec = SG_MALLOC(int32_t, totdoc+2);
    model->alpha = SG_MALLOC(float64_t, totdoc+2);
    model->index = SG_MALLOC(int32_t, totdoc+2);

    model->at_upper_bound=0;
    model->b=0;
    model->supvec[0]=0;  /* element 0 reserved and empty for now */
    model->alpha[0]=0;
    model->totdoc=totdoc;

    model->kernel=kernel;

    model->sv_num=1;
    model->loo_error=-1;
    model->loo_recall=-1;
    model->loo_precision=-1;
    model->xa_error=-1;
    model->xa_recall=-1;
    model->xa_precision=-1;

    for (i=0;i<totdoc;i++) {    /* various inits */
        inconsistent[i]=0;
        c[i]=0;
        a[i]=0;
        lin[i]=0;

        if(label[i] > 0) {
            learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
                fabs((float64_t)label[i]);
            label[i]=1;
            trainpos++;
        }
        else if(label[i] < 0) {
            learn_parm->svm_cost[i]=learn_parm->svm_c*fabs((float64_t)label[i]);
            label[i]=-1;
            trainneg++;
        }
        else {
            learn_parm->svm_cost[i]=0;
        }
    }
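
    /* Note on the cost setup above: positives are scaled by
       svm_costratio=C2/C1 on top of svm_c=C1, so effectively C_+=C2 and
       C_-=C1; examples with label 0 (transduction placeholders) get zero
       cost and are thereby excluded from the optimization. */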

  /* compute starting state for initial alpha values */
    SG_DEBUG( "alpha:%p num_sv:%d\n", m_alpha.vector, get_num_support_vectors());

    if(m_alpha.vector && get_num_support_vectors()) {
        if(verbosity>=1) {
            SG_INFO( "Computing starting state...");
        }

    float64_t* alpha = SG_MALLOC(float64_t, totdoc);

    for (i=0; i<totdoc; i++)
        alpha[i]=0;

    for (i=0; i<get_num_support_vectors(); i++)
        alpha[get_support_vector(i)]=get_alpha(i);

    int32_t* index = SG_MALLOC(int32_t, totdoc);
    int32_t* index2dnum = SG_MALLOC(int32_t, totdoc+11);
    float64_t* aicache = SG_MALLOC(float64_t, totdoc);
    for (i=0;i<totdoc;i++) {    /* create full index and clip alphas */
      index[i]=1;
      alpha[i]=fabs(alpha[i]);
      if(alpha[i]<0) alpha[i]=0;
      if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];
    }
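
    /* Warm start: alphas carried over from a previous model are projected
       into the feasible box 0 <= alpha_i <= C_i above, before the gradient
       and model are recomputed from them below. */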

    if (use_kernel_cache)
    {
        if (callback &&
                (!((CCombinedKernel*)kernel)->get_append_subkernel_weights())
           )
        {
            CCombinedKernel* k      = (CCombinedKernel*) kernel;
            CKernel* kn = k->get_first_kernel();

            while (kn)
            {
                for (i=0;i<totdoc;i++)     // fill kernel cache with unbounded SV
                    if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
                            && (kn->kernel_cache_space_available()))
                        kn->cache_kernel_row(i);

                for (i=0;i<totdoc;i++)     // fill rest of kernel cache with bounded SV
                    if((alpha[i]==learn_parm->svm_cost[i])
                            && (kn->kernel_cache_space_available()))
                        kn->cache_kernel_row(i);

                SG_UNREF(kn);
                kn = k->get_next_kernel();
            }
        }
        else
        {
            for (i=0;i<totdoc;i++)     /* fill kernel cache with unbounded SV */
                if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
                        && (kernel->kernel_cache_space_available()))
                    kernel->cache_kernel_row(i);

            for (i=0;i<totdoc;i++)     /* fill rest of kernel cache with bounded SV */
                if((alpha[i]==learn_parm->svm_cost[i])
                        && (kernel->kernel_cache_space_available()))
                    kernel->cache_kernel_row(i);
        }
    }
    compute_index(index,totdoc,index2dnum);
    update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc,
                lin,aicache,NULL);
    calculate_svm_model(docs,label,lin,alpha,a,c,
                  index2dnum,index2dnum);
    for (i=0;i<totdoc;i++) {    /* copy initial alphas */
      a[i]=alpha[i];
    }

    SG_FREE(index);
    SG_FREE(index2dnum);
    SG_FREE(aicache);
    SG_FREE(alpha);

    if(verbosity>=1)
        SG_DONE();
  }
    SG_DEBUG( "%d totdoc %d pos %d neg\n", totdoc, trainpos, trainneg);
    SG_DEBUG( "Optimizing...\n");

    /* train the svm */
    iterations=optimize_to_convergence(docs,label,totdoc,
                     &shrink_state,inconsistent,a,lin,
                     c,&timing_profile,
                     &maxdiff,(int32_t)-1,
                     (int32_t)1);


    if(verbosity>=1) {
        if(verbosity==1)
        {
            SG_DONE();
            SG_DEBUG("(%d iterations)", iterations);
        }

        misclassified=0;
        for (i=0;(i<totdoc);i++) { /* get final statistic */
            if((lin[i]-model->b)*(float64_t)label[i] <= 0.0)
                misclassified++;
        }

        SG_INFO( "Optimization finished (%d misclassified, maxdiff=%.8f).\n",
                misclassified,maxdiff);

        SG_INFO( "obj = %.16f, rho = %.16f\n",get_objective(),model->b);
        if (maxdiff>epsilon)
            SG_WARNING( "maximum violation (%f) exceeds svm_epsilon (%f) due to numerical difficulties\n", maxdiff, epsilon);

        upsupvecnum=0;
        for (i=1;i<model->sv_num;i++)
        {
            if(fabs(model->alpha[i]) >=
                    (learn_parm->svm_cost[model->supvec[i]]-
                     learn_parm->epsilon_a))
                upsupvecnum++;
        }
        SG_INFO( "Number of SV: %d (including %d at upper bound)\n",
                model->sv_num-1,upsupvecnum);
    }

    shrink_state_cleanup(&shrink_state);
    SG_FREE(label);
    SG_FREE(inconsistent);
    SG_FREE(c);
    SG_FREE(a);
    SG_FREE(a_fullset);
    SG_FREE(xi_fullset);
    SG_FREE(lin);
    SG_FREE(learn_parm->eps);
    SG_FREE(learn_parm->svm_cost);
    SG_FREE(docs);
}

int32_t CSVMLight::optimize_to_convergence(int32_t* docs, int32_t* label, int32_t totdoc,
                 SHRINK_STATE *shrink_state,
                 int32_t *inconsistent,
                 float64_t *a, float64_t *lin, float64_t *c,
                 TIMING *timing_profile, float64_t *maxdiff,
                 int32_t heldout, int32_t retrain)
     /* docs: Training vectors (x-part) */
     /* label: Training labels/value (y-part, zero if test example for
                  transduction) */
     /* totdoc: Number of examples in docs/label */
     /* learn_parm: Learning parameters */
     /* kernel_parm: Kernel parameters */
     /* kernel_cache: Initialized/partly filled Cache, if using a kernel.
                      NULL if linear. */
     /* shrink_state: State of active variables */
     /* inconsistent: examples thrown out as inconsistent */
     /* a: alphas */
     /* lin: linear component of gradient */
     /* c: right hand side of inequalities (margin) */
     /* maxdiff: returns maximum violation of KT-conditions */
     /* heldout: marks held-out example for leave-one-out (or -1) */
     /* retrain: selects training mode (1=regular / 2=holdout) */
{
  int32_t *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink;
  int32_t inconsistentnum,choosenum,already_chosen=0,iteration;
  int32_t misclassified,supvecnum=0,*active2dnum,inactivenum;
  int32_t *working2dnum,*selexam;
  int32_t activenum;
  float64_t criterion, eq;
  float64_t *a_old;
  int32_t t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */
  float64_t epsilon_crit_org;
  float64_t bestmaxdiff;
  float64_t worstmaxdiff;
  int32_t   bestmaxdiffiter,terminate;
  bool reactivated=false;

  float64_t *selcrit;  /* buffer for sorting */
  float64_t *aicache;  /* buffer to keep one row of hessian */
  QP qp;            /* buffer for one quadratic program */

  epsilon_crit_org=learn_parm->epsilon_crit; /* save org */
  if(kernel->has_property(KP_LINADD) && get_linadd_enabled()) {
      learn_parm->epsilon_crit=2.0;
      /* caching makes no sense for linear kernel */
  }
  learn_parm->epsilon_shrink=2;
  (*maxdiff)=1;

  SG_DEBUG("totdoc:%d\n",totdoc);
  chosen = SG_MALLOC(int32_t, totdoc);
  last_suboptimal_at = SG_MALLOC(int32_t, totdoc);
  key = SG_MALLOC(int32_t, totdoc+11);
  selcrit = SG_MALLOC(float64_t, totdoc);
  selexam = SG_MALLOC(int32_t, totdoc);
  a_old = SG_MALLOC(float64_t, totdoc);
  aicache = SG_MALLOC(float64_t, totdoc);
  working2dnum = SG_MALLOC(int32_t, totdoc+11);
  active2dnum = SG_MALLOC(int32_t, totdoc+11);
  qp.opt_ce = SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
  qp.opt_ce0 = SG_MALLOC(float64_t, 1);
  qp.opt_g = SG_MALLOC(float64_t, learn_parm->svm_maxqpsize*learn_parm->svm_maxqpsize);
  qp.opt_g0 = SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
  qp.opt_xinit = SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
  qp.opt_low = SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
  qp.opt_up = SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);

  choosenum=0;
  inconsistentnum=0;
  if(!retrain) retrain=1;
  iteration=1;
  bestmaxdiffiter=1;
  bestmaxdiff=999999999;
  worstmaxdiff=1e-10;
  terminate=0;

  kernel->set_time(iteration);  /* for lru cache */

  for (i=0;i<totdoc;i++) {    /* various inits */
    chosen[i]=0;
    a_old[i]=a[i];
    last_suboptimal_at[i]=1;
    if(inconsistent[i])
      inconsistentnum++;
  }
  activenum=compute_index(shrink_state->active,totdoc,active2dnum);
  inactivenum=totdoc-activenum;
  clear_index(working2dnum);
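
  /* Convention used throughout: the "*2dnum" arrays are -1 terminated
     lists of active example indices, built by compute_index() as an
     inverted index over a 0/1 membership vector. */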

  /* repeat this loop until we have convergence */
  CTime start_time;
  mkl_converged=false;

#ifdef CYGWIN
  for (;((iteration<100 || (!mkl_converged && callback) ) || (retrain && (!terminate))); iteration++){
#else
  CSignal::clear_cancel();
  for (;((!CSignal::cancel_computations()) && ((iteration<3 || (!mkl_converged && callback) ) || (retrain && (!terminate)))); iteration++){
#endif

      if(use_kernel_cache)
          kernel->set_time(iteration);  /* for lru cache */

      if(verbosity>=2) t0=get_runtime();
      if(verbosity>=3) {
          SG_DEBUG( "\nSelecting working set... ");
      }

      if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize)
          learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;

      i=0;
      for (jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */
          if((chosen[j]>=(learn_parm->svm_maxqpsize/
                          CMath::min(learn_parm->svm_maxqpsize,
                                     learn_parm->svm_newvarsinqp)))
             || (inconsistent[j])
             || (j == heldout)) {
              chosen[j]=0;
              choosenum--;
          }
          else {
              chosen[j]++;
              working2dnum[i++]=j;
          }
      }
      working2dnum[i]=-1;

      if(retrain == 2) {
          choosenum=0;
          for (jj=0;(j=working2dnum[jj])>=0;jj++) { /* fully clear working set */
              chosen[j]=0;
          }
          clear_index(working2dnum);
          for (i=0;i<totdoc;i++) { /* set inconsistent examples to zero (-i 1) */
              if((inconsistent[i] || (heldout==i)) && (a[i] != 0.0)) {
                  chosen[i]=99999;
                  choosenum++;
                  a[i]=0;
              }
          }
          if(learn_parm->biased_hyperplane) {
              eq=0;
              for (i=0;i<totdoc;i++) { /* make sure we fulfill equality constraint */
                  eq+=a[i]*label[i];
              }
              for (i=0;(i<totdoc) && (fabs(eq) > learn_parm->epsilon_a);i++) {
                  if((eq*label[i] > 0) && (a[i] > 0)) {
                      chosen[i]=88888;
                      choosenum++;
                      if((eq*label[i]) > a[i]) {
                          eq-=(a[i]*label[i]);
                          a[i]=0;
                      }
                      else {
                          a[i]-=(eq*label[i]);
                          eq=0;
                      }
                  }
              }
          }
          compute_index(chosen,totdoc,working2dnum);
      }
      else
      {   /* select working set according to steepest gradient */
          if(iteration % 101)
          {
              already_chosen=0;
              if(CMath::min(learn_parm->svm_newvarsinqp, learn_parm->svm_maxqpsize-choosenum)>=4 &&
                      (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())))
              {
                  /* select part of the working set from cache */
                  already_chosen=select_next_qp_subproblem_grad(
                      label,a,lin,c,totdoc,
                      (int32_t)(CMath::min(learn_parm->svm_maxqpsize-choosenum,
                                        learn_parm->svm_newvarsinqp)/2),
                      inconsistent,active2dnum,
                      working2dnum,selcrit,selexam,1,
                      key,chosen);
                  choosenum+=already_chosen;
              }
              choosenum+=select_next_qp_subproblem_grad(
                  label,a,lin,c,totdoc,
                  CMath::min(learn_parm->svm_maxqpsize-choosenum,
                             learn_parm->svm_newvarsinqp-already_chosen),
                  inconsistent,active2dnum,
                  working2dnum,selcrit,selexam,0,key,
                  chosen);
          }
          else { /* once in a while, select a somewhat random working set
                    to get unlocked of infinite loops due to numerical
                    inaccuracies in the core qp-solver */
              choosenum+=select_next_qp_subproblem_rand(
                  label,a,lin,c,totdoc,
                  CMath::min(learn_parm->svm_maxqpsize-choosenum,
                             learn_parm->svm_newvarsinqp),
                  inconsistent,active2dnum,
                  working2dnum,selcrit,selexam,key,
                  chosen,iteration);
          }
      }

      if(verbosity>=2) {
          SG_INFO( " %d vectors chosen\n",choosenum);
      }

      if(verbosity>=2) t1=get_runtime();

      if (use_kernel_cache)
      {
          // in case of MKL w/o linadd cache each kernel independently
          // else if linadd is disabled cache single kernel
          if ( callback &&
                  (!((CCombinedKernel*) kernel)->get_append_subkernel_weights())
             )
          {
              CCombinedKernel* k      = (CCombinedKernel*) kernel;
              CKernel* kn = k->get_first_kernel();

              while (kn)
              {
                  kn->cache_multiple_kernel_rows(working2dnum, choosenum);
                  SG_UNREF(kn);
                  kn = k->get_next_kernel() ;
              }
          }
          else
              kernel->cache_multiple_kernel_rows(working2dnum, choosenum);
      }

      if(verbosity>=2) t2=get_runtime();

      if(retrain != 2) {
          optimize_svm(docs,label,inconsistent,0.0,chosen,active2dnum,
                       totdoc,working2dnum,choosenum,a,lin,c,
                       aicache,&qp,&epsilon_crit_org);
      }

      if(verbosity>=2) t3=get_runtime();
      update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc,
                              lin,aicache,c);

      if(verbosity>=2) t4=get_runtime();
      supvecnum=calculate_svm_model(docs,label,lin,a,a_old,c,working2dnum,active2dnum);

      if(verbosity>=2) t5=get_runtime();

      for (jj=0;(i=working2dnum[jj])>=0;jj++) {
          a_old[i]=a[i];
      }

      retrain=check_optimality(label,a,lin,c,totdoc,
                               maxdiff,epsilon_crit_org,&misclassified,
                               inconsistent,active2dnum,last_suboptimal_at,
                               iteration);

      if(verbosity>=2) {
          t6=get_runtime();
          timing_profile->time_select+=t1-t0;
          timing_profile->time_kernel+=t2-t1;
          timing_profile->time_opti+=t3-t2;
          timing_profile->time_update+=t4-t3;
          timing_profile->time_model+=t5-t4;
          timing_profile->time_check+=t6-t5;
      }

      /* checking whether optimizer got stuck */
      if((*maxdiff) < bestmaxdiff) {
          bestmaxdiff=(*maxdiff);
          bestmaxdiffiter=iteration;
      }
      if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) {
          /* long time no progress? */
          terminate=1;
          retrain=0;
          SG_WARNING( "Relaxing KT-Conditions due to slow progress! Terminating!\n");
          SG_DEBUG("(iteration:%d, bestmaxdiffiter:%d, learn_parm->maxiter:%d)\n", iteration, bestmaxdiffiter, learn_parm->maxiter);
      }

      noshrink= (get_shrinking_enabled()) ? 0 : 1;

      if ((!callback) && (!retrain) && (inactivenum>0) &&
              ((!learn_parm->skip_final_opt_check) || (kernel->has_property(KP_LINADD) && get_linadd_enabled())))
      {
          t1=get_runtime();
          SG_DEBUG( "reactivating inactive examples\n");

          reactivate_inactive_examples(label,a,shrink_state,lin,c,totdoc,
                                       iteration,inconsistent,
                                       docs,aicache,
                                       maxdiff);
          reactivated=true;
          SG_DEBUG("done reactivating inactive examples (maxdiff:%8f eps_crit:%8f orig_eps:%8f)\n", *maxdiff, learn_parm->epsilon_crit, epsilon_crit_org);
          /* Update to new active variables. */
          activenum=compute_index(shrink_state->active,totdoc,active2dnum);
          inactivenum=totdoc-activenum;
          /* reset watchdog */
          bestmaxdiff=(*maxdiff);
          bestmaxdiffiter=iteration;

          /* termination criterion */
          noshrink=1;
          retrain=0;
          if((*maxdiff) > learn_parm->epsilon_crit)
          {
              SG_INFO( "restarting optimization as we are - due to shrinking - deviating too much (maxdiff(%f) > eps(%f))\n", *maxdiff, learn_parm->epsilon_crit);
              retrain=1;
          }
          timing_profile->time_shrink+=get_runtime()-t1;
          if (((verbosity>=1) && (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())))
             || (verbosity>=2)) {
              SG_DONE();
              SG_DEBUG("Number of inactive variables = %d\n", inactivenum);
          }
      }

      if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff)))
          learn_parm->epsilon_crit=(*maxdiff);
      if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) {
          learn_parm->epsilon_crit/=4.0;
          retrain=1;
          noshrink=1;
      }
      if(learn_parm->epsilon_crit<epsilon_crit_org)
          learn_parm->epsilon_crit=epsilon_crit_org;
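
      /* epsilon_crit annealing: the tolerance starts loose (2.0 in the
         linadd case) and is divided by 4 whenever convergence is reached
         at a tolerance still above the requested epsilon_crit_org, keeping
         early iterations cheap while guaranteeing the final precision. */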

      if(verbosity>=2) {
          SG_INFO( " => (%d SV (incl. %d SV at u-bound), max violation=%.5f)\n",
                       supvecnum,model->at_upper_bound,(*maxdiff));
      }
      mymaxdiff=*maxdiff ;

      //don't shrink w/ mkl
      if (((iteration % 10) == 0) && (!noshrink) && !callback)
      {
          activenum=shrink_problem(shrink_state,active2dnum,last_suboptimal_at,iteration,totdoc,
                  CMath::max((int32_t)(activenum/10),
                      CMath::max((int32_t)(totdoc/500),(int32_t) 100)),
                  a,inconsistent, c, lin, label);

          inactivenum=totdoc-activenum;

          if (use_kernel_cache && (supvecnum>kernel->get_max_elems_cache())
                  && ((kernel->get_activenum_cache()-activenum)>CMath::max((int32_t)(activenum/10),(int32_t) 500))) {

              kernel->kernel_cache_shrink(totdoc, CMath::min((int32_t) (kernel->get_activenum_cache()-activenum),
                          (int32_t) (kernel->get_activenum_cache()-supvecnum)),
                      shrink_state->active);
          }
      }

      if (bestmaxdiff>worstmaxdiff)
          worstmaxdiff=bestmaxdiff;

      SG_ABS_PROGRESS(bestmaxdiff, -CMath::log10(bestmaxdiff), -CMath::log10(worstmaxdiff), -CMath::log10(epsilon), 6);

      /* Terminate loop */
      if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time) {
          terminate = 1;
          retrain = 0;
      }

  } /* end of loop */

  SG_DEBUG( "inactive:%d\n", inactivenum);

  if (inactivenum && !reactivated && !callback)
  {
      SG_DEBUG( "reactivating inactive examples\n");
      reactivate_inactive_examples(label,a,shrink_state,lin,c,totdoc,
              iteration,inconsistent,
              docs,aicache,
              maxdiff);
      SG_DEBUG( "done reactivating inactive examples\n");
      /* Update to new active variables. */
      activenum=compute_index(shrink_state->active,totdoc,active2dnum);
      inactivenum=totdoc-activenum;
      /* reset watchdog */
      bestmaxdiff=(*maxdiff);
      bestmaxdiffiter=iteration;
  }

  //use this for our purposes!
  criterion=compute_objective_function(a,lin,c,learn_parm->eps,label,totdoc);
  CSVM::set_objective(criterion);

  SG_FREE(chosen);
  SG_FREE(last_suboptimal_at);
  SG_FREE(key);
  SG_FREE(selcrit);
  SG_FREE(selexam);
  SG_FREE(a_old);
  SG_FREE(aicache);
  SG_FREE(working2dnum);
  SG_FREE(active2dnum);
  SG_FREE(qp.opt_ce);
  SG_FREE(qp.opt_ce0);
  SG_FREE(qp.opt_g);
  SG_FREE(qp.opt_g0);
  SG_FREE(qp.opt_xinit);
  SG_FREE(qp.opt_low);
  SG_FREE(qp.opt_up);

  learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */

  return(iteration);
}

float64_t CSVMLight::compute_objective_function(
    float64_t *a, float64_t *lin, float64_t *c, float64_t* eps, int32_t *label,
    int32_t totdoc)
     /* Return value of objective function. */
     /* Works only relative to the active variables! */
{
  /* calculate value of objective function */
  float64_t criterion=0;

  for (int32_t i=0;i<totdoc;i++)
      criterion=criterion+(eps[i]-(float64_t)label[i]*c[i])*a[i]+0.5*a[i]*label[i]*lin[i];

  return(criterion);
}
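
/* With the default classification setup (eps[i]=-1, c[i]=0) and
   lin[i] = sum_j a[j]*label[j]*K(i,j), the criterion above equals
     -sum_i a_i + 1/2 * sum_ij a_i a_j y_i y_j K(x_i,x_j),
   i.e. the negative of the SVM dual objective being maximized. */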

void CSVMLight::clear_index(int32_t *index)
              /* initializes and empties index */
{
  index[0]=-1;
}

void CSVMLight::add_to_index(int32_t *index, int32_t elem)
     /* appends elem to the -1 terminated index */
{
  register int32_t i;
  for (i=0;index[i] != -1;i++);
  index[i]=elem;
  index[i+1]=-1;
}

int32_t CSVMLight::compute_index(
    int32_t *binfeature, int32_t range, int32_t *index)
     /* create an inverted index of binfeature */
{
  register int32_t i,ii;

  ii=0;
  for (i=0;i<range;i++) {
    if(binfeature[i]) {
      index[ii]=i;
      ii++;
    }
  }
  for (i=0;i<4;i++) {
    index[ii+i]=-1;
  }
  return(ii);
}
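
/* Example: binfeature={1,0,1,0} with range=4 yields index={0,2,-1,...}
   and a return value of 2; the trailing -1 entries act as terminators
   for loops of the form  for (ii=0;(i=index[ii])>=0;ii++). */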

void CSVMLight::optimize_svm(
    int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
    float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t totdoc,
    int32_t *working2dnum, int32_t varnum, float64_t *a, float64_t *lin,
    float64_t *c, float64_t *aicache, QP *qp, float64_t *epsilon_crit_target)
     /* Do optimization on the working set. */
{
    int32_t i;
    float64_t *a_v;

    //compute_matrices_for_optimization_parallel(docs,label,
    //                                         exclude_from_eq_const,eq_target,chosen,
    //                                         active2dnum,working2dnum,a,lin,c,
    //                                         varnum,totdoc,aicache,qp);

    compute_matrices_for_optimization(docs,label,
                                      exclude_from_eq_const,eq_target,chosen,
                                      active2dnum,working2dnum,a,lin,c,
                                      varnum,totdoc,aicache,qp);

    if(verbosity>=3) {
     SG_DEBUG( "Running optimizer...");
    }
    /* call the qp-subsolver */
    a_v=optimize_qp(qp,epsilon_crit_target,
            learn_parm->svm_maxqpsize,
            &(model->b),                /* in case the optimizer gives us */
            learn_parm->svm_maxqpsize); /* the threshold for free. otherwise */
                                        /* b is calculated in calculate_model. */
    if(verbosity>=3) {
     SG_DONE();
    }

    for (i=0;i<varnum;i++)
      a[working2dnum[i]]=a_v[i];
}

void CSVMLight::compute_matrices_for_optimization_parallel(
    int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
    float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t *key,
    float64_t *a, float64_t *lin, float64_t *c, int32_t varnum, int32_t totdoc,
    float64_t *aicache, QP *qp)
{
    if (parallel->get_num_threads()<=1)
    {
        compute_matrices_for_optimization(docs, label, exclude_from_eq_const, eq_target,
                                                   chosen, active2dnum, key, a, lin, c,
                                                   varnum, totdoc, aicache, qp) ;
    }
#ifdef HAVE_PTHREAD
    else
    {
        register int32_t ki,kj,i,j;
        register float64_t kernel_temp;

        qp->opt_n=varnum;
        qp->opt_ce0[0]=-eq_target; /* compute the constant for equality constraint */
        for (j=1;j<model->sv_num;j++) { /* start at 1 */
            if((!chosen[model->supvec[j]])
                    && (!exclude_from_eq_const[(model->supvec[j])])) {
                qp->opt_ce0[0]+=model->alpha[j];
            }
        }
        if(learn_parm->biased_hyperplane)
            qp->opt_m=1;
        else
            qp->opt_m=0;  /* eq-constraint will be ignored */

        /* init linear part of objective function */
        for (i=0;i<varnum;i++) {
            qp->opt_g0[i]=lin[key[i]];
        }

        ASSERT(parallel->get_num_threads()>1);
        int32_t *KI=SG_MALLOC(int32_t, varnum*varnum);
        int32_t *KJ=SG_MALLOC(int32_t, varnum*varnum);
        int32_t Knum=0 ;
        float64_t *Kval = SG_MALLOC(float64_t, varnum*(varnum+1)/2);
        for (i=0;i<varnum;i++) {
            ki=key[i];
            KI[Knum]=docs[ki] ;
            KJ[Knum]=docs[ki] ;
            Knum++ ;
            for (j=i+1;j<varnum;j++)
            {
                kj=key[j];
                KI[Knum]=docs[ki] ;
                KJ[Knum]=docs[kj] ;
                Knum++ ;
            }
        }
        ASSERT(Knum<=varnum*(varnum+1)/2);

        pthread_t* threads = SG_MALLOC(pthread_t, parallel->get_num_threads()-1);
        S_THREAD_PARAM_KERNEL* params = SG_MALLOC(S_THREAD_PARAM_KERNEL, parallel->get_num_threads()-1);
        int32_t step= Knum/parallel->get_num_threads();
        //SG_DEBUG( "\nkernel-step size: %i\n", step) ;
        for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
        {
            params[t].svmlight = this;
            params[t].start = t*step;
            params[t].end = (t+1)*step;
            params[t].KI=KI ;
            params[t].KJ=KJ ;
            params[t].Kval=Kval ;
            pthread_create(&threads[t], NULL, CSVMLight::compute_kernel_helper, (void*)&params[t]);
        }
        for (i=params[parallel->get_num_threads()-2].end; i<Knum; i++)
            Kval[i]=compute_kernel(KI[i],KJ[i]) ;

        for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
            pthread_join(threads[t], NULL);

        SG_FREE(params);
        SG_FREE(threads);

        Knum=0 ;
        for (i=0;i<varnum;i++) {
            ki=key[i];

            /* Compute the matrix for equality constraints */
            qp->opt_ce[i]=label[ki];
            qp->opt_low[i]=0;
            qp->opt_up[i]=learn_parm->svm_cost[ki];

            kernel_temp=Kval[Knum] ;
            Knum++ ;
            /* compute linear part of objective function */
            qp->opt_g0[i]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
            /* compute quadratic part of objective function */
            qp->opt_g[varnum*i+i]=kernel_temp;

            for (j=i+1;j<varnum;j++) {
                kj=key[j];
                kernel_temp=Kval[Knum] ;
                Knum++ ;
                /* compute linear part of objective function */
                qp->opt_g0[i]-=(kernel_temp*a[kj]*(float64_t)label[kj]);
                qp->opt_g0[j]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
                /* compute quadratic part of objective function */
                qp->opt_g[varnum*i+j]=(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp;
                qp->opt_g[varnum*j+i]=qp->opt_g[varnum*i+j];
            }

            if(verbosity>=3) {
                if(i % 20 == 0) {
                    SG_DEBUG( "%d..",i);
                }
            }
        }

        SG_FREE(KI);
        SG_FREE(KJ);
        SG_FREE(Kval);

        for (i=0;i<varnum;i++) {
            /* assure starting at feasible point */
            qp->opt_xinit[i]=a[key[i]];
            /* set linear part of objective function */
            qp->opt_g0[i]=(learn_parm->eps[key[i]]-(float64_t)label[key[i]]*c[key[i]])+qp->opt_g0[i]*(float64_t)label[key[i]];
        }

        if(verbosity>=3) {
            SG_DONE();
        }
    }
#endif
}

void CSVMLight::compute_matrices_for_optimization(
    int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
    float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t *key,
    float64_t *a, float64_t *lin, float64_t *c, int32_t varnum, int32_t totdoc,
    float64_t *aicache, QP *qp)
{
  register int32_t ki,kj,i,j;
  register float64_t kernel_temp;

  qp->opt_n=varnum;
  qp->opt_ce0[0]=-eq_target; /* compute the constant for equality constraint */
  for (j=1;j<model->sv_num;j++) { /* start at 1 */
    if((!chosen[model->supvec[j]])
       && (!exclude_from_eq_const[(model->supvec[j])])) {
      qp->opt_ce0[0]+=model->alpha[j];
    }
  }
  if(learn_parm->biased_hyperplane)
    qp->opt_m=1;
  else
    qp->opt_m=0;  /* eq-constraint will be ignored */

  /* init linear part of objective function */
  for (i=0;i<varnum;i++) {
    qp->opt_g0[i]=lin[key[i]];
  }

  for (i=0;i<varnum;i++) {
      ki=key[i];

      /* Compute the matrix for equality constraints */
      qp->opt_ce[i]=label[ki];
      qp->opt_low[i]=0;
      qp->opt_up[i]=learn_parm->svm_cost[ki];

      kernel_temp=compute_kernel(docs[ki], docs[ki]);
      /* compute linear part of objective function */
      qp->opt_g0[i]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
      /* compute quadratic part of objective function */
      qp->opt_g[varnum*i+i]=kernel_temp;

      for (j=i+1;j<varnum;j++) {
          kj=key[j];
          kernel_temp=compute_kernel(docs[ki], docs[kj]);

          /* compute linear part of objective function */
          qp->opt_g0[i]-=(kernel_temp*a[kj]*(float64_t)label[kj]);
          qp->opt_g0[j]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
          /* compute quadratic part of objective function */
          qp->opt_g[varnum*i+j]=(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp;
          qp->opt_g[varnum*j+i]=qp->opt_g[varnum*i+j];
      }

      if(verbosity>=3) {
          if(i % 20 == 0) {
              SG_DEBUG( "%d..",i);
          }
      }
  }

  for (i=0;i<varnum;i++) {
      /* assure starting at feasible point */
      qp->opt_xinit[i]=a[key[i]];
      /* set linear part of objective function */
      qp->opt_g0[i]=(learn_parm->eps[key[i]]-(float64_t)label[key[i]]*c[key[i]]) + qp->opt_g0[i]*(float64_t)label[key[i]];
  }

  if(verbosity>=3) {
      SG_DONE();
  }
}
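
/* The matrices computed above define the QP subproblem handed to the
   pr_loqo-based solver in optimize_svm():

     minimize    g0'*x + 1/2 x'*G*x
     subject to  ce'*x + ce0 = 0      (only if biased_hyperplane)
                 low <= x <= up

   where x are the alphas of the working set, G_ij = y_i y_j K(x_i,x_j),
   and g0 folds in the gradient contribution of all variables outside
   the working set. */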

int32_t CSVMLight::calculate_svm_model(
    int32_t* docs, int32_t *label, float64_t *lin, float64_t *a,
    float64_t *a_old, float64_t *c, int32_t *working2dnum, int32_t *active2dnum)
     /* Compute decision function based on current values */
     /* of alpha. */
{
  int32_t i,ii,pos,b_calculated=0,first_low,first_high;
  float64_t ex_c,b_temp,b_low,b_high;

  if(verbosity>=3) {
   SG_DEBUG( "Calculating model...");
  }

  if(!learn_parm->biased_hyperplane) {
    model->b=0;
    b_calculated=1;
  }

  for (ii=0;(i=working2dnum[ii])>=0;ii++) {
    if((a_old[i]>0) && (a[i]==0)) { /* remove from model */
      pos=model->index[i];
      model->index[i]=-1;
      (model->sv_num)--;
      model->supvec[pos]=model->supvec[model->sv_num];
      model->alpha[pos]=model->alpha[model->sv_num];
      model->index[model->supvec[pos]]=pos;
    }
    else if((a_old[i]==0) && (a[i]>0)) { /* add to model */
      model->supvec[model->sv_num]=docs[i];
      model->alpha[model->sv_num]=a[i]*(float64_t)label[i];
      model->index[i]=model->sv_num;
      (model->sv_num)++;
    }
    else if(a_old[i]==a[i]) { /* nothing to do */
    }
    else {  /* just update alpha */
      model->alpha[model->index[i]]=a[i]*(float64_t)label[i];
    }

    ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
    if((a_old[i]>=ex_c) && (a[i]<ex_c)) {
      (model->at_upper_bound)--;
    }
    else if((a_old[i]<ex_c) && (a[i]>=ex_c)) {
      (model->at_upper_bound)++;
    }

    if((!b_calculated)
       && (a[i]>learn_parm->epsilon_a) && (a[i]<ex_c)) {   /* calculate b */
      model->b=((float64_t)label[i]*learn_parm->eps[i]-c[i]+lin[i]);
      b_calculated=1;
    }
  }

  /* No alpha in the working set not at bounds, so b was not
     calculated in the usual way. The following handles this special
     case. */
  if(learn_parm->biased_hyperplane
     && (!b_calculated)
     && (model->sv_num-1 == model->at_upper_bound)) {
    first_low=1;
    first_high=1;
    b_low=0;
    b_high=0;
    for (ii=0;(i=active2dnum[ii])>=0;ii++) {
      ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
      if(a[i]<ex_c) {
        if(label[i]>0) {
          b_temp=-(learn_parm->eps[i]-c[i]+lin[i]);
          if((b_temp>b_low) || (first_low)) {
            b_low=b_temp;
            first_low=0;
          }
        }
        else {
          b_temp=-(-learn_parm->eps[i]-c[i]+lin[i]);
          if((b_temp<b_high) || (first_high)) {
            b_high=b_temp;
            first_high=0;
          }
        }
      }
      else {
        if(label[i]<0) {
          b_temp=-(-learn_parm->eps[i]-c[i]+lin[i]);
          if((b_temp>b_low) || (first_low)) {
            b_low=b_temp;
            first_low=0;
          }
        }
        else {
          b_temp=-(learn_parm->eps[i]-c[i]+lin[i]);
          if((b_temp<b_high) || (first_high)) {
            b_high=b_temp;
            first_high=0;
          }
        }
      }
    }
    if(first_high) {
      model->b=-b_low;
    }
    else if(first_low) {
      model->b=-b_high;
    }
    else {
      model->b=-(b_high+b_low)/2.0;  /* select b as the middle of range */
    }
  }

  if(verbosity>=3) {
   SG_DONE();
  }

  return(model->sv_num-1); /* have to subtract one, since element 0 is empty */
}

int32_t CSVMLight::check_optimality(
    int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc,
    float64_t *maxdiff, float64_t epsilon_crit_org, int32_t *misclassified,
    int32_t *inconsistent, int32_t *active2dnum, int32_t *last_suboptimal_at,
    int32_t iteration)
     /* Check KT-conditions */
{
  int32_t i,ii,retrain;
  float64_t dist,ex_c,target;

  if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
      learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org;
  else
      learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3;
  retrain=0;
  (*maxdiff)=0;
  (*misclassified)=0;
  for (ii=0;(i=active2dnum[ii])>=0;ii++) {
      if((!inconsistent[i]) && label[i]) {
          dist=(lin[i]-model->b)*(float64_t)label[i];/* 'distance' from
                                                     hyperplane*/
          target=-(learn_parm->eps[i]-(float64_t)label[i]*c[i]);
          ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
          if(dist <= 0) {
              (*misclassified)++;  /* does not work due to deactivation of var */
          }
          if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
              if((dist-target)>(*maxdiff))  /* largest violation */
                  (*maxdiff)=dist-target;
          }
          else if((a[i]<ex_c) && (dist < target)) {
              if((target-dist)>(*maxdiff))  /* largest violation */
                  (*maxdiff)=target-dist;
          }
          /* Count how long a variable was at lower/upper bound (and optimal).*/
          /* Variables, which were at the bound and optimal for a long */
          /* time are unlikely to become support vectors. In case our */
          /* cache is filled up, those variables are excluded to save */
          /* kernel evaluations. (See chapter 'Shrinking').*/
          if((a[i]>(learn_parm->epsilon_a))
             && (a[i]<ex_c)) {
              last_suboptimal_at[i]=iteration;         /* not at bound */
          }
          else if((a[i]<=(learn_parm->epsilon_a))
                  && (dist < (target+learn_parm->epsilon_shrink))) {
              last_suboptimal_at[i]=iteration;         /* not likely optimal */
          }
          else if((a[i]>=ex_c)
                  && (dist > (target-learn_parm->epsilon_shrink)))  {
              last_suboptimal_at[i]=iteration;         /* not likely optimal */
          }
      }
  }

  /* termination criterion */
  if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) {
      retrain=1;
  }
  return(retrain);
}
01425 
01426 void CSVMLight::update_linear_component(
01427     int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
01428     float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
01429     float64_t *aicache, float64_t* c)
01430      /* keep track of the linear component */
01431      /* lin of the gradient etc. by updating */
01432      /* based on the change of the variables */
01433      /* in the current working set */
01434 {
01435     register int32_t i=0,ii=0,j=0,jj=0;
01436 
01437     if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
01438     {
01439         if (callback)
01440         {
01441             update_linear_component_mkl_linadd(docs, label, active2dnum, a,
01442                     a_old, working2dnum, totdoc, lin, aicache);
01443         }
01444         else
01445         {
01446             kernel->clear_normal();
01447 
01448             int32_t num_working=0;
01449             for (ii=0;(i=working2dnum[ii])>=0;ii++) {
01450                 if(a[i] != a_old[i]) {
01451                     kernel->add_to_normal(docs[i], (a[i]-a_old[i])*(float64_t)label[i]);
01452                     num_working++;
01453                 }
01454             }
01455 
01456             if (num_working>0)
01457             {
01458                 if (parallel->get_num_threads() < 2)
01459                 {
01460                     for (jj=0;(j=active2dnum[jj])>=0;jj++) {
01461                         lin[j]+=kernel->compute_optimized(docs[j]);
01462                     }
01463                 }
01464 #ifdef HAVE_PTHREAD
01465                 else
01466                 {
01467                     int32_t num_elem = 0 ;
01468                     for (jj=0;(j=active2dnum[jj])>=0;jj++) num_elem++ ;
01469 
01470                     pthread_t* threads = SG_MALLOC(pthread_t, parallel->get_num_threads()-1);
01471                     S_THREAD_PARAM* params = SG_MALLOC(S_THREAD_PARAM, parallel->get_num_threads()-1);
01472                     int32_t start = 0 ;
01473                     int32_t step = num_elem/parallel->get_num_threads();
01474                     int32_t end = step ;
01475 
01476                     for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
01477                     {
01478                         params[t].kernel = kernel ;
01479                         params[t].lin = lin ;
01480                         params[t].docs = docs ;
01481                         params[t].active2dnum=active2dnum ;
01482                         params[t].start = start ;
01483                         params[t].end = end ;
01484                         start=end ;
01485                         end+=step ;
01486                         pthread_create(&threads[t], NULL, update_linear_component_linadd_helper, (void*)&params[t]) ;
01487                     }
01488 
01489                     for (jj=params[parallel->get_num_threads()-2].end;(j=active2dnum[jj])>=0;jj++) {
01490                         lin[j]+=kernel->compute_optimized(docs[j]);
01491                     }
01492                     void* ret;
01493                     for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
01494                         pthread_join(threads[t], &ret) ;
01495 
01496                     SG_FREE(params);
01497                     SG_FREE(threads);
01498                 }
01499 #endif
01500             }
01501         }
01502     }
01503     else
01504     {
01505         if (callback)
01506         {
01507             update_linear_component_mkl(docs, label, active2dnum,
01508                     a, a_old, working2dnum, totdoc, lin, aicache);
01509         }
01510         else {
01511             for (jj=0;(i=working2dnum[jj])>=0;jj++) {
01512                 if(a[i] != a_old[i]) {
01513                     kernel->get_kernel_row(i,active2dnum,aicache);
01514                     for (ii=0;(j=active2dnum[ii])>=0;ii++)
01515                         lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
01516                 }
01517             }
01518         }
01519     }
01520 }
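
The function above maintains the invariant lin[j] = sum_i a[i]*y[i]*K(i,j);
since only the working-set alphas change per iteration, updating by the
deltas is much cheaper than recomputing lin from scratch. A self-contained
sketch of that delta rule (illustrative names, not Shogun API):

    #include <functional>
    #include <vector>

    void update_lin(std::vector<double>& lin,
                    const std::vector<double>& a,
                    const std::vector<double>& a_old,
                    const std::vector<int>& label,
                    const std::function<double(int,int)>& K) /* K(i,j) */
    {
        const int n = (int) lin.size();
        for (int i = 0; i < n; i++) {
            if (a[i] == a_old[i])
                continue;                  /* unchanged alphas cost nothing */
            const double delta = (a[i] - a_old[i]) * label[i];
            for (int j = 0; j < n; j++)
                lin[j] += delta * K(i, j); /* one kernel row per changed alpha */
        }
    }

The KP_LINADD branch is the same idea, except the deltas are folded into the
kernel's normal vector once and compute_optimized() then serves each lin[j].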
01521 
01522 
01523 void CSVMLight::update_linear_component_mkl(
01524     int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
01525     float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
01526     float64_t *aicache)
01527 {
01528     //int inner_iters=0;
01529     int32_t num = kernel->get_num_vec_rhs();
01530     int32_t num_weights = -1;
01531     int32_t num_kernels = kernel->get_num_subkernels() ;
01532     const float64_t* old_beta   = kernel->get_subkernel_weights(num_weights);
01533     ASSERT(num_weights==num_kernels);
01534 
01535     if ((kernel->get_kernel_type()==K_COMBINED) &&
01536              (!((CCombinedKernel*)kernel)->get_append_subkernel_weights()))// for combined kernel
01537     {
01538         CCombinedKernel* k      = (CCombinedKernel*) kernel;
01539         CKernel* kn = k->get_first_kernel() ;
01540         int32_t n = 0, i, j ;
01541 
01542         while (kn!=NULL)
01543         {
01544             for (i=0;i<num;i++)
01545             {
01546                 if(a[i] != a_old[i])
01547                 {
01548                     kn->get_kernel_row(i,NULL,aicache, true);
01549                     for (j=0;j<num;j++)
01550                         W[j*num_kernels+n]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
01551                 }
01552             }
01553 
01554             SG_UNREF(kn);
01555             kn = k->get_next_kernel();
01556             n++ ;
01557         }
01558     }
01559     else // hope the kernel is fast ...
01560     {
01561         float64_t* w_backup = SG_MALLOC(float64_t, num_kernels);
01562         float64_t* w1 = SG_MALLOC(float64_t, num_kernels);
01563 
01564         // backup and set to zero
01565         for (int32_t i=0; i<num_kernels; i++)
01566         {
01567             w_backup[i] = old_beta[i] ;
01568             w1[i]=0.0 ;
01569         }
01570         for (int32_t n=0; n<num_kernels; n++)
01571         {
01572             w1[n]=1.0 ;
01573             kernel->set_subkernel_weights(SGVector<float64_t>(w1, num_weights));
01574 
01575             for (int32_t i=0;i<num;i++)
01576             {
01577                 if(a[i] != a_old[i])
01578                 {
01579                     for (int32_t j=0;j<num;j++)
01580                         W[j*num_kernels+n]+=(a[i]-a_old[i])*compute_kernel(i,j)*(float64_t)label[i];
01581                 }
01582             }
01583             w1[n]=0.0 ;
01584         }
01585 
01586         // restore old weights
01587         kernel->set_subkernel_weights(SGVector<float64_t>(w_backup,num_weights));
01588 
01589         SG_FREE(w_backup);
01590         SG_FREE(w1);
01591     }
01592 
01593     call_mkl_callback(a, label, lin);
01594 }
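
For MKL the same bookkeeping is kept per subkernel: W stores, for each
example j and subkernel n, the value sum_i a[i]*y[i]*K_n(i,j), laid out as
W[j*num_kernels + n]. A hedged sketch of that update (Kn is an assumed
stand-in for evaluating subkernel n, not the Shogun interface):

    #include <functional>
    #include <vector>

    void update_W(std::vector<double>& W, int num, int num_kernels,
                  const std::vector<double>& a,
                  const std::vector<double>& a_old,
                  const std::vector<int>& label,
                  const std::function<double(int,int,int)>& Kn) /* Kn(n,i,j) */
    {
        for (int n = 0; n < num_kernels; n++)      /* one pass per subkernel */
            for (int i = 0; i < num; i++)
                if (a[i] != a_old[i])
                    for (int j = 0; j < num; j++)
                        W[j*num_kernels + n] +=
                            (a[i] - a_old[i]) * Kn(n, i, j) * label[i];
    }

The combined-kernel branch above does this row by row via get_kernel_row();
the fallback branch isolates subkernel n by setting its weight to 1 and all
others to 0 before evaluating compute_kernel().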
01595 
01596 
01597 void CSVMLight::update_linear_component_mkl_linadd(
01598     int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
01599     float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
01600     float64_t *aicache)
01601 {
01602     //int inner_iters=0;
01603 
01604     // a kernel with the KP_LINADD property is assumed to provide
01605     // compute_by_subkernel functions
01606     int32_t num = kernel->get_num_vec_rhs();
01607     int32_t num_weights = -1;
01608     int32_t num_kernels = kernel->get_num_subkernels() ;
01609     const float64_t* old_beta   = kernel->get_subkernel_weights(num_weights);
01610     ASSERT(num_weights==num_kernels);
01611 
01612     float64_t* w_backup = SG_MALLOC(float64_t, num_kernels);
01613     float64_t* w1 = SG_MALLOC(float64_t, num_kernels);
01614 
01615     // backup and set to one
01616     for (int32_t i=0; i<num_kernels; i++)
01617     {
01618         w_backup[i] = old_beta[i] ;
01619         w1[i]=1.0 ;
01620     }
01621     // set the kernel weights
01622     kernel->set_subkernel_weights(SGVector<float64_t>(w1, num_weights));
01623 
01624     // create normal update (with changed alphas only)
01625     kernel->clear_normal();
01626     for (int32_t ii=0, i=0;(i=working2dnum[ii])>=0;ii++) {
01627         if(a[i] != a_old[i]) {
01628             kernel->add_to_normal(docs[i], (a[i]-a_old[i])*(float64_t)label[i]);
01629         }
01630     }
01631 
01632     if (parallel->get_num_threads() < 2)
01633     {
01634         // determine contributions of different kernels
01635         for (int32_t i=0; i<num; i++)
01636             kernel->compute_by_subkernel(i,&W[i*num_kernels]);
01637     }
01638 #ifdef HAVE_PTHREAD
01639     else
01640     {
01641         pthread_t* threads = SG_MALLOC(pthread_t, parallel->get_num_threads()-1);
01642         S_THREAD_PARAM* params = SG_MALLOC(S_THREAD_PARAM, parallel->get_num_threads()-1);
01643         int32_t step= num/parallel->get_num_threads();
01644 
01645         for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
01646         {
01647             params[t].kernel = kernel;
01648             params[t].W = W;
01649             params[t].start = t*step;
01650             params[t].end = (t+1)*step;
01651             pthread_create(&threads[t], NULL, CSVMLight::update_linear_component_mkl_linadd_helper, (void*)&params[t]);
01652         }
01653 
01654         for (int32_t i=params[parallel->get_num_threads()-2].end; i<num; i++)
01655             kernel->compute_by_subkernel(i,&W[i*num_kernels]);
01656 
01657         for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
01658             pthread_join(threads[t], NULL);
01659 
01660         SG_FREE(params);
01661         SG_FREE(threads);
01662     }
01663 #endif
01664 
01665     // restore old weights
01666     kernel->set_subkernel_weights(SGVector<float64_t>(w_backup,num_weights));
01667 
01668     call_mkl_callback(a, label, lin);
01669 }
01670 
01671 void* CSVMLight::update_linear_component_mkl_linadd_helper(void* p)
01672 {
01673     S_THREAD_PARAM* params = (S_THREAD_PARAM*) p;
01674 
01675     int32_t num_kernels=params->kernel->get_num_subkernels();
01676 
01677     // determine contributions of different kernels
01678     for (int32_t i=params->start; i<params->end; i++)
01679         params->kernel->compute_by_subkernel(i,&(params->W[i*num_kernels]));
01680 
01681     return NULL ;
01682 }
01683 
01684 void CSVMLight::call_mkl_callback(float64_t* a, int32_t* label, float64_t* lin)
01685 {
01686     int32_t num = kernel->get_num_vec_rhs();
01687     int32_t num_kernels = kernel->get_num_subkernels() ;
01688 
01689     float64_t suma=0;
01690     float64_t* sumw=SG_MALLOC(float64_t, num_kernels);
01691 #ifdef HAVE_LAPACK
01692     int nk = (int) num_kernels; /* calling external lib */
01693     double* alphay  = SG_MALLOC(double, num);
01694 
01695     for (int32_t i=0; i<num; i++)
01696     {
01697         alphay[i]=a[i]*label[i];
01698         suma+=a[i];
01699     }
01700 
01701     for (int32_t i=0; i<num_kernels; i++)
01702         sumw[i]=0;
01703 
01704     cblas_dgemv(CblasColMajor, CblasNoTrans, nk, (int) num, 0.5, (double*) W,
01705             nk, alphay, 1, 1.0, (double*) sumw, 1);
01706 
01707     SG_FREE(alphay);
01708 #else
01709     for (int32_t i=0; i<num; i++)
01710         suma += a[i];
01711 
01712     for (int32_t d=0; d<num_kernels; d++)
01713     {
01714         sumw[d]=0;
01715         for (int32_t i=0; i<num; i++)
01716             sumw[d] += a[i]*(0.5*label[i]*W[i*num_kernels+d]);
01717     }
01718 #endif
01719 
01720     if (callback)
01721         mkl_converged=callback(mkl, sumw, suma);
01722 
01723 
01724     const float64_t* new_beta   = kernel->get_subkernel_weights(num_kernels);
01725 
01726     // update lin
01727 #ifdef HAVE_LAPACK
01728     cblas_dgemv(CblasColMajor, CblasTrans, nk, (int) num, 1.0, (double*) W,
01729         nk, (double*) new_beta, 1, 0.0, (double*) lin, 1);
01730 #else
01731     for (int32_t i=0; i<num; i++)
01732         lin[i]=0 ;
01733     for (int32_t d=0; d<num_kernels; d++)
01734         if (new_beta[d]!=0)
01735             for (int32_t i=0; i<num; i++)
01736                 lin[i] += new_beta[d]*W[i*num_kernels+d] ;
01737 #endif
01738 
01739     SG_FREE(sumw);
01740 }
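
Reading W as a column-major (num_kernels x num) matrix whose column i holds
the per-subkernel scores of example i, the two cblas_dgemv calls compute
sumw = 0.5*W*(a.*y) and lin = W^T*beta. A plain-loop sketch of the same
quantities, mirroring the non-LAPACK branches (illustrative names):

    #include <vector>

    void mkl_gradients(const std::vector<double>& W, int num, int num_kernels,
                       const std::vector<double>& alphay, /* a[i]*label[i] */
                       const std::vector<double>& beta,   /* subkernel weights */
                       std::vector<double>& sumw,         /* out: 0.5*W*alphay */
                       std::vector<double>& lin)          /* out: W^T*beta */
    {
        for (int d = 0; d < num_kernels; d++) {
            sumw[d] = 0;
            for (int i = 0; i < num; i++)
                sumw[d] += 0.5 * W[i*num_kernels + d] * alphay[i];
        }
        for (int i = 0; i < num; i++) {
            lin[i] = 0;
            for (int d = 0; d < num_kernels; d++)
                lin[i] += beta[d] * W[i*num_kernels + d];
        }
    }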
01741 
01742 
01743 /*************************** Working set selection ***************************/
01744 
01745 int32_t CSVMLight::select_next_qp_subproblem_grad(
01746     int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc,
01747     int32_t qp_size, int32_t *inconsistent, int32_t *active2dnum,
01748     int32_t *working2dnum, float64_t *selcrit, int32_t *select,
01749     int32_t cache_only, int32_t *key, int32_t *chosen)
01750     /* Use the feasible direction approach to select the next
01751        qp-subproblem (see chapter 'Selecting a good working set'). If
01752        'cache_only' is true, then the variables are selected only among
01753        those for which the kernel evaluations are cached. */
01754 {
01755     int32_t choosenum,i,j,k,activedoc,inum,valid;
01756     float64_t s;
01757 
01758     for (inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
01759     choosenum=0;
01760     activedoc=0;
01761     for (i=0;(j=active2dnum[i])>=0;i++) {
01762         s=-label[j];
01763         if(cache_only)
01764         {
01765             if (use_kernel_cache)
01766                 valid=(kernel->kernel_cache_check(j));
01767             else
01768                 valid = 1 ;
01769         }
01770         else
01771             valid=1;
01772         if(valid
01773            && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
01774            && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
01775                  && (s>0)))
01776            && (!chosen[j])
01777            && (label[j])
01778            && (!inconsistent[j]))
01779         {
01780             selcrit[activedoc]=(float64_t)label[j]*(learn_parm->eps[j]-(float64_t)label[j]*c[j]+(float64_t)label[j]*lin[j]);
01781             key[activedoc]=j;
01782             activedoc++;
01783         }
01784     }
01785     select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
01786     for (k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
01787         i=key[select[k]];
01788         chosen[i]=1;
01789         working2dnum[inum+choosenum]=i;
01790         choosenum+=1;
01791         if (use_kernel_cache)
01792             kernel->kernel_cache_touch(i);
01793         /* make sure it does not get kicked */
01794         /* out of cache */
01795     }
01796 
01797     activedoc=0;
01798     for (i=0;(j=active2dnum[i])>=0;i++) {
01799         s=label[j];
01800         if(cache_only)
01801         {
01802             if (use_kernel_cache)
01803                 valid=(kernel->kernel_cache_check(j));
01804             else
01805                 valid = 1 ;
01806         }
01807         else
01808             valid=1;
01809         if(valid
01810            && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
01811            && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
01812                  && (s>0)))
01813            && (!chosen[j])
01814            && (label[j])
01815            && (!inconsistent[j]))
01816         {
01817             selcrit[activedoc]=-(float64_t)label[j]*(learn_parm->eps[j]-(float64_t)label[j]*c[j]+(float64_t)label[j]*lin[j]);
01818             /*  selcrit[activedoc]=-(float64_t)(label[j]*(-1.0+(float64_t)label[j]*lin[j])); */
01819             key[activedoc]=j;
01820             activedoc++;
01821         }
01822     }
01823     select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
01824     for (k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
01825         i=key[select[k]];
01826         chosen[i]=1;
01827         working2dnum[inum+choosenum]=i;
01828         choosenum+=1;
01829         if (use_kernel_cache)
01830             kernel->kernel_cache_touch(i); /* make sure it does not get kicked */
01831         /* out of cache */
01832     }
01833     working2dnum[inum+choosenum]=-1; /* complete index */
01834     return(choosenum);
01835 }
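
The two passes above implement steepest feasible direction selection: rank
the variables by the gradient along the direction in which they may still
move, then take up to qp_size/2 from each sign. A condensed sketch of that
policy (hypothetical names, not the Shogun interface):

    #include <algorithm>
    #include <vector>

    std::vector<int> select_working_set(const std::vector<double>& grad,
                                        const std::vector<bool>& can_increase,
                                        const std::vector<bool>& can_decrease,
                                        int qp_size)
    {
        std::vector<int> up, down, ws;
        for (int i = 0; i < (int) grad.size(); i++) {
            if (can_increase[i]) up.push_back(i);   /* a[i] below its bound */
            if (can_decrease[i]) down.push_back(i); /* a[i] above zero */
        }
        std::sort(up.begin(), up.end(),
                  [&](int a, int b){ return grad[a] > grad[b]; });
        std::sort(down.begin(), down.end(),
                  [&](int a, int b){ return grad[a] < grad[b]; });
        for (int k = 0; k < qp_size/2 && k < (int) up.size(); k++)
            ws.push_back(up[k]);
        for (int k = 0; k < qp_size/2 && k < (int) down.size(); k++)
            ws.push_back(down[k]);
        return ws; /* indices entering the next QP subproblem */
    }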
01836 
01837 int32_t CSVMLight::select_next_qp_subproblem_rand(
01838     int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc,
01839     int32_t qp_size, int32_t *inconsistent, int32_t *active2dnum,
01840     int32_t *working2dnum, float64_t *selcrit, int32_t *select, int32_t *key,
01841     int32_t *chosen, int32_t iteration)
01842 /* Use the feasible direction approach to select the next
01843    qp-subproblem (see section 'Selecting a good working set'). It
01844    chooses a feasible direction at (pseudo) random to help jump
01845    over numerical problems. */
01846 {
01847   int32_t choosenum,i,j,k,activedoc,inum;
01848   float64_t s;
01849 
01850   for (inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
01851   choosenum=0;
01852   activedoc=0;
01853   for (i=0;(j=active2dnum[i])>=0;i++) {
01854     s=-label[j];
01855     if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
01856        && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
01857          && (s>0)))
01858        && (!inconsistent[j])
01859        && (label[j])
01860        && (!chosen[j])) {
01861       selcrit[activedoc]=(j+iteration) % totdoc;
01862       key[activedoc]=j;
01863       activedoc++;
01864     }
01865   }
01866   select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
01867   for (k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
01868     i=key[select[k]];
01869     chosen[i]=1;
01870     working2dnum[inum+choosenum]=i;
01871     choosenum+=1;
01872     if (use_kernel_cache)
01873         kernel->kernel_cache_touch(i); /* make sure it does not get kicked */
01874                                         /* out of cache */
01875   }
01876 
01877   activedoc=0;
01878   for (i=0;(j=active2dnum[i])>=0;i++) {
01879     s=label[j];
01880     if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
01881        && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
01882          && (s>0)))
01883        && (!inconsistent[j])
01884        && (label[j])
01885        && (!chosen[j])) {
01886       selcrit[activedoc]=(j+iteration) % totdoc;
01887       key[activedoc]=j;
01888       activedoc++;
01889     }
01890   }
01891   select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
01892   for (k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
01893     i=key[select[k]];
01894     chosen[i]=1;
01895     working2dnum[inum+choosenum]=i;
01896     choosenum+=1;
01897     if (use_kernel_cache)
01898         kernel->kernel_cache_touch(i); /* make sure it does not get kicked */
01899                                         /* out of cache */
01900   }
01901   working2dnum[inum+choosenum]=-1; /* complete index */
01902   return(choosenum);
01903 }
01904 
01905 
01906 
01907 void CSVMLight::select_top_n(
01908     float64_t *selcrit, int32_t range, int32_t* select, int32_t n)
01909 {
01910   register int32_t i,j;
01911 
01912   for (i=0;(i<n) && (i<range);i++) { /* Initialize with the first n elements */
01913     for (j=i;j>=0;j--) {
01914       if((j>0) && (selcrit[select[j-1]]<selcrit[i])) {
01915         select[j]=select[j-1];
01916       }
01917       else {
01918         select[j]=i;
01919         j=-1;
01920       }
01921     }
01922   }
01923   if(n>0) {
01924     for (i=n;i<range;i++) {
01925       if(selcrit[i]>selcrit[select[n-1]]) {
01926         for (j=n-1;j>=0;j--) {
01927           if((j>0) && (selcrit[select[j-1]]<selcrit[i])) {
01928             select[j]=select[j-1];
01929           }
01930           else {
01931             select[j]=i;
01932             j=-1;
01933           }
01934         }
01935       }
01936     }
01937   }
01938 }
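
select_top_n is a hand-rolled partial selection sort over the criterion
array. A sketch of the equivalent effect with the standard library, under
the assumption that only the n best indices are needed in order:

    #include <algorithm>
    #include <numeric>
    #include <vector>

    std::vector<int> top_n_indices(const std::vector<double>& selcrit, int n)
    {
        std::vector<int> idx(selcrit.size());
        std::iota(idx.begin(), idx.end(), 0);  /* 0,1,...,range-1 */
        n = std::min<int>(n, (int) idx.size());
        std::partial_sort(idx.begin(), idx.begin() + n, idx.end(),
                          [&](int a, int b){ return selcrit[a] > selcrit[b]; });
        idx.resize(n);                         /* the n largest criteria */
        return idx;
    }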
01939 
01940 
01941 /******************************** Shrinking  *********************************/
01942 
01943 void CSVMLight::init_shrink_state(
01944     SHRINK_STATE *shrink_state, int32_t totdoc, int32_t maxhistory)
01945 {
01946   int32_t i;
01947 
01948   shrink_state->deactnum=0;
01949   shrink_state->active = SG_MALLOC(int32_t, totdoc);
01950   shrink_state->inactive_since = SG_MALLOC(int32_t, totdoc);
01951   shrink_state->a_history = SG_MALLOC(float64_t*, maxhistory);
01952   shrink_state->maxhistory=maxhistory;
01953   shrink_state->last_lin = SG_MALLOC(float64_t, totdoc);
01954   shrink_state->last_a = SG_MALLOC(float64_t, totdoc);
01955 
01956   for (i=0;i<totdoc;i++) {
01957     shrink_state->active[i]=1;
01958     shrink_state->inactive_since[i]=0;
01959     shrink_state->last_a[i]=0;
01960     shrink_state->last_lin[i]=0;
01961   }
01962 }
01963 
01964 void CSVMLight::shrink_state_cleanup(SHRINK_STATE *shrink_state)
01965 {
01966   SG_FREE(shrink_state->active);
01967   SG_FREE(shrink_state->inactive_since);
01968   if(shrink_state->deactnum > 0)
01969     SG_FREE((shrink_state->a_history[shrink_state->deactnum-1]));
01970   SG_FREE((shrink_state->a_history));
01971   SG_FREE((shrink_state->last_a));
01972   SG_FREE((shrink_state->last_lin));
01973 }
01974 
01975 int32_t CSVMLight::shrink_problem(
01976     SHRINK_STATE *shrink_state, int32_t *active2dnum,
01977     int32_t *last_suboptimal_at, int32_t iteration, int32_t totdoc,
01978     int32_t minshrink, float64_t *a, int32_t *inconsistent, float64_t* c,
01979     float64_t* lin, int* label)
01980      /* Shrink some variables away.  Do the shrinking only if at least
01981         minshrink variables can be removed. */
01982 {
01983   int32_t i,ii,change,activenum,lastiter;
01984   float64_t *a_old=NULL;
01985 
01986   activenum=0;
01987   change=0;
01988   for (ii=0;active2dnum[ii]>=0;ii++) {
01989       i=active2dnum[ii];
01990       activenum++;
01991       lastiter=last_suboptimal_at[i];
01992 
01993       if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink)
01994          || (inconsistent[i])) {
01995           change++;
01996       }
01997   }
01998   if((change>=minshrink) /* shrink only if sufficiently many candidates */
01999      && (shrink_state->deactnum<shrink_state->maxhistory)) { /* and enough memory */
02000       /* Shrink problem by removing those variables which are */
02001       /* optimal at a bound for a minimum number of iterations */
02002       if(verbosity>=2) {
02003           SG_INFO( " Shrinking...");
02004       }
02005 
02006       if (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())) { /* non-linear case: save alphas */
02007 
02008           a_old=SG_MALLOC(float64_t, totdoc);
02009           shrink_state->a_history[shrink_state->deactnum]=a_old;
02010           for (i=0;i<totdoc;i++) {
02011               a_old[i]=a[i];
02012           }
02013       }
02014       for (ii=0;active2dnum[ii]>=0;ii++) {
02015           i=active2dnum[ii];
02016           lastiter=last_suboptimal_at[i];
02017           if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink)
02018              || (inconsistent[i])) {
02019               shrink_state->active[i]=0;
02020               shrink_state->inactive_since[i]=shrink_state->deactnum;
02021           }
02022       }
02023       activenum=compute_index(shrink_state->active,totdoc,active2dnum);
02024       shrink_state->deactnum++;
02025       if(kernel->has_property(KP_LINADD) && get_linadd_enabled())
02026           shrink_state->deactnum=0;
02027 
02028       if(verbosity>=2) {
02029           SG_DONE();
02030           SG_DEBUG("Number of inactive variables = %ld\n", totdoc-activenum);
02031       }
02032   }
02033   return(activenum);
02034 }
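
The shrinking rule above in isolation: a variable is frozen once it has
looked optimal at a bound for more than svm_iter_to_shrink consecutive
iterations, or has been marked inconsistent. A minimal sketch (illustrative
names):

    #include <vector>

    int shrink_active_set(std::vector<int>& active,
                          const std::vector<int>& last_suboptimal_at,
                          const std::vector<int>& inconsistent,
                          int iteration, int iter_to_shrink)
    {
        int remaining = 0;
        for (size_t i = 0; i < active.size(); i++) {
            if (!active[i])
                continue;
            if (iteration - last_suboptimal_at[i] > iter_to_shrink
                || inconsistent[i])
                active[i] = 0;   /* freeze: likely to stay at its bound */
            else
                remaining++;     /* stays in the optimization */
        }
        return remaining;        /* new number of active variables */
    }

In the non-linear case the alphas are snapshotted into a_history first, so
that reactivate_inactive_examples() can later reconstruct lin for the frozen
variables from the recorded deltas.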
02035 
02036 void* CSVMLight::reactivate_inactive_examples_linadd_helper(void* p)
02037 {
02038     S_THREAD_PARAM_REACTIVATE_LINADD* params = (S_THREAD_PARAM_REACTIVATE_LINADD*) p;
02039 
02040     CKernel* k = params->kernel;
02041     float64_t* lin = params->lin;
02042     float64_t* last_lin = params->last_lin;
02043     int32_t* active = params->active;
02044     int32_t* docs = params->docs;
02045     int32_t start = params->start;
02046     int32_t end = params->end;
02047 
02048     for (int32_t i=start;i<end;i++)
02049     {
02050         if (!active[i])
02051             lin[i] = last_lin[i]+k->compute_optimized(docs[i]);
02052 
02053         last_lin[i]=lin[i];
02054     }
02055 
02056     return NULL;
02057 }
02058 
02059 void* CSVMLight::reactivate_inactive_examples_vanilla_helper(void* p)
02060 {
02061     S_THREAD_PARAM_REACTIVATE_VANILLA* params = (S_THREAD_PARAM_REACTIVATE_VANILLA*) p;
02062     ASSERT(params);
02063     ASSERT(params->kernel);
02064     ASSERT(params->lin);
02065     ASSERT(params->aicache);
02066     ASSERT(params->a);
02067     ASSERT(params->a_old);
02068     ASSERT(params->changed2dnum);
02069     ASSERT(params->inactive2dnum);
02070     ASSERT(params->label);
02071 
02072     CKernel* k = params->kernel;
02073     float64_t* lin = params->lin;
02074     float64_t* aicache = params->aicache;
02075     float64_t* a= params->a;
02076     float64_t* a_old = params->a_old;
02077     int32_t* changed2dnum = params->changed2dnum;
02078     int32_t* inactive2dnum = params->inactive2dnum;
02079     int32_t* label = params->label;
02080     int32_t start = params->start;
02081     int32_t end = params->end;
02082 
02083     for (int32_t ii=start;ii<end;ii++)
02084     {
02085         int32_t i=changed2dnum[ii];
02086         int32_t j=0;
02087         ASSERT(i>=0);
02088 
02089         k->get_kernel_row(i,inactive2dnum,aicache);
02090         for (int32_t jj=0;(j=inactive2dnum[jj])>=0;jj++)
02091             lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
02092     }
02093     return NULL;
02094 }
02095 
02096 void CSVMLight::reactivate_inactive_examples(
02097     int32_t* label, float64_t *a, SHRINK_STATE *shrink_state, float64_t *lin,
02098     float64_t *c, int32_t totdoc, int32_t iteration, int32_t *inconsistent,
02099     int32_t* docs, float64_t *aicache, float64_t *maxdiff)
02100      /* Make all variables active again which had been removed by
02101         shrinking. */
02102      /* Computes lin for those variables from scratch. */
02103 {
02104   register int32_t i,j,ii,jj,t,*changed2dnum,*inactive2dnum;
02105   int32_t *changed,*inactive;
02106   register float64_t *a_old,dist;
02107   float64_t ex_c,target;
02108 
02109   if (kernel->has_property(KP_LINADD) && get_linadd_enabled()) { /* special linear case */
02110       a_old=shrink_state->last_a;
02111 
02112       if (!use_batch_computation || !kernel->has_property(KP_BATCHEVALUATION))
02113       {
02114           SG_DEBUG( " clear normal - linadd\n");
02115           kernel->clear_normal();
02116 
02117           int32_t num_modified=0;
02118           for (i=0;i<totdoc;i++) {
02119               if(a[i] != a_old[i]) {
02120                   kernel->add_to_normal(docs[i], ((a[i]-a_old[i])*(float64_t)label[i]));
02121                   a_old[i]=a[i];
02122                   num_modified++;
02123               }
02124           }
02125 
02126           if (num_modified>0)
02127           {
02128               int32_t num_threads=parallel->get_num_threads();
02129               ASSERT(num_threads>0);
02130               if (num_threads < 2)
02131               {
02132                   S_THREAD_PARAM_REACTIVATE_LINADD params;
02133                   params.kernel=kernel;
02134                   params.lin=lin;
02135                   params.last_lin=shrink_state->last_lin;
02136                   params.docs=docs;
02137                   params.active=shrink_state->active;
02138                   params.start=0;
02139                   params.end=totdoc;
02140                   reactivate_inactive_examples_linadd_helper((void*) &params);
02141               }
02142 #ifdef HAVE_PTHREAD
02143               else
02144               {
02145                   pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
02146                   S_THREAD_PARAM_REACTIVATE_LINADD* params = SG_MALLOC(S_THREAD_PARAM_REACTIVATE_LINADD, num_threads);
02147                   int32_t step= totdoc/num_threads;
02148 
02149                   for (t=0; t<num_threads-1; t++)
02150                   {
02151                       params[t].kernel=kernel;
02152                       params[t].lin=lin;
02153                       params[t].last_lin=shrink_state->last_lin;
02154                       params[t].docs=docs;
02155                       params[t].active=shrink_state->active;
02156                       params[t].start = t*step;
02157                       params[t].end = (t+1)*step;
02158                       pthread_create(&threads[t], NULL, CSVMLight::reactivate_inactive_examples_linadd_helper, (void*)&params[t]);
02159                   }
02160 
02161                   params[t].kernel=kernel;
02162                   params[t].lin=lin;
02163                   params[t].last_lin=shrink_state->last_lin;
02164                   params[t].docs=docs;
02165                   params[t].active=shrink_state->active;
02166                   params[t].start = t*step;
02167                   params[t].end = totdoc;
02168                   reactivate_inactive_examples_linadd_helper((void*) &params[t]);
02169 
02170                   for (t=0; t<num_threads-1; t++)
02171                       pthread_join(threads[t], NULL);
02172 
02173                   SG_FREE(threads);
02174                   SG_FREE(params);
02175               }
02176 #endif
02177 
02178           }
02179       }
02180       else
02181       {
02182           float64_t *alphas = SG_MALLOC(float64_t, totdoc);
02183           int32_t *idx = SG_MALLOC(int32_t, totdoc);
02184           int32_t num_suppvec=0 ;
02185 
02186           for (i=0; i<totdoc; i++)
02187           {
02188               if(a[i] != a_old[i])
02189               {
02190                   alphas[num_suppvec] = (a[i]-a_old[i])*(float64_t)label[i];
02191                   idx[num_suppvec] = i ;
02192                   a_old[i] = a[i] ;
02193                   num_suppvec++ ;
02194               }
02195           }
02196 
02197           if (num_suppvec>0)
02198           {
02199               int32_t num_inactive=0;
02200               int32_t* inactive_idx=SG_MALLOC(int32_t, totdoc); // in fact we only need a subset
02201 
02202               j=0;
02203               for (i=0;i<totdoc;i++)
02204               {
02205                   if(!shrink_state->active[i])
02206                   {
02207                       inactive_idx[j++] = i;
02208                       num_inactive++;
02209                   }
02210               }
02211 
02212               if (num_inactive>0)
02213               {
02214                   float64_t* dest = SG_MALLOC(float64_t, num_inactive);
02215                   memset(dest, 0, sizeof(float64_t)*num_inactive);
02216 
02217                   kernel->compute_batch(num_inactive, inactive_idx, dest, num_suppvec, idx, alphas);
02218 
02219                   j=0;
02220                   for (i=0;i<totdoc;i++) {
02221                       if(!shrink_state->active[i]) {
02222                           lin[i] = shrink_state->last_lin[i] + dest[j++] ;
02223                       }
02224                       shrink_state->last_lin[i]=lin[i];
02225                   }
02226 
02227                   SG_FREE(dest);
02228               }
02229               else
02230               {
02231                   for (i=0;i<totdoc;i++)
02232                       shrink_state->last_lin[i]=lin[i];
02233               }
02234               SG_FREE(inactive_idx);
02235           }
02236           SG_FREE(alphas);
02237           SG_FREE(idx);
02238       }
02239 
02240       kernel->delete_optimization();
02241   }
02242   else
02243   {
02244       changed=SG_MALLOC(int32_t, totdoc);
02245       changed2dnum=SG_MALLOC(int32_t, totdoc+11);
02246       inactive=SG_MALLOC(int32_t, totdoc);
02247       inactive2dnum=SG_MALLOC(int32_t, totdoc+11);
02248       for (t=shrink_state->deactnum-1;(t>=0) && shrink_state->a_history[t];t--)
02249       {
02250           if(verbosity>=2) {
02251               SG_INFO( "%ld..",t);
02252           }
02253           a_old=shrink_state->a_history[t];
02254           for (i=0;i<totdoc;i++) {
02255               inactive[i]=((!shrink_state->active[i])
02256                            && (shrink_state->inactive_since[i] == t));
02257               changed[i]= (a[i] != a_old[i]);
02258           }
02259           compute_index(inactive,totdoc,inactive2dnum);
02260           compute_index(changed,totdoc,changed2dnum);
02261 
02262 
02263           //TODO: PUT THIS BACK IN!!!!!!!! int32_t num_threads=parallel->get_num_threads();
02264           int32_t num_threads=1;
02265           ASSERT(num_threads>0);
02266 
02267           if (num_threads < 2)
02268           {
02269               for (ii=0;(i=changed2dnum[ii])>=0;ii++) {
02270                   kernel->get_kernel_row(i,inactive2dnum,aicache);
02271                   for (jj=0;(j=inactive2dnum[jj])>=0;jj++)
02272                       lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
02273               }
02274           }
02275 #ifdef HAVE_PTHREAD
02276           else
02277           {
02278               //find number of the changed ones
02279               int32_t num_changed=0;
02280               for (ii=0;changed2dnum[ii]>=0;ii++)
02281                   num_changed++;
02282 
02283               if (num_changed>0)
02284               {
02285                   pthread_t* threads= SG_MALLOC(pthread_t, num_threads-1);
02286                   S_THREAD_PARAM_REACTIVATE_VANILLA* params = SG_MALLOC(S_THREAD_PARAM_REACTIVATE_VANILLA, num_threads);
02287                   int32_t step= num_changed/num_threads;
02288 
02289                   // alloc num_threads many tmp buffers
02290                   float64_t* tmp_lin=SG_MALLOC(float64_t, totdoc*num_threads);
02291                   memset(tmp_lin, 0, sizeof(float64_t)*((size_t) totdoc)*num_threads);
02292                   float64_t* tmp_aicache=SG_MALLOC(float64_t, totdoc*num_threads);
02293                   memset(tmp_aicache, 0, sizeof(float64_t)*((size_t) totdoc)*num_threads);
02294 
02295                   int32_t thr;
02296                   for (thr=0; thr<num_threads-1; thr++)
02297                   {
02298                       params[thr].kernel=kernel;
02299                       params[thr].lin=&tmp_lin[thr*totdoc];
02300                       params[thr].aicache=&tmp_aicache[thr*totdoc];
02301                       params[thr].a=a;
02302                       params[thr].a_old=a_old;
02303                       params[thr].changed2dnum=changed2dnum;
02304                       params[thr].inactive2dnum=inactive2dnum;
02305                       params[thr].label=label;
02306                       params[thr].start = thr*step;
02307                       params[thr].end = (thr+1)*step;
02308                       pthread_create(&threads[thr], NULL, CSVMLight::reactivate_inactive_examples_vanilla_helper, (void*)&params[thr]);
02309                   }
02310 
02311                   params[thr].kernel=kernel;
02312                   params[thr].lin=&tmp_lin[thr*totdoc];
02313                   params[thr].aicache=&tmp_aicache[thr*totdoc];
02314                   params[thr].a=a;
02315                   params[thr].a_old=a_old;
02316                   params[thr].changed2dnum=changed2dnum;
02317                   params[thr].inactive2dnum=inactive2dnum;
02318                   params[thr].label=label;
02319                   params[thr].start = thr*step;
02320                   params[thr].end = num_changed;
02321                   reactivate_inactive_examples_vanilla_helper((void*) &params[thr]);
02322 
02323                   for (jj=0;(j=inactive2dnum[jj])>=0;jj++)
02324                       lin[j]+=tmp_lin[totdoc*thr+j];
02325 
02326                   for (thr=0; thr<num_threads-1; thr++)
02327                   {
02328                       pthread_join(threads[thr], NULL);
02329 
02330                       //add up results
02331                       for (jj=0;(j=inactive2dnum[jj])>=0;jj++)
02332                           lin[j]+=tmp_lin[totdoc*thr+j];
02333                   }
02334 
02335                   SG_FREE(tmp_lin);
02336                   SG_FREE(tmp_aicache);
02337                   SG_FREE(threads);
02338                   SG_FREE(params);
02339               }
02340           }
02341 #endif
02342       }
02343       SG_FREE(changed);
02344       SG_FREE(changed2dnum);
02345       SG_FREE(inactive);
02346       SG_FREE(inactive2dnum);
02347   }
02348 
02349   (*maxdiff)=0;
02350   for (i=0;i<totdoc;i++) {
02351     shrink_state->inactive_since[i]=shrink_state->deactnum-1;
02352     if(!inconsistent[i]) {
02353       dist=(lin[i]-model->b)*(float64_t)label[i];
02354       target=-(learn_parm->eps[i]-(float64_t)label[i]*c[i]);
02355       ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
02356       if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
02357         if((dist-target)>(*maxdiff))  /* largest violation */
02358           (*maxdiff)=dist-target;
02359       }
02360       else if((a[i]<ex_c) && (dist < target)) {
02361         if((target-dist)>(*maxdiff))  /* largest violation */
02362           (*maxdiff)=target-dist;
02363       }
02364       if((a[i]>(0+learn_parm->epsilon_a))
02365          && (a[i]<ex_c)) {
02366         shrink_state->active[i]=1;                         /* not at bound */
02367       }
02368       else if((a[i]<=(0+learn_parm->epsilon_a)) && (dist < (target+learn_parm->epsilon_shrink))) {
02369         shrink_state->active[i]=1;
02370       }
02371       else if((a[i]>=ex_c)
02372               && (dist > (target-learn_parm->epsilon_shrink))) {
02373         shrink_state->active[i]=1;
02374       }
02375       else if(learn_parm->sharedslack) { /* make all active when sharedslack */
02376         shrink_state->active[i]=1;
02377       }
02378     }
02379   }
02380 
02381   if (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())) { /* update history for non-linear */
02382       for (i=0;i<totdoc;i++) {
02383           (shrink_state->a_history[shrink_state->deactnum-1])[i]=a[i];
02384       }
02385       for (t=shrink_state->deactnum-2;(t>=0) && shrink_state->a_history[t];t--) {
02386           SG_FREE(shrink_state->a_history[t]);
02387           shrink_state->a_history[t]=0;
02388       }
02389   }
02390 }
02391 
02392 
02393 
02394 /* start the optimizer and return the optimal values */
02395 float64_t* CSVMLight::optimize_qp(
02396         QP *qp, float64_t *epsilon_crit, int32_t nx, float64_t *threshold,
02397         int32_t& svm_maxqpsize)
02398 {
02399     register int32_t i, j, result;
02400     float64_t margin, obj_before, obj_after;
02401     float64_t sigdig, dist, epsilon_loqo;
02402     int32_t iter;
02403 
02404     if(!primal) { /* allocate memory at first call */
02405         primal=SG_MALLOC(float64_t, nx*3);
02406         dual=SG_MALLOC(float64_t, nx*2+1);
02407     }
02408 
02409     obj_before=0; /* calculate objective before optimization */
02410     for(i=0;i<qp->opt_n;i++) {
02411         obj_before+=(qp->opt_g0[i]*qp->opt_xinit[i]);
02412         obj_before+=(0.5*qp->opt_xinit[i]*qp->opt_xinit[i]*qp->opt_g[i*qp->opt_n+i]);
02413         for(j=0;j<i;j++) {
02414             obj_before+=(qp->opt_xinit[j]*qp->opt_xinit[i]*qp->opt_g[j*qp->opt_n+i]);
02415         }
02416     }
02417 
02418     result=STILL_RUNNING;
02419     qp->opt_ce0[0]*=(-1.0);
02420     /* Run pr_loqo. If a run fails, try again with parameters which lead */
02421     /* to a slower, but more robust setting. */
02422     for(margin=init_margin,iter=init_iter;
02423         (margin<=0.9999999) && (result!=OPTIMAL_SOLUTION);) {
02424 
02425         opt_precision=CMath::max(opt_precision, DEF_PRECISION);
02426         sigdig=-log10(opt_precision);
02427 
02428         result=pr_loqo((int32_t)qp->opt_n,(int32_t)qp->opt_m,
02429                 (float64_t *)qp->opt_g0,(float64_t *)qp->opt_g,
02430                 (float64_t *)qp->opt_ce,(float64_t *)qp->opt_ce0,
02431                 (float64_t *)qp->opt_low,(float64_t *)qp->opt_up,
02432                 (float64_t *)primal,(float64_t *)dual,
02433                 (int32_t)(verbosity-2),
02434                 (float64_t)sigdig,(int32_t)iter,
02435                 (float64_t)margin,(float64_t)(qp->opt_up[0])/4.0,(int32_t)0);
02436 
02437         if(CMath::is_nan(dual[0])) {     /* check for choldc problem */
02438             if(verbosity>=2) {
02439                 SG_SDEBUG("Restarting PR_LOQO with more conservative parameters.\n");
02440             }
02441             if(init_margin<0.80) { /* become more conservative in general */
02442                 init_margin=(4.0*margin+1.0)/5.0;
02443             }
02444             margin=(margin+1.0)/2.0;
02445             (opt_precision)*=10.0;   /* reduce precision */
02446             if(verbosity>=2) {
02447                 SG_SDEBUG("Reducing precision of PR_LOQO.\n");
02448             }
02449         }
02450         else if(result!=OPTIMAL_SOLUTION) {
02451             iter+=2000;
02452             init_iter+=10;
02453             (opt_precision)*=10.0;   /* reduce precision */
02454             if(verbosity>=2) {
02455                 SG_SDEBUG("Reducing precision of PR_LOQO due to (%ld).\n",result);
02456             }
02457         }
02458     }
02459 
02460     if(qp->opt_m)         /* Thanks to Alex Smola for this hint */
02461         model_b=dual[0];
02462     else
02463         model_b=0;
02464 
02465     /* Check the precision of the alphas. If results of current optimization */
02466     /* violate KT-Conditions, relax the epsilon on the bounds on alphas. */
02467     epsilon_loqo=1E-10;
02468     for(i=0;i<qp->opt_n;i++) {
02469         dist=-model_b*qp->opt_ce[i];
02470         dist+=(qp->opt_g0[i]+1.0);
02471         for(j=0;j<i;j++) {
02472             dist+=(primal[j]*qp->opt_g[j*qp->opt_n+i]);
02473         }
02474         for(j=i;j<qp->opt_n;j++) {
02475             dist+=(primal[j]*qp->opt_g[i*qp->opt_n+j]);
02476         }
02477         /*  SG_SDEBUG("LOQO: a[%d]=%f, dist=%f, b=%f\n",i,primal[i],dist,dual[0]); */
02478         if((primal[i]<(qp->opt_up[i]-epsilon_loqo)) && (dist < (1.0-(*epsilon_crit)))) {
02479             epsilon_loqo=(qp->opt_up[i]-primal[i])*2.0;
02480         }
02481         else if((primal[i]>(0+epsilon_loqo)) && (dist > (1.0+(*epsilon_crit)))) {
02482             epsilon_loqo=primal[i]*2.0;
02483         }
02484     }
02485 
02486     for(i=0;i<qp->opt_n;i++) {  /* clip alphas to bounds */
02487         if(primal[i]<=(0+epsilon_loqo)) {
02488             primal[i]=0;
02489         }
02490         else if(primal[i]>=(qp->opt_up[i]-epsilon_loqo)) {
02491             primal[i]=qp->opt_up[i];
02492         }
02493     }
02494 
02495     obj_after=0;  /* calculate objective after optimization */
02496     for(i=0;i<qp->opt_n;i++) {
02497         obj_after+=(qp->opt_g0[i]*primal[i]);
02498         obj_after+=(0.5*primal[i]*primal[i]*qp->opt_g[i*qp->opt_n+i]);
02499         for(j=0;j<i;j++) {
02500             obj_after+=(primal[j]*primal[i]*qp->opt_g[j*qp->opt_n+i]);
02501         }
02502     }
02503 
02504     /* if optimizer returned NAN values, reset and retry with smaller */
02505     /* working set. */
02506     if(CMath::is_nan(obj_after) || CMath::is_nan(model_b)) {
02507         for(i=0;i<qp->opt_n;i++) {
02508             primal[i]=qp->opt_xinit[i];
02509         }
02510         model_b=0;
02511         if(svm_maxqpsize>2) {
02512             svm_maxqpsize--;  /* decrease size of qp-subproblems */
02513         }
02514     }
02515 
02516     if(obj_after >= obj_before) { /* check whether there was progress */
02517         (opt_precision)/=100.0;
02518         precision_violations++;
02519         if(verbosity>=2) {
02520             SG_SDEBUG("Increasing Precision of PR_LOQO.\n");
02521         }
02522     }
02523 
02524     // TODO: add parameter for this (e.g. for MTL experiments)
02525     if(precision_violations > 5000) {
02526         (*epsilon_crit)*=10.0;
02527         precision_violations=0;
02528         SG_SINFO("Relaxing epsilon on KT-Conditions.\n");
02529     }
02530 
02531     (*threshold)=model_b;
02532 
02533     if(result!=OPTIMAL_SOLUTION) {
02534         SG_SERROR("PR_LOQO did not converge.\n");
02535         return(qp->opt_xinit);
02536     }
02537     else {
02538         return(primal);
02539     }
02540 }
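
The retry policy of optimize_qp, reduced to its control flow: whenever
pr_loqo fails or returns NaN, the requested precision is relaxed and the
margin made more conservative until the solver converges or the margin cap
is hit. A hedged sketch with a hypothetical solver hook:

    #include <cmath>

    struct QPResult { bool optimal; bool is_nan; };

    template <class Solver>  /* solve(sigdig, margin, iter) -> QPResult */
    bool solve_with_backoff(Solver solve, double precision, double margin,
                            int iter)
    {
        while (margin <= 0.9999999) {
            QPResult r = solve(-std::log10(precision), margin, iter);
            if (r.optimal)
                return true;                    /* converged at this setting */
            if (r.is_nan)
                margin = (margin + 1.0) / 2.0;  /* more conservative margin */
            else
                iter += 2000;                   /* simply allow more iterations */
            precision *= 10.0;                  /* fewer significant digits */
        }
        return false; /* caller falls back to the initial point xinit */
    }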
02541 
02542 
02543 #endif //USE_SVMLIGHT