SVMLight.cpp

Go to the documentation of this file.
00001 /***********************************************************************/
00002 /*                                                                     */
00003 /*   SVMLight.cpp                                                      */
00004 /*                                                                     */
00005 /*   Author: Thorsten Joachims                                         */
00006 /*   Date: 19.07.99                                                    */
00007 /*                                                                     */
00008 /*   Copyright (c) 1999  Universitaet Dortmund - All rights reserved   */
00009 /*                                                                     */
00010 /*   This software is available for non-commercial use only. It must   */
00011 /*   not be modified and distributed without prior permission of the   */
00012 /*   author. The author is not responsible for implications from the   */
00013 /*   use of this software.                                             */
00014 /*                                                                     */
00015 /*   THIS INCLUDES THE FOLLOWING ADDITIONS                             */
00016 /*   Generic Kernel Interfacing: Soeren Sonnenburg                     */
00017 /*   Parallelizations: Soeren Sonnenburg                               */
00018 /*   Multiple Kernel Learning: Gunnar Raetsch, Soeren Sonnenburg,      */
00019 /*                          Alexander Zien, Marius Kloft, Chen Guohua  */
00020 /*   Linadd Speedup: Gunnar Raetsch, Soeren Sonnenburg                 */
00021 /*                                                                     */
00022 /***********************************************************************/
00023 #include "lib/config.h"
00024 
00025 #ifdef USE_SVMLIGHT
00026 
00027 #include "lib/io.h"
00028 #include "lib/Signal.h"
00029 #include "lib/Mathematics.h"
00030 #include "lib/Time.h"
00031 #include "lib/lapack.h"
00032 
00033 #include "features/SimpleFeatures.h"
00034 #include "classifier/svm/SVMLight.h"
00035 #include "classifier/svm/pr_loqo.h"
00036 
00037 #include "kernel/Kernel.h"
00038 #include "classifier/KernelMachine.h"
00039 #include "kernel/CombinedKernel.h"
00040 
00041 #include <unistd.h>
00042 
00043 #include "base/Parallel.h"
00044 
00045 #ifndef WIN32
00046 #include <pthread.h>
00047 #endif
00048 
00049 using namespace shogun;
00050 
#ifndef DOXYGEN_SHOULD_SKIP_THIS
/** Per-thread work description for the linadd reactivation pass.
 *  Each worker handles example indices in [start, end). */
struct S_THREAD_PARAM_REACTIVATE_LINADD
{
    CKernel* kernel;      ///< kernel whose compute_optimized() is consulted
    float64_t* lin;       ///< linear component of the gradient (written)
    float64_t* last_lin;  ///< linear component recorded when examples were shrunk
    int32_t* active;      ///< flags marking currently active examples
    int32_t* docs;        ///< example indices
    int32_t start;        ///< first index handled by this thread (inclusive)
    int32_t end;          ///< one past the last index (exclusive)
};

/** Per-thread work description for update_linear_component_linadd_helper().
 *  Worker updates lin[] for active examples in [start, end). */
struct S_THREAD_PARAM
{
    float64_t * lin ;        ///< linear component of the gradient (written)
    float64_t* W;            ///< per-subkernel weights buffer (MKL)
    int32_t start, end;      ///< index range [start, end) handled by this thread
    int32_t * active2dnum ;  ///< -1 terminated list of active example indices
    int32_t * docs ;         ///< example indices
    CKernel* kernel ;        ///< kernel used for compute_optimized()
};

/** Per-thread work description for the vanilla (non-linadd) reactivation
 *  pass; full kernel rows are recomputed for the index range [start, end). */
struct S_THREAD_PARAM_REACTIVATE_VANILLA
{
    CKernel* kernel;        ///< kernel for recomputing rows
    float64_t* lin;         ///< linear component of the gradient (written)
    float64_t* aicache;     ///< scratch row of the hessian
    float64_t* a;           ///< current alphas
    float64_t* a_old;       ///< alphas at the time of shrinking
    int32_t* changed2dnum;  ///< -1 terminated list of changed example indices
    int32_t* inactive2dnum; ///< -1 terminated list of inactive example indices
    int32_t* label;         ///< training labels (+1/-1)
    int32_t start;          ///< first index handled by this thread (inclusive)
    int32_t end;            ///< one past the last index (exclusive)
};

/** Per-thread work description for compute_kernel_helper(): evaluates
 *  Kval[jj] = k(KI[jj], KJ[jj]) for jj in [start, end). */
struct S_THREAD_PARAM_KERNEL
{
    float64_t *Kval ;    ///< output buffer for kernel values
    int32_t *KI, *KJ ;   ///< row/column index pairs to evaluate
    int32_t start, end;  ///< index range [start, end) handled by this thread
    CSVMLight* svmlight; ///< object providing compute_kernel()
};

#endif // DOXYGEN_SHOULD_SKIP_THIS
00096 
00097 void* CSVMLight::update_linear_component_linadd_helper(void* p)
00098 {
00099     S_THREAD_PARAM* params = (S_THREAD_PARAM*) p;
00100 
00101     int32_t jj=0, j=0 ;
00102 
00103     for (jj=params->start;(jj<params->end) && (j=params->active2dnum[jj])>=0;jj++)
00104         params->lin[j]+=params->kernel->compute_optimized(params->docs[j]);
00105 
00106     return NULL ;
00107 }
00108 
00109 void* CSVMLight::compute_kernel_helper(void* p)
00110 {
00111     S_THREAD_PARAM_KERNEL* params = (S_THREAD_PARAM_KERNEL*) p;
00112 
00113     int32_t jj=0 ;
00114     for (jj=params->start;jj<params->end;jj++)
00115         params->Kval[jj]=params->svmlight->compute_kernel(params->KI[jj], params->KJ[jj]) ;
00116 
00117     return NULL ;
00118 }
00119 
/** Default constructor: sets up optimizer/model state via init() and
 *  explicitly starts without a kernel (one must be set before training). */
CSVMLight::CSVMLight()
: CSVM()
{
    init();
    set_kernel(NULL);
}
00126 
/** Constructor.
 *  @param C   regularization constant (cost of margin violations)
 *  @param k   kernel to train with
 *  @param lab training labels */
CSVMLight::CSVMLight(float64_t C, CKernel* k, CLabels* lab)
: CSVM(C, k, lab)
{
    init();
}
00132 
00133 void CSVMLight::init()
00134 {
00135     //optimizer settings
00136     primal=NULL;
00137     dual=NULL;
00138     init_margin=0.15;
00139     init_iter=500;
00140     precision_violations=0;
00141     model_b=0;
00142     verbosity=1;
00143     opt_precision=DEF_PRECISION;
00144 
00145     // svm variables
00146     W=NULL;
00147     model=new MODEL[1];
00148     learn_parm=new LEARN_PARM[1];
00149     model->supvec=NULL;
00150     model->alpha=NULL;
00151     model->index=NULL;
00152 
00153     // MKL stuff
00154     mymaxdiff=1 ;
00155     mkl_converged=false;
00156 }
00157 
/** Destructor: releases all buffers allocated in init() and svm_learn().
 *  All pointers are either valid new[] allocations or NULL, so the
 *  unconditional delete[] calls are safe. */
CSVMLight::~CSVMLight()
{

  // model arrays (allocated in init()/svm_learn())
  delete[] model->supvec;
  delete[] model->alpha;
  delete[] model->index;
  delete[] model;
  delete[] learn_parm;

  // MKL stuff
  delete[] W ;

  // optimizer variables
  delete[] dual;
  delete[] primal;
}
00174 
/** Trains the SVM with the SVMlight algorithm.
 *
 *  Resets the learning parameters, optionally (re-)initializes the kernel on
 *  the given features, sizes the kernel cache(s), runs svm_learn() and copies
 *  the resulting support vectors/alphas/bias into the CSVM base model.
 *
 *  @param data optional training features; if given, the kernel is
 *              re-initialized on them (lhs==rhs==data)
 *  @return true on success (errors abort via SG_ERROR/ASSERT) */
bool CSVMLight::train(CFeatures* data)
{
    //certain setup params
    mkl_converged=false;
    verbosity=1 ;
    init_margin=0.15;
    init_iter=500;
    precision_violations=0;
    opt_precision=DEF_PRECISION;

    // populate SVMlight's LEARN_PARM from this object's settings
    strcpy (learn_parm->predfile, "");
    learn_parm->biased_hyperplane= get_bias_enabled() ? 1 : 0;
    learn_parm->sharedslack=0;
    learn_parm->remove_inconsistent=0;
    learn_parm->skip_final_opt_check=0;
    learn_parm->svm_maxqpsize=get_qpsize();
    learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize-1;
    learn_parm->maxiter=100000;
    learn_parm->svm_iter_to_shrink=100;
    learn_parm->svm_c=C1;
    learn_parm->transduction_posratio=0.33;
    learn_parm->svm_costratio=C2/C1; // asymmetric cost for the two classes
    learn_parm->svm_costratio_unlab=1.0;
    learn_parm->svm_unlabbound=1E-5;
    learn_parm->epsilon_crit=epsilon; // GU: better decrease it ... ??
    learn_parm->epsilon_a=1E-15;
    learn_parm->compute_loo=0;
    learn_parm->rho=1.0;
    learn_parm->xa_depth=0;

    if (!kernel)
        SG_ERROR( "SVM_light can not proceed without kernel!\n");

    if (data)
    {
        if (labels->get_num_labels() != data->get_num_vectors())
            SG_ERROR("Number of training vectors does not match number of labels\n");
        kernel->init(data, data);
    }

    if (!kernel->has_features())
        SG_ERROR( "SVM_light can not proceed without initialized kernel!\n");

    ASSERT(labels && labels->get_num_labels());
    ASSERT(labels->is_two_class_labeling());
    ASSERT(kernel->get_num_vec_lhs()==labels->get_num_labels());

    // in case of LINADD enabled kernels cleanup!
    if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
        kernel->clear_normal() ;

    // output some info
    SG_DEBUG( "threads = %i\n", parallel->get_num_threads()) ;
    SG_DEBUG( "qpsize = %i\n", learn_parm->svm_maxqpsize) ;
    SG_DEBUG( "epsilon = %1.1e\n", learn_parm->epsilon_crit) ;
    SG_DEBUG( "kernel->has_property(KP_LINADD) = %i\n", kernel->has_property(KP_LINADD)) ;
    SG_DEBUG( "kernel->has_property(KP_KERNCOMBINATION) = %i\n", kernel->has_property(KP_KERNCOMBINATION)) ;
    SG_DEBUG( "kernel->has_property(KP_BATCHEVALUATION) = %i\n", kernel->has_property(KP_BATCHEVALUATION)) ;
    SG_DEBUG( "kernel->get_optimization_type() = %s\n", kernel->get_optimization_type()==FASTBUTMEMHUNGRY ? "FASTBUTMEMHUNGRY" : "SLOWBUTMEMEFFICIENT" ) ;
    SG_DEBUG( "get_solver_type() = %i\n", get_solver_type());
    SG_DEBUG( "get_linadd_enabled() = %i\n", get_linadd_enabled()) ;
    SG_DEBUG( "get_batch_computation_enabled() = %i\n", get_batch_computation_enabled()) ;
    SG_DEBUG( "kernel->get_num_subkernels() = %i\n", kernel->get_num_subkernels()) ;

    // caching pays off only when single kernel entries are expensive:
    // custom (precomputed) kernels and linadd-optimized kernels skip it
    use_kernel_cache = !((kernel->get_kernel_type() == K_CUSTOM) ||
                         (get_linadd_enabled() && kernel->has_property(KP_LINADD)));

    SG_DEBUG( "use_kernel_cache = %i\n", use_kernel_cache) ;

    if (kernel->get_kernel_type() == K_COMBINED)
    {
        CKernel* kn = ((CCombinedKernel*)kernel)->get_first_kernel();

        while (kn)
        {
            // allocate kernel cache but clean up beforehand
            kn->resize_kernel_cache(kn->get_cache_size());
            SG_UNREF(kn);
            kn = ((CCombinedKernel*) kernel)->get_next_kernel();
        }
    }

    kernel->resize_kernel_cache(kernel->get_cache_size());

    // train the svm
    svm_learn();

    // brain damaged svm light work around
    // (svmlight reserves model slot 0, so sv_num-1 real SVs start at index 1)
    create_new_model(model->sv_num-1);
    set_bias(-model->b);
    for (int32_t i=0; i<model->sv_num-1; i++)
    {
        set_alpha(i, model->alpha[i+1]);
        set_support_vector(i, model->supvec[i+1]);
    }

    // in case of LINADD enabled kernels cleanup!
    if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
    {
        kernel->clear_normal() ;
        kernel->delete_optimization() ;
    }

    if (use_kernel_cache)
        kernel->kernel_cache_cleanup();

    return true ;
}
00283 
00284 int32_t CSVMLight::get_runtime()
00285 {
00286   clock_t start;
00287   start = clock();
00288   return((int32_t)((float64_t)start*100.0/(float64_t)CLOCKS_PER_SEC));
00289 }
00290 
00291 
/** Core SVMlight training routine.
 *
 *  Allocates all per-example working buffers, initializes the SVMlight MODEL
 *  struct, optionally warm-starts from previously set alphas/support vectors,
 *  runs optimize_to_convergence() and prints final statistics. All buffers
 *  allocated here are freed before returning; the trained result is left in
 *  the member 'model' (copied out by train()).
 */
void CSVMLight::svm_learn()
{
    int32_t *inconsistent, i;
    int32_t inconsistentnum;
    int32_t misclassified,upsupvecnum;
    float64_t maxdiff, *lin, *c, *a;
    int32_t runtime_start,runtime_end;
    int32_t iterations;
    int32_t trainpos=0, trainneg=0 ;
    int32_t totdoc=0;
    ASSERT(labels);
    // totdoc is filled in by get_int_labels with the number of labels
    // (presumably an out-parameter — behavior per shogun CLabels API)
    int32_t* label=labels->get_int_labels(totdoc);
    ASSERT(label);
    int32_t* docs=new int32_t[totdoc];
    delete[] W;   // discard MKL weight buffer from any previous training run
    W=NULL;
    count = 0 ;

    // MKL: per-subkernel linear terms, one row of totdoc entries per subkernel
    if (kernel->has_property(KP_KERNCOMBINATION) && callback)
    {
        W = new float64_t[totdoc*kernel->get_num_subkernels()];
        for (i=0; i<totdoc*kernel->get_num_subkernels(); i++)
            W[i]=0;
    }

    // docs is simply the identity mapping example -> index
    for (i=0; i<totdoc; i++)
        docs[i]=i;

    float64_t *xi_fullset; /* buffer for storing xi on full sample in loo */
    float64_t *a_fullset;  /* buffer for storing alpha on full sample in loo */
    TIMING timing_profile;
    SHRINK_STATE shrink_state;

    runtime_start=get_runtime();
    timing_profile.time_kernel=0;
    timing_profile.time_opti=0;
    timing_profile.time_shrink=0;
    timing_profile.time_update=0;
    timing_profile.time_model=0;
    timing_profile.time_check=0;
    timing_profile.time_select=0;

    /* make sure -n value is reasonable */
    if((learn_parm->svm_newvarsinqp < 2)
            || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
        learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
    }

    init_shrink_state(&shrink_state,totdoc,(int32_t)MAXSHRINK);

    // per-example working buffers for the optimizer
    inconsistent = new int32_t[totdoc];
    c = new float64_t[totdoc];
    a = new float64_t[totdoc];
    a_fullset = new float64_t[totdoc];
    xi_fullset = new float64_t[totdoc];
    lin = new float64_t[totdoc];
    // linear term: user-supplied if set, otherwise the classification
    // default of -1 per example
    if (m_linear_term_len > 0)
        learn_parm->eps=get_linear_term_array();
    else
    {
        learn_parm->eps=new float64_t[totdoc];      /* equivalent regression epsilon for classification */
        CMath::fill_vector(learn_parm->eps, totdoc, -1.0);
    }

    learn_parm->svm_cost = new float64_t[totdoc];

    // replace any model arrays from a previous run; +2 because svmlight
    // reserves slot 0 and needs one slot of headroom
    delete[] model->supvec;
    delete[] model->alpha;
    delete[] model->index;
    model->supvec = new int32_t[totdoc+2];
    model->alpha = new float64_t[totdoc+2];
    model->index = new int32_t[totdoc+2];

    model->at_upper_bound=0;
    model->b=0;
    model->supvec[0]=0;  /* element 0 reserved and empty for now */
    model->alpha[0]=0;
    model->totdoc=totdoc;

    model->kernel=kernel;

    model->sv_num=1;
    model->loo_error=-1;
    model->loo_recall=-1;
    model->loo_precision=-1;
    model->xa_error=-1;
    model->xa_recall=-1;
    model->xa_precision=-1;
    inconsistentnum=0;


    for (i=0;i<totdoc;i++) {    /* various inits */
        inconsistent[i]=0;
        c[i]=0;
        a[i]=0;
        lin[i]=0;

        // per-example cost: positives are scaled by the costratio (C2/C1),
        // labels are normalized to exactly +1/-1, zeros get zero cost
        if(label[i] > 0) {
            learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
                fabs((float64_t)label[i]);
            label[i]=1;
            trainpos++;
        }
        else if(label[i] < 0) {
            learn_parm->svm_cost[i]=learn_parm->svm_c*fabs((float64_t)label[i]);
            label[i]=-1;
            trainneg++;
        }
        else {
            learn_parm->svm_cost[i]=0;
        }
    }

  /* compute starting state for initial alpha values */
    SG_DEBUG( "alpha:%d num_sv:%d\n", m_alpha, get_num_support_vectors());

    // warm start: if alphas/support vectors from a previous model exist,
    // seed the optimizer with them instead of starting from zero
    if(m_alpha && get_num_support_vectors()) {
        if(verbosity>=1) {
            SG_INFO( "Computing starting state...");
        }

    float64_t* alpha = new float64_t[totdoc];

    for (i=0; i<totdoc; i++)
        alpha[i]=0;

    for (i=0; i<get_num_support_vectors(); i++)
        alpha[get_support_vector(i)]=get_alpha(i);

    int32_t* index = new int32_t[totdoc];
    int32_t* index2dnum = new int32_t[totdoc+11];
    float64_t* aicache = new float64_t[totdoc];
    for (i=0;i<totdoc;i++) {    /* create full index and clip alphas */
      index[i]=1;
      alpha[i]=fabs(alpha[i]);
      if(alpha[i]<0) alpha[i]=0;
      if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];
    }

    if (use_kernel_cache)
    {
        // NOTE(review): kernel is cast to CCombinedKernel based on 'callback'
        // alone, without checking get_kernel_type()==K_COMBINED — confirm
        // MKL always implies a combined kernel here
        if (callback &&
                (!((CCombinedKernel*)kernel)->get_append_subkernel_weights())
           )
        {
            CCombinedKernel* k      = (CCombinedKernel*) kernel;
            CKernel* kn = k->get_first_kernel();

            while (kn)
            {
                for (i=0;i<totdoc;i++)     // fill kernel cache with unbounded SV
                    if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
                            && (kn->kernel_cache_space_available()))
                        kn->cache_kernel_row(i);

                for (i=0;i<totdoc;i++)     // fill rest of kernel cache with bounded SV
                    if((alpha[i]==learn_parm->svm_cost[i])
                            && (kn->kernel_cache_space_available()))
                        kn->cache_kernel_row(i);

                SG_UNREF(kn);
                kn = k->get_next_kernel();
            }
        }
        else
        {
            for (i=0;i<totdoc;i++)     /* fill kernel cache with unbounded SV */
                if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
                        && (kernel->kernel_cache_space_available()))
                    kernel->cache_kernel_row(i);

            for (i=0;i<totdoc;i++)     /* fill rest of kernel cache with bounded SV */
                if((alpha[i]==learn_parm->svm_cost[i])
                        && (kernel->kernel_cache_space_available()))
                    kernel->cache_kernel_row(i);
        }
    }
    (void)compute_index(index,totdoc,index2dnum);
    update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc,
                lin,aicache,NULL);
    (void)calculate_svm_model(docs,label,lin,alpha,a,c,
                  index2dnum,index2dnum);
    for (i=0;i<totdoc;i++) {    /* copy initial alphas */
      a[i]=alpha[i];
    }

    delete[] index;
    delete[] index2dnum;
    delete[] aicache;
    delete[] alpha;

    if(verbosity>=1)
        SG_DONE();
  }
        SG_DEBUG( "%d totdoc %d pos %d neg\n", totdoc, trainpos, trainneg);
        SG_DEBUG( "Optimizing...\n");

    /* train the svm */
  iterations=optimize_to_convergence(docs,label,totdoc,
                     &shrink_state,inconsistent,a,lin,
                     c,&timing_profile,
                     &maxdiff,(int32_t)-1,
                     (int32_t)1);


    if(verbosity>=1) {
        if(verbosity==1)
        {
            SG_DONE();
            SG_DEBUG("(%ld iterations)", iterations);
        }

        misclassified=0;
        for (i=0;(i<totdoc);i++) { /* get final statistic */
            if((lin[i]-model->b)*(float64_t)label[i] <= 0.0)
                misclassified++;
        }

        SG_INFO( "Optimization finished (%ld misclassified, maxdiff=%.8f).\n",
                misclassified,maxdiff);

        SG_INFO( "obj = %.16f, rho = %.16f\n",get_objective(),model->b);
        if (maxdiff>epsilon)
            SG_WARNING( "maximum violation (%f) exceeds svm_epsilon (%f) due to numerical difficulties\n", maxdiff, epsilon);

        // count support vectors at the upper bound (alpha == cost within
        // epsilon_a); model slots start at 1 because slot 0 is reserved
        runtime_end=get_runtime();
        upsupvecnum=0;
        for (i=1;i<model->sv_num;i++)
        {
            if(fabs(model->alpha[i]) >=
                    (learn_parm->svm_cost[model->supvec[i]]-
                     learn_parm->epsilon_a))
                upsupvecnum++;
        }
        SG_INFO( "Number of SV: %ld (including %ld at upper bound)\n",
                model->sv_num-1,upsupvecnum);
    }

    // release all working buffers; 'model' keeps the trained result
    shrink_state_cleanup(&shrink_state);
    delete[] label;
    delete[] inconsistent;
    delete[] c;
    delete[] a;
    delete[] a_fullset;
    delete[] xi_fullset;
    delete[] lin;
    delete[] learn_parm->eps;
    delete[] learn_parm->svm_cost;
    delete[] docs;
}
00542 
00543 int32_t CSVMLight::optimize_to_convergence(int32_t* docs, int32_t* label, int32_t totdoc,
00544                  SHRINK_STATE *shrink_state,
00545                  int32_t *inconsistent,
00546                  float64_t *a, float64_t *lin, float64_t *c,
00547                  TIMING *timing_profile, float64_t *maxdiff,
00548                  int32_t heldout, int32_t retrain)
00549      /* docs: Training vectors (x-part) */
00550      /* label: Training labels/value (y-part, zero if test example for
00551                   transduction) */
00552      /* totdoc: Number of examples in docs/label */
00553      /* laern_parm: Learning paramenters */
00554      /* kernel_parm: Kernel paramenters */
00555      /* kernel_cache: Initialized/partly filled Cache, if using a kernel.
00556                       NULL if linear. */
00557      /* shrink_state: State of active variables */
00558      /* inconsistent: examples thrown out as inconstistent */
00559      /* a: alphas */
00560      /* lin: linear component of gradient */
00561      /* c: right hand side of inequalities (margin) */
00562      /* maxdiff: returns maximum violation of KT-conditions */
00563      /* heldout: marks held-out example for leave-one-out (or -1) */
00564      /* retrain: selects training mode (1=regular / 2=holdout) */
00565 {
00566   int32_t *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink;
00567   int32_t inconsistentnum,choosenum,already_chosen=0,iteration;
00568   int32_t misclassified,supvecnum=0,*active2dnum,inactivenum;
00569   int32_t *working2dnum,*selexam;
00570   int32_t activenum;
00571   float64_t criterion, eq;
00572   float64_t *a_old;
00573   int32_t t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */
00574   int32_t transductcycle;
00575   int32_t transduction;
00576   float64_t epsilon_crit_org;
00577   float64_t bestmaxdiff;
00578   float64_t worstmaxdiff;
00579   int32_t   bestmaxdiffiter,terminate;
00580   bool reactivated=false;
00581 
00582   float64_t *selcrit;  /* buffer for sorting */
00583   float64_t *aicache;  /* buffer to keep one row of hessian */
00584   QP qp;            /* buffer for one quadratic program */
00585 
00586   epsilon_crit_org=learn_parm->epsilon_crit; /* save org */
00587   if(kernel->has_property(KP_LINADD) && get_linadd_enabled()) {
00588       learn_parm->epsilon_crit=2.0;
00589       /* caching makes no sense for linear kernel */
00590   }
00591   learn_parm->epsilon_shrink=2;
00592   (*maxdiff)=1;
00593 
00594   SG_DEBUG("totdoc:%d\n",totdoc);
00595   chosen = new int32_t[totdoc];
00596   last_suboptimal_at =new int32_t[totdoc];
00597   key =new int32_t[totdoc+11];
00598   selcrit =new float64_t[totdoc];
00599   selexam =new int32_t[totdoc];
00600   a_old =new float64_t[totdoc];
00601   aicache =new float64_t[totdoc];
00602   working2dnum =new int32_t[totdoc+11];
00603   active2dnum =new int32_t[totdoc+11];
00604   qp.opt_ce =new float64_t[learn_parm->svm_maxqpsize];
00605   qp.opt_ce0 =new float64_t[1];
00606   qp.opt_g =new float64_t[learn_parm->svm_maxqpsize*learn_parm->svm_maxqpsize];
00607   qp.opt_g0 =new float64_t[learn_parm->svm_maxqpsize];
00608   qp.opt_xinit =new float64_t[learn_parm->svm_maxqpsize];
00609   qp.opt_low=new float64_t[learn_parm->svm_maxqpsize];
00610   qp.opt_up=new float64_t[learn_parm->svm_maxqpsize];
00611 
00612   choosenum=0;
00613   inconsistentnum=0;
00614   transductcycle=0;
00615   transduction=0;
00616   if(!retrain) retrain=1;
00617   iteration=1;
00618   bestmaxdiffiter=1;
00619   bestmaxdiff=999999999;
00620   worstmaxdiff=1e-10;
00621   terminate=0;
00622 
00623   kernel->set_time(iteration);  /* for lru cache */
00624 
00625   for (i=0;i<totdoc;i++) {    /* various inits */
00626     chosen[i]=0;
00627     a_old[i]=a[i];
00628     last_suboptimal_at[i]=1;
00629     if(inconsistent[i])
00630       inconsistentnum++;
00631   }
00632   activenum=compute_index(shrink_state->active,totdoc,active2dnum);
00633   inactivenum=totdoc-activenum;
00634   clear_index(working2dnum);
00635 
00636   /* repeat this loop until we have convergence */
00637   CTime start_time;
00638   mkl_converged=false;
00639 
00640 #ifdef CYGWIN
00641   for (;((iteration<100 || (!mkl_converged && callback) ) || (retrain && (!terminate))); iteration++){
00642 #else
00643       CSignal::clear_cancel();
00644       for (;((!CSignal::cancel_computations()) && ((iteration<3 || (!mkl_converged && callback) ) || (retrain && (!terminate)))); iteration++){
00645 #endif
00646 
00647       if(use_kernel_cache)
00648           kernel->set_time(iteration);  /* for lru cache */
00649 
00650       if(verbosity>=2) t0=get_runtime();
00651       if(verbosity>=3) {
00652           SG_DEBUG( "\nSelecting working set... ");
00653       }
00654 
00655       if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize)
00656           learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
00657 
00658       i=0;
00659       for (jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */
00660           if((chosen[j]>=(learn_parm->svm_maxqpsize/
00661                           CMath::min(learn_parm->svm_maxqpsize,
00662                                      learn_parm->svm_newvarsinqp)))
00663              || (inconsistent[j])
00664              || (j == heldout)) {
00665               chosen[j]=0;
00666               choosenum--;
00667           }
00668           else {
00669               chosen[j]++;
00670               working2dnum[i++]=j;
00671           }
00672       }
00673       working2dnum[i]=-1;
00674 
00675       if(retrain == 2) {
00676           choosenum=0;
00677           for (jj=0;(j=working2dnum[jj])>=0;jj++) { /* fully clear working set */
00678               chosen[j]=0;
00679           }
00680           clear_index(working2dnum);
00681           for (i=0;i<totdoc;i++) { /* set inconsistent examples to zero (-i 1) */
00682               if((inconsistent[i] || (heldout==i)) && (a[i] != 0.0)) {
00683                   chosen[i]=99999;
00684                   choosenum++;
00685                   a[i]=0;
00686               }
00687           }
00688           if(learn_parm->biased_hyperplane) {
00689               eq=0;
00690               for (i=0;i<totdoc;i++) { /* make sure we fulfill equality constraint */
00691                   eq+=a[i]*label[i];
00692               }
00693               for (i=0;(i<totdoc) && (fabs(eq) > learn_parm->epsilon_a);i++) {
00694                   if((eq*label[i] > 0) && (a[i] > 0)) {
00695                       chosen[i]=88888;
00696                       choosenum++;
00697                       if((eq*label[i]) > a[i]) {
00698                           eq-=(a[i]*label[i]);
00699                           a[i]=0;
00700                       }
00701                       else {
00702                           a[i]-=(eq*label[i]);
00703                           eq=0;
00704                       }
00705                   }
00706               }
00707           }
00708           compute_index(chosen,totdoc,working2dnum);
00709       }
00710       else
00711       {   /* select working set according to steepest gradient */
00712           if(iteration % 101)
00713           {
00714               already_chosen=0;
00715               if(CMath::min(learn_parm->svm_newvarsinqp, learn_parm->svm_maxqpsize-choosenum)>=4 &&
00716                       (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())))
00717               {
00718                   /* select part of the working set from cache */
00719                   already_chosen=select_next_qp_subproblem_grad(
00720                       label,a,lin,c,totdoc,
00721                       (int32_t)(CMath::min(learn_parm->svm_maxqpsize-choosenum,
00722                                         learn_parm->svm_newvarsinqp)/2),
00723                       inconsistent,active2dnum,
00724                       working2dnum,selcrit,selexam,1,
00725                       key,chosen);
00726                   choosenum+=already_chosen;
00727               }
00728               choosenum+=select_next_qp_subproblem_grad(
00729                   label,a,lin,c,totdoc,
00730                   CMath::min(learn_parm->svm_maxqpsize-choosenum,
00731                              learn_parm->svm_newvarsinqp-already_chosen),
00732                   inconsistent,active2dnum,
00733                   working2dnum,selcrit,selexam,0,key,
00734                   chosen);
00735           }
00736           else { /* once in a while, select a somewhat random working set
00737                     to get unlocked of infinite loops due to numerical
00738                     inaccuracies in the core qp-solver */
00739               choosenum+=select_next_qp_subproblem_rand(
00740                   label,a,lin,c,totdoc,
00741                   CMath::min(learn_parm->svm_maxqpsize-choosenum,
00742                              learn_parm->svm_newvarsinqp),
00743                   inconsistent,active2dnum,
00744                   working2dnum,selcrit,selexam,key,
00745                   chosen,iteration);
00746           }
00747       }
00748 
00749       if(verbosity>=2) {
00750           SG_INFO( " %ld vectors chosen\n",choosenum);
00751       }
00752 
00753       if(verbosity>=2) t1=get_runtime();
00754 
00755       if (use_kernel_cache)
00756       {
00757           // in case of MKL w/o linadd cache each kernel independently
00758           // else if linadd is disabled cache single kernel
00759           if ( callback &&
00760                   (!((CCombinedKernel*) kernel)->get_append_subkernel_weights())
00761              )
00762           {
00763               CCombinedKernel* k      = (CCombinedKernel*) kernel;
00764               CKernel* kn = k->get_first_kernel();
00765 
00766               while (kn)
00767               {
00768                   kn->cache_multiple_kernel_rows(working2dnum, choosenum);
00769                   SG_UNREF(kn);
00770                   kn = k->get_next_kernel() ;
00771               }
00772           }
00773           else
00774               kernel->cache_multiple_kernel_rows(working2dnum, choosenum);
00775       }
00776 
00777       if(verbosity>=2) t2=get_runtime();
00778 
00779       if(retrain != 2) {
00780           optimize_svm(docs,label,inconsistent,0.0,chosen,active2dnum,
00781                        totdoc,working2dnum,choosenum,a,lin,c,
00782                        aicache,&qp,&epsilon_crit_org);
00783       }
00784 
00785       if(verbosity>=2) t3=get_runtime();
00786       update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc,
00787                               lin,aicache,c);
00788 
00789       if(verbosity>=2) t4=get_runtime();
00790       supvecnum=calculate_svm_model(docs,label,lin,a,a_old,c,working2dnum,active2dnum);
00791 
00792       if(verbosity>=2) t5=get_runtime();
00793 
00794       for (jj=0;(i=working2dnum[jj])>=0;jj++) {
00795           a_old[i]=a[i];
00796       }
00797 
00798       retrain=check_optimality(label,a,lin,c,totdoc,
00799                                maxdiff,epsilon_crit_org,&misclassified,
00800                                inconsistent,active2dnum,last_suboptimal_at,
00801                                iteration);
00802 
00803       if(verbosity>=2) {
00804           t6=get_runtime();
00805           timing_profile->time_select+=t1-t0;
00806           timing_profile->time_kernel+=t2-t1;
00807           timing_profile->time_opti+=t3-t2;
00808           timing_profile->time_update+=t4-t3;
00809           timing_profile->time_model+=t5-t4;
00810           timing_profile->time_check+=t6-t5;
00811       }
00812 
00813       /* checking whether optimizer got stuck */
00814       if((*maxdiff) < bestmaxdiff) {
00815           bestmaxdiff=(*maxdiff);
00816           bestmaxdiffiter=iteration;
00817       }
00818       if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) {
00819           /* int32_t time no progress? */
00820           terminate=1;
00821           retrain=0;
00822           SG_WARNING( "Relaxing KT-Conditions due to slow progress! Terminating!\n");
00823       }
00824 
00825       noshrink= (get_shrinking_enabled()) ? 0 : 1;
00826 
00827       if ((!callback) && (!retrain) && (inactivenum>0) &&
00828               ((!learn_parm->skip_final_opt_check) || (kernel->has_property(KP_LINADD) && get_linadd_enabled())))
00829       {
00830           t1=get_runtime();
00831           SG_DEBUG( "reactivating inactive examples\n");
00832 
00833           reactivate_inactive_examples(label,a,shrink_state,lin,c,totdoc,
00834                                        iteration,inconsistent,
00835                                        docs,aicache,
00836                                        maxdiff);
00837           reactivated=true;
00838           SG_DEBUG("done reactivating inactive examples (maxdiff:%8f eps_crit:%8f orig_eps:%8f)\n", *maxdiff, learn_parm->epsilon_crit, epsilon_crit_org);
00839           /* Update to new active variables. */
00840           activenum=compute_index(shrink_state->active,totdoc,active2dnum);
00841           inactivenum=totdoc-activenum;
00842           /* reset watchdog */
00843           bestmaxdiff=(*maxdiff);
00844           bestmaxdiffiter=iteration;
00845 
00846           /* termination criterion */
00847           noshrink=1;
00848           retrain=0;
00849           if((*maxdiff) > learn_parm->epsilon_crit)
00850           {
00851               SG_INFO( "restarting optimization as we are - due to shrinking - deviating too much (maxdiff(%f) > eps(%f))\n", *maxdiff, learn_parm->epsilon_crit);
00852               retrain=1;
00853           }
00854           timing_profile->time_shrink+=get_runtime()-t1;
00855           if (((verbosity>=1) && (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())))
00856              || (verbosity>=2)) {
00857               SG_DONE();
00858               SG_DEBUG("Number of inactive variables = %ld\n", inactivenum);
00859           }
00860       }
00861 
00862       if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff)))
00863           learn_parm->epsilon_crit=(*maxdiff);
00864       if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) {
00865           learn_parm->epsilon_crit/=4.0;
00866           retrain=1;
00867           noshrink=1;
00868       }
00869       if(learn_parm->epsilon_crit<epsilon_crit_org)
00870           learn_parm->epsilon_crit=epsilon_crit_org;
00871 
00872       if(verbosity>=2) {
00873           SG_INFO( " => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n",
00874                        supvecnum,model->at_upper_bound,(*maxdiff));
00875 
00876       }
00877       mymaxdiff=*maxdiff ;
00878 
00879       //don't shrink w/ mkl
00880       if (((iteration % 10) == 0) && (!noshrink) && !callback)
00881       {
00882           activenum=shrink_problem(shrink_state,active2dnum,last_suboptimal_at,iteration,totdoc,
00883                   CMath::max((int32_t)(activenum/10),
00884                       CMath::max((int32_t)(totdoc/500),(int32_t) 100)),
00885                   a,inconsistent, c, lin, label);
00886 
00887           inactivenum=totdoc-activenum;
00888 
00889           if (use_kernel_cache && (supvecnum>kernel->get_max_elems_cache())
00890                   && ((kernel->get_activenum_cache()-activenum)>CMath::max((int32_t)(activenum/10),(int32_t) 500))) {
00891 
00892               kernel->kernel_cache_shrink(totdoc, CMath::min((int32_t) (kernel->get_activenum_cache()-activenum),
00893                           (int32_t) (kernel->get_activenum_cache()-supvecnum)),
00894                       shrink_state->active);
00895           }
00896       }
00897 
00898       if (bestmaxdiff>worstmaxdiff)
00899           worstmaxdiff=bestmaxdiff;
00900 
00901       SG_ABS_PROGRESS(bestmaxdiff, -CMath::log10(bestmaxdiff), -CMath::log10(worstmaxdiff), -CMath::log10(epsilon), 6);
00902 
00903       /* Terminate loop */
00904       if (max_train_time > 0 && start_time.cur_time_diff() > max_train_time) {
00905           terminate = 1;
00906           retrain = 0;
00907       }
00908   } /* end of loop */
00909 
00910   SG_DEBUG( "inactive:%d\n", inactivenum);
00911 
00912   if (inactivenum && !reactivated && !callback)
00913   {
00914       SG_DEBUG( "reactivating inactive examples\n");
00915       reactivate_inactive_examples(label,a,shrink_state,lin,c,totdoc,
00916               iteration,inconsistent,
00917               docs,aicache,
00918               maxdiff);
00919       SG_DEBUG( "done reactivating inactive examples\n");
00920       /* Update to new active variables. */
00921       activenum=compute_index(shrink_state->active,totdoc,active2dnum);
00922       inactivenum=totdoc-activenum;
00923       /* reset watchdog */
00924       bestmaxdiff=(*maxdiff);
00925       bestmaxdiffiter=iteration;
00926   }
00927 
00928   criterion=compute_objective_function(a,lin,c,learn_parm->eps,label,totdoc);
00929   CSVM::set_objective(criterion);
00930 
00931   delete[] chosen;
00932   delete[] last_suboptimal_at;
00933   delete[] key;
00934   delete[] selcrit;
00935   delete[] selexam;
00936   delete[] a_old;
00937   delete[] aicache;
00938   delete[] working2dnum;
00939   delete[] active2dnum;
00940   delete[] qp.opt_ce;
00941   delete[] qp.opt_ce0;
00942   delete[] qp.opt_g;
00943   delete[] qp.opt_g0;
00944   delete[] qp.opt_xinit;
00945   delete[] qp.opt_low;
00946   delete[] qp.opt_up;
00947 
00948   learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */
00949 
00950   return(iteration);
00951 }
00952 
00953 float64_t CSVMLight::compute_objective_function(
00954     float64_t *a, float64_t *lin, float64_t *c, float64_t* eps, int32_t *label,
00955     int32_t totdoc)
00956      /* Return value of objective function. */
00957      /* Works only relative to the active variables! */
00958 {
00959   /* calculate value of objective function */
00960   float64_t criterion=0;
00961 
00962   for (int32_t i=0;i<totdoc;i++)
00963       criterion=criterion+(eps[i]-(float64_t)label[i]*c[i])*a[i]+0.5*a[i]*label[i]*lin[i];
00964 
00965   return(criterion);
00966 }
00967 
00968 
00969 void CSVMLight::clear_index(int32_t *index)
00970               /* initializes and empties index */
00971 {
00972   index[0]=-1;
00973 }
00974 
00975 void CSVMLight::add_to_index(int32_t *index, int32_t elem)
00976      /* initializes and empties index */
00977 {
00978   register int32_t i;
00979   for (i=0;index[i] != -1;i++);
00980   index[i]=elem;
00981   index[i+1]=-1;
00982 }
00983 
00984 int32_t CSVMLight::compute_index(
00985     int32_t *binfeature, int32_t range, int32_t *index)
00986      /* create an inverted index of binfeature */
00987 {
00988   register int32_t i,ii;
00989 
00990   ii=0;
00991   for (i=0;i<range;i++) {
00992     if(binfeature[i]) {
00993       index[ii]=i;
00994       ii++;
00995     }
00996   }
00997   for (i=0;i<4;i++) {
00998     index[ii+i]=-1;
00999   }
01000   return(ii);
01001 }
01002 
01003 
01004 void CSVMLight::optimize_svm(
01005     int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
01006     float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t totdoc,
01007     int32_t *working2dnum, int32_t varnum, float64_t *a, float64_t *lin,
01008     float64_t *c, float64_t *aicache, QP *qp, float64_t *epsilon_crit_target)
01009      /* Do optimization on the working set. */
01010 {
01011     int32_t i;
01012     float64_t *a_v;
01013 
01014     //compute_matrices_for_optimization_parallel(docs,label,
01015     //                                         exclude_from_eq_const,eq_target,chosen,
01016     //                                         active2dnum,working2dnum,a,lin,c,
01017     //                                         varnum,totdoc,aicache,qp);
01018 
01019     compute_matrices_for_optimization(docs,label,
01020                                       exclude_from_eq_const,eq_target,chosen,
01021                                       active2dnum,working2dnum,a,lin,c,
01022                                       varnum,totdoc,aicache,qp);
01023 
01024     if(verbosity>=3) {
01025      SG_DEBUG( "Running optimizer...");
01026     }
01027     /* call the qp-subsolver */
01028     a_v=optimize_qp(qp,epsilon_crit_target,
01029             learn_parm->svm_maxqpsize,
01030             &(model->b),                /* in case the optimizer gives us */
01031             learn_parm->svm_maxqpsize); /* the threshold for free. otherwise */
01032                                         /* b is calculated in calculate_model. */
01033     if(verbosity>=3) {
01034      SG_DONE();
01035     }
01036 
01037     for (i=0;i<varnum;i++)
01038       a[working2dnum[i]]=a_v[i];
01039 }
01040 
/* Multi-threaded variant of compute_matrices_for_optimization: fills qp
 * (equality-constraint constant/matrix, box bounds, quadratic matrix opt_g
 * and linear part opt_g0) for the varnum working-set variables in key.
 * Kernel evaluations for the upper triangle of the working set are
 * precomputed in parallel into Kval; the second pass consumes Kval in the
 * exact same (i, then j>i) order it was filled. Falls back to the serial
 * version when only one thread is configured (and on WIN32, where the
 * pthread path is compiled out). */
void CSVMLight::compute_matrices_for_optimization_parallel(
    int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
    float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t *key,
    float64_t *a, float64_t *lin, float64_t *c, int32_t varnum, int32_t totdoc,
    float64_t *aicache, QP *qp)
{
    if (parallel->get_num_threads()<=1)
    {
        compute_matrices_for_optimization(docs, label, exclude_from_eq_const, eq_target,
                                                   chosen, active2dnum, key, a, lin, c,
                                                   varnum, totdoc, aicache, qp) ;
    }
#ifndef WIN32
    else
    {
        register int32_t ki,kj,i,j;
        register float64_t kernel_temp;

        qp->opt_n=varnum;
        qp->opt_ce0[0]=-eq_target; /* compute the constant for equality constraint */
        /* support vectors outside the working set (and not excluded from the */
        /* equality constraint) contribute their alphas to the constant       */
        for (j=1;j<model->sv_num;j++) { /* start at 1 */
            if((!chosen[model->supvec[j]])
                    && (!exclude_from_eq_const[(model->supvec[j])])) {
                qp->opt_ce0[0]+=model->alpha[j];
            }
        }
        if(learn_parm->biased_hyperplane)
            qp->opt_m=1;
        else
            qp->opt_m=0;  /* eq-constraint will be ignored */

        /* init linear part of objective function */
        for (i=0;i<varnum;i++) {
            qp->opt_g0[i]=lin[key[i]];
        }

        ASSERT(parallel->get_num_threads()>1);
        /* KI/KJ list the document-index pairs of the upper triangle     */
        /* (incl. diagonal) of the working-set kernel matrix, in row-    */
        /* major (i, then j>i) order; Kval receives K(KI[k],KJ[k])       */
        int32_t *KI=new int32_t[varnum*varnum] ;
        int32_t *KJ=new int32_t[varnum*varnum] ;
        int32_t Knum=0 ;
        float64_t *Kval = new float64_t[varnum*(varnum+1)/2] ;
        for (i=0;i<varnum;i++) {
            ki=key[i];
            KI[Knum]=docs[ki] ;
            KJ[Knum]=docs[ki] ;
            Knum++ ;
            for (j=i+1;j<varnum;j++)
            {
                kj=key[j];
                KI[Knum]=docs[ki] ;
                KJ[Knum]=docs[kj] ;
                Knum++ ;
            }
        }
        ASSERT(Knum<=varnum*(varnum+1)/2);

        /* split the Knum kernel evaluations over num_threads-1 workers; */
        /* this thread computes the remainder after the last worker slice */
        pthread_t* threads = new pthread_t[parallel->get_num_threads()-1];
        S_THREAD_PARAM_KERNEL* params = new S_THREAD_PARAM_KERNEL[parallel->get_num_threads()-1];
        int32_t step= Knum/parallel->get_num_threads();
        //SG_DEBUG( "\nkernel-step size: %i\n", step) ;
        for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
        {
            params[t].svmlight = this;
            params[t].start = t*step;
            params[t].end = (t+1)*step;
            params[t].KI=KI ;
            params[t].KJ=KJ ;
            params[t].Kval=Kval ;
            pthread_create(&threads[t], NULL, CSVMLight::compute_kernel_helper, (void*)&params[t]);
        }
        for (i=params[parallel->get_num_threads()-2].end; i<Knum; i++)
            Kval[i]=compute_kernel(KI[i],KJ[i]) ;

        for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
            pthread_join(threads[t], NULL);

        delete[] params;
        delete[] threads;

        /* consume Kval in the same order it was filled above */
        Knum=0 ;
        for (i=0;i<varnum;i++) {
            ki=key[i];

            /* Compute the matrix for equality constraints */
            qp->opt_ce[i]=label[ki];
            qp->opt_low[i]=0;
            qp->opt_up[i]=learn_parm->svm_cost[ki];

            kernel_temp=Kval[Knum] ;
            Knum++ ;
            /* compute linear part of objective function */
            qp->opt_g0[i]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
            /* compute quadratic part of objective function */
            qp->opt_g[varnum*i+i]=kernel_temp;

            for (j=i+1;j<varnum;j++) {
                kj=key[j];
                kernel_temp=Kval[Knum] ;
                Knum++ ;
                /* compute linear part of objective function */
                qp->opt_g0[i]-=(kernel_temp*a[kj]*(float64_t)label[kj]);
                qp->opt_g0[j]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
                /* compute quadratic part of objective function */
                qp->opt_g[varnum*i+j]=(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp;
                qp->opt_g[varnum*j+i]=qp->opt_g[varnum*i+j];//(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp;
            }

            if(verbosity>=3) {
                if(i % 20 == 0) {
                    SG_DEBUG( "%ld..",i);
                }
            }
        }

        delete[] KI ;
        delete[] KJ ;
        delete[] Kval ;

        for (i=0;i<varnum;i++) {
            /* assure starting at feasible point */
            qp->opt_xinit[i]=a[key[i]];
            /* set linear part of objective function */
            qp->opt_g0[i]=(learn_parm->eps[key[i]]-(float64_t)label[key[i]]*c[key[i]])+qp->opt_g0[i]*(float64_t)label[key[i]];
        }

        if(verbosity>=3) {
            SG_DONE();
        }
    }
#endif
}
01172 
/* Serial QP-subproblem setup: fills qp for the varnum working-set
 * variables listed in key — the equality-constraint constant (alphas of
 * support vectors outside the working set) and matrix, per-variable box
 * bounds [0, svm_cost], the quadratic matrix opt_g[i][j] =
 * y_i*y_j*K(i,j), and the linear part opt_g0 (gradient contributions of
 * the fixed variables). Kernel values are computed on the fly. */
void CSVMLight::compute_matrices_for_optimization(
    int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
    float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t *key,
    float64_t *a, float64_t *lin, float64_t *c, int32_t varnum, int32_t totdoc,
    float64_t *aicache, QP *qp)
{
  register int32_t ki,kj,i,j;
  register float64_t kernel_temp;

  qp->opt_n=varnum;
  qp->opt_ce0[0]=-eq_target; /* compute the constant for equality constraint */
  /* alphas of support vectors that stay fixed (outside the working set,  */
  /* not excluded from the equality constraint) enter the constant        */
  for (j=1;j<model->sv_num;j++) { /* start at 1 */
    if((!chosen[model->supvec[j]])
       && (!exclude_from_eq_const[(model->supvec[j])])) {
      qp->opt_ce0[0]+=model->alpha[j];
    }
  }
  if(learn_parm->biased_hyperplane)
    qp->opt_m=1;
  else
    qp->opt_m=0;  /* eq-constraint will be ignored */

  /* init linear part of objective function */
  for (i=0;i<varnum;i++) {
    qp->opt_g0[i]=lin[key[i]];
  }

  for (i=0;i<varnum;i++) {
      ki=key[i];

      /* Compute the matrix for equality constraints */
      qp->opt_ce[i]=label[ki];
      qp->opt_low[i]=0;
      qp->opt_up[i]=learn_parm->svm_cost[ki];

      /* diagonal entry K(ki,ki) */
      kernel_temp=compute_kernel(docs[ki], docs[ki]);
      /* compute linear part of objective function */
      qp->opt_g0[i]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
      /* compute quadratic part of objective function */
      qp->opt_g[varnum*i+i]=kernel_temp;

      /* off-diagonal entries; matrix is symmetric, so mirror i,j to j,i */
      for (j=i+1;j<varnum;j++) {
          kj=key[j];
          kernel_temp=compute_kernel(docs[ki], docs[kj]);

          /* compute linear part of objective function */
          qp->opt_g0[i]-=(kernel_temp*a[kj]*(float64_t)label[kj]);
          qp->opt_g0[j]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
          /* compute quadratic part of objective function */
          qp->opt_g[varnum*i+j]=(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp;
          qp->opt_g[varnum*j+i]=qp->opt_g[varnum*i+j];//(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp;
      }

      if(verbosity>=3) {
          if(i % 20 == 0) {
              SG_DEBUG( "%ld..",i);
          }
      }
  }

  for (i=0;i<varnum;i++) {
      /* assure starting at feasible point */
      qp->opt_xinit[i]=a[key[i]];
      /* set linear part of objective function */
      qp->opt_g0[i]=(learn_parm->eps[key[i]]-(float64_t)label[key[i]]*c[key[i]]) + qp->opt_g0[i]*(float64_t)label[key[i]];
  }

  if(verbosity>=3) {
      SG_DONE();
  }
}
01244 
01245 
/* Synchronize the model (support-vector list, alphas, bound counter and
 * threshold b) with the current alpha values of the working set. Returns
 * the number of support vectors (model->sv_num - 1; slot 0 is unused). */
int32_t CSVMLight::calculate_svm_model(
    int32_t* docs, int32_t *label, float64_t *lin, float64_t *a,
    float64_t *a_old, float64_t *c, int32_t *working2dnum, int32_t *active2dnum)
     /* Compute decision function based on current values */
     /* of alpha. */
{
  int32_t i,ii,pos,b_calculated=0,first_low,first_high;
  float64_t ex_c,b_temp,b_low,b_high;

  if(verbosity>=3) {
   SG_DEBUG( "Calculating model...");
  }

  /* without a bias term there is no threshold to estimate */
  if(!learn_parm->biased_hyperplane) {
    model->b=0;
    b_calculated=1;
  }

  for (ii=0;(i=working2dnum[ii])>=0;ii++) {
    if((a_old[i]>0) && (a[i]==0)) { /* remove from model */
      /* swap-with-last removal: move the final SV into the freed slot */
      pos=model->index[i];
      model->index[i]=-1;
      (model->sv_num)--;
      model->supvec[pos]=model->supvec[model->sv_num];
      model->alpha[pos]=model->alpha[model->sv_num];
      model->index[model->supvec[pos]]=pos;
    }
    else if((a_old[i]==0) && (a[i]>0)) { /* add to model */
      model->supvec[model->sv_num]=docs[i];
      model->alpha[model->sv_num]=a[i]*(float64_t)label[i];
      model->index[i]=model->sv_num;
      (model->sv_num)++;
    }
    else if(a_old[i]==a[i]) { /* nothing to do */
    }
    else {  /* just update alpha */
      model->alpha[model->index[i]]=a[i]*(float64_t)label[i];
    }

    /* maintain the count of alphas at the upper bound C (minus the     */
    /* numerical tolerance epsilon_a)                                   */
    ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
    if((a_old[i]>=ex_c) && (a[i]<ex_c)) {
      (model->at_upper_bound)--;
    }
    else if((a_old[i]<ex_c) && (a[i]>=ex_c)) {
      (model->at_upper_bound)++;
    }

    /* a free SV (strictly between the bounds) determines b exactly */
    if((!b_calculated)
       && (a[i]>learn_parm->epsilon_a) && (a[i]<ex_c)) {   /* calculate b */
        model->b=((float64_t)label[i]*learn_parm->eps[i]-c[i]+lin[i]);
    b_calculated=1;
    }
  }

  /* No alpha in the working set not at bounds, so b was not
     calculated in the usual way. The following handles this special
     case. */
  if(learn_parm->biased_hyperplane
     && (!b_calculated)
     && (model->sv_num-1 == model->at_upper_bound)) {
    first_low=1;
    first_high=1;
    b_low=0;
    b_high=0;
    /* bracket b between the tightest bounds implied by the active     */
    /* examples and pick the midpoint of the feasible range            */
    for (ii=0;(i=active2dnum[ii])>=0;ii++) {
      ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
      if(a[i]<ex_c) {
	if(label[i]>0)  {
	  b_temp=-(learn_parm->eps[i]-c[i]+lin[i]);
	  if((b_temp>b_low) || (first_low)) {
	    b_low=b_temp;
	    first_low=0;
	  }
	}
	else {
	  b_temp=-(-learn_parm->eps[i]-c[i]+lin[i]);
	  if((b_temp<b_high) || (first_high)) {
	    b_high=b_temp;
	    first_high=0;
	  }
	}
      }
      else {
	if(label[i]<0)  {
	  b_temp=-(-learn_parm->eps[i]-c[i]+lin[i]);
	  if((b_temp>b_low) || (first_low)) {
	    b_low=b_temp;
	    first_low=0;
	  }
	}
	else {
	  b_temp=-(learn_parm->eps[i]-c[i]+lin[i]);
	  if((b_temp<b_high) || (first_high)) {
	    b_high=b_temp;
	    first_high=0;
	  }
	}
      }
    }
    if(first_high) {
      model->b=-b_low;
    }
    else if(first_low) {
      model->b=-b_high;
    }
    else {
      model->b=-(b_high+b_low)/2.0;  /* select b as the middle of range */
    }
  }

  if(verbosity>=3) {
   SG_DONE();
  }

  return(model->sv_num-1); /* have to substract one, since element 0 is empty*/
}
01362 
/* Check the KT (Karush-Kuhn-Tucker) conditions over the active examples.
 * Records the largest violation in *maxdiff, counts misclassified
 * examples, stamps last_suboptimal_at for shrinking, and returns 1 iff
 * the largest violation exceeds epsilon_crit (i.e. training must go on). */
int32_t CSVMLight::check_optimality(
    int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc,
    float64_t *maxdiff, float64_t epsilon_crit_org, int32_t *misclassified,
    int32_t *inconsistent, int32_t *active2dnum, int32_t *last_suboptimal_at,
    int32_t iteration)
     /* Check KT-conditions */
{
  int32_t i,ii,retrain;
  float64_t dist,ex_c,target;

  /* adapt the shrinking tolerance: for linadd-capable kernels it tracks  */
  /* how far epsilon_crit was relaxed; otherwise it is an exponential     */
  /* moving average mixing in the current largest violation               */
  if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
      learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org;
  else
      learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3;
  retrain=0;
  (*maxdiff)=0;
  (*misclassified)=0;
  for (ii=0;(i=active2dnum[ii])>=0;ii++) {
      if((!inconsistent[i]) && label[i]) {
          dist=(lin[i]-model->b)*(float64_t)label[i];/* 'distance' from
                                                     hyperplane*/
          target=-(learn_parm->eps[i]-(float64_t)label[i]*c[i]);
          ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
          if(dist <= 0) {
              (*misclassified)++;  /* does not work due to deactivation of var */
          }
          /* track the largest KT violation in *maxdiff */
          if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
              if((dist-target)>(*maxdiff))  /* largest violation */
                  (*maxdiff)=dist-target;
          }
          else if((a[i]<ex_c) && (dist < target)) {
              if((target-dist)>(*maxdiff))  /* largest violation */
                  (*maxdiff)=target-dist;
          }
          /* Count how long a variable was at lower/upper bound (and optimal).*/
          /* Variables, which were at the bound and optimal for a long */
          /* time are unlikely to become support vectors. In case our */
          /* cache is filled up, those variables are excluded to save */
          /* kernel evaluations. (See chapter 'Shrinking').*/
          if((a[i]>(learn_parm->epsilon_a))
             && (a[i]<ex_c)) {
              last_suboptimal_at[i]=iteration;         /* not at bound */
          }
          else if((a[i]<=(learn_parm->epsilon_a))
                  && (dist < (target+learn_parm->epsilon_shrink))) {
              last_suboptimal_at[i]=iteration;         /* not likely optimal */
          }
          else if((a[i]>=ex_c)
                  && (dist > (target-learn_parm->epsilon_shrink)))  {
              last_suboptimal_at[i]=iteration;         /* not likely optimal */
          }
      }
  }

  /* termination criterion */
  if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) {
      retrain=1;
  }
  return(retrain);
}
01423 
void CSVMLight::update_linear_component(
    int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
    float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
    float64_t *aicache, float64_t* c)
     /* keep track of the linear component */
     /* lin of the gradient etc. by updating */
     /* based on the change of the variables */
     /* in the current working set */
{
    register int32_t i=0,ii=0,j=0,jj=0;

    /* four strategies, chosen by kernel capabilities and MKL callback:   */
    /* linadd+MKL, linadd, plain MKL, and the generic kernel-row update   */
    if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
    {
        if (callback)
        {
            update_linear_component_mkl_linadd(docs, label, active2dnum, a,
                    a_old, working2dnum, totdoc, lin, aicache);
        }
        else
        {
            /* rebuild the kernel's normal vector from the alpha deltas,  */
            /* then evaluate it once per active example                   */
            kernel->clear_normal();

            int32_t num_working=0;
            for (ii=0;(i=working2dnum[ii])>=0;ii++) {
                if(a[i] != a_old[i]) {
                    kernel->add_to_normal(docs[i], (a[i]-a_old[i])*(float64_t)label[i]);
                    num_working++;
                }
            }

            if (num_working>0)
            {
                if (parallel->get_num_threads() < 2)
                {
                    for (jj=0;(j=active2dnum[jj])>=0;jj++) {
                        lin[j]+=kernel->compute_optimized(docs[j]);
                    }
                }
#ifndef WIN32
                else
                {
                    /* split the active examples over num_threads-1       */
                    /* workers; this thread handles the remainder slice   */
                    int32_t num_elem = 0 ;
                    for (jj=0;(j=active2dnum[jj])>=0;jj++) num_elem++ ;

                    pthread_t* threads = new pthread_t[parallel->get_num_threads()-1] ;
                    S_THREAD_PARAM* params = new S_THREAD_PARAM[parallel->get_num_threads()-1] ;
                    int32_t start = 0 ;
                    int32_t step = num_elem/parallel->get_num_threads();
                    int32_t end = step ;

                    for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
                    {
                        params[t].kernel = kernel ;
                        params[t].lin = lin ;
                        params[t].docs = docs ;
                        params[t].active2dnum=active2dnum ;
                        params[t].start = start ;
                        params[t].end = end ;
                        start=end ;
                        end+=step ;
                        pthread_create(&threads[t], NULL, update_linear_component_linadd_helper, (void*)&params[t]) ;
                    }

                    for (jj=params[parallel->get_num_threads()-2].end;(j=active2dnum[jj])>=0;jj++) {
                        lin[j]+=kernel->compute_optimized(docs[j]);
                    }
                    void* ret;
                    for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
                        pthread_join(threads[t], &ret) ;

                    delete[] params;
                    delete[] threads;
                }
#endif
            }
        }
    }
    else
    {
        if (callback)
        {
            update_linear_component_mkl(docs, label, active2dnum,
                    a, a_old, working2dnum, totdoc, lin, aicache);
        }
        else {
            /* generic path: for every changed alpha, fetch its kernel    */
            /* row and accumulate the delta into lin over active examples */
            for (jj=0;(i=working2dnum[jj])>=0;jj++) {
                if(a[i] != a_old[i]) {
                    kernel->get_kernel_row(i,active2dnum,aicache);
                    for (ii=0;(j=active2dnum[ii])>=0;ii++)
                        lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
                }
            }
        }
    }
}
01519 
01520 
/* MKL variant of the linear-component update: accumulates the per-
 * subkernel contributions W[j*num_kernels+n] from the changed alphas and
 * then invokes the MKL callback (which updates lin / kernel weights).
 * For a combined kernel the subkernels are iterated directly; otherwise
 * the subkernel weights are temporarily set to unit vectors one at a time
 * and restored afterwards. */
void CSVMLight::update_linear_component_mkl(
    int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
    float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
    float64_t *aicache)
{
    //int inner_iters=0;
    int32_t num = kernel->get_num_vec_rhs();
    int32_t num_weights = -1;
    int32_t num_kernels = kernel->get_num_subkernels() ;
    const float64_t* old_beta   = kernel->get_subkernel_weights(num_weights);
    ASSERT(num_weights==num_kernels);

    if ((kernel->get_kernel_type()==K_COMBINED) &&
             (!((CCombinedKernel*)kernel)->get_append_subkernel_weights()))// for combined kernel
    {
        CCombinedKernel* k      = (CCombinedKernel*) kernel;
        CKernel* kn = k->get_first_kernel() ;
        int32_t n = 0, i, j ;

        /* walk the subkernel chain; n is the subkernel index into W */
        while (kn!=NULL)
        {
            for (i=0;i<num;i++)
            {
                if(a[i] != a_old[i])
                {
                    kn->get_kernel_row(i,NULL,aicache, true);
                    for (j=0;j<num;j++)
                        W[j*num_kernels+n]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
                }
            }

            SG_UNREF(kn);
            kn = k->get_next_kernel();
            n++ ;
        }
    }
    else // hope the kernel is fast ...
    {
        float64_t* w_backup = new float64_t[num_kernels] ;
        float64_t* w1 = new float64_t[num_kernels] ;

        // backup and set to zero
        for (int32_t i=0; i<num_kernels; i++)
        {
            w_backup[i] = old_beta[i] ;
            w1[i]=0.0 ;
        }
        /* isolate subkernel n by setting its weight to 1 and all others */
        /* to 0, then evaluate the full kernel                           */
        for (int32_t n=0; n<num_kernels; n++)
        {
            w1[n]=1.0 ;
            kernel->set_subkernel_weights(w1, num_weights) ;

            for (int32_t i=0;i<num;i++)
            {
                if(a[i] != a_old[i])
                {
                    for (int32_t j=0;j<num;j++)
                        W[j*num_kernels+n]+=(a[i]-a_old[i])*compute_kernel(i,j)*(float64_t)label[i];
                }
            }
            w1[n]=0.0 ;
        }

        // restore old weights
        kernel->set_subkernel_weights(w_backup,num_weights) ;

        delete[] w_backup ;
        delete[] w1 ;
    }

    call_mkl_callback(a, label, lin);
}
01593 
01594 
/* MKL + linadd variant: builds the kernel's normal vector from the
 * changed alphas (with all subkernel weights temporarily set to 1),
 * computes the per-subkernel contributions W via compute_by_subkernel —
 * optionally parallelized with pthreads — restores the original weights
 * and finally invokes the MKL callback. */
void CSVMLight::update_linear_component_mkl_linadd(
    int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
    float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
    float64_t *aicache)
{
    //int inner_iters=0;

    // kernel with LP_LINADD property is assumed to have
    // compute_by_subkernel functions
    int32_t num = kernel->get_num_vec_rhs();
    int32_t num_weights = -1;
    int32_t num_kernels = kernel->get_num_subkernels() ;
    const float64_t* old_beta   = kernel->get_subkernel_weights(num_weights);
    ASSERT(num_weights==num_kernels);

    float64_t* w_backup = new float64_t[num_kernels] ;
    float64_t* w1 = new float64_t[num_kernels] ;

    // backup and set to one
    for (int32_t i=0; i<num_kernels; i++)
    {
        w_backup[i] = old_beta[i] ;
        w1[i]=1.0 ;
    }
    // set the kernel weights
    kernel->set_subkernel_weights(w1, num_weights) ;

    // create normal update (with changed alphas only)
    kernel->clear_normal();
    for (int32_t ii=0, i=0;(i=working2dnum[ii])>=0;ii++) {
        if(a[i] != a_old[i]) {
            kernel->add_to_normal(docs[i], (a[i]-a_old[i])*(float64_t)label[i]);
        }
    }

    if (parallel->get_num_threads() < 2)
    {
        // determine contributions of different kernels
        for (int32_t i=0; i<num; i++)
            kernel->compute_by_subkernel(i,&W[i*num_kernels]);
    }
#ifndef WIN32
    else
    {
        /* split the num examples over num_threads-1 workers; this     */
        /* thread computes the remainder after the last worker slice   */
        pthread_t* threads = new pthread_t[parallel->get_num_threads()-1];
        S_THREAD_PARAM* params = new S_THREAD_PARAM[parallel->get_num_threads()-1];
        int32_t step= num/parallel->get_num_threads();

        for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
        {
            params[t].kernel = kernel;
            params[t].W = W;
            params[t].start = t*step;
            params[t].end = (t+1)*step;
            pthread_create(&threads[t], NULL, CSVMLight::update_linear_component_mkl_linadd_helper, (void*)&params[t]);
        }

        for (int32_t i=params[parallel->get_num_threads()-2].end; i<num; i++)
            kernel->compute_by_subkernel(i,&W[i*num_kernels]);

        for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
            pthread_join(threads[t], NULL);

        delete[] params;
        delete[] threads;
    }
#endif

    // restore old weights
    kernel->set_subkernel_weights(w_backup,num_weights);

    delete[] w_backup;
    delete[] w1;

    call_mkl_callback(a, label, lin);
}
01671 
01672 void* CSVMLight::update_linear_component_mkl_linadd_helper(void* p)
01673 {
01674     S_THREAD_PARAM* params = (S_THREAD_PARAM*) p;
01675 
01676     int32_t num_kernels=params->kernel->get_num_subkernels();
01677 
01678     // determine contributions of different kernels
01679     for (int32_t i=params->start; i<params->end; i++)
01680         params->kernel->compute_by_subkernel(i,&(params->W[i*num_kernels]));
01681 
01682     return NULL ;
01683 }
01684 
/* Feed the current MKL state to the registered callback and refresh lin[].
 *
 * Computes, from the per-(example,subkernel) matrix W (element (d,i) at
 * W[i*num_kernels+d], i.e. column-major with num_kernels rows):
 *   suma    = sum_i a[i]
 *   sumw[d] = 0.5 * sum_i a[i]*label[i]*W[i*num_kernels+d]
 * then invokes callback(mkl, sumw, suma), storing its result in
 * mkl_converged. Afterwards the (possibly updated) subkernel weights are
 * fetched and lin[i] = sum_d new_beta[d]*W[i*num_kernels+d] is recomputed.
 * Both reductions use cblas_dgemv when LAPACK is available, with an
 * explicit loop fallback otherwise. */
void CSVMLight::call_mkl_callback(float64_t* a, int32_t* label, float64_t* lin)
{
    int32_t num = kernel->get_num_vec_rhs();
    int32_t num_kernels = kernel->get_num_subkernels() ;

    int nk = (int) num_kernels; /* calling external lib */

    float64_t suma=0;
    float64_t* sumw=new float64_t[num_kernels];
#ifdef HAVE_LAPACK
    double* alphay  = new double[num];

    // alphay[i] = a[i]*label[i]; suma accumulated in the same pass
    for (int32_t i=0; i<num; i++)
    {
        alphay[i]=a[i]*label[i];
        suma+=a[i];
    }

    for (int32_t i=0; i<num_kernels; i++)
        sumw[i]=0;

    // sumw = 0.5 * W * alphay   (W: num_kernels x num, column-major)
    cblas_dgemv(CblasColMajor, CblasNoTrans, num_kernels, (int) num, 0.5, (double*) W,
            num_kernels, alphay, 1, 1.0, (double*) sumw, 1);

    delete[] alphay;
#else
    // scalar fallback: same quantities as the dgemv path above
    for (int32_t i=0; i<num; i++)
        suma += a[i];

    for (int32_t d=0; d<num_kernels; d++)
    {
        sumw[d]=0;
        for (int32_t i=0; i<num; i++)
            sumw[d] += a[i]*(0.5*label[i]*W[i*num_kernels+d]);
    }
#endif

    if (callback)
        mkl_converged=callback(mkl, sumw, suma);


    // weights may have been changed by the callback; re-read them
    const float64_t* new_beta   = kernel->get_subkernel_weights(num_kernels);

    // update lin
#ifdef HAVE_LAPACK
    // lin = W^T * new_beta
    cblas_dgemv(CblasColMajor, CblasTrans, nk, (int) num, 1.0, (double*) W,
        nk, (double*) new_beta, 1, 0.0, (double*) lin, 1);
#else
    for (int32_t i=0; i<num; i++)
        lin[i]=0 ;
    for (int32_t d=0; d<num_kernels; d++)
        if (new_beta[d]!=0)
            for (int32_t i=0; i<num; i++)
                lin[i] += new_beta[d]*W[i*num_kernels+d] ;
#endif

    delete[] sumw;
}
01743 
01744 
01745 /*************************** Working set selection ***************************/
01746 
/* Pick the next QP working set of (up to) qp_size variables.
 *
 * Two symmetric passes over the active examples: pass 1 ranks candidates
 * for direction s = -label[j], pass 2 for s = +label[j]. In each pass a
 * candidate must be feasible in direction s (not at the lower bound when
 * s<0, not at the upper bound C when s>0), not already chosen, labeled,
 * and not marked inconsistent; if cache_only is set and the kernel cache
 * is in use, its kernel row must additionally be cached. Candidates are
 * scored by the (signed) gradient criterion in selcrit, the top qp_size/2
 * of each pass are appended to working2dnum (touching their cache rows),
 * and the index is re-terminated with -1.
 *
 * Returns the number of variables actually added. */
int32_t CSVMLight::select_next_qp_subproblem_grad(
    int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc,
    int32_t qp_size, int32_t *inconsistent, int32_t *active2dnum,
    int32_t *working2dnum, float64_t *selcrit, int32_t *select,
    int32_t cache_only, int32_t *key, int32_t *chosen)
    /* Use the feasible direction approach to select the next
       qp-subproblem (see chapter 'Selecting a good working set'). If
       'cache_only' is true, then the variables are selected only among
       those for which the kernel evaluations are cached. */
{
    int32_t choosenum,i,j,k,activedoc,inum,valid;
    float64_t s;

    for (inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
    choosenum=0;
    activedoc=0;
    /* pass 1: candidates for direction s = -label[j] */
    for (i=0;(j=active2dnum[i])>=0;i++) {
        s=-label[j];
        if(cache_only)
        {
            if (use_kernel_cache)
                valid=(kernel->kernel_cache_check(j));
            else
                valid = 1 ;
        }
        else
            valid=1;
        if(valid
           && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
           && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
                 && (s>0)))
           && (!chosen[j])
           && (label[j])
           && (!inconsistent[j]))
        {
            selcrit[activedoc]=(float64_t)label[j]*(learn_parm->eps[j]-(float64_t)label[j]*c[j]+(float64_t)label[j]*lin[j]);
            key[activedoc]=j;
            activedoc++;
        }
    }
    select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
    for (k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
        i=key[select[k]];
        chosen[i]=1;
        working2dnum[inum+choosenum]=i;
        choosenum+=1;
        if (use_kernel_cache)
            kernel->kernel_cache_touch(i);
        /* make sure it does not get kicked */
        /* out of cache */
    }

    activedoc=0;
    /* pass 2: candidates for direction s = +label[j], criterion negated */
    for (i=0;(j=active2dnum[i])>=0;i++) {
        s=label[j];
        if(cache_only)
        {
            if (use_kernel_cache)
                valid=(kernel->kernel_cache_check(j));
            else
                valid = 1 ;
        }
        else
            valid=1;
        if(valid
           && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
           && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
                 && (s>0)))
           && (!chosen[j])
           && (label[j])
           && (!inconsistent[j]))
        {
            selcrit[activedoc]=-(float64_t)label[j]*(learn_parm->eps[j]-(float64_t)label[j]*c[j]+(float64_t)label[j]*lin[j]);
            /*  selcrit[activedoc]=-(float64_t)(label[j]*(-1.0+(float64_t)label[j]*lin[j])); */
            key[activedoc]=j;
            activedoc++;
        }
    }
    select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
    for (k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
        i=key[select[k]];
        chosen[i]=1;
        working2dnum[inum+choosenum]=i;
        choosenum+=1;
        if (use_kernel_cache)
            kernel->kernel_cache_touch(i); /* make sure it does not get kicked */
        /* out of cache */
    }
    working2dnum[inum+choosenum]=-1; /* complete index */
    return(choosenum);
}
01838 
/* Variant of working-set selection with a pseudo-random ranking.
 *
 * Same feasibility filters as select_next_qp_subproblem_grad (two passes,
 * s = -label[j] then s = +label[j]), but the selection criterion is
 * (j+iteration) % totdoc instead of the gradient — a deterministic
 * pseudo-random score used to jump over numerical problems. Appends up to
 * qp_size chosen indices to working2dnum, re-terminates it with -1, and
 * returns how many were added. */
int32_t CSVMLight::select_next_qp_subproblem_rand(
    int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc,
    int32_t qp_size, int32_t *inconsistent, int32_t *active2dnum,
    int32_t *working2dnum, float64_t *selcrit, int32_t *select, int32_t *key,
    int32_t *chosen, int32_t iteration)
/* Use the feasible direction approach to select the next
   qp-subproblem (see section 'Selecting a good working set'). Chooses
   a feasible direction at (pseudo) random to help jump over numerical
   problem. */
{
  int32_t choosenum,i,j,k,activedoc,inum;
  float64_t s;

  for (inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
  choosenum=0;
  activedoc=0;
  /* pass 1: feasible in direction s = -label[j] */
  for (i=0;(j=active2dnum[i])>=0;i++) {
    s=-label[j];
    if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
       && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
         && (s>0)))
       && (!inconsistent[j])
       && (label[j])
       && (!chosen[j])) {
      selcrit[activedoc]=(j+iteration) % totdoc;
      key[activedoc]=j;
      activedoc++;
    }
  }
  select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
  for (k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
    i=key[select[k]];
    chosen[i]=1;
    working2dnum[inum+choosenum]=i;
    choosenum+=1;
    if (use_kernel_cache)
        kernel->kernel_cache_touch(i); /* make sure it does not get kicked */
                                        /* out of cache */
  }

  activedoc=0;
  /* pass 2: feasible in direction s = +label[j] */
  for (i=0;(j=active2dnum[i])>=0;i++) {
    s=label[j];
    if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
       && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
         && (s>0)))
       && (!inconsistent[j])
       && (label[j])
       && (!chosen[j])) {
      selcrit[activedoc]=(j+iteration) % totdoc;
      key[activedoc]=j;
      activedoc++;
    }
  }
  select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
  for (k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
    i=key[select[k]];
    chosen[i]=1;
    working2dnum[inum+choosenum]=i;
    choosenum+=1;
    if (use_kernel_cache)
        kernel->kernel_cache_touch(i); /* make sure it does not get kicked */
                                        /* out of cache */
  }
  working2dnum[inum+choosenum]=-1; /* complete index */
  return(choosenum);
}
01906 
01907 
01908 
01909 void CSVMLight::select_top_n(
01910     float64_t *selcrit, int32_t range, int32_t* select, int32_t n)
01911 {
01912   register int32_t i,j;
01913 
01914   for (i=0;(i<n) && (i<range);i++) { /* Initialize with the first n elements */
01915     for (j=i;j>=0;j--) {
01916       if((j>0) && (selcrit[select[j-1]]<selcrit[i])){
01917     select[j]=select[j-1];
01918       }
01919       else {
01920     select[j]=i;
01921     j=-1;
01922       }
01923     }
01924   }
01925   if(n>0) {
01926     for (i=n;i<range;i++) {
01927       if(selcrit[i]>selcrit[select[n-1]]) {
01928     for (j=n-1;j>=0;j--) {
01929       if((j>0) && (selcrit[select[j-1]]<selcrit[i])) {
01930         select[j]=select[j-1];
01931       }
01932       else {
01933         select[j]=i;
01934         j=-1;
01935       }
01936     }
01937       }
01938     }
01939   }
01940 }
01941 
01942 
01943 /******************************** Shrinking  *********************************/
01944 
01945 void CSVMLight::init_shrink_state(
01946     SHRINK_STATE *shrink_state, int32_t totdoc, int32_t maxhistory)
01947 {
01948   int32_t i;
01949 
01950   shrink_state->deactnum=0;
01951   shrink_state->active = new int32_t[totdoc];
01952   shrink_state->inactive_since = new int32_t[totdoc];
01953   shrink_state->a_history = new float64_t*[maxhistory];
01954   shrink_state->maxhistory=maxhistory;
01955   shrink_state->last_lin = new float64_t[totdoc];
01956   shrink_state->last_a = new float64_t[totdoc];
01957 
01958   for (i=0;i<totdoc;i++) {
01959     shrink_state->active[i]=1;
01960     shrink_state->inactive_since[i]=0;
01961     shrink_state->last_a[i]=0;
01962     shrink_state->last_lin[i]=0;
01963   }
01964 }
01965 
01966 void CSVMLight::shrink_state_cleanup(SHRINK_STATE *shrink_state)
01967 {
01968   delete[] shrink_state->active;
01969   delete[] shrink_state->inactive_since;
01970   if(shrink_state->deactnum > 0)
01971     delete[] (shrink_state->a_history[shrink_state->deactnum-1]);
01972   delete[] (shrink_state->a_history);
01973   delete[] (shrink_state->last_a);
01974   delete[] (shrink_state->last_lin);
01975 }
01976 
/* Deactivate variables that have been suboptimal for a while.
 *
 * A variable is a shrink candidate if it has been suboptimal for more than
 * learn_parm->svm_iter_to_shrink iterations or is marked inconsistent.
 * Shrinking only happens when at least minshrink candidates exist and the
 * alpha history still has room. In the non-linadd case the current alphas
 * are snapshotted into a_history[deactnum] so lin[] can later be replayed
 * by reactivate_inactive_examples(); with linadd enabled no history is
 * kept and deactnum is reset to 0. Returns the number of variables that
 * remain active (active2dnum is rebuilt accordingly). */
int32_t CSVMLight::shrink_problem(
    SHRINK_STATE *shrink_state, int32_t *active2dnum,
    int32_t *last_suboptimal_at, int32_t iteration, int32_t totdoc,
    int32_t minshrink, float64_t *a, int32_t *inconsistent, float64_t* c,
    float64_t* lin, int* label)
     /* Shrink some variables away.  Do the shrinking only if at least
        minshrink variables can be removed. */
{
  int32_t i,ii,change,activenum,lastiter;
  float64_t *a_old=NULL;

  /* first pass: count active variables and shrink candidates */
  activenum=0;
  change=0;
  for (ii=0;active2dnum[ii]>=0;ii++) {
      i=active2dnum[ii];
      activenum++;
      lastiter=last_suboptimal_at[i];

      if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink)
         || (inconsistent[i])) {
          change++;
      }
  }
  if((change>=minshrink) /* shrink only if sufficiently many candidates */
     && (shrink_state->deactnum<shrink_state->maxhistory)) { /* and enough memory */
      /* Shrink problem by removing those variables which are */
      /* optimal at a bound for a minimum number of iterations */
      if(verbosity>=2) {
          SG_INFO( " Shrinking...");
      }

      if (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())) { /*  non-linear case save alphas */

          /* snapshot alphas so lin[] can be replayed on reactivation */
          a_old=new float64_t[totdoc];
          shrink_state->a_history[shrink_state->deactnum]=a_old;
          for (i=0;i<totdoc;i++) {
              a_old[i]=a[i];
          }
      }
      /* second pass: actually deactivate the candidates */
      for (ii=0;active2dnum[ii]>=0;ii++) {
          i=active2dnum[ii];
          lastiter=last_suboptimal_at[i];
          if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink)
             || (inconsistent[i])) {
              shrink_state->active[i]=0;
              shrink_state->inactive_since[i]=shrink_state->deactnum;
          }
      }
      activenum=compute_index(shrink_state->active,totdoc,active2dnum);
      shrink_state->deactnum++;
      /* linadd needs no alpha history; keep the counter at zero */
      if(kernel->has_property(KP_LINADD) && get_linadd_enabled())
          shrink_state->deactnum=0;

      if(verbosity>=2) {
          SG_DONE();
          SG_DEBUG("Number of inactive variables = %ld\n", totdoc-activenum);
      }
  }
  return(activenum);
}
02037 
02038 void* CSVMLight::reactivate_inactive_examples_linadd_helper(void* p)
02039 {
02040     S_THREAD_PARAM_REACTIVATE_LINADD* params = (S_THREAD_PARAM_REACTIVATE_LINADD*) p;
02041 
02042     CKernel* k = params->kernel;
02043     float64_t* lin = params->lin;
02044     float64_t* last_lin = params->last_lin;
02045     int32_t* active = params->active;
02046     int32_t* docs = params->docs;
02047     int32_t start = params->start;
02048     int32_t end = params->end;
02049 
02050     for (int32_t i=start;i<end;i++)
02051     {
02052         if (!active[i])
02053             lin[i] = last_lin[i]+k->compute_optimized(docs[i]);
02054 
02055         last_lin[i]=lin[i];
02056     }
02057 
02058     return NULL;
02059 }
02060 
02061 void* CSVMLight::reactivate_inactive_examples_vanilla_helper(void* p)
02062 {
02063     S_THREAD_PARAM_REACTIVATE_VANILLA* params = (S_THREAD_PARAM_REACTIVATE_VANILLA*) p;
02064     ASSERT(params);
02065     ASSERT(params->kernel);
02066     ASSERT(params->lin);
02067     ASSERT(params->aicache);
02068     ASSERT(params->a);
02069     ASSERT(params->a_old);
02070     ASSERT(params->changed2dnum);
02071     ASSERT(params->inactive2dnum);
02072     ASSERT(params->label);
02073 
02074     CKernel* k = params->kernel;
02075     float64_t* lin = params->lin;
02076     float64_t* aicache = params->aicache;
02077     float64_t* a= params->a;
02078     float64_t* a_old = params->a_old;
02079     int32_t* changed2dnum = params->changed2dnum;
02080     int32_t* inactive2dnum = params->inactive2dnum;
02081     int32_t* label = params->label;
02082     int32_t start = params->start;
02083     int32_t end = params->end;
02084 
02085     for (int32_t ii=start;ii<end;ii++)
02086     {
02087         int32_t i=changed2dnum[ii];
02088         int32_t j=0;
02089         ASSERT(i>=0);
02090 
02091         k->get_kernel_row(i,inactive2dnum,aicache);
02092         for (int32_t jj=0;(j=inactive2dnum[jj])>=0;jj++)
02093             lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
02094     }
02095     return NULL;
02096 }
02097 
/* Bring all shrunken variables back into the optimization and recompute
 * their lin[] values, then re-derive the active set from the KT
 * conditions.
 *
 * Three recomputation strategies:
 *  1. linadd, no batch evaluation: push alpha deltas into the kernel's
 *     normal, then refresh lin for inactive examples via
 *     compute_optimized (threaded with the linadd helper).
 *  2. linadd + batch evaluation: collect support-vector deltas and evaluate
 *     them against all inactive examples in one compute_batch call.
 *  3. vanilla (no linadd): replay the alpha history snapshots stored by
 *     shrink_problem(), adding kernel-row contributions of changed alphas
 *     to the lin of examples deactivated at each history step (threaded
 *     with per-thread scratch buffers in the multi-thread case).
 * Finally, *maxdiff is set to the largest KT violation found, variables
 * not clearly optimal-at-bound are re-activated, and (non-linadd only)
 * the alpha history is collapsed into its newest slot. */
void CSVMLight::reactivate_inactive_examples(
    int32_t* label, float64_t *a, SHRINK_STATE *shrink_state, float64_t *lin,
    float64_t *c, int32_t totdoc, int32_t iteration, int32_t *inconsistent,
    int32_t* docs, float64_t *aicache, float64_t *maxdiff)
     /* Make all variables active again which had been removed by
        shrinking. */
     /* Computes lin for those variables from scratch. */
{
  register int32_t i,j,ii,jj,t,*changed2dnum,*inactive2dnum;
  int32_t *changed,*inactive;
  register float64_t *a_old,dist;
  float64_t ex_c,target;

  if (kernel->has_property(KP_LINADD) && get_linadd_enabled()) { /* special linear case */
      a_old=shrink_state->last_a;

      if (!use_batch_computation || !kernel->has_property(KP_BATCHEVALUATION))
      {
          /* strategy 1: delta update via the kernel normal */
          SG_DEBUG( " clear normal - linadd\n");
          kernel->clear_normal();

          int32_t num_modified=0;
          for (i=0;i<totdoc;i++) {
              if(a[i] != a_old[i]) {
                  kernel->add_to_normal(docs[i], ((a[i]-a_old[i])*(float64_t)label[i]));
                  a_old[i]=a[i];
                  num_modified++;
              }
          }

          if (num_modified>0)
          {
              int32_t num_threads=parallel->get_num_threads();
              ASSERT(num_threads>0);
              if (num_threads < 2)
              {
                  S_THREAD_PARAM_REACTIVATE_LINADD params;
                  params.kernel=kernel;
                  params.lin=lin;
                  params.last_lin=shrink_state->last_lin;
                  params.docs=docs;
                  params.active=shrink_state->active;
                  params.start=0;
                  params.end=totdoc;
                  reactivate_inactive_examples_linadd_helper((void*) &params);
              }
#ifndef WIN32
              else
              {
                  /* split [0,totdoc) over num_threads-1 workers; the main
                     thread handles the remainder chunk via params[t] */
                  pthread_t* threads = new pthread_t[num_threads-1];
                  S_THREAD_PARAM_REACTIVATE_LINADD* params = new S_THREAD_PARAM_REACTIVATE_LINADD[num_threads];
                  int32_t step= totdoc/num_threads;

                  for (t=0; t<num_threads-1; t++)
                  {
                      params[t].kernel=kernel;
                      params[t].lin=lin;
                      params[t].last_lin=shrink_state->last_lin;
                      params[t].docs=docs;
                      params[t].active=shrink_state->active;
                      params[t].start = t*step;
                      params[t].end = (t+1)*step;
                      pthread_create(&threads[t], NULL, CSVMLight::reactivate_inactive_examples_linadd_helper, (void*)&params[t]);
                  }

                  params[t].kernel=kernel;
                  params[t].lin=lin;
                  params[t].last_lin=shrink_state->last_lin;
                  params[t].docs=docs;
                  params[t].active=shrink_state->active;
                  params[t].start = t*step;
                  params[t].end = totdoc;
                  reactivate_inactive_examples_linadd_helper((void*) &params[t]);

                  for (t=0; t<num_threads-1; t++)
                      pthread_join(threads[t], NULL);

                  delete[] threads;
                  delete[] params;
              }
#endif

          }
      }
      else
      {
          /* strategy 2: batch evaluation of all changed support vectors
             against all inactive examples */
          float64_t *alphas = new float64_t[totdoc] ;
          int32_t *idx = new int32_t[totdoc] ;
          int32_t num_suppvec=0 ;

          for (i=0; i<totdoc; i++)
          {
              if(a[i] != a_old[i])
              {
                  alphas[num_suppvec] = (a[i]-a_old[i])*(float64_t)label[i];
                  idx[num_suppvec] = i ;
                  a_old[i] = a[i] ;
                  num_suppvec++ ;
              }
          }

          if (num_suppvec>0)
          {
              int32_t num_inactive=0;
              int32_t* inactive_idx=new int32_t[totdoc]; // infact we only need a subset

              j=0;
              for (i=0;i<totdoc;i++)
              {
                  if(!shrink_state->active[i])
                  {
                      inactive_idx[j++] = i;
                      num_inactive++;
                  }
              }

              if (num_inactive>0)
              {
                  float64_t* dest = new float64_t[num_inactive];
                  memset(dest, 0, sizeof(float64_t)*num_inactive);

                  kernel->compute_batch(num_inactive, inactive_idx, dest, num_suppvec, idx, alphas);

                  /* dest[j] holds the delta for the j-th inactive example */
                  j=0;
                  for (i=0;i<totdoc;i++) {
                      if(!shrink_state->active[i]) {
                          lin[i] = shrink_state->last_lin[i] + dest[j++] ;
                      }
                      shrink_state->last_lin[i]=lin[i];
                  }

                  delete[] dest;
              }
              else
              {
                  for (i=0;i<totdoc;i++)
                      shrink_state->last_lin[i]=lin[i];
              }
              delete[] inactive_idx;
          }
          delete[] alphas;
          delete[] idx;
      }

      kernel->delete_optimization();
  }
  else
  {
      /* strategy 3: replay the alpha history from newest stored snapshot
         backwards; each step t updates lin of the examples deactivated
         at that step with the contributions of alphas changed since */
      changed=new int32_t[totdoc];
      changed2dnum=new int32_t[totdoc+11];
      inactive=new int32_t[totdoc];
      inactive2dnum=new int32_t[totdoc+11];
      for (t=shrink_state->deactnum-1;(t>=0) && shrink_state->a_history[t];t--)
      {
          if(verbosity>=2) {
              SG_INFO( "%ld..",t);
          }
          a_old=shrink_state->a_history[t];
          for (i=0;i<totdoc;i++) {
              inactive[i]=((!shrink_state->active[i])
                           && (shrink_state->inactive_since[i] == t));
              changed[i]= (a[i] != a_old[i]);
          }
          compute_index(inactive,totdoc,inactive2dnum);
          compute_index(changed,totdoc,changed2dnum);


          int32_t num_threads=parallel->get_num_threads();
          ASSERT(num_threads>0);

          if (num_threads < 2)
          {
              for (ii=0;(i=changed2dnum[ii])>=0;ii++) {
                  kernel->get_kernel_row(i,inactive2dnum,aicache);
                  for (jj=0;(j=inactive2dnum[jj])>=0;jj++)
                      lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
              }
          }
#ifndef WIN32
          else
          {
              //find number of the changed ones
              int32_t num_changed=0;
              for (ii=0;changed2dnum[ii]>=0;ii++)
                  num_changed++;

              if (num_changed>0)
              {
                  pthread_t* threads= new pthread_t[num_threads-1];
                  S_THREAD_PARAM_REACTIVATE_VANILLA* params = new S_THREAD_PARAM_REACTIVATE_VANILLA[num_threads];
                  int32_t step= num_changed/num_threads;

                  // alloc num_threads many tmp buffers
                  float64_t* tmp_lin=new float64_t[totdoc*num_threads];
                  memset(tmp_lin, 0, sizeof(float64_t)*((size_t) totdoc)*num_threads);
                  float64_t* tmp_aicache=new float64_t[totdoc*num_threads];
                  memset(tmp_aicache, 0, sizeof(float64_t)*((size_t) totdoc)*num_threads);

                  int32_t thr;
                  for (thr=0; thr<num_threads-1; thr++)
                  {
                      params[thr].kernel=kernel;
                      params[thr].lin=&tmp_lin[thr*totdoc];
                      params[thr].aicache=&tmp_aicache[thr*totdoc];
                      params[thr].a=a;
                      params[thr].a_old=a_old;
                      params[thr].changed2dnum=changed2dnum;
                      params[thr].inactive2dnum=inactive2dnum;
                      params[thr].label=label;
                      params[thr].start = thr*step;
                      params[thr].end = (thr+1)*step;
                      pthread_create(&threads[thr], NULL, CSVMLight::reactivate_inactive_examples_vanilla_helper, (void*)&params[thr]);
                  }

                  /* main thread processes the tail chunk into its own
                     scratch buffer (slot thr == num_threads-1) */
                  params[thr].kernel=kernel;
                  params[thr].lin=&tmp_lin[thr*totdoc];
                  params[thr].aicache=&tmp_aicache[thr*totdoc];
                  params[thr].a=a;
                  params[thr].a_old=a_old;
                  params[thr].changed2dnum=changed2dnum;
                  params[thr].inactive2dnum=inactive2dnum;
                  params[thr].label=label;
                  params[thr].start = thr*step;
                  params[thr].end = num_changed;
                  reactivate_inactive_examples_vanilla_helper((void*) &params[thr]);

                  /* fold in the main thread's partial results, then each
                     worker's as it finishes */
                  for (jj=0;(j=inactive2dnum[jj])>=0;jj++)
                      lin[j]+=tmp_lin[totdoc*thr+j];

                  for (thr=0; thr<num_threads-1; thr++)
                  {
                      pthread_join(threads[thr], NULL);

                      //add up results
                      for (jj=0;(j=inactive2dnum[jj])>=0;jj++)
                          lin[j]+=tmp_lin[totdoc*thr+j];
                  }

                  delete[] tmp_lin;
                  delete[] tmp_aicache;
                  delete[] threads;
                  delete[] params;
              }
          }
#endif
      }
      delete[] changed;
      delete[] changed2dnum;
      delete[] inactive;
      delete[] inactive2dnum;
  }

  /* recompute KT violations and decide which variables stay active */
  (*maxdiff)=0;
  for (i=0;i<totdoc;i++) {
    shrink_state->inactive_since[i]=shrink_state->deactnum-1;
    if(!inconsistent[i]) {
      dist=(lin[i]-model->b)*(float64_t)label[i];
      target=-(learn_parm->eps[i]-(float64_t)label[i]*c[i]);
      ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
      if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
    if((dist-target)>(*maxdiff))  /* largest violation */
      (*maxdiff)=dist-target;
      }
      else if((a[i]<ex_c) && (dist < target)) {
    if((target-dist)>(*maxdiff))  /* largest violation */
      (*maxdiff)=target-dist;
      }
      if((a[i]>(0+learn_parm->epsilon_a))
     && (a[i]<ex_c)) {
    shrink_state->active[i]=1;                         /* not at bound */
      }
      else if((a[i]<=(0+learn_parm->epsilon_a)) && (dist < (target+learn_parm->epsilon_shrink))) {
    shrink_state->active[i]=1;
      }
      else if((a[i]>=ex_c)
          && (dist > (target-learn_parm->epsilon_shrink))) {
    shrink_state->active[i]=1;
      }
      else if(learn_parm->sharedslack) { /* make all active when sharedslack */
    shrink_state->active[i]=1;
      }
    }
  }

  if (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())) { /* update history for non-linear */
      /* keep only the newest snapshot (now equal to a[]); free the rest */
      for (i=0;i<totdoc;i++) {
          (shrink_state->a_history[shrink_state->deactnum-1])[i]=a[i];
      }
      for (t=shrink_state->deactnum-2;(t>=0) && shrink_state->a_history[t];t--) {
          delete[] shrink_state->a_history[t];
          shrink_state->a_history[t]=0;
      }
  }
}
02392 
02393 
02394 
02395 /* start the optimizer and return the optimal values */
02396 float64_t* CSVMLight::optimize_qp(
02397         QP *qp, float64_t *epsilon_crit, int32_t nx, float64_t *threshold,
02398         int32_t& svm_maxqpsize)
02399 {
02400     register int32_t i, j, result;
02401     float64_t margin, obj_before, obj_after;
02402     float64_t sigdig, dist, epsilon_loqo;
02403     int32_t iter;
02404 
02405     if(!primal) { /* allocate memory at first call */
02406         primal=new float64_t[nx*3];
02407         dual=new float64_t[nx*2+1];
02408     }
02409 
02410     obj_before=0; /* calculate objective before optimization */
02411     for(i=0;i<qp->opt_n;i++) {
02412         obj_before+=(qp->opt_g0[i]*qp->opt_xinit[i]);
02413         obj_before+=(0.5*qp->opt_xinit[i]*qp->opt_xinit[i]*qp->opt_g[i*qp->opt_n+i]);
02414         for(j=0;j<i;j++) {
02415             obj_before+=(qp->opt_xinit[j]*qp->opt_xinit[i]*qp->opt_g[j*qp->opt_n+i]);
02416         }
02417     }
02418 
02419     result=STILL_RUNNING;
02420     qp->opt_ce0[0]*=(-1.0);
02421     /* Run pr_loqo. If a run fails, try again with parameters which lead */
02422     /* to a slower, but more robust setting. */
02423     for(margin=init_margin,iter=init_iter;
02424         (margin<=0.9999999) && (result!=OPTIMAL_SOLUTION);) {
02425 
02426         opt_precision=CMath::max(opt_precision, DEF_PRECISION);
02427         sigdig=-log10(opt_precision);
02428 
02429         result=pr_loqo((int32_t)qp->opt_n,(int32_t)qp->opt_m,
02430                 (float64_t *)qp->opt_g0,(float64_t *)qp->opt_g,
02431                 (float64_t *)qp->opt_ce,(float64_t *)qp->opt_ce0,
02432                 (float64_t *)qp->opt_low,(float64_t *)qp->opt_up,
02433                 (float64_t *)primal,(float64_t *)dual,
02434                 (int32_t)(verbosity-2),
02435                 (float64_t)sigdig,(int32_t)iter,
02436                 (float64_t)margin,(float64_t)(qp->opt_up[0])/4.0,(int32_t)0);
02437 
02438         if(CMath::is_nan(dual[0])) {     /* check for choldc problem */
02439             if(verbosity>=2) {
02440                 SG_SDEBUG("Restarting PR_LOQO with more conservative parameters.\n");
02441             }
02442             if(init_margin<0.80) { /* become more conservative in general */
02443                 init_margin=(4.0*margin+1.0)/5.0;
02444             }
02445             margin=(margin+1.0)/2.0;
02446             (opt_precision)*=10.0;   /* reduce precision */
02447             if(verbosity>=2) {
02448                 SG_SDEBUG("Reducing precision of PR_LOQO.\n");
02449             }
02450         }
02451         else if(result!=OPTIMAL_SOLUTION) {
02452             iter+=2000;
02453             init_iter+=10;
02454             (opt_precision)*=10.0;   /* reduce precision */
02455             if(verbosity>=2) {
02456                 SG_SDEBUG("Reducing precision of PR_LOQO due to (%ld).\n",result);
02457             }
02458         }
02459     }
02460 
02461     if(qp->opt_m)         /* Thanks to Alex Smola for this hint */
02462         model_b=dual[0];
02463     else
02464         model_b=0;
02465 
02466     /* Check the precision of the alphas. If results of current optimization */
02467     /* violate KT-Conditions, relax the epsilon on the bounds on alphas. */
02468     epsilon_loqo=1E-10;
02469     for(i=0;i<qp->opt_n;i++) {
02470         dist=-model_b*qp->opt_ce[i];
02471         dist+=(qp->opt_g0[i]+1.0);
02472         for(j=0;j<i;j++) {
02473             dist+=(primal[j]*qp->opt_g[j*qp->opt_n+i]);
02474         }
02475         for(j=i;j<qp->opt_n;j++) {
02476             dist+=(primal[j]*qp->opt_g[i*qp->opt_n+j]);
02477         }
02478         /*  SG_SDEBUG("LOQO: a[%d]=%f, dist=%f, b=%f\n",i,primal[i],dist,dual[0]); */
02479         if((primal[i]<(qp->opt_up[i]-epsilon_loqo)) && (dist < (1.0-(*epsilon_crit)))) {
02480             epsilon_loqo=(qp->opt_up[i]-primal[i])*2.0;
02481         }
02482         else if((primal[i]>(0+epsilon_loqo)) && (dist > (1.0+(*epsilon_crit)))) {
02483             epsilon_loqo=primal[i]*2.0;
02484         }
02485     }
02486 
02487     for(i=0;i<qp->opt_n;i++) {  /* clip alphas to bounds */
02488         if(primal[i]<=(0+epsilon_loqo)) {
02489             primal[i]=0;
02490         }
02491         else if(primal[i]>=(qp->opt_up[i]-epsilon_loqo)) {
02492             primal[i]=qp->opt_up[i];
02493         }
02494     }
02495 
02496     obj_after=0;  /* calculate objective after optimization */
02497     for(i=0;i<qp->opt_n;i++) {
02498         obj_after+=(qp->opt_g0[i]*primal[i]);
02499         obj_after+=(0.5*primal[i]*primal[i]*qp->opt_g[i*qp->opt_n+i]);
02500         for(j=0;j<i;j++) {
02501             obj_after+=(primal[j]*primal[i]*qp->opt_g[j*qp->opt_n+i]);
02502         }
02503     }
02504 
02505     /* if optimizer returned NAN values, reset and retry with smaller */
02506     /* working set. */
02507     if(CMath::is_nan(obj_after) || CMath::is_nan(model_b)) {
02508         for(i=0;i<qp->opt_n;i++) {
02509             primal[i]=qp->opt_xinit[i];
02510         }
02511         model_b=0;
02512         if(svm_maxqpsize>2) {
02513             svm_maxqpsize--;  /* decrease size of qp-subproblems */
02514         }
02515     }
02516 
02517     if(obj_after >= obj_before) { /* check whether there was progress */
02518         (opt_precision)/=100.0;
02519         precision_violations++;
02520         if(verbosity>=2) {
02521             SG_SDEBUG("Increasing Precision of PR_LOQO.\n");
02522         }
02523     }
02524 
02525     if(precision_violations > 500) {
02526         (*epsilon_crit)*=10.0;
02527         precision_violations=0;
02528         SG_SINFO("Relaxing epsilon on KT-Conditions.\n");
02529     }
02530 
02531     (*threshold)=model_b;
02532 
02533     if(result!=OPTIMAL_SOLUTION) {
02534         SG_SERROR("PR_LOQO did not converge.\n");
02535         return(qp->opt_xinit);
02536     }
02537     else {
02538         return(primal);
02539     }
02540 }
02541 
02542 
02543 #endif //USE_SVMLIGHT
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation