SVMLight.cpp
1 /***********************************************************************/
2 /* */
3 /* SVMLight.cpp */
4 /* */
5 /* Author: Thorsten Joachims */
6 /* Date: 19.07.99 */
7 /* */
8 /* Copyright (c) 1999 Universitaet Dortmund - All rights reserved */
9 /* */
10 /* This software is available for non-commercial use only. It must */
11 /* not be modified and distributed without prior permission of the */
12 /* author. The author is not responsible for implications from the */
13 /* use of this software. */
14 /* */
15 /* THIS INCLUDES THE FOLLOWING ADDITIONS */
16 /* Generic Kernel Interfacing: Soeren Sonnenburg */
17 /* Parallelizations: Soeren Sonnenburg */
18 /* Multiple Kernel Learning: Gunnar Raetsch, Soeren Sonnenburg, */
19 /* Alexander Zien, Marius Kloft, Chen Guohua */
20 /* Linadd Speedup: Gunnar Raetsch, Soeren Sonnenburg */
21 /* */
22 /***********************************************************************/
23 #include <shogun/lib/config.h>
24 
25 #ifdef USE_SVMLIGHT
26 
27 #include <shogun/io/SGIO.h>
28 #include <shogun/lib/Signal.h>
30 #include <shogun/lib/Time.h>
32 
34 #include <shogun/lib/external/pr_loqo.h>
35 
36 #include <shogun/kernel/Kernel.h>
39 
40 #include <unistd.h>
41 
42 #include <shogun/base/Parallel.h>
44 
45 #include <stdio.h>
46 #include <ctype.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <time.h>
50 
51 #ifdef HAVE_PTHREAD
52 #include <pthread.h>
53 #endif
54 
55 using namespace shogun;
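/* A minimal usage sketch (illustrative only, not part of this file; the
   class and kernel names below are the standard Shogun 4.x API and the
   data objects are assumed to exist):

     CDenseFeatures<float64_t>* feats = new CDenseFeatures<float64_t>(mat);
     CBinaryLabels* labs = new CBinaryLabels(lab_vec);  // labels in {-1,+1}
     CGaussianKernel* kern = new CGaussianKernel(feats, feats, 2.0);
     CSVMLight* svm = new CSVMLight(1.0, kern, labs);   // C=1.0
     svm->train();                                      // runs svm_learn()
     CBinaryLabels* pred = svm->apply_binary(feats);
*/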
56 
57 #ifndef DOXYGEN_SHOULD_SKIP_THIS
58 struct S_THREAD_PARAM_REACTIVATE_LINADD
59 {
60  CKernel* kernel;
61  float64_t* lin;
62  float64_t* last_lin;
63  int32_t* active;
64  int32_t* docs;
65  int32_t start;
66  int32_t end;
67 };
68 
69 struct S_THREAD_PARAM_SVMLIGHT
70 {
71  float64_t * lin ;
72  float64_t* W;
73  int32_t start, end;
74  int32_t * active2dnum ;
75  int32_t * docs ;
76  CKernel* kernel ;
77 };
78 
79 struct S_THREAD_PARAM_REACTIVATE_VANILLA
80 {
81  CKernel* kernel;
82  float64_t* lin;
83  float64_t* aicache;
84  float64_t* a;
85  float64_t* a_old;
86  int32_t* changed2dnum;
87  int32_t* inactive2dnum;
88  int32_t* label;
89  int32_t start;
90  int32_t end;
91 };
92 
93 struct S_THREAD_PARAM_KERNEL
94 {
95  float64_t *Kval ;
96  int32_t *KI, *KJ ;
97  int32_t start, end;
98  CSVMLight* svmlight;
99 };
100 
101 #endif // DOXYGEN_SHOULD_SKIP_THIS
102 
103 void* CSVMLight::update_linear_component_linadd_helper(void* p)
104 {
105  S_THREAD_PARAM_SVMLIGHT* params = (S_THREAD_PARAM_SVMLIGHT*) p;
106 
107  int32_t jj=0, j=0 ;
108 
109  for (jj=params->start;(jj<params->end) && (j=params->active2dnum[jj])>=0;jj++)
110  params->lin[j]+=params->kernel->compute_optimized(params->docs[j]);
111 
112  return NULL ;
113 }
114 
115 void* CSVMLight::compute_kernel_helper(void* p)
116 {
117  S_THREAD_PARAM_KERNEL* params = (S_THREAD_PARAM_KERNEL*) p;
118 
119  int32_t jj=0 ;
120  for (jj=params->start;jj<params->end;jj++)
121  params->Kval[jj]=params->svmlight->compute_kernel(params->KI[jj], params->KJ[jj]) ;
122 
123  return NULL ;
124 }
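/* Both helpers above follow the same split used throughout this file: the
   caller carves an index range into parallel->get_num_threads() chunks,
   runs the last chunk itself and pthread_joins the rest (see the call
   sites below). The helper bodies only read shared arrays and write
   disjoint entries of lin[] resp. Kval[], so no locking is needed. */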
125 
126 CSVMLight::CSVMLight()
127 : CSVM()
128 {
129  init();
130  set_kernel(NULL);
131 }
132 
133 CSVMLight::CSVMLight(float64_t C, CKernel* k, CLabels* lab)
134 : CSVM(C, k, lab)
135 {
136  init();
137 }
138 
139 void CSVMLight::init()
140 {
141  //optimizer settings
142  primal=NULL;
143  dual=NULL;
144  init_margin=0.15;
145  init_iter=500;
146  precision_violations=0;
147  model_b=0;
148  verbosity=1;
149  opt_precision=DEF_PRECISION;
150 
151  // svm variables
152  W=NULL;
153  model=SG_MALLOC(MODEL, 1);
154  learn_parm=SG_MALLOC(LEARN_PARM, 1);
155  model->supvec=NULL;
156  model->alpha=NULL;
157  model->index=NULL;
158 
159  // MKL stuff
160  mymaxdiff=1 ;
161  mkl_converged=false;
162 }
163 
164 CSVMLight::~CSVMLight()
165 {
166 
167  SG_FREE(model->supvec);
168  SG_FREE(model->alpha);
169  SG_FREE(model->index);
170  SG_FREE(model);
171  SG_FREE(learn_parm);
172 
173  // MKL stuff
174  SG_FREE(W);
175 
176  // optimizer variables
177  SG_FREE(dual);
178  SG_FREE(primal);
179 }
180 
181 bool CSVMLight::train_machine(CFeatures* data)
182 {
183  //certain setup params
184  mkl_converged=false;
185  verbosity=1 ;
186  init_margin=0.15;
187  init_iter=500;
188  precision_violations=0;
189  opt_precision=DEF_PRECISION;
190 
191  strcpy (learn_parm->predfile, "");
192  learn_parm->biased_hyperplane= get_bias_enabled() ? 1 : 0;
193  learn_parm->sharedslack=0;
194  learn_parm->remove_inconsistent=0;
195  learn_parm->skip_final_opt_check=0;
196  learn_parm->svm_maxqpsize=get_qpsize();
197  learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize-1;
198  learn_parm->maxiter=100000;
199  learn_parm->svm_iter_to_shrink=100;
200  learn_parm->svm_c=C1;
201  learn_parm->transduction_posratio=0.33;
202  learn_parm->svm_costratio=C2/C1;
203  learn_parm->svm_costratio_unlab=1.0;
204  learn_parm->svm_unlabbound=1E-5;
205  learn_parm->epsilon_crit=epsilon; // GU: better decrease it ... ??
206  learn_parm->epsilon_a=1E-15;
207  learn_parm->compute_loo=0;
208  learn_parm->rho=1.0;
209  learn_parm->xa_depth=0;
210 
211  if (!kernel)
212  SG_ERROR("SVM_light can not proceed without kernel!\n")
213 
214  if (data)
215  {
216  if (m_labels->get_num_labels() != data->get_num_vectors())
217  {
218  SG_ERROR("%s::train_machine(): Number of training vectors (%d) does"
219  " not match number of labels (%d)\n", get_name(),
220  data->get_num_vectors(), m_labels->get_num_labels());
221  }
222  kernel->init(data, data);
223  }
224 
225  if (!kernel->has_features())
226  SG_ERROR("SVM_light can not proceed without initialized kernel!\n")
227 
228  ASSERT(m_labels && m_labels->get_num_labels())
229  ASSERT(m_labels->get_label_type() == LT_BINARY)
230  ASSERT(kernel->get_num_vec_lhs()==m_labels->get_num_labels())
231 
232  // in case of LINADD enabled kernels cleanup!
233  if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
234  kernel->clear_normal() ;
235 
236  // output some info
237  SG_DEBUG("threads = %i\n", parallel->get_num_threads())
238  SG_DEBUG("qpsize = %i\n", learn_parm->svm_maxqpsize)
239  SG_DEBUG("epsilon = %1.1e\n", learn_parm->epsilon_crit)
240  SG_DEBUG("kernel->has_property(KP_LINADD) = %i\n", kernel->has_property(KP_LINADD))
241  SG_DEBUG("kernel->has_property(KP_KERNCOMBINATION) = %i\n", kernel->has_property(KP_KERNCOMBINATION))
242  SG_DEBUG("kernel->has_property(KP_BATCHEVALUATION) = %i\n", kernel->has_property(KP_BATCHEVALUATION))
243  SG_DEBUG("kernel->get_optimization_type() = %s\n", kernel->get_optimization_type()==FASTBUTMEMHUNGRY ? "FASTBUTMEMHUNGRY" : "SLOWBUTMEMEFFICIENT" )
244  SG_DEBUG("get_solver_type() = %i\n", get_solver_type())
245  SG_DEBUG("get_linadd_enabled() = %i\n", get_linadd_enabled())
246  SG_DEBUG("get_batch_computation_enabled() = %i\n", get_batch_computation_enabled())
247  SG_DEBUG("kernel->get_num_subkernels() = %i\n", kernel->get_num_subkernels())
248 
249  use_kernel_cache = !((kernel->get_kernel_type() == K_CUSTOM) ||
250  (get_linadd_enabled() && kernel->has_property(KP_LINADD)));
251 
252  SG_DEBUG("use_kernel_cache = %i\n", use_kernel_cache)
253 
254  if (kernel->get_kernel_type() == K_COMBINED)
255  {
256 
257  for (index_t k_idx=0; k_idx<((CCombinedKernel*) kernel)->get_num_kernels(); k_idx++)
258  {
259  CKernel* kn = ((CCombinedKernel*) kernel)->get_kernel(k_idx);
260  // allocate kernel cache but clean up beforehand
261  kn->resize_kernel_cache(kn->get_cache_size());
262  SG_UNREF(kn);
263  }
264  }
265 
266  kernel->kernel_cache_reset_lru();
267 
268  // train the svm
269  svm_learn();
270 
271  // brain damaged svm light work around
272  create_new_model(model->sv_num-1);
273  set_bias(-model->b);
274  for (int32_t i=0; i<model->sv_num-1; i++)
275  {
276  set_alpha(i, model->alpha[i+1]);
277  set_support_vector(i, model->supvec[i+1]);
278  }
279 
280  // in case of LINADD enabled kernels cleanup!
281  if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
282  {
283  kernel->clear_normal() ;
284  kernel->delete_optimization() ;
285  }
286 
287  if (use_kernel_cache)
288  kernel->kernel_cache_cleanup();
289 
290  return true ;
291 }
292 
293 int32_t CSVMLight::get_runtime()
294 {
295  clock_t start;
296  start = clock();
297  return((int32_t)((float64_t)start*100.0/(float64_t)CLOCKS_PER_SEC));
298 }
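/* Note: get_runtime() reports processor time in hundredths of a second
   (clock() scaled by 100/CLOCKS_PER_SEC), so the t1-t0 differences
   accumulated into the TIMING profile below are in 10 ms units. */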
299 
300 
301 void CSVMLight::svm_learn()
302 {
303  int32_t *inconsistent, i;
304  int32_t misclassified,upsupvecnum;
305  float64_t maxdiff, *lin, *c, *a;
306  int32_t iterations;
307  int32_t trainpos=0, trainneg=0 ;
308  ASSERT(m_labels)
309  SGVector<int32_t> lab=((CBinaryLabels*) m_labels)->get_int_labels();
310  int32_t totdoc=lab.vlen;
311  ASSERT(lab.vector && lab.vlen)
312  int32_t* label=SGVector<int32_t>::clone_vector(lab.vector, lab.vlen);
313 
314  int32_t* docs=SG_MALLOC(int32_t, totdoc);
315  SG_FREE(W);
316  W=NULL;
317  count = 0 ;
318 
319  if (callback)
320  {
321  W = SG_MALLOC(float64_t, totdoc*kernel->get_num_subkernels());
322  for (i=0; i<totdoc*kernel->get_num_subkernels(); i++)
323  W[i]=0;
324  }
325 
326  for (i=0; i<totdoc; i++)
327  docs[i]=i;
328 
329  float64_t *xi_fullset; /* buffer for storing xi on full sample in loo */
330  float64_t *a_fullset; /* buffer for storing alpha on full sample in loo */
331  TIMING timing_profile;
332  SHRINK_STATE shrink_state;
333 
334  timing_profile.time_kernel=0;
335  timing_profile.time_opti=0;
336  timing_profile.time_shrink=0;
337  timing_profile.time_update=0;
338  timing_profile.time_model=0;
339  timing_profile.time_check=0;
340  timing_profile.time_select=0;
341 
342  /* make sure -n value is reasonable */
343  if((learn_parm->svm_newvarsinqp < 2)
344  || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
345  learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
346  }
347 
348  init_shrink_state(&shrink_state,totdoc,(int32_t)MAXSHRINK);
349 
350  inconsistent = SG_MALLOC(int32_t, totdoc);
351  c = SG_MALLOC(float64_t, totdoc);
352  a = SG_MALLOC(float64_t, totdoc);
353  a_fullset = SG_MALLOC(float64_t, totdoc);
354  xi_fullset = SG_MALLOC(float64_t, totdoc);
355  lin = SG_MALLOC(float64_t, totdoc);
356  if (m_linear_term.vlen>0)
357  learn_parm->eps=get_linear_term_array();
358  else
359  {
360  learn_parm->eps=SG_MALLOC(float64_t, totdoc); /* equivalent regression epsilon for classification */
361  SGVector<float64_t>::fill_vector(learn_parm->eps, totdoc, -1.0);
362  }
363 
364  learn_parm->svm_cost = SG_MALLOC(float64_t, totdoc);
365 
366  SG_FREE(model->supvec);
367  SG_FREE(model->alpha);
368  SG_FREE(model->index);
369  model->supvec = SG_MALLOC(int32_t, totdoc+2);
370  model->alpha = SG_MALLOC(float64_t, totdoc+2);
371  model->index = SG_MALLOC(int32_t, totdoc+2);
372 
373  model->at_upper_bound=0;
374  model->b=0;
375  model->supvec[0]=0; /* element 0 reserved and empty for now */
376  model->alpha[0]=0;
377  model->totdoc=totdoc;
378 
379  model->kernel=kernel;
380 
381  model->sv_num=1;
382  model->loo_error=-1;
383  model->loo_recall=-1;
384  model->loo_precision=-1;
385  model->xa_error=-1;
386  model->xa_recall=-1;
387  model->xa_precision=-1;
388 
389  for (i=0;i<totdoc;i++) { /* various inits */
390  inconsistent[i]=0;
391  c[i]=0;
392  a[i]=0;
393  lin[i]=0;
394 
395  if(label[i] > 0) {
396  learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
397  fabs((float64_t)label[i]);
398  label[i]=1;
399  trainpos++;
400  }
401  else if(label[i] < 0) {
402  learn_parm->svm_cost[i]=learn_parm->svm_c*fabs((float64_t)label[i]);
403  label[i]=-1;
404  trainneg++;
405  }
406  else {
407  learn_parm->svm_cost[i]=0;
408  }
409  }
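/* The loop above realizes the asymmetric cost model: a positive example
   costs svm_c*svm_costratio = C1*(C2/C1) = C2, a negative example costs
   svm_c = C1, and label-0 (transduction) examples get cost 0. E.g. with
   C1=1 and C2=2, margin violations on positives are penalized twice as
   hard as on negatives. */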
410 
411  /* compute starting state for initial alpha values */
412  SG_DEBUG("alpha:%d num_sv:%d\n", m_alpha.vector, get_num_support_vectors())
413 
414  if(m_alpha.vector && get_num_support_vectors()) {
415  if(verbosity>=1) {
416  SG_INFO("Computing starting state...")
417  }
418 
419  float64_t* alpha = SG_MALLOC(float64_t, totdoc);
420 
421  for (i=0; i<totdoc; i++)
422  alpha[i]=0;
423 
424  for (i=0; i<get_num_support_vectors(); i++)
425  alpha[get_support_vector(i)]=get_alpha(i);
426 
427  int32_t* index = SG_MALLOC(int32_t, totdoc);
428  int32_t* index2dnum = SG_MALLOC(int32_t, totdoc+11);
429  float64_t* aicache = SG_MALLOC(float64_t, totdoc);
430  for (i=0;i<totdoc;i++) { /* create full index and clip alphas */
431  index[i]=1;
432  alpha[i]=fabs(alpha[i]);
433  if(alpha[i]<0) alpha[i]=0;
434  if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];
435  }
436 
437  if (use_kernel_cache)
438  {
439  if (callback &&
440  (!((CCombinedKernel*) kernel)->get_append_subkernel_weights())
441  )
442  {
443  CCombinedKernel* k = (CCombinedKernel*) kernel;
444  for (index_t k_idx=0; k_idx<k->get_num_kernels(); k_idx++)
445  {
446  CKernel* kn = k->get_kernel(k_idx);
447  for (i=0;i<totdoc;i++) // fill kernel cache with unbounded SV
448  if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
449  && (kn->kernel_cache_space_available()))
450  kn->cache_kernel_row(i);
451 
452  for (i=0;i<totdoc;i++) // fill rest of kernel cache with bounded SV
453  if((alpha[i]==learn_parm->svm_cost[i])
454  && (kn->kernel_cache_space_available()))
455  kn->cache_kernel_row(i);
456 
457  SG_UNREF(kn);
458  }
459  }
460  else
461  {
462  for (i=0;i<totdoc;i++) /* fill kernel cache with unbounded SV */
463  if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
464  && (kernel->kernel_cache_space_available()))
465  kernel->cache_kernel_row(i);
466 
467  for (i=0;i<totdoc;i++) /* fill rest of kernel cache with bounded SV */
468  if((alpha[i]==learn_parm->svm_cost[i])
469  && (kernel->kernel_cache_space_available()))
470  kernel->cache_kernel_row(i);
471  }
472  }
473  compute_index(index,totdoc,index2dnum);
474  update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc,
475  lin,aicache,NULL);
476  calculate_svm_model(docs,label,lin,alpha,a,c,
477  index2dnum,index2dnum);
478  for (i=0;i<totdoc;i++) { /* copy initial alphas */
479  a[i]=alpha[i];
480  }
481 
482  SG_FREE(index);
483  SG_FREE(index2dnum);
484  SG_FREE(aicache);
485  SG_FREE(alpha);
486 
487  if(verbosity>=1)
488  SG_DONE()
489  }
490  SG_DEBUG("%d totdoc %d pos %d neg\n", totdoc, trainpos, trainneg)
491  SG_DEBUG("Optimizing...\n")
492 
493  /* train the svm */
494  iterations=optimize_to_convergence(docs,label,totdoc,
495  &shrink_state,inconsistent,a,lin,
496  c,&timing_profile,
497  &maxdiff,(int32_t)-1,
498  (int32_t)1);
499 
500 
501  if(verbosity>=1) {
502  if(verbosity==1)
503  {
504  SG_DONE()
505  SG_DEBUG("(%ld iterations)", iterations)
506  }
507 
508  misclassified=0;
509  for (i=0;(i<totdoc);i++) { /* get final statistic */
510  if((lin[i]-model->b)*(float64_t)label[i] <= 0.0)
511  misclassified++;
512  }
513 
514  SG_INFO("Optimization finished (%ld misclassified, maxdiff=%.8f).\n",
515  misclassified,maxdiff);
516 
517  SG_INFO("obj = %.16f, rho = %.16f\n",get_objective(),model->b)
518  if (maxdiff>epsilon)
519  SG_WARNING("maximum violation (%f) exceeds svm_epsilon (%f) due to numerical difficulties\n", maxdiff, epsilon)
520 
521  upsupvecnum=0;
522  for (i=1;i<model->sv_num;i++)
523  {
524  if(fabs(model->alpha[i]) >=
525  (learn_parm->svm_cost[model->supvec[i]]-
526  learn_parm->epsilon_a))
527  upsupvecnum++;
528  }
529  SG_INFO("Number of SV: %d (including %d at upper bound)\n",
530  model->sv_num-1,upsupvecnum);
531  }
532 
533  shrink_state_cleanup(&shrink_state);
534  SG_FREE(label);
535  SG_FREE(inconsistent);
536  SG_FREE(c);
537  SG_FREE(a);
538  SG_FREE(a_fullset);
539  SG_FREE(xi_fullset);
540  SG_FREE(lin);
541  SG_FREE(learn_parm->eps);
542  SG_FREE(learn_parm->svm_cost);
543  SG_FREE(docs);
544 }
545 
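/* optimize_to_convergence() below is the core SVMlight decomposition loop
   (Joachims, 1999). Per iteration it (1) selects a working set of at most
   qpsize variables by steepest feasible direction, (2) solves the QP
   restricted to that set via optimize_svm()/pr_LOQO, (3) incrementally
   updates the gradient cache lin[] and the model, and (4) checks the KT
   conditions, periodically shrinking variables stuck at a bound. With the
   classification defaults (eps[i]=-1, c[i]=0) the underlying dual is

     max_a  sum_i a_i - 1/2 sum_{i,j} y_i y_j a_i a_j k(x_i,x_j)
     s.t.   0 <= a_i <= svm_cost[i],  sum_i y_i a_i = 0.                   */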
546 int32_t CSVMLight::optimize_to_convergence(int32_t* docs, int32_t* label, int32_t totdoc,
547  SHRINK_STATE *shrink_state,
548  int32_t *inconsistent,
549  float64_t *a, float64_t *lin, float64_t *c,
550  TIMING *timing_profile, float64_t *maxdiff,
551  int32_t heldout, int32_t retrain)
552  /* docs: Training vectors (x-part) */
553  /* label: Training labels/value (y-part, zero if test example for
554  transduction) */
555  /* totdoc: Number of examples in docs/label */
556  /* learn_parm: Learning parameters */
557  /* kernel_parm: Kernel parameters */
558  /* kernel_cache: Initialized/partly filled Cache, if using a kernel.
559  NULL if linear. */
560  /* shrink_state: State of active variables */
561  /* inconsistent: examples thrown out as inconsistent */
562  /* a: alphas */
563  /* lin: linear component of gradient */
564  /* c: right hand side of inequalities (margin) */
565  /* maxdiff: returns maximum violation of KT-conditions */
566  /* heldout: marks held-out example for leave-one-out (or -1) */
567  /* retrain: selects training mode (1=regular / 2=holdout) */
568 {
569 
570  int32_t *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink;
571  int32_t inconsistentnum,choosenum,already_chosen=0,iteration;
572  int32_t misclassified,supvecnum=0,*active2dnum,inactivenum;
573  int32_t *working2dnum,*selexam;
574  int32_t activenum;
575  float64_t criterion, eq;
576  float64_t *a_old;
577  int32_t t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */
578  float64_t epsilon_crit_org;
579  float64_t bestmaxdiff;
580  float64_t worstmaxdiff;
581  int32_t bestmaxdiffiter,terminate;
582  bool reactivated=false;
583 
584  float64_t *selcrit; /* buffer for sorting */
585  float64_t *aicache; /* buffer to keep one row of hessian */
586  QP qp; /* buffer for one quadratic program */
587 
588  epsilon_crit_org=learn_parm->epsilon_crit; /* save org */
589  if (kernel->has_property(KP_LINADD) && get_linadd_enabled()) {
590  learn_parm->epsilon_crit=2.0;
591  /* caching makes no sense for linear kernel */
592  }
593  learn_parm->epsilon_shrink=2;
594  (*maxdiff)=1;
595 
596  SG_DEBUG("totdoc:%d\n",totdoc)
597  chosen = SG_MALLOC(int32_t, totdoc);
598  last_suboptimal_at =SG_MALLOC(int32_t, totdoc);
599  key =SG_MALLOC(int32_t, totdoc+11);
600  selcrit =SG_MALLOC(float64_t, totdoc);
601  selexam =SG_MALLOC(int32_t, totdoc);
602  a_old =SG_MALLOC(float64_t, totdoc);
603  aicache =SG_MALLOC(float64_t, totdoc);
604  working2dnum =SG_MALLOC(int32_t, totdoc+11);
605  active2dnum =SG_MALLOC(int32_t, totdoc+11);
606  qp.opt_ce =SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
607  qp.opt_ce0 =SG_MALLOC(float64_t, 1);
608  qp.opt_g =SG_MALLOC(float64_t, learn_parm->svm_maxqpsize*learn_parm->svm_maxqpsize);
609  qp.opt_g0 =SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
610  qp.opt_xinit =SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
611  qp.opt_low=SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
612  qp.opt_up=SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
613 
614  choosenum=0;
615  inconsistentnum=0;
616  if(!retrain) retrain=1;
617  iteration=1;
618  bestmaxdiffiter=1;
619  bestmaxdiff=999999999;
620  worstmaxdiff=1e-10;
621  terminate=0;
622 
623 
624  kernel->set_time(iteration); /* for lru cache */
625 
626  for (i=0;i<totdoc;i++) { /* various inits */
627  chosen[i]=0;
628  a_old[i]=a[i];
629  last_suboptimal_at[i]=1;
630  if(inconsistent[i])
631  inconsistentnum++;
632  }
633  activenum=compute_index(shrink_state->active,totdoc,active2dnum);
634  inactivenum=totdoc-activenum;
635  clear_index(working2dnum);
636 
637  /* repeat this loop until we have convergence */
638  CTime start_time;
639  mkl_converged=false;
640 
641 
642 #ifdef CYGWIN
643  for (;((iteration<100 || (!mkl_converged && callback) ) || (retrain && (!terminate))); iteration++){
644 #else
645  CSignal::clear_cancel();
646  for (;((!CSignal::cancel_computations()) && ((iteration<3 || (!mkl_converged && callback) ) || (retrain && (!terminate)))); iteration++){
647 #endif
648 
649  if(use_kernel_cache)
650  kernel->set_time(iteration); /* for lru cache */
651 
652  if(verbosity>=2) t0=get_runtime();
653  if(verbosity>=3) {
654  SG_DEBUG("\nSelecting working set... ")
655  }
656 
657  if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize)
658  learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
659 
660  i=0;
661  for (jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */
662  if((chosen[j]>=(learn_parm->svm_maxqpsize/
663  CMath::min(learn_parm->svm_maxqpsize,
664  learn_parm->svm_newvarsinqp)))
665  || (inconsistent[j])
666  || (j == heldout)) {
667  chosen[j]=0;
668  choosenum--;
669  }
670  else {
671  chosen[j]++;
672  working2dnum[i++]=j;
673  }
674  }
675  working2dnum[i]=-1;
676 
677  if(retrain == 2) {
678  choosenum=0;
679  for (jj=0;(j=working2dnum[jj])>=0;jj++) { /* fully clear working set */
680  chosen[j]=0;
681  }
682  clear_index(working2dnum);
683  for (i=0;i<totdoc;i++) { /* set inconsistent examples to zero (-i 1) */
684  if((inconsistent[i] || (heldout==i)) && (a[i] != 0.0)) {
685  chosen[i]=99999;
686  choosenum++;
687  a[i]=0;
688  }
689  }
690  if(learn_parm->biased_hyperplane) {
691  eq=0;
692  for (i=0;i<totdoc;i++) { /* make sure we fulfill equality constraint */
693  eq+=a[i]*label[i];
694  }
695  for (i=0;(i<totdoc) && (fabs(eq) > learn_parm->epsilon_a);i++) {
696  if((eq*label[i] > 0) && (a[i] > 0)) {
697  chosen[i]=88888;
698  choosenum++;
699  if((eq*label[i]) > a[i]) {
700  eq-=(a[i]*label[i]);
701  a[i]=0;
702  }
703  else {
704  a[i]-=(eq*label[i]);
705  eq=0;
706  }
707  }
708  }
709  }
710  compute_index(chosen,totdoc,working2dnum);
711  }
712  else
713  { /* select working set according to steepest gradient */
714  if(iteration % 101)
715  {
716  already_chosen=0;
717  if(CMath::min(learn_parm->svm_newvarsinqp, learn_parm->svm_maxqpsize-choosenum)>=4 &&
718  (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())))
719  {
720  /* select part of the working set from cache */
721  already_chosen=select_next_qp_subproblem_grad(
722  label,a,lin,c,totdoc,
723  (int32_t)(CMath::min(learn_parm->svm_maxqpsize-choosenum,
724  learn_parm->svm_newvarsinqp)/2),
725  inconsistent,active2dnum,
726  working2dnum,selcrit,selexam,1,
727  key,chosen);
728  choosenum+=already_chosen;
729  }
730  choosenum+=select_next_qp_subproblem_grad(
731  label,a,lin,c,totdoc,
732  CMath::min(learn_parm->svm_maxqpsize-choosenum,
733  learn_parm->svm_newvarsinqp-already_chosen),
734  inconsistent,active2dnum,
735  working2dnum,selcrit,selexam,0,key,
736  chosen);
737  }
738  else { /* once in a while, select a somewhat random working set
739  to get unlocked of infinite loops due to numerical
740  inaccuracies in the core qp-solver */
741  choosenum+=select_next_qp_subproblem_rand(
742  label,a,lin,c,totdoc,
743  CMath::min(learn_parm->svm_maxqpsize-choosenum,
744  learn_parm->svm_newvarsinqp),
745  inconsistent,active2dnum,
746  working2dnum,selcrit,selexam,key,
747  chosen,iteration);
748  }
749  }
750 
751  if(verbosity>=2) {
752  SG_INFO(" %ld vectors chosen\n",choosenum)
753  }
754 
755  if(verbosity>=2) t1=get_runtime();
756 
757  if (use_kernel_cache)
758  {
759  // in case of MKL w/o linadd cache each kernel independently
760  // else if linadd is disabled cache single kernel
761  if ( callback &&
762  (!((CCombinedKernel*) kernel)->get_append_subkernel_weights())
763  )
764  {
765  CCombinedKernel* k = (CCombinedKernel*) kernel;
766  for (index_t k_idx=0; k_idx<k->get_num_kernels(); k_idx++)
767  {
768  CKernel* kn = k->get_kernel(k_idx);
769  kn->cache_multiple_kernel_rows(working2dnum, choosenum);
770  SG_UNREF(kn);
771  }
772  }
773  else
774  kernel->cache_multiple_kernel_rows(working2dnum, choosenum);
775  }
776 
777  if(verbosity>=2) t2=get_runtime();
778 
779  if(retrain != 2) {
780  optimize_svm(docs,label,inconsistent,0.0,chosen,active2dnum,
781  totdoc,working2dnum,choosenum,a,lin,c,
782  aicache,&qp,&epsilon_crit_org);
783  }
784 
785  if(verbosity>=2) t3=get_runtime();
786  update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc,
787  lin,aicache,c);
788 
789  if(verbosity>=2) t4=get_runtime();
790  supvecnum=calculate_svm_model(docs,label,lin,a,a_old,c,working2dnum,active2dnum);
791 
792  if(verbosity>=2) t5=get_runtime();
793 
794  for (jj=0;(i=working2dnum[jj])>=0;jj++) {
795  a_old[i]=a[i];
796  }
797 
798  retrain=check_optimality(label,a,lin,c,totdoc,
799  maxdiff,epsilon_crit_org,&misclassified,
800  inconsistent,active2dnum,last_suboptimal_at,
801  iteration);
802 
803  if(verbosity>=2) {
804  t6=get_runtime();
805  timing_profile->time_select+=t1-t0;
806  timing_profile->time_kernel+=t2-t1;
807  timing_profile->time_opti+=t3-t2;
808  timing_profile->time_update+=t4-t3;
809  timing_profile->time_model+=t5-t4;
810  timing_profile->time_check+=t6-t5;
811  }
812 
813  /* checking whether optimizer got stuck */
814  if((*maxdiff) < bestmaxdiff) {
815  bestmaxdiff=(*maxdiff);
816  bestmaxdiffiter=iteration;
817  }
818  if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) {
819  /* long time no progress? */
820  terminate=1;
821  retrain=0;
822  SG_WARNING("Relaxing KT-Conditions due to slow progress! Terminating!\n")
823  SG_DEBUG("(iteration :%d, bestmaxdiffiter: %d, learn_parm->maxiter: %d)\n", iteration, bestmaxdiffiter, learn_parm->maxiter )
824  }
825 
826  noshrink= (get_shrinking_enabled()) ? 0 : 1;
827 
828  if ((!callback) && (!retrain) && (inactivenum>0) &&
829  ((!learn_parm->skip_final_opt_check) || (kernel->has_property(KP_LINADD) && get_linadd_enabled())))
830  {
831  t1=get_runtime();
832  SG_DEBUG("reactivating inactive examples\n")
833 
834  reactivate_inactive_examples(label,a,shrink_state,lin,c,totdoc,
835  iteration,inconsistent,
836  docs,aicache,
837  maxdiff);
838  reactivated=true;
839  SG_DEBUG("done reactivating inactive examples (maxdiff:%8f eps_crit:%8f orig_eps:%8f)\n", *maxdiff, learn_parm->epsilon_crit, epsilon_crit_org)
840  /* Update to new active variables. */
841  activenum=compute_index(shrink_state->active,totdoc,active2dnum);
842  inactivenum=totdoc-activenum;
843  /* reset watchdog */
844  bestmaxdiff=(*maxdiff);
845  bestmaxdiffiter=iteration;
846 
847  /* termination criterion */
848  noshrink=1;
849  retrain=0;
850  if((*maxdiff) > learn_parm->epsilon_crit)
851  {
852  SG_INFO("restarting optimization as we are - due to shrinking - deviating too much (maxdiff(%f) > eps(%f))\n", *maxdiff, learn_parm->epsilon_crit)
853  retrain=1;
854  }
855  timing_profile->time_shrink+=get_runtime()-t1;
856  if (((verbosity>=1) && (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())))
857  || (verbosity>=2)) {
858  SG_DONE()
859  SG_DEBUG("Number of inactive variables = %ld\n", inactivenum)
860  }
861  }
862 
863  if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff)))
864  learn_parm->epsilon_crit=(*maxdiff);
865  if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) {
866  learn_parm->epsilon_crit/=4.0;
867  retrain=1;
868  noshrink=1;
869  }
870  if(learn_parm->epsilon_crit<epsilon_crit_org)
871  learn_parm->epsilon_crit=epsilon_crit_org;
872 
873  if(verbosity>=2) {
874  SG_INFO(" => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n",
875  supvecnum,model->at_upper_bound,(*maxdiff));
876 
877  }
878  mymaxdiff=*maxdiff ;
879 
880  //don't shrink w/ mkl
881  if (((iteration % 10) == 0) && (!noshrink) && !callback)
882  {
883  activenum=shrink_problem(shrink_state,active2dnum,last_suboptimal_at,iteration,totdoc,
884  CMath::max((int32_t)(activenum/10),
885  CMath::max((int32_t)(totdoc/500),(int32_t) 100)),
886  a,inconsistent, c, lin, label);
887 
888  inactivenum=totdoc-activenum;
889 
890  if (use_kernel_cache && (supvecnum>kernel->get_max_elems_cache())
891  && ((kernel->get_activenum_cache()-activenum)>CMath::max((int32_t)(activenum/10),(int32_t) 500))) {
892 
893  kernel->kernel_cache_shrink(totdoc, CMath::min((int32_t) (kernel->get_activenum_cache()-activenum),
894  (int32_t) (kernel->get_activenum_cache()-supvecnum)),
895  shrink_state->active);
896  }
897  }
898 
899  if (bestmaxdiff>worstmaxdiff)
900  worstmaxdiff=bestmaxdiff;
901 
902  SG_ABS_PROGRESS(bestmaxdiff, -CMath::log10(bestmaxdiff), -CMath::log10(worstmaxdiff), -CMath::log10(epsilon), 6)
903 
904  /* Terminate loop */
905  if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time) {
906  terminate = 1;
907  retrain = 0;
908  }
909 
910  } /* end of loop */
911 
912  SG_DEBUG("inactive:%d\n", inactivenum)
913 
914  if (inactivenum && !reactivated && !callback)
915  {
916  SG_DEBUG("reactivating inactive examples\n")
917  reactivate_inactive_examples(label,a,shrink_state,lin,c,totdoc,
918  iteration,inconsistent,
919  docs,aicache,
920  maxdiff);
921  SG_DEBUG("done reactivating inactive examples\n")
922  /* Update to new active variables. */
923  activenum=compute_index(shrink_state->active,totdoc,active2dnum);
924  inactivenum=totdoc-activenum;
925  /* reset watchdog */
926  bestmaxdiff=(*maxdiff);
927  bestmaxdiffiter=iteration;
928  }
929 
930  //use this for our purposes!
931  criterion=compute_objective_function(a,lin,c,learn_parm->eps,label,totdoc);
932  CSVM::set_objective(criterion);
933 
934  SG_FREE(chosen);
935  SG_FREE(last_suboptimal_at);
936  SG_FREE(key);
937  SG_FREE(selcrit);
938  SG_FREE(selexam);
939  SG_FREE(a_old);
940  SG_FREE(aicache);
941  SG_FREE(working2dnum);
942  SG_FREE(active2dnum);
943  SG_FREE(qp.opt_ce);
944  SG_FREE(qp.opt_ce0);
945  SG_FREE(qp.opt_g);
946  SG_FREE(qp.opt_g0);
947  SG_FREE(qp.opt_xinit);
948  SG_FREE(qp.opt_low);
949  SG_FREE(qp.opt_up);
950 
951  learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */
952 
953  return(iteration);
954 }
955 
956 float64_t CSVMLight::compute_objective_function(
957  float64_t *a, float64_t *lin, float64_t *c, float64_t* eps, int32_t *label,
958  int32_t totdoc)
959  /* Return value of objective function. */
960  /* Works only relative to the active variables! */
961 {
962  /* calculate value of objective function */
963  float64_t criterion=0;
964 
965  for (int32_t i=0;i<totdoc;i++)
966  criterion=criterion+(eps[i]-(float64_t)label[i]*c[i])*a[i]+0.5*a[i]*label[i]*lin[i];
967 
968  return(criterion);
969 }
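/* With the classification defaults set in svm_learn() (eps[i]=-1, c[i]=0)
   and lin[i] = sum_j a_j y_j k(x_i,x_j), the sum above evaluates to
     -sum_i a_i + 1/2 sum_{i,j} y_i y_j a_i a_j k(x_i,x_j),
   i.e. the negative of the dual objective being maximized; this is the
   value handed to CSVM::set_objective() at the end of training. */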
970 
971 
972 void CSVMLight::clear_index(int32_t *index)
973  /* initializes and empties index */
974 {
975  index[0]=-1;
976 }
977 
978 void CSVMLight::add_to_index(int32_t *index, int32_t elem)
979  /* initializes and empties index */
980 {
981  register int32_t i;
982  for (i=0;index[i] != -1;i++);
983  index[i]=elem;
984  index[i+1]=-1;
985 }
986 
987 int32_t CSVMLight::compute_index(
988  int32_t *binfeature, int32_t range, int32_t *index)
989  /* create an inverted index of binfeature */
990 {
991  register int32_t i,ii;
992 
993  ii=0;
994  for (i=0;i<range;i++) {
995  if(binfeature[i]) {
996  index[ii]=i;
997  ii++;
998  }
999  }
1000  for (i=0;i<4;i++) {
1001  index[ii+i]=-1;
1002  }
1003  return(ii);
1004 }
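/* Index convention used throughout this file: arrays named *2dnum (e.g.
   active2dnum, working2dnum) hold the positions of the nonzero entries of
   a 0/1 vector, terminated by -1. That is what makes loops of the form
     for (ii=0; (i=active2dnum[ii])>=0; ii++) ...
   touch only the active examples. */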
1005 
1006 
1007 void CSVMLight::optimize_svm(
1008  int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
1009  float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t totdoc,
1010  int32_t *working2dnum, int32_t varnum, float64_t *a, float64_t *lin,
1011  float64_t *c, float64_t *aicache, QP *qp, float64_t *epsilon_crit_target)
1012  /* Do optimization on the working set. */
1013 {
1014  int32_t i;
1015  float64_t *a_v;
1016 
1017  //compute_matrices_for_optimization_parallel(docs,label,
1018  // exclude_from_eq_const,eq_target,chosen,
1019  // active2dnum,working2dnum,a,lin,c,
1020  // varnum,totdoc,aicache,qp);
1021 
1022  compute_matrices_for_optimization(docs,label,
1023  exclude_from_eq_const,eq_target,chosen,
1024  active2dnum,working2dnum,a,lin,c,
1025  varnum,totdoc,aicache,qp);
1026 
1027  if(verbosity>=3) {
1028  SG_DEBUG("Running optimizer...")
1029  }
1030  /* call the qp-subsolver */
1031  a_v=optimize_qp(qp,epsilon_crit_target,
1032  learn_parm->svm_maxqpsize,
1033  &(model->b), /* in case the optimizer gives us */
1034  learn_parm->svm_maxqpsize); /* the threshold for free. otherwise */
1035  /* b is calculated in calculate_model. */
1036  if(verbosity>=3) {
1037  SG_DONE()
1038  }
1039 
1040  for (i=0;i<varnum;i++)
1041  a[working2dnum[i]]=a_v[i];
1042 }
1043 
1044 void CSVMLight::compute_matrices_for_optimization_parallel(
1045  int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
1046  float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t *key,
1047  float64_t *a, float64_t *lin, float64_t *c, int32_t varnum, int32_t totdoc,
1048  float64_t *aicache, QP *qp)
1049 {
1050  if (parallel->get_num_threads()<=1)
1051  {
1052  compute_matrices_for_optimization(docs, label, exclude_from_eq_const, eq_target,
1053  chosen, active2dnum, key, a, lin, c,
1054  varnum, totdoc, aicache, qp) ;
1055  }
1056 #ifdef HAVE_PTHREAD
1057  else
1058  {
1059  register int32_t ki,kj,i,j;
1060  register float64_t kernel_temp;
1061 
1062  qp->opt_n=varnum;
1063  qp->opt_ce0[0]=-eq_target; /* compute the constant for equality constraint */
1064  for (j=1;j<model->sv_num;j++) { /* start at 1 */
1065  if((!chosen[model->supvec[j]])
1066  && (!exclude_from_eq_const[(model->supvec[j])])) {
1067  qp->opt_ce0[0]+=model->alpha[j];
1068  }
1069  }
1070  if(learn_parm->biased_hyperplane)
1071  qp->opt_m=1;
1072  else
1073  qp->opt_m=0; /* eq-constraint will be ignored */
1074 
1075  /* init linear part of objective function */
1076  for (i=0;i<varnum;i++) {
1077  qp->opt_g0[i]=lin[key[i]];
1078  }
1079 
1080  ASSERT(parallel->get_num_threads()>1)
1081  int32_t *KI=SG_MALLOC(int32_t, varnum*varnum);
1082  int32_t *KJ=SG_MALLOC(int32_t, varnum*varnum);
1083  int32_t Knum=0 ;
1084  float64_t *Kval = SG_MALLOC(float64_t, varnum*(varnum+1)/2);
1085  for (i=0;i<varnum;i++) {
1086  ki=key[i];
1087  KI[Knum]=docs[ki] ;
1088  KJ[Knum]=docs[ki] ;
1089  Knum++ ;
1090  for (j=i+1;j<varnum;j++)
1091  {
1092  kj=key[j];
1093  KI[Knum]=docs[ki] ;
1094  KJ[Knum]=docs[kj] ;
1095  Knum++ ;
1096  }
1097  }
1098  ASSERT(Knum<=varnum*(varnum+1)/2)
1099 
1100  pthread_t* threads = SG_MALLOC(pthread_t, parallel->get_num_threads()-1);
1101  S_THREAD_PARAM_KERNEL* params = SG_MALLOC(S_THREAD_PARAM_KERNEL, parallel->get_num_threads()-1);
1102  int32_t step= Knum/parallel->get_num_threads();
1103  //SG_DEBUG("\nkernel-step size: %i\n", step)
1104  for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
1105  {
1106  params[t].svmlight = this;
1107  params[t].start = t*step;
1108  params[t].end = (t+1)*step;
1109  params[t].KI=KI ;
1110  params[t].KJ=KJ ;
1111  params[t].Kval=Kval ;
1112  pthread_create(&threads[t], NULL, CSVMLight::compute_kernel_helper, (void*)&params[t]);
1113  }
1114  for (i=params[parallel->get_num_threads()-2].end; i<Knum; i++)
1115  Kval[i]=compute_kernel(KI[i],KJ[i]) ;
1116 
1117  for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
1118  pthread_join(threads[t], NULL);
1119 
1120  SG_FREE(params);
1121  SG_FREE(threads);
1122 
1123  Knum=0 ;
1124  for (i=0;i<varnum;i++) {
1125  ki=key[i];
1126 
1127  /* Compute the matrix for equality constraints */
1128  qp->opt_ce[i]=label[ki];
1129  qp->opt_low[i]=0;
1130  qp->opt_up[i]=learn_parm->svm_cost[ki];
1131 
1132  kernel_temp=Kval[Knum] ;
1133  Knum++ ;
1134  /* compute linear part of objective function */
1135  qp->opt_g0[i]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
1136  /* compute quadratic part of objective function */
1137  qp->opt_g[varnum*i+i]=kernel_temp;
1138 
1139  for (j=i+1;j<varnum;j++) {
1140  kj=key[j];
1141  kernel_temp=Kval[Knum] ;
1142  Knum++ ;
1143  /* compute linear part of objective function */
1144  qp->opt_g0[i]-=(kernel_temp*a[kj]*(float64_t)label[kj]);
1145  qp->opt_g0[j]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
1146  /* compute quadratic part of objective function */
1147  qp->opt_g[varnum*i+j]=(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp;
1148  qp->opt_g[varnum*j+i]=qp->opt_g[varnum*i+j];//(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp;
1149  }
1150 
1151  if(verbosity>=3) {
1152  if(i % 20 == 0) {
1153  SG_DEBUG("%ld..",i)
1154  }
1155  }
1156  }
1157 
1158  SG_FREE(KI);
1159  SG_FREE(KJ);
1160  SG_FREE(Kval);
1161 
1162  for (i=0;i<varnum;i++) {
1163  /* assure starting at feasible point */
1164  qp->opt_xinit[i]=a[key[i]];
1165  /* set linear part of objective function */
1166  qp->opt_g0[i]=(learn_parm->eps[key[i]]-(float64_t)label[key[i]]*c[key[i]])+qp->opt_g0[i]*(float64_t)label[key[i]];
1167  }
1168 
1169  if(verbosity>=3) {
1170  SG_DONE()
1171  }
1172  }
1173 #endif
1174 }
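/* Both matrix builders (parallel above, sequential below) fill the QP
   buffers consumed by optimize_qp()/pr_LOQO: opt_g is the label-signed
   kernel submatrix over the working set, opt_g0 the linear term corrected
   for fixed variables outside the set, opt_ce/opt_ce0 encode the equality
   constraint sum_i y_i a_i = const, and opt_low/opt_up the box bounds.
   The parallel version first enumerates the upper triangle into (KI,KJ)
   pairs so the kernel evaluations can be split evenly across threads. */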
1175 
1176 void CSVMLight::compute_matrices_for_optimization(
1177  int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
1178  float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t *key,
1179  float64_t *a, float64_t *lin, float64_t *c, int32_t varnum, int32_t totdoc,
1180  float64_t *aicache, QP *qp)
1181 {
1182  register int32_t ki,kj,i,j;
1183  register float64_t kernel_temp;
1184 
1185  qp->opt_n=varnum;
1186  qp->opt_ce0[0]=-eq_target; /* compute the constant for equality constraint */
1187  for (j=1;j<model->sv_num;j++) { /* start at 1 */
1188  if((!chosen[model->supvec[j]])
1189  && (!exclude_from_eq_const[(model->supvec[j])])) {
1190  qp->opt_ce0[0]+=model->alpha[j];
1191  }
1192  }
1193  if(learn_parm->biased_hyperplane)
1194  qp->opt_m=1;
1195  else
1196  qp->opt_m=0; /* eq-constraint will be ignored */
1197 
1198  /* init linear part of objective function */
1199  for (i=0;i<varnum;i++) {
1200  qp->opt_g0[i]=lin[key[i]];
1201  }
1202 
1203  for (i=0;i<varnum;i++) {
1204  ki=key[i];
1205 
1206  /* Compute the matrix for equality constraints */
1207  qp->opt_ce[i]=label[ki];
1208  qp->opt_low[i]=0;
1209  qp->opt_up[i]=learn_parm->svm_cost[ki];
1210 
1211  kernel_temp=compute_kernel(docs[ki], docs[ki]);
1212  /* compute linear part of objective function */
1213  qp->opt_g0[i]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
1214  /* compute quadratic part of objective function */
1215  qp->opt_g[varnum*i+i]=kernel_temp;
1216 
1217  for (j=i+1;j<varnum;j++) {
1218  kj=key[j];
1219  kernel_temp=compute_kernel(docs[ki], docs[kj]);
1220 
1221  /* compute linear part of objective function */
1222  qp->opt_g0[i]-=(kernel_temp*a[kj]*(float64_t)label[kj]);
1223  qp->opt_g0[j]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
1224  /* compute quadratic part of objective function */
1225  qp->opt_g[varnum*i+j]=(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp;
1226  qp->opt_g[varnum*j+i]=qp->opt_g[varnum*i+j];//(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp;
1227  }
1228 
1229  if(verbosity>=3) {
1230  if(i % 20 == 0) {
1231  SG_DEBUG("%ld..",i)
1232  }
1233  }
1234  }
1235 
1236  for (i=0;i<varnum;i++) {
1237  /* assure starting at feasible point */
1238  qp->opt_xinit[i]=a[key[i]];
1239  /* set linear part of objective function */
1240  qp->opt_g0[i]=(learn_parm->eps[key[i]]-(float64_t)label[key[i]]*c[key[i]]) + qp->opt_g0[i]*(float64_t)label[key[i]];
1241  }
1242 
1243  if(verbosity>=3) {
1244  SG_DONE()
1245  }
1246 }
1247 
1248 
1249 int32_t CSVMLight::calculate_svm_model(
1250  int32_t* docs, int32_t *label, float64_t *lin, float64_t *a,
1251  float64_t *a_old, float64_t *c, int32_t *working2dnum, int32_t *active2dnum)
1252  /* Compute decision function based on current values */
1253  /* of alpha. */
1254 {
1255  int32_t i,ii,pos,b_calculated=0,first_low,first_high;
1256  float64_t ex_c,b_temp,b_low,b_high;
1257 
1258  if(verbosity>=3) {
1259  SG_DEBUG("Calculating model...")
1260  }
1261 
1262  if(!learn_parm->biased_hyperplane) {
1263  model->b=0;
1264  b_calculated=1;
1265  }
1266 
1267  for (ii=0;(i=working2dnum[ii])>=0;ii++) {
1268  if((a_old[i]>0) && (a[i]==0)) { /* remove from model */
1269  pos=model->index[i];
1270  model->index[i]=-1;
1271  (model->sv_num)--;
1272  model->supvec[pos]=model->supvec[model->sv_num];
1273  model->alpha[pos]=model->alpha[model->sv_num];
1274  model->index[model->supvec[pos]]=pos;
1275  }
1276  else if((a_old[i]==0) && (a[i]>0)) { /* add to model */
1277  model->supvec[model->sv_num]=docs[i];
1278  model->alpha[model->sv_num]=a[i]*(float64_t)label[i];
1279  model->index[i]=model->sv_num;
1280  (model->sv_num)++;
1281  }
1282  else if(a_old[i]==a[i]) { /* nothing to do */
1283  }
1284  else { /* just update alpha */
1285  model->alpha[model->index[i]]=a[i]*(float64_t)label[i];
1286  }
1287 
1288  ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
1289  if((a_old[i]>=ex_c) && (a[i]<ex_c)) {
1290  (model->at_upper_bound)--;
1291  }
1292  else if((a_old[i]<ex_c) && (a[i]>=ex_c)) {
1293  (model->at_upper_bound)++;
1294  }
1295 
1296  if((!b_calculated)
1297  && (a[i]>learn_parm->epsilon_a) && (a[i]<ex_c)) { /* calculate b */
1298  model->b=((float64_t)label[i]*learn_parm->eps[i]-c[i]+lin[i]);
1299  b_calculated=1;
1300  }
1301  }
1302 
1303  /* No alpha in the working set not at bounds, so b was not
1304  calculated in the usual way. The following handles this special
1305  case. */
1306  if(learn_parm->biased_hyperplane
1307  && (!b_calculated)
1308  && (model->sv_num-1 == model->at_upper_bound)) {
1309  first_low=1;
1310  first_high=1;
1311  b_low=0;
1312  b_high=0;
1313  for (ii=0;(i=active2dnum[ii])>=0;ii++) {
1314  ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
1315  if(a[i]<ex_c) {
1316  if(label[i]>0) {
1317  b_temp=-(learn_parm->eps[i]-c[i]+lin[i]);
1318  if((b_temp>b_low) || (first_low)) {
1319  b_low=b_temp;
1320  first_low=0;
1321  }
1322  }
1323  else {
1324  b_temp=-(-learn_parm->eps[i]-c[i]+lin[i]);
1325  if((b_temp<b_high) || (first_high)) {
1326  b_high=b_temp;
1327  first_high=0;
1328  }
1329  }
1330  }
1331  else {
1332  if(label[i]<0) {
1333  b_temp=-(-learn_parm->eps[i]-c[i]+lin[i]);
1334  if((b_temp>b_low) || (first_low)) {
1335  b_low=b_temp;
1336  first_low=0;
1337  }
1338  }
1339  else {
1340  b_temp=-(learn_parm->eps[i]-c[i]+lin[i]);
1341  if((b_temp<b_high) || (first_high)) {
1342  b_high=b_temp;
1343  first_high=0;
1344  }
1345  }
1346  }
1347  }
1348  if(first_high) {
1349  model->b=-b_low;
1350  }
1351  else if(first_low) {
1352  model->b=-b_high;
1353  }
1354  else {
1355  model->b=-(b_high+b_low)/2.0; /* select b as the middle of range */
1356  }
1357  }
1358 
1359  if(verbosity>=3) {
1360  SG_DONE()
1361  }
1362 
1363  return(model->sv_num-1); /* have to subtract one, since element 0 is empty*/
1364 }
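/* Sanity check for the bias: for a free SV (0 < a_i < C) the code sets
   b = y_i*eps[i] - c[i] + lin[i], which with eps[i]=-1, c[i]=0 gives
   b = lin[i] - y_i; the decision value used elsewhere is lin[i]-b, so
   (lin[i]-b)*y_i = 1 exactly on free SVs, as the KT conditions demand. */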
1365 
1366 int32_t CSVMLight::check_optimality(
1367  int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc,
1368  float64_t *maxdiff, float64_t epsilon_crit_org, int32_t *misclassified,
1369  int32_t *inconsistent, int32_t *active2dnum, int32_t *last_suboptimal_at,
1370  int32_t iteration)
1371  /* Check KT-conditions */
1372 {
1373  int32_t i,ii,retrain;
1374  float64_t dist,ex_c,target;
1375 
1376  if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
1377  learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org;
1378  else
1379  learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3;
1380  retrain=0;
1381  (*maxdiff)=0;
1382  (*misclassified)=0;
1383  for (ii=0;(i=active2dnum[ii])>=0;ii++) {
1384  if((!inconsistent[i]) && label[i]) {
1385  dist=(lin[i]-model->b)*(float64_t)label[i];/* 'distance' from
1386  hyperplane*/
1387  target=-(learn_parm->eps[i]-(float64_t)label[i]*c[i]);
1388  ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
1389  if(dist <= 0) {
1390  (*misclassified)++; /* does not work due to deactivation of var */
1391  }
1392  if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
1393  if((dist-target)>(*maxdiff)) /* largest violation */
1394  (*maxdiff)=dist-target;
1395  }
1396  else if((a[i]<ex_c) && (dist < target)) {
1397  if((target-dist)>(*maxdiff)) /* largest violation */
1398  (*maxdiff)=target-dist;
1399  }
1400  /* Count how long a variable was at lower/upper bound (and optimal).*/
1401  /* Variables, which were at the bound and optimal for a long */
1402  /* time are unlikely to become support vectors. In case our */
1403  /* cache is filled up, those variables are excluded to save */
1404  /* kernel evaluations. (See chapter 'Shrinking').*/
1405  if((a[i]>(learn_parm->epsilon_a))
1406  && (a[i]<ex_c)) {
1407  last_suboptimal_at[i]=iteration; /* not at bound */
1408  }
1409  else if((a[i]<=(learn_parm->epsilon_a))
1410  && (dist < (target+learn_parm->epsilon_shrink))) {
1411  last_suboptimal_at[i]=iteration; /* not likely optimal */
1412  }
1413  else if((a[i]>=ex_c)
1414  && (dist > (target-learn_parm->epsilon_shrink))) {
1415  last_suboptimal_at[i]=iteration; /* not likely optimal */
1416  }
1417  }
1418  }
1419 
1420  /* termination criterion */
1421  if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) {
1422  retrain=1;
1423  }
1424  return(retrain);
1425 }
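/* epsilon_shrink is an exponential moving average (0.7/0.3) of the
   largest KT violation; examples whose violation stays inside this band
   while sitting at a bound keep an old last_suboptimal_at stamp and thus
   become shrinking candidates. Retraining is requested iff the largest
   violation still exceeds epsilon_crit. */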
1426 
1427 void CSVMLight::update_linear_component(
1428  int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
1429  float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
1430  float64_t *aicache, float64_t* c)
1431  /* keep track of the linear component */
1432  /* lin of the gradient etc. by updating */
1433  /* based on the change of the variables */
1434  /* in the current working set */
1435 {
1436  register int32_t i=0,ii=0,j=0,jj=0;
1437 
1438  if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
1439  {
1440  if (callback)
1441  {
1442  update_linear_component_mkl_linadd(docs, label, active2dnum, a,
1443  a_old, working2dnum, totdoc, lin, aicache);
1444  }
1445  else
1446  {
1447  kernel->clear_normal();
1448 
1449  int32_t num_working=0;
1450  for (ii=0;(i=working2dnum[ii])>=0;ii++) {
1451  if(a[i] != a_old[i]) {
1452  kernel->add_to_normal(docs[i], (a[i]-a_old[i])*(float64_t)label[i]);
1453  num_working++;
1454  }
1455  }
1456 
1457  if (num_working>0)
1458  {
1459  if (parallel->get_num_threads() < 2)
1460  {
1461  for (jj=0;(j=active2dnum[jj])>=0;jj++) {
1462  lin[j]+=kernel->compute_optimized(docs[j]);
1463  }
1464  }
1465 #ifdef HAVE_PTHREAD
1466  else
1467  {
1468  int32_t num_elem = 0 ;
1469  for (jj=0;(j=active2dnum[jj])>=0;jj++) num_elem++ ;
1470 
1471  pthread_t* threads = SG_MALLOC(pthread_t, parallel->get_num_threads()-1);
1472  S_THREAD_PARAM_SVMLIGHT* params = SG_MALLOC(S_THREAD_PARAM_SVMLIGHT, parallel->get_num_threads()-1);
1473  int32_t start = 0 ;
1474  int32_t step = num_elem/parallel->get_num_threads();
1475  int32_t end = step ;
1476 
1477  for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
1478  {
1479  params[t].kernel = kernel ;
1480  params[t].lin = lin ;
1481  params[t].docs = docs ;
1482  params[t].active2dnum=active2dnum ;
1483  params[t].start = start ;
1484  params[t].end = end ;
1485  start=end ;
1486  end+=step ;
1487  pthread_create(&threads[t], NULL, update_linear_component_linadd_helper, (void*)&params[t]) ;
1488  }
1489 
1490  for (jj=params[parallel->get_num_threads()-2].end;(j=active2dnum[jj])>=0;jj++) {
1491  lin[j]+=kernel->compute_optimized(docs[j]);
1492  }
1493  void* ret;
1494  for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
1495  pthread_join(threads[t], &ret) ;
1496 
1497  SG_FREE(params);
1498  SG_FREE(threads);
1499  }
1500 #endif
1501  }
1502  }
1503  }
1504  else
1505  {
1506  if (callback)
1507  {
1508  update_linear_component_mkl(docs, label, active2dnum,
1509  a, a_old, working2dnum, totdoc, lin, aicache);
1510  }
1511  else {
1512  for (jj=0;(i=working2dnum[jj])>=0;jj++) {
1513  if(a[i] != a_old[i]) {
1514  kernel->get_kernel_row(i,active2dnum,aicache);
1515  for (ii=0;(j=active2dnum[ii])>=0;ii++)
1516  lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
1517  }
1518  }
1519  }
1520  }
1521 }
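/* The two branches trade kernel evaluations differently: the LINADD
   branch folds every changed alpha into the kernel's normal vector once
   (add_to_normal) and then refreshes lin[j] with a single
   compute_optimized(j) call per active example, while the generic branch
   fetches one full kernel row per changed alpha and accumulates
   (a[i]-a_old[i]) * y_i * K(i,j) directly. */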
1522 
1523 
1524 void CSVMLight::update_linear_component_mkl(
1525  int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
1526  float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
1527  float64_t *aicache)
1528 {
1529  //int inner_iters=0;
1530  int32_t num = kernel->get_num_vec_rhs();
1531  int32_t num_weights = -1;
1532  int32_t num_kernels = kernel->get_num_subkernels() ;
1533  const float64_t* old_beta = kernel->get_subkernel_weights(num_weights);
1534  ASSERT(num_weights==num_kernels)
1535 
1536  if ((kernel->get_kernel_type()==K_COMBINED) &&
1537  (!((CCombinedKernel*) kernel)->get_append_subkernel_weights()))// for combined kernel
1538  {
1539  CCombinedKernel* k = (CCombinedKernel*) kernel;
1540 
1541  int32_t n = 0, i, j ;
1542 
1543  for (index_t k_idx=0; k_idx<k->get_num_kernels(); k_idx++)
1544  {
1545  CKernel* kn = k->get_kernel(k_idx);
1546  for (i=0;i<num;i++)
1547  {
1548  if(a[i] != a_old[i])
1549  {
1550  kn->get_kernel_row(i,NULL,aicache, true);
1551  for (j=0;j<num;j++)
1552  W[j*num_kernels+n]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
1553  }
1554  }
1555 
1556  SG_UNREF(kn);
1557  n++ ;
1558  }
1559  }
1560  else // hope the kernel is fast ...
1561  {
1562  float64_t* w_backup = SG_MALLOC(float64_t, num_kernels);
1563  float64_t* w1 = SG_MALLOC(float64_t, num_kernels);
1564 
1565  // backup and set to zero
1566  for (int32_t i=0; i<num_kernels; i++)
1567  {
1568  w_backup[i] = old_beta[i] ;
1569  w1[i]=0.0 ;
1570  }
1571  for (int32_t n=0; n<num_kernels; n++)
1572  {
1573  w1[n]=1.0 ;
1574  kernel->set_subkernel_weights(SGVector<float64_t>(w1, num_weights, false));
1575 
1576  for (int32_t i=0;i<num;i++)
1577  {
1578  if(a[i] != a_old[i])
1579  {
1580  for (int32_t j=0;j<num;j++)
1581  W[j*num_kernels+n]+=(a[i]-a_old[i])*compute_kernel(i,j)*(float64_t)label[i];
1582  }
1583  }
1584  w1[n]=0.0 ;
1585  }
1586 
1587  // restore old weights
1588  kernel->set_subkernel_weights(SGVector<float64_t>(w_backup,num_weights));
1589 
1590  SG_FREE(w_backup);
1591  SG_FREE(w1);
1592  }
1593 
1594  call_mkl_callback(a, label, lin);
1595 }
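/* W is the num x num_kernels buffer allocated in svm_learn(): the entries
   W[i*num_kernels+n] cache lin[i] as computed by subkernel n alone, here
   obtained either from per-subkernel kernel rows (combined kernel) or, for
   monolithic MKL kernels, by probing with unit weight vectors w1.
   call_mkl_callback() then mixes these columns with the beta weights. */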
1596 
1597 
1598 void CSVMLight::update_linear_component_mkl_linadd(
1599  int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
1600  float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
1601  float64_t *aicache)
1602 {
1603  //int inner_iters=0;
1604 
1605  // kernel with LP_LINADD property is assumed to have
1606  // compute_by_subkernel functions
1607  int32_t num = kernel->get_num_vec_rhs();
1608  int32_t num_weights = -1;
1609  int32_t num_kernels = kernel->get_num_subkernels() ;
1610  const float64_t* old_beta = kernel->get_subkernel_weights(num_weights);
1611  ASSERT(num_weights==num_kernels)
1612 
1613  float64_t* w_backup = SG_MALLOC(float64_t, num_kernels);
1614  float64_t* w1 = SG_MALLOC(float64_t, num_kernels);
1615 
1616  // backup and set to one
1617  for (int32_t i=0; i<num_kernels; i++)
1618  {
1619  w_backup[i] = old_beta[i] ;
1620  w1[i]=1.0 ;
1621  }
1622  // set the kernel weights
1623  kernel->set_subkernel_weights(SGVector<float64_t>(w1, num_weights, false));
1624 
1625  // create normal update (with changed alphas only)
1626  kernel->clear_normal();
1627  for (int32_t ii=0, i=0;(i=working2dnum[ii])>=0;ii++) {
1628  if(a[i] != a_old[i]) {
1629  kernel->add_to_normal(docs[i], (a[i]-a_old[i])*(float64_t)label[i]);
1630  }
1631  }
1632 
1633  if (parallel->get_num_threads() < 2)
1634  {
1635  // determine contributions of different kernels
1636  for (int32_t i=0; i<num; i++)
1637  kernel->compute_by_subkernel(i,&W[i*num_kernels]);
1638  }
1639 #ifdef HAVE_PTHREAD
1640  else
1641  {
1642  pthread_t* threads = SG_MALLOC(pthread_t, parallel->get_num_threads()-1);
1643  S_THREAD_PARAM_SVMLIGHT* params = SG_MALLOC(S_THREAD_PARAM_SVMLIGHT, parallel->get_num_threads()-1);
1644  int32_t step= num/parallel->get_num_threads();
1645 
1646  for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
1647  {
1648  params[t].kernel = kernel;
1649  params[t].W = W;
1650  params[t].start = t*step;
1651  params[t].end = (t+1)*step;
1652  pthread_create(&threads[t], NULL, CSVMLight::update_linear_component_mkl_linadd_helper, (void*)&params[t]);
1653  }
1654 
1655  for (int32_t i=params[parallel->get_num_threads()-2].end; i<num; i++)
1656  kernel->compute_by_subkernel(i,&W[i*num_kernels]);
1657 
1658  for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
1659  pthread_join(threads[t], NULL);
1660 
1661  SG_FREE(params);
1662  SG_FREE(threads);
1663  }
1664 #endif
1665 
1666  // restore old weights
1667  kernel->set_subkernel_weights(SGVector<float64_t>(w_backup,num_weights));
1668 
1669  call_mkl_callback(a, label, lin);
1670 }
1671 
1672 void* CSVMLight::update_linear_component_mkl_linadd_helper(void* p)
1673 {
1674  S_THREAD_PARAM_SVMLIGHT* params = (S_THREAD_PARAM_SVMLIGHT*) p;
1675 
1676  int32_t num_kernels=params->kernel->get_num_subkernels();
1677 
1678  // determine contributions of different kernels
1679  for (int32_t i=params->start; i<params->end; i++)
1680  params->kernel->compute_by_subkernel(i,&(params->W[i*num_kernels]));
1681 
1682  return NULL ;
1683 }
1684 
1685 void CSVMLight::call_mkl_callback(float64_t* a, int32_t* label, float64_t* lin)
1686 {
1687  int32_t num = kernel->get_num_vec_rhs();
1688  int32_t num_kernels = kernel->get_num_subkernels() ;
1689 
1690  float64_t suma=0;
1691  float64_t* sumw=SG_MALLOC(float64_t, num_kernels);
1692 #ifdef HAVE_LAPACK
1693  int nk = (int) num_kernels; /* calling external lib */
1694  double* alphay = SG_MALLOC(double, num);
1695 
1696  for (int32_t i=0; i<num; i++)
1697  {
1698  alphay[i]=a[i]*label[i];
1699  suma+=a[i];
1700  }
1701 
1702  for (int32_t i=0; i<num_kernels; i++)
1703  sumw[i]=0;
1704 
1705  cblas_dgemv(CblasColMajor, CblasNoTrans, num_kernels, (int) num, 0.5, (double*) W,
1706  num_kernels, alphay, 1, 1.0, (double*) sumw, 1);
1707 
1708  SG_FREE(alphay);
1709 #else
1710  for (int32_t i=0; i<num; i++)
1711  suma += a[i];
1712 
1713  for (int32_t d=0; d<num_kernels; d++)
1714  {
1715  sumw[d]=0;
1716  for (int32_t i=0; i<num; i++)
1717  sumw[d] += a[i]*(0.5*label[i]*W[i*num_kernels+d]);
1718  }
1719 #endif
1720 
1721  if (callback)
1722  mkl_converged=callback(mkl, sumw, suma);
1723 
1724 
1725  const float64_t* new_beta = kernel->get_subkernel_weights(num_kernels);
1726 
1727  // update lin
1728 #ifdef HAVE_LAPACK
1729  cblas_dgemv(CblasColMajor, CblasTrans, nk, (int) num, 1.0, (double*) W,
1730  nk, (double*) new_beta, 1, 0.0, (double*) lin, 1);
1731 #else
1732  for (int32_t i=0; i<num; i++)
1733  lin[i]=0 ;
1734  for (int32_t d=0; d<num_kernels; d++)
1735  if (new_beta[d]!=0)
1736  for (int32_t i=0; i<num; i++)
1737  lin[i] += new_beta[d]*W[i*num_kernels+d] ;
1738 #endif
1739 
1740  SG_FREE(sumw);
1741 }
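/* The callback (registered by the MKL machinery) receives
   sumw[d] = 1/2 a'Y K_d Y a, the per-subkernel quadratic terms, plus
   suma = sum_i a_i, and computes new subkernel weights beta; lin is then
   rebuilt as lin = W*beta so the outer SVM iteration continues against
   the reweighted kernel. */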
1742 
1743 
1744 /*************************** Working set selection ***************************/
1745 
1746 int32_t CSVMLight::select_next_qp_subproblem_grad(
1747  int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc,
1748  int32_t qp_size, int32_t *inconsistent, int32_t *active2dnum,
1749  int32_t *working2dnum, float64_t *selcrit, int32_t *select,
1750  int32_t cache_only, int32_t *key, int32_t *chosen)
1751  /* Use the feasible direction approach to select the next
1752  qp-subproblem (see chapter 'Selecting a good working set'). If
1753  'cache_only' is true, then the variables are selected only among
1754  those for which the kernel evaluations are cached. */
1755 {
1756  int32_t choosenum,i,j,k,activedoc,inum,valid;
1757  float64_t s;
1758 
1759  for (inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
1760  choosenum=0;
1761  activedoc=0;
1762  for (i=0;(j=active2dnum[i])>=0;i++) {
1763  s=-label[j];
1764  if(cache_only)
1765  {
1766  if (use_kernel_cache)
1767  valid=(kernel->kernel_cache_check(j));
1768  else
1769  valid = 1 ;
1770  }
1771  else
1772  valid=1;
1773  if(valid
1774  && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
1775  && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
1776  && (s>0)))
1777  && (!chosen[j])
1778  && (label[j])
1779  && (!inconsistent[j]))
1780  {
1781  selcrit[activedoc]=(float64_t)label[j]*(learn_parm->eps[j]-(float64_t)label[j]*c[j]+(float64_t)label[j]*lin[j]);
1782  key[activedoc]=j;
1783  activedoc++;
1784  }
1785  }
1786  select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
1787  for (k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
1788  i=key[select[k]];
1789  chosen[i]=1;
1790  working2dnum[inum+choosenum]=i;
1791  choosenum+=1;
1792  if (use_kernel_cache)
1793  kernel->kernel_cache_touch(i);
1794  /* make sure it does not get kicked */
1795  /* out of cache */
1796  }
1797 
1798  activedoc=0;
1799  for (i=0;(j=active2dnum[i])>=0;i++) {
1800  s=label[j];
1801  if(cache_only)
1802  {
1803  if (use_kernel_cache)
1804  valid=(kernel->kernel_cache_check(j));
1805  else
1806  valid = 1 ;
1807  }
1808  else
1809  valid=1;
1810  if(valid
1811  && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
1812  && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
1813  && (s>0)))
1814  && (!chosen[j])
1815  && (label[j])
1816  && (!inconsistent[j]))
1817  {
1818  selcrit[activedoc]=-(float64_t)label[j]*(learn_parm->eps[j]-(float64_t)label[j]*c[j]+(float64_t)label[j]*lin[j]);
1819  /* selcrit[activedoc]=-(float64_t)(label[j]*(-1.0+(float64_t)label[j]*lin[j])); */
1820  key[activedoc]=j;
1821  activedoc++;
1822  }
1823  }
1824  select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
1825  for (k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
1826  i=key[select[k]];
1827  chosen[i]=1;
1828  working2dnum[inum+choosenum]=i;
1829  choosenum+=1;
1830  if (use_kernel_cache)
1831  kernel->kernel_cache_touch(i); /* make sure it does not get kicked */
1832  /* out of cache */
1833  }
1834  working2dnum[inum+choosenum]=-1; /* complete index */
1835  return(choosenum);
1836 }
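/* Working set selection follows Joachims' steepest feasible direction
   rule: the first pass ranks examples that can move in direction
   d_j = -y_j (selcrit is y_j times the dual gradient), the second pass
   ranks the opposite direction, and qp_size/2 top candidates are taken
   from each side so the equality constraint remains satisfiable. */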
1837 
1838 int32_t CSVMLight::select_next_qp_subproblem_rand(
1839  int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc,
1840  int32_t qp_size, int32_t *inconsistent, int32_t *active2dnum,
1841  int32_t *working2dnum, float64_t *selcrit, int32_t *select, int32_t *key,
1842  int32_t *chosen, int32_t iteration)
1843 /* Use the feasible direction approach to select the next
1844  qp-subproblem (see section 'Selecting a good working set'). Chooses
1845  a feasible direction at (pseudo) random to help jump over numerical
1846  problems. */
1847 {
1848  int32_t choosenum,i,j,k,activedoc,inum;
1849  float64_t s;
1850 
1851  for (inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
1852  choosenum=0;
1853  activedoc=0;
1854  for (i=0;(j=active2dnum[i])>=0;i++) {
1855  s=-label[j];
1856  if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
1857  && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
1858  && (s>0)))
1859  && (!inconsistent[j])
1860  && (label[j])
1861  && (!chosen[j])) {
1862  selcrit[activedoc]=(j+iteration) % totdoc;
1863  key[activedoc]=j;
1864  activedoc++;
1865  }
1866  }
1867  select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
1868  for (k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
1869  i=key[select[k]];
1870  chosen[i]=1;
1871  working2dnum[inum+choosenum]=i;
1872  choosenum+=1;
1873  if (use_kernel_cache)
1874  kernel->kernel_cache_touch(i); /* make sure it does not get kicked */
1875  /* out of cache */
1876  }
1877 
1878  activedoc=0;
1879  for (i=0;(j=active2dnum[i])>=0;i++) {
1880  s=label[j];
1881  if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
1882  && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
1883  && (s>0)))
1884  && (!inconsistent[j])
1885  && (label[j])
1886  && (!chosen[j])) {
1887  selcrit[activedoc]=(j+iteration) % totdoc;
1888  key[activedoc]=j;
1889  activedoc++;
1890  }
1891  }
1892  select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
1893  for (k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
1894  i=key[select[k]];
1895  chosen[i]=1;
1896  working2dnum[inum+choosenum]=i;
1897  choosenum+=1;
1898  if (use_kernel_cache)
1899  kernel->kernel_cache_touch(i); /* make sure it does not get kicked */
1900  /* out of cache */
1901  }
1902  working2dnum[inum+choosenum]=-1; /* complete index */
1903  return(choosenum);
1904 }
1905 
1906 
1907 
1908 void CSVMLight::select_top_n(
1909  float64_t *selcrit, int32_t range, int32_t* select, int32_t n)
1910 {
1911  register int32_t i,j;
1912 
1913  for (i=0;(i<n) && (i<range);i++) { /* Initialize with the first n elements */
1914  for (j=i;j>=0;j--) {
1915  if((j>0) && (selcrit[select[j-1]]<selcrit[i])){
1916  select[j]=select[j-1];
1917  }
1918  else {
1919  select[j]=i;
1920  j=-1;
1921  }
1922  }
1923  }
1924  if(n>0) {
1925  for (i=n;i<range;i++) {
1926  if(selcrit[i]>selcrit[select[n-1]]) {
1927  for (j=n-1;j>=0;j--) {
1928  if((j>0) && (selcrit[select[j-1]]<selcrit[i])) {
1929  select[j]=select[j-1];
1930  }
1931  else {
1932  select[j]=i;
1933  j=-1;
1934  }
1935  }
1936  }
1937  }
1938  }
1939 }
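
/* Illustrative sketch (not part of the original SVMLight.cpp): select_top_n()
 * keeps the indices of the n largest selcrit values in a small buffer that is
 * maintained sorted by insertion, i.e. an O(range*n) partial selection sort.
 * Expected behaviour on hypothetical input:
 *
 *   float64_t selcrit[] = { 0.3, 2.0, -1.0, 5.0, 1.0 };
 *   int32_t select[2];
 *   select_top_n(selcrit, 5, select, 2);
 *   // => select[0] == 3 (value 5.0), select[1] == 1 (value 2.0)
 */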
1940 
1941 
1942 /******************************** Shrinking *********************************/
1943 
1944 void CSVMLight::init_shrink_state(
1945  SHRINK_STATE *shrink_state, int32_t totdoc, int32_t maxhistory)
1946 {
1947  int32_t i;
1948 
1949  shrink_state->deactnum=0;
1950  shrink_state->active = SG_MALLOC(int32_t, totdoc);
1951  shrink_state->inactive_since = SG_MALLOC(int32_t, totdoc);
1952  shrink_state->a_history = SG_MALLOC(float64_t*, maxhistory);
1953  shrink_state->maxhistory=maxhistory;
1954  shrink_state->last_lin = SG_MALLOC(float64_t, totdoc);
1955  shrink_state->last_a = SG_MALLOC(float64_t, totdoc);
1956 
1957  for (i=0;i<totdoc;i++) {
1958  shrink_state->active[i]=1;
1959  shrink_state->inactive_since[i]=0;
1960  shrink_state->last_a[i]=0;
1961  shrink_state->last_lin[i]=0;
1962  }
1963 }
1964 
1965 void CSVMLight::shrink_state_cleanup(SHRINK_STATE *shrink_state)
1966 {
1967  SG_FREE(shrink_state->active);
1968  SG_FREE(shrink_state->inactive_since);
1969  if(shrink_state->deactnum > 0)
1970  SG_FREE((shrink_state->a_history[shrink_state->deactnum-1]));
1971  SG_FREE((shrink_state->a_history));
1972  SG_FREE((shrink_state->last_a));
1973  SG_FREE((shrink_state->last_lin));
1974 }
1975 
1976 int32_t CSVMLight::shrink_problem(
1977  SHRINK_STATE *shrink_state, int32_t *active2dnum,
1978  int32_t *last_suboptimal_at, int32_t iteration, int32_t totdoc,
1979  int32_t minshrink, float64_t *a, int32_t *inconsistent, float64_t* c,
1980  float64_t* lin, int* label)
1981  /* Shrink some variables away. Do the shrinking only if at least
1982  minshrink variables can be removed. */
1983 {
1984  int32_t i,ii,change,activenum,lastiter;
1985  float64_t *a_old=NULL;
1986 
1987  activenum=0;
1988  change=0;
1989  for (ii=0;active2dnum[ii]>=0;ii++) {
1990  i=active2dnum[ii];
1991  activenum++;
1992  lastiter=last_suboptimal_at[i];
1993 
1994  if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink)
1995  || (inconsistent[i])) {
1996  change++;
1997  }
1998  }
1999  if((change>=minshrink) /* shrink only if sufficiently many candidates */
2000  && (shrink_state->deactnum<shrink_state->maxhistory)) { /* and enough memory */
2001  /* Shrink problem by removing those variables which are */
2002  /* optimal at a bound for a minimum number of iterations */
2003  if(verbosity>=2) {
2004  SG_INFO(" Shrinking...")
2005  }
2006 
2007  if (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())) { /* non-linear case save alphas */
2008 
2009  a_old=SG_MALLOC(float64_t, totdoc);
2010  shrink_state->a_history[shrink_state->deactnum]=a_old;
2011  for (i=0;i<totdoc;i++) {
2012  a_old[i]=a[i];
2013  }
2014  }
2015  for (ii=0;active2dnum[ii]>=0;ii++) {
2016  i=active2dnum[ii];
2017  lastiter=last_suboptimal_at[i];
2018  if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink)
2019  || (inconsistent[i])) {
2020  shrink_state->active[i]=0;
2021  shrink_state->inactive_since[i]=shrink_state->deactnum;
2022  }
2023  }
2024  activenum=compute_index(shrink_state->active,totdoc,active2dnum);
2025  shrink_state->deactnum++;
2026  if(kernel->has_property(KP_LINADD) && get_linadd_enabled())
2027  shrink_state->deactnum=0;
2028 
2029  if(verbosity>=2) {
2030  SG_DONE()
2031  SG_DEBUG("Number of inactive variables = %ld\n", totdoc-activenum)
2032  }
2033  }
2034  return(activenum);
2035 }
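
/* Illustrative sketch (not part of the original SVMLight.cpp): the shrinking
 * test above reads "this variable has looked optimal at a bound for more than
 * svm_iter_to_shrink consecutive iterations, or is marked inconsistent".
 * Condensed into a standalone predicate (names hypothetical):
 *
 *   bool should_shrink(int iteration, int last_suboptimal_at,
 *                      bool inconsistent, int svm_iter_to_shrink)
 *   {
 *       return ((iteration - last_suboptimal_at) > svm_iter_to_shrink)
 *              || inconsistent;
 *   }
 */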
2036 
2036 
2037 void* CSVMLight::reactivate_inactive_examples_linadd_helper(void* p)
2038 {
2039  S_THREAD_PARAM_REACTIVATE_LINADD* params = (S_THREAD_PARAM_REACTIVATE_LINADD*) p;
2040 
2041  CKernel* k = params->kernel;
2042  float64_t* lin = params->lin;
2043  float64_t* last_lin = params->last_lin;
2044  int32_t* active = params->active;
2045  int32_t* docs = params->docs;
2046  int32_t start = params->start;
2047  int32_t end = params->end;
2048 
2049  for (int32_t i=start;i<end;i++)
2050  {
2051  if (!active[i])
2052  lin[i] = last_lin[i]+k->compute_optimized(docs[i]);
2053 
2054  last_lin[i]=lin[i];
2055  }
2056 
2057  return NULL;
2058 }
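
/* Illustrative sketch (not part of the original SVMLight.cpp): for kernels with
 * the KP_LINADD property, add_to_normal() folds each alpha change into an
 * explicit normal vector w, so the update above costs one dot product per
 * example instead of a sum over all changed support vectors. Toy linear-kernel
 * analogue (all names and data hypothetical):
 *
 *   #include <cstdio>
 *   #include <vector>
 *
 *   int main()
 *   {
 *       std::vector<double> w(2, 0.0);                    // the "normal"
 *       double x0[2] = {1.0, 2.0}, x1[2] = {0.5, -1.0};   // two documents
 *       // add_to_normal(doc 0, weight = (a_new - a_old)*y):
 *       double weight = 0.3;
 *       for (int k = 0; k < 2; k++) w[k] += weight * x0[k];
 *       // compute_optimized(doc 1) == <w, x1>:
 *       double lin1 = w[0]*x1[0] + w[1]*x1[1];
 *       printf("delta lin[1] = %g\n", lin1);              // prints -0.45
 *       return 0;
 *   }
 */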
2059 
2060 void* CSVMLight::reactivate_inactive_examples_vanilla_helper(void* p)
2061 {
2062  S_THREAD_PARAM_REACTIVATE_VANILLA* params = (S_THREAD_PARAM_REACTIVATE_VANILLA*) p;
2063  ASSERT(params)
2064  ASSERT(params->kernel)
2065  ASSERT(params->lin)
2066  ASSERT(params->aicache)
2067  ASSERT(params->a)
2068  ASSERT(params->a_old)
2069  ASSERT(params->changed2dnum)
2070  ASSERT(params->inactive2dnum)
2071  ASSERT(params->label)
2072 
2073  CKernel* k = params->kernel;
2074  float64_t* lin = params->lin;
2075  float64_t* aicache = params->aicache;
2076  float64_t* a= params->a;
2077  float64_t* a_old = params->a_old;
2078  int32_t* changed2dnum = params->changed2dnum;
2079  int32_t* inactive2dnum = params->inactive2dnum;
2080  int32_t* label = params->label;
2081  int32_t start = params->start;
2082  int32_t end = params->end;
2083 
2084  for (int32_t ii=start;ii<end;ii++)
2085  {
2086  int32_t i=changed2dnum[ii];
2087  int32_t j=0;
2088  ASSERT(i>=0)
2089 
2090  k->get_kernel_row(i,inactive2dnum,aicache);
2091  for (int32_t jj=0;(j=inactive2dnum[jj])>=0;jj++)
2092  lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
2093  }
2094  return NULL;
2095 }
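
/* Illustrative sketch (not part of the original SVMLight.cpp): the caller
 * below hands every worker a private tmp_lin buffer, so the lin[] updates in
 * this helper need no locking; the partial sums are merged after the joins.
 * Minimal standalone version of that pattern (all names hypothetical):
 *
 *   #include <cstdio>
 *   #include <pthread.h>
 *
 *   struct Arg { double* buf; int n; };
 *
 *   static void* work(void* p)
 *   {
 *       Arg* a = (Arg*) p;
 *       for (int j = 0; j < a->n; j++)
 *           a->buf[j] += 1.0;           // thread-private writes only
 *       return NULL;
 *   }
 *
 *   int main()
 *   {
 *       const int T = 2, n = 4;
 *       double tmp[T][n] = {{0}}, lin[n] = {0};
 *       pthread_t th[T];
 *       Arg args[T];
 *       for (int t = 0; t < T; t++) {
 *           args[t].buf = tmp[t]; args[t].n = n;
 *           pthread_create(&th[t], NULL, work, &args[t]);
 *       }
 *       for (int t = 0; t < T; t++) {
 *           pthread_join(th[t], NULL);
 *           for (int j = 0; j < n; j++) lin[j] += tmp[t][j];  // merge
 *       }
 *       printf("lin[0]=%g\n", lin[0]);  // prints 2
 *       return 0;
 *   }
 */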
2096 
2097 void CSVMLight::reactivate_inactive_examples(
2098  int32_t* label, float64_t *a, SHRINK_STATE *shrink_state, float64_t *lin,
2099  float64_t *c, int32_t totdoc, int32_t iteration, int32_t *inconsistent,
2100  int32_t* docs, float64_t *aicache, float64_t *maxdiff)
2101  /* Make all variables active again which had been removed by
2102  shrinking. */
2103  /* Computes lin for those variables from scratch. */
2104 {
2105  register int32_t i,j,ii,jj,t,*changed2dnum,*inactive2dnum;
2106  int32_t *changed,*inactive;
2107  register float64_t *a_old,dist;
2108  float64_t ex_c,target;
2109 
2110  if (kernel->has_property(KP_LINADD) && get_linadd_enabled()) { /* special linear case */
2111  a_old=shrink_state->last_a;
2112 
2113  if (!use_batch_computation || !kernel->has_property(KP_BATCHEVALUATION))
2114 {
2115  SG_DEBUG(" clear normal - linadd\n")
2116  kernel->clear_normal();
2117 
2118  int32_t num_modified=0;
2119  for (i=0;i<totdoc;i++) {
2120  if(a[i] != a_old[i]) {
2121  kernel->add_to_normal(docs[i], ((a[i]-a_old[i])*(float64_t)label[i]));
2122  a_old[i]=a[i];
2123  num_modified++;
2124  }
2125  }
2126 
2127  if (num_modified>0)
2128  {
2129  int32_t num_threads=parallel->get_num_threads();
2130  ASSERT(num_threads>0)
2131  if (num_threads < 2)
2132  {
2133  S_THREAD_PARAM_REACTIVATE_LINADD params;
2134  params.kernel=kernel;
2135  params.lin=lin;
2136  params.last_lin=shrink_state->last_lin;
2137  params.docs=docs;
2138  params.active=shrink_state->active;
2139  params.start=0;
2140  params.end=totdoc;
2141  reactivate_inactive_examples_linadd_helper((void*) &params);
2142 }
2143 #ifdef HAVE_PTHREAD
2144  else
2145  {
2146  pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
2147  S_THREAD_PARAM_REACTIVATE_LINADD* params = SG_MALLOC(S_THREAD_PARAM_REACTIVATE_LINADD, num_threads);
2148  int32_t step= totdoc/num_threads;
2149 
2150  for (t=0; t<num_threads-1; t++)
2151  {
2152  params[t].kernel=kernel;
2153  params[t].lin=lin;
2154  params[t].last_lin=shrink_state->last_lin;
2155  params[t].docs=docs;
2156  params[t].active=shrink_state->active;
2157  params[t].start = t*step;
2158  params[t].end = (t+1)*step;
2159  pthread_create(&threads[t], NULL, CSVMLight::reactivate_inactive_examples_linadd_helper, (void*)&params[t]);
2160  }
2161 
2162  params[t].kernel=kernel;
2163  params[t].lin=lin;
2164  params[t].last_lin=shrink_state->last_lin;
2165  params[t].docs=docs;
2166  params[t].active=shrink_state->active;
2167  params[t].start = t*step;
2168  params[t].end = totdoc;
2169  reactivate_inactive_examples_linadd_helper((void*) &params[t]);
2170 
2171  for (t=0; t<num_threads-1; t++)
2172  pthread_join(threads[t], NULL);
2173 
2174  SG_FREE(threads);
2175  SG_FREE(params);
2176  }
2177 #endif
2178 
2179  }
2180  }
2181  else
2182  {
2183  float64_t *alphas = SG_MALLOC(float64_t, totdoc);
2184  int32_t *idx = SG_MALLOC(int32_t, totdoc);
2185  int32_t num_suppvec=0 ;
2186 
2187  for (i=0; i<totdoc; i++)
2188  {
2189  if(a[i] != a_old[i])
2190  {
2191  alphas[num_suppvec] = (a[i]-a_old[i])*(float64_t)label[i];
2192  idx[num_suppvec] = i ;
2193  a_old[i] = a[i] ;
2194  num_suppvec++ ;
2195  }
2196  }
2197 
2198  if (num_suppvec>0)
2199  {
2200  int32_t num_inactive=0;
2201  int32_t* inactive_idx=SG_MALLOC(int32_t, totdoc); // in fact we only need a subset
2202 
2203  j=0;
2204  for (i=0;i<totdoc;i++)
2205  {
2206  if(!shrink_state->active[i])
2207  {
2208  inactive_idx[j++] = i;
2209  num_inactive++;
2210  }
2211  }
2212 
2213  if (num_inactive>0)
2214  {
2215  float64_t* dest = SG_MALLOC(float64_t, num_inactive);
2216  memset(dest, 0, sizeof(float64_t)*num_inactive);
2217 
2218  kernel->compute_batch(num_inactive, inactive_idx, dest, num_suppvec, idx, alphas);
2219 
2220  j=0;
2221  for (i=0;i<totdoc;i++) {
2222  if(!shrink_state->active[i]) {
2223  lin[i] = shrink_state->last_lin[i] + dest[j++] ;
2224  }
2225  shrink_state->last_lin[i]=lin[i];
2226  }
2227 
2228  SG_FREE(dest);
2229  }
2230  else
2231  {
2232  for (i=0;i<totdoc;i++)
2233  shrink_state->last_lin[i]=lin[i];
2234  }
2235  SG_FREE(inactive_idx);
2236  }
2237  SG_FREE(alphas);
2238  SG_FREE(idx);
2239  }
2240 
2241  kernel->delete_optimization();
2242 }
2243  else
2244  {
2245  changed=SG_MALLOC(int32_t, totdoc);
2246  changed2dnum=SG_MALLOC(int32_t, totdoc+11);
2247  inactive=SG_MALLOC(int32_t, totdoc);
2248  inactive2dnum=SG_MALLOC(int32_t, totdoc+11);
2249  for (t=shrink_state->deactnum-1;(t>=0) && shrink_state->a_history[t];t--)
2250  {
2251  if(verbosity>=2) {
2252  SG_INFO("%ld..",t)
2253  }
2254  a_old=shrink_state->a_history[t];
2255  for (i=0;i<totdoc;i++) {
2256  inactive[i]=((!shrink_state->active[i])
2257  && (shrink_state->inactive_since[i] == t));
2258  changed[i]= (a[i] != a_old[i]);
2259  }
2260  compute_index(inactive,totdoc,inactive2dnum);
2261  compute_index(changed,totdoc,changed2dnum);
2262 
2263 
2264  //TODO: PUT THIS BACK IN!!!!!!!! int32_t num_threads=parallel->get_num_threads();
2265  int32_t num_threads=1;
2266  ASSERT(num_threads>0)
2267 
2268  if (num_threads < 2)
2269  {
2270  for (ii=0;(i=changed2dnum[ii])>=0;ii++) {
2271  kernel->get_kernel_row(i,inactive2dnum,aicache);
2272  for (jj=0;(j=inactive2dnum[jj])>=0;jj++)
2273  lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
2274  }
2275  }
2276 #ifdef HAVE_PTHREAD
2277  else
2278  {
2279  //find number of the changed ones
2280  int32_t num_changed=0;
2281  for (ii=0;changed2dnum[ii]>=0;ii++)
2282  num_changed++;
2283 
2284  if (num_changed>0)
2285  {
2286  pthread_t* threads= SG_MALLOC(pthread_t, num_threads-1);
2287  S_THREAD_PARAM_REACTIVATE_VANILLA* params = SG_MALLOC(S_THREAD_PARAM_REACTIVATE_VANILLA, num_threads);
2288  int32_t step= num_changed/num_threads;
2289 
2290  // alloc num_threads many tmp buffers
2291  float64_t* tmp_lin=SG_MALLOC(float64_t, totdoc*num_threads);
2292  memset(tmp_lin, 0, sizeof(float64_t)*((size_t) totdoc)*num_threads);
2293  float64_t* tmp_aicache=SG_MALLOC(float64_t, totdoc*num_threads);
2294  memset(tmp_aicache, 0, sizeof(float64_t)*((size_t) totdoc)*num_threads);
2295 
2296  int32_t thr;
2297  for (thr=0; thr<num_threads-1; thr++)
2298  {
2299  params[thr].kernel=kernel;
2300  params[thr].lin=&tmp_lin[thr*totdoc];
2301  params[thr].aicache=&tmp_aicache[thr*totdoc];
2302  params[thr].a=a;
2303  params[thr].a_old=a_old;
2304  params[thr].changed2dnum=changed2dnum;
2305  params[thr].inactive2dnum=inactive2dnum;
2306  params[thr].label=label;
2307  params[thr].start = thr*step;
2308  params[thr].end = (thr+1)*step;
2309  pthread_create(&threads[thr], NULL, CSVMLight::reactivate_inactive_examples_vanilla_helper, (void*)&params[thr]);
2310  }
2311 
2312  params[thr].kernel=kernel;
2313  params[thr].lin=&tmp_lin[thr*totdoc];
2314  params[thr].aicache=&tmp_aicache[thr*totdoc];
2315  params[thr].a=a;
2316  params[thr].a_old=a_old;
2317  params[thr].changed2dnum=changed2dnum;
2318  params[thr].inactive2dnum=inactive2dnum;
2319  params[thr].label=label;
2320  params[thr].start = thr*step;
2321  params[thr].end = num_changed;
2322  reactivate_inactive_examples_vanilla_helper((void*) &params[thr]);
2323 
2324  for (jj=0;(j=inactive2dnum[jj])>=0;jj++)
2325  lin[j]+=tmp_lin[totdoc*thr+j];
2326 
2327  for (thr=0; thr<num_threads-1; thr++)
2328  {
2329  pthread_join(threads[thr], NULL);
2330 
2331  //add up results
2332  for (jj=0;(j=inactive2dnum[jj])>=0;jj++)
2333  lin[j]+=tmp_lin[totdoc*thr+j];
2334  }
2335 
2336  SG_FREE(tmp_lin);
2337  SG_FREE(tmp_aicache);
2338  SG_FREE(threads);
2339  SG_FREE(params);
2340  }
2341  }
2342 #endif
2343  }
2344  SG_FREE(changed);
2345  SG_FREE(changed2dnum);
2346  SG_FREE(inactive);
2347  SG_FREE(inactive2dnum);
2348  }
2349 
2350  (*maxdiff)=0;
2351  for (i=0;i<totdoc;i++) {
2352  shrink_state->inactive_since[i]=shrink_state->deactnum-1;
2353  if(!inconsistent[i]) {
2354  dist=(lin[i]-model->b)*(float64_t)label[i];
2355  target=-(learn_parm->eps[i]-(float64_t)label[i]*c[i]);
2356  ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
2357  if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
2358  if((dist-target)>(*maxdiff)) /* largest violation */
2359  (*maxdiff)=dist-target;
2360  }
2361  else if((a[i]<ex_c) && (dist < target)) {
2362  if((target-dist)>(*maxdiff)) /* largest violation */
2363  (*maxdiff)=target-dist;
2364  }
2365  if((a[i]>(0+learn_parm->epsilon_a))
2366  && (a[i]<ex_c)) {
2367  shrink_state->active[i]=1; /* not at bound */
2368  }
2369  else if((a[i]<=(0+learn_parm->epsilon_a)) && (dist < (target+learn_parm->epsilon_shrink))) {
2370  shrink_state->active[i]=1;
2371  }
2372  else if((a[i]>=ex_c)
2373  && (dist > (target-learn_parm->epsilon_shrink))) {
2374  shrink_state->active[i]=1;
2375  }
2376  else if(learn_parm->sharedslack) { /* make all active when sharedslack */
2377  shrink_state->active[i]=1;
2378  }
2379  }
2380  }
2381 
2382  if (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())) { /* update history for non-linear */
2383  for (i=0;i<totdoc;i++) {
2384  (shrink_state->a_history[shrink_state->deactnum-1])[i]=a[i];
2385  }
2386  for (t=shrink_state->deactnum-2;(t>=0) && shrink_state->a_history[t];t--) {
2387  SG_FREE(shrink_state->a_history[t]);
2388  shrink_state->a_history[t]=0;
2389  }
2390  }
2391 }
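
/* Illustrative sketch (not part of the original SVMLight.cpp): the final loop
 * above is a KT (Karush-Kuhn-Tucker) check. A variable may stay shrunk only if
 * it sits at a bound and satisfies its optimality condition with slack
 * epsilon_shrink; free variables, near-violators and (with shared slack) all
 * variables are reactivated. Condensed predicate (names hypothetical):
 *
 *   bool keep_active(double a, double dist, double target, double ex_c,
 *                    double eps_a, double eps_shrink)
 *   {
 *       if (a > eps_a && a < ex_c)                    return true; // free
 *       if (a <= eps_a && dist < target + eps_shrink) return true; // at lower bound
 *       if (a >= ex_c  && dist > target - eps_shrink) return true; // at upper bound
 *       return false;                                 // safely optimal at bound
 *   }
 */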
2392 
2393 
2394 
2395 /* start the optimizer and return the optimal values */
2396 float64_t* CSVMLight::optimize_qp(
2397  QP *qp, float64_t *epsilon_crit, int32_t nx, float64_t *threshold,
2398  int32_t& svm_maxqpsize)
2399 {
2400  register int32_t i, j, result;
2401  float64_t margin, obj_before, obj_after;
2402  float64_t sigdig, dist, epsilon_loqo;
2403  int32_t iter;
2404 
2405  if(!primal) { /* allocate memory at first call */
2406  primal=SG_MALLOC(float64_t, nx*3);
2407  dual=SG_MALLOC(float64_t, nx*2+1);
2408  }
2409 
2410  obj_before=0; /* calculate objective before optimization */
2411  for(i=0;i<qp->opt_n;i++) {
2412  obj_before+=(qp->opt_g0[i]*qp->opt_xinit[i]);
2413  obj_before+=(0.5*qp->opt_xinit[i]*qp->opt_xinit[i]*qp->opt_g[i*qp->opt_n+i]);
2414  for(j=0;j<i;j++) {
2415  obj_before+=(qp->opt_xinit[j]*qp->opt_xinit[i]*qp->opt_g[j*qp->opt_n+i]);
2416  }
2417  }
2418 
2419  result=STILL_RUNNING;
2420  qp->opt_ce0[0]*=(-1.0);
2421  /* Run pr_loqo. If a run fails, try again with parameters which lead */
2422  /* to a slower, but more robust setting. */
2423  for(margin=init_margin,iter=init_iter;
2424  (margin<=0.9999999) && (result!=OPTIMAL_SOLUTION);) {
2425 
2427  sigdig=-log10(opt_precision);
2428 
2429  result=pr_loqo((int32_t)qp->opt_n,(int32_t)qp->opt_m,
2430  (float64_t *)qp->opt_g0,(float64_t *)qp->opt_g,
2431  (float64_t *)qp->opt_ce,(float64_t *)qp->opt_ce0,
2432  (float64_t *)qp->opt_low,(float64_t *)qp->opt_up,
2433  (float64_t *)primal,(float64_t *)dual,
2434  (int32_t)(verbosity-2),
2435  (float64_t)sigdig,(int32_t)iter,
2436  (float64_t)margin,(float64_t)(qp->opt_up[0])/4.0,(int32_t)0);
2437 
2438  if(CMath::is_nan(dual[0])) { /* check for choldc problem */
2439  if(verbosity>=2) {
2440  SG_SDEBUG("Restarting PR_LOQO with more conservative parameters.\n")
2441  }
2442  if(init_margin<0.80) { /* become more conservative in general */
2443  init_margin=(4.0*margin+1.0)/5.0;
2444  }
2445  margin=(margin+1.0)/2.0;
2446  (opt_precision)*=10.0; /* reduce precision */
2447  if(verbosity>=2) {
2448  SG_SDEBUG("Reducing precision of PR_LOQO.\n")
2449  }
2450  }
2451  else if(result!=OPTIMAL_SOLUTION) {
2452  iter+=2000;
2453  init_iter+=10;
2454  (opt_precision)*=10.0; /* reduce precision */
2455  if(verbosity>=2) {
2456  SG_SDEBUG("Reducing precision of PR_LOQO due to (%ld).\n",result)
2457  }
2458  }
2459  }
2460 
2461  if(qp->opt_m) /* Thanks to Alex Smola for this hint */
2462  model_b=dual[0];
2463  else
2464  model_b=0;
2465 
2466  /* Check the precision of the alphas. If results of current optimization */
2467  /* violate KT-Conditions, relax the epsilon on the bounds on alphas. */
2468  epsilon_loqo=1E-10;
2469  for(i=0;i<qp->opt_n;i++) {
2470  dist=-model_b*qp->opt_ce[i];
2471  dist+=(qp->opt_g0[i]+1.0);
2472  for(j=0;j<i;j++) {
2473  dist+=(primal[j]*qp->opt_g[j*qp->opt_n+i]);
2474  }
2475  for(j=i;j<qp->opt_n;j++) {
2476  dist+=(primal[j]*qp->opt_g[i*qp->opt_n+j]);
2477  }
2478  /* SG_SDEBUG("LOQO: a[%d]=%f, dist=%f, b=%f\n",i,primal[i],dist,dual[0]) */
2479  if((primal[i]<(qp->opt_up[i]-epsilon_loqo)) && (dist < (1.0-(*epsilon_crit)))) {
2480  epsilon_loqo=(qp->opt_up[i]-primal[i])*2.0;
2481  }
2482  else if((primal[i]>(0+epsilon_loqo)) && (dist > (1.0+(*epsilon_crit)))) {
2483  epsilon_loqo=primal[i]*2.0;
2484  }
2485  }
2486 
2487  for(i=0;i<qp->opt_n;i++) { /* clip alphas to bounds */
2488  if(primal[i]<=(0+epsilon_loqo)) {
2489  primal[i]=0;
2490  }
2491  else if(primal[i]>=(qp->opt_up[i]-epsilon_loqo)) {
2492  primal[i]=qp->opt_up[i];
2493  }
2494  }
2495 
2496  obj_after=0; /* calculate objective after optimization */
2497  for(i=0;i<qp->opt_n;i++) {
2498  obj_after+=(qp->opt_g0[i]*primal[i]);
2499  obj_after+=(0.5*primal[i]*primal[i]*qp->opt_g[i*qp->opt_n+i]);
2500  for(j=0;j<i;j++) {
2501  obj_after+=(primal[j]*primal[i]*qp->opt_g[j*qp->opt_n+i]);
2502  }
2503  }
2504 
2505  /* if optimizer returned NAN values, reset and retry with smaller */
2506  /* working set. */
2507  if(CMath::is_nan(obj_after) || CMath::is_nan(model_b)) {
2508  for(i=0;i<qp->opt_n;i++) {
2509  primal[i]=qp->opt_xinit[i];
2510  }
2511  model_b=0;
2512  if(svm_maxqpsize>2) {
2513  svm_maxqpsize--; /* decrease size of qp-subproblems */
2514  }
2515  }
2516 
2517  if(obj_after >= obj_before) { /* check whether there was progress */
2518  (opt_precision)/=100.0;
2519  precision_violations++;
2520  if(verbosity>=2) {
2521  SG_SDEBUG("Increasing Precision of PR_LOQO.\n")
2522  }
2523  }
2524 
2525  // TODO: add parameter for this (e.g. for MTL experiments)
2526  if(precision_violations > 5000) {
2527  (*epsilon_crit)*=10.0;
2528  precision_violations=0;
2529  SG_SINFO("Relaxing epsilon on KT-Conditions.\n")
2530  }
2531 
2532  (*threshold)=model_b;
2533 
2534  if(result!=OPTIMAL_SOLUTION) {
2535  SG_SERROR("PR_LOQO did not converge.\n")
2536  return(qp->opt_xinit);
2537  }
2538  else {
2539  return(primal);
2540  }
2541 }
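
/* Illustrative sketch (not part of the original SVMLight.cpp): the retry loop
 * above converts the precision into the number of significant digits requested
 * from pr_loqo and relaxes it tenfold after every failed attempt, trading
 * accuracy for robustness:
 *
 *   #include <cmath>
 *   #include <cstdio>
 *
 *   int main()
 *   {
 *       double opt_precision = 1e-8;  // hypothetical starting value
 *       for (int attempt = 0; attempt < 3; attempt++) {
 *           double sigdig = -std::log10(opt_precision);
 *           printf("attempt %d: ask for %.0f significant digits\n",
 *                  attempt, sigdig);
 *           opt_precision *= 10.0;    // reduce precision after a failure
 *       }
 *       return 0;
 *   }
 */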
2542 
2543 
2544 #endif //USE_SVMLIGHT