SHOGUN 6.1.3
SVMLight.cpp
1 /***********************************************************************/
2 /* */
3 /* SVMLight.cpp */
4 /* */
5 /* Author: Thorsten Joachims */
6 /* Date: 19.07.99 */
7 /* */
8 /* Copyright (c) 1999 Universitaet Dortmund - All rights reserved */
9 /* */
10 /* This software is available for non-commercial use only. It must */
11 /* not be modified and distributed without prior permission of the */
12 /* author. The author is not responsible for implications from the */
13 /* use of this software. */
14 /* */
15 /* THIS INCLUDES THE FOLLOWING ADDITIONS */
16 /* Generic Kernel Interfacing: Soeren Sonnenburg */
17 /* Parallelizations: Soeren Sonnenburg */
18 /* Multiple Kernel Learning: Gunnar Raetsch, Soeren Sonnenburg, */
19 /* Alexander Zien, Marius Kloft, Chen Guohua */
20 /* Linadd Speedup: Gunnar Raetsch, Soeren Sonnenburg */
21 /* */
22 /***********************************************************************/
23 #include <shogun/lib/config.h>
24 
25 #ifdef USE_SVMLIGHT
26 
27 #include <shogun/base/progress.h>
28 #include <shogun/io/SGIO.h>
29 #include <shogun/lib/Signal.h>
30 #include <shogun/lib/Time.h>
33 
35 #include <shogun/lib/external/pr_loqo.h>
36 
37 #include <shogun/kernel/Kernel.h>
38 #include <shogun/kernel/CombinedKernel.h>
39 #include <shogun/classifier/svm/SVMLight.h>
40 
41 #ifndef _WIN32
42 #include <unistd.h>
43 #endif
44 
45 #include <shogun/base/Parallel.h>
46 #include <shogun/labels/BinaryLabels.h>
47 
48 #include <stdio.h>
49 #include <ctype.h>
50 #include <string.h>
51 #include <stdlib.h>
52 #include <time.h>
53 
54 #ifdef HAVE_PTHREAD
55 #include <pthread.h>
56 
57 #endif
58 
59 using namespace shogun;
60 
61 #ifndef DOXYGEN_SHOULD_SKIP_THIS
62 struct S_THREAD_PARAM_REACTIVATE_LINADD
63 {
64  CKernel* kernel;
65  float64_t* lin;
66  float64_t* last_lin;
67  int32_t* active;
68  int32_t* docs;
69  int32_t start;
70  int32_t end;
71 };
72 
73 struct S_THREAD_PARAM_SVMLIGHT
74 {
75  float64_t * lin ;
76  float64_t* W;
77  int32_t start, end;
78  int32_t * active2dnum ;
79  int32_t * docs ;
80  CKernel* kernel ;
81 };
82 
83 struct S_THREAD_PARAM_REACTIVATE_VANILLA
84 {
85  CKernel* kernel;
86  float64_t* lin;
87  float64_t* aicache;
88  float64_t* a;
89  float64_t* a_old;
90  int32_t* changed2dnum;
91  int32_t* inactive2dnum;
92  int32_t* label;
93  int32_t start;
94  int32_t end;
95 };
96 
97 struct S_THREAD_PARAM_KERNEL
98 {
99  float64_t *Kval ;
100  int32_t *KI, *KJ ;
101  int32_t start, end;
102  CSVMLight* svmlight;
103 };
104 
105 #endif // DOXYGEN_SHOULD_SKIP_THIS
106 
107 void* CSVMLight::update_linear_component_linadd_helper(void* p)
108 {
109  S_THREAD_PARAM_SVMLIGHT* params = (S_THREAD_PARAM_SVMLIGHT*) p;
110 
111  int32_t jj=0, j=0 ;
112 
113  for (jj=params->start;(jj<params->end) && (j=params->active2dnum[jj])>=0;jj++)
114  params->lin[j]+=params->kernel->compute_optimized(params->docs[j]);
115 
116  return NULL ;
117 }
118 
119 void* CSVMLight::compute_kernel_helper(void* p)
120 {
121  S_THREAD_PARAM_KERNEL* params = (S_THREAD_PARAM_KERNEL*) p;
122 
123  int32_t jj=0 ;
124  for (jj=params->start;jj<params->end;jj++)
125  params->Kval[jj]=params->svmlight->compute_kernel(params->KI[jj], params->KJ[jj]) ;
126 
127  return NULL ;
128 }
129 
130 CSVMLight::CSVMLight()
131 : CSVM()
132 {
133  init();
134  set_kernel(NULL);
135 }
136 
137 CSVMLight::CSVMLight(float64_t C, CKernel* k, CLabels* lab)
138 : CSVM(C, k, lab)
139 {
140  init();
141 }
142 
143 void CSVMLight::init()
144 {
145  //optimizer settings
146  primal=NULL;
147  dual=NULL;
148  init_margin=0.15;
149  init_iter=500;
150 precision_violations=0;
151  model_b=0;
152  verbosity=1;
153 opt_precision=DEF_PRECISION;
154 
155  // svm variables
156  W=NULL;
157  model=SG_MALLOC(MODEL, 1);
158  learn_parm=SG_MALLOC(LEARN_PARM, 1);
159  model->supvec=NULL;
160  model->alpha=NULL;
161  model->index=NULL;
162 
163  // MKL stuff
164  mymaxdiff=1 ;
165  mkl_converged=false;
166 }
167 
168 CSVMLight::~CSVMLight()
169 {
170 
171  SG_FREE(model->supvec);
172  SG_FREE(model->alpha);
173  SG_FREE(model->index);
174  SG_FREE(model);
175  SG_FREE(learn_parm);
176 
177  // MKL stuff
178  SG_FREE(W);
179 
180  // optimizer variables
181  SG_FREE(dual);
182  SG_FREE(primal);
183 }
184 
185 bool CSVMLight::train_machine(CFeatures* data)
186 {
187  //certain setup params
188  mkl_converged=false;
189  verbosity=1 ;
190  init_margin=0.15;
191  init_iter=500;
192 precision_violations=0;
193 opt_precision=DEF_PRECISION;
194 
195  strcpy (learn_parm->predfile, "");
196  learn_parm->biased_hyperplane= get_bias_enabled() ? 1 : 0;
197  learn_parm->sharedslack=0;
198  learn_parm->remove_inconsistent=0;
199  learn_parm->skip_final_opt_check=0;
200  learn_parm->svm_maxqpsize=get_qpsize();
201  learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize-1;
202  learn_parm->maxiter=100000;
203  learn_parm->svm_iter_to_shrink=100;
204  learn_parm->svm_c=C1;
205  learn_parm->transduction_posratio=0.33;
206  learn_parm->svm_costratio=C2/C1;
207  learn_parm->svm_costratio_unlab=1.0;
208  learn_parm->svm_unlabbound=1E-5;
209  learn_parm->epsilon_crit=epsilon; // GU: better decrease it ... ??
210  learn_parm->epsilon_a=1E-15;
211  learn_parm->compute_loo=0;
212  learn_parm->rho=1.0;
213  learn_parm->xa_depth=0;
214 
215  if (!kernel)
216  SG_ERROR("SVM_light can not proceed without kernel!\n")
217 
218  if (data)
219  {
220  if (m_labels->get_num_labels() != data->get_num_vectors())
221  {
222  SG_ERROR("%s::train_machine(): Number of training vectors (%d) does"
223  " not match number of labels (%d)\n", get_name(),
224 data->get_num_vectors(), m_labels->get_num_labels());
225  }
226  kernel->init(data, data);
227  }
228 
229  if (!kernel->has_features())
230  SG_ERROR("SVM_light can not proceed without initialized kernel!\n")
231 
232 ASSERT(m_labels && m_labels->get_num_labels())
233 ASSERT(m_labels->get_label_type() == LT_BINARY)
234 ASSERT(kernel->get_num_vec_lhs()==m_labels->get_num_labels())
235 
236  // in case of LINADD enabled kernels cleanup!
237 if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
238  kernel->clear_normal() ;
239 
240  // output some info
241  SG_DEBUG("threads = %i\n", parallel->get_num_threads())
242  SG_DEBUG("qpsize = %i\n", learn_parm->svm_maxqpsize)
243  SG_DEBUG("epsilon = %1.1e\n", learn_parm->epsilon_crit)
244  SG_DEBUG("kernel->has_property(KP_LINADD) = %i\n", kernel->has_property(KP_LINADD))
245  SG_DEBUG("kernel->has_property(KP_KERNCOMBINATION) = %i\n", kernel->has_property(KP_KERNCOMBINATION))
246  SG_DEBUG("kernel->has_property(KP_BATCHEVALUATION) = %i\n", kernel->has_property(KP_BATCHEVALUATION))
247  SG_DEBUG("kernel->get_optimization_type() = %s\n", kernel->get_optimization_type()==FASTBUTMEMHUNGRY ? "FASTBUTMEMHUNGRY" : "SLOWBUTMEMEFFICIENT" )
248  SG_DEBUG("get_solver_type() = %i\n", get_solver_type())
249  SG_DEBUG("get_linadd_enabled() = %i\n", get_linadd_enabled())
250  SG_DEBUG("get_batch_computation_enabled() = %i\n", get_batch_computation_enabled())
251  SG_DEBUG("kernel->get_num_subkernels() = %i\n", kernel->get_num_subkernels())
252 
253 use_kernel_cache = !((kernel->get_kernel_type() == K_CUSTOM) ||
254 (get_linadd_enabled() && kernel->has_property(KP_LINADD)));
255 
256  SG_DEBUG("use_kernel_cache = %i\n", use_kernel_cache)
257 
258 if (kernel->get_kernel_type() == K_COMBINED)
259  {
260 
261  for (index_t k_idx=0; k_idx<((CCombinedKernel*) kernel)->get_num_kernels(); k_idx++)
262  {
263  CKernel* kn = ((CCombinedKernel*) kernel)->get_kernel(k_idx);
264  // allocate kernel cache but clean up beforehand
265 kn->resize_kernel_cache(kn->get_cache_size());
266  SG_UNREF(kn);
267  }
268  }
269 
270 kernel->resize_kernel_cache(kernel->get_cache_size());
271 
272  // train the svm
273  svm_learn();
274 
275  // brain damaged svm light work around
276  create_new_model(model->sv_num-1);
277  set_bias(-model->b);
278  for (int32_t i=0; i<model->sv_num-1; i++)
279  {
280  set_alpha(i, model->alpha[i+1]);
281  set_support_vector(i, model->supvec[i+1]);
282  }
283 
284  // in case of LINADD enabled kernels cleanup!
285 if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
286  {
287  kernel->clear_normal() ;
288 kernel->delete_optimization() ;
289  }
290 
291  if (use_kernel_cache)
292 kernel->kernel_cache_cleanup();
293 
294  return true ;
295 }
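For orientation, here is a minimal end-to-end training sketch against the Shogun 6.x modular C++ API (assuming a build with USE_SVMLIGHT enabled; the toy data, the Gaussian kernel width 2.0, and C=1.0 are illustrative assumptions, not values prescribed by this file):

#include <shogun/base/init.h>
#include <shogun/lib/SGMatrix.h>
#include <shogun/lib/SGVector.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/classifier/svm/SVMLight.h>

using namespace shogun;

int main()
{
	init_shogun_with_defaults();

	// four 2-d points (column-major), labelled +/-1 by x-coordinate
	SGMatrix<float64_t> X(2, 4);
	const float64_t pts[] = {-1,-1,  -1,1,  1,-1,  1,1};
	for (int i=0; i<8; i++)
		X.matrix[i] = pts[i];
	SGVector<float64_t> y(4);
	y[0]=-1; y[1]=-1; y[2]=1; y[3]=1;

	CDenseFeatures<float64_t>* feats = new CDenseFeatures<float64_t>(X);
	CBinaryLabels* labels = new CBinaryLabels(y);
	CGaussianKernel* kernel = new CGaussianKernel(feats, feats, 2.0);

	CSVMLight* svm = new CSVMLight(1.0, kernel, labels); // C, kernel, labels
	svm->train();                                        // dispatches to train_machine() above
	CBinaryLabels* out = svm->apply_binary(feats);       // predict on the training data

	SG_UNREF(out);
	SG_UNREF(svm);
	exit_shogun();
	return 0;
}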
296 
297 int32_t CSVMLight::get_runtime()
298 {
299  clock_t start;
300  start = clock();
301  return((int32_t)((float64_t)start*100.0/(float64_t)CLOCKS_PER_SEC));
302 }
303 
304 
305 void CSVMLight::svm_learn()
306 {
307  int32_t *inconsistent, i;
308  int32_t misclassified,upsupvecnum;
309  float64_t maxdiff, *lin, *c, *a;
310  int32_t iterations;
311  int32_t trainpos=0, trainneg=0 ;
312 ASSERT(m_labels)
313  SGVector<int32_t> lab=((CBinaryLabels*) m_labels)->get_int_labels();
314  int32_t totdoc=lab.vlen;
315  ASSERT(lab.vector && lab.vlen)
316  int32_t* label=SGVector<int32_t>::clone_vector(lab.vector, lab.vlen);
317 
318  int32_t* docs=SG_MALLOC(int32_t, totdoc);
319  SG_FREE(W);
320  W=NULL;
321  count = 0 ;
322 
323 if (kernel->has_property(KP_KERNCOMBINATION) && callback)
324  {
325  W = SG_MALLOC(float64_t, totdoc*kernel->get_num_subkernels());
326  for (i=0; i<totdoc*kernel->get_num_subkernels(); i++)
327  W[i]=0;
328  }
329 
330  for (i=0; i<totdoc; i++)
331  docs[i]=i;
332 
333  float64_t *xi_fullset; /* buffer for storing xi on full sample in loo */
334  float64_t *a_fullset; /* buffer for storing alpha on full sample in loo */
335  TIMING timing_profile;
336  SHRINK_STATE shrink_state;
337 
338  timing_profile.time_kernel=0;
339  timing_profile.time_opti=0;
340  timing_profile.time_shrink=0;
341  timing_profile.time_update=0;
342  timing_profile.time_model=0;
343  timing_profile.time_check=0;
344  timing_profile.time_select=0;
345 
346  /* make sure -n value is reasonable */
347  if((learn_parm->svm_newvarsinqp < 2)
348  || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
349  learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
350  }
351 
352  init_shrink_state(&shrink_state,totdoc,(int32_t)MAXSHRINK);
353 
354  inconsistent = SG_MALLOC(int32_t, totdoc);
355  c = SG_MALLOC(float64_t, totdoc);
356  a = SG_MALLOC(float64_t, totdoc);
357  a_fullset = SG_MALLOC(float64_t, totdoc);
358  xi_fullset = SG_MALLOC(float64_t, totdoc);
359  lin = SG_MALLOC(float64_t, totdoc);
360  if (m_linear_term.vlen>0)
361 learn_parm->eps=get_linear_term_array();
362  else
363  {
364  learn_parm->eps=SG_MALLOC(float64_t, totdoc); /* equivalent regression epsilon for classification */
365  SGVector<float64_t>::fill_vector(learn_parm->eps, totdoc, -1.0);
366  }
367 
368  learn_parm->svm_cost = SG_MALLOC(float64_t, totdoc);
369 
370  SG_FREE(model->supvec);
371  SG_FREE(model->alpha);
372  SG_FREE(model->index);
373  model->supvec = SG_MALLOC(int32_t, totdoc+2);
374  model->alpha = SG_MALLOC(float64_t, totdoc+2);
375  model->index = SG_MALLOC(int32_t, totdoc+2);
376 
377  model->at_upper_bound=0;
378  model->b=0;
379  model->supvec[0]=0; /* element 0 reserved and empty for now */
380  model->alpha[0]=0;
381  model->totdoc=totdoc;
382 
383  model->kernel=kernel;
384 
385  model->sv_num=1;
386  model->loo_error=-1;
387  model->loo_recall=-1;
388  model->loo_precision=-1;
389  model->xa_error=-1;
390  model->xa_recall=-1;
391  model->xa_precision=-1;
392 
393  for (i=0;i<totdoc;i++) { /* various inits */
394  inconsistent[i]=0;
395  c[i]=0;
396  a[i]=0;
397  lin[i]=0;
398 
399  if(label[i] > 0) {
400  learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
401  fabs((float64_t)label[i]);
402  label[i]=1;
403  trainpos++;
404  }
405  else if(label[i] < 0) {
406  learn_parm->svm_cost[i]=learn_parm->svm_c*fabs((float64_t)label[i]);
407  label[i]=-1;
408  trainneg++;
409  }
410  else {
411  learn_parm->svm_cost[i]=0;
412  }
413  }
414 
415  /* compute starting state for initial alpha values */
416  SG_DEBUG("alpha:%d num_sv:%d\n", m_alpha.vector, get_num_support_vectors())
417 
418 if(m_alpha.vector && get_num_support_vectors()) {
419  if(verbosity>=1) {
420  SG_INFO("Computing starting state...")
421  }
422 
423  float64_t* alpha = SG_MALLOC(float64_t, totdoc);
424 
425  for (i=0; i<totdoc; i++)
426  alpha[i]=0;
427 
428  for (i=0; i<get_num_support_vectors(); i++)
429  alpha[get_support_vector(i)]=get_alpha(i);
430 
431  int32_t* index = SG_MALLOC(int32_t, totdoc);
432  int32_t* index2dnum = SG_MALLOC(int32_t, totdoc+11);
433  float64_t* aicache = SG_MALLOC(float64_t, totdoc);
434  for (i=0;i<totdoc;i++) { /* create full index and clip alphas */
435  index[i]=1;
436  alpha[i]=fabs(alpha[i]);
437  if(alpha[i]<0) alpha[i]=0;
438  if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];
439  }
440 
441  if (use_kernel_cache)
442  {
443  if (callback &&
444  (!((CCombinedKernel*) kernel)->get_append_subkernel_weights())
445  )
446  {
447 CCombinedKernel* k = (CCombinedKernel*) kernel;
448  for (index_t k_idx=0; k_idx<k->get_num_kernels(); k_idx++)
449  {
450  CKernel* kn = k->get_kernel(k_idx);
451  for (i=0;i<totdoc;i++) // fill kernel cache with unbounded SV
452  if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
453  && (kn->kernel_cache_space_available()))
454  kn->cache_kernel_row(i);
455 
456  for (i=0;i<totdoc;i++) // fill rest of kernel cache with bounded SV
457  if((alpha[i]==learn_parm->svm_cost[i])
458  && (kn->kernel_cache_space_available()))
459  kn->cache_kernel_row(i);
460 
461  SG_UNREF(kn);
462  }
463  }
464  else
465  {
466  for (i=0;i<totdoc;i++) /* fill kernel cache with unbounded SV */
467  if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
468 && (kernel->kernel_cache_space_available()))
469 kernel->cache_kernel_row(i);
470 
471  for (i=0;i<totdoc;i++) /* fill rest of kernel cache with bounded SV */
472  if((alpha[i]==learn_parm->svm_cost[i])
473 && (kernel->kernel_cache_space_available()))
474 kernel->cache_kernel_row(i);
475  }
476  }
477  compute_index(index,totdoc,index2dnum);
478  update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc,
479  lin,aicache,NULL);
480  calculate_svm_model(docs,label,lin,alpha,a,c,
481  index2dnum,index2dnum);
482  for (i=0;i<totdoc;i++) { /* copy initial alphas */
483  a[i]=alpha[i];
484  }
485 
486  SG_FREE(index);
487  SG_FREE(index2dnum);
488  SG_FREE(aicache);
489  SG_FREE(alpha);
490 
491  if(verbosity>=1)
492  SG_DONE()
493  }
494  SG_DEBUG("%d totdoc %d pos %d neg\n", totdoc, trainpos, trainneg)
495  SG_DEBUG("Optimizing...\n")
496 
497  /* train the svm */
498  iterations=optimize_to_convergence(docs,label,totdoc,
499  &shrink_state,inconsistent,a,lin,
500  c,&timing_profile,
501  &maxdiff,(int32_t)-1,
502  (int32_t)1);
503 
504 
505  if(verbosity>=1) {
506  if(verbosity==1)
507  {
508  SG_DONE()
509  SG_DEBUG("(%ld iterations)", iterations)
510  }
511 
512  misclassified=0;
513  for (i=0;(i<totdoc);i++) { /* get final statistic */
514  if((lin[i]-model->b)*(float64_t)label[i] <= 0.0)
515  misclassified++;
516  }
517 
518  SG_INFO("Optimization finished (%ld misclassified, maxdiff=%.8f).\n",
519  misclassified,maxdiff);
520 
521  SG_INFO("obj = %.16f, rho = %.16f\n",get_objective(),model->b)
522  if (maxdiff>epsilon)
523  SG_WARNING("maximum violation (%f) exceeds svm_epsilon (%f) due to numerical difficulties\n", maxdiff, epsilon)
524 
525  upsupvecnum=0;
526  for (i=1;i<model->sv_num;i++)
527  {
528  if(fabs(model->alpha[i]) >=
529  (learn_parm->svm_cost[model->supvec[i]]-
530  learn_parm->epsilon_a))
531  upsupvecnum++;
532  }
533  SG_INFO("Number of SV: %d (including %d at upper bound)\n",
534  model->sv_num-1,upsupvecnum);
535  }
536 
537  shrink_state_cleanup(&shrink_state);
538  SG_FREE(label);
539  SG_FREE(inconsistent);
540  SG_FREE(c);
541  SG_FREE(a);
542  SG_FREE(a_fullset);
543  SG_FREE(xi_fullset);
544  SG_FREE(lin);
545  SG_FREE(learn_parm->eps);
546  SG_FREE(learn_parm->svm_cost);
547  SG_FREE(docs);
548 }
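For reference, svm_learn() and optimize_to_convergence() jointly solve the generalized SVM dual that SVMLight is built around. A sketch in this file's notation, with per-example costs C_i = learn_parm->svm_cost[i], linear term eps_i and right-hand side c_i (eps_i = -1, c_i = 0 in plain classification):

\max_{\alpha}\;\; -\sum_i (\varepsilon_i - y_i c_i)\,\alpha_i \;-\; \tfrac{1}{2}\sum_{i,j} \alpha_i \alpha_j\, y_i y_j\, k(x_i,x_j)
\quad\text{s.t.}\quad 0 \le \alpha_i \le C_i,\qquad \sum_i y_i \alpha_i = 0,

where the equality constraint is dropped when biased_hyperplane is 0.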
549 
550 int32_t CSVMLight::optimize_to_convergence(int32_t* docs, int32_t* label, int32_t totdoc,
551  SHRINK_STATE *shrink_state,
552  int32_t *inconsistent,
553  float64_t *a, float64_t *lin, float64_t *c,
554  TIMING *timing_profile, float64_t *maxdiff,
555  int32_t heldout, int32_t retrain)
556  /* docs: Training vectors (x-part) */
557  /* label: Training labels/value (y-part, zero if test example for
558  transduction) */
559  /* totdoc: Number of examples in docs/label */
560  /* learn_parm: Learning parameters */
561  /* kernel_parm: Kernel parameters */
562  /* kernel_cache: Initialized/partly filled Cache, if using a kernel.
563  NULL if linear. */
564  /* shrink_state: State of active variables */
565  /* inconsistent: examples thrown out as inconsistent */
566  /* a: alphas */
567  /* lin: linear component of gradient */
568  /* c: right hand side of inequalities (margin) */
569  /* maxdiff: returns maximum violation of KT-conditions */
570  /* heldout: marks held-out example for leave-one-out (or -1) */
571  /* retrain: selects training mode (1=regular / 2=holdout) */
572 {
573 
574  int32_t *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink;
575  int32_t inconsistentnum,choosenum,already_chosen=0,iteration;
576  int32_t misclassified,supvecnum=0,*active2dnum,inactivenum;
577  int32_t *working2dnum,*selexam;
578  int32_t activenum;
579  float64_t criterion, eq;
580  float64_t *a_old;
581  int32_t t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */
582  float64_t epsilon_crit_org;
583  float64_t bestmaxdiff;
584  float64_t worstmaxdiff;
585  int32_t bestmaxdiffiter,terminate;
586  bool reactivated=false;
587 
588  float64_t *selcrit; /* buffer for sorting */
589  float64_t *aicache; /* buffer to keep one row of hessian */
590  QP qp; /* buffer for one quadratic program */
591 
592  epsilon_crit_org=learn_parm->epsilon_crit; /* save org */
593 if (kernel->has_property(KP_LINADD) && get_linadd_enabled()) {
594  learn_parm->epsilon_crit=2.0;
595  /* caching makes no sense for linear kernel */
596  }
597  learn_parm->epsilon_shrink=2;
598  (*maxdiff)=1;
599 
600  SG_DEBUG("totdoc:%d\n",totdoc)
601  chosen = SG_MALLOC(int32_t, totdoc);
602  last_suboptimal_at =SG_MALLOC(int32_t, totdoc);
603  key =SG_MALLOC(int32_t, totdoc+11);
604  selcrit =SG_MALLOC(float64_t, totdoc);
605  selexam =SG_MALLOC(int32_t, totdoc);
606  a_old =SG_MALLOC(float64_t, totdoc);
607  aicache =SG_MALLOC(float64_t, totdoc);
608  working2dnum =SG_MALLOC(int32_t, totdoc+11);
609  active2dnum =SG_MALLOC(int32_t, totdoc+11);
610  qp.opt_ce =SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
611  qp.opt_ce0 =SG_MALLOC(float64_t, 1);
612  qp.opt_g =SG_MALLOC(float64_t, learn_parm->svm_maxqpsize*learn_parm->svm_maxqpsize);
613  qp.opt_g0 =SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
614  qp.opt_xinit =SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
615  qp.opt_low=SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
616  qp.opt_up=SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
617 
618  choosenum=0;
619  inconsistentnum=0;
620  if(!retrain) retrain=1;
621  iteration=1;
622  bestmaxdiffiter=1;
623  bestmaxdiff=999999999;
624  worstmaxdiff=1e-10;
625  terminate=0;
626 
627 
628  kernel->set_time(iteration); /* for lru cache */
629 
630  for (i=0;i<totdoc;i++) { /* various inits */
631  chosen[i]=0;
632  a_old[i]=a[i];
633  last_suboptimal_at[i]=1;
634  if(inconsistent[i])
635  inconsistentnum++;
636  }
637  activenum=compute_index(shrink_state->active,totdoc,active2dnum);
638  inactivenum=totdoc-activenum;
639  clear_index(working2dnum);
640 
641  /* repeat this loop until we have convergence */
642  CTime start_time;
643  mkl_converged=false;
644 
645  auto pb = progress(range(10), *this->io);
646 #ifdef CYGWIN
647  for (;((iteration<100 || (!mkl_converged && callback) ) || (retrain && (!terminate))); iteration++){
648 #else
649 
650  for (; ((!cancel_computation()) &&
651  ((iteration < 3 || (!mkl_converged && callback)) ||
652  (retrain && (!terminate))));
653  iteration++)
654  {
655 #endif
656 
657  if(use_kernel_cache)
658  kernel->set_time(iteration); /* for lru cache */
659 
660  if(verbosity>=2) t0=get_runtime();
661  if(verbosity>=3) {
662  SG_DEBUG("\nSelecting working set... ")
663  }
664 
665  if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize)
666  learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
667 
668  i=0;
669  for (jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */
670  if((chosen[j]>=(learn_parm->svm_maxqpsize/
671  CMath::min(learn_parm->svm_maxqpsize,
672  learn_parm->svm_newvarsinqp)))
673  || (inconsistent[j])
674  || (j == heldout)) {
675  chosen[j]=0;
676  choosenum--;
677  }
678  else {
679  chosen[j]++;
680  working2dnum[i++]=j;
681  }
682  }
683  working2dnum[i]=-1;
684 
685  if(retrain == 2) {
686  choosenum=0;
687  for (jj=0;(j=working2dnum[jj])>=0;jj++) { /* fully clear working set */
688  chosen[j]=0;
689  }
690  clear_index(working2dnum);
691  for (i=0;i<totdoc;i++) { /* set inconsistent examples to zero (-i 1) */
692  if((inconsistent[i] || (heldout==i)) && (a[i] != 0.0)) {
693  chosen[i]=99999;
694  choosenum++;
695  a[i]=0;
696  }
697  }
698  if(learn_parm->biased_hyperplane) {
699  eq=0;
700  for (i=0;i<totdoc;i++) { /* make sure we fulfill equality constraint */
701  eq+=a[i]*label[i];
702  }
703  for (i=0;(i<totdoc) && (fabs(eq) > learn_parm->epsilon_a);i++) {
704  if((eq*label[i] > 0) && (a[i] > 0)) {
705  chosen[i]=88888;
706  choosenum++;
707  if((eq*label[i]) > a[i]) {
708  eq-=(a[i]*label[i]);
709  a[i]=0;
710  }
711  else {
712  a[i]-=(eq*label[i]);
713  eq=0;
714  }
715  }
716  }
717  }
718  compute_index(chosen,totdoc,working2dnum);
719  }
720  else
721  { /* select working set according to steepest gradient */
722  if(iteration % 101)
723  {
724  already_chosen=0;
725  if(CMath::min(learn_parm->svm_newvarsinqp, learn_parm->svm_maxqpsize-choosenum)>=4 &&
726 (!(kernel->has_property(KP_KERNCOMBINATION) && callback)) )
727  {
728  /* select part of the working set from cache */
729  already_chosen=select_next_qp_subproblem_grad(
730  label,a,lin,c,totdoc,
731  (int32_t)(CMath::min(learn_parm->svm_maxqpsize-choosenum,
732  learn_parm->svm_newvarsinqp)/2),
733  inconsistent,active2dnum,
734  working2dnum,selcrit,selexam,1,
735  key,chosen);
736  choosenum+=already_chosen;
737  }
738 choosenum+=select_next_qp_subproblem_grad(
739  label,a,lin,c,totdoc,
740  CMath::min(learn_parm->svm_maxqpsize-choosenum,
741  learn_parm->svm_newvarsinqp-already_chosen),
742  inconsistent,active2dnum,
743  working2dnum,selcrit,selexam,0,key,
744  chosen);
745  }
746  else { /* once in a while, select a somewhat random working set
747  to break out of infinite loops due to numerical
748  inaccuracies in the core qp-solver */
749 choosenum+=select_next_qp_subproblem_rand(
750  label,a,lin,c,totdoc,
751  CMath::min(learn_parm->svm_maxqpsize-choosenum,
752  learn_parm->svm_newvarsinqp),
753  inconsistent,active2dnum,
754  working2dnum,selcrit,selexam,key,
755  chosen,iteration);
756  }
757  }
758 
759  if(verbosity>=2) {
760  SG_INFO(" %ld vectors chosen\n",choosenum)
761  }
762 
763  if(verbosity>=2) t1=get_runtime();
764 
765  if (use_kernel_cache)
766  {
767  // in case of MKL w/o linadd cache each kernel independently
768  // else if linadd is disabled cache single kernel
769  if ( callback &&
770  (!((CCombinedKernel*) kernel)->get_append_subkernel_weights())
771  )
772  {
773 CCombinedKernel* k = (CCombinedKernel*) kernel;
774  for (index_t k_idx=0; k_idx<k->get_num_kernels(); k_idx++)
775  {
776  CKernel* kn = k->get_kernel(k_idx);
777  kn->cache_multiple_kernel_rows(working2dnum, choosenum);
778  SG_UNREF(kn);
779  }
780  }
781  else
782  kernel->cache_multiple_kernel_rows(working2dnum, choosenum);
783  }
784 
785  if(verbosity>=2) t2=get_runtime();
786 
787  if(retrain != 2) {
788  optimize_svm(docs,label,inconsistent,0.0,chosen,active2dnum,
789  totdoc,working2dnum,choosenum,a,lin,c,
790  aicache,&qp,&epsilon_crit_org);
791  }
792 
793  if(verbosity>=2) t3=get_runtime();
794  update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc,
795  lin,aicache,c);
796 
797  if(verbosity>=2) t4=get_runtime();
798  supvecnum=calculate_svm_model(docs,label,lin,a,a_old,c,working2dnum,active2dnum);
799 
800  if(verbosity>=2) t5=get_runtime();
801 
802  for (jj=0;(i=working2dnum[jj])>=0;jj++) {
803  a_old[i]=a[i];
804  }
805 
806  retrain=check_optimality(label,a,lin,c,totdoc,
807  maxdiff,epsilon_crit_org,&misclassified,
808  inconsistent,active2dnum,last_suboptimal_at,
809  iteration);
810 
811  if(verbosity>=2) {
812  t6=get_runtime();
813  timing_profile->time_select+=t1-t0;
814  timing_profile->time_kernel+=t2-t1;
815  timing_profile->time_opti+=t3-t2;
816  timing_profile->time_update+=t4-t3;
817  timing_profile->time_model+=t5-t4;
818  timing_profile->time_check+=t6-t5;
819  }
820 
821  /* checking whether optimizer got stuck */
822  if((*maxdiff) < bestmaxdiff) {
823  bestmaxdiff=(*maxdiff);
824  bestmaxdiffiter=iteration;
825  }
826  if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) {
827  /* long time no progress? */
828  terminate=1;
829  retrain=0;
830  SG_WARNING("Relaxing KT-Conditions due to slow progress! Terminating!\n")
831  SG_DEBUG("(iteration: %d, bestmaxdiffiter: %d, learn_parm->maxiter: %d)\n", iteration, bestmaxdiffiter, learn_parm->maxiter )
832  }
833 
834  noshrink= (get_shrinking_enabled()) ? 0 : 1;
835 
836  if ((!callback) && (!retrain) && (inactivenum>0) &&
837  ((!learn_parm->skip_final_opt_check) || (kernel->has_property(KP_LINADD) && get_linadd_enabled())))
838  {
839  t1=get_runtime();
840  SG_DEBUG("reactivating inactive examples\n")
841 
842  reactivate_inactive_examples(label,a,shrink_state,lin,c,totdoc,
843  iteration,inconsistent,
844  docs,aicache,
845  maxdiff);
846  reactivated=true;
847  SG_DEBUG("done reactivating inactive examples (maxdiff:%8f eps_crit:%8f orig_eps:%8f)\n", *maxdiff, learn_parm->epsilon_crit, epsilon_crit_org)
848  /* Update to new active variables. */
849  activenum=compute_index(shrink_state->active,totdoc,active2dnum);
850  inactivenum=totdoc-activenum;
851  /* reset watchdog */
852  bestmaxdiff=(*maxdiff);
853  bestmaxdiffiter=iteration;
854 
855  /* termination criterion */
856  noshrink=1;
857  retrain=0;
858  if((*maxdiff) > learn_parm->epsilon_crit)
859  {
860  SG_INFO("restarting optimization as we are - due to shrinking - deviating too much (maxdiff(%f) > eps(%f))\n", *maxdiff, learn_parm->epsilon_crit)
861  retrain=1;
862  }
863  timing_profile->time_shrink+=get_runtime()-t1;
864  if (((verbosity>=1) && (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())))
865  || (verbosity>=2)) {
866  SG_DONE()
867  SG_DEBUG("Number of inactive variables = %ld\n", inactivenum)
868  }
869  }
870 
871  if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff)))
872  learn_parm->epsilon_crit=(*maxdiff);
873  if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) {
874  learn_parm->epsilon_crit/=4.0;
875  retrain=1;
876  noshrink=1;
877  }
878  if(learn_parm->epsilon_crit<epsilon_crit_org)
879  learn_parm->epsilon_crit=epsilon_crit_org;
880 
881  if(verbosity>=2) {
882  SG_INFO(" => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n",
883  supvecnum,model->at_upper_bound,(*maxdiff));
884 
885  }
886  mymaxdiff=*maxdiff ;
887 
888  //don't shrink w/ mkl
889  if (((iteration % 10) == 0) && (!noshrink) && !callback)
890  {
891  activenum=shrink_problem(shrink_state,active2dnum,last_suboptimal_at,iteration,totdoc,
892  CMath::max((int32_t)(activenum/10),
893  CMath::max((int32_t)(totdoc/500),(int32_t) 100)),
894  a,inconsistent, c, lin, label);
895 
896  inactivenum=totdoc-activenum;
897 
898  if (use_kernel_cache && (supvecnum>kernel->get_max_elems_cache())
899  && ((kernel->get_activenum_cache()-activenum)>CMath::max((int32_t)(activenum/10),(int32_t) 500))) {
900 
901  kernel->kernel_cache_shrink(totdoc, CMath::min((int32_t) (kernel->get_activenum_cache()-activenum),
902  (int32_t) (kernel->get_activenum_cache()-supvecnum)),
903  shrink_state->active);
904  }
905  }
906 
907  if (bestmaxdiff>worstmaxdiff)
908  worstmaxdiff=bestmaxdiff;
909 
910  pb.print_absolute(
911  bestmaxdiff, -CMath::log10(bestmaxdiff), -CMath::log10(worstmaxdiff),
912 -CMath::log10(epsilon));
913 
914  /* Terminate loop */
915  if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time) {
916  terminate = 1;
917  retrain = 0;
918  }
919 
920  } /* end of loop */
921  pb.complete_absolute();
922 
923  SG_DEBUG("inactive:%d\n", inactivenum)
924 
925  if (inactivenum && !reactivated && !callback)
926  {
927  SG_DEBUG("reactivating inactive examples\n")
928  reactivate_inactive_examples(label,a,shrink_state,lin,c,totdoc,
929  iteration,inconsistent,
930  docs,aicache,
931  maxdiff);
932  SG_DEBUG("done reactivating inactive examples\n")
933  /* Update to new active variables. */
934  activenum=compute_index(shrink_state->active,totdoc,active2dnum);
935  inactivenum=totdoc-activenum;
936  /* reset watchdog */
937  bestmaxdiff=(*maxdiff);
938  bestmaxdiffiter=iteration;
939  }
940 
941  //use this for our purposes!
942  criterion=compute_objective_function(a,lin,c,learn_parm->eps,label,totdoc);
943  CSVM::set_objective(criterion);
944 
945  SG_FREE(chosen);
946  SG_FREE(last_suboptimal_at);
947  SG_FREE(key);
948  SG_FREE(selcrit);
949  SG_FREE(selexam);
950  SG_FREE(a_old);
951  SG_FREE(aicache);
952  SG_FREE(working2dnum);
953  SG_FREE(active2dnum);
954  SG_FREE(qp.opt_ce);
955  SG_FREE(qp.opt_ce0);
956  SG_FREE(qp.opt_g);
957  SG_FREE(qp.opt_g0);
958  SG_FREE(qp.opt_xinit);
959  SG_FREE(qp.opt_low);
960  SG_FREE(qp.opt_up);
961 
962  learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */
963 
964  return(iteration);
965 }
966 
967 float64_t CSVMLight::compute_objective_function(
968  float64_t *a, float64_t *lin, float64_t *c, float64_t* eps, int32_t *label,
969  int32_t totdoc)
970  /* Return value of objective function. */
971  /* Works only relative to the active variables! */
972 {
973  /* calculate value of objective function */
974  float64_t criterion=0;
975 
976  for (int32_t i=0;i<totdoc;i++)
977  criterion=criterion+(eps[i]-(float64_t)label[i]*c[i])*a[i]+0.5*a[i]*label[i]*lin[i];
978 
979  return(criterion);
980 }
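In formula form, the loop above evaluates (a sketch using the definitions of lin and eps from svm_learn above):

\mathrm{criterion} \;=\; \sum_i \bigl(\varepsilon_i - y_i c_i\bigr)\,a_i \;+\; \tfrac{1}{2}\sum_i a_i\, y_i\, \mathrm{lin}_i,
\qquad
\mathrm{lin}_i \;=\; \sum_j a_j\, y_j\, k(x_i, x_j).

With the classification defaults set in svm_learn (eps_i = -1, c_i = 0) this reduces to -(sum_i a_i - 1/2 sum_{i,j} a_i a_j y_i y_j k(x_i,x_j)), i.e. the negated value of the usual SVM dual objective, which is what CSVM::set_objective() is fed above.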
981 
982 
983 void CSVMLight::clear_index(int32_t *index)
984  /* initializes and empties index */
985 {
986  index[0]=-1;
987 }
988 
989 void CSVMLight::add_to_index(int32_t *index, int32_t elem)
990  /* appends elem to the -1 terminated index */
991 {
992  register int32_t i;
993  for (i=0;index[i] != -1;i++);
994  index[i]=elem;
995  index[i+1]=-1;
996 }
997 
998 int32_t CSVMLight::compute_index(
999  int32_t *binfeature, int32_t range, int32_t *index)
1000  /* create an inverted index of binfeature */
1001 {
1002  register int32_t i,ii;
1003 
1004  ii=0;
1005  for (i=0;i<range;i++) {
1006  if(binfeature[i]) {
1007  index[ii]=i;
1008  ii++;
1009  }
1010  }
1011  for (i=0;i<4;i++) {
1012  index[ii+i]=-1;
1013  }
1014  return(ii);
1015 }
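The index arrays produced here drive most loops in this file: they hold the positions of the non-zero entries of a 0/1 vector, padded with -1 sentinels so callers can iterate without knowing the length. A standalone sketch of the same idiom (build_index is a hypothetical helper written for illustration, not part of the class):

#include <cstdio>
#include <cstdint>

// Collect positions where binfeature[i] != 0 and terminate with -1
// sentinels, mirroring what compute_index() does above.
static int32_t build_index(const int32_t* binfeature, int32_t range, int32_t* index)
{
	int32_t ii = 0;
	for (int32_t i = 0; i < range; i++)
		if (binfeature[i])
			index[ii++] = i;
	for (int32_t i = 0; i < 4; i++)  // same 4-sentinel padding as compute_index
		index[ii + i] = -1;
	return ii;
}

int main()
{
	int32_t active[6] = {1, 0, 1, 1, 0, 0};
	int32_t index[6 + 4];
	build_index(active, 6, index);
	for (int32_t ii = 0, i; (i = index[ii]) >= 0; ii++)  // the traversal idiom used throughout
		std::printf("active variable %d\n", i);
	return 0;
}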
1016 
1017 
1018 void CSVMLight::optimize_svm(
1019  int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
1020  float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t totdoc,
1021  int32_t *working2dnum, int32_t varnum, float64_t *a, float64_t *lin,
1022  float64_t *c, float64_t *aicache, QP *qp, float64_t *epsilon_crit_target)
1023  /* Do optimization on the working set. */
1024 {
1025  int32_t i;
1026  float64_t *a_v;
1027 
1028  //compute_matrices_for_optimization_parallel(docs,label,
1029  // exclude_from_eq_const,eq_target,chosen,
1030  // active2dnum,working2dnum,a,lin,c,
1031  // varnum,totdoc,aicache,qp);
1032 
1033 compute_matrices_for_optimization(docs,label,
1034  exclude_from_eq_const,eq_target,chosen,
1035  active2dnum,working2dnum,a,lin,c,
1036  varnum,totdoc,aicache,qp);
1037 
1038  if(verbosity>=3) {
1039  SG_DEBUG("Running optimizer...")
1040  }
1041  /* call the qp-subsolver */
1042  a_v=optimize_qp(qp,epsilon_crit_target,
1043  learn_parm->svm_maxqpsize,
1044  &(model->b), /* in case the optimizer gives us */
1045  learn_parm->svm_maxqpsize); /* the threshold for free. otherwise */
1046  /* b is calculated in calculate_model. */
1047  if(verbosity>=3) {
1048  SG_DONE()
1049  }
1050 
1051  for (i=0;i<varnum;i++)
1052  a[working2dnum[i]]=a_v[i];
1053 }
1054 
1055 void CSVMLight::compute_matrices_for_optimization_parallel(
1056  int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
1057  float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t *key,
1058  float64_t *a, float64_t *lin, float64_t *c, int32_t varnum, int32_t totdoc,
1059  float64_t *aicache, QP *qp)
1060 {
1061  // TODO: port to use OpenMP backend instead of pthread
1062 #ifdef HAVE_PTHREAD
1063  int32_t num_threads=parallel->get_num_threads();
1064 #else
1065  int32_t num_threads=1;
1066 #endif
1067  if (num_threads < 2)
1068  {
1069  compute_matrices_for_optimization(docs, label, exclude_from_eq_const, eq_target,
1070  chosen, active2dnum, key, a, lin, c,
1071  varnum, totdoc, aicache, qp) ;
1072  }
1073 #ifdef HAVE_PTHREAD
1074  else
1075  {
1076  register int32_t ki,kj,i,j;
1077  register float64_t kernel_temp;
1078 
1079  qp->opt_n=varnum;
1080  qp->opt_ce0[0]=-eq_target; /* compute the constant for equality constraint */
1081  for (j=1;j<model->sv_num;j++) { /* start at 1 */
1082  if((!chosen[model->supvec[j]])
1083  && (!exclude_from_eq_const[(model->supvec[j])])) {
1084  qp->opt_ce0[0]+=model->alpha[j];
1085  }
1086  }
1087  if(learn_parm->biased_hyperplane)
1088  qp->opt_m=1;
1089  else
1090  qp->opt_m=0; /* eq-constraint will be ignored */
1091 
1092  /* init linear part of objective function */
1093  for (i=0;i<varnum;i++) {
1094  qp->opt_g0[i]=lin[key[i]];
1095  }
1096 
1097  ASSERT(num_threads>1)
1098  int32_t *KI=SG_MALLOC(int32_t, varnum*varnum);
1099  int32_t *KJ=SG_MALLOC(int32_t, varnum*varnum);
1100  int32_t Knum=0 ;
1101  float64_t *Kval = SG_MALLOC(float64_t, varnum*(varnum+1)/2);
1102  for (i=0;i<varnum;i++) {
1103  ki=key[i];
1104  KI[Knum]=docs[ki] ;
1105  KJ[Knum]=docs[ki] ;
1106  Knum++ ;
1107  for (j=i+1;j<varnum;j++)
1108  {
1109  kj=key[j];
1110  KI[Knum]=docs[ki] ;
1111  KJ[Knum]=docs[kj] ;
1112  Knum++ ;
1113  }
1114  }
1115  ASSERT(Knum<=varnum*(varnum+1)/2)
1116 
1117  pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
1118  S_THREAD_PARAM_KERNEL* params = SG_MALLOC(S_THREAD_PARAM_KERNEL, num_threads-1);
1119  int32_t step= Knum/num_threads;
1120  //SG_DEBUG("\nkernel-step size: %i\n", step)
1121  for (int32_t t=0; t<num_threads-1; t++)
1122  {
1123  params[t].svmlight = this;
1124  params[t].start = t*step;
1125  params[t].end = (t+1)*step;
1126  params[t].KI=KI ;
1127  params[t].KJ=KJ ;
1128  params[t].Kval=Kval ;
1129  pthread_create(&threads[t], NULL, CSVMLight::compute_kernel_helper, (void*)&params[t]);
1130  }
1131  for (i=params[num_threads-2].end; i<Knum; i++)
1132  Kval[i]=compute_kernel(KI[i],KJ[i]) ;
1133 
1134  for (int32_t t=0; t<num_threads-1; t++)
1135  pthread_join(threads[t], NULL);
1136 
1137  SG_FREE(params);
1138  SG_FREE(threads);
1139 
1140  Knum=0 ;
1141  for (i=0;i<varnum;i++) {
1142  ki=key[i];
1143 
1144  /* Compute the matrix for equality constraints */
1145  qp->opt_ce[i]=label[ki];
1146  qp->opt_low[i]=0;
1147  qp->opt_up[i]=learn_parm->svm_cost[ki];
1148 
1149  kernel_temp=Kval[Knum] ;
1150  Knum++ ;
1151  /* compute linear part of objective function */
1152  qp->opt_g0[i]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
1153  /* compute quadratic part of objective function */
1154  qp->opt_g[varnum*i+i]=kernel_temp;
1155 
1156  for (j=i+1;j<varnum;j++) {
1157  kj=key[j];
1158  kernel_temp=Kval[Knum] ;
1159  Knum++ ;
1160  /* compute linear part of objective function */
1161  qp->opt_g0[i]-=(kernel_temp*a[kj]*(float64_t)label[kj]);
1162  qp->opt_g0[j]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
1163  /* compute quadratic part of objective function */
1164  qp->opt_g[varnum*i+j]=(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp;
1165  qp->opt_g[varnum*j+i]=qp->opt_g[varnum*i+j];//(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp;
1166  }
1167 
1168  if(verbosity>=3) {
1169  if(i % 20 == 0) {
1170  SG_DEBUG("%ld..",i)
1171  }
1172  }
1173  }
1174 
1175  SG_FREE(KI);
1176  SG_FREE(KJ);
1177  SG_FREE(Kval);
1178 
1179  for (i=0;i<varnum;i++) {
1180  /* assure starting at feasible point */
1181  qp->opt_xinit[i]=a[key[i]];
1182  /* set linear part of objective function */
1183  qp->opt_g0[i]=(learn_parm->eps[key[i]]-(float64_t)label[key[i]]*c[key[i]])+qp->opt_g0[i]*(float64_t)label[key[i]];
1184  }
1185 
1186  if(verbosity>=3) {
1187  SG_DONE()
1188  }
1189  }
1190 #endif
1191 }
1192 
1193 void CSVMLight::compute_matrices_for_optimization(
1194  int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
1195  float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t *key,
1196  float64_t *a, float64_t *lin, float64_t *c, int32_t varnum, int32_t totdoc,
1197  float64_t *aicache, QP *qp)
1198 {
1199  register int32_t ki,kj,i,j;
1200  register float64_t kernel_temp;
1201 
1202  qp->opt_n=varnum;
1203  qp->opt_ce0[0]=-eq_target; /* compute the constant for equality constraint */
1204  for (j=1;j<model->sv_num;j++) { /* start at 1 */
1205  if((!chosen[model->supvec[j]])
1206  && (!exclude_from_eq_const[(model->supvec[j])])) {
1207  qp->opt_ce0[0]+=model->alpha[j];
1208  }
1209  }
1210  if(learn_parm->biased_hyperplane)
1211  qp->opt_m=1;
1212  else
1213  qp->opt_m=0; /* eq-constraint will be ignored */
1214 
1215  /* init linear part of objective function */
1216  for (i=0;i<varnum;i++) {
1217  qp->opt_g0[i]=lin[key[i]];
1218  }
1219 
1220  for (i=0;i<varnum;i++) {
1221  ki=key[i];
1222 
1223  /* Compute the matrix for equality constraints */
1224  qp->opt_ce[i]=label[ki];
1225  qp->opt_low[i]=0;
1226  qp->opt_up[i]=learn_parm->svm_cost[ki];
1227 
1228  kernel_temp=compute_kernel(docs[ki], docs[ki]);
1229  /* compute linear part of objective function */
1230  qp->opt_g0[i]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
1231  /* compute quadratic part of objective function */
1232  qp->opt_g[varnum*i+i]=kernel_temp;
1233 
1234  for (j=i+1;j<varnum;j++) {
1235  kj=key[j];
1236  kernel_temp=compute_kernel(docs[ki], docs[kj]);
1237 
1238  /* compute linear part of objective function */
1239  qp->opt_g0[i]-=(kernel_temp*a[kj]*(float64_t)label[kj]);
1240  qp->opt_g0[j]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
1241  /* compute quadratic part of objective function */
1242  qp->opt_g[varnum*i+j]=(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp;
1243  qp->opt_g[varnum*j+i]=qp->opt_g[varnum*i+j];//(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp;
1244  }
1245 
1246  if(verbosity>=3) {
1247  if(i % 20 == 0) {
1248  SG_DEBUG("%ld..",i)
1249  }
1250  }
1251  }
1252 
1253  for (i=0;i<varnum;i++) {
1254  /* assure starting at feasible point */
1255  qp->opt_xinit[i]=a[key[i]];
1256  /* set linear part of objective function */
1257  qp->opt_g0[i]=(learn_parm->eps[key[i]]-(float64_t)label[key[i]]*c[key[i]]) + qp->opt_g0[i]*(float64_t)label[key[i]];
1258  }
1259 
1260  if(verbosity>=3) {
1261  SG_DONE()
1262  }
1263 }
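Both matrix-setup routines fill qp with the same subproblem. In math terms, for the working set S of size varnum (a sketch in this file's notation):

\min_{a_S}\;\; g_0^{\top} a_S \;+\; \tfrac{1}{2}\, a_S^{\top} G\, a_S
\qquad\text{s.t.}\qquad 0 \le a_i \le C_i \;\;(i \in S),
\qquad \sum_{i \in S} y_i\, a_i = \text{const},

where G_{ij} = y_i y_j k(x_i, x_j) is stored in opt_g, the linear term (with the gradient contributions of the fixed variables folded in) in opt_g0, the equality row y_S in opt_ce with its constant in opt_ce0, the box bounds in opt_low/opt_up, and the feasible starting point in opt_xinit.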
1264 
1265 
1266 int32_t CSVMLight::calculate_svm_model(
1267  int32_t* docs, int32_t *label, float64_t *lin, float64_t *a,
1268  float64_t *a_old, float64_t *c, int32_t *working2dnum, int32_t *active2dnum)
1269  /* Compute decision function based on current values */
1270  /* of alpha. */
1271 {
1272  int32_t i,ii,pos,b_calculated=0,first_low,first_high;
1273  float64_t ex_c,b_temp,b_low,b_high;
1274 
1275  if(verbosity>=3) {
1276  SG_DEBUG("Calculating model...")
1277  }
1278 
1279  if(!learn_parm->biased_hyperplane) {
1280  model->b=0;
1281  b_calculated=1;
1282  }
1283 
1284  for (ii=0;(i=working2dnum[ii])>=0;ii++) {
1285  if((a_old[i]>0) && (a[i]==0)) { /* remove from model */
1286  pos=model->index[i];
1287  model->index[i]=-1;
1288  (model->sv_num)--;
1289  model->supvec[pos]=model->supvec[model->sv_num];
1290  model->alpha[pos]=model->alpha[model->sv_num];
1291  model->index[model->supvec[pos]]=pos;
1292  }
1293  else if((a_old[i]==0) && (a[i]>0)) { /* add to model */
1294  model->supvec[model->sv_num]=docs[i];
1295  model->alpha[model->sv_num]=a[i]*(float64_t)label[i];
1296  model->index[i]=model->sv_num;
1297  (model->sv_num)++;
1298  }
1299  else if(a_old[i]==a[i]) { /* nothing to do */
1300  }
1301  else { /* just update alpha */
1302  model->alpha[model->index[i]]=a[i]*(float64_t)label[i];
1303  }
1304 
1305  ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
1306  if((a_old[i]>=ex_c) && (a[i]<ex_c)) {
1307  (model->at_upper_bound)--;
1308  }
1309  else if((a_old[i]<ex_c) && (a[i]>=ex_c)) {
1310  (model->at_upper_bound)++;
1311  }
1312 
1313  if((!b_calculated)
1314  && (a[i]>learn_parm->epsilon_a) && (a[i]<ex_c)) { /* calculate b */
1315  model->b=((float64_t)label[i]*learn_parm->eps[i]-c[i]+lin[i]);
1316  b_calculated=1;
1317  }
1318  }
1319 
1320  /* No alpha in the working set not at bounds, so b was not
1321  calculated in the usual way. The following handles this special
1322  case. */
1323  if(learn_parm->biased_hyperplane
1324  && (!b_calculated)
1325  && (model->sv_num-1 == model->at_upper_bound)) {
1326  first_low=1;
1327  first_high=1;
1328  b_low=0;
1329  b_high=0;
1330  for (ii=0;(i=active2dnum[ii])>=0;ii++) {
1331  ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
1332  if(a[i]<ex_c) {
1333  if(label[i]>0) {
1334  b_temp=-(learn_parm->eps[i]-c[i]+lin[i]);
1335  if((b_temp>b_low) || (first_low)) {
1336  b_low=b_temp;
1337  first_low=0;
1338  }
1339  }
1340  else {
1341  b_temp=-(-learn_parm->eps[i]-c[i]+lin[i]);
1342  if((b_temp<b_high) || (first_high)) {
1343  b_high=b_temp;
1344  first_high=0;
1345  }
1346  }
1347  }
1348  else {
1349  if(label[i]<0) {
1350  b_temp=-(-learn_parm->eps[i]-c[i]+lin[i]);
1351  if((b_temp>b_low) || (first_low)) {
1352  b_low=b_temp;
1353  first_low=0;
1354  }
1355  }
1356  else {
1357  b_temp=-(learn_parm->eps[i]-c[i]+lin[i]);
1358  if((b_temp<b_high) || (first_high)) {
1359  b_high=b_temp;
1360  first_high=0;
1361  }
1362  }
1363  }
1364  }
1365  if(first_high) {
1366  model->b=-b_low;
1367  }
1368  else if(first_low) {
1369  model->b=-b_high;
1370  }
1371  else {
1372  model->b=-(b_high+b_low)/2.0; /* select b as the middle of range */
1373  }
1374  }
1375 
1376  if(verbosity>=3) {
1377  SG_DONE()
1378  }
1379 
1380  return(model->sv_num-1); /* have to subtract one, since element 0 is empty*/
1381 }
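The bias computation above follows from the KT conditions: for any unbounded support vector i (epsilon_a < a_i < C_i - epsilon_a) the margin constraint is active, so in this file's notation (a sketch):

y_i\,(\mathrm{lin}_i - b) = -(\varepsilon_i - y_i c_i)
\quad\Longrightarrow\quad
b = y_i\,\varepsilon_i - c_i + \mathrm{lin}_i,

which is exactly the model->b assignment in the loop. With eps_i = -1 and c_i = 0 this is b = lin_i - y_i; note that train_machine() above later stores the Shogun bias as -model->b.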
1382 
1383 int32_t CSVMLight::check_optimality(
1384  int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc,
1385  float64_t *maxdiff, float64_t epsilon_crit_org, int32_t *misclassified,
1386  int32_t *inconsistent, int32_t *active2dnum, int32_t *last_suboptimal_at,
1387  int32_t iteration)
1388  /* Check KT-conditions */
1389 {
1390  int32_t i,ii,retrain;
1391  float64_t dist,ex_c,target;
1392 
1393 if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
1394  learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org;
1395  else
1396  learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3;
1397  retrain=0;
1398  (*maxdiff)=0;
1399  (*misclassified)=0;
1400  for (ii=0;(i=active2dnum[ii])>=0;ii++) {
1401  if((!inconsistent[i]) && label[i]) {
1402  dist=(lin[i]-model->b)*(float64_t)label[i];/* 'distance' from
1403  hyperplane*/
1404  target=-(learn_parm->eps[i]-(float64_t)label[i]*c[i]);
1405  ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
1406  if(dist <= 0) {
1407  (*misclassified)++; /* does not work due to deactivation of var */
1408  }
1409  if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
1410  if((dist-target)>(*maxdiff)) /* largest violation */
1411  (*maxdiff)=dist-target;
1412  }
1413  else if((a[i]<ex_c) && (dist < target)) {
1414  if((target-dist)>(*maxdiff)) /* largest violation */
1415  (*maxdiff)=target-dist;
1416  }
1417  /* Count how long a variable was at lower/upper bound (and optimal).*/
1418  /* Variables, which were at the bound and optimal for a long */
1419  /* time are unlikely to become support vectors. In case our */
1420  /* cache is filled up, those variables are excluded to save */
1421  /* kernel evaluations. (See chapter 'Shrinking').*/
1422  if((a[i]>(learn_parm->epsilon_a))
1423  && (a[i]<ex_c)) {
1424  last_suboptimal_at[i]=iteration; /* not at bound */
1425  }
1426  else if((a[i]<=(learn_parm->epsilon_a))
1427  && (dist < (target+learn_parm->epsilon_shrink))) {
1428  last_suboptimal_at[i]=iteration; /* not likely optimal */
1429  }
1430  else if((a[i]>=ex_c)
1431  && (dist > (target-learn_parm->epsilon_shrink))) {
1432  last_suboptimal_at[i]=iteration; /* not likely optimal */
1433  }
1434  }
1435  }
1436 
1437  /* termination criterion */
1438  if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) {
1439  retrain=1;
1440  }
1441  return(retrain);
1442 }
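What is being verified is the usual KT optimality system up to the tolerance epsilon_crit. A sketch with dist_i = y_i(lin_i - b) and target_i = -(eps_i - y_i c_i), as in the code (target_i = 1 in the standard setup):

a_i \le \epsilon_a \;\Rightarrow\; \mathrm{dist}_i \ge \mathrm{target}_i, \qquad
\epsilon_a < a_i < C_i - \epsilon_a \;\Rightarrow\; \mathrm{dist}_i = \mathrm{target}_i, \qquad
a_i \ge C_i - \epsilon_a \;\Rightarrow\; \mathrm{dist}_i \le \mathrm{target}_i,

maxdiff returns the largest violation, and retrain stays set while maxdiff > epsilon_crit.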
1443 
1444 void CSVMLight::update_linear_component(
1445  int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
1446  float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
1447  float64_t *aicache, float64_t* c)
1448  /* keep track of the linear component */
1449  /* lin of the gradient etc. by updating */
1450  /* based on the change of the variables */
1451  /* in the current working set */
1452 {
1453  register int32_t i=0,ii=0,j=0,jj=0;
1454 
1455 if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
1456  {
1457  if (callback)
1458  {
1459  update_linear_component_mkl_linadd(docs, label, active2dnum, a,
1460  a_old, working2dnum, totdoc, lin, aicache);
1461  }
1462  else
1463  {
1464  kernel->clear_normal();
1465 
1466  int32_t num_working=0;
1467  for (ii=0;(i=working2dnum[ii])>=0;ii++) {
1468  if(a[i] != a_old[i]) {
1469  kernel->add_to_normal(docs[i], (a[i]-a_old[i])*(float64_t)label[i]);
1470  num_working++;
1471  }
1472  }
1473 
1474  if (num_working>0)
1475  {
1476  // TODO: port to use OpenMP backend instead of pthread
1477 #ifdef HAVE_PTHREAD
1478  int32_t num_threads=parallel->get_num_threads();
1479 #else
1480  int32_t num_threads=1;
1481 #endif
1482  if (num_threads < 2)
1483  {
1484  for (jj=0;(j=active2dnum[jj])>=0;jj++) {
1485  lin[j]+=kernel->compute_optimized(docs[j]);
1486  }
1487  }
1488 #ifdef HAVE_PTHREAD
1489  else
1490  {
1491  int32_t num_elem = 0 ;
1492  for (jj=0;(j=active2dnum[jj])>=0;jj++) num_elem++ ;
1493 
1494  pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
1495  S_THREAD_PARAM_SVMLIGHT* params = SG_MALLOC(S_THREAD_PARAM_SVMLIGHT, num_threads-1);
1496  int32_t start = 0 ;
1497  int32_t step = num_elem/num_threads;
1498  int32_t end = step ;
1499 
1500  for (int32_t t=0; t<num_threads-1; t++)
1501  {
1502  params[t].kernel = kernel ;
1503  params[t].lin = lin ;
1504  params[t].docs = docs ;
1505  params[t].active2dnum=active2dnum ;
1506  params[t].start = start ;
1507  params[t].end = end ;
1508  start=end ;
1509  end+=step ;
1510  pthread_create(&threads[t], NULL, update_linear_component_linadd_helper, (void*)&params[t]) ;
1511  }
1512 
1513  for (jj=params[num_threads-2].end;(j=active2dnum[jj])>=0;jj++) {
1514  lin[j]+=kernel->compute_optimized(docs[j]);
1515  }
1516  void* ret;
1517  for (int32_t t=0; t<num_threads-1; t++)
1518  pthread_join(threads[t], &ret) ;
1519 
1520  SG_FREE(params);
1521  SG_FREE(threads);
1522  }
1523 #endif
1524  }
1525  }
1526  }
1527  else
1528  {
1529  if (callback)
1530  {
1531  update_linear_component_mkl(docs, label, active2dnum,
1532  a, a_old, working2dnum, totdoc, lin, aicache);
1533  }
1534  else {
1535  for (jj=0;(i=working2dnum[jj])>=0;jj++) {
1536  if(a[i] != a_old[i]) {
1537  kernel->get_kernel_row(i,active2dnum,aicache);
1538  for (ii=0;(j=active2dnum[ii])>=0;ii++)
1539  lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
1540  }
1541  }
1542  }
1543  }
1544 }
1545 
1546 
1547 void CSVMLight::update_linear_component_mkl(
1548  int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
1549  float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
1550  float64_t *aicache)
1551 {
1552  //int inner_iters=0;
1553  int32_t num = kernel->get_num_vec_rhs();
1554  int32_t num_weights = -1;
1555  int32_t num_kernels = kernel->get_num_subkernels() ;
1556  const float64_t* old_beta = kernel->get_subkernel_weights(num_weights);
1557  ASSERT(num_weights==num_kernels)
1558 
1559  if ((kernel->get_kernel_type()==K_COMBINED) &&
1560  (!((CCombinedKernel*) kernel)->get_append_subkernel_weights()))// for combined kernel
1561  {
1562 CCombinedKernel* k = (CCombinedKernel*) kernel;
1563 
1564  int32_t n = 0, i, j ;
1565 
1566  for (index_t k_idx=0; k_idx<k->get_num_kernels(); k_idx++)
1567  {
1568  CKernel* kn = k->get_kernel(k_idx);
1569  for (i=0;i<num;i++)
1570  {
1571  if(a[i] != a_old[i])
1572  {
1573  kn->get_kernel_row(i,NULL,aicache, true);
1574  for (j=0;j<num;j++)
1575  W[j*num_kernels+n]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
1576  }
1577  }
1578 
1579  SG_UNREF(kn);
1580  n++ ;
1581  }
1582  }
1583  else // hope the kernel is fast ...
1584  {
1585  float64_t* w_backup = SG_MALLOC(float64_t, num_kernels);
1586  float64_t* w1 = SG_MALLOC(float64_t, num_kernels);
1587 
1588  // backup and set to zero
1589  for (int32_t i=0; i<num_kernels; i++)
1590  {
1591  w_backup[i] = old_beta[i] ;
1592  w1[i]=0.0 ;
1593  }
1594  for (int32_t n=0; n<num_kernels; n++)
1595  {
1596  w1[n]=1.0 ;
1597 kernel->set_subkernel_weights(SGVector<float64_t>(w1, num_weights)) ;
1598 
1599  for (int32_t i=0;i<num;i++)
1600  {
1601  if(a[i] != a_old[i])
1602  {
1603  for (int32_t j=0;j<num;j++)
1604  W[j*num_kernels+n]+=(a[i]-a_old[i])*compute_kernel(i,j)*(float64_t)label[i];
1605  }
1606  }
1607  w1[n]=0.0 ;
1608  }
1609 
1610  // restore old weights
1611  kernel->set_subkernel_weights(SGVector<float64_t>(w_backup,num_weights));
1612 
1613  SG_FREE(w_backup);
1614  SG_FREE(w1);
1615  }
1616 
1617  call_mkl_callback(a, label, lin);
1618 }
1619 
1620 
1621 void CSVMLight::update_linear_component_mkl_linadd(
1622  int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
1623  float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
1624  float64_t *aicache)
1625 {
1626  //int inner_iters=0;
1627 
1628  // kernel with LP_LINADD property is assumed to have
1629  // compute_by_subkernel functions
1630  int32_t num = kernel->get_num_vec_rhs();
1631  int32_t num_weights = -1;
1632  int32_t num_kernels = kernel->get_num_subkernels() ;
1633  const float64_t* old_beta = kernel->get_subkernel_weights(num_weights);
1634  ASSERT(num_weights==num_kernels)
1635 
1636  float64_t* w_backup = SG_MALLOC(float64_t, num_kernels);
1637  float64_t* w1 = SG_MALLOC(float64_t, num_kernels);
1638 
1639  // backup and set to one
1640  for (int32_t i=0; i<num_kernels; i++)
1641  {
1642  w_backup[i] = old_beta[i] ;
1643  w1[i]=1.0 ;
1644  }
1645  // set the kernel weights
1646 kernel->set_subkernel_weights(SGVector<float64_t>(w1, num_weights));
1647 
1648  // create normal update (with changed alphas only)
1649  kernel->clear_normal();
1650  for (int32_t ii=0, i=0;(i=working2dnum[ii])>=0;ii++) {
1651  if(a[i] != a_old[i]) {
1652  kernel->add_to_normal(docs[i], (a[i]-a_old[i])*(float64_t)label[i]);
1653  }
1654  }
1655  // TODO: port to use OpenMP backend instead of pthread
1656 #ifdef HAVE_PTHREAD
1657  int32_t num_threads=parallel->get_num_threads();
1658 #else
1659  int32_t num_threads=1;
1660 #endif
1661 
1662  if (num_threads < 2)
1663  {
1664  // determine contributions of different kernels
1665  for (int32_t i=0; i<num; i++)
1666  kernel->compute_by_subkernel(i,&W[i*num_kernels]);
1667  }
1668 #ifdef HAVE_PTHREAD
1669  else
1670  {
1671  pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
1672  S_THREAD_PARAM_SVMLIGHT* params = SG_MALLOC(S_THREAD_PARAM_SVMLIGHT, num_threads-1);
1673  int32_t step= num/num_threads;
1674 
1675  for (int32_t t=0; t<num_threads-1; t++)
1676  {
1677  params[t].kernel = kernel;
1678  params[t].W = W;
1679  params[t].start = t*step;
1680  params[t].end = (t+1)*step;
1681  pthread_create(&threads[t], NULL, CSVMLight::update_linear_component_mkl_linadd_helper, (void*)&params[t]);
1682  }
1683 
1684  for (int32_t i=params[num_threads-2].end; i<num; i++)
1685  kernel->compute_by_subkernel(i,&W[i*num_kernels]);
1686 
1687  for (int32_t t=0; t<num_threads-1; t++)
1688  pthread_join(threads[t], NULL);
1689 
1690  SG_FREE(params);
1691  SG_FREE(threads);
1692  }
1693 #endif
1694 
1695  // restore old weights
1696  kernel->set_subkernel_weights(SGVector<float64_t>(w_backup,num_weights));
1697 
1698  call_mkl_callback(a, label, lin);
1699 }
1700 
1701 void* CSVMLight::update_linear_component_mkl_linadd_helper(void* p)
1702 {
1703  S_THREAD_PARAM_SVMLIGHT* params = (S_THREAD_PARAM_SVMLIGHT*) p;
1704 
1705  int32_t num_kernels=params->kernel->get_num_subkernels();
1706 
1707  // determine contributions of different kernels
1708  for (int32_t i=params->start; i<params->end; i++)
1709  params->kernel->compute_by_subkernel(i,&(params->W[i*num_kernels]));
1710 
1711  return NULL ;
1712 }
1713 
1714 void CSVMLight::call_mkl_callback(float64_t* a, int32_t* label, float64_t* lin)
1715 {
1716  int32_t num = kernel->get_num_vec_rhs();
1717  int32_t num_kernels = kernel->get_num_subkernels() ;
1718 
1719  float64_t suma=0;
1720  float64_t* sumw=SG_MALLOC(float64_t, num_kernels);
1721 #ifdef HAVE_LAPACK
1722  int nk = (int) num_kernels; /* calling external lib */
1723  double* alphay = SG_MALLOC(double, num);
1724 
1725  for (int32_t i=0; i<num; i++)
1726  {
1727  alphay[i]=a[i]*label[i];
1728  suma+=a[i];
1729  }
1730 
1731  for (int32_t i=0; i<num_kernels; i++)
1732  sumw[i]=0;
1733 
1734  cblas_dgemv(CblasColMajor, CblasNoTrans, num_kernels, (int) num, 0.5, (double*) W,
1735  num_kernels, alphay, 1, 1.0, (double*) sumw, 1);
1736 
1737  SG_FREE(alphay);
1738 #else
1739  for (int32_t i=0; i<num; i++)
1740  suma += a[i];
1741 
1742  for (int32_t d=0; d<num_kernels; d++)
1743  {
1744  sumw[d]=0;
1745  for (int32_t i=0; i<num; i++)
1746  sumw[d] += a[i]*(0.5*label[i]*W[i*num_kernels+d]);
1747  }
1748 #endif
1749 
1750  if (callback)
1751  mkl_converged=callback(mkl, sumw, suma);
1752 
1753 
1754  const float64_t* new_beta = kernel->get_subkernel_weights(num_kernels);
1755 
1756  // update lin
1757 #ifdef HAVE_LAPACK
1758  cblas_dgemv(CblasColMajor, CblasTrans, nk, (int) num, 1.0, (double*) W,
1759  nk, (double*) new_beta, 1, 0.0, (double*) lin, 1);
1760 #else
1761  for (int32_t i=0; i<num; i++)
1762  lin[i]=0 ;
1763  for (int32_t d=0; d<num_kernels; d++)
1764  if (new_beta[d]!=0)
1765  for (int32_t i=0; i<num; i++)
1766  lin[i] += new_beta[d]*W[i*num_kernels+d] ;
1767 #endif
1768 
1769  SG_FREE(sumw);
1770 }
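Both branches (LAPACK and the plain loops) compute the same two quantities that the MKL callback consumes, and the same post-callback update. A sketch, where W is the num_kernels-by-num matrix filled by the update_linear_component_mkl* routines above (W_{d,i} = W[i*num_kernels+d]):

\mathrm{suma} = \sum_i a_i, \qquad
\mathrm{sumw}_d = \tfrac{1}{2}\sum_i a_i\, y_i\, W_{d,i}, \qquad
\mathrm{lin}_i = \sum_d \beta_d^{\mathrm{new}}\, W_{d,i},

where beta^new are the subkernel weights the callback leaves in the kernel, re-read via get_subkernel_weights(). The two cblas_dgemv calls are exactly these matrix-vector products.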
1771 
1772 
1773 /*************************** Working set selection ***************************/
1774 
1775 int32_t CSVMLight::select_next_qp_subproblem_grad(
1776  int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc,
1777  int32_t qp_size, int32_t *inconsistent, int32_t *active2dnum,
1778  int32_t *working2dnum, float64_t *selcrit, int32_t *select,
1779  int32_t cache_only, int32_t *key, int32_t *chosen)
1780  /* Use the feasible direction approach to select the next
1781  qp-subproblem (see chapter 'Selecting a good working set'). If
1782  'cache_only' is true, then the variables are selected only among
1783  those for which the kernel evaluations are cached. */
1784 {
1785  int32_t choosenum,i,j,k,activedoc,inum,valid;
1786  float64_t s;
1787 
1788  for (inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
1789  choosenum=0;
1790  activedoc=0;
1791  for (i=0;(j=active2dnum[i])>=0;i++) {
1792  s=-label[j];
1793  if(cache_only)
1794  {
1795  if (use_kernel_cache)
1796  valid=(kernel->kernel_cache_check(j));
1797  else
1798  valid = 1 ;
1799  }
1800  else
1801  valid=1;
1802  if(valid
1803  && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
1804  && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
1805  && (s>0)))
1806  && (!chosen[j])
1807  && (label[j])
1808  && (!inconsistent[j]))
1809  {
1810  selcrit[activedoc]=(float64_t)label[j]*(learn_parm->eps[j]-(float64_t)label[j]*c[j]+(float64_t)label[j]*lin[j]);
1811  key[activedoc]=j;
1812  activedoc++;
1813  }
1814  }
1815  select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
1816  for (k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
1817  i=key[select[k]];
1818  chosen[i]=1;
1819  working2dnum[inum+choosenum]=i;
1820  choosenum+=1;
1821  if (use_kernel_cache)
1822 kernel->kernel_cache_touch(i);
1823  /* make sure it does not get kicked */
1824  /* out of cache */
1825  }
1826 
1827  activedoc=0;
1828  for (i=0;(j=active2dnum[i])>=0;i++) {
1829  s=label[j];
1830  if(cache_only)
1831  {
1832  if (use_kernel_cache)
1833  valid=(kernel->kernel_cache_check(j));
1834  else
1835  valid = 1 ;
1836  }
1837  else
1838  valid=1;
1839  if(valid
1840  && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
1841  && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
1842  && (s>0)))
1843  && (!chosen[j])
1844  && (label[j])
1845  && (!inconsistent[j]))
1846  {
1847  selcrit[activedoc]=-(float64_t)label[j]*(learn_parm->eps[j]-(float64_t)label[j]*c[j]+(float64_t)label[j]*lin[j]);
1848  /* selcrit[activedoc]=-(float64_t)(label[j]*(-1.0+(float64_t)label[j]*lin[j])); */
1849  key[activedoc]=j;
1850  activedoc++;
1851  }
1852  }
1853  select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
1854  for (k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
1855  i=key[select[k]];
1856  chosen[i]=1;
1857  working2dnum[inum+choosenum]=i;
1858  choosenum+=1;
1859  if (use_kernel_cache)
1860  kernel->kernel_cache_touch(i); /* make sure it does not get kicked */
1861  /* out of cache */
1862  }
1863  working2dnum[inum+choosenum]=-1; /* complete index */
1864  return(choosenum);
1865 }
1866 
1867 int32_t CSVMLight::select_next_qp_subproblem_rand(
1868  int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc,
1869  int32_t qp_size, int32_t *inconsistent, int32_t *active2dnum,
1870  int32_t *working2dnum, float64_t *selcrit, int32_t *select, int32_t *key,
1871  int32_t *chosen, int32_t iteration)
1872 /* Use the feasible direction approach to select the next
1873  qp-subproblem (see section 'Selecting a good working set'). Chooses
1874  a feasible direction at (pseudo) random to help jump over numerical
1875  problems. */
1876 {
1877  int32_t choosenum,i,j,k,activedoc,inum;
1878  float64_t s;
1879 
1880  for (inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
1881  choosenum=0;
1882  activedoc=0;
1883  for (i=0;(j=active2dnum[i])>=0;i++) {
1884  s=-label[j];
1885  if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
1886  && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
1887  && (s>0)))
1888  && (!inconsistent[j])
1889  && (label[j])
1890  && (!chosen[j])) {
1891  selcrit[activedoc]=(j+iteration) % totdoc;
1892  key[activedoc]=j;
1893  activedoc++;
1894  }
1895  }
1896  select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
1897  for (k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
1898  i=key[select[k]];
1899  chosen[i]=1;
1900  working2dnum[inum+choosenum]=i;
1901  choosenum+=1;
1902  if (use_kernel_cache)
1903  kernel->kernel_cache_touch(i); /* make sure it does not get kicked */
1904  /* out of cache */
1905  }
1906 
1907  activedoc=0;
1908  for (i=0;(j=active2dnum[i])>=0;i++) {
1909  s=label[j];
1910  if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
1911  && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
1912  && (s>0)))
1913  && (!inconsistent[j])
1914  && (label[j])
1915  && (!chosen[j])) {
1916  selcrit[activedoc]=(j+iteration) % totdoc;
1917  key[activedoc]=j;
1918  activedoc++;
1919  }
1920  }
1921  select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
1922  for (k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
1923  i=key[select[k]];
1924  chosen[i]=1;
1925  working2dnum[inum+choosenum]=i;
1926  choosenum+=1;
1927  if (use_kernel_cache)
1928  kernel->kernel_cache_touch(i); /* make sure it does not get kicked */
1929  /* out of cache */
1930  }
1931  working2dnum[inum+choosenum]=-1; /* complete index */
1932  return(choosenum);
1933 }
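/* Note: here the score is simply selcrit[j] = (j+iteration) % totdoc, a
 * shifting permutation rather than a gradient. As a worked example with
 * made-up sizes, for totdoc=5 and iteration=3 the candidates j=0..4 get
 * scores 3,4,0,1,2, so j=1 and j=0 are picked first. Selection is thus
 * (pseudo) random but deterministic per iteration, which lets the caller
 * escape working sets on which the gradient heuristic makes no progress. */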
1934 
1935 
1936 
1937 void CSVMLight::select_top_n(
1938  float64_t *selcrit, int32_t range, int32_t* select, int32_t n)
1939 {
1940  register int32_t i,j;
1941 
1942  for (i=0;(i<n) && (i<range);i++) { /* Initialize with the first n elements */
1943  for (j=i;j>=0;j--) {
1944  if((j>0) && (selcrit[select[j-1]]<selcrit[i])){
1945  select[j]=select[j-1];
1946  }
1947  else {
1948  select[j]=i;
1949  j=-1;
1950  }
1951  }
1952  }
1953  if(n>0) {
1954  for (i=n;i<range;i++) {
1955  if(selcrit[i]>selcrit[select[n-1]]) {
1956  for (j=n-1;j>=0;j--) {
1957  if((j>0) && (selcrit[select[j-1]]<selcrit[i])) {
1958  select[j]=select[j-1];
1959  }
1960  else {
1961  select[j]=i;
1962  j=-1;
1963  }
1964  }
1965  }
1966  }
1967  }
1968 }
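/* Note: select_top_n() is a partial insertion sort; it leaves the indices
 * of the n largest selcrit values in select[0..n-1], ordered by decreasing
 * score, in O(range*n) time, which is cheap for the small n (= qp_size/2)
 * used here. A minimal usage sketch (member call, made-up values):
 *
 *   float64_t crit[4] = { 0.2, 1.5, -0.3, 0.9 };
 *   int32_t top[2];
 *   select_top_n(crit, 4, top, 2);  // top[0]==1 (score 1.5), top[1]==3 (0.9)
 */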
1969 
1970 
1971 /******************************** Shrinking *********************************/
1972 
1973 void CSVMLight::init_shrink_state(
1974  SHRINK_STATE *shrink_state, int32_t totdoc, int32_t maxhistory)
1975 {
1976  int32_t i;
1977 
1978  shrink_state->deactnum=0;
1979  shrink_state->active = SG_MALLOC(int32_t, totdoc);
1980  shrink_state->inactive_since = SG_MALLOC(int32_t, totdoc);
1981  shrink_state->a_history = SG_MALLOC(float64_t*, maxhistory);
1982  shrink_state->maxhistory=maxhistory;
1983  shrink_state->last_lin = SG_MALLOC(float64_t, totdoc);
1984  shrink_state->last_a = SG_MALLOC(float64_t, totdoc);
1985 
1986  for (i=0;i<totdoc;i++) {
1987  shrink_state->active[i]=1;
1988  shrink_state->inactive_since[i]=0;
1989  shrink_state->last_a[i]=0;
1990  shrink_state->last_lin[i]=0;
1991  }
1992 }
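/* Note on the SHRINK_STATE bookkeeping allocated above: active[] flags the
 * currently optimized variables, inactive_since[] records the shrinking
 * epoch (deactnum) in which a variable was removed, and a_history[] can
 * hold up to maxhistory alpha snapshots so that lin[] can later be
 * reconstructed exactly for reactivated variables; last_a/last_lin play
 * the same role in the linadd case without storing full snapshots. */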
1993 
1994 void CSVMLight::shrink_state_cleanup(SHRINK_STATE *shrink_state)
1995 {
1996  SG_FREE(shrink_state->active);
1997  SG_FREE(shrink_state->inactive_since);
1998  if(shrink_state->deactnum > 0)
1999  SG_FREE((shrink_state->a_history[shrink_state->deactnum-1]));
2000  SG_FREE((shrink_state->a_history));
2001  SG_FREE((shrink_state->last_a));
2002  SG_FREE((shrink_state->last_lin));
2003 }
2004 
2005 int32_t CSVMLight::shrink_problem(
2006  SHRINK_STATE *shrink_state, int32_t *active2dnum,
2007  int32_t *last_suboptimal_at, int32_t iteration, int32_t totdoc,
2008  int32_t minshrink, float64_t *a, int32_t *inconsistent, float64_t* c,
2009  float64_t* lin, int* label)
2010  /* Shrink some variables away. Do the shrinking only if at least
2011  minshrink variables can be removed. */
2012 {
2013  int32_t i,ii,change,activenum,lastiter;
2014  float64_t *a_old=NULL;
2015 
2016  activenum=0;
2017  change=0;
2018  for (ii=0;active2dnum[ii]>=0;ii++) {
2019  i=active2dnum[ii];
2020  activenum++;
2021  lastiter=last_suboptimal_at[i];
2022 
2023  if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink)
2024  || (inconsistent[i])) {
2025  change++;
2026  }
2027  }
2028  if((change>=minshrink) /* shrink only if sufficiently many candidates */
2029  && (shrink_state->deactnum<shrink_state->maxhistory)) { /* and enough memory */
2030  /* Shrink problem by removing those variables which are */
2031  /* optimal at a bound for a minimum number of iterations */
2032  if(verbosity>=2) {
2033  SG_INFO(" Shrinking...")
2034  }
2035 
2036  if (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())) { /* non-linear case save alphas */
2037 
2038  a_old=SG_MALLOC(float64_t, totdoc);
2039  shrink_state->a_history[shrink_state->deactnum]=a_old;
2040  for (i=0;i<totdoc;i++) {
2041  a_old[i]=a[i];
2042  }
2043  }
2044  for (ii=0;active2dnum[ii]>=0;ii++) {
2045  i=active2dnum[ii];
2046  lastiter=last_suboptimal_at[i];
2047  if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink)
2048  || (inconsistent[i])) {
2049  shrink_state->active[i]=0;
2050  shrink_state->inactive_since[i]=shrink_state->deactnum;
2051  }
2052  }
2053  activenum=compute_index(shrink_state->active,totdoc,active2dnum);
2054  shrink_state->deactnum++;
 2055  if(kernel->has_property(KP_LINADD) && get_linadd_enabled())
2056  shrink_state->deactnum=0;
2057 
2058  if(verbosity>=2) {
2059  SG_DONE()
2060  SG_DEBUG("Number of inactive variables = %ld\n", totdoc-activenum)
2061  }
2062  }
2063  return(activenum);
2064 }
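/* Note: a variable is shrunk once it has been optimal at a bound (or
 * inconsistent) for more than svm_iter_to_shrink consecutive iterations,
 * and only when at least minshrink such candidates exist, so the index
 * rebuild in compute_index() is amortized. In the non-linadd case the
 * current alphas are snapshotted first, because the shrunk variables'
 * lin[] values are only replayed from these snapshots on reactivation. */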
2065 
2066 void* CSVMLight::reactivate_inactive_examples_linadd_helper(void* p)
2067 {
2068  S_THREAD_PARAM_REACTIVATE_LINADD* params = (S_THREAD_PARAM_REACTIVATE_LINADD*) p;
2069 
2070  CKernel* k = params->kernel;
2071  float64_t* lin = params->lin;
2072  float64_t* last_lin = params->last_lin;
2073  int32_t* active = params->active;
2074  int32_t* docs = params->docs;
2075  int32_t start = params->start;
2076  int32_t end = params->end;
2077 
2078  for (int32_t i=start;i<end;i++)
2079  {
2080  if (!active[i])
2081  lin[i] = last_lin[i]+k->compute_optimized(docs[i]);
2082 
2083  last_lin[i]=lin[i];
2084  }
2085 
2086  return NULL;
2087 }
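/* Note: in the linadd case the kernel keeps an explicit weight vector, so
 * a shrunk example i can be brought up to date with a single
 * compute_optimized(docs[i]) added onto last_lin[i] instead of a full pass
 * over all support vectors; each thread handles a disjoint [start,end)
 * slice of the examples, so the helper needs no locking. */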
2088 
2089 void* CSVMLight::reactivate_inactive_examples_vanilla_helper(void* p)
2090 {
2091  S_THREAD_PARAM_REACTIVATE_VANILLA* params = (S_THREAD_PARAM_REACTIVATE_VANILLA*) p;
2092  ASSERT(params)
2093  ASSERT(params->kernel)
2094  ASSERT(params->lin)
2095  ASSERT(params->aicache)
2096  ASSERT(params->a)
2097  ASSERT(params->a_old)
2098  ASSERT(params->changed2dnum)
2099  ASSERT(params->inactive2dnum)
2100  ASSERT(params->label)
2101 
2102  CKernel* k = params->kernel;
2103  float64_t* lin = params->lin;
2104  float64_t* aicache = params->aicache;
2105  float64_t* a= params->a;
2106  float64_t* a_old = params->a_old;
2107  int32_t* changed2dnum = params->changed2dnum;
2108  int32_t* inactive2dnum = params->inactive2dnum;
2109  int32_t* label = params->label;
2110  int32_t start = params->start;
2111  int32_t end = params->end;
2112 
2113  for (int32_t ii=start;ii<end;ii++)
2114  {
2115  int32_t i=changed2dnum[ii];
2116  int32_t j=0;
2117  ASSERT(i>=0)
2118 
2119  k->get_kernel_row(i,inactive2dnum,aicache);
2120  for (int32_t jj=0;(j=inactive2dnum[jj])>=0;jj++)
2121  lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
2122  }
2123  return NULL;
2124 }
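/* Note: the vanilla helper replays, for every changed row i, the update
 *   lin[j] += (a[i]-a_old[i]) * y_i * K(i,j)
 * over all still-inactive columns j. When run in parallel, the caller
 * below hands each thread a private lin buffer (tmp_lin) and adds the
 * buffers up after joining, so the threads never write to the same
 * array. */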
2125 
2126 void CSVMLight::reactivate_inactive_examples(
2127  int32_t* label, float64_t *a, SHRINK_STATE *shrink_state, float64_t *lin,
2128  float64_t *c, int32_t totdoc, int32_t iteration, int32_t *inconsistent,
2129  int32_t* docs, float64_t *aicache, float64_t *maxdiff)
 2130  /* Make all variables active again that had been removed by
 2131  shrinking. */
2132  /* Computes lin for those variables from scratch. */
2133 {
2134  register int32_t i,j,ii,jj,t,*changed2dnum,*inactive2dnum;
2135  int32_t *changed,*inactive;
2136  register float64_t *a_old,dist;
2137  float64_t ex_c,target;
2138 
2139  if (kernel->has_property(KP_LINADD) && get_linadd_enabled()) { /* special linear case */
2140  a_old=shrink_state->last_a;
2141 
 2142  if (!use_batch_computation || !kernel->has_property(KP_BATCHEVALUATION))
 2143  {
2144  SG_DEBUG(" clear normal - linadd\n")
2145  kernel->clear_normal();
2146 
2147  int32_t num_modified=0;
2148  for (i=0;i<totdoc;i++) {
2149  if(a[i] != a_old[i]) {
2150  kernel->add_to_normal(docs[i], ((a[i]-a_old[i])*(float64_t)label[i]));
2151  a_old[i]=a[i];
2152  num_modified++;
2153  }
2154  }
2155 
2156  if (num_modified>0)
2157  {
2158  // TODO: port to use OpenMP backend instead of pthread
2159 #ifdef HAVE_PTHREAD
2160  int32_t num_threads=parallel->get_num_threads();
2161 #else
2162  int32_t num_threads=1;
2163 #endif
2164  ASSERT(num_threads>0)
2165  if (num_threads < 2)
2166  {
2167  S_THREAD_PARAM_REACTIVATE_LINADD params;
2168  params.kernel=kernel;
2169  params.lin=lin;
2170  params.last_lin=shrink_state->last_lin;
2171  params.docs=docs;
2172  params.active=shrink_state->active;
2173  params.start=0;
2174  params.end=totdoc;
 2175  reactivate_inactive_examples_linadd_helper((void*) &params);
2176  }
2177 #ifdef HAVE_PTHREAD
2178  else
2179  {
2180  pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
2181  S_THREAD_PARAM_REACTIVATE_LINADD* params = SG_MALLOC(S_THREAD_PARAM_REACTIVATE_LINADD, num_threads);
2182  int32_t step= totdoc/num_threads;
2183 
2184  for (t=0; t<num_threads-1; t++)
2185  {
2186  params[t].kernel=kernel;
2187  params[t].lin=lin;
2188  params[t].last_lin=shrink_state->last_lin;
2189  params[t].docs=docs;
2190  params[t].active=shrink_state->active;
2191  params[t].start = t*step;
2192  params[t].end = (t+1)*step;
2193  pthread_create(&threads[t], NULL, CSVMLight::reactivate_inactive_examples_linadd_helper, (void*)&params[t]);
2194  }
2195 
2196  params[t].kernel=kernel;
2197  params[t].lin=lin;
2198  params[t].last_lin=shrink_state->last_lin;
2199  params[t].docs=docs;
2200  params[t].active=shrink_state->active;
2201  params[t].start = t*step;
2202  params[t].end = totdoc;
2203  reactivate_inactive_examples_linadd_helper((void*) &params[t]);
2204 
2205  for (t=0; t<num_threads-1; t++)
2206  pthread_join(threads[t], NULL);
2207 
2208  SG_FREE(threads);
2209  SG_FREE(params);
2210  }
2211 #endif
2212 
2213  }
2214  }
2215  else
2216  {
2217  float64_t *alphas = SG_MALLOC(float64_t, totdoc);
2218  int32_t *idx = SG_MALLOC(int32_t, totdoc);
2219  int32_t num_suppvec=0 ;
2220 
2221  for (i=0; i<totdoc; i++)
2222  {
2223  if(a[i] != a_old[i])
2224  {
2225  alphas[num_suppvec] = (a[i]-a_old[i])*(float64_t)label[i];
2226  idx[num_suppvec] = i ;
2227  a_old[i] = a[i] ;
2228  num_suppvec++ ;
2229  }
2230  }
2231 
2232  if (num_suppvec>0)
2233  {
2234  int32_t num_inactive=0;
 2235  int32_t* inactive_idx=SG_MALLOC(int32_t, totdoc); // in fact we only need a subset
2236 
2237  j=0;
2238  for (i=0;i<totdoc;i++)
2239  {
2240  if(!shrink_state->active[i])
2241  {
2242  inactive_idx[j++] = i;
2243  num_inactive++;
2244  }
2245  }
2246 
2247  if (num_inactive>0)
2248  {
2249  float64_t* dest = SG_MALLOC(float64_t, num_inactive);
2250  memset(dest, 0, sizeof(float64_t)*num_inactive);
2251 
2252  kernel->compute_batch(num_inactive, inactive_idx, dest, num_suppvec, idx, alphas);
2253 
2254  j=0;
2255  for (i=0;i<totdoc;i++) {
2256  if(!shrink_state->active[i]) {
2257  lin[i] = shrink_state->last_lin[i] + dest[j++] ;
2258  }
2259  shrink_state->last_lin[i]=lin[i];
2260  }
2261 
2262  SG_FREE(dest);
2263  }
2264  else
2265  {
2266  for (i=0;i<totdoc;i++)
2267  shrink_state->last_lin[i]=lin[i];
2268  }
2269  SG_FREE(inactive_idx);
2270  }
2271  SG_FREE(alphas);
2272  SG_FREE(idx);
2273  }
2274 
 2275  kernel->delete_optimization();
2276  }
2277  else
2278  {
2279  changed=SG_MALLOC(int32_t, totdoc);
2280  changed2dnum=SG_MALLOC(int32_t, totdoc+11);
2281  inactive=SG_MALLOC(int32_t, totdoc);
2282  inactive2dnum=SG_MALLOC(int32_t, totdoc+11);
2283  for (t=shrink_state->deactnum-1;(t>=0) && shrink_state->a_history[t];t--)
2284  {
2285  if(verbosity>=2) {
2286  SG_INFO("%ld..",t)
2287  }
2288  a_old=shrink_state->a_history[t];
2289  for (i=0;i<totdoc;i++) {
2290  inactive[i]=((!shrink_state->active[i])
2291  && (shrink_state->inactive_since[i] == t));
2292  changed[i]= (a[i] != a_old[i]);
2293  }
2294  compute_index(inactive,totdoc,inactive2dnum);
2295  compute_index(changed,totdoc,changed2dnum);
2296 
2297 
2298  //TODO: PUT THIS BACK IN!!!!!!!! int32_t num_threads=parallel->get_num_threads();
2299  int32_t num_threads=1;
2300  ASSERT(num_threads>0)
2301 
2302  if (num_threads < 2)
2303  {
2304  for (ii=0;(i=changed2dnum[ii])>=0;ii++) {
2305  kernel->get_kernel_row(i,inactive2dnum,aicache);
2306  for (jj=0;(j=inactive2dnum[jj])>=0;jj++)
2307  lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
2308  }
2309  }
2310 #ifdef HAVE_PTHREAD
2311  else
2312  {
2313  //find number of the changed ones
2314  int32_t num_changed=0;
2315  for (ii=0;changed2dnum[ii]>=0;ii++)
2316  num_changed++;
2317 
2318  if (num_changed>0)
2319  {
2320  pthread_t* threads= SG_MALLOC(pthread_t, num_threads-1);
2321  S_THREAD_PARAM_REACTIVATE_VANILLA* params = SG_MALLOC(S_THREAD_PARAM_REACTIVATE_VANILLA, num_threads);
2322  int32_t step= num_changed/num_threads;
2323 
2324  // alloc num_threads many tmp buffers
2325  float64_t* tmp_lin=SG_MALLOC(float64_t, totdoc*num_threads);
2326  memset(tmp_lin, 0, sizeof(float64_t)*((size_t) totdoc)*num_threads);
2327  float64_t* tmp_aicache=SG_MALLOC(float64_t, totdoc*num_threads);
2328  memset(tmp_aicache, 0, sizeof(float64_t)*((size_t) totdoc)*num_threads);
2329 
2330  int32_t thr;
2331  for (thr=0; thr<num_threads-1; thr++)
2332  {
2333  params[thr].kernel=kernel;
2334  params[thr].lin=&tmp_lin[thr*totdoc];
2335  params[thr].aicache=&tmp_aicache[thr*totdoc];
2336  params[thr].a=a;
2337  params[thr].a_old=a_old;
2338  params[thr].changed2dnum=changed2dnum;
2339  params[thr].inactive2dnum=inactive2dnum;
2340  params[thr].label=label;
2341  params[thr].start = thr*step;
2342  params[thr].end = (thr+1)*step;
2343  pthread_create(&threads[thr], NULL, CSVMLight::reactivate_inactive_examples_vanilla_helper, (void*)&params[thr]);
2344  }
2345 
2346  params[thr].kernel=kernel;
2347  params[thr].lin=&tmp_lin[thr*totdoc];
2348  params[thr].aicache=&tmp_aicache[thr*totdoc];
2349  params[thr].a=a;
2350  params[thr].a_old=a_old;
2351  params[thr].changed2dnum=changed2dnum;
2352  params[thr].inactive2dnum=inactive2dnum;
2353  params[thr].label=label;
2354  params[thr].start = thr*step;
2355  params[thr].end = num_changed;
2356  reactivate_inactive_examples_vanilla_helper((void*) &params[thr]);
2357 
2358  for (jj=0;(j=inactive2dnum[jj])>=0;jj++)
2359  lin[j]+=tmp_lin[totdoc*thr+j];
2360 
2361  for (thr=0; thr<num_threads-1; thr++)
2362  {
2363  pthread_join(threads[thr], NULL);
2364 
2365  //add up results
2366  for (jj=0;(j=inactive2dnum[jj])>=0;jj++)
2367  lin[j]+=tmp_lin[totdoc*thr+j];
2368  }
2369 
2370  SG_FREE(tmp_lin);
2371  SG_FREE(tmp_aicache);
2372  SG_FREE(threads);
2373  SG_FREE(params);
2374  }
2375  }
2376 #endif
2377  }
2378  SG_FREE(changed);
2379  SG_FREE(changed2dnum);
2380  SG_FREE(inactive);
2381  SG_FREE(inactive2dnum);
2382  }
2383 
2384  (*maxdiff)=0;
2385  for (i=0;i<totdoc;i++) {
2386  shrink_state->inactive_since[i]=shrink_state->deactnum-1;
2387  if(!inconsistent[i]) {
2388  dist=(lin[i]-model->b)*(float64_t)label[i];
2389  target=-(learn_parm->eps[i]-(float64_t)label[i]*c[i]);
2390  ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
2391  if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
2392  if((dist-target)>(*maxdiff)) /* largest violation */
2393  (*maxdiff)=dist-target;
2394  }
2395  else if((a[i]<ex_c) && (dist < target)) {
2396  if((target-dist)>(*maxdiff)) /* largest violation */
2397  (*maxdiff)=target-dist;
2398  }
2399  if((a[i]>(0+learn_parm->epsilon_a))
2400  && (a[i]<ex_c)) {
2401  shrink_state->active[i]=1; /* not at bound */
2402  }
2403  else if((a[i]<=(0+learn_parm->epsilon_a)) && (dist < (target+learn_parm->epsilon_shrink))) {
2404  shrink_state->active[i]=1;
2405  }
2406  else if((a[i]>=ex_c)
2407  && (dist > (target-learn_parm->epsilon_shrink))) {
2408  shrink_state->active[i]=1;
2409  }
2410  else if(learn_parm->sharedslack) { /* make all active when sharedslack */
2411  shrink_state->active[i]=1;
2412  }
2413  }
2414  }
2415 
2416  if (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())) { /* update history for non-linear */
2417  for (i=0;i<totdoc;i++) {
2418  (shrink_state->a_history[shrink_state->deactnum-1])[i]=a[i];
2419  }
2420  for (t=shrink_state->deactnum-2;(t>=0) && shrink_state->a_history[t];t--) {
2421  SG_FREE(shrink_state->a_history[t]);
2422  shrink_state->a_history[t]=0;
2423  }
2424  }
2425 }
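/* Note: once lin[] is consistent again, the loop above re-derives the
 * active set from the KT conditions: with dist = y_i*(lin[i]-b) and
 * target = -(eps_i - y_i*c_i), the largest violation is returned in
 * *maxdiff, free variables (0 < a_i < C) always stay active, and bounded
 * ones stay active only while they sit within epsilon_shrink of violating
 * their condition (or always, under sharedslack). */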
2426 
2427 
2428 
2429 /* start the optimizer and return the optimal values */
2430 float64_t* CSVMLight::optimize_qp(
2431  QP *qp, float64_t *epsilon_crit, int32_t nx, float64_t *threshold,
2432  int32_t& svm_maxqpsize)
2433 {
2434  register int32_t i, j, result;
2435  float64_t margin, obj_before, obj_after;
2436  float64_t sigdig, dist, epsilon_loqo;
2437  int32_t iter;
2438 
2439  if(!primal) { /* allocate memory at first call */
2440  primal=SG_MALLOC(float64_t, nx*3);
2441  dual=SG_MALLOC(float64_t, nx*2+1);
2442  }
2443 
2444  obj_before=0; /* calculate objective before optimization */
2445  for(i=0;i<qp->opt_n;i++) {
2446  obj_before+=(qp->opt_g0[i]*qp->opt_xinit[i]);
2447  obj_before+=(0.5*qp->opt_xinit[i]*qp->opt_xinit[i]*qp->opt_g[i*qp->opt_n+i]);
2448  for(j=0;j<i;j++) {
2449  obj_before+=(qp->opt_xinit[j]*qp->opt_xinit[i]*qp->opt_g[j*qp->opt_n+i]);
2450  }
2451  }
2452 
2453  result=STILL_RUNNING;
2454  qp->opt_ce0[0]*=(-1.0);
2455  /* Run pr_loqo. If a run fails, try again with parameters which lead */
2456  /* to a slower, but more robust setting. */
2457  for(margin=init_margin,iter=init_iter;
2458  (margin<=0.9999999) && (result!=OPTIMAL_SOLUTION);) {
2459 
2461  sigdig=-log10(opt_precision);
2462 
2463  result=pr_loqo((int32_t)qp->opt_n,(int32_t)qp->opt_m,
2464  (float64_t *)qp->opt_g0,(float64_t *)qp->opt_g,
2465  (float64_t *)qp->opt_ce,(float64_t *)qp->opt_ce0,
2466  (float64_t *)qp->opt_low,(float64_t *)qp->opt_up,
 2467  (float64_t *)primal,(float64_t *)dual,
2468  (int32_t)(verbosity-2),
2469  (float64_t)sigdig,(int32_t)iter,
2470  (float64_t)margin,(float64_t)(qp->opt_up[0])/4.0,(int32_t)0);
2471 
2472  if(CMath::is_nan(dual[0])) { /* check for choldc problem */
2473  if(verbosity>=2) {
2474  SG_SDEBUG("Restarting PR_LOQO with more conservative parameters.\n")
2475  }
2476  if(init_margin<0.80) { /* become more conservative in general */
2477  init_margin=(4.0*margin+1.0)/5.0;
2478  }
2479  margin=(margin+1.0)/2.0;
2480  (opt_precision)*=10.0; /* reduce precision */
2481  if(verbosity>=2) {
2482  SG_SDEBUG("Reducing precision of PR_LOQO.\n")
2483  }
2484  }
2485  else if(result!=OPTIMAL_SOLUTION) {
2486  iter+=2000;
2487  init_iter+=10;
2488  (opt_precision)*=10.0; /* reduce precision */
2489  if(verbosity>=2) {
2490  SG_SDEBUG("Reducing precision of PR_LOQO due to (%ld).\n",result)
2491  }
2492  }
2493  }
2494 
2495  if(qp->opt_m) /* Thanks to Alex Smola for this hint */
2496  model_b=dual[0];
2497  else
2498  model_b=0;
2499 
2500  /* Check the precision of the alphas. If results of current optimization */
2501  /* violate KT-Conditions, relax the epsilon on the bounds on alphas. */
2502  epsilon_loqo=1E-10;
2503  for(i=0;i<qp->opt_n;i++) {
2504  dist=-model_b*qp->opt_ce[i];
2505  dist+=(qp->opt_g0[i]+1.0);
2506  for(j=0;j<i;j++) {
2507  dist+=(primal[j]*qp->opt_g[j*qp->opt_n+i]);
2508  }
2509  for(j=i;j<qp->opt_n;j++) {
2510  dist+=(primal[j]*qp->opt_g[i*qp->opt_n+j]);
2511  }
2512  /* SG_SDEBUG("LOQO: a[%d]=%f, dist=%f, b=%f\n",i,primal[i],dist,dual[0]) */
2513  if((primal[i]<(qp->opt_up[i]-epsilon_loqo)) && (dist < (1.0-(*epsilon_crit)))) {
2514  epsilon_loqo=(qp->opt_up[i]-primal[i])*2.0;
2515  }
2516  else if((primal[i]>(0+epsilon_loqo)) && (dist > (1.0+(*epsilon_crit)))) {
2517  epsilon_loqo=primal[i]*2.0;
2518  }
2519  }
2520 
2521  for(i=0;i<qp->opt_n;i++) { /* clip alphas to bounds */
2522  if(primal[i]<=(0+epsilon_loqo)) {
2523  primal[i]=0;
2524  }
2525  else if(primal[i]>=(qp->opt_up[i]-epsilon_loqo)) {
2526  primal[i]=qp->opt_up[i];
2527  }
2528  }
2529 
2530  obj_after=0; /* calculate objective after optimization */
2531  for(i=0;i<qp->opt_n;i++) {
2532  obj_after+=(qp->opt_g0[i]*primal[i]);
2533  obj_after+=(0.5*primal[i]*primal[i]*qp->opt_g[i*qp->opt_n+i]);
2534  for(j=0;j<i;j++) {
2535  obj_after+=(primal[j]*primal[i]*qp->opt_g[j*qp->opt_n+i]);
2536  }
2537  }
2538 
2539  /* if optimizer returned NAN values, reset and retry with smaller */
2540  /* working set. */
2541  if(CMath::is_nan(obj_after) || CMath::is_nan(model_b)) {
2542  for(i=0;i<qp->opt_n;i++) {
2543  primal[i]=qp->opt_xinit[i];
2544  }
2545  model_b=0;
2546  if(svm_maxqpsize>2) {
2547  svm_maxqpsize--; /* decrease size of qp-subproblems */
2548  }
2549  }
2550 
2551  if(obj_after >= obj_before) { /* check whether there was progress */
2552  (opt_precision)/=100.0;
 2553  precision_violations++;
2554  if(verbosity>=2) {
2555  SG_SDEBUG("Increasing Precision of PR_LOQO.\n")
2556  }
2557  }
2558 
2559  // TODO: add parameter for this (e.g. for MTL experiments)
2560  if(precision_violations > 5000) {
2561  (*epsilon_crit)*=10.0;
 2562  precision_violations=0;
2563  SG_SINFO("Relaxing epsilon on KT-Conditions.\n")
2564  }
2565 
2566  (*threshold)=model_b;
2567 
2568  if(result!=OPTIMAL_SOLUTION) {
2569  SG_SERROR("PR_LOQO did not converge.\n")
2570  return(qp->opt_xinit);
2571  }
2572  else {
2573  return(primal);
2574  }
2575 }
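/* Note: optimize_qp() wraps pr_loqo in a retry loop: on a choldc/NaN
 * failure the margin parameter is pulled towards 1 and opt_precision is
 * relaxed by a factor of 10; on plain non-convergence the iteration budget
 * grows as well. If the objective did not improve, precision is tightened
 * again, and after repeatedly recorded violations the KT epsilon itself is
 * relaxed. The caller gets back primal[] (clipped to the box constraints)
 * on success and the untouched qp->opt_xinit on failure, so it can keep
 * its previous alphas. */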
2576 
2577 
2578 #endif //USE_SVMLIGHT