SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
WeightedDegreeStringKernel.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #include <shogun/lib/common.h>
13 #include <shogun/io/SGIO.h>
14 #include <shogun/lib/Signal.h>
15 #include <shogun/lib/Trie.h>
16 #include <shogun/base/Parameter.h>
17 #include <shogun/base/Parallel.h>
18 
23 
24 #ifndef WIN32
25 #include <pthread.h>
26 #endif
27 
28 using namespace shogun;
29 
30 #ifndef DOXYGEN_SHOULD_SKIP_THIS
// Work description passed (as void*) to compute_batch_helper; one instance per
// worker in compute_batch.
// NOTE(review): compute_batch and compute_batch_helper read/write a `kernel`
// member that is not declared below. The listing skips original line 37, so the
// declaration appears to have been lost in extraction - confirm against the
// real source.
31 struct S_THREAD_PARAM_WD
32 {
33 
// scratch buffer the helper fills with remapped symbols
34  int32_t* vec;
// output array; the helper accumulates into result[i] for i in [start, end)
35  float64_t* result;
// kernel weights forwarded to the trie lookup
36  float64_t* weights;
// tries queried by the helper
38  CTrie<DNATrie>* tries;
// multiplier applied to every contribution before it is added to result
39  float64_t factor;
// position / tree index handled in this pass
40  int32_t j;
// half-open range [start, end) of entries in vec_idx to process
41  int32_t start;
42  int32_t end;
// copy of the kernel's `length` member; only tested against 0 by the helper
43  int32_t length;
// indices of the feature vectors to evaluate
44  int32_t* vec_idx;
45 };
46 #endif // DOXYGEN_SHOULD_SKIP_THIS
47 
49 : CStringKernel<char>()
50 {
// Constructor fragment (its signature line is absent from this listing):
// default-constructs the CStringKernel<char> base, then sets every member to
// its default via init().
51  init();
52 }
53 
54 
56  int32_t d, EWDKernType t)
57 : CStringKernel<char>()
58 {
// Constructor fragment taking a degree `d` and a kernel type `t` (the first
// signature line is absent from this listing). Members are defaulted by init()
// and then degree/type are overridden.
59  init();
60 
61  degree=d;
62  type=t;
63 
// NOTE(review): the statement guarded by this `if` (original line 65) is
// missing from the listing; as shown the `if` has no body.
64  if (type!=E_EXTERNAL)
66 }
67 
69 : CStringKernel<char>(10)
70 {
// Constructor fragment that takes a weight vector `w` (signature line absent
// from this listing). The degree is taken from w.vlen and the weights are
// copied into a freshly allocated buffer.
// NOTE(review): original lines 73 and 77-78 are missing from the listing.
71  init();
72 
74  degree=w.vlen;
75 
// init() sets max_mismatch to 0, so unless a missing line changes it this
// allocates and copies exactly `degree` entries.
76  weights=SG_MALLOC(float64_t, degree*(1+max_mismatch));
79 
80  for (int32_t i=0; i<degree*(1+max_mismatch); i++)
81  weights[i]=w.vector[i];
82 }
83 
86 : CStringKernel<char>(10)
87 {
// Constructor fragment that takes feature objects `l`, `r` and a degree `d`
// (signature line absent from this listing): defaults all members, selects the
// E_WD type and binds the kernel to the features via init(l, r).
// NOTE(review): original lines 91-92 are missing from the listing.
88  init();
89  degree=d;
90  type=E_WD;
93  init(l, r);
94 }
95 
97 {
98  cleanup();
99 
100  SG_FREE(weights);
101  weights=NULL;
102  weights_degree=0;
103  weights_length=0;
104 
105  SG_FREE(block_weights);
106  block_weights=NULL;
107 
108  SG_FREE(position_weights);
109  position_weights=NULL;
110 
111  SG_FREE(weights_buffer);
112  weights_buffer=NULL;
113 }
114 
115 
117 {
// Function body whose signature is absent from this listing (the name
// remove_lhs is inferred - confirm). Visible behaviour: logs a debug message
// and destroys the contents of the tries if they exist.
// NOTE(review): original lines 119 and 124 are missing from the listing.
118  SG_DEBUG("deleting CWeightedDegreeStringKernel optimization\n")
120 
121  if (tries!=NULL)
122  tries->destroy();
123 
125 }
126 
128 {
// Function body whose signature is absent from this listing (the name
// create_empty_tries is inferred - confirm). Visible behaviour: takes
// seq_length from the longest lhs string and destroys the current trie
// contents.
// NOTE(review): original line 136 (inside the `if`) is missing; whatever
// re-creates the tries is not visible here.
129  ASSERT(lhs)
130 
131  seq_length=((CStringFeatures<char>*) lhs)->get_max_vector_length();
132 
133  if (tries!=NULL)
134  {
135  tries->destroy() ;
137  }
138 }
139 
// Binds the kernel to the feature objects `l` (left-hand side) and `r`
// (right-hand side).
// Visible steps: record whether either side differs from the current lhs/rhs,
// require equal string lengths on any side that changed, fetch both alphabets
// and assert that they are of the same kind, clear the KP_LINADD and
// KP_BATCHEVALUATION property bits for alphabets other than DNA/RNA, release
// the existing tries, and return the result of init_normalizer().
// NOTE(review): the listing skips original lines 145, 150-151, 160, 171,
// 174-175 and 177. The declarations of sf_l/sf_r and the code that rebuilds
// the tries are therefore not visible - confirm against the real source.
140 bool CWeightedDegreeStringKernel::init(CFeatures* l, CFeatures* r)
141 {
142  int32_t lhs_changed=(lhs!=l);
143  int32_t rhs_changed=(rhs!=r);
144 
146 
147  SG_DEBUG("lhs_changed: %i\n", lhs_changed)
148  SG_DEBUG("rhs_changed: %i\n", rhs_changed)
149 
152 
// The reference length is the longest string on the left-hand side; both
// sides are checked against it.
153  int32_t len=sf_l->get_max_vector_length();
154  if (lhs_changed && !sf_l->have_same_length(len))
155  SG_ERROR("All strings in WD kernel must have same length (lhs wrong)!\n")
156 
157  if (rhs_changed && !sf_r->have_same_length(len))
158  SG_ERROR("All strings in WD kernel must have same length (rhs wrong)!\n")
159 
161  alphabet=sf_l->get_alphabet();
162  CAlphabet* ralphabet=sf_r->get_alphabet();
163 
// Mask out the two property bits when the alphabet is neither DNA nor RNA.
164  if (!((alphabet->get_alphabet()==DNA) || (alphabet->get_alphabet()==RNA)))
165  properties &= ((uint64_t) (-1)) ^ (KP_LINADD | KP_BATCHEVALUATION);
166 
167  ASSERT(ralphabet->get_alphabet()==alphabet->get_alphabet())
168  SG_UNREF(ralphabet);
169 
170  if (tries!=NULL) {
172  SG_UNREF(tries);
173  }
176 
178 
179  return init_normalizer();
180 }
181 
183 {
// Function body whose signature is absent from this listing (the name cleanup
// is inferred - confirm). Visible behaviour: frees block_weights, destroys and
// unreferences the tries, and resets seq_length, tree_initialized and
// alphabet.
// NOTE(review): original lines 185, 200 and 203 are missing from the listing.
184  SG_DEBUG("deleting CWeightedDegreeStringKernel optimization\n")
186 
187  SG_FREE(block_weights);
188  block_weights=NULL;
189 
190  if (tries!=NULL)
191  {
192  tries->destroy();
193  SG_UNREF(tries);
194  tries=NULL;
195  }
196 
197  seq_length=0;
198  tree_initialized = false;
199 
201  alphabet=NULL;
202 
204 }
205 
// Adds `count` weighted examples (IDX[i] with coefficient alphas[i]) to the
// tries.
// tree_num < 0  : every example is added to all trees, with debug and progress
//                 output.
// tree_num >= 0 : every example is added to the single tree `tree_num`,
//                 without output.
// The mismatch-aware add_* variants are used when max_mismatch != 0.
// Always marks the kernel as initialized and returns true.
// NOTE(review): original line 211 is missing from the listing; the statement
// that the "deleting ... optimization" message announces is not visible.
206 bool CWeightedDegreeStringKernel::init_optimization(int32_t count, int32_t* IDX, float64_t* alphas, int32_t tree_num)
207 {
208  if (tree_num<0)
209  SG_DEBUG("deleting CWeightedDegreeStringKernel optimization\n")
210 
212 
213  if (tree_num<0)
214  SG_DEBUG("initializing CWeightedDegreeStringKernel optimization\n")
215 
216  for (int32_t i=0; i<count; i++)
217  {
218  if (tree_num<0)
219  {
// Report progress roughly ten times over the run; the +1 keeps the modulus
// non-zero for count < 10.
220  if ( (i % (count/10+1)) == 0)
221  SG_PROGRESS(i, 0, count)
222 
223  if (max_mismatch==0)
224  add_example_to_tree(IDX[i], alphas[i]) ;
225  else
226  add_example_to_tree_mismatch(IDX[i], alphas[i]) ;
227 
228  //SG_DEBUG("number of used trie nodes: %i\n", tries.get_num_used_nodes())
229  }
230  else
231  {
232  if (max_mismatch==0)
233  add_example_to_single_tree(IDX[i], alphas[i], tree_num) ;
234  else
235  add_example_to_single_tree_mismatch(IDX[i], alphas[i], tree_num) ;
236  }
237  }
238 
239  if (tree_num<0)
240  SG_DONE()
241 
242  //tries.compact_nodes(NO_CHILD, 0, weights) ;
243 
244  set_is_initialized(true) ;
245  return true ;
246 }
247 
249 {
// Function body whose signature is absent from this listing (the name
// delete_optimization is inferred - confirm). Returns true when the kernel was
// initialized, false otherwise.
// NOTE(review): original line 253 - the statement guarded by
// `if (tries!=NULL)` - is missing. As shown here the `if` appears to guard
// set_is_initialized(false), which is probably not what the real source does.
250  if (get_is_initialized())
251  {
252  if (tries!=NULL)
254  set_is_initialized(false);
255  return true;
256  }
257 
258  return false;
259 }
260 
261 
263  char* avec, int32_t alen, char* bvec, int32_t blen)
264 {
265  float64_t sum = 0.0;
266 
267  for (int32_t i=0; i<alen; i++)
268  {
269  float64_t sumi = 0.0;
270  int32_t mismatches=0;
271 
272  for (int32_t j=0; (i+j<alen) && (j<degree); j++)
273  {
274  if (avec[i+j]!=bvec[i+j])
275  {
276  mismatches++ ;
277  if (mismatches>max_mismatch)
278  break ;
279  } ;
280  sumi += weights[j+degree*mismatches];
281  }
282  if (position_weights!=NULL)
283  sum+=position_weights[i]*sumi ;
284  else
285  sum+=sumi ;
286  }
287  return sum ;
288 }
289 
291  char* avec, int32_t alen, char* bvec, int32_t blen)
292 {
293  ASSERT(alen==blen)
294 
295  float64_t sum=0;
296  int32_t match_len=-1;
297 
298  for (int32_t i=0; i<alen; i++)
299  {
300  if (avec[i]==bvec[i])
301  match_len++;
302  else
303  {
304  if (match_len>=0)
305  sum+=block_weights[match_len];
306  match_len=-1;
307  }
308  }
309 
310  if (match_len>=0)
311  sum+=block_weights[match_len];
312 
313  return sum;
314 }
315 
317  char* avec, int32_t alen, char* bvec, int32_t blen)
318 {
319  float64_t sum = 0.0;
320 
321  for (int32_t i=0; i<alen; i++)
322  {
323  float64_t sumi = 0.0;
324 
325  for (int32_t j=0; (i+j<alen) && (j<degree); j++)
326  {
327  if (avec[i+j]!=bvec[i+j])
328  break ;
329  sumi += weights[j];
330  }
331  if (position_weights!=NULL)
332  sum+=position_weights[i]*sumi ;
333  else
334  sum+=sumi ;
335  }
336  return sum ;
337 }
338 
340  char* avec, int32_t alen, char* bvec, int32_t blen)
341 {
342  float64_t sum = 0.0;
343 
344  for (int32_t i=0; i<alen; i++)
345  {
346  float64_t sumi=0.0;
347  for (int32_t j=0; (i+j<alen) && (j<degree); j++)
348  {
349  if (avec[i+j]!=bvec[i+j])
350  break;
351  sumi += weights[i*degree+j];
352  }
353  if (position_weights!=NULL)
354  sum += position_weights[i]*sumi ;
355  else
356  sum += sumi ;
357  }
358 
359  return sum ;
360 }
361 
362 
363 float64_t CWeightedDegreeStringKernel::compute(int32_t idx_a, int32_t idx_b)
364 {
365  int32_t alen, blen;
366  bool free_avec, free_bvec;
367  char* avec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen, free_avec);
368  char* bvec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen, free_bvec);
369  float64_t result=0;
370 
371  if (max_mismatch==0 && length==0 && block_computation)
372  result=compute_using_block(avec, alen, bvec, blen);
373  else
374  {
375  if (max_mismatch>0)
376  result=compute_with_mismatch(avec, alen, bvec, blen);
377  else if (length==0)
378  result=compute_without_mismatch(avec, alen, bvec, blen);
379  else
380  result=compute_without_mismatch_matrix(avec, alen, bvec, blen);
381  }
382  ((CStringFeatures<char>*) lhs)->free_feature_vector(avec, idx_a, free_avec);
383  ((CStringFeatures<char>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec);
384 
385  return result;
386 }
387 
388 
390  int32_t idx, float64_t alpha)
391 {
// Adds lhs string `idx` with coefficient `alpha` to the trie of every position
// (the first signature line is not in this listing; init_optimization calls
// this as add_example_to_tree). Only valid for max_mismatch == 0.
// NOTE(review): original lines 392-393 are missing from the listing.
394 
395  int32_t len=0;
396  bool free_vec;
397  char* char_vec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx, len, free_vec);
398  ASSERT(max_mismatch==0)
399  int32_t *vec=SG_MALLOC(int32_t, len);
400 
// Translate the characters with the alphabet's remap_to_bin, then hand the
// string back to the feature object.
401  for (int32_t i=0; i<len; i++)
402  vec[i]=alphabet->remap_to_bin(char_vec[i]);
403  ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec);
404 
// NOTE(review): both branches below execute the same statements (the only
// difference, position weighting, is commented out), and the
// `max_mismatch > 0` half of the condition cannot hold after the ASSERT above.
405  if (length == 0 || max_mismatch > 0)
406  {
407  for (int32_t i=0; i<len; i++)
408  {
409  float64_t alpha_pw=alpha;
410  /*if (position_weights!=NULL)
411  alpha_pw *= position_weights[i] ;*/
412  if (alpha_pw==0.0)
413  continue;
414  ASSERT(tries)
415  tries->add_to_trie(i, 0, vec, normalizer->normalize_lhs(alpha_pw, idx), weights, (length!=0));
416  }
417  }
418  else
419  {
420  for (int32_t i=0; i<len; i++)
421  {
422  float64_t alpha_pw=alpha;
423  /*if (position_weights!=NULL)
424  alpha_pw = alpha*position_weights[i] ;*/
425  if (alpha_pw==0.0)
426  continue ;
427  ASSERT(tries)
428  tries->add_to_trie(i, 0, vec, normalizer->normalize_lhs(alpha_pw, idx), weights, (length!=0));
429  }
430  }
431  SG_FREE(vec);
432  tree_initialized=true ;
433 }
434 
436  int32_t idx, float64_t alpha, int32_t tree_num)
437 {
// Adds lhs string `idx` with coefficient `alpha` to the single trie
// `tree_num` (the first signature line is not in this listing;
// init_optimization calls this as add_example_to_single_tree). Only valid for
// max_mismatch == 0.
// NOTE(review): original lines 438-439 are missing from the listing.
440 
441  int32_t len;
442  bool free_vec;
443  char* char_vec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx, len, free_vec);
444  ASSERT(max_mismatch==0)
445  int32_t *vec = SG_MALLOC(int32_t, len);
446 
// Only the window [tree_num, tree_num+degree) is translated; the remaining
// entries of vec stay uninitialized.
447  for (int32_t i=tree_num; i<tree_num+degree && i<len; i++)
448  vec[i]=alphabet->remap_to_bin(char_vec[i]);
449  ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec);
450 
451 
452  ASSERT(tries)
// A zero coefficient is skipped instead of being added to the trie.
453  if (alpha!=0.0)
454  tries->add_to_trie(tree_num, 0, vec, normalizer->normalize_lhs(alpha, idx), weights, (length!=0));
455 
456  SG_FREE(vec);
457  tree_initialized=true ;
458 }
459 
461 {
// Mismatch-aware insertion of lhs string `idx` with coefficient `alpha` into
// the trie of every position (the signature line is not in this listing;
// init_optimization calls this as add_example_to_tree_mismatch).
// NOTE(review): original lines 463-464 are missing from the listing.
462  ASSERT(tries)
465 
466  int32_t len ;
467  bool free_vec;
468  char* char_vec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx, len, free_vec);
469 
470  int32_t *vec = SG_MALLOC(int32_t, len);
471 
472  for (int32_t i=0; i<len; i++)
473  vec[i]=alphabet->remap_to_bin(char_vec[i]);
474  ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec);
475 
// For each start position i the suffix &vec[i] of length len-i is passed to
// the recursive trie insertion, together with max_mismatch.
476  for (int32_t i=0; i<len; i++)
477  {
478  if (alpha!=0.0)
479  tries->add_example_to_tree_mismatch_recursion(NO_CHILD, i, normalizer->normalize_lhs(alpha, idx), &vec[i], len-i, 0, 0, max_mismatch, weights);
480  }
481 
482  SG_FREE(vec);
483  tree_initialized=true ;
484 }
485 
487  int32_t idx, float64_t alpha, int32_t tree_num)
488 {
// Mismatch-aware insertion of lhs string `idx` with coefficient `alpha` into
// the single trie `tree_num` (the first signature line is not in this listing;
// init_optimization calls this as add_example_to_single_tree_mismatch).
// NOTE(review): original lines 490-491 and 504 are missing; line 504 held the
// start of the call whose argument list follows inside the `if` below.
489  ASSERT(tries)
492 
493  int32_t len=0;
494  bool free_vec;
495  char* char_vec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx, len, free_vec);
496  int32_t *vec=SG_MALLOC(int32_t, len);
497 
// Only the window [tree_num, tree_num+degree) is translated.
498  for (int32_t i=tree_num; i<len && i<tree_num+degree; i++)
499  vec[i]=alphabet->remap_to_bin(char_vec[i]);
500  ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec);
501 
502  if (alpha!=0.0)
503  {
505  NO_CHILD, tree_num, normalizer->normalize_lhs(alpha, idx), &vec[tree_num], len-tree_num,
506  0, 0, max_mismatch, weights);
507  }
508 
509  SG_FREE(vec);
510  tree_initialized=true;
511 }
512 
513 
515 {
518 
519  int32_t len=0;
520  bool free_vec;
521  char* char_vec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx, len, free_vec);
522  ASSERT(char_vec && len>0)
523  int32_t *vec=SG_MALLOC(int32_t, len);
524 
525  for (int32_t i=0; i<len; i++)
526  vec[i]=alphabet->remap_to_bin(char_vec[i]);
527  ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec);
528 
529  float64_t sum=0;
530  ASSERT(tries)
531  for (int32_t i=0; i<len; i++)
532  sum+=tries->compute_by_tree_helper(vec, len, i, i, i, weights, (length!=0));
533 
534  SG_FREE(vec);
535  return normalizer->normalize_rhs(sum, idx);
536 }
537 
539  int32_t idx, float64_t* LevelContrib)
540 {
543 
544  int32_t len ;
545  bool free_vec;
546  char* char_vec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx, len, free_vec);
547 
548  int32_t *vec = SG_MALLOC(int32_t, len);
549 
550  for (int32_t i=0; i<len; i++)
551  vec[i]=alphabet->remap_to_bin(char_vec[i]);
552  ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec);
553 
554  ASSERT(tries)
555  for (int32_t i=0; i<len; i++)
556  {
557  tries->compute_by_tree_helper(vec, len, i, i, i, LevelContrib,
558  normalizer->normalize_rhs(1.0, idx),
559  mkl_stepsize, weights, (length!=0));
560  }
561 
562  SG_FREE(vec);
563 }
564 
566 {
// Function body whose signature is absent from this listing: forwards `len`
// to the tries' compute_abs_weights and returns its result. Requires the
// tries to exist.
567  ASSERT(tries)
568  return tries->compute_abs_weights(len);
569 }
570 
572 {
573  ASSERT(degree>0)
574  ASSERT(p_type==E_WD)
575 
576  SG_FREE(weights);
577  weights=SG_MALLOC(float64_t, degree);
579  weights_length=1;
580 
581  if (weights)
582  {
583  int32_t i;
584  float64_t sum=0;
585  for (i=0; i<degree; i++)
586  {
587  weights[i]=degree-i;
588  sum+=weights[i];
589  }
590  for (i=0; i<degree; i++)
591  weights[i]/=sum;
592 
593  for (i=0; i<degree; i++)
594  {
595  for (int32_t j=1; j<=max_mismatch; j++)
596  {
597  if (j<i+1)
598  {
599  int32_t nk=CMath::nchoosek(i+1, j);
600  weights[i+j*degree]=weights[i]/(nk*CMath::pow(3.0,j));
601  }
602  else
603  weights[i+j*degree]= 0;
604  }
605  }
606 
607  if (which_degree>=0)
608  {
609  ASSERT(which_degree<degree)
610  for (i=0; i<degree; i++)
611  {
612  if (i!=which_degree)
613  weights[i]=0;
614  else
615  weights[i]=1;
616  }
617  }
618  return true;
619  }
620  else
621  return false;
622 }
623 
625 {
// Function body whose signature is absent from this listing (the name
// set_weights is inferred - confirm). Installs the weight matrix
// `new_weights`: its row count must equal the current degree, its column
// count becomes `length`, and the entries are copied into a freshly allocated
// `weights` buffer.
// NOTE(review): original lines 639-640 are missing; the assignments to
// weights_degree / weights_length that the allocation below relies on are not
// visible here.
626  float64_t* ws=new_weights.matrix;
627  int32_t d=new_weights.num_rows;
628  int32_t len=new_weights.num_cols;
629 
// NOTE(review): the message prints `degree`, not the offending row count `d`.
630  if (d!=degree || len<0)
631  SG_ERROR("WD: Dimension mismatch (should be (seq_length | 1) x degree) got (%d x %d)\n", len, degree)
632 
633  degree=d;
634  length=len;
635 
// `length` keeps the raw column count (possibly 0), while the copy below
// uses at least one column.
636  if (len <= 0)
637  len=1;
638 
641 
642 
643  SG_DEBUG("Creating weights of size %dx%d\n", weights_degree, weights_length)
644  int32_t num_weights=weights_degree*weights_length;
645  SG_FREE(weights);
646  weights=SG_MALLOC(float64_t, num_weights);
647 
648  for (int32_t i=0; i<degree*len; i++)
649  weights[i]=ws[i];
650 
651  return true;
652 }
653 
655  float64_t* pws, int32_t len)
656 {
// Installs per-position weights copied from pws[0..len) (the first signature
// line is not in this listing; the name set_position_weights is inferred).
// `len` must equal seq_length, otherwise SG_ERROR is raised. Returns true
// once the weights have been copied, false if the allocation yields NULL.
// NOTE(review): original lines 662, 670 and 672 are missing from the listing.
// NOTE(review): the len==0 branch clears the weights but does not return, so
// control continues into the length check and the allocation below - confirm
// whether an early return is intended.
657  if (len==0)
658  {
659  SG_FREE(position_weights);
660  position_weights=NULL;
661  ASSERT(tries)
663  }
664 
665  if (seq_length!=len)
666  SG_ERROR("seq_length = %i, position_weights_length=%i\n", seq_length, len)
667 
668  SG_FREE(position_weights);
669  position_weights=SG_MALLOC(float64_t, len);
671  ASSERT(tries)
673 
674  if (position_weights)
675  {
676  for (int32_t i=0; i<len; i++)
677  position_weights[i]=pws[i];
678  return true;
679  }
680  else
681  return false;
682 }
683 
685 {
// Function body whose signature is absent from this listing (the name
// init_block_weights_from_wd is inferred - confirm). Fills block_weights in
// closed form: a cubic polynomial in k for k < degree and a linear expression
// in k for degree <= k < seq_length.
// NOTE(review): original line 687 is missing; the re-allocation of
// block_weights after the SG_FREE below is not visible here.
686  SG_FREE(block_weights);
688 
689  int32_t k;
690  float64_t d=degree; // use float to evade rounding errors below
691 
692  for (k=0; k<degree; k++)
693  block_weights[k]=
694  (-CMath::pow(k, 3)+(3*d-3)*CMath::pow(k, 2)+(9*d-2)*k+6*d)/(3*d*(d+1));
695  for (k=degree; k<seq_length; k++)
696  block_weights[k]=(-d+3*k+4)/3;
697 
698  return true;
699 }
700 
702 {
// Function body whose signature is absent from this listing (the name
// init_block_weights_from_wd_external is inferred - confirm). Derives
// block_weights from the current `weights` array over
// max(seq_length, degree) entries.
// NOTE(review): original lines 705 and 714 are missing; the re-allocation of
// block_weights and the first statement of the second loop body are not
// visible here.
703  ASSERT(weights)
704  SG_FREE(block_weights);
706 
707  int32_t i=0;
708  block_weights[0]=weights[0];
709  for (i=1; i<CMath::max(seq_length,degree); i++)
710  block_weights[i]=0;
711 
712  for (i=1; i<CMath::max(seq_length,degree); i++)
713  {
715 
// contrib is the sum of the first min(degree, i+1) entries of weights
716  float64_t contrib=0;
717  for (int32_t j=0; j<CMath::min(degree,i+1); j++)
718  contrib+=weights[j];
719 
720  block_weights[i]+=contrib;
721  }
722  return true;
723 }
724 
726 {
727  SG_FREE(block_weights);
728  block_weights=SG_MALLOC(float64_t, seq_length);
729 
730  for (int32_t i=1; i<seq_length+1 ; i++)
731  block_weights[i-1]=1.0/seq_length;
732  return true;
733 }
734 
736 {
737  SG_FREE(block_weights);
738  block_weights=SG_MALLOC(float64_t, seq_length);
739 
740  for (int32_t i=1; i<seq_length+1 ; i++)
741  block_weights[i-1]=degree*i;
742 
743  return true;
744 }
745 
747 {
748  SG_FREE(block_weights);
749  block_weights=SG_MALLOC(float64_t, seq_length);
750 
751  for (int32_t i=1; i<degree+1 ; i++)
752  block_weights[i-1]=((float64_t) i)*i;
753 
754  for (int32_t i=degree+1; i<seq_length+1 ; i++)
755  block_weights[i-1]=i;
756 
757  return true;
758 }
759 
761 {
762  SG_FREE(block_weights);
763  block_weights=SG_MALLOC(float64_t, seq_length);
764 
765  for (int32_t i=1; i<degree+1 ; i++)
766  block_weights[i-1]=((float64_t) i)*i*i;
767 
768  for (int32_t i=degree+1; i<seq_length+1 ; i++)
769  block_weights[i-1]=i;
770  return true;
771 }
772 
774 {
775  SG_FREE(block_weights);
776  block_weights=SG_MALLOC(float64_t, seq_length);
777 
778  for (int32_t i=1; i<degree+1 ; i++)
779  block_weights[i-1]=exp(((float64_t) i/10.0));
780 
781  for (int32_t i=degree+1; i<seq_length+1 ; i++)
782  block_weights[i-1]=i;
783 
784  return true;
785 }
786 
788 {
// Function body whose signature is absent from this listing (the name
// init_block_weights_log is inferred - confirm). Allocates seq_length block
// weights; entries for block lengths above `degree` are
// i-degree+1+log(degree+1)^2.
// NOTE(review): original line 793 - the body of the first loop - is missing.
// As shown, the second `for` appears to be the body of the first, which is
// probably not what the real source does.
789  SG_FREE(block_weights);
790  block_weights=SG_MALLOC(float64_t, seq_length);
791 
792  for (int32_t i=1; i<degree+1 ; i++)
794 
795  for (int32_t i=degree+1; i<seq_length+1 ; i++)
796  block_weights[i-1]=i-degree+1+CMath::pow(CMath::log(degree+1.0),2);
797 
798  return true;
799 }
800 
802 {
// Function body whose signature is absent from this listing (the name
// init_block_weights is inferred - confirm). Dispatches on the kernel `type`
// to the matching init_block_weights_* routine and returns its result;
// returns false if no case returns.
// NOTE(review): original lines 806, 808 and 816 - the return statements for
// E_WD, E_EXTERNAL and E_BLOCK_CUBICPOLY - are missing. As shown those cases
// appear to fall through to the next visible return.
803  switch (type)
804  {
805  case E_WD:
807  case E_EXTERNAL:
809  case E_BLOCK_CONST:
810  return init_block_weights_const();
811  case E_BLOCK_LINEAR:
812  return init_block_weights_linear();
813  case E_BLOCK_SQPOLY:
814  return init_block_weights_sqpoly();
815  case E_BLOCK_CUBICPOLY:
817  case E_BLOCK_EXP:
818  return init_block_weights_exp();
819  case E_BLOCK_LOG:
820  return init_block_weights_log();
821  };
822  return false;
823 }
824 
825 
827 {
// Worker routine (the signature line is not in this listing; compute_batch
// passes it to pthread_create as CWeightedDegreeStringKernel::compute_batch_helper).
// `p` points to an S_THREAD_PARAM_WD. For every i in [start, end) the routine
// evaluates feature vector vec_idx[i] against tree j and adds
// factor * normalized value to result[i]. Always returns NULL.
// NOTE(review): original line 839, which presumably declares rhs_feat, is
// missing from the listing.
828  S_THREAD_PARAM_WD* params = (S_THREAD_PARAM_WD*) p;
829  int32_t j=params->j;
830  CWeightedDegreeStringKernel* wd=params->kernel;
831  CTrie<DNATrie>* tries=params->tries;
832  float64_t* weights=params->weights;
833  int32_t length=params->length;
834  int32_t* vec=params->vec;
835  float64_t* result=params->result;
836  float64_t factor=params->factor;
837  int32_t* vec_idx=params->vec_idx;
838 
840  CAlphabet* alpha=wd->alphabet;
841 
842  for (int32_t i=params->start; i<params->end; i++)
843  {
844  int32_t len=0;
845  bool free_vec;
846  char* char_vec=rhs_feat->get_feature_vector(vec_idx[i], len, free_vec);
// Only the window [j, j+degree) is translated into vec.
847  for (int32_t k=j; k<CMath::min(len,j+wd->get_degree()); k++)
848  vec[k]=alpha->remap_to_bin(char_vec[k]);
849  rhs_feat->free_feature_vector(char_vec, vec_idx[i], free_vec);
850 
851  ASSERT(tries)
852 
853  result[i]+=factor*
854  wd->normalizer->normalize_rhs(tries->compute_by_tree_helper(vec, len, j, j, j, weights, (length!=0)), vec_idx[i]);
855  }
856 
857  SG_UNREF(rhs_feat);
858 
859  return NULL;
860 }
861 
863  int32_t num_vec, int32_t* vec_idx, float64_t* result, int32_t num_suppvec,
864  int32_t* IDX, float64_t* alphas, float64_t factor)
865 {
// Batch evaluation (the first signature line is not in this listing; the name
// compute_batch is inferred). For every position j in [0, num_feat) the
// optimization for tree j is rebuilt from the num_suppvec examples
// (IDX, alphas), and compute_batch_helper then adds the factor-scaled outputs
// for the num_vec vectors listed in vec_idx into `result`.
// NOTE(review): original lines 862, 867-868, 874, 887, 912 and 963 are
// missing from the listing.
// NOTE(review): the multi-threaded branch only exists under HAVE_PTHREAD; as
// shown, nothing is computed when num_threads >= 2 and HAVE_PTHREAD is
// undefined - confirm.
866  ASSERT(tries)
869  ASSERT(rhs)
870  ASSERT(num_vec<=rhs->get_num_vectors())
871  ASSERT(num_vec>0)
872  ASSERT(vec_idx)
873  ASSERT(result)
875 
876  int32_t num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length();
877  ASSERT(num_feat>0)
878  int32_t num_threads=parallel->get_num_threads();
879  ASSERT(num_threads>0)
// One scratch row of num_feat symbols per worker.
880  int32_t* vec=SG_MALLOC(int32_t, num_threads*num_feat);
881 
882  if (num_threads < 2)
883  {
// Outside CYGWIN the loop also stops when CSignal::cancel_computations()
// reports true.
884 #ifdef CYGWIN
885  for (int32_t j=0; j<num_feat; j++)
886 #else
888  for (int32_t j=0; j<num_feat && !CSignal::cancel_computations(); j++)
889 #endif
890  {
891  init_optimization(num_suppvec, IDX, alphas, j);
892  S_THREAD_PARAM_WD params;
893  params.vec=vec;
894  params.result=result;
895  params.weights=weights;
896  params.kernel=this;
897  params.tries=tries;
898  params.factor=factor;
899  params.j=j;
900  params.start=0;
901  params.end=num_vec;
902  params.length=length;
903  params.vec_idx=vec_idx;
904  compute_batch_helper((void*) &params);
905 
906  SG_PROGRESS(j,0,num_feat)
907  }
908  }
909 #ifdef HAVE_PTHREAD
910  else
911  {
913  for (int32_t j=0; j<num_feat && !CSignal::cancel_computations(); j++)
914  {
915  init_optimization(num_suppvec, IDX, alphas, j);
916  pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
917  S_THREAD_PARAM_WD* params = SG_MALLOC(S_THREAD_PARAM_WD, num_threads);
918  int32_t step= num_vec/num_threads;
919  int32_t t;
920 
// The first num_threads-1 slices of `step` vectors each go to new threads;
// each worker gets its own scratch row and a disjoint [start, end) range.
921  for (t=0; t<num_threads-1; t++)
922  {
923  params[t].vec=&vec[num_feat*t];
924  params[t].result=result;
925  params[t].weights=weights;
926  params[t].kernel=this;
927  params[t].tries=tries;
928  params[t].factor=factor;
929  params[t].j=j;
930  params[t].start = t*step;
931  params[t].end = (t+1)*step;
932  params[t].length=length;
933  params[t].vec_idx=vec_idx;
934  pthread_create(&threads[t], NULL, CWeightedDegreeStringKernel::compute_batch_helper, (void*)&params[t]);
935  }
// The calling thread handles the last slice, which also absorbs the
// remainder up to num_vec.
936  params[t].vec=&vec[num_feat*t];
937  params[t].result=result;
938  params[t].weights=weights;
939  params[t].kernel=this;
940  params[t].tries=tries;
941  params[t].factor=factor;
942  params[t].j=j;
943  params[t].start=t*step;
944  params[t].end=num_vec;
945  params[t].length=length;
946  params[t].vec_idx=vec_idx;
947  compute_batch_helper((void*) &params[t]);
948 
949  for (t=0; t<num_threads-1; t++)
950  pthread_join(threads[t], NULL);
951  SG_PROGRESS(j,0,num_feat)
952 
953  SG_FREE(params);
954  SG_FREE(threads);
955  }
956  }
957 #endif
958 
959  SG_FREE(vec);
960 
961  //really also free memory as this can be huge on testing especially when
962  //using the combined kernel
964 }
965 
967 {
968  if (type==E_EXTERNAL && max!=0)
969  return false;
970 
971  max_mismatch=max;
972 
973  if (lhs!=NULL && rhs!=NULL)
974  return init(lhs, rhs);
975  else
976  return true;
977 }
978 
// Sets every member to its default value and registers the members shown
// below as parameters via SG_ADD. Called first by each constructor in this
// file.
// Defaults set here: all weight buffers NULL, degree 1, no mismatches, block
// computation enabled, type E_WD, which_degree -1 (all degrees), no
// tries/alphabet, no features attached.
// NOTE(review): original lines 986, 1008, 1010, 1012 and 1014 are missing
// from the listing; the two string pairs after `rhs=NULL` are the tails of
// registration calls whose first lines were dropped.
979 void CWeightedDegreeStringKernel::init()
980 {
981  weights=NULL;
982  weights_degree=0;
983  weights_length=0;
984 
985  position_weights=NULL;
987 
988  weights_buffer=NULL;
989  mkl_stepsize=1;
990  degree=1;
991  length=0;
992 
993  max_mismatch=0;
994  seq_length=0;
995 
996  block_weights=NULL;
997  block_computation=true;
998  type=E_WD;
999  which_degree=-1;
1000  tries=NULL;
1001 
1002  tree_initialized=false;
1003  alphabet=NULL;
1004 
1005  lhs=NULL;
1006  rhs=NULL;
1007 
1009 
1011 
1013  "weights", "WD Kernel weights.");
1015  "position_weights",
1016  "Weights per position.");
1017  SG_ADD(&mkl_stepsize, "mkl_stepsize", "MKL step size.", MS_AVAILABLE);
1018  SG_ADD(&degree, "degree", "Order of WD kernel.", MS_AVAILABLE);
1019  SG_ADD(&max_mismatch, "max_mismatch",
1020  "Number of allowed mismatches.", MS_AVAILABLE);
1021  SG_ADD(&block_computation, "block_computation",
1022  "If block computation shall be used.", MS_NOT_AVAILABLE);
1023  SG_ADD((machine_int_t*) &type, "type",
1024  "WeightedDegree kernel type.", MS_AVAILABLE);
1025  SG_ADD(&which_degree, "which_degree",
1026  "The selected degree. All degrees are used by default (for value -1).",
1027  MS_AVAILABLE);
1028  SG_ADD((CSGObject**) &alphabet, "alphabet",
1029  "Alphabet of Features.", MS_NOT_AVAILABLE);
1030 }

SHOGUN Machine Learning Toolbox - Documentation