SHOGUN  4.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
WeightedDegreeStringKernel.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #ifndef _WEIGHTEDDEGREESTRINGKERNEL_H___
13 #define _WEIGHTEDDEGREESTRINGKERNEL_H___
14 
15 #include <shogun/lib/config.h>
16 
17 #include <shogun/lib/common.h>
18 #include <shogun/lib/Trie.h>
22 
23 namespace shogun
24 {
25 
28 {
29  E_WD=0,
31 
38 };
39 
40 
56 {
57  public:
58 
63 
64 
71 
77 
85  CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t degree);
86 
88 
95  virtual bool init(CFeatures* l, CFeatures* r);
96 
98  virtual void cleanup();
99 
108  {
109  return type;
110  }
111 
117 
/** @return the constant string "WeightedDegreeStringKernel" */
122  virtual const char* get_name() const {
123  return "WeightedDegreeStringKernel";
124  }
125 
/** Convenience overload: forwards to the four-argument
 * init_optimization() with tree_num fixed to -1.
 *
 * @param count forwarded unchanged
 * @param IDX forwarded unchanged
 * @param alphas forwarded unchanged
 * @return the result of the four-argument overload
 */
133  virtual bool init_optimization(
134  int32_t count, int32_t *IDX, float64_t* alphas)
135  {
136  return init_optimization(count, IDX, alphas, -1);
137  }
138 
149  virtual bool init_optimization(
150  int32_t count, int32_t *IDX, float64_t* alphas, int32_t tree_num);
151 
156  virtual bool delete_optimization();
157 
/** Returns compute_by_tree(idx) when get_is_initialized() is true;
 * otherwise reports an error through SG_ERROR.
 *
 * @param idx index handed to compute_by_tree()
 * @return result of compute_by_tree(idx), or 0 on the error path
 */
163  virtual float64_t compute_optimized(int32_t idx)
164  {
165  if (get_is_initialized())
166  return compute_by_tree(idx);
167 
168  SG_ERROR("CWeightedDegreeStringKernel optimization not initialized\n")
// only reached if SG_ERROR hands control back to this function
169  return 0;
170  }
171 
176  static void* compute_batch_helper(void* p);
177 
188  virtual void compute_batch(
189  int32_t num_vec, int32_t* vec_idx, float64_t* target,
190  int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
191  float64_t factor=1.0);
192 
/** When get_is_initialized() is true, resets the initialized flag via
 * set_is_initialized(false); does nothing otherwise.
 *
 * NOTE(review): the embedded listing numbers skip 201 and 204, so two
 * source lines are absent from this extract. The statement that guards
 * SG_ERROR("not implemented") (if any) and the statement before
 * set_is_initialized(false) are therefore not visible here.
 */
196  virtual void clear_normal()
197  {
198  if (get_is_initialized())
199  {
200 
202  SG_ERROR("not implemented")
203 
205  set_is_initialized(false);
206  }
207  }
208 
/** Adds one weighted example and marks the kernel as initialized.
 *
 * Calls add_example_to_tree() when max_mismatch is 0 and
 * add_example_to_tree_mismatch() otherwise, then set_is_initialized(true).
 *
 * NOTE(review): listing line 217 is absent from this extract, so the
 * condition (if any) guarding SG_ERROR("not implemented") is not visible.
 *
 * @param idx index forwarded to the add_example_* call
 * @param weight weight forwarded to the add_example_* call
 */
214  virtual void add_to_normal(int32_t idx, float64_t weight)
215  {
216 
218  SG_ERROR("not implemented")
219 
220  if (max_mismatch==0)
221  add_example_to_tree(idx, weight);
222  else
223  add_example_to_tree_mismatch(idx, weight);
224 
225  set_is_initialized(true);
226  }
227 
/** Returns the number of subkernels.
 *
 * Visible cases, in order:
 *  - the normalizer's get_num_betas() (see note below),
 *  - ceil(seq_length / mkl_stepsize) when position_weights is non-NULL,
 *  - ceil(get_degree() / mkl_stepsize) when length is 0,
 *  - ceil(get_degree() * length / mkl_stepsize) otherwise.
 * The leading 1.0* makes each division floating point before ceil().
 *
 * NOTE(review): listing line 234 is absent from this extract; as shown,
 * the first return looks unconditional. Presumably the missing line is
 * the condition selecting the normalizer case - confirm against the
 * original header.
 */
232  virtual int32_t get_num_subkernels()
233  {
235  return ((CMultitaskKernelMklNormalizer*)normalizer)->get_num_betas();
236  if (position_weights!=NULL)
237  return (int32_t) ceil(1.0*seq_length/mkl_stepsize) ;
238  if (length==0)
239  return (int32_t) ceil(1.0*get_degree()/mkl_stepsize);
240  return (int32_t) ceil(1.0*get_degree()*length/mkl_stepsize) ;
241  }
242 
/** When get_is_initialized() is true, calls
 * compute_by_tree(idx, subkernel_contrib) and returns; otherwise reports
 * an error through SG_ERROR.
 *
 * NOTE(review): listing line 255 is absent from this extract, so the
 * condition (if any) guarding SG_ERROR("not implemented") is not visible.
 *
 * @param idx index forwarded to compute_by_tree()
 * @param subkernel_contrib buffer forwarded to compute_by_tree()
 */
248  inline void compute_by_subkernel(
249  int32_t idx, float64_t * subkernel_contrib)
250  {
251 
252  if (get_is_initialized())
253  {
254 
256  SG_ERROR("not implemented")
257 
258  compute_by_tree(idx, subkernel_contrib);
259  return ;
260  }
261 
262  SG_ERROR("CWeightedDegreeStringKernel optimization not initialized\n")
263  }
264 
/** Fills weights_buffer with one entry per subkernel and returns it.
 *
 * Sets num_weights to get_num_subkernels(), frees the previous
 * weights_buffer and allocates a new one with num_weights entries.
 * In the final (else) branch entry i is weights[i*mkl_stepsize].
 *
 * NOTE(review): listing lines 278, 280 and 283 are absent from this
 * extract: the first condition and the loop bodies of the first two
 * branches are not visible here.
 *
 * @param num_weights set to the number of entries in the returned buffer
 * @return weights_buffer
 */
270  inline const float64_t* get_subkernel_weights(int32_t& num_weights)
271  {
272 
273  num_weights = get_num_subkernels();
274 
// the previous buffer is released before the new one is allocated
275  SG_FREE(weights_buffer);
276  weights_buffer = SG_MALLOC(float64_t, num_weights);
277 
279  for (int32_t i=0; i<num_weights; i++)
281  else if (position_weights!=NULL)
282  for (int32_t i=0; i<num_weights; i++)
284  else
285  for (int32_t i=0; i<num_weights; i++)
286  weights_buffer[i] = weights[i*mkl_stepsize];
287 
288  return weights_buffer;
289  }
290 
296  {
297  float64_t* weights2=w.vector;
298  int32_t num_weights2=w.vlen;
299  int32_t num_weights = get_num_subkernels();
300  if (num_weights!=num_weights2)
301  SG_ERROR("number of weights do not match\n")
302 
303 
305  for (int32_t i=0; i<num_weights; i++)
306  ((CMultitaskKernelMklNormalizer*)normalizer)->set_beta(i, weights2[i]);
307  else if (position_weights!=NULL)
308  {
309  for (int32_t i=0; i<num_weights; i++)
310  {
311  for (int32_t j=0; j<mkl_stepsize; j++)
312  {
313  if (i*mkl_stepsize+j<seq_length)
314  position_weights[i*mkl_stepsize+j] = weights2[i];
315  }
316  }
317  }
318  else if (length==0)
319  {
320  for (int32_t i=0; i<num_weights; i++)
321  {
322  for (int32_t j=0; j<mkl_stepsize; j++)
323  {
324  if (i*mkl_stepsize+j<get_degree())
325  weights[i*mkl_stepsize+j] = weights2[i];
326  }
327  }
328  }
329  else
330  {
331  for (int32_t i=0; i<num_weights; i++)
332  {
333  for (int32_t j=0; j<mkl_stepsize; j++)
334  {
335  if (i*mkl_stepsize+j<get_degree()*length)
336  weights[i*mkl_stepsize+j] = weights2[i];
337  }
338  }
339  }
340  }
341 
/** Branches on whether normalizer_ is non-NULL and its get_name() equals
 * "MultitaskKernelTreeNormalizer", then delegates to
 * CStringKernel<char>::set_normalizer().
 *
 * NOTE(review): listing lines 349-350 and 354-355 are absent from this
 * extract, so the bodies of both branches are not visible here.
 *
 * @param normalizer_ normalizer to install; may be NULL (checked below)
 * @return result of CStringKernel<char>::set_normalizer(normalizer_)
 */
346  virtual bool set_normalizer(CKernelNormalizer* normalizer_) {
347 
348  if (normalizer_ && strcmp(normalizer_->get_name(),"MultitaskKernelTreeNormalizer")==0) {
351  }
352  else
353  {
356  }
357 
358 
359  return CStringKernel<char>::set_normalizer(normalizer_);
360 
361  }
362 
363  // other kernel tree operations
369  float64_t *compute_abs_weights(int32_t & len);
370 
377  void compute_by_tree(int32_t idx, float64_t *LevelContrib);
378 
384 
/** Returns the weights pointer together with degree and length.
 *
 * @param d set to the member degree
 * @param len set to the member length
 * @return the member pointer weights (no copy is made)
 */
390  inline float64_t *get_degree_weights(int32_t& d, int32_t& len)
391  {
392  d=degree;
393  len=length;
394  return weights;
395  }
396 
/** Returns the active weight array and its number of entries.
 *
 * If position_weights is non-NULL it is returned with
 * num_weights = seq_length. Otherwise weights is returned with
 * num_weights = degree when length is 0, else degree*length.
 *
 * NOTE(review): listing line 405 is absent from this extract, so the
 * condition (if any) guarding SG_ERROR("not implemented") is not visible.
 *
 * @param num_weights set to the number of entries in the returned array
 * @return position_weights or weights (member pointer, no copy)
 */
402  inline float64_t *get_weights(int32_t& num_weights)
403  {
404 
406  SG_ERROR("not implemented")
407 
408  if (position_weights!=NULL)
409  {
410  num_weights = seq_length ;
411  return position_weights ;
412  }
413  if (length==0)
414  num_weights = degree ;
415  else
416  num_weights = degree*length ;
417  return weights;
418  }
419 
/** Returns the position_weights pointer.
 *
 * @param len set to the member seq_length
 * @return the member pointer position_weights (no copy is made)
 */
425  inline float64_t *get_position_weights(int32_t& len)
426  {
427  len=seq_length;
428  return position_weights;
429  }
430 
437 
/** Passes a weight vector to set_weights() wrapped in an SGMatrix.
 *
 * The matrix is constructed from (new_weights.vector, new_weights.vlen, 0)
 * and the bool result of set_weights() is discarded.
 *
 * @param new_weights vector whose data pointer and length are wrapped
 */
442  inline void set_wd_weights(SGVector<float64_t> new_weights)
443  {
444  SGMatrix<float64_t> matrix = SGMatrix<float64_t>(new_weights.vector,new_weights.vlen,0);
445  set_weights(matrix);
// presumably detaches the wrapper from the caller's buffer so it is not
// released with the temporary - TODO confirm against SGMatrix
446  matrix.matrix = NULL;
447  }
448 
453  bool set_weights(SGMatrix<float64_t> new_weights);
454 
461  bool set_position_weights(float64_t* pws, int32_t len);
462 
467  bool init_block_weights();
468 
474 
480 
486 
492 
498 
504 
509  bool init_block_weights_exp();
510 
515  bool init_block_weights_log();
516 
522  {
523  SG_FREE(position_weights);
524  position_weights=NULL;
525  return true;
526  }
527 
533  bool set_max_mismatch(int32_t max);
534 
/** @return the member max_mismatch */
539  inline int32_t get_max_mismatch() const { return max_mismatch; }
540 
/** Stores deg in the member degree without validation.
 *
 * @param deg new degree
 * @return always true
 */
546  inline bool set_degree(int32_t deg) { degree=deg; return true; }
547 
/** @return the member degree */
552  inline int32_t get_degree() const { return degree; }
553 
560  {
562  return true;
563  }
564 
570 
/** Stores step in the member mkl_stepsize.
 *
 * A value below 1 is reported through SG_ERROR. The assignment follows
 * that call unconditionally, so whether it is reached for an invalid
 * step depends on whether SG_ERROR returns (not visible here).
 *
 * @param step new step size, expected to be >= 1
 * @return true
 */
576  inline bool set_mkl_stepsize(int32_t step)
577  {
578  if (step<1)
579  SG_ERROR("Stepsize must be a positive integer\n")
580  mkl_stepsize=step;
581  return true;
582  }
583 
/** @return the member mkl_stepsize
 *
 * NOTE(review): not const-qualified, unlike get_degree() and
 * get_max_mismatch().
 */
588  inline int32_t get_mkl_stepsize() { return mkl_stepsize; }
589 
/** Stores which in the member which_degree without validation.
 *
 * @param which new value for which_degree
 * @return always true
 */
595  inline bool set_which_degree(int32_t which)
596  {
597  which_degree=which;
598  return true;
599  }
600 
/** @return the member which_degree
 *
 * NOTE(review): not const-qualified, unlike get_degree() and
 * get_max_mismatch().
 */
605  inline int32_t get_which_degree() { return which_degree; }
606 
607  protected:
609  void create_empty_tries();
610 
616  void add_example_to_tree(int32_t idx, float64_t weight);
617 
625  int32_t idx, float64_t weight, int32_t tree_num);
626 
632  void add_example_to_tree_mismatch(int32_t idx, float64_t weight);
633 
641  int32_t idx, float64_t weight, int32_t tree_num);
642 
648  float64_t compute_by_tree(int32_t idx);
649 
658  float64_t compute(int32_t idx_a, int32_t idx_b);
659 
669  char* avec, int32_t alen, char* bvec, int32_t blen);
670 
680  char* avec, int32_t alen, char* bvec, int32_t blen);
681 
691  char* avec, int32_t alen, char* bvec, int32_t blen);
692 
701  float64_t compute_using_block(char* avec, int32_t alen,
702  char* bvec, int32_t blen);
703 
705  virtual void remove_lhs();
706 
707  private:
710  void init();
711 
712  protected:
718  int32_t weights_degree;
720  int32_t weights_length;
721 
722 
730  int32_t mkl_stepsize;
732  int32_t degree;
734  int32_t length;
735 
737  int32_t max_mismatch;
739  int32_t seq_length;
740 
743 
746 
752  int32_t which_degree;
753 
756 
759 
762 };
763 
764 }
765 
766 #endif /* _WEIGHTEDDEGREESTRINGKERNEL_H___ */
virtual const char * get_name() const =0
float64_t compute_with_mismatch(char *avec, int32_t alen, char *bvec, int32_t blen)
void delete_trees(bool p_use_compact_terminal_nodes=true)
Definition: Trie.h:1171
virtual void add_to_normal(int32_t idx, float64_t weight)
ENormalizerType get_normalizer_type()
EKernelType
Definition: Kernel.h:57
virtual float64_t compute_optimized(int32_t idx)
void add_example_to_single_tree(int32_t idx, float64_t weight, int32_t tree_num)
float64_t compute_using_block(char *avec, int32_t alen, char *bvec, int32_t blen)
virtual bool set_normalizer(CKernelNormalizer *normalizer)
Definition: Kernel.cpp:150
void add_example_to_single_tree_mismatch(int32_t idx, float64_t weight, int32_t tree_num)
#define SG_ERROR(...)
Definition: SGIO.h:129
void set_is_initialized(bool p_init)
Definition: Kernel.h:899
The class Alphabet implements an alphabet and alphabet utility functions.
Definition: Alphabet.h:91
bool set_position_weights(float64_t *pws, int32_t len)
virtual bool set_normalizer(CKernelNormalizer *normalizer_)
float64_t * get_weights(int32_t &num_weights)
virtual void compute_batch(int32_t num_vec, int32_t *vec_idx, float64_t *target, int32_t num_suppvec, int32_t *IDX, float64_t *alphas, float64_t factor=1.0)
bool get_is_initialized()
Definition: Kernel.h:753
index_t vlen
Definition: SGVector.h:494
virtual bool init_optimization(int32_t count, int32_t *IDX, float64_t *alphas)
float64_t compute_without_mismatch(char *avec, int32_t alen, char *bvec, int32_t blen)
double float64_t
Definition: common.h:50
void unset_property(EKernelProperty p)
Definition: Kernel.h:890
void compute_by_tree(int32_t idx, float64_t *LevelContrib)
The Weighted Degree String kernel.
float64_t * get_degree_weights(int32_t &d, int32_t &len)
virtual void set_subkernel_weights(SGVector< float64_t > w)
float64_t compute_without_mismatch_matrix(char *avec, int32_t alen, char *bvec, int32_t blen)
virtual bool init(CFeatures *l, CFeatures *r)
The class Kernel Normalizer defines a function to post-process kernel values.
Base-class for parameterized Kernel Normalizers.
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
The class Features is the base class of all feature objects.
Definition: Features.h:68
void add_example_to_tree(int32_t idx, float64_t weight)
void compute_by_subkernel(int32_t idx, float64_t *subkernel_contrib)
float64_t compute(int32_t idx_a, int32_t idx_b)
CKernelNormalizer * normalizer
Definition: Kernel.h:1086
void set_wd_weights(SGVector< float64_t > new_weights)
Matrix::Scalar max(Matrix m)
Definition: Redux.h:66
const float64_t * get_subkernel_weights(int32_t &num_weights)
Template class StringKernel, is the base class of all String Kernels.
Definition: StringKernel.h:26
void add_example_to_tree_mismatch(int32_t idx, float64_t weight)
bool set_weights(SGMatrix< float64_t > new_weights)
void set_property(EKernelProperty p)
Definition: Kernel.h:881
Block< Matrix > block(Matrix matrix, index_t row_begin, index_t col_begin, index_t row_size, index_t col_size)
Definition: Block.h:102

SHOGUN Machine Learning Toolbox - Documentation