SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
WeightedDegreeStringKernel.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #ifndef _WEIGHTEDDEGREESTRINGKERNEL_H___
13 #define _WEIGHTEDDEGREESTRINGKERNEL_H___
14 
15 #include <shogun/lib/common.h>
16 #include <shogun/lib/Trie.h>
20 
21 namespace shogun
22 {
23 
26 {
27  E_WD=0,
29 
36 };
37 
38 
54 {
55  public:
56 
61 
62 
69 
75 
83  CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t degree);
84 
86 
93  virtual bool init(CFeatures* l, CFeatures* r);
94 
96  virtual void cleanup();
97 
106  {
107  return type;
108  }
109 
115 
120  virtual const char* get_name() const {
121  return "WeightedDegreeStringKernel";
122  }
123 
131  inline virtual bool init_optimization(
132  int32_t count, int32_t *IDX, float64_t* alphas)
133  {
134  return init_optimization(count, IDX, alphas, -1);
135  }
136 
147  virtual bool init_optimization(
148  int32_t count, int32_t *IDX, float64_t* alphas, int32_t tree_num);
149 
154  virtual bool delete_optimization();
155 
161  virtual float64_t compute_optimized(int32_t idx)
162  {
163  if (get_is_initialized())
164  return compute_by_tree(idx);
165 
166  SG_ERROR( "CWeightedDegreeStringKernel optimization not initialized\n");
167  return 0;
168  }
169 
174  static void* compute_batch_helper(void* p);
175 
186  virtual void compute_batch(
187  int32_t num_vec, int32_t* vec_idx, float64_t* target,
188  int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
189  float64_t factor=1.0);
190 
/** Reset the optimization state.
 *
 * Only acts when get_is_initialized() is true; the last visible step is
 * set_is_initialized(false).
 *
 * NOTE(review): the embedded listing numbers jump 198 -> 200 and
 * 201 -> 203, so two source lines are absent from this extract. The
 * SG_ERROR call below is presumably guarded by a condition on the first
 * missing line, and the second missing line presumably does the actual
 * clean-up work -- confirm against the real header.
 */
194  inline virtual void clear_normal()
195  {
196  if (get_is_initialized())
197  {
198 
200  SG_ERROR("not implemented");
201 
203  set_is_initialized(false);
204  }
205  }
206 
/** Add one weighted example to the tree structure.
 *
 * With max_mismatch==0 the example is passed to add_example_to_tree,
 * otherwise to add_example_to_tree_mismatch. Afterwards the kernel is
 * flagged as initialized via set_is_initialized(true).
 *
 * NOTE(review): listing line 215 is absent from this extract (numbers
 * jump 214 -> 216); the SG_ERROR call is presumably conditional on it --
 * confirm against the real header.
 *
 * @param idx index forwarded to the add_example_to_tree* helper
 * @param weight weight forwarded to the add_example_to_tree* helper
 */
212  inline virtual void add_to_normal(int32_t idx, float64_t weight)
213  {
214 
216  SG_ERROR("not implemented");
217 
218  if (max_mismatch==0)
219  add_example_to_tree(idx, weight);
220  else
221  add_example_to_tree_mismatch(idx, weight);
222 
223  set_is_initialized(true);
224  }
225 
/** Report the number of subkernels.
 *
 * Visible cases, in order:
 *  - the normalizer (cast to CMultitaskKernelMklNormalizer) supplies
 *    get_num_betas();
 *  - position_weights set: ceil(seq_length / mkl_stepsize);
 *  - length==0: ceil(get_degree() / mkl_stepsize);
 *  - otherwise: ceil(get_degree()*length / mkl_stepsize).
 *
 * NOTE(review): listing line 232 is absent from this extract (numbers
 * jump 231 -> 233). As shown, the first return would be unconditional
 * and everything after it unreachable; presumably the missing line holds
 * the condition that selects the normalizer case -- confirm against the
 * real header.
 *
 * @return number of subkernels for the case that applies
 */
230  inline virtual int32_t get_num_subkernels()
231  {
233  return ((CMultitaskKernelMklNormalizer*)normalizer)->get_num_betas();
234  if (position_weights!=NULL)
235  return (int32_t) ceil(1.0*seq_length/mkl_stepsize) ;
236  if (length==0)
237  return (int32_t) ceil(1.0*get_degree()/mkl_stepsize);
238  return (int32_t) ceil(1.0*get_degree()*length/mkl_stepsize) ;
239  }
240 
/** Compute per-subkernel contributions for one example.
 *
 * When get_is_initialized() is true the work is delegated to
 * compute_by_tree(idx, subkernel_contrib); otherwise an error is raised
 * through SG_ERROR.
 *
 * NOTE(review): listing line 253 is absent from this extract (numbers
 * jump 252 -> 254); the inner SG_ERROR("not implemented") is presumably
 * conditional on it -- confirm against the real header.
 *
 * @param idx index forwarded to compute_by_tree
 * @param subkernel_contrib buffer forwarded to compute_by_tree
 */
246  inline void compute_by_subkernel(
247  int32_t idx, float64_t * subkernel_contrib)
248  {
249 
250  if (get_is_initialized())
251  {
252 
254  SG_ERROR("not implemented");
255 
256  compute_by_tree(idx, subkernel_contrib);
257  return ;
258  }
259 
260  SG_ERROR( "CWeightedDegreeStringKernel optimization not initialized\n");
261  }
262 
/** Return the subkernel weights in a freshly allocated buffer.
 *
 * num_weights is set to get_num_subkernels(), weights_buffer is
 * allocated with that many float64_t entries via SG_MALLOC, filled, and
 * returned. In the last (default) branch entry i is taken from
 * weights[i*mkl_stepsize].
 *
 * NOTE(review): listing lines 273, 276, 278 and 281 are absent from this
 * extract. That hides (a) whether a previously allocated weights_buffer
 * is released before the new allocation, (b) the condition opening the
 * if/else chain, and (c) the loop bodies of the first two branches --
 * confirm against the real header.
 *
 * @param num_weights output: number of entries in the returned buffer
 * @return pointer to weights_buffer
 */
268  inline const float64_t* get_subkernel_weights(int32_t& num_weights)
269  {
270 
271  num_weights = get_num_subkernels();
272 
274  weights_buffer = SG_MALLOC(float64_t, num_weights);
275 
277  for (int32_t i=0; i<num_weights; i++)
279  else if (position_weights!=NULL)
280  for (int32_t i=0; i<num_weights; i++)
282  else
283  for (int32_t i=0; i<num_weights; i++)
284  weights_buffer[i] = weights[i*mkl_stepsize];
285 
286  return weights_buffer;
287  }
288 
294  {
295  float64_t* weights2=w.vector;
296  int32_t num_weights2=w.vlen;
297  int32_t num_weights = get_num_subkernels();
298  if (num_weights!=num_weights2)
299  SG_ERROR( "number of weights do not match\n");
300 
301 
303  for (int32_t i=0; i<num_weights; i++)
304  ((CMultitaskKernelMklNormalizer*)normalizer)->set_beta(i, weights2[i]);
305  else if (position_weights!=NULL)
306  {
307  for (int32_t i=0; i<num_weights; i++)
308  {
309  for (int32_t j=0; j<mkl_stepsize; j++)
310  {
311  if (i*mkl_stepsize+j<seq_length)
312  position_weights[i*mkl_stepsize+j] = weights2[i];
313  }
314  }
315  }
316  else if (length==0)
317  {
318  for (int32_t i=0; i<num_weights; i++)
319  {
320  for (int32_t j=0; j<mkl_stepsize; j++)
321  {
322  if (i*mkl_stepsize+j<get_degree())
323  weights[i*mkl_stepsize+j] = weights2[i];
324  }
325  }
326  }
327  else
328  {
329  for (int32_t i=0; i<num_weights; i++)
330  {
331  for (int32_t j=0; j<mkl_stepsize; j++)
332  {
333  if (i*mkl_stepsize+j<get_degree()*length)
334  weights[i*mkl_stepsize+j] = weights2[i];
335  }
336  }
337  }
338  }
339 
/** Install a kernel normalizer.
 *
 * Distinguishes a non-NULL normalizer whose get_name() equals
 * "MultitaskKernelTreeNormalizer" from every other case, then hands the
 * normalizer to CStringKernel<char>::set_normalizer and returns its
 * result.
 *
 * NOTE(review): listing lines 347-348 and 352-353 are absent from this
 * extract, so both branch bodies appear empty here; what each branch
 * actually does cannot be determined from this text -- confirm against
 * the real header.
 *
 * @param normalizer_ normalizer to install (may be NULL per the check below)
 * @return result of CStringKernel<char>::set_normalizer(normalizer_)
 */
344  virtual bool set_normalizer(CKernelNormalizer* normalizer_) {
345 
346  if (normalizer_ && strcmp(normalizer_->get_name(),"MultitaskKernelTreeNormalizer")==0) {
349  }
350  else
351  {
354  }
355 
356 
357  return CStringKernel<char>::set_normalizer(normalizer_);
358 
359  }
360 
361  // other kernel tree operations
367  float64_t *compute_abs_weights(int32_t & len);
368 
375  void compute_by_tree(int32_t idx, float64_t *LevelContrib);
376 
382 
388  inline float64_t *get_degree_weights(int32_t& d, int32_t& len)
389  {
390  d=degree;
391  len=length;
392  return weights;
393  }
394 
/** Return the active weights array and its length.
 *
 * If position_weights is set, num_weights becomes seq_length and
 * position_weights is returned. Otherwise weights is returned with
 * num_weights set to degree (when length==0) or degree*length.
 *
 * NOTE(review): listing line 403 is absent from this extract (numbers
 * jump 402 -> 404); the SG_ERROR call is presumably conditional on it --
 * confirm against the real header.
 *
 * @param num_weights output: number of entries in the returned array
 * @return position_weights or weights (internal pointer, not a copy)
 */
400  inline float64_t *get_weights(int32_t& num_weights)
401  {
402 
404  SG_ERROR("not implemented");
405 
406  if (position_weights!=NULL)
407  {
408  num_weights = seq_length ;
409  return position_weights ;
410  }
411  if (length==0)
412  num_weights = degree ;
413  else
414  num_weights = degree*length ;
415  return weights;
416  }
417 
423  inline float64_t *get_position_weights(int32_t& len)
424  {
425  len=seq_length;
426  return position_weights;
427  }
428 
435 
440  inline void set_wd_weights(SGVector<float64_t> new_weights)
441  {
442  SGMatrix<float64_t> matrix = SGMatrix<float64_t>(new_weights.vector,new_weights.vlen,0);
443  set_weights(matrix);
444  matrix.matrix = NULL;
445  }
446 
451  bool set_weights(SGMatrix<float64_t> new_weights);
452 
459  bool set_position_weights(float64_t* pws, int32_t len);
460 
465  bool init_block_weights();
466 
472 
478 
484 
490 
496 
502 
507  bool init_block_weights_exp();
508 
513  bool init_block_weights_log();
514 
520  {
522  position_weights=NULL;
523  return true;
524  }
525 
531  bool set_max_mismatch(int32_t max);
532 
537  inline int32_t get_max_mismatch() const { return max_mismatch; }
538 
544  inline bool set_degree(int32_t deg) { degree=deg; return true; }
545 
550  inline int32_t get_degree() const { return degree; }
551 
557  inline bool set_use_block_computation(bool block)
558  {
559  block_computation=block;
560  return true;
561  }
562 
568 
574  inline bool set_mkl_stepsize(int32_t step)
575  {
576  if (step<1)
577  SG_ERROR("Stepsize must be a positive integer\n");
578  mkl_stepsize=step;
579  return true;
580  }
581 
586  inline int32_t get_mkl_stepsize() { return mkl_stepsize; }
587 
593  inline bool set_which_degree(int32_t which)
594  {
595  which_degree=which;
596  return true;
597  }
598 
603  inline int32_t get_which_degree() { return which_degree; }
604 
605  protected:
607  void create_empty_tries();
608 
614  void add_example_to_tree(int32_t idx, float64_t weight);
615 
623  int32_t idx, float64_t weight, int32_t tree_num);
624 
630  void add_example_to_tree_mismatch(int32_t idx, float64_t weight);
631 
639  int32_t idx, float64_t weight, int32_t tree_num);
640 
646  float64_t compute_by_tree(int32_t idx);
647 
656  float64_t compute(int32_t idx_a, int32_t idx_b);
657 
667  char* avec, int32_t alen, char* bvec, int32_t blen);
668 
678  char* avec, int32_t alen, char* bvec, int32_t blen);
679 
689  char* avec, int32_t alen, char* bvec, int32_t blen);
690 
699  float64_t compute_using_block(char* avec, int32_t alen,
700  char* bvec, int32_t blen);
701 
703  virtual void remove_lhs();
704 
705  private:
708  void init();
709 
710  protected:
716  int32_t weights_degree;
718  int32_t weights_length;
719 
720 
728  int32_t mkl_stepsize;
730  int32_t degree;
732  int32_t length;
733 
735  int32_t max_mismatch;
737  int32_t seq_length;
738 
741 
744 
750  int32_t which_degree;
751 
754 
757 
760 };
761 
762 }
763 
764 #endif /* _WEIGHTEDDEGREESTRINGKERNEL_H___ */

SHOGUN Machine Learning Toolbox - Documentation