SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
WeightedDegreePositionStringKernel.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #ifndef _WEIGHTEDDEGREEPOSITIONSTRINGKERNEL_H___
13 #define _WEIGHTEDDEGREEPOSITIONSTRINGKERNEL_H___
14 
15 #include <shogun/lib/common.h>
18 #include <shogun/lib/Trie.h>
19 
20 namespace shogun
21 {
22 
23 class CSVM;
24 
49 {
50  public:
53 
62  int32_t size, int32_t degree,
63  int32_t max_mismatch=0, int32_t mkl_stepsize=1);
64 
75  int32_t size, SGVector<float64_t> weights, int32_t degree,
76  int32_t max_mismatch, SGVector<int32_t> shifts,
77  int32_t mkl_stepsize=1);
78 
86  CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t degree);
87 
89 
/** Declaration only; the body lives in the implementation file.
 *
 * Takes two feature objects and reports a bool result.
 * NOTE(review): going by the parameter names, l and r are presumably the
 * left- and right-hand side features and the result presumably signals
 * success — confirm against the implementation.
 */
96  virtual bool init(CFeatures* l, CFeatures* r);
97 
/** Declaration only; takes no arguments and returns nothing.
 * NOTE(review): presumably releases state set up by init() — confirm in the
 * implementation file.
 */
99  virtual void cleanup();
100 
106 
111  virtual const char* get_name() const { return "WeightedDegreePositionStringKernel"; }
112 
120  virtual bool init_optimization(
121  int32_t p_count, int32_t *IDX, float64_t * alphas)
122  {
123  return init_optimization(p_count, IDX, alphas, -1);
124  }
125 
/** Declaration only: init_optimization() overload with an explicit tree_num.
 *
 * The three-argument overload in this class forwards here with tree_num=-1.
 *
 * @param upto_tree defaults to -1
 * NOTE(review): the meaning of count/IDX/alphas (presumably the number of
 * support vectors, their indices and their coefficients) and of the -1
 * sentinels is not visible in this header — confirm in the implementation.
 */
137  virtual bool init_optimization(
138  int32_t count, int32_t *IDX, float64_t * alphas, int32_t tree_num,
139  int32_t upto_tree=-1);
140 
/** Declaration only; takes no arguments and reports a bool result.
 * NOTE(review): presumably the counterpart of init_optimization() — confirm
 * in the implementation file.
 */
145  virtual bool delete_optimization();
146 
/** Returns compute_by_tree(idx) for the given index.
 *
 * NOTE(review): the embedded listing numbers jump from 153 to 157, so three
 * original lines of this body are missing from this listing; whatever they
 * did (presumably precondition checks) is not visible here.
 */
152  virtual float64_t compute_optimized(int32_t idx)
153  {
157  return compute_by_tree(idx);
158  }
159 
/** Declaration only: static function taking and returning an untyped pointer.
 * NOTE(review): the void* -> void* shape suggests a thread entry point used
 * by compute_batch(); the layout of p is not visible here — confirm.
 */
164  static void* compute_batch_helper(void* p);
165 
/** Declaration only: batch computation over num_vec entries of vec_idx.
 *
 * @param factor defaults to 1.0
 * NOTE(review): presumably results are written to target (one per entry of
 * vec_idx) using num_suppvec support vectors given by IDX/alphas; buffer
 * sizes and ownership are not visible in this header — confirm.
 */
176  virtual void compute_batch(
177  int32_t num_vec, int32_t* vec_idx, float64_t* target,
178  int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
179  float64_t factor=1.0);
180 
/** When the kernel reports get_is_initialized(), deletes the trees held in
 * `tries` and then calls set_is_initialized(false).
 *
 * NOTE(review): the embedded listing numbers skip 186, 188 and 194, so the
 * condition guarding the SG_DEBUG block, one statement inside it, and the
 * first `if` of the opt_type chain are missing from this listing. The code
 * below is therefore not syntactically complete as shown.
 */
184  virtual void clear_normal()
185  {
// (listing line 186 missing: condition for the block below)
187  {
// (listing line 188 missing)
189  SG_DEBUG("disabling compact trie nodes with FASTBUTMEMHUNGRY\n")
190  }
191 
192  if (get_is_initialized())
193  {
// (listing line 194 missing: first branch of the opt_type check)
195  tries.delete_trees(true);
196  else if (opt_type==FASTBUTMEMHUNGRY)
197  tries.delete_trees(false); // still buggy
198  else
199  SG_ERROR("unknown optimization type\n")
200 
// reached for every branch above unless SG_ERROR does not return
201  set_is_initialized(false);
202  }
203  }
204 
210  virtual void add_to_normal(int32_t idx, float64_t weight)
211  {
212  add_example_to_tree(idx, weight);
213  set_is_initialized(true);
214  }
215 
220  virtual int32_t get_num_subkernels()
221  {
222  if (position_weights!=NULL)
223  return (int32_t) ceil(1.0*seq_length/mkl_stepsize) ;
224  if (length==0)
225  return (int32_t) ceil(1.0*get_degree()/mkl_stepsize);
226  return (int32_t) ceil(1.0*get_degree()*length/mkl_stepsize) ;
227  }
228 
234  inline void compute_by_subkernel(
235  int32_t idx, float64_t * subkernel_contrib)
236  {
237  if (get_is_initialized())
238  {
239  compute_by_tree(idx, subkernel_contrib);
240  return ;
241  }
242 
243  SG_ERROR("CWeightedDegreePositionStringKernel optimization not initialized\n")
244  }
245 
/** Returns a freshly allocated buffer of subkernel weights.
 *
 * Sets num_weights to get_num_subkernels(), frees the previous
 * weights_buffer, allocates a new one of num_weights entries, fills it and
 * returns it. The returned pointer is the member weights_buffer, so it is
 * invalidated by the next call (which frees it).
 *
 * NOTE(review): the embedded listing numbers skip 260, so the body of the
 * position_weights loop is missing from this listing and the code below is
 * not syntactically complete as shown.
 */
251  inline const float64_t* get_subkernel_weights(int32_t& num_weights)
252  {
253  num_weights = get_num_subkernels() ;
254 
255  SG_FREE(weights_buffer);
256  weights_buffer = SG_MALLOC(float64_t, num_weights);
257 
258  if (position_weights!=NULL)
259  for (int32_t i=0; i<num_weights; i++)
// (listing line 260 missing: loop body for the position_weights case)
261  else
262  for (int32_t i=0; i<num_weights; i++)
// takes every mkl_stepsize-th entry of weights
263  weights_buffer[i] = weights[i*mkl_stepsize] ;
264 
265  return weights_buffer ;
266  }
267 
273  {
274  float64_t* weights2=w.vector;
275  int32_t num_weights2=w.vlen;
276 
277  int32_t num_weights = get_num_subkernels() ;
278  if (num_weights!=num_weights2)
279  SG_ERROR("number of weights do not match\n")
280 
281  if (position_weights!=NULL)
282  for (int32_t i=0; i<num_weights; i++)
283  for (int32_t j=0; j<mkl_stepsize; j++)
284  {
285  if (i*mkl_stepsize+j<seq_length)
286  position_weights[i*mkl_stepsize+j] = weights2[i] ;
287  }
288  else if (length==0)
289  {
290  for (int32_t i=0; i<num_weights; i++)
291  for (int32_t j=0; j<mkl_stepsize; j++)
292  if (i*mkl_stepsize+j<get_degree())
293  weights[i*mkl_stepsize+j] = weights2[i] ;
294  }
295  else
296  {
297  for (int32_t i=0; i<num_weights; i++)
298  for (int32_t j=0; j<mkl_stepsize; j++)
299  if (i*mkl_stepsize+j<get_degree()*length)
300  weights[i*mkl_stepsize+j] = weights2[i] ;
301  }
302  }
303 
304  // other kernel tree operations
/** Declaration only: returns a float64_t pointer and takes len by reference.
 * NOTE(review): len is presumably an output giving the length of the
 * returned array; ownership of the array is not visible here — confirm.
 */
310  float64_t* compute_abs_weights(int32_t & len);
311 
317 
322  inline int32_t get_max_mismatch() { return max_mismatch; }
323 
328  inline int32_t get_degree() { return degree; }
329 
335  inline float64_t *get_degree_weights(int32_t& d, int32_t& len)
336  {
337  d=degree;
338  len=length;
339  return weights;
340  }
341 
347  inline float64_t *get_weights(int32_t& num_weights)
348  {
349  if (position_weights!=NULL)
350  {
351  num_weights = seq_length ;
352  return position_weights ;
353  }
354  if (length==0)
355  num_weights = degree ;
356  else
357  num_weights = degree*length ;
358  return weights;
359  }
360 
366  inline float64_t *get_position_weights(int32_t& len)
367  {
368  len=seq_length;
369  return position_weights;
370  }
371 
/** Declaration only: takes a vector of int32_t shifts by value.
 * NOTE(review): presumably updates the shift/shift_len/max_shift members —
 * confirm in the implementation file.
 */
376  void set_shifts(SGVector<int32_t> shifts);
377 
/** Declaration only: takes a float64_t matrix by value and reports a bool.
 * NOTE(review): the expected matrix shape (presumably degree x length) and
 * the meaning of the result are not visible here — confirm.
 */
382  bool set_weights(SGMatrix<float64_t> new_weights);
383 
/** Declaration only; takes no arguments and reports a bool.
 * NOTE(review): presumably fills weights with the default weighted-degree
 * weighting — confirm in the implementation file.
 */
388  virtual bool set_wd_weights();
389 
/** Declaration only: takes a float64_t vector by value.
 * NOTE(review): presumably replaces position_weights; any length requirement
 * (e.g. matching seq_length) is not visible here — confirm.
 */
395  virtual void set_position_weights(SGVector<float64_t> pws);
396 
/** Declaration only: takes a raw float64_t array plus two int32_t sizes.
 * NOTE(review): presumably len is the per-example length and num the number
 * of examples on the left-hand side; ownership of pws is not visible here —
 * confirm.
 */
404  bool set_position_weights_lhs(float64_t* pws, int32_t len, int32_t num);
405 
/** Declaration only: takes a raw float64_t array plus two int32_t sizes.
 * NOTE(review): presumably the right-hand side counterpart of
 * set_position_weights_lhs(); ownership of pws is not visible here — confirm.
 */
413  bool set_position_weights_rhs(float64_t* pws, int32_t len, int32_t num);
414 
/** Declaration only; takes no arguments and reports a bool.
 * NOTE(review): presumably dispatches to one of the init_block_weights_*()
 * variants — confirm in the implementation file.
 */
419  bool init_block_weights();
420 
426 
432 
438 
444 
450 
456 
/** Declaration only; takes no arguments and reports a bool.
 * NOTE(review): by its name, presumably the exponential block-weight
 * variant — confirm in the implementation file.
 */
461  bool init_block_weights_exp();
462 
/** Declaration only; takes no arguments and reports a bool.
 * NOTE(review): by its name, presumably the logarithmic block-weight
 * variant — confirm in the implementation file.
 */
467  bool init_block_weights_log();
468 
474  {
475  SG_FREE(position_weights);
476  position_weights=NULL;
477  return true;
478  }
479 
485  {
486  SG_FREE(position_weights_lhs);
488  return true;
489  }
490 
496  {
497  SG_FREE(position_weights_rhs);
499  return true;
500  }
501 
/** Declaration only: single-value form, used by compute_optimized(). */
507  virtual float64_t compute_by_tree(int32_t idx);
508 
/** Declaration only: per-level form, used by compute_by_subkernel().
 * NOTE(review): LevelContrib is presumably an output array with one entry
 * per subkernel; its required size is not visible here — confirm.
 */
514  virtual void compute_by_tree(int32_t idx, float64_t* LevelContrib);
515 
529  int32_t max_degree, int32_t& num_feat, int32_t& num_sym,
530  float64_t* target, int32_t num_suppvec, int32_t* IDX,
531  float64_t* weights);
532 
/** Declaration only: returns a char array and takes num_feat by reference.
 * NOTE(review): num_feat is presumably an output giving the length of the
 * returned sequence; ownership of the returned buffer is not visible here —
 * confirm.
 */
541  char* compute_consensus(
542  int32_t &num_feat, int32_t num_suppvec, int32_t* IDX,
543  float64_t* alphas);
544 
557  int32_t max_degree, int32_t& num_feat, int32_t& num_sym,
558  float64_t* w_result, int32_t num_suppvec, int32_t* IDX,
559  float64_t* alphas);
560 
574  int32_t max_degree, int32_t& num_feat, int32_t& num_sym,
575  float64_t* poim_result, int32_t num_suppvec, int32_t* IDX,
576  float64_t* alphas, float64_t* distrib);
577 
/** Declaration only: takes a float64_t matrix by value.
 * NOTE(review): presumably stores the distribution used later by
 * compute_POIM2() — confirm in the implementation file.
 */
582  void prepare_POIM2(SGMatrix<float64_t> distrib);
583 
/** Declaration only: takes a maximum degree and a CSVM pointer.
 * NOTE(review): where the result is stored and whether prepare_POIM2() must
 * be called first are not visible here — confirm.
 */
590  void compute_POIM2(int32_t max_degree, CSVM* svm);
591 
597 
/** Declaration only; takes no arguments and returns nothing.
 * NOTE(review): presumably releases state created by prepare_POIM2() /
 * compute_POIM2() — confirm in the implementation file.
 */
599  void cleanup_POIM2();
600 
601  protected:
/** Declaration only; takes no arguments and returns nothing.
 * NOTE(review): presumably (re)creates the `tries` member in an empty state
 * — confirm in the implementation file.
 */
603  void create_empty_tries();
604 
/** Declaration only; called by add_to_normal() with the same two arguments.
 * NOTE(review): presumably inserts example idx into the tries with the given
 * weight — confirm in the implementation file.
 */
610  virtual void add_example_to_tree(
611  int32_t idx, float64_t weight);
612 
620  int32_t idx, float64_t weight, int32_t tree_num);
621 
/** Declaration only: takes two int32_t indices and returns a float64_t.
 * NOTE(review): presumably the kernel value between example idx_a of the
 * left-hand side and idx_b of the right-hand side — confirm.
 */
630  virtual float64_t compute(int32_t idx_a, int32_t idx_b);
631 
641  char* avec, int32_t alen, char* bvec, int32_t blen);
642 
652  char* avec, int32_t alen, char* bvec, int32_t blen);
653 
663  char* avec, int32_t alen, char* bvec, int32_t blen);
664 
676  char* avec, float64_t *posweights_lhs, int32_t alen,
677  char* bvec, float64_t *posweights_rhs, int32_t blen);
678 
/** Declaration only; takes no arguments and returns nothing.
 * NOTE(review): presumably detaches the left-hand side features — confirm in
 * the implementation file.
 */
680  virtual void remove_lhs();
681 
/** Declaration only; declared as possibly throwing ShogunException.
 *
 * NOTE(review): `throw (ShogunException)` is a dynamic exception
 * specification, deprecated in C++11 and removed in C++17. Because this is a
 * virtual function, the specification must stay compatible with the
 * base-class declaration (not visible here), so it cannot be modernized in
 * this header alone.
 * NOTE(review): by its name, presumably a hook run after deserialization —
 * confirm.
 */
690  virtual void load_serializable_post() throw (ShogunException);
691 
692  private:
/** Private, argument-less helper; declaration only.
 * NOTE(review): presumably sets member defaults for the constructors —
 * confirm in the implementation file.
 */
695  void init();
696 
697  protected:
/** Weight array handed out by get_weights() and get_degree_weights(); holds
 * degree entries when length==0, otherwise degree*length (see get_weights()).
 */
699  float64_t* weights;
/** NOTE(review): presumably the degree dimension of `weights` — confirm. */
701  int32_t weights_degree;
/** NOTE(review): presumably the length dimension of `weights` — confirm. */
703  int32_t weights_length;
704 
709 
720 
/** Number of consecutive weights grouped into one subkernel; used as the
 * divisor in get_num_subkernels().
 */
724  int32_t mkl_stepsize;
725 
/** Returned by get_degree(). */
727  int32_t degree;
/** Second weight dimension; 0 means `weights` has only `degree` entries
 * (see get_weights()).
 */
729  int32_t length;
730 
/** Returned by get_max_mismatch(). */
732  int32_t max_mismatch;
/** Number of entries reported for position_weights (see get_weights()). */
734  int32_t seq_length;
735 
/** NOTE(review): presumably the shift values set via set_shifts() — confirm. */
737  int32_t *shift;
/** NOTE(review): presumably the number of entries in `shift` — confirm. */
739  int32_t shift_len;
/** NOTE(review): presumably the largest value in `shift` — confirm. */
741  int32_t max_shift;
742 
745 
/** NOTE(review): meaning not visible in this header — confirm. */
751  int32_t which_degree;
752 
/** Tries over DNATrie nodes; emptied in clear_normal() via delete_trees(). */
754  CTrie<DNATrie> tries;
/** Tries over POIMTrie nodes.
 * NOTE(review): presumably used by the POIM computations — confirm.
 */
756  CTrie<POIMTrie> poim_tries;
757 
762 
767 
/** NOTE(review): presumably the number of symbols used by the POIM2
 * computation — confirm in the implementation file.
 */
769  int32_t m_poim_num_sym;
774 
777 };
778 }
779 #endif /* _WEIGHTEDDEGREEPOSITIONSTRINGKERNEL_H___ */

SHOGUN Machine Learning Toolbox - Documentation