SHOGUN  4.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
WeightedDegreePositionStringKernel.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #ifndef _WEIGHTEDDEGREEPOSITIONSTRINGKERNEL_H___
13 #define _WEIGHTEDDEGREEPOSITIONSTRINGKERNEL_H___
14 
15 #include <shogun/lib/config.h>
16 
17 #include <shogun/lib/common.h>
20 #include <shogun/lib/Trie.h>
21 
22 namespace shogun
23 {
24 
25 class CSVM;
26 
51 {
52  public:
55 
64  int32_t size, int32_t degree,
65  int32_t max_mismatch=0, int32_t mkl_stepsize=1);
66 
77  int32_t size, SGVector<float64_t> weights, int32_t degree,
78  int32_t max_mismatch, SGVector<int32_t> shifts,
79  int32_t mkl_stepsize=1);
80 
88  CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t degree);
89 
91 
98  virtual bool init(CFeatures* l, CFeatures* r);
99 
101  virtual void cleanup();
102 
108 
/** Returns the kernel's name as a fixed string literal. */
113  virtual const char* get_name() const { return "WeightedDegreePositionStringKernel"; }
114 
/**
 * Convenience overload: forwards to the init_optimization overload that
 * additionally takes tree_num, passing -1 for tree_num.
 * NOTE(review): the meaning of tree_num == -1 is not visible in this header;
 * presumably "no specific tree" - confirm in the implementation.
 *
 * @param p_count forwarded unchanged as the count argument
 * @param IDX forwarded unchanged
 * @param alphas forwarded unchanged
 * @return the result of the forwarded call
 */
122  virtual bool init_optimization(
123  int32_t p_count, int32_t *IDX, float64_t * alphas)
124  {
125  return init_optimization(p_count, IDX, alphas, -1);
126  }
127 
139  virtual bool init_optimization(
140  int32_t count, int32_t *IDX, float64_t * alphas, int32_t tree_num,
141  int32_t upto_tree=-1);
142 
147  virtual bool delete_optimization();
148 
/**
 * Returns compute_by_tree(idx) for the given index.
 * NOTE(review): listing lines 156-158 are absent from this rendering, so any
 * statements executed before the call are not visible here.
 *
 * @param idx index passed through to compute_by_tree
 * @return the value returned by compute_by_tree(idx)
 */
154  virtual float64_t compute_optimized(int32_t idx)
155  {
159  return compute_by_tree(idx);
160  }
161 
166  static void* compute_batch_helper(void* p);
167 
178  virtual void compute_batch(
179  int32_t num_vec, int32_t* vec_idx, float64_t* target,
180  int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
181  float64_t factor=1.0);
182 
/**
 * Clears the tries and marks the kernel as not initialized.
 *
 * Visible behaviour: when get_is_initialized() is true, tries.delete_trees()
 * is called - with `true` in the first branch, with `false` when
 * opt_type==FASTBUTMEMHUNGRY - and SG_ERROR is raised for any other
 * optimization type; afterwards set_is_initialized(false) is called.
 *
 * NOTE(review): listing lines 188, 190 and 196 are absent from this rendering.
 * The condition guarding the SG_DEBUG block and the condition of the first
 * delete_trees(true) branch are therefore not visible - consult the original
 * header before relying on this description.
 */
186  virtual void clear_normal()
187  {
189  {
191  SG_DEBUG("disabling compact trie nodes with FASTBUTMEMHUNGRY\n")
192  }
193 
194  if (get_is_initialized())
195  {
197  tries.delete_trees(true);
198  else if (opt_type==FASTBUTMEMHUNGRY)
199  tries.delete_trees(false); // still buggy
200  else
201  SG_ERROR("unknown optimization type\n")
202 
203  set_is_initialized(false);
204  }
205  }
206 
/**
 * Adds one example to the tree via add_example_to_tree(idx, weight) and then
 * marks the kernel as initialized with set_is_initialized(true).
 *
 * @param idx index passed to add_example_to_tree
 * @param weight weight passed to add_example_to_tree
 */
212  virtual void add_to_normal(int32_t idx, float64_t weight)
213  {
214  add_example_to_tree(idx, weight);
215  set_is_initialized(true);
216  }
217 
/**
 * Returns the number of subkernels, i.e. the number of weight entries divided
 * by mkl_stepsize and rounded up:
 *  - position_weights set: ceil(seq_length / mkl_stepsize)
 *  - otherwise, length==0: ceil(get_degree() / mkl_stepsize)
 *  - otherwise:            ceil(get_degree()*length / mkl_stepsize)
 *
 * The leading 1.0* forces floating-point division so that ceil() can round up.
 *
 * @return number of subkernels
 */
222  virtual int32_t get_num_subkernels()
223  {
224  if (position_weights!=NULL)
225  return (int32_t) ceil(1.0*seq_length/mkl_stepsize) ;
226  if (length==0)
227  return (int32_t) ceil(1.0*get_degree()/mkl_stepsize);
228  return (int32_t) ceil(1.0*get_degree()*length/mkl_stepsize) ;
229  }
230 
/**
 * Computes per-subkernel contributions for one example.
 *
 * If get_is_initialized() is true, delegates to
 * compute_by_tree(idx, subkernel_contrib) and returns; otherwise reports
 * SG_ERROR because the optimization has not been initialized.
 *
 * @param idx index passed to compute_by_tree
 * @param subkernel_contrib buffer passed to compute_by_tree
 */
236  inline void compute_by_subkernel(
237  int32_t idx, float64_t * subkernel_contrib)
238  {
239  if (get_is_initialized())
240  {
241  compute_by_tree(idx, subkernel_contrib);
242  return ;
243  }
244 
245  SG_ERROR("CWeightedDegreePositionStringKernel optimization not initialized\n")
246  }
247 
/**
 * Returns the subkernel weights in a freshly allocated buffer.
 *
 * Sets num_weights to get_num_subkernels(), frees the previous
 * weights_buffer, allocates a new one with num_weights entries, fills it and
 * returns it. Because the buffer is freed at the start of every call, a
 * pointer obtained from an earlier call must not be used after calling again.
 *
 * Without position weights, entry i is weights[i*mkl_stepsize].
 * NOTE(review): listing line 262 (the body of the position_weights loop) is
 * absent from this rendering, so the value stored in that branch is not
 * visible here.
 *
 * @param num_weights output: number of entries in the returned buffer
 * @return pointer to weights_buffer
 */
253  inline const float64_t* get_subkernel_weights(int32_t& num_weights)
254  {
255  num_weights = get_num_subkernels() ;
256 
257  SG_FREE(weights_buffer);
258  weights_buffer = SG_MALLOC(float64_t, num_weights);
259 
260  if (position_weights!=NULL)
261  for (int32_t i=0; i<num_weights; i++)
263  else
264  for (int32_t i=0; i<num_weights; i++)
265  weights_buffer[i] = weights[i*mkl_stepsize] ;
266 
267  return weights_buffer ;
268  }
269 
/*
 * Body of the subkernel-weight setter. NOTE(review): the signature (listing
 * line 274) is absent from this rendering; the body reads a parameter `w`
 * through w.vector and w.vlen.
 *
 * Requires w.vlen == get_num_subkernels(), otherwise SG_ERROR is raised.
 * Each w.vector[i] is then written to the mkl_stepsize consecutive entries
 * starting at i*mkl_stepsize, skipping indices past the end of the target:
 *  - position_weights set: target is position_weights, bound seq_length
 *  - otherwise, length==0: target is weights, bound get_degree()
 *  - otherwise:            target is weights, bound get_degree()*length
 */
275  {
276  float64_t* weights2=w.vector;
277  int32_t num_weights2=w.vlen;
278 
279  int32_t num_weights = get_num_subkernels() ;
280  if (num_weights!=num_weights2)
281  SG_ERROR("number of weights do not match\n")
282 
283  if (position_weights!=NULL)
284  for (int32_t i=0; i<num_weights; i++)
285  for (int32_t j=0; j<mkl_stepsize; j++)
286  {
287  if (i*mkl_stepsize+j<seq_length)
288  position_weights[i*mkl_stepsize+j] = weights2[i] ;
289  }
290  else if (length==0)
291  {
292  for (int32_t i=0; i<num_weights; i++)
293  for (int32_t j=0; j<mkl_stepsize; j++)
294  if (i*mkl_stepsize+j<get_degree())
295  weights[i*mkl_stepsize+j] = weights2[i] ;
296  }
297  else
298  {
299  for (int32_t i=0; i<num_weights; i++)
300  for (int32_t j=0; j<mkl_stepsize; j++)
301  if (i*mkl_stepsize+j<get_degree()*length)
302  weights[i*mkl_stepsize+j] = weights2[i] ;
303  }
304  }
305 
306  // other kernel tree operations
312  float64_t* compute_abs_weights(int32_t & len);
313 
319 
/** Returns the stored max_mismatch value. */
324  inline int32_t get_max_mismatch() { return max_mismatch; }
325 
/** Returns the stored degree value. */
330  inline int32_t get_degree() { return degree; }
331 
/**
 * Returns the weights pointer together with degree and length.
 *
 * @param d output: set to degree
 * @param len output: set to length
 * @return the weights member pointer itself (no copy is made)
 */
337  inline float64_t *get_degree_weights(int32_t& d, int32_t& len)
338  {
339  d=degree;
340  len=length;
341  return weights;
342  }
343 
/**
 * Returns the currently active weight array and its size.
 *
 * If position_weights is set, it is returned with num_weights = seq_length.
 * Otherwise weights is returned with num_weights = degree when length==0,
 * or degree*length when length is non-zero.
 *
 * @param num_weights output: number of entries in the returned array
 * @return position_weights or weights (the member pointer, no copy is made)
 */
349  inline float64_t *get_weights(int32_t& num_weights)
350  {
351  if (position_weights!=NULL)
352  {
353  num_weights = seq_length ;
354  return position_weights ;
355  }
356  if (length==0)
357  num_weights = degree ;
358  else
359  num_weights = degree*length ;
360  return weights;
361  }
362 
/**
 * Returns the position_weights pointer and reports seq_length as its length.
 * The pointer is returned as stored; it is not checked against NULL here.
 *
 * @param len output: set to seq_length
 * @return the position_weights member pointer (no copy is made)
 */
368  inline float64_t *get_position_weights(int32_t& len)
369  {
370  len=seq_length;
371  return position_weights;
372  }
373 
378  void set_shifts(SGVector<int32_t> shifts);
379 
384  bool set_weights(SGMatrix<float64_t> new_weights);
385 
390  virtual bool set_wd_weights();
391 
397  virtual void set_position_weights(SGVector<float64_t> pws);
398 
406  bool set_position_weights_lhs(float64_t* pws, int32_t len, int32_t num);
407 
415  bool set_position_weights_rhs(float64_t* pws, int32_t len, int32_t num);
416 
421  bool init_block_weights();
422 
428 
434 
440 
446 
452 
458 
463  bool init_block_weights_exp();
464 
469  bool init_block_weights_log();
470 
476  {
477  SG_FREE(position_weights);
478  position_weights=NULL;
479  return true;
480  }
481 
487  {
488  SG_FREE(position_weights_lhs);
490  return true;
491  }
492 
498  {
499  SG_FREE(position_weights_rhs);
501  return true;
502  }
503 
509  virtual float64_t compute_by_tree(int32_t idx);
510 
516  virtual void compute_by_tree(int32_t idx, float64_t* LevelContrib);
517 
531  int32_t max_degree, int32_t& num_feat, int32_t& num_sym,
532  float64_t* target, int32_t num_suppvec, int32_t* IDX,
533  float64_t* weights);
534 
543  char* compute_consensus(
544  int32_t &num_feat, int32_t num_suppvec, int32_t* IDX,
545  float64_t* alphas);
546 
559  int32_t max_degree, int32_t& num_feat, int32_t& num_sym,
560  float64_t* w_result, int32_t num_suppvec, int32_t* IDX,
561  float64_t* alphas);
562 
576  int32_t max_degree, int32_t& num_feat, int32_t& num_sym,
577  float64_t* poim_result, int32_t num_suppvec, int32_t* IDX,
578  float64_t* alphas, float64_t* distrib);
579 
584  void prepare_POIM2(SGMatrix<float64_t> distrib);
585 
592  void compute_POIM2(int32_t max_degree, CSVM* svm);
593 
599 
601  void cleanup_POIM2();
602 
603  protected:
605  void create_empty_tries();
606 
612  virtual void add_example_to_tree(
613  int32_t idx, float64_t weight);
614 
622  int32_t idx, float64_t weight, int32_t tree_num);
623 
632  virtual float64_t compute(int32_t idx_a, int32_t idx_b);
633 
643  char* avec, int32_t alen, char* bvec, int32_t blen);
644 
654  char* avec, int32_t alen, char* bvec, int32_t blen);
655 
665  char* avec, int32_t alen, char* bvec, int32_t blen);
666 
678  char* avec, float64_t *posweights_lhs, int32_t alen,
679  char* bvec, float64_t *posweights_rhs, int32_t blen);
680 
682  virtual void remove_lhs();
683 
692  virtual void load_serializable_post() throw (ShogunException);
693 
694  private:
697  void init();
698 
699  protected:
701  float64_t* weights;
703  int32_t weights_degree;
705  int32_t weights_length;
706 
711 
722 
726  int32_t mkl_stepsize;
727 
729  int32_t degree;
731  int32_t length;
732 
734  int32_t max_mismatch;
736  int32_t seq_length;
737 
739  int32_t *shift;
741  int32_t shift_len;
743  int32_t max_shift;
744 
747 
753  int32_t which_degree;
754 
756  CTrie<DNATrie> tries;
758  CTrie<POIMTrie> poim_tries;
759 
764 
769 
771  int32_t m_poim_num_sym;
776 
779 };
780 }
781 #endif /* _WEIGHTEDDEGREEPOSITIONSTRINGKERNEL_H___ */
RNA - letters A,C,G,U.
Definition: Alphabet.h:32
virtual float64_t compute(int32_t idx_a, int32_t idx_b)
void delete_trees(bool p_use_compact_terminal_nodes=true)
Definition: Trie.h:1171
float64_t * compute_scoring(int32_t max_degree, int32_t &num_feat, int32_t &num_sym, float64_t *target, int32_t num_suppvec, int32_t *IDX, float64_t *weights)
virtual void compute_batch(int32_t num_vec, int32_t *vec_idx, float64_t *target, int32_t num_suppvec, int32_t *IDX, float64_t *alphas, float64_t factor=1.0)
void add_example_to_single_tree(int32_t idx, float64_t weight, int32_t tree_num)
DNA - letters A,C,G,T.
Definition: Alphabet.h:26
EKernelType
Definition: Kernel.h:57
virtual void add_to_normal(int32_t idx, float64_t weight)
Template class Trie implements a suffix trie, i.e. a tree in which all suffixes up to a certain lengt...
Definition: Trie.h:136
float64_t compute_with_mismatch(char *avec, int32_t alen, char *bvec, int32_t blen)
Class ShogunException defines an exception which is thrown whenever an error inside of shogun occurs...
EAlphabet get_alphabet() const
Definition: Alphabet.h:130
float64_t * compute_POIM(int32_t max_degree, int32_t &num_feat, int32_t &num_sym, float64_t *poim_result, int32_t num_suppvec, int32_t *IDX, float64_t *alphas, float64_t *distrib)
#define SG_ERROR(...)
Definition: SGIO.h:129
void set_is_initialized(bool p_init)
Definition: Kernel.h:899
The class Alphabet implements an alphabet and alphabet utility functions.
Definition: Alphabet.h:91
bool get_is_initialized()
Definition: Kernel.h:753
index_t vlen
Definition: SGVector.h:494
virtual bool init_optimization(int32_t p_count, int32_t *IDX, float64_t *alphas)
#define ASSERT(x)
Definition: SGIO.h:201
void set_use_compact_terminal_nodes(bool p_use_compact_terminal_nodes)
Definition: Trie.h:466
virtual void add_example_to_tree(int32_t idx, float64_t weight)
double float64_t
Definition: common.h:50
float64_t compute_without_mismatch_matrix(char *avec, int32_t alen, char *bvec, int32_t blen)
virtual void set_position_weights(SGVector< float64_t > pws)
EOptimizationType opt_type
Definition: Kernel.h:1079
virtual void set_subkernel_weights(SGVector< float64_t > w)
bool set_position_weights_lhs(float64_t *pws, int32_t len, int32_t num)
const float64_t * get_subkernel_weights(int32_t &num_weights)
#define SG_DEBUG(...)
Definition: SGIO.h:107
char * compute_consensus(int32_t &num_feat, int32_t num_suppvec, int32_t *IDX, float64_t *alphas)
All classes and functions are contained in the shogun namespace.
Definition: class_list.h:18
void compute_by_subkernel(int32_t idx, float64_t *subkernel_contrib)
The class Features is the base class of all feature objects.
Definition: Features.h:68
A generic Support Vector Machine Interface.
Definition: SVM.h:49
bool get_use_compact_terminal_nodes()
Definition: Trie.h:456
bool set_position_weights_rhs(float64_t *pws, int32_t len, int32_t num)
The Weighted Degree Position String kernel (Weighted Degree kernel with shifts).
float64_t * extract_w(int32_t max_degree, int32_t &num_feat, int32_t &num_sym, float64_t *w_result, int32_t num_suppvec, int32_t *IDX, float64_t *alphas)
Template class StringKernel, is the base class of all String Kernels.
Definition: StringKernel.h:26
float64_t compute_without_mismatch(char *avec, int32_t alen, char *bvec, int32_t blen)
float64_t compute_without_mismatch_position_weights(char *avec, float64_t *posweights_lhs, int32_t alen, char *bvec, float64_t *posweights_rhs, int32_t blen)

SHOGUN Machine Learning Toolbox - Documentation