SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
WeightedDegreePositionStringKernel.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #ifndef _WEIGHTEDDEGREEPOSITIONSTRINGKERNEL_H___
13 #define _WEIGHTEDDEGREEPOSITIONSTRINGKERNEL_H___
14 
15 #include <shogun/lib/common.h>
18 #include <shogun/lib/Trie.h>
19 
20 namespace shogun
21 {
22 
23 class CSVM;
24 
49 {
50  public:
53 
62  int32_t size, int32_t degree,
63  int32_t max_mismatch=0, int32_t mkl_stepsize=1);
64 
76  int32_t size, float64_t* weights, int32_t degree,
77  int32_t max_mismatch, int32_t* shift, int32_t shift_len,
78  int32_t mkl_stepsize=1);
79 
87  CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t degree);
88 
90 
/** init() overload taking two feature objects; declared here, defined elsewhere.
 *
 * NOTE(review): presumably l and r are the left- and right-hand side features
 * the kernel is evaluated on - confirm against the base class.
 *
 * @param l first CFeatures pointer
 * @param r second CFeatures pointer
 * @return bool status (exact semantics are not visible in this header)
 */
97  virtual bool init(CFeatures* l, CFeatures* r);
98 
/** cleanup(); declared here, defined elsewhere. Takes no arguments and
 * returns nothing.
 */
100  virtual void cleanup();
101 
107 
/** Return the name of this kernel.
 *
 * @return the constant string "WeightedDegreePositionStringKernel"
 */
112  virtual const char* get_name() const { return "WeightedDegreePositionStringKernel"; }
113 
/** Three-argument overload of init_optimization().
 *
 * Forwards to the overload that also takes a tree number, passing -1 as
 * tree_num and leaving upto_tree at its declared default.
 * NOTE(review): what tree_num == -1 selects is decided in the .cpp, not here.
 *
 * @param p_count forwarded unchanged as the count argument
 * @param IDX forwarded unchanged
 * @param alphas forwarded unchanged
 * @return whatever the forwarded call returns
 */
121  inline virtual bool init_optimization(
122  int32_t p_count, int32_t *IDX, float64_t * alphas)
123  {
124  return init_optimization(p_count, IDX, alphas, -1);
125  }
126 
/** Overload of init_optimization() that additionally takes tree selectors.
 *
 * Declared here; the definition is outside this header.
 * NOTE(review): presumably count is the number of entries in IDX and alphas,
 * and tree_num / upto_tree restrict which trees are built - confirm in the .cpp.
 *
 * @param count see note above
 * @param IDX pointer to int32_t indices
 * @param alphas pointer to float64_t coefficients
 * @param tree_num tree selector (the three-argument overload passes -1)
 * @param upto_tree optional second selector, defaults to -1
 * @return bool status (exact semantics are not visible in this header)
 */
138  virtual bool init_optimization(
139  int32_t count, int32_t *IDX, float64_t * alphas, int32_t tree_num,
140  int32_t upto_tree=-1);
141 
/** delete_optimization(); declared here, defined elsewhere.
 *
 * NOTE(review): presumably the counterpart of init_optimization() - confirm.
 *
 * @return bool status (exact semantics are not visible in this header)
 */
146  virtual bool delete_optimization();
147 
/** Evaluate the tree-based result for one example index.
 *
 * Asserts that `alphabet` is non-null and then returns compute_by_tree(idx).
 * NOTE(review): original lines 155 and 157 are absent from this listing, so
 * additional statements may exist between the ones shown.
 *
 * @param idx passed straight to compute_by_tree()
 * @return the value returned by compute_by_tree(idx)
 */
153  inline virtual float64_t compute_optimized(int32_t idx)
154  {
156  ASSERT(alphabet);
158  return compute_by_tree(idx);
159  }
160 
/** Static helper taking and returning an untyped pointer; defined elsewhere.
 *
 * NOTE(review): the void*(void*) shape suggests a thread entry routine used by
 * compute_batch() - confirm in the .cpp.
 *
 * @param p opaque argument; its concrete type is not visible in this header
 * @return opaque pointer
 */
165  static void* compute_batch_helper(void* p);
166 
/** compute_batch(); declared here, defined elsewhere.
 *
 * NOTE(review): presumably num_vec is the length of vec_idx and target, and
 * num_suppvec the length of IDX and alphas - confirm in the .cpp.
 *
 * @param num_vec see note above
 * @param vec_idx pointer to int32_t indices
 * @param target pointer to float64_t output storage
 * @param num_suppvec see note above
 * @param IDX pointer to int32_t indices
 * @param alphas pointer to float64_t coefficients
 * @param factor scalar, defaults to 1.0
 */
177  virtual void compute_batch(
178  int32_t num_vec, int32_t* vec_idx, float64_t* target,
179  int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
180  float64_t factor=1.0);
181 
/** Tear down the tree-based optimization state.
 *
 * When get_is_initialized() is true, deletes the trees held in `tries`
 * (delete_trees(false) for opt_type == FASTBUTMEMHUNGRY, SG_ERROR for an
 * unrecognised type) and then marks the kernel as not initialized.
 *
 * NOTE(review): original lines 187, 189 and 195 are absent from this listing,
 * so the condition guarding the SG_DEBUG block, any statement preceding the
 * SG_DEBUG call, and the condition that selects delete_trees(true) are not
 * visible here.
 */
185  inline virtual void clear_normal()
186  {
     // the condition guarding this brace block is on a line missing from the listing
188  {
190  SG_DEBUG( "disabling compact trie nodes with FASTBUTMEMHUNGRY\n") ;
191  }
192 
     // only tear down when an optimization has actually been set up
193  if (get_is_initialized())
194  {
     // first branch: its `if` condition (original line 195) is missing from the listing
196  tries.delete_trees(true);
197  else if (opt_type==FASTBUTMEMHUNGRY)
198  tries.delete_trees(false); // still buggy
199  else
200  SG_ERROR( "unknown optimization type\n");
201 
202  set_is_initialized(false);
203  }
204  }
205 
/** Add one weighted example to the tree and flag the kernel as initialized.
 *
 * Calls add_example_to_tree(idx, weight) and then set_is_initialized(true).
 *
 * @param idx forwarded to add_example_to_tree()
 * @param weight forwarded to add_example_to_tree()
 */
211  inline virtual void add_to_normal(int32_t idx, float64_t weight)
212  {
213  add_example_to_tree(idx, weight);
214  set_is_initialized(true);
215  }
216 
/** Number of subkernels, i.e. the number of weight groups of size mkl_stepsize.
 *
 * The count is the ceiling of (number of underlying weights / mkl_stepsize),
 * where the number of underlying weights is
 *  - seq_length            if position_weights is set,
 *  - get_degree()          if length == 0,
 *  - get_degree()*length   otherwise.
 *
 * @return the rounded-up group count as int32_t
 */
221  inline virtual int32_t get_num_subkernels()
222  {
     // multiplying by 1.0 forces floating-point division so ceil() can round up
223  if (position_weights!=NULL)
224  return (int32_t) ceil(1.0*seq_length/mkl_stepsize) ;
225  if (length==0)
226  return (int32_t) ceil(1.0*get_degree()/mkl_stepsize);
227  return (int32_t) ceil(1.0*get_degree()*length/mkl_stepsize) ;
228  }
229 
/** Compute per-subkernel contributions for one example.
 *
 * If get_is_initialized() is true, delegates to the two-argument
 * compute_by_tree() and returns; otherwise reports an error via SG_ERROR.
 *
 * @param idx forwarded to compute_by_tree()
 * @param subkernel_contrib forwarded to compute_by_tree(); the required array
 *        size is not visible here (NOTE(review): presumably
 *        get_num_subkernels() entries - confirm)
 */
235  inline void compute_by_subkernel(
236  int32_t idx, float64_t * subkernel_contrib)
237  {
238  if (get_is_initialized())
239  {
240  compute_by_tree(idx, subkernel_contrib);
241  return ;
242  }
243 
     // reached only when no optimization has been initialized
244  SG_ERROR( "CWeightedDegreePositionStringKernel optimization not initialized\n") ;
245  }
246 
/** Return one weight per subkernel in a freshly allocated buffer.
 *
 * Sets num_weights to get_num_subkernels(), allocates weights_buffer with
 * room for num_weights float64_t values and fills it. When position_weights
 * is NULL, entry i is weights[i*mkl_stepsize].
 *
 * NOTE(review): original lines 256 and 261 are absent from this listing, so
 * whatever precedes the allocation and the loop body used when
 * position_weights is set are not visible here.
 *
 * @param num_weights [out] receives get_num_subkernels()
 * @return weights_buffer
 */
252  inline const float64_t* get_subkernel_weights(int32_t& num_weights)
253  {
254  num_weights = get_num_subkernels() ;
255 
257  weights_buffer = SG_MALLOC(float64_t, num_weights);
258 
259  if (position_weights!=NULL)
     // the body of this loop (original line 261) is missing from the listing
260  for (int32_t i=0; i<num_weights; i++)
262  else
     // take the first weight of every mkl_stepsize-sized group
263  for (int32_t i=0; i<num_weights; i++)
264  weights_buffer[i] = weights[i*mkl_stepsize] ;
265 
266  return weights_buffer ;
267  }
268 
/* Body of a subkernel-weight setter. Its signature (original line 273) is
 * absent from this listing; the body reads a parameter `w` that exposes
 * `.vector` (float64_t*) and `.vlen` (int32_t).
 *
 * Reports "number of weights do not match" via SG_ERROR when w.vlen differs
 * from get_num_subkernels(). Each incoming weight i is then copied to the
 * mkl_stepsize consecutive slots i*mkl_stepsize+j of the target array, and
 * slots at or beyond the target's logical length are skipped:
 *  - position_weights, length seq_length, if position_weights is set,
 *  - weights, length get_degree(),        if length == 0,
 *  - weights, length get_degree()*length  otherwise.
 *
 * NOTE(review): whether SG_ERROR returns control to this function is not
 * visible here.
 */
274  {
275  float64_t* weights2=w.vector;
276  int32_t num_weights2=w.vlen;
277 
278  int32_t num_weights = get_num_subkernels() ;
279  if (num_weights!=num_weights2)
280  SG_ERROR( "number of weights do not match\n") ;
281 
282  if (position_weights!=NULL)
283  for (int32_t i=0; i<num_weights; i++)
284  for (int32_t j=0; j<mkl_stepsize; j++)
285  {
     // the last group may be partial, so guard the upper bound
286  if (i*mkl_stepsize+j<seq_length)
287  position_weights[i*mkl_stepsize+j] = weights2[i] ;
288  }
289  else if (length==0)
290  {
291  for (int32_t i=0; i<num_weights; i++)
292  for (int32_t j=0; j<mkl_stepsize; j++)
293  if (i*mkl_stepsize+j<get_degree())
294  weights[i*mkl_stepsize+j] = weights2[i] ;
295  }
296  else
297  {
298  for (int32_t i=0; i<num_weights; i++)
299  for (int32_t j=0; j<mkl_stepsize; j++)
300  if (i*mkl_stepsize+j<get_degree()*length)
301  weights[i*mkl_stepsize+j] = weights2[i] ;
302  }
303  }
304 
305  // other kernel tree operations
/** compute_abs_weights(); declared here, defined elsewhere.
 *
 * NOTE(review): ownership of the returned array is not visible in this
 * header - confirm in the .cpp before freeing or retaining it.
 *
 * @param len int32_t passed by non-const reference (presumably an output
 *        receiving the array length - confirm)
 * @return pointer to float64_t values
 */
311  float64_t* compute_abs_weights(int32_t & len);
312 
318 
/** @return the current value of the max_mismatch member */
323  inline int32_t get_max_mismatch() { return max_mismatch; }
324 
/** @return the current value of the degree member */
329  inline int32_t get_degree() { return degree; }
330 
/** Expose the degree weights together with their dimensions.
 *
 * Returns the internal `weights` pointer itself (no copy is made).
 *
 * @param d [out] receives the degree member
 * @param len [out] receives the length member
 * @return the weights member
 */
336  inline float64_t *get_degree_weights(int32_t& d, int32_t& len)
337  {
338  d=degree;
339  len=length;
340  return weights;
341  }
342 
/** Expose whichever weight array is currently in effect.
 *
 * If position_weights is set it takes precedence and is returned with
 * num_weights = seq_length. Otherwise `weights` is returned with
 * num_weights = degree when length == 0, or degree*length when it is not.
 * The internal pointer is returned directly (no copy is made).
 *
 * @param num_weights [out] receives the number of entries as described above
 * @return position_weights or weights
 */
348  inline float64_t *get_weights(int32_t& num_weights)
349  {
350  if (position_weights!=NULL)
351  {
352  num_weights = seq_length ;
353  return position_weights ;
354  }
355  if (length==0)
356  num_weights = degree ;
357  else
358  num_weights = degree*length ;
359  return weights;
360  }
361 
/** Expose the position weights.
 *
 * Returns the internal position_weights pointer directly; it may be NULL,
 * since other members of this class test it against NULL.
 *
 * @param len [out] receives seq_length
 * @return the position_weights member
 */
367  inline float64_t *get_position_weights(int32_t& len)
368  {
369  len=seq_length;
370  return position_weights;
371  }
372 
/* The setters and initializers below are declared here and defined elsewhere;
 * only their signatures are visible in this header. */

/** @param shifts vector of int32_t values, passed by value
 *  NOTE(review): presumably replaces the shift / shift_len members - confirm.
 */
377  void set_shifts(SGVector<int32_t> shifts);
378 
/** @param new_weights matrix of float64_t values, passed by value
 *  @return bool status (exact semantics are not visible in this header)
 */
383  bool set_weights(SGMatrix<float64_t> new_weights);
384 
/** @return bool status (exact semantics are not visible in this header) */
389  virtual bool set_wd_weights();
390 
/** @param pws vector of float64_t values, passed by value */
396  virtual void set_position_weights(SGVector<float64_t> pws);
397 
/** @param pws pointer to float64_t values
 *  @param len int32_t length argument
 *  @param num int32_t count argument
 *  @return bool status
 *  NOTE(review): the relationship between len, num and the size of pws is not
 *  visible here - confirm in the .cpp.
 */
405  bool set_position_weights_lhs(float64_t* pws, int32_t len, int32_t num);
406 
/** Same parameter list as set_position_weights_lhs().
 *  NOTE(review): presumably the right-hand-side counterpart - confirm.
 */
414  bool set_position_weights_rhs(float64_t* pws, int32_t len, int32_t num);
415 
/** @return bool status (exact semantics are not visible in this header) */
420  bool init_block_weights();
421 
427 
433 
439 
445 
451 
457 
/** init_block_weights_exp(); declared here, defined elsewhere.
 *  NOTE(review): the name suggests an exponential weighting scheme - confirm.
 *  @return bool status (exact semantics are not visible in this header)
 */
462  bool init_block_weights_exp();
463 
/** init_block_weights_log(); declared here, defined elsewhere.
 *  NOTE(review): the name suggests a logarithmic weighting scheme - confirm.
 *  @return bool status (exact semantics are not visible in this header)
 */
468  bool init_block_weights_log();
469 
475  {
477  position_weights=NULL;
478  return true;
479  }
480 
486  {
489  return true;
490  }
491 
497  {
500  return true;
501  }
502 
/** Single-value overload of compute_by_tree(); defined elsewhere.
 *
 * Called by compute_optimized(), which returns its result unchanged.
 *
 * @param idx example index
 * @return float64_t result
 */
508  virtual float64_t compute_by_tree(int32_t idx);
509 
/** Array-output overload of compute_by_tree(); defined elsewhere.
 *
 * Called by compute_by_subkernel() with its subkernel_contrib argument.
 *
 * @param idx example index
 * @param LevelContrib pointer to float64_t storage supplied by the caller
 */
515  virtual void compute_by_tree(int32_t idx, float64_t* LevelContrib);
516 
530  int32_t max_degree, int32_t& num_feat, int32_t& num_sym,
531  float64_t* target, int32_t num_suppvec, int32_t* IDX,
532  float64_t* weights);
533 
/** compute_consensus(); declared here, defined elsewhere.
 *
 * NOTE(review): ownership of the returned char buffer is not visible in this
 * header - confirm in the .cpp.
 *
 * @param num_feat int32_t passed by non-const reference (presumably an
 *        output - confirm)
 * @param num_suppvec int32_t count argument
 * @param IDX pointer to int32_t indices
 * @param alphas pointer to float64_t coefficients
 * @return pointer to char
 */
542  char* compute_consensus(
543  int32_t &num_feat, int32_t num_suppvec, int32_t* IDX,
544  float64_t* alphas);
545 
558  int32_t max_degree, int32_t& num_feat, int32_t& num_sym,
559  float64_t* w_result, int32_t num_suppvec, int32_t* IDX,
560  float64_t* alphas);
561 
575  int32_t max_degree, int32_t& num_feat, int32_t& num_sym,
576  float64_t* poim_result, int32_t num_suppvec, int32_t* IDX,
577  float64_t* alphas, float64_t* distrib);
578 
/* POIM2 entry points; declared here, defined elsewhere.
 * NOTE(review): presumably meant to be called in the order prepare, compute,
 * cleanup - confirm in the .cpp. */

/** @param distrib matrix of float64_t values, passed by value */
583  void prepare_POIM2(SGMatrix<float64_t> distrib);
584 
/** @param max_degree int32_t degree argument
 *  @param svm pointer to a CSVM (only forward-declared in this header)
 */
591  void compute_POIM2(int32_t max_degree, CSVM* svm);
592 
598 
/** Takes no arguments and returns nothing. */
600  void cleanup_POIM2();
601 
602  protected:
/** create_empty_tries(); declared here, defined elsewhere. */
604  void create_empty_tries();
605 
/** add_example_to_tree(); declared here, defined elsewhere.
 *
 * Called by add_to_normal() with the same two arguments.
 *
 * @param idx example index
 * @param weight float64_t weight for that example
 */
611  virtual void add_example_to_tree(
612  int32_t idx, float64_t weight);
613 
621  int32_t idx, float64_t weight, int32_t tree_num);
622 
/** compute(); declared here, defined elsewhere.
 *
 * NOTE(review): presumably idx_a indexes the left-hand side and idx_b the
 * right-hand side features - confirm against the base class.
 *
 * @param idx_a first int32_t index
 * @param idx_b second int32_t index
 * @return float64_t result
 */
631  virtual float64_t compute(int32_t idx_a, int32_t idx_b);
632 
642  char* avec, int32_t alen, char* bvec, int32_t blen);
643 
653  char* avec, int32_t alen, char* bvec, int32_t blen);
654 
664  char* avec, int32_t alen, char* bvec, int32_t blen);
665 
677  char* avec, float64_t *posweights_lhs, int32_t alen,
678  char* bvec, float64_t *posweights_rhs, int32_t blen);
679 
/** remove_lhs(); declared here, defined elsewhere. */
681  virtual void remove_lhs();
682 
/** load_serializable_post(); declared here, defined elsewhere.
 *
 * The dynamic exception specification states that only ShogunException may
 * propagate out of this function.
 */
691  virtual void load_serializable_post() throw (ShogunException);
692 
693  private:
/** Private zero-argument init(); declared here, defined elsewhere.
 *  Distinct from the public init(CFeatures*, CFeatures*) overload.
 */
696  void init();
697 
698  protected:
/** Weight array returned by get_degree_weights() and get_weights(); read in
 * strides of mkl_stepsize by get_subkernel_weights(). */
700  float64_t* weights;
/** NOTE(review): not used by any inline code in this header; presumably the
 * degree dimension of an externally supplied weight matrix - confirm. */
702  int32_t weights_degree;
/** NOTE(review): not used by any inline code in this header; presumably the
 * length dimension of an externally supplied weight matrix - confirm. */
704  int32_t weights_length;
705 
710 
721 
/** Group size: get_num_subkernels() divides the number of underlying weights
 * by this value and rounds up. */
725  int32_t mkl_stepsize;
726 
/** Value returned by get_degree(). */
728  int32_t degree;
/** When 0, `weights` holds degree entries; otherwise degree*length entries
 * (see get_weights()). */
730  int32_t length;
731 
/** Value returned by get_max_mismatch(). */
733  int32_t max_mismatch;
/** Number of entries reported for position_weights (see get_weights() and
 * get_position_weights()). */
735  int32_t seq_length;
736 
/** NOTE(review): presumably the per-position shift values - confirm. */
738  int32_t *shift;
/** NOTE(review): presumably the number of entries in `shift` - confirm. */
740  int32_t shift_len;
/** NOTE(review): presumably the largest value in `shift` - confirm. */
742  int32_t max_shift;
743 
746 
/** NOTE(review): not used by any inline code in this header - confirm its
 * meaning in the .cpp. */
752  int32_t which_degree;
753 
/** Tries whose trees clear_normal() deletes via delete_trees(). */
755  CTrie<DNATrie> tries;
/** Second trie container, instantiated for POIMTrie nodes. */
757  CTrie<POIMTrie> poim_tries;
758 
763 
768 
770  int32_t m_poim_num_sym;
775 
778 };
779 }
780 #endif /* _WEIGHTEDDEGREEPOSITIONSTRINGKERNEL_H___ */

SHOGUN Machine Learning Toolbox - Documentation