SHOGUN  4.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
WDSVMOcas.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2007-2008 Vojtech Franc
8  * Written (W) 2007-2009 Soeren Sonnenburg
9  * Copyright (C) 2007-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 
13 #ifndef _WDSVMOCAS_H___
14 #define _WDSVMOCAS_H___
15 
16 #include <shogun/lib/config.h>
17 #ifdef USE_GPL_SHOGUN
18 
19 #include <shogun/lib/common.h>
20 #include <shogun/machine/Machine.h>
23 #include <shogun/labels/Labels.h>
24 
25 namespace shogun
26 {
27 template <class ST> class CStringFeatures;
28 
30 class CWDSVMOcas : public CMachine
31 {
32  public:
35 
37  CWDSVMOcas();
38 
43  CWDSVMOcas(E_SVM_TYPE type);
44 
53  CWDSVMOcas(
54  float64_t C, int32_t d, int32_t from_d,
55  CStringFeatures<uint8_t>* traindat, CLabels* trainlab);
56  virtual ~CWDSVMOcas();
57 
62  virtual EMachineType get_classifier_type() { return CT_WDSVMOCAS; }
63 
70  inline void set_C(float64_t c_neg, float64_t c_pos) { C1=c_neg; C2=c_pos; }
71 
76  inline float64_t get_C1() { return C1; }
77 
82  inline float64_t get_C2() { return C2; }
83 
88  inline void set_epsilon(float64_t eps) { epsilon=eps; }
89 
94  inline float64_t get_epsilon() { return epsilon; }
95 
100  inline void set_features(CStringFeatures<uint8_t>* feat)
101  {
102  SG_REF(feat);
103  SG_UNREF(features);
104  features=feat;
105  }
106 
111  inline CStringFeatures<uint8_t>* get_features()
112  {
113  SG_REF(features);
114  return features;
115  }
116 
121  inline void set_bias_enabled(bool enable_bias) { use_bias=enable_bias; }
122 
127  inline bool get_bias_enabled() { return use_bias; }
128 
133  inline void set_bufsize(int32_t sz) { bufsize=sz; }
134 
139  inline int32_t get_bufsize() { return bufsize; }
140 
146  inline void set_degree(int32_t d, int32_t from_d)
147  {
148  degree=d;
149  from_degree=from_d;
150  }
151 
156  inline int32_t get_degree() { return degree; }
157 
164  virtual CBinaryLabels* apply_binary(CFeatures* data=NULL);
165 
172  virtual CRegressionLabels* apply_regression(CFeatures* data=NULL);
173 
179  virtual float64_t apply_one(int32_t num)
180  {
181  ASSERT(features)
182  if (!wd_weights)
183  set_wd_weights();
184 
185  int32_t len=0;
186  float64_t sum=0;
187  bool free_vec;
188  uint8_t* vec=features->get_feature_vector(num, len, free_vec);
189  //SG_INFO("len %d, string_length %d\n", len, string_length)
190  ASSERT(len==string_length)
191 
192  for (int32_t j=0; j<string_length; j++)
193  {
194  int32_t offs=w_dim_single_char*j;
195  int32_t val=0;
196  for (int32_t k=0; (j+k<string_length) && (k<degree); k++)
197  {
198  val=val*alphabet_size + vec[j+k];
199  sum+=wd_weights[k] * w[offs+val];
200  offs+=w_offsets[k];
201  }
202  }
203  features->free_feature_vector(vec, num, free_vec);
204  return sum/normalization_const;
205  }
206 
208  inline void set_normalization_const()
209  {
210  ASSERT(features)
211  normalization_const=0;
212  for (int32_t i=0; i<degree; i++)
213  normalization_const+=(string_length-i)*wd_weights[i]*wd_weights[i];
214 
215  normalization_const=CMath::sqrt(normalization_const);
216  SG_DEBUG("normalization_const:%f\n", normalization_const)
217  }
218 
223  inline float64_t get_normalization_const() { return normalization_const; }
224 
225 
226  protected:
227 
232  SGVector<float64_t> apply_get_outputs(CFeatures* data);
233 
238  int32_t set_wd_weights();
239 
248  static void compute_W(
249  float64_t *sq_norm_W, float64_t *dp_WoldW, float64_t *alpha,
250  uint32_t nSel, void* ptr );
251 
258  static float64_t update_W(float64_t t, void* ptr );
259 
265  static void* add_new_cut_helper(void* ptr);
266 
275  static int add_new_cut(
276  float64_t *new_col_H, uint32_t *new_cut, uint32_t cut_length,
277  uint32_t nSel, void* ptr );
278 
284  static void* compute_output_helper(void* ptr);
285 
291  static int compute_output( float64_t *output, void* ptr );
292 
299  static int sort( float64_t* vals, float64_t* data, uint32_t size);
300 
302  static inline void print(ocas_return_value_T value)
303  {
304  return;
305  }
306 
307 
309  virtual const char* get_name() const { return "WDSVMOcas"; }
310 
311  protected:
320  virtual bool train_machine(CFeatures* data=NULL);
321 
322  protected:
324  CStringFeatures<uint8_t>* features;
326  bool use_bias;
328  int32_t bufsize;
330  float64_t C1;
332  float64_t C2;
334  float64_t epsilon;
336  E_SVM_TYPE method;
337 
339  int32_t degree;
341  int32_t from_degree;
343  float32_t* wd_weights;
345  int32_t num_vec;
347  int32_t string_length;
349  int32_t alphabet_size;
350 
352  float64_t normalization_const;
353 
355  float64_t bias;
357  float64_t old_bias;
359  int32_t* w_offsets;
361  int32_t w_dim;
363  int32_t w_dim_single_char;
365  float32_t* w;
367  float32_t* old_w;
369  float64_t* lab;
370 
372  float32_t** cuts;
374  float64_t* cp_bias;
375 };
376 }
377 #endif // USE_GPL_SHOGUN
378 
379 #endif // _WDSVMOCAS_H___
EMachineType
Definition: Machine.h:33
#define SG_REF(x)
Definition: SGObject.h:54
#define ASSERT(x)
Definition: SGIO.h:201
void print(CJLCoverTreePoint &p)
#define MACHINE_PROBLEM_TYPE(PT)
Definition: Machine.h:120
double float64_t
Definition: common.h:50
float float32_t
Definition: common.h:49
#define SG_UNREF(x)
Definition: SGObject.h:55
#define SG_DEBUG(...)
Definition: SGIO.h:107
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
void set_epsilon(float *begin, float max)
Definition: JLCoverTree.h:513

SHOGUN Machine Learning Toolbox - Documentation