Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef _WDSVMOCAS_H___
00013 #define _WDSVMOCAS_H___
00014
00015 #include <shogun/lib/common.h>
00016 #include <shogun/machine/Machine.h>
00017 #include <shogun/classifier/svm/SVMOcas.h>
00018 #include <shogun/features/StringFeatures.h>
00019 #include <shogun/labels/Labels.h>
00020
00021 namespace shogun
00022 {
00023 template <class ST> class CStringFeatures;
00024
00026 class CWDSVMOcas : public CMachine
00027 {
00028 public:
00030 MACHINE_PROBLEM_TYPE(PT_BINARY);
00031
00033 CWDSVMOcas();
00034
00039 CWDSVMOcas(E_SVM_TYPE type);
00040
00049 CWDSVMOcas(
00050 float64_t C, int32_t d, int32_t from_d,
00051 CStringFeatures<uint8_t>* traindat, CLabels* trainlab);
00052 virtual ~CWDSVMOcas();
00053
00058 virtual EMachineType get_classifier_type() { return CT_WDSVMOCAS; }
00059
00066 inline void set_C(float64_t c_neg, float64_t c_pos) { C1=c_neg; C2=c_pos; }
00067
00072 inline float64_t get_C1() { return C1; }
00073
00078 inline float64_t get_C2() { return C2; }
00079
00084 inline void set_epsilon(float64_t eps) { epsilon=eps; }
00085
00090 inline float64_t get_epsilon() { return epsilon; }
00091
00096 inline void set_features(CStringFeatures<uint8_t>* feat)
00097 {
00098 SG_UNREF(features);
00099 SG_REF(feat);
00100 features=feat;
00101 }
00102
00107 inline CStringFeatures<uint8_t>* get_features()
00108 {
00109 SG_REF(features);
00110 return features;
00111 }
00112
00117 inline void set_bias_enabled(bool enable_bias) { use_bias=enable_bias; }
00118
00123 inline bool get_bias_enabled() { return use_bias; }
00124
00129 inline void set_bufsize(int32_t sz) { bufsize=sz; }
00130
00135 inline int32_t get_bufsize() { return bufsize; }
00136
00142 inline void set_degree(int32_t d, int32_t from_d)
00143 {
00144 degree=d;
00145 from_degree=from_d;
00146 }
00147
00152 inline int32_t get_degree() { return degree; }
00153
00160 virtual CBinaryLabels* apply_binary(CFeatures* data=NULL);
00161
00168 virtual CRegressionLabels* apply_regression(CFeatures* data=NULL);
00169
00175 virtual float64_t apply_one(int32_t num)
00176 {
00177 ASSERT(features);
00178 if (!wd_weights)
00179 set_wd_weights();
00180
00181 int32_t len=0;
00182 float64_t sum=0;
00183 bool free_vec;
00184 uint8_t* vec=features->get_feature_vector(num, len, free_vec);
00185
00186 ASSERT(len==string_length);
00187
00188 for (int32_t j=0; j<string_length; j++)
00189 {
00190 int32_t offs=w_dim_single_char*j;
00191 int32_t val=0;
00192 for (int32_t k=0; (j+k<string_length) && (k<degree); k++)
00193 {
00194 val=val*alphabet_size + vec[j+k];
00195 sum+=wd_weights[k] * w[offs+val];
00196 offs+=w_offsets[k];
00197 }
00198 }
00199 features->free_feature_vector(vec, num, free_vec);
00200 return sum/normalization_const;
00201 }
00202
00204 inline void set_normalization_const()
00205 {
00206 ASSERT(features);
00207 normalization_const=0;
00208 for (int32_t i=0; i<degree; i++)
00209 normalization_const+=(string_length-i)*wd_weights[i]*wd_weights[i];
00210
00211 normalization_const=CMath::sqrt(normalization_const);
00212 SG_DEBUG("normalization_const:%f\n", normalization_const);
00213 }
00214
00219 inline float64_t get_normalization_const() { return normalization_const; }
00220
00221
00222 protected:
00223
00228 SGVector<float64_t> apply_get_outputs(CFeatures* data);
00229
00234 int32_t set_wd_weights();
00235
00244 static void compute_W(
00245 float64_t *sq_norm_W, float64_t *dp_WoldW, float64_t *alpha,
00246 uint32_t nSel, void* ptr );
00247
00254 static float64_t update_W(float64_t t, void* ptr );
00255
00261 static void* add_new_cut_helper(void* ptr);
00262
00271 static int add_new_cut(
00272 float64_t *new_col_H, uint32_t *new_cut, uint32_t cut_length,
00273 uint32_t nSel, void* ptr );
00274
00280 static void* compute_output_helper(void* ptr);
00281
00287 static int compute_output( float64_t *output, void* ptr );
00288
00295 static int sort( float64_t* vals, float64_t* data, uint32_t size);
00296
00298 static inline void print(ocas_return_value_T value)
00299 {
00300 return;
00301 }
00302
00303
00305 virtual const char* get_name() const { return "WDSVMOcas"; }
00306
00307 protected:
00316 virtual bool train_machine(CFeatures* data=NULL);
00317
00318 protected:
00320 CStringFeatures<uint8_t>* features;
00322 bool use_bias;
00324 int32_t bufsize;
00326 float64_t C1;
00328 float64_t C2;
00330 float64_t epsilon;
00332 E_SVM_TYPE method;
00333
00335 int32_t degree;
00337 int32_t from_degree;
00339 float32_t* wd_weights;
00341 int32_t num_vec;
00343 int32_t string_length;
00345 int32_t alphabet_size;
00346
00348 float64_t normalization_const;
00349
00351 float64_t bias;
00353 float64_t old_bias;
00355 int32_t* w_offsets;
00357 int32_t w_dim;
00359 int32_t w_dim_single_char;
00361 float32_t* w;
00363 float32_t* old_w;
00365 float64_t* lab;
00366
00368 float32_t** cuts;
00370 float64_t* cp_bias;
00371 };
00372 }
00373 #endif