SHOGUN: WDSVMOcas.h Source File

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2007-2008 Vojtech Franc
00008  * Written (W) 2007-2009 Soeren Sonnenburg
00009  * Copyright (C) 2007-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #ifndef _WDSVMOCAS_H___
00013 #define _WDSVMOCAS_H___
00014 
00015 #include <shogun/lib/common.h>
00016 #include <shogun/machine/Machine.h>
00017 #include <shogun/classifier/svm/SVMOcas.h>
00018 #include <shogun/features/StringFeatures.h>
00019 #include <shogun/labels/Labels.h>
00020 
00021 namespace shogun
00022 {
00023 template <class ST> class CStringFeatures;
00024 
00026 class CWDSVMOcas : public CMachine
00027 {
00028     public:
00030         MACHINE_PROBLEM_TYPE(PT_BINARY);
00031 
00033         CWDSVMOcas();
00034 
00039         CWDSVMOcas(E_SVM_TYPE type);
00040 
00049         CWDSVMOcas(
00050             float64_t C, int32_t d, int32_t from_d,
00051             CStringFeatures<uint8_t>* traindat, CLabels* trainlab);
00052         virtual ~CWDSVMOcas();
00053 
00058         virtual EMachineType get_classifier_type() { return CT_WDSVMOCAS; }
00059 
00066         inline void set_C(float64_t c_neg, float64_t c_pos) { C1=c_neg; C2=c_pos; }
00067 
00072         inline float64_t get_C1() { return C1; }
00073 
00078         inline float64_t get_C2() { return C2; }
00079 
00084         inline void set_epsilon(float64_t eps) { epsilon=eps; }
00085 
00090         inline float64_t get_epsilon() { return epsilon; }
00091 
00096         inline void set_features(CStringFeatures<uint8_t>* feat)
00097         {
00098             SG_UNREF(features);
00099             SG_REF(feat);
00100             features=feat;
00101         }
00102 
00107         inline CStringFeatures<uint8_t>* get_features()
00108         {
00109             SG_REF(features);
00110             return features;
00111         }
00112 
00117         inline void set_bias_enabled(bool enable_bias) { use_bias=enable_bias; }
00118 
00123         inline bool get_bias_enabled() { return use_bias; }
00124 
00129         inline void set_bufsize(int32_t sz) { bufsize=sz; }
00130 
00135         inline int32_t get_bufsize() { return bufsize; }
00136 
00142         inline void set_degree(int32_t d, int32_t from_d)
00143         {
00144             degree=d;
00145             from_degree=from_d;
00146         }
00147 
00152         inline int32_t get_degree() { return degree; }
00153 
00160         virtual CBinaryLabels* apply_binary(CFeatures* data=NULL);
00161         
00168         virtual CRegressionLabels* apply_regression(CFeatures* data=NULL);
00169 
00175         virtual float64_t apply_one(int32_t num)
00176         {
00177             ASSERT(features);
00178             if (!wd_weights)
00179                 set_wd_weights();
00180 
00181             int32_t len=0;
00182             float64_t sum=0;
00183             bool free_vec;
00184             uint8_t* vec=features->get_feature_vector(num, len, free_vec);
00185             //SG_INFO("len %d, string_length %d\n", len, string_length);
00186             ASSERT(len==string_length);
00187 
00188             for (int32_t j=0; j<string_length; j++)
00189             {
00190                 int32_t offs=w_dim_single_char*j;
00191                 int32_t val=0;
00192                 for (int32_t k=0; (j+k<string_length) && (k<degree); k++)
00193                 {
00194                     val=val*alphabet_size + vec[j+k];
00195                     sum+=wd_weights[k] * w[offs+val];
00196                     offs+=w_offsets[k];
00197                 }
00198             }
00199             features->free_feature_vector(vec, num, free_vec);
00200             return sum/normalization_const;
00201         }
00202 
00204         inline void set_normalization_const()
00205         {
00206             ASSERT(features);
00207             normalization_const=0;
00208             for (int32_t i=0; i<degree; i++)
00209                 normalization_const+=(string_length-i)*wd_weights[i]*wd_weights[i];
00210 
00211             normalization_const=CMath::sqrt(normalization_const);
00212             SG_DEBUG("normalization_const:%f\n", normalization_const);
00213         }
00214 
00219         inline float64_t get_normalization_const() { return normalization_const; }
00220 
00221 
00222     protected:
00223 
00228         SGVector<float64_t> apply_get_outputs(CFeatures* data);
00229 
00234         int32_t set_wd_weights();
00235 
00244         static void compute_W(
00245             float64_t *sq_norm_W, float64_t *dp_WoldW, float64_t *alpha,
00246             uint32_t nSel, void* ptr );
00247 
00254         static float64_t update_W(float64_t t, void* ptr );
00255 
00261         static void* add_new_cut_helper(void* ptr);
00262 
00271         static int add_new_cut(
00272             float64_t *new_col_H, uint32_t *new_cut, uint32_t cut_length,
00273             uint32_t nSel, void* ptr );
00274 
00280         static void* compute_output_helper(void* ptr);
00281 
00287         static int compute_output( float64_t *output, void* ptr );
00288 
00295         static int sort( float64_t* vals, float64_t* data, uint32_t size);
00296 
00298         static inline void print(ocas_return_value_T value)
00299         {
00300               return;
00301         }
00302 
00303 
00305         virtual const char* get_name() const { return "WDSVMOcas"; }
00306 
00307     protected:
00316         virtual bool train_machine(CFeatures* data=NULL);
00317 
00318     protected:
00320         CStringFeatures<uint8_t>* features;
00322         bool use_bias;
00324         int32_t bufsize;
00326         float64_t C1;
00328         float64_t C2;
00330         float64_t epsilon;
00332         E_SVM_TYPE method;
00333 
00335         int32_t degree;
00337         int32_t from_degree;
00339         float32_t* wd_weights;
00341         int32_t num_vec;
00343         int32_t string_length;
00345         int32_t alphabet_size;
00346 
00348         float64_t normalization_const;
00349 
00351         float64_t bias;
00353         float64_t old_bias;
00355         int32_t* w_offsets;
00357         int32_t w_dim;
00359         int32_t w_dim_single_char;
00361         float32_t* w;
00363         float32_t* old_w;
00365         float64_t* lab;
00366 
00368         float32_t** cuts;
00370         float64_t* cp_bias;
00371 };
00372 }
00373 #endif