HashedWDFeatures.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2010 Soeren Sonnenburg
00008  * Copyright (C) 2010 Berlin Institute of Technology
00009  */
00010 
00011 #ifndef _HASHEDWDFEATURES_H___
00012 #define _HASHEDWDFEATURES_H___
00013 
00014 #include <shogun/lib/common.h>
00015 #include <shogun/features/DotFeatures.h>
00016 #include <shogun/features/StringFeatures.h>
00017 #include <shogun/lib/Hash.h>
00018 
00019 namespace shogun
00020 {
00021 template<class ST> class CStringFeatures;
00022 
/** Dot features computed from byte strings held in a CStringFeatures<uint8_t>
 * (member `strings`).
 *
 * The class implements the CDotFeatures interface; dot(), dense_dot(),
 * add_to_dense_vec() and duplicate() are only declared here and defined out
 * of line. The feature space has get_dim_feature_space() == w_dim dimensions.
 *
 * NOTE(review): the class name and the members wd_weights, mask and
 * m_hash_bits suggest hashed weighted-degree (k-mer) features; the actual
 * embedding is not visible in this header -- confirm against the .cpp.
 */
00028 class CHashedWDFeatures: public CDotFeatures
00029 {
00030 public:
          /** default constructor (defined out of line) */
00032     CHashedWDFeatures(void);
00033 
          /** constructor (defined out of line)
           *
           * @param str string features over bytes to compute the features from
           * @param start_order presumably stored in start_degree -- TODO confirm
           * @param order presumably stored in degree -- TODO confirm
           * @param from_order presumably stored in from_degree -- TODO confirm
           * @param hash_bits defaults to 12; presumably the number of hash bits
           *        (see m_hash_bits / mask) -- TODO confirm
           */
00042     CHashedWDFeatures(CStringFeatures<uint8_t>* str, int32_t start_order,
00043             int32_t order, int32_t from_order, int32_t hash_bits=12);
00044 
          /** copy constructor (defined out of line)
           *
           * @param orig object to copy from
           */
00046     CHashedWDFeatures(const CHashedWDFeatures & orig);
00047 
          /** destructor (defined out of line) */
00049     virtual ~CHashedWDFeatures();
00050 
          /** obtain the dimensionality of the feature space
           *
           * @return w_dim
           */
00058     inline virtual int32_t get_dim_feature_space() const
00059     {
00060         return w_dim;
00061     }
00062 
          /** dot product between a vector of this object and a vector of
           * another CDotFeatures object (declared here, defined out of line;
           * semantics per the CDotFeatures interface -- confirm in the .cpp)
           *
           * @param vec_idx1 index of the vector in this object
           * @param df the other feature object
           * @param vec_idx2 index of the vector in df
           * @return value of the dot product
           */
00070     virtual float64_t dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2);
00071 
          /** dot product between a vector of this object and a dense vector
           * (declared here, defined out of line)
           *
           * @param vec_idx1 index of the vector in this object
           * @param vec2 dense vector
           * @param vec2_len length of vec2; presumably expected to equal
           *        get_dim_feature_space() -- TODO confirm
           * @return value of the dot product
           */
00078     virtual float64_t dense_dot(int32_t vec_idx1, const float64_t* vec2,
00079             int32_t vec2_len);
00080 
          /** add a scaled vector of this object to a dense vector
           * (declared here, defined out of line)
           *
           * @param alpha scaling factor
           * @param vec_idx1 index of the vector in this object
           * @param vec2 dense vector that is updated
           * @param vec2_len length of vec2
           * @param abs_val defaults to false; presumably adds absolute values
           *        when true -- TODO confirm
           */
00089     virtual void add_to_dense_vec(float64_t alpha, int32_t vec_idx1,
00090             float64_t* vec2, int32_t vec2_len, bool abs_val=false);
00091 
          /** number of non-zero features reported for a vector
           *
           * Fetches string `num` from `strings` only to learn its length,
           * releases it again and reports degree times that length.
           *
           * @param num index of the vector
           * @return degree*vlen, where vlen is the length of string num
           */
00097     virtual inline int32_t get_nnz_features_for_vector(int32_t num)
00098     {
00099         int32_t vlen=-1;
              // initialised so that free_feature_vector() never receives an
              // indeterminate flag should get_feature_vector() leave it untouched
00100         bool free_vec=false;
00101         uint8_t* vec=strings->get_feature_vector(num, vlen, free_vec);
              // only vlen is needed, so the vector is handed back right away
00102         strings->free_feature_vector(vec, num, free_vec);
00103         return degree*vlen;
00104     }
00105 
00106     #ifndef DOXYGEN_SHOULD_SKIP_THIS
00107 
          /** iterator state; the iterator methods of this header are not
           * implemented, so nothing visible here fills or reads it.
           * NOTE(review): vec is uint16_t* although `strings` holds uint8_t
           * data -- confirm this is intended before using the struct.
           */
00108     struct hashed_wd_feature_iterator
00109     {
              /** pointer to the feature vector */
00111         uint16_t* vec;
              /** presumably the index of the vector -- TODO confirm */
00113         int32_t vidx;
              /** presumably the length of the vector -- TODO confirm */
00115         int32_t vlen;
              /** presumably whether vec has to be freed -- TODO confirm */
00117         bool vfree;
00118 
              /** presumably the current feature index -- TODO confirm */
00120         int32_t index;
00121 
00122     };
00123     #endif
00124 
          /** create a feature iterator -- not implemented
           *
           * Invokes SG_NOTIMPLEMENTED; the return below only matters if that
           * macro returns control.
           *
           * @param vector_index index of the vector to iterate over (unused)
           * @return NULL
           */
00134     virtual void* get_feature_iterator(int32_t vector_index)
00135     {
00136         SG_NOTIMPLEMENTED;
00137         return NULL;
00138     }
00139 
          /** advance a feature iterator -- not implemented
           *
           * Invokes SG_NOTIMPLEMENTED; the return below only matters if that
           * macro returns control.
           *
           * @param index not modified by this stub
           * @param value not modified by this stub
           * @param iterator iterator from get_feature_iterator (unused)
           * @return false
           */
00150     virtual bool get_next_feature(int32_t& index, float64_t& value,
00151             void* iterator)
00152     {
00153         SG_NOTIMPLEMENTED;
              // the function returns bool, so report false rather than a
              // null-pointer constant
00154         return false;
00155     }
00156 
          /** release a feature iterator -- not implemented
           *
           * @param iterator iterator from get_feature_iterator (unused)
           */
00162     virtual void free_feature_iterator(void* iterator)
00163     {
00164         SG_NOTIMPLEMENTED;
00165     }
00166 
          /** duplicate the feature object (declared here, defined out of line)
           *
           * @return pointer to a CFeatures object; presumably a copy of this
           *         object -- TODO confirm ownership in the .cpp
           */
00171     virtual CFeatures* duplicate() const;
00172 
          /** get the feature type
           *
           * @return F_UNKNOWN
           */
00177     inline virtual EFeatureType get_feature_type()
00178     {
00179         return F_UNKNOWN;
00180     }
00181 
          /** get the feature class
           *
           * @return C_WD
           */
00186     inline virtual EFeatureClass get_feature_class()
00187     {
00188         return C_WD;
00189     }
00190 
          /** get the number of vectors
           *
           * @return num_strings
           */
00191     inline virtual int32_t get_num_vectors() const
00192     {
00193         return num_strings;
00194     }
00195 
          /** get the size reported for a single feature
           *
           * @return sizeof(float64_t)
           */
00196     inline virtual int32_t get_size()
00197     {
00198         return sizeof(float64_t);
00199     }
00200 
          /** set the normalization constant (declared here, defined out of line)
           *
           * @param n new constant, defaults to 0
           *        NOTE(review): 0 presumably requests a default/automatic
           *        value -- confirm in the .cpp
           */
00203     void set_normalization_const(float64_t n=0);
00204 
          /** get the normalization constant
           *
           * @return normalization_const
           */
00206     inline float64_t get_normalization_const()
00207     {
00208         return normalization_const;
00209     }
00210 
          /** @return the object name, "HashedWDFeatures" */
00212     inline virtual const char* get_name() const
00213     {
00214         return "HashedWDFeatures";
00215     }
00216 
00217 protected:
00218 
          /** helper defined out of line; presumably fills wd_weights --
           * TODO confirm in the .cpp */
00220     void set_wd_weights();
00221 
00222 protected:
          /** string features the vectors are read from */
00224     CStringFeatures<uint8_t>* strings;
00225 
          /** degree; multiplied by the string length in
           * get_nnz_features_for_vector() */
00227     int32_t degree;
          /** presumably set from the constructor's start_order -- TODO confirm */
00229     int32_t start_degree;
          /** presumably set from the constructor's from_order -- TODO confirm */
00231     int32_t from_degree;
          /** presumably the common length of the strings -- TODO confirm */
00233     int32_t string_length;
          /** number of strings; returned by get_num_vectors() */
00235     int32_t num_strings;
          /** presumably the size of the strings' alphabet -- TODO confirm */
00237     int32_t alphabet_size;
          /** feature space dimensionality; returned by get_dim_feature_space() */
00239     int32_t w_dim;
          /** presumably the per-position share of w_dim -- TODO confirm */
00241     int32_t partial_w_dim;
          /** presumably the weights set up by set_wd_weights() -- TODO confirm */
00243     float64_t* wd_weights;
          /** presumably the bit mask applied to hash values -- TODO confirm */
00245     uint32_t mask;
          /** presumably the constructor's hash_bits -- TODO confirm */
00247     int32_t m_hash_bits;
00248 
          /** normalization constant; see set/get_normalization_const() */
00250     float64_t normalization_const;
00251 };
00252 }
00253 #endif // _HASHEDWDFEATURES_H___
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation