HashedWDFeatures.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2010 Soeren Sonnenburg
00008  * Copyright (C) 2010 Berlin Institute of Technology
00009  */
00010 
00011 #ifndef _HASHEDWDFEATURES_H___
00012 #define _HASHEDWDFEATURES_H___
00013 
00014 #include <shogun/lib/common.h>
00015 #include <shogun/features/DotFeatures.h>
00016 #include <shogun/features/StringFeatures.h>
00017 #include <shogun/lib/Hash.h>
00018 
00019 namespace shogun
00020 {
00021 template<class ST> class CStringFeatures;
00022 
00028 class CHashedWDFeatures: public CDotFeatures
00029 {
          /*
           * Feature class derived from CDotFeatures that is constructed from
           * byte strings (CStringFeatures<uint8_t>) together with several
           * order parameters and a hash bit count.
           *
           * NOTE(review): judging by the class name and by
           * get_feature_class() returning C_WD, this looks like a hashed
           * variant of weighted-degree (WD) string features. The
           * implementation is not part of this header -- confirm in the
           * corresponding .cpp before relying on any of the "presumably"
           * notes below.
           */
00030 public:
          /** Default constructor. */
00032     CHashedWDFeatures();
00033 
          /**
           * Constructor taking string features and order parameters.
           *
           * NOTE(review): the parameter meanings below are inferred from the
           * parameter names only -- confirm against the implementation.
           *
           * @param str         string features over uint8_t
           * @param start_order presumably the lowest order taken into account
           * @param order       presumably the highest order taken into account
           * @param from_order  presumably the order whose weighting is used
           * @param hash_bits   presumably the number of bits of the hash;
           *                    defaults to 12
           */
00042     CHashedWDFeatures(CStringFeatures<uint8_t>* str, int32_t start_order,
00043             int32_t order, int32_t from_order, int32_t hash_bits=12);
00044 
          /** Copy constructor. */
00046     CHashedWDFeatures(const CHashedWDFeatures & orig);
00047 
          /** Virtual destructor. */
00049     virtual ~CHashedWDFeatures();
00050 
          /**
           * Dimensionality of the feature space.
           *
           * @return the value of the member w_dim
           */
00058     virtual int32_t get_dim_feature_space() const
00059     {
00060         return w_dim;
00061     }
00062 
          /**
           * NOTE(review): presumably the dot product between vector vec_idx1
           * of this object and vector vec_idx2 of df -- confirm in the
           * implementation.
           *
           * @param vec_idx1 index of the first vector
           * @param df       other dot-feature object
           * @param vec_idx2 index of the second vector
           * @return a float64_t result
           */
00070     virtual float64_t dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2);
00071 
          /**
           * NOTE(review): presumably the dot product between vector vec_idx1
           * of this object and the dense array vec2 -- confirm in the
           * implementation.
           *
           * @param vec_idx1 index of the vector
           * @param vec2     dense array (read-only)
           * @param vec2_len presumably the number of entries in vec2
           * @return a float64_t result
           */
00078     virtual float64_t dense_dot(int32_t vec_idx1, const float64_t* vec2,
00079             int32_t vec2_len);
00080 
          /**
           * NOTE(review): presumably adds alpha times vector vec_idx1 to the
           * dense array vec2 -- confirm in the implementation.
           *
           * @param alpha    scalar factor
           * @param vec_idx1 index of the vector
           * @param vec2     dense array (writable)
           * @param vec2_len presumably the number of entries in vec2
           * @param abs_val  defaults to false; presumably selects use of
           *                 absolute values -- TODO confirm
           */
00089     virtual void add_to_dense_vec(float64_t alpha, int32_t vec_idx1,
00090             float64_t* vec2, int32_t vec2_len, bool abs_val=false);
00091 
          /**
           * NOTE(review): presumably the number of non-zero features of
           * vector num -- confirm in the implementation.
           *
           * @param num index of the vector
           * @return an int32_t count
           */
00097     virtual int32_t get_nnz_features_for_vector(int32_t num);
00098 
00099     #ifndef DOXYGEN_SHOULD_SKIP_THIS
00100 
          /*
           * State record for the feature iterator functions declared below.
           * NOTE(review): field meanings are inferred from their names --
           * confirm against get_feature_iterator()/get_next_feature().
           */
00101     struct hashed_wd_feature_iterator
00102     {
              /* presumably the feature vector being iterated over */
00104         uint16_t* vec;
              /* presumably the index of that vector */
00106         int32_t vidx;
              /* presumably the length of that vector */
00108         int32_t vlen;
              /* presumably whether vec has to be freed -- TODO confirm */
00110         bool vfree;
00111 
              /* presumably the current feature index */
00113         int32_t index;
00114 
00115     };
00116     #endif
00117 
          /**
           * Obtain an opaque iterator for one vector.
           *
           * NOTE(review): presumably to be passed to get_next_feature() and
           * released with free_feature_iterator() -- confirm.
           *
           * @param vector_index index of the vector to iterate over
           * @return iterator as void pointer
           */
00127     virtual void* get_feature_iterator(int32_t vector_index);
00128 
          /**
           * NOTE(review): presumably advances the iterator, writing the next
           * feature's index and value into the reference arguments, and
           * reports whether a feature was produced -- confirm.
           *
           * @param index    output: feature index
           * @param value    output: feature value
           * @param iterator iterator as void pointer
           * @return a bool result
           */
00139     virtual bool get_next_feature(int32_t& index, float64_t& value,
00140             void* iterator);
00141 
          /**
           * NOTE(review): presumably releases an iterator obtained from
           * get_feature_iterator() -- confirm.
           *
           * @param iterator iterator as void pointer
           */
00147     virtual void free_feature_iterator(void* iterator);
00148 
          /**
           * NOTE(review): presumably returns a copy of this object;
           * ownership of the returned pointer is not visible here.
           *
           * @return pointer to a CFeatures object
           */
00153     virtual CFeatures* duplicate() const;
00154 
          /**
           * Feature type.
           *
           * @return always F_UNKNOWN
           */
00159     virtual EFeatureType get_feature_type() const
00160     {
00161         return F_UNKNOWN;
00162     }
00163 
          /**
           * Feature class.
           *
           * @return always C_WD
           */
00168     virtual EFeatureClass get_feature_class() const
00169     {
00170         return C_WD;
00171     }
00172 
          /**
           * Number of vectors.
           *
           * @return the value of the member num_strings
           */
00173     virtual int32_t get_num_vectors() const
00174     {
00175         return num_strings;
00176     }
00177 
          /**
           * Size reported for this feature object.
           *
           * @return sizeof(float64_t)
           */
00178     virtual int32_t get_size() const
00179     {
00180         return sizeof(float64_t);
00181     }
00182 
          /**
           * Set the normalization constant.
           *
           * @param n new constant; defaults to 0.
           *          NOTE(review): 0 may be treated specially (e.g. to pick
           *          a default) -- confirm in the implementation.
           */
00185     void set_normalization_const(float64_t n=0);
00186 
          /**
           * Get the normalization constant.
           *
           * @return the value of the member normalization_const
           */
00188     inline float64_t get_normalization_const()
00189     {
00190         return normalization_const;
00191     }
00192 
          /**
           * Name of this object.
           *
           * @return the string "HashedWDFeatures"
           */
00194     virtual const char* get_name() const
00195     {
00196         return "HashedWDFeatures";
00197     }
00198 
00199 protected:
00200 
          /**
           * NOTE(review): presumably (re)computes the wd_weights array --
           * confirm in the implementation.
           */
00202     void set_wd_weights();
00203 
00204 protected:
          /* string features passed in at construction (presumably) */
00206     CStringFeatures<uint8_t>* strings;
00207 
          /*
           * NOTE(review): the member descriptions below are inferred from
           * names only; none of them is initialized in this header.
           */
          /* presumably the order/degree */
00209     int32_t degree;
          /* presumably the starting degree */
00211     int32_t start_degree;
          /* presumably the degree whose weighting is used */
00213     int32_t from_degree;
          /* presumably the length of the strings */
00215     int32_t string_length;
          /* returned by get_num_vectors() */
00217     int32_t num_strings;
          /* presumably the size of the alphabet */
00219     int32_t alphabet_size;
          /* returned by get_dim_feature_space() */
00221     int32_t w_dim;
          /* presumably a per-position share of w_dim -- TODO confirm */
00223     int32_t partial_w_dim;
          /* presumably the weights set up by set_wd_weights() */
00225     float64_t* wd_weights;
          /* presumably a bit mask applied to hash values -- TODO confirm */
00227     uint32_t mask;
          /* presumably the hash bit count given at construction */
00229     int32_t m_hash_bits;
00230 
          /* returned by get_normalization_const() */
00232     float64_t normalization_const;
00233 };
00234 }
00235 #endif // _HASHEDWDFEATURES_H___
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation