// NOTE(review): listing lines 1-10 (presumably the file-header comment) were
// dropped by the HTML source-listing extraction; restore them from the upstream file.
#ifndef _SNPFEATURES_H___
#define _SNPFEATURES_H___

#include <string.h>

#include <shogun/lib/common.h>
#include <shogun/features/DotFeatures.h>
#include <shogun/features/StringFeatures.h>

00018 namespace shogun
00019 {
00020 template <class ST> class CStringFeatures;
00021
00027 class CSNPFeatures : public CDotFeatures
00028 {
00029 public:
00031 CSNPFeatures();
00032
00037 CSNPFeatures(CStringFeatures<uint8_t>* str);
00038
00040 CSNPFeatures(const CSNPFeatures & orig);
00041
00043 virtual ~CSNPFeatures();
00044
00052 inline virtual int32_t get_dim_feature_space() const
00053 {
00054 return w_dim;
00055 }
00056
00064 virtual float64_t dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2);
00065
00072 virtual float64_t dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len);
00073
00082 virtual void add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val=false);
00083
00089 virtual inline int32_t get_nnz_features_for_vector(int32_t num)
00090 {
00091 return w_dim/3;
00092 }
00093
00103 virtual void* get_feature_iterator(int32_t vector_index);
00104
00115 virtual bool get_next_feature(int32_t& index, float64_t& value, void* iterator);
00116
00122 virtual void free_feature_iterator(void* iterator);
00123
00128 virtual CFeatures* duplicate() const;
00129
00134 inline virtual EFeatureType get_feature_type()
00135 {
00136 return F_UNKNOWN;
00137 }
00138
00143 inline virtual EFeatureClass get_feature_class()
00144 {
00145 return C_WD;
00146 }
00147
00152 inline virtual int32_t get_num_vectors() const
00153 {
00154 return num_strings;
00155 }
00156
00161 inline virtual int32_t get_size()
00162 {
00163 return sizeof(float64_t);
00164 }
00165
00168 void set_normalization_const(float64_t n=0);
00169
00171 inline float64_t get_normalization_const()
00172 {
00173 return normalization_const;
00174 }
00175
00180 void set_minor_base_string(const char* str)
00181 {
00182 m_str_min=(uint8_t*) strdup(str);
00183 }
00184
00185
00190 void set_major_base_string(const char* str)
00191 {
00192 m_str_maj=(uint8_t*) strdup(str);
00193 }
00194
00195
00200 char* get_minor_base_string()
00201 {
00202 return (char*) m_str_min;
00203 }
00204
00205
00210 char* get_major_base_string()
00211 {
00212 return (char*) m_str_maj;
00213 }
00214
00220 void obtain_base_strings(CSNPFeatures* snp=NULL);
00221
00223 inline virtual const char* get_name() const { return "SNPFeatures"; }
00224
00227 virtual SGMatrix<float64_t> get_histogram(bool normalize=true);
00228
00232 static SGMatrix<float64_t> get_2x3_table(CSNPFeatures* pos, CSNPFeatures* neg);
00233
00234 private:
00241 void find_minor_major_strings(uint8_t* minor, uint8_t* major);
00242
00243 protected:
00245 CStringFeatures<uint8_t>* strings;
00246
00248 int32_t string_length;
00250 int32_t num_strings;
00252 int32_t w_dim;
00253
00255 float64_t normalization_const;
00256
00258 uint8_t* m_str_min;
00260 uint8_t* m_str_maj;
00261
00262 };
00263 }
#endif // _SNPFEATURES_H___