SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
HashedDocDotFeatures.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2013 Evangelos Anagnostopoulos
8  * Copyright (C) 2013 Evangelos Anagnostopoulos
9  */
10 
11 #ifndef _HASHEDDOCDOTFEATURES__H__
12 #define _HASHEDDOCDOTFEATURES__H__
13 
14 #include <shogun/lib/config.h>
15 
19 #include <shogun/lib/Tokenizer.h>
20 
21 namespace shogun {
// Forward declarations. In the declarations visible in this header,
// CStringFeatures, CDotFeatures and CTokenizer are only used as pointer
// parameter types; SGMatrix and CHashedDocConverter are not referenced by
// any declaration shown here (NOTE(review): they may be needed by members
// that are not visible in this listing - confirm before removing).
// CTokenizer is forward-declared even though <shogun/lib/Tokenizer.h> is
// included above.
22 template<class ST> class CStringFeatures;
23 template<class ST> class SGMatrix;
24 class CDotFeatures;
25 class CHashedDocConverter;
26 class CTokenizer;
27 
39 {
40 public:
41 
/**
 * Constructor. Every parameter has a default value, so it can also be
 * called with no arguments.
 *
 * @param hash_bits defaults to 0 (presumably the number of hash bits that
 *        determines the target dimension - TODO confirm in the .cpp)
 * @param docs pointer to character string features, defaults to NULL
 *        (presumably the document collection - TODO confirm)
 * @param tzer pointer to a tokenizer, defaults to NULL
 * @param normalize defaults to true (exact normalization is not visible
 *        in this header)
 * @param n_grams defaults to 1
 * @param skips defaults to 0
 * @param size defaults to 0. NOTE(review): init() below has no matching
 *        parameter, so how this value is used cannot be told from here.
 */
52  CHashedDocDotFeatures(int32_t hash_bits=0, CStringFeatures<char>* docs=NULL,
53  CTokenizer* tzer=NULL, bool normalize=true, int32_t n_grams=1, int32_t skips=0, int32_t size=0);
54 
57 
63 
/** Virtual destructor. What it releases is defined in the implementation file. */
65  virtual ~CHashedDocDotFeatures();
66 
/**
 * Const accessor for the dimensionality of the feature space.
 *
 * @return the dimension as int32_t (presumably derived from num_bits -
 *         TODO confirm in the .cpp)
 */
74  virtual int32_t get_dim_feature_space() const;
75 
/**
 * Dot product between a vector of this object and a vector of another
 * CDotFeatures object.
 *
 * @param vec_idx1 index of the vector in this object
 * @param df the other feature object (NOTE(review): any requirement on its
 *        concrete type is not visible here)
 * @param vec_idx2 index of the vector in df
 * @return the result as float64_t
 */
83  virtual float64_t dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2);
84 
/**
 * Dot product between a vector of this object and a dense SGVector.
 *
 * @param vec_idx1 index of the vector in this object
 * @param vec2 dense vector, taken by const value
 * @return the result as float64_t
 */
90  virtual float64_t dense_dot_sgvec(int32_t vec_idx1, const SGVector<float64_t> vec2);
91 
/**
 * Dot product between a vector of this object and a dense array.
 *
 * @param vec_idx1 index of the vector in this object
 * @param vec2 pointer to the dense array (read-only)
 * @param vec2_len length of vec2 (presumably must equal
 *        get_dim_feature_space() - TODO confirm in the .cpp)
 * @return the result as float64_t
 */
98  virtual float64_t dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len);
99 
/**
 * Updates the dense array vec2 using the vector at vec_idx1 and the
 * scalar alpha (presumably vec2 += alpha * vector - TODO confirm in the
 * .cpp).
 *
 * @param alpha scalar factor
 * @param vec_idx1 index of the vector in this object
 * @param vec2 dense array that is written to (non-const pointer)
 * @param vec2_len length of vec2
 * @param abs_val defaults to false (presumably adds absolute values when
 *        true - TODO confirm)
 */
108  virtual void add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val=false);
109 
/**
 * Count query for a single vector (by its name, the number of non-zero
 * features - TODO confirm in the .cpp).
 *
 * @param num index of the vector
 * @return the count as int32_t
 */
117  virtual int32_t get_nnz_features_for_vector(int32_t num);
118 
/**
 * Creates an opaque iterator handle for one vector.
 *
 * @param vector_index index of the vector to iterate over
 * @return untyped handle; it has the same type as the iterator argument
 *         of get_next_feature() and free_feature_iterator() (presumably
 *         it is meant to be passed to them - TODO confirm)
 */
129  virtual void* get_feature_iterator(int32_t vector_index);
130 
/**
 * Advances an iterator handle and reports the next feature through the
 * two reference parameters.
 *
 * @param index output: passed by non-const reference
 * @param value output: passed by non-const reference
 * @param iterator opaque handle (presumably one obtained from
 *        get_feature_iterator() - TODO confirm)
 * @return bool (presumably true while a feature was produced - TODO
 *         confirm in the .cpp)
 */
142  virtual bool get_next_feature(int32_t& index, float64_t& value, void* iterator);
143 
/**
 * Releases an iterator handle.
 *
 * @param iterator opaque handle (presumably one obtained from
 *        get_feature_iterator() - TODO confirm)
 */
150  virtual void free_feature_iterator(void* iterator);
151 
157 
/** @return the object's name as a C string (the literal is defined in the .cpp) */
158  virtual const char* get_name() const;
159 
/**
 * Const method returning a CFeatures pointer (by its name, a copy of this
 * object; NOTE(review): ownership of the result and copy depth are not
 * visible here - confirm in the .cpp).
 */
164  virtual CFeatures* duplicate() const;
165 
/** @return the EFeatureType of this object (the concrete enum value is defined in the .cpp) */
170  virtual EFeatureType get_feature_type() const;
171 
/** @return the EFeatureClass of this object (the concrete enum value is defined in the .cpp) */
176  virtual EFeatureClass get_feature_class() const;
177 
/**
 * Const accessor for the number of vectors.
 *
 * @return the count as int32_t (presumably the number of documents held -
 *         TODO confirm in the .cpp)
 */
182  virtual int32_t get_num_vectors() const;
183 
/**
 * Static helper that computes a hash value for a token; it needs no
 * object instance.
 *
 * @param token pointer to the token's characters (non-const pointer;
 *        NOTE(review): whether the buffer is modified is not visible here)
 * @param length length of the token; passed explicitly (presumably so the
 *        token need not be NUL-terminated - TODO confirm)
 * @param num_bits presumably limits the result to that many bits - TODO
 *        confirm in the .cpp
 * @param seed seed value for the hash
 * @return the hash as uint32_t
 */
192  static uint32_t calculate_token_hash(char* token, int32_t length,
193  int32_t num_bits, uint32_t seed);
194 
195 private:
/**
 * Private initialisation helper. Its parameters mirror the first six
 * constructor parameters; there is no counterpart for the constructor's
 * `size` argument. Presumably called from the constructor(s) - TODO
 * confirm in the .cpp.
 *
 * @param hash_bits see constructor
 * @param docs see constructor
 * @param tzer see constructor
 * @param normalize see constructor
 * @param n_grams see constructor
 * @param skips see constructor
 */
196  void init(int32_t hash_bits, CStringFeatures<char>* docs, CTokenizer* tzer,
197  bool normalize, int32_t n_grams, int32_t skips);
198 
199 protected:
202 
/** Number of bits (presumably of the hash, set from hash_bits - TODO confirm in the .cpp). */
204  int32_t num_bits;
205 
208 
211 
/** N-gram setting (presumably set from the n_grams argument - TODO confirm in the .cpp). */
213  int32_t ngrams;
214 
/** Token skip count (presumably set from the skips argument - TODO confirm in the .cpp). */
216  int32_t tokens_to_skip;
217 };
218 }
219 
220 #endif

SHOGUN Machine Learning Toolbox - Documentation