SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
HashedDocDotFeatures.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2013 Evangelos Anagnostopoulos
8  * Copyright (C) 2013 Evangelos Anagnostopoulos
9  */
10 
11 #ifndef _HASHEDDOCDOTFEATURES__H__
12 #define _HASHEDDOCDOTFEATURES__H__
13 
17 #include <shogun/lib/Tokenizer.h>
18 
/*
 * Declarations placed in the shogun namespace by HashedDocDotFeatures.h.
 *
 * NOTE(review): this text is a numbered source listing and it is incomplete.
 * The numbering jumps from 25 to 37, so the class head that should precede
 * the opening brace below is not visible; judging by the constructor and
 * destructor names the class is CHashedDocDotFeatures. Listing lines 55, 61,
 * 155, 200, 206 and 209 are empty, so whatever they declare is missing from
 * this view as well.
 */
19 namespace shogun {
/*
 * Forward declarations. CStringFeatures, CDotFeatures and CTokenizer are used
 * below only through pointers; SGMatrix and CHashedDocConverter are not
 * referenced by any visible line.
 * NOTE(review): SGVector, CFeatures, EFeatureType and EFeatureClass are used
 * below but are not declared in the visible part of the listing; they
 * presumably come from includes on listing lines 14-16, which are not shown.
 */
20 template<class ST> class CStringFeatures;
21 template<class ST> class SGMatrix;
22 class CDotFeatures;
23 class CHashedDocConverter;
24 class CTokenizer;
25 
37 {
38 public:
39 
 /*
  * Constructor. Every argument has a default value, so it can also be called
  * without arguments.
  * hash_bits, docs, tzer, normalize, n_grams and skips have the same names and
  * types as the parameters of the private init() further down and are
  * presumably forwarded to it - TODO confirm in the implementation file.
  * The role of size cannot be determined from this header.
  */
50  CHashedDocDotFeatures(int32_t hash_bits=0, CStringFeatures<char>* docs=NULL,
51  CTokenizer* tzer=NULL, bool normalize=true, int32_t n_grams=1, int32_t skips=0, int32_t size=0);
52 
55 
61 
 /* Virtual destructor. */
63  virtual ~CHashedDocDotFeatures();
64 
 /*
  * Const query returning an int32_t; by its name, the dimensionality of the
  * feature space (presumably related to num_bits - TODO confirm).
  */
72  virtual int32_t get_dim_feature_space() const;
73 
 /*
  * Takes an index vec_idx1, another CDotFeatures object df and an index
  * vec_idx2, and returns a float64_t; by its name, a dot product between the
  * two indexed vectors. The implementation is not part of this header.
  */
81  virtual float64_t dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2);
82 
 /*
  * Variant of dense_dot() whose second operand is an SGVector<float64_t>
  * passed by value instead of a pointer/length pair.
  */
88  virtual float64_t dense_dot_sgvec(int32_t vec_idx1, const SGVector<float64_t> vec2);
89 
 /*
  * Takes an index vec_idx1 and a read-only float64_t buffer vec2 of length
  * vec2_len, and returns a float64_t; by its name, their dot product.
  */
96  virtual float64_t dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len);
97 
 /*
  * Takes a scalar alpha, an index vec_idx1 and a writable buffer vec2 of
  * length vec2_len; abs_val defaults to false. By its name it accumulates
  * alpha times the indexed vector into vec2 - confirm the exact meaning of
  * abs_val in the implementation.
  */
106  virtual void add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val=false);
107 
 /*
  * Returns an int32_t for the vector with index num; by its name, the number
  * of non-zero features of that vector.
  */
115  virtual int32_t get_nnz_features_for_vector(int32_t num);
116 
 /*
  * Opaque iterator interface made of the next three declarations.
  * get_feature_iterator() returns a void* handle for vector_index;
  * get_next_feature() and free_feature_iterator() each accept a void*
  * iterator, presumably that same handle.
  */
127  virtual void* get_feature_iterator(int32_t vector_index);
128 
 /*
  * Writes its results through the index and value references and returns a
  * bool; presumably false signals that the iteration is finished - TODO
  * confirm.
  */
140  virtual bool get_next_feature(int32_t& index, float64_t& value, void* iterator);
141 
 /* Accepts a void* iterator; by its name it releases that iterator. */
148  virtual void free_feature_iterator(void* iterator);
149 
155 
 /* Returns a C string; by its name, the name of this object. */
156  virtual const char* get_name() const;
157 
 /* Const member returning a CFeatures*; by its name, a copy of this object. */
162  virtual CFeatures* duplicate() const;
163 
 /* Const query returning an EFeatureType value. */
168  virtual EFeatureType get_feature_type() const;
169 
 /* Const query returning an EFeatureClass value. */
174  virtual EFeatureClass get_feature_class() const;
175 
 /* Const query returning an int32_t; by its name, the number of vectors. */
180  virtual int32_t get_num_vectors() const;
181 
 /*
  * Static helper, callable without an instance. Takes a character buffer
  * token with its length, a bit count num_bits and a seed, and returns an
  * unsigned 32-bit value; by its name, the hash of the token. How num_bits
  * limits the result is not visible here.
  */
190  static uint32_t calculate_token_hash(char* token, int32_t length,
191  int32_t num_bits, uint32_t seed);
192 
193 private:
 /*
  * Private initialisation helper. Its parameters are the constructor's
  * parameters without size.
  */
194  void init(int32_t hash_bits, CStringFeatures<char>* docs, CTokenizer* tzer,
195  bool normalize, int32_t n_grams, int32_t skips);
196 
197 protected:
200 
 /* presumably holds the hash_bits value given at construction - TODO confirm */
202  int32_t num_bits;
203 
206 
209 
 /* presumably holds the n_grams value given at construction - TODO confirm */
211  int32_t ngrams;
212 
 /* presumably holds the skips value given at construction - TODO confirm */
214  int32_t tokens_to_skip;
215 };
216 }
217 
218 #endif

SHOGUN Machine Learning Toolbox - Documentation