SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
StreamingHashedDocDotFeatures.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2013 Evangelos Anagnostopoulos
8  * Copyright (C) 2013 Evangelos Anagnostopoulos
9  */
10 #ifndef _STREAMING_HASHEDDOCDOTFEATURES__H__
11 #define _STREAMING_HASHEDDOCDOTFEATURES__H__
12 
15 #include <shogun/lib/Tokenizer.h>
19 
20 namespace shogun
21 {
// Forward declarations of types that the class body below refers to by pointer.
22 class CStreamingDotFeatures;
23 class CTokenizer;
24 class CHashedDocConverter;
25 
// NOTE(review): the class-head line and its doc comment are absent from this
// listing. The constructor below names the class CStreamingHashedDocDotFeatures;
// its base class is not visible here (presumably CStreamingDotFeatures, given
// the forward declaration and the virtual interface) — confirm against the
// original header.
41 {
42 public:
45 
// Constructor taking a streaming file, a labelled/unlabelled flag, a size,
// a tokenizer and a bit count that defaults to 20.
// NOTE(review): `size` is presumably the parser buffer size and `bits` the
// width of the hashed feature space — confirm against the implementation.
57  CStreamingHashedDocDotFeatures(CStreamingFile* file, bool is_labelled, int32_t size,
58  CTokenizer* tzer, int32_t bits=20);
59 
// NOTE(review): tail of a declaration whose opening line is missing from this
// listing. Only the trailing parameters are visible: `bits` (default 20) and
// an optional `lab` pointer (default NULL), presumably a label array.
77  int32_t bits=20, float64_t* lab=NULL);
78 
81 
// Virtual dot() taking another streaming dot-features object and returning a
// float32_t; presumably the dot product of the two current examples — confirm.
88  virtual float32_t dot(CStreamingDotFeatures* df);
89 
// Virtual dense_dot() taking a read-only float32_t array and its length;
// presumably the dot product of the current example with that dense vector.
95  virtual float32_t dense_dot(const float32_t* vec2, int32_t vec2_len);
96 
// Virtual add_to_dense_vec() taking a scale factor `alpha`, a writable dense
// array with its length, and an `abs_val` flag that defaults to false.
// NOTE(review): presumably computes vec2 += alpha * current example, using
// absolute values when abs_val is true — confirm.
104  virtual void add_to_dense_vec(float32_t alpha, float32_t* vec2,
105  int32_t vec2_len, bool abs_val=false);
106 
// Const accessor returning an int32_t; by its name, the dimensionality of the
// feature space.
114  virtual int32_t get_dim_feature_space() const;
115 
// Const accessor returning the object's name as a C string.
121  virtual const char* get_name() const;
122 
// Const accessor returning an int32_t; by its name, the number of vectors.
128  virtual int32_t get_num_vectors() const;
129 
// Const method returning a CFeatures pointer; by its name, a copy of this
// object. Ownership of the returned pointer is not visible here.
135  virtual CFeatures* duplicate() const;
136 
// NOTE(review): presumably installs the parser callback used for unlabelled
// examples — confirm against the implementation.
146  virtual void set_vector_reader();
147 
// NOTE(review): presumably installs the parser callback used for labelled
// examples — confirm against the implementation.
157  virtual void set_vector_and_label_reader();
158 
// Const accessors returning the feature type and feature class enumerators.
164  virtual EFeatureType get_feature_type() const;
165 
171  virtual EFeatureClass get_feature_class() const;
172 
// Parser lifecycle hooks; by their names they start and stop the input parser.
177  virtual void start_parser();
178 
182  virtual void end_parser();
183 
// Returns a float64_t; by its name, the label of the current example.
191  virtual float64_t get_label();
192 
// Returns a bool; presumably true when another example was fetched and false
// once the stream is exhausted — confirm.
198  virtual bool get_next_example();
199 
// By its name, releases the example obtained via get_next_example().
205  virtual void release_example();
206 
// Non-const accessor returning an int32_t; by its name, the feature count.
212  virtual int32_t get_num_features();
213 
219 
// Setter taking a bool; by its name, toggles normalization.
224  void set_normalization(bool normalize);
225 
// Setter taking two int32_t values `k` and `n`.
// NOTE(review): presumably k is the number of skips and n the n-gram length
// for k-skip-n-gram tokenization — confirm.
233  void set_k_skip_n_grams(int32_t k, int32_t n);
234 
235 private:
// Private init() helper; its parameters mirror the visible constructor's
// (file, is_labelled, size, tzer, bits) plus normalize, n_grams and skips.
// NOTE(review): presumably shared by all constructors — confirm.
236  void init(CStreamingFile* file, bool is_labelled, int32_t size, CTokenizer* tzer,
237  int32_t bits, bool normalize, int32_t n_grams, int32_t skips);
238 
239 protected:
240 
// Integer bit count; presumably the value passed as `bits` to the constructor.
242  int32_t num_bits;
243 
// NOTE(review): the gaps in the listing's line numbers here indicate further
// protected members that were dropped from the listing.
246 
249 
252 
// Input parser instantiated over char.
254  CInputParser<char> parser;
255 
258 };
259 }
260 
261 #endif // _STREAMING_HASHEDDOCDOTFEATURES__H__

SHOGUN Machine Learning Toolbox - Documentation