SHOGUN
v3.0.0
Main Page
Related Pages
Modules
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
src
shogun
features
streaming
StreamingHashedDocDotFeatures.h
Go to the documentation of this file.
1
/*
2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 3 of the License, or
5
* (at your option) any later version.
6
*
7
* Written (W) 2013 Evangelos Anagnostopoulos
8
* Copyright (C) 2013 Evangelos Anagnostopoulos
9
*/
10
#ifndef _STREAMING_HASHEDDOCDOTFEATURES__H__
11
#define _STREAMING_HASHEDDOCDOTFEATURES__H__
12
13
#include <
shogun/features/StringFeatures.h
>
14
#include <
shogun/features/streaming/StreamingDotFeatures.h
>
15
#include <
shogun/lib/Tokenizer.h
>
16
#include <
shogun/converter/HashedDocConverter.h
>
17
#include <
shogun/io/streaming/InputParser.h
>
18
#include <
shogun/io/streaming/StreamingFileFromStringFeatures.h
>
19
20
namespace
shogun
21
{
22
/* Forward declarations of the collaborating types named by the class below.
 * CStreamingDotFeatures is also used as a base class there, which requires
 * its complete definition from the corresponding #include at the top of
 * this header. */
class CStreamingDotFeatures;
class CTokenizer;
class CHashedDocConverter;
25
40
class
CStreamingHashedDocDotFeatures
:
public
CStreamingDotFeatures
41
{
42
public
:
44
CStreamingHashedDocDotFeatures
();
45
57
CStreamingHashedDocDotFeatures
(
CStreamingFile
* file,
bool
is_labelled, int32_t size,
58
CTokenizer
* tzer, int32_t bits=20);
59
76
CStreamingHashedDocDotFeatures
(
CStringFeatures<char>
* dot_features,
CTokenizer
* tzer,
77
int32_t bits=20,
float64_t
* lab=NULL);
78
80
virtual
~CStreamingHashedDocDotFeatures
();
81
88
virtual
float32_t
dot
(
CStreamingDotFeatures
* df);
89
95
virtual
float32_t
dense_dot
(
const
float32_t
* vec2, int32_t vec2_len);
96
104
virtual
void
add_to_dense_vec
(
float32_t
alpha,
float32_t
* vec2,
105
int32_t vec2_len,
bool
abs_val=
false
);
106
114
virtual
int32_t
get_dim_feature_space
()
const
;
115
121
virtual
const
char
*
get_name
()
const
;
122
128
virtual
int32_t
get_num_vectors
()
const
;
129
135
virtual
CFeatures
*
duplicate
()
const
;
136
146
virtual
void
set_vector_reader
();
147
157
virtual
void
set_vector_and_label_reader
();
158
164
virtual
EFeatureType
get_feature_type
()
const
;
165
171
virtual
EFeatureClass
get_feature_class
()
const
;
172
177
virtual
void
start_parser
();
178
182
virtual
void
end_parser
();
183
191
virtual
float64_t
get_label
();
192
198
virtual
bool
get_next_example
();
199
205
virtual
void
release_example
();
206
212
virtual
int32_t
get_num_features
();
213
218
SGSparseVector<float64_t>
get_vector
();
219
224
void
set_normalization
(
bool
normalize);
225
233
void
set_k_skip_n_grams
(int32_t k, int32_t n);
234
235
private
:
236
void
init(
CStreamingFile
* file,
bool
is_labelled, int32_t size,
CTokenizer
* tzer,
237
int32_t bits,
bool
normalize, int32_t n_grams, int32_t skips);
238
239
protected
:
240
242
int32_t
num_bits
;
243
245
SGSparseVector<float64_t>
current_vector
;
246
248
CTokenizer
*
tokenizer
;
249
251
CHashedDocConverter
*
converter
;
252
254
CInputParser<char>
parser
;
255
257
float64_t
current_label
;
258
};
259
}
260
261
#endif // _STREAMING_HASHEDDOCDOTFEATURES__H__
SHOGUN
Machine Learning Toolbox - Documentation