SHOGUN
v2.0.0
Main Page
Related Pages
Modules
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
src
shogun
features
streaming
StreamingStringFeatures.h
Go to the documentation of this file.
1
/*
2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 3 of the License, or
5
* (at your option) any later version.
6
*
7
* Written (W) 2011 Shashwat Lal Das
8
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
9
*/
10
#ifndef _STREAMING_STRINGFEATURES__H__
11
#define _STREAMING_STRINGFEATURES__H__
12
13
#include <
shogun/lib/common.h
>
14
#include <
shogun/mathematics/Math.h
>
15
#include <
shogun/base/Parameter.h
>
16
#include <
shogun/lib/DataType.h
>
17
#include <
shogun/io/streaming/InputParser.h
>
18
19
#include <
shogun/features/streaming/StreamingFeatures.h
>
20
#include <
shogun/features/Alphabet.h
>
21
22
namespace
shogun
23
{
27
template
<
class
T>
class
CStreamingStringFeatures
:
public
CStreamingFeatures
28
{
29
public
:
30
38
CStreamingStringFeatures
();
39
48
CStreamingStringFeatures
(
CStreamingFile
* file,
49
bool
is_labelled,
50
int32_t size);
51
57
virtual
~CStreamingStringFeatures
();
58
68
virtual
void
set_vector_reader
();
69
79
virtual
void
set_vector_and_label_reader
();
80
87
void
use_alphabet
(
EAlphabet
alpha);
88
95
void
use_alphabet
(
CAlphabet
* alpha);
96
104
void
set_remap
(
CAlphabet
* ascii_alphabet,
CAlphabet
* binary_alphabet);
105
113
void
set_remap
(
EAlphabet
ascii_alphabet=
DNA
,
EAlphabet
binary_alphabet=
RAWDNA
);
114
119
CAlphabet
*
get_alphabet
();
120
127
floatmax_t
get_num_symbols
();
128
134
virtual
void
start_parser
();
135
141
virtual
void
end_parser
();
142
151
virtual
bool
get_next_example
();
152
158
SGString<T>
get_vector
();
159
167
virtual
float64_t
get_label
();
168
175
virtual
void
release_example
();
176
182
virtual
int32_t
get_vector_length
();
183
189
virtual
EFeatureType
get_feature_type
()
const
;
190
196
virtual
EFeatureClass
get_feature_class
()
const
;
197
203
virtual
CFeatures
*
duplicate
()
const
;
204
210
inline
virtual
const
char
*
get_name
()
const
{
return
"StreamingStringFeatures"
; }
211
217
virtual
int32_t
get_num_vectors
()
const
;
218
224
virtual
int32_t
get_size
()
const
;
225
231
virtual
int32_t
get_num_features
();
232
233
private
:
234
239
void
init();
240
248
void
init(
CStreamingFile
*file,
bool
is_labelled, int32_t size);
249
250
protected
:
251
253
CInputParser<T>
parser
;
254
256
CAlphabet
*
alphabet
;
257
259
CAlphabet
*
alpha_ascii
;
260
262
CAlphabet
*
alpha_bin
;
263
265
CStreamingFile
*
working_file
;
266
268
SGString<T>
current_sgstring
;
269
271
T*
current_string
;
272
274
int32_t
current_length
;
275
277
float64_t
current_label
;
278
280
bool
has_labels
;
281
283
bool
remap_to_bin
;
284
286
int32_t
num_symbols
;
287
};
288
289
}
290
#endif // _STREAMING_STRINGFEATURES__H__
SHOGUN
Machine Learning Toolbox - Documentation