SHOGUN
v3.0.0
Main Page
Related Pages
Modules
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
src
shogun
features
streaming
StreamingStringFeatures.h
Go to the documentation of this file.
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Shashwat Lal Das
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */
10
#ifndef _STREAMING_STRINGFEATURES__H__
11
#define _STREAMING_STRINGFEATURES__H__
12
13
#include <
shogun/lib/common.h
>
14
#include <
shogun/mathematics/Math.h
>
15
#include <
shogun/base/Parameter.h
>
16
#include <
shogun/lib/DataType.h
>
17
#include <
shogun/io/streaming/InputParser.h
>
18
19
#include <
shogun/features/streaming/StreamingFeatures.h
>
20
#include <
shogun/features/Alphabet.h
>
21
22
namespace
shogun
23
{
27
template
<
class
T>
class
CStreamingStringFeatures
:
public
CStreamingFeatures
28
{
29
public
:
30
38
CStreamingStringFeatures
();
39
48
CStreamingStringFeatures
(
CStreamingFile
* file,
49
bool
is_labelled,
50
int32_t size);
51
57
virtual
~CStreamingStringFeatures
();
58
68
virtual
void
set_vector_reader
();
69
79
virtual
void
set_vector_and_label_reader
();
80
87
void
use_alphabet
(
EAlphabet
alpha);
88
95
void
use_alphabet
(
CAlphabet
* alpha);
96
104
void
set_remap
(
CAlphabet
* ascii_alphabet,
CAlphabet
* binary_alphabet);
105
113
void
set_remap
(
EAlphabet
ascii_alphabet=
DNA
,
EAlphabet
binary_alphabet=
RAWDNA
);
114
119
CAlphabet
*
get_alphabet
();
120
127
floatmax_t
get_num_symbols
();
128
134
virtual
void
start_parser
();
135
141
virtual
void
end_parser
();
142
151
virtual
bool
get_next_example
();
152
158
SGString<T>
get_vector
();
159
167
virtual
float64_t
get_label
();
168
175
virtual
void
release_example
();
176
182
virtual
int32_t
get_vector_length
();
183
189
virtual
EFeatureType
get_feature_type
()
const
;
190
196
virtual
EFeatureClass
get_feature_class
()
const
;
197
203
virtual
CFeatures
*
duplicate
()
const
;
204
210
virtual
const
char
*
get_name
()
const
{
return
"StreamingStringFeatures"
; }
211
217
virtual
int32_t
get_num_vectors
()
const
;
218
224
virtual
int32_t
get_num_features
();
225
226
private
:
227
232
void
init();
233
241
void
init(
CStreamingFile
*file,
bool
is_labelled, int32_t size);
242
243
protected
:
244
246
CInputParser<T>
parser
;
247
249
CAlphabet
*
alphabet
;
250
252
CAlphabet
*
alpha_ascii
;
253
255
CAlphabet
*
alpha_bin
;
256
258
CStreamingFile
*
working_file
;
259
261
SGString<T>
current_sgstring
;
262
264
T*
current_string
;
265
267
int32_t
current_length
;
268
270
float64_t
current_label
;
271
273
bool
has_labels
;
274
276
bool
remap_to_bin
;
277
279
int32_t
num_symbols
;
280
};
281
282
}
283
#endif // _STREAMING_STRINGFEATURES__H__
SHOGUN
Machine Learning Toolbox - Documentation