SHOGUN
v3.0.0
Main Page
Related Pages
Modules
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
src
shogun
classifier
vw
VwParser.h
Go to the documentation of this file.
1
/*
 * Copyright (c) 2009 Yahoo! Inc. All rights reserved. The copyrights
 * embodied in the content of this file are licensed under the BSD
 * (revised) open source license.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Shashwat Lal Das
 * Adaptation of Vowpal Wabbit v5.1.
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society.
 */
15
16
#ifndef _VW_PARSER_H__
17
#define _VW_PARSER_H__
18
19
#include <
shogun/base/SGObject.h
>
20
#include <
shogun/io/SGIO.h
>
21
#include <
shogun/lib/Hash.h
>
22
#include <
shogun/classifier/vw/vw_common.h
>
23
#include <
shogun/classifier/vw/cache/VwCacheWriter.h
>
24
25
namespace
shogun
26
{
28
/** Identifies the input format a parser is expected to handle.
 *
 * The numeric values are spelled out explicitly and must stay stable,
 * since callers may store or compare them as integers.
 */
enum E_VW_PARSER_TYPE
{
	/** VW-style input (NOTE(review): presumably the native Vowpal Wabbit text format - confirm) */
	T_VW = 1,
	/** SVMLight-style input */
	T_SVMLIGHT = 2,
	/** Dense input */
	T_DENSE = 3
};
34
46
class
CVwParser
:
public
CSGObject
47
{
48
public
:
52
CVwParser
();
53
59
CVwParser
(
CVwEnvironment
* env_to_use);
60
64
virtual
~CVwParser
();
65
71
CVwEnvironment
*
get_env
()
72
{
73
SG_REF
(
env
);
74
return
env
;
75
}
76
82
void
set_env
(
CVwEnvironment
* env_to_use)
83
{
84
env
= env_to_use;
85
SG_REF
(
env
);
86
}
87
94
void
set_cache_parameters
(
char
* fname,
EVwCacheType
type =
C_NATIVE
)
95
{
96
init_cache
(fname, type);
97
}
98
104
EVwCacheType
get_cache_type
()
105
{
106
return
cache_type
;
107
}
108
114
void
set_write_cache
(
bool
wr_cache)
115
{
116
write_cache
= wr_cache;
117
if
(wr_cache)
118
init_cache
(NULL);
119
else
120
if
(
cache_writer
)
121
SG_UNREF
(
cache_writer
);
122
}
123
129
bool
get_write_cache
()
130
{
131
return
write_cache
;
132
}
133
139
void
set_mm
(
float64_t
label)
140
{
141
env
->
min_label
=
CMath::min
(
env
->
min_label
, label);
142
if
(label != FLT_MAX)
143
env
->
max_label
=
CMath::max
(
env
->
max_label
, label);
144
}
145
152
void
noop_mm
(
float64_t
label) { }
153
160
void
set_minmax
(
float64_t
label)
161
{
162
set_mm
(label);
163
}
164
173
int32_t
read_features
(
CIOBuffer
* buf,
VwExample
*& ex);
174
183
int32_t
read_svmlight_features
(
CIOBuffer
* buf,
VwExample
*& ae);
184
193
int32_t
read_dense_features
(
CIOBuffer
* buf,
VwExample
*& ae);
194
200
virtual
const
char
*
get_name
()
const
{
return
"VwParser"
; }
201
202
protected
:
209
void
init_cache
(
char
* fname,
EVwCacheType
type =
C_NATIVE
);
210
219
void
feature_value
(
substring
&s,
v_array<substring>
& name,
float32_t
&v);
220
229
void
tokenize
(
char
delim,
substring
s,
v_array<substring>
&ret);
230
241
inline
char
*
safe_index
(
char
*start,
char
v,
char
*max)
242
{
243
while
(start != max && *start != v)
244
start++;
245
return
start;
246
}
247
248
public
:
250
hash_func_t
hasher
;
251
252
protected
:
254
CVwEnvironment
*
env
;
256
CVwCacheWriter
*
cache_writer
;
258
EVwCacheType
cache_type
;
260
bool
write_cache
;
261
262
private
:
264
v_array<substring>
channels;
265
v_array<substring>
words;
266
v_array<substring>
name;
267
};
268
269
}
270
#endif // _VW_PARSER_H__
SHOGUN
Machine Learning Toolbox - Documentation