SHOGUN
v2.0.0
Main Page
Related Pages
Modules
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
src
shogun
features
HashedWDFeatures.h
Go to the documentation of this file.
1
/*
2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 3 of the License, or
5
* (at your option) any later version.
6
*
7
* Written (W) 2010 Soeren Sonnenburg
8
* Copyright (C) 2010 Berlin Institute of Technology
9
*/
10
11
#ifndef _HASHEDWDFEATURES_H___
12
#define _HASHEDWDFEATURES_H___
13
14
#include <
shogun/lib/common.h
>
15
#include <
shogun/features/DotFeatures.h
>
16
#include <
shogun/features/StringFeatures.h
>
17
#include <
shogun/lib/Hash.h
>
18
19
namespace
shogun
20
{
21
// Forward declaration of the string feature container template; the
// class below only refers to it through a pointer.
template <typename ST> class CStringFeatures;
22
28
class
CHashedWDFeatures
:
public
CDotFeatures
29
{
30
public
:
32
CHashedWDFeatures
();
33
42
CHashedWDFeatures
(
CStringFeatures<uint8_t>
* str, int32_t start_order,
43
int32_t order, int32_t from_order, int32_t hash_bits=12);
44
46
CHashedWDFeatures
(
const
CHashedWDFeatures
& orig);
47
49
virtual
~CHashedWDFeatures
();
50
58
inline
virtual
int32_t
get_dim_feature_space
()
const
59
{
60
return
w_dim
;
61
}
62
70
virtual
float64_t
dot
(int32_t vec_idx1,
CDotFeatures
* df, int32_t vec_idx2);
71
78
virtual
float64_t
dense_dot
(int32_t vec_idx1,
const
float64_t
* vec2,
79
int32_t vec2_len);
80
89
virtual
void
add_to_dense_vec
(
float64_t
alpha, int32_t vec_idx1,
90
float64_t
* vec2, int32_t vec2_len,
bool
abs_val=
false
);
91
97
virtual
int32_t
get_nnz_features_for_vector
(int32_t num);
98
99
#ifndef DOXYGEN_SHOULD_SKIP_THIS
100
101
struct
hashed_wd_feature_iterator
102
{
104
uint16_t* vec;
106
int32_t vidx;
108
int32_t vlen;
110
bool
vfree;
111
113
int32_t index;
114
115
};
116
#endif
117
127
virtual
void
*
get_feature_iterator
(int32_t vector_index);
128
139
virtual
bool
get_next_feature
(int32_t& index,
float64_t
& value,
140
void
* iterator);
141
147
virtual
void
free_feature_iterator
(
void
* iterator);
148
153
virtual
CFeatures
*
duplicate
()
const
;
154
159
inline
virtual
EFeatureType
get_feature_type
()
const
160
{
161
return
F_UNKNOWN
;
162
}
163
168
inline
virtual
EFeatureClass
get_feature_class
()
const
169
{
170
return
C_WD
;
171
}
172
173
inline
virtual
int32_t
get_num_vectors
()
const
174
{
175
return
num_strings
;
176
}
177
178
inline
virtual
int32_t
get_size
()
const
179
{
180
return
sizeof
(
float64_t
);
181
}
182
185
void
set_normalization_const
(
float64_t
n=0);
186
188
inline
float64_t
get_normalization_const
()
189
{
190
return
normalization_const
;
191
}
192
194
inline
virtual
const
char
*
get_name
()
const
195
{
196
return
"HashedWDFeatures"
;
197
}
198
199
protected
:
200
202
void
set_wd_weights
();
203
204
protected
:
206
CStringFeatures<uint8_t>
*
strings
;
207
209
int32_t
degree
;
211
int32_t
start_degree
;
213
int32_t
from_degree
;
215
int32_t
string_length
;
217
int32_t
num_strings
;
219
int32_t
alphabet_size
;
221
int32_t
w_dim
;
223
int32_t
partial_w_dim
;
225
float64_t
*
wd_weights
;
227
uint32_t
mask
;
229
int32_t
m_hash_bits
;
230
232
float64_t
normalization_const
;
233
};
234
}
235
#endif // _HASHEDWDFEATURES_H___
SHOGUN
Machine Learning Toolbox - Documentation