SHOGUN
v3.0.0
Main Page
Related Pages
Modules
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
src
shogun
features
HashedDocDotFeatures.h
Go to the documentation of this file.
1
/*
2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 3 of the License, or
5
* (at your option) any later version.
6
*
7
* Written (W) 2013 Evangelos Anagnostopoulos
8
* Copyright (C) 2013 Evangelos Anagnostopoulos
9
*/
10
11
#ifndef _HASHEDDOCDOTFEATURES__H__
12
#define _HASHEDDOCDOTFEATURES__H__
13
14
#include <
shogun/features/DotFeatures.h
>
15
#include <
shogun/features/StringFeatures.h
>
16
#include <
shogun/converter/HashedDocConverter.h
>
17
#include <
shogun/lib/Tokenizer.h
>
18
19
namespace
shogun {
20
// Forward declarations of the types named by the class declared in this
// header. The stray source-listing line numbers that had been interleaved
// between these declarations were removed so that they parse again.
template <class ST> class CStringFeatures;
// NOTE(review): SGMatrix is not referenced by the declarations visible in
// this header; kept so that code relying on it keeps compiling.
template <class ST> class SGMatrix;
class CDotFeatures;
class CHashedDocConverter;
class CTokenizer;
25
36
class
CHashedDocDotFeatures
:
public
CDotFeatures
37
{
38
public
:
39
50
CHashedDocDotFeatures
(int32_t hash_bits=0,
CStringFeatures<char>
* docs=NULL,
51
CTokenizer
* tzer=NULL,
bool
normalize=
true
, int32_t n_grams=1, int32_t skips=0, int32_t size=0);
52
54
CHashedDocDotFeatures
(
const
CHashedDocDotFeatures
& orig);
55
60
CHashedDocDotFeatures
(
CFile
* loader);
61
63
virtual
~CHashedDocDotFeatures
();
64
72
virtual
int32_t
get_dim_feature_space
()
const
;
73
81
virtual
float64_t
dot
(int32_t vec_idx1,
CDotFeatures
* df, int32_t vec_idx2);
82
88
virtual
float64_t
dense_dot_sgvec
(int32_t vec_idx1,
const
SGVector<float64_t>
vec2);
89
96
virtual
float64_t
dense_dot
(int32_t vec_idx1,
const
float64_t
* vec2, int32_t vec2_len);
97
106
virtual
void
add_to_dense_vec
(
float64_t
alpha, int32_t vec_idx1,
float64_t
* vec2, int32_t vec2_len,
bool
abs_val=
false
);
107
115
virtual
int32_t
get_nnz_features_for_vector
(int32_t num);
116
127
virtual
void
*
get_feature_iterator
(int32_t vector_index);
128
140
virtual
bool
get_next_feature
(int32_t& index,
float64_t
& value,
void
* iterator);
141
148
virtual
void
free_feature_iterator
(
void
* iterator);
149
154
void
set_doc_collection
(
CStringFeatures<char>
* docs);
155
156
virtual
const
char
*
get_name
()
const
;
157
162
virtual
CFeatures
*
duplicate
()
const
;
163
168
virtual
EFeatureType
get_feature_type
()
const
;
169
174
virtual
EFeatureClass
get_feature_class
()
const
;
175
180
virtual
int32_t
get_num_vectors
()
const
;
181
190
static
uint32_t
calculate_token_hash
(
char
* token, int32_t length,
191
int32_t
num_bits
, uint32_t seed);
192
193
private
:
194
void
init(int32_t hash_bits,
CStringFeatures<char>
* docs,
CTokenizer
* tzer,
195
bool
normalize, int32_t n_grams, int32_t skips);
196
197
protected
:
199
CStringFeatures<char>
*
doc_collection
;
200
202
int32_t
num_bits
;
203
205
CTokenizer
*
tokenizer
;
206
208
bool
should_normalize
;
209
211
int32_t
ngrams
;
212
214
int32_t
tokens_to_skip
;
215
};
216
}
217
218
#endif
SHOGUN
Machine Learning Toolbox - Documentation