SHOGUN
v2.0.0
Main Page
Related Pages
Modules
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
src
shogun
features
ExplicitSpecFeatures.cpp
Go to the documentation of this file.
1
/*
2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 3 of the License, or
5
* (at your option) any later version.
6
*
7
* Written (W) 2009 Soeren Sonnenburg
8
* Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society
9
*/
10
11
#include <
shogun/features/ExplicitSpecFeatures.h
>
12
#include <
shogun/io/SGIO.h
>
13
14
using namespace
shogun;
15
16
/**
 * Default constructor.
 *
 * Calls SG_UNSTABLE with this constructor's name and leaves the object
 * empty: zero strings, zero-sized spectrum and no spectrum storage.
 */
CExplicitSpecFeatures::CExplicitSpecFeatures() : CDotFeatures()
{
	SG_UNSTABLE("CExplicitSpecFeatures::CExplicitSpecFeatures()", "\n");

	// Storage first: nothing has been allocated.
	k_spectrum = NULL;
	spec_size = 0;

	// Bookkeeping for an empty feature set.
	alphabet_size = 0;
	num_strings = 0;
	use_normalization = false;
}
28
29
30
/**
 * Construct explicit spectrum features from string features.
 *
 * The number of vectors and the spectrum size are taken from str, and the
 * per-string spectra are computed immediately by obtain_kmer_spectrum().
 *
 * @param str string features to compute the spectra from; must not be NULL
 * @param normalize stored as use_normalization and consulted by
 *        obtain_kmer_spectrum() when building each spectrum
 */
CExplicitSpecFeatures::CExplicitSpecFeatures(CStringFeatures<uint16_t>* str, bool normalize) : CDotFeatures()
{
	ASSERT(str);

	use_normalization = normalize;
	num_strings = str->get_num_vectors();
	spec_size = str->get_num_symbols();

	// alphabet_size is not computed anywhere in this constructor. Give it
	// the same value the default constructor uses so that the copy
	// constructor never reads an indeterminate member.
	// NOTE(review): if a real alphabet size is wanted, set it here - confirm.
	alphabet_size = 0;

	// Keep the pointer well-defined until obtain_kmer_spectrum() assigns it.
	k_spectrum = NULL;

	obtain_kmer_spectrum(str);

	SG_DEBUG("SPEC size=%d, num_str=%d\n", spec_size, num_strings);
}
42
43
/**
 * Copy constructor: makes a deep copy of orig.
 *
 * Every spectrum of orig is cloned, so the new object owns its own storage.
 *
 * @param orig object to copy from
 */
CExplicitSpecFeatures::CExplicitSpecFeatures(const CExplicitSpecFeatures& orig) : CDotFeatures(orig),
	num_strings(orig.num_strings), alphabet_size(orig.alphabet_size), spec_size(orig.spec_size)
{
	// Assigned in the body (not the initializer list) so that the member
	// declaration order, which is not visible here, does not matter.
	use_normalization = orig.use_normalization;

	k_spectrum = SG_MALLOC(float64_t*, num_strings);
	for (int32_t i = 0; i < num_strings; i++)
	{
		// Clone from the SOURCE object's spectrum; reading this object's
		// freshly allocated (uninitialised) pointers would be undefined.
		k_spectrum[i] = SGVector<float64_t>::clone_vector(orig.k_spectrum[i], spec_size);
	}
}
50
51
/** Destructor: hands all spectrum storage back via delete_kmer_spectrum(). */
CExplicitSpecFeatures::~CExplicitSpecFeatures()
{
	this->delete_kmer_spectrum();
}
55
56
/**
 * Dimensionality of the feature space.
 *
 * @return the spectrum size (spec_size)
 */
int32_t CExplicitSpecFeatures::get_dim_feature_space() const
{
	const int32_t dimension = spec_size;
	return dimension;
}
60
61
/**
 * Dot product between one spectrum of this object and one spectrum of
 * another CExplicitSpecFeatures object.
 *
 * @param vec_idx1 index of the vector in this object
 * @param df other feature object; must be non-NULL and report the same
 *        feature type and feature class as this object
 * @param vec_idx2 index of the vector in df
 * @return dot product of the two spectra over spec_size entries
 */
float64_t CExplicitSpecFeatures::dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2)
{
	ASSERT(df);
	ASSERT(df->get_feature_type() == get_feature_type());
	ASSERT(df->get_feature_class() == get_feature_class());
	CExplicitSpecFeatures* sf = (CExplicitSpecFeatures*) df;

	// Both spectra are read over this object's spec_size entries, so the
	// other object must not have a different (in particular smaller) one.
	ASSERT(sf->spec_size == spec_size);

	// Reject negative indices as well as indices past the end.
	ASSERT(vec_idx1 >= 0 && vec_idx1 < num_strings);
	ASSERT(vec_idx2 >= 0 && vec_idx2 < sf->num_strings);

	float64_t* vec1 = k_spectrum[vec_idx1];
	float64_t* vec2 = sf->k_spectrum[vec_idx2];

	return SGVector<float64_t>::dot(vec1, vec2, spec_size);
}
75
76
/**
 * Dot product between one stored spectrum and a dense vector.
 *
 * @param vec_idx1 index of the stored spectrum
 * @param vec2 dense vector
 * @param vec2_len length of vec2; must equal spec_size
 * @return sum over all spectrum entries of spectrum[i]*vec2[i]
 */
float64_t CExplicitSpecFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
{
	ASSERT(vec2_len == spec_size);
	ASSERT(vec_idx1 < num_strings);

	const float64_t* spectrum = k_spectrum[vec_idx1];
	float64_t accumulated = 0;

	// Accumulate in index order, one product at a time.
	int32_t pos = 0;
	while (pos < spec_size)
	{
		accumulated += spectrum[pos] * vec2[pos];
		++pos;
	}

	return accumulated;
}
88
89
/**
 * Add alpha times a stored spectrum onto a dense vector, in place.
 *
 * @param alpha scalar multiplier
 * @param vec_idx1 index of the stored spectrum
 * @param vec2 dense vector that is updated
 * @param vec2_len length of vec2; must equal spec_size
 * @param abs_val if true, the absolute value of each spectrum entry is
 *        used instead of the entry itself
 */
void CExplicitSpecFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val)
{
	ASSERT(vec2_len == spec_size);
	ASSERT(vec_idx1 < num_strings);

	float64_t* spectrum = k_spectrum[vec_idx1];

	for (int32_t pos = 0; pos < spec_size; ++pos)
	{
		// Pick |entry| or entry, then scale and accumulate.
		const float64_t contribution = abs_val ? CMath::abs(spectrum[pos]) : spectrum[pos];
		vec2[pos] += alpha * contribution;
	}
}
106
107
/**
 * Compute the explicit spectrum (symbol count vector) of every string.
 *
 * Allocates k_spectrum with num_strings rows of spec_size entries each,
 * counts how often each symbol value occurs in the corresponding string
 * and, if use_normalization is set, divides each row by its Euclidean norm.
 *
 * @param str string features the counts are taken from
 */
void CExplicitSpecFeatures::obtain_kmer_spectrum(CStringFeatures<uint16_t>* str)
{
	k_spectrum = SG_MALLOC(float64_t*, num_strings);

	for (int32_t i = 0; i < num_strings; i++)
	{
		k_spectrum[i] = SG_MALLOC(float64_t, spec_size);
		memset(k_spectrum[i], 0, sizeof(float64_t)*spec_size);

		int32_t len = 0;
		bool free_fv = false;
		uint16_t* fv = str->get_feature_vector(i, len, free_fv);

		// Each symbol value is used directly as an index into the row.
		// NOTE(review): assumes every symbol is < spec_size - confirm.
		for (int32_t j = 0; j < len; j++)
			k_spectrum[i][fv[j]]++;

		str->free_feature_vector(fv, i, free_fv);

		if (use_normalization)
		{
			float64_t n = 0;
			for (int32_t j = 0; j < spec_size; j++)
				n += CMath::sq(k_spectrum[i][j]);

			n = CMath::sqrt(n);

			// An empty string gives an all-zero row with norm 0; dividing
			// would turn every entry into NaN, so leave such rows as zeros.
			if (n > 0)
			{
				for (int32_t j = 0; j < spec_size; j++)
					k_spectrum[i][j] /= n;
			}
		}
	}
}
138
139
void
CExplicitSpecFeatures::delete_kmer_spectrum
()
140
{
141
for
(int32_t i=0; i<
num_strings
; i++)
142
SG_FREE
(
k_spectrum
[i]);
143
144
SG_FREE
(
k_spectrum
);
145
k_spectrum
=NULL;
146
}
147
148
/**
 * Create a copy of this object using the copy constructor.
 *
 * @return pointer to the newly allocated copy
 */
CFeatures* CExplicitSpecFeatures::duplicate() const
{
	CExplicitSpecFeatures* copy = new CExplicitSpecFeatures(*this);
	return copy;
}
152
153
154
155
/**
 * Feature iteration is not implemented for this class.
 *
 * Invokes SG_NOTIMPLEMENTED; the argument is not used.
 *
 * @param vector_index index of the vector to iterate over (ignored)
 * @return NULL
 */
void* CExplicitSpecFeatures::get_feature_iterator(int32_t vector_index)
{
	SG_NOTIMPLEMENTED;

	void* no_iterator = NULL;
	return no_iterator;
}
160
161
/**
 * Feature iteration is not implemented for this class.
 *
 * Invokes SG_NOTIMPLEMENTED; none of the arguments are read or written.
 *
 * @param index unused
 * @param value unused
 * @param iterator unused
 * @return false
 */
bool CExplicitSpecFeatures::get_next_feature(int32_t& index, float64_t& value, void* iterator)
{
	SG_NOTIMPLEMENTED;

	// The return type is bool: return false rather than the pointer
	// constant NULL, which does not convert where NULL is nullptr.
	return false;
}
166
167
void
CExplicitSpecFeatures::free_feature_iterator
(
void
* iterator)
168
{
169
SG_NOTIMPLEMENTED
;
170
}
171
172
/**
 * Number of non-zero features of a vector; not implemented for this class.
 *
 * Invokes SG_NOTIMPLEMENTED; the argument is not used.
 *
 * @param num index of the vector (ignored)
 * @return 0
 */
int32_t CExplicitSpecFeatures::get_nnz_features_for_vector(int32_t num)
{
	SG_NOTIMPLEMENTED;

	const int32_t nnz = 0;
	return nnz;
}
177
178
/**
 * Feature type reported by this class.
 *
 * @return F_UNKNOWN
 */
EFeatureType CExplicitSpecFeatures::get_feature_type() const
{
	const EFeatureType type = F_UNKNOWN;
	return type;
}
182
183
/**
 * Feature class reported by this class.
 *
 * @return C_SPEC
 */
EFeatureClass CExplicitSpecFeatures::get_feature_class() const
{
	const EFeatureClass feature_class = C_SPEC;
	return feature_class;
}
187
188
/**
 * Number of vectors held by this object.
 *
 * @return num_strings
 */
int32_t CExplicitSpecFeatures::get_num_vectors() const
{
	const int32_t count = num_strings;
	return count;
}
192
193
/**
 * Size of a single spectrum entry.
 *
 * @return sizeof(float64_t)
 */
int32_t CExplicitSpecFeatures::get_size() const
{
	const int32_t entry_bytes = sizeof(float64_t);
	return entry_bytes;
}
SHOGUN
Machine Learning Toolbox - Documentation