SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
SparsePolyFeatures.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2010 Soeren Sonnenburg
8  * Copyright (C) 2010 Berlin Institute of Technology
9  */
11 #include <shogun/lib/Hash.h>
12 
13 using namespace shogun;
14 
/**
 * Body of the default constructor. The signature line (listing line 15) is
 * missing from this extract; the SG_UNSTABLE message below names the
 * function as CSparsePolyFeatures::CSparsePolyFeatures().
 *
 * Invokes the SG_UNSTABLE macro with the constructor's name and resets the
 * members visible here to NULL / 0 / false.
 *
 * NOTE(review): listing lines 23-25 are absent from this extract, so any
 * members initialised on those lines are not shown.
 */
16 {
17  SG_UNSTABLE("CSparsePolyFeatures::CSparsePolyFeatures()",
18  "\n");
19 
20  m_feat = NULL;
21  m_degree = 0;
22  m_normalize = false;
26  mask = 0;
27  m_hash_bits = 0;
28 }
29 
/**
 * Construct polynomial features on top of an existing sparse feature object.
 *
 * @param feat      underlying sparse features; must be non-NULL (checked
 *                  with ASSERT). The pointer is stored in m_feat and
 *                  SG_REF is applied to it.
 * @param degree    stored in m_degree
 * @param normalize stored in m_normalize
 * @param hash_bits stored in m_hash_bits; also used to derive mask
 */
30 CSparsePolyFeatures::CSparsePolyFeatures(CSparseFeatures<float64_t>* feat, int32_t degree, bool normalize, int32_t hash_bits)
31  : CDotFeatures(), m_normalization_values(NULL)
32 {
33  ASSERT(feat);
34 
35  m_feat = feat;
36  SG_REF(m_feat);
37  m_degree=degree;
38  m_normalize=normalize;
39  m_hash_bits=hash_bits;
    // Shift is done in 64 bit; the uint32_t cast binds tighter than the
    // subtraction, so the result is (uint32_t)(1<<hash_bits) minus 1,
    // i.e. the low m_hash_bits bits set.
40  mask=(uint32_t) (((uint64_t) 1)<<m_hash_bits)-1;
    // NOTE(review): listing lines 41-42 are missing from this extract.
43 
    // NOTE(review): the statement guarded by this `if` (listing line 45) is
    // missing from this extract; as printed the `if` has no body.
44  if (m_normalize)
46 }
47 
/* NOTE(review): the signature (listing line 48) and the statements of this
 * body (listing lines 50-51) are missing from this extract; only the braces
 * survived. Presumably the destructor - confirm against the real source. */
49 {
52 }
53 
/* Prints the class name via SG_PRINT.
 * NOTE(review): the signature (listing line 54) and listing line 57 are
 * missing from this extract. Presumably this is the copy constructor, given
 * the message text - confirm against the real source. */
55 {
56  SG_PRINT("CSparsePolyFeatures:\n");
58 }
59 
/* Returns the stored m_output_dimensions value.
 * NOTE(review): the signature (listing line 60) is missing from this
 * extract; presumably the feature-space dimension getter - confirm. */
61 {
62  return m_output_dimensions;
63 }
64 
/* Returns vlen*(vlen+1)/2, where vlen is the number of entries of a sparse
 * vector `vec`; that is the number of index pairs (i, j) with i <= j.
 * NOTE(review): the signature (listing line 65), the declaration of `vec`
 * (listing line 68) and listing line 70 are missing from this extract. */
66 {
67  int32_t vlen;
69  vlen=vec.num_feat_entries;
71  return vlen*(vlen+1)/2;
72 }
73 
/* Always returns the constant F_UNKNOWN.
 * NOTE(review): the signature (listing line 74) is missing from this
 * extract; presumably the feature-type getter - confirm. */
75 {
76  return F_UNKNOWN;
77 }
78 
/* Always returns the constant C_POLY.
 * NOTE(review): the signature (listing line 79) is missing from this
 * extract; presumably the feature-class getter - confirm. */
80 {
81  return C_POLY;
82 }
83 
/* Returns the number of vectors reported by the wrapped m_feat object, or 0
 * when m_feat is NULL.
 * NOTE(review): the signature (listing line 84) is missing from this
 * extract; presumably get_num_vectors(), which other code in this file
 * calls - confirm. */
85 {
86  if (m_feat)
87  return m_feat->get_num_vectors();
88  else
89  return 0;
90 
91 }
92 
/* Returns the size in bytes of a float64_t.
 * NOTE(review): the signature (listing line 93) is missing from this
 * extract; presumably the element-size getter - confirm. */
94 {
95  return sizeof(float64_t);
96 }
97 
/**
 * Feature iterator factory; in the code visible here it always yields NULL.
 *
 * @param vector_index index of the vector to iterate over (not used by the
 *                     visible statements)
 * @return NULL
 *
 * NOTE(review): listing line 100 is missing from this extract.
 */
98 void* CSparsePolyFeatures::get_feature_iterator(int32_t vector_index)
99 {
101  return NULL;
102 }
103 
/**
 * Advance a feature iterator; in the code visible here it always reports
 * that no further feature is available.
 *
 * @param index    output slot for the next feature index (not written by
 *                 the visible statements)
 * @param value    output slot for the next feature value (not written by
 *                 the visible statements)
 * @param iterator iterator handle (not used by the visible statements)
 * @return false
 *
 * The return statement previously read `return NULL;`, i.e. a null-pointer
 * constant implicitly converted to bool. It now returns the boolean literal
 * directly; the resulting value is unchanged.
 *
 * NOTE(review): listing line 106 is missing from this extract.
 */
104 bool CSparsePolyFeatures::get_next_feature(int32_t& index, float64_t& value, void* iterator)
105 {
107  return false;
108 }
109 
/* NOTE(review): the signature (listing line 110) and the statement of this
 * body (listing line 112) are missing from this extract; only the braces
 * survived. Presumably the iterator release function - confirm. */
111 {
113 }
114 
/**
 * Dot product between vector vec_idx1 of this object and vector vec_idx2 of
 * another feature object, raised to the power m_degree.
 *
 * @param vec_idx1 index of the vector in this object
 * @param df       the other feature object; must be non-NULL (ASSERT)
 * @param vec_idx2 index of the vector in df
 * @return the base dot product raised to m_degree via CMath::pow
 *
 * NOTE(review): listing lines 118-119, 121, 123 and 127 are missing from
 * this extract. They must contain the definitions of `pf`, `vec1` and
 * `result`, and the start of the call whose argument list continues on
 * listing line 128.
 */
115 float64_t CSparsePolyFeatures::dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2)
116 {
117  ASSERT(df);
120 
122 
    // Fetch the second operand from the sparse features wrapped by pf.
124  SGSparseVector<float64_t> vec2=pf->m_feat->get_sparse_feature_vector(
125  vec_idx2);
126 
    // Tail of a call taking both sparse vectors and their entry counts; its
    // first line is among the missing lines.
128  vec1.num_feat_entries, vec2.features, vec2.num_feat_entries);
129  result=CMath::pow(result, m_degree);
130 
    // Release both vectors on their respective owners.
131  m_feat->free_feature_vector(vec_idx1);
132  pf->m_feat->free_feature_vector(vec_idx2);
133 
134  return result;
135 }
136 
/**
 * Dot product between the hashed degree-2 polynomial expansion of vector
 * vec_idx1 and a dense vector.
 *
 * @param vec_idx1 index of the sparse vector in this object
 * @param vec2     dense vector, indexed by masked hash values
 * @param vec2_len length of vec2; SG_ERROR is raised unless it equals
 *                 m_output_dimensions
 * @return the accumulated dot product, divided by
 *         m_normalization_values[vec_idx1] when m_normalize is set
 *
 * NOTE(review): listing line 142 (the declaration of `vec`) and listing line
 * 176 (the statement of the degree-3 branch) are missing from this extract.
 */
137 float64_t CSparsePolyFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
138 {
139  if (vec2_len != m_output_dimensions)
140  SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions);
141 
143 
144  float64_t result=0;
145 
146  if (vec.features)
147  {
148  if (m_degree==2)
149  {
150  /* (a+b)^2 = a^2 + 2ab +b^2 */
151  for (int32_t i=0; i<vec.num_feat_entries; i++)
152  {
153  float64_t v1=vec.features[i].entry;
    // Hash the first feature index with a fixed seed; the result seeds
    // the hash of the second index, so each (i, j) pair gets its own slot.
154  uint32_t seed=CHash::MurmurHash3(
155  (uint8_t*)&(vec.features[i].feat_index),
156  sizeof(int32_t), 0xDEADBEAF);
157 
    // j starts at i: each unordered pair is visited exactly once.
158  for (int32_t j=i; j<vec.num_feat_entries; j++)
159  {
160  float64_t v2=vec.features[j].entry;
    // Masked hash is the index into vec2.
    // NOTE(review): assumes (hash & mask) < m_output_dimensions; the
    // assignment of m_output_dimensions is not visible in this extract.
161  uint32_t h=CHash::MurmurHash3(
162  (uint8_t*)&(vec.features[j].feat_index),
163  sizeof(int32_t), seed)&mask;
164  float64_t v;
165 
    // Squared term on the diagonal; cross terms carry a sqrt(2) weight
    // (presumably so their square yields the factor 2 of the expansion
    // quoted above).
166  if (i==j)
167  v=v1*v1;
168  else
169  v=CMath::sqrt(2.0)*v1*v2;
170 
171  result+=v*vec2[h];
172  }
173  }
174  }
    // NOTE(review): the statement for this branch is missing (listing line
    // 176); as printed the `else if` has no body.
175  else if (m_degree==3)
177  }
178 
179  if (m_normalize)
180  result/=m_normalization_values[vec_idx1];
181 
182  m_feat->free_feature_vector(vec_idx1);
183  return result;
184 }
185 
/**
 * Add alpha times the hashed degree-2 polynomial expansion of vector
 * vec_idx1 onto a dense vector, in place.
 *
 * @param alpha    scaling factor; divided by
 *                 m_normalization_values[vec_idx1] when m_normalize is set
 * @param vec_idx1 index of the sparse vector in this object
 * @param vec2     dense vector that is updated, indexed by masked hash
 *                 values
 * @param vec2_len length of vec2; SG_ERROR is raised unless it equals
 *                 m_output_dimensions
 * @param abs_val  when true, the absolute value of each contribution is
 *                 added instead of the signed value
 *
 * NOTE(review): listing line 191 (the declaration of `vec`) and listing line
 * 229 (the statement of the degree-3 branch) are missing from this extract.
 */
186 void CSparsePolyFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val)
187 {
188  if (vec2_len!=m_output_dimensions)
189  SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions);
190 
192 
    // Fold the normalization into alpha once, ahead of the loops.
193  float64_t norm_val=1.0;
194  if (m_normalize)
195  norm_val = m_normalization_values[vec_idx1];
196  alpha/=norm_val;
197 
198  if (m_degree==2)
199  {
200  /* (a+b)^2 = a^2 + 2ab +b^2 */
201  for (int32_t i=0; i<vec.num_feat_entries; i++)
202  {
203  float64_t v1=vec.features[i].entry;
    // Hash of the first index, with a fixed seed, seeds the second hash.
204  uint32_t seed=CHash::MurmurHash3(
205  (uint8_t*)&(vec.features[i].feat_index), sizeof(int32_t),
206  0xDEADBEAF);
207 
    // j starts at i: each unordered pair is visited exactly once.
208  for (int32_t j=i; j<vec.num_feat_entries; j++)
209  {
210  float64_t v2=vec.features[j].entry;
    // Masked hash is the index into vec2.
    // NOTE(review): assumes (hash & mask) < m_output_dimensions; the
    // assignment of m_output_dimensions is not visible in this extract.
211  uint32_t h=CHash::MurmurHash3(
212  (uint8_t*)&(vec.features[j].feat_index),
213  sizeof(int32_t), seed)&mask;
214  float64_t v;
215 
    // Same weighting as the degree-2 branch of dense_dot, scaled by alpha.
216  if (i==j)
217  v=alpha*v1*v1;
218  else
219  v=alpha*CMath::sqrt(2.0)*v1*v2;
220 
221  if (abs_val)
222  vec2[h]+=CMath::abs(v);
223  else
224  vec2[h]+=v;
225  }
226  }
227  }
    // NOTE(review): the statement for this branch is missing (listing line
    // 229); as printed, the free_feature_vector call below would parse as
    // the body of this `else if`.
228  else if (m_degree==3)
230 
231  m_feat->free_feature_vector(vec_idx1);
232 }
233 
/* Allocates m_normalization_values with one entry per vector and fills
 * entry i with sqrt(dot(i, this, i)); a zero result is replaced by 1.0 so
 * that later divisions by the stored value do not divide by zero.
 * NOTE(review): the signature (listing line 234) and listing line 236 are
 * missing from this extract; whether a previously allocated buffer is
 * released before the SG_MALLOC below is therefore not visible here. */
235 {
237 
238  m_normalization_values_len = this->get_num_vectors();
239 
240  m_normalization_values=SG_MALLOC(float64_t, m_normalization_values_len);
241  for (int i=0; i<m_normalization_values_len; i++)
242  {
243  float64_t val = CMath::sqrt(dot(i, this,i));
244  if (val==0)
245  // trap division by zero
246  m_normalization_values[i]=1.0;
247  else
248  m_normalization_values[i]=val;
249  }
250 
251 }
252 
/* Returns a newly heap-allocated CSparsePolyFeatures created by
 * copy-constructing from *this.
 * NOTE(review): the signature (listing line 253) is missing from this
 * extract; presumably the duplicate/clone function - confirm. */
254 {
255  return new CSparsePolyFeatures(*this);
256 }
257 
/**
 * Register this object's members with m_parameters, each under the string
 * name and description given in the corresponding call.
 *
 * The statements are kept in their original order; whether registration
 * order matters to m_parameters is not visible from this file.
 */
258 void CSparsePolyFeatures::init()
259 {
260  m_parameters->add((CSGObject**) &m_feat, "features",
261  "Features in original space.");
262  m_parameters->add(&m_degree, "degree", "Degree of the polynomial kernel.");
263  m_parameters->add(&m_normalize, "normalize", "Normalize");
264  m_parameters->add(&m_input_dimensions, "input_dimensions",
265  "Dimensions of the input space.");
266  m_parameters->add(&m_output_dimensions, "output_dimensions",
267  "Dimensions of the feature space of the polynomial kernel.");
    // The length is refreshed right before the vector is registered together
    // with it.
    // NOTE(review): this happens regardless of whether
    // m_normalization_values has been allocated - confirm this is safe when
    // the pointer is still NULL.
268  m_normalization_values_len = get_num_vectors();
269  m_parameters->add_vector(&m_normalization_values, &m_normalization_values_len,
270  "m_normalization_values", "Norm of each training example");
271  m_parameters->add(&mask, "mask", "Mask.");
272  m_parameters->add(&m_hash_bits, "m_hash_bits", "Number of bits in hash");
273 }

SHOGUN Machine Learning Toolbox - Documentation