SHOGUN  4.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
SparsePolyFeatures.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2010 Soeren Sonnenburg
8  * Copyright (C) 2010 Berlin Institute of Technology
9  */
11 #include <shogun/lib/Hash.h>
12 
13 using namespace shogun;
14 
/* NOTE(review): the signature line for this body (listing line 15) and
 * listing lines 23-25 are absent from this extract. Judging by the
 * SG_UNSTABLE message below this is presumably the default constructor --
 * confirm against the real source file.
 *
 * The visible statements flag the constructor as unstable and put the
 * object into an "empty" state: no wrapped features, degree 0, no
 * normalization, and a zero hash mask / bit count.
 */
16 {
17  SG_UNSTABLE("CSparsePolyFeatures::CSparsePolyFeatures()",
18  "\n");
19 
20  m_feat = NULL;
21  m_degree = 0;
22  m_normalize = false;
26  mask = 0;
27  m_hash_bits = 0;
28 }
29 
/* Construct polynomial features on top of existing sparse features.
 *
 * feat      - sparse input features; must be non-NULL (checked via ASSERT)
 * degree    - stored in m_degree
 * normalize - stored in m_normalize
 * hash_bits - stored in m_hash_bits and used to derive the hash mask
 *
 * NOTE(review): listing lines 41-42 and 45 are absent from this extract, so
 * the statement controlled by `if (m_normalize)` and any other
 * initialization performed there are not visible here.
 */
30 CSparsePolyFeatures::CSparsePolyFeatures(CSparseFeatures<float64_t>* feat, int32_t degree, bool normalize, int32_t hash_bits)
31  : CDotFeatures(), m_normalization_values(NULL)
32 {
33  ASSERT(feat)
34 
35  m_feat = feat;
    /* SG_REF is applied to the stored pointer -- presumably this takes a
     * reference on the wrapped features; confirm in SGObject.h. */
36  SG_REF(m_feat);
37  m_degree=degree;
38  m_normalize=normalize;
39  m_hash_bits=hash_bits;
    /* The (uint32_t) cast binds tighter than the subtraction, so this is
     * (uint32_t)(1ULL << m_hash_bits) - 1, i.e. the low m_hash_bits bits set
     * for 0 <= m_hash_bits <= 32.
     * NOTE(review): hash_bits is not range-checked here; a negative value or
     * a value >= 64 makes the shift undefined behavior. */
40  mask=(uint32_t) (((uint64_t) 1)<<m_hash_bits)-1;
43 
44  if (m_normalize)
46 }
47 
/* NOTE(review): the signature line (listing line 48) and listing line 51
 * are absent from this extract; this looks like the destructor -- confirm.
 * The visible statement releases the normalization-value buffer. */
49 {
50  SG_FREE(m_normalization_values);
52 }
53 
/* NOTE(review): the signature line (listing line 54) and listing line 57
 * are absent from this extract, so the function this body belongs to cannot
 * be identified from here. The visible statement prints the class name. */
55 {
56  SG_PRINT("CSparsePolyFeatures:\n")
58 }
59 
/* NOTE(review): signature line (listing line 60) is absent from this
 * extract; presumably get_dim_feature_space() -- confirm.
 * Returns the stored output dimensionality. */
61 {
62  return m_output_dimensions;
63 }
64 
/* NOTE(review): signature line (listing line 65) and listing lines 68 and
 * 70 are absent from this extract (the declaration of `vec` is among the
 * missing lines); presumably get_nnz_features_for_vector() -- confirm.
 *
 * Returns vlen*(vlen+1)/2, where vlen is the number of stored entries of
 * the sparse vector, i.e. the number of index pairs (i, j) with j >= i.
 */
66 {
67  int32_t vlen;
69  vlen=vec.num_feat_entries;
    /* NOTE(review): the product is evaluated in int32_t and can overflow for
     * vlen above roughly 46340 -- verify expected vector sizes. */
71  return vlen*(vlen+1)/2;
72 }
73 
/* NOTE(review): signature line (listing line 74) is absent from this
 * extract; presumably get_feature_type() -- confirm.
 * Always reports F_UNKNOWN. */
75 {
76  return F_UNKNOWN;
77 }
78 
/* NOTE(review): signature line (listing line 79) is absent from this
 * extract; presumably get_feature_class() -- confirm.
 * Always reports C_POLY. */
80 {
81  return C_POLY;
82 }
83 
/* NOTE(review): signature line (listing line 84) is absent from this
 * extract; presumably get_num_vectors() -- confirm.
 * Forwards to the wrapped features' get_num_vectors(), or returns 0 when no
 * features are attached (m_feat is NULL). */
85 {
86  if (m_feat)
87  return m_feat->get_num_vectors();
88  else
89  return 0;
90 
91 }
92 
/* Feature-iterator entry point. The visible code ignores vector_index and
 * returns NULL.
 * NOTE(review): listing line 95 is absent from this extract, so any
 * statement executed before the return is not visible here. */
93 void* CSparsePolyFeatures::get_feature_iterator(int32_t vector_index)
94 {
96  return NULL;
97 }
98 
/* Iterator step. The visible code leaves index and value untouched and
 * returns false.
 * NOTE(review): listing line 101 is absent from this extract, so any
 * statement executed before the return is not visible here. */
99 bool CSparsePolyFeatures::get_next_feature(int32_t& index, float64_t& value, void* iterator)
100 {
102  return false;
103 }
104 
/* NOTE(review): signature line (listing line 105) and listing line 107 are
 * absent from this extract; presumably free_feature_iterator() -- confirm.
 * No statements of this body are visible here. */
106 {
108 }
109 
/* Dot product in the polynomial feature space between vector vec_idx1 of
 * this object and vector vec_idx2 of df.
 *
 * vec_idx1 - index of the vector in this object
 * df       - other feature object; must be non-NULL (checked via ASSERT)
 * vec_idx2 - index of the vector in df
 *
 * Returns `result` raised to the power m_degree.
 *
 * NOTE(review): listing lines 113-114, 116, 118 and 122 are absent from
 * this extract. They must contain the declarations of `pf` and `result`
 * (and presumably the fetch of the first sparse vector and the base dot
 * product) -- confirm against the real source. No division by
 * m_normalization_values appears in the visible lines.
 */
110 float64_t CSparsePolyFeatures::dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2)
111 {
112  ASSERT(df)
115 
117 
119  SGSparseVector<float64_t> vec2=pf->m_feat->get_sparse_feature_vector(
120  vec_idx2);
121 
123  result=CMath::pow(result, m_degree);
124 
    /* Hand both vectors back to the feature objects they were fetched
     * from. */
125  m_feat->free_feature_vector(vec_idx1);
126  pf->m_feat->free_feature_vector(vec_idx2);
127 
128  return result;
129 }
130 
/* Dot product between the hashed polynomial expansion of vector vec_idx1
 * and a dense vector vec2.
 *
 * vec_idx1 - index of the sparse vector in the wrapped features
 * vec2     - dense vector, indexed by masked hash values
 * vec2_len - length of vec2; must equal m_output_dimensions, otherwise
 *            SG_ERROR is raised
 *
 * Returns the accumulated sum, divided by m_normalization_values[vec_idx1]
 * when m_normalize is set.
 *
 * NOTE(review): listing line 136 (the declaration of `vec`) and listing
 * line 170 (the body of the degree-3 branch) are absent from this extract.
 * In the visible code only m_degree==2 contributes to the result.
 */
131 float64_t CSparsePolyFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
132 {
133  if (vec2_len != m_output_dimensions)
134  SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions)
135 
137 
138  float64_t result=0;
139 
140  if (vec.features)
141  {
142  if (m_degree==2)
143  {
144  /* (a+b)^2 = a^2 + 2ab +b^2 */
    /* Walk every index pair (i, j) with j >= i. The pair is mapped to a
     * slot of vec2 by hashing feat_index[i] with a fixed seed and then
     * hashing feat_index[j] seeded with that first hash, masked by `mask`. */
145  for (int32_t i=0; i<vec.num_feat_entries; i++)
146  {
147  float64_t v1=vec.features[i].entry;
148  uint32_t seed=CHash::MurmurHash3(
149  (uint8_t*)&(vec.features[i].feat_index),
150  sizeof(int32_t), 0xDEADBEAF);
151 
152  for (int32_t j=i; j<vec.num_feat_entries; j++)
153  {
154  float64_t v2=vec.features[j].entry;
155  uint32_t h=CHash::MurmurHash3(
156  (uint8_t*)&(vec.features[j].feat_index),
157  sizeof(int32_t), seed)&mask;
158  float64_t v;
159 
    /* Diagonal pairs contribute the squared entry; off-diagonal pairs are
     * weighted by sqrt(2) so that the squared weight equals the 2ab
     * coefficient from the expansion above. */
160  if (i==j)
161  v=v1*v1;
162  else
163  v=CMath::sqrt(2.0)*v1*v2;
164 
165  result+=v*vec2[h];
166  }
167  }
168  }
169  else if (m_degree==3)
171  }
172 
173  if (m_normalize)
174  result/=m_normalization_values[vec_idx1];
175 
176  m_feat->free_feature_vector(vec_idx1);
177  return result;
178 }
179 
/* Add alpha times the hashed polynomial expansion of vector vec_idx1 onto
 * the dense vector vec2 (in place).
 *
 * alpha    - scaling factor; divided by the vector's normalization value
 *            when m_normalize is set
 * vec_idx1 - index of the sparse vector in the wrapped features
 * vec2     - dense accumulator, indexed by masked hash values
 * vec2_len - length of vec2; must equal m_output_dimensions, otherwise
 *            SG_ERROR is raised
 * abs_val  - when true, the absolute value of each contribution is added
 *
 * NOTE(review): listing line 185 (the declaration of `vec`) and listing
 * line 223 (the body of the degree-3 branch) are absent from this extract.
 * In the visible code only m_degree==2 modifies vec2.
 */
180 void CSparsePolyFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val)
181 {
182  if (vec2_len!=m_output_dimensions)
183  SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions)
184 
186 
    /* Fold the normalization into alpha once, up front. */
187  float64_t norm_val=1.0;
188  if (m_normalize)
189  norm_val = m_normalization_values[vec_idx1];
190  alpha/=norm_val;
191 
192  if (m_degree==2)
193  {
194  /* (a+b)^2 = a^2 + 2ab +b^2 */
    /* Same pair enumeration and two-stage hashing as in dense_dot's
     * degree-2 branch: hash feat_index[i] with a fixed seed, then hash
     * feat_index[j] seeded with that value and mask the result. */
195  for (int32_t i=0; i<vec.num_feat_entries; i++)
196  {
197  float64_t v1=vec.features[i].entry;
198  uint32_t seed=CHash::MurmurHash3(
199  (uint8_t*)&(vec.features[i].feat_index), sizeof(int32_t),
200  0xDEADBEAF);
201 
202  for (int32_t j=i; j<vec.num_feat_entries; j++)
203  {
204  float64_t v2=vec.features[j].entry;
205  uint32_t h=CHash::MurmurHash3(
206  (uint8_t*)&(vec.features[j].feat_index),
207  sizeof(int32_t), seed)&mask;
208  float64_t v;
209 
210  if (i==j)
211  v=alpha*v1*v1;
212  else
213  v=alpha*CMath::sqrt(2.0)*v1*v2;
214 
215  if (abs_val)
216  vec2[h]+=CMath::abs(v);
217  else
218  vec2[h]+=v;
219  }
220  }
221  }
222  else if (m_degree==3)
224 
225  m_feat->free_feature_vector(vec_idx1);
226 }
227 
/* NOTE(review): the signature line (listing line 228) is absent from this
 * extract; the function name is not visible here.
 *
 * Recomputes the per-vector normalization values: any previous buffer is
 * freed, a new one with get_num_vectors() entries is allocated, and entry i
 * is set to sqrt(dot(i, this, i)), with 1.0 substituted when that value is
 * zero.
 */
229 {
230  SG_FREE(m_normalization_values);
231 
232  m_normalization_values_len = this->get_num_vectors();
233 
234  m_normalization_values=SG_MALLOC(float64_t, m_normalization_values_len);
235  for (int i=0; i<m_normalization_values_len; i++)
236  {
237  float64_t val = CMath::sqrt(dot(i, this,i));
238  if (val==0)
239  // trap division by zero
240  m_normalization_values[i]=1.0;
241  else
242  m_normalization_values[i]=val;
243  }
244 
245 }
246 
/* NOTE(review): the signature line (listing line 247) is absent from this
 * extract; presumably duplicate() -- confirm.
 * Returns a heap-allocated object built with the copy constructor from
 * *this. What that copy constructor does is not visible in this extract --
 * verify before relying on the copy. */
248 {
249  return new CSparsePolyFeatures(*this);
250 }
251 
/* Register the object's members with m_parameters under the names and
 * descriptions given below. The statement order is kept as-is because it
 * fixes the order in which the parameters are registered. */
252 void CSparsePolyFeatures::init()
253 {
254  m_parameters->add((CSGObject**) &m_feat, "features",
255  "Features in original space.");
256  m_parameters->add(&m_degree, "degree", "Degree of the polynomial kernel.");
257  m_parameters->add(&m_normalize, "normalize", "Normalize");
258  m_parameters->add(&m_input_dimensions, "input_dimensions",
259  "Dimensions of the input space.");
260  m_parameters->add(&m_output_dimensions, "output_dimensions",
261  "Dimensions of the feature space of the polynomial kernel.");
    /* The length must be set before the vector is registered, since
     * add_vector() receives a pointer to it. */
262  m_normalization_values_len = get_num_vectors();
263  m_parameters->add_vector(&m_normalization_values, &m_normalization_values_len,
264  "m_normalization_values", "Norm of each training example");
265  m_parameters->add(&mask, "mask", "Mask.");
266  m_parameters->add(&m_hash_bits, "m_hash_bits", "Number of bits in hash");
267 }
virtual EFeatureClass get_feature_class() const
T sparse_dot(const SGSparseVector< T > &v)
CSparseFeatures< float64_t > * m_feat
void free_feature_iterator(void *iterator)
virtual float64_t dot(int32_t vec_idx1, CDotFeatures *df, int32_t vec_idx2)
#define SG_ERROR(...)
Definition: SGIO.h:129
virtual void * get_feature_iterator(int32_t vector_index)
#define SG_NOTIMPLEMENTED
Definition: SGIO.h:139
Parameter * m_parameters
Definition: SGObject.h:378
Features that support dot products among other operations.
Definition: DotFeatures.h:44
#define SG_REF(x)
Definition: SGObject.h:51
EFeatureClass
shogun feature class
Definition: FeatureTypes.h:38
static uint32_t MurmurHash3(uint8_t *data, int32_t len, uint32_t seed)
Definition: Hash.cpp:366
virtual int32_t get_dim_feature_space() const
void add(bool *param, const char *name, const char *description="")
Definition: Parameter.cpp:37
virtual float64_t dense_dot(int32_t vec_idx1, const float64_t *vec2, int32_t vec2_len)
#define SG_PRINT(...)
Definition: SGIO.h:137
#define ASSERT(x)
Definition: SGIO.h:201
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:112
int32_t get_num_features() const
double float64_t
Definition: common.h:50
virtual EFeatureClass get_feature_class() const =0
void free_feature_vector(int32_t num)
SGSparseVectorEntry< T > * features
SGSparseVector< ST > get_sparse_feature_vector(int32_t num)
virtual EFeatureType get_feature_type() const
EFeatureType
shogun feature type
Definition: FeatureTypes.h:19
bool get_next_feature(int32_t &index, float64_t &value, void *iterator)
#define SG_UNREF(x)
Definition: SGObject.h:52
void add_vector(bool **param, index_t *length, const char *name, const char *description="")
Definition: Parameter.cpp:334
All classes and functions are contained in the shogun namespace.
Definition: class_list.h:18
virtual void add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t *vec2, int32_t vec2_len, bool abs_val=false)
The class Features is the base class of all feature objects.
Definition: Features.h:68
virtual int32_t get_num_vectors() const
Implements DotFeatures for the polynomial kernel.
static float32_t sqrt(float32_t x)
Definition: Math.h:459
virtual int32_t get_num_vectors() const
#define SG_UNSTABLE(func,...)
Definition: SGIO.h:132
static int32_t pow(bool x, int32_t n)
Definition: Math.h:535
virtual EFeatureType get_feature_type() const =0
static T abs(T a)
Definition: Math.h:179
virtual int32_t get_nnz_features_for_vector(int32_t num)

SHOGUN Machine Learning Toolbox - Documentation