SHOGUN  4.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
PolyFeatures.cpp
Go to the documentation of this file.
2 
3 using namespace shogun;
4 
6 {
7  m_feat=NULL;
8  m_degree=0;
9  m_normalize=false;
11  m_multi_index=NULL;
15 
16  register_parameters();
17 }
18 
19 CPolyFeatures::CPolyFeatures(CDenseFeatures<float64_t>* feat, int32_t degree, bool normalize)
20  : CDotFeatures(), m_multi_index(NULL), m_multinomial_coefficients(NULL),
21  m_normalization_values(NULL)
22 {
23  ASSERT(feat)
24 
25  m_feat = feat;
26  SG_REF(m_feat);
27  m_degree=degree;
28  m_normalize=normalize;
31 
34  if (m_normalize)
36 
37  register_parameters();
38 }
39 
40 
42 {
43  SG_FREE(m_multi_index);
45  SG_FREE(m_normalization_values);
47 }
48 
50 {
51  SG_PRINT("CPolyFeatures:\n")
53 };
54 
56 {
57  return m_output_dimensions;
58 }
59 
61 {
62  return m_output_dimensions;
63 }
64 
66 {
67  return F_UNKNOWN;
68 }
69 
71 {
72  return C_POLY;
73 }
74 
76 {
77  if (m_feat)
78  return m_feat->get_num_vectors();
79  else
80  return 0;
81 
82 }
83 
84 void* CPolyFeatures::get_feature_iterator(int32_t vector_index)
85 {
87  return NULL;
88 }
89 
90 bool CPolyFeatures::get_next_feature(int32_t& index, float64_t& value, void* iterator)
91 {
93  return false;
94 }
95 
97 {
99 }
100 
101 
102 
103 float64_t CPolyFeatures::dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2)
104 {
105  ASSERT(df)
108 
109  CPolyFeatures* pf=(CPolyFeatures*) df;
110 
111  int32_t len1;
112  bool do_free1;
113  float64_t* vec1 = m_feat->get_feature_vector(vec_idx1, len1, do_free1);
114 
115  int32_t len2;
116  bool do_free2;
117  float64_t* vec2 = pf->m_feat->get_feature_vector(vec_idx2, len2, do_free2);
118 
119  float64_t sum=0;
120  int cnt=0;
121  for (int j=0; j<m_output_dimensions; j++)
122  {
125  for (int k=0; k<m_degree; k++)
126  {
127  out1*=vec1[m_multi_index[cnt]];
128  out2*=vec2[m_multi_index[cnt]];
129  cnt++;
130  }
131  sum+=out1*out2;
132  }
133  m_feat->free_feature_vector(vec1, len1, do_free1);
134  pf->m_feat->free_feature_vector(vec2, len2, do_free2);
135 
136  return sum;
137 }
138 
139 float64_t CPolyFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
140 {
141  if (vec2_len != m_output_dimensions)
142  SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions)
143 
144  int32_t len;
145  bool do_free;
146  float64_t* vec = m_feat->get_feature_vector(vec_idx1, len, do_free);
147 
148 
149  int cnt=0;
150  float64_t sum=0;
151  for (int j=0; j<vec2_len; j++)
152  {
154  for (int k=0; k<m_degree; k++)
155  {
156  output*=vec[m_multi_index[cnt]];
157  cnt++;
158  }
159  sum+=output*vec2[j];
160  }
161  if (m_normalize)
162  sum = sum/m_normalization_values[vec_idx1];
163 
164  m_feat->free_feature_vector(vec, len, do_free);
165  return sum;
166 }
167 void CPolyFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val)
168 {
169  if (vec2_len != m_output_dimensions)
170  SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions)
171 
172  int32_t len;
173  bool do_free;
174  float64_t* vec = m_feat->get_feature_vector(vec_idx1, len, do_free);
175 
176 
177  int cnt=0;
178  float32_t norm_val=1;
179  if (m_normalize)
180  norm_val = m_normalization_values[vec_idx1];
181  alpha/=norm_val;
182  for (int j=0; j<vec2_len; j++)
183  {
185  for (int k=0; k<m_degree; k++)
186  {
187  output*=vec[m_multi_index[cnt]];
188  cnt++;
189  }
190  if (abs_val)
191  output=CMath::abs(output);
192 
193  vec2[j]+=alpha*output;
194  }
195  m_feat->free_feature_vector(vec, len, do_free);
196 }
198 {
199  SG_FREE(m_normalization_values);
200 
201  int32_t num_vec = this->get_num_vectors();
202 
203  m_normalization_values=SG_MALLOC(float32_t, num_vec);
204  for (int i=0; i<num_vec; i++)
205  {
206  float64_t tmp = CMath::sqrt(dot(i, this,i));
207  if (tmp==0)
208  // trap division by zero
210  else
211  m_normalization_values[i]=tmp;
212  }
213 
214 }
215 
217 {
218  SG_FREE(m_multi_index);
219 
220  m_multi_index=SG_MALLOC(uint16_t, m_output_dimensions*m_degree);
221 
222  uint16_t* exponents = SG_MALLOC(uint16_t, m_input_dimensions);
223  if (!exponents)
224  SG_ERROR("Error allocating mem \n")
225  /*copy adress: otherwise it will be overwritten in recursion*/
226  uint16_t* index = m_multi_index;
227  enumerate_multi_index(0, &index, exponents, m_degree);
228 
229  SG_FREE(exponents);
230 }
231 
232 void CPolyFeatures::enumerate_multi_index(const int32_t feat_idx, uint16_t** index, uint16_t* exponents, const int32_t degree)
233 {
234  if (feat_idx==m_input_dimensions-1 || degree==0)
235  {
236  if (feat_idx==m_input_dimensions-1)
237  exponents[feat_idx] = degree;
238  if (degree==0)
239  exponents[feat_idx] = 0;
240  int32_t i, j;
241  for (j=0; j<feat_idx+1; j++)
242  for (i=0; i<exponents[j]; i++)
243  {
244  **index = j;
245  (*index)++;
246  }
247  exponents[feat_idx] = 0;
248  return;
249  }
250  int32_t k;
251  for (k=0; k<=degree; k++)
252  {
253  exponents[feat_idx] = k;
254  enumerate_multi_index(feat_idx+1, index, exponents, degree-k);
255  }
256  return;
257 
258 }
259 
261 {
263 
265  int32_t* exponents = SG_MALLOC(int32_t, m_input_dimensions);
266  if (!exponents)
267  SG_ERROR("Error allocating mem \n")
268  int32_t j=0;
269  for (j=0; j<m_input_dimensions; j++)
270  exponents[j] = 0;
271  int32_t k, cnt=0;
272  for (j=0; j<m_output_dimensions; j++)
273  {
274  for (k=0; k<m_degree; k++)
275  {
276  exponents[m_multi_index[cnt]] ++;
277  cnt++;
278  }
279  m_multinomial_coefficients[j] = sqrt((double) multinomialcoef(exponents, m_input_dimensions));
280  for (k=0; k<m_input_dimensions; k++)
281  {
282  exponents[k]=0;
283  }
284  }
285  SG_FREE(exponents);
286 }
287 
288 int32_t CPolyFeatures::bico2(int32_t n, int32_t k)
289 {
290 
291  /* for this problem k is usually small (<=degree),
292  * thus it is efficient to
293  * to use recursion and prune end recursions*/
294  if (n<k)
295  return 0;
296  if (k>n/2)
297  k = n-k;
298  if (k<0)
299  return 0;
300  if (k==0)
301  return 1;
302  if (k==1)
303  return n;
304  if (k<4)
305  return bico2(n-1, k-1)+bico2(n-1, k);
306 
307  /* call function as implemented in numerical recipies:
308  * much more efficient for large binomial coefficients*/
309  return bico(n, k);
310 
311 }
312 
313 int32_t CPolyFeatures::calc_feature_space_dimensions(int32_t N, int32_t D)
314 {
315  if (N==1)
316  return 1;
317  if (D==0)
318  return 1;
319  int32_t d;
320  int32_t ret = 0;
321  for (d=0; d<=D; d++)
322  ret += calc_feature_space_dimensions(N-1, d);
323 
324  return ret;
325 }
326 
327 int32_t CPolyFeatures::multinomialcoef(int32_t* exps, int32_t len)
328 {
329  int32_t ret = 1, i;
330  int32_t n = 0;
331  for (i=0; i<len; i++)
332  {
333  n += exps[i];
334  ret *= bico2(n, exps[i]);
335  }
336  return ret;
337 }
338 
339 /* gammln as implemented in the
340  * second edition of Numerical Recipes in C */
342 {
343  float64_t x,y,tmp,ser;
344  static float64_t cof[6]={76.18009172947146, -86.50532032941677,
345  24.01409824083091, -1.231739572450155,
346  0.1208650973866179e-2,-0.5395239384953e-5};
347  int32_t j;
348 
349  y=x=xx;
350  tmp=x+5.5;
351  tmp -= (x+0.5)*log(tmp);
352  ser=1.000000000190015;
353  for (j=0;j<=5;j++) ser += cof[j]/++y;
354  return -tmp+log(2.5066282746310005*ser/x);
355 }
356 
358 {
359  static float64_t a[101];
360 
361  if (n < 0) SG_ERROR("Negative factorial in routine factln\n")
362  if (n <= 1) return 0.0;
363  if (n <= 100) return a[n] ? a[n] : (a[n]=gammln(n+1.0));
364  else return gammln(n+1.0);
365 }
366 
367 int32_t CPolyFeatures::bico(int32_t n, int32_t k)
368 {
369  /* use floor to clean roundoff errors*/
370  return (int32_t) floor(0.5+exp(factln(n)-factln(k)-factln(n-k)));
371 }
373 {
374  return new CPolyFeatures(*this);
375 }
376 
377 void CPolyFeatures::register_parameters()
378 {
379  m_parameters->add((CSGObject**) &m_feat, "features",
380  "Features in original space.");
381  m_parameters->add(&m_degree, "degree", "Degree of the polynomial kernel.");
382  m_parameters->add(&m_normalize, "normalize", "Normalize?");
383  m_parameters->add(&m_input_dimensions, "input_dimensions",
384  "Dimensions of the input space.");
385  m_parameters->add(&m_output_dimensions, "output_dimensions",
386  "Dimensions of the feature space of the polynomial kernel.");
387 
388  multi_index_length=m_output_dimensions*m_degree;
390  &m_multi_index,
391  &multi_index_length,
392  "multi_index",
393  "Flattened matrix of all multi indices that sum do the"
394  " degree of the polynomial kernel.");
395 
396  multinomial_coefficients_length=m_output_dimensions;
398  &multinomial_coefficients_length, "multinomial_coefficients",
399  "Multinomial coefficients for all multi-indices.");
400 
401  normalization_values_length=get_num_vectors();
403  &normalization_values_length, "normalization_values",
404  "Norm of each training example.");
405 }
virtual EFeatureType get_feature_type() const
virtual int32_t get_dim_feature_space() const
ST * get_feature_vector(int32_t num, int32_t &len, bool &dofree)
int32_t get_num_features() const
int32_t bico2(int32_t n, int32_t k)
int32_t bico(int32_t n, int32_t k)
void store_multinomial_coefficients()
#define SG_ERROR(...)
Definition: SGIO.h:129
#define SG_NOTIMPLEMENTED
Definition: SGIO.h:139
Parameter * m_parameters
Definition: SGObject.h:378
virtual float64_t dot(int32_t vec_idx1, CDotFeatures *df, int32_t vec_idx2)
virtual void free_feature_iterator(void *iterator)
virtual bool get_next_feature(int32_t &index, float64_t &value, void *iterator)
uint16_t * m_multi_index
Definition: PolyFeatures.h:228
Features that support dot products among other operations.
Definition: DotFeatures.h:44
#define SG_REF(x)
Definition: SGObject.h:51
EFeatureClass
shogun feature class
Definition: FeatureTypes.h:38
float32_t * m_normalization_values
Definition: PolyFeatures.h:232
void add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t *vec2, int32_t vec2_len, bool abs_val)
void add(bool *param, const char *name, const char *description="")
Definition: Parameter.cpp:37
#define SG_PRINT(...)
Definition: SGIO.h:137
#define ASSERT(x)
Definition: SGIO.h:201
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:112
CFeatures * duplicate() const
virtual int32_t get_num_vectors() const
void enumerate_multi_index(const int32_t feat_idx, uint16_t **index, uint16_t *exponents, const int32_t degree)
double float64_t
Definition: common.h:50
float64_t * m_multinomial_coefficients
Definition: PolyFeatures.h:230
CDenseFeatures< float64_t > * m_feat
Definition: PolyFeatures.h:217
virtual int32_t get_nnz_features_for_vector(int32_t num)
virtual EFeatureClass get_feature_class() const =0
float64_t factln(int32_t n)
virtual void * get_feature_iterator(int32_t vector_index)
float float32_t
Definition: common.h:49
EFeatureType
shogun feature type
Definition: FeatureTypes.h:19
virtual int32_t get_num_vectors() const
float64_t dense_dot(int32_t vec_idx1, const float64_t *vec2, int32_t vec2_len)
#define SG_UNREF(x)
Definition: SGObject.h:52
int32_t calc_feature_space_dimensions(int32_t N, int32_t D)
void add_vector(bool **param, index_t *length, const char *name, const char *description="")
Definition: Parameter.cpp:334
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
void free_feature_vector(ST *feat_vec, int32_t num, bool dofree)
The class Features is the base class of all feature objects.
Definition: Features.h:68
float64_t gammln(float64_t xx)
virtual EFeatureClass get_feature_class() const
implement DotFeatures for the polynomial kernel
Definition: PolyFeatures.h:27
static float32_t sqrt(float32_t x)
Definition: Math.h:459
virtual EFeatureType get_feature_type() const =0
static T abs(T a)
Definition: Math.h:179
int32_t multinomialcoef(int32_t *exps, int32_t len)

SHOGUN Machine Learning Toolbox - Documentation