SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
PolyFeatures.cpp
Go to the documentation of this file.
2 
3 using namespace shogun;
4 
6 {
7  m_feat=NULL;
8  m_degree=0;
9  m_normalize=false;
11  m_multi_index=NULL;
14 
15  register_parameters();
16 }
17 
18 CPolyFeatures::CPolyFeatures(CDenseFeatures<float64_t>* feat, int32_t degree, bool normalize)
19  : CDotFeatures(), m_multi_index(NULL), m_multinomial_coefficients(NULL),
20  m_normalization_values(NULL)
21 {
22  ASSERT(feat);
23 
24  m_feat = feat;
25  SG_REF(m_feat);
26  m_degree=degree;
27  m_normalize=normalize;
30 
33  if (m_normalize)
35 
36  register_parameters();
37 }
38 
39 
41 {
46 }
47 
49 {
50  SG_PRINT("CPolyFeatures:\n");
52 };
53 
55 {
56  return m_output_dimensions;
57 }
58 
60 {
61  return m_output_dimensions;
62 }
63 
65 {
66  return F_UNKNOWN;
67 }
68 
70 {
71  return C_POLY;
72 }
73 
75 {
76  if (m_feat)
77  return m_feat->get_num_vectors();
78  else
79  return 0;
80 
81 }
82 
83 int32_t CPolyFeatures::get_size() const
84 {
85  return sizeof(float64_t);
86 }
87 
88 void* CPolyFeatures::get_feature_iterator(int32_t vector_index)
89 {
91  return NULL;
92 }
93 
94 bool CPolyFeatures::get_next_feature(int32_t& index, float64_t& value, void* iterator)
95 {
97  return NULL;
98 }
99 
101 {
103 }
104 
105 
106 
107 float64_t CPolyFeatures::dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2)
108 {
109  ASSERT(df);
112 
113  CPolyFeatures* pf=(CPolyFeatures*) df;
114 
115  int32_t len1;
116  bool do_free1;
117  float64_t* vec1 = m_feat->get_feature_vector(vec_idx1, len1, do_free1);
118 
119  int32_t len2;
120  bool do_free2;
121  float64_t* vec2 = pf->m_feat->get_feature_vector(vec_idx2, len2, do_free2);
122 
123  float64_t sum=0;
124  int cnt=0;
125  for (int j=0; j<m_output_dimensions; j++)
126  {
129  for (int k=0; k<m_degree; k++)
130  {
131  out1*=vec1[m_multi_index[cnt]];
132  out2*=vec2[m_multi_index[cnt]];
133  cnt++;
134  }
135  sum+=out1*out2;
136  }
137  m_feat->free_feature_vector(vec1, len1, do_free1);
138  pf->m_feat->free_feature_vector(vec2, len2, do_free2);
139 
140  return sum;
141 }
142 
143 float64_t CPolyFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
144 {
145  if (vec2_len != m_output_dimensions)
146  SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions);
147 
148  int32_t len;
149  bool do_free;
150  float64_t* vec = m_feat->get_feature_vector(vec_idx1, len, do_free);
151 
152 
153  int cnt=0;
154  float64_t sum=0;
155  for (int j=0; j<vec2_len; j++)
156  {
158  for (int k=0; k<m_degree; k++)
159  {
160  output*=vec[m_multi_index[cnt]];
161  cnt++;
162  }
163  sum+=output*vec2[j];
164  }
165  if (m_normalize)
166  sum = sum/m_normalization_values[vec_idx1];
167 
168  m_feat->free_feature_vector(vec, len, do_free);
169  return sum;
170 }
171 void CPolyFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val)
172 {
173  if (vec2_len != m_output_dimensions)
174  SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions);
175 
176  int32_t len;
177  bool do_free;
178  float64_t* vec = m_feat->get_feature_vector(vec_idx1, len, do_free);
179 
180 
181  int cnt=0;
182  float32_t norm_val=1;
183  if (m_normalize)
184  norm_val = m_normalization_values[vec_idx1];
185  alpha/=norm_val;
186  for (int j=0; j<vec2_len; j++)
187  {
189  for (int k=0; k<m_degree; k++)
190  {
191  output*=vec[m_multi_index[cnt]];
192  cnt++;
193  }
194  if (abs_val)
195  output=CMath::abs(output);
196 
197  vec2[j]+=alpha*output;
198  }
199  m_feat->free_feature_vector(vec, len, do_free);
200 }
202 {
204 
205  int32_t num_vec = this->get_num_vectors();
206 
208  for (int i=0; i<num_vec; i++)
209  {
210  float64_t tmp = CMath::sqrt(dot(i, this,i));
211  if (tmp==0)
212  // trap division by zero
214  else
215  m_normalization_values[i]=tmp;
216  }
217 
218 }
219 
221 {
223 
225 
226  uint16_t* exponents = SG_MALLOC(uint16_t, m_input_dimensions);
227  if (!exponents)
228  SG_ERROR( "Error allocating mem \n");
229  /*copy adress: otherwise it will be overwritten in recursion*/
230  uint16_t* index = m_multi_index;
231  enumerate_multi_index(0, &index, exponents, m_degree);
232 
233  SG_FREE(exponents);
234 }
235 
236 void CPolyFeatures::enumerate_multi_index(const int32_t feat_idx, uint16_t** index, uint16_t* exponents, const int32_t degree)
237 {
238  if (feat_idx==m_input_dimensions-1 || degree==0)
239  {
240  if (feat_idx==m_input_dimensions-1)
241  exponents[feat_idx] = degree;
242  if (degree==0)
243  exponents[feat_idx] = 0;
244  int32_t i, j;
245  for (j=0; j<feat_idx+1; j++)
246  for (i=0; i<exponents[j]; i++)
247  {
248  **index = j;
249  (*index)++;
250  }
251  exponents[feat_idx] = 0;
252  return;
253  }
254  int32_t k;
255  for (k=0; k<=degree; k++)
256  {
257  exponents[feat_idx] = k;
258  enumerate_multi_index(feat_idx+1, index, exponents, degree-k);
259  }
260  return;
261 
262 }
263 
265 {
267 
269  int32_t* exponents = SG_MALLOC(int32_t, m_input_dimensions);
270  if (!exponents)
271  SG_ERROR( "Error allocating mem \n");
272  int32_t j=0;
273  for (j=0; j<m_input_dimensions; j++)
274  exponents[j] = 0;
275  int32_t k, cnt=0;
276  for (j=0; j<m_output_dimensions; j++)
277  {
278  for (k=0; k<m_degree; k++)
279  {
280  exponents[m_multi_index[cnt]] ++;
281  cnt++;
282  }
283  m_multinomial_coefficients[j] = sqrt((double) multinomialcoef(exponents, m_input_dimensions));
284  for (k=0; k<m_input_dimensions; k++)
285  {
286  exponents[k]=0;
287  }
288  }
289  SG_FREE(exponents);
290 }
291 
292 int32_t CPolyFeatures::bico2(int32_t n, int32_t k)
293 {
294 
295  /* for this problem k is usually small (<=degree),
296  * thus it is efficient to
297  * to use recursion and prune end recursions*/
298  if (n<k)
299  return 0;
300  if (k>n/2)
301  k = n-k;
302  if (k<0)
303  return 0;
304  if (k==0)
305  return 1;
306  if (k==1)
307  return n;
308  if (k<4)
309  return bico2(n-1, k-1)+bico2(n-1, k);
310 
311  /* call function as implemented in numerical recipies:
312  * much more efficient for large binomial coefficients*/
313  return bico(n, k);
314 
315 }
316 
317 int32_t CPolyFeatures::calc_feature_space_dimensions(int32_t N, int32_t D)
318 {
319  if (N==1)
320  return 1;
321  if (D==0)
322  return 1;
323  int32_t d;
324  int32_t ret = 0;
325  for (d=0; d<=D; d++)
326  ret += calc_feature_space_dimensions(N-1, d);
327 
328  return ret;
329 }
330 
331 int32_t CPolyFeatures::multinomialcoef(int32_t* exps, int32_t len)
332 {
333  int32_t ret = 1, i;
334  int32_t n = 0;
335  for (i=0; i<len; i++)
336  {
337  n += exps[i];
338  ret *= bico2(n, exps[i]);
339  }
340  return ret;
341 }
342 
343 /* gammln as implemented in the
344  * second edition of Numerical Recipes in C */
346 {
347  float64_t x,y,tmp,ser;
348  static float64_t cof[6]={76.18009172947146, -86.50532032941677,
349  24.01409824083091, -1.231739572450155,
350  0.1208650973866179e-2,-0.5395239384953e-5};
351  int32_t j;
352 
353  y=x=xx;
354  tmp=x+5.5;
355  tmp -= (x+0.5)*log(tmp);
356  ser=1.000000000190015;
357  for (j=0;j<=5;j++) ser += cof[j]/++y;
358  return -tmp+log(2.5066282746310005*ser/x);
359 }
360 
362 {
363  static float64_t a[101];
364 
365  if (n < 0) SG_ERROR("Negative factorial in routine factln\n");
366  if (n <= 1) return 0.0;
367  if (n <= 100) return a[n] ? a[n] : (a[n]=gammln(n+1.0));
368  else return gammln(n+1.0);
369 }
370 
371 int32_t CPolyFeatures::bico(int32_t n, int32_t k)
372 {
373  /* use floor to clean roundoff errors*/
374  return (int32_t) floor(0.5+exp(factln(n)-factln(k)-factln(n-k)));
375 }
377 {
378  return new CPolyFeatures(*this);
379 }
380 
381 void CPolyFeatures::register_parameters()
382 {
383  m_parameters->add((CSGObject**) &m_feat, "features",
384  "Features in original space.");
385  m_parameters->add(&m_degree, "degree", "Degree of the polynomial kernel.");
386  m_parameters->add(&m_normalize, "normalize", "Normalize?");
387  m_parameters->add(&m_input_dimensions, "input_dimensions",
388  "Dimensions of the input space.");
389  m_parameters->add(&m_output_dimensions, "output_dimensions",
390  "Dimensions of the feature space of the polynomial kernel.");
391 
392  multi_index_length=m_output_dimensions*m_degree;
394  &m_multi_index,
395  &multi_index_length,
396  "multi_index",
397  "Flattened matrix of all multi indices that sum do the"
398  " degree of the polynomial kernel.");
399 
400  multinomial_coefficients_length=m_output_dimensions;
402  &multinomial_coefficients_length, "multinomial_coefficients",
403  "Multinomial coefficients for all multi-indices.");
404 
405  normalization_values_length=get_num_vectors();
407  &normalization_values_length, "normalization_values",
408  "Norm of each training example.");
409 }

SHOGUN Machine Learning Toolbox - Documentation