DotFeatures.cpp (SHOGUN v3.0.0)
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2009 Soeren Sonnenburg
 * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society
 */

#include <shogun/features/DotFeatures.h>
#include <shogun/io/SGIO.h>
#include <shogun/lib/Signal.h>
#include <shogun/lib/Time.h>
#include <shogun/mathematics/Math.h>
#include <shogun/base/Parallel.h>
#include <shogun/base/Parameter.h>

#ifdef HAVE_PTHREAD
#include <pthread.h>
#endif

using namespace shogun;

#ifndef DOXYGEN_SHOULD_SKIP_THIS
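/* Parameters handed to each dense_dot_range worker thread: the feature
 * object, an optional index subset, the output buffer, the [start,stop)
 * range to process, per-vector weights (alphas), the dense query vector and
 * its dimension, a bias term, and a progress-display flag. */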
struct DF_THREAD_PARAM
{
    CDotFeatures* df;
    int32_t* sub_index;
    float64_t* output;
    int32_t start;
    int32_t stop;
    float64_t* alphas;
    float64_t* vec;
    int32_t dim;
    float64_t bias;
    bool progress;
};
#endif // DOXYGEN_SHOULD_SKIP_THIS

CDotFeatures::CDotFeatures(int32_t size)
    :CFeatures(size), combined_weight(1.0)
{
    init();
}

CDotFeatures::CDotFeatures(const CDotFeatures& orig)
    :CFeatures(orig), combined_weight(orig.combined_weight)
{
    init();
}

CDotFeatures::CDotFeatures(CFile* loader)
    :CFeatures(loader)
{
    init();
}

float64_t CDotFeatures::dense_dot_sgvec(int32_t vec_idx1, const SGVector<float64_t> vec2)
{
    return dense_dot(vec_idx1, vec2.vector, vec2.vlen);
}

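/* Compute output[i]=alphas[i]*dense_dot(i,vec,dim)+b for every vector i in
 * [start,stop). With pthread support the range is split evenly across
 * parallel->get_num_threads() workers; the final chunk runs in the calling
 * thread. */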
void CDotFeatures::dense_dot_range(float64_t* output, int32_t start, int32_t stop, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b)
{
    ASSERT(output)
    // write access is internally between output[start..stop] so the following
    // line is necessary to write to output[0...(stop-start-1)]
    output-=start;
    ASSERT(start>=0)
    ASSERT(start<stop)
    ASSERT(stop<=get_num_vectors())

    int32_t num_vectors=stop-start;
    ASSERT(num_vectors>0)

    int32_t num_threads=parallel->get_num_threads();
    ASSERT(num_threads>0)

    CSignal::clear_cancel();

#ifdef HAVE_PTHREAD
    if (num_threads < 2)
    {
#endif
        DF_THREAD_PARAM params;
        params.df=this;
        params.sub_index=NULL;
        params.output=output;
        params.start=start;
        params.stop=stop;
        params.alphas=alphas;
        params.vec=vec;
        params.dim=dim;
        params.bias=b;
        params.progress=false; //true;
        dense_dot_range_helper((void*) &params);
#ifdef HAVE_PTHREAD
    }
    else
    {
        pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
        DF_THREAD_PARAM* params = SG_MALLOC(DF_THREAD_PARAM, num_threads);
        int32_t step= num_vectors/num_threads;

        int32_t t;

        for (t=0; t<num_threads-1; t++)
        {
            params[t].df = this;
            params[t].sub_index=NULL;
            params[t].output = output;
            params[t].start = start+t*step;
            params[t].stop = start+(t+1)*step;
            params[t].alphas=alphas;
            params[t].vec=vec;
            params[t].dim=dim;
            params[t].bias=b;
            params[t].progress = false;
            pthread_create(&threads[t], NULL,
                    CDotFeatures::dense_dot_range_helper, (void*)&params[t]);
        }

        params[t].df = this;
        params[t].output = output;
        params[t].sub_index=NULL;
        params[t].start = start+t*step;
        params[t].stop = stop;
        params[t].alphas=alphas;
        params[t].vec=vec;
        params[t].dim=dim;
        params[t].bias=b;
        params[t].progress = false; //true;
        dense_dot_range_helper((void*) &params[t]);

        for (t=0; t<num_threads-1; t++)
            pthread_join(threads[t], NULL);

        SG_FREE(params);
        SG_FREE(threads);
    }
#endif

#ifndef WIN32
    if ( CSignal::cancel_computations() )
        SG_INFO("prematurely stopped. \n")
#endif
}

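/* Same as dense_dot_range(), but only over the num vectors whose indices are
 * listed in sub_index; output[i] corresponds to sub_index[i]. */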
void CDotFeatures::dense_dot_range_subset(int32_t* sub_index, int32_t num, float64_t* output, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b)
{
    ASSERT(sub_index)
    ASSERT(output)

    int32_t num_threads=parallel->get_num_threads();
    ASSERT(num_threads>0)

    CSignal::clear_cancel();

#ifdef HAVE_PTHREAD
    if (num_threads < 2)
    {
#endif
        DF_THREAD_PARAM params;
        params.df=this;
        params.sub_index=sub_index;
        params.output=output;
        params.start=0;
        params.stop=num;
        params.alphas=alphas;
        params.vec=vec;
        params.dim=dim;
        params.bias=b;
        params.progress=false; //true;
        dense_dot_range_helper((void*) &params);
#ifdef HAVE_PTHREAD
    }
    else
    {
        pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
        DF_THREAD_PARAM* params = SG_MALLOC(DF_THREAD_PARAM, num_threads);
        int32_t step= num/num_threads;

        int32_t t;

        for (t=0; t<num_threads-1; t++)
        {
            params[t].df = this;
            params[t].sub_index=sub_index;
            params[t].output = output;
            params[t].start = t*step;
            params[t].stop = (t+1)*step;
            params[t].alphas=alphas;
            params[t].vec=vec;
            params[t].dim=dim;
            params[t].bias=b;
            params[t].progress = false;
            pthread_create(&threads[t], NULL,
                    CDotFeatures::dense_dot_range_helper, (void*)&params[t]);
        }

        params[t].df = this;
        params[t].sub_index=sub_index;
        params[t].output = output;
        params[t].start = t*step;
        params[t].stop = num;
        params[t].alphas=alphas;
        params[t].vec=vec;
        params[t].dim=dim;
        params[t].bias=b;
        params[t].progress = false; //true;
        dense_dot_range_helper((void*) &params[t]);

        for (t=0; t<num_threads-1; t++)
            pthread_join(threads[t], NULL);

        SG_FREE(params);
        SG_FREE(threads);
    }
#endif

#ifndef WIN32
    if ( CSignal::cancel_computations() )
        SG_INFO("prematurely stopped. \n")
#endif
}

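/* Worker routine shared by the single- and multi-threaded code paths above:
 * computes the (optionally weighted and biased) dot products for its assigned
 * range, honoring cancellation requests on non-WIN32 builds. */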
void* CDotFeatures::dense_dot_range_helper(void* p)
{
    DF_THREAD_PARAM* par=(DF_THREAD_PARAM*) p;
    CDotFeatures* df=par->df;
    int32_t* sub_index=par->sub_index;
    float64_t* output=par->output;
    int32_t start=par->start;
    int32_t stop=par->stop;
    float64_t* alphas=par->alphas;
    float64_t* vec=par->vec;
    int32_t dim=par->dim;
    float64_t bias=par->bias;
    bool progress=par->progress;

    if (sub_index)
    {
#ifdef WIN32
        for (int32_t i=start; i<stop; i++)
#else
        for (int32_t i=start; i<stop &&
                !CSignal::cancel_computations(); i++)
#endif
        {
            if (alphas)
                output[i]=alphas[sub_index[i]]*df->dense_dot(sub_index[i], vec, dim)+bias;
            else
                output[i]=df->dense_dot(sub_index[i], vec, dim)+bias;
            if (progress)
                df->display_progress(start, stop, i);
        }
    }
    else
    {
#ifdef WIN32
        for (int32_t i=start; i<stop; i++)
#else
        for (int32_t i=start; i<stop &&
                !CSignal::cancel_computations(); i++)
#endif
        {
            if (alphas)
                output[i]=alphas[i]*df->dense_dot(i, vec, dim)+bias;
            else
                output[i]=df->dense_dot(i, vec, dim)+bias;
            if (progress)
                df->display_progress(start, stop, i);
        }
    }

    return NULL;
}

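/* Densify all feature vectors into a dim x num matrix, one column per
 * example, via add_to_dense_vec(). */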
SGMatrix<float64_t> CDotFeatures::get_computed_dot_feature_matrix()
{
    int64_t offs=0;
    int32_t num=get_num_vectors();
    int32_t dim=get_dim_feature_space();
    ASSERT(num>0)
    ASSERT(dim>0)

    SGMatrix<float64_t> m(dim, num);
    m.zero();

    for (int32_t i=0; i<num; i++)
    {
        add_to_dense_vec(1.0, i, &(m.matrix[offs]), dim);
        offs+=dim;
    }

    return m;
}

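/* Densify the single feature vector with index num into an SGVector of
 * length dim. */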
SGVector<float64_t> CDotFeatures::get_computed_dot_feature_vector(int32_t num)
{
    int32_t dim=get_dim_feature_space();
    ASSERT(num>=0 && num<get_num_vectors())
    ASSERT(dim>0)

    SGVector<float64_t> v(dim);
    v.zero();
    add_to_dense_vec(1.0, num, v.vector, dim);
    return v;
}

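/* Benchmark: time 'repeats' passes of add_to_dense_vec() over all vectors and
 * report the average CPU and wall-clock time per pass. */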
void CDotFeatures::benchmark_add_to_dense_vector(int32_t repeats)
{
    int32_t num=get_num_vectors();
    int32_t d=get_dim_feature_space();
    float64_t* w= SG_MALLOC(float64_t, d);
    SGVector<float64_t>::fill_vector(w, d, 0.0); // accumulate into a zero-initialized vector

    CTime t;
    float64_t start_cpu=t.get_runtime();
    float64_t start_wall=t.get_curtime();
    for (int32_t r=0; r<repeats; r++)
    {
        for (int32_t i=0; i<num; i++)
            add_to_dense_vec(1.172343*(r+1), i, w, d);
    }

    SG_PRINT("Time to process %d x num=%d add_to_dense_vector ops: cputime %fs walltime %fs\n",
            repeats, num, (t.get_runtime()-start_cpu)/repeats,
            (t.get_curtime()-start_wall)/repeats);

    SG_FREE(w);
}

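/* Benchmark: time 'repeats' calls of dense_dot_range() over the full range
 * and report the average CPU and wall-clock time per call. */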
void CDotFeatures::benchmark_dense_dot_range(int32_t repeats)
{
    int32_t num=get_num_vectors();
    int32_t d=get_dim_feature_space();
    float64_t* w= SG_MALLOC(float64_t, d);
    float64_t* out= SG_MALLOC(float64_t, num);
    float64_t* alphas= SG_MALLOC(float64_t, num);
    SGVector<float64_t>::range_fill_vector(w, d, 17.0);
    SGVector<float64_t>::range_fill_vector(alphas, num, 1.2345);
    //SGVector<float64_t>::fill_vector(w, d, 17.0);
    //SGVector<float64_t>::fill_vector(alphas, num, 1.2345);

    CTime t;
    float64_t start_cpu=t.get_runtime();
    float64_t start_wall=t.get_curtime();

    for (int32_t r=0; r<repeats; r++)
        dense_dot_range(out, 0, num, alphas, w, d, 23);

#ifdef DEBUG_DOTFEATURES
    CMath::display_vector(out, 40, "dense_dot_range");
    float64_t* out2= SG_MALLOC(float64_t, num);

    for (int32_t r=0; r<repeats; r++)
    {
        CMath::fill_vector(out2, num, 0.0);
        for (int32_t i=0; i<num; i++)
            out2[i]+=dense_dot(i, w, d)*alphas[i]+23;
    }
    CMath::display_vector(out2, 40, "dense_dot");
    for (int32_t i=0; i<num; i++)
        out2[i]-=out[i];
    CMath::display_vector(out2, 40, "diff");
    SG_FREE(out2);
#endif
    SG_PRINT("Time to process %d x num=%d dense_dot_range ops: cputime %fs walltime %fs\n",
            repeats, num, (t.get_runtime()-start_cpu)/repeats,
            (t.get_curtime()-start_wall)/repeats);

    SG_FREE(alphas);
    SG_FREE(out);
    SG_FREE(w);
}

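/* Empirical mean of all feature vectors in the dense feature space. */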
SGVector<float64_t> CDotFeatures::get_mean()
{
    int32_t num=get_num_vectors();
    int32_t dim=get_dim_feature_space();
    ASSERT(num>0)
    ASSERT(dim>0)

    SGVector<float64_t> mean(dim);
    memset(mean.vector, 0, sizeof(float64_t)*dim);

    for (int i = 0; i < num; i++)
        add_to_dense_vec(1, i, mean.vector, dim);
    for (int j = 0; j < dim; j++)
        mean.vector[j] /= num;

    return mean;
}

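/* Empirical mean of the union of the vectors in lhs and rhs; both feature
 * objects must live in the same dense feature space. */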
SGVector<float64_t> CDotFeatures::get_mean(CDotFeatures* lhs, CDotFeatures* rhs)
{
    ASSERT(lhs && rhs)
    ASSERT(lhs->get_dim_feature_space()==rhs->get_dim_feature_space())

    int32_t num_lhs=lhs->get_num_vectors();
    int32_t num_rhs=rhs->get_num_vectors();
    int32_t dim=lhs->get_dim_feature_space();
    ASSERT(num_lhs>0)
    ASSERT(num_rhs>0)
    ASSERT(dim>0)

    SGVector<float64_t> mean(dim);
    memset(mean.vector, 0, sizeof(float64_t)*dim);

    for (int i = 0; i < num_lhs; i++)
        lhs->add_to_dense_vec(1, i, mean.vector, dim);
    for (int i = 0; i < num_rhs; i++)
        rhs->add_to_dense_vec(1, i, mean.vector, dim);
    for (int j = 0; j < dim; j++)
        mean.vector[j] /= (num_lhs+num_rhs);

    return mean;
}

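/* Covariance matrix of the features: accumulate the lower triangle of
 * (x-mean)(x-mean)^T over all vectors, divide by the number of vectors, and
 * mirror the result into the upper triangle. */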
SGMatrix<float64_t> CDotFeatures::get_cov()
{
    int32_t num=get_num_vectors();
    int32_t dim=get_dim_feature_space();
    ASSERT(num>0)
    ASSERT(dim>0)

    SGMatrix<float64_t> cov(dim, dim);

    memset(cov.matrix, 0, sizeof(float64_t)*dim*dim);

    SGVector<float64_t> mean = get_mean();

    for (int i = 0; i < num; i++)
    {
        SGVector<float64_t> v = get_computed_dot_feature_vector(i);
        SGVector<float64_t>::add(v.vector, 1, v.vector, -1, mean.vector, v.vlen);
        for (int m = 0; m < v.vlen; m++)
        {
            for (int n = 0; n <= m; n++)
            {
                (cov.matrix)[m*v.vlen+n] += v.vector[m]*v.vector[n];
            }
        }
    }
    for (int m = 0; m < dim; m++)
    {
        for (int n = 0; n <= m; n++)
        {
            (cov.matrix)[m*dim+n] /= num;
        }
    }
    for (int m = 0; m < dim-1; m++)
    {
        for (int n = m+1; n < dim; n++)
        {
            (cov.matrix)[m*dim+n] = (cov.matrix)[n*dim+m];
        }
    }
    return cov;
}

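/* Covariance computed over the concatenation of lhs and rhs, using their
 * joint mean; same lower-triangle accumulation as get_cov(). */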
SGMatrix<float64_t> CDotFeatures::compute_cov(CDotFeatures* lhs, CDotFeatures* rhs)
{
    CDotFeatures* feats[2];
    feats[0]=lhs;
    feats[1]=rhs;

    int32_t nums[2], dims[2], num=0;

    for (int i = 0; i < 2; i++)
    {
        nums[i]=feats[i]->get_num_vectors();
        dims[i]=feats[i]->get_dim_feature_space();
        ASSERT(nums[i]>0)
        ASSERT(dims[i]>0)
        num += nums[i];
    }

    ASSERT(dims[0]==dims[1])
    int32_t dim = dims[0];

    SGMatrix<float64_t> cov(dim, dim);

    memset(cov.matrix, 0, sizeof(float64_t)*dim*dim);

    SGVector<float64_t> mean=get_mean(lhs,rhs);

    for (int i = 0; i < 2; i++)
    {
        for (int j = 0; j < nums[i]; j++)
        {
            SGVector<float64_t> v=feats[i]->get_computed_dot_feature_vector(j);
            SGVector<float64_t>::add(v.vector, 1, v.vector, -1, mean.vector, v.vlen);
            for (int m = 0; m < v.vlen; m++)
            {
                for (int n = 0; n <= m; n++)
                {
                    (cov.matrix)[m*v.vlen+n] += v.vector[m]*v.vector[n];
                }
            }
        }
    }
    for (int m = 0; m < dim; m++)
    {
        for (int n = 0; n <= m; n++)
        {
            (cov.matrix)[m*dim+n] /= num;
        }
    }
    for (int m = 0; m < dim-1; m++)
    {
        for (int n = m+1; n < dim; n++)
        {
            (cov.matrix)[m*dim+n] = (cov.matrix)[n*dim+m];
        }
    }

    return cov;
}

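/* Print a progress update roughly every 1% of the processed range. */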
void CDotFeatures::display_progress(int32_t start, int32_t stop, int32_t v)
{
    int32_t num_vectors=stop-start;
    int32_t i=v-start;

    if ((i % (num_vectors/100+1)) == 0)
        SG_PROGRESS(v, 0.0, num_vectors-1)
}

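/* Mark this feature object as supporting the dot-feature interface and
 * register combined_weight as a model parameter. */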
void CDotFeatures::init()
{
    set_property(FP_DOT);
    m_parameters->add(&combined_weight, "combined_weight",
            "Feature weighting in combined dot features.");
}
