SHOGUN  4.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
Distance.cpp
Go to the documentation of this file.
1 /*
2  * this program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2006-2009 Christian Gehl
8  * Written (W) 2006-2009 Soeren Sonnenburg
9  * Copyright (C) 2006-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #include <shogun/lib/config.h>
13 #include <shogun/lib/common.h>
14 #include <shogun/io/SGIO.h>
15 #include <shogun/io/File.h>
16 #include <shogun/lib/Time.h>
17 #include <shogun/lib/Signal.h>
18 #include <shogun/base/Parallel.h>
19 #include <shogun/base/Parameter.h>
20 
23 
24 #include <string.h>
25 #include <unistd.h>
26 
27 #ifdef HAVE_PTHREAD
28 #include <pthread.h>
29 #endif
30 
31 using namespace shogun;
32 
34 template <class T> struct D_THREAD_PARAM
35 {
39  int32_t start;
41  int32_t end;
43  int32_t total_start;
45  int32_t total_end;
47  int32_t m;
49  int32_t n;
51  T* result;
53  bool symmetric;
55  bool verbose;
56 };
57 
59 {
60  init();
61 }
62 
63 
65 {
66  init();
67  init(p_lhs, p_rhs);
68 }
69 
71 {
72  SG_FREE(precomputed_matrix);
73  precomputed_matrix=NULL;
74 
76 }
77 
78 bool CDistance::init(CFeatures* l, CFeatures* r)
79 {
80  REQUIRE(check_compatibility(l, r), "Features are not compatible!\n");
81 
82  //remove references to previous features
84 
85  //increase reference counts
86  SG_REF(l);
87  SG_REF(r);
88 
89  lhs=l;
90  rhs=r;
91 
94 
95  SG_FREE(precomputed_matrix);
96  precomputed_matrix=NULL ;
97 
98  return true;
99 }
100 
102 {
103  REQUIRE(l, "Left hand side features must be set!\n");
104  REQUIRE(r, "Right hand side features must be set!\n");
105 
107  "Right hand side of features (%s) must be of same type with left hand side features (%s)\n",
108  r->get_name(), l->get_name());
109 
110  if (l->support_compatible_class())
111  {
113  "Right hand side of features (%s) must be compatible with left hand side features (%s)\n",
114  r->get_name(), l->get_name());
115  }
116  else if (r->support_compatible_class())
117  {
119  "Right hand side of features (%s) must be compatible with left hand side features (%s)\n",
120  r->get_name(), l->get_name());
121  }
122  else
123  {
125  "Right hand side of features (%s) must be compatible with left hand side features (%s)\n",
126  r->get_name(), l->get_name());
127  }
128 
129  return true;
130 }
131 
132 void CDistance::load(CFile* loader)
133 {
136 }
137 
138 void CDistance::save(CFile* writer)
139 {
142 }
143 
145 {
146  SG_UNREF(rhs);
147  rhs = NULL;
148  num_rhs=0;
149 
150  SG_UNREF(lhs);
151  lhs = NULL;
152  num_lhs=0;
153 }
154 
156 {
157  SG_UNREF(lhs);
158  lhs = NULL;
159  num_lhs=0;
160 }
161 
164 {
165  SG_UNREF(rhs);
166  rhs = NULL;
167  num_rhs=0;
168 }
169 
171 {
172  //make sure features are compatible
173  REQUIRE(check_compatibility(lhs, r), "Features are not compatible!\n");
174 
175  //remove references to previous rhs features
176  CFeatures* tmp=rhs;
177 
178  rhs=r;
180 
181  SG_FREE(precomputed_matrix);
182  precomputed_matrix=NULL ;
183 
184  // return old features including reference count
185  return tmp;
186 }
187 
189 {
190  //make sure features are compatible
191  REQUIRE(check_compatibility(l, rhs), "Features are not compatible!\n");
192 
193  //remove references to previous rhs features
194  CFeatures* tmp=lhs;
195 
196  lhs=l;
198 
199  SG_FREE(precomputed_matrix);
200  precomputed_matrix=NULL ;
201 
202  // return old features including reference count
203  return tmp;
204 }
205 
206 float64_t CDistance::distance(int32_t idx_a, int32_t idx_b)
207 {
208  REQUIRE(idx_a >= 0 && idx_b >= 0, "In CDistance::distance(int32_t,int32_t), idx_a and "
209  "idx_b must be positive, %d and %d given instead\n", idx_a, idx_b)
210 
211  ASSERT(lhs)
212  ASSERT(rhs)
213 
214  if (lhs==rhs)
215  {
216  int32_t num_vectors = lhs->get_num_vectors();
217 
218  if (idx_a>=num_vectors)
219  idx_a=2*num_vectors-1-idx_a;
220 
221  if (idx_b>=num_vectors)
222  idx_b=2*num_vectors-1-idx_b;
223  }
224 
225  REQUIRE(idx_a < lhs->get_num_vectors() && idx_b < rhs->get_num_vectors(),
226  "In CDistance::distance(int32_t,int32_t), idx_a and idx_b must be less than "
227  "the number of vectors, but %d >= %d or %d >= %d\n",
228  idx_a, lhs->get_num_vectors(), idx_b, rhs->get_num_vectors())
229 
230  if (precompute_matrix && (precomputed_matrix==NULL) && (lhs==rhs))
232 
233  if (precompute_matrix && (precomputed_matrix!=NULL))
234  {
235  if (idx_a>=idx_b)
236  return precomputed_matrix[idx_a*(idx_a+1)/2+idx_b] ;
237  else
238  return precomputed_matrix[idx_b*(idx_b+1)/2+idx_a] ;
239  }
240 
241  return compute(idx_a, idx_b);
242 }
243 
245 {
246  int32_t num_left=lhs->get_num_vectors();
247  int32_t num_right=rhs->get_num_vectors();
248  SG_INFO("precomputing distance matrix (%ix%i)\n", num_left, num_right)
249 
250  ASSERT(num_left==num_right)
251  ASSERT(lhs==rhs)
252  int32_t num=num_left;
253 
254  SG_FREE(precomputed_matrix);
255  precomputed_matrix=SG_MALLOC(float32_t, num*(num+1)/2);
256 
257  for (int32_t i=0; i<num; i++)
258  {
259  SG_PROGRESS(i*i,0,num*num)
260  for (int32_t j=0; j<=i; j++)
261  precomputed_matrix[i*(i+1)/2+j] = compute(i,j) ;
262  }
263 
264  SG_PROGRESS(num*num,0,num*num)
265  SG_DONE()
266 }
267 
268 void CDistance::init()
269 {
270  precomputed_matrix = NULL;
271  precompute_matrix = false;
272  lhs = NULL;
273  rhs = NULL;
274  num_lhs=0;
275  num_rhs=0;
276 
277  m_parameters->add((CSGObject**) &lhs, "lhs",
278  "Feature vectors to occur on left hand side.");
279  m_parameters->add((CSGObject**) &rhs, "rhs",
280  "Feature vectors to occur on right hand side.");
281 }
282 
283 template <class T> void* CDistance::get_distance_matrix_helper(void* p)
284 {
285  D_THREAD_PARAM<T>* params= (D_THREAD_PARAM<T>*) p;
286  int32_t i_start=params->start;
287  int32_t i_end=params->end;
288  CDistance* k=params->distance;
289  T* result=params->result;
290  bool symmetric=params->symmetric;
291  int32_t n=params->n;
292  int32_t m=params->m;
293  bool verbose=params->verbose;
294  int64_t total_start=params->total_start;
295  int64_t total_end=params->total_end;
296  int64_t total=total_start;
297 
298  for (int32_t i=i_start; i<i_end; i++)
299  {
300  int32_t j_start=0;
301 
302  if (symmetric)
303  j_start=i;
304 
305  for (int32_t j=j_start; j<n; j++)
306  {
307  float64_t v=k->distance(i,j);
308  result[i+j*m]=v;
309 
310  if (symmetric && i!=j)
311  result[j+i*m]=v;
312 
313  if (verbose)
314  {
315  total++;
316 
317  if (symmetric && i!=j)
318  total++;
319 
320  if (total%100 == 0)
321  SG_OBJ_PROGRESS(k, total, total_start, total_end)
322 
324  break;
325  }
326  }
327 
328  }
329 
330  return NULL;
331 }
332 
333 template <class T>
335 {
336  T* result = NULL;
337 
338  REQUIRE(has_features(), "no features assigned to distance\n")
339 
340  int32_t m=get_num_vec_lhs();
341  int32_t n=get_num_vec_rhs();
342 
343  int64_t total_num = int64_t(m)*n;
344 
345  // if lhs == rhs and sizes match assume k(i,j)=k(j,i)
346  bool symmetric= (lhs && lhs==rhs && m==n);
347 
348  SG_DEBUG("returning distance matrix of size %dx%d\n", m, n)
349 
350  result=SG_MALLOC(T, total_num);
351 
352  int32_t num_threads=parallel->get_num_threads();
353  if (num_threads < 2)
354  {
355  D_THREAD_PARAM<T> params;
356  params.distance=this;
357  params.result=result;
358  params.start=0;
359  params.end=m;
360  params.total_start=0;
361  params.total_end=total_num;
362  params.n=n;
363  params.m=m;
364  params.symmetric=symmetric;
365  params.verbose=true;
366  get_distance_matrix_helper<T>((void*) &params);
367  }
368  else
369  {
370  pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
371  D_THREAD_PARAM<T>* params = SG_MALLOC(D_THREAD_PARAM<T>, num_threads);
372  int64_t step= total_num/num_threads;
373 
374  int32_t t;
375 
376  num_threads--;
377  for (t=0; t<num_threads; t++)
378  {
379  params[t].distance = this;
380  params[t].result = result;
381  params[t].start = compute_row_start(t*step, n, symmetric);
382  params[t].end = compute_row_start((t+1)*step, n, symmetric);
383  params[t].total_start=t*step;
384  params[t].total_end=(t+1)*step;
385  params[t].n=n;
386  params[t].m=m;
387  params[t].symmetric=symmetric;
388  params[t].verbose=false;
389 
390  int code=pthread_create(&threads[t], NULL,
391  CDistance::get_distance_matrix_helper<T>, (void*)&params[t]);
392 
393  if (code != 0)
394  {
395  SG_WARNING("Thread creation failed (thread %d of %d) "
396  "with error:'%s'\n",t, num_threads, strerror(code));
397  num_threads=t;
398  break;
399  }
400  }
401 
402  params[t].distance = this;
403  params[t].result = result;
404  params[t].start = compute_row_start(t*step, n, symmetric);
405  params[t].end = m;
406  params[t].total_start=t*step;
407  params[t].total_end=total_num;
408  params[t].n=n;
409  params[t].m=m;
410  params[t].symmetric=symmetric;
411  params[t].verbose=true;
412  get_distance_matrix_helper<T>(&params[t]);
413 
414  for (t=0; t<num_threads; t++)
415  {
416  if (pthread_join(threads[t], NULL) != 0)
417  SG_WARNING("pthread_join of thread %d/%d failed\n", t, num_threads)
418  }
419 
420  SG_FREE(params);
421  SG_FREE(threads);
422  }
423 
424  SG_DONE()
425 
426  return SGMatrix<T>(result,m,n,true);
427 }
428 
429 template SGMatrix<float64_t> CDistance::get_distance_matrix<float64_t>();
430 template SGMatrix<float32_t> CDistance::get_distance_matrix<float32_t>();
431 
432 template void* CDistance::get_distance_matrix_helper<float64_t>(void* p);
433 template void* CDistance::get_distance_matrix_helper<float32_t>(void* p);
int32_t end
Definition: Distance.cpp:41
virtual const char * get_name() const =0
virtual bool support_compatible_class() const
Definition: Features.h:323
#define SG_INFO(...)
Definition: SGIO.h:118
#define SG_RESET_LOCALE
Definition: SGIO.h:86
#define SG_DONE()
Definition: SGIO.h:157
void do_precompute_matrix()
matrix precomputation
Definition: Distance.cpp:244
int32_t start
Definition: Distance.cpp:39
virtual bool has_features()
Definition: Distance.h:330
Class Distance, a base class for all the distances used in the Shogun toolbox.
Definition: Distance.h:87
virtual bool get_feature_class_compatibility(EFeatureClass rhs) const
Definition: Features.cpp:355
int32_t get_num_threads() const
Definition: Parallel.cpp:78
#define SG_PROGRESS(...)
Definition: SGIO.h:142
virtual int32_t get_num_vec_lhs()
Definition: Distance.h:312
virtual CFeatures * replace_lhs(CFeatures *lhs)
Definition: Distance.cpp:188
virtual void remove_lhs()
takes all necessary steps if the lhs is removed from distance matrix
Definition: Distance.cpp:155
int32_t num_rhs
Definition: Distance.h:388
virtual int32_t get_num_vectors() const =0
virtual ~CDistance()
Definition: Distance.cpp:70
#define REQUIRE(x,...)
Definition: SGIO.h:206
Parameter * m_parameters
Definition: SGObject.h:546
Parallel * parallel
Definition: SGObject.h:540
int32_t total_end
Definition: Distance.cpp:45
#define SG_REF(x)
Definition: SGObject.h:54
#define SG_SET_LOCALE_C
Definition: SGIO.h:85
virtual bool check_compatibility(CFeatures *l, CFeatures *r)
Definition: Distance.cpp:101
shogun matrix
int32_t total_start
Definition: Distance.cpp:43
void add(bool *param, const char *name, const char *description="")
Definition: Parameter.cpp:37
CDistance * distance
Definition: Distance.cpp:37
#define ASSERT(x)
Definition: SGIO.h:201
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:115
#define SG_OBJ_PROGRESS(o,...)
Definition: SGIO.h:147
virtual void remove_lhs_and_rhs()
Definition: Distance.cpp:144
double float64_t
Definition: common.h:50
void save(CFile *writer)
Definition: Distance.cpp:138
A File access base class.
Definition: File.h:34
void load(CFile *loader)
Definition: Distance.cpp:132
virtual EFeatureClass get_feature_class() const =0
virtual int32_t get_num_vec_rhs()
Definition: Distance.h:321
int32_t num_lhs
Definition: Distance.h:386
static bool cancel_computations()
Definition: Signal.h:86
virtual CFeatures * replace_rhs(CFeatures *rhs)
Definition: Distance.cpp:170
float float32_t
Definition: common.h:49
int32_t compute_row_start(int64_t offs, int32_t n, bool symmetric)
Definition: Distance.h:173
virtual float64_t distance(int32_t idx_a, int32_t idx_b)
Definition: Distance.cpp:206
#define SG_UNREF(x)
Definition: SGObject.h:55
#define SG_DEBUG(...)
Definition: SGIO.h:107
bool precompute_matrix
Definition: Distance.h:378
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
CFeatures * lhs
feature vectors to occur on the left hand side
Definition: Distance.h:381
The class Features is the base class of all feature objects.
Definition: Features.h:68
CFeatures * rhs
feature vectors to occur on the right hand side
Definition: Distance.h:383
SGMatrix< float64_t > get_distance_matrix()
Definition: Distance.h:156
#define SG_WARNING(...)
Definition: SGIO.h:128
float32_t * precomputed_matrix
Definition: Distance.h:373
virtual bool init(CFeatures *lhs, CFeatures *rhs)
Definition: Distance.cpp:78
virtual void remove_rhs()
takes all necessary steps if the rhs is removed from distance matrix
Definition: Distance.cpp:163
static void * get_distance_matrix_helper(void *p)
Definition: Distance.cpp:283
virtual float64_t compute(int32_t idx_a, int32_t idx_b)=0
virtual EFeatureType get_feature_type() const =0

SHOGUN Machine Learning Toolbox - Documentation