SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Distance.cpp
Go to the documentation of this file.
1 /*
2  * this program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2006-2009 Christian Gehl
8  * Written (W) 2006-2009 Soeren Sonnenburg
9  * Copyright (C) 2006-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #include <shogun/lib/config.h>
13 #include <shogun/lib/common.h>
14 #include <shogun/io/SGIO.h>
15 #include <shogun/io/File.h>
16 #include <shogun/lib/Time.h>
17 #include <shogun/lib/Signal.h>
18 #include <shogun/base/Parallel.h>
19 #include <shogun/base/Parameter.h>
20 
23 
24 #include <string.h>
25 #include <unistd.h>
26 
27 #ifdef HAVE_PTHREAD
28 #include <pthread.h>
29 #endif
30 
31 using namespace shogun;
32 
34 template <class T> struct D_THREAD_PARAM
35 {
39  int32_t start;
41  int32_t end;
43  int32_t total_start;
45  int32_t total_end;
47  int32_t m;
49  int32_t n;
51  T* result;
53  bool symmetric;
55  bool verbose;
56 };
57 
59 {
60  init();
61 }
62 
63 
65 {
66  init();
67  init(p_lhs, p_rhs);
68 }
69 
71 {
72  SG_FREE(precomputed_matrix);
73  precomputed_matrix=NULL;
74 
76 }
77 
78 bool CDistance::init(CFeatures* l, CFeatures* r)
79 {
80  //make sure features were indeed supplied
81  ASSERT(l)
82  ASSERT(r)
83 
84  //make sure features are compatible
87 
88  //remove references to previous features
90 
91  //increase reference counts
92  SG_REF(l);
93  SG_REF(r);
94 
95  lhs=l;
96  rhs=r;
97 
100 
101  SG_FREE(precomputed_matrix);
102  precomputed_matrix=NULL ;
103 
104  return true;
105 }
106 
107 void CDistance::load(CFile* loader)
108 {
111 }
112 
113 void CDistance::save(CFile* writer)
114 {
117 }
118 
120 {
121  SG_UNREF(rhs);
122  rhs = NULL;
123  num_rhs=0;
124 
125  SG_UNREF(lhs);
126  lhs = NULL;
127  num_lhs=0;
128 }
129 
131 {
132  SG_UNREF(lhs);
133  lhs = NULL;
134  num_lhs=0;
135 }
136 
139 {
140  SG_UNREF(rhs);
141  rhs = NULL;
142  num_rhs=0;
143 }
144 
146 {
147  //make sure features were indeed supplied
148  ASSERT(r)
149 
150  //make sure features are compatible
153 
154  //remove references to previous rhs features
155  CFeatures* tmp=rhs;
156 
157  rhs=r;
159 
160  SG_FREE(precomputed_matrix);
161  precomputed_matrix=NULL ;
162 
163  // return old features including reference count
164  return tmp;
165 }
166 
168 {
169  //make sure features were indeed supplied
170  ASSERT(l)
171 
172  //make sure features are compatible
175 
176  //remove references to previous rhs features
177  CFeatures* tmp=lhs;
178 
179  lhs=l;
181 
182  SG_FREE(precomputed_matrix);
183  precomputed_matrix=NULL ;
184 
185  // return old features including reference count
186  return tmp;
187 }
188 
189 float64_t CDistance::distance(int32_t idx_a, int32_t idx_b)
190 {
191  REQUIRE(idx_a >= 0 && idx_b >= 0, "In CDistance::distance(int32_t,int32_t), idx_a and "
192  "idx_b must be positive, %d and %d given instead\n", idx_a, idx_b)
193 
194  ASSERT(lhs)
195  ASSERT(rhs)
196 
197  if (lhs==rhs)
198  {
199  int32_t num_vectors = lhs->get_num_vectors();
200 
201  if (idx_a>=num_vectors)
202  idx_a=2*num_vectors-1-idx_a;
203 
204  if (idx_b>=num_vectors)
205  idx_b=2*num_vectors-1-idx_b;
206  }
207 
208  REQUIRE(idx_a < lhs->get_num_vectors() && idx_b < rhs->get_num_vectors(),
209  "In CDistance::distance(int32_t,int32_t), idx_a and idx_b must be less than "
210  "the number of vectors, but %d >= %d or %d >= %d\n",
211  idx_a, lhs->get_num_vectors(), idx_b, rhs->get_num_vectors())
212 
213  if (precompute_matrix && (precomputed_matrix==NULL) && (lhs==rhs))
215 
216  if (precompute_matrix && (precomputed_matrix!=NULL))
217  {
218  if (idx_a>=idx_b)
219  return precomputed_matrix[idx_a*(idx_a+1)/2+idx_b] ;
220  else
221  return precomputed_matrix[idx_b*(idx_b+1)/2+idx_a] ;
222  }
223 
224  return compute(idx_a, idx_b);
225 }
226 
228 {
229  int32_t num_left=lhs->get_num_vectors();
230  int32_t num_right=rhs->get_num_vectors();
231  SG_INFO("precomputing distance matrix (%ix%i)\n", num_left, num_right)
232 
233  ASSERT(num_left==num_right)
234  ASSERT(lhs==rhs)
235  int32_t num=num_left;
236 
237  SG_FREE(precomputed_matrix);
238  precomputed_matrix=SG_MALLOC(float32_t, num*(num+1)/2);
239 
240  for (int32_t i=0; i<num; i++)
241  {
242  SG_PROGRESS(i*i,0,num*num)
243  for (int32_t j=0; j<=i; j++)
244  precomputed_matrix[i*(i+1)/2+j] = compute(i,j) ;
245  }
246 
247  SG_PROGRESS(num*num,0,num*num)
248  SG_DONE()
249 }
250 
251 void CDistance::init()
252 {
253  precomputed_matrix = NULL;
254  precompute_matrix = false;
255  lhs = NULL;
256  rhs = NULL;
257  num_lhs=0;
258  num_rhs=0;
259 
260  m_parameters->add((CSGObject**) &lhs, "lhs",
261  "Feature vectors to occur on left hand side.");
262  m_parameters->add((CSGObject**) &rhs, "rhs",
263  "Feature vectors to occur on right hand side.");
264 }
265 
266 template <class T> void* CDistance::get_distance_matrix_helper(void* p)
267 {
268  D_THREAD_PARAM<T>* params= (D_THREAD_PARAM<T>*) p;
269  int32_t i_start=params->start;
270  int32_t i_end=params->end;
271  CDistance* k=params->distance;
272  T* result=params->result;
273  bool symmetric=params->symmetric;
274  int32_t n=params->n;
275  int32_t m=params->m;
276  bool verbose=params->verbose;
277  int64_t total_start=params->total_start;
278  int64_t total_end=params->total_end;
279  int64_t total=total_start;
280 
281  for (int32_t i=i_start; i<i_end; i++)
282  {
283  int32_t j_start=0;
284 
285  if (symmetric)
286  j_start=i;
287 
288  for (int32_t j=j_start; j<n; j++)
289  {
290  float64_t v=k->distance(i,j);
291  result[i+j*m]=v;
292 
293  if (symmetric && i!=j)
294  result[j+i*m]=v;
295 
296  if (verbose)
297  {
298  total++;
299 
300  if (symmetric && i!=j)
301  total++;
302 
303  if (total%100 == 0)
304  SG_OBJ_PROGRESS(k, total, total_start, total_end)
305 
307  break;
308  }
309  }
310 
311  }
312 
313  return NULL;
314 }
315 
316 template <class T>
318 {
319  T* result = NULL;
320 
321  REQUIRE(has_features(), "no features assigned to distance\n")
322 
323  int32_t m=get_num_vec_lhs();
324  int32_t n=get_num_vec_rhs();
325 
326  int64_t total_num = int64_t(m)*n;
327 
328  // if lhs == rhs and sizes match assume k(i,j)=k(j,i)
329  bool symmetric= (lhs && lhs==rhs && m==n);
330 
331  SG_DEBUG("returning distance matrix of size %dx%d\n", m, n)
332 
333  result=SG_MALLOC(T, total_num);
334 
335  int32_t num_threads=parallel->get_num_threads();
336  if (num_threads < 2)
337  {
338  D_THREAD_PARAM<T> params;
339  params.distance=this;
340  params.result=result;
341  params.start=0;
342  params.end=m;
343  params.total_start=0;
344  params.total_end=total_num;
345  params.n=n;
346  params.m=m;
347  params.symmetric=symmetric;
348  params.verbose=true;
349  get_distance_matrix_helper<T>((void*) &params);
350  }
351  else
352  {
353  pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
354  D_THREAD_PARAM<T>* params = SG_MALLOC(D_THREAD_PARAM<T>, num_threads);
355  int64_t step= total_num/num_threads;
356 
357  int32_t t;
358 
359  num_threads--;
360  for (t=0; t<num_threads; t++)
361  {
362  params[t].distance = this;
363  params[t].result = result;
364  params[t].start = compute_row_start(t*step, n, symmetric);
365  params[t].end = compute_row_start((t+1)*step, n, symmetric);
366  params[t].total_start=t*step;
367  params[t].total_end=(t+1)*step;
368  params[t].n=n;
369  params[t].m=m;
370  params[t].symmetric=symmetric;
371  params[t].verbose=false;
372 
373  int code=pthread_create(&threads[t], NULL,
374  CDistance::get_distance_matrix_helper<T>, (void*)&params[t]);
375 
376  if (code != 0)
377  {
378  SG_WARNING("Thread creation failed (thread %d of %d) "
379  "with error:'%s'\n",t, num_threads, strerror(code));
380  num_threads=t;
381  break;
382  }
383  }
384 
385  params[t].distance = this;
386  params[t].result = result;
387  params[t].start = compute_row_start(t*step, n, symmetric);
388  params[t].end = m;
389  params[t].total_start=t*step;
390  params[t].total_end=total_num;
391  params[t].n=n;
392  params[t].m=m;
393  params[t].symmetric=symmetric;
394  params[t].verbose=true;
395  get_distance_matrix_helper<T>(&params[t]);
396 
397  for (t=0; t<num_threads; t++)
398  {
399  if (pthread_join(threads[t], NULL) != 0)
400  SG_WARNING("pthread_join of thread %d/%d failed\n", t, num_threads)
401  }
402 
403  SG_FREE(params);
404  SG_FREE(threads);
405  }
406 
407  SG_DONE()
408 
409  return SGMatrix<T>(result,m,n,true);
410 }
411 
414 
415 template void* CDistance::get_distance_matrix_helper<float64_t>(void* p);
416 template void* CDistance::get_distance_matrix_helper<float32_t>(void* p);

SHOGUN Machine Learning Toolbox - Documentation