SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DistanceMachine.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Christian Gehl
8  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST
9  */
10 
12 #include <shogun/base/Parameter.h>
13 
14 using namespace shogun;
15 
16 #ifndef DOXYGEN_SHOULD_SKIP_THIS
// Argument bundle that distances_lhs()/distances_rhs() fill in and hand (as
// void*) to the distance worker routines further down in this file.
17 struct D_THREAD_PARAM
18 {
 // distance object whose distance(i, j) the worker evaluates
19  CDistance* d;
 // output buffer the worker writes the computed values into
20  float64_t* r;
 // index in r at which the worker stores its first value
21  int32_t idx_r_start;
 // index of the first vector the worker evaluates
22  int32_t idx_start;
 // exclusive upper bound: the worker loops while its index is < idx_stop
23  int32_t idx_stop;
 // index of the fixed vector every evaluated vector is compared with
24  int32_t idx_comp;
25 };
26 #endif // DOXYGEN_SHOULD_SKIP_THIS
27 
// Constructor body: delegates to the CMachine base constructor, then calls
// init().
// NOTE(review): the signature line (listing line 28) is missing from this
// extract; presumably this is the default constructor of CDistanceMachine --
// confirm against the original file.
29 : CMachine()
30 {
31  init();
32 }
33 
// NOTE(review): both the signature (listing line 34) and the only body line
// (listing line 36) are missing from this extract. By its position right
// after the constructor this is presumably the destructor -- restore both
// lines from the original file before relying on this block.
35 {
37 }
38 
// Shared initialisation: resets the distance member to NULL and registers it
// with m_parameters under the name "distance".
39 void CDistanceMachine::init()
40 {
41  /* all distance machines should store their models, i.e. cluster centers
42  * At least, it has to be ensured, that after calling train(), or in the
43  * call of apply() in the cases where there is no train method, the lhs
44  * of the underlying distance is set to cluster centers */
 // NOTE(review): listing line 45 is missing from this extract; the comment
 // above presumably described a statement that stood here -- confirm against
 // the original file.
46 
47  distance=NULL;
48  m_parameters->add((CSGObject**)&distance, "distance", "Distance to use");
49 }
50 
51 void CDistanceMachine::distances_lhs(float64_t* result,int32_t idx_a1,int32_t idx_a2,int32_t idx_b)
52 {
53  int32_t num_threads=parallel->get_num_threads();
54  ASSERT(num_threads>0)
55 
56  ASSERT(result)
57 
58  if (num_threads < 2)
59  {
60  D_THREAD_PARAM param;
61  param.d=distance;
62  param.r=result;
63  param.idx_r_start=idx_a1;
64  param.idx_start=idx_a1;
65  param.idx_stop=idx_a2+1;
66  param.idx_comp=idx_b;
67 
68  run_distance_thread_lhs((void*) &param);
69  }
70 #ifdef HAVE_PTHREAD
71  else
72  {
73  pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
74  D_THREAD_PARAM* params = SG_MALLOC(D_THREAD_PARAM, num_threads);
75  int32_t num_vec=idx_a2-idx_a1+1;
76  int32_t step= num_vec/num_threads;
77  int32_t t;
78 
79  pthread_attr_t attr;
80  pthread_attr_init(&attr);
81  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
82 
83  for (t=0; t<num_threads-1; t++)
84  {
85  params[t].d = distance;
86  params[t].r = result;
87  params[t].idx_r_start=t*step;
88  params[t].idx_start = (t*step)+idx_a1;
89  params[t].idx_stop = ((t+1)*step)+idx_a1;
90  params[t].idx_comp=idx_b;
91 
92  pthread_create(&threads[t], &attr, CDistanceMachine::run_distance_thread_lhs, (void*)&params[t]);
93  }
94  params[t].d = distance;
95  params[t].r = result;
96  params[t].idx_r_start=t*step;
97  params[t].idx_start = (t*step)+idx_a1;
98  params[t].idx_stop = idx_a2+1;
99  params[t].idx_comp=idx_b;
100 
101  run_distance_thread_lhs(&params[t]);
102 
103  for (t=0; t<num_threads-1; t++)
104  pthread_join(threads[t], NULL);
105 
106  pthread_attr_destroy(&attr);
107  SG_FREE(params);
108  SG_FREE(threads);
109  }
110 #endif
111 }
112 
113 void CDistanceMachine::distances_rhs(float64_t* result,int32_t idx_b1,int32_t idx_b2,int32_t idx_a)
114 {
115  int32_t num_threads=parallel->get_num_threads();
116  ASSERT(num_threads>0)
117 
118  ASSERT(result)
119 
120  if (num_threads < 2)
121  {
122  D_THREAD_PARAM param;
123  param.d=distance;
124  param.r=result;
125  param.idx_r_start=idx_b1;
126  param.idx_start=idx_b1;
127  param.idx_stop=idx_b2+1;
128  param.idx_comp=idx_a;
129 
130  run_distance_thread_rhs((void*) &param);
131  }
132 #ifndef WIN32
133  else
134  {
135  pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
136  D_THREAD_PARAM* params = SG_MALLOC(D_THREAD_PARAM, num_threads);
137  int32_t num_vec=idx_b2-idx_b1+1;
138  int32_t step= num_vec/num_threads;
139  int32_t t;
140 
141  pthread_attr_t attr;
142  pthread_attr_init(&attr);
143  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
144 
145  for (t=0; t<num_threads-1; t++)
146  {
147  params[t].d = distance;
148  params[t].r = result;
149  params[t].idx_r_start=t*step;
150  params[t].idx_start = (t*step)+idx_b1;
151  params[t].idx_stop = ((t+1)*step)+idx_b1;
152  params[t].idx_comp=idx_a;
153 
154  pthread_create(&threads[t], &attr, CDistanceMachine::run_distance_thread_rhs, (void*)&params[t]);
155  }
156  params[t].d = distance;
157  params[t].r = result;
158  params[t].idx_r_start=t*step;
159  params[t].idx_start = (t*step)+idx_b1;
160  params[t].idx_stop = idx_b2+1;
161  params[t].idx_comp=idx_a;
162 
163  run_distance_thread_rhs(&params[t]);
164 
165  for (t=0; t<num_threads-1; t++)
166  pthread_join(threads[t], NULL);
167 
168  pthread_attr_destroy(&attr);
169  SG_FREE(params);
170  SG_FREE(threads);
171  }
172 #endif
173 }
174 
// Worker body: interprets p as a D_THREAD_PARAM* and, for every index i in
// [idx_start, idx_stop), stores distance(i, idx_comp) into consecutive slots
// of the result buffer starting at idx_r_start. Always returns NULL.
// NOTE(review): the signature line (listing line 175) is missing from this
// extract; since the varying index is the first argument of distance(), this
// is presumably CDistanceMachine::run_distance_thread_lhs -- confirm.
176 {
177  D_THREAD_PARAM* params= (D_THREAD_PARAM*) p;
178  CDistance* distance=params->d;
179  float64_t* res=params->r;
180  int32_t idx_res_start=params->idx_r_start;
181  int32_t idx_act=params->idx_start;
182  int32_t idx_stop=params->idx_stop;
183  int32_t idx_c=params->idx_comp;
184 
 // i walks the result buffer while idx_act walks the vector indices
185  for (int32_t i=idx_res_start; idx_act<idx_stop; i++,idx_act++)
186  res[i] =distance->distance(idx_act,idx_c);
187 
188  return NULL;
189 }
190 
// Worker body: interprets p as a D_THREAD_PARAM* and, for every index j in
// [idx_start, idx_stop), stores distance(idx_comp, j) into consecutive slots
// of the result buffer starting at idx_r_start. Always returns NULL.
// NOTE(review): the signature line (listing line 191) is missing from this
// extract; since the varying index is the second argument of distance(),
// this is presumably CDistanceMachine::run_distance_thread_rhs -- confirm.
192 {
193  D_THREAD_PARAM* params= (D_THREAD_PARAM*) p;
194  CDistance* distance=params->d;
195  float64_t* res=params->r;
196  int32_t idx_res_start=params->idx_r_start;
197  int32_t idx_act=params->idx_start;
198  int32_t idx_stop=params->idx_stop;
199  int32_t idx_c=params->idx_comp;
200 
 // i walks the result buffer while idx_act walks the vector indices
201  for (int32_t i=idx_res_start; idx_act<idx_stop; i++,idx_act++)
202  res[i] =distance->distance(idx_c,idx_act);
203 
204  return NULL;
205 }
206 
// Labels every vector of data with the value returned by apply_one(); when
// data is NULL the routine calls apply_multiclass() on the current rhs of the
// distance instead.
// NOTE(review): the signature line (listing line 207) is missing from this
// extract; the recursive call below suggests this is
// CDistanceMachine::apply_multiclass taking a CFeatures* named data and
// returning CMulticlassLabels* -- confirm against the original file.
208 {
209  if (data)
210  {
211  /* set distance features to given ones and apply to all */
 // the current lhs is kept and data is installed as the new rhs
212  CFeatures* lhs=distance->get_lhs();
213  distance->init(lhs, data);
214  SG_UNREF(lhs);
215 
216  /* build result labels and classify all elements of procedure */
 // NOTE(review): listing line 217 is missing from this extract; presumably
 // it declared and allocated `result`, which is used below -- confirm.
218  for (index_t i=0; i<data->get_num_vectors(); ++i)
219  result->set_label(i, apply_one(i));
220  return result;
221  }
222  else
223  {
224  /* call apply on complete right hand side */
 // NOTE(review): if get_rhs() yields NULL the recursive call takes this
 // branch again and never terminates.
225  CFeatures* all=distance->get_rhs();
226  CMulticlassLabels* result = apply_multiclass(all);
227  SG_UNREF(all);
228  return result;
229  }
 // never reached: both branches above return
230  return NULL;
231 }
232 
// Returns the index of the lhs vector with the smallest distance to vector
// num; on ties the lowest index wins because the comparison is strict.
// NOTE(review): the signature line (listing line 233) is missing from this
// extract; the call apply_one(i) elsewhere in this file suggests this is
// CDistanceMachine::apply_one with an index parameter named num -- confirm.
234 {
235  /* number of clusters */
236  CFeatures* lhs=distance->get_lhs();
237  int32_t num_clusters=lhs->get_num_vectors();
238  SG_UNREF(lhs);
239 
240  /* (multiple threads) calculate distances to all cluster centers */
241  float64_t* dists=SG_MALLOC(float64_t, num_clusters);
242  distances_lhs(dists, 0, num_clusters-1, num);
243 
244  /* find cluster index with smallest distance */
 // NOTE(review): dists[0] is read unconditionally; with num_clusters==0 this
 // reads past the end of the (empty) buffer.
245  float64_t result=dists[0];
246  index_t best_index=0;
247  for (index_t i=1; i<num_clusters; ++i)
248  {
249  if (dists[i]<result)
250  {
251  result=dists[i];
252  best_index=i;
253  }
254  }
255 
256  SG_FREE(dists);
257 
258  /* implicit cast */
259  return best_index;
260 }
261 
// Applies SG_REF to d and stores it in the distance member.
// NOTE(review): the signature line (listing line 262) and listing line 265
// are missing from this extract; presumably this is
// CDistanceMachine::set_distance(CDistance* d). Whether the previously stored
// distance is released cannot be told from here -- confirm against the
// original file.
263 {
264  SG_REF(d);
266  distance=d;
267 }
268 
// Applies SG_REF to the distance member and returns it.
// NOTE(review): the signature line (listing line 269) is missing from this
// extract; presumably this is CDistanceMachine::get_distance, and presumably
// the caller has to SG_UNREF the returned object (as this file does with the
// results of get_lhs()/get_rhs()) -- confirm.
270 {
271  SG_REF(distance);
272  return distance;
273 }
274 
// Reports via SG_ERROR that store_model_features is not implemented for the
// class named by get_name(); does nothing else.
// NOTE(review): the signature line (listing line 275) is missing from this
// extract; going by the error message this is presumably
// CDistanceMachine::store_model_features -- confirm.
276 {
277  SG_ERROR("store_model_features not yet implemented for %s!\n",
278  get_name());
279 }
280 

SHOGUN Machine Learning Toolbox - Documentation