SHOGUN  4.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
DistanceMachine.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Christian Gehl
8  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST
9  */
10 
13 #include <shogun/base/Parameter.h>
14 
15 using namespace shogun;
16 
17 #ifndef DOXYGEN_SHOULD_SKIP_THIS
18 struct D_THREAD_PARAM
19 {
20  CDistance* d;
21  float64_t* r;
22  int32_t idx_r_start;
23  int32_t idx_start;
24  int32_t idx_stop;
25  int32_t idx_comp;
26 };
27 #endif // DOXYGEN_SHOULD_SKIP_THIS
28 
30 : CMachine()
31 {
32  init();
33 }
34 
36 {
38 }
39 
40 void CDistanceMachine::init()
41 {
42  /* all distance machines should store their models, i.e. cluster centers
43  * At least, it has to be ensured, that after calling train(), or in the
44  * call of apply() in the cases where there is no train method, the lhs
45  * of the underlying distance is set to cluster centers */
47 
48  distance=NULL;
49  m_parameters->add((CSGObject**)&distance, "distance", "Distance to use");
50 }
51 
52 void CDistanceMachine::distances_lhs(float64_t* result,int32_t idx_a1,int32_t idx_a2,int32_t idx_b)
53 {
54  int32_t num_threads=parallel->get_num_threads();
55  ASSERT(num_threads>0)
56 
57  ASSERT(result)
58 
59  if (num_threads < 2)
60  {
61  D_THREAD_PARAM param;
62  param.d=distance;
63  param.r=result;
64  param.idx_r_start=idx_a1;
65  param.idx_start=idx_a1;
66  param.idx_stop=idx_a2+1;
67  param.idx_comp=idx_b;
68 
69  run_distance_thread_lhs((void*) &param);
70  }
71 #ifdef HAVE_PTHREAD
72  else
73  {
74  pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
75  D_THREAD_PARAM* params = SG_MALLOC(D_THREAD_PARAM, num_threads);
76  int32_t num_vec=idx_a2-idx_a1+1;
77  int32_t step= num_vec/num_threads;
78  int32_t t;
79 
80  pthread_attr_t attr;
81  pthread_attr_init(&attr);
82  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
83 
84  for (t=0; t<num_threads-1; t++)
85  {
86  params[t].d = distance;
87  params[t].r = result;
88  params[t].idx_r_start=t*step;
89  params[t].idx_start = (t*step)+idx_a1;
90  params[t].idx_stop = ((t+1)*step)+idx_a1;
91  params[t].idx_comp=idx_b;
92 
93  pthread_create(&threads[t], &attr, CDistanceMachine::run_distance_thread_lhs, (void*)&params[t]);
94  }
95  params[t].d = distance;
96  params[t].r = result;
97  params[t].idx_r_start=t*step;
98  params[t].idx_start = (t*step)+idx_a1;
99  params[t].idx_stop = idx_a2+1;
100  params[t].idx_comp=idx_b;
101 
102  run_distance_thread_lhs(&params[t]);
103 
104  for (t=0; t<num_threads-1; t++)
105  pthread_join(threads[t], NULL);
106 
107  pthread_attr_destroy(&attr);
108  SG_FREE(params);
109  SG_FREE(threads);
110  }
111 #endif
112 }
113 
114 void CDistanceMachine::distances_rhs(float64_t* result,int32_t idx_b1,int32_t idx_b2,int32_t idx_a)
115 {
116  int32_t num_threads=parallel->get_num_threads();
117  ASSERT(num_threads>0)
118 
119  ASSERT(result)
120 
121  if (num_threads < 2)
122  {
123  D_THREAD_PARAM param;
124  param.d=distance;
125  param.r=result;
126  param.idx_r_start=idx_b1;
127  param.idx_start=idx_b1;
128  param.idx_stop=idx_b2+1;
129  param.idx_comp=idx_a;
130 
131  run_distance_thread_rhs((void*) &param);
132  }
133 #ifndef WIN32
134  else
135  {
136  pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
137  D_THREAD_PARAM* params = SG_MALLOC(D_THREAD_PARAM, num_threads);
138  int32_t num_vec=idx_b2-idx_b1+1;
139  int32_t step= num_vec/num_threads;
140  int32_t t;
141 
142  pthread_attr_t attr;
143  pthread_attr_init(&attr);
144  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
145 
146  for (t=0; t<num_threads-1; t++)
147  {
148  params[t].d = distance;
149  params[t].r = result;
150  params[t].idx_r_start=t*step;
151  params[t].idx_start = (t*step)+idx_b1;
152  params[t].idx_stop = ((t+1)*step)+idx_b1;
153  params[t].idx_comp=idx_a;
154 
155  pthread_create(&threads[t], &attr, CDistanceMachine::run_distance_thread_rhs, (void*)&params[t]);
156  }
157  params[t].d = distance;
158  params[t].r = result;
159  params[t].idx_r_start=t*step;
160  params[t].idx_start = (t*step)+idx_b1;
161  params[t].idx_stop = idx_b2+1;
162  params[t].idx_comp=idx_a;
163 
164  run_distance_thread_rhs(&params[t]);
165 
166  for (t=0; t<num_threads-1; t++)
167  pthread_join(threads[t], NULL);
168 
169  pthread_attr_destroy(&attr);
170  SG_FREE(params);
171  SG_FREE(threads);
172  }
173 #endif
174 }
175 
177 {
178  D_THREAD_PARAM* params= (D_THREAD_PARAM*) p;
179  CDistance* distance=params->d;
180  float64_t* res=params->r;
181  int32_t idx_res_start=params->idx_r_start;
182  int32_t idx_act=params->idx_start;
183  int32_t idx_stop=params->idx_stop;
184  int32_t idx_c=params->idx_comp;
185 
186  for (int32_t i=idx_res_start; idx_act<idx_stop; i++,idx_act++)
187  res[i] =distance->distance(idx_act,idx_c);
188 
189  return NULL;
190 }
191 
193 {
194  D_THREAD_PARAM* params= (D_THREAD_PARAM*) p;
195  CDistance* distance=params->d;
196  float64_t* res=params->r;
197  int32_t idx_res_start=params->idx_r_start;
198  int32_t idx_act=params->idx_start;
199  int32_t idx_stop=params->idx_stop;
200  int32_t idx_c=params->idx_comp;
201 
202  for (int32_t i=idx_res_start; idx_act<idx_stop; i++,idx_act++)
203  res[i] =distance->distance(idx_c,idx_act);
204 
205  return NULL;
206 }
207 
209 {
210  if (data)
211  {
212  /* set distance features to given ones and apply to all */
213  CFeatures* lhs=distance->get_lhs();
214  distance->init(lhs, data);
215  SG_UNREF(lhs);
216 
217  /* build result labels and classify all elements of procedure */
219  for (index_t i=0; i<data->get_num_vectors(); ++i)
220  result->set_label(i, apply_one(i));
221  return result;
222  }
223  else
224  {
225  /* call apply on complete right hand side */
226  CFeatures* all=distance->get_rhs();
227  CMulticlassLabels* result = apply_multiclass(all);
228  SG_UNREF(all);
229  return result;
230  }
231  return NULL;
232 }
233 
235 {
236  /* number of clusters */
237  CFeatures* lhs=distance->get_lhs();
238  int32_t num_clusters=lhs->get_num_vectors();
239  SG_UNREF(lhs);
240 
241  /* (multiple threads) calculate distances to all cluster centers */
242  float64_t* dists=SG_MALLOC(float64_t, num_clusters);
243  distances_lhs(dists, 0, num_clusters-1, num);
244 
245  /* find cluster index with smallest distance */
246  float64_t result=dists[0];
247  index_t best_index=0;
248  for (index_t i=1; i<num_clusters; ++i)
249  {
250  if (dists[i]<result)
251  {
252  result=dists[i];
253  best_index=i;
254  }
255  }
256 
257  SG_FREE(dists);
258 
259  /* implicit cast */
260  return best_index;
261 }
262 
264 {
265  SG_REF(d);
267  distance=d;
268 }
269 
271 {
272  SG_REF(distance);
273  return distance;
274 }
275 
277 {
278  SG_ERROR("store_model_features not yet implemented for %s!\n",
279  get_name());
280 }
281 
CDistance * get_distance() const
Class Distance, a base class for all the distances used in the Shogun toolbox.
Definition: Distance.h:87
int32_t get_num_threads() const
Definition: Parallel.cpp:78
int32_t index_t
Definition: common.h:62
virtual void store_model_features()
CFeatures * get_lhs()
Definition: Distance.h:224
virtual CMulticlassLabels * apply_multiclass(CFeatures *data=NULL)
virtual int32_t get_num_vectors() const =0
void distances_lhs(float64_t *result, int32_t idx_a1, int32_t idx_a2, int32_t idx_b)
#define SG_ERROR(...)
Definition: SGIO.h:129
CFeatures * get_rhs()
Definition: Distance.h:230
Parameter * m_parameters
Definition: SGObject.h:546
static void * run_distance_thread_lhs(void *p)
virtual const char * get_name() const
Parallel * parallel
Definition: SGObject.h:540
#define SG_REF(x)
Definition: SGObject.h:54
A generic learning machine interface.
Definition: Machine.h:143
bool set_label(int32_t idx, float64_t label)
Multiclass Labels for multi-class classification.
void add(bool *param, const char *name, const char *description="")
Definition: Parameter.cpp:37
virtual void set_store_model_features(bool store_model)
Definition: Machine.cpp:107
#define ASSERT(x)
Definition: SGIO.h:201
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:115
double float64_t
Definition: common.h:50
virtual float64_t apply_one(int32_t num)
void distances_rhs(float64_t *result, int32_t idx_b1, int32_t idx_b2, int32_t idx_a)
virtual float64_t distance(int32_t idx_a, int32_t idx_b)
Definition: Distance.cpp:206
#define SG_UNREF(x)
Definition: SGObject.h:55
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
The class Features is the base class of all feature objects.
Definition: Features.h:68
void set_distance(CDistance *d)
static void * run_distance_thread_rhs(void *p)
virtual bool init(CFeatures *lhs, CFeatures *rhs)
Definition: Distance.cpp:78

SHOGUN Machine Learning Toolbox - Documentation