SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Isomap.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2011 Sergey Lisitsyn
8  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
9  */
10 
12 #ifdef HAVE_LAPACK
13 #include <shogun/lib/common.h>
16 #include <shogun/io/SGIO.h>
17 #include <shogun/base/Parallel.h>
18 #include <shogun/lib/Signal.h>
19 #include <shogun/lib/CoverTree.h>
20 
21 #ifdef HAVE_PTHREAD
22 #include <pthread.h>
23 #endif
24 
25 using namespace shogun;
26 
27 #ifndef DOXYGEN_SHOULD_SKIP_THIS
28 /* struct storing thread params
29  */
30 struct DIJKSTRA_THREAD_PARAM
31 {
33  CFibonacciHeap* heap;
36  const float64_t* edges_matrix;
39  const int32_t* edges_idx_matrix;
41  float64_t* shortest_D;
43  int32_t i_start;
45  int32_t i_stop;
47  int32_t i_step;
49  int32_t m_k;
51  bool* s;
53  bool* f;
54 };
55 
56 class ISOMAP_COVERTREE_POINT
57 {
58 public:
59 
60  ISOMAP_COVERTREE_POINT(int32_t index, const SGMatrix<float64_t>& dmatrix)
61  {
62  point_index = index;
63  distance_matrix = dmatrix;
64  }
65 
66  inline double distance(const ISOMAP_COVERTREE_POINT& p) const
67  {
68  return distance_matrix[point_index*distance_matrix.num_rows+p.point_index];
69  }
70 
71  inline bool operator==(const ISOMAP_COVERTREE_POINT& p) const
72  {
73  return (p.point_index==point_index);
74  }
75 
76  int32_t point_index;
77  SGMatrix<float64_t> distance_matrix;
78 };
79 
80 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
81 
83 {
84  m_k = 3;
85 
86  init();
87 }
88 
90 {
91  SG_ADD(&m_k, "k", "number of neighbors", MS_AVAILABLE);
92 }
93 
95 {
96 }
97 
98 void CIsomap::set_k(int32_t k)
99 {
100  ASSERT(k>0);
101  m_k = k;
102 }
103 
104 int32_t CIsomap::get_k() const
105 {
106  return m_k;
107 }
108 
109 const char* CIsomap::get_name() const
110 {
111  return "Isomap";
112 }
113 
115 {
116  return isomap_distance(distance_matrix);
117 }
118 
120 {
121  int32_t N,i;
122  N = D_matrix.num_cols;
123  if (D_matrix.num_cols!=D_matrix.num_rows)
124  {
125  SG_ERROR("Given distance matrix is not square.\n");
126  }
127  if (m_k>=N)
128  {
129  SG_ERROR("K parameter should be less than number of given vectors (k=%d, N=%d)\n", m_k, N);
130  }
131 
132  // cut by k-nearest neighbors
133  int32_t* edges_idx_matrix = SG_MALLOC(int32_t, N*m_k);
134  float64_t* edges_matrix = SG_MALLOC(float64_t, N*m_k);
135 
136  float64_t max_dist = SGVector<float64_t>::max(D_matrix.matrix,N*N);
138 
139  for (i=0; i<N; i++)
140  coverTree->insert(ISOMAP_COVERTREE_POINT(i,D_matrix));
141 
142  for (i=0; i<N; i++)
143  {
144  ISOMAP_COVERTREE_POINT origin(i,D_matrix);
145  std::vector<ISOMAP_COVERTREE_POINT> neighbors =
146  coverTree->kNearestNeighbors(origin,m_k+1);
147  for (std::size_t m=1; m<neighbors.size(); m++)
148  {
149  edges_matrix[i*m_k+m-1] = origin.distance(neighbors[m]);
150  edges_idx_matrix[i*m_k+m-1] = neighbors[m].point_index;
151  }
152  }
153 
154  delete coverTree;
155 
156 #ifdef HAVE_PTHREAD
157  int32_t t;
158  // Parallel Dijkstra with Fibonacci Heap
159  int32_t num_threads = parallel->get_num_threads();
160  ASSERT(num_threads>0);
161  // allocate threads and thread parameters
162  pthread_t* threads = SG_MALLOC(pthread_t, num_threads);
163  DIJKSTRA_THREAD_PARAM* parameters = SG_MALLOC(DIJKSTRA_THREAD_PARAM, num_threads);
164  // allocate heaps
165  CFibonacciHeap** heaps = SG_MALLOC(CFibonacciHeap*, num_threads);
166  for (t=0; t<num_threads; t++)
167  heaps[t] = new CFibonacciHeap(N);
168 
169 #else
170  int32_t num_threads = 1;
171 #endif
172 
173  // allocate (s)olution
174  bool* s = SG_MALLOC(bool,N*num_threads);
175  // allocate (f)rontier
176  bool* f = SG_MALLOC(bool,N*num_threads);
177  // init matrix to store shortest distances
178  float64_t* shortest_D = D_matrix.matrix;
179 
180 #ifdef HAVE_PTHREAD
181 
182  pthread_attr_t attr;
183  pthread_attr_init(&attr);
184  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
185  for (t=0; t<num_threads; t++)
186  {
187  parameters[t].i_start = t;
188  parameters[t].i_stop = N;
189  parameters[t].i_step = num_threads;
190  parameters[t].heap = heaps[t];
191  parameters[t].edges_matrix = edges_matrix;
192  parameters[t].edges_idx_matrix = edges_idx_matrix;
193  parameters[t].s = s+t*N;
194  parameters[t].f = f+t*N;
195  parameters[t].m_k = m_k;
196  parameters[t].shortest_D = shortest_D;
197  pthread_create(&threads[t], &attr, CIsomap::run_dijkstra_thread, (void*)&parameters[t]);
198  }
199  for (t=0; t<num_threads; t++)
200  pthread_join(threads[t], NULL);
201  pthread_attr_destroy(&attr);
202  for (t=0; t<num_threads; t++)
203  delete heaps[t];
204  SG_FREE(heaps);
205  SG_FREE(parameters);
206  SG_FREE(threads);
207 #else
208  DIJKSTRA_THREAD_PARAM single_thread_param;
209  single_thread_param.i_start = 0;
210  single_thread_param.i_stop = N;
211  single_thread_param.i_step = 1;
212  single_thread_param.m_k = m_k;
213  single_thread_param.heap = new CFibonacciHeap(N);
214  single_thread_param.edges_matrix = edges_matrix;
215  single_thread_param.edges_idx_matrix = edges_idx_matrix;
216  single_thread_param.s = s;
217  single_thread_param.f = f;
218  single_thread_param.shortest_D = shortest_D;
219  run_dijkstra_thread((void*)&single_thread_param);
220  delete single_thread_param.heap;
221 #endif
222  // cleanup
223  SG_FREE(edges_matrix);
224  SG_FREE(edges_idx_matrix);
225  SG_FREE(s);
226  SG_FREE(f);
227 
228  return D_matrix;
229 }
230 
232 {
233  // get parameters from p
234  DIJKSTRA_THREAD_PARAM* parameters = (DIJKSTRA_THREAD_PARAM*)p;
235  CFibonacciHeap* heap = parameters->heap;
236  int32_t i_start = parameters->i_start;
237  int32_t i_stop = parameters->i_stop;
238  int32_t i_step = parameters->i_step;
239  bool* s = parameters->s;
240  bool* f = parameters->f;
241  const float64_t* edges_matrix = parameters->edges_matrix;
242  const int32_t* edges_idx_matrix = parameters->edges_idx_matrix;
243  float64_t* shortest_D = parameters->shortest_D;
244  int32_t m_k = parameters->m_k;
245  int32_t k,j,i,min_item,w;
246  int32_t N = i_stop;
247 
248  // temporary vars
249  float64_t dist,tmp;
250 
251  // main loop
252  for (k=i_start; k<i_stop; k+=i_step)
253  {
254  // fill s and f with false, fill shortest_D with infinity
255  for (j=0; j<N; j++)
256  {
257  shortest_D[k*N+j] = CMath::ALMOST_INFTY;
258  s[j] = false;
259  f[j] = false;
260  }
261  // set distance from k to k as zero
262  shortest_D[k*N+k] = 0.0;
263 
264  // insert kth object to heap with zero distance and set f[k] true
265  heap->insert(k,0.0);
266  f[k] = true;
267 
268  // while heap is not empty
269  while (heap->get_num_nodes()>0)
270  {
271  // extract min and set (s)olution state as true and (f)rontier as false
272  min_item = heap->extract_min(tmp);
273  s[min_item] = true;
274  f[min_item] = false;
275 
276  // for-each edge (min_item->w)
277  for (i=0; i<m_k; i++)
278  {
279  // get w idx
280  w = edges_idx_matrix[min_item*m_k+i];
281  // if w is not in solution yet
282  if (s[w] == false)
283  {
284  // get distance from k to i through min_item
285  dist = shortest_D[k*N+min_item] + edges_matrix[min_item*m_k+i];
286  // if distance can be relaxed
287  if (dist < shortest_D[k*N+w])
288  {
289  // relax distance
290  shortest_D[k*N+w] = dist;
291  // if w is in (f)rontier
292  if (f[w])
293  {
294  // decrease distance in heap
295  heap->decrease_key(w, dist);
296  }
297  else
298  {
299  // insert w to heap and set (f)rontier as true
300  heap->insert(w, dist);
301  f[w] = true;
302  }
303  }
304  }
305  }
306  }
307  // clear heap to re-use
308  heap->clear();
309  }
310  return NULL;
311 }
312 
313 #endif /* HAVE_LAPACK */

SHOGUN Machine Learning Toolbox - Documentation