SHOGUN  v2.0.0
KernelLocallyLinearEmbedding.cpp
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Sergey Lisitsyn
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/converter/KernelLocallyLinearEmbedding.h>
#ifdef HAVE_LAPACK
#include <shogun/mathematics/lapack.h>
#include <shogun/mathematics/Math.h>
#include <shogun/lib/common.h>
#include <shogun/base/DynArray.h>
#include <shogun/lib/CoverTree.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/io/SGIO.h>
#include <shogun/lib/Time.h>
#include <shogun/lib/Signal.h>
#include <vector>

#ifdef HAVE_PTHREAD
#include <pthread.h>
#endif

using namespace shogun;

#ifndef DOXYGEN_SHOULD_SKIP_THIS
struct LK_RECONSTRUCTION_THREAD_PARAM
{
	/// starting index of loop
	int32_t idx_start;
	/// end index of loop
	int32_t idx_stop;
	/// step of loop
	int32_t idx_step;
	/// number of neighbors
	int32_t m_k;
	/// number of vectors
	int32_t N;
	/// matrix containing indexes of neighbors of each vector
	const int32_t* neighborhood_matrix;
	/// local gram matrix
	float64_t* local_gram_matrix;
	/// kernel matrix
	const float64_t* kernel_matrix;
	/// vector used for solving the linear system
	float64_t* id_vector;
	/// regularization shift used in the reconstruction step
	float64_t reconstruction_shift;
	/// weight matrix
	float64_t* W_matrix;
};

class KLLE_COVERTREE_POINT
{
public:

	KLLE_COVERTREE_POINT(int32_t index, const SGMatrix<float64_t>& dmatrix)
	{
		this->point_index = index;
		this->kernel_matrix = dmatrix;
		this->kii = dmatrix[index*dmatrix.num_rows+index];
	}

	inline double distance(const KLLE_COVERTREE_POINT& p) const
	{
		int32_t N = kernel_matrix.num_rows;
		return kii+kernel_matrix[p.point_index*N+p.point_index]-2.0*kernel_matrix[point_index*N+p.point_index];
	}

	inline bool operator==(const KLLE_COVERTREE_POINT& p) const
	{
		return (p.point_index==this->point_index);
	}

	int32_t point_index;
	float64_t kii;
	SGMatrix<float64_t> kernel_matrix;
};

#endif /* DOXYGEN_SHOULD_SKIP_THIS */
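
// Note: KLLE_COVERTREE_POINT::distance() above returns the squared distance between two
// points in the kernel-induced feature space, ||phi(x_i)-phi(x_j)||^2 = K_ii + K_jj - 2*K_ij,
// so the cover tree can search for nearest neighbors using kernel values only.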

CKernelLocallyLinearEmbedding::CKernelLocallyLinearEmbedding() :
		CLocallyLinearEmbedding()
{
}

CKernelLocallyLinearEmbedding::CKernelLocallyLinearEmbedding(CKernel* kernel) :
		CLocallyLinearEmbedding()
{
	set_kernel(kernel);
}

const char* CKernelLocallyLinearEmbedding::get_name() const
{
	return "KernelLocallyLinearEmbedding";
}

CKernelLocallyLinearEmbedding::~CKernelLocallyLinearEmbedding()
{
}

CFeatures* CKernelLocallyLinearEmbedding::apply(CFeatures* features)
{
	ASSERT(features);
	SG_REF(features);

	// get number of vectors of data
	int32_t N = features->get_num_vectors();
	if (m_k>=N)
		SG_ERROR("Number of neighbors (%d) should be less than number of objects (%d).\n",
		         m_k, N);

	// compute kernel matrix and embed it
	ASSERT(m_kernel);
	m_kernel->init(features,features);
	CDenseFeatures<float64_t>* embedding = embed_kernel(m_kernel);
	m_kernel->cleanup();
	SG_UNREF(features);
	return (CFeatures*)embedding;
}

CDenseFeatures<float64_t>* CKernelLocallyLinearEmbedding::embed_kernel(CKernel* kernel)
{
	CTime* time = new CTime();

	time->start();
	SGMatrix<float64_t> kernel_matrix = kernel->get_kernel_matrix();
	SG_DEBUG("Kernel matrix computation took %fs\n",time->cur_time_diff());

	time->start();
	SGMatrix<int32_t> neighborhood_matrix = get_neighborhood_matrix(kernel_matrix,m_k);
	SG_DEBUG("Neighbors finding took %fs\n",time->cur_time_diff());

	time->start();
	SGMatrix<float64_t> M_matrix = construct_weight_matrix(kernel_matrix,neighborhood_matrix);
	SG_DEBUG("Weights computation took %fs\n",time->cur_time_diff());

	time->start();
	SGMatrix<float64_t> nullspace = construct_embedding(M_matrix,m_target_dim);
	SG_DEBUG("Embedding construction took %fs\n",time->cur_time_diff());

	delete time;

	return new CDenseFeatures<float64_t>(nullspace);
}

SGMatrix<float64_t> CKernelLocallyLinearEmbedding::construct_weight_matrix(SGMatrix<float64_t> kernel_matrix,
		SGMatrix<int32_t> neighborhood_matrix)
{
	int32_t N = kernel_matrix.num_cols;
	// loop variables
#ifdef HAVE_PTHREAD
	int32_t t;
	int32_t num_threads = parallel->get_num_threads();
	ASSERT(num_threads>0);
	// allocate threads
	pthread_t* threads = SG_MALLOC(pthread_t, num_threads);
	LK_RECONSTRUCTION_THREAD_PARAM* parameters = SG_MALLOC(LK_RECONSTRUCTION_THREAD_PARAM, num_threads);
	pthread_attr_t attr;
	pthread_attr_init(&attr);
	pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
#else
	int32_t num_threads = 1;
#endif
	float64_t* W_matrix = SG_CALLOC(float64_t, N*N);
	// init matrices and norm factor to be used
	float64_t* local_gram_matrix = SG_MALLOC(float64_t, m_k*m_k*num_threads);
	float64_t* id_vector = SG_MALLOC(float64_t, m_k*num_threads);

#ifdef HAVE_PTHREAD
	for (t=0; t<num_threads; t++)
	{
		parameters[t].idx_start = t;
		parameters[t].idx_step = num_threads;
		parameters[t].idx_stop = N;
		parameters[t].m_k = m_k;
		parameters[t].N = N;
		parameters[t].neighborhood_matrix = neighborhood_matrix.matrix;
		parameters[t].kernel_matrix = kernel_matrix.matrix;
		parameters[t].local_gram_matrix = local_gram_matrix+(m_k*m_k)*t;
		parameters[t].id_vector = id_vector+m_k*t;
		parameters[t].W_matrix = W_matrix;
		parameters[t].reconstruction_shift = m_reconstruction_shift;
		pthread_create(&threads[t], &attr, run_linearreconstruction_thread, (void*)&parameters[t]);
	}
	for (t=0; t<num_threads; t++)
		pthread_join(threads[t], NULL);
	pthread_attr_destroy(&attr);
	SG_FREE(parameters);
	SG_FREE(threads);
#else
	LK_RECONSTRUCTION_THREAD_PARAM single_thread_param;
	single_thread_param.idx_start = 0;
	single_thread_param.idx_step = 1;
	single_thread_param.idx_stop = N;
	single_thread_param.m_k = m_k;
	single_thread_param.N = N;
	single_thread_param.neighborhood_matrix = neighborhood_matrix.matrix;
	single_thread_param.local_gram_matrix = local_gram_matrix;
	single_thread_param.kernel_matrix = kernel_matrix.matrix;
	single_thread_param.id_vector = id_vector;
	single_thread_param.W_matrix = W_matrix;
	single_thread_param.reconstruction_shift = m_reconstruction_shift;
	run_linearreconstruction_thread((void*)&single_thread_param);
#endif

	// clean
	SG_FREE(id_vector);
	SG_FREE(local_gram_matrix);

	return SGMatrix<float64_t>(W_matrix,N,N);
}

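// Reconstruction thread: each thread processes a strided subset of the points. For every
// point x_i it (1) forms the local Gram matrix of its k neighbors in feature space,
// C_jk = K(x_i,x_i) - K(x_i,x_j) - K(x_i,x_k) + K(x_j,x_k), (2) regularizes C by adding
// reconstruction_shift*tr(C) to its diagonal, (3) solves C*w = 1 and rescales w to sum to
// one, and (4) accumulates the weights into W_matrix, which ends up holding (I-W)^T (I-W).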
void* CKernelLocallyLinearEmbedding::run_linearreconstruction_thread(void* p)
{
	LK_RECONSTRUCTION_THREAD_PARAM* parameters = (LK_RECONSTRUCTION_THREAD_PARAM*)p;
	int32_t idx_start = parameters->idx_start;
	int32_t idx_step = parameters->idx_step;
	int32_t idx_stop = parameters->idx_stop;
	int32_t m_k = parameters->m_k;
	int32_t N = parameters->N;
	const int32_t* neighborhood_matrix = parameters->neighborhood_matrix;
	float64_t* local_gram_matrix = parameters->local_gram_matrix;
	const float64_t* kernel_matrix = parameters->kernel_matrix;
	float64_t* id_vector = parameters->id_vector;
	float64_t* W_matrix = parameters->W_matrix;
	float64_t reconstruction_shift = parameters->reconstruction_shift;

	int32_t i,j,k;
	float64_t norming,trace;

	for (i=idx_start; i<idx_stop; i+=idx_step)
	{
		// compute local gram matrix C_jk over the neighbors of i
		for (j=0; j<m_k; j++)
		{
			for (k=0; k<m_k; k++)
				local_gram_matrix[j*m_k+k] =
					kernel_matrix[i*N+i] -
					kernel_matrix[i*N+neighborhood_matrix[i*m_k+j]] -
					kernel_matrix[i*N+neighborhood_matrix[i*m_k+k]] +
					kernel_matrix[neighborhood_matrix[i*m_k+j]*N+neighborhood_matrix[i*m_k+k]];
		}

		for (j=0; j<m_k; j++)
			id_vector[j] = 1.0;

		// compute tr(C)
		trace = 0.0;
		for (j=0; j<m_k; j++)
			trace += local_gram_matrix[j*m_k+j];

		// regularize gram matrix
		for (j=0; j<m_k; j++)
			local_gram_matrix[j*m_k+j] += reconstruction_shift*trace;

		// solve C*w = 1 for the reconstruction weights w
		clapack_dposv(CblasColMajor,CblasLower,m_k,1,local_gram_matrix,m_k,id_vector,m_k);

		// normalize weights
		norming=0.0;
		for (j=0; j<m_k; j++)
			norming += id_vector[j];

		cblas_dscal(m_k,1.0/norming,id_vector,1);

		// form the outer product w*w^T
		memset(local_gram_matrix,0,sizeof(float64_t)*m_k*m_k);
		cblas_dger(CblasColMajor,m_k,m_k,1.0,id_vector,1,id_vector,1,local_gram_matrix,m_k);

		// put weights into W matrix
		W_matrix[N*i+i] += 1.0;
		for (j=0; j<m_k; j++)
		{
			W_matrix[N*i+neighborhood_matrix[i*m_k+j]] -= id_vector[j];
			W_matrix[N*neighborhood_matrix[i*m_k+j]+i] -= id_vector[j];
		}
		for (j=0; j<m_k; j++)
		{
			for (k=0; k<m_k; k++)
				W_matrix[N*neighborhood_matrix[i*m_k+j]+neighborhood_matrix[i*m_k+k]]+=local_gram_matrix[j*m_k+k];
		}
	}
	return NULL;
}

SGMatrix<int32_t> CKernelLocallyLinearEmbedding::get_neighborhood_matrix(SGMatrix<float64_t> kernel_matrix, int32_t k)
{
	int32_t i;
	int32_t N = kernel_matrix.num_cols;

	int32_t* neighborhood_matrix = SG_MALLOC(int32_t, N*k);

	float64_t max_dist=0.0;
	for (i=0; i<N; i++)
		max_dist = CMath::max(max_dist,kernel_matrix[i*N+i]);

	std::vector<KLLE_COVERTREE_POINT> vectors;
	vectors.reserve(N);
	for (i=0; i<N; i++)
		vectors.push_back(KLLE_COVERTREE_POINT(i,kernel_matrix));

	CoverTree<KLLE_COVERTREE_POINT>* coverTree = new CoverTree<KLLE_COVERTREE_POINT>(2.0*max_dist,vectors);

	for (i=0; i<N; i++)
	{
		// query k+1 neighbors since the point itself is returned as its own nearest neighbor
		std::vector<KLLE_COVERTREE_POINT> neighbors =
			coverTree->kNearestNeighbors(vectors[i],k+1);

		ASSERT(neighbors.size()>=unsigned(k+1));

		for (std::size_t m=1; m<unsigned(k+1); m++)
			neighborhood_matrix[i*k+m-1] = neighbors[m].point_index;
	}

	delete coverTree;

	return SGMatrix<int32_t>(neighborhood_matrix,k,N);
}

#endif /* HAVE_LAPACK */
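
A minimal usage sketch (not part of the file above): it embeds random 3-dimensional data into 2 dimensions with CKernelLocallyLinearEmbedding. The setters set_k() and set_target_dim() are assumed to come from the CLocallyLinearEmbedding/CEmbeddingConverter base classes, and CGaussianKernel is used as an example kernel; treat this as an illustration of the intended call sequence rather than a verbatim Shogun example.

#include <shogun/base/init.h>
#include <shogun/mathematics/Math.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/converter/KernelLocallyLinearEmbedding.h>

using namespace shogun;

int main()
{
	init_shogun_with_defaults();

	// toy data: 100 random 3-dimensional vectors, stored column-wise
	SGMatrix<float64_t> data(3,100);
	for (int32_t i=0; i<3*100; i++)
		data.matrix[i] = CMath::random(0.0,1.0);
	CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(data);

	// kernel LLE with a Gaussian kernel, 10 neighbors, 2 output dimensions
	CKernelLocallyLinearEmbedding* klle =
		new CKernelLocallyLinearEmbedding(new CGaussianKernel(10,2.0));
	klle->set_k(10);
	klle->set_target_dim(2);

	CDenseFeatures<float64_t>* embedding =
		(CDenseFeatures<float64_t>*)klle->apply(features);

	// ... use embedding->get_feature_matrix() here ...

	SG_UNREF(embedding);
	SG_UNREF(klle);
	exit_shogun();
	return 0;
}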
