SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MultidimensionalScaling.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2011 Sergey Lisitsyn
8  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
9  */
10 
12 #ifdef HAVE_LAPACK
17 #include <shogun/lib/common.h>
20 #include <shogun/io/SGIO.h>
22 
23 #ifdef HAVE_PTHREAD
24 #include <pthread.h>
25 #endif
26 
27 using namespace shogun;
28 
29 #ifndef DOXYGEN_SHOULD_SKIP_THIS
30 struct TRIANGULATION_THREAD_PARAM
31 {
33  int32_t idx_start;
35  int32_t idx_stop;
37  int32_t idx_step;
39  int32_t lmk_N;
41  int32_t total_N;
43  int32_t m_target_dim;
45  float64_t* current_dist_to_lmks;
47  float64_t* lmk_feature_matrix;
49  float64_t* new_feature_matrix;
51  const float64_t* distance_matrix;
53  const float64_t* mean_sq_dist_vector;
55  const int32_t* lmk_idxs;
57  const bool* to_process;
58 };
59 #endif
60 
62 {
65  m_landmark = false;
66 
67  init();
68 }
69 
71 {
72  SG_ADD(&m_eigenvalues, "eigenvalues", "eigenvalues of last embedding",
74  SG_ADD(&m_landmark, "landmark",
75  "indicates if landmark approximation should be used", MS_NOT_AVAILABLE);
76  SG_ADD(&m_landmark_number, "landmark number",
77  "the number of landmarks for approximation", MS_AVAILABLE);
78 }
79 
81 {
82 }
83 
85 {
86  return m_eigenvalues;
87 }
88 
90 {
91  if (num<3)
92  SG_ERROR("Number of landmarks should be greater than 3 to make triangulation possible while %d given.",
93  num);
94  m_landmark_number = num;
95 }
96 
98 {
99  return m_landmark_number;
100 }
101 
103 {
104  m_landmark = landmark;
105 }
106 
108 {
109  return m_landmark;
110 }
111 
113 {
114  return "MultidimensionalScaling";
115 };
116 
118 {
119  ASSERT(distance);
120 
121  // compute feature_matrix by landmark or classic embedding of distance matrix
122  SGMatrix<float64_t> distance_matrix = process_distance_matrix(distance->get_distance_matrix());
123  SGMatrix<float64_t> feature_matrix;
124  if (m_landmark)
125  feature_matrix = landmark_embedding(distance_matrix);
126  else
127  feature_matrix = classic_embedding(distance_matrix);
128 
129  return new CDenseFeatures<float64_t>(feature_matrix);
130 }
131 
133 {
134  return distance_matrix;
135 }
136 
138 {
139  SG_REF(features);
141 
142  m_distance->init(features,features);
145 
146  SG_UNREF(features);
147  return (CFeatures*)embedding;
148 }
149 
151 {
152  ASSERT(m_target_dim>0);
153  ASSERT(distance_matrix.num_cols==distance_matrix.num_rows);
154  int32_t N = distance_matrix.num_cols;
155 
156  // loop variables
157  int32_t i,j;
158 
159  // double center distance_matrix
160  for (i=0; i<N; i++)
161  {
162  for (j=0; j<N; j++)
163  distance_matrix(i,j) = CMath::sq(distance_matrix(i,j));
164  }
165  SGMatrix<float64_t>::center_matrix(distance_matrix.matrix,N,N);
166  for (i=0; i<N; i++)
167  {
168  for (j=0; j<N; j++)
169  distance_matrix(i,j) = distance_matrix(i,j)*(-0.5);
170  }
171 
172  // feature matrix representing given distance
173  float64_t* replace_feature_matrix = SG_MALLOC(float64_t, N*m_target_dim);
174 
175  // status of eigenproblem to be solved
176  int eigenproblem_status = 0;
177 #ifdef HAVE_ARPACK
178  // using ARPACK
179  float64_t* eigenvalues_vector = SG_MALLOC(float64_t, m_target_dim);
180  // solve eigenproblem with ARPACK (faster)
181  arpack_dsxupd(distance_matrix.matrix,NULL,false,N,m_target_dim,"LM",false,1,false,false,0.0,0.0,
182  eigenvalues_vector,replace_feature_matrix,eigenproblem_status);
183  // check for failure
184  ASSERT(eigenproblem_status == 0);
185  // reverse eigenvectors order
186  float64_t tmp;
187  for (j=0; j<N; j++)
188  {
189  for (i=0; i<m_target_dim/2; i++)
190  {
191  tmp = replace_feature_matrix[j*m_target_dim+i];
192  replace_feature_matrix[j*m_target_dim+i] =
193  replace_feature_matrix[j*m_target_dim+(m_target_dim-i-1)];
194  replace_feature_matrix[j*m_target_dim+(m_target_dim-i-1)] = tmp;
195  }
196  }
197  // reverse eigenvalues order
198  for (i=0; i<m_target_dim/2; i++)
199  {
200  tmp = eigenvalues_vector[i];
201  eigenvalues_vector[i] = eigenvalues_vector[m_target_dim-i-1];
202  eigenvalues_vector[m_target_dim-i-1] = tmp;
203  }
204 
205  // finally construct embedding
206  for (i=0; i<m_target_dim; i++)
207  {
208  for (j=0; j<N; j++)
209  replace_feature_matrix[j*m_target_dim+i] *=
210  CMath::sqrt(eigenvalues_vector[i]);
211  }
212 
213  // set eigenvalues vector
214  m_eigenvalues = SGVector<float64_t>(eigenvalues_vector,m_target_dim);
215 #else /* not HAVE_ARPACK */
216  // using LAPACK
217  float64_t* eigenvalues_vector = SG_MALLOC(float64_t, N);
218  float64_t* eigenvectors = SG_MALLOC(float64_t, m_target_dim*N);
219  // solve eigenproblem with LAPACK
220  wrap_dsyevr('V','U',N,distance_matrix.matrix,N,N-m_target_dim+1,N,eigenvalues_vector,eigenvectors,&eigenproblem_status);
221  // check for failure
222  ASSERT(eigenproblem_status==0);
223 
224  // set eigenvalues vector
226 
227  // fill eigenvalues vector in backwards order
228  for (i=0; i<m_target_dim; i++)
229  m_eigenvalues.vector[i] = eigenvalues_vector[m_target_dim-i-1];
230 
231  // construct embedding
232  for (i=0; i<m_target_dim; i++)
233  {
234  for (j=0; j<N; j++)
235  {
236  replace_feature_matrix[j*m_target_dim+i] =
237  eigenvectors[(m_target_dim-i-1)*N+j] * CMath::sqrt(m_eigenvalues.vector[i]);
238  }
239  }
240  SG_FREE(eigenvectors);
241  SG_FREE(eigenvalues_vector);
242 #endif /* HAVE_ARPACK else */
243 
244  // warn user if there are negative or zero eigenvalues
245  for (i=0; i<m_eigenvalues.vlen; i++)
246  {
247  if (m_eigenvalues.vector[i]<=0.0)
248  {
249  SG_WARNING("Embedding is not consistent (got neg eigenvalues): features %d-%d are wrong",
250  i, m_eigenvalues.vlen-1);
251  break;
252  }
253  }
254 
255  return SGMatrix<float64_t>(replace_feature_matrix,m_target_dim,N);
256 }
257 
259 {
260  ASSERT(m_target_dim>0);
261  ASSERT(distance_matrix.num_cols==distance_matrix.num_rows);
262  int32_t lmk_N = m_landmark_number;
263  int32_t i,j;
264  int32_t total_N = distance_matrix.num_cols;
265  if (lmk_N<3)
266  {
267  SG_ERROR("Number of landmarks (%d) should be greater than 3 for proper triangulation.\n",
268  lmk_N);
269  }
270  if (lmk_N>total_N)
271  {
272  SG_ERROR("Number of landmarks (%d) should be less than total number of vectors (%d).\n",
273  lmk_N, total_N);
274  }
275 
276  // get landmark indexes with random permutation
277  SGVector<int32_t> lmk_idxs = CStatistics::sample_indices(lmk_N,total_N);
278  // compute distances between landmarks
279  float64_t* lmk_dist_matrix = SG_MALLOC(float64_t, lmk_N*lmk_N);
280  for (i=0; i<lmk_N; i++)
281  {
282  for (j=0; j<lmk_N; j++)
283  lmk_dist_matrix[i*lmk_N+j] =
284  distance_matrix[lmk_idxs.vector[i]*total_N+lmk_idxs.vector[j]];
285  }
286 
287  // get landmarks embedding
288  SGMatrix<float64_t> lmk_dist_sgmatrix(lmk_dist_matrix,lmk_N,lmk_N);
289  // compute mean vector of squared distances
290  float64_t* mean_sq_dist_vector = SG_CALLOC(float64_t, lmk_N);
291  for (i=0; i<lmk_N; i++)
292  {
293  for (j=0; j<lmk_N; j++)
294  mean_sq_dist_vector[i] += CMath::sq(lmk_dist_matrix[i*lmk_N+j]);
295 
296  mean_sq_dist_vector[i] /= lmk_N;
297  }
298  SGMatrix<float64_t> lmk_feature_matrix = classic_embedding(lmk_dist_sgmatrix);
299 
300  // construct new feature matrix
301  float64_t* new_feature_matrix = SG_CALLOC(float64_t, m_target_dim*total_N);
302 
303  // fill new feature matrix with embedded landmarks
304  for (i=0; i<lmk_N; i++)
305  {
306  for (j=0; j<m_target_dim; j++)
307  new_feature_matrix[lmk_idxs.vector[i]*m_target_dim+j] =
308  lmk_feature_matrix[i*m_target_dim+j];
309  }
310 
311  // get exactly defined pseudoinverse of landmarks feature matrix
313  for (i=0; i<lmk_N; i++)
314  {
315  for (j=0; j<m_target_dim; j++)
316  lmk_feature_matrix[i*m_target_dim+j] /= m_eigenvalues.vector[j];
317  }
318 
319 
320  // set to_process els true if should be processed
321  bool* to_process = SG_MALLOC(bool, total_N);
322  for (j=0; j<total_N; j++)
323  to_process[j] = true;
324  for (j=0; j<lmk_N; j++)
325  to_process[lmk_idxs.vector[j]] = false;
326 
327  // get embedding for non-landmark vectors
328 #ifdef HAVE_PTHREAD
329  int32_t t;
330  int32_t num_threads = parallel->get_num_threads();
331  ASSERT(num_threads>0);
332  // allocate threads and it's parameters
333  pthread_t* threads = SG_MALLOC(pthread_t, num_threads);
334  TRIANGULATION_THREAD_PARAM* parameters = SG_MALLOC(TRIANGULATION_THREAD_PARAM, num_threads);
335  pthread_attr_t attr;
336  pthread_attr_init(&attr);
337  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
338  float64_t* current_dist_to_lmks = SG_MALLOC(float64_t, lmk_N*num_threads);
339  // run threads
340  for (t=0; t<num_threads; t++)
341  {
342  parameters[t].idx_start = t;
343  parameters[t].idx_stop = total_N;
344  parameters[t].idx_step = num_threads;
345  parameters[t].lmk_N = lmk_N;
346  parameters[t].total_N = total_N;
347  parameters[t].m_target_dim = m_target_dim;
348  parameters[t].current_dist_to_lmks = current_dist_to_lmks+t*lmk_N;
349  parameters[t].distance_matrix = distance_matrix.matrix;
350  parameters[t].mean_sq_dist_vector = mean_sq_dist_vector;
351  parameters[t].lmk_idxs = lmk_idxs.vector;
352  parameters[t].lmk_feature_matrix = lmk_feature_matrix.matrix;
353  parameters[t].new_feature_matrix = new_feature_matrix;
354  parameters[t].to_process = to_process;
355  pthread_create(&threads[t], &attr, run_triangulation_thread, (void*)&parameters[t]);
356  }
357  // join threads
358  for (t=0; t<num_threads; t++)
359  pthread_join(threads[t], NULL);
360  pthread_attr_destroy(&attr);
361  SG_FREE(parameters);
362  SG_FREE(threads);
363 #else
364  // run single 'thread'
365  float64_t* current_dist_to_lmks = SG_MALLOC(float64_t, lmk_N);
366  TRIANGULATION_THREAD_PARAM single_thread_param;
367  single_thread_param.idx_start = 0;
368  single_thread_param.idx_stop = total_N;
369  single_thread_param.idx_step = 1;
370  single_thread_param.lmk_N = lmk_N;
371  single_thread_param.total_N = total_N;
372  single_thread_param.m_target_dim = m_target_dim;
373  single_thread_param.current_dist_to_lmks = current_dist_to_lmks;
374  single_thread_param.distance_matrix = distance_matrix.matrix;
375  single_thread_param.mean_sq_dist_vector = mean_sq_dist_vector;
376  single_thread_param.lmk_idxs = lmk_idxs.vector;
377  single_thread_param.lmk_feature_matrix = lmk_feature_matrix.matrix;
378  single_thread_param.new_feature_matrix = new_feature_matrix;
379  single_thread_param.to_process = to_process;
380  run_triangulation_thread((void*)&single_thread_param);
381 #endif
382  // cleanup
383  SG_FREE(current_dist_to_lmks);
384  SG_FREE(mean_sq_dist_vector);
385  SG_FREE(to_process);
386 
387  return SGMatrix<float64_t>(new_feature_matrix,m_target_dim,total_N);
388 }
389 
391 {
392  TRIANGULATION_THREAD_PARAM* parameters = (TRIANGULATION_THREAD_PARAM*)p;
393  int32_t idx_start = parameters->idx_start;
394  int32_t idx_step = parameters->idx_step;
395  int32_t idx_stop = parameters->idx_stop;
396  const int32_t* lmk_idxs = parameters->lmk_idxs;
397  const float64_t* distance_matrix = parameters->distance_matrix;
398  const float64_t* mean_sq_dist_vector = parameters->mean_sq_dist_vector;
399  float64_t* current_dist_to_lmks = parameters->current_dist_to_lmks;
400  int32_t m_target_dim = parameters->m_target_dim;
401  int32_t lmk_N = parameters->lmk_N;
402  int32_t total_N = parameters->total_N;
403  const bool* to_process = parameters->to_process;
404  float64_t* lmk_feature_matrix = parameters->lmk_feature_matrix;
405  float64_t* new_feature_matrix = parameters->new_feature_matrix;
406 
407  int32_t i,k;
408  for (i=idx_start; i<idx_stop; i+=idx_step)
409  {
410  // skip if landmark
411  if (!to_process[i])
412  continue;
413 
414  // compute difference from mean landmark distance vector
415  for (k=0; k<lmk_N; k++)
416  {
417  current_dist_to_lmks[k] =
418  CMath::sq(distance_matrix[i*total_N+lmk_idxs[k]]) -
419  mean_sq_dist_vector[k];
420  }
421  // compute embedding
422  cblas_dgemv(CblasColMajor,CblasNoTrans,
423  m_target_dim,lmk_N,
424  -0.5,lmk_feature_matrix,m_target_dim,
425  current_dist_to_lmks,1,
426  0.0,(new_feature_matrix+i*m_target_dim),1);
427  }
428  return NULL;
429 }
430 
431 #endif /* HAVE_LAPACK */

SHOGUN Machine Learning Toolbox - Documentation