SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
LaplacianEigenmaps.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2011 Sergey Lisitsyn
8  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
9  */
10 
13 #ifdef HAVE_LAPACK
18 #include <shogun/io/SGIO.h>
20 #include <shogun/lib/Signal.h>
21 
22 using namespace shogun;
23 
26 {
27  m_k = 3;
28  m_tau = 1.0;
29 
30  init();
31 }
32 
34 {
35  SG_ADD(&m_k, "k", "number of neighbors", MS_AVAILABLE);
36  SG_ADD(&m_tau, "tau", "heat distribution coefficient", MS_AVAILABLE);
37 }
38 
40 {
41 }
42 
44 {
45  ASSERT(k>0);
46  m_k = k;
47 }
48 
50 {
51  return m_k;
52 }
53 
55 {
56  m_tau = tau;
57 }
58 
60 {
61  return m_tau;
62 }
63 
64 const char* CLaplacianEigenmaps::get_name() const
65 {
66  return "LaplacianEigenmaps";
67 };
68 
70 {
71  // shorthand for simplefeatures
72  SG_REF(features);
73 
74  // get dimensionality and number of vectors of data
75  int32_t N = features->get_num_vectors();
76  ASSERT(m_k<N);
78 
79  // compute distance matrix
81  m_distance->init(features,features);
84  SG_UNREF(features);
85  return (CFeatures*)embedding;
86 }
87 
89 {
90  int32_t i,j;
91  SGMatrix<float64_t> W_sgmatrix = distance->get_distance_matrix();
92  ASSERT(W_sgmatrix.num_rows==W_sgmatrix.num_cols);
93  int32_t N = W_sgmatrix.num_rows;
94  // shorthand
95  float64_t* W_matrix = W_sgmatrix.matrix;
96 
97  // init heap to use
98  CFibonacciHeap* heap = new CFibonacciHeap(N);
99  float64_t tmp;
100  // for each object
101  for (i=0; i<N; i++)
102  {
103  // fill heap
104  for (j=0; j<N; j++)
105  heap->insert(j,W_matrix[i*N+j]);
106 
107  // rearrange heap with extracting ith object itself
108  heap->extract_min(tmp);
109 
110  // extract nearest neighbors, takes ~O(k*log n), and change sign for them
111  for (j=0; j<m_k; j++)
112  W_matrix[i*N+heap->extract_min(tmp)] *= -1.0;
113 
114  // remove all 'positive' distances and change 'negative' ones to positive
115  for (j=0; j<N; j++)
116  {
117  if (W_matrix[i*N+j]>0.0)
118  W_matrix[i*N+j] = 0.0;
119  else
120  W_matrix[i*N+j] *= -1.0;
121  }
122 
123  // clear heap to reuse
124  heap->clear();
125  }
126  delete heap;
127  // make distance matrix symmetric with mutual kNN relation
128  for (i=0; i<N; i++)
129  {
130  // check only upper triangle
131  for (j=i; j<N; j++)
132  {
133  // make kNN relation symmetric
134  if (W_matrix[i*N+j]!=0.0 || W_matrix[j*N+i]==0.0)
135  {
136  W_matrix[j*N+i] = W_matrix[i*N+j];
137  }
138  if (W_matrix[j*N+i]!=0.0 || W_matrix[i*N+j]==0.0)
139  {
140  W_matrix[i*N+j] = W_matrix[j*N+i];
141  }
142 
143  if (W_matrix[i*N+j] != 0.0)
144  {
145  // compute heat, exp(-d^2/tau)
146  tmp = CMath::exp(-CMath::sq(W_matrix[i*N+j])/m_tau);
147  W_matrix[i*N+j] = tmp;
148  W_matrix[j*N+i] = tmp;
149  }
150  }
151  }
152 
153  // compute D
154  CDenseFeatures<float64_t>* embedding = construct_embedding(features,W_sgmatrix);
155 
156  return embedding;
157 }
158 
160  SGMatrix<float64_t> W_matrix)
161 {
162  int32_t i,j;
163  int32_t N = W_matrix.num_cols;
164 
165  float64_t* D_diag_vector = SG_CALLOC(float64_t, N);
166  for (i=0; i<N; i++)
167  {
168  for (j=0; j<N; j++)
169  D_diag_vector[i] += W_matrix[i*N+j];
170  }
171 
172  // W = -W
173  for (i=0; i<N*N; i++)
174  if (W_matrix[i]>0.0)
175  W_matrix[i] *= -1.0;
176  // W = W + D
177  for (i=0; i<N; i++)
178  W_matrix[i*N+i] += D_diag_vector[i];
179 
180 #ifdef HAVE_ARPACK
181  // using ARPACK DS{E,A}UPD
182  int eigenproblem_status = 0;
183  float64_t* eigenvalues_vector = SG_MALLOC(float64_t,m_target_dim+1);
184  arpack_dsxupd(W_matrix.matrix,D_diag_vector,true,N,m_target_dim+1,"LA",true,3,false,false,-1e-9,0.0,
185  eigenvalues_vector,W_matrix.matrix,eigenproblem_status);
186 
187  if (eigenproblem_status!=0)
188  SG_ERROR("DSXUPD failed with code %d\n",eigenproblem_status);
189 
190  SG_FREE(eigenvalues_vector);
191 #else /* HAVE_ARPACK */
192  // using LAPACK DSYGVX
193  // requires 2x memory because of dense rhs matrix usage
194  int eigenproblem_status = 0;
195  float64_t* eigenvalues_vector = SG_MALLOC(float64_t,N);
196  float64_t* rhs = SG_CALLOC(float64_t,N*N);
197  // fill rhs with diag (for safety reasons zeros will be replaced with 1e-3)
198  for (i=0; i<N; i++)
199  rhs[i*N+i] = D_diag_vector[i];
200 
201  wrap_dsygvx(1,'V','U',N,W_matrix.matrix,N,rhs,N,1,m_target_dim+2,eigenvalues_vector,W_matrix.matrix,&eigenproblem_status);
202 
203  if (eigenproblem_status)
204  SG_ERROR("DSYGVX failed with code: %d.\n",eigenproblem_status);
205 
206  SG_FREE(rhs);
207  SG_FREE(eigenvalues_vector);
208 
209 #endif /* HAVE_ARPACK */
210  SG_FREE(D_diag_vector);
211 
213  // fill features according to used solver
214  for (i=0; i<m_target_dim; i++)
215  {
216  for (j=0; j<N; j++)
217  {
218  #ifdef HAVE_ARPACK
219  new_features[j*m_target_dim+i] = W_matrix[j*(m_target_dim+1)+i+1];
220  #else
221  new_features[j*m_target_dim+i] = W_matrix[(i+1)*N+j];
222  #endif
223  }
224  }
225  return new CDenseFeatures<float64_t>(new_features);
226 }
227 
228 #endif /* HAVE_LAPACK */

SHOGUN Machine Learning Toolbox - Documentation