00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include <shogun/converter/LaplacianEigenmaps.h>
00012 #include <shogun/converter/EmbeddingConverter.h>
00013 #ifdef HAVE_LAPACK
00014 #include <shogun/mathematics/arpack.h>
00015 #include <shogun/mathematics/lapack.h>
00016 #include <shogun/lib/FibonacciHeap.h>
00017 #include <shogun/mathematics/Math.h>
00018 #include <shogun/io/SGIO.h>
00019 #include <shogun/distance/Distance.h>
00020 #include <shogun/lib/Signal.h>
00021
00022 using namespace shogun;
00023
00024 CLaplacianEigenmaps::CLaplacianEigenmaps() :
00025 CEmbeddingConverter()
00026 {
00027 m_k = 3;
00028 m_tau = 1.0;
00029
00030 init();
00031 }
00032
00033 void CLaplacianEigenmaps::init()
00034 {
00035 SG_ADD(&m_k, "k", "number of neighbors", MS_AVAILABLE);
00036 SG_ADD(&m_tau, "tau", "heat distribution coefficient", MS_AVAILABLE);
00037 }
00038
00039 CLaplacianEigenmaps::~CLaplacianEigenmaps()
00040 {
00041 }
00042
00043 void CLaplacianEigenmaps::set_k(int32_t k)
00044 {
00045 ASSERT(k>0);
00046 m_k = k;
00047 }
00048
00049 int32_t CLaplacianEigenmaps::get_k() const
00050 {
00051 return m_k;
00052 }
00053
00054 void CLaplacianEigenmaps::set_tau(float64_t tau)
00055 {
00056 m_tau = tau;
00057 }
00058
00059 float64_t CLaplacianEigenmaps::get_tau() const
00060 {
00061 return m_tau;
00062 }
00063
00064 const char* CLaplacianEigenmaps::get_name() const
00065 {
00066 return "LaplacianEigenmaps";
00067 };
00068
00069 CFeatures* CLaplacianEigenmaps::apply(CFeatures* features)
00070 {
00071
00072 SG_REF(features);
00073
00074
00075 int32_t N = features->get_num_vectors();
00076 ASSERT(m_k<N);
00077 ASSERT(m_target_dim<N);
00078
00079
00080 ASSERT(m_distance);
00081 m_distance->init(features,features);
00082 CDenseFeatures<float64_t>* embedding = embed_distance(m_distance,features);
00083 m_distance->remove_lhs_and_rhs();
00084 SG_UNREF(features);
00085 return (CFeatures*)embedding;
00086 }
00087
00088 CDenseFeatures<float64_t>* CLaplacianEigenmaps::embed_distance(CDistance* distance, CFeatures* features)
00089 {
00090 int32_t i,j;
00091 SGMatrix<float64_t> W_sgmatrix = distance->get_distance_matrix();
00092 ASSERT(W_sgmatrix.num_rows==W_sgmatrix.num_cols);
00093 int32_t N = W_sgmatrix.num_rows;
00094
00095 float64_t* W_matrix = W_sgmatrix.matrix;
00096
00097
00098 CFibonacciHeap* heap = new CFibonacciHeap(N);
00099 float64_t tmp;
00100
00101 for (i=0; i<N; i++)
00102 {
00103
00104 for (j=0; j<N; j++)
00105 heap->insert(j,W_matrix[i*N+j]);
00106
00107
00108 heap->extract_min(tmp);
00109
00110
00111 for (j=0; j<m_k; j++)
00112 W_matrix[i*N+heap->extract_min(tmp)] *= -1.0;
00113
00114
00115 for (j=0; j<N; j++)
00116 {
00117 if (W_matrix[i*N+j]>0.0)
00118 W_matrix[i*N+j] = 0.0;
00119 else
00120 W_matrix[i*N+j] *= -1.0;
00121 }
00122
00123
00124 heap->clear();
00125 }
00126 delete heap;
00127
00128 for (i=0; i<N; i++)
00129 {
00130
00131 for (j=i; j<N; j++)
00132 {
00133
00134 if (W_matrix[i*N+j]!=0.0 || W_matrix[j*N+i]==0.0)
00135 {
00136 W_matrix[j*N+i] = W_matrix[i*N+j];
00137 }
00138 if (W_matrix[j*N+i]!=0.0 || W_matrix[i*N+j]==0.0)
00139 {
00140 W_matrix[i*N+j] = W_matrix[j*N+i];
00141 }
00142
00143 if (W_matrix[i*N+j] != 0.0)
00144 {
00145
00146 tmp = CMath::exp(-CMath::sq(W_matrix[i*N+j])/m_tau);
00147 W_matrix[i*N+j] = tmp;
00148 W_matrix[j*N+i] = tmp;
00149 }
00150 }
00151 }
00152
00153
00154 CDenseFeatures<float64_t>* embedding = construct_embedding(features,W_sgmatrix);
00155
00156 return embedding;
00157 }
00158
00159 CDenseFeatures<float64_t>* CLaplacianEigenmaps::construct_embedding(CFeatures* features,
00160 SGMatrix<float64_t> W_matrix)
00161 {
00162 int32_t i,j;
00163 int32_t N = W_matrix.num_cols;
00164
00165 float64_t* D_diag_vector = SG_CALLOC(float64_t, N);
00166 for (i=0; i<N; i++)
00167 {
00168 for (j=0; j<N; j++)
00169 D_diag_vector[i] += W_matrix[i*N+j];
00170 }
00171
00172
00173 for (i=0; i<N*N; i++)
00174 if (W_matrix[i]>0.0)
00175 W_matrix[i] *= -1.0;
00176
00177 for (i=0; i<N; i++)
00178 W_matrix[i*N+i] += D_diag_vector[i];
00179
00180 #ifdef HAVE_ARPACK
00181
00182 int eigenproblem_status = 0;
00183 float64_t* eigenvalues_vector = SG_MALLOC(float64_t,m_target_dim+1);
00184 arpack_dsxupd(W_matrix.matrix,D_diag_vector,true,N,m_target_dim+1,"LA",true,3,false,false,-1e-9,0.0,
00185 eigenvalues_vector,W_matrix.matrix,eigenproblem_status);
00186
00187 if (eigenproblem_status!=0)
00188 SG_ERROR("DSXUPD failed with code %d\n",eigenproblem_status);
00189
00190 SG_FREE(eigenvalues_vector);
00191 #else
00192
00193
00194 int eigenproblem_status = 0;
00195 float64_t* eigenvalues_vector = SG_MALLOC(float64_t,N);
00196 float64_t* rhs = SG_CALLOC(float64_t,N*N);
00197
00198 for (i=0; i<N; i++)
00199 rhs[i*N+i] = D_diag_vector[i];
00200
00201 wrap_dsygvx(1,'V','U',N,W_matrix.matrix,N,rhs,N,1,m_target_dim+2,eigenvalues_vector,W_matrix.matrix,&eigenproblem_status);
00202
00203 if (eigenproblem_status)
00204 SG_ERROR("DSYGVX failed with code: %d.\n",eigenproblem_status);
00205
00206 SG_FREE(rhs);
00207 SG_FREE(eigenvalues_vector);
00208
00209 #endif
00210 SG_FREE(D_diag_vector);
00211
00212 SGMatrix<float64_t> new_features = SGMatrix<float64_t>(m_target_dim,N);
00213
00214 for (i=0; i<m_target_dim; i++)
00215 {
00216 for (j=0; j<N; j++)
00217 {
00218 #ifdef HAVE_ARPACK
00219 new_features[j*m_target_dim+i] = W_matrix[j*(m_target_dim+1)+i+1];
00220 #else
00221 new_features[j*m_target_dim+i] = W_matrix[(i+1)*N+j];
00222 #endif
00223 }
00224 }
00225 return new CDenseFeatures<float64_t>(new_features);
00226 }
00227
00228 #endif