SHOGUN: LaplacianEigenmaps.cpp Source File

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2011 Sergey Lisitsyn
00008  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
00009  */
00010 
00011 #include <shogun/converter/LaplacianEigenmaps.h>
00012 #include <shogun/converter/EmbeddingConverter.h>
00013 #ifdef HAVE_LAPACK
00014 #include <shogun/mathematics/arpack.h>
00015 #include <shogun/mathematics/lapack.h>
00016 #include <shogun/lib/FibonacciHeap.h>
00017 #include <shogun/mathematics/Math.h>
00018 #include <shogun/io/SGIO.h>
00019 #include <shogun/distance/Distance.h>
00020 #include <shogun/lib/Signal.h>
00021 
00022 using namespace shogun;
00023 
00024 CLaplacianEigenmaps::CLaplacianEigenmaps() :
00025         CEmbeddingConverter()
00026 {
00027     m_k = 3;
00028     m_tau = 1.0;
00029 
00030     init();
00031 }
00032 
00033 void CLaplacianEigenmaps::init()
00034 {
00035     SG_ADD(&m_k, "k", "number of neighbors", MS_AVAILABLE);
00036     SG_ADD(&m_tau, "tau", "heat distribution coefficient", MS_AVAILABLE);
00037 }
00038 
00039 CLaplacianEigenmaps::~CLaplacianEigenmaps()
00040 {
00041 }
00042 
00043 void CLaplacianEigenmaps::set_k(int32_t k)
00044 {
00045     ASSERT(k>0);
00046     m_k = k;
00047 }
00048 
00049 int32_t CLaplacianEigenmaps::get_k() const
00050 {
00051     return m_k;
00052 }
00053 
00054 void CLaplacianEigenmaps::set_tau(float64_t tau)
00055 {
00056     m_tau = tau;
00057 }
00058 
00059 float64_t CLaplacianEigenmaps::get_tau() const
00060 {
00061     return m_tau;
00062 }
00063 
00064 const char* CLaplacianEigenmaps::get_name() const
00065 {
00066     return "LaplacianEigenmaps";
00067 };
00068 
00069 CFeatures* CLaplacianEigenmaps::apply(CFeatures* features)
00070 {
00071     // shorthand for simplefeatures
00072     SG_REF(features);
00073 
00074     // get dimensionality and number of vectors of data
00075     int32_t N = features->get_num_vectors();
00076     ASSERT(m_k<N);
00077     ASSERT(m_target_dim<N);
00078 
00079     // compute distance matrix
00080     ASSERT(m_distance);
00081     m_distance->init(features,features);
00082     CDenseFeatures<float64_t>* embedding = embed_distance(m_distance,features);
00083     m_distance->remove_lhs_and_rhs();
00084     SG_UNREF(features);
00085     return (CFeatures*)embedding;
00086 }
00087 
00088 CDenseFeatures<float64_t>* CLaplacianEigenmaps::embed_distance(CDistance* distance, CFeatures* features)
00089 {
00090     int32_t i,j;
00091     SGMatrix<float64_t> W_sgmatrix = distance->get_distance_matrix();
00092     ASSERT(W_sgmatrix.num_rows==W_sgmatrix.num_cols);
00093     int32_t N = W_sgmatrix.num_rows;
00094     // shorthand
00095     float64_t* W_matrix = W_sgmatrix.matrix;
00096 
00097     // init heap to use
00098     CFibonacciHeap* heap = new CFibonacciHeap(N);
00099     float64_t tmp;
00100     // for each object
00101     for (i=0; i<N; i++)
00102     {
00103         // fill heap
00104         for (j=0; j<N; j++)
00105             heap->insert(j,W_matrix[i*N+j]);
00106 
00107         // rearrange heap with extracting ith object itself
00108         heap->extract_min(tmp);
00109 
00110         // extract nearest neighbors, takes ~O(k*log n), and change sign for them
00111         for (j=0; j<m_k; j++)
00112             W_matrix[i*N+heap->extract_min(tmp)] *= -1.0;
00113 
00114         // remove all 'positive' distances and change 'negative' ones to positive
00115         for (j=0; j<N; j++)
00116         {
00117             if (W_matrix[i*N+j]>0.0)
00118                 W_matrix[i*N+j] = 0.0;
00119             else
00120                 W_matrix[i*N+j] *= -1.0;
00121         }
00122 
00123         // clear heap to reuse
00124         heap->clear();
00125     }
00126     delete heap;
00127     // make distance matrix symmetric with mutual kNN relation
00128     for (i=0; i<N; i++)
00129     {
00130         // check only upper triangle
00131         for (j=i; j<N; j++)
00132         {
00133             // make kNN relation symmetric
00134             if (W_matrix[i*N+j]!=0.0 || W_matrix[j*N+i]==0.0)
00135             {
00136                 W_matrix[j*N+i] = W_matrix[i*N+j];
00137             }
00138             if (W_matrix[j*N+i]!=0.0 || W_matrix[i*N+j]==0.0)
00139             {
00140                 W_matrix[i*N+j] = W_matrix[j*N+i];
00141             }
00142 
00143             if (W_matrix[i*N+j] != 0.0)
00144             {
00145                 // compute heat, exp(-d^2/tau)
00146                 tmp = CMath::exp(-CMath::sq(W_matrix[i*N+j])/m_tau);
00147                 W_matrix[i*N+j] = tmp;
00148                 W_matrix[j*N+i] = tmp;
00149             }
00150         }
00151     }
00152 
00153     // compute D
00154     CDenseFeatures<float64_t>* embedding = construct_embedding(features,W_sgmatrix);
00155 
00156     return embedding;
00157 }
00158 
00159 CDenseFeatures<float64_t>* CLaplacianEigenmaps::construct_embedding(CFeatures* features,
00160                                                                      SGMatrix<float64_t> W_matrix)
00161 {
00162     int32_t i,j;
00163     int32_t N = W_matrix.num_cols;
00164 
00165     float64_t* D_diag_vector = SG_CALLOC(float64_t, N);
00166     for (i=0; i<N; i++)
00167     {
00168         for (j=0; j<N; j++)
00169             D_diag_vector[i] += W_matrix[i*N+j];
00170     }
00171 
00172     // W = -W
00173     for (i=0; i<N*N; i++)
00174         if (W_matrix[i]>0.0)
00175             W_matrix[i] *= -1.0;
00176     // W = W + D
00177     for (i=0; i<N; i++)
00178         W_matrix[i*N+i] += D_diag_vector[i];
00179 
00180 #ifdef HAVE_ARPACK
00181     // using ARPACK DS{E,A}UPD
00182     int eigenproblem_status = 0;
00183     float64_t* eigenvalues_vector = SG_MALLOC(float64_t,m_target_dim+1);
00184     arpack_dsxupd(W_matrix.matrix,D_diag_vector,true,N,m_target_dim+1,"LA",true,3,false,false,-1e-9,0.0,
00185                   eigenvalues_vector,W_matrix.matrix,eigenproblem_status);
00186 
00187     if (eigenproblem_status!=0)
00188         SG_ERROR("DSXUPD failed with code %d\n",eigenproblem_status);
00189 
00190     SG_FREE(eigenvalues_vector);
00191 #else /* HAVE_ARPACK */
00192     // using LAPACK DSYGVX
00193     // requires 2x memory because of dense rhs matrix usage
00194     int eigenproblem_status = 0;
00195     float64_t* eigenvalues_vector = SG_MALLOC(float64_t,N);
00196     float64_t* rhs = SG_CALLOC(float64_t,N*N);
00197     // fill rhs with diag (for safety reasons zeros will be replaced with 1e-3)
00198     for (i=0; i<N; i++)
00199         rhs[i*N+i] = D_diag_vector[i];
00200 
00201     wrap_dsygvx(1,'V','U',N,W_matrix.matrix,N,rhs,N,1,m_target_dim+2,eigenvalues_vector,W_matrix.matrix,&eigenproblem_status);
00202 
00203     if (eigenproblem_status)
00204         SG_ERROR("DSYGVX failed with code: %d.\n",eigenproblem_status);
00205 
00206     SG_FREE(rhs);
00207     SG_FREE(eigenvalues_vector);
00208 
00209 #endif /* HAVE_ARPACK */
00210     SG_FREE(D_diag_vector);
00211 
00212     SGMatrix<float64_t> new_features = SGMatrix<float64_t>(m_target_dim,N);
00213     // fill features according to used solver
00214     for (i=0; i<m_target_dim; i++)
00215     {
00216         for (j=0; j<N; j++)
00217         {
00218             #ifdef HAVE_ARPACK
00219                 new_features[j*m_target_dim+i] = W_matrix[j*(m_target_dim+1)+i+1];
00220             #else
00221                 new_features[j*m_target_dim+i] = W_matrix[(i+1)*N+j];
00222             #endif
00223         }
00224     }
00225     return new CDenseFeatures<float64_t>(new_features);
00226 }
00227 
00228 #endif /* HAVE_LAPACK */