LaplacianEigenmaps.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2011 Sergey Lisitsyn
00008  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
00009  */
00010 
00011 #include <shogun/preprocessor/LaplacianEigenmaps.h>
00012 #ifdef HAVE_LAPACK
00013 #include <shogun/preprocessor/DimensionReductionPreprocessor.h>
00014 #include <shogun/mathematics/arpack.h>
00015 #include <shogun/mathematics/lapack.h>
00016 #include <shogun/lib/common.h>
00017 #include <shogun/lib/FibonacciHeap.h>
00018 #include <shogun/mathematics/Math.h>
00019 #include <shogun/io/SGIO.h>
00020 #include <shogun/distance/EuclidianDistance.h>
00021 #include <shogun/lib/Signal.h>
00022 
00023 using namespace shogun;
00024 
00025 CLaplacianEigenmaps::CLaplacianEigenmaps() :
00026         CDimensionReductionPreprocessor()
00027 {
00028     init();
00029 }
00030 
00031 void CLaplacianEigenmaps::init()
00032 {
00033     m_k = 3;
00034     m_tau = 1.0;
00035 
00036     m_parameters->add(&m_k, "k", "number of neighbors");
00037     m_parameters->add(&m_tau, "tau", "heat distribution coefficient");
00038 }
00039 
00040 CLaplacianEigenmaps::~CLaplacianEigenmaps()
00041 {
00042 }
00043 
00044 bool CLaplacianEigenmaps::init(CFeatures* features)
00045 {
00046     return true;
00047 }
00048 
00049 void CLaplacianEigenmaps::cleanup()
00050 {
00051 }
00052 
00053 SGMatrix<float64_t> CLaplacianEigenmaps::apply_to_feature_matrix(CFeatures* features)
00054 {
00055     // shorthand for simplefeatures
00056     CSimpleFeatures<float64_t>* simple_features = (CSimpleFeatures<float64_t>*) features;
00057     SG_REF(features);
00058     ASSERT(simple_features);
00059 
00060     // get dimensionality and number of vectors of data
00061     int32_t dim = simple_features->get_num_features();
00062     ASSERT(m_target_dim<=dim);
00063     int32_t N = simple_features->get_num_vectors();
00064     ASSERT(m_k<N);
00065 
00066     // loop variables
00067     int32_t i,j;
00068 
00069     // compute distance matrix
00070     CDistance* distance = new CEuclidianDistance(simple_features,simple_features);
00071     SGMatrix<float64_t> W_sgmatrix = distance->get_distance_matrix();
00072     // shorthand
00073     float64_t* W_matrix = W_sgmatrix.matrix;
00074     delete distance;
00075 
00076     // init heap to use
00077     CFibonacciHeap* heap = new CFibonacciHeap(N);
00078     float64_t tmp;
00079     // for each object
00080     for (i=0; i<N; i++)
00081     {
00082         // fill heap
00083         for (j=0; j<N; j++)
00084             heap->insert(j,W_matrix[i*N+j]);
00085 
00086         // rearrange heap with extracting ith object itself
00087         heap->extract_min(tmp);
00088 
00089         // extract nearest neighbors, takes ~O(k*log n), and change sign for them
00090         for (j=0; j<m_k; j++)
00091             W_matrix[i*N+heap->extract_min(tmp)] *= -1.0;
00092 
00093         // remove all 'positive' distances and change 'negative' ones to positive
00094         for (j=0; j<N; j++)
00095         {
00096             if (W_matrix[i*N+j]>0.0)
00097                 W_matrix[i*N+j] = 0.0;
00098             else
00099                 W_matrix[i*N+j] *= -1.0;
00100         }
00101         
00102         // clear heap to reuse
00103         heap->clear();
00104     }
00105     delete heap;
00106     // make distance matrix symmetric with mutual kNN relation
00107     for (i=0; i<N; i++)
00108     {
00109         // check only upper triangle
00110         for (j=i; j<N; j++)
00111         {
00112             // make kNN relation symmetric
00113             if (W_matrix[i*N+j]!=0.0 || W_matrix[j*N+i]==0.0)
00114             {
00115                 W_matrix[j*N+i] = W_matrix[i*N+j];
00116             }
00117             if (W_matrix[j*N+i]!=0.0 || W_matrix[i*N+j]==0.0)
00118             {
00119                 W_matrix[i*N+j] = W_matrix[j*N+i];
00120             }
00121             
00122             if (W_matrix[i*N+j] != 0.0)
00123             {
00124                 // compute heat, exp(-d^2/tau)
00125                 tmp = CMath::exp(-CMath::sq(W_matrix[i*N+j])/m_tau);
00126                 W_matrix[i*N+j] = tmp;
00127                 W_matrix[j*N+i] = tmp;
00128             }
00129         }
00130     }
00131 
00132     // compute D
00133     float64_t* D_diag_vector = SG_CALLOC(float64_t, N);
00134     for (i=0; i<N; i++)
00135     {
00136         for (j=0; j<N; j++)
00137             D_diag_vector[i] += W_matrix[i*N+j];
00138     }
00139 
00140     // W = -W
00141     for (i=0; i<N*N; i++)
00142         if (W_matrix[i]>0.0)
00143             W_matrix[i] *= -1.0;
00144     // W = W + D
00145     for (i=0; i<N; i++)
00146         W_matrix[i*N+i] += D_diag_vector[i];
00147 
00148     #ifdef HAVE_ARPACK
00149         // using ARPACK DS{E,A}UPD
00150         int eigenproblem_status = 0;
00151         float64_t* eigenvalues_vector = SG_MALLOC(float64_t,m_target_dim+1);
00152         arpack_dsaeupd_wrap(W_matrix,D_diag_vector,N,m_target_dim+1,"LA",3,false,0.0,0.0,
00153                             eigenvalues_vector,W_matrix,eigenproblem_status);
00154         ASSERT(eigenproblem_status==0);
00155         SG_FREE(eigenvalues_vector);
00156     #else
00157         // using LAPACK DSYGVX
00158         // requires 2x memory because of dense rhs matrix usage
00159         int eigenproblem_status = 0;
00160         float64_t* eigenvalues_vector = SG_MALLOC(float64_t,N);
00161         float64_t* rhs = SG_CALLOC(float64_t,N*N);
00162         // fill rhs with diag (for safety reasons zeros will be replaced with 1e-3)
00163         for (i=0; i<N; i++)
00164             rhs[i*N+i] = D_diag_vector[i];
00165         wrap_dsygvx(1,'V','U',N,W_matrix,N,rhs,N,1,m_target_dim+2,eigenvalues_vector,W_matrix,&eigenproblem_status);
00166         if (eigenproblem_status)
00167             SG_ERROR("DSYGVX failed with code: %d.\n",eigenproblem_status);
00168         SG_FREE(rhs);
00169         SG_FREE(eigenvalues_vector);
00170     #endif /* HAVE_ARPACK */
00171     SG_FREE(D_diag_vector);
00172 
00173     SGMatrix<float64_t> new_features = SGMatrix<float64_t>(m_target_dim,N);
00174     // fill features according to used solver
00175     for (i=0; i<m_target_dim; i++)
00176     {
00177         for (j=0; j<N; j++)
00178         {
00179             #ifdef HAVE_ARPACK
00180                 new_features.matrix[j*m_target_dim+i] = W_matrix[j*(m_target_dim+1)+i+1];
00181             #else
00182                 new_features.matrix[j*m_target_dim+i] = W_matrix[(i+1)*N+j];
00183             #endif
00184         }
00185     }
00186     W_sgmatrix.destroy_matrix();
00187 
00188     simple_features->set_feature_matrix(new_features);
00189     SG_UNREF(features);
00190     return simple_features->get_feature_matrix();
00191 }
00192 
00193 SGVector<float64_t> CLaplacianEigenmaps::apply_to_feature_vector(SGVector<float64_t> vector)
00194 {
00195     SG_NOTIMPLEMENTED;
00196     return vector;
00197 }
00198 
00199 #endif /* HAVE_LAPACK */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation