LocallyLinearEmbedding.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2011 Sergey Lisitsyn
00008  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
00009  */
00010 
00011 #include <shogun/preprocessor/LocallyLinearEmbedding.h>
00012 #ifdef HAVE_LAPACK
00013 #include <shogun/preprocessor/DimensionReductionPreprocessor.h>
00014 #include <shogun/mathematics/arpack.h>
00015 #include <shogun/mathematics/lapack.h>
00016 #include <shogun/lib/FibonacciHeap.h>
00017 #include <shogun/lib/common.h>
00018 #include <shogun/mathematics/Math.h>
00019 #include <shogun/io/SGIO.h>
00020 #include <shogun/distance/EuclidianDistance.h>
00021 #include <shogun/lib/Signal.h>
00022 
00023 #ifdef HAVE_PTHREAD
00024 #include <pthread.h>
00025 #endif
00026 
00027 using namespace shogun;
00028 
00029 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00030 struct LINRECONSTRUCTION_THREAD_PARAM
00031 {
00033     int32_t idx_start;
00035     int32_t idx_stop;
00037     int32_t idx_step;
00039     int32_t m_k;
00041     int32_t dim;
00043     int32_t N;
00045     const int32_t* neighborhood_matrix;
00047     const float64_t* feature_matrix;
00049     float64_t* z_matrix;
00051     float64_t* covariance_matrix;
00053     float64_t* id_vector;
00055     float64_t* W_matrix;
00056 };
00057 
00058 struct NEIGHBORHOOD_THREAD_PARAM
00059 {
00061     int32_t idx_start;
00063     int32_t idx_step;
00065     int32_t idx_stop;
00067     int32_t N;
00069     int32_t m_k;
00071     CFibonacciHeap* heap;
00073     const float64_t* distance_matrix;
00075     int32_t* neighborhood_matrix;
00076 };
00077 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
00078 
00079 CLocallyLinearEmbedding::CLocallyLinearEmbedding() :
00080         CDimensionReductionPreprocessor()
00081 {
00082     init();
00083 }
00084 
00085 void CLocallyLinearEmbedding::init()
00086 {
00087     m_k = 3;
00088     m_posdef = true;
00089 
00090     m_parameters->add(&m_k, "k", "number of neighbors");
00091     m_parameters->add(&m_posdef, "posdef", 
00092                       "indicates if matrix should be considered as positive definite");
00093 }
00094 
00095 CLocallyLinearEmbedding::~CLocallyLinearEmbedding()
00096 {
00097 }
00098 
00099 bool CLocallyLinearEmbedding::init(CFeatures* features)
00100 {
00101     return true;
00102 }
00103 
00104 void CLocallyLinearEmbedding::cleanup()
00105 {
00106 }
00107 
00108 SGMatrix<float64_t> CLocallyLinearEmbedding::apply_to_feature_matrix(CFeatures* features)
00109 {
00110     ASSERT(features);
00111     if (!(features->get_feature_class()==C_SIMPLE &&
00112           features->get_feature_type()==F_DREAL))
00113     {
00114         SG_ERROR("Given features are not of SimpleRealFeatures type.\n");
00115     }
00116     // shorthand for simplefeatures
00117     CSimpleFeatures<float64_t>* simple_features = (CSimpleFeatures<float64_t>*) features;
00118     SG_REF(features);
00119 
00120     // get dimensionality and number of vectors of data
00121     int32_t dim = simple_features->get_num_features();
00122     if (m_target_dim>dim)
00123         SG_ERROR("Cannot increase dimensionality: target dimensionality is %d while given features dimensionality is %d.\n",
00124                  m_target_dim, dim);
00125 
00126     int32_t N = simple_features->get_num_vectors();
00127     if (m_k>=N)
00128         SG_ERROR("Number of neighbors (%d) should be less than number of objects (%d).\n",
00129                  m_k, N);
00130 
00131     // loop variables
00132     int32_t i,j,t;
00133 
00134     // compute distance matrix
00135     CDistance* distance = new CEuclidianDistance(simple_features,simple_features);
00136     Parallel* distance_parallel = distance->parallel;
00137     distance->parallel = this->parallel;
00138     SGMatrix<int32_t> neighborhood_matrix = get_neighborhood_matrix(distance);
00139     distance->parallel = distance_parallel;
00140     delete distance;
00141 
00142     // init W (weight) matrix
00143     float64_t* W_matrix = SG_CALLOC(float64_t, N*N);
00144 
00145 #ifdef HAVE_PTHREAD
00146     int32_t num_threads = parallel->get_num_threads();
00147     ASSERT(num_threads>0);
00148     // allocate threads
00149     pthread_t* threads = SG_MALLOC(pthread_t, num_threads);
00150     LINRECONSTRUCTION_THREAD_PARAM* parameters = SG_MALLOC(LINRECONSTRUCTION_THREAD_PARAM, num_threads);
00151     pthread_attr_t attr;
00152     pthread_attr_init(&attr);
00153     pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
00154 #else
00155     int32_t num_threads = 1;
00156 #endif 
00157     // init matrices and norm factor to be used
00158     float64_t* z_matrix = SG_MALLOC(float64_t, m_k*dim*num_threads);
00159     float64_t* covariance_matrix = SG_MALLOC(float64_t, m_k*m_k*num_threads);
00160     float64_t* id_vector = SG_MALLOC(float64_t, m_k*num_threads);
00161 
00162     // get feature matrix
00163     SGMatrix<float64_t> feature_matrix = simple_features->get_feature_matrix();
00164 
00165 #ifdef HAVE_PTHREAD
00166     for (t=0; t<num_threads; t++)
00167     {
00168         parameters[t].idx_start = t;
00169         parameters[t].idx_step = num_threads;
00170         parameters[t].idx_stop = N;
00171         parameters[t].m_k = m_k;
00172         parameters[t].dim = dim;
00173         parameters[t].N = N;
00174         parameters[t].neighborhood_matrix = neighborhood_matrix.matrix;
00175         parameters[t].z_matrix = z_matrix+(m_k*dim)*t;
00176         parameters[t].feature_matrix = feature_matrix.matrix;
00177         parameters[t].covariance_matrix = covariance_matrix+(m_k*m_k)*t;
00178         parameters[t].id_vector = id_vector+m_k*t;
00179         parameters[t].W_matrix = W_matrix;
00180         pthread_create(&threads[t], &attr, run_linearreconstruction_thread, (void*)&parameters[t]);
00181     }
00182     for (t=0; t<num_threads; t++)
00183         pthread_join(threads[t], NULL);
00184     pthread_attr_destroy(&attr);
00185     SG_FREE(parameters);
00186     SG_FREE(threads);
00187 #else
00188     LINRECONSTRUCTION_THREAD_PARAM single_thread_param;
00189     single_thread_param.idx_start = 0;
00190     single_thread_param.idx_step = 1;
00191     single_thread_param.idx_stop = N;
00192     single_thread_param.m_k = m_k;
00193     single_thread_param.dim = dim;
00194     single_thread_param.N = N;
00195     single_thread_param.neighborhood_matrix = neighborhood_matrix.matrix;
00196     single_thread_param.z_matrix = z_matrix;
00197     single_thread_param.feature_matrix = feature_matrix.matrix;
00198     single_thread_param.covariance_matrix = covariance_matrix;
00199     single_thread_param.id_vector = id_vector;
00200     single_thread_param.W_matrix = W_matrix;
00201     run_linearreconstruction_thread((void*)single_thread_param);
00202 #endif
00203 
00204     // clean
00205     SG_FREE(id_vector);
00206     neighborhood_matrix.destroy_matrix();
00207     SG_FREE(z_matrix);
00208     SG_FREE(covariance_matrix);
00209 
00210     // W=I-W
00211     for (i=0; i<N; i++)
00212     {
00213         for (j=0; j<N; j++)
00214             W_matrix[j*N+i] = (i==j) ? 1.0-W_matrix[j*N+i] : -W_matrix[j*N+i];
00215     }
00216 
00217     // compute M=(W-I)'*(W-I)
00218     SGMatrix<float64_t> M_matrix(N,N);
00219     cblas_dgemm(CblasColMajor,CblasTrans, CblasNoTrans,
00220                 N,N,N,
00221                 1.0,W_matrix,N,
00222                 W_matrix,N,
00223                 0.0,M_matrix.matrix,N);
00224 
00225     SG_FREE(W_matrix);
00226 
00227     simple_features->set_feature_matrix(find_null_space(M_matrix,m_target_dim,false));
00228     M_matrix.destroy_matrix();
00229 
00230     SG_UNREF(features);
00231     return simple_features->get_feature_matrix();
00232 }
00233 
00234 SGVector<float64_t> CLocallyLinearEmbedding::apply_to_feature_vector(SGVector<float64_t> vector)
00235 {
00236     SG_NOTIMPLEMENTED;
00237     return vector;
00238 }
00239 
00240 SGMatrix<float64_t> CLocallyLinearEmbedding::find_null_space(SGMatrix<float64_t> matrix, int dimension, bool force_lapack)
00241 {
00242     int i,j;
00243     ASSERT(matrix.num_cols==matrix.num_rows);
00244     int N = matrix.num_cols;
00245     // get eigenvectors with ARPACK or LAPACK
00246     int eigenproblem_status = 0;
00247 
00248     bool arpack = false;
00249 
00250 #ifdef HAVE_ARPACK
00251     arpack = true;
00252 #endif
00253 
00254     if (force_lapack) arpack = false;
00255 
00256     float64_t* eigenvalues_vector;
00257     float64_t* eigenvectors;
00258     if (arpack)
00259     {
00260         // using ARPACK (faster)
00261         eigenvalues_vector = SG_MALLOC(float64_t, dimension+1);
00262         #ifdef HAVE_ARPACK
00263         arpack_dsaeupd_wrap(matrix.matrix,NULL,N,dimension+1,"LA",3,m_posdef,-1e-6, 0.0,
00264                             eigenvalues_vector,matrix.matrix,eigenproblem_status);
00265         #endif
00266     }
00267     else
00268     {
00269         // using LAPACK (slower)
00270         eigenvalues_vector = SG_MALLOC(float64_t, N);
00271         eigenvectors = SG_MALLOC(float64_t,(dimension+1)*N);
00272         wrap_dsyevr('V','U',N,matrix.matrix,N,2,dimension+2,eigenvalues_vector,eigenvectors,&eigenproblem_status);
00273     }
00274 
00275     // check if failed
00276     if (eigenproblem_status)
00277         SG_ERROR("Eigenproblem failed with code: %d", eigenproblem_status);
00278     
00279     // allocate null space feature matrix
00280     float64_t* null_space_features = SG_MALLOC(float64_t, N*dimension);
00281 
00282     // construct embedding w.r.t to used solver (prefer ARPACK if available)
00283     if (arpack) 
00284     {
00285         // ARPACKed eigenvectors
00286         for (i=0; i<dimension; i++)
00287         {
00288             for (j=0; j<N; j++)
00289                 null_space_features[j*dimension+i] = matrix.matrix[j*(dimension+1)+i+1];
00290         }
00291     }
00292     else
00293     {
00294         // LAPACKed eigenvectors
00295         for (i=0; i<dimension; i++)
00296         {
00297             for (j=0; j<N; j++)
00298                 null_space_features[j*dimension+i] = eigenvectors[i*N+j];
00299         }
00300         SG_FREE(eigenvectors);
00301     }
00302     SG_FREE(eigenvalues_vector);
00303 
00304     return SGMatrix<float64_t>(null_space_features,dimension,N);
00305 }
00306 
00307 void* CLocallyLinearEmbedding::run_linearreconstruction_thread(void* p)
00308 {
00309     LINRECONSTRUCTION_THREAD_PARAM* parameters = (LINRECONSTRUCTION_THREAD_PARAM*)p;
00310     int32_t idx_start = parameters->idx_start;
00311     int32_t idx_step = parameters->idx_step;
00312     int32_t idx_stop = parameters->idx_stop;
00313     int32_t m_k = parameters->m_k;
00314     int32_t dim = parameters->dim;
00315     int32_t N = parameters->N;
00316     const int32_t* neighborhood_matrix = parameters->neighborhood_matrix;
00317     float64_t* z_matrix = parameters->z_matrix;
00318     const float64_t* feature_matrix = parameters->feature_matrix;
00319     float64_t* covariance_matrix = parameters->covariance_matrix;
00320     float64_t* id_vector = parameters->id_vector;
00321     float64_t* W_matrix = parameters->W_matrix;
00322 
00323     int32_t i,j,k;
00324     float64_t norming,trace;
00325 
00326     for (i=idx_start; i<idx_stop; i+=idx_step)
00327     {
00328         // compute local feature matrix containing neighbors of i-th vector
00329         for (j=0; j<m_k; j++)
00330         {
00331             for (k=0; k<dim; k++)
00332                 z_matrix[j*dim+k] = feature_matrix[neighborhood_matrix[j*N+i]*dim+k];
00333         }
00334 
00335         // center features by subtracting i-th feature column
00336         for (j=0; j<m_k; j++)
00337         {
00338             for (k=0; k<dim; k++)
00339                 z_matrix[j*dim+k] -= feature_matrix[i*dim+k];
00340         }
00341 
00342         // compute local covariance matrix
00343         cblas_dgemm(CblasColMajor,CblasTrans,CblasNoTrans,
00344                     m_k,m_k,dim,
00345                     1.0,z_matrix,dim,
00346                     z_matrix,dim,
00347                     0.0,covariance_matrix,m_k);
00348 
00349         for (j=0; j<m_k; j++)
00350             id_vector[j] = 1.0;
00351 
00352         // regularize in case of ill-posed system
00353         if (m_k>dim)
00354         {
00355             // compute tr(C)
00356             trace = 0.0;
00357             for (j=0; j<m_k; j++)
00358                 trace += covariance_matrix[j*m_k+j];
00359 
00360             for (j=0; j<m_k; j++)
00361                 covariance_matrix[j*m_k+j] += 1e-3*trace;
00362         }
00363 
00364         // solve system of linear equations: covariance_matrix * X = 1
00365         // covariance_matrix is a pos-def matrix
00366         clapack_dposv(CblasColMajor,CblasLower,m_k,1,covariance_matrix,m_k,id_vector,m_k);
00367 
00368         // normalize weights
00369         norming=0.0;
00370         for (j=0; j<m_k; j++)
00371             norming += id_vector[j];
00372 
00373         for (j=0; j<m_k; j++)
00374             id_vector[j]/=norming;
00375 
00376         // put weights into W matrix
00377         for (j=0; j<m_k; j++)
00378             W_matrix[N*neighborhood_matrix[j*N+i]+i]=id_vector[j];
00379     }
00380     return NULL;
00381 }
00382 
00383 SGMatrix<int32_t> CLocallyLinearEmbedding::get_neighborhood_matrix(CDistance* distance)
00384 {
00385     int32_t t;
00386     SGMatrix<float64_t> distance_matrix = distance->get_distance_matrix();
00387     int32_t N = distance->get_num_vec_lhs();
00388     // init matrix and heap to be used
00389     int32_t* neighborhood_matrix = SG_MALLOC(int32_t, N*m_k);
00390 #ifdef HAVE_PTHREAD
00391     int32_t num_threads = parallel->get_num_threads();
00392     ASSERT(num_threads>0);
00393     NEIGHBORHOOD_THREAD_PARAM* parameters = SG_MALLOC(NEIGHBORHOOD_THREAD_PARAM, num_threads);
00394     pthread_t* threads = SG_MALLOC(pthread_t, num_threads);
00395     pthread_attr_t attr;
00396     pthread_attr_init(&attr);
00397     pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
00398 #else
00399     int32_t num_threads = 1;
00400 #endif
00401     CFibonacciHeap** heaps = SG_MALLOC(CFibonacciHeap*, num_threads);
00402     for (t=0; t<num_threads; t++)
00403         heaps[t] = new CFibonacciHeap(N);
00404 
00405 #ifdef HAVE_PTHREAD
00406     for (t=0; t<num_threads; t++)
00407     {
00408         parameters[t].idx_start = t;
00409         parameters[t].idx_step = num_threads;
00410         parameters[t].idx_stop = N;
00411         parameters[t].m_k = m_k;
00412         parameters[t].N = N;
00413         parameters[t].heap = heaps[t];
00414         parameters[t].neighborhood_matrix = neighborhood_matrix;
00415         parameters[t].distance_matrix = distance_matrix.matrix;
00416         pthread_create(&threads[t], &attr, run_neighborhood_thread, (void*)&parameters[t]);
00417     }
00418     for (t=0; t<num_threads; t++)
00419         pthread_join(threads[t], NULL);
00420     pthread_attr_destroy(&attr);
00421     SG_FREE(threads);
00422     SG_FREE(parameters);
00423 #else
00424     NEIGHBORHOOD_THREAD_PARAM single_thread_param;
00425     single_thread_param.idx_start = 0;
00426     single_thread_param.idx_step = 1;
00427     single_thread_param.idx_stop = N;
00428     single_thread_param.m_k = m_k;
00429     single_thread_param.N = N;
00430     single_thread_param.heap = heaps[0]
00431     single_thread_param.neighborhood_matrix = neighborhood_matrix;
00432     single_thread_param.distance_matrix = distance_matrix.matrix;
00433     run_neighborhood_thread((void*)&single_thread_param);
00434 #endif
00435 
00436     for (t=0; t<num_threads; t++)
00437         delete heaps[t];
00438     SG_FREE(heaps);
00439     distance_matrix.destroy_matrix();
00440 
00441     return SGMatrix<int32_t>(neighborhood_matrix,m_k,N);
00442 }
00443 
00444 
00445 void* CLocallyLinearEmbedding::run_neighborhood_thread(void* p)
00446 {
00447     NEIGHBORHOOD_THREAD_PARAM* parameters = (NEIGHBORHOOD_THREAD_PARAM*)p;
00448     int32_t idx_start = parameters->idx_start;
00449     int32_t idx_step = parameters->idx_step;
00450     int32_t idx_stop = parameters->idx_stop;
00451     int32_t N = parameters->N;
00452     int32_t m_k = parameters->m_k;
00453     CFibonacciHeap* heap = parameters->heap;
00454     const float64_t* distance_matrix = parameters->distance_matrix;
00455     int32_t* neighborhood_matrix = parameters->neighborhood_matrix;
00456 
00457     int32_t i,j;
00458     float64_t tmp;
00459     for (i=idx_start; i<idx_stop; i+=idx_step)
00460     {
00461         for (j=0; j<N; j++)
00462         {
00463             heap->insert(j,distance_matrix[i*N+j]);
00464         }
00465 
00466         heap->extract_min(tmp);
00467 
00468         for (j=0; j<m_k; j++)
00469             neighborhood_matrix[j*N+i] = heap->extract_min(tmp);
00470 
00471         heap->clear();
00472     }
00473 
00474     return NULL;
00475 }
00476 #endif /* HAVE_LAPACK */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation