00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include <shogun/preprocessor/LocallyLinearEmbedding.h>
00012 #ifdef HAVE_LAPACK
00013 #include <shogun/preprocessor/DimensionReductionPreprocessor.h>
00014 #include <shogun/mathematics/arpack.h>
00015 #include <shogun/mathematics/lapack.h>
00016 #include <shogun/lib/FibonacciHeap.h>
00017 #include <shogun/lib/common.h>
00018 #include <shogun/mathematics/Math.h>
00019 #include <shogun/io/SGIO.h>
00020 #include <shogun/distance/EuclidianDistance.h>
00021 #include <shogun/lib/Signal.h>
00022
00023 #ifdef HAVE_PTHREAD
00024 #include <pthread.h>
00025 #endif
00026
00027 using namespace shogun;
00028
00029 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00030 struct LINRECONSTRUCTION_THREAD_PARAM
00031 {
00033 int32_t idx_start;
00035 int32_t idx_stop;
00037 int32_t idx_step;
00039 int32_t m_k;
00041 int32_t dim;
00043 int32_t N;
00045 const int32_t* neighborhood_matrix;
00047 const float64_t* feature_matrix;
00049 float64_t* z_matrix;
00051 float64_t* covariance_matrix;
00053 float64_t* id_vector;
00055 float64_t* W_matrix;
00056 };
00057
00058 struct NEIGHBORHOOD_THREAD_PARAM
00059 {
00061 int32_t idx_start;
00063 int32_t idx_step;
00065 int32_t idx_stop;
00067 int32_t N;
00069 int32_t m_k;
00071 CFibonacciHeap* heap;
00073 const float64_t* distance_matrix;
00075 int32_t* neighborhood_matrix;
00076 };
00077 #endif
00078
00079 CLocallyLinearEmbedding::CLocallyLinearEmbedding() :
00080 CDimensionReductionPreprocessor()
00081 {
00082 init();
00083 }
00084
00085 void CLocallyLinearEmbedding::init()
00086 {
00087 m_k = 3;
00088 m_posdef = true;
00089
00090 m_parameters->add(&m_k, "k", "number of neighbors");
00091 m_parameters->add(&m_posdef, "posdef",
00092 "indicates if matrix should be considered as positive definite");
00093 }
00094
00095 CLocallyLinearEmbedding::~CLocallyLinearEmbedding()
00096 {
00097 }
00098
00099 bool CLocallyLinearEmbedding::init(CFeatures* features)
00100 {
00101 return true;
00102 }
00103
00104 void CLocallyLinearEmbedding::cleanup()
00105 {
00106 }
00107
00108 SGMatrix<float64_t> CLocallyLinearEmbedding::apply_to_feature_matrix(CFeatures* features)
00109 {
00110 ASSERT(features);
00111 if (!(features->get_feature_class()==C_SIMPLE &&
00112 features->get_feature_type()==F_DREAL))
00113 {
00114 SG_ERROR("Given features are not of SimpleRealFeatures type.\n");
00115 }
00116
00117 CSimpleFeatures<float64_t>* simple_features = (CSimpleFeatures<float64_t>*) features;
00118 SG_REF(features);
00119
00120
00121 int32_t dim = simple_features->get_num_features();
00122 if (m_target_dim>dim)
00123 SG_ERROR("Cannot increase dimensionality: target dimensionality is %d while given features dimensionality is %d.\n",
00124 m_target_dim, dim);
00125
00126 int32_t N = simple_features->get_num_vectors();
00127 if (m_k>=N)
00128 SG_ERROR("Number of neighbors (%d) should be less than number of objects (%d).\n",
00129 m_k, N);
00130
00131
00132 int32_t i,j,t;
00133
00134
00135 CDistance* distance = new CEuclidianDistance(simple_features,simple_features);
00136 Parallel* distance_parallel = distance->parallel;
00137 distance->parallel = this->parallel;
00138 SGMatrix<int32_t> neighborhood_matrix = get_neighborhood_matrix(distance);
00139 distance->parallel = distance_parallel;
00140 delete distance;
00141
00142
00143 float64_t* W_matrix = SG_CALLOC(float64_t, N*N);
00144
00145 #ifdef HAVE_PTHREAD
00146 int32_t num_threads = parallel->get_num_threads();
00147 ASSERT(num_threads>0);
00148
00149 pthread_t* threads = SG_MALLOC(pthread_t, num_threads);
00150 LINRECONSTRUCTION_THREAD_PARAM* parameters = SG_MALLOC(LINRECONSTRUCTION_THREAD_PARAM, num_threads);
00151 pthread_attr_t attr;
00152 pthread_attr_init(&attr);
00153 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
00154 #else
00155 int32_t num_threads = 1;
00156 #endif
00157
00158 float64_t* z_matrix = SG_MALLOC(float64_t, m_k*dim*num_threads);
00159 float64_t* covariance_matrix = SG_MALLOC(float64_t, m_k*m_k*num_threads);
00160 float64_t* id_vector = SG_MALLOC(float64_t, m_k*num_threads);
00161
00162
00163 SGMatrix<float64_t> feature_matrix = simple_features->get_feature_matrix();
00164
00165 #ifdef HAVE_PTHREAD
00166 for (t=0; t<num_threads; t++)
00167 {
00168 parameters[t].idx_start = t;
00169 parameters[t].idx_step = num_threads;
00170 parameters[t].idx_stop = N;
00171 parameters[t].m_k = m_k;
00172 parameters[t].dim = dim;
00173 parameters[t].N = N;
00174 parameters[t].neighborhood_matrix = neighborhood_matrix.matrix;
00175 parameters[t].z_matrix = z_matrix+(m_k*dim)*t;
00176 parameters[t].feature_matrix = feature_matrix.matrix;
00177 parameters[t].covariance_matrix = covariance_matrix+(m_k*m_k)*t;
00178 parameters[t].id_vector = id_vector+m_k*t;
00179 parameters[t].W_matrix = W_matrix;
00180 pthread_create(&threads[t], &attr, run_linearreconstruction_thread, (void*)¶meters[t]);
00181 }
00182 for (t=0; t<num_threads; t++)
00183 pthread_join(threads[t], NULL);
00184 pthread_attr_destroy(&attr);
00185 SG_FREE(parameters);
00186 SG_FREE(threads);
00187 #else
00188 LINRECONSTRUCTION_THREAD_PARAM single_thread_param;
00189 single_thread_param.idx_start = 0;
00190 single_thread_param.idx_step = 1;
00191 single_thread_param.idx_stop = N;
00192 single_thread_param.m_k = m_k;
00193 single_thread_param.dim = dim;
00194 single_thread_param.N = N;
00195 single_thread_param.neighborhood_matrix = neighborhood_matrix.matrix;
00196 single_thread_param.z_matrix = z_matrix;
00197 single_thread_param.feature_matrix = feature_matrix.matrix;
00198 single_thread_param.covariance_matrix = covariance_matrix;
00199 single_thread_param.id_vector = id_vector;
00200 single_thread_param.W_matrix = W_matrix;
00201 run_linearreconstruction_thread((void*)single_thread_param);
00202 #endif
00203
00204
00205 SG_FREE(id_vector);
00206 neighborhood_matrix.destroy_matrix();
00207 SG_FREE(z_matrix);
00208 SG_FREE(covariance_matrix);
00209
00210
00211 for (i=0; i<N; i++)
00212 {
00213 for (j=0; j<N; j++)
00214 W_matrix[j*N+i] = (i==j) ? 1.0-W_matrix[j*N+i] : -W_matrix[j*N+i];
00215 }
00216
00217
00218 SGMatrix<float64_t> M_matrix(N,N);
00219 cblas_dgemm(CblasColMajor,CblasTrans, CblasNoTrans,
00220 N,N,N,
00221 1.0,W_matrix,N,
00222 W_matrix,N,
00223 0.0,M_matrix.matrix,N);
00224
00225 SG_FREE(W_matrix);
00226
00227 simple_features->set_feature_matrix(find_null_space(M_matrix,m_target_dim,false));
00228 M_matrix.destroy_matrix();
00229
00230 SG_UNREF(features);
00231 return simple_features->get_feature_matrix();
00232 }
00233
00234 SGVector<float64_t> CLocallyLinearEmbedding::apply_to_feature_vector(SGVector<float64_t> vector)
00235 {
00236 SG_NOTIMPLEMENTED;
00237 return vector;
00238 }
00239
00240 SGMatrix<float64_t> CLocallyLinearEmbedding::find_null_space(SGMatrix<float64_t> matrix, int dimension, bool force_lapack)
00241 {
00242 int i,j;
00243 ASSERT(matrix.num_cols==matrix.num_rows);
00244 int N = matrix.num_cols;
00245
00246 int eigenproblem_status = 0;
00247
00248 bool arpack = false;
00249
00250 #ifdef HAVE_ARPACK
00251 arpack = true;
00252 #endif
00253
00254 if (force_lapack) arpack = false;
00255
00256 float64_t* eigenvalues_vector;
00257 float64_t* eigenvectors;
00258 if (arpack)
00259 {
00260
00261 eigenvalues_vector = SG_MALLOC(float64_t, dimension+1);
00262 #ifdef HAVE_ARPACK
00263 arpack_dsaeupd_wrap(matrix.matrix,NULL,N,dimension+1,"LA",3,m_posdef,-1e-6, 0.0,
00264 eigenvalues_vector,matrix.matrix,eigenproblem_status);
00265 #endif
00266 }
00267 else
00268 {
00269
00270 eigenvalues_vector = SG_MALLOC(float64_t, N);
00271 eigenvectors = SG_MALLOC(float64_t,(dimension+1)*N);
00272 wrap_dsyevr('V','U',N,matrix.matrix,N,2,dimension+2,eigenvalues_vector,eigenvectors,&eigenproblem_status);
00273 }
00274
00275
00276 if (eigenproblem_status)
00277 SG_ERROR("Eigenproblem failed with code: %d", eigenproblem_status);
00278
00279
00280 float64_t* null_space_features = SG_MALLOC(float64_t, N*dimension);
00281
00282
00283 if (arpack)
00284 {
00285
00286 for (i=0; i<dimension; i++)
00287 {
00288 for (j=0; j<N; j++)
00289 null_space_features[j*dimension+i] = matrix.matrix[j*(dimension+1)+i+1];
00290 }
00291 }
00292 else
00293 {
00294
00295 for (i=0; i<dimension; i++)
00296 {
00297 for (j=0; j<N; j++)
00298 null_space_features[j*dimension+i] = eigenvectors[i*N+j];
00299 }
00300 SG_FREE(eigenvectors);
00301 }
00302 SG_FREE(eigenvalues_vector);
00303
00304 return SGMatrix<float64_t>(null_space_features,dimension,N);
00305 }
00306
00307 void* CLocallyLinearEmbedding::run_linearreconstruction_thread(void* p)
00308 {
00309 LINRECONSTRUCTION_THREAD_PARAM* parameters = (LINRECONSTRUCTION_THREAD_PARAM*)p;
00310 int32_t idx_start = parameters->idx_start;
00311 int32_t idx_step = parameters->idx_step;
00312 int32_t idx_stop = parameters->idx_stop;
00313 int32_t m_k = parameters->m_k;
00314 int32_t dim = parameters->dim;
00315 int32_t N = parameters->N;
00316 const int32_t* neighborhood_matrix = parameters->neighborhood_matrix;
00317 float64_t* z_matrix = parameters->z_matrix;
00318 const float64_t* feature_matrix = parameters->feature_matrix;
00319 float64_t* covariance_matrix = parameters->covariance_matrix;
00320 float64_t* id_vector = parameters->id_vector;
00321 float64_t* W_matrix = parameters->W_matrix;
00322
00323 int32_t i,j,k;
00324 float64_t norming,trace;
00325
00326 for (i=idx_start; i<idx_stop; i+=idx_step)
00327 {
00328
00329 for (j=0; j<m_k; j++)
00330 {
00331 for (k=0; k<dim; k++)
00332 z_matrix[j*dim+k] = feature_matrix[neighborhood_matrix[j*N+i]*dim+k];
00333 }
00334
00335
00336 for (j=0; j<m_k; j++)
00337 {
00338 for (k=0; k<dim; k++)
00339 z_matrix[j*dim+k] -= feature_matrix[i*dim+k];
00340 }
00341
00342
00343 cblas_dgemm(CblasColMajor,CblasTrans,CblasNoTrans,
00344 m_k,m_k,dim,
00345 1.0,z_matrix,dim,
00346 z_matrix,dim,
00347 0.0,covariance_matrix,m_k);
00348
00349 for (j=0; j<m_k; j++)
00350 id_vector[j] = 1.0;
00351
00352
00353 if (m_k>dim)
00354 {
00355
00356 trace = 0.0;
00357 for (j=0; j<m_k; j++)
00358 trace += covariance_matrix[j*m_k+j];
00359
00360 for (j=0; j<m_k; j++)
00361 covariance_matrix[j*m_k+j] += 1e-3*trace;
00362 }
00363
00364
00365
00366 clapack_dposv(CblasColMajor,CblasLower,m_k,1,covariance_matrix,m_k,id_vector,m_k);
00367
00368
00369 norming=0.0;
00370 for (j=0; j<m_k; j++)
00371 norming += id_vector[j];
00372
00373 for (j=0; j<m_k; j++)
00374 id_vector[j]/=norming;
00375
00376
00377 for (j=0; j<m_k; j++)
00378 W_matrix[N*neighborhood_matrix[j*N+i]+i]=id_vector[j];
00379 }
00380 return NULL;
00381 }
00382
00383 SGMatrix<int32_t> CLocallyLinearEmbedding::get_neighborhood_matrix(CDistance* distance)
00384 {
00385 int32_t t;
00386 SGMatrix<float64_t> distance_matrix = distance->get_distance_matrix();
00387 int32_t N = distance->get_num_vec_lhs();
00388
00389 int32_t* neighborhood_matrix = SG_MALLOC(int32_t, N*m_k);
00390 #ifdef HAVE_PTHREAD
00391 int32_t num_threads = parallel->get_num_threads();
00392 ASSERT(num_threads>0);
00393 NEIGHBORHOOD_THREAD_PARAM* parameters = SG_MALLOC(NEIGHBORHOOD_THREAD_PARAM, num_threads);
00394 pthread_t* threads = SG_MALLOC(pthread_t, num_threads);
00395 pthread_attr_t attr;
00396 pthread_attr_init(&attr);
00397 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
00398 #else
00399 int32_t num_threads = 1;
00400 #endif
00401 CFibonacciHeap** heaps = SG_MALLOC(CFibonacciHeap*, num_threads);
00402 for (t=0; t<num_threads; t++)
00403 heaps[t] = new CFibonacciHeap(N);
00404
00405 #ifdef HAVE_PTHREAD
00406 for (t=0; t<num_threads; t++)
00407 {
00408 parameters[t].idx_start = t;
00409 parameters[t].idx_step = num_threads;
00410 parameters[t].idx_stop = N;
00411 parameters[t].m_k = m_k;
00412 parameters[t].N = N;
00413 parameters[t].heap = heaps[t];
00414 parameters[t].neighborhood_matrix = neighborhood_matrix;
00415 parameters[t].distance_matrix = distance_matrix.matrix;
00416 pthread_create(&threads[t], &attr, run_neighborhood_thread, (void*)¶meters[t]);
00417 }
00418 for (t=0; t<num_threads; t++)
00419 pthread_join(threads[t], NULL);
00420 pthread_attr_destroy(&attr);
00421 SG_FREE(threads);
00422 SG_FREE(parameters);
00423 #else
00424 NEIGHBORHOOD_THREAD_PARAM single_thread_param;
00425 single_thread_param.idx_start = 0;
00426 single_thread_param.idx_step = 1;
00427 single_thread_param.idx_stop = N;
00428 single_thread_param.m_k = m_k;
00429 single_thread_param.N = N;
00430 single_thread_param.heap = heaps[0]
00431 single_thread_param.neighborhood_matrix = neighborhood_matrix;
00432 single_thread_param.distance_matrix = distance_matrix.matrix;
00433 run_neighborhood_thread((void*)&single_thread_param);
00434 #endif
00435
00436 for (t=0; t<num_threads; t++)
00437 delete heaps[t];
00438 SG_FREE(heaps);
00439 distance_matrix.destroy_matrix();
00440
00441 return SGMatrix<int32_t>(neighborhood_matrix,m_k,N);
00442 }
00443
00444
00445 void* CLocallyLinearEmbedding::run_neighborhood_thread(void* p)
00446 {
00447 NEIGHBORHOOD_THREAD_PARAM* parameters = (NEIGHBORHOOD_THREAD_PARAM*)p;
00448 int32_t idx_start = parameters->idx_start;
00449 int32_t idx_step = parameters->idx_step;
00450 int32_t idx_stop = parameters->idx_stop;
00451 int32_t N = parameters->N;
00452 int32_t m_k = parameters->m_k;
00453 CFibonacciHeap* heap = parameters->heap;
00454 const float64_t* distance_matrix = parameters->distance_matrix;
00455 int32_t* neighborhood_matrix = parameters->neighborhood_matrix;
00456
00457 int32_t i,j;
00458 float64_t tmp;
00459 for (i=idx_start; i<idx_stop; i+=idx_step)
00460 {
00461 for (j=0; j<N; j++)
00462 {
00463 heap->insert(j,distance_matrix[i*N+j]);
00464 }
00465
00466 heap->extract_min(tmp);
00467
00468 for (j=0; j<m_k; j++)
00469 neighborhood_matrix[j*N+i] = heap->extract_min(tmp);
00470
00471 heap->clear();
00472 }
00473
00474 return NULL;
00475 }
00476 #endif