00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include <shogun/preprocessor/KernelLocallyLinearEmbedding.h>
00012 #ifdef HAVE_LAPACK
00013 #include <shogun/mathematics/arpack.h>
00014 #include <shogun/mathematics/lapack.h>
00015 #include <shogun/lib/FibonacciHeap.h>
00016 #include <shogun/lib/common.h>
00017 #include <shogun/mathematics/Math.h>
00018 #include <shogun/io/SGIO.h>
00019 #include <shogun/lib/Signal.h>
00020
00021 #ifdef HAVE_PTHREAD
00022 #include <pthread.h>
00023 #endif
00024
00025 using namespace shogun;
00026
00027 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00028 struct LK_RECONSTRUCTION_THREAD_PARAM
00029 {
00031 int32_t idx_start;
00033 int32_t idx_stop;
00035 int32_t idx_step;
00037 int32_t m_k;
00039 int32_t N;
00041 const int32_t* neighborhood_matrix;
00043 float64_t* local_gram_matrix;
00045 const float64_t* kernel_matrix;
00047 float64_t* id_vector;
00049 float64_t* W_matrix;
00050 };
00051
00052 struct K_NEIGHBORHOOD_THREAD_PARAM
00053 {
00054 int32_t idx_start;
00055 int32_t idx_step;
00056 int32_t idx_stop;
00057 int32_t N;
00058 int32_t m_k;
00059 CFibonacciHeap* heap;
00060 const float64_t* kernel_matrix;
00061 int32_t* neighborhood_matrix;
00062 };
00063 #endif
00064
00065 CKernelLocallyLinearEmbedding::CKernelLocallyLinearEmbedding() :
00066 CLocallyLinearEmbedding()
00067 {
00068 init();
00069 }
00070
00071 CKernelLocallyLinearEmbedding::CKernelLocallyLinearEmbedding(CKernel* kernel)
00072 {
00073 init();
00074 SG_REF(kernel);
00075 m_kernel = kernel;
00076 }
00077
00078 void CKernelLocallyLinearEmbedding::init()
00079 {
00080 m_kernel = NULL;
00081 }
00082
00083 CKernelLocallyLinearEmbedding::~CKernelLocallyLinearEmbedding()
00084 {
00085 SG_UNREF(m_kernel);
00086 }
00087
00088 bool CKernelLocallyLinearEmbedding::init(CFeatures* features)
00089 {
00090 return true;
00091 }
00092
00093 void CKernelLocallyLinearEmbedding::cleanup()
00094 {
00095 }
00096
00097 SGMatrix<float64_t> CKernelLocallyLinearEmbedding::apply_to_feature_matrix(CFeatures* features)
00098 {
00099 ASSERT(features);
00100 ASSERT(m_kernel);
00101 SG_REF(features);
00102
00103
00104 int32_t N = features->get_num_vectors();
00105 if (m_k>=N)
00106 SG_ERROR("Number of neighbors (%d) should be less than number of objects (%d).\n",
00107 m_k, N);
00108
00109
00110 int32_t i,j,t;
00111
00112
00113 m_kernel->init(features,features);
00114 Parallel* kernel_parallel = m_kernel->parallel;
00115 m_kernel->parallel = this->parallel;
00116 SGMatrix<float64_t> kernel_matrix = m_kernel->get_kernel_matrix();
00117 SGMatrix<int32_t> neighborhood_matrix = get_neighborhood_matrix(kernel_matrix);
00118 m_kernel->parallel = kernel_parallel;
00119 m_kernel->cleanup();
00120
00121
00122 float64_t* W_matrix = SG_CALLOC(float64_t, N*N);
00123
00124 #ifdef HAVE_PTHREAD
00125 int32_t num_threads = parallel->get_num_threads();
00126 ASSERT(num_threads>0);
00127
00128 pthread_t* threads = SG_MALLOC(pthread_t, num_threads);
00129 LK_RECONSTRUCTION_THREAD_PARAM* parameters = SG_MALLOC(LK_RECONSTRUCTION_THREAD_PARAM, num_threads);
00130 pthread_attr_t attr;
00131 pthread_attr_init(&attr);
00132 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
00133 #else
00134 int32_t num_threads = 1;
00135 #endif
00136
00137 float64_t* local_gram_matrix = SG_MALLOC(float64_t, m_k*m_k*num_threads);
00138 float64_t* id_vector = SG_MALLOC(float64_t, m_k*num_threads);
00139
00140 #ifdef HAVE_PTHREAD
00141 for (t=0; t<num_threads; t++)
00142 {
00143 parameters[t].idx_start = t;
00144 parameters[t].idx_step = num_threads;
00145 parameters[t].idx_stop = N;
00146 parameters[t].m_k = m_k;
00147 parameters[t].N = N;
00148 parameters[t].neighborhood_matrix = neighborhood_matrix.matrix;
00149 parameters[t].kernel_matrix = kernel_matrix.matrix;
00150 parameters[t].local_gram_matrix = local_gram_matrix+(m_k*m_k)*t;
00151 parameters[t].id_vector = id_vector+m_k*t;
00152 parameters[t].W_matrix = W_matrix;
00153 pthread_create(&threads[t], &attr, run_linearreconstruction_thread, (void*)¶meters[t]);
00154 }
00155 for (t=0; t<num_threads; t++)
00156 pthread_join(threads[t], NULL);
00157 pthread_attr_destroy(&attr);
00158 SG_FREE(parameters);
00159 SG_FREE(threads);
00160 #else
00161 LK_RECONSTRUCTION_THREAD_PARAM single_thread_param;
00162 single_thread_param.idx_start = 0;
00163 single_thread_param.idx_step = 1;
00164 single_thread_param.idx_stop = N;
00165 single_thread_param.m_k = m_k;
00166 single_thread_param.N = N;
00167 single_thread_param.neighborhood_matrix = neighborhood_matrix.matrix;
00168 single_thread_param.local_gram_matrix = local_gram_matrix;
00169 single_thread_param.kernel_matrix = kernel_matrix.matrix;
00170 single_thread_param.id_vector = id_vector;
00171 single_thread_param.W_matrix = W_matrix;
00172 run_linearreconstruction_thread((void*)single_thread_param);
00173 #endif
00174
00175
00176 SG_FREE(id_vector);
00177 neighborhood_matrix.destroy_matrix();
00178 kernel_matrix.destroy_matrix();
00179 SG_FREE(local_gram_matrix);
00180
00181
00182 for (i=0; i<N; i++)
00183 {
00184 for (j=0; j<N; j++)
00185 W_matrix[j*N+i] = (i==j) ? 1.0-W_matrix[j*N+i] : -W_matrix[j*N+i];
00186 }
00187
00188
00189 SGMatrix<float64_t> M_matrix(N,N);
00190 cblas_dgemm(CblasColMajor,CblasTrans, CblasNoTrans,
00191 N,N,N,
00192 1.0,W_matrix,N,
00193 W_matrix,N,
00194 0.0,M_matrix.matrix,N);
00195
00196 SG_FREE(W_matrix);
00197
00198 SGMatrix<float64_t> nullspace = find_null_space(M_matrix,m_target_dim,false);
00199
00200 if ((features->get_feature_class()==C_SIMPLE) &&
00201 (features->get_feature_type()==F_DREAL))
00202 {
00203 ((CSimpleFeatures<float64_t>*)features)->set_feature_matrix(nullspace);
00204 M_matrix.destroy_matrix();
00205 SG_UNREF(features);
00206 return ((CSimpleFeatures<float64_t>*)features)->get_feature_matrix();
00207 }
00208 else
00209 {
00210 SG_UNREF(features);
00211 SG_WARNING("Can't set feature matrix, returning feature matrix.\n");
00212 return nullspace;
00213 }
00214 }
00215
00216 SGVector<float64_t> CKernelLocallyLinearEmbedding::apply_to_feature_vector(SGVector<float64_t> vector)
00217 {
00218 SG_NOTIMPLEMENTED;
00219 return vector;
00220 }
00221
00222 void* CKernelLocallyLinearEmbedding::run_linearreconstruction_thread(void* p)
00223 {
00224 LK_RECONSTRUCTION_THREAD_PARAM* parameters = (LK_RECONSTRUCTION_THREAD_PARAM*)p;
00225 int32_t idx_start = parameters->idx_start;
00226 int32_t idx_step = parameters->idx_step;
00227 int32_t idx_stop = parameters->idx_stop;
00228 int32_t m_k = parameters->m_k;
00229 int32_t N = parameters->N;
00230 const int32_t* neighborhood_matrix = parameters->neighborhood_matrix;
00231 float64_t* local_gram_matrix = parameters->local_gram_matrix;
00232 const float64_t* kernel_matrix = parameters->kernel_matrix;
00233 float64_t* id_vector = parameters->id_vector;
00234 float64_t* W_matrix = parameters->W_matrix;
00235
00236 int32_t i,j,k;
00237 float64_t norming,trace;
00238
00239 for (i=idx_start; i<idx_stop; i+=idx_step)
00240 {
00241 for (j=0; j<m_k; j++)
00242 {
00243 for (k=0; k<m_k; k++)
00244 local_gram_matrix[j*m_k+k] =
00245 kernel_matrix[i*N+i] -
00246 kernel_matrix[i*N+neighborhood_matrix[j*N+i]] -
00247 kernel_matrix[i*N+neighborhood_matrix[k*N+i]] +
00248 kernel_matrix[neighborhood_matrix[j*N+i]*N+neighborhood_matrix[k*N+i]];
00249 }
00250
00251 for (j=0; j<m_k; j++)
00252 id_vector[j] = 1.0;
00253
00254
00255 trace = 0.0;
00256 for (j=0; j<m_k; j++)
00257 trace += local_gram_matrix[j*m_k+j];
00258
00259
00260 for (j=0; j<m_k; j++)
00261 local_gram_matrix[j*m_k+j] += 1e-3*trace/m_k;
00262
00263 clapack_dposv(CblasColMajor,CblasLower,m_k,1,local_gram_matrix,m_k,id_vector,m_k);
00264
00265
00266 norming=0.0;
00267 for (j=0; j<m_k; j++)
00268 norming += id_vector[j];
00269
00270 for (j=0; j<m_k; j++)
00271 id_vector[j]/=norming;
00272
00273
00274 for (j=0; j<m_k; j++)
00275 W_matrix[N*neighborhood_matrix[j*N+i]+i]=id_vector[j];
00276 }
00277 return NULL;
00278 }
00279
00280 SGMatrix<int32_t> CKernelLocallyLinearEmbedding::get_neighborhood_matrix(SGMatrix<float64_t> kernel_matrix)
00281 {
00282 int32_t t;
00283 int32_t N = kernel_matrix.num_cols;
00284
00285 int32_t* neighborhood_matrix = SG_MALLOC(int32_t, N*m_k);
00286 #ifdef HAVE_PTHREAD
00287 int32_t num_threads = parallel->get_num_threads();
00288 ASSERT(num_threads>0);
00289 K_NEIGHBORHOOD_THREAD_PARAM* parameters = SG_MALLOC(K_NEIGHBORHOOD_THREAD_PARAM, num_threads);
00290 pthread_t* threads = SG_MALLOC(pthread_t, num_threads);
00291 pthread_attr_t attr;
00292 pthread_attr_init(&attr);
00293 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
00294 #else
00295 int32_t num_threads = 1;
00296 #endif
00297 CFibonacciHeap** heaps = SG_MALLOC(CFibonacciHeap*, num_threads);
00298 for (t=0; t<num_threads; t++)
00299 heaps[t] = new CFibonacciHeap(N);
00300
00301 #ifdef HAVE_PTHREAD
00302 for (t=0; t<num_threads; t++)
00303 {
00304 parameters[t].idx_start = t;
00305 parameters[t].idx_step = num_threads;
00306 parameters[t].idx_stop = N;
00307 parameters[t].m_k = m_k;
00308 parameters[t].N = N;
00309 parameters[t].heap = heaps[t];
00310 parameters[t].neighborhood_matrix = neighborhood_matrix;
00311 parameters[t].kernel_matrix = kernel_matrix.matrix;
00312 pthread_create(&threads[t], &attr, run_neighborhood_thread, (void*)¶meters[t]);
00313 }
00314 for (t=0; t<num_threads; t++)
00315 pthread_join(threads[t], NULL);
00316 pthread_attr_destroy(&attr);
00317 SG_FREE(threads);
00318 SG_FREE(parameters);
00319 #else
00320 K_NEIGHBORHOOD_THREAD_PARAM single_thread_param;
00321 single_thread_param.idx_start = 0;
00322 single_thread_param.idx_step = 1;
00323 single_thread_param.idx_stop = N;
00324 single_thread_param.m_k = m_k;
00325 single_thread_param.N = N;
00326 single_thread_param.heap = heaps[0]
00327 single_thread_param.neighborhood_matrix = neighborhood_matrix;
00328 single_thread_param.kernel_matrix = kernel_matrix.matrix;
00329 run_neighborhood_thread((void*)&single_thread_param);
00330 #endif
00331
00332 for (t=0; t<num_threads; t++)
00333 delete heaps[t];
00334 SG_FREE(heaps);
00335
00336 return SGMatrix<int32_t>(neighborhood_matrix,m_k,N);
00337 }
00338
00339
00340 void* CKernelLocallyLinearEmbedding::run_neighborhood_thread(void* p)
00341 {
00342 K_NEIGHBORHOOD_THREAD_PARAM* parameters = (K_NEIGHBORHOOD_THREAD_PARAM*)p;
00343 int32_t idx_start = parameters->idx_start;
00344 int32_t idx_step = parameters->idx_step;
00345 int32_t idx_stop = parameters->idx_stop;
00346 int32_t N = parameters->N;
00347 int32_t m_k = parameters->m_k;
00348 CFibonacciHeap* heap = parameters->heap;
00349 const float64_t* kernel_matrix = parameters->kernel_matrix;
00350 int32_t* neighborhood_matrix = parameters->neighborhood_matrix;
00351
00352 int32_t i,j;
00353 float64_t tmp;
00354 for (i=idx_start; i<idx_stop; i+=idx_step)
00355 {
00356 for (j=0; j<N; j++)
00357 {
00358 heap->insert(j,kernel_matrix[i*N+i]-2*kernel_matrix[i*N+j]+kernel_matrix[j*N+j]);
00359 }
00360
00361 heap->extract_min(tmp);
00362
00363 for (j=0; j<m_k; j++)
00364 neighborhood_matrix[j*N+i] = heap->extract_min(tmp);
00365
00366 heap->clear();
00367 }
00368
00369 return NULL;
00370 }
00371 #endif