00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include <shogun/converter/KernelLocallyLinearEmbedding.h>
00012 #ifdef HAVE_LAPACK
00013 #include <shogun/mathematics/lapack.h>
00014 #include <shogun/lib/FibonacciHeap.h>
00015 #include <shogun/lib/common.h>
00016 #include <shogun/base/DynArray.h>
00017 #include <shogun/mathematics/Math.h>
00018 #include <shogun/io/SGIO.h>
00019 #include <shogun/lib/Signal.h>
00020
00021 #ifdef HAVE_PTHREAD
00022 #include <pthread.h>
00023 #endif
00024
00025 using namespace shogun;
00026
00027 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00028 struct LK_RECONSTRUCTION_THREAD_PARAM
00029 {
00031 int32_t idx_start;
00033 int32_t idx_stop;
00035 int32_t idx_step;
00037 int32_t m_k;
00039 int32_t N;
00041 const int32_t* neighborhood_matrix;
00043 float64_t* local_gram_matrix;
00045 const float64_t* kernel_matrix;
00047 float64_t* id_vector;
00049 float64_t reconstruction_shift;
00051 float64_t* W_matrix;
00052 };
00053
00054 struct K_NEIGHBORHOOD_THREAD_PARAM
00055 {
00057 int32_t idx_start;
00059 int32_t idx_step;
00061 int32_t idx_stop;
00063 int32_t N;
00065 int32_t m_k;
00067 CFibonacciHeap* heap;
00069 const float64_t* kernel_matrix;
00071 int32_t* neighborhood_matrix;
00072 };
00073 #endif
00074
00075 CKernelLocallyLinearEmbedding::CKernelLocallyLinearEmbedding() :
00076 CLocallyLinearEmbedding()
00077 {
00078 }
00079
00080 CKernelLocallyLinearEmbedding::CKernelLocallyLinearEmbedding(CKernel* kernel) :
00081 CLocallyLinearEmbedding()
00082 {
00083 set_kernel(kernel);
00084 }
00085
00086 const char* CKernelLocallyLinearEmbedding::get_name() const
00087 {
00088 return "KernelLocallyLinearEmbedding";
00089 };
00090
00091 CKernelLocallyLinearEmbedding::~CKernelLocallyLinearEmbedding()
00092 {
00093 }
00094
00095 CFeatures* CKernelLocallyLinearEmbedding::apply(CFeatures* features)
00096 {
00097 ASSERT(features);
00098 SG_REF(features);
00099
00100
00101 int32_t N = features->get_num_vectors();
00102 if (m_k>=N)
00103 SG_ERROR("Number of neighbors (%d) should be less than number of objects (%d).\n",
00104 m_k, N);
00105
00106
00107 ASSERT(m_kernel);
00108 m_kernel->init(features,features);
00109 CSimpleFeatures<float64_t>* embedding = embed_kernel(m_kernel);
00110 m_kernel->cleanup();
00111 SG_UNREF(features);
00112 return (CFeatures*)embedding;
00113 }
00114
00115 CSimpleFeatures<float64_t>* CKernelLocallyLinearEmbedding::embed_kernel(CKernel* kernel)
00116 {
00117 SGMatrix<float64_t> kernel_matrix = kernel->get_kernel_matrix();
00118 SGMatrix<int32_t> neighborhood_matrix = get_neighborhood_matrix(kernel_matrix,m_k);
00119
00120 SGMatrix<float64_t> M_matrix = construct_weight_matrix(kernel_matrix,neighborhood_matrix);
00121 neighborhood_matrix.destroy_matrix();
00122
00123 SGMatrix<float64_t> nullspace = construct_embedding(M_matrix,m_target_dim);
00124 M_matrix.destroy_matrix();
00125
00126 return new CSimpleFeatures<float64_t>(nullspace);
00127 }
00128
00129 SGMatrix<float64_t> CKernelLocallyLinearEmbedding::construct_weight_matrix(SGMatrix<float64_t> kernel_matrix,
00130 SGMatrix<int32_t> neighborhood_matrix)
00131 {
00132 int32_t N = kernel_matrix.num_cols;
00133
00134 int32_t t;
00135 #ifdef HAVE_PTHREAD
00136 int32_t num_threads = parallel->get_num_threads();
00137 ASSERT(num_threads>0);
00138
00139 pthread_t* threads = SG_MALLOC(pthread_t, num_threads);
00140 LK_RECONSTRUCTION_THREAD_PARAM* parameters = SG_MALLOC(LK_RECONSTRUCTION_THREAD_PARAM, num_threads);
00141 pthread_attr_t attr;
00142 pthread_attr_init(&attr);
00143 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
00144 #else
00145 int32_t num_threads = 1;
00146 #endif
00147 float64_t* W_matrix = SG_CALLOC(float64_t, N*N);
00148
00149 float64_t* local_gram_matrix = SG_MALLOC(float64_t, m_k*m_k*num_threads);
00150 float64_t* id_vector = SG_MALLOC(float64_t, m_k*num_threads);
00151
00152 #ifdef HAVE_PTHREAD
00153 for (t=0; t<num_threads; t++)
00154 {
00155 parameters[t].idx_start = t;
00156 parameters[t].idx_step = num_threads;
00157 parameters[t].idx_stop = N;
00158 parameters[t].m_k = m_k;
00159 parameters[t].N = N;
00160 parameters[t].neighborhood_matrix = neighborhood_matrix.matrix;
00161 parameters[t].kernel_matrix = kernel_matrix.matrix;
00162 parameters[t].local_gram_matrix = local_gram_matrix+(m_k*m_k)*t;
00163 parameters[t].id_vector = id_vector+m_k*t;
00164 parameters[t].W_matrix = W_matrix;
00165 parameters[t].reconstruction_shift = m_reconstruction_shift;
00166 pthread_create(&threads[t], &attr, run_linearreconstruction_thread, (void*)¶meters[t]);
00167 }
00168 for (t=0; t<num_threads; t++)
00169 pthread_join(threads[t], NULL);
00170 pthread_attr_destroy(&attr);
00171 SG_FREE(parameters);
00172 SG_FREE(threads);
00173 #else
00174 LK_RECONSTRUCTION_THREAD_PARAM single_thread_param;
00175 single_thread_param.idx_start = 0;
00176 single_thread_param.idx_step = 1;
00177 single_thread_param.idx_stop = N;
00178 single_thread_param.m_k = m_k;
00179 single_thread_param.N = N;
00180 single_thread_param.neighborhood_matrix = neighborhood_matrix.matrix;
00181 single_thread_param.local_gram_matrix = local_gram_matrix;
00182 single_thread_param.kernel_matrix = kernel_matrix.matrix;
00183 single_thread_param.id_vector = id_vector;
00184 single_thread_param.W_matrix = W_matrix;
00185 run_linearreconstruction_thread((void*)single_thread_param);
00186 #endif
00187
00188
00189 SG_FREE(id_vector);
00190 SG_FREE(local_gram_matrix);
00191
00192 return SGMatrix<float64_t>(W_matrix,N,N);
00193 }
00194
00195 void* CKernelLocallyLinearEmbedding::run_linearreconstruction_thread(void* p)
00196 {
00197 LK_RECONSTRUCTION_THREAD_PARAM* parameters = (LK_RECONSTRUCTION_THREAD_PARAM*)p;
00198 int32_t idx_start = parameters->idx_start;
00199 int32_t idx_step = parameters->idx_step;
00200 int32_t idx_stop = parameters->idx_stop;
00201 int32_t m_k = parameters->m_k;
00202 int32_t N = parameters->N;
00203 const int32_t* neighborhood_matrix = parameters->neighborhood_matrix;
00204 float64_t* local_gram_matrix = parameters->local_gram_matrix;
00205 const float64_t* kernel_matrix = parameters->kernel_matrix;
00206 float64_t* id_vector = parameters->id_vector;
00207 float64_t* W_matrix = parameters->W_matrix;
00208 float64_t reconstruction_shift = parameters->reconstruction_shift;
00209
00210 int32_t i,j,k;
00211 float64_t norming,trace;
00212
00213 for (i=idx_start; i<idx_stop; i+=idx_step)
00214 {
00215 for (j=0; j<m_k; j++)
00216 {
00217 for (k=0; k<m_k; k++)
00218 local_gram_matrix[j*m_k+k] =
00219 kernel_matrix[i*N+i] -
00220 kernel_matrix[i*N+neighborhood_matrix[j*N+i]] -
00221 kernel_matrix[i*N+neighborhood_matrix[k*N+i]] +
00222 kernel_matrix[neighborhood_matrix[j*N+i]*N+neighborhood_matrix[k*N+i]];
00223 }
00224
00225 for (j=0; j<m_k; j++)
00226 id_vector[j] = 1.0;
00227
00228
00229 trace = 0.0;
00230 for (j=0; j<m_k; j++)
00231 trace += local_gram_matrix[j*m_k+j];
00232
00233
00234 for (j=0; j<m_k; j++)
00235 local_gram_matrix[j*m_k+j] += reconstruction_shift*trace;
00236
00237 clapack_dposv(CblasColMajor,CblasLower,m_k,1,local_gram_matrix,m_k,id_vector,m_k);
00238
00239
00240 norming=0.0;
00241 for (j=0; j<m_k; j++)
00242 norming += id_vector[j];
00243
00244 cblas_dscal(m_k,1.0/norming,id_vector,1);
00245
00246 memset(local_gram_matrix,0,sizeof(float64_t)*m_k*m_k);
00247 cblas_dger(CblasColMajor,m_k,m_k,1.0,id_vector,1,id_vector,1,local_gram_matrix,m_k);
00248
00249
00250 W_matrix[N*i+i] += 1.0;
00251 for (j=0; j<m_k; j++)
00252 {
00253 W_matrix[N*i+neighborhood_matrix[j*N+i]] -= id_vector[j];
00254 W_matrix[N*neighborhood_matrix[j*N+i]+i] -= id_vector[j];
00255 }
00256 for (j=0; j<m_k; j++)
00257 {
00258 for (k=0; k<m_k; k++)
00259 W_matrix[N*neighborhood_matrix[j*N+i]+neighborhood_matrix[k*N+i]]+=local_gram_matrix[j*m_k+k];
00260 }
00261 }
00262 return NULL;
00263 }
00264
00265 SGMatrix<int32_t> CKernelLocallyLinearEmbedding::get_neighborhood_matrix(SGMatrix<float64_t> kernel_matrix, int32_t k)
00266 {
00267 int32_t t;
00268 int32_t N = kernel_matrix.num_cols;
00269
00270 int32_t* neighborhood_matrix = SG_MALLOC(int32_t, N*k);
00271 #ifdef HAVE_PTHREAD
00272 int32_t num_threads = parallel->get_num_threads();
00273 ASSERT(num_threads>0);
00274 K_NEIGHBORHOOD_THREAD_PARAM* parameters = SG_MALLOC(K_NEIGHBORHOOD_THREAD_PARAM, num_threads);
00275 pthread_t* threads = SG_MALLOC(pthread_t, num_threads);
00276 pthread_attr_t attr;
00277 pthread_attr_init(&attr);
00278 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
00279 #else
00280 int32_t num_threads = 1;
00281 #endif
00282 CFibonacciHeap** heaps = SG_MALLOC(CFibonacciHeap*, num_threads);
00283 for (t=0; t<num_threads; t++)
00284 heaps[t] = new CFibonacciHeap(N);
00285
00286 #ifdef HAVE_PTHREAD
00287 for (t=0; t<num_threads; t++)
00288 {
00289 parameters[t].idx_start = t;
00290 parameters[t].idx_step = num_threads;
00291 parameters[t].idx_stop = N;
00292 parameters[t].m_k = k;
00293 parameters[t].N = N;
00294 parameters[t].heap = heaps[t];
00295 parameters[t].neighborhood_matrix = neighborhood_matrix;
00296 parameters[t].kernel_matrix = kernel_matrix.matrix;
00297 pthread_create(&threads[t], &attr, run_neighborhood_thread, (void*)¶meters[t]);
00298 }
00299 for (t=0; t<num_threads; t++)
00300 pthread_join(threads[t], NULL);
00301 pthread_attr_destroy(&attr);
00302 SG_FREE(threads);
00303 SG_FREE(parameters);
00304 #else
00305 K_NEIGHBORHOOD_THREAD_PARAM single_thread_param;
00306 single_thread_param.idx_start = 0;
00307 single_thread_param.idx_step = 1;
00308 single_thread_param.idx_stop = N;
00309 single_thread_param.m_k = k;
00310 single_thread_param.N = N;
00311 single_thread_param.heap = heaps[0]
00312 single_thread_param.neighborhood_matrix = neighborhood_matrix;
00313 single_thread_param.kernel_matrix = kernel_matrix.matrix;
00314 run_neighborhood_thread((void*)&single_thread_param);
00315 #endif
00316
00317 for (t=0; t<num_threads; t++)
00318 delete heaps[t];
00319 SG_FREE(heaps);
00320
00321 return SGMatrix<int32_t>(neighborhood_matrix,k,N);
00322 }
00323
00324 void* CKernelLocallyLinearEmbedding::run_neighborhood_thread(void* p)
00325 {
00326 K_NEIGHBORHOOD_THREAD_PARAM* parameters = (K_NEIGHBORHOOD_THREAD_PARAM*)p;
00327 int32_t idx_start = parameters->idx_start;
00328 int32_t idx_step = parameters->idx_step;
00329 int32_t idx_stop = parameters->idx_stop;
00330 int32_t N = parameters->N;
00331 int32_t m_k = parameters->m_k;
00332 CFibonacciHeap* heap = parameters->heap;
00333 const float64_t* kernel_matrix = parameters->kernel_matrix;
00334 int32_t* neighborhood_matrix = parameters->neighborhood_matrix;
00335
00336 int32_t i,j;
00337 float64_t tmp;
00338 for (i=idx_start; i<idx_stop; i+=idx_step)
00339 {
00340 for (j=0; j<N; j++)
00341 {
00342 heap->insert(j,kernel_matrix[i*N+i]-2*kernel_matrix[i*N+j]+kernel_matrix[j*N+j]);
00343 }
00344
00345 heap->extract_min(tmp);
00346
00347 for (j=0; j<m_k; j++)
00348 neighborhood_matrix[j*N+i] = heap->extract_min(tmp);
00349
00350 heap->clear();
00351 }
00352
00353 return NULL;
00354 }
00355 #endif