00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include <shogun/converter/KernelLocallyLinearEmbedding.h>
00012 #ifdef HAVE_LAPACK
00013 #include <shogun/mathematics/lapack.h>
00014 #include <shogun/lib/FibonacciHeap.h>
00015 #include <shogun/lib/common.h>
00016 #include <shogun/base/DynArray.h>
00017 #include <shogun/mathematics/Math.h>
00018 #include <shogun/io/SGIO.h>
00019 #include <shogun/lib/Time.h>
00020 #include <shogun/lib/Signal.h>
00021
00022 #ifdef HAVE_PTHREAD
00023 #include <pthread.h>
00024 #endif
00025
00026 using namespace shogun;
00027
00028 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00029 struct LK_RECONSTRUCTION_THREAD_PARAM
00030 {
00032 int32_t idx_start;
00034 int32_t idx_stop;
00036 int32_t idx_step;
00038 int32_t m_k;
00040 int32_t N;
00042 const int32_t* neighborhood_matrix;
00044 float64_t* local_gram_matrix;
00046 const float64_t* kernel_matrix;
00048 float64_t* id_vector;
00050 float64_t reconstruction_shift;
00052 float64_t* W_matrix;
00053 };
00054
00055 class KLLE_COVERTREE_POINT
00056 {
00057 public:
00058
00059 KLLE_COVERTREE_POINT(int32_t index, const SGMatrix<float64_t>& dmatrix)
00060 {
00061 this->point_index = index;
00062 this->kernel_matrix = dmatrix;
00063 this->kii = dmatrix[index*dmatrix.num_rows+index];
00064 }
00065
00066 inline double distance(const KLLE_COVERTREE_POINT& p) const
00067 {
00068 int32_t N = kernel_matrix.num_rows;
00069 return kii+kernel_matrix[p.point_index*N+p.point_index]-2.0*kernel_matrix[point_index*N+p.point_index];
00070 }
00071
00072 inline bool operator==(const KLLE_COVERTREE_POINT& p) const
00073 {
00074 return (p.point_index==this->point_index);
00075 }
00076
00077 int32_t point_index;
00078 float64_t kii;
00079 SGMatrix<float64_t> kernel_matrix;
00080 };
00081
00082 #endif
00083
00084 CKernelLocallyLinearEmbedding::CKernelLocallyLinearEmbedding() :
00085 CLocallyLinearEmbedding()
00086 {
00087 }
00088
00089 CKernelLocallyLinearEmbedding::CKernelLocallyLinearEmbedding(CKernel* kernel) :
00090 CLocallyLinearEmbedding()
00091 {
00092 set_kernel(kernel);
00093 }
00094
00095 const char* CKernelLocallyLinearEmbedding::get_name() const
00096 {
00097 return "KernelLocallyLinearEmbedding";
00098 };
00099
00100 CKernelLocallyLinearEmbedding::~CKernelLocallyLinearEmbedding()
00101 {
00102 }
00103
00104 CFeatures* CKernelLocallyLinearEmbedding::apply(CFeatures* features)
00105 {
00106 ASSERT(features);
00107 SG_REF(features);
00108
00109
00110 int32_t N = features->get_num_vectors();
00111 if (m_k>=N)
00112 SG_ERROR("Number of neighbors (%d) should be less than number of objects (%d).\n",
00113 m_k, N);
00114
00115
00116 ASSERT(m_kernel);
00117 m_kernel->init(features,features);
00118 CDenseFeatures<float64_t>* embedding = embed_kernel(m_kernel);
00119 m_kernel->cleanup();
00120 SG_UNREF(features);
00121 return (CFeatures*)embedding;
00122 }
00123
00124 CDenseFeatures<float64_t>* CKernelLocallyLinearEmbedding::embed_kernel(CKernel* kernel)
00125 {
00126 CTime* time = new CTime();
00127
00128 time->start();
00129 SGMatrix<float64_t> kernel_matrix = kernel->get_kernel_matrix();
00130 SG_DEBUG("Kernel matrix computation took %fs\n",time->cur_time_diff());
00131
00132 time->start();
00133 SGMatrix<int32_t> neighborhood_matrix = get_neighborhood_matrix(kernel_matrix,m_k);
00134 SG_DEBUG("Neighbors finding took %fs\n",time->cur_time_diff());
00135
00136 time->start();
00137 SGMatrix<float64_t> M_matrix = construct_weight_matrix(kernel_matrix,neighborhood_matrix);
00138 SG_DEBUG("Weights computation took %fs\n",time->cur_time_diff());
00139
00140 time->start();
00141 SGMatrix<float64_t> nullspace = construct_embedding(M_matrix,m_target_dim);
00142 SG_DEBUG("Embedding construction took %fs\n",time->cur_time_diff());
00143
00144 delete time;
00145
00146 return new CDenseFeatures<float64_t>(nullspace);
00147 }
00148
00149 SGMatrix<float64_t> CKernelLocallyLinearEmbedding::construct_weight_matrix(SGMatrix<float64_t> kernel_matrix,
00150 SGMatrix<int32_t> neighborhood_matrix)
00151 {
00152 int32_t N = kernel_matrix.num_cols;
00153
00154 #ifdef HAVE_PTHREAD
00155 int32_t t;
00156 int32_t num_threads = parallel->get_num_threads();
00157 ASSERT(num_threads>0);
00158
00159 pthread_t* threads = SG_MALLOC(pthread_t, num_threads);
00160 LK_RECONSTRUCTION_THREAD_PARAM* parameters = SG_MALLOC(LK_RECONSTRUCTION_THREAD_PARAM, num_threads);
00161 pthread_attr_t attr;
00162 pthread_attr_init(&attr);
00163 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
00164 #else
00165 int32_t num_threads = 1;
00166 #endif
00167 float64_t* W_matrix = SG_CALLOC(float64_t, N*N);
00168
00169 float64_t* local_gram_matrix = SG_MALLOC(float64_t, m_k*m_k*num_threads);
00170 float64_t* id_vector = SG_MALLOC(float64_t, m_k*num_threads);
00171
00172 #ifdef HAVE_PTHREAD
00173 for (t=0; t<num_threads; t++)
00174 {
00175 parameters[t].idx_start = t;
00176 parameters[t].idx_step = num_threads;
00177 parameters[t].idx_stop = N;
00178 parameters[t].m_k = m_k;
00179 parameters[t].N = N;
00180 parameters[t].neighborhood_matrix = neighborhood_matrix.matrix;
00181 parameters[t].kernel_matrix = kernel_matrix.matrix;
00182 parameters[t].local_gram_matrix = local_gram_matrix+(m_k*m_k)*t;
00183 parameters[t].id_vector = id_vector+m_k*t;
00184 parameters[t].W_matrix = W_matrix;
00185 parameters[t].reconstruction_shift = m_reconstruction_shift;
00186 pthread_create(&threads[t], &attr, run_linearreconstruction_thread, (void*)¶meters[t]);
00187 }
00188 for (t=0; t<num_threads; t++)
00189 pthread_join(threads[t], NULL);
00190 pthread_attr_destroy(&attr);
00191 SG_FREE(parameters);
00192 SG_FREE(threads);
00193 #else
00194 LK_RECONSTRUCTION_THREAD_PARAM single_thread_param;
00195 single_thread_param.idx_start = 0;
00196 single_thread_param.idx_step = 1;
00197 single_thread_param.idx_stop = N;
00198 single_thread_param.m_k = m_k;
00199 single_thread_param.N = N;
00200 single_thread_param.neighborhood_matrix = neighborhood_matrix.matrix;
00201 single_thread_param.local_gram_matrix = local_gram_matrix;
00202 single_thread_param.kernel_matrix = kernel_matrix.matrix;
00203 single_thread_param.id_vector = id_vector;
00204 single_thread_param.W_matrix = W_matrix;
00205 run_linearreconstruction_thread((void*)&single_thread_param);
00206 #endif
00207
00208
00209 SG_FREE(id_vector);
00210 SG_FREE(local_gram_matrix);
00211
00212 return SGMatrix<float64_t>(W_matrix,N,N);
00213 }
00214
00215 void* CKernelLocallyLinearEmbedding::run_linearreconstruction_thread(void* p)
00216 {
00217 LK_RECONSTRUCTION_THREAD_PARAM* parameters = (LK_RECONSTRUCTION_THREAD_PARAM*)p;
00218 int32_t idx_start = parameters->idx_start;
00219 int32_t idx_step = parameters->idx_step;
00220 int32_t idx_stop = parameters->idx_stop;
00221 int32_t m_k = parameters->m_k;
00222 int32_t N = parameters->N;
00223 const int32_t* neighborhood_matrix = parameters->neighborhood_matrix;
00224 float64_t* local_gram_matrix = parameters->local_gram_matrix;
00225 const float64_t* kernel_matrix = parameters->kernel_matrix;
00226 float64_t* id_vector = parameters->id_vector;
00227 float64_t* W_matrix = parameters->W_matrix;
00228 float64_t reconstruction_shift = parameters->reconstruction_shift;
00229
00230 int32_t i,j,k;
00231 float64_t norming,trace;
00232
00233 for (i=idx_start; i<idx_stop; i+=idx_step)
00234 {
00235 for (j=0; j<m_k; j++)
00236 {
00237 for (k=0; k<m_k; k++)
00238 local_gram_matrix[j*m_k+k] =
00239 kernel_matrix[i*N+i] -
00240 kernel_matrix[i*N+neighborhood_matrix[i*m_k+j]] -
00241 kernel_matrix[i*N+neighborhood_matrix[i*m_k+k]] +
00242 kernel_matrix[neighborhood_matrix[i*m_k+j]*N+neighborhood_matrix[i*m_k+k]];
00243 }
00244
00245 for (j=0; j<m_k; j++)
00246 id_vector[j] = 1.0;
00247
00248
00249 trace = 0.0;
00250 for (j=0; j<m_k; j++)
00251 trace += local_gram_matrix[j*m_k+j];
00252
00253
00254 for (j=0; j<m_k; j++)
00255 local_gram_matrix[j*m_k+j] += reconstruction_shift*trace;
00256
00257 clapack_dposv(CblasColMajor,CblasLower,m_k,1,local_gram_matrix,m_k,id_vector,m_k);
00258
00259
00260 norming=0.0;
00261 for (j=0; j<m_k; j++)
00262 norming += id_vector[j];
00263
00264 cblas_dscal(m_k,1.0/norming,id_vector,1);
00265
00266 memset(local_gram_matrix,0,sizeof(float64_t)*m_k*m_k);
00267 cblas_dger(CblasColMajor,m_k,m_k,1.0,id_vector,1,id_vector,1,local_gram_matrix,m_k);
00268
00269
00270 W_matrix[N*i+i] += 1.0;
00271 for (j=0; j<m_k; j++)
00272 {
00273 W_matrix[N*i+neighborhood_matrix[i*m_k+j]] -= id_vector[j];
00274 W_matrix[N*neighborhood_matrix[i*m_k+j]+i] -= id_vector[j];
00275 }
00276 for (j=0; j<m_k; j++)
00277 {
00278 for (k=0; k<m_k; k++)
00279 W_matrix[N*neighborhood_matrix[i*m_k+j]+neighborhood_matrix[i*m_k+k]]+=local_gram_matrix[j*m_k+k];
00280 }
00281 }
00282 return NULL;
00283 }
00284
00285 SGMatrix<int32_t> CKernelLocallyLinearEmbedding::get_neighborhood_matrix(SGMatrix<float64_t> kernel_matrix, int32_t k)
00286 {
00287 int32_t i;
00288 int32_t N = kernel_matrix.num_cols;
00289
00290 int32_t* neighborhood_matrix = SG_MALLOC(int32_t, N*k);
00291
00292 float64_t max_dist=0.0;
00293 for (i=0; i<N; i++)
00294 max_dist = CMath::max(max_dist,kernel_matrix[i*N+i]);
00295
00296 std::vector<KLLE_COVERTREE_POINT> vectors;
00297 vectors.reserve(N);
00298 for (i=0; i<N; i++)
00299 vectors.push_back(KLLE_COVERTREE_POINT(i,kernel_matrix));
00300
00301 CoverTree<KLLE_COVERTREE_POINT>* coverTree = new CoverTree<KLLE_COVERTREE_POINT>(2.0*max_dist,vectors);
00302
00303 for (i=0; i<N; i++)
00304 {
00305 std::vector<KLLE_COVERTREE_POINT> neighbors =
00306 coverTree->kNearestNeighbors(vectors[i],k+1);
00307
00308 ASSERT(neighbors.size()>=unsigned(k+1));
00309
00310 for (std::size_t m=1; m<unsigned(k+1); m++)
00311 neighborhood_matrix[i*k+m-1] = neighbors[m].point_index;
00312 }
00313
00314 delete coverTree;
00315
00316 return SGMatrix<int32_t>(neighborhood_matrix,k,N);
00317 }
00318
00319 #endif