00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include <shogun/preprocessor/HessianLocallyLinearEmbedding.h>
00012 #ifdef HAVE_LAPACK
00013 #include <shogun/mathematics/arpack.h>
00014 #include <shogun/mathematics/lapack.h>
00015 #include <shogun/lib/common.h>
00016 #include <shogun/mathematics/Math.h>
00017 #include <shogun/io/SGIO.h>
00018 #include <shogun/distance/EuclidianDistance.h>
00019 #include <shogun/lib/Signal.h>
00020
00021 #ifdef HAVE_PTHREAD
00022 #include <pthread.h>
00023 #endif
00024
00025 using namespace shogun;
00026
00027 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00028 struct HESSIANESTIMATION_THREAD_PARAM
00029 {
00031 int32_t idx_start;
00033 int32_t idx_step;
00035 int32_t idx_stop;
00037 int32_t m_k;
00039 int32_t dim;
00041 int32_t N;
00043 int32_t dp;
00045 int32_t m_target_dim;
00047 const int32_t* neighborhood_matrix;
00049 const float64_t* feature_matrix;
00051 float64_t* local_feature_matrix;
00053 float64_t* Yi_matrix;
00055 float64_t* mean_vector;
00057 float64_t* s_values_vector;
00059 float64_t* tau;
00061 int32_t tau_len;
00063 float64_t* w_sum_vector;
00065 float64_t* q_matrix;
00067 float64_t* W_matrix;
00068 #ifdef HAVE_PTHREAD
00069
00070 PTHREAD_LOCK_T* W_matrix_lock;
00071 #endif
00072 };
00073 #endif
00074
00075 CHessianLocallyLinearEmbedding::CHessianLocallyLinearEmbedding() :
00076 CLocallyLinearEmbedding()
00077 {
00078 }
00079
00080 CHessianLocallyLinearEmbedding::~CHessianLocallyLinearEmbedding()
00081 {
00082 }
00083
00084 bool CHessianLocallyLinearEmbedding::init(CFeatures* features)
00085 {
00086 return true;
00087 }
00088
00089 void CHessianLocallyLinearEmbedding::cleanup()
00090 {
00091 }
00092
00093 SGMatrix<float64_t> CHessianLocallyLinearEmbedding::apply_to_feature_matrix(CFeatures* features)
00094 {
00095 ASSERT(features);
00096
00097 CSimpleFeatures<float64_t>* simple_features = (CSimpleFeatures<float64_t>*) features;
00098 SG_REF(features);
00099 ASSERT(simple_features);
00100
00101
00102 int32_t dim = simple_features->get_num_features();
00103 if (m_target_dim>dim)
00104 SG_ERROR("Cannot increase dimensionality: target dimensionality is %d while given features dimensionality is %d.\n",
00105 m_target_dim, dim);
00106
00107 int32_t N = simple_features->get_num_vectors();
00108 if (m_k>=N)
00109 SG_ERROR("Number of neighbors (%d) should be less than number of objects (%d).\n",
00110 m_k, N);
00111
00112
00113 int32_t t;
00114
00115 int32_t dp = m_target_dim*(m_target_dim+1)/2;
00116 ASSERT(m_k>=(1+m_target_dim+dp));
00117
00118
00119 CDistance* distance = new CEuclidianDistance(simple_features,simple_features);
00120 SGMatrix<int32_t> neighborhood_matrix = get_neighborhood_matrix(distance);
00121
00122
00123 float64_t* W_matrix = SG_CALLOC(float64_t, N*N);
00124
00125 #ifdef HAVE_PTHREAD
00126 int32_t num_threads = parallel->get_num_threads();
00127 ASSERT(num_threads>0);
00128
00129 pthread_t* threads = SG_MALLOC(pthread_t, num_threads);
00130 HESSIANESTIMATION_THREAD_PARAM* parameters = SG_MALLOC(HESSIANESTIMATION_THREAD_PARAM, num_threads);
00131
00132 #else
00133 int32_t num_threads = 1;
00134 #endif
00135
00136
00137 float64_t* local_feature_matrix = SG_MALLOC(float64_t, m_k*dim*num_threads);
00138 float64_t* s_values_vector = SG_MALLOC(float64_t, dim*num_threads);
00139 int32_t tau_len = CMath::min((1+m_target_dim+dp), m_k);
00140 float64_t* tau = SG_MALLOC(float64_t, tau_len*num_threads);
00141 float64_t* mean_vector = SG_MALLOC(float64_t, dim*num_threads);
00142 float64_t* q_matrix = SG_MALLOC(float64_t, m_k*m_k*num_threads);
00143 float64_t* w_sum_vector = SG_MALLOC(float64_t, dp*num_threads);
00144 float64_t* Yi_matrix = SG_MALLOC(float64_t, m_k*(1+m_target_dim+dp)*num_threads);
00145
00146 SGMatrix<float64_t> feature_matrix = simple_features->get_feature_matrix();
00147
00148 #ifdef HAVE_PTHREAD
00149 PTHREAD_LOCK_T W_matrix_lock;
00150 pthread_attr_t attr;
00151 PTHREAD_LOCK_INIT(&W_matrix_lock);
00152 pthread_attr_init(&attr);
00153 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
00154
00155 for (t=0; t<num_threads; t++)
00156 {
00157 parameters[t].idx_start = t;
00158 parameters[t].idx_step = num_threads;
00159 parameters[t].idx_stop = N;
00160 parameters[t].m_k = m_k;
00161 parameters[t].dim = dim;
00162 parameters[t].m_target_dim = m_target_dim;
00163 parameters[t].N = N;
00164 parameters[t].dp = dp;
00165 parameters[t].neighborhood_matrix = neighborhood_matrix.matrix;
00166 parameters[t].feature_matrix = feature_matrix.matrix;
00167 parameters[t].local_feature_matrix = local_feature_matrix + (m_k*dim)*t;
00168 parameters[t].Yi_matrix = Yi_matrix + (m_k*(1+m_target_dim+dp))*t;
00169 parameters[t].mean_vector = mean_vector + dim*t;
00170 parameters[t].s_values_vector = s_values_vector + dim*t;
00171 parameters[t].tau = tau+tau_len*t;
00172 parameters[t].tau_len = tau_len;
00173 parameters[t].w_sum_vector = w_sum_vector + dp*t;
00174 parameters[t].q_matrix = q_matrix + (m_k*m_k)*t;
00175 parameters[t].W_matrix = W_matrix;
00176 parameters[t].W_matrix_lock = &W_matrix_lock;
00177 pthread_create(&threads[t], &attr, run_hessianestimation_thread, (void*)¶meters[t]);
00178 }
00179 for (t=0; t<num_threads; t++)
00180 pthread_join(threads[t], NULL);
00181 PTHREAD_LOCK_DESTROY(&W_matrix_lock);
00182 SG_FREE(parameters);
00183 SG_FREE(threads);
00184 #else
00185 HESSIANESTIMATION_THREAD_PARAM single_thread_param;
00186 single_thread_param.idx_start = t;
00187 single_thread_param.idx_step = num_threads;
00188 single_thread_param.idx_stop = N;
00189 single_thread_param.m_k = m_k;
00190 single_thread_param.dim = dim;
00191 single_thread_param.m_target_dim = m_target_dim;
00192 single_thread_param.N = N;
00193 single_thread_param.dp = dp;
00194 single_thread_param.neighborhood_matrix = neighborhood_matrix.matrix;
00195 single_thread_param.feature_matrix = feature_matrix.matrix;
00196 single_thread_param.local_feature_matrix = local_feature_matrix;
00197 single_thread_param.Yi_matrix = Yi_matrix;
00198 single_thread_param.mean_vector = mean_vector;
00199 single_thread_param.s_values_vector = s_values_vector;
00200 single_thread_param.tau = tau;
00201 single_thread_param.tau_len = tau_len;
00202 single_thread_param.w_sum_vector = w_sum_vector;
00203 single_thread_param.q_matrix = q_matrix;
00204 single_thread_param.W_matrix = W_matrix;
00205 run_hessianestimation_thread((void*)&single_thread_param);
00206 #endif
00207
00208
00209 SG_FREE(Yi_matrix);
00210 SG_FREE(s_values_vector);
00211 SG_FREE(mean_vector);
00212 SG_FREE(tau);
00213 SG_FREE(w_sum_vector);
00214 neighborhood_matrix.destroy_matrix();
00215 SG_FREE(local_feature_matrix);
00216 SG_FREE(q_matrix);
00217
00218
00219 SGMatrix<float64_t> W_sgmatrix = SGMatrix<float64_t>(W_matrix,N,N);
00220 simple_features->set_feature_matrix(find_null_space(W_sgmatrix,m_target_dim,false));
00221 W_sgmatrix.destroy_matrix();
00222
00223 SG_UNREF(features);
00224 return simple_features->get_feature_matrix();
00225 }
00226
00227 SGVector<float64_t> CHessianLocallyLinearEmbedding::apply_to_feature_vector(SGVector<float64_t> vector)
00228 {
00229 SG_NOTIMPLEMENTED;
00230 return vector;
00231 }
00232
00233 void* CHessianLocallyLinearEmbedding::run_hessianestimation_thread(void* p)
00234 {
00235 HESSIANESTIMATION_THREAD_PARAM* parameters = (HESSIANESTIMATION_THREAD_PARAM*)p;
00236 int32_t idx_start = parameters->idx_start;
00237 int32_t idx_step = parameters->idx_step;
00238 int32_t idx_stop = parameters->idx_stop;
00239 int32_t m_k = parameters->m_k;
00240 int32_t dim = parameters->dim;
00241 int32_t N = parameters->N;
00242 int32_t dp = parameters->dp;
00243 int32_t m_target_dim = parameters->m_target_dim;
00244 const int32_t* neighborhood_matrix = parameters->neighborhood_matrix;
00245 const float64_t* feature_matrix = parameters->feature_matrix;
00246 float64_t* local_feature_matrix = parameters->local_feature_matrix;
00247 float64_t* Yi_matrix = parameters->Yi_matrix;
00248 float64_t* mean_vector = parameters->mean_vector;
00249 float64_t* s_values_vector = parameters->s_values_vector;
00250 float64_t* tau = parameters->tau;
00251 int32_t tau_len = parameters->tau_len;
00252 float64_t* w_sum_vector = parameters->w_sum_vector;
00253 float64_t* q_matrix = parameters->q_matrix;
00254 float64_t* W_matrix = parameters->W_matrix;
00255 #ifdef HAVE_PTHREAD
00256 PTHREAD_LOCK_T* W_matrix_lock = parameters->W_matrix_lock;
00257 #endif
00258
00259 int i,j,k,l;
00260
00261 for (i=idx_start; i<idx_stop; i+=idx_step)
00262 {
00263
00264 for (j=0; j<m_k; j++)
00265 Yi_matrix[j] = 1.0;
00266
00267
00268 for (j=0; j<dim; j++)
00269 mean_vector[j] = 0.0;
00270
00271
00272 for (j=0; j<m_k; j++)
00273 {
00274 for (k=0; k<dim; k++)
00275 {
00276 local_feature_matrix[j*dim+k] = feature_matrix[neighborhood_matrix[j*N+i]*dim+k];
00277 mean_vector[k] += local_feature_matrix[j*dim+k];
00278 }
00279 }
00280
00281
00282 for (j=0; j<dim; j++)
00283 mean_vector[j] /= m_k;
00284
00285
00286 for (j=0; j<m_k; j++)
00287 {
00288 for (k=0; k<dim; k++)
00289 local_feature_matrix[j*dim+k] -= mean_vector[k];
00290 }
00291
00292 int32_t info = 0;
00293
00294 wrap_dgesvd('N','O', dim,m_k,local_feature_matrix,dim,
00295 s_values_vector,
00296 NULL,1, NULL,1, &info);
00297 ASSERT(info==0);
00298
00299
00300 for (j=0; j<m_target_dim; j++)
00301 {
00302 for (k=0; k<m_k; k++)
00303 Yi_matrix[(j+1)*m_k+k] = local_feature_matrix[k*dim+j];
00304 }
00305
00306 int32_t ct = 0;
00307
00308
00309 for (j=0; j<m_target_dim; j++)
00310 {
00311 for (k=0; k<m_target_dim-j; k++)
00312 {
00313 for (l=0; l<m_k; l++)
00314 {
00315 Yi_matrix[(ct+k+1+m_target_dim)*m_k+l] = Yi_matrix[(j+1)*m_k+l]*Yi_matrix[(j+k+1)*m_k+l];
00316 }
00317 }
00318 ct += ct + m_target_dim - j;
00319 }
00320
00321
00322 wrap_dgeqrf(m_k,(1+m_target_dim+dp),Yi_matrix,m_k,tau,&info);
00323 ASSERT(info==0);
00324 wrap_dorgqr(m_k,(1+m_target_dim+dp),tau_len,Yi_matrix,m_k,tau,&info);
00325 ASSERT(info==0);
00326
00327 float64_t* Pii = (Yi_matrix+m_k*(1+m_target_dim));
00328
00329 for (j=0; j<dp; j++)
00330 {
00331 w_sum_vector[j] = 0.0;
00332 for (k=0; k<m_k; k++)
00333 {
00334 w_sum_vector[j] += Pii[j*m_k+k];
00335 }
00336 if (w_sum_vector[j]<0.001)
00337 w_sum_vector[j] = 1.0;
00338 for (k=0; k<m_k; k++)
00339 Pii[j*m_k+k] /= w_sum_vector[j];
00340 }
00341
00342 cblas_dgemm(CblasColMajor,CblasNoTrans,CblasTrans,
00343 m_k,m_k,dp,
00344 1.0,Pii,m_k,
00345 Pii,m_k,
00346 0.0,q_matrix,m_k);
00347 #ifdef HAVE_PTHREAD
00348 PTHREAD_LOCK(W_matrix_lock);
00349 #endif
00350 for (j=0; j<m_k; j++)
00351 {
00352 for (k=0; k<m_k; k++)
00353 W_matrix[N*neighborhood_matrix[k*N+i]+neighborhood_matrix[j*N+i]] += q_matrix[j*m_k+k];
00354 }
00355 #ifdef HAVE_PTHREAD
00356 PTHREAD_UNLOCK(W_matrix_lock);
00357 #endif
00358 }
00359 return NULL;
00360 }
00361 #endif