Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include <shogun/preprocessor/Isomap.h>
00012 #ifdef HAVE_LAPACK
00013 #include <shogun/lib/common.h>
00014 #include <shogun/lib/FibonacciHeap.h>
00015 #include <shogun/distance/CustomDistance.h>
00016 #include <shogun/mathematics/Math.h>
00017 #include <shogun/io/SGIO.h>
00018 #include <shogun/base/Parallel.h>
00019 #include <shogun/lib/Signal.h>
00020
00021 #ifdef HAVE_PTHREAD
00022 #include <pthread.h>
00023 #endif
00024
00025 using namespace shogun;
00026
00027 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00028
00029
00030 struct DIJKSTRA_THREAD_PARAM
00031 {
00033 CFibonacciHeap* heap;
00036 const float64_t* edges_matrix;
00039 const int32_t* edges_idx_matrix;
00041 float64_t* shortest_D;
00043 int32_t i_start;
00045 int32_t i_stop;
00047 int32_t i_step;
00049 int32_t m_k;
00051 bool* s;
00053 bool* f;
00054 };
00055 #endif
00056
00057 CIsomap::CIsomap() : CMultidimensionalScaling()
00058 {
00059 init();
00060 }
00061
00062 void CIsomap::init()
00063 {
00064 m_k = 3;
00065
00066 m_parameters->add(&m_k, "k", "number of neighbors");
00067 }
00068
00069 CIsomap::~CIsomap()
00070 {
00071 }
00072
00073 bool CIsomap::init(CFeatures* features)
00074 {
00075 return true;
00076 }
00077
00078 void CIsomap::cleanup()
00079 {
00080 }
00081
00082 CSimpleFeatures<float64_t>* CIsomap::apply_to_distance(CDistance* distance)
00083 {
00084 ASSERT(distance);
00085 SG_REF(distance);
00086 SGMatrix<float64_t> geodesic_distance_matrix = isomap_distance(distance->get_distance_matrix());
00087
00088 SGMatrix<float64_t> new_feature_matrix;
00089 if (m_landmark)
00090 new_feature_matrix = CMultidimensionalScaling::landmark_embedding(geodesic_distance_matrix);
00091 else
00092 new_feature_matrix = CMultidimensionalScaling::classic_embedding(geodesic_distance_matrix);
00093
00094 geodesic_distance_matrix.destroy_matrix();
00095 SG_UNREF(distance);
00096 return new CSimpleFeatures<float64_t>(new_feature_matrix);
00097 }
00098
00099
00100 SGMatrix<float64_t> CIsomap::apply_to_feature_matrix(CFeatures* features)
00101 {
00102 CSimpleFeatures<float64_t>* simple_features =
00103 (CSimpleFeatures<float64_t>*) features;
00104 SG_REF(features);
00105 CDistance* euclidian_distance = new CEuclidianDistance();
00106 euclidian_distance->init(simple_features,simple_features);
00107
00108 Parallel* distance_parallel = euclidian_distance->parallel;
00109 euclidian_distance->parallel = this->parallel;
00110
00111 SGMatrix<float64_t> geodesic_distance_matrix = isomap_distance(euclidian_distance->get_distance_matrix());
00112
00113 SGMatrix<float64_t> new_features;
00114
00115 if (m_landmark)
00116 new_features = CMultidimensionalScaling::landmark_embedding(geodesic_distance_matrix);
00117 else
00118 new_features = CMultidimensionalScaling::classic_embedding(geodesic_distance_matrix);
00119
00120 geodesic_distance_matrix.destroy_matrix();
00121 euclidian_distance->parallel = distance_parallel;
00122 delete euclidian_distance;
00123
00124 simple_features->set_feature_matrix(new_features);
00125
00126 SG_UNREF(features);
00127 return simple_features->get_feature_matrix();
00128 }
00129
00130 SGVector<float64_t> CIsomap::apply_to_feature_vector(SGVector<float64_t> vector)
00131 {
00132 SG_NOTIMPLEMENTED;
00133 return vector;
00134 }
00135
00136 SGMatrix<float64_t> CIsomap::isomap_distance(SGMatrix<float64_t> D_matrix)
00137 {
00138 int32_t N,t,i,j;
00139 float64_t tmp;
00140 N = D_matrix.num_cols;
00141 if (D_matrix.num_cols!=D_matrix.num_rows)
00142 {
00143 D_matrix.destroy_matrix();
00144 SG_ERROR("Given distance matrix is not square.\n");
00145 }
00146 if (m_k>=N)
00147 {
00148 D_matrix.destroy_matrix();
00149 SG_ERROR("K parameter should be less than number of given vectors (k=%d, N=%d)\n", m_k, N);
00150 }
00151
00152
00153 int32_t* edges_idx_matrix = SG_MALLOC(int32_t, N*m_k);
00154 float64_t* edges_matrix = SG_MALLOC(float64_t, N*m_k);
00155
00156
00157 CFibonacciHeap* heap = new CFibonacciHeap(N);
00158 for (i=0; i<N; i++)
00159 {
00160
00161 for (j=0; j<N; j++)
00162 heap->insert(j,D_matrix.matrix[i*N+j]);
00163
00164
00165 heap->extract_min(tmp);
00166
00167
00168 for (j=0; j<m_k; j++)
00169 {
00170 edges_idx_matrix[i*m_k+j] = heap->extract_min(tmp);
00171 edges_matrix[i*m_k+j] = tmp;
00172 }
00173
00174 heap->clear();
00175 }
00176
00177 delete heap;
00178 D_matrix.destroy_matrix();
00179
00180 #ifdef HAVE_PTHREAD
00181
00182
00183 int32_t num_threads = parallel->get_num_threads();
00184 ASSERT(num_threads>0);
00185
00186 pthread_t* threads = SG_MALLOC(pthread_t, num_threads);
00187 DIJKSTRA_THREAD_PARAM* parameters = SG_MALLOC(DIJKSTRA_THREAD_PARAM, num_threads);
00188
00189 CFibonacciHeap** heaps = SG_MALLOC(CFibonacciHeap*, num_threads);
00190 for (t=0; t<num_threads; t++)
00191 heaps[t] = new CFibonacciHeap(N);
00192
00193 #else
00194 int32_t num_threads = 1;
00195 #endif
00196
00197
00198 bool* s = SG_MALLOC(bool,N*num_threads);
00199
00200 bool* f = SG_MALLOC(bool,N*num_threads);
00201
00202 float64_t* shortest_D = SG_MALLOC(float64_t,N*N);
00203
00204 #ifdef HAVE_PTHREAD
00205
00206 pthread_attr_t attr;
00207 pthread_attr_init(&attr);
00208 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
00209 for (t=0; t<num_threads; t++)
00210 {
00211 parameters[t].i_start = t;
00212 parameters[t].i_stop = N;
00213 parameters[t].i_step = num_threads;
00214 parameters[t].heap = heaps[t];
00215 parameters[t].edges_matrix = edges_matrix;
00216 parameters[t].edges_idx_matrix = edges_idx_matrix;
00217 parameters[t].s = s+t*N;
00218 parameters[t].f = f+t*N;
00219 parameters[t].m_k = m_k;
00220 parameters[t].shortest_D = shortest_D;
00221 pthread_create(&threads[t], &attr, CIsomap::run_dijkstra_thread, (void*)¶meters[t]);
00222 }
00223 for (t=0; t<num_threads; t++)
00224 pthread_join(threads[t], NULL);
00225 pthread_attr_destroy(&attr);
00226 for (t=0; t<num_threads; t++)
00227 delete heaps[t];
00228 SG_FREE(heaps);
00229 SG_FREE(parameters);
00230 SG_FREE(threads);
00231 #else
00232 D_THREAD_PARAM single_thread_param;
00233 single_thread_param.i_start = 0;
00234 single_thread_param.i_stop = N;
00235 single_thread_param.i_step = 1;
00236 single_thread_param.m_k = m_k;
00237 single_thread_param.heap = new CFibonacciHeap(N);
00238 single_thread_param.edges_matrix = edges_matrix;
00239 single_thread_param.edges_idx_matrix = edges_idx_matrix;
00240 single_thread_param.s = s;
00241 single_thread_param.f = f;
00242 single_thread_param.shortest_D = shortest_D;
00243
00244 run_dijkstra_thread((void*)&single_thread_param);
00245 delete single_thread_param.heap;
00246 #endif
00247
00248 SG_FREE(edges_matrix);
00249 SG_FREE(edges_idx_matrix);
00250 SG_FREE(s);
00251 SG_FREE(f);
00252
00253 return SGMatrix<float64_t>(shortest_D,N,N);
00254 }
00255
00256 void* CIsomap::run_dijkstra_thread(void *p)
00257 {
00258
00259 DIJKSTRA_THREAD_PARAM* parameters = (DIJKSTRA_THREAD_PARAM*)p;
00260 CFibonacciHeap* heap = parameters->heap;
00261 int32_t i_start = parameters->i_start;
00262 int32_t i_stop = parameters->i_stop;
00263 int32_t i_step = parameters->i_step;
00264 bool* s = parameters->s;
00265 bool* f = parameters->f;
00266 const float64_t* edges_matrix = parameters->edges_matrix;
00267 const int32_t* edges_idx_matrix = parameters->edges_idx_matrix;
00268 float64_t* shortest_D = parameters->shortest_D;
00269 int32_t m_k = parameters->m_k;
00270 int32_t k,j,i,min_item,w;
00271 int32_t N = i_stop;
00272
00273
00274 float64_t dist,tmp;
00275
00276
00277 for (k=i_start; k<i_stop; k+=i_step)
00278 {
00279
00280 for (j=0; j<N; j++)
00281 {
00282 shortest_D[k*N+j] = CMath::ALMOST_INFTY;
00283 s[j] = false;
00284 f[j] = false;
00285 }
00286
00287 shortest_D[k*N+k] = 0.0;
00288
00289
00290 heap->insert(k,0.0);
00291 f[k] = true;
00292
00293
00294 while (heap->get_num_nodes()>0)
00295 {
00296
00297 min_item = heap->extract_min(tmp);
00298 s[min_item] = true;
00299 f[min_item] = false;
00300
00301
00302 for (i=0; i<m_k; i++)
00303 {
00304
00305 w = edges_idx_matrix[min_item*m_k+i];
00306
00307 if (s[w] == false)
00308 {
00309
00310 dist = shortest_D[k*N+min_item] + edges_matrix[min_item*m_k+i];
00311
00312 if (dist < shortest_D[k*N+w])
00313 {
00314
00315 shortest_D[k*N+w] = dist;
00316
00317 if (f[w])
00318 {
00319
00320 heap->decrease_key(w, dist);
00321 }
00322 else
00323 {
00324
00325 heap->insert(w, dist);
00326 f[w] = true;
00327 }
00328 }
00329 }
00330 }
00331 }
00332
00333 heap->clear();
00334 }
00335 return NULL;
00336 }
00337
00338 #endif