Isomap.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2011 Sergey Lisitsyn
00008  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
00009  */
00010 
00011 #include <shogun/preprocessor/Isomap.h>
00012 #ifdef HAVE_LAPACK
00013 #include <shogun/lib/common.h>
00014 #include <shogun/lib/FibonacciHeap.h>
00015 #include <shogun/distance/CustomDistance.h>
00016 #include <shogun/mathematics/Math.h>
00017 #include <shogun/io/SGIO.h>
00018 #include <shogun/base/Parallel.h>
00019 #include <shogun/lib/Signal.h>
00020 
00021 #ifdef HAVE_PTHREAD
00022 #include <pthread.h>
00023 #endif
00024 
00025 using namespace shogun;
00026 
00027 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00028 /* struct storing thread params
00029  */
00030 struct DIJKSTRA_THREAD_PARAM
00031 {
00033     CFibonacciHeap* heap;
00036     const float64_t* edges_matrix;
00039     const int32_t* edges_idx_matrix;
00041     float64_t* shortest_D;
00043     int32_t i_start;
00045     int32_t i_stop;
00047     int32_t i_step;
00049     int32_t m_k;
00051     bool* s;
00053     bool* f;
00054 };
00055 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
00056 
00057 CIsomap::CIsomap() : CMultidimensionalScaling()
00058 {
00059     init();
00060 }
00061 
00062 void CIsomap::init()
00063 {
00064     m_k = 3;
00065 
00066     m_parameters->add(&m_k, "k", "number of neighbors");
00067 }
00068 
00069 CIsomap::~CIsomap()
00070 {
00071 }
00072 
00073 bool CIsomap::init(CFeatures* features)
00074 {
00075     return true;
00076 }
00077 
00078 void CIsomap::cleanup()
00079 {
00080 }
00081 
00082 CSimpleFeatures<float64_t>* CIsomap::apply_to_distance(CDistance* distance)
00083 {
00084     ASSERT(distance);
00085     SG_REF(distance);
00086     SGMatrix<float64_t> geodesic_distance_matrix = isomap_distance(distance->get_distance_matrix());
00087     // compute embedding for geodesic distance
00088     SGMatrix<float64_t> new_feature_matrix;
00089     if (m_landmark)
00090         new_feature_matrix = CMultidimensionalScaling::landmark_embedding(geodesic_distance_matrix);
00091     else
00092         new_feature_matrix = CMultidimensionalScaling::classic_embedding(geodesic_distance_matrix);
00093 
00094     geodesic_distance_matrix.destroy_matrix();
00095     SG_UNREF(distance);
00096     return new CSimpleFeatures<float64_t>(new_feature_matrix);
00097 }
00098 
00099 
00100 SGMatrix<float64_t> CIsomap::apply_to_feature_matrix(CFeatures* features)
00101 {
00102     CSimpleFeatures<float64_t>* simple_features =
00103         (CSimpleFeatures<float64_t>*) features;
00104     SG_REF(features);
00105     CDistance* euclidian_distance = new CEuclidianDistance();
00106     euclidian_distance->init(simple_features,simple_features);
00107 
00108     Parallel* distance_parallel = euclidian_distance->parallel;
00109     euclidian_distance->parallel = this->parallel;
00110     
00111     SGMatrix<float64_t> geodesic_distance_matrix = isomap_distance(euclidian_distance->get_distance_matrix());
00112 
00113     SGMatrix<float64_t> new_features;
00114 
00115     if (m_landmark) 
00116         new_features = CMultidimensionalScaling::landmark_embedding(geodesic_distance_matrix);
00117     else
00118         new_features = CMultidimensionalScaling::classic_embedding(geodesic_distance_matrix);
00119 
00120     geodesic_distance_matrix.destroy_matrix();
00121     euclidian_distance->parallel = distance_parallel;
00122     delete euclidian_distance;
00123 
00124     simple_features->set_feature_matrix(new_features);
00125 
00126     SG_UNREF(features);
00127     return simple_features->get_feature_matrix();
00128 }
00129     
00130 SGVector<float64_t> CIsomap::apply_to_feature_vector(SGVector<float64_t> vector)
00131 {
00132     SG_NOTIMPLEMENTED;
00133     return vector;
00134 }
00135     
00136 SGMatrix<float64_t> CIsomap::isomap_distance(SGMatrix<float64_t> D_matrix)
00137 {
00138     int32_t N,t,i,j;
00139     float64_t tmp;
00140     N = D_matrix.num_cols;
00141     if (D_matrix.num_cols!=D_matrix.num_rows)
00142     {
00143         D_matrix.destroy_matrix();
00144         SG_ERROR("Given distance matrix is not square.\n");
00145     }
00146     if (m_k>=N)
00147     {
00148         D_matrix.destroy_matrix();
00149         SG_ERROR("K parameter should be less than number of given vectors (k=%d, N=%d)\n", m_k, N);
00150     }
00151     
00152     // cut by k-nearest neighbors
00153     int32_t* edges_idx_matrix = SG_MALLOC(int32_t, N*m_k);
00154     float64_t* edges_matrix = SG_MALLOC(float64_t, N*m_k);
00155             
00156     // query neighbors and edges to neighbors
00157     CFibonacciHeap* heap = new CFibonacciHeap(N);
00158     for (i=0; i<N; i++)
00159     {
00160         // insert distances to heap
00161         for (j=0; j<N; j++)
00162             heap->insert(j,D_matrix.matrix[i*N+j]);
00163 
00164         // extract nearest neighbor: the jth object itself
00165         heap->extract_min(tmp);
00166 
00167         // extract m_k neighbors and distances
00168         for (j=0; j<m_k; j++)
00169         {
00170             edges_idx_matrix[i*m_k+j] = heap->extract_min(tmp);
00171             edges_matrix[i*m_k+j] = tmp;
00172         }
00173         // clear heap
00174         heap->clear();
00175     }
00176     // cleanup
00177     delete heap;
00178     D_matrix.destroy_matrix();
00179 
00180 #ifdef HAVE_PTHREAD
00181 
00182     // Parallel Dijkstra with Fibonacci Heap 
00183     int32_t num_threads = parallel->get_num_threads();
00184     ASSERT(num_threads>0);
00185     // allocate threads and thread parameters
00186     pthread_t* threads = SG_MALLOC(pthread_t, num_threads);
00187     DIJKSTRA_THREAD_PARAM* parameters = SG_MALLOC(DIJKSTRA_THREAD_PARAM, num_threads);
00188     // allocate heaps
00189     CFibonacciHeap** heaps = SG_MALLOC(CFibonacciHeap*, num_threads);
00190     for (t=0; t<num_threads; t++)
00191         heaps[t] = new CFibonacciHeap(N);
00192 
00193 #else
00194     int32_t num_threads = 1;    
00195 #endif  
00196 
00197     // allocate (s)olution
00198     bool* s = SG_MALLOC(bool,N*num_threads);
00199     // allocate (f)rontier
00200     bool* f = SG_MALLOC(bool,N*num_threads);
00201     // init matrix to store shortest distances
00202     float64_t* shortest_D = SG_MALLOC(float64_t,N*N);
00203 
00204 #ifdef HAVE_PTHREAD
00205 
00206     pthread_attr_t attr;
00207     pthread_attr_init(&attr);
00208     pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
00209     for (t=0; t<num_threads; t++)
00210     {
00211         parameters[t].i_start = t;
00212         parameters[t].i_stop = N;
00213         parameters[t].i_step = num_threads;
00214         parameters[t].heap = heaps[t];
00215         parameters[t].edges_matrix = edges_matrix;
00216         parameters[t].edges_idx_matrix = edges_idx_matrix;
00217         parameters[t].s = s+t*N;
00218         parameters[t].f = f+t*N;
00219         parameters[t].m_k = m_k;
00220         parameters[t].shortest_D = shortest_D;
00221         pthread_create(&threads[t], &attr, CIsomap::run_dijkstra_thread, (void*)&parameters[t]);
00222     }
00223     for (t=0; t<num_threads; t++)
00224         pthread_join(threads[t], NULL);
00225     pthread_attr_destroy(&attr);
00226     for (t=0; t<num_threads; t++)
00227         delete heaps[t];
00228     SG_FREE(heaps);
00229     SG_FREE(parameters);
00230     SG_FREE(threads);
00231 #else
00232     D_THREAD_PARAM single_thread_param;
00233     single_thread_param.i_start = 0;
00234     single_thread_param.i_stop = N;
00235     single_thread_param.i_step = 1;
00236     single_thread_param.m_k = m_k;
00237     single_thread_param.heap = new CFibonacciHeap(N);
00238     single_thread_param.edges_matrix = edges_matrix;
00239     single_thread_param.edges_idx_matrix = edges_idx_matrix;
00240     single_thread_param.s = s;
00241     single_thread_param.f = f;
00242     single_thread_param.shortest_D = shortest_D;
00243     
00244     run_dijkstra_thread((void*)&single_thread_param);
00245     delete single_thread_param.heap;
00246 #endif
00247     // cleanup
00248     SG_FREE(edges_matrix);
00249     SG_FREE(edges_idx_matrix);
00250     SG_FREE(s);
00251     SG_FREE(f);
00252 
00253     return SGMatrix<float64_t>(shortest_D,N,N);
00254 }
00255 
00256 void* CIsomap::run_dijkstra_thread(void *p)
00257 {
00258     // get parameters from p
00259     DIJKSTRA_THREAD_PARAM* parameters = (DIJKSTRA_THREAD_PARAM*)p;
00260     CFibonacciHeap* heap = parameters->heap;
00261     int32_t i_start = parameters->i_start;
00262     int32_t i_stop = parameters->i_stop;
00263     int32_t i_step = parameters->i_step;
00264     bool* s = parameters->s;
00265     bool* f = parameters->f;
00266     const float64_t* edges_matrix = parameters->edges_matrix;
00267     const int32_t* edges_idx_matrix = parameters->edges_idx_matrix;
00268     float64_t* shortest_D = parameters->shortest_D;
00269     int32_t m_k = parameters->m_k;
00270     int32_t k,j,i,min_item,w;
00271     int32_t N = i_stop;
00272 
00273     // temporary vars
00274     float64_t dist,tmp;
00275 
00276     // main loop
00277     for (k=i_start; k<i_stop; k+=i_step)
00278     {
00279         // fill s and f with false, fill shortest_D with infinity
00280         for (j=0; j<N; j++)
00281         {
00282             shortest_D[k*N+j] = CMath::ALMOST_INFTY;
00283             s[j] = false;
00284             f[j] = false;
00285         }
00286         // set distance from k to k as zero
00287         shortest_D[k*N+k] = 0.0;
00288 
00289         // insert kth object to heap with zero distance and set f[k] true
00290         heap->insert(k,0.0);
00291         f[k] = true;
00292 
00293         // while heap is not empty
00294         while (heap->get_num_nodes()>0)
00295         {
00296             // extract min and set (s)olution state as true and (f)rontier as false
00297             min_item = heap->extract_min(tmp);
00298             s[min_item] = true;
00299             f[min_item] = false;
00300             
00301             // for-each edge (min_item->w)
00302             for (i=0; i<m_k; i++)
00303             {
00304                 // get w idx
00305                 w = edges_idx_matrix[min_item*m_k+i];
00306                 // if w is not in solution yet
00307                 if (s[w] == false)
00308                 {
00309                     // get distance from k to i through min_item
00310                     dist = shortest_D[k*N+min_item] + edges_matrix[min_item*m_k+i];
00311                     // if distance can be relaxed
00312                     if (dist < shortest_D[k*N+w])
00313                     {
00314                         // relax distance
00315                         shortest_D[k*N+w] = dist;
00316                         // if w is in (f)rontier 
00317                         if (f[w])
00318                         {
00319                             // decrease distance in heap
00320                             heap->decrease_key(w, dist);
00321                         }
00322                         else 
00323                         {
00324                             // insert w to heap and set (f)rontier as true
00325                             heap->insert(w, dist);
00326                             f[w] = true;
00327                         }
00328                     } 
00329                 }
00330             }
00331         }
00332         // clear heap to re-use
00333         heap->clear();
00334     }
00335     return NULL;
00336 }
00337 
00338 #endif /* HAVE_LAPACK */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation