Isomap.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2011 Sergey Lisitsyn
00008  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
00009  */
00010 
00011 #include <shogun/converter/Isomap.h>
00012 #ifdef HAVE_LAPACK
00013 #include <shogun/lib/common.h>
00014 #include <shogun/lib/FibonacciHeap.h>
00015 #include <shogun/mathematics/Math.h>
00016 #include <shogun/io/SGIO.h>
00017 #include <shogun/base/Parallel.h>
00018 #include <shogun/lib/Signal.h>
00019 #include <shogun/lib/CoverTree.h>
00020 
00021 #ifdef HAVE_PTHREAD
00022 #include <pthread.h>
00023 #endif
00024 
00025 using namespace shogun;
00026 
00027 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00028 /* struct storing thread params
00029  */
00030 struct DIJKSTRA_THREAD_PARAM
00031 {
00033     CFibonacciHeap* heap;
00036     const float64_t* edges_matrix;
00039     const int32_t* edges_idx_matrix;
00041     float64_t* shortest_D;
00043     int32_t i_start;
00045     int32_t i_stop;
00047     int32_t i_step;
00049     int32_t m_k;
00051     bool* s;
00053     bool* f;
00054 };
00055 
00056 class ISOMAP_COVERTREE_POINT
00057 {
00058 public:
00059 
00060     ISOMAP_COVERTREE_POINT(int32_t index, const SGMatrix<float64_t>& dmatrix)
00061     {
00062         point_index = index;
00063         distance_matrix = dmatrix;
00064     }
00065 
00066     inline double distance(const ISOMAP_COVERTREE_POINT& p) const
00067     {
00068         return distance_matrix[point_index*distance_matrix.num_rows+p.point_index];
00069     }
00070 
00071     inline bool operator==(const ISOMAP_COVERTREE_POINT& p) const
00072     {
00073         return (p.point_index==point_index);
00074     }
00075 
00076     int32_t point_index;
00077     SGMatrix<float64_t> distance_matrix;
00078 };
00079 
00080 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
00081 
00082 CIsomap::CIsomap() : CMultidimensionalScaling()
00083 {
00084     m_k = 3;
00085 
00086     init();
00087 }
00088 
00089 void CIsomap::init()
00090 {
00091     SG_ADD(&m_k, "k", "number of neighbors", MS_AVAILABLE);
00092 }
00093 
00094 CIsomap::~CIsomap()
00095 {
00096 }
00097 
00098 void CIsomap::set_k(int32_t k)
00099 {
00100     ASSERT(k>0);
00101     m_k = k;
00102 }
00103 
00104 int32_t CIsomap::get_k() const
00105 {
00106     return m_k;
00107 }
00108 
00109 const char* CIsomap::get_name() const
00110 {
00111     return "Isomap";
00112 }
00113 
00114 SGMatrix<float64_t> CIsomap::process_distance_matrix(SGMatrix<float64_t> distance_matrix)
00115 {
00116     return isomap_distance(distance_matrix);
00117 }
00118 
00119 SGMatrix<float64_t> CIsomap::isomap_distance(SGMatrix<float64_t> D_matrix)
00120 {
00121     int32_t N,i;
00122     N = D_matrix.num_cols;
00123     if (D_matrix.num_cols!=D_matrix.num_rows)
00124     {
00125         SG_ERROR("Given distance matrix is not square.\n");
00126     }
00127     if (m_k>=N)
00128     {
00129         SG_ERROR("K parameter should be less than number of given vectors (k=%d, N=%d)\n", m_k, N);
00130     }
00131 
00132     // cut by k-nearest neighbors
00133     int32_t* edges_idx_matrix = SG_MALLOC(int32_t, N*m_k);
00134     float64_t* edges_matrix = SG_MALLOC(float64_t, N*m_k);
00135 
00136     float64_t max_dist = SGVector<float64_t>::max(D_matrix.matrix,N*N);
00137     CoverTree<ISOMAP_COVERTREE_POINT>* coverTree = new CoverTree<ISOMAP_COVERTREE_POINT>(max_dist);
00138 
00139     for (i=0; i<N; i++)
00140         coverTree->insert(ISOMAP_COVERTREE_POINT(i,D_matrix));
00141 
00142     for (i=0; i<N; i++)
00143     {
00144         ISOMAP_COVERTREE_POINT origin(i,D_matrix);
00145         std::vector<ISOMAP_COVERTREE_POINT> neighbors =
00146            coverTree->kNearestNeighbors(origin,m_k+1);
00147         for (std::size_t m=1; m<neighbors.size(); m++)
00148         {
00149             edges_matrix[i*m_k+m-1] = origin.distance(neighbors[m]);
00150             edges_idx_matrix[i*m_k+m-1] = neighbors[m].point_index;
00151         }
00152     }
00153 
00154     delete coverTree;
00155 
00156 #ifdef HAVE_PTHREAD
00157     int32_t t;
00158     // Parallel Dijkstra with Fibonacci Heap
00159     int32_t num_threads = parallel->get_num_threads();
00160     ASSERT(num_threads>0);
00161     // allocate threads and thread parameters
00162     pthread_t* threads = SG_MALLOC(pthread_t, num_threads);
00163     DIJKSTRA_THREAD_PARAM* parameters = SG_MALLOC(DIJKSTRA_THREAD_PARAM, num_threads);
00164     // allocate heaps
00165     CFibonacciHeap** heaps = SG_MALLOC(CFibonacciHeap*, num_threads);
00166     for (t=0; t<num_threads; t++)
00167         heaps[t] = new CFibonacciHeap(N);
00168 
00169 #else
00170     int32_t num_threads = 1;
00171 #endif
00172 
00173     // allocate (s)olution
00174     bool* s = SG_MALLOC(bool,N*num_threads);
00175     // allocate (f)rontier
00176     bool* f = SG_MALLOC(bool,N*num_threads);
00177     // init matrix to store shortest distances
00178     float64_t* shortest_D = D_matrix.matrix;
00179 
00180 #ifdef HAVE_PTHREAD
00181 
00182     pthread_attr_t attr;
00183     pthread_attr_init(&attr);
00184     pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
00185     for (t=0; t<num_threads; t++)
00186     {
00187         parameters[t].i_start = t;
00188         parameters[t].i_stop = N;
00189         parameters[t].i_step = num_threads;
00190         parameters[t].heap = heaps[t];
00191         parameters[t].edges_matrix = edges_matrix;
00192         parameters[t].edges_idx_matrix = edges_idx_matrix;
00193         parameters[t].s = s+t*N;
00194         parameters[t].f = f+t*N;
00195         parameters[t].m_k = m_k;
00196         parameters[t].shortest_D = shortest_D;
00197         pthread_create(&threads[t], &attr, CIsomap::run_dijkstra_thread, (void*)&parameters[t]);
00198     }
00199     for (t=0; t<num_threads; t++)
00200         pthread_join(threads[t], NULL);
00201     pthread_attr_destroy(&attr);
00202     for (t=0; t<num_threads; t++)
00203         delete heaps[t];
00204     SG_FREE(heaps);
00205     SG_FREE(parameters);
00206     SG_FREE(threads);
00207 #else
00208     DIJKSTRA_THREAD_PARAM single_thread_param;
00209     single_thread_param.i_start = 0;
00210     single_thread_param.i_stop = N;
00211     single_thread_param.i_step = 1;
00212     single_thread_param.m_k = m_k;
00213     single_thread_param.heap = new CFibonacciHeap(N);
00214     single_thread_param.edges_matrix = edges_matrix;
00215     single_thread_param.edges_idx_matrix = edges_idx_matrix;
00216     single_thread_param.s = s;
00217     single_thread_param.f = f;
00218     single_thread_param.shortest_D = shortest_D;
00219     run_dijkstra_thread((void*)&single_thread_param);
00220     delete single_thread_param.heap;
00221 #endif
00222     // cleanup
00223     SG_FREE(edges_matrix);
00224     SG_FREE(edges_idx_matrix);
00225     SG_FREE(s);
00226     SG_FREE(f);
00227 
00228     return D_matrix;
00229 }
00230 
00231 void* CIsomap::run_dijkstra_thread(void *p)
00232 {
00233     // get parameters from p
00234     DIJKSTRA_THREAD_PARAM* parameters = (DIJKSTRA_THREAD_PARAM*)p;
00235     CFibonacciHeap* heap = parameters->heap;
00236     int32_t i_start = parameters->i_start;
00237     int32_t i_stop = parameters->i_stop;
00238     int32_t i_step = parameters->i_step;
00239     bool* s = parameters->s;
00240     bool* f = parameters->f;
00241     const float64_t* edges_matrix = parameters->edges_matrix;
00242     const int32_t* edges_idx_matrix = parameters->edges_idx_matrix;
00243     float64_t* shortest_D = parameters->shortest_D;
00244     int32_t m_k = parameters->m_k;
00245     int32_t k,j,i,min_item,w;
00246     int32_t N = i_stop;
00247 
00248     // temporary vars
00249     float64_t dist,tmp;
00250 
00251     // main loop
00252     for (k=i_start; k<i_stop; k+=i_step)
00253     {
00254         // fill s and f with false, fill shortest_D with infinity
00255         for (j=0; j<N; j++)
00256         {
00257             shortest_D[k*N+j] = CMath::ALMOST_INFTY;
00258             s[j] = false;
00259             f[j] = false;
00260         }
00261         // set distance from k to k as zero
00262         shortest_D[k*N+k] = 0.0;
00263 
00264         // insert kth object to heap with zero distance and set f[k] true
00265         heap->insert(k,0.0);
00266         f[k] = true;
00267 
00268         // while heap is not empty
00269         while (heap->get_num_nodes()>0)
00270         {
00271             // extract min and set (s)olution state as true and (f)rontier as false
00272             min_item = heap->extract_min(tmp);
00273             s[min_item] = true;
00274             f[min_item] = false;
00275 
00276             // for-each edge (min_item->w)
00277             for (i=0; i<m_k; i++)
00278             {
00279                 // get w idx
00280                 w = edges_idx_matrix[min_item*m_k+i];
00281                 // if w is not in solution yet
00282                 if (s[w] == false)
00283                 {
00284                     // get distance from k to i through min_item
00285                     dist = shortest_D[k*N+min_item] + edges_matrix[min_item*m_k+i];
00286                     // if distance can be relaxed
00287                     if (dist < shortest_D[k*N+w])
00288                     {
00289                         // relax distance
00290                         shortest_D[k*N+w] = dist;
00291                         // if w is in (f)rontier
00292                         if (f[w])
00293                         {
00294                             // decrease distance in heap
00295                             heap->decrease_key(w, dist);
00296                         }
00297                         else
00298                         {
00299                             // insert w to heap and set (f)rontier as true
00300                             heap->insert(w, dist);
00301                             f[w] = true;
00302                         }
00303                     }
00304                 }
00305             }
00306         }
00307         // clear heap to re-use
00308         heap->clear();
00309     }
00310     return NULL;
00311 }
00312 
00313 #endif /* HAVE_LAPACK */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation