Isomap.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2011 Sergey Lisitsyn
00008  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
00009  */
00010 
00011 #include <shogun/converter/Isomap.h>
00012 #ifdef HAVE_LAPACK
00013 #include <shogun/lib/common.h>
00014 #include <shogun/lib/FibonacciHeap.h>
00015 #include <shogun/mathematics/Math.h>
00016 #include <shogun/io/SGIO.h>
00017 #include <shogun/base/Parallel.h>
00018 #include <shogun/lib/Signal.h>
00019 
00020 #ifdef HAVE_PTHREAD
00021 #include <pthread.h>
00022 #endif
00023 
00024 using namespace shogun;
00025 
00026 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00027 /* struct storing thread params
00028  */
00029 struct DIJKSTRA_THREAD_PARAM
00030 {
00032     CFibonacciHeap* heap;
00035     const float64_t* edges_matrix;
00038     const int32_t* edges_idx_matrix;
00040     float64_t* shortest_D;
00042     int32_t i_start;
00044     int32_t i_stop;
00046     int32_t i_step;
00048     int32_t m_k;
00050     bool* s;
00052     bool* f;
00053 };
00054 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
00055 
00056 CIsomap::CIsomap() : CMultidimensionalScaling()
00057 {
00058     m_k = 3;
00059     
00060     init();
00061 }
00062 
00063 void CIsomap::init()
00064 {
00065     m_parameters->add(&m_k, "k", "number of neighbors");
00066 }
00067 
00068 CIsomap::~CIsomap()
00069 {
00070 }
00071 
00072 void CIsomap::set_k(int32_t k)
00073 {
00074     ASSERT(k>0);
00075     m_k = k;
00076 }
00077 
00078 int32_t CIsomap::get_k() const
00079 {
00080     return m_k;
00081 }
00082 
00083 const char* CIsomap::get_name() const 
00084 {
00085     return "Isomap";
00086 }
00087 
00088 SGMatrix<float64_t> CIsomap::process_distance_matrix(SGMatrix<float64_t> distance_matrix)
00089 {
00090     return isomap_distance(distance_matrix);
00091 }
00092 
00093 SGMatrix<float64_t> CIsomap::isomap_distance(SGMatrix<float64_t> D_matrix)
00094 {
00095     int32_t N,t,i,j;
00096     float64_t tmp;
00097     N = D_matrix.num_cols;
00098     if (D_matrix.num_cols!=D_matrix.num_rows)
00099     {
00100         D_matrix.destroy_matrix();
00101         SG_ERROR("Given distance matrix is not square.\n");
00102     }
00103     if (m_k>=N)
00104     {
00105         D_matrix.destroy_matrix();
00106         SG_ERROR("K parameter should be less than number of given vectors (k=%d, N=%d)\n", m_k, N);
00107     }
00108     
00109     // cut by k-nearest neighbors
00110     int32_t* edges_idx_matrix = SG_MALLOC(int32_t, N*m_k);
00111     float64_t* edges_matrix = SG_MALLOC(float64_t, N*m_k);
00112             
00113     // query neighbors and edges to neighbors
00114     CFibonacciHeap* heap = new CFibonacciHeap(N);
00115     for (i=0; i<N; i++)
00116     {
00117         // insert distances to heap
00118         for (j=0; j<N; j++)
00119             heap->insert(j,D_matrix[i*N+j]);
00120 
00121         // extract nearest neighbor: the jth object itself
00122         heap->extract_min(tmp);
00123 
00124         // extract m_k neighbors and distances
00125         for (j=0; j<m_k; j++)
00126         {
00127             edges_idx_matrix[i*m_k+j] = heap->extract_min(tmp);
00128             edges_matrix[i*m_k+j] = tmp;
00129         }
00130         // clear heap
00131         heap->clear();
00132     }
00133     // cleanup
00134     delete heap;
00135 
00136 #ifdef HAVE_PTHREAD
00137 
00138     // Parallel Dijkstra with Fibonacci Heap 
00139     int32_t num_threads = parallel->get_num_threads();
00140     ASSERT(num_threads>0);
00141     // allocate threads and thread parameters
00142     pthread_t* threads = SG_MALLOC(pthread_t, num_threads);
00143     DIJKSTRA_THREAD_PARAM* parameters = SG_MALLOC(DIJKSTRA_THREAD_PARAM, num_threads);
00144     // allocate heaps
00145     CFibonacciHeap** heaps = SG_MALLOC(CFibonacciHeap*, num_threads);
00146     for (t=0; t<num_threads; t++)
00147         heaps[t] = new CFibonacciHeap(N);
00148 
00149 #else
00150     int32_t num_threads = 1;
00151 #endif  
00152 
00153     // allocate (s)olution
00154     bool* s = SG_MALLOC(bool,N*num_threads);
00155     // allocate (f)rontier
00156     bool* f = SG_MALLOC(bool,N*num_threads);
00157     // init matrix to store shortest distances
00158     float64_t* shortest_D = D_matrix.matrix;
00159 
00160 #ifdef HAVE_PTHREAD
00161 
00162     pthread_attr_t attr;
00163     pthread_attr_init(&attr);
00164     pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
00165     for (t=0; t<num_threads; t++)
00166     {
00167         parameters[t].i_start = t;
00168         parameters[t].i_stop = N;
00169         parameters[t].i_step = num_threads;
00170         parameters[t].heap = heaps[t];
00171         parameters[t].edges_matrix = edges_matrix;
00172         parameters[t].edges_idx_matrix = edges_idx_matrix;
00173         parameters[t].s = s+t*N;
00174         parameters[t].f = f+t*N;
00175         parameters[t].m_k = m_k;
00176         parameters[t].shortest_D = shortest_D;
00177         pthread_create(&threads[t], &attr, CIsomap::run_dijkstra_thread, (void*)&parameters[t]);
00178     }
00179     for (t=0; t<num_threads; t++)
00180         pthread_join(threads[t], NULL);
00181     pthread_attr_destroy(&attr);
00182     for (t=0; t<num_threads; t++)
00183         delete heaps[t];
00184     SG_FREE(heaps);
00185     SG_FREE(parameters);
00186     SG_FREE(threads);
00187 #else
00188     D_THREAD_PARAM single_thread_param;
00189     single_thread_param.i_start = 0;
00190     single_thread_param.i_stop = N;
00191     single_thread_param.i_step = 1;
00192     single_thread_param.m_k = m_k;
00193     single_thread_param.heap = new CFibonacciHeap(N);
00194     single_thread_param.edges_matrix = edges_matrix;
00195     single_thread_param.edges_idx_matrix = edges_idx_matrix;
00196     single_thread_param.s = s;
00197     single_thread_param.f = f;
00198     single_thread_param.shortest_D = shortest_D;
00199     run_dijkstra_thread((void*)&single_thread_param);
00200     delete single_thread_param.heap;
00201 #endif
00202     // cleanup
00203     SG_FREE(edges_matrix);
00204     SG_FREE(edges_idx_matrix);
00205     SG_FREE(s);
00206     SG_FREE(f);
00207 
00208     return SGMatrix<float64_t>(shortest_D,N,N);
00209 }
00210 
00211 void* CIsomap::run_dijkstra_thread(void *p)
00212 {
00213     // get parameters from p
00214     DIJKSTRA_THREAD_PARAM* parameters = (DIJKSTRA_THREAD_PARAM*)p;
00215     CFibonacciHeap* heap = parameters->heap;
00216     int32_t i_start = parameters->i_start;
00217     int32_t i_stop = parameters->i_stop;
00218     int32_t i_step = parameters->i_step;
00219     bool* s = parameters->s;
00220     bool* f = parameters->f;
00221     const float64_t* edges_matrix = parameters->edges_matrix;
00222     const int32_t* edges_idx_matrix = parameters->edges_idx_matrix;
00223     float64_t* shortest_D = parameters->shortest_D;
00224     int32_t m_k = parameters->m_k;
00225     int32_t k,j,i,min_item,w;
00226     int32_t N = i_stop;
00227 
00228     // temporary vars
00229     float64_t dist,tmp;
00230 
00231     // main loop
00232     for (k=i_start; k<i_stop; k+=i_step)
00233     {
00234         // fill s and f with false, fill shortest_D with infinity
00235         for (j=0; j<N; j++)
00236         {
00237             shortest_D[k*N+j] = CMath::ALMOST_INFTY;
00238             s[j] = false;
00239             f[j] = false;
00240         }
00241         // set distance from k to k as zero
00242         shortest_D[k*N+k] = 0.0;
00243 
00244         // insert kth object to heap with zero distance and set f[k] true
00245         heap->insert(k,0.0);
00246         f[k] = true;
00247 
00248         // while heap is not empty
00249         while (heap->get_num_nodes()>0)
00250         {
00251             // extract min and set (s)olution state as true and (f)rontier as false
00252             min_item = heap->extract_min(tmp);
00253             s[min_item] = true;
00254             f[min_item] = false;
00255             
00256             // for-each edge (min_item->w)
00257             for (i=0; i<m_k; i++)
00258             {
00259                 // get w idx
00260                 w = edges_idx_matrix[min_item*m_k+i];
00261                 // if w is not in solution yet
00262                 if (s[w] == false)
00263                 {
00264                     // get distance from k to i through min_item
00265                     dist = shortest_D[k*N+min_item] + edges_matrix[min_item*m_k+i];
00266                     // if distance can be relaxed
00267                     if (dist < shortest_D[k*N+w])
00268                     {
00269                         // relax distance
00270                         shortest_D[k*N+w] = dist;
00271                         // if w is in (f)rontier 
00272                         if (f[w])
00273                         {
00274                             // decrease distance in heap
00275                             heap->decrease_key(w, dist);
00276                         }
00277                         else 
00278                         {
00279                             // insert w to heap and set (f)rontier as true
00280                             heap->insert(w, dist);
00281                             f[w] = true;
00282                         }
00283                     } 
00284                 }
00285             }
00286         }
00287         // clear heap to re-use
00288         heap->clear();
00289     }
00290     return NULL;
00291 }
00292 
00293 #endif /* HAVE_LAPACK */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation