Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include <shogun/converter/Isomap.h>
00012 #ifdef HAVE_LAPACK
00013 #include <shogun/lib/common.h>
00014 #include <shogun/lib/FibonacciHeap.h>
00015 #include <shogun/mathematics/Math.h>
00016 #include <shogun/io/SGIO.h>
00017 #include <shogun/base/Parallel.h>
00018 #include <shogun/lib/Signal.h>
00019 #include <shogun/lib/CoverTree.h>
00020
00021 #ifdef HAVE_PTHREAD
00022 #include <pthread.h>
00023 #endif
00024
00025 using namespace shogun;
00026
00027 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00028
00029
00030 struct DIJKSTRA_THREAD_PARAM
00031 {
00033 CFibonacciHeap* heap;
00036 const float64_t* edges_matrix;
00039 const int32_t* edges_idx_matrix;
00041 float64_t* shortest_D;
00043 int32_t i_start;
00045 int32_t i_stop;
00047 int32_t i_step;
00049 int32_t m_k;
00051 bool* s;
00053 bool* f;
00054 };
00055
00056 class ISOMAP_COVERTREE_POINT
00057 {
00058 public:
00059
00060 ISOMAP_COVERTREE_POINT(int32_t index, const SGMatrix<float64_t>& dmatrix)
00061 {
00062 point_index = index;
00063 distance_matrix = dmatrix;
00064 }
00065
00066 inline double distance(const ISOMAP_COVERTREE_POINT& p) const
00067 {
00068 return distance_matrix[point_index*distance_matrix.num_rows+p.point_index];
00069 }
00070
00071 inline bool operator==(const ISOMAP_COVERTREE_POINT& p) const
00072 {
00073 return (p.point_index==point_index);
00074 }
00075
00076 int32_t point_index;
00077 SGMatrix<float64_t> distance_matrix;
00078 };
00079
00080 #endif
00081
00082 CIsomap::CIsomap() : CMultidimensionalScaling()
00083 {
00084 m_k = 3;
00085
00086 init();
00087 }
00088
00089 void CIsomap::init()
00090 {
00091 SG_ADD(&m_k, "k", "number of neighbors", MS_AVAILABLE);
00092 }
00093
00094 CIsomap::~CIsomap()
00095 {
00096 }
00097
00098 void CIsomap::set_k(int32_t k)
00099 {
00100 ASSERT(k>0);
00101 m_k = k;
00102 }
00103
00104 int32_t CIsomap::get_k() const
00105 {
00106 return m_k;
00107 }
00108
00109 const char* CIsomap::get_name() const
00110 {
00111 return "Isomap";
00112 }
00113
00114 SGMatrix<float64_t> CIsomap::process_distance_matrix(SGMatrix<float64_t> distance_matrix)
00115 {
00116 return isomap_distance(distance_matrix);
00117 }
00118
00119 SGMatrix<float64_t> CIsomap::isomap_distance(SGMatrix<float64_t> D_matrix)
00120 {
00121 int32_t N,i;
00122 N = D_matrix.num_cols;
00123 if (D_matrix.num_cols!=D_matrix.num_rows)
00124 {
00125 SG_ERROR("Given distance matrix is not square.\n");
00126 }
00127 if (m_k>=N)
00128 {
00129 SG_ERROR("K parameter should be less than number of given vectors (k=%d, N=%d)\n", m_k, N);
00130 }
00131
00132
00133 int32_t* edges_idx_matrix = SG_MALLOC(int32_t, N*m_k);
00134 float64_t* edges_matrix = SG_MALLOC(float64_t, N*m_k);
00135
00136 float64_t max_dist = SGVector<float64_t>::max(D_matrix.matrix,N*N);
00137 CoverTree<ISOMAP_COVERTREE_POINT>* coverTree = new CoverTree<ISOMAP_COVERTREE_POINT>(max_dist);
00138
00139 for (i=0; i<N; i++)
00140 coverTree->insert(ISOMAP_COVERTREE_POINT(i,D_matrix));
00141
00142 for (i=0; i<N; i++)
00143 {
00144 ISOMAP_COVERTREE_POINT origin(i,D_matrix);
00145 std::vector<ISOMAP_COVERTREE_POINT> neighbors =
00146 coverTree->kNearestNeighbors(origin,m_k+1);
00147 for (std::size_t m=1; m<neighbors.size(); m++)
00148 {
00149 edges_matrix[i*m_k+m-1] = origin.distance(neighbors[m]);
00150 edges_idx_matrix[i*m_k+m-1] = neighbors[m].point_index;
00151 }
00152 }
00153
00154 delete coverTree;
00155
00156 #ifdef HAVE_PTHREAD
00157 int32_t t;
00158
00159 int32_t num_threads = parallel->get_num_threads();
00160 ASSERT(num_threads>0);
00161
00162 pthread_t* threads = SG_MALLOC(pthread_t, num_threads);
00163 DIJKSTRA_THREAD_PARAM* parameters = SG_MALLOC(DIJKSTRA_THREAD_PARAM, num_threads);
00164
00165 CFibonacciHeap** heaps = SG_MALLOC(CFibonacciHeap*, num_threads);
00166 for (t=0; t<num_threads; t++)
00167 heaps[t] = new CFibonacciHeap(N);
00168
00169 #else
00170 int32_t num_threads = 1;
00171 #endif
00172
00173
00174 bool* s = SG_MALLOC(bool,N*num_threads);
00175
00176 bool* f = SG_MALLOC(bool,N*num_threads);
00177
00178 float64_t* shortest_D = D_matrix.matrix;
00179
00180 #ifdef HAVE_PTHREAD
00181
00182 pthread_attr_t attr;
00183 pthread_attr_init(&attr);
00184 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
00185 for (t=0; t<num_threads; t++)
00186 {
00187 parameters[t].i_start = t;
00188 parameters[t].i_stop = N;
00189 parameters[t].i_step = num_threads;
00190 parameters[t].heap = heaps[t];
00191 parameters[t].edges_matrix = edges_matrix;
00192 parameters[t].edges_idx_matrix = edges_idx_matrix;
00193 parameters[t].s = s+t*N;
00194 parameters[t].f = f+t*N;
00195 parameters[t].m_k = m_k;
00196 parameters[t].shortest_D = shortest_D;
00197 pthread_create(&threads[t], &attr, CIsomap::run_dijkstra_thread, (void*)¶meters[t]);
00198 }
00199 for (t=0; t<num_threads; t++)
00200 pthread_join(threads[t], NULL);
00201 pthread_attr_destroy(&attr);
00202 for (t=0; t<num_threads; t++)
00203 delete heaps[t];
00204 SG_FREE(heaps);
00205 SG_FREE(parameters);
00206 SG_FREE(threads);
00207 #else
00208 DIJKSTRA_THREAD_PARAM single_thread_param;
00209 single_thread_param.i_start = 0;
00210 single_thread_param.i_stop = N;
00211 single_thread_param.i_step = 1;
00212 single_thread_param.m_k = m_k;
00213 single_thread_param.heap = new CFibonacciHeap(N);
00214 single_thread_param.edges_matrix = edges_matrix;
00215 single_thread_param.edges_idx_matrix = edges_idx_matrix;
00216 single_thread_param.s = s;
00217 single_thread_param.f = f;
00218 single_thread_param.shortest_D = shortest_D;
00219 run_dijkstra_thread((void*)&single_thread_param);
00220 delete single_thread_param.heap;
00221 #endif
00222
00223 SG_FREE(edges_matrix);
00224 SG_FREE(edges_idx_matrix);
00225 SG_FREE(s);
00226 SG_FREE(f);
00227
00228 return D_matrix;
00229 }
00230
00231 void* CIsomap::run_dijkstra_thread(void *p)
00232 {
00233
00234 DIJKSTRA_THREAD_PARAM* parameters = (DIJKSTRA_THREAD_PARAM*)p;
00235 CFibonacciHeap* heap = parameters->heap;
00236 int32_t i_start = parameters->i_start;
00237 int32_t i_stop = parameters->i_stop;
00238 int32_t i_step = parameters->i_step;
00239 bool* s = parameters->s;
00240 bool* f = parameters->f;
00241 const float64_t* edges_matrix = parameters->edges_matrix;
00242 const int32_t* edges_idx_matrix = parameters->edges_idx_matrix;
00243 float64_t* shortest_D = parameters->shortest_D;
00244 int32_t m_k = parameters->m_k;
00245 int32_t k,j,i,min_item,w;
00246 int32_t N = i_stop;
00247
00248
00249 float64_t dist,tmp;
00250
00251
00252 for (k=i_start; k<i_stop; k+=i_step)
00253 {
00254
00255 for (j=0; j<N; j++)
00256 {
00257 shortest_D[k*N+j] = CMath::ALMOST_INFTY;
00258 s[j] = false;
00259 f[j] = false;
00260 }
00261
00262 shortest_D[k*N+k] = 0.0;
00263
00264
00265 heap->insert(k,0.0);
00266 f[k] = true;
00267
00268
00269 while (heap->get_num_nodes()>0)
00270 {
00271
00272 min_item = heap->extract_min(tmp);
00273 s[min_item] = true;
00274 f[min_item] = false;
00275
00276
00277 for (i=0; i<m_k; i++)
00278 {
00279
00280 w = edges_idx_matrix[min_item*m_k+i];
00281
00282 if (s[w] == false)
00283 {
00284
00285 dist = shortest_D[k*N+min_item] + edges_matrix[min_item*m_k+i];
00286
00287 if (dist < shortest_D[k*N+w])
00288 {
00289
00290 shortest_D[k*N+w] = dist;
00291
00292 if (f[w])
00293 {
00294
00295 heap->decrease_key(w, dist);
00296 }
00297 else
00298 {
00299
00300 heap->insert(w, dist);
00301 f[w] = true;
00302 }
00303 }
00304 }
00305 }
00306 }
00307
00308 heap->clear();
00309 }
00310 return NULL;
00311 }
00312
00313 #endif