00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include <shogun/lib/config.h>
00013 #include <shogun/lib/common.h>
00014 #include <shogun/io/SGIO.h>
00015 #include <shogun/io/File.h>
00016 #include <shogun/lib/Time.h>
00017 #include <shogun/base/Parallel.h>
00018 #include <shogun/base/Parameter.h>
00019
00020 #include <shogun/distance/Distance.h>
00021 #include <shogun/features/Features.h>
00022
00023 #include <string.h>
00024 #include <unistd.h>
00025
00026 #ifdef HAVE_PTHREAD
00027 #include <pthread.h>
00028 #endif
00029
00030 using namespace shogun;
00031
00032 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00033 struct DISTANCE_THREAD_PARAM
00034 {
00035
00036 CDistance* distance;
00037
00038 float64_t* distance_matrix;
00039
00040 int32_t idx_start;
00041
00042 int32_t idx_stop;
00043
00044 int32_t idx_step;
00045
00046 int32_t lhs_vectors_number;
00047
00048 int32_t rhs_vectors_number;
00049
00050 bool symmetric;
00051
00052 bool chunk_by_lhs;
00053 };
00054 #endif
00055
00056 CDistance::CDistance() : CSGObject()
00057 {
00058 init();
00059 }
00060
00061
00062 CDistance::CDistance(CFeatures* p_lhs, CFeatures* p_rhs) : CSGObject()
00063 {
00064 init();
00065 init(p_lhs, p_rhs);
00066 }
00067
00068 CDistance::~CDistance()
00069 {
00070 SG_FREE(precomputed_matrix);
00071 precomputed_matrix=NULL;
00072
00073 remove_lhs_and_rhs();
00074 }
00075
00076 bool CDistance::init(CFeatures* l, CFeatures* r)
00077 {
00078
00079 ASSERT(l);
00080 ASSERT(r);
00081
00082
00083 ASSERT(l->get_feature_class()==r->get_feature_class());
00084 ASSERT(l->get_feature_type()==r->get_feature_type());
00085
00086
00087 remove_lhs_and_rhs();
00088
00089
00090 SG_REF(l);
00091 SG_REF(r);
00092
00093 lhs=l;
00094 rhs=r;
00095
00096 SG_FREE(precomputed_matrix);
00097 precomputed_matrix=NULL ;
00098
00099 return true;
00100 }
00101
00102 void CDistance::load(CFile* loader)
00103 {
00104 SG_SET_LOCALE_C;
00105 SG_RESET_LOCALE;
00106 }
00107
00108 void CDistance::save(CFile* writer)
00109 {
00110 SG_SET_LOCALE_C;
00111 SG_RESET_LOCALE;
00112 }
00113
00114 void CDistance::remove_lhs_and_rhs()
00115 {
00116 SG_UNREF(rhs);
00117 rhs = NULL;
00118
00119 SG_UNREF(lhs);
00120 lhs = NULL;
00121 }
00122
00123 void CDistance::remove_lhs()
00124 {
00125 SG_UNREF(lhs);
00126 lhs = NULL;
00127 }
00128
00130 void CDistance::remove_rhs()
00131 {
00132 SG_UNREF(rhs);
00133 rhs = NULL;
00134 }
00135
00136 CFeatures* CDistance::replace_rhs(CFeatures* r)
00137 {
00138
00139 ASSERT(r);
00140
00141
00142 ASSERT(lhs->get_feature_class()==r->get_feature_class());
00143 ASSERT(lhs->get_feature_type()==r->get_feature_type());
00144
00145
00146 CFeatures* tmp=rhs;
00147
00148 rhs=r;
00149
00150 SG_FREE(precomputed_matrix);
00151 precomputed_matrix=NULL ;
00152
00153
00154 return tmp;
00155 }
00156
00157 void CDistance::do_precompute_matrix()
00158 {
00159 int32_t num_left=lhs->get_num_vectors();
00160 int32_t num_right=rhs->get_num_vectors();
00161 SG_INFO( "precomputing distance matrix (%ix%i)\n", num_left, num_right) ;
00162
00163 ASSERT(num_left==num_right);
00164 ASSERT(lhs==rhs);
00165 int32_t num=num_left;
00166
00167 SG_FREE(precomputed_matrix);
00168 precomputed_matrix=SG_MALLOC(float32_t, num*(num+1)/2);
00169
00170 for (int32_t i=0; i<num; i++)
00171 {
00172 SG_PROGRESS(i*i,0,num*num);
00173 for (int32_t j=0; j<=i; j++)
00174 precomputed_matrix[i*(i+1)/2+j] = compute(i,j) ;
00175 }
00176
00177 SG_PROGRESS(num*num,0,num*num);
00178 SG_DONE();
00179 }
00180
00181 SGMatrix<float64_t> CDistance::get_distance_matrix()
00182 {
00183 int32_t m,n;
00184 float64_t* data=get_distance_matrix_real(m,n,NULL);
00185 return SGMatrix<float64_t>(data, m,n);
00186 }
00187
00188 float32_t* CDistance::get_distance_matrix_shortreal(
00189 int32_t &num_vec1, int32_t &num_vec2, float32_t* target)
00190 {
00191 float32_t* result = NULL;
00192 CFeatures* f1 = lhs;
00193 CFeatures* f2 = rhs;
00194
00195 if (f1 && f2)
00196 {
00197 if (target && (num_vec1!=f1->get_num_vectors() || num_vec2!=f2->get_num_vectors()))
00198 SG_ERROR("distance matrix does not fit into target\n");
00199
00200 num_vec1=f1->get_num_vectors();
00201 num_vec2=f2->get_num_vectors();
00202 int64_t total_num=num_vec1*num_vec2;
00203 int32_t num_done=0;
00204
00205 SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00206
00207 if (target)
00208 result=target;
00209 else
00210 result=SG_MALLOC(float32_t, total_num);
00211
00212 if ( (f1 == f2) && (num_vec1 == num_vec2) )
00213 {
00214 for (int32_t i=0; i<num_vec1; i++)
00215 {
00216 for (int32_t j=i; j<num_vec1; j++)
00217 {
00218 float64_t v=distance(i,j);
00219
00220 result[i+j*num_vec1]=v;
00221 result[j+i*num_vec1]=v;
00222
00223 if (num_done%100000)
00224 SG_PROGRESS(num_done, 0, total_num-1);
00225
00226 if (i!=j)
00227 num_done+=2;
00228 else
00229 num_done+=1;
00230 }
00231 }
00232 }
00233 else
00234 {
00235 for (int32_t i=0; i<num_vec1; i++)
00236 {
00237 for (int32_t j=0; j<num_vec2; j++)
00238 {
00239 result[i+j*num_vec1]=distance(i,j) ;
00240
00241 if (num_done%100000)
00242 SG_PROGRESS(num_done, 0, total_num-1);
00243
00244 num_done++;
00245 }
00246 }
00247 }
00248
00249 SG_DONE();
00250 }
00251 else
00252 SG_ERROR("no features assigned to distance\n");
00253
00254 return result;
00255 }
00256
00257 float64_t* CDistance::get_distance_matrix_real(
00258 int32_t &lhs_vectors_number, int32_t &rhs_vectors_number, float64_t* target)
00259 {
00260 float64_t* distance_matrix = NULL;
00261 CFeatures* lhs_features = lhs;
00262 CFeatures* rhs_features = rhs;
00263
00264
00265 if (!lhs_features || !rhs_features)
00266 SG_ERROR("No features assigned to the distance.\n");
00267
00268 if (target &&
00269 (lhs_vectors_number!=lhs_features->get_num_vectors() ||
00270 rhs_vectors_number!=rhs_features->get_num_vectors()))
00271 SG_ERROR("Distance matrix does not fit into the given target.\n");
00272
00273
00274 lhs_vectors_number = lhs_features->get_num_vectors();
00275 rhs_vectors_number = rhs_features->get_num_vectors();
00276 int64_t total_distances_number = lhs_vectors_number*rhs_vectors_number;
00277
00278 SG_DEBUG("Calculating distance matrix of size %dx%d.\n", lhs_vectors_number, rhs_vectors_number);
00279
00280
00281 if (target)
00282 distance_matrix = target;
00283 else
00284 distance_matrix = SG_MALLOC(float64_t, total_distances_number);
00285
00286
00287 bool symmetric = (lhs_features==rhs_features) || (lhs_vectors_number==rhs_vectors_number);
00288
00289 bool chunk_by_lhs = (lhs_vectors_number >= rhs_vectors_number);
00290
00291 #ifdef HAVE_PTHREAD
00292
00293 int32_t num_threads = parallel->get_num_threads();
00294 ASSERT(num_threads>0);
00295 pthread_t* threads = SG_MALLOC(pthread_t, num_threads);
00296 DISTANCE_THREAD_PARAM* parameters = SG_MALLOC(DISTANCE_THREAD_PARAM,num_threads);
00297 pthread_attr_t attr;
00298 pthread_attr_init(&attr);
00299 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
00300
00301 for (int32_t t=0; t<num_threads; t++)
00302 {
00303 parameters[t].idx_start = t;
00304 parameters[t].idx_stop = chunk_by_lhs ? lhs_vectors_number : rhs_vectors_number;
00305 parameters[t].idx_step = num_threads;
00306 parameters[t].distance_matrix = distance_matrix;
00307 parameters[t].symmetric = symmetric;
00308 parameters[t].lhs_vectors_number = lhs_vectors_number;
00309 parameters[t].rhs_vectors_number = rhs_vectors_number;
00310 parameters[t].chunk_by_lhs = chunk_by_lhs;
00311 parameters[t].distance = this;
00312 pthread_create(&threads[t], &attr, run_distance_thread, (void*)¶meters[t]);
00313 }
00314
00315 for (int32_t t=0; t<num_threads; t++)
00316 {
00317 pthread_join(threads[t], NULL);
00318 }
00319
00320 pthread_attr_destroy(&attr);
00321 SG_FREE(parameters);
00322 SG_FREE(threads);
00323 #else
00324
00325 DISTANCE_THREAD_PARAM single_thread_param;
00326 single_thread_param.idx_start = 0;
00327 single_thread_param.idx_stop = chunk_by_lhs ? lhs_vectors_number : rhs_vectors_number;
00328 single_thread_param.idx_step = 1;
00329 single_thread_param.distance_matrix = distance_matrix;
00330 single_thread_param.symmetric = symmetric;
00331 single_thread_param.lhs_vectors_number = lhs_vectors_number;
00332 single_thread_param.rhs_vectors_number = rhs_vectors_number;
00333 single_thread_param.chunk_by_lhs = chunk_by_lhs;
00334 single_thread_param.distance = this;
00335
00336 run_distance_thread((void*)&single_thread_param);
00337 #endif
00338
00339 return distance_matrix;
00340 }
00341
00342 void CDistance::init()
00343 {
00344 precomputed_matrix = NULL;
00345 precompute_matrix = false;
00346 lhs = NULL;
00347 rhs = NULL;
00348
00349 m_parameters->add((CSGObject**) &lhs, "lhs",
00350 "Feature vectors to occur on left hand side.");
00351 m_parameters->add((CSGObject**) &rhs, "rhs",
00352 "Feature vectors to occur on right hand side.");
00353 }
00354
00355 void* CDistance::run_distance_thread(void* p)
00356 {
00357 DISTANCE_THREAD_PARAM* parameters = (DISTANCE_THREAD_PARAM*)p;
00358 float64_t* distance_matrix = parameters->distance_matrix;
00359 CDistance* distance = parameters->distance;
00360 int32_t idx_start = parameters->idx_start;
00361 int32_t idx_stop = parameters->idx_stop;
00362 int32_t idx_step = parameters->idx_step;
00363 int32_t lhs_vectors_number = parameters->lhs_vectors_number;
00364 int32_t rhs_vectors_number = parameters->rhs_vectors_number;
00365 bool symmetric = parameters->symmetric;
00366 bool chunk_by_lhs = parameters->chunk_by_lhs;
00367
00368 if (symmetric)
00369 {
00370 for (int32_t i=idx_start; i<idx_stop; i+=idx_step)
00371 {
00372 for (int32_t j=i; j<rhs_vectors_number; j++)
00373 {
00374 float64_t ij_distance = distance->compute(i,j);
00375 distance_matrix[i*rhs_vectors_number+j] = ij_distance;
00376 distance_matrix[j*rhs_vectors_number+i] = ij_distance;
00377 }
00378 }
00379 }
00380 else
00381 {
00382 if (chunk_by_lhs)
00383 {
00384 for (int32_t i=idx_start; i<idx_stop; i+=idx_step)
00385 {
00386 for (int32_t j=0; j<rhs_vectors_number; j++)
00387 {
00388 distance_matrix[j*lhs_vectors_number+i] = distance->compute(i,j);
00389 }
00390 }
00391 }
00392 else
00393 {
00394 for (int32_t j=idx_start; j<idx_stop; j+=idx_step)
00395 {
00396 for (int32_t i=0; i<lhs_vectors_number; i++)
00397 {
00398 distance_matrix[j*lhs_vectors_number+i] = distance->compute(i,j);
00399 }
00400 }
00401 }
00402 }
00403
00404 return NULL;
00405 }