00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include <shogun/lib/config.h>
00013 #include <shogun/lib/common.h>
00014 #include <shogun/io/SGIO.h>
00015 #include <shogun/io/File.h>
00016 #include <shogun/lib/Time.h>
00017 #include <shogun/base/Parallel.h>
00018 #include <shogun/base/Parameter.h>
00019
00020 #include <shogun/distance/Distance.h>
00021 #include <shogun/features/Features.h>
00022
00023 #include <string.h>
00024 #include <unistd.h>
00025
00026 #ifdef HAVE_PTHREAD
00027 #include <pthread.h>
00028 #endif
00029
00030 using namespace shogun;
00031
00032 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00033 struct DISTANCE_THREAD_PARAM
00034 {
00035
00036 CDistance* distance;
00037
00038 float64_t* distance_matrix;
00039
00040 int32_t idx_start;
00041
00042 int32_t idx_stop;
00043
00044 int32_t idx_step;
00045
00046 int32_t lhs_vectors_number;
00047
00048 int32_t rhs_vectors_number;
00049
00050 bool symmetric;
00051
00052 bool chunk_by_lhs;
00053 };
00054 #endif
00055
00056 CDistance::CDistance() : CSGObject()
00057 {
00058 init();
00059 }
00060
00061
00062 CDistance::CDistance(CFeatures* p_lhs, CFeatures* p_rhs) : CSGObject()
00063 {
00064 init();
00065 init(p_lhs, p_rhs);
00066 }
00067
00068 CDistance::~CDistance()
00069 {
00070 SG_FREE(precomputed_matrix);
00071 precomputed_matrix=NULL;
00072
00073 remove_lhs_and_rhs();
00074 }
00075
00076 bool CDistance::init(CFeatures* l, CFeatures* r)
00077 {
00078
00079 ASSERT(l);
00080 ASSERT(r);
00081
00082
00083 ASSERT(l->get_feature_class()==r->get_feature_class());
00084 ASSERT(l->get_feature_type()==r->get_feature_type());
00085
00086
00087 remove_lhs_and_rhs();
00088
00089
00090 SG_REF(l);
00091 SG_REF(r);
00092
00093 lhs=l;
00094 rhs=r;
00095
00096 num_lhs=l->get_num_vectors();
00097 num_rhs=r->get_num_vectors();
00098
00099 SG_FREE(precomputed_matrix);
00100 precomputed_matrix=NULL ;
00101
00102 return true;
00103 }
00104
00105 void CDistance::load(CFile* loader)
00106 {
00107 SG_SET_LOCALE_C;
00108 SG_RESET_LOCALE;
00109 }
00110
00111 void CDistance::save(CFile* writer)
00112 {
00113 SG_SET_LOCALE_C;
00114 SG_RESET_LOCALE;
00115 }
00116
00117 void CDistance::remove_lhs_and_rhs()
00118 {
00119 SG_UNREF(rhs);
00120 rhs = NULL;
00121 num_rhs=0;
00122
00123 SG_UNREF(lhs);
00124 lhs = NULL;
00125 num_lhs=0;
00126 }
00127
00128 void CDistance::remove_lhs()
00129 {
00130 SG_UNREF(lhs);
00131 lhs = NULL;
00132 num_lhs=0;
00133 }
00134
00136 void CDistance::remove_rhs()
00137 {
00138 SG_UNREF(rhs);
00139 rhs = NULL;
00140 num_rhs=0;
00141 }
00142
00143 CFeatures* CDistance::replace_rhs(CFeatures* r)
00144 {
00145
00146 ASSERT(r);
00147
00148
00149 ASSERT(lhs->get_feature_class()==r->get_feature_class());
00150 ASSERT(lhs->get_feature_type()==r->get_feature_type());
00151
00152
00153 CFeatures* tmp=rhs;
00154
00155 rhs=r;
00156 num_rhs=r->get_num_vectors();
00157
00158 SG_FREE(precomputed_matrix);
00159 precomputed_matrix=NULL ;
00160
00161
00162 return tmp;
00163 }
00164
00165 CFeatures* CDistance::replace_lhs(CFeatures* l)
00166 {
00167
00168 ASSERT(l);
00169
00170
00171 ASSERT(rhs->get_feature_class()==l->get_feature_class());
00172 ASSERT(rhs->get_feature_type()==l->get_feature_type());
00173
00174
00175 CFeatures* tmp=lhs;
00176
00177 lhs=l;
00178 num_lhs=l->get_num_vectors();
00179
00180 SG_FREE(precomputed_matrix);
00181 precomputed_matrix=NULL ;
00182
00183
00184 return tmp;
00185 }
00186
00187 void CDistance::do_precompute_matrix()
00188 {
00189 int32_t num_left=lhs->get_num_vectors();
00190 int32_t num_right=rhs->get_num_vectors();
00191 SG_INFO( "precomputing distance matrix (%ix%i)\n", num_left, num_right) ;
00192
00193 ASSERT(num_left==num_right);
00194 ASSERT(lhs==rhs);
00195 int32_t num=num_left;
00196
00197 SG_FREE(precomputed_matrix);
00198 precomputed_matrix=SG_MALLOC(float32_t, num*(num+1)/2);
00199
00200 for (int32_t i=0; i<num; i++)
00201 {
00202 SG_PROGRESS(i*i,0,num*num);
00203 for (int32_t j=0; j<=i; j++)
00204 precomputed_matrix[i*(i+1)/2+j] = compute(i,j) ;
00205 }
00206
00207 SG_PROGRESS(num*num,0,num*num);
00208 SG_DONE();
00209 }
00210
00211 SGMatrix<float64_t> CDistance::get_distance_matrix()
00212 {
00213 int32_t m,n;
00214 float64_t* data=get_distance_matrix_real(m,n,NULL);
00215 return SGMatrix<float64_t>(data, m,n);
00216 }
00217
00218 float32_t* CDistance::get_distance_matrix_shortreal(
00219 int32_t &num_vec1, int32_t &num_vec2, float32_t* target)
00220 {
00221 float32_t* result = NULL;
00222 CFeatures* f1 = lhs;
00223 CFeatures* f2 = rhs;
00224
00225 if (has_features())
00226 {
00227 if (target && (num_vec1!=get_num_vec_lhs() || num_vec2!=get_num_vec_rhs()))
00228 SG_ERROR("distance matrix does not fit into target\n");
00229
00230 num_vec1=get_num_vec_lhs();
00231 num_vec2=get_num_vec_rhs();
00232 int64_t total_num=num_vec1*num_vec2;
00233 int32_t num_done=0;
00234
00235 SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00236
00237 if (target)
00238 result=target;
00239 else
00240 result=SG_MALLOC(float32_t, total_num);
00241
00242 if ( (f1 == f2) && (num_vec1 == num_vec2) && (f1!=NULL && f2!=NULL) )
00243 {
00244 for (int32_t i=0; i<num_vec1; i++)
00245 {
00246 for (int32_t j=i; j<num_vec1; j++)
00247 {
00248 float64_t v=distance(i,j);
00249
00250 result[i+j*num_vec1]=v;
00251 result[j+i*num_vec1]=v;
00252
00253 if (num_done%100000)
00254 SG_PROGRESS(num_done, 0, total_num-1);
00255
00256 if (i!=j)
00257 num_done+=2;
00258 else
00259 num_done+=1;
00260 }
00261 }
00262 }
00263 else
00264 {
00265 for (int32_t i=0; i<num_vec1; i++)
00266 {
00267 for (int32_t j=0; j<num_vec2; j++)
00268 {
00269 result[i+j*num_vec1]=distance(i,j) ;
00270
00271 if (num_done%100000)
00272 SG_PROGRESS(num_done, 0, total_num-1);
00273
00274 num_done++;
00275 }
00276 }
00277 }
00278
00279 SG_DONE();
00280 }
00281 else
00282 SG_ERROR("no features assigned to distance\n");
00283
00284 return result;
00285 }
00286
00287 float64_t* CDistance::get_distance_matrix_real(
00288 int32_t &lhs_vectors_number, int32_t &rhs_vectors_number, float64_t* target)
00289 {
00290 float64_t* distance_matrix = NULL;
00291 CFeatures* lhs_features = lhs;
00292 CFeatures* rhs_features = rhs;
00293
00294
00295 if (!has_features())
00296 SG_ERROR("No features assigned to the distance.\n");
00297
00298 if (target &&
00299 (lhs_vectors_number!=get_num_vec_lhs() ||
00300 rhs_vectors_number!=get_num_vec_rhs()))
00301 SG_ERROR("Distance matrix does not fit into the given target.\n");
00302
00303
00304 lhs_vectors_number = get_num_vec_lhs();
00305 rhs_vectors_number = get_num_vec_rhs();
00306 int64_t total_distances_number = lhs_vectors_number*rhs_vectors_number;
00307
00308 SG_DEBUG("Calculating distance matrix of size %dx%d.\n", lhs_vectors_number, rhs_vectors_number);
00309
00310
00311 if (target)
00312 distance_matrix = target;
00313 else
00314 distance_matrix = SG_MALLOC(float64_t, total_distances_number);
00315
00316
00317 bool symmetric = (lhs_features==rhs_features) || (lhs_vectors_number==rhs_vectors_number);
00318
00319 bool chunk_by_lhs = (lhs_vectors_number >= rhs_vectors_number);
00320
00321 #ifdef HAVE_PTHREAD
00322
00323 int32_t num_threads = parallel->get_num_threads();
00324 ASSERT(num_threads>0);
00325 pthread_t* threads = SG_MALLOC(pthread_t, num_threads);
00326 DISTANCE_THREAD_PARAM* parameters = SG_MALLOC(DISTANCE_THREAD_PARAM,num_threads);
00327 pthread_attr_t attr;
00328 pthread_attr_init(&attr);
00329 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
00330
00331 for (int32_t t=0; t<num_threads; t++)
00332 {
00333 parameters[t].idx_start = t;
00334 parameters[t].idx_stop = chunk_by_lhs ? lhs_vectors_number : rhs_vectors_number;
00335 parameters[t].idx_step = num_threads;
00336 parameters[t].distance_matrix = distance_matrix;
00337 parameters[t].symmetric = symmetric;
00338 parameters[t].lhs_vectors_number = lhs_vectors_number;
00339 parameters[t].rhs_vectors_number = rhs_vectors_number;
00340 parameters[t].chunk_by_lhs = chunk_by_lhs;
00341 parameters[t].distance = this;
00342 pthread_create(&threads[t], &attr, run_distance_thread, (void*)¶meters[t]);
00343 }
00344
00345 for (int32_t t=0; t<num_threads; t++)
00346 {
00347 pthread_join(threads[t], NULL);
00348 }
00349
00350 pthread_attr_destroy(&attr);
00351 SG_FREE(parameters);
00352 SG_FREE(threads);
00353 #else
00354
00355 DISTANCE_THREAD_PARAM single_thread_param;
00356 single_thread_param.idx_start = 0;
00357 single_thread_param.idx_stop = chunk_by_lhs ? lhs_vectors_number : rhs_vectors_number;
00358 single_thread_param.idx_step = 1;
00359 single_thread_param.distance_matrix = distance_matrix;
00360 single_thread_param.symmetric = symmetric;
00361 single_thread_param.lhs_vectors_number = lhs_vectors_number;
00362 single_thread_param.rhs_vectors_number = rhs_vectors_number;
00363 single_thread_param.chunk_by_lhs = chunk_by_lhs;
00364 single_thread_param.distance = this;
00365
00366 run_distance_thread((void*)&single_thread_param);
00367 #endif
00368
00369 return distance_matrix;
00370 }
00371
00372 void CDistance::init()
00373 {
00374 precomputed_matrix = NULL;
00375 precompute_matrix = false;
00376 lhs = NULL;
00377 rhs = NULL;
00378 num_lhs=0;
00379 num_rhs=0;
00380
00381 m_parameters->add((CSGObject**) &lhs, "lhs",
00382 "Feature vectors to occur on left hand side.");
00383 m_parameters->add((CSGObject**) &rhs, "rhs",
00384 "Feature vectors to occur on right hand side.");
00385 }
00386
00387 void* CDistance::run_distance_thread(void* p)
00388 {
00389 DISTANCE_THREAD_PARAM* parameters = (DISTANCE_THREAD_PARAM*)p;
00390 float64_t* distance_matrix = parameters->distance_matrix;
00391 CDistance* distance = parameters->distance;
00392 int32_t idx_start = parameters->idx_start;
00393 int32_t idx_stop = parameters->idx_stop;
00394 int32_t idx_step = parameters->idx_step;
00395 int32_t lhs_vectors_number = parameters->lhs_vectors_number;
00396 int32_t rhs_vectors_number = parameters->rhs_vectors_number;
00397 bool symmetric = parameters->symmetric;
00398 bool chunk_by_lhs = parameters->chunk_by_lhs;
00399
00400 if (symmetric)
00401 {
00402 for (int32_t i=idx_start; i<idx_stop; i+=idx_step)
00403 {
00404 for (int32_t j=i; j<rhs_vectors_number; j++)
00405 {
00406 float64_t ij_distance = distance->compute(i,j);
00407 distance_matrix[i*rhs_vectors_number+j] = ij_distance;
00408 distance_matrix[j*rhs_vectors_number+i] = ij_distance;
00409 }
00410 }
00411 }
00412 else
00413 {
00414 if (chunk_by_lhs)
00415 {
00416 for (int32_t i=idx_start; i<idx_stop; i+=idx_step)
00417 {
00418 for (int32_t j=0; j<rhs_vectors_number; j++)
00419 {
00420 distance_matrix[j*lhs_vectors_number+i] = distance->compute(i,j);
00421 }
00422 }
00423 }
00424 else
00425 {
00426 for (int32_t j=idx_start; j<idx_stop; j+=idx_step)
00427 {
00428 for (int32_t i=0; i<lhs_vectors_number; i++)
00429 {
00430 distance_matrix[j*lhs_vectors_number+i] = distance->compute(i,j);
00431 }
00432 }
00433 }
00434 }
00435
00436 return NULL;
00437 }