Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "lib/config.h"
00013 #include "lib/common.h"
00014 #include "lib/io.h"
00015 #include "lib/File.h"
00016 #include "lib/Time.h"
00017 #include "base/Parallel.h"
00018 #include "base/Parameter.h"
00019
00020 #include "distance/Distance.h"
00021 #include "features/Features.h"
00022
00023 #include <string.h>
00024 #include <unistd.h>
00025
00026 #ifndef WIN32
00027 #include <pthread.h>
00028 #endif
00029
00030 using namespace shogun;
00031
00032 CDistance::CDistance() : CSGObject()
00033 {
00034 init();
00035 }
00036
00037
00038 CDistance::CDistance(CFeatures* p_lhs, CFeatures* p_rhs) : CSGObject()
00039 {
00040 init();
00041 init(p_lhs, p_rhs);
00042 }
00043
00044 CDistance::~CDistance()
00045 {
00046 delete[] precomputed_matrix;
00047 precomputed_matrix=NULL;
00048
00049 remove_lhs_and_rhs();
00050 }
00051
00052 bool CDistance::init(CFeatures* l, CFeatures* r)
00053 {
00054
00055 ASSERT(l);
00056 ASSERT(r);
00057
00058
00059 ASSERT(l->get_feature_class()==r->get_feature_class());
00060 ASSERT(l->get_feature_type()==r->get_feature_type());
00061
00062
00063 remove_lhs_and_rhs();
00064
00065
00066 SG_REF(l);
00067 SG_REF(r);
00068
00069 lhs=l;
00070 rhs=r;
00071
00072 delete[] precomputed_matrix ;
00073 precomputed_matrix=NULL ;
00074
00075 return true;
00076 }
00077
00078 void CDistance::load(CFile* loader)
00079 {
00080 SG_SET_LOCALE_C;
00081 SG_RESET_LOCALE;
00082 }
00083
00084 void CDistance::save(CFile* writer)
00085 {
00086 SG_SET_LOCALE_C;
00087 SG_RESET_LOCALE;
00088 }
00089
00090 void CDistance::remove_lhs_and_rhs()
00091 {
00092 SG_UNREF(rhs);
00093 rhs = NULL;
00094
00095 SG_UNREF(lhs);
00096 lhs = NULL;
00097 }
00098
00099 void CDistance::remove_lhs()
00100 {
00101 SG_UNREF(lhs);
00102 lhs = NULL;
00103 }
00104
00106 void CDistance::remove_rhs()
00107 {
00108 SG_UNREF(rhs);
00109 rhs = NULL;
00110 }
00111
00112 CFeatures* CDistance::replace_rhs(CFeatures* r)
00113 {
00114
00115 ASSERT(r);
00116
00117
00118 ASSERT(lhs->get_feature_class()==r->get_feature_class());
00119 ASSERT(lhs->get_feature_type()==r->get_feature_type());
00120
00121
00122 CFeatures* tmp=rhs;
00123
00124 rhs=r;
00125
00126 delete[] precomputed_matrix ;
00127 precomputed_matrix=NULL ;
00128
00129
00130 return tmp;
00131 }
00132
00133 void CDistance::do_precompute_matrix()
00134 {
00135 int32_t num_left=lhs->get_num_vectors();
00136 int32_t num_right=rhs->get_num_vectors();
00137 SG_INFO( "precomputing distance matrix (%ix%i)\n", num_left, num_right) ;
00138
00139 ASSERT(num_left==num_right);
00140 ASSERT(lhs==rhs);
00141 int32_t num=num_left;
00142
00143 delete[] precomputed_matrix;
00144 precomputed_matrix=new float32_t[num*(num+1)/2];
00145
00146 for (int32_t i=0; i<num; i++)
00147 {
00148 SG_PROGRESS(i*i,0,num*num);
00149 for (int32_t j=0; j<=i; j++)
00150 precomputed_matrix[i*(i+1)/2+j] = compute(i,j) ;
00151 }
00152
00153 SG_PROGRESS(num*num,0,num*num);
00154 SG_DONE();
00155 }
00156
00157 void CDistance::get_distance_matrix(float64_t** dst, int32_t* m, int32_t* n)
00158 {
00159 ASSERT(dst && m && n);
00160
00161 float64_t* result = NULL;
00162 CFeatures* f1 = lhs;
00163 CFeatures* f2 = rhs;
00164
00165 if (f1 && f2)
00166 {
00167 int32_t num_vec1=f1->get_num_vectors();
00168 int32_t num_vec2=f2->get_num_vectors();
00169 *m=num_vec1;
00170 *n=num_vec2;
00171
00172 int64_t total_num=num_vec1*num_vec2;
00173 int32_t num_done=0;
00174 SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00175
00176 result=(float64_t*) malloc(total_num*sizeof(float64_t));
00177 ASSERT(result);
00178
00179 if ( (f1 == f2) && (num_vec1 == num_vec2) )
00180 {
00181 for (int32_t i=0; i<num_vec1; i++)
00182 {
00183 for (int32_t j=i; j<num_vec1; j++)
00184 {
00185 float64_t v=distance(i,j);
00186
00187 result[i+j*num_vec1]=v;
00188 result[j+i*num_vec1]=v;
00189
00190 if (num_done%100000)
00191 SG_PROGRESS(num_done, 0, total_num-1);
00192
00193 if (i!=j)
00194 num_done+=2;
00195 else
00196 num_done+=1;
00197 }
00198 }
00199 }
00200 else
00201 {
00202 for (int32_t i=0; i<num_vec1; i++)
00203 {
00204 for (int32_t j=0; j<num_vec2; j++)
00205 {
00206 result[i+j*num_vec1]=distance(i,j) ;
00207
00208 if (num_done%100000)
00209 SG_PROGRESS(num_done, 0, total_num-1);
00210
00211 num_done++;
00212 }
00213 }
00214 }
00215
00216 SG_DONE();
00217 }
00218 else
00219 SG_ERROR( "no features assigned to distance\n");
00220
00221 *dst=result;
00222 }
00223
00224 float32_t* CDistance::get_distance_matrix_shortreal(
00225 int32_t &num_vec1, int32_t &num_vec2, float32_t* target)
00226 {
00227 float32_t* result = NULL;
00228 CFeatures* f1 = lhs;
00229 CFeatures* f2 = rhs;
00230
00231 if (f1 && f2)
00232 {
00233 if (target && (num_vec1!=f1->get_num_vectors() || num_vec2!=f2->get_num_vectors()))
00234 SG_ERROR("distance matrix does not fit into target\n");
00235
00236 num_vec1=f1->get_num_vectors();
00237 num_vec2=f2->get_num_vectors();
00238 int64_t total_num=num_vec1*num_vec2;
00239 int32_t num_done=0;
00240
00241 SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00242
00243 if (target)
00244 result=target;
00245 else
00246 result=new float32_t[total_num];
00247
00248 if ( (f1 == f2) && (num_vec1 == num_vec2) )
00249 {
00250 for (int32_t i=0; i<num_vec1; i++)
00251 {
00252 for (int32_t j=i; j<num_vec1; j++)
00253 {
00254 float64_t v=distance(i,j);
00255
00256 result[i+j*num_vec1]=v;
00257 result[j+i*num_vec1]=v;
00258
00259 if (num_done%100000)
00260 SG_PROGRESS(num_done, 0, total_num-1);
00261
00262 if (i!=j)
00263 num_done+=2;
00264 else
00265 num_done+=1;
00266 }
00267 }
00268 }
00269 else
00270 {
00271 for (int32_t i=0; i<num_vec1; i++)
00272 {
00273 for (int32_t j=0; j<num_vec2; j++)
00274 {
00275 result[i+j*num_vec1]=distance(i,j) ;
00276
00277 if (num_done%100000)
00278 SG_PROGRESS(num_done, 0, total_num-1);
00279
00280 num_done++;
00281 }
00282 }
00283 }
00284
00285 SG_DONE();
00286 }
00287 else
00288 SG_ERROR( "no features assigned to distance\n");
00289
00290 return result;
00291 }
00292
00293 float64_t* CDistance::get_distance_matrix_real(
00294 int32_t &num_vec1, int32_t &num_vec2, float64_t* target)
00295 {
00296 float64_t* result = NULL;
00297 CFeatures* f1 = lhs;
00298 CFeatures* f2 = rhs;
00299
00300 if (f1 && f2)
00301 {
00302 if (target && (num_vec1!=f1->get_num_vectors() || num_vec2!=f2->get_num_vectors()))
00303 SG_ERROR("distance matrix does not fit into target\n");
00304
00305 num_vec1=f1->get_num_vectors();
00306 num_vec2=f2->get_num_vectors();
00307 int64_t total_num=num_vec1*num_vec2;
00308 int32_t num_done=0;
00309
00310 SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00311
00312 if (target)
00313 result=target;
00314 else
00315 result=new float64_t[total_num];
00316
00317 if ( (f1 == f2) && (num_vec1 == num_vec2) )
00318 {
00319 for (int32_t i=0; i<num_vec1; i++)
00320 {
00321 for (int32_t j=i; j<num_vec1; j++)
00322 {
00323 float64_t v=distance(i,j);
00324
00325 result[i+j*num_vec1]=v;
00326 result[j+i*num_vec1]=v;
00327
00328 if (num_done%100000)
00329 SG_PROGRESS(num_done, 0, total_num-1);
00330
00331 if (i!=j)
00332 num_done+=2;
00333 else
00334 num_done+=1;
00335 }
00336 }
00337 }
00338 else
00339 {
00340 for (int32_t i=0; i<num_vec1; i++)
00341 {
00342 for (int32_t j=0; j<num_vec2; j++)
00343 {
00344 result[i+j*num_vec1]=distance(i,j) ;
00345
00346 if (num_done%100000)
00347 SG_PROGRESS(num_done, 0, total_num-1);
00348
00349 num_done++;
00350 }
00351 }
00352 }
00353
00354 SG_DONE();
00355 }
00356 else
00357 SG_ERROR( "no features assigned to distance\n");
00358
00359 return result;
00360 }
00361
00362 void CDistance::init()
00363 {
00364 precomputed_matrix = NULL;
00365 precompute_matrix = false;
00366 lhs = NULL;
00367 rhs = NULL;
00368
00369 m_parameters->add((CSGObject**) &lhs, "lhs",
00370 "Feature vectors to occur on left hand side.");
00371 m_parameters->add((CSGObject**) &rhs, "rhs",
00372 "Feature vectors to occur on right hand side.");
00373 }