Distance.cpp

Go to the documentation of this file.
00001 /*
00002  * this program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2006-2009 Christian Gehl
00008  * Written (W) 2006-2009 Soeren Sonnenburg
00009  * Copyright (C) 2006-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "lib/config.h"
00013 #include "lib/common.h"
00014 #include "lib/io.h"
00015 #include "lib/File.h"
00016 #include "lib/Time.h"
00017 #include "base/Parallel.h"
00018 #include "base/Parameter.h"
00019 
00020 #include "distance/Distance.h"
00021 #include "features/Features.h"
00022 
00023 #include <string.h>
00024 #include <unistd.h>
00025 
00026 #ifndef WIN32
00027 #include <pthread.h>
00028 #endif
00029 
00030 using namespace shogun;
00031 
00032 CDistance::CDistance() : CSGObject()
00033 {
00034     init();
00035 }
00036 
00037         
00038 CDistance::CDistance(CFeatures* p_lhs, CFeatures* p_rhs) : CSGObject()
00039 {
00040     init();
00041     init(p_lhs, p_rhs);
00042 }
00043 
00044 CDistance::~CDistance()
00045 {
00046     delete[] precomputed_matrix;
00047     precomputed_matrix=NULL;
00048 
00049     remove_lhs_and_rhs();
00050 }
00051 
00052 bool CDistance::init(CFeatures* l, CFeatures* r)
00053 {
00054     //make sure features were indeed supplied
00055     ASSERT(l);
00056     ASSERT(r);
00057 
00058     //make sure features are compatible
00059     ASSERT(l->get_feature_class()==r->get_feature_class());
00060     ASSERT(l->get_feature_type()==r->get_feature_type());
00061 
00062     //remove references to previous features
00063     remove_lhs_and_rhs();
00064 
00065     //increase reference counts
00066     SG_REF(l);
00067     SG_REF(r);
00068 
00069     lhs=l;
00070     rhs=r;
00071 
00072     delete[] precomputed_matrix ;
00073     precomputed_matrix=NULL ;
00074 
00075     return true;
00076 }
00077 
00078 void CDistance::load(CFile* loader)
00079 {
00080     SG_SET_LOCALE_C;
00081     SG_RESET_LOCALE;
00082 }
00083 
00084 void CDistance::save(CFile* writer)
00085 {
00086     SG_SET_LOCALE_C;
00087     SG_RESET_LOCALE;
00088 }
00089 
00090 void CDistance::remove_lhs_and_rhs()
00091 {
00092     SG_UNREF(rhs);
00093     rhs = NULL;
00094 
00095     SG_UNREF(lhs);
00096     lhs = NULL;
00097 }
00098 
00099 void CDistance::remove_lhs()
00100 { 
00101     SG_UNREF(lhs);
00102     lhs = NULL;
00103 }
00104 
00106 void CDistance::remove_rhs()
00107 {
00108     SG_UNREF(rhs);
00109     rhs = NULL;
00110 }
00111 
00112 CFeatures* CDistance::replace_rhs(CFeatures* r)
00113 {
00114      //make sure features were indeed supplied
00115      ASSERT(r);
00116 
00117      //make sure features are compatible
00118      ASSERT(lhs->get_feature_class()==r->get_feature_class());
00119      ASSERT(lhs->get_feature_type()==r->get_feature_type());
00120 
00121      //remove references to previous rhs features
00122      CFeatures* tmp=rhs;
00123      
00124      rhs=r;
00125 
00126      delete[] precomputed_matrix ;
00127      precomputed_matrix=NULL ;
00128 
00129      // return old features including reference count
00130      return tmp;
00131 }
00132 
00133 void CDistance::do_precompute_matrix()
00134 {
00135     int32_t num_left=lhs->get_num_vectors();
00136     int32_t num_right=rhs->get_num_vectors();
00137     SG_INFO( "precomputing distance matrix (%ix%i)\n", num_left, num_right) ;
00138 
00139     ASSERT(num_left==num_right);
00140     ASSERT(lhs==rhs);
00141     int32_t num=num_left;
00142     
00143     delete[] precomputed_matrix;
00144     precomputed_matrix=new float32_t[num*(num+1)/2];
00145 
00146     for (int32_t i=0; i<num; i++)
00147     {
00148         SG_PROGRESS(i*i,0,num*num);
00149         for (int32_t j=0; j<=i; j++)
00150             precomputed_matrix[i*(i+1)/2+j] = compute(i,j) ;
00151     }
00152 
00153     SG_PROGRESS(num*num,0,num*num);
00154     SG_DONE();
00155 }
00156 
00157 void CDistance::get_distance_matrix(float64_t** dst, int32_t* m, int32_t* n)
00158 {
00159     ASSERT(dst && m && n);
00160 
00161     float64_t* result = NULL;
00162     CFeatures* f1 = lhs;
00163     CFeatures* f2 = rhs;
00164 
00165     if (f1 && f2)
00166     {
00167         int32_t num_vec1=f1->get_num_vectors();
00168         int32_t num_vec2=f2->get_num_vectors();
00169         *m=num_vec1;
00170         *n=num_vec2;
00171 
00172         int64_t total_num=num_vec1*num_vec2;
00173         int32_t num_done=0;
00174         SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00175 
00176         result=(float64_t*) malloc(total_num*sizeof(float64_t));
00177         ASSERT(result);
00178 
00179         if ( (f1 == f2) && (num_vec1 == num_vec2) )
00180         {
00181             for (int32_t i=0; i<num_vec1; i++)
00182             {
00183                 for (int32_t j=i; j<num_vec1; j++)
00184                 {
00185                     float64_t v=distance(i,j);
00186 
00187                     result[i+j*num_vec1]=v;
00188                     result[j+i*num_vec1]=v;
00189 
00190                     if (num_done%100000)
00191                         SG_PROGRESS(num_done, 0, total_num-1);
00192 
00193                     if (i!=j)
00194                         num_done+=2;
00195                     else
00196                         num_done+=1;
00197                 }
00198             }
00199         }
00200         else
00201         {
00202             for (int32_t i=0; i<num_vec1; i++)
00203             {
00204                 for (int32_t j=0; j<num_vec2; j++)
00205                 {
00206                     result[i+j*num_vec1]=distance(i,j) ;
00207 
00208                     if (num_done%100000)
00209                         SG_PROGRESS(num_done, 0, total_num-1);
00210 
00211                     num_done++;
00212                 }
00213             }
00214         }
00215 
00216         SG_DONE();
00217     }
00218     else
00219       SG_ERROR( "no features assigned to distance\n");
00220 
00221     *dst=result;
00222 }
00223 
00224 float32_t* CDistance::get_distance_matrix_shortreal(
00225     int32_t &num_vec1, int32_t &num_vec2, float32_t* target)
00226 {
00227     float32_t* result = NULL;
00228     CFeatures* f1 = lhs;
00229     CFeatures* f2 = rhs;
00230 
00231     if (f1 && f2)
00232     {
00233         if (target && (num_vec1!=f1->get_num_vectors() || num_vec2!=f2->get_num_vectors()))
00234             SG_ERROR("distance matrix does not fit into target\n");
00235 
00236         num_vec1=f1->get_num_vectors();
00237         num_vec2=f2->get_num_vectors();
00238         int64_t total_num=num_vec1*num_vec2;
00239         int32_t num_done=0;
00240 
00241         SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00242 
00243         if (target)
00244             result=target;
00245         else
00246             result=new float32_t[total_num];
00247 
00248         if ( (f1 == f2) && (num_vec1 == num_vec2) )
00249         {
00250             for (int32_t i=0; i<num_vec1; i++)
00251             {
00252                 for (int32_t j=i; j<num_vec1; j++)
00253                 {
00254                     float64_t v=distance(i,j);
00255 
00256                     result[i+j*num_vec1]=v;
00257                     result[j+i*num_vec1]=v;
00258 
00259                     if (num_done%100000)
00260                         SG_PROGRESS(num_done, 0, total_num-1);
00261 
00262                     if (i!=j)
00263                         num_done+=2;
00264                     else
00265                         num_done+=1;
00266                 }
00267             }
00268         }
00269         else
00270         {
00271             for (int32_t i=0; i<num_vec1; i++)
00272             {
00273                 for (int32_t j=0; j<num_vec2; j++)
00274                 {
00275                     result[i+j*num_vec1]=distance(i,j) ;
00276 
00277                     if (num_done%100000)
00278                         SG_PROGRESS(num_done, 0, total_num-1);
00279 
00280                     num_done++;
00281                 }
00282             }
00283         }
00284 
00285         SG_DONE();
00286     }
00287     else
00288       SG_ERROR( "no features assigned to distance\n");
00289 
00290     return result;
00291 }
00292 
00293 float64_t* CDistance::get_distance_matrix_real(
00294     int32_t &num_vec1, int32_t &num_vec2, float64_t* target)
00295 {
00296     float64_t* result = NULL;
00297     CFeatures* f1 = lhs;
00298     CFeatures* f2 = rhs;
00299 
00300     if (f1 && f2)
00301     {
00302         if (target && (num_vec1!=f1->get_num_vectors() || num_vec2!=f2->get_num_vectors()))
00303             SG_ERROR("distance matrix does not fit into target\n");
00304 
00305         num_vec1=f1->get_num_vectors();
00306         num_vec2=f2->get_num_vectors();
00307         int64_t total_num=num_vec1*num_vec2;
00308         int32_t num_done=0;
00309 
00310         SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00311 
00312         if (target)
00313             result=target;
00314         else
00315             result=new float64_t[total_num];
00316 
00317         if ( (f1 == f2) && (num_vec1 == num_vec2) )
00318         {
00319             for (int32_t i=0; i<num_vec1; i++)
00320             {
00321                 for (int32_t j=i; j<num_vec1; j++)
00322                 {
00323                     float64_t v=distance(i,j);
00324 
00325                     result[i+j*num_vec1]=v;
00326                     result[j+i*num_vec1]=v;
00327 
00328                     if (num_done%100000)
00329                         SG_PROGRESS(num_done, 0, total_num-1);
00330 
00331                     if (i!=j)
00332                         num_done+=2;
00333                     else
00334                         num_done+=1;
00335                 }
00336             }
00337         }
00338         else
00339         {
00340             for (int32_t i=0; i<num_vec1; i++)
00341             {
00342                 for (int32_t j=0; j<num_vec2; j++)
00343                 {
00344                     result[i+j*num_vec1]=distance(i,j) ;
00345 
00346                     if (num_done%100000)
00347                         SG_PROGRESS(num_done, 0, total_num-1);
00348 
00349                     num_done++;
00350                 }
00351             }
00352         }
00353 
00354         SG_DONE();
00355     }
00356     else
00357       SG_ERROR( "no features assigned to distance\n");
00358 
00359     return result;
00360 }
00361 
00362 void CDistance::init()
00363 {
00364     precomputed_matrix = NULL;
00365     precompute_matrix = false;
00366     lhs = NULL;
00367     rhs = NULL;
00368 
00369     m_parameters->add((CSGObject**) &lhs, "lhs",
00370                       "Feature vectors to occur on left hand side.");
00371     m_parameters->add((CSGObject**) &rhs, "rhs",
00372                       "Feature vectors to occur on right hand side.");
00373 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation