DistanceMachine.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Christian Gehl
00008  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST
00009  */
00010 
00011 #include <shogun/machine/DistanceMachine.h>
00012 #include <shogun/base/Parameter.h>
00013 
00014 using namespace shogun;
00015 
00016 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00017 struct D_THREAD_PARAM
00018 {
00019     CDistance* d;
00020     float64_t* r;
00021     int32_t idx_r_start;
00022     int32_t idx_start;
00023     int32_t idx_stop;
00024     int32_t idx_comp;
00025 };
00026 #endif // DOXYGEN_SHOULD_SKIP_THIS
00027 
00028 CDistanceMachine::CDistanceMachine()
00029 : CMachine()
00030 {
00031     init();
00032 }
00033 
00034 CDistanceMachine::~CDistanceMachine()
00035 {
00036     SG_UNREF(distance);
00037 }
00038 
00039 void CDistanceMachine::init()
00040 {
00041     /* all distance machines should store their models, i.e. cluster centers
00042      * At least, it has to be ensured, that after calling train(), or in the
00043      * call of apply() in the cases where there is no train method, the lhs
00044      * of the underlying distance is set to cluster centers */
00045     set_store_model_features(true);
00046 
00047     distance=NULL;
00048     m_parameters->add((CSGObject**)&distance, "distance", "Distance to use");
00049 }
00050 
00051 void CDistanceMachine::distances_lhs(float64_t* result,int32_t idx_a1,int32_t idx_a2,int32_t idx_b)
00052 {
00053     int32_t num_threads=parallel->get_num_threads();
00054     ASSERT(num_threads>0);
00055 
00056     ASSERT(result);
00057 
00058     if (num_threads < 2)
00059     {
00060         D_THREAD_PARAM param;
00061         param.d=distance;
00062         param.r=result;
00063         param.idx_r_start=idx_a1;
00064         param.idx_start=idx_a1;
00065         param.idx_stop=idx_a2+1;
00066         param.idx_comp=idx_b;
00067 
00068         run_distance_thread_lhs((void*) &param);
00069     }
00070 #ifdef HAVE_PTHREAD
00071     else
00072     {
00073         pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
00074         D_THREAD_PARAM* params = SG_MALLOC(D_THREAD_PARAM, num_threads);
00075         int32_t num_vec=idx_a2-idx_a1+1; 
00076         int32_t step= num_vec/num_threads;
00077         int32_t t;
00078 
00079         pthread_attr_t attr;
00080         pthread_attr_init(&attr);
00081         pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
00082 
00083         for (t=0; t<num_threads-1; t++)
00084         {
00085             params[t].d = distance;
00086             params[t].r = result;
00087             params[t].idx_r_start=t*step;
00088             params[t].idx_start = (t*step)+idx_a1;
00089             params[t].idx_stop = ((t+1)*step)+idx_a1;
00090             params[t].idx_comp=idx_b;
00091 
00092             pthread_create(&threads[t], &attr, CDistanceMachine::run_distance_thread_lhs, (void*)&params[t]);
00093         }
00094         params[t].d = distance;
00095         params[t].r = result;
00096         params[t].idx_r_start=t*step;
00097         params[t].idx_start = (t*step)+idx_a1;
00098         params[t].idx_stop = idx_a2+1;
00099         params[t].idx_comp=idx_b;
00100 
00101         run_distance_thread_lhs(&params[t]);
00102             
00103         for (t=0; t<num_threads-1; t++)
00104             pthread_join(threads[t], NULL);
00105 
00106         pthread_attr_destroy(&attr);
00107         SG_FREE(params);
00108         SG_FREE(threads);
00109     }
00110 #endif
00111 }
00112 
00113 void CDistanceMachine::distances_rhs(float64_t* result,int32_t idx_b1,int32_t idx_b2,int32_t idx_a)
00114 {
00115     int32_t num_threads=parallel->get_num_threads();
00116     ASSERT(num_threads>0);
00117 
00118     ASSERT(result);
00119 
00120     if (num_threads < 2)
00121     {
00122         D_THREAD_PARAM param;
00123         param.d=distance;
00124         param.r=result;
00125         param.idx_r_start=idx_b1;
00126         param.idx_start=idx_b1;
00127         param.idx_stop=idx_b2+1;
00128         param.idx_comp=idx_a;
00129 
00130         run_distance_thread_rhs((void*) &param);
00131     }
00132 #ifndef WIN32
00133     else
00134     {
00135         pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
00136         D_THREAD_PARAM* params = SG_MALLOC(D_THREAD_PARAM, num_threads);
00137         int32_t num_vec=idx_b2-idx_b1+1; 
00138         int32_t step= num_vec/num_threads;
00139         int32_t t;
00140 
00141         pthread_attr_t attr;
00142         pthread_attr_init(&attr);
00143         pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
00144 
00145         for (t=0; t<num_threads-1; t++)
00146         {
00147             params[t].d = distance;
00148             params[t].r = result;
00149             params[t].idx_r_start=t*step;
00150             params[t].idx_start = (t*step)+idx_b1;
00151             params[t].idx_stop = ((t+1)*step)+idx_b1;
00152             params[t].idx_comp=idx_a;
00153 
00154             pthread_create(&threads[t], &attr, CDistanceMachine::run_distance_thread_rhs, (void*)&params[t]);
00155         }
00156         params[t].d = distance;
00157         params[t].r = result;
00158         params[t].idx_r_start=t*step;
00159         params[t].idx_start = (t*step)+idx_b1;
00160         params[t].idx_stop = idx_b2+1;
00161         params[t].idx_comp=idx_a;
00162 
00163         run_distance_thread_rhs(&params[t]);
00164             
00165         for (t=0; t<num_threads-1; t++)
00166             pthread_join(threads[t], NULL);
00167 
00168         pthread_attr_destroy(&attr);
00169         SG_FREE(params);
00170         SG_FREE(threads);
00171     }
00172 #endif
00173 }
00174 
00175 void* CDistanceMachine::run_distance_thread_lhs(void* p)
00176 {
00177     D_THREAD_PARAM* params= (D_THREAD_PARAM*) p;
00178     CDistance* distance=params->d;
00179     float64_t* res=params->r;
00180     int32_t idx_res_start=params->idx_r_start;
00181     int32_t idx_act=params->idx_start;
00182     int32_t idx_stop=params->idx_stop;
00183     int32_t idx_c=params->idx_comp;
00184 
00185     for (int32_t i=idx_res_start; idx_act<idx_stop; i++,idx_act++)
00186         res[i] =distance->distance(idx_act,idx_c);
00187 
00188     return NULL;
00189 }
00190 
00191 void* CDistanceMachine::run_distance_thread_rhs(void* p)
00192 {
00193     D_THREAD_PARAM* params= (D_THREAD_PARAM*) p;
00194     CDistance* distance=params->d;
00195     float64_t* res=params->r;
00196     int32_t idx_res_start=params->idx_r_start;
00197     int32_t idx_act=params->idx_start;
00198     int32_t idx_stop=params->idx_stop;
00199     int32_t idx_c=params->idx_comp;
00200 
00201     for (int32_t i=idx_res_start; idx_act<idx_stop; i++,idx_act++)
00202         res[i] =distance->distance(idx_c,idx_act);
00203 
00204     return NULL;
00205 }
00206 
00207 CLabels* CDistanceMachine::apply(CFeatures* data)
00208 {
00209     ASSERT(data);
00210 
00211     /* set distance features to given ones and apply to all */
00212     CFeatures* lhs=distance->get_lhs();
00213     distance->init(lhs, data);
00214     SG_UNREF(lhs);
00215 
00216     /* build result labels and classify all elements of procedure */
00217     CLabels* result=new CLabels(data->get_num_vectors());
00218     for (index_t i=0; i<data->get_num_vectors(); ++i)
00219         result->set_label(i, apply(i));
00220 
00221     return result;
00222 }
00223 
00224 CLabels* CDistanceMachine::apply()
00225 {
00226     /* call apply on complete right hand side */
00227     CFeatures* all=distance->get_rhs();
00228     CLabels* result=apply(all);
00229     SG_UNREF(all);
00230     return result;
00231 }
00232 
00233 float64_t CDistanceMachine::apply(int32_t num)
00234 {
00235     /* number of clusters */
00236     CFeatures* lhs=distance->get_lhs();
00237     int32_t num_clusters=lhs->get_num_vectors();
00238     SG_UNREF(lhs);
00239 
00240     /* (multiple threads) calculate distances to all cluster centers */
00241     float64_t* dists=SG_MALLOC(float64_t, num_clusters);
00242     distances_lhs(dists, 0, num_clusters-1, num);
00243 
00244     /* find cluster index with smallest distance */
00245     float64_t result=dists[0];
00246     index_t best_index=0;
00247     for (index_t i=1; i<num_clusters; ++i)
00248     {
00249         if (dists[i]<result)
00250         {
00251             result=dists[i];
00252             best_index=i;
00253         }
00254     }
00255 
00256     SG_FREE(dists);
00257 
00258     /* implicit cast */
00259     return best_index;
00260 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation