Hierarchical.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2007-2009 Soeren Sonnenburg
00008  * Copyright (C) 2007-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #include <shogun/clustering/Hierarchical.h>
00012 #include <shogun/distance/Distance.h>
00013 #include <shogun/features/Labels.h>
00014 #include <shogun/features/Features.h>
00015 #include <shogun/mathematics/Math.h>
00016 #include <shogun/base/Parallel.h>
00017 
00018 #ifdef HAVE_PTHREAD
00019 #include <pthread.h>
00020 #endif
00021 
00022 using namespace shogun;
00023 
00024 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00025 struct pair
00026 {
00028     int32_t idx1;
00030     int32_t idx2;
00031 };
00032 #endif // DOXYGEN_SHOULD_SKIP_THIS
00033 
00034 CHierarchical::CHierarchical()
00035 : CDistanceMachine(), merges(3), dimensions(0), assignment(NULL),
00036     table_size(0), pairs(NULL), merge_distance(NULL)
00037 {
00038 }
00039 
00040 CHierarchical::CHierarchical(int32_t merges_, CDistance* d)
00041 : CDistanceMachine(), merges(merges_), dimensions(0), assignment(NULL),
00042     table_size(0), pairs(NULL), merge_distance(NULL)
00043 {
00044     set_distance(d);
00045 }
00046 
00047 CHierarchical::~CHierarchical()
00048 {
00049     SG_FREE(merge_distance);
00050     SG_FREE(assignment);
00051     SG_FREE(pairs);
00052 }
00053 
00054 bool CHierarchical::train_machine(CFeatures* data)
00055 {
00056     ASSERT(distance);
00057 
00058     if (data)
00059         distance->init(data, data);
00060 
00061     CFeatures* lhs=distance->get_lhs();
00062     ASSERT(lhs);
00063 
00064     int32_t num=lhs->get_num_vectors();
00065     ASSERT(num>0);
00066 
00067     const int32_t num_pairs=num*(num-1)/2;
00068 
00069     SG_FREE(merge_distance);
00070     merge_distance=SG_MALLOC(float64_t, num);
00071     CMath::fill_vector(merge_distance, num, -1.0);
00072 
00073     SG_FREE(assignment);
00074     assignment=SG_MALLOC(int32_t, num);
00075     CMath::range_fill_vector(assignment, num);
00076 
00077     SG_FREE(pairs);
00078     pairs=SG_MALLOC(int32_t, 2*num);
00079     CMath::fill_vector(pairs, 2*num, -1);
00080 
00081     pair* index=SG_MALLOC(pair, num_pairs);
00082     float64_t* distances=SG_MALLOC(float64_t, num_pairs);
00083 
00084     int32_t offs=0;
00085     for (int32_t i=0; i<num; i++)
00086     {
00087         for (int32_t j=i+1; j<num; j++)
00088         {
00089             distances[offs]=distance->distance(i,j);
00090             index[offs].idx1=i;
00091             index[offs].idx2=j;
00092             offs++;                 //offs=i*(i+1)/2+j
00093         }
00094         SG_PROGRESS(i, 0, num-1);
00095     }
00096 
00097     CMath::qsort_index<float64_t,pair>(distances, index, (num-1)*num/2);
00098     //CMath::display_vector(distances, (num-1)*num/2, "dists");
00099 
00100     int32_t k=-1;
00101     int32_t l=0;
00102     for (; l<num && (num-l)>=merges && k<num_pairs-1; l++)
00103     {
00104         while (k<num_pairs-1)
00105         {
00106             k++;
00107 
00108             int32_t i=index[k].idx1;
00109             int32_t j=index[k].idx2;
00110             int32_t c1=assignment[i];
00111             int32_t c2=assignment[j];
00112 
00113             if (c1==c2)
00114                 continue;
00115             
00116             SG_PROGRESS(k, 0, num_pairs-1);
00117 
00118             if (c1<c2)
00119             {
00120                 pairs[2*l]=c1;
00121                 pairs[2*l+1]=c2;
00122             }
00123             else
00124             {
00125                 pairs[2*l]=c2;
00126                 pairs[2*l+1]=c1;
00127             }
00128             merge_distance[l]=distances[k];
00129 
00130             int32_t c=num+l;
00131             for (int32_t m=0; m<num; m++)
00132             {
00133                 if (assignment[m] == c1 || assignment[m] == c2)
00134                     assignment[m] = c;
00135             }
00136 #ifdef DEBUG_HIERARCHICAL
00137             SG_PRINT("l=%04i i=%04i j=%04i c1=%+04d c2=%+04d c=%+04d dist=%6.6f\n", l,i,j, c1,c2,c, merge_distance[l]);
00138 #endif
00139             break;
00140         }
00141     }
00142 
00143     assignment_size=num;
00144     table_size=l-1;
00145     ASSERT(table_size>0);
00146     SG_FREE(distances);
00147     SG_FREE(index);
00148     SG_UNREF(lhs)
00149 
00150     return true;
00151 }
00152 
00153 bool CHierarchical::load(FILE* srcfile)
00154 {
00155     SG_SET_LOCALE_C;
00156     SG_RESET_LOCALE;
00157     return false;
00158 }
00159 
00160 bool CHierarchical::save(FILE* dstfile)
00161 {
00162     SG_SET_LOCALE_C;
00163     SG_RESET_LOCALE;
00164     return false;
00165 }
00166 
00167 void CHierarchical::store_model_features()
00168 {
00169     /* TODO. Currently does nothing since apply methods are not implemented. */
00170 }
00171 
00172 CLabels* CHierarchical::apply(CFeatures* data)
00173 {
00174     return apply();
00175 }
00176 
00177 CLabels* CHierarchical::apply()
00178 {
00179     SG_ERROR("apply(...) not implemented for %s!\n", get_name());
00180     return NULL;
00181 }
00182 
00183 float64_t CHierarchical::apply(int32_t num)
00184 {
00185     apply();
00186     return 0;
00187 }
00188 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation