Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include <shogun/clustering/Hierarchical.h>
00012 #include <shogun/distance/Distance.h>
00013 #include <shogun/features/Labels.h>
00014 #include <shogun/features/Features.h>
00015 #include <shogun/mathematics/Math.h>
00016 #include <shogun/base/Parallel.h>
00017
00018 #ifdef HAVE_PTHREAD
00019 #include <pthread.h>
00020 #endif
00021
00022 using namespace shogun;
00023
00024 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00025 struct pair
00026 {
00028 int32_t idx1;
00030 int32_t idx2;
00031 };
00032 #endif // DOXYGEN_SHOULD_SKIP_THIS
00033
00034 CHierarchical::CHierarchical()
00035 : CDistanceMachine(), merges(3), dimensions(0), assignment(NULL),
00036 table_size(0), pairs(NULL), merge_distance(NULL)
00037 {
00038 }
00039
00040 CHierarchical::CHierarchical(int32_t merges_, CDistance* d)
00041 : CDistanceMachine(), merges(merges_), dimensions(0), assignment(NULL),
00042 table_size(0), pairs(NULL), merge_distance(NULL)
00043 {
00044 set_distance(d);
00045 }
00046
00047 CHierarchical::~CHierarchical()
00048 {
00049 SG_FREE(merge_distance);
00050 SG_FREE(assignment);
00051 SG_FREE(pairs);
00052 }
00053
00054 bool CHierarchical::train_machine(CFeatures* data)
00055 {
00056 ASSERT(distance);
00057
00058 if (data)
00059 distance->init(data, data);
00060
00061 CFeatures* lhs=distance->get_lhs();
00062 ASSERT(lhs);
00063
00064 int32_t num=lhs->get_num_vectors();
00065 ASSERT(num>0);
00066
00067 const int32_t num_pairs=num*(num-1)/2;
00068
00069 SG_FREE(merge_distance);
00070 merge_distance=SG_MALLOC(float64_t, num);
00071 CMath::fill_vector(merge_distance, num, -1.0);
00072
00073 SG_FREE(assignment);
00074 assignment=SG_MALLOC(int32_t, num);
00075 CMath::range_fill_vector(assignment, num);
00076
00077 SG_FREE(pairs);
00078 pairs=SG_MALLOC(int32_t, 2*num);
00079 CMath::fill_vector(pairs, 2*num, -1);
00080
00081 pair* index=SG_MALLOC(pair, num_pairs);
00082 float64_t* distances=SG_MALLOC(float64_t, num_pairs);
00083
00084 int32_t offs=0;
00085 for (int32_t i=0; i<num; i++)
00086 {
00087 for (int32_t j=i+1; j<num; j++)
00088 {
00089 distances[offs]=distance->distance(i,j);
00090 index[offs].idx1=i;
00091 index[offs].idx2=j;
00092 offs++;
00093 }
00094 SG_PROGRESS(i, 0, num-1);
00095 }
00096
00097 CMath::qsort_index<float64_t,pair>(distances, index, (num-1)*num/2);
00098
00099
00100 int32_t k=-1;
00101 int32_t l=0;
00102 for (; l<num && (num-l)>=merges && k<num_pairs-1; l++)
00103 {
00104 while (k<num_pairs-1)
00105 {
00106 k++;
00107
00108 int32_t i=index[k].idx1;
00109 int32_t j=index[k].idx2;
00110 int32_t c1=assignment[i];
00111 int32_t c2=assignment[j];
00112
00113 if (c1==c2)
00114 continue;
00115
00116 SG_PROGRESS(k, 0, num_pairs-1);
00117
00118 if (c1<c2)
00119 {
00120 pairs[2*l]=c1;
00121 pairs[2*l+1]=c2;
00122 }
00123 else
00124 {
00125 pairs[2*l]=c2;
00126 pairs[2*l+1]=c1;
00127 }
00128 merge_distance[l]=distances[k];
00129
00130 int32_t c=num+l;
00131 for (int32_t m=0; m<num; m++)
00132 {
00133 if (assignment[m] == c1 || assignment[m] == c2)
00134 assignment[m] = c;
00135 }
00136 #ifdef DEBUG_HIERARCHICAL
00137 SG_PRINT("l=%04i i=%04i j=%04i c1=%+04d c2=%+04d c=%+04d dist=%6.6f\n", l,i,j, c1,c2,c, merge_distance[l]);
00138 #endif
00139 break;
00140 }
00141 }
00142
00143 assignment_size=num;
00144 table_size=l-1;
00145 ASSERT(table_size>0);
00146 SG_FREE(distances);
00147 SG_FREE(index);
00148 SG_UNREF(lhs)
00149
00150 return true;
00151 }
00152
00153 bool CHierarchical::load(FILE* srcfile)
00154 {
00155 SG_SET_LOCALE_C;
00156 SG_RESET_LOCALE;
00157 return false;
00158 }
00159
00160 bool CHierarchical::save(FILE* dstfile)
00161 {
00162 SG_SET_LOCALE_C;
00163 SG_RESET_LOCALE;
00164 return false;
00165 }
00166
00167 void CHierarchical::store_model_features()
00168 {
00169
00170 }
00171
00172 CLabels* CHierarchical::apply(CFeatures* data)
00173 {
00174 return apply();
00175 }
00176
00177 CLabels* CHierarchical::apply()
00178 {
00179 SG_ERROR("apply(...) not implemented for %s!\n", get_name());
00180 return NULL;
00181 }
00182
00183 float64_t CHierarchical::apply(int32_t num)
00184 {
00185 apply();
00186 return 0;
00187 }
00188