SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Hierarchical.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2007-2009 Soeren Sonnenburg
8  * Copyright (C) 2007-2009 Fraunhofer Institute FIRST and Max-Planck-Society
9  */
10 
13 #include <shogun/labels/Labels.h>
16 #include <shogun/base/Parallel.h>
17 
18 #ifdef HAVE_PTHREAD
19 #include <pthread.h>
20 #endif
21 
22 using namespace shogun;
23 
24 #ifndef DOXYGEN_SHOULD_SKIP_THIS
25 struct pair
26 {
28  int32_t idx1;
30  int32_t idx2;
31 };
32 #endif // DOXYGEN_SHOULD_SKIP_THIS
33 
35 : CDistanceMachine(), merges(3), dimensions(0), assignment(NULL),
36  table_size(0), pairs(NULL), merge_distance(NULL)
37 {
38 }
39 
41 : CDistanceMachine(), merges(merges_), dimensions(0), assignment(NULL),
42  table_size(0), pairs(NULL), merge_distance(NULL)
43 {
44  set_distance(d);
45 }
46 
48 {
49  SG_FREE(merge_distance);
50  SG_FREE(assignment);
51  SG_FREE(pairs);
52 }
53 
55 {
56  return CT_HIERARCHICAL;
57 }
58 
60 {
62 
63  if (data)
64  distance->init(data, data);
65 
66  CFeatures* lhs=distance->get_lhs();
67  ASSERT(lhs)
68 
69  int32_t num=lhs->get_num_vectors();
70  ASSERT(num>0)
71 
72  const int32_t num_pairs=num*(num-1)/2;
73 
74  SG_FREE(merge_distance);
75  merge_distance=SG_MALLOC(float64_t, num);
77 
78  SG_FREE(assignment);
79  assignment=SG_MALLOC(int32_t, num);
81 
82  SG_FREE(pairs);
83  pairs=SG_MALLOC(int32_t, 2*num);
85 
86  pair* index=SG_MALLOC(pair, num_pairs);
87  float64_t* distances=SG_MALLOC(float64_t, num_pairs);
88 
89  int32_t offs=0;
90  for (int32_t i=0; i<num; i++)
91  {
92  for (int32_t j=i+1; j<num; j++)
93  {
94  distances[offs]=distance->distance(i,j);
95  index[offs].idx1=i;
96  index[offs].idx2=j;
97  offs++; //offs=i*(i+1)/2+j
98  }
99  SG_PROGRESS(i, 0, num-1)
100  }
101 
102  CMath::qsort_index<float64_t,pair>(distances, index, (num-1)*num/2);
103  //CMath::display_vector(distances, (num-1)*num/2, "dists");
104 
105  int32_t k=-1;
106  int32_t l=0;
107  for (; l<num && (num-l)>=merges && k<num_pairs-1; l++)
108  {
109  while (k<num_pairs-1)
110  {
111  k++;
112 
113  int32_t i=index[k].idx1;
114  int32_t j=index[k].idx2;
115  int32_t c1=assignment[i];
116  int32_t c2=assignment[j];
117 
118  if (c1==c2)
119  continue;
120 
121  SG_PROGRESS(k, 0, num_pairs-1)
122 
123  if (c1<c2)
124  {
125  pairs[2*l]=c1;
126  pairs[2*l+1]=c2;
127  }
128  else
129  {
130  pairs[2*l]=c2;
131  pairs[2*l+1]=c1;
132  }
133  merge_distance[l]=distances[k];
134 
135  int32_t c=num+l;
136  for (int32_t m=0; m<num; m++)
137  {
138  if (assignment[m] == c1 || assignment[m] == c2)
139  assignment[m] = c;
140  }
141 #ifdef DEBUG_HIERARCHICAL
142  SG_PRINT("l=%04i i=%04i j=%04i c1=%+04d c2=%+04d c=%+04d dist=%6.6f\n", l,i,j, c1,c2,c, merge_distance[l])
143 #endif
144  break;
145  }
146  }
147 
148  assignment_size=num;
149  table_size=l-1;
150  ASSERT(table_size>0)
151  SG_FREE(distances);
152  SG_FREE(index);
153  SG_UNREF(lhs)
154 
155  return true;
156 }
157 
158 bool CHierarchical::load(FILE* srcfile)
159 {
162  return false;
163 }
164 
165 bool CHierarchical::save(FILE* dstfile)
166 {
169  return false;
170 }
171 
172 
174 {
175  return merges;
176 }
177 
179 {
180  return SGVector<int32_t>(assignment,table_size, false);
181 }
182 
184 {
186 }
187 
189 {
190  return SGMatrix<int32_t>(pairs,2,merges, false);
191 }
192 
193 
195 {
196  /* TODO. Currently does nothing since apply methods are not implemented. */
197 }

SHOGUN Machine Learning Toolbox - Documentation