SHOGUN  4.1.0
 全部  命名空间 文件 函数 变量 类型定义 枚举 枚举值 友元 宏定义  
KMeansLloydImpl.cpp
浏览该文件的文档.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2014 Parijat Mazumdar
8  */
9 
14 #include <shogun/io/SGIO.h>
15 
16 using namespace shogun;
17 
18 namespace shogun
19 {
20 void CKMeansLloydImpl::Lloyd_KMeans(int32_t k, CDistance* distance, int32_t max_iter, SGMatrix<float64_t> mus,
21  SGVector<int32_t> ClList, SGVector<float64_t> weights_set, bool fixed_centers)
22 {
25  int32_t XSize=lhs->get_num_vectors();
26  int32_t dimensions=lhs->get_num_features();
27 
29  CFeatures* rhs_cache=distance->replace_rhs(rhs_mus);
30 
32  dists.zero();
33 
34  int32_t changed=1;
35  int32_t iter=0;
36  int32_t vlen=0;
37  bool vfree=false;
38  float64_t* vec=NULL;
39 
40  while (changed && (iter<max_iter))
41  {
42  iter++;
43  if (iter==max_iter-1)
44  SG_SWARNING("kmeans clustering changed throughout %d iterations stopping...\n", max_iter-1)
45 
46  if (iter%1000 == 0)
47  SG_SINFO("Iteration[%d/%d]: Assignment of %i patterns changed.\n", iter, max_iter, changed)
48  changed=0;
49 
50  if (!fixed_centers)
51  {
52  /* mus=zeros(dimensions, k) ; */
53  mus.zero();
54  for (int32_t i=0; i<XSize; i++)
55  {
56  int32_t Cl=ClList[i];
57 
58  vec=lhs->get_feature_vector(i, vlen, vfree);
59 
60  for (int32_t j=0; j<dimensions; j++)
61  mus.matrix[Cl*dimensions+j] += vec[j];
62 
63  lhs->free_feature_vector(vec, i, vfree);
64  }
65 
66  for (int32_t i=0; i<k; i++)
67  {
68  if (weights_set[i]!=0.0)
69  {
70  for (int32_t j=0; j<dimensions; j++)
71  mus.matrix[i*dimensions+j] /= weights_set[i];
72  }
73  }
74  }
75  rhs_mus->copy_feature_matrix(mus);
76  for (int32_t i=0; i<XSize; i++)
77  {
78  /* ks=ceil(rand(1,XSize)*XSize) ; */
79  const int32_t Pat=CMath::random(0, XSize-1);
80  const int32_t ClList_Pat=ClList[Pat];
81  int32_t imini, j;
82  float64_t mini;
83 
84  /* compute the distance of this point to all centers */
85  for(int32_t idx_k=0;idx_k<k;idx_k++)
86  dists[idx_k]=distance->distance(Pat,idx_k);
87 
88  /* [mini,imini]=min(dists(:,i)) ; */
89  imini=0 ; mini=dists[0];
90  for (j=1; j<k; j++)
91  if (dists[j]<mini)
92  {
93  mini=dists[j];
94  imini=j;
95  }
96 
97  if (imini!=ClList_Pat)
98  {
99  changed++;
100 
101  /* weights_set(imini) = weights_set(imini) + 1.0 ; */
102  weights_set[imini]+= 1.0;
103  /* weights_set(j) = weights_set(j) - 1.0 ; */
104  weights_set[ClList_Pat]-= 1.0;
105 
106  vec=lhs->get_feature_vector(Pat, vlen, vfree);
107 
108  for (j=0; j<dimensions; j++)
109  {
110  mus.matrix[imini*dimensions+j]-=
111  (vec[j]-mus.matrix[imini*dimensions+j]) / weights_set[imini];
112  }
113 
114  lhs->free_feature_vector(vec, Pat, vfree);
115 
116  /* mu_new = mu_old - (x - mu_old)/(n-1) */
117  /* if weights_set(j)~=0 */
118  if (weights_set[ClList_Pat]!=0.0)
119  {
120  vec=lhs->get_feature_vector(Pat, vlen, vfree);
121 
122  for (j=0; j<dimensions; j++)
123  {
124  mus.matrix[ClList_Pat*dimensions+j]-=
125  (vec[j]-mus.matrix[ClList_Pat*dimensions+j]) / weights_set[ClList_Pat];
126  }
127  lhs->free_feature_vector(vec, Pat, vfree);
128  }
129  else
130  {
131  /* mus(:,j)=zeros(dimensions,1) ; */
132  for (j=0; j<dimensions; j++)
133  mus.matrix[ClList_Pat*dimensions+j]=0;
134  }
135 
136  /* ClList(i)= imini ; */
137  ClList[Pat] = imini;
138  }
139  }
140  }
141  distance->replace_rhs(rhs_cache);
142  delete rhs_mus;
143  SG_UNREF(lhs);
144 }
145 }
float distance(CJLCoverTreePoint p1, CJLCoverTreePoint p2, float64_t upper_bound)
virtual void copy_feature_matrix(SGMatrix< ST > src)
ST * get_feature_vector(int32_t num, int32_t &len, bool &dofree)
Class Distance, a base class for all the distances used in the Shogun toolbox.
Definition: Distance.h:81
int32_t get_num_features() const
CFeatures * get_lhs()
Definition: Distance.h:195
#define SG_SWARNING(...)
Definition: SGIO.h:178
static uint64_t random()
Definition: Math.h:1019
virtual int32_t get_num_vectors() const
double float64_t
Definition: common.h:50
static void Lloyd_KMeans(int32_t k, CDistance *distance, int32_t max_iter, SGMatrix< float64_t > mus, SGVector< int32_t > ClList, SGVector< float64_t > weights_set, bool fixed_centers)
CFeatures * replace_rhs(CFeatures *rhs)
Definition: Distance.cpp:145
virtual float64_t distance(int32_t idx_a, int32_t idx_b)
Definition: Distance.cpp:189
#define SG_UNREF(x)
Definition: SGObject.h:52
All classes and functions are contained in the shogun namespace
Definition: class_list.h:18
void free_feature_vector(ST *feat_vec, int32_t num, bool dofree)
The class Features is the base class of all feature objects.
Definition: Features.h:68
#define SG_SINFO(...)
Definition: SGIO.h:173
static CDenseFeatures * obtain_from_generic(CFeatures *const base_features)

SHOGUN 机器学习工具包 - 项目文档