SHOGUN  4.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
KMeansMiniBatch.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2014 Parijat Mazumdar
8  */
9 
14 
15 using namespace shogun;
16 
17 namespace shogun
18 {
20 {
21  init_mb_params();
22 }
23 
24 CKMeansMiniBatch::CKMeansMiniBatch(int32_t k_i, CDistance* d_i, bool use_kmpp_i):CKMeansBase(k_i, d_i, use_kmpp_i)
25 {
26  init_mb_params();
27 }
28 
29 CKMeansMiniBatch::CKMeansMiniBatch(int32_t k_i, CDistance* d_i, SGMatrix<float64_t> centers_i):CKMeansBase(k_i, d_i, centers_i)
30 {
31  init_mb_params();
32 }
33 
35 {
36 }
37 
39 {
40  REQUIRE(b>0, "Parameter bach size should be > 0");
41  batch_size=b;
42 }
43 
45 {
46  return batch_size;
47 }
48 
50 {
51  REQUIRE(i>0, "Parameter number of iterations should be > 0");
52  minib_iter=i;
53 }
54 
56 {
57  return minib_iter;
58 }
59 
60 void CKMeansMiniBatch::set_mb_params(int32_t b, int32_t t)
61 {
62  REQUIRE(b>0, "Parameter bach size should be > 0");
63  REQUIRE(t>0, "Parameter number of iterations should be > 0");
64  batch_size=b;
65  minib_iter=t;
66 }
67 
69 {
71  "batch size not set to positive value. Current batch size %d \n", batch_size);
73  "number of iterations not set to positive value. Current iterations %d \n", minib_iter);
74 
78  CFeatures* rhs_cache=distance->replace_rhs(rhs_mus);
79  rhs_mus->set_feature_matrix(mus);
80  int32_t XSize=lhs->get_num_vectors();
81  int32_t dims=lhs->get_num_features();
82 
84  v.zero();
85 
86  for (int32_t i=0; i<minib_iter; i++)
87  {
88  SGVector<int32_t> M=mbchoose_rand(batch_size,XSize);
90  for (int32_t j=0; j<batch_size; j++)
91  {
93  for (int32_t p=0; p<k; p++)
94  dists[p]=distance->distance(M[j],p);
95 
96  int32_t imin=0;
97  float64_t min=dists[0];
98  for (int32_t p=1; p<k; p++)
99  {
100  if (dists[p]<min)
101  {
102  imin=p;
103  min=dists[p];
104  }
105  }
106  ncent[j]=imin;
107  }
108  for (int32_t j=0; j<batch_size; j++)
109  {
110  int32_t near=ncent[j];
111  SGVector<float64_t> c_alive=rhs_mus->get_feature_vector(near);
113  v[near]+=1.0;
114  float64_t eta=1.0/v[near];
115  for (int32_t c=0; c<dims; c++)
116  {
117  c_alive[c]=(1.0-eta)*c_alive[c]+eta*x[c];
118  }
119  }
120  }
121  SG_UNREF(lhs);
122  distance->replace_rhs(rhs_cache);
123  delete rhs_mus;
124 }
125 
/**
 * Draws b indices, without repetition, by repeatedly calling
 * CMath::random(0,num-1) and rejecting any index already marked in `chosen`.
 *
 * NOTE(review): the declarations of `chosen` and `ret` (listing lines 128-129)
 * are not visible in this extract. From their use below, `chosen` is indexed
 * by the drawn value n and `ret` by the draw counter ch - confirm their sizes
 * against the full source.
 *
 * NOTE(review): the loop only advances when an unmarked index is drawn. If b
 * is larger than the number of distinct values CMath::random(0,num-1) can
 * return (presumably num - TODO confirm the range is inclusive), the loop
 * cannot terminate. No guard for that case is visible here.
 *
 * @param b number of indices to draw
 * @param num upper bound used for the random draw (values come from random(0,num-1))
 * @return ret, filled with the b accepted indices in the order they were drawn
 */
126 SGVector<int32_t> CKMeansMiniBatch::mbchoose_rand(int32_t b, int32_t num)
127 {
130  chosen.zero();
// ch counts how many distinct indices have been accepted so far.
131  int32_t ch=0;
132  while (ch<b)
133  {
134  const int32_t n=CMath::random(0,num-1);
// Accept n only on its first appearance; repeated draws are discarded and retried.
135  if (chosen[n]==0)
136  {
137  chosen[n]+=1;
138  ret[ch]=n;
139  ch++;
140  }
141  }
142  return ret;
143 }
144 
145 void CKMeansMiniBatch::init_mb_params()
146 {
147  batch_size=-1;
148  minib_iter=-1;
149 }
150 
152 {
153  initialize_training(data);
156  return true;
157 }
158 
159 }
int32_t get_batch_size() const
ST * get_feature_vector(int32_t num, int32_t &len, bool &dofree)
Class Distance, a base class for all the distances used in the Shogun toolbox.
Definition: Distance.h:87
int32_t get_num_features() const
CFeatures * get_lhs()
Definition: Distance.h:224
void set_feature_matrix(SGMatrix< ST > matrix)
#define REQUIRE(x,...)
Definition: SGIO.h:206
SGMatrix< float64_t > mus
Definition: KMeansBase.h:199
static uint64_t random()
Definition: Math.h:1019
void compute_cluster_variances()
Definition: KMeansBase.cpp:94
void initialize_training(CFeatures *data=NULL)
Definition: KMeansBase.cpp:142
virtual int32_t get_num_vectors() const
void set_batch_size(int32_t b)
double float64_t
Definition: common.h:50
virtual bool train_machine(CFeatures *data=NULL)
virtual CFeatures * replace_rhs(CFeatures *rhs)
Definition: Distance.cpp:170
void set_mb_params(int32_t b, int32_t t)
int32_t get_mb_iter() const
virtual float64_t distance(int32_t idx_a, int32_t idx_b)
Definition: Distance.cpp:206
#define SG_UNREF(x)
Definition: SGObject.h:55
All classes and functions are contained in the shogun namespace.
Definition: class_list.h:18
The class Features is the base class of all feature objects.
Definition: Features.h:68
static CDenseFeatures * obtain_from_generic(CFeatures *const base_features)

SHOGUN Machine Learning Toolbox - Documentation