SHOGUN  4.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
KMeansMiniBatchImpl.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2014 Parijat Mazumdar
8  */
9 
14 
15 using namespace shogun;
16 
17 namespace shogun
18 {
/**
 * Mini-batch k-means: refines the cluster centers held in `mus` by repeatedly
 * drawing a random batch of samples, assigning each sample to its nearest
 * center, and moving that center towards the sample with a per-center step
 * size of 1/(number of samples assigned to that center so far).
 *
 * @param k           number of cluster centers; the rhs indices 0..k-1 are
 *                    queried through distance->distance()
 * @param distance    distance object whose rhs is temporarily replaced by the
 *                    center features and put back before returning
 * @param batch_size  number of samples drawn per iteration; must be > 0
 * @param minib_iter  number of mini-batch iterations; must be > 0
 * @param mus         matrix of cluster centers, installed as the feature
 *                    matrix of rhs_mus
 *
 * NOTE(review): this listing omits several source lines (26-28, 34, 43, 63).
 * The declarations of lhs, rhs_mus, v, dists and x are therefore not visible
 * here; comments about them below are assumptions to be confirmed against the
 * full source file.
 */
19 void CKMeansMiniBatchImpl::minibatch_KMeans(int32_t k, CDistance* distance, int32_t batch_size, int32_t minib_iter, SGMatrix<float64_t> mus)
20 {
 // Reject non-positive batch sizes and iteration counts up front.
21  REQUIRE(batch_size>0,
22  "batch size not set to positive value. Current batch size %d \n", batch_size);
23  REQUIRE(minib_iter>0,
24  "number of iterations not set to positive value. Current iterations %d \n", minib_iter);
25 
 // lhs and rhs_mus are declared on listing lines 26-28 (not shown);
 // presumably lhs is the distance's left-hand-side features -- TODO confirm.
 // Install rhs_mus as the distance's rhs and keep the returned pointer so the
 // caller's rhs can be put back at the end (presumably the previous rhs).
29  CFeatures* rhs_cache=distance->replace_rhs(rhs_mus);
 // The centers become the rhs feature vectors, so distance(i,p) below compares
 // lhs sample i against center p.
30  rhs_mus->set_feature_matrix(mus);
31  int32_t XSize=lhs->get_num_vectors();
32  int32_t dims=lhs->get_num_features();
33 
 // v is declared on listing line 34 (not shown); it is indexed by center below
 // and serves as the per-center count of samples assigned so far.
35  v.zero();
36 
37  for (int32_t i=0; i<minib_iter; i++)
38  {
 // M: indices of the batch_size lhs samples drawn for this iteration.
39  SGVector<int32_t> M=mbchoose_rand(batch_size,XSize);
 // ncent[j]: index of the center nearest to batch sample j.
40  SGVector<int32_t> ncent=SGVector<int32_t>(batch_size);
 // Pass 1: assign every batch sample to its nearest center. All assignments
 // are computed before any center is moved in pass 2.
41  for (int32_t j=0; j<batch_size; j++)
42  {
 // dists is declared on listing line 43 (not shown); presumably a
 // k-element vector of distances -- TODO confirm.
44  for (int32_t p=0; p<k; p++)
45  dists[p]=distance->distance(M[j],p);
46 
 // Linear argmin over the k distances; ties keep the lowest index because
 // the comparison is strict.
47  int32_t imin=0;
48  float64_t min=dists[0];
49  for (int32_t p=1; p<k; p++)
50  {
51  if (dists[p]<min)
52  {
53  imin=p;
54  min=dists[p];
55  }
56  }
57  ncent[j]=imin;
58  }
 // Pass 2: move each assigned center towards its sample.
59  for (int32_t j=0; j<batch_size; j++)
60  {
61  int32_t near=ncent[j];
 // NOTE(review): the update below writes through c_alive; this only changes
 // the stored center if get_feature_vector() returns a view of the feature
 // matrix rather than a copy -- confirm.
62  SGVector<float64_t> c_alive=rhs_mus->get_feature_vector(near);
 // x is declared on listing line 63 (not shown); presumably the feature
 // vector of sample M[j] -- TODO confirm.
 // Count this assignment and derive the step size eta = 1/count, so a center
 // moves less the more samples it has already absorbed.
64  v[near]+=1.0;
65  float64_t eta=1.0/v[near];
 // Convex combination of the old center and the sample, per dimension.
66  for (int32_t c=0; c<dims; c++)
67  {
68  c_alive[c]=(1.0-eta)*c_alive[c]+eta*x[c];
69  }
70  }
71  }
 // Cleanup: drop the lhs reference, put the saved rhs back on the distance,
 // and destroy the temporary center features.
72  SG_UNREF(lhs);
73  distance->replace_rhs(rhs_cache);
 // NOTE(review): rhs_mus is destroyed with a plain delete while lhs is released
 // with SG_UNREF -- confirm that nothing else still holds a reference to
 // rhs_mus at this point.
74  delete rhs_mus;
75 }
76 
/**
 * Draws b distinct random indices by rejection sampling: a candidate index is
 * drawn, and it is accepted only if it has not been accepted before.
 *
 * @param b    number of distinct indices to draw
 * @param num  size of the index range; candidates come from
 *             CMath::random(0,num-1), presumably inclusive of both bounds
 *             -- TODO confirm
 * @return     ret, filled with the b accepted indices in the order they were
 *             drawn
 *
 * NOTE(review): chosen and ret are declared on listing lines 79-80, which are
 * not shown in this view; presumably chosen has num entries and ret has b
 * entries -- confirm against the full source file.
 * NOTE(review): no check on b versus num is visible in this function. If b
 * exceeds the number of distinct values the generator can return, ch never
 * reaches b and the loop does not terminate.
 */
77 SGVector<int32_t> CKMeansMiniBatchImpl::mbchoose_rand(int32_t b, int32_t num)
78 {
 // chosen[n] != 0 marks index n as already picked.
81  chosen.zero();
 // ch counts how many indices have been accepted so far.
82  int32_t ch=0;
83  while (ch<b)
84  {
85  const int32_t n=CMath::random(0,num-1);
 // Accept n only on its first occurrence; repeated draws are discarded and
 // the loop simply tries again.
86  if (chosen[n]==0)
87  {
88  chosen[n]+=1;
89  ret[ch]=n;
90  ch++;
91  }
92  }
93  return ret;
94 }
95 }
float distance(CJLCoverTreePoint p1, CJLCoverTreePoint p2, float64_t upper_bound)
ST * get_feature_vector(int32_t num, int32_t &len, bool &dofree)
Class Distance, a base class for all the distances used in the Shogun toolbox.
Definition: Distance.h:81
int32_t get_num_features() const
CFeatures * get_lhs()
Definition: Distance.h:195
void set_feature_matrix(SGMatrix< ST > matrix)
#define REQUIRE(x,...)
Definition: SGIO.h:206
static uint64_t random()
Definition: Math.h:1019
virtual int32_t get_num_vectors() const
double float64_t
Definition: common.h:50
CFeatures * replace_rhs(CFeatures *rhs)
Definition: Distance.cpp:145
virtual float64_t distance(int32_t idx_a, int32_t idx_b)
Definition: Distance.cpp:189
#define SG_UNREF(x)
Definition: SGObject.h:52
All classes and functions are contained in the shogun namespace.
Definition: class_list.h:18
The class Features is the base class of all feature objects.
Definition: Features.h:68
static CDenseFeatures * obtain_from_generic(CFeatures *const base_features)
static void minibatch_KMeans(int32_t k, CDistance *distance, int32_t batch_size, int32_t minib_iter, SGMatrix< float64_t > mus)

SHOGUN Machine Learning Toolbox - Documentation