50 use_kmeanspp=use_kmpp;
72 "Expected %d initial cluster centers, got %d", k, centers.
num_cols);
74 "Expected %d dimensionional cluster centers, got %d", dimensions, centers.
num_rows);
75 mus_initial = centers;
83 for (int32_t i=0; i<XSize; i++)
93 for (int32_t j=0; j<dimensions; j++)
94 mus.
matrix[Cl*dimensions+j] += vec[j];
101 for (int32_t i=0; i<k; i++)
103 if (weights_set[i]!=0.0)
105 for (int32_t j=0; j<dimensions; j++)
106 mus.
matrix[i*dimensions+j] /= weights_set[i];
124 for(int32_t idx=0;idx<XSize;idx++)
126 for(int32_t m=0;m<k;m++)
130 for (int32_t i=0; i<XSize; i++)
137 if (dists[i*k+j]<mini)
150 for (int32_t i=0; i<XSize; i++)
152 const int32_t Cl = ClList[i];
153 weights_set[Cl]+=1.0;
159 for (int32_t j=0; j<dimensions; j++)
160 mus.
matrix[Cl*dimensions+j] += vec[j];
169 for (int32_t i=0; i<k; i++)
171 if (weights_set[i]!=0.0)
173 for (int32_t j=0; j<dimensions; j++)
174 mus.
matrix[i*dimensions+j] /= weights_set[i];
180 void CKMeans::compute_cluster_variances()
183 for (int32_t i=0; i<k; i++)
188 bool first_round=
true;
190 for (int32_t j=0; j<k; j++)
197 for (l=0; l<dimensions; l++)
200 mus.
matrix[i*dimensions+l]
201 -mus.
matrix[j*dimensions+l]);
212 if ((dist<rmin2) && (dist>=rmin1))
228 bool CKMeans::train_machine(
CFeatures* data)
239 int32_t XSize=lhs->get_num_vectors();
240 dimensions=lhs->get_num_features();
241 const int32_t XDimk=dimensions*k;
243 ASSERT(XSize>0 && dimensions>0);
247 mus_initial=kmeanspp();
253 memset(mus.matrix, 0, sizeof(
float64_t)*XDimk);
260 if (mus_initial.matrix)
263 set_random_centers(weights_set, ClList, XSize);
274 compute_cluster_variances();
305 REQUIRE(p_k>0,
"number of clusters should be > 0");
316 REQUIRE(iter>0,
"number of clusters should be > 0");
337 REQUIRE(b>0,
"Parameter bach size should be > 0");
348 REQUIRE(i>0,
"Parameter number of iterations should be > 0");
359 REQUIRE(b>0,
"Parameter bach size should be > 0");
360 REQUIRE(t>0,
"Parameter number of iterations should be > 0");
394 return fixed_centers;
397 void CKMeans::store_model_features()
425 for (int32_t point_idx=0;point_idx<num;point_idx++)
430 while (cent_id<count)
433 if (dists[point_idx]>dist_temp)
434 dists[point_idx]=dist_temp;
438 dists[point_idx]*=dists[point_idx];
439 sum+=dists[point_idx];
445 while ((chosen-=dists[mu_next])>0)
448 mu_index[count]=mu_next;
454 for (int32_t c_m=0;c_m<k;c_m++)
457 for (int32_t r_m=0;r_m<dim;r_m++)
458 mat(r_m,c_m)=feature[r_m];
int32_t get_mbKMeans_batch_size() const
int32_t get_mbKMeans_iter() const
virtual bool save(FILE *dstfile)
void set_mbKMeans_params(int32_t b, int32_t t)
virtual void copy_feature_matrix(SGMatrix< ST > src)
ST * get_feature_vector(int32_t num, int32_t &len, bool &dofree)
Class Distance, a base class for all the distances used in the Shogun toolbox.
int32_t get_num_features() const
virtual int32_t get_num_vec_lhs()
void set_mbKMeans_batch_size(int32_t b)
void set_mbKMeans_iter(int32_t t)
void set_use_kmeanspp(bool kmpp)
A generic DistanceMachine interface.
bool get_use_kmeanspp() const
SGVector< float64_t > get_radiuses()
virtual bool load(FILE *srcfile)
static void Lloyd_KMeans(int32_t k, CDistance *distance, int32_t max_iter, SGMatrix< float64_t > mus, SGVector< int32_t > ClList, SGVector< float64_t > weights_set, bool fixed_centers)
void set_max_iter(int32_t iter)
void set_fixed_centers(bool fixed)
CFeatures * replace_rhs(CFeatures *rhs)
virtual float64_t distance(int32_t idx_a, int32_t idx_b)
All classes and functions are contained in the shogun namespace.
virtual void set_initial_centers(SGMatrix< float64_t > centers)
void free_feature_vector(ST *feat_vec, int32_t num, bool dofree)
The class Features is the base class of all feature objects.
void set_distance(CDistance *d)
void set_train_method(EKMeansMethod f)
virtual EFeatureType get_feature_type()=0
static CDenseFeatures * obtain_from_generic(CFeatures *const base_features)
EKMeansMethod get_train_method() const
static float32_t sqrt(float32_t x)
static void minibatch_KMeans(int32_t k, CDistance *distance, int32_t batch_size, int32_t minib_iter, SGMatrix< float64_t > mus)
virtual bool init(CFeatures *lhs, CFeatures *rhs)
SGMatrix< float64_t > get_cluster_centers()