KMeans.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Gunnar Raetsch
00008  * Written (W) 2007-2009 Soeren Sonnenburg
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #ifndef _KMEANS_H__
00013 #define _KMEANS_H__
00014 
00015 #include <stdio.h>
00016 #include <shogun/lib/common.h>
00017 #include <shogun/io/SGIO.h>
00018 #include <shogun/features/SimpleFeatures.h>
00019 #include <shogun/distance/Distance.h>
00020 #include <shogun/machine/DistanceMachine.h>
00021 
00022 namespace shogun
00023 {
00024 class CDistanceMachine;
00025 
00039 class CKMeans : public CDistanceMachine
00040 {
00041     public:
00043         CKMeans();
00044 
00050         CKMeans(int32_t k, CDistance* d);
00051         virtual ~CKMeans();
00052 
00057         virtual inline EClassifierType get_classifier_type() { return CT_KMEANS; }
00058 
00064         virtual bool load(FILE* srcfile);
00065 
00071         virtual bool save(FILE* dstfile);
00072 
00077         inline void set_k(int32_t p_k)
00078         {
00079             ASSERT(p_k>0);
00080             this->k=p_k;
00081         }
00082 
00087         inline int32_t get_k()
00088         {
00089             return k;
00090         }
00091 
00096         inline void set_max_iter(int32_t iter)
00097         {
00098             ASSERT(iter>0);
00099             max_iter=iter;
00100         }
00101 
00106         inline float64_t get_max_iter()
00107         {
00108             return max_iter;
00109         }
00110 
00114         SGVector<float64_t> get_radiuses() { return R; }
00115 
00119         SGMatrix<float64_t> get_cluster_centers()
00120         {
00121             /* return empty matrix if no radiuses are there (not trained yet) */
00122             if (!R.vector)
00123                 return SGMatrix<float64_t>();
00124 
00125             CSimpleFeatures<float64_t>* lhs=
00126                 (CSimpleFeatures<float64_t>*)distance->get_lhs();
00127             SGMatrix<float64_t> centers=lhs->get_feature_matrix();
00128             SG_UNREF(lhs);
00129             return centers;
00130         }
00131 
00136         inline int32_t get_dimensions()
00137         {
00138             return dimensions;
00139         }
00140 
00142         inline virtual const char* get_name() const { return "KMeans"; }
00143 
00144     protected:
00150         void clustknb(bool use_old_mus, float64_t *mus_start);
00151 
00160         virtual bool train_machine(CFeatures* data=NULL);
00161 
00163         virtual void store_model_features();
00164 
00165     private:
00166         void init();
00167 
00168     protected:
00170         int32_t max_iter;
00171 
00173         int32_t k;
00174 
00176         int32_t dimensions;
00177 
00179         SGVector<float64_t> R;
00180         
00181     private:
00182         /* temporary variable for weighting over the train data */
00183         SGVector<float64_t> Weights;
00184 
00185         /* temp variable for cluster centers */
00186         SGMatrix<float64_t> mus;
00187 
00188 };
00189 }
00190 #endif
00191 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation