00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2008 Gunnar Raetsch 00008 * Written (W) 2007-2009 Soeren Sonnenburg 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #ifndef _KMEANS_H__ 00013 #define _KMEANS_H__ 00014 00015 #include <stdio.h> 00016 #include <shogun/lib/common.h> 00017 #include <shogun/io/SGIO.h> 00018 #include <shogun/features/SimpleFeatures.h> 00019 #include <shogun/distance/Distance.h> 00020 #include <shogun/machine/DistanceMachine.h> 00021 00022 namespace shogun 00023 { 00024 class CDistanceMachine; 00025 00039 class CKMeans : public CDistanceMachine 00040 { 00041 public: 00043 CKMeans(); 00044 00050 CKMeans(int32_t k, CDistance* d); 00051 virtual ~CKMeans(); 00052 00057 virtual inline EClassifierType get_classifier_type() { return CT_KMEANS; } 00058 00064 virtual bool load(FILE* srcfile); 00065 00071 virtual bool save(FILE* dstfile); 00072 00077 inline void set_k(int32_t p_k) 00078 { 00079 ASSERT(p_k>0); 00080 this->k=p_k; 00081 } 00082 00087 inline int32_t get_k() 00088 { 00089 return k; 00090 } 00091 00096 inline void set_max_iter(int32_t iter) 00097 { 00098 ASSERT(iter>0); 00099 max_iter=iter; 00100 } 00101 00106 inline float64_t get_max_iter() 00107 { 00108 return max_iter; 00109 } 00110 00114 SGVector<float64_t> get_radiuses() { return R; } 00115 00119 SGMatrix<float64_t> get_cluster_centers() 00120 { 00121 /* return empty matrix if no radiuses are there (not trained yet) */ 00122 if (!R.vector) 00123 return SGMatrix<float64_t>(); 00124 00125 CSimpleFeatures<float64_t>* lhs= 00126 (CSimpleFeatures<float64_t>*)distance->get_lhs(); 00127 SGMatrix<float64_t> centers=lhs->get_feature_matrix(); 00128 SG_UNREF(lhs); 00129 return centers; 00130 } 00131 00136 inline int32_t get_dimensions() 00137 { 00138 return dimensions; 00139 } 00140 00142 inline virtual const char* get_name() const { return "KMeans"; } 00143 00144 protected: 00150 void clustknb(bool use_old_mus, float64_t *mus_start); 00151 00160 virtual bool train_machine(CFeatures* data=NULL); 00161 00163 virtual void store_model_features(); 00164 00165 private: 00166 void init(); 00167 00168 protected: 00170 int32_t max_iter; 00171 00173 int32_t k; 00174 00176 int32_t dimensions; 00177 00179 SGVector<float64_t> R; 00180 00181 private: 00182 /* temporary variable for weighting over the train data */ 00183 SGVector<float64_t> Weights; 00184 00185 /* temp variable for cluster centers */ 00186 SGMatrix<float64_t> mus; 00187 00188 }; 00189 } 00190 #endif 00191