Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #include "classifier/KNN.h"
00014 #include "features/Labels.h"
00015 #include "lib/Mathematics.h"
00016 #include "lib/Signal.h"
00017
00018 using namespace shogun;
00019
00020 CKNN::CKNN()
00021 : CDistanceMachine(), k(3), num_classes(0), num_train_labels(0), train_labels(NULL)
00022 {
00023 }
00024
00025 CKNN::CKNN(int32_t k_, CDistance* d, CLabels* trainlab)
00026 : CDistanceMachine(), k(k_), num_classes(0), train_labels(NULL)
00027 {
00028 ASSERT(d);
00029 ASSERT(trainlab);
00030
00031 set_distance(d);
00032 set_labels(trainlab);
00033 num_train_labels=trainlab->get_num_labels();
00034 }
00035
00036
00037 CKNN::~CKNN()
00038 {
00039 delete[] train_labels;
00040 }
00041
00042 bool CKNN::train(CFeatures* data)
00043 {
00044 ASSERT(labels);
00045 ASSERT(distance);
00046
00047 if (data)
00048 {
00049 if (labels->get_num_labels() != data->get_num_vectors())
00050 SG_ERROR("Number of training vectors does not match number of labels\n");
00051 distance->init(data, data);
00052 }
00053
00054 train_labels=labels->get_int_labels(num_train_labels);
00055 ASSERT(train_labels);
00056 ASSERT(num_train_labels>0);
00057
00058 int32_t max_class=train_labels[0];
00059 int32_t min_class=train_labels[0];
00060
00061 int32_t i;
00062 for (i=1; i<num_train_labels; i++)
00063 {
00064 max_class=CMath::max(max_class, train_labels[i]);
00065 min_class=CMath::min(min_class, train_labels[i]);
00066 }
00067
00068 for (i=0; i<num_train_labels; i++)
00069 train_labels[i]-=min_class;
00070
00071 min_label=min_class;
00072 num_classes=max_class-min_class+1;
00073
00074 SG_INFO( "num_classes: %d (%+d to %+d) num_train: %d\n", num_classes, min_class, max_class, num_train_labels);
00075 return true;
00076 }
00077
00078 CLabels* CKNN::classify()
00079 {
00080 ASSERT(num_classes>0);
00081 ASSERT(distance);
00082 ASSERT(distance->get_num_vec_rhs());
00083
00084 int32_t num_lab=distance->get_num_vec_rhs();
00085 ASSERT(k<=num_lab);
00086
00087 CLabels* output=new CLabels(num_lab);
00088
00089
00090 float64_t* dists=new float64_t[num_train_labels];
00091 int32_t* train_lab=new int32_t[num_train_labels];
00092
00094 int32_t* classes=new int32_t[num_classes];
00095
00096 ASSERT(dists);
00097 ASSERT(train_lab);
00098 ASSERT(classes);
00099
00100 SG_INFO( "%d test examples\n", num_lab);
00101 CSignal::clear_cancel();
00102
00103 for (int32_t i=0; i<num_lab && (!CSignal::cancel_computations()); i++)
00104 {
00105 SG_PROGRESS(i, 0, num_lab);
00106
00107
00108 distances_lhs(dists,0,num_train_labels-1,i);
00109 int32_t j;
00110 for (j=0; j<num_train_labels; j++)
00111 train_lab[j]=train_labels[j];
00112
00113
00114
00115 CMath::qsort_index(dists, train_lab, num_train_labels);
00116
00117
00118 for (j=0; j<num_classes; j++)
00119 classes[j]=0;
00120
00121 for (j=0; j<k; j++)
00122 classes[train_lab[j]]++;
00123
00124
00125 int32_t out_idx=0;
00126 int32_t out_max=0;
00127
00128 for (j=0; j<num_classes; j++)
00129 {
00130 if (out_max< classes[j])
00131 {
00132 out_idx= j;
00133 out_max= classes[j];
00134 }
00135 }
00136
00137 output->set_label(i, out_idx+min_label);
00138 }
00139
00140 delete[] dists;
00141 delete[] train_lab;
00142 delete[] classes;
00143
00144 return output;
00145 }
00146
00147 CLabels* CKNN::classify(CFeatures* data)
00148 {
00149 if (!distance)
00150 SG_ERROR("No distance assigned!\n");
00151
00152 CFeatures* lhs=distance->get_lhs();
00153 if (!lhs || !lhs->get_num_vectors())
00154 {
00155 SG_UNREF(lhs);
00156 SG_ERROR("No vectors on left hand side\n");
00157 }
00158 distance->init(lhs, data);
00159 SG_UNREF(lhs);
00160
00161 return classify();
00162 }
00163
00164 void CKNN::classify_for_multiple_k(int32_t** dst, int32_t* num_vec, int32_t* k_out)
00165 {
00166 ASSERT(dst);
00167 ASSERT(k_out);
00168 ASSERT(num_vec);
00169
00170 ASSERT(num_classes>0);
00171 ASSERT(distance);
00172 ASSERT(distance->get_num_vec_rhs());
00173
00174 int32_t num_lab=distance->get_num_vec_rhs();
00175 ASSERT(k<=num_lab);
00176
00177 int32_t* output=(int32_t*) malloc(sizeof(int32_t)*k*num_lab);
00178
00179
00180 float64_t* dists=new float64_t[num_train_labels];
00181 int32_t* train_lab=new int32_t[num_train_labels];
00182
00184 int32_t* classes=new int32_t[num_classes];
00185
00186 SG_INFO( "%d test examples\n", num_lab);
00187 CSignal::clear_cancel();
00188
00189 for (int32_t i=0; i<num_lab && (!CSignal::cancel_computations()); i++)
00190 {
00191 SG_PROGRESS(i, 0, num_lab);
00192
00193
00194 distances_lhs(dists,0,num_train_labels-1,i);
00195 for (int32_t j=0; j<num_train_labels; j++)
00196 train_lab[j]=train_labels[j];
00197
00198
00199
00200 CMath::qsort_index(dists, train_lab, num_train_labels);
00201
00202
00203 for (int32_t j=0; j<num_classes; j++)
00204 classes[j]=0;
00205
00206 for (int32_t j=0; j<k; j++)
00207 {
00208 classes[train_lab[j]]++;
00209
00210
00211 int32_t out_idx=0;
00212 int32_t out_max=0;
00213
00214 for (int32_t c=0; c<num_classes; c++)
00215 {
00216 if (out_max< classes[c])
00217 {
00218 out_idx= c;
00219 out_max= classes[c];
00220 }
00221 }
00222 output[j*num_lab+i]=out_idx+min_label;
00223 }
00224 }
00225
00226 delete[] dists;
00227 delete[] train_lab;
00228 delete[] classes;
00229
00230 *dst=output;
00231 *k_out=k;
00232 *num_vec=num_lab;
00233 }
00234
00235 bool CKNN::load(FILE* srcfile)
00236 {
00237 SG_SET_LOCALE_C;
00238 SG_RESET_LOCALE;
00239 return false;
00240 }
00241
00242 bool CKNN::save(FILE* dstfile)
00243 {
00244 SG_SET_LOCALE_C;
00245 SG_RESET_LOCALE;
00246 return false;
00247 }