Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include <shogun/ui/GUIPreprocessor.h>
00013 #include <shogun/ui/SGInterface.h>
00014
00015 #include <shogun/lib/config.h>
00016 #include <shogun/io/SGIO.h>
00017 #include <shogun/lib/config.h>
00018 #include <shogun/preprocessor/LogPlusOne.h>
00019 #include <shogun/preprocessor/NormOne.h>
00020 #include <shogun/preprocessor/PruneVarSubMean.h>
00021 #include <shogun/preprocessor/PCA.h>
00022 #include <shogun/preprocessor/DecompressString.h>
00023 #include <shogun/preprocessor/SortWordString.h>
00024 #include <shogun/preprocessor/SortUlongString.h>
00025 #include <shogun/features/RealFileFeatures.h>
00026 #include <shogun/features/TOPFeatures.h>
00027 #include <shogun/features/FKFeatures.h>
00028 #include <shogun/features/StringFeatures.h>
00029 #include <shogun/features/SimpleFeatures.h>
00030 #include <shogun/features/SparseFeatures.h>
00031 #include <shogun/features/CombinedFeatures.h>
00032 #include <shogun/features/Features.h>
00033
00034 #include <string.h>
00035 #include <stdio.h>
00036
00037 using namespace shogun;
00038
00039 CGUIPreprocessor::CGUIPreprocessor(CSGInterface* ui_)
00040 : CSGObject(), ui(ui_)
00041 {
00042 preprocs=new CList(true);
00043 }
00044
00045 CGUIPreprocessor::~CGUIPreprocessor()
00046 {
00047 SG_UNREF(preprocs);
00048 }
00049
00050 CPreprocessor* CGUIPreprocessor::create_prunevarsubmean(bool divide_by_std)
00051 {
00052 CPreprocessor* preproc=new CPruneVarSubMean(divide_by_std);
00053
00054 if (preproc)
00055 SG_INFO("PRUNEVARSUBMEAN created (%p), divide_by_std %d", preproc, divide_by_std);
00056 else
00057 SG_ERROR("Could not create preproc PRUNEVARSUBMEAN, divide_by_std %d", divide_by_std);
00058
00059 return preproc;
00060 }
00061
00062 CPreprocessor* CGUIPreprocessor::create_pca(bool do_whitening, float64_t threshold)
00063 {
00064 #ifdef HAVE_LAPACK
00065 CPreprocessor* preproc=new CPCA(do_whitening, THRESHOLD, threshold);
00066
00067 if (preproc)
00068 SG_INFO("PCA created (%p), do_whitening %i threshold %e", preproc, do_whitening, threshold);
00069 else
00070 SG_ERROR("Could not create preproc PCA, do_whitening %i threshold %e", do_whitening, threshold);
00071
00072 return preproc;
00073 #else //HAVE_LAPACK
00074 SG_ERROR("Could not create preproc PCA - lapack not available at compile time\n");
00075 return NULL;
00076 #endif //HAVE_LAPACK
00077 }
00078
00079 CPreprocessor* CGUIPreprocessor::create_generic(EPreprocessorType type)
00080 {
00081 CPreprocessor* preproc=NULL;
00082
00083 switch (type)
00084 {
00085 case P_NORMONE:
00086 preproc=new CNormOne(); break;
00087 case P_LOGPLUSONE:
00088 preproc=new CLogPlusOne(); break;
00089 case P_SORTWORDSTRING:
00090 preproc=new CSortWordString(); break;
00091 case P_SORTULONGSTRING:
00092 preproc=new CSortUlongString(); break;
00093 case P_DECOMPRESSCHARSTRING:
00094 preproc=new CDecompressString<char>(LZO); break;
00095 default:
00096 SG_ERROR("Unknown Preprocessor type %d\n", type);
00097 }
00098
00099 if (preproc)
00100 SG_INFO("Preproc of type %d created (%p).\n", type, preproc);
00101 else
00102 SG_ERROR("Could not create preproc of type %d.\n", type);
00103
00104 return preproc;
00105 }
00106
00107 bool CGUIPreprocessor::add_preproc(CPreprocessor* preproc)
00108 {
00109 return preprocs->append_element_at_listend(preproc);
00110 }
00111
00112 bool CGUIPreprocessor::clean_preproc()
00113 {
00114 SG_UNREF(preprocs);
00115 preprocs=new CList(true);
00116 return (preprocs!=NULL);
00117 }
00118
00119 bool CGUIPreprocessor::del_preproc()
00120 {
00121 SG_INFO("Deleting preproc %i/(%i).\n", preprocs->get_num_elements()-1, preprocs->get_num_elements());
00122
00123 CSGObject* preproc=preprocs->delete_element();
00124 SG_UNREF(preproc);
00125
00126 return (preproc!=NULL);
00127 }
00128
00129 bool CGUIPreprocessor::attach_preproc(char* target, bool do_force)
00130 {
00131 bool result=false;
00132
00133 if (strncmp(target, "TRAIN", 5)==0)
00134 {
00135 CFeatures* f=ui->ui_features->get_train_features();
00136 if (!f)
00137 SG_ERROR("No train features assigned!\n");
00138
00139 if (f->get_feature_class()==C_COMBINED)
00140 f=((CCombinedFeatures*)f)->get_last_feature_obj();
00141
00142 preprocess_features(f, NULL, do_force);
00143 ui->ui_features->invalidate_train();
00144 result=true;
00145 }
00146 else if (strncmp(target, "TEST", 4)==0)
00147 {
00148 CFeatures* f_test=ui->ui_features->get_test_features();
00149 if (!f_test)
00150 SG_ERROR("No test features assigned!\n");
00151
00152 CFeatures* f_train=ui->ui_features->get_train_features();
00153 if (!f_train)
00154 SG_ERROR("No train features assigned!\n");
00155
00156 EFeatureClass fclass_train=f_train->get_feature_class();
00157 EFeatureClass fclass_test=f_test->get_feature_class();
00158
00159 if (fclass_train==fclass_test)
00160 {
00161 if (fclass_train==C_COMBINED)
00162 {
00163 if (((CCombinedFeatures*) f_train)->check_feature_obj_compatibility((CCombinedFeatures*) f_test))
00164 {
00165
00166 CFeatures* te_feat=((CCombinedFeatures*) f_test)->get_first_feature_obj();
00167 CFeatures* tr_feat=((CCombinedFeatures*) f_train)->get_first_feature_obj();
00168
00169 int32_t num_combined=((CCombinedFeatures*) f_test)->get_num_feature_obj();
00170 ASSERT(((CCombinedFeatures*) f_train)->get_num_feature_obj()==num_combined);
00171
00172 if (!(num_combined && tr_feat && te_feat))
00173 SG_ERROR("One of the combined features has no sub-features ?!\n");
00174
00175 SG_INFO("BEGIN PREPROCESSING COMBINED FEATURES (%d sub-featureobjects).\n", num_combined);
00176
00177 int32_t n=0;
00178 while (n<num_combined && tr_feat && te_feat)
00179 {
00180
00181 SG_INFO("TRAIN ");
00182 tr_feat->list_feature_obj();
00183 SG_INFO("TEST ");
00184 te_feat->list_feature_obj();
00185 preprocess_features(tr_feat, te_feat, do_force);
00186 tr_feat=((CCombinedFeatures*) f_train)->get_next_feature_obj();
00187 te_feat=((CCombinedFeatures*) f_test)->get_next_feature_obj();
00188 n++;
00189 }
00190 ASSERT(n==num_combined);
00191 result=true;
00192 SG_INFO( "END PREPROCESSING COMBINED FEATURES\n");
00193 }
00194 else
00195 SG_ERROR( "combined features not compatible\n");
00196 }
00197 else
00198 {
00199 preprocess_features(f_train, f_test, do_force);
00200 ui->ui_features->invalidate_test();
00201 result=true;
00202 }
00203 }
00204 else
00205 SG_ERROR("Features not compatible.\n");
00206 }
00207 else
00208 SG_ERROR("Features not correctly assigned!\n");
00209
00211 if (result)
00212 clean_preproc();
00213
00214 return result;
00215 }
00216
00217 bool CGUIPreprocessor::preprocess_features(CFeatures* trainfeat, CFeatures* testfeat, bool force)
00218 {
00219 if (trainfeat)
00220 {
00221 if (testfeat)
00222 {
00223
00224
00225 SG_DEBUG( "%d preprocessors attached to train features %d to test features\n", trainfeat->get_num_preprocessors(), testfeat->get_num_preprocessors());
00226
00227 if (trainfeat->get_num_preprocessors() < testfeat->get_num_preprocessors())
00228 {
00229 SG_ERROR( "more preprocessors attached to test features than to train features\n");
00230 return false;
00231 }
00232
00233 if (trainfeat->get_num_preprocessors() && (trainfeat->get_num_preprocessors() > testfeat->get_num_preprocessors()))
00234 {
00235 for (int32_t i=0; i<trainfeat->get_num_preprocessors(); i++)
00236 {
00237 CPreprocessor* preproc = trainfeat->get_preprocessor(i);
00238 preproc->init(trainfeat);
00239 testfeat->add_preprocessor(preproc);
00240 SG_UNREF(preproc);
00241 }
00242
00243 preproc_all_features(testfeat, force);
00244 }
00245 }
00246 else
00247 {
00248 CPreprocessor* preproc = (CPreprocessor*) preprocs->get_first_element();
00249
00250 if (preproc)
00251 {
00252 preproc->init(trainfeat);
00253 trainfeat->add_preprocessor(preproc);
00254
00255 preproc_all_features(trainfeat, force);
00256 SG_UNREF(preproc);
00257 }
00258
00259 while ( (preproc = (CPreprocessor*) preprocs->get_next_element()) !=NULL )
00260 {
00261 preproc->init(trainfeat);
00262 trainfeat->add_preprocessor(preproc);
00263 SG_UNREF(preproc);
00264
00265 preproc_all_features(trainfeat, force);
00266 }
00267 }
00268
00269 return true;
00270 }
00271 else
00272 SG_ERROR( "no features for preprocessing available!\n");
00273
00274 return false;
00275 }
00276
00277 bool CGUIPreprocessor::preproc_all_features(CFeatures* f, bool force)
00278 {
00279 switch (f->get_feature_class())
00280 {
00281 case C_SIMPLE:
00282 switch (f->get_feature_type())
00283 {
00284 case F_DREAL:
00285 return ((CSimpleFeatures<float64_t>*) f)->apply_preprocessor(force);
00286 case F_SHORT:
00287 return ((CSimpleFeatures<int16_t>*) f)->apply_preprocessor(force);
00288 case F_WORD:
00289 return ((CSimpleFeatures<uint16_t>*) f)->apply_preprocessor(force);
00290 case F_CHAR:
00291 return ((CSimpleFeatures<char>*) f)->apply_preprocessor(force);
00292 case F_BYTE:
00293 return ((CSimpleFeatures<uint8_t>*) f)->apply_preprocessor(force);
00294 default:
00295 SG_NOTIMPLEMENTED;
00296 }
00297 break;
00298 case C_STRING:
00299 switch (f->get_feature_type())
00300 {
00301 case F_WORD:
00302 return ((CStringFeatures<uint16_t>*) f)->apply_preprocessor(force);
00303 case F_ULONG:
00304 return ((CStringFeatures<uint64_t>*) f)->apply_preprocessor(force);
00305 default:
00306 SG_NOTIMPLEMENTED;
00307 }
00308 break;
00309 case C_SPARSE:
00310 switch (f->get_feature_type())
00311 {
00312 case F_DREAL:
00313 return ((CSparseFeatures<float64_t>*) f)->apply_preprocessor(force);
00314 default:
00315 SG_NOTIMPLEMENTED;
00316 };
00317 break;
00318 case C_COMBINED:
00319 SG_ERROR( "Combined feature objects cannot be preprocessed. Only its sub-feature objects!\n");
00320 break;
00321 default:
00322 SG_NOTIMPLEMENTED;
00323 }
00324
00325 return false;
00326 }