GUIPreprocessor.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include <shogun/ui/GUIPreprocessor.h>
00013 #include <shogun/ui/SGInterface.h>
00014 
00015 #include <shogun/lib/config.h>
00016 #include <shogun/io/SGIO.h>
00017 #include <shogun/lib/config.h>
00018 #include <shogun/preprocessor/LogPlusOne.h>
00019 #include <shogun/preprocessor/NormOne.h>
00020 #include <shogun/preprocessor/PruneVarSubMean.h>
00021 #include <shogun/preprocessor/PCA.h>
00022 #include <shogun/preprocessor/DecompressString.h>
00023 #include <shogun/preprocessor/SortWordString.h>
00024 #include <shogun/preprocessor/SortUlongString.h>
00025 #include <shogun/features/RealFileFeatures.h>
00026 #include <shogun/features/TOPFeatures.h>
00027 #include <shogun/features/FKFeatures.h>
00028 #include <shogun/features/StringFeatures.h>
00029 #include <shogun/features/SimpleFeatures.h>
00030 #include <shogun/features/SparseFeatures.h>
00031 #include <shogun/features/CombinedFeatures.h>
00032 #include <shogun/features/Features.h>
00033 
00034 #include <string.h>
00035 #include <stdio.h>
00036 
00037 using namespace shogun;
00038 
00039 CGUIPreprocessor::CGUIPreprocessor(CSGInterface* ui_)
00040 : CSGObject(), ui(ui_)
00041 {
00042     preprocs=new CList(true);
00043 }
00044 
00045 CGUIPreprocessor::~CGUIPreprocessor()
00046 {
00047     SG_UNREF(preprocs);
00048 }
00049 
00050 CPreprocessor* CGUIPreprocessor::create_prunevarsubmean(bool divide_by_std)
00051 {
00052     CPreprocessor* preproc=new CPruneVarSubMean(divide_by_std);
00053 
00054     if (preproc)
00055         SG_INFO("PRUNEVARSUBMEAN created (%p), divide_by_std %d", preproc, divide_by_std);
00056     else
00057         SG_ERROR("Could not create preproc PRUNEVARSUBMEAN, divide_by_std %d", divide_by_std);
00058 
00059     return preproc;
00060 }
00061 
00062 CPreprocessor* CGUIPreprocessor::create_pca(bool do_whitening, float64_t threshold)
00063 {
00064 #ifdef HAVE_LAPACK
00065     CPreprocessor* preproc=new CPCA(do_whitening, THRESHOLD, threshold);
00066 
00067     if (preproc)
00068         SG_INFO("PCA created (%p), do_whitening %i threshold %e", preproc, do_whitening, threshold);
00069     else
00070         SG_ERROR("Could not create preproc PCA, do_whitening %i threshold %e", do_whitening, threshold);
00071 
00072     return preproc;
00073 #else //HAVE_LAPACK
00074     SG_ERROR("Could not create preproc PCA - lapack not available at compile time\n");
00075     return NULL;
00076 #endif //HAVE_LAPACK
00077 }
00078 
00079 CPreprocessor* CGUIPreprocessor::create_generic(EPreprocessorType type)
00080 {
00081     CPreprocessor* preproc=NULL;
00082 
00083     switch (type)
00084     {
00085         case P_NORMONE:
00086             preproc=new CNormOne(); break;
00087         case P_LOGPLUSONE:
00088             preproc=new CLogPlusOne(); break;
00089         case P_SORTWORDSTRING:
00090             preproc=new CSortWordString(); break;
00091         case P_SORTULONGSTRING:
00092             preproc=new CSortUlongString(); break;
00093         case P_DECOMPRESSCHARSTRING:
00094             preproc=new CDecompressString<char>(LZO); break;
00095         default:
00096             SG_ERROR("Unknown Preprocessor type %d\n", type);
00097     }
00098 
00099     if (preproc)
00100         SG_INFO("Preproc of type %d created (%p).\n", type, preproc);
00101     else
00102         SG_ERROR("Could not create preproc of type %d.\n", type);
00103 
00104     return preproc;
00105 }
00106 
00107 bool CGUIPreprocessor::add_preproc(CPreprocessor* preproc)
00108 {
00109     return preprocs->append_element_at_listend(preproc);
00110 }
00111 
00112 bool CGUIPreprocessor::clean_preproc()
00113 {
00114     SG_UNREF(preprocs);
00115     preprocs=new CList(true);
00116     return (preprocs!=NULL);
00117 }
00118 
00119 bool CGUIPreprocessor::del_preproc()
00120 {
00121     SG_INFO("Deleting preproc %i/(%i).\n", preprocs->get_num_elements()-1, preprocs->get_num_elements());
00122 
00123     CSGObject* preproc=preprocs->delete_element();
00124     SG_UNREF(preproc);
00125 
00126     return (preproc!=NULL);
00127 }
00128 
00129 bool CGUIPreprocessor::attach_preproc(char* target, bool do_force)
00130 {
00131     bool result=false;
00132 
00133     if (strncmp(target, "TRAIN", 5)==0)
00134     {
00135         CFeatures* f=ui->ui_features->get_train_features();
00136         if (!f)
00137             SG_ERROR("No train features assigned!\n");
00138 
00139         if (f->get_feature_class()==C_COMBINED)
00140             f=((CCombinedFeatures*)f)->get_last_feature_obj();
00141 
00142         preprocess_features(f, NULL, do_force);
00143         ui->ui_features->invalidate_train();
00144         result=true;
00145     }
00146     else if (strncmp(target, "TEST", 4)==0)
00147     {
00148         CFeatures* f_test=ui->ui_features->get_test_features();
00149         if (!f_test)
00150             SG_ERROR("No test features assigned!\n");
00151 
00152         CFeatures* f_train=ui->ui_features->get_train_features();
00153         if (!f_train)
00154             SG_ERROR("No train features assigned!\n");
00155 
00156         EFeatureClass fclass_train=f_train->get_feature_class();
00157         EFeatureClass fclass_test=f_test->get_feature_class();
00158 
00159         if (fclass_train==fclass_test)
00160         {
00161             if (fclass_train==C_COMBINED)
00162             {
00163                 if (((CCombinedFeatures*) f_train)->check_feature_obj_compatibility((CCombinedFeatures*) f_test))
00164                 {
00165                     //preprocess the last test feature obj
00166                     CFeatures* te_feat=((CCombinedFeatures*) f_test)->get_first_feature_obj();
00167                     CFeatures* tr_feat=((CCombinedFeatures*) f_train)->get_first_feature_obj();
00168 
00169                     int32_t num_combined=((CCombinedFeatures*) f_test)->get_num_feature_obj();
00170                     ASSERT(((CCombinedFeatures*) f_train)->get_num_feature_obj()==num_combined);
00171 
00172                     if (!(num_combined && tr_feat && te_feat))
00173                         SG_ERROR("One of the combined features has no sub-features ?!\n");
00174 
00175                     SG_INFO("BEGIN PREPROCESSING COMBINED FEATURES (%d sub-featureobjects).\n", num_combined);
00176                     
00177                     int32_t n=0;
00178                     while (n<num_combined && tr_feat && te_feat)
00179                     {
00180                         // and preprocess using that one 
00181                         SG_INFO("TRAIN ");
00182                         tr_feat->list_feature_obj();
00183                         SG_INFO("TEST ");
00184                         te_feat->list_feature_obj();
00185                         preprocess_features(tr_feat, te_feat, do_force);
00186                         tr_feat=((CCombinedFeatures*) f_train)->get_next_feature_obj();
00187                         te_feat=((CCombinedFeatures*) f_test)->get_next_feature_obj();
00188                         n++;
00189                     }
00190                     ASSERT(n==num_combined);
00191                     result=true;
00192                     SG_INFO( "END PREPROCESSING COMBINED FEATURES\n");
00193                 }
00194                 else
00195                     SG_ERROR( "combined features not compatible\n");
00196             }
00197             else
00198             {
00199                 preprocess_features(f_train, f_test, do_force);
00200                 ui->ui_features->invalidate_test();
00201                 result=true;
00202             }
00203         }
00204         else
00205             SG_ERROR("Features not compatible.\n");
00206     }
00207     else
00208         SG_ERROR("Features not correctly assigned!\n");
00209 
00211     if (result)
00212         clean_preproc();
00213 
00214     return result;
00215 }
00216 
00217 bool CGUIPreprocessor::preprocess_features(CFeatures* trainfeat, CFeatures* testfeat, bool force)
00218 {
00219     if (trainfeat)
00220     {
00221         if (testfeat)
00222         {
00223             // if we don't have a preproc for trainfeatures we 
00224             // don't need a preproc for test features
00225             SG_DEBUG( "%d preprocessors attached to train features %d to test features\n", trainfeat->get_num_preprocessors(), testfeat->get_num_preprocessors());
00226 
00227             if (trainfeat->get_num_preprocessors() < testfeat->get_num_preprocessors())
00228             {
00229                 SG_ERROR( "more preprocessors attached to test features than to train features\n");
00230                 return false;
00231             }
00232 
00233             if (trainfeat->get_num_preprocessors() && (trainfeat->get_num_preprocessors() > testfeat->get_num_preprocessors()))
00234             {
00235                 for (int32_t i=0; i<trainfeat->get_num_preprocessors();  i++)
00236                 {
00237                     CPreprocessor* preproc = trainfeat->get_preprocessor(i);
00238                     preproc->init(trainfeat);
00239                     testfeat->add_preprocessor(preproc);
00240                     SG_UNREF(preproc);
00241                 }
00242 
00243                 preproc_all_features(testfeat, force);
00244             }
00245         }
00246         else
00247         {
00248             CPreprocessor* preproc = (CPreprocessor*) preprocs->get_first_element();
00249 
00250             if (preproc)
00251             {
00252                 preproc->init(trainfeat);
00253                 trainfeat->add_preprocessor(preproc);
00254 
00255                 preproc_all_features(trainfeat, force);
00256                 SG_UNREF(preproc);
00257             }
00258 
00259             while ( (preproc = (CPreprocessor*) preprocs->get_next_element()) !=NULL )
00260             {
00261                 preproc->init(trainfeat);
00262                 trainfeat->add_preprocessor(preproc);
00263                 SG_UNREF(preproc);
00264 
00265                 preproc_all_features(trainfeat, force);
00266             }
00267         }
00268 
00269         return true;
00270     }
00271     else
00272         SG_ERROR( "no features for preprocessing available!\n");
00273 
00274     return false;
00275 }
00276 
00277 bool CGUIPreprocessor::preproc_all_features(CFeatures* f, bool force)
00278 {
00279     switch (f->get_feature_class())
00280     {
00281         case C_SIMPLE:
00282             switch (f->get_feature_type())
00283             {
00284                 case F_DREAL:
00285                     return ((CSimpleFeatures<float64_t>*) f)->apply_preprocessor(force);
00286                 case F_SHORT:
00287                     return ((CSimpleFeatures<int16_t>*) f)->apply_preprocessor(force);
00288                 case F_WORD:
00289                     return ((CSimpleFeatures<uint16_t>*) f)->apply_preprocessor(force);
00290                 case F_CHAR:
00291                     return ((CSimpleFeatures<char>*) f)->apply_preprocessor(force);
00292                 case F_BYTE:
00293                     return ((CSimpleFeatures<uint8_t>*) f)->apply_preprocessor(force);
00294                 default:
00295                     SG_NOTIMPLEMENTED;
00296             }
00297             break;
00298         case C_STRING:
00299             switch (f->get_feature_type())
00300             {
00301                 case F_WORD:
00302                     return ((CStringFeatures<uint16_t>*) f)->apply_preprocessor(force);
00303                 case F_ULONG:
00304                     return ((CStringFeatures<uint64_t>*) f)->apply_preprocessor(force);
00305                 default:
00306                     SG_NOTIMPLEMENTED;
00307             }
00308             break;
00309         case C_SPARSE:
00310             switch (f->get_feature_type())
00311             {
00312                 case F_DREAL:
00313                     return ((CSparseFeatures<float64_t>*) f)->apply_preprocessor(force);
00314                 default:
00315                     SG_NOTIMPLEMENTED;
00316             };
00317             break;
00318         case C_COMBINED:
00319             SG_ERROR( "Combined feature objects cannot be preprocessed. Only its sub-feature objects!\n");
00320             break;
00321         default:
00322             SG_NOTIMPLEMENTED;
00323     }
00324 
00325     return false;
00326 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation