SHOGUN  4.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
GUIPreprocessor.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2008 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
13 #include <shogun/ui/SGInterface.h>
14 
15 #include <shogun/lib/config.h>
16 #include <shogun/io/SGIO.h>
17 #include <shogun/lib/config.h>
33 
34 #include <string.h>
35 #include <stdio.h>
36 
37 using namespace shogun;
38 
40 : CSGObject(), ui(ui_)
41 {
42  preprocs=new CList(true);
43 }
44 
46 {
48 }
49 
51 {
52  CPreprocessor* preproc=new CPruneVarSubMean(divide_by_std);
53 
54  if (preproc)
55  SG_INFO("PRUNEVARSUBMEAN created (%p), divide_by_std %d", preproc, divide_by_std)
56  else
57  SG_ERROR("Could not create preproc PRUNEVARSUBMEAN, divide_by_std %d", divide_by_std)
58 
59  return preproc;
60 }
61 
63 {
64 #ifdef HAVE_EIGEN3
65  CPreprocessor* preproc=new CPCA(do_whitening, THRESHOLD, threshold);
66 
67  if (preproc)
68  SG_INFO("PCA created (%p), do_whitening %i threshold %e", preproc, do_whitening, threshold)
69  else
70  SG_ERROR("Could not create preproc PCA, do_whitening %i threshold %e", do_whitening, threshold)
71 
72  return preproc;
73 #else //HAVE_EIGEN3
74  SG_ERROR("Could not create preproc PCA - eigen3 not available at compile time\n")
75  return NULL;
76 #endif //HAVE_EIGEN3
77 }
78 
80 {
81  CPreprocessor* preproc=NULL;
82 
83  switch (type)
84  {
85  case P_NORMONE:
86  preproc=new CNormOne(); break;
87  case P_LOGPLUSONE:
88  preproc=new CLogPlusOne(); break;
89  case P_SORTWORDSTRING:
90  preproc=new CSortWordString(); break;
91  case P_SORTULONGSTRING:
92  preproc=new CSortUlongString(); break;
94  preproc=new CDecompressString<char>(LZO); break;
95  default:
96  SG_ERROR("Unknown Preprocessor type %d\n", type)
97  }
98 
99  if (preproc)
100  SG_INFO("Preproc of type %d created (%p).\n", type, preproc)
101  else
102  SG_ERROR("Could not create preproc of type %d.\n", type)
103 
104  return preproc;
105 }
106 
108 {
109  return preprocs->append_element_at_listend(preproc);
110 }
111 
113 {
115  preprocs=new CList(true);
116  return (preprocs!=NULL);
117 }
118 
120 {
121  SG_INFO("Deleting preproc %i/(%i).\n", preprocs->get_num_elements()-1, preprocs->get_num_elements())
122 
123  CSGObject* preproc=preprocs->delete_element();
124  SG_UNREF(preproc);
125 
126  return (preproc!=NULL);
127 }
128 
129 bool CGUIPreprocessor::attach_preproc(char* target, bool do_force)
130 {
131  bool result=false;
132 
133  if (strncmp(target, "TRAIN", 5)==0)
134  {
135  CFeatures* f=ui->ui_features->get_train_features();
136  if (!f)
137  SG_ERROR("No train features assigned!\n")
138 
139  if (f->get_feature_class()==C_COMBINED)
140  f=((CCombinedFeatures*)f)->get_last_feature_obj();
141 
142  preprocess_features(f, NULL, do_force);
143  ui->ui_features->invalidate_train();
144  result=true;
145  }
146  else if (strncmp(target, "TEST", 4)==0)
147  {
148  CFeatures* f_test=ui->ui_features->get_test_features();
149  if (!f_test)
150  SG_ERROR("No test features assigned!\n")
151 
152  CFeatures* f_train=ui->ui_features->get_train_features();
153  if (!f_train)
154  SG_ERROR("No train features assigned!\n")
155 
156  EFeatureClass fclass_train=f_train->get_feature_class();
157  EFeatureClass fclass_test=f_test->get_feature_class();
158 
159  if (fclass_train==fclass_test)
160  {
161  if (fclass_train==C_COMBINED)
162  {
163  if (((CCombinedFeatures*) f_train)->check_feature_obj_compatibility((CCombinedFeatures*) f_test))
164  {
165 
166  int32_t num_combined=((CCombinedFeatures*) f_test)->get_num_feature_obj();
167  ASSERT(((CCombinedFeatures*) f_train)->get_num_feature_obj()==num_combined)
168 
169  if (!num_combined)
170  SG_ERROR("One of the combined features has no sub-features ?!\n")
171 
172  //preprocess the last test feature obj
173  SG_INFO("BEGIN PREPROCESSING COMBINED FEATURES (%d sub-featureobjects).\n", num_combined)
174  index_t f_idx = 0;
175  for (; f_idx<num_combined; f_idx++)
176  {
177  CFeatures* te_feat=((CCombinedFeatures*) f_test)->get_feature_obj(f_idx);
178  CFeatures* tr_feat=((CCombinedFeatures*) f_train)->get_feature_obj(f_idx);
179 
180  if (!(te_feat && tr_feat))
181  break;
182 
183  // and preprocess using that one
184  SG_INFO("TRAIN ")
185  tr_feat->list_feature_obj();
186  SG_INFO("TEST ")
187  te_feat->list_feature_obj();
188  preprocess_features(tr_feat, te_feat, do_force);
189  }
190  ASSERT(f_idx==num_combined)
191  result=true;
192  SG_INFO("END PREPROCESSING COMBINED FEATURES\n")
193  }
194  else
195  SG_ERROR("combined features not compatible\n")
196  }
197  else
198  {
199  preprocess_features(f_train, f_test, do_force);
200  ui->ui_features->invalidate_test();
201  result=true;
202  }
203  }
204  else
205  SG_ERROR("Features not compatible.\n")
206  }
207  else
208  SG_ERROR("Features not correctly assigned!\n")
209 
210 
211  if (result)
212  clean_preproc();
213 
214  return result;
215 }
216 
217 bool CGUIPreprocessor::preprocess_features(CFeatures* trainfeat, CFeatures* testfeat, bool force)
218 {
219  if (trainfeat)
220  {
221  if (testfeat)
222  {
223  // if we don't have a preproc for trainfeatures we
224  // don't need a preproc for test features
225  SG_DEBUG("%d preprocessors attached to train features %d to test features\n", trainfeat->get_num_preprocessors(), testfeat->get_num_preprocessors())
226 
227  if (trainfeat->get_num_preprocessors() < testfeat->get_num_preprocessors())
228  {
229  SG_ERROR("more preprocessors attached to test features than to train features\n")
230  return false;
231  }
232 
233  if (trainfeat->get_num_preprocessors() && (trainfeat->get_num_preprocessors() > testfeat->get_num_preprocessors()))
234  {
235  for (int32_t i=0; i<trainfeat->get_num_preprocessors(); i++)
236  {
237  CPreprocessor* preproc = trainfeat->get_preprocessor(i);
238  preproc->init(trainfeat);
239  testfeat->add_preprocessor(preproc);
240  SG_UNREF(preproc);
241  }
242 
243  preproc_all_features(testfeat, force);
244  }
245  }
246  else
247  {
249 
250  if (preproc)
251  {
252  preproc->init(trainfeat);
253  trainfeat->add_preprocessor(preproc);
254 
255  preproc_all_features(trainfeat, force);
256  SG_UNREF(preproc);
257  }
258 
259  while ( (preproc = (CPreprocessor*) preprocs->get_next_element()) !=NULL )
260  {
261  preproc->init(trainfeat);
262  trainfeat->add_preprocessor(preproc);
263  SG_UNREF(preproc);
264 
265  preproc_all_features(trainfeat, force);
266  }
267  }
268 
269  return true;
270  }
271  else
272  SG_ERROR("no features for preprocessing available!\n")
273 
274  return false;
275 }
276 
278 {
279  switch (f->get_feature_class())
280  {
281  case C_DENSE:
282  switch (f->get_feature_type())
283  {
284  case F_DREAL:
285  return ((CDenseFeatures<float64_t>*) f)->apply_preprocessor(force);
286  case F_SHORT:
287  return ((CDenseFeatures<int16_t>*) f)->apply_preprocessor(force);
288  case F_WORD:
289  return ((CDenseFeatures<uint16_t>*) f)->apply_preprocessor(force);
290  case F_CHAR:
291  return ((CDenseFeatures<char>*) f)->apply_preprocessor(force);
292  case F_BYTE:
293  return ((CDenseFeatures<uint8_t>*) f)->apply_preprocessor(force);
294  default:
296  }
297  break;
298  case C_STRING:
299  switch (f->get_feature_type())
300  {
301  case F_WORD:
302  return ((CStringFeatures<uint16_t>*) f)->apply_preprocessor(force);
303  case F_ULONG:
304  return ((CStringFeatures<uint64_t>*) f)->apply_preprocessor(force);
305  default:
307  }
308  break;
309  case C_SPARSE:
310  switch (f->get_feature_type())
311  {
312  case F_DREAL:
313  return ((CSparseFeatures<float64_t>*) f)->apply_preprocessor(force);
314  default:
316  };
317  break;
318  case C_COMBINED:
319  SG_ERROR("Combined feature objects cannot be preprocessed. Only its sub-feature objects!\n")
320  break;
321  default:
323  }
324 
325  return false;
326 }
#define SG_INFO(...)
Definition: SGIO.h:118
virtual bool init(CFeatures *features)=0
CSGObject * get_next_element()
Definition: List.h:185
EPreprocessorType
Definition: Preprocessor.h:32
int32_t index_t
Definition: common.h:62
#define SG_ERROR(...)
Definition: SGIO.h:129
#define SG_NOTIMPLEMENTED
Definition: SGIO.h:139
CPreprocessor * get_preprocessor(int32_t num) const
Definition: Features.cpp:93
Preprocessor SortUlongString, sorts the individual strings in ascending order.
Preprocessor LogPlusOne does what the name says, it adds one to a dense real valued vector and takes ...
Definition: LogPlusOne.h:34
CSGObject * delete_element()
Definition: List.h:502
EFeatureClass
shogun feature class
Definition: FeatureTypes.h:38
Preprocessor PruneVarSubMean will subtract the mean and remove features that have zero variance...
CPreprocessor * create_prunevarsubmean(bool divide_by_std=false)
int32_t get_num_preprocessors() const
Definition: Features.cpp:155
CSGObject * get_first_element()
Definition: List.h:151
void list_feature_obj() const
Definition: Features.cpp:171
#define ASSERT(x)
Definition: SGIO.h:201
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:112
CPreprocessor * create_generic(EPreprocessorType type)
double float64_t
Definition: common.h:50
bool preproc_all_features(CFeatures *f, bool force)
virtual EFeatureClass get_feature_class() const =0
int32_t get_num_elements()
Definition: List.h:145
bool preprocess_features(CFeatures *trainfeat, CFeatures *testfeat, bool force)
virtual void add_preprocessor(CPreprocessor *p)
Definition: Features.cpp:85
#define SG_UNREF(x)
Definition: SGObject.h:52
#define SG_DEBUG(...)
Definition: SGIO.h:107
Preprocessor NormOne, normalizes vectors to have norm 1.
Definition: NormOne.h:34
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
bool add_preproc(CPreprocessor *preproc)
The class Features is the base class of all feature objects.
Definition: Features.h:68
Preprocessor PCA performs principal component analysis on input feature vectors/matrices. When the init method in PCA is called with proper feature matrix X (with say N number of vectors and D feature dimension), a transformation matrix is computed and stored internally. This transformation matrix is then used to transform all D-dimensional feature vectors or feature matrices (with D feature dimensions) supplied via apply_to_feature_matrix or apply_to_feature_vector methods. This transformation outputs the T-Dimensional approximation of all these input vectors and matrices (where T<=min(D,N)). The transformation matrix is essentially a DxT matrix, the columns of which correspond to the eigenvectors of the covariance matrix (XX') having top T eigenvalues.
Definition: PCA.h:113
bool attach_preproc(char *target, bool do_force=false)
Class Preprocessor defines a preprocessor interface.
Definition: Preprocessor.h:75
Preprocessor that decompresses compressed strings.
Preprocessor SortWordString, sorts the individual strings in ascending order.
The class CombinedFeatures is used to combine a number of feature objects into a single CombinedFe...
bool append_element_at_listend(CSGObject *data)
Definition: List.h:386
CPreprocessor * create_pca(bool do_whitening, float64_t threshold)
virtual EFeatureType get_feature_type() const =0
Class List implements a doubly connected list for low-level-objects.
Definition: List.h:84

SHOGUN Machine Learning Toolbox - Documentation