SHOGUN  4.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
GUIFeatures.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2008 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #include <shogun/ui/GUIFeatures.h>
13 #include <shogun/ui/SGInterface.h>
14 
15 #include <shogun/lib/config.h>
16 #include <shogun/io/SGIO.h>
17 #include <shogun/io/CSVFile.h>
18 
19 using namespace shogun;
20 
21 CGUIFeatures::CGUIFeatures(CSGInterface* ui_)
22 : CSGObject(), ui(ui_), train_features(NULL), test_features(NULL),
23  ref_features(NULL)
24 {
25 }
26 
28 {
32 }
33 
35 {
36  CKernel *k = ui->ui_kernel->get_kernel();
37  if (k)
38  k->remove_lhs();
39 }
40 
42 {
43  CKernel *k = ui->ui_kernel->get_kernel();
44  if (k)
45  k->remove_rhs();
46 }
47 
49  char* filename, char* fclass, char* type, char* target, int32_t size,
50  int32_t comp_features)
51 {
52  bool result=false;
53  CFeatures** f_ptr=NULL;
54 
55  if (strncmp(target, "TRAIN", 5)==0)
56  {
57  f_ptr=&train_features;
59  }
60  else if (strncmp(target, "TEST", 4)==0)
61  {
62  f_ptr=&test_features;
64  }
65  else
66  SG_ERROR("Unknown target %s, neither TRAIN nor TEST.\n", target)
67 
68  SG_UNREF(*f_ptr);
69  *f_ptr=NULL;
70 
71  CCSVFile* file=new CCSVFile(filename);
72  if (strncmp(fclass, "SIMPLE", 6)==0)
73  {
74  if (strncmp(type, "REAL", 4)==0)
75  {
76  *f_ptr=new CDenseFeatures<float64_t>(file);
77  }
78  else if (strncmp(type, "BYTE", 4)==0)
79  {
81  *f_ptr=new CDenseFeatures<uint8_t>(file);
82  }
83  else if (strncmp(type, "CHAR", 4)==0)
84  {
86  *f_ptr=new CDenseFeatures<char>(file);
87  }
88  else if (strncmp(type, "SHORT", 5)==0)
89  {
90  *f_ptr=new CDenseFeatures<int16_t>(file);
91  }
92  else
93  {
94  SG_ERROR("Unknown type.\n")
95  return false;
96  }
97  }
98  else if (strncmp(fclass, "SPARSE", 6)==0)
99  {
101  }
102  else if (strncmp(fclass, "STRING", 6)==0)
103  {
104  if (strncmp(type, "REAL", 4)==0)
105  {
106  *f_ptr=new CStringFeatures<float64_t>(file);
107  }
108  else if (strncmp(type, "BYTE", 4)==0)
109  {
111  *f_ptr=new CStringFeatures<uint8_t>(file, DNA);
112  }
113  else if (strncmp(type, "CHAR", 4)==0)
114  {
116  *f_ptr=new CStringFeatures<char>(file, DNA);
117  }
118  else if (strncmp(type, "SHORT", 5)==0)
119  {
120  *f_ptr=new CStringFeatures<int16_t>(file);
121  }
122  else if (strncmp(type, "WORD", 4)==0)
123  {
124  *f_ptr=new CStringFeatures<uint16_t>(file);
125  }
126  else if (strncmp(type, "ULONG", 5)==0)
127  {
128  *f_ptr=new CStringFeatures<uint64_t>(file);
129  }
130  else
131  {
132  SG_ERROR("Unknown type.\n")
133  return false;
134  }
135  }
136  SG_UNREF(file);
137 
138  return result;
139 }
140 
141 bool CGUIFeatures::save(char* filename, char* type, char* target)
142 {
143  bool result=false;
144 
145  CFeatures** f_ptr=NULL;
146 
147  if (strncmp(target, "TRAIN", 5)==0)
148  {
149  f_ptr=&train_features;
150  }
151  else if (strncmp(target, "TEST", 4)==0)
152  {
153  f_ptr=&test_features;
154  }
155  else
156  SG_ERROR("Unknown target %s, neither TRAIN nor TEST.\n", target)
157 
158  if (*f_ptr)
159  {
160  try
161  {
162  CCSVFile* file=new CCSVFile(filename, 'w');
163  if (strncmp(type, "REAL", 4)==0)
164  {
165  ((CDenseFeatures<float64_t>*) (*f_ptr))->save(file);
166  }
167  else if (strncmp(type, "BYTE", 4)==0)
168  {
169  ((CDenseFeatures<uint8_t>*) (*f_ptr))->save(file);
170  }
171  else if (strncmp(type, "CHAR", 4)==0)
172  {
173  ((CDenseFeatures<char>*) (*f_ptr))->save(file);
174  }
175  else if (strncmp(type, "SHORT", 5)==0)
176  {
177  ((CDenseFeatures<int16_t>*) (*f_ptr))->save(file);
178  }
179  else if (strncmp(type, "WORD", 4)==0)
180  {
181  ((CDenseFeatures<uint16_t>*) (*f_ptr))->save(file);
182  }
183  else
184  {
185  SG_ERROR("Unknown type.\n")
186  return false;
187  }
188  SG_UNREF(file);
189  }
190  catch (...)
191  {
192  SG_ERROR("Writing to file %s failed!\n", filename)
193  }
194 
195  SG_INFO("Successfully written features into \"%s\" !\n", filename)
196  result=true;
197 
198  } else
199  SG_ERROR("Set features first.\n")
200 
201  return result;
202 }
203 
204 bool CGUIFeatures::clean(char* target)
205 {
206  if (strncmp(target, "TRAIN", 5)==0)
207  set_train_features(NULL);
208  else if (strncmp(target, "TEST", 4)==0)
209  set_test_features(NULL);
210  else
211  SG_ERROR("Unknown target %s, neither TRAIN nor TEST.\n", target)
212 
213  return true;
214 }
215 
216 bool CGUIFeatures::reshape(char* target, int32_t num_feat, int32_t num_vec)
217 {
218  CFeatures** f_ptr=NULL;
219 
220  if (strncmp(target, "TRAIN", 5)==0)
221  {
222  f_ptr=&train_features;
224  }
225  else if (strncmp(target, "TEST", 4)==0)
226  {
227  f_ptr=&test_features;
228  invalidate_test();
229  }
230  else
231  {
232  SG_ERROR("Invalid target %s\n", target)
233  return false;
234  }
235 
236  bool result=false;
237  if (f_ptr)
238  {
239  SG_INFO("reshape data to %d x %d\n", num_feat, num_vec)
240  result=(*f_ptr)->reshape(num_feat, num_vec);
241 
242  if (!result)
243  SG_ERROR("Reshaping failed.\n")
244  }
245 
246  return result;
247 }
248 
250 {
251  CFeatures* features;
252 
253  if (strncmp(target, "TEST", 4)==0)
254  features=get_test_features();
255  else if (strncmp(target, "TRAIN", 5)==0)
256  features=get_train_features();
257  else
258  return NULL;
259 
260  if (features->get_feature_class()==C_COMBINED)
261  features=((CCombinedFeatures*) features)->get_last_feature_obj();
262 
263  return features;
264 }
265 
266 bool CGUIFeatures::set_convert_features(CFeatures* features, char* target)
267 {
268  CFeatures* features_prev;
269 
270  if (strncmp(target, "TEST", 4)==0)
271  features_prev=get_test_features();
272  else if (strncmp(target, "TRAIN", 5)==0)
273  features_prev=get_train_features();
274  else
275  return false;
276 
277  // in case of combined features delete current (==last) feature obj
278  // pointer from list (feature object got deleted already above)
279  // and append *f_ptr which holds the newly created feature object
280  if (features_prev->get_feature_class()==C_COMBINED)
281  {
282  CCombinedFeatures* combined=(CCombinedFeatures*) features_prev;
283  combined->delete_feature_obj(combined->get_num_feature_obj()-1);
284  combined->append_feature_obj(features);
285  combined->list_feature_objs();
286  }
287  else // set features to new test/train features
288  {
289  if (strncmp(target, "TEST", 4)==0)
290  set_test_features(features);
291  else
292  set_train_features(features);
293  }
294 
295  return true;
296 }
297 
300 {
301  if (src &&
302  src->get_feature_class()==C_DENSE &&
303  src->get_feature_type()==F_DREAL)
304  {
305  //create sparse features with 0 cache
306  SG_INFO("Attempting to convert dense feature matrix to a sparse one.\n")
308  int32_t num_f=0;
309  int32_t num_v=0;
310  float64_t* feats=src->get_feature_matrix(num_f, num_v);
311  target->set_full_feature_matrix(SGMatrix<float64_t>(feats, num_f, num_v));
312  return target;
313  }
314  else
315  SG_ERROR("No SIMPLE DREAL features available.\n")
316 
317  return NULL;
318 }
319 
322 {
323  if (src && src->get_feature_class()==C_DENSE)
324  {
325  int32_t num_vec=src->get_num_vectors();
326  SGString<char>* strings=SG_MALLOC(SGString<char>, num_vec);
327  int32_t max_len=-1;
328 
329  for (int32_t i=0; i<num_vec; i++)
330  {
331  bool to_free=false;
332  int32_t len=0;
333  char* str=src->get_feature_vector(i, len, to_free);
334  strings[i].slen=len ;
335  for (int32_t j=0; j<len; j++)
336  if (str[j]==0)
337  {
338  strings[i].slen=j ;
339  break ;
340  } ;
341  strings[i].string=SG_MALLOC(char, strings[i].slen);
342 
343  for (int32_t j=0; j<strings[i].slen; j++)
344  strings[i].string[j]=str[j];
345 
346  if (strings[i].slen> max_len)
347  max_len=strings[i].slen;
348 
349  src->free_feature_vector(str, i, to_free);
350  }
351 
353  target->set_features(strings, num_vec, max_len);
354  return target;
355  }
356  else
357  SG_ERROR("No features of class/type SIMPLE/CHAR available.\n")
358 
359  return NULL;
360 }
361 
364 {
365  CPluginEstimate* pie=ui->ui_pluginestimate->get_estimator();
366 
367  if (src &&
368  src->get_feature_type()==F_WORD &&
369  src->get_feature_class()==C_DENSE &&
370  pie)
371  {
373  int32_t num_feat=src->get_num_features();
374  int32_t num_vec=src->get_num_vectors();
375  float64_t* fm=SG_MALLOC(float64_t, num_vec*num_feat);
376 
377  if (fm)
378  {
379  for (int32_t i=0; i<num_vec; i++)
380  {
381  int32_t len=0;
382  bool to_free=false;
383  uint16_t* vec = src->get_feature_vector(i, len, to_free);
384  ASSERT(num_feat==len)
385 
386  for (int32_t j=0; j<num_feat; j++)
387  fm[i*num_feat+j]=
388  pie->get_parameterwise_log_odds(vec[j], j);
389 
390  src->free_feature_vector(vec, i, to_free);
391  }
392  target->set_feature_matrix(SGMatrix<float64_t>(fm, num_feat, num_vec));
393 
394  }
395  return target;
396  }
397  else
398  SG_ERROR("No SIMPLE WORD features or PluginEstimator available.\n")
399 
400  return NULL;
401 }
402 
403 
406 {
407  CTOPFeatures* tf=NULL;
408 
409  if (src &&
410  src->get_feature_class()==C_DENSE &&
411  src->get_feature_type()==F_WORD)
412  {
413  SG_INFO("Converting to TOP features.\n")
414 
415  if (ui->ui_hmm->get_pos() && ui->ui_hmm->get_neg())
416  {
417  ui->ui_hmm->get_pos()->set_observations(src);
418  ui->ui_hmm->get_neg()->set_observations(src);
419 
420  bool neglinear=false;
421  bool poslinear=false;
422 
423  tf=new CTOPFeatures(
424  0, ui->ui_hmm->get_pos(), ui->ui_hmm->get_neg(),
425  neglinear, poslinear);
427  }
428  else
429  SG_ERROR("HMMs not correctly assigned!\n")
430  }
431  else
432  SG_ERROR("No SIMPLE WORD features available.\n")
433 
434  return tf;
435 }
436 
439 {
440  CFKFeatures* fkf=NULL;
441 
442  SG_INFO("Converting to FK features.\n")
443 
444  if (ui->ui_hmm->get_pos() && ui->ui_hmm->get_neg())
445  {
446  CStringFeatures<uint16_t>* old_obs_pos=
447  ui->ui_hmm->get_pos()->get_observations();
448  CStringFeatures<uint16_t>* old_obs_neg=
449  ui->ui_hmm->get_neg()->get_observations();
450 
451  CStringFeatures<uint16_t>* string_feat=src;
452  ui->ui_hmm->get_pos()->set_observations(string_feat);
453  ui->ui_hmm->get_neg()->set_observations(string_feat);
454 
455  fkf=new CFKFeatures(
456  0, ui->ui_hmm->get_pos(), ui->ui_hmm->get_neg());
457  //, neglinear, poslinear);
458  if (train_features)
459  fkf->set_opt_a(((CFKFeatures*) train_features)->get_weight_a());
460  else
461  SG_ERROR("Need train features to set optimal a.\n")
462 
463  ASSERT(fkf->set_feature_matrix())
464 
465  ui->ui_hmm->get_pos()->set_observations(old_obs_pos);
466  ui->ui_hmm->get_neg()->set_observations(old_obs_neg);
467  }
468  else
469  SG_ERROR("HMMs not correctly assigned!\n")
470 
471  return fkf;
472 }
473 
474 
477 {
478  if (src &&
479  src->get_feature_class()==C_SPARSE &&
480  src->get_feature_type() == F_DREAL)
481  {
482  //create dense features with 0 cache
483  SG_INFO("Attempting to convert sparse feature matrix to a dense one.\n")
485  if (rf)
486  {
488  rf->set_feature_matrix(feats);
489  return rf;
490  }
491  }
492  else
493  SG_ERROR("No SPARSE REAL features available.\n")
494 
495  return NULL;
496 }
497 
499  CStringFeatures<uint16_t>* src, bool use_norm)
500 {
501  return new CExplicitSpecFeatures(src, use_norm);
502 }
503 
505  CDenseFeatures<char>* src, float64_t gap_cost)
506 {
507  if (src &&
508  src->get_feature_class()==C_DENSE &&
509  src->get_feature_type()==F_CHAR)
510  {
511  //create dense features with 0 cache
512  SG_INFO("Converting CHAR features to REAL ones.\n")
513 
515  if (rf)
516  {
517  SG_INFO("Start aligment with gapCost=%1.2f.\n", gap_cost)
518  /*rf->Align_char_features(
519  src, (CDenseFeatures<char>*) ref_features, gap_cost);*/
520  SG_INFO("Conversion was successful.\n")
521  return rf;
522  }
523  }
524  else
525  SG_ERROR("No SIMPLE CHAR features available.\n")
526 
527  SG_ERROR("Conversion failed.\n")
528  return NULL;
529 }
530 
532 {
533  if (strncmp(target, "TRAIN", 5)==0)
534  {
537  train_features=NULL;
539  return true;
540  }
541  else if (strncmp(target, "TEST", 4)==0)
542  {
545  test_features=NULL;
546  invalidate_test();
547  return true;
548  }
549 
550  return false;
551 }
552 
554 {
555  ASSERT(f)
557 
558  if (!train_features)
559  {
562  }
563 
565  {
566  CFeatures* first_elem=train_features;
569  ((CCombinedFeatures*) train_features)->append_feature_obj(first_elem);
570  ((CCombinedFeatures*) train_features)->list_feature_objs();
571  SG_UNREF(first_elem);
572  }
573 
574  bool result=((CCombinedFeatures*) train_features)->append_feature_obj(f);
575  if (result)
576  ((CCombinedFeatures*) train_features)->list_feature_objs();
577  else
578  SG_ERROR("appending feature object failed\n")
579 }
580 
582 {
583  ASSERT(f)
584  SG_PRINT("DOTFVEC %d\n", f->get_num_vectors())
586 
587  if (!train_features)
588  {
591  }
592 
594  {
596  SG_ERROR("Trainfeatures not based on DotFeatures.\n")
597 
601  ((CCombinedDotFeatures*) train_features)->append_feature_obj(first_elem);
602  ((CCombinedDotFeatures*) train_features)->list_feature_objs();
603  SG_UNREF(first_elem);
604  }
605 
606  bool result=((CCombinedDotFeatures*) train_features)->append_feature_obj(f);
607  if (result)
608  ((CCombinedDotFeatures*) train_features)->list_feature_objs();
609  else
610  SG_ERROR("appending dot feature object failed\n")
611 }
612 
614 {
615  ASSERT(f)
616  invalidate_test();
617 
618  if (!test_features)
619  {
622  }
623 
625  {
627  SG_ERROR("Trainfeatures not based on DotFeatures.\n")
628 
629  CDotFeatures* first_elem=(CDotFeatures*) test_features;
632  ((CCombinedDotFeatures*) test_features)->append_feature_obj(first_elem);
633  ((CCombinedDotFeatures*) test_features)->list_feature_objs();
634  SG_UNREF(first_elem);
635  }
636 
637  bool result=((CCombinedDotFeatures*) test_features)->append_feature_obj(f);
638  if (result)
639  ((CCombinedDotFeatures*) test_features)->list_feature_objs();
640  else
641  SG_ERROR("Appending feature object failed.\n")
642 }
643 
645 {
646  ASSERT(f)
647  invalidate_test();
648 
649  if (!test_features)
650  {
653  }
654 
656  {
657  CFeatures* first_elem=test_features;
660  ((CCombinedFeatures*) test_features)->append_feature_obj(first_elem);
661  ((CCombinedFeatures*) test_features)->list_feature_objs();
662  SG_UNREF(first_elem);
663  }
664 
665  bool result=((CCombinedFeatures*) test_features)->append_feature_obj(f);
666  if (result)
667  ((CCombinedFeatures*) test_features)->list_feature_objs();
668  else
669  SG_ERROR("Appending feature object failed.\n")
670 }
671 
673 {
674  CCombinedFeatures* cf=NULL;
675  if (strncmp(target, "TRAIN", 5)==0)
676  {
677  if (!train_features)
678  SG_ERROR("No train features available.\n")
680  SG_ERROR("Train features are not combined features.\n")
681 
683  }
684  else if (strncmp(target, "TEST", 4)==0)
685  {
686  if (!test_features)
687  SG_ERROR("No test features available.\n")
689  SG_ERROR("Test features are not combined features.\n")
690 
692  }
693  else
694  SG_ERROR("Unknown target %s, neither TRAIN nor TEST.\n", target)
695 
696  if (!cf->delete_feature_obj(cf->get_num_feature_obj()-1))
697  SG_ERROR("No features available to delete.\n")
698 
699  return false;
700 }
float64_t set_opt_a(float64_t a=-1)
Definition: FKFeatures.cpp:91
void add_train_features(CFeatures *f)
CSGInterface * ui
Definition: GUIFeatures.h:228
CFeatures * test_features
Definition: GUIFeatures.h:232
#define SG_INFO(...)
Definition: SGIO.h:118
DNA - letters A,C,G,T.
Definition: Alphabet.h:26
CDenseFeatures< float64_t > * convert_simple_word_to_simple_salzberg(CDenseFeatures< uint16_t > *src)
Template class StringFeatures implements a list of strings.
ST * get_feature_vector(int32_t num, int32_t &len, bool &dofree)
int32_t get_num_features() const
bool set_test_features(CFeatures *f)
Definition: GUIFeatures.h:75
CTOPFeatures * convert_string_word_to_simple_top(CStringFeatures< uint16_t > *src)
CStringFeatures< char > * convert_simple_char_to_string_char(CDenseFeatures< char > *src)
CExplicitSpecFeatures * convert_string_byte_to_spec_word(CStringFeatures< uint16_t > *src, bool use_norm)
SGMatrix< ST > get_feature_matrix()
bool load(char *filename, char *fclass, char *type, char *target, int32_t size, int32_t comp_features)
Definition: GUIFeatures.cpp:48
void set_feature_matrix(SGMatrix< ST > matrix)
virtual float64_t * set_feature_matrix()
virtual int32_t get_num_vectors() const =0
#define SG_ERROR(...)
Definition: SGIO.h:129
#define SG_NOTIMPLEMENTED
Definition: SGIO.h:139
The class Alphabet implements an alphabet and alphabet utility functions.
Definition: Alphabet.h:91
float64_t get_parameterwise_log_odds(uint16_t obs, int32_t position)
bool save(char *filename, char *type, char *target)
virtual void remove_rhs()
takes all necessary steps if the rhs is removed from kernel
Definition: Kernel.cpp:693
Features that support dot products among other operations.
Definition: DotFeatures.h:44
bool del_last_feature_obj(char *target)
CFKFeatures * convert_string_word_to_simple_fk(CStringFeatures< uint16_t > *src)
#define SG_REF(x)
Definition: SGObject.h:54
Class CSVFile used to read data from comma-separated values (CSV) files. See http://en.wikipedia.org/wiki/Comma-separated_values.
Definition: CSVFile.h:29
void add_test_dotfeatures(CDotFeatures *f)
#define SG_PRINT(...)
Definition: SGIO.h:137
#define ASSERT(x)
Definition: SGIO.h:201
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:115
virtual int32_t get_num_vectors() const
bool set_convert_features(CFeatures *features, char *target)
virtual EFeatureClass get_feature_class() const
void add_test_features(CFeatures *f)
double float64_t
Definition: common.h:50
CFeatures * ref_features
Definition: GUIFeatures.h:234
CSparseFeatures< float64_t > * convert_simple_real_to_sparse_real(CDenseFeatures< float64_t > *src)
virtual EFeatureClass get_feature_class() const
virtual void set_full_feature_matrix(SGMatrix< ST > full)
virtual EFeatureClass get_feature_class() const =0
CFeatures * get_train_features()
Definition: GUIFeatures.h:56
bool set_train_features(CFeatures *f)
Definition: GUIFeatures.h:63
virtual EFeatureType get_feature_type() const
virtual EFeatureType get_feature_type() const
Features that compute the Spectrum Kernel feature space explicitly.
The class TOPFeatures implements TOP kernel features obtained from two Hidden Markov models...
Definition: TOPFeatures.h:70
bool clean(char *target)
#define SG_UNREF(x)
Definition: SGObject.h:55
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
void free_feature_vector(ST *feat_vec, int32_t num, bool dofree)
CFeatures * get_test_features()
Definition: GUIFeatures.h:58
virtual EFeatureClass get_feature_class() const
virtual float64_t * set_feature_matrix()
Definition: FKFeatures.cpp:214
CDenseFeatures< float64_t > * convert_simple_char_to_simple_align(CDenseFeatures< char > *src, float64_t gap_cost=0)
The class Features is the base class of all feature objects.
Definition: Features.h:68
bool set_reference_features(char *target)
class PluginEstimate
virtual void remove_lhs()
Definition: Kernel.cpp:679
Features that allow stacking of a number of DotFeatures.
index_t slen
Definition: SGString.h:79
void add_train_dotfeatures(CDotFeatures *f)
virtual EFeatureType get_feature_type() const
The Kernel base class.
Definition: Kernel.h:159
bool reshape(char *target, int32_t num_feat, int32_t num_vec)
CFeatures * train_features
Definition: GUIFeatures.h:230
SGMatrix< ST > get_full_feature_matrix()
bool has_property(EFeatureProperty p) const
Definition: Features.cpp:295
The class CombinedFeatures is used to combine a number of of feature objects into a single CombinedFe...
bool delete_feature_obj(int32_t idx)
CDenseFeatures< float64_t > * convert_sparse_real_to_simple_real(CSparseFeatures< float64_t > *src)
The class FKFeatures implements Fischer kernel features obtained from two Hidden Markov models...
Definition: FKFeatures.h:43
bool append_feature_obj(CFeatures *obj)
void set_features(SGStringList< ST > feats)
CFeatures * get_convert_features(char *target)

SHOGUN Machine Learning Toolbox - Documentation