SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Features.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Written (W) 2011-2012 Heiko Strathmann
10  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
11  */
12 
15 #include <shogun/io/SGIO.h>
16 #include <shogun/base/Parameter.h>
17 
18 #include <string.h>
19 
20 using namespace shogun;
21 
22 CFeatures::CFeatures(int32_t size)
23 : CSGObject()
24 {
25  init();
26  cache_size = size;
27 }
28 
30 : CSGObject(orig)
31 {
32  init();
33 
34  preproc = orig.preproc;
35  num_preproc = orig.num_preproc;
36 
37  preprocessed=SG_MALLOC(bool, orig.num_preproc);
38  memcpy(preprocessed, orig.preprocessed, sizeof(bool)*orig.num_preproc);
39 }
40 
42 : CSGObject()
43 {
44  init();
45 
46  load(loader);
47  SG_INFO("Feature object loaded (%p)\n",this) ;
48 }
49 
51 {
54 }
55 
56 void CFeatures::init()
57 {
58  SG_ADD(&properties, "properties", "Feature properties", MS_NOT_AVAILABLE);
59  SG_ADD(&cache_size, "cache_size", "Size of cache in MB", MS_NOT_AVAILABLE);
60 
61  /* TODO, use SGVector for arrays to be able to use SG_ADD macro */
62  m_parameters->add_vector((CSGObject***) &preproc, &num_preproc, "preproc",
63  "List of preprocessors");
64  m_parameters->add_vector(&preprocessed, &num_preproc, "preprocessed",
65  "Feature[i] is already preprocessed");
66 
67  SG_ADD((CSGObject**)&m_subset_stack, "subset_stack", "Stack of subsets",
69 
72 
73  properties = FP_NONE;
74  cache_size = 0;
75  preproc = NULL;
76  num_preproc = 0;
77  preprocessed = NULL;
78 }
79 
82 {
83  SG_INFO( "%d preprocs currently, new preproc list is\n", num_preproc);
84  ASSERT(p);
85 
86  bool* preprocd=SG_MALLOC(bool, num_preproc+1);
87  CPreprocessor** pps=SG_MALLOC(CPreprocessor*, num_preproc+1);
88  for (int32_t i=0; i<num_preproc; i++)
89  {
90  pps[i]=preproc[i];
91  preprocd[i]=preprocessed[i];
92  }
93  SG_FREE(preproc);
94  SG_FREE(preprocessed);
95  preproc=pps;
96  preprocessed=preprocd;
97  preproc[num_preproc]=p;
98  preprocessed[num_preproc]=false;
99 
100  num_preproc++;
101 
102  for (int32_t i=0; i<num_preproc; i++)
103  SG_INFO( "preproc[%d]=%s %ld\n",i, preproc[i]->get_name(), preproc[i]) ;
104 
105  SG_REF(p);
106 
107  return num_preproc;
108 }
109 
112 {
113  if (num<num_preproc)
114  {
115  SG_REF(preproc[num]);
116  return preproc[num];
117  }
118  else
119  return NULL;
120 }
121 
124 {
125  int32_t num=0;
126 
127  for (int32_t i=0; i<num_preproc; i++)
128  {
129  if (preprocessed[i])
130  num++;
131  }
132 
133  return num;
134 }
135 
138 {
139  while (del_preprocessor(0));
140 }
141 
144 {
145  CPreprocessor** pps=NULL;
146  bool* preprocd=NULL;
147  CPreprocessor* removed_preproc=NULL;
148 
149  if (num_preproc>0 && num<num_preproc)
150  {
151  removed_preproc=preproc[num];
152 
153  if (num_preproc>1)
154  {
155  pps= SG_MALLOC(CPreprocessor*, num_preproc-1);
156  preprocd= SG_MALLOC(bool, num_preproc-1);
157 
158  if (pps && preprocd)
159  {
160  int32_t j=0;
161  for (int32_t i=0; i<num_preproc; i++)
162  {
163  if (i!=num)
164  {
165  pps[j]=preproc[i];
166  preprocd[j]=preprocessed[i];
167  j++;
168  }
169  }
170  }
171  }
172 
173  SG_FREE(preproc);
174  preproc=pps;
175  SG_FREE(preprocessed);
176  preprocessed=preprocd;
177 
178  num_preproc--;
179 
180  for (int32_t i=0; i<num_preproc; i++)
181  SG_INFO( "preproc[%d]=%s\n",i, preproc[i]->get_name()) ;
182  }
183 
184  SG_UNREF(removed_preproc);
185  return removed_preproc;
186 }
187 
189 {
190  preprocessed[num]=true;
191 }
192 
193 bool CFeatures::is_preprocessed(int32_t num) const
194 {
195  return preprocessed[num];
196 }
197 
199 {
200  return num_preproc;
201 }
202 
204 {
205  return cache_size;
206 }
207 
208 bool CFeatures::reshape(int32_t num_features, int32_t num_vectors)
209 {
211  return false;
212 }
213 
215 {
216  SG_INFO( "%p - ", this);
217  switch (get_feature_class())
218  {
219  case C_UNKNOWN:
220  SG_INFO( "C_UNKNOWN ");
221  break;
222  case C_DENSE:
223  SG_INFO( "C_DENSE ");
224  break;
225  case C_SPARSE:
226  SG_INFO( "C_SPARSE ");
227  break;
228  case C_STRING:
229  SG_INFO( "C_STRING ");
230  break;
231  case C_COMBINED:
232  SG_INFO( "C_COMBINED ");
233  break;
234  case C_COMBINED_DOT:
235  SG_INFO( "C_COMBINED_DOT ");
236  break;
237  case C_WD:
238  SG_INFO( "C_WD ");
239  break;
240  case C_SPEC:
241  SG_INFO( "C_SPEC ");
242  break;
243  case C_WEIGHTEDSPEC:
244  SG_INFO( "C_WEIGHTEDSPEC ");
245  break;
246  case C_STREAMING_DENSE:
247  SG_INFO( "C_STREAMING_DENSE ");
248  break;
249  case C_STREAMING_SPARSE:
250  SG_INFO( "C_STREAMING_SPARSE ");
251  break;
252  case C_STREAMING_STRING:
253  SG_INFO( "C_STREAMING_STRING ");
254  break;
255  case C_STREAMING_VW:
256  SG_INFO( "C_STREAMING_VW ");
257  break;
258  case C_ANY:
259  SG_INFO( "C_ANY ");
260  break;
261  default:
262  SG_ERROR( "ERROR UNKNOWN FEATURE CLASS");
263  }
264 
265  switch (get_feature_type())
266  {
267  case F_UNKNOWN:
268  SG_INFO( "F_UNKNOWN \n");
269  break;
270  case F_CHAR:
271  SG_INFO( "F_CHAR \n");
272  break;
273  case F_BYTE:
274  SG_INFO( "F_BYTE \n");
275  break;
276  case F_SHORT:
277  SG_INFO( "F_SHORT \n");
278  break;
279  case F_WORD:
280  SG_INFO( "F_WORD \n");
281  break;
282  case F_INT:
283  SG_INFO( "F_INT \n");
284  break;
285  case F_UINT:
286  SG_INFO( "F_UINT \n");
287  break;
288  case F_LONG:
289  SG_INFO( "F_LONG \n");
290  break;
291  case F_ULONG:
292  SG_INFO( "F_ULONG \n");
293  break;
294  case F_SHORTREAL:
295  SG_INFO( "F_SHORTEAL \n");
296  break;
297  case F_DREAL:
298  SG_INFO( "F_DREAL \n");
299  break;
300  case F_LONGREAL:
301  SG_INFO( "F_LONGREAL \n");
302  break;
303  case F_ANY:
304  SG_INFO( "F_ANY \n");
305  break;
306  default:
307  SG_ERROR( "ERROR UNKNOWN FEATURE TYPE\n");
308  }
309 }
310 
311 
312 void CFeatures::load(CFile* loader)
313 {
317 }
318 
319 void CFeatures::save(CFile* writer)
320 {
324 }
325 
327 {
328  bool result=false;
329 
330  if (f)
331  result= ( (this->get_feature_class() == f->get_feature_class()) &&
332  (this->get_feature_type() == f->get_feature_type()));
333  return result;
334 }
335 
337 {
338  return (properties & p) != 0;
339 }
340 
342 {
343  properties |= p;
344 }
345 
347 {
348  properties &= (properties | p) ^ p;
349 }
350 
352 {
353  m_subset_stack->add_subset(subset);
355 }
356 
358 {
361 }
362 
364 {
367 }
368 
370 {
371  return m_subset_stack;
372 }
373 
375 {
376  SG_ERROR("%s::copy_subset(): copy_subset and therefore model storage of "
377  "CMachine (required for cross-validation and model-selection is "
378  "not yet implemented yet. Ask developers!\n", get_name());
379  return NULL;
380 }

SHOGUN Machine Learning Toolbox - Documentation