Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #include <shogun/features/Features.h>
00014 #include <shogun/preprocessor/Preprocessor.h>
00015 #include <shogun/io/SGIO.h>
00016 #include <shogun/base/Parameter.h>
00017
00018 #include <string.h>
00019
00020 using namespace shogun;
00021
00022 CFeatures::CFeatures(int32_t size)
00023 : CSGObject()
00024 {
00025 init();
00026 cache_size = size;
00027 }
00028
00029 CFeatures::CFeatures(const CFeatures& orig)
00030 : CSGObject(orig)
00031 {
00032 init();
00033
00034 preproc = orig.preproc;
00035 num_preproc = orig.num_preproc;
00036
00037 preprocessed=SG_MALLOC(bool, orig.num_preproc);
00038 memcpy(preprocessed, orig.preprocessed, sizeof(bool)*orig.num_preproc);
00039 }
00040
00041 CFeatures::CFeatures(CFile* loader)
00042 : CSGObject()
00043 {
00044 init();
00045
00046 load(loader);
00047 SG_INFO("Feature object loaded (%p)\n",this) ;
00048 }
00049
00050 CFeatures::~CFeatures()
00051 {
00052 clean_preprocessors();
00053 SG_UNREF(m_subset_stack);
00054 }
00055
00056 void CFeatures::init()
00057 {
00058 SG_ADD(&properties, "properties", "Feature properties", MS_NOT_AVAILABLE);
00059 SG_ADD(&cache_size, "cache_size", "Size of cache in MB", MS_NOT_AVAILABLE);
00060
00061
00062 m_parameters->add_vector((CSGObject***) &preproc, &num_preproc, "preproc",
00063 "List of preprocessors");
00064 m_parameters->add_vector(&preprocessed, &num_preproc, "preprocessed",
00065 "Feature[i] is already preprocessed");
00066
00067 SG_ADD((CSGObject**)&m_subset_stack, "subset_stack", "Stack of subsets",
00068 MS_NOT_AVAILABLE);
00069
00070 m_subset_stack=new CSubsetStack();
00071 SG_REF(m_subset_stack);
00072
00073 properties = FP_NONE;
00074 cache_size = 0;
00075 preproc = NULL;
00076 num_preproc = 0;
00077 preprocessed = NULL;
00078 }
00079
00081 int32_t CFeatures::add_preprocessor(CPreprocessor* p)
00082 {
00083 SG_INFO( "%d preprocs currently, new preproc list is\n", num_preproc);
00084 ASSERT(p);
00085
00086 bool* preprocd=SG_MALLOC(bool, num_preproc+1);
00087 CPreprocessor** pps=SG_MALLOC(CPreprocessor*, num_preproc+1);
00088 for (int32_t i=0; i<num_preproc; i++)
00089 {
00090 pps[i]=preproc[i];
00091 preprocd[i]=preprocessed[i];
00092 }
00093 SG_FREE(preproc);
00094 SG_FREE(preprocessed);
00095 preproc=pps;
00096 preprocessed=preprocd;
00097 preproc[num_preproc]=p;
00098 preprocessed[num_preproc]=false;
00099
00100 num_preproc++;
00101
00102 for (int32_t i=0; i<num_preproc; i++)
00103 SG_INFO( "preproc[%d]=%s %ld\n",i, preproc[i]->get_name(), preproc[i]) ;
00104
00105 SG_REF(p);
00106
00107 return num_preproc;
00108 }
00109
00111 CPreprocessor* CFeatures::get_preprocessor(int32_t num) const
00112 {
00113 if (num<num_preproc)
00114 {
00115 SG_REF(preproc[num]);
00116 return preproc[num];
00117 }
00118 else
00119 return NULL;
00120 }
00121
00123 int32_t CFeatures::get_num_preprocessed() const
00124 {
00125 int32_t num=0;
00126
00127 for (int32_t i=0; i<num_preproc; i++)
00128 {
00129 if (preprocessed[i])
00130 num++;
00131 }
00132
00133 return num;
00134 }
00135
00137 void CFeatures::clean_preprocessors()
00138 {
00139 while (del_preprocessor(0));
00140 }
00141
00143 CPreprocessor* CFeatures::del_preprocessor(int32_t num)
00144 {
00145 CPreprocessor** pps=NULL;
00146 bool* preprocd=NULL;
00147 CPreprocessor* removed_preproc=NULL;
00148
00149 if (num_preproc>0 && num<num_preproc)
00150 {
00151 removed_preproc=preproc[num];
00152
00153 if (num_preproc>1)
00154 {
00155 pps= SG_MALLOC(CPreprocessor*, num_preproc-1);
00156 preprocd= SG_MALLOC(bool, num_preproc-1);
00157
00158 if (pps && preprocd)
00159 {
00160 int32_t j=0;
00161 for (int32_t i=0; i<num_preproc; i++)
00162 {
00163 if (i!=num)
00164 {
00165 pps[j]=preproc[i];
00166 preprocd[j]=preprocessed[i];
00167 j++;
00168 }
00169 }
00170 }
00171 }
00172
00173 SG_FREE(preproc);
00174 preproc=pps;
00175 SG_FREE(preprocessed);
00176 preprocessed=preprocd;
00177
00178 num_preproc--;
00179
00180 for (int32_t i=0; i<num_preproc; i++)
00181 SG_INFO( "preproc[%d]=%s\n",i, preproc[i]->get_name()) ;
00182 }
00183
00184 SG_UNREF(removed_preproc);
00185 return removed_preproc;
00186 }
00187
00188 void CFeatures::set_preprocessed(int32_t num)
00189 {
00190 preprocessed[num]=true;
00191 }
00192
00193 bool CFeatures::is_preprocessed(int32_t num) const
00194 {
00195 return preprocessed[num];
00196 }
00197
00198 int32_t CFeatures::get_num_preprocessors() const
00199 {
00200 return num_preproc;
00201 }
00202
00203 int32_t CFeatures::get_cache_size() const
00204 {
00205 return cache_size;
00206 }
00207
00208 bool CFeatures::reshape(int32_t num_features, int32_t num_vectors)
00209 {
00210 SG_NOTIMPLEMENTED;
00211 return false;
00212 }
00213
00214 void CFeatures::list_feature_obj() const
00215 {
00216 SG_INFO( "%p - ", this);
00217 switch (get_feature_class())
00218 {
00219 case C_UNKNOWN:
00220 SG_INFO( "C_UNKNOWN ");
00221 break;
00222 case C_DENSE:
00223 SG_INFO( "C_DENSE ");
00224 break;
00225 case C_SPARSE:
00226 SG_INFO( "C_SPARSE ");
00227 break;
00228 case C_STRING:
00229 SG_INFO( "C_STRING ");
00230 break;
00231 case C_COMBINED:
00232 SG_INFO( "C_COMBINED ");
00233 break;
00234 case C_COMBINED_DOT:
00235 SG_INFO( "C_COMBINED_DOT ");
00236 break;
00237 case C_WD:
00238 SG_INFO( "C_WD ");
00239 break;
00240 case C_SPEC:
00241 SG_INFO( "C_SPEC ");
00242 break;
00243 case C_WEIGHTEDSPEC:
00244 SG_INFO( "C_WEIGHTEDSPEC ");
00245 break;
00246 case C_STREAMING_DENSE:
00247 SG_INFO( "C_STREAMING_DENSE ");
00248 break;
00249 case C_STREAMING_SPARSE:
00250 SG_INFO( "C_STREAMING_SPARSE ");
00251 break;
00252 case C_STREAMING_STRING:
00253 SG_INFO( "C_STREAMING_STRING ");
00254 break;
00255 case C_STREAMING_VW:
00256 SG_INFO( "C_STREAMING_VW ");
00257 break;
00258 case C_ANY:
00259 SG_INFO( "C_ANY ");
00260 break;
00261 default:
00262 SG_ERROR( "ERROR UNKNOWN FEATURE CLASS");
00263 }
00264
00265 switch (get_feature_type())
00266 {
00267 case F_UNKNOWN:
00268 SG_INFO( "F_UNKNOWN \n");
00269 break;
00270 case F_CHAR:
00271 SG_INFO( "F_CHAR \n");
00272 break;
00273 case F_BYTE:
00274 SG_INFO( "F_BYTE \n");
00275 break;
00276 case F_SHORT:
00277 SG_INFO( "F_SHORT \n");
00278 break;
00279 case F_WORD:
00280 SG_INFO( "F_WORD \n");
00281 break;
00282 case F_INT:
00283 SG_INFO( "F_INT \n");
00284 break;
00285 case F_UINT:
00286 SG_INFO( "F_UINT \n");
00287 break;
00288 case F_LONG:
00289 SG_INFO( "F_LONG \n");
00290 break;
00291 case F_ULONG:
00292 SG_INFO( "F_ULONG \n");
00293 break;
00294 case F_SHORTREAL:
00295 SG_INFO( "F_SHORTEAL \n");
00296 break;
00297 case F_DREAL:
00298 SG_INFO( "F_DREAL \n");
00299 break;
00300 case F_LONGREAL:
00301 SG_INFO( "F_LONGREAL \n");
00302 break;
00303 case F_ANY:
00304 SG_INFO( "F_ANY \n");
00305 break;
00306 default:
00307 SG_ERROR( "ERROR UNKNOWN FEATURE TYPE\n");
00308 }
00309 }
00310
00311
00312 void CFeatures::load(CFile* loader)
00313 {
00314 SG_SET_LOCALE_C;
00315 SG_NOTIMPLEMENTED;
00316 SG_RESET_LOCALE;
00317 }
00318
00319 void CFeatures::save(CFile* writer)
00320 {
00321 SG_SET_LOCALE_C;
00322 SG_NOTIMPLEMENTED;
00323 SG_RESET_LOCALE;
00324 }
00325
00326 bool CFeatures::check_feature_compatibility(CFeatures* f) const
00327 {
00328 bool result=false;
00329
00330 if (f)
00331 result= ( (this->get_feature_class() == f->get_feature_class()) &&
00332 (this->get_feature_type() == f->get_feature_type()));
00333 return result;
00334 }
00335
00336 bool CFeatures::has_property(EFeatureProperty p) const
00337 {
00338 return (properties & p) != 0;
00339 }
00340
00341 void CFeatures::set_property(EFeatureProperty p)
00342 {
00343 properties |= p;
00344 }
00345
00346 void CFeatures::unset_property(EFeatureProperty p)
00347 {
00348 properties &= (properties | p) ^ p;
00349 }
00350
00351 void CFeatures::add_subset(SGVector<index_t> subset)
00352 {
00353 m_subset_stack->add_subset(subset);
00354 subset_changed_post();
00355 }
00356
00357 void CFeatures::remove_subset()
00358 {
00359 m_subset_stack->remove_subset();
00360 subset_changed_post();
00361 }
00362
00363 void CFeatures::remove_all_subsets()
00364 {
00365 m_subset_stack->remove_all_subsets();
00366 subset_changed_post();
00367 }
00368
00369 CSubsetStack* CFeatures::get_subset_stack()
00370 {
00371 return m_subset_stack;
00372 }
00373
00374 CFeatures* CFeatures::copy_subset(SGVector<index_t> indices)
00375 {
00376 SG_ERROR("%s::copy_subset(): copy_subset and therefore model storage of "
00377 "CMachine (required for cross-validation and model-selection is "
00378 "not yet implemented yet. Ask developers!\n", get_name());
00379 return NULL;
00380 }