Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #include <shogun/features/Features.h>
00014 #include <shogun/preprocessor/Preprocessor.h>
00015 #include <shogun/io/SGIO.h>
00016 #include <shogun/base/Parameter.h>
00017
00018 #include <string.h>
00019
00020 using namespace shogun;
00021
00022 CFeatures::CFeatures(int32_t size)
00023 : CSGObject()
00024 {
00025 init();
00026 cache_size = size;
00027 }
00028
00029 CFeatures::CFeatures(const CFeatures& orig)
00030 : CSGObject(orig)
00031 {
00032 init();
00033
00034 preproc = orig.preproc;
00035 num_preproc = orig.num_preproc;
00036
00037 preprocessed=SG_MALLOC(bool, orig.num_preproc);
00038 memcpy(preprocessed, orig.preprocessed, sizeof(bool)*orig.num_preproc);
00039 }
00040
00041 CFeatures::CFeatures(CFile* loader)
00042 : CSGObject()
00043 {
00044 init();
00045
00046 load(loader);
00047 SG_INFO("Feature object loaded (%p)\n",this) ;
00048 }
00049
00050 CFeatures::~CFeatures()
00051 {
00052 clean_preprocessors();
00053 delete m_subset;
00054 }
00055
00056 void
00057 CFeatures::init()
00058 {
00059 m_parameters->add(&properties, "properties",
00060 "Feature properties.");
00061 m_parameters->add(&cache_size, "cache_size",
00062 "Size of cache in MB.");
00063
00064 m_parameters->add_vector((CSGObject***) &preproc,
00065 &num_preproc, "preproc",
00066 "List of preprocessors.");
00067 m_parameters->add_vector(&preprocessed,
00068 &num_preproc, "preprocessed",
00069 "Feature[i] is already preprocessed.");
00070
00071 m_parameters->add((CSGObject**)&m_subset, "subset", "Subset object");
00072
00073 m_subset=NULL;
00074 properties = FP_NONE;
00075 cache_size = 0;
00076 preproc = NULL;
00077 num_preproc = 0;
00078 preprocessed = NULL;
00079 }
00080
00082 int32_t CFeatures::add_preprocessor(CPreprocessor* p)
00083 {
00084 SG_INFO( "%d preprocs currently, new preproc list is\n", num_preproc);
00085 ASSERT(p);
00086
00087 bool* preprocd=SG_MALLOC(bool, num_preproc+1);
00088 CPreprocessor** pps=SG_MALLOC(CPreprocessor*, num_preproc+1);
00089 for (int32_t i=0; i<num_preproc; i++)
00090 {
00091 pps[i]=preproc[i];
00092 preprocd[i]=preprocessed[i];
00093 }
00094 SG_FREE(preproc);
00095 SG_FREE(preprocessed);
00096 preproc=pps;
00097 preprocessed=preprocd;
00098 preproc[num_preproc]=p;
00099 preprocessed[num_preproc]=false;
00100
00101 num_preproc++;
00102
00103 for (int32_t i=0; i<num_preproc; i++)
00104 SG_INFO( "preproc[%d]=%s %ld\n",i, preproc[i]->get_name(), preproc[i]) ;
00105
00106 SG_REF(p);
00107
00108 return num_preproc;
00109 }
00110
00112 CPreprocessor* CFeatures::get_preprocessor(int32_t num)
00113 {
00114 if (num<num_preproc)
00115 {
00116 SG_REF(preproc[num]);
00117 return preproc[num];
00118 }
00119 else
00120 return NULL;
00121 }
00122
00124 int32_t CFeatures::get_num_preprocessed()
00125 {
00126 int32_t num=0;
00127
00128 for (int32_t i=0; i<num_preproc; i++)
00129 {
00130 if (preprocessed[i])
00131 num++;
00132 }
00133
00134 return num;
00135 }
00136
00138 void CFeatures::clean_preprocessors()
00139 {
00140 while (del_preprocessor(0));
00141 }
00142
00144 CPreprocessor* CFeatures::del_preprocessor(int32_t num)
00145 {
00146 CPreprocessor** pps=NULL;
00147 bool* preprocd=NULL;
00148 CPreprocessor* removed_preproc=NULL;
00149
00150 if (num_preproc>0 && num<num_preproc)
00151 {
00152 removed_preproc=preproc[num];
00153
00154 if (num_preproc>1)
00155 {
00156 pps= SG_MALLOC(CPreprocessor*, num_preproc-1);
00157 preprocd= SG_MALLOC(bool, num_preproc-1);
00158
00159 if (pps && preprocd)
00160 {
00161 int32_t j=0;
00162 for (int32_t i=0; i<num_preproc; i++)
00163 {
00164 if (i!=num)
00165 {
00166 pps[j]=preproc[i];
00167 preprocd[j]=preprocessed[i];
00168 j++;
00169 }
00170 }
00171 }
00172 }
00173
00174 SG_FREE(preproc);
00175 preproc=pps;
00176 SG_FREE(preprocessed);
00177 preprocessed=preprocd;
00178
00179 num_preproc--;
00180
00181 for (int32_t i=0; i<num_preproc; i++)
00182 SG_INFO( "preproc[%d]=%s\n",i, preproc[i]->get_name()) ;
00183 }
00184
00185 SG_UNREF(removed_preproc);
00186 return removed_preproc;
00187 }
00188
00189 void CFeatures::set_preprocessed(int32_t num)
00190 {
00191 preprocessed[num]=true;
00192 }
00193
00194 bool CFeatures::is_preprocessed(int32_t num)
00195 {
00196 return preprocessed[num];
00197 }
00198
00199 int32_t CFeatures::get_num_preprocessors() const
00200 {
00201 return num_preproc;
00202 }
00203
00204 int32_t CFeatures::get_cache_size()
00205 {
00206 return cache_size;
00207 }
00208
00209 bool CFeatures::reshape(int32_t num_features, int32_t num_vectors)
00210 {
00211 SG_NOTIMPLEMENTED;
00212 return false;
00213 }
00214
00215 void CFeatures::list_feature_obj()
00216 {
00217 SG_INFO( "%p - ", this);
00218 switch (get_feature_class())
00219 {
00220 case C_UNKNOWN:
00221 SG_INFO( "C_UNKNOWN ");
00222 break;
00223 case C_SIMPLE:
00224 SG_INFO( "C_SIMPLE ");
00225 break;
00226 case C_SPARSE:
00227 SG_INFO( "C_SPARSE ");
00228 break;
00229 case C_STRING:
00230 SG_INFO( "C_STRING ");
00231 break;
00232 case C_COMBINED:
00233 SG_INFO( "C_COMBINED ");
00234 break;
00235 case C_COMBINED_DOT:
00236 SG_INFO( "C_COMBINED_DOT ");
00237 break;
00238 case C_WD:
00239 SG_INFO( "C_WD ");
00240 break;
00241 case C_SPEC:
00242 SG_INFO( "C_SPEC ");
00243 break;
00244 case C_WEIGHTEDSPEC:
00245 SG_INFO( "C_WEIGHTEDSPEC ");
00246 break;
00247 case C_STREAMING_SIMPLE:
00248 SG_INFO( "C_STREAMING_SIMPLE ");
00249 break;
00250 case C_STREAMING_SPARSE:
00251 SG_INFO( "C_STREAMING_SPARSE ");
00252 break;
00253 case C_STREAMING_STRING:
00254 SG_INFO( "C_STREAMING_STRING ");
00255 break;
00256 case C_STREAMING_VW:
00257 SG_INFO( "C_STREAMING_VW ");
00258 break;
00259 case C_ANY:
00260 SG_INFO( "C_ANY ");
00261 break;
00262 default:
00263 SG_ERROR( "ERROR UNKNOWN FEATURE CLASS");
00264 }
00265
00266 switch (get_feature_type())
00267 {
00268 case F_UNKNOWN:
00269 SG_INFO( "F_UNKNOWN \n");
00270 break;
00271 case F_CHAR:
00272 SG_INFO( "F_CHAR \n");
00273 break;
00274 case F_BYTE:
00275 SG_INFO( "F_BYTE \n");
00276 break;
00277 case F_SHORT:
00278 SG_INFO( "F_SHORT \n");
00279 break;
00280 case F_WORD:
00281 SG_INFO( "F_WORD \n");
00282 break;
00283 case F_INT:
00284 SG_INFO( "F_INT \n");
00285 break;
00286 case F_UINT:
00287 SG_INFO( "F_UINT \n");
00288 break;
00289 case F_LONG:
00290 SG_INFO( "F_LONG \n");
00291 break;
00292 case F_ULONG:
00293 SG_INFO( "F_ULONG \n");
00294 break;
00295 case F_SHORTREAL:
00296 SG_INFO( "F_SHORTEAL \n");
00297 break;
00298 case F_DREAL:
00299 SG_INFO( "F_DREAL \n");
00300 break;
00301 case F_LONGREAL:
00302 SG_INFO( "F_LONGREAL \n");
00303 break;
00304 case F_ANY:
00305 SG_INFO( "F_ANY \n");
00306 break;
00307 default:
00308 SG_ERROR( "ERROR UNKNOWN FEATURE TYPE\n");
00309 }
00310 }
00311
00312
00313 void CFeatures::load(CFile* loader)
00314 {
00315 SG_SET_LOCALE_C;
00316 SG_NOTIMPLEMENTED;
00317 SG_RESET_LOCALE;
00318 }
00319
00320 void CFeatures::save(CFile* writer)
00321 {
00322 SG_SET_LOCALE_C;
00323 SG_NOTIMPLEMENTED;
00324 SG_RESET_LOCALE;
00325 }
00326
00327 bool CFeatures::check_feature_compatibility(CFeatures* f)
00328 {
00329 bool result=false;
00330
00331 if (f)
00332 result= ( (this->get_feature_class() == f->get_feature_class()) &&
00333 (this->get_feature_type() == f->get_feature_type()));
00334 return result;
00335 }
00336
00337 bool CFeatures::has_property(EFeatureProperty p)
00338 {
00339 return (properties & p) != 0;
00340 }
00341
00342 void CFeatures::set_property(EFeatureProperty p)
00343 {
00344 properties |= p;
00345 }
00346
00347 void CFeatures::unset_property(EFeatureProperty p)
00348 {
00349 properties &= (properties | p) ^ p;
00350 }
00351
00352 void CFeatures::set_subset(CSubset* subset)
00353 {
00354 SG_UNREF(m_subset);
00355 m_subset=subset;
00356 SG_REF(subset);
00357 subset_changed_post();
00358 }
00359
00360 index_t CFeatures::subset_idx_conversion(index_t idx) const
00361 {
00362 return m_subset ? m_subset->subset_idx_conversion(idx) : idx;
00363 }
00364
00365 bool CFeatures::has_subset() const
00366 {
00367 return m_subset!=NULL;
00368 }
00369
00370 void CFeatures::remove_subset()
00371 {
00372 set_subset(NULL);
00373 }
00374
00375 CFeatures* CFeatures::copy_subset(SGVector<index_t> indices)
00376 {
00377 SG_ERROR("copy_subset and therefore model storage of CMachine "
00378 "(required for cross-validation and model-selection is ",
00379 "not yet implemented for feature type %s\n", get_name());
00380 return NULL;
00381 }