Features.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Subset support written (W) 2011 Heiko Strathmann
00010  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00011  */
00012 
00013 #include <shogun/features/Features.h>
00014 #include <shogun/preprocessor/Preprocessor.h>
00015 #include <shogun/io/SGIO.h>
00016 #include <shogun/base/Parameter.h>
00017 
00018 #include <string.h>
00019 
00020 using namespace shogun;
00021 
00022 CFeatures::CFeatures(int32_t size)
00023 : CSGObject()
00024 {
00025     init();
00026     cache_size = size;
00027 }
00028 
00029 CFeatures::CFeatures(const CFeatures& orig)
00030 : CSGObject(orig)
00031 {
00032     init();
00033 
00034     preproc = orig.preproc;
00035     num_preproc = orig.num_preproc;
00036 
00037     preprocessed=SG_MALLOC(bool, orig.num_preproc);
00038     memcpy(preprocessed, orig.preprocessed, sizeof(bool)*orig.num_preproc);
00039 }
00040 
00041 CFeatures::CFeatures(CFile* loader)
00042 : CSGObject()
00043 {
00044     init();
00045 
00046     load(loader);
00047     SG_INFO("Feature object loaded (%p)\n",this) ;
00048 }
00049 
00050 CFeatures::~CFeatures()
00051 {
00052     clean_preprocessors();
00053     delete m_subset;
00054 }
00055 
00056 void
00057 CFeatures::init()
00058 {
00059     m_parameters->add(&properties, "properties",
00060                       "Feature properties.");
00061     m_parameters->add(&cache_size, "cache_size",
00062                       "Size of cache in MB.");
00063 
00064     m_parameters->add_vector((CSGObject***) &preproc,
00065                              &num_preproc, "preproc",
00066                              "List of preprocessors.");
00067     m_parameters->add_vector(&preprocessed,
00068                              &num_preproc, "preprocessed",
00069                              "Feature[i] is already preprocessed.");
00070 
00071     m_parameters->add((CSGObject**)&m_subset, "subset", "Subset object");
00072 
00073     m_subset=NULL;
00074     properties = FP_NONE;
00075     cache_size = 0;
00076     preproc = NULL;
00077     num_preproc = 0;
00078     preprocessed = NULL;
00079 }
00080 
00082 int32_t CFeatures::add_preprocessor(CPreprocessor* p)
00083 {
00084     SG_INFO( "%d preprocs currently, new preproc list is\n", num_preproc);
00085     ASSERT(p);
00086 
00087     bool* preprocd=SG_MALLOC(bool, num_preproc+1);
00088     CPreprocessor** pps=SG_MALLOC(CPreprocessor*, num_preproc+1);
00089     for (int32_t i=0; i<num_preproc; i++)
00090     {
00091         pps[i]=preproc[i];
00092         preprocd[i]=preprocessed[i];
00093     }
00094     SG_FREE(preproc);
00095     SG_FREE(preprocessed);
00096     preproc=pps;
00097     preprocessed=preprocd;
00098     preproc[num_preproc]=p;
00099     preprocessed[num_preproc]=false;
00100 
00101     num_preproc++;
00102 
00103     for (int32_t i=0; i<num_preproc; i++)
00104         SG_INFO( "preproc[%d]=%s %ld\n",i, preproc[i]->get_name(), preproc[i]) ;
00105 
00106     SG_REF(p);
00107 
00108     return num_preproc;
00109 }
00110 
00112 CPreprocessor* CFeatures::get_preprocessor(int32_t num)
00113 {
00114     if (num<num_preproc)
00115     {
00116         SG_REF(preproc[num]);
00117         return preproc[num];
00118     }
00119     else
00120         return NULL;
00121 }
00122 
00124 int32_t CFeatures::get_num_preprocessed()
00125 {
00126     int32_t num=0;
00127 
00128     for (int32_t i=0; i<num_preproc; i++)
00129     {
00130         if (preprocessed[i])
00131             num++;
00132     }
00133 
00134     return num;
00135 }
00136 
00138 void CFeatures::clean_preprocessors()
00139 {
00140     while (del_preprocessor(0));
00141 }
00142 
00144 CPreprocessor* CFeatures::del_preprocessor(int32_t num)
00145 {
00146     CPreprocessor** pps=NULL;
00147     bool* preprocd=NULL;
00148     CPreprocessor* removed_preproc=NULL;
00149 
00150     if (num_preproc>0 && num<num_preproc)
00151     {
00152         removed_preproc=preproc[num];
00153 
00154         if (num_preproc>1)
00155         {
00156             pps= SG_MALLOC(CPreprocessor*, num_preproc-1);
00157             preprocd= SG_MALLOC(bool, num_preproc-1);
00158 
00159             if (pps && preprocd)
00160             {
00161                 int32_t j=0;
00162                 for (int32_t i=0; i<num_preproc; i++)
00163                 {
00164                     if (i!=num)
00165                     {
00166                         pps[j]=preproc[i];
00167                         preprocd[j]=preprocessed[i];
00168                         j++;
00169                     }
00170                 }
00171             }
00172         }
00173 
00174         SG_FREE(preproc);
00175         preproc=pps;
00176         SG_FREE(preprocessed);
00177         preprocessed=preprocd;
00178 
00179         num_preproc--;
00180 
00181         for (int32_t i=0; i<num_preproc; i++)
00182             SG_INFO( "preproc[%d]=%s\n",i, preproc[i]->get_name()) ;
00183     }
00184 
00185     SG_UNREF(removed_preproc);
00186     return removed_preproc;
00187 }
00188 
00189 void CFeatures::set_preprocessed(int32_t num)
00190 {
00191     preprocessed[num]=true;
00192 }
00193 
00194 bool CFeatures::is_preprocessed(int32_t num)
00195 {
00196     return preprocessed[num];
00197 }
00198 
00199 int32_t CFeatures::get_num_preprocessors() const
00200 {
00201     return num_preproc;
00202 }
00203 
00204 int32_t CFeatures::get_cache_size()
00205 {
00206     return cache_size;
00207 }
00208 
00209 bool CFeatures::reshape(int32_t num_features, int32_t num_vectors)
00210 {
00211     SG_NOTIMPLEMENTED;
00212     return false;
00213 }
00214 
00215 void CFeatures::list_feature_obj()
00216 {
00217     SG_INFO( "%p - ", this);
00218     switch (get_feature_class())
00219     {
00220         case C_UNKNOWN:
00221             SG_INFO( "C_UNKNOWN ");
00222             break;
00223         case C_SIMPLE:
00224             SG_INFO( "C_SIMPLE ");
00225             break;
00226         case C_SPARSE:
00227             SG_INFO( "C_SPARSE ");
00228             break;
00229         case C_STRING:
00230             SG_INFO( "C_STRING ");
00231             break;
00232         case C_COMBINED:
00233             SG_INFO( "C_COMBINED ");
00234             break;
00235         case C_COMBINED_DOT:
00236             SG_INFO( "C_COMBINED_DOT ");
00237             break;
00238         case C_WD:
00239             SG_INFO( "C_WD ");
00240             break;
00241         case C_SPEC:
00242             SG_INFO( "C_SPEC ");
00243             break;
00244         case C_WEIGHTEDSPEC:
00245             SG_INFO( "C_WEIGHTEDSPEC ");
00246             break;
00247         case C_STREAMING_SIMPLE:
00248             SG_INFO( "C_STREAMING_SIMPLE ");
00249             break;
00250         case C_STREAMING_SPARSE:
00251             SG_INFO( "C_STREAMING_SPARSE ");
00252             break;
00253         case C_STREAMING_STRING:
00254             SG_INFO( "C_STREAMING_STRING ");
00255             break;
00256         case C_STREAMING_VW:
00257             SG_INFO( "C_STREAMING_VW ");
00258             break;
00259         case C_ANY:
00260             SG_INFO( "C_ANY ");
00261             break;
00262         default:
00263          SG_ERROR( "ERROR UNKNOWN FEATURE CLASS");
00264     }
00265 
00266     switch (get_feature_type())
00267     {
00268         case F_UNKNOWN:
00269             SG_INFO( "F_UNKNOWN \n");
00270             break;
00271         case F_CHAR:
00272             SG_INFO( "F_CHAR \n");
00273             break;
00274         case F_BYTE:
00275             SG_INFO( "F_BYTE \n");
00276             break;
00277         case F_SHORT:
00278             SG_INFO( "F_SHORT \n");
00279             break;
00280         case F_WORD:
00281             SG_INFO( "F_WORD \n");
00282             break;
00283         case F_INT:
00284             SG_INFO( "F_INT \n");
00285             break;
00286         case F_UINT:
00287             SG_INFO( "F_UINT \n");
00288             break;
00289         case F_LONG:
00290             SG_INFO( "F_LONG \n");
00291             break;
00292         case F_ULONG:
00293             SG_INFO( "F_ULONG \n");
00294             break;
00295         case F_SHORTREAL:
00296             SG_INFO( "F_SHORTEAL \n");
00297             break;
00298         case F_DREAL:
00299             SG_INFO( "F_DREAL \n");
00300             break;
00301         case F_LONGREAL:
00302             SG_INFO( "F_LONGREAL \n");
00303             break;
00304         case F_ANY:
00305             SG_INFO( "F_ANY \n");
00306             break;
00307         default:
00308          SG_ERROR( "ERROR UNKNOWN FEATURE TYPE\n");
00309     }
00310 }
00311 
00312 
00313 void CFeatures::load(CFile* loader)
00314 {
00315     SG_SET_LOCALE_C;
00316     SG_NOTIMPLEMENTED;
00317     SG_RESET_LOCALE;
00318 }
00319 
00320 void CFeatures::save(CFile* writer)
00321 {
00322     SG_SET_LOCALE_C;
00323     SG_NOTIMPLEMENTED;
00324     SG_RESET_LOCALE;
00325 }
00326 
00327 bool CFeatures::check_feature_compatibility(CFeatures* f)
00328 {
00329     bool result=false;
00330 
00331     if (f)
00332         result= ( (this->get_feature_class() == f->get_feature_class()) &&
00333                 (this->get_feature_type() == f->get_feature_type()));
00334     return result;
00335 }
00336 
00337 bool CFeatures::has_property(EFeatureProperty p)
00338 {
00339     return (properties & p) != 0;
00340 }
00341 
00342 void CFeatures::set_property(EFeatureProperty p)
00343 {
00344     properties |= p;
00345 }
00346 
00347 void CFeatures::unset_property(EFeatureProperty p)
00348 {
00349     properties &= (properties | p) ^ p;
00350 }
00351 
00352 void CFeatures::set_subset(CSubset* subset)
00353 {
00354     SG_UNREF(m_subset);
00355     m_subset=subset;
00356     SG_REF(subset);
00357     subset_changed_post();
00358 }
00359 
00360 index_t CFeatures::subset_idx_conversion(index_t idx) const
00361 {
00362     return m_subset ? m_subset->subset_idx_conversion(idx) : idx;
00363 }
00364 
00365 bool CFeatures::has_subset() const
00366 {
00367     return m_subset!=NULL;
00368 }
00369 
00370 void CFeatures::remove_subset()
00371 {
00372     set_subset(NULL);
00373 }
00374 
00375 CFeatures* CFeatures::copy_subset(SGVector<index_t> indices)
00376 {
00377     SG_ERROR("copy_subset and therefore model storage of CMachine "
00378             "(required for cross-validation and model-selection is ",
00379             "not yet implemented for feature type %s\n", get_name());
00380     return NULL;
00381 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation