Features.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Written (W) 2011-2012 Heiko Strathmann
00010  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00011  */
00012 
00013 #include <shogun/features/Features.h>
00014 #include <shogun/preprocessor/Preprocessor.h>
00015 #include <shogun/io/SGIO.h>
00016 #include <shogun/base/Parameter.h>
00017 
00018 #include <string.h>
00019 
00020 using namespace shogun;
00021 
00022 CFeatures::CFeatures(int32_t size)
00023 : CSGObject()
00024 {
00025     init();
00026     cache_size = size;
00027 }
00028 
00029 CFeatures::CFeatures(const CFeatures& orig)
00030 : CSGObject(orig)
00031 {
00032     init();
00033 
00034     preproc = orig.preproc;
00035     num_preproc = orig.num_preproc;
00036 
00037     preprocessed=SG_MALLOC(bool, orig.num_preproc);
00038     memcpy(preprocessed, orig.preprocessed, sizeof(bool)*orig.num_preproc);
00039 }
00040 
00041 CFeatures::CFeatures(CFile* loader)
00042 : CSGObject()
00043 {
00044     init();
00045 
00046     load(loader);
00047     SG_INFO("Feature object loaded (%p)\n",this) ;
00048 }
00049 
00050 CFeatures::~CFeatures()
00051 {
00052     clean_preprocessors();
00053     SG_UNREF(m_subset_stack);
00054 }
00055 
00056 void CFeatures::init()
00057 {
00058     SG_ADD(&properties, "properties", "Feature properties", MS_NOT_AVAILABLE);
00059     SG_ADD(&cache_size, "cache_size", "Size of cache in MB", MS_NOT_AVAILABLE);
00060 
00061     /* TODO, use SGVector for arrays to be able to use SG_ADD macro */
00062     m_parameters->add_vector((CSGObject***) &preproc, &num_preproc, "preproc",
00063             "List of preprocessors");
00064     m_parameters->add_vector(&preprocessed, &num_preproc, "preprocessed",
00065             "Feature[i] is already preprocessed");
00066 
00067     SG_ADD((CSGObject**)&m_subset_stack, "subset_stack", "Stack of subsets",
00068             MS_NOT_AVAILABLE);
00069 
00070     m_subset_stack=new CSubsetStack();
00071     SG_REF(m_subset_stack);
00072 
00073     properties = FP_NONE;
00074     cache_size = 0;
00075     preproc = NULL;
00076     num_preproc = 0;
00077     preprocessed = NULL;
00078 }
00079 
00081 int32_t CFeatures::add_preprocessor(CPreprocessor* p)
00082 {
00083     SG_INFO( "%d preprocs currently, new preproc list is\n", num_preproc);
00084     ASSERT(p);
00085 
00086     bool* preprocd=SG_MALLOC(bool, num_preproc+1);
00087     CPreprocessor** pps=SG_MALLOC(CPreprocessor*, num_preproc+1);
00088     for (int32_t i=0; i<num_preproc; i++)
00089     {
00090         pps[i]=preproc[i];
00091         preprocd[i]=preprocessed[i];
00092     }
00093     SG_FREE(preproc);
00094     SG_FREE(preprocessed);
00095     preproc=pps;
00096     preprocessed=preprocd;
00097     preproc[num_preproc]=p;
00098     preprocessed[num_preproc]=false;
00099 
00100     num_preproc++;
00101 
00102     for (int32_t i=0; i<num_preproc; i++)
00103         SG_INFO( "preproc[%d]=%s %ld\n",i, preproc[i]->get_name(), preproc[i]) ;
00104 
00105     SG_REF(p);
00106 
00107     return num_preproc;
00108 }
00109 
00111 CPreprocessor* CFeatures::get_preprocessor(int32_t num) const
00112 {
00113     if (num<num_preproc)
00114     {
00115         SG_REF(preproc[num]);
00116         return preproc[num];
00117     }
00118     else
00119         return NULL;
00120 }
00121 
00123 int32_t CFeatures::get_num_preprocessed() const
00124 {
00125     int32_t num=0;
00126 
00127     for (int32_t i=0; i<num_preproc; i++)
00128     {
00129         if (preprocessed[i])
00130             num++;
00131     }
00132 
00133     return num;
00134 }
00135 
00137 void CFeatures::clean_preprocessors()
00138 {
00139     while (del_preprocessor(0));
00140 }
00141 
00143 CPreprocessor* CFeatures::del_preprocessor(int32_t num)
00144 {
00145     CPreprocessor** pps=NULL;
00146     bool* preprocd=NULL;
00147     CPreprocessor* removed_preproc=NULL;
00148 
00149     if (num_preproc>0 && num<num_preproc)
00150     {
00151         removed_preproc=preproc[num];
00152 
00153         if (num_preproc>1)
00154         {
00155             pps= SG_MALLOC(CPreprocessor*, num_preproc-1);
00156             preprocd= SG_MALLOC(bool, num_preproc-1);
00157 
00158             if (pps && preprocd)
00159             {
00160                 int32_t j=0;
00161                 for (int32_t i=0; i<num_preproc; i++)
00162                 {
00163                     if (i!=num)
00164                     {
00165                         pps[j]=preproc[i];
00166                         preprocd[j]=preprocessed[i];
00167                         j++;
00168                     }
00169                 }
00170             }
00171         }
00172 
00173         SG_FREE(preproc);
00174         preproc=pps;
00175         SG_FREE(preprocessed);
00176         preprocessed=preprocd;
00177 
00178         num_preproc--;
00179 
00180         for (int32_t i=0; i<num_preproc; i++)
00181             SG_INFO( "preproc[%d]=%s\n",i, preproc[i]->get_name()) ;
00182     }
00183 
00184     SG_UNREF(removed_preproc);
00185     return removed_preproc;
00186 }
00187 
00188 void CFeatures::set_preprocessed(int32_t num)
00189 {
00190     preprocessed[num]=true;
00191 }
00192 
00193 bool CFeatures::is_preprocessed(int32_t num) const
00194 {
00195     return preprocessed[num];
00196 }
00197 
00198 int32_t CFeatures::get_num_preprocessors() const
00199 {
00200     return num_preproc;
00201 }
00202 
00203 int32_t CFeatures::get_cache_size() const
00204 {
00205     return cache_size;
00206 }
00207 
00208 bool CFeatures::reshape(int32_t num_features, int32_t num_vectors)
00209 {
00210     SG_NOTIMPLEMENTED;
00211     return false;
00212 }
00213 
00214 void CFeatures::list_feature_obj() const
00215 {
00216     SG_INFO( "%p - ", this);
00217     switch (get_feature_class())
00218     {
00219         case C_UNKNOWN:
00220             SG_INFO( "C_UNKNOWN ");
00221             break;
00222         case C_DENSE:
00223             SG_INFO( "C_DENSE ");
00224             break;
00225         case C_SPARSE:
00226             SG_INFO( "C_SPARSE ");
00227             break;
00228         case C_STRING:
00229             SG_INFO( "C_STRING ");
00230             break;
00231         case C_COMBINED:
00232             SG_INFO( "C_COMBINED ");
00233             break;
00234         case C_COMBINED_DOT:
00235             SG_INFO( "C_COMBINED_DOT ");
00236             break;
00237         case C_WD:
00238             SG_INFO( "C_WD ");
00239             break;
00240         case C_SPEC:
00241             SG_INFO( "C_SPEC ");
00242             break;
00243         case C_WEIGHTEDSPEC:
00244             SG_INFO( "C_WEIGHTEDSPEC ");
00245             break;
00246         case C_STREAMING_DENSE:
00247             SG_INFO( "C_STREAMING_DENSE ");
00248             break;
00249         case C_STREAMING_SPARSE:
00250             SG_INFO( "C_STREAMING_SPARSE ");
00251             break;
00252         case C_STREAMING_STRING:
00253             SG_INFO( "C_STREAMING_STRING ");
00254             break;
00255         case C_STREAMING_VW:
00256             SG_INFO( "C_STREAMING_VW ");
00257             break;
00258         case C_ANY:
00259             SG_INFO( "C_ANY ");
00260             break;
00261         default:
00262          SG_ERROR( "ERROR UNKNOWN FEATURE CLASS");
00263     }
00264 
00265     switch (get_feature_type())
00266     {
00267         case F_UNKNOWN:
00268             SG_INFO( "F_UNKNOWN \n");
00269             break;
00270         case F_CHAR:
00271             SG_INFO( "F_CHAR \n");
00272             break;
00273         case F_BYTE:
00274             SG_INFO( "F_BYTE \n");
00275             break;
00276         case F_SHORT:
00277             SG_INFO( "F_SHORT \n");
00278             break;
00279         case F_WORD:
00280             SG_INFO( "F_WORD \n");
00281             break;
00282         case F_INT:
00283             SG_INFO( "F_INT \n");
00284             break;
00285         case F_UINT:
00286             SG_INFO( "F_UINT \n");
00287             break;
00288         case F_LONG:
00289             SG_INFO( "F_LONG \n");
00290             break;
00291         case F_ULONG:
00292             SG_INFO( "F_ULONG \n");
00293             break;
00294         case F_SHORTREAL:
00295             SG_INFO( "F_SHORTEAL \n");
00296             break;
00297         case F_DREAL:
00298             SG_INFO( "F_DREAL \n");
00299             break;
00300         case F_LONGREAL:
00301             SG_INFO( "F_LONGREAL \n");
00302             break;
00303         case F_ANY:
00304             SG_INFO( "F_ANY \n");
00305             break;
00306         default:
00307          SG_ERROR( "ERROR UNKNOWN FEATURE TYPE\n");
00308     }
00309 }
00310 
00311 
00312 void CFeatures::load(CFile* loader)
00313 {
00314     SG_SET_LOCALE_C;
00315     SG_NOTIMPLEMENTED;
00316     SG_RESET_LOCALE;
00317 }
00318 
00319 void CFeatures::save(CFile* writer)
00320 {
00321     SG_SET_LOCALE_C;
00322     SG_NOTIMPLEMENTED;
00323     SG_RESET_LOCALE;
00324 }
00325 
00326 bool CFeatures::check_feature_compatibility(CFeatures* f) const
00327 {
00328     bool result=false;
00329 
00330     if (f)
00331         result= ( (this->get_feature_class() == f->get_feature_class()) &&
00332                 (this->get_feature_type() == f->get_feature_type()));
00333     return result;
00334 }
00335 
00336 bool CFeatures::has_property(EFeatureProperty p) const
00337 {
00338     return (properties & p) != 0;
00339 }
00340 
00341 void CFeatures::set_property(EFeatureProperty p)
00342 {
00343     properties |= p;
00344 }
00345 
00346 void CFeatures::unset_property(EFeatureProperty p)
00347 {
00348     properties &= (properties | p) ^ p;
00349 }
00350 
00351 void CFeatures::add_subset(SGVector<index_t> subset)
00352 {
00353     m_subset_stack->add_subset(subset);
00354     subset_changed_post();
00355 }
00356 
00357 void CFeatures::remove_subset()
00358 {
00359     m_subset_stack->remove_subset();
00360     subset_changed_post();
00361 }
00362 
00363 void CFeatures::remove_all_subsets()
00364 {
00365     m_subset_stack->remove_all_subsets();
00366     subset_changed_post();
00367 }
00368 
00369 CSubsetStack* CFeatures::get_subset_stack()
00370 {
00371     return m_subset_stack;
00372 }
00373 
00374 CFeatures* CFeatures::copy_subset(SGVector<index_t> indices)
00375 {
00376     SG_ERROR("%s::copy_subset(): copy_subset and therefore model storage of "
00377             "CMachine (required for cross-validation and model-selection is "
00378             "not yet implemented yet. Ask developers!\n", get_name());
00379     return NULL;
00380 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation