Features.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Subset support written (W) 2011 Heiko Strathmann
00010  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00011  */
00012 
00013 #include <shogun/features/Features.h>
00014 #include <shogun/preprocessor/Preprocessor.h>
00015 #include <shogun/io/SGIO.h>
00016 #include <shogun/base/Parameter.h>
00017 
00018 #include <string.h>
00019 
00020 using namespace shogun;
00021 
00022 CFeatures::CFeatures(int32_t size)
00023 : CSGObject()
00024 {
00025     init();
00026     cache_size = size;
00027 }
00028 
00029 CFeatures::CFeatures(const CFeatures& orig)
00030 : CSGObject(orig)
00031 {
00032     init();
00033 
00034     preproc = orig.preproc;
00035     num_preproc = orig.num_preproc;
00036 
00037     preprocessed=SG_MALLOC(bool, orig.num_preproc);
00038     memcpy(preprocessed, orig.preprocessed, sizeof(bool)*orig.num_preproc);
00039 }
00040 
00041 CFeatures::CFeatures(CFile* loader)
00042 : CSGObject()
00043 {
00044     init();
00045 
00046     load(loader);
00047     SG_INFO("Feature object loaded (%p)\n",this) ;
00048 }
00049 
00050 CFeatures::~CFeatures()
00051 {
00052     clean_preprocessors();
00053     delete m_subset;
00054 }
00055 
00056 void
00057 CFeatures::init(void)
00058 {
00059     m_parameters->add(&properties, "properties",
00060                       "Feature properties.");
00061     m_parameters->add(&cache_size, "cache_size",
00062                       "Size of cache in MB.");
00063 
00064     m_parameters->add_vector((CSGObject***) &preproc,
00065                              &num_preproc, "preproc",
00066                              "List of preprocessors.");
00067     m_parameters->add_vector(&preprocessed,
00068                              &num_preproc, "preprocessed",
00069                              "Feature[i] is already preprocessed.");
00070 
00071     m_parameters->add((CSGObject**)&m_subset, "subset", "Subset object");
00072 
00073     m_subset=NULL;
00074     properties = FP_NONE;
00075     cache_size = 0;
00076     preproc = NULL;
00077     num_preproc = 0;
00078     preprocessed = NULL;
00079 }
00080 
00082 int32_t CFeatures::add_preprocessor(CPreprocessor* p)
00083 {
00084     SG_INFO( "%d preprocs currently, new preproc list is\n", num_preproc);
00085     ASSERT(p);
00086 
00087     bool* preprocd=SG_MALLOC(bool, num_preproc+1);
00088     CPreprocessor** pps=SG_MALLOC(CPreprocessor*, num_preproc+1);
00089     for (int32_t i=0; i<num_preproc; i++)
00090     {
00091         pps[i]=preproc[i];
00092         preprocd[i]=preprocessed[i];
00093     }
00094     SG_FREE(preproc);
00095     SG_FREE(preprocessed);
00096     preproc=pps;
00097     preprocessed=preprocd;
00098     preproc[num_preproc]=p;
00099     preprocessed[num_preproc]=false;
00100 
00101     num_preproc++;
00102 
00103     for (int32_t i=0; i<num_preproc; i++)
00104         SG_INFO( "preproc[%d]=%s %ld\n",i, preproc[i]->get_name(), preproc[i]) ;
00105 
00106     SG_REF(p);
00107 
00108     return num_preproc;
00109 }
00110 
00112 CPreprocessor* CFeatures::get_preprocessor(int32_t num)
00113 {
00114     if (num<num_preproc)
00115     {
00116         SG_REF(preproc[num]);
00117         return preproc[num];
00118     }
00119     else
00120         return NULL;
00121 }
00122 
00124 int32_t CFeatures::get_num_preprocessed()
00125 {
00126     int32_t num=0;
00127 
00128     for (int32_t i=0; i<num_preproc; i++)
00129     {
00130         if (preprocessed[i])
00131             num++;
00132     }
00133 
00134     return num;
00135 }
00136 
00138 void CFeatures::clean_preprocessors()
00139 {
00140     while (del_preprocessor(0));
00141 }
00142 
00144 CPreprocessor* CFeatures::del_preprocessor(int32_t num)
00145 {
00146     CPreprocessor** pps=NULL;
00147     bool* preprocd=NULL;
00148     CPreprocessor* removed_preproc=NULL;
00149 
00150     if (num_preproc>0 && num<num_preproc)
00151     {
00152         removed_preproc=preproc[num];
00153 
00154         if (num_preproc>1)
00155         {
00156             pps= SG_MALLOC(CPreprocessor*, num_preproc-1);
00157             preprocd= SG_MALLOC(bool, num_preproc-1);
00158 
00159             if (pps && preprocd)
00160             {
00161                 int32_t j=0;
00162                 for (int32_t i=0; i<num_preproc; i++)
00163                 {
00164                     if (i!=num)
00165                     {
00166                         pps[j]=preproc[i];
00167                         preprocd[j]=preprocessed[i];
00168                         j++;
00169                     }
00170                 }
00171             }
00172         }
00173 
00174         SG_FREE(preproc);
00175         preproc=pps;
00176         SG_FREE(preprocessed);
00177         preprocessed=preprocd;
00178 
00179         num_preproc--;
00180 
00181         for (int32_t i=0; i<num_preproc; i++)
00182             SG_INFO( "preproc[%d]=%s\n",i, preproc[i]->get_name()) ;
00183     }
00184 
00185     SG_UNREF(removed_preproc);
00186     return removed_preproc;
00187 }
00188 
00189 void CFeatures::list_feature_obj()
00190 {
00191     SG_INFO( "%p - ", this);
00192     switch (get_feature_class())
00193     {
00194         case C_UNKNOWN:
00195             SG_INFO( "C_UNKNOWN ");
00196             break;
00197         case C_SIMPLE:
00198             SG_INFO( "C_SIMPLE ");
00199             break;
00200         case C_SPARSE:
00201             SG_INFO( "C_SPARSE ");
00202             break;
00203         case C_STRING:
00204             SG_INFO( "C_STRING ");
00205             break;
00206         case C_COMBINED:
00207             SG_INFO( "C_COMBINED ");
00208             break;
00209         case C_COMBINED_DOT:
00210             SG_INFO( "C_COMBINED_DOT ");
00211             break;
00212         case C_WD:
00213             SG_INFO( "C_WD ");
00214             break;
00215         case C_SPEC:
00216             SG_INFO( "C_SPEC ");
00217             break;
00218         case C_WEIGHTEDSPEC:
00219             SG_INFO( "C_WEIGHTEDSPEC ");
00220             break;
00221         case C_STREAMING_SIMPLE:
00222             SG_INFO( "C_STREAMING_SIMPLE ");
00223             break;
00224         case C_STREAMING_SPARSE:
00225             SG_INFO( "C_STREAMING_SPARSE ");
00226             break;
00227         case C_STREAMING_STRING:
00228             SG_INFO( "C_STREAMING_STRING ");
00229             break;
00230         case C_STREAMING_VW:
00231             SG_INFO( "C_STREAMING_VW ");
00232             break;
00233         case C_ANY:
00234             SG_INFO( "C_ANY ");
00235             break;
00236         default:
00237          SG_ERROR( "ERROR UNKNOWN FEATURE CLASS");
00238     }
00239 
00240     switch (get_feature_type())
00241     {
00242         case F_UNKNOWN:
00243             SG_INFO( "F_UNKNOWN \n");
00244             break;
00245         case F_CHAR:
00246             SG_INFO( "F_CHAR \n");
00247             break;
00248         case F_BYTE:
00249             SG_INFO( "F_BYTE \n");
00250             break;
00251         case F_SHORT:
00252             SG_INFO( "F_SHORT \n");
00253             break;
00254         case F_WORD:
00255             SG_INFO( "F_WORD \n");
00256             break;
00257         case F_INT:
00258             SG_INFO( "F_INT \n");
00259             break;
00260         case F_UINT:
00261             SG_INFO( "F_UINT \n");
00262             break;
00263         case F_LONG:
00264             SG_INFO( "F_LONG \n");
00265             break;
00266         case F_ULONG:
00267             SG_INFO( "F_ULONG \n");
00268             break;
00269         case F_SHORTREAL:
00270             SG_INFO( "F_SHORTEAL \n");
00271             break;
00272         case F_DREAL:
00273             SG_INFO( "F_DREAL \n");
00274             break;
00275         case F_LONGREAL:
00276             SG_INFO( "F_LONGREAL \n");
00277             break;
00278         case F_ANY:
00279             SG_INFO( "F_ANY \n");
00280             break;
00281         default:
00282          SG_ERROR( "ERROR UNKNOWN FEATURE TYPE\n");
00283     }
00284 }
00285 
00286 bool CFeatures::check_feature_compatibility(CFeatures* f)
00287 {
00288     bool result=false;
00289 
00290     if (f)
00291         result= ( (this->get_feature_class() == f->get_feature_class()) &&
00292                 (this->get_feature_type() == f->get_feature_type()));
00293     return result;
00294 }
00295 
00296 void CFeatures::set_subset(CSubset* subset)
00297 {
00298     SG_UNREF(m_subset);
00299     m_subset=subset;
00300     SG_REF(subset);
00301     subset_changed_post();
00302 }
00303 
00304 void CFeatures::remove_subset()
00305 {
00306     set_subset(NULL);
00307 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation