Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "preproc/PruneVarSubMean.h"
00013 #include "preproc/SimplePreProc.h"
00014 #include "features/Features.h"
00015 #include "features/SimpleFeatures.h"
00016 #include "lib/io.h"
00017 #include "lib/Mathematics.h"
00018
00019 using namespace shogun;
00020
00021 CPruneVarSubMean::CPruneVarSubMean(bool divide)
00022 : CSimplePreProc<float64_t>("PruneVarSubMean","PVSM"), idx(NULL), mean(NULL),
00023 std(NULL), num_idx(0), divide_by_std(divide), initialized(false)
00024 {
00025 }
00026
00027 CPruneVarSubMean::~CPruneVarSubMean()
00028 {
00029 cleanup();
00030 }
00031
00033 bool CPruneVarSubMean::init(CFeatures* p_f)
00034 {
00035 if (!initialized)
00036 {
00037 ASSERT(p_f->get_feature_class()==C_SIMPLE);
00038 ASSERT(p_f->get_feature_type()==F_DREAL);
00039
00040 CSimpleFeatures<float64_t> *f=(CSimpleFeatures<float64_t>*) p_f;
00041 int32_t num_examples=f->get_num_vectors();
00042 int32_t num_features=((CSimpleFeatures<float64_t>*)f)->get_num_features();
00043
00044 delete[] mean;
00045 delete[] idx;
00046 delete[] std;
00047 mean=NULL;
00048 idx=NULL;
00049 std=NULL;
00050
00051 mean=new float64_t[num_features];
00052 float64_t* var=new float64_t[num_features];
00053 int32_t i,j;
00054
00055 for (i=0; i<num_features; i++)
00056 {
00057 mean[i]=0;
00058 var[i]=0 ;
00059 }
00060
00061
00062 for (i=0; i<num_examples; i++)
00063 {
00064 int32_t len ; bool free ;
00065 float64_t* feature=f->get_feature_vector(i, len, free) ;
00066
00067 for (j=0; j<len; j++)
00068 mean[j]+=feature[j];
00069
00070 f->free_feature_vector(feature, i, free) ;
00071 }
00072
00073 for (j=0; j<num_features; j++)
00074 mean[j]/=num_examples ;
00075
00076
00077 for (i=0; i<num_examples; i++)
00078 {
00079 int32_t len ; bool free ;
00080 float64_t* feature=f->get_feature_vector(i, len, free) ;
00081
00082 for (j=0; j<num_features; j++)
00083 var[j]+=(mean[j]-feature[j])*(mean[j]-feature[j]) ;
00084
00085 f->free_feature_vector(feature, i, free) ;
00086 }
00087
00088 int32_t num_ok=0;
00089 int32_t* idx_ok=new int[num_features];
00090
00091 for (j=0; j<num_features; j++)
00092 {
00093 var[j]/=num_examples ;
00094
00095 if (var[j]>=1e-14)
00096 {
00097 idx_ok[num_ok]=j ;
00098 num_ok++ ;
00099 }
00100 }
00101
00102 SG_INFO( "Reducing number of features from %i to %i\n", num_features, num_ok) ;
00103
00104 delete[] idx ;
00105 idx=new int[num_ok];
00106 float64_t* new_mean=new float64_t[num_ok];
00107 std=new float64_t[num_ok];
00108
00109 for (j=0; j<num_ok; j++)
00110 {
00111 idx[j]=idx_ok[j] ;
00112 new_mean[j]=mean[idx_ok[j]];
00113 std[j]=sqrt(var[idx_ok[j]]);
00114 }
00115 num_idx=num_ok ;
00116 delete[] idx_ok ;
00117 delete[] mean;
00118 delete[] var;
00119 mean=new_mean;
00120
00121 initialized=true;
00122 return true ;
00123 }
00124 else
00125 return false;
00126 }
00127
00129 void CPruneVarSubMean::cleanup()
00130 {
00131 delete[] idx;
00132 idx=NULL;
00133 delete[] mean;
00134 mean=NULL;
00135 delete[] std;
00136 std=NULL;
00137 }
00138
00142 float64_t* CPruneVarSubMean::apply_to_feature_matrix(CFeatures* f)
00143 {
00144 ASSERT(initialized);
00145
00146 int32_t num_vectors=0;
00147 int32_t num_features=0;
00148 float64_t* m=((CSimpleFeatures<float64_t>*) f)->get_feature_matrix(num_features, num_vectors);
00149
00150 SG_INFO( "get Feature matrix: %ix%i\n", num_vectors, num_features);
00151 SG_INFO( "Preprocessing feature matrix\n");
00152 for (int32_t vec=0; vec<num_vectors; vec++)
00153 {
00154 float64_t* v_src=&m[num_features*vec];
00155 float64_t* v_dst=&m[num_idx*vec];
00156
00157 if (divide_by_std)
00158 {
00159 for (int32_t feat=0; feat<num_idx; feat++)
00160 v_dst[feat]=(v_src[idx[feat]]-mean[feat])/std[feat];
00161 }
00162 else
00163 {
00164 for (int32_t feat=0; feat<num_idx; feat++)
00165 v_dst[feat]=(v_src[idx[feat]]-mean[feat]);
00166 }
00167 }
00168
00169 ((CSimpleFeatures<float64_t>*) f)->set_num_features(num_idx);
00170 ((CSimpleFeatures<float64_t>*) f)->get_feature_matrix(num_features, num_vectors);
00171 SG_INFO( "new Feature matrix: %ix%i\n", num_vectors, num_features);
00172
00173 return m;
00174 }
00175
00178 float64_t* CPruneVarSubMean::apply_to_feature_vector(float64_t* f, int32_t &len)
00179 {
00180 float64_t* ret=NULL;
00181
00182 if (initialized)
00183 {
00184 ret=new float64_t[num_idx] ;
00185
00186 if (divide_by_std)
00187 {
00188 for (int32_t i=0; i<num_idx; i++)
00189 ret[i]=(f[idx[i]]-mean[i])/std[i];
00190 }
00191 else
00192 {
00193 for (int32_t i=0; i<num_idx; i++)
00194 ret[i]=(f[idx[i]]-mean[i]);
00195 }
00196 len=num_idx ;
00197 }
00198 else
00199 {
00200 ret=new float64_t[len] ;
00201 for (int32_t i=0; i<len; i++)
00202 ret[i]=f[i];
00203 }
00204
00205 return ret;
00206 }