PolyFeatures.cpp

Go to the documentation of this file.
00001 #include <shogun/features/PolyFeatures.h>
00002 
00003 using namespace shogun;
00004 
00005 CPolyFeatures::CPolyFeatures() :CDotFeatures()
00006 {
00007     m_feat=NULL;
00008     m_degree=0;
00009     m_normalize=false;
00010     m_input_dimensions=0;
00011     m_multi_index=NULL;
00012     m_multinomial_coefficients=NULL;
00013     m_normalization_values=NULL;
00014 
00015     register_parameters();
00016 }
00017 
00018 CPolyFeatures::CPolyFeatures(CSimpleFeatures<float64_t>* feat, int32_t degree, bool normalize)
00019     : CDotFeatures(), m_multi_index(NULL), m_multinomial_coefficients(NULL),
00020         m_normalization_values(NULL)
00021 {
00022     ASSERT(feat);
00023 
00024     m_feat = feat;
00025     SG_REF(m_feat);
00026     m_degree=degree;
00027     m_normalize=normalize;
00028     m_input_dimensions=feat->get_num_features();
00029     m_output_dimensions=calc_feature_space_dimensions(m_input_dimensions, m_degree);
00030 
00031     store_multi_index();
00032     store_multinomial_coefficients();
00033     if (m_normalize)
00034         store_normalization_values();
00035 
00036     register_parameters();
00037 }
00038 
00039 
00040 CPolyFeatures::~CPolyFeatures()
00041 {
00042     SG_FREE(m_multi_index);
00043     SG_FREE(m_multinomial_coefficients);
00044     SG_FREE(m_normalization_values);
00045     SG_UNREF(m_feat);
00046 }
00047 
00048 CPolyFeatures::CPolyFeatures(const CPolyFeatures & orig)
00049 {
00050     SG_PRINT("CPolyFeatures:\n");
00051     SG_NOTIMPLEMENTED;
00052 };
00053 
00054 int32_t CPolyFeatures::get_dim_feature_space() const
00055 {
00056     return m_output_dimensions;
00057 }
00058 
00059 int32_t CPolyFeatures::get_nnz_features_for_vector(int32_t num)
00060 {
00061     return m_output_dimensions;
00062 }
00063 
00064 EFeatureType CPolyFeatures::get_feature_type()
00065 {
00066     return F_UNKNOWN;
00067 }
00068 
00069 EFeatureClass CPolyFeatures::get_feature_class()
00070 {
00071     return C_POLY;
00072 }
00073 
00074 int32_t CPolyFeatures::get_num_vectors() const
00075 {
00076     if (m_feat)
00077         return m_feat->get_num_vectors();
00078     else
00079         return 0;
00080 
00081 }
00082 
00083 int32_t CPolyFeatures::get_size()
00084 {
00085     return sizeof(float64_t);
00086 }
00087 
00088 void* CPolyFeatures::get_feature_iterator(int32_t vector_index)
00089 {
00090     SG_NOTIMPLEMENTED;
00091     return NULL;
00092 }
00093 
00094 bool CPolyFeatures::get_next_feature(int32_t& index, float64_t& value, void* iterator)
00095 {
00096     SG_NOTIMPLEMENTED;
00097     return NULL;
00098 }
00099 
00100 void CPolyFeatures::free_feature_iterator(void* iterator)
00101 {
00102     SG_NOTIMPLEMENTED;
00103 }
00104 
00105 
00106 
00107 float64_t CPolyFeatures::dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2)
00108 {
00109     ASSERT(df);
00110     ASSERT(df->get_feature_type() == get_feature_type());
00111     ASSERT(df->get_feature_class() == get_feature_class());
00112 
00113     CPolyFeatures* pf=(CPolyFeatures*) df;
00114 
00115     int32_t len1;
00116     bool do_free1;
00117     float64_t* vec1 = m_feat->get_feature_vector(vec_idx1, len1, do_free1);
00118 
00119     int32_t len2;
00120     bool do_free2;
00121     float64_t* vec2 = pf->m_feat->get_feature_vector(vec_idx2, len2, do_free2);
00122 
00123     float64_t sum=0;
00124     int cnt=0;
00125     for (int j=0; j<m_output_dimensions; j++)
00126     {
00127         float64_t out1=m_multinomial_coefficients[j];
00128         float64_t out2=m_multinomial_coefficients[j];
00129         for (int k=0; k<m_degree; k++)
00130         {
00131             out1*=vec1[m_multi_index[cnt]];
00132             out2*=vec2[m_multi_index[cnt]];
00133             cnt++;
00134         }
00135         sum+=out1*out2;
00136     }
00137     m_feat->free_feature_vector(vec1, len1, do_free1);
00138     pf->m_feat->free_feature_vector(vec2, len2, do_free2);
00139 
00140     return sum;
00141 }
00142 
00143 float64_t CPolyFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00144 {
00145     if (vec2_len != m_output_dimensions)
00146         SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions);
00147 
00148     int32_t len;
00149     bool do_free;
00150     float64_t* vec = m_feat->get_feature_vector(vec_idx1, len, do_free);
00151 
00152     
00153     int cnt=0;
00154     float64_t sum=0;
00155     for (int j=0; j<vec2_len; j++)
00156     {
00157         float64_t output=m_multinomial_coefficients[j];
00158         for (int k=0; k<m_degree; k++)
00159         {
00160             output*=vec[m_multi_index[cnt]];
00161             cnt++;
00162         }
00163         sum+=output*vec2[j];
00164     }
00165     if (m_normalize)
00166         sum = sum/m_normalization_values[vec_idx1];
00167 
00168     m_feat->free_feature_vector(vec, len, do_free);
00169     return sum;
00170 }
00171 void CPolyFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val)
00172 {
00173     if (vec2_len != m_output_dimensions)
00174         SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions);
00175 
00176     int32_t len;
00177     bool do_free;
00178     float64_t* vec = m_feat->get_feature_vector(vec_idx1, len, do_free);
00179 
00180     
00181     int cnt=0;
00182     float32_t norm_val=1;
00183     if (m_normalize)
00184         norm_val = m_normalization_values[vec_idx1];
00185     alpha/=norm_val;
00186     for (int j=0; j<vec2_len; j++)
00187     {
00188         float64_t output=m_multinomial_coefficients[j];
00189         for (int k=0; k<m_degree; k++)
00190         {
00191             output*=vec[m_multi_index[cnt]];
00192             cnt++;
00193         }
00194         if (abs_val)
00195             output=CMath::abs(output);
00196 
00197         vec2[j]+=alpha*output;
00198     }
00199     m_feat->free_feature_vector(vec, len, do_free);
00200 }
00201 void CPolyFeatures::store_normalization_values()
00202 {
00203     SG_FREE(m_normalization_values);
00204 
00205     int32_t num_vec = this->get_num_vectors();
00206 
00207     m_normalization_values=SG_MALLOC(float32_t, num_vec);
00208     for (int i=0; i<num_vec; i++)
00209     {
00210         float64_t tmp = CMath::sqrt(dot(i, this,i));
00211         if (tmp==0)
00212             // trap division by zero
00213             m_normalization_values[i]=1;
00214         else
00215             m_normalization_values[i]=tmp;
00216     }
00217         
00218 }
00219 
00220 void CPolyFeatures::store_multi_index()
00221 {
00222     SG_FREE(m_multi_index);
00223 
00224         m_multi_index=SG_MALLOC(uint16_t, m_output_dimensions*m_degree);
00225 
00226         uint16_t* exponents = SG_MALLOC(uint16_t, m_input_dimensions);
00227         if (!exponents)
00228         SG_ERROR( "Error allocating mem \n");   
00229     /*copy adress: otherwise it will be overwritten in recursion*/
00230         uint16_t* index = m_multi_index;
00231         enumerate_multi_index(0, &index, exponents, m_degree);
00232 
00233     SG_FREE(exponents);
00234 }
00235 
00236 void CPolyFeatures::enumerate_multi_index(const int32_t feat_idx, uint16_t** index, uint16_t* exponents, const int32_t degree)
00237 {
00238     if (feat_idx==m_input_dimensions-1 || degree==0)
00239     {
00240         if (feat_idx==m_input_dimensions-1)
00241             exponents[feat_idx] = degree;
00242         if (degree==0)
00243             exponents[feat_idx] = 0;
00244         int32_t i, j;
00245         for (j=0; j<feat_idx+1; j++)
00246             for (i=0; i<exponents[j]; i++)
00247             {
00248                 **index = j;
00249                 (*index)++;
00250             }
00251         exponents[feat_idx] = 0;
00252         return;
00253     }
00254     int32_t k;
00255     for (k=0; k<=degree; k++)
00256     {
00257         exponents[feat_idx] =  k;
00258         enumerate_multi_index(feat_idx+1, index,  exponents, degree-k); 
00259     }
00260     return;
00261 
00262 }
00263 
00264 void CPolyFeatures::store_multinomial_coefficients()
00265 {
00266     SG_FREE(m_multinomial_coefficients);
00267 
00268     m_multinomial_coefficients = SG_MALLOC(float64_t, m_output_dimensions);
00269     int32_t* exponents = SG_MALLOC(int32_t, m_input_dimensions);
00270     if (!exponents)
00271         SG_ERROR( "Error allocating mem \n");
00272     int32_t j=0;
00273     for (j=0; j<m_input_dimensions; j++)
00274         exponents[j] = 0;
00275     int32_t k, cnt=0;
00276     for (j=0; j<m_output_dimensions; j++)
00277     {
00278         for (k=0; k<m_degree; k++)
00279         {
00280             exponents[m_multi_index[cnt]] ++;
00281             cnt++;
00282         }
00283         m_multinomial_coefficients[j] =  sqrt((double) multinomialcoef(exponents, m_input_dimensions));
00284         for (k=0; k<m_input_dimensions; k++)
00285         {
00286             exponents[k]=0;
00287         }
00288     }
00289     SG_FREE(exponents);
00290 }
00291 
00292 int32_t CPolyFeatures::bico2(int32_t n, int32_t k)
00293 {
00294         
00295     /* for this problem k is usually small (<=degree),
00296      * thus it is efficient to
00297      * to use recursion and prune end recursions*/  
00298     if (n<k)
00299         return 0;
00300     if (k>n/2)
00301         k = n-k;
00302     if (k<0)
00303         return 0;
00304     if (k==0)
00305         return 1;
00306     if (k==1)
00307         return n;
00308     if (k<4)
00309         return bico2(n-1, k-1)+bico2(n-1, k);
00310 
00311     /* call function as implemented in numerical recipies:
00312      * much more efficient for large binomial coefficients*/
00313     return bico(n, k);
00314     
00315 }
00316 
00317 int32_t CPolyFeatures::calc_feature_space_dimensions(int32_t N, int32_t D)
00318 {
00319     if (N==1)
00320         return 1;
00321     if (D==0)
00322         return 1;
00323     int32_t d;
00324     int32_t ret = 0;
00325     for (d=0; d<=D; d++)
00326         ret += calc_feature_space_dimensions(N-1, d);
00327 
00328     return ret;
00329 }
00330 
00331 int32_t CPolyFeatures::multinomialcoef(int32_t* exps, int32_t len)
00332 {
00333     int32_t ret = 1, i;
00334     int32_t n = 0;
00335     for (i=0; i<len; i++)
00336     {
00337         n += exps[i];
00338         ret *= bico2(n, exps[i]);
00339     }
00340     return ret;
00341 }
00342 
00343 /* gammln as implemented in the
00344  * second edition of Numerical Recipes in C */
00345 float64_t CPolyFeatures::gammln(float64_t xx)
00346 {
00347     float64_t x,y,tmp,ser;
00348     static float64_t cof[6]={76.18009172947146,    -86.50532032941677,
00349                           24.01409824083091,    -1.231739572450155,
00350                           0.1208650973866179e-2,-0.5395239384953e-5};
00351     int32_t j;
00352 
00353     y=x=xx;
00354     tmp=x+5.5;
00355     tmp -= (x+0.5)*log(tmp);
00356     ser=1.000000000190015;
00357     for (j=0;j<=5;j++) ser += cof[j]/++y;
00358     return -tmp+log(2.5066282746310005*ser/x);
00359 }
00360 
00361 float64_t CPolyFeatures::factln(int32_t n)
00362 {
00363     static float64_t a[101];
00364 
00365     if (n < 0) SG_ERROR("Negative factorial in routine factln\n");
00366     if (n <= 1) return 0.0;
00367     if (n <= 100) return a[n] ? a[n] : (a[n]=gammln(n+1.0));
00368     else return gammln(n+1.0);
00369 }
00370 
00371 int32_t CPolyFeatures::bico(int32_t n, int32_t k)
00372 {
00373     /* use floor to clean roundoff errors*/
00374     return (int32_t) floor(0.5+exp(factln(n)-factln(k)-factln(n-k)));
00375 }
00376 CFeatures* CPolyFeatures::duplicate() const
00377 {
00378     return new CPolyFeatures(*this);
00379 }
00380 
00381 void CPolyFeatures::register_parameters()
00382 {
00383     m_parameters->add((CSGObject**) &m_feat, "features",
00384                 "Features in original space.");
00385     m_parameters->add(&m_degree, "degree", "Degree of the polynomial kernel.");
00386     m_parameters->add(&m_normalize, "normalize", "Normalize?");
00387     m_parameters->add(&m_input_dimensions, "input_dimensions",
00388             "Dimensions of the input space.");
00389     m_parameters->add(&m_output_dimensions, "output_dimensions",
00390             "Dimensions of the feature space of the polynomial kernel.");
00391 
00392     multi_index_length=m_output_dimensions*m_degree;
00393     m_parameters->add_vector(
00394             &m_multi_index,
00395             &multi_index_length,
00396             "multi_index",
00397             "Flattened matrix of all multi indices that sum do the"
00398             " degree of the polynomial kernel.");
00399 
00400     multinomial_coefficients_length=m_output_dimensions;
00401     m_parameters->add_vector(&m_multinomial_coefficients,
00402             &multinomial_coefficients_length, "multinomial_coefficients",
00403             "Multinomial coefficients for all multi-indices.");
00404 
00405     normalization_values_length=get_num_vectors();
00406     m_parameters->add_vector(&m_normalization_values,
00407             &normalization_values_length, "normalization_values",
00408             "Norm of each training example.");
00409 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation