PolyFeatures.cpp

Go to the documentation of this file.
00001 #include <shogun/features/PolyFeatures.h>
00002 
00003 using namespace shogun;
00004 
00005 CPolyFeatures::CPolyFeatures() :CDotFeatures()
00006 {
00007     m_feat=NULL;
00008     m_degree=0;
00009     m_normalize=false;
00010     m_input_dimensions=0;
00011     m_multi_index=NULL;
00012     m_multinomial_coefficients=NULL;
00013     m_normalization_values=NULL;
00014 
00015     register_parameters();
00016 }
00017 
00018 CPolyFeatures::CPolyFeatures(CSimpleFeatures<float64_t>* feat, int32_t degree, bool normalize)
00019     : CDotFeatures(), m_multi_index(NULL), m_multinomial_coefficients(NULL),
00020         m_normalization_values(NULL)
00021 {
00022     ASSERT(feat);
00023 
00024     m_feat = feat;
00025     SG_REF(m_feat);
00026     m_degree=degree;
00027     m_normalize=normalize;
00028     m_input_dimensions=feat->get_num_features();
00029     m_output_dimensions=calc_feature_space_dimensions(m_input_dimensions, m_degree);
00030 
00031     store_multi_index();
00032     store_multinomial_coefficients();
00033     if (m_normalize)
00034         store_normalization_values();
00035 
00036     register_parameters();
00037 }
00038 
00039 CPolyFeatures::~CPolyFeatures()
00040 {
00041     SG_FREE(m_multi_index);
00042     SG_FREE(m_multinomial_coefficients);
00043     SG_FREE(m_normalization_values);
00044     SG_UNREF(m_feat);
00045 }
00046 
00047 float64_t CPolyFeatures::dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2)
00048 {
00049     ASSERT(df);
00050     ASSERT(df->get_feature_type() == get_feature_type());
00051     ASSERT(df->get_feature_class() == get_feature_class());
00052 
00053     CPolyFeatures* pf=(CPolyFeatures*) df;
00054 
00055     int32_t len1;
00056     bool do_free1;
00057     float64_t* vec1 = m_feat->get_feature_vector(vec_idx1, len1, do_free1);
00058 
00059     int32_t len2;
00060     bool do_free2;
00061     float64_t* vec2 = pf->m_feat->get_feature_vector(vec_idx2, len2, do_free2);
00062 
00063     float64_t sum=0;
00064     int cnt=0;
00065     for (int j=0; j<m_output_dimensions; j++)
00066     {
00067         float64_t out1=m_multinomial_coefficients[j];
00068         float64_t out2=m_multinomial_coefficients[j];
00069         for (int k=0; k<m_degree; k++)
00070         {
00071             out1*=vec1[m_multi_index[cnt]];
00072             out2*=vec2[m_multi_index[cnt]];
00073             cnt++;
00074         }
00075         sum+=out1*out2;
00076     }
00077     m_feat->free_feature_vector(vec1, len1, do_free1);
00078     pf->m_feat->free_feature_vector(vec2, len2, do_free2);
00079 
00080     return sum;
00081 }
00082 
00083 float64_t CPolyFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00084 {
00085     if (vec2_len != m_output_dimensions)
00086         SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions);
00087 
00088     int32_t len;
00089     bool do_free;
00090     float64_t* vec = m_feat->get_feature_vector(vec_idx1, len, do_free);
00091 
00092     
00093     int cnt=0;
00094     float64_t sum=0;
00095     for (int j=0; j<vec2_len; j++)
00096     {
00097         float64_t output=m_multinomial_coefficients[j];
00098         for (int k=0; k<m_degree; k++)
00099         {
00100             output*=vec[m_multi_index[cnt]];
00101             cnt++;
00102         }
00103         sum+=output*vec2[j];
00104     }
00105     if (m_normalize)
00106         sum = sum/m_normalization_values[vec_idx1];
00107 
00108     m_feat->free_feature_vector(vec, len, do_free);
00109     return sum;
00110 }
00111 void CPolyFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val)
00112 {
00113     if (vec2_len != m_output_dimensions)
00114         SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions);
00115 
00116     int32_t len;
00117     bool do_free;
00118     float64_t* vec = m_feat->get_feature_vector(vec_idx1, len, do_free);
00119 
00120     
00121     int cnt=0;
00122     float32_t norm_val=1;
00123     if (m_normalize)
00124         norm_val = m_normalization_values[vec_idx1];
00125     alpha/=norm_val;
00126     for (int j=0; j<vec2_len; j++)
00127     {
00128         float64_t output=m_multinomial_coefficients[j];
00129         for (int k=0; k<m_degree; k++)
00130         {
00131             output*=vec[m_multi_index[cnt]];
00132             cnt++;
00133         }
00134         if (abs_val)
00135             output=CMath::abs(output);
00136 
00137         vec2[j]+=alpha*output;
00138     }
00139     m_feat->free_feature_vector(vec, len, do_free);
00140 }
00141 void CPolyFeatures::store_normalization_values()
00142 {
00143     SG_FREE(m_normalization_values);
00144 
00145     int32_t num_vec = this->get_num_vectors();
00146 
00147     m_normalization_values=SG_MALLOC(float32_t, num_vec);
00148     for (int i=0; i<num_vec; i++)
00149     {
00150         float64_t tmp = CMath::sqrt(dot(i, this,i));
00151         if (tmp==0)
00152             // trap division by zero
00153             m_normalization_values[i]=1;
00154         else
00155             m_normalization_values[i]=tmp;
00156     }
00157         
00158 }
00159 
00160 void CPolyFeatures::store_multi_index()
00161 {
00162     SG_FREE(m_multi_index);
00163 
00164         m_multi_index=SG_MALLOC(uint16_t, m_output_dimensions*m_degree);
00165 
00166         uint16_t* exponents = SG_MALLOC(uint16_t, m_input_dimensions);
00167         if (!exponents)
00168         SG_ERROR( "Error allocating mem \n");   
00169     /*copy adress: otherwise it will be overwritten in recursion*/
00170         uint16_t* index = m_multi_index;
00171         enumerate_multi_index(0, &index, exponents, m_degree);
00172 
00173     SG_FREE(exponents);
00174 }
00175 
00176 void CPolyFeatures::enumerate_multi_index(const int32_t feat_idx, uint16_t** index, uint16_t* exponents, const int32_t degree)
00177 {
00178     if (feat_idx==m_input_dimensions-1 || degree==0)
00179     {
00180         if (feat_idx==m_input_dimensions-1)
00181             exponents[feat_idx] = degree;
00182         if (degree==0)
00183             exponents[feat_idx] = 0;
00184         int32_t i, j;
00185         for (j=0; j<feat_idx+1; j++)
00186             for (i=0; i<exponents[j]; i++)
00187             {
00188                 **index = j;
00189                 (*index)++;
00190             }
00191         exponents[feat_idx] = 0;
00192         return;
00193     }
00194     int32_t k;
00195     for (k=0; k<=degree; k++)
00196     {
00197         exponents[feat_idx] =  k;
00198         enumerate_multi_index(feat_idx+1, index,  exponents, degree-k); 
00199     }
00200     return;
00201 
00202 }
00203 
00204 void CPolyFeatures::store_multinomial_coefficients()
00205 {
00206     SG_FREE(m_multinomial_coefficients);
00207 
00208     m_multinomial_coefficients = SG_MALLOC(float64_t, m_output_dimensions);
00209     int32_t* exponents = SG_MALLOC(int32_t, m_input_dimensions);
00210     if (!exponents)
00211         SG_ERROR( "Error allocating mem \n");
00212     int32_t j=0;
00213     for (j=0; j<m_input_dimensions; j++)
00214         exponents[j] = 0;
00215     int32_t k, cnt=0;
00216     for (j=0; j<m_output_dimensions; j++)
00217     {
00218         for (k=0; k<m_degree; k++)
00219         {
00220             exponents[m_multi_index[cnt]] ++;
00221             cnt++;
00222         }
00223         m_multinomial_coefficients[j] =  sqrt((double) multinomialcoef(exponents, m_input_dimensions));
00224         for (k=0; k<m_input_dimensions; k++)
00225         {
00226             exponents[k]=0;
00227         }
00228     }
00229     SG_FREE(exponents);
00230 }
00231 
00232 int32_t CPolyFeatures::bico2(int32_t n, int32_t k)
00233 {
00234         
00235     /* for this problem k is usually small (<=degree),
00236      * thus it is efficient to
00237      * to use recursion and prune end recursions*/  
00238     if (n<k)
00239         return 0;
00240     if (k>n/2)
00241         k = n-k;
00242     if (k<0)
00243         return 0;
00244     if (k==0)
00245         return 1;
00246     if (k==1)
00247         return n;
00248     if (k<4)
00249         return bico2(n-1, k-1)+bico2(n-1, k);
00250 
00251     /* call function as implemented in numerical recipies:
00252      * much more efficient for large binomial coefficients*/
00253     return bico(n, k);
00254     
00255 }
00256 
00257 int32_t CPolyFeatures::calc_feature_space_dimensions(int32_t N, int32_t D)
00258 {
00259     if (N==1)
00260         return 1;
00261     if (D==0)
00262         return 1;
00263     int32_t d;
00264     int32_t ret = 0;
00265     for (d=0; d<=D; d++)
00266         ret += calc_feature_space_dimensions(N-1, d);
00267 
00268     return ret;
00269 }
00270 
00271 int32_t CPolyFeatures::multinomialcoef(int32_t* exps, int32_t len)
00272 {
00273     int32_t ret = 1, i;
00274     int32_t n = 0;
00275     for (i=0; i<len; i++)
00276     {
00277         n += exps[i];
00278         ret *= bico2(n, exps[i]);
00279     }
00280     return ret;
00281 }
00282 
00283 /* gammln as implemented in the
00284  * second edition of Numerical Recipes in C */
00285 float64_t CPolyFeatures::gammln(float64_t xx)
00286 {
00287     float64_t x,y,tmp,ser;
00288     static float64_t cof[6]={76.18009172947146,    -86.50532032941677,
00289                           24.01409824083091,    -1.231739572450155,
00290                           0.1208650973866179e-2,-0.5395239384953e-5};
00291     int32_t j;
00292 
00293     y=x=xx;
00294     tmp=x+5.5;
00295     tmp -= (x+0.5)*log(tmp);
00296     ser=1.000000000190015;
00297     for (j=0;j<=5;j++) ser += cof[j]/++y;
00298     return -tmp+log(2.5066282746310005*ser/x);
00299 }
00300 
00301 float64_t CPolyFeatures::factln(int32_t n)
00302 {
00303     static float64_t a[101];
00304 
00305     if (n < 0) SG_ERROR("Negative factorial in routine factln\n");
00306     if (n <= 1) return 0.0;
00307     if (n <= 100) return a[n] ? a[n] : (a[n]=gammln(n+1.0));
00308     else return gammln(n+1.0);
00309 }
00310 
00311 int32_t CPolyFeatures::bico(int32_t n, int32_t k)
00312 {
00313     /* use floor to clean roundoff errors*/
00314     return (int32_t) floor(0.5+exp(factln(n)-factln(k)-factln(n-k)));
00315 }
00316 CFeatures* CPolyFeatures::duplicate() const
00317 {
00318     return new CPolyFeatures(*this);
00319 }
00320 
00321 void CPolyFeatures::register_parameters()
00322 {
00323     m_parameters->add((CSGObject**) &m_feat, "features",
00324                 "Features in original space.");
00325     m_parameters->add(&m_degree, "degree", "Degree of the polynomial kernel.");
00326     m_parameters->add(&m_normalize, "normalize", "Normalize?");
00327     m_parameters->add(&m_input_dimensions, "input_dimensions",
00328             "Dimensions of the input space.");
00329     m_parameters->add(&m_output_dimensions, "output_dimensions",
00330             "Dimensions of the feature space of the polynomial kernel.");
00331 
00332     multi_index_length=m_output_dimensions*m_degree;
00333     m_parameters->add_vector(
00334             &m_multi_index,
00335             &multi_index_length,
00336             "multi_index",
00337             "Flattened matrix of all multi indices that sum do the"
00338             " degree of the polynomial kernel.");
00339 
00340     multinomial_coefficients_length=m_output_dimensions;
00341     m_parameters->add_vector(&m_multinomial_coefficients,
00342             &multinomial_coefficients_length, "multinomial_coefficients",
00343             "Multinomial coefficients for all multi-indices.");
00344 
00345     normalization_values_length=get_num_vectors();
00346     m_parameters->add_vector(&m_normalization_values,
00347             &normalization_values_length, "normalization_values",
00348             "Norm of each training example.");
00349 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation