SHOGUN: PolyFeatures.cpp Source File

Go to the documentation of this file.
00001 #include "features/PolyFeatures.h"
00002 
00003 using namespace shogun;
00004 
00005 CPolyFeatures::CPolyFeatures(void) :CDotFeatures()
00006 {
00007     SG_UNSTABLE("CPolyFeatures::CPolyFeatures(void)", "\n");
00008 
00009     m_feat = NULL;
00010     m_degree = 0;
00011     m_normalize = false;
00012     m_input_dimensions = 0;
00013     m_multi_index = NULL;
00014     m_multinomial_coefficients = NULL;
00015     m_normalization_values = NULL;
00016 }
00017 
00018 CPolyFeatures::CPolyFeatures(CSimpleFeatures<float64_t>* feat, int32_t degree, bool normalize)
00019     : CDotFeatures(), m_multi_index(NULL), m_multinomial_coefficients(NULL), 
00020         m_normalization_values(NULL)
00021 {
00022     ASSERT(feat);
00023 
00024     m_feat = feat;
00025     SG_REF(m_feat);
00026     m_degree=degree;
00027     m_normalize=normalize;
00028     m_input_dimensions=feat->get_num_features();
00029     m_output_dimensions=calc_feature_space_dimensions(m_input_dimensions, m_degree);
00030 
00031     store_multi_index();
00032     store_multinomial_coefficients();
00033     if (m_normalize)
00034         store_normalization_values();
00035 }
00036 
00037 CPolyFeatures::~CPolyFeatures()
00038 {
00039     delete[] m_multi_index;
00040     delete[] m_multinomial_coefficients;
00041     delete[] m_normalization_values;
00042     SG_UNREF(m_feat);
00043 }
00044 
00045 float64_t CPolyFeatures::dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2)
00046 {
00047     ASSERT(df);
00048     ASSERT(df->get_feature_type() == get_feature_type());
00049     ASSERT(df->get_feature_class() == get_feature_class());
00050 
00051     CPolyFeatures* pf=(CPolyFeatures*) df;
00052 
00053     int32_t len1;
00054     bool do_free1;
00055     float64_t* vec1 = m_feat->get_feature_vector(vec_idx1, len1, do_free1);
00056 
00057     int32_t len2;
00058     bool do_free2;
00059     float64_t* vec2 = pf->m_feat->get_feature_vector(vec_idx2, len2, do_free2);
00060 
00061     float64_t sum=0;
00062     int cnt=0;
00063     for (int j=0; j<m_output_dimensions; j++)
00064     {
00065         float64_t out1=m_multinomial_coefficients[j];
00066         float64_t out2=m_multinomial_coefficients[j];
00067         for (int k=0; k<m_degree; k++)
00068         {
00069             out1*=vec1[m_multi_index[cnt]];
00070             out2*=vec2[m_multi_index[cnt]];
00071             cnt++;
00072         }
00073         sum+=out1*out2;
00074     }
00075     m_feat->free_feature_vector(vec1, len1, do_free1);
00076     pf->m_feat->free_feature_vector(vec2, len2, do_free2);
00077 
00078     return sum;
00079 }
00080 
00081 float64_t CPolyFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00082 {
00083     if (vec2_len != m_output_dimensions)
00084         SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions);
00085 
00086     int32_t len;
00087     bool do_free;
00088     float64_t* vec = m_feat->get_feature_vector(vec_idx1, len, do_free);
00089 
00090     
00091     int cnt=0;
00092     float64_t sum=0;
00093     for (int j=0; j<vec2_len; j++)
00094     {
00095         float64_t output=m_multinomial_coefficients[j];
00096         for (int k=0; k<m_degree; k++)
00097         {
00098             output*=vec[m_multi_index[cnt]];
00099             cnt++;
00100         }
00101         sum+=output*vec2[j];
00102     }
00103     if (m_normalize)
00104         sum = sum/m_normalization_values[vec_idx1];
00105 
00106     m_feat->free_feature_vector(vec, len, do_free);
00107     return sum;
00108 }
00109 void CPolyFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val)
00110 {
00111     if (vec2_len != m_output_dimensions)
00112         SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions);
00113 
00114     int32_t len;
00115     bool do_free;
00116     float64_t* vec = m_feat->get_feature_vector(vec_idx1, len, do_free);
00117 
00118     
00119     int cnt=0;
00120     float32_t norm_val=1;
00121     if (m_normalize)
00122         norm_val = m_normalization_values[vec_idx1];
00123     alpha/=norm_val;
00124     for (int j=0; j<vec2_len; j++)
00125     {
00126         float64_t output=m_multinomial_coefficients[j];
00127         for (int k=0; k<m_degree; k++)
00128         {
00129             output*=vec[m_multi_index[cnt]];
00130             cnt++;
00131         }
00132         if (abs_val)
00133             output=CMath::abs(output); 
00134 
00135         vec2[j]+=alpha*output;
00136     }
00137     m_feat->free_feature_vector(vec, len, do_free);
00138 }
00139 void CPolyFeatures::store_normalization_values()
00140 {
00141     delete[] m_normalization_values;
00142 
00143     int32_t num_vec = this->get_num_vectors();
00144 
00145     m_normalization_values=new float32_t[num_vec];
00146     for (int i=0; i<num_vec; i++)
00147     {
00148         float64_t tmp = CMath::sqrt(dot(i, this,i)); 
00149         if (tmp==0)
00150             // trap division by zero
00151             m_normalization_values[i]=1;
00152         else 
00153             m_normalization_values[i]=tmp;
00154     }
00155         
00156 }
00157 
00158 void CPolyFeatures::store_multi_index()
00159 {
00160     delete[] m_multi_index;
00161 
00162         m_multi_index=new uint16_t[m_output_dimensions*m_degree];
00163 
00164         uint16_t* exponents = new uint16_t[m_input_dimensions];
00165         if (!exponents)
00166         SG_ERROR( "Error allocating mem \n");   
00167     /*copy adress: otherwise it will be overwritten in recursion*/
00168         uint16_t* index = m_multi_index;
00169         enumerate_multi_index(0, &index, exponents, m_degree);
00170 
00171     delete[] exponents;
00172 }
00173 
00174 void CPolyFeatures::enumerate_multi_index(const int32_t feat_idx, uint16_t** index, uint16_t* exponents, const int32_t degree)
00175 {
00176     if (feat_idx==m_input_dimensions-1 || degree==0)
00177     {
00178         if (feat_idx==m_input_dimensions-1)
00179             exponents[feat_idx] = degree;
00180         if (degree==0)
00181             exponents[feat_idx] = 0;
00182         int32_t i, j;
00183         for (j=0; j<feat_idx+1; j++)
00184             for (i=0; i<exponents[j]; i++)
00185             {
00186                 **index = j;
00187                 (*index)++;
00188             }
00189         exponents[feat_idx] = 0;
00190         return;
00191     }
00192     int32_t k;
00193     for (k=0; k<=degree; k++)
00194     {
00195         exponents[feat_idx] =  k;
00196         enumerate_multi_index(feat_idx+1, index,  exponents, degree-k); 
00197     }
00198     return; 
00199 
00200 }
00201 
00202 void CPolyFeatures::store_multinomial_coefficients()
00203 {
00204     delete[] m_multinomial_coefficients;
00205 
00206     m_multinomial_coefficients = new float64_t[m_output_dimensions];
00207     int32_t* exponents = new int32_t[m_input_dimensions];
00208     if (!exponents)
00209         SG_ERROR( "Error allocating mem \n");
00210     int32_t j=0;
00211     for (j=0; j<m_input_dimensions; j++)
00212         exponents[j] = 0;
00213     int32_t k, cnt=0;
00214     for (j=0; j<m_output_dimensions; j++)
00215     {
00216         for (k=0; k<m_degree; k++)
00217         {
00218             exponents[m_multi_index[cnt]] ++;
00219             cnt++;
00220         }
00221         m_multinomial_coefficients[j] =  sqrt((double) multinomialcoef(exponents, m_input_dimensions));
00222         for (k=0; k<m_input_dimensions; k++)
00223         {
00224             exponents[k]=0;
00225         }
00226     }
00227     delete[] exponents;
00228 }
00229 
00230 int32_t CPolyFeatures::bico2(int32_t n, int32_t k)
00231 {
00232         
00233     /* for this problem k is usually small (<=degree), 
00234      * thus it is efficient to 
00235      * to use recursion and prune end recursions*/  
00236     if (n<k) 
00237         return 0;
00238     if (k>n/2)
00239         k = n-k;
00240     if (k<0)
00241         return 0;
00242     if (k==0)
00243         return 1;
00244     if (k==1)
00245         return n;
00246     if (k<4)
00247         return bico2(n-1, k-1)+bico2(n-1, k);
00248 
00249     /* call function as implemented in numerical recipies:
00250      * much more efficient for large binomial coefficients*/
00251     return bico(n, k);
00252     
00253 }
00254 
00255 int32_t CPolyFeatures::calc_feature_space_dimensions(int32_t N, int32_t D)
00256 {
00257     if (N==1)
00258         return 1;
00259     if (D==0)
00260         return 1;
00261     int32_t d;
00262     int32_t ret = 0;
00263     for (d=0; d<=D; d++)
00264         ret += calc_feature_space_dimensions(N-1, d);
00265 
00266     return ret;
00267 }
00268 
00269 int32_t CPolyFeatures::multinomialcoef(int32_t* exps, int32_t len)
00270 {
00271     int32_t ret = 1, i;
00272     int32_t n = 0;
00273     for (i=0; i<len; i++)
00274     {
00275         n += exps[i];
00276         ret *= bico2(n, exps[i]);
00277     }
00278     return ret; 
00279 }
00280 
00281 /* gammln as implemented in the
00282  * second edition of Numerical Recipes in C */
00283 float64_t CPolyFeatures::gammln(float64_t xx)
00284 {
00285     float64_t x,y,tmp,ser;
00286     static float64_t cof[6]={76.18009172947146,    -86.50532032941677,
00287                           24.01409824083091,    -1.231739572450155,
00288                           0.1208650973866179e-2,-0.5395239384953e-5};
00289     int32_t j;
00290 
00291     y=x=xx;
00292     tmp=x+5.5;
00293     tmp -= (x+0.5)*log(tmp);
00294     ser=1.000000000190015;
00295     for (j=0;j<=5;j++) ser += cof[j]/++y;
00296     return -tmp+log(2.5066282746310005*ser/x);
00297 }
00298 
00299 float64_t CPolyFeatures::factln(int32_t n)
00300 {
00301     static float64_t a[101];
00302 
00303     if (n < 0) SG_ERROR("Negative factorial in routine factln\n");
00304     if (n <= 1) return 0.0;
00305     if (n <= 100) return a[n] ? a[n] : (a[n]=gammln(n+1.0));
00306     else return gammln(n+1.0);
00307 }
00308 
00309 int32_t CPolyFeatures::bico(int32_t n, int32_t k)
00310 {
00311     /* use floor to clean roundoff errors*/
00312     return (int32_t) floor(0.5+exp(factln(n)-factln(k)-factln(n-k)));
00313 }
00314 CFeatures* CPolyFeatures::duplicate() const
00315 {
00316     return new CPolyFeatures(*this);
00317 }