00001 #include <shogun/features/PolyFeatures.h>
00002
00003 using namespace shogun;
00004
00005 CPolyFeatures::CPolyFeatures() :CDotFeatures()
00006 {
00007 m_feat=NULL;
00008 m_degree=0;
00009 m_normalize=false;
00010 m_input_dimensions=0;
00011 m_multi_index=NULL;
00012 m_multinomial_coefficients=NULL;
00013 m_normalization_values=NULL;
00014
00015 register_parameters();
00016 }
00017
00018 CPolyFeatures::CPolyFeatures(CSimpleFeatures<float64_t>* feat, int32_t degree, bool normalize)
00019 : CDotFeatures(), m_multi_index(NULL), m_multinomial_coefficients(NULL),
00020 m_normalization_values(NULL)
00021 {
00022 ASSERT(feat);
00023
00024 m_feat = feat;
00025 SG_REF(m_feat);
00026 m_degree=degree;
00027 m_normalize=normalize;
00028 m_input_dimensions=feat->get_num_features();
00029 m_output_dimensions=calc_feature_space_dimensions(m_input_dimensions, m_degree);
00030
00031 store_multi_index();
00032 store_multinomial_coefficients();
00033 if (m_normalize)
00034 store_normalization_values();
00035
00036 register_parameters();
00037 }
00038
00039 CPolyFeatures::~CPolyFeatures()
00040 {
00041 SG_FREE(m_multi_index);
00042 SG_FREE(m_multinomial_coefficients);
00043 SG_FREE(m_normalization_values);
00044 SG_UNREF(m_feat);
00045 }
00046
00047 float64_t CPolyFeatures::dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2)
00048 {
00049 ASSERT(df);
00050 ASSERT(df->get_feature_type() == get_feature_type());
00051 ASSERT(df->get_feature_class() == get_feature_class());
00052
00053 CPolyFeatures* pf=(CPolyFeatures*) df;
00054
00055 int32_t len1;
00056 bool do_free1;
00057 float64_t* vec1 = m_feat->get_feature_vector(vec_idx1, len1, do_free1);
00058
00059 int32_t len2;
00060 bool do_free2;
00061 float64_t* vec2 = pf->m_feat->get_feature_vector(vec_idx2, len2, do_free2);
00062
00063 float64_t sum=0;
00064 int cnt=0;
00065 for (int j=0; j<m_output_dimensions; j++)
00066 {
00067 float64_t out1=m_multinomial_coefficients[j];
00068 float64_t out2=m_multinomial_coefficients[j];
00069 for (int k=0; k<m_degree; k++)
00070 {
00071 out1*=vec1[m_multi_index[cnt]];
00072 out2*=vec2[m_multi_index[cnt]];
00073 cnt++;
00074 }
00075 sum+=out1*out2;
00076 }
00077 m_feat->free_feature_vector(vec1, len1, do_free1);
00078 pf->m_feat->free_feature_vector(vec2, len2, do_free2);
00079
00080 return sum;
00081 }
00082
00083 float64_t CPolyFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00084 {
00085 if (vec2_len != m_output_dimensions)
00086 SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions);
00087
00088 int32_t len;
00089 bool do_free;
00090 float64_t* vec = m_feat->get_feature_vector(vec_idx1, len, do_free);
00091
00092
00093 int cnt=0;
00094 float64_t sum=0;
00095 for (int j=0; j<vec2_len; j++)
00096 {
00097 float64_t output=m_multinomial_coefficients[j];
00098 for (int k=0; k<m_degree; k++)
00099 {
00100 output*=vec[m_multi_index[cnt]];
00101 cnt++;
00102 }
00103 sum+=output*vec2[j];
00104 }
00105 if (m_normalize)
00106 sum = sum/m_normalization_values[vec_idx1];
00107
00108 m_feat->free_feature_vector(vec, len, do_free);
00109 return sum;
00110 }
00111 void CPolyFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val)
00112 {
00113 if (vec2_len != m_output_dimensions)
00114 SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions);
00115
00116 int32_t len;
00117 bool do_free;
00118 float64_t* vec = m_feat->get_feature_vector(vec_idx1, len, do_free);
00119
00120
00121 int cnt=0;
00122 float32_t norm_val=1;
00123 if (m_normalize)
00124 norm_val = m_normalization_values[vec_idx1];
00125 alpha/=norm_val;
00126 for (int j=0; j<vec2_len; j++)
00127 {
00128 float64_t output=m_multinomial_coefficients[j];
00129 for (int k=0; k<m_degree; k++)
00130 {
00131 output*=vec[m_multi_index[cnt]];
00132 cnt++;
00133 }
00134 if (abs_val)
00135 output=CMath::abs(output);
00136
00137 vec2[j]+=alpha*output;
00138 }
00139 m_feat->free_feature_vector(vec, len, do_free);
00140 }
00141 void CPolyFeatures::store_normalization_values()
00142 {
00143 SG_FREE(m_normalization_values);
00144
00145 int32_t num_vec = this->get_num_vectors();
00146
00147 m_normalization_values=SG_MALLOC(float32_t, num_vec);
00148 for (int i=0; i<num_vec; i++)
00149 {
00150 float64_t tmp = CMath::sqrt(dot(i, this,i));
00151 if (tmp==0)
00152
00153 m_normalization_values[i]=1;
00154 else
00155 m_normalization_values[i]=tmp;
00156 }
00157
00158 }
00159
00160 void CPolyFeatures::store_multi_index()
00161 {
00162 SG_FREE(m_multi_index);
00163
00164 m_multi_index=SG_MALLOC(uint16_t, m_output_dimensions*m_degree);
00165
00166 uint16_t* exponents = SG_MALLOC(uint16_t, m_input_dimensions);
00167 if (!exponents)
00168 SG_ERROR( "Error allocating mem \n");
00169
00170 uint16_t* index = m_multi_index;
00171 enumerate_multi_index(0, &index, exponents, m_degree);
00172
00173 SG_FREE(exponents);
00174 }
00175
00176 void CPolyFeatures::enumerate_multi_index(const int32_t feat_idx, uint16_t** index, uint16_t* exponents, const int32_t degree)
00177 {
00178 if (feat_idx==m_input_dimensions-1 || degree==0)
00179 {
00180 if (feat_idx==m_input_dimensions-1)
00181 exponents[feat_idx] = degree;
00182 if (degree==0)
00183 exponents[feat_idx] = 0;
00184 int32_t i, j;
00185 for (j=0; j<feat_idx+1; j++)
00186 for (i=0; i<exponents[j]; i++)
00187 {
00188 **index = j;
00189 (*index)++;
00190 }
00191 exponents[feat_idx] = 0;
00192 return;
00193 }
00194 int32_t k;
00195 for (k=0; k<=degree; k++)
00196 {
00197 exponents[feat_idx] = k;
00198 enumerate_multi_index(feat_idx+1, index, exponents, degree-k);
00199 }
00200 return;
00201
00202 }
00203
00204 void CPolyFeatures::store_multinomial_coefficients()
00205 {
00206 SG_FREE(m_multinomial_coefficients);
00207
00208 m_multinomial_coefficients = SG_MALLOC(float64_t, m_output_dimensions);
00209 int32_t* exponents = SG_MALLOC(int32_t, m_input_dimensions);
00210 if (!exponents)
00211 SG_ERROR( "Error allocating mem \n");
00212 int32_t j=0;
00213 for (j=0; j<m_input_dimensions; j++)
00214 exponents[j] = 0;
00215 int32_t k, cnt=0;
00216 for (j=0; j<m_output_dimensions; j++)
00217 {
00218 for (k=0; k<m_degree; k++)
00219 {
00220 exponents[m_multi_index[cnt]] ++;
00221 cnt++;
00222 }
00223 m_multinomial_coefficients[j] = sqrt((double) multinomialcoef(exponents, m_input_dimensions));
00224 for (k=0; k<m_input_dimensions; k++)
00225 {
00226 exponents[k]=0;
00227 }
00228 }
00229 SG_FREE(exponents);
00230 }
00231
00232 int32_t CPolyFeatures::bico2(int32_t n, int32_t k)
00233 {
00234
00235
00236
00237
00238 if (n<k)
00239 return 0;
00240 if (k>n/2)
00241 k = n-k;
00242 if (k<0)
00243 return 0;
00244 if (k==0)
00245 return 1;
00246 if (k==1)
00247 return n;
00248 if (k<4)
00249 return bico2(n-1, k-1)+bico2(n-1, k);
00250
00251
00252
00253 return bico(n, k);
00254
00255 }
00256
00257 int32_t CPolyFeatures::calc_feature_space_dimensions(int32_t N, int32_t D)
00258 {
00259 if (N==1)
00260 return 1;
00261 if (D==0)
00262 return 1;
00263 int32_t d;
00264 int32_t ret = 0;
00265 for (d=0; d<=D; d++)
00266 ret += calc_feature_space_dimensions(N-1, d);
00267
00268 return ret;
00269 }
00270
00271 int32_t CPolyFeatures::multinomialcoef(int32_t* exps, int32_t len)
00272 {
00273 int32_t ret = 1, i;
00274 int32_t n = 0;
00275 for (i=0; i<len; i++)
00276 {
00277 n += exps[i];
00278 ret *= bico2(n, exps[i]);
00279 }
00280 return ret;
00281 }
00282
00283
00284
00285 float64_t CPolyFeatures::gammln(float64_t xx)
00286 {
00287 float64_t x,y,tmp,ser;
00288 static float64_t cof[6]={76.18009172947146, -86.50532032941677,
00289 24.01409824083091, -1.231739572450155,
00290 0.1208650973866179e-2,-0.5395239384953e-5};
00291 int32_t j;
00292
00293 y=x=xx;
00294 tmp=x+5.5;
00295 tmp -= (x+0.5)*log(tmp);
00296 ser=1.000000000190015;
00297 for (j=0;j<=5;j++) ser += cof[j]/++y;
00298 return -tmp+log(2.5066282746310005*ser/x);
00299 }
00300
00301 float64_t CPolyFeatures::factln(int32_t n)
00302 {
00303 static float64_t a[101];
00304
00305 if (n < 0) SG_ERROR("Negative factorial in routine factln\n");
00306 if (n <= 1) return 0.0;
00307 if (n <= 100) return a[n] ? a[n] : (a[n]=gammln(n+1.0));
00308 else return gammln(n+1.0);
00309 }
00310
00311 int32_t CPolyFeatures::bico(int32_t n, int32_t k)
00312 {
00313
00314 return (int32_t) floor(0.5+exp(factln(n)-factln(k)-factln(n-k)));
00315 }
00316 CFeatures* CPolyFeatures::duplicate() const
00317 {
00318 return new CPolyFeatures(*this);
00319 }
00320
00321 void CPolyFeatures::register_parameters()
00322 {
00323 m_parameters->add((CSGObject**) &m_feat, "features",
00324 "Features in original space.");
00325 m_parameters->add(&m_degree, "degree", "Degree of the polynomial kernel.");
00326 m_parameters->add(&m_normalize, "normalize", "Normalize?");
00327 m_parameters->add(&m_input_dimensions, "input_dimensions",
00328 "Dimensions of the input space.");
00329 m_parameters->add(&m_output_dimensions, "output_dimensions",
00330 "Dimensions of the feature space of the polynomial kernel.");
00331
00332 multi_index_length=m_output_dimensions*m_degree;
00333 m_parameters->add_vector(
00334 &m_multi_index,
00335 &multi_index_length,
00336 "multi_index",
00337 "Flattened matrix of all multi indices that sum do the"
00338 " degree of the polynomial kernel.");
00339
00340 multinomial_coefficients_length=m_output_dimensions;
00341 m_parameters->add_vector(&m_multinomial_coefficients,
00342 &multinomial_coefficients_length, "multinomial_coefficients",
00343 "Multinomial coefficients for all multi-indices.");
00344
00345 normalization_values_length=get_num_vectors();
00346 m_parameters->add_vector(&m_normalization_values,
00347 &normalization_values_length, "normalization_values",
00348 "Norm of each training example.");
00349 }