00001 #include "features/PolyFeatures.h"
00002
00003 using namespace shogun;
00004
00005 CPolyFeatures::CPolyFeatures(void) :CDotFeatures()
00006 {
00007 SG_UNSTABLE("CPolyFeatures::CPolyFeatures(void)", "\n");
00008
00009 m_feat = NULL;
00010 m_degree = 0;
00011 m_normalize = false;
00012 m_input_dimensions = 0;
00013 m_multi_index = NULL;
00014 m_multinomial_coefficients = NULL;
00015 m_normalization_values = NULL;
00016 }
00017
00018 CPolyFeatures::CPolyFeatures(CSimpleFeatures<float64_t>* feat, int32_t degree, bool normalize)
00019 : CDotFeatures(), m_multi_index(NULL), m_multinomial_coefficients(NULL),
00020 m_normalization_values(NULL)
00021 {
00022 ASSERT(feat);
00023
00024 m_feat = feat;
00025 SG_REF(m_feat);
00026 m_degree=degree;
00027 m_normalize=normalize;
00028 m_input_dimensions=feat->get_num_features();
00029 m_output_dimensions=calc_feature_space_dimensions(m_input_dimensions, m_degree);
00030
00031 store_multi_index();
00032 store_multinomial_coefficients();
00033 if (m_normalize)
00034 store_normalization_values();
00035 }
00036
00037 CPolyFeatures::~CPolyFeatures()
00038 {
00039 delete[] m_multi_index;
00040 delete[] m_multinomial_coefficients;
00041 delete[] m_normalization_values;
00042 SG_UNREF(m_feat);
00043 }
00044
00045 float64_t CPolyFeatures::dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2)
00046 {
00047 ASSERT(df);
00048 ASSERT(df->get_feature_type() == get_feature_type());
00049 ASSERT(df->get_feature_class() == get_feature_class());
00050
00051 CPolyFeatures* pf=(CPolyFeatures*) df;
00052
00053 int32_t len1;
00054 bool do_free1;
00055 float64_t* vec1 = m_feat->get_feature_vector(vec_idx1, len1, do_free1);
00056
00057 int32_t len2;
00058 bool do_free2;
00059 float64_t* vec2 = pf->m_feat->get_feature_vector(vec_idx2, len2, do_free2);
00060
00061 float64_t sum=0;
00062 int cnt=0;
00063 for (int j=0; j<m_output_dimensions; j++)
00064 {
00065 float64_t out1=m_multinomial_coefficients[j];
00066 float64_t out2=m_multinomial_coefficients[j];
00067 for (int k=0; k<m_degree; k++)
00068 {
00069 out1*=vec1[m_multi_index[cnt]];
00070 out2*=vec2[m_multi_index[cnt]];
00071 cnt++;
00072 }
00073 sum+=out1*out2;
00074 }
00075 m_feat->free_feature_vector(vec1, len1, do_free1);
00076 pf->m_feat->free_feature_vector(vec2, len2, do_free2);
00077
00078 return sum;
00079 }
00080
00081 float64_t CPolyFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00082 {
00083 if (vec2_len != m_output_dimensions)
00084 SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions);
00085
00086 int32_t len;
00087 bool do_free;
00088 float64_t* vec = m_feat->get_feature_vector(vec_idx1, len, do_free);
00089
00090
00091 int cnt=0;
00092 float64_t sum=0;
00093 for (int j=0; j<vec2_len; j++)
00094 {
00095 float64_t output=m_multinomial_coefficients[j];
00096 for (int k=0; k<m_degree; k++)
00097 {
00098 output*=vec[m_multi_index[cnt]];
00099 cnt++;
00100 }
00101 sum+=output*vec2[j];
00102 }
00103 if (m_normalize)
00104 sum = sum/m_normalization_values[vec_idx1];
00105
00106 m_feat->free_feature_vector(vec, len, do_free);
00107 return sum;
00108 }
00109 void CPolyFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val)
00110 {
00111 if (vec2_len != m_output_dimensions)
00112 SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions);
00113
00114 int32_t len;
00115 bool do_free;
00116 float64_t* vec = m_feat->get_feature_vector(vec_idx1, len, do_free);
00117
00118
00119 int cnt=0;
00120 float32_t norm_val=1;
00121 if (m_normalize)
00122 norm_val = m_normalization_values[vec_idx1];
00123 alpha/=norm_val;
00124 for (int j=0; j<vec2_len; j++)
00125 {
00126 float64_t output=m_multinomial_coefficients[j];
00127 for (int k=0; k<m_degree; k++)
00128 {
00129 output*=vec[m_multi_index[cnt]];
00130 cnt++;
00131 }
00132 if (abs_val)
00133 output=CMath::abs(output);
00134
00135 vec2[j]+=alpha*output;
00136 }
00137 m_feat->free_feature_vector(vec, len, do_free);
00138 }
00139 void CPolyFeatures::store_normalization_values()
00140 {
00141 delete[] m_normalization_values;
00142
00143 int32_t num_vec = this->get_num_vectors();
00144
00145 m_normalization_values=new float32_t[num_vec];
00146 for (int i=0; i<num_vec; i++)
00147 {
00148 float64_t tmp = CMath::sqrt(dot(i, this,i));
00149 if (tmp==0)
00150
00151 m_normalization_values[i]=1;
00152 else
00153 m_normalization_values[i]=tmp;
00154 }
00155
00156 }
00157
00158 void CPolyFeatures::store_multi_index()
00159 {
00160 delete[] m_multi_index;
00161
00162 m_multi_index=new uint16_t[m_output_dimensions*m_degree];
00163
00164 uint16_t* exponents = new uint16_t[m_input_dimensions];
00165 if (!exponents)
00166 SG_ERROR( "Error allocating mem \n");
00167
00168 uint16_t* index = m_multi_index;
00169 enumerate_multi_index(0, &index, exponents, m_degree);
00170
00171 delete[] exponents;
00172 }
00173
00174 void CPolyFeatures::enumerate_multi_index(const int32_t feat_idx, uint16_t** index, uint16_t* exponents, const int32_t degree)
00175 {
00176 if (feat_idx==m_input_dimensions-1 || degree==0)
00177 {
00178 if (feat_idx==m_input_dimensions-1)
00179 exponents[feat_idx] = degree;
00180 if (degree==0)
00181 exponents[feat_idx] = 0;
00182 int32_t i, j;
00183 for (j=0; j<feat_idx+1; j++)
00184 for (i=0; i<exponents[j]; i++)
00185 {
00186 **index = j;
00187 (*index)++;
00188 }
00189 exponents[feat_idx] = 0;
00190 return;
00191 }
00192 int32_t k;
00193 for (k=0; k<=degree; k++)
00194 {
00195 exponents[feat_idx] = k;
00196 enumerate_multi_index(feat_idx+1, index, exponents, degree-k);
00197 }
00198 return;
00199
00200 }
00201
00202 void CPolyFeatures::store_multinomial_coefficients()
00203 {
00204 delete[] m_multinomial_coefficients;
00205
00206 m_multinomial_coefficients = new float64_t[m_output_dimensions];
00207 int32_t* exponents = new int32_t[m_input_dimensions];
00208 if (!exponents)
00209 SG_ERROR( "Error allocating mem \n");
00210 int32_t j=0;
00211 for (j=0; j<m_input_dimensions; j++)
00212 exponents[j] = 0;
00213 int32_t k, cnt=0;
00214 for (j=0; j<m_output_dimensions; j++)
00215 {
00216 for (k=0; k<m_degree; k++)
00217 {
00218 exponents[m_multi_index[cnt]] ++;
00219 cnt++;
00220 }
00221 m_multinomial_coefficients[j] = sqrt((double) multinomialcoef(exponents, m_input_dimensions));
00222 for (k=0; k<m_input_dimensions; k++)
00223 {
00224 exponents[k]=0;
00225 }
00226 }
00227 delete[] exponents;
00228 }
00229
00230 int32_t CPolyFeatures::bico2(int32_t n, int32_t k)
00231 {
00232
00233
00234
00235
00236 if (n<k)
00237 return 0;
00238 if (k>n/2)
00239 k = n-k;
00240 if (k<0)
00241 return 0;
00242 if (k==0)
00243 return 1;
00244 if (k==1)
00245 return n;
00246 if (k<4)
00247 return bico2(n-1, k-1)+bico2(n-1, k);
00248
00249
00250
00251 return bico(n, k);
00252
00253 }
00254
00255 int32_t CPolyFeatures::calc_feature_space_dimensions(int32_t N, int32_t D)
00256 {
00257 if (N==1)
00258 return 1;
00259 if (D==0)
00260 return 1;
00261 int32_t d;
00262 int32_t ret = 0;
00263 for (d=0; d<=D; d++)
00264 ret += calc_feature_space_dimensions(N-1, d);
00265
00266 return ret;
00267 }
00268
00269 int32_t CPolyFeatures::multinomialcoef(int32_t* exps, int32_t len)
00270 {
00271 int32_t ret = 1, i;
00272 int32_t n = 0;
00273 for (i=0; i<len; i++)
00274 {
00275 n += exps[i];
00276 ret *= bico2(n, exps[i]);
00277 }
00278 return ret;
00279 }
00280
00281
00282
00283 float64_t CPolyFeatures::gammln(float64_t xx)
00284 {
00285 float64_t x,y,tmp,ser;
00286 static float64_t cof[6]={76.18009172947146, -86.50532032941677,
00287 24.01409824083091, -1.231739572450155,
00288 0.1208650973866179e-2,-0.5395239384953e-5};
00289 int32_t j;
00290
00291 y=x=xx;
00292 tmp=x+5.5;
00293 tmp -= (x+0.5)*log(tmp);
00294 ser=1.000000000190015;
00295 for (j=0;j<=5;j++) ser += cof[j]/++y;
00296 return -tmp+log(2.5066282746310005*ser/x);
00297 }
00298
00299 float64_t CPolyFeatures::factln(int32_t n)
00300 {
00301 static float64_t a[101];
00302
00303 if (n < 0) SG_ERROR("Negative factorial in routine factln\n");
00304 if (n <= 1) return 0.0;
00305 if (n <= 100) return a[n] ? a[n] : (a[n]=gammln(n+1.0));
00306 else return gammln(n+1.0);
00307 }
00308
00309 int32_t CPolyFeatures::bico(int32_t n, int32_t k)
00310 {
00311
00312 return (int32_t) floor(0.5+exp(factln(n)-factln(k)-factln(n-k)));
00313 }
00314 CFeatures* CPolyFeatures::duplicate() const
00315 {
00316 return new CPolyFeatures(*this);
00317 }