00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include "features/SparsePolyFeatures.h"
00011 #include "lib/Hash.h"
00012
00013 using namespace shogun;
00014
00015 CSparsePolyFeatures::CSparsePolyFeatures(void)
00016 {
00017 SG_UNSTABLE("CSparsePolyFeatures::CSparsePolyFeatures(void)",
00018 "\n");
00019
00020 m_feat = NULL;
00021 m_degree = 0;
00022 m_normalize = false;
00023 m_input_dimensions = 0;
00024 m_output_dimensions = 0;
00025 m_normalization_values = NULL;
00026 mask = 0;
00027 m_hash_bits = 0;
00028 }
00029
00030 CSparsePolyFeatures::CSparsePolyFeatures(CSparseFeatures<float64_t>* feat, int32_t degree, bool normalize, int32_t hash_bits)
00031 : CDotFeatures(), m_normalization_values(NULL)
00032 {
00033 ASSERT(feat);
00034
00035 m_feat = feat;
00036 SG_REF(m_feat);
00037 m_degree=degree;
00038 m_normalize=normalize;
00039 m_hash_bits=hash_bits;
00040 mask=(uint32_t) (((uint64_t) 1)<<m_hash_bits)-1;
00041 m_output_dimensions=1<<m_hash_bits;
00042 m_input_dimensions=feat->get_num_features();
00043
00044 if (m_normalize)
00045 store_normalization_values();
00046 }
00047
00048 CSparsePolyFeatures::~CSparsePolyFeatures()
00049 {
00050 delete[] m_normalization_values;
00051 SG_UNREF(m_feat);
00052 }
00053
00054 float64_t CSparsePolyFeatures::dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2)
00055 {
00056 ASSERT(df);
00057 ASSERT(df->get_feature_type() == get_feature_type());
00058 ASSERT(df->get_feature_class() == get_feature_class());
00059
00060 CSparsePolyFeatures* pf=(CSparsePolyFeatures*) df;
00061
00062 int32_t len1, len2;
00063 bool do_free1, do_free2;
00064 TSparseEntry<float64_t>* vec1 = m_feat->get_sparse_feature_vector(vec_idx1, len1, do_free1);
00065 TSparseEntry<float64_t>* vec2 = pf->m_feat->get_sparse_feature_vector(vec_idx2, len2, do_free2);
00066
00067 float64_t result=CSparseFeatures<float64_t>::sparse_dot(1, vec1, len1, vec2, len2);
00068 result=CMath::pow(result, m_degree);
00069
00070 m_feat->free_feature_vector(vec1, len1, do_free1);
00071 pf->m_feat->free_feature_vector(vec2, len2, do_free2);
00072
00073 return result;
00074 }
00075
00076 float64_t CSparsePolyFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00077 {
00078 if (vec2_len != m_output_dimensions)
00079 SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions);
00080
00081 int32_t vlen;
00082 bool do_free;
00083 TSparseEntry<float64_t>* vec = m_feat->get_sparse_feature_vector(vec_idx1, vlen, do_free);
00084
00085 float64_t result=0;
00086
00087 if (vec)
00088 {
00089 if (m_degree==2)
00090 {
00091
00092 for (int32_t i=0; i<vlen; i++)
00093 {
00094 float64_t v1=vec[i].entry;
00095 uint32_t seed=CHash::MurmurHash2((uint8_t*) &(vec[i].feat_index), sizeof(int32_t), 0xDEADBEAF);
00096
00097 for (int32_t j=i; j<vlen; j++)
00098 {
00099 float64_t v2=vec[j].entry;
00100 uint32_t h=CHash::MurmurHash2((uint8_t*) &(vec[j].feat_index), sizeof(int32_t), seed) & mask;
00101 float64_t v;
00102
00103 if (i==j)
00104 v=v1*v1;
00105 else
00106 v=CMath::sqrt(2.0)*v1*v2;
00107
00108 result+=v*vec2[h];
00109 }
00110 }
00111 }
00112 else if (m_degree==3)
00113 SG_NOTIMPLEMENTED;
00114 }
00115
00116 if (m_normalize)
00117 result/=m_normalization_values[vec_idx1];
00118
00119 m_feat->free_feature_vector(vec, vlen, do_free);
00120 return result;
00121 }
00122
00123 void CSparsePolyFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val)
00124 {
00125 if (vec2_len != m_output_dimensions)
00126 SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions);
00127
00128 int32_t vlen;
00129 bool do_free;
00130 TSparseEntry<float64_t>* vec = m_feat->get_sparse_feature_vector(vec_idx1, vlen, do_free);
00131
00132 float64_t norm_val=1.0;
00133 if (m_normalize)
00134 norm_val = m_normalization_values[vec_idx1];
00135 alpha/=norm_val;
00136
00137 if (m_degree==2)
00138 {
00139
00140 for (int32_t i=0; i<vlen; i++)
00141 {
00142 float64_t v1=vec[i].entry;
00143 uint32_t seed=CHash::MurmurHash2((uint8_t*) &(vec[i].feat_index), sizeof(int32_t), 0xDEADBEAF);
00144
00145 for (int32_t j=i; j<vlen; j++)
00146 {
00147 float64_t v2=vec[j].entry;
00148 uint32_t h=CHash::MurmurHash2((uint8_t*) &(vec[j].feat_index), sizeof(int32_t), seed) & mask;
00149 float64_t v;
00150
00151 if (i==j)
00152 v=alpha*v1*v1;
00153 else
00154 v=alpha*CMath::sqrt(2.0)*v1*v2;
00155
00156 if (abs_val)
00157 vec2[h]+=CMath::abs(v);
00158 else
00159 vec2[h]+=v;
00160 }
00161 }
00162 }
00163 else if (m_degree==3)
00164 SG_NOTIMPLEMENTED;
00165
00166 m_feat->free_feature_vector(vec, vlen, do_free);
00167 }
00168
00169 void CSparsePolyFeatures::store_normalization_values()
00170 {
00171 delete[] m_normalization_values;
00172
00173 int32_t num_vec = this->get_num_vectors();
00174
00175 m_normalization_values=new float64_t[num_vec];
00176 for (int i=0; i<num_vec; i++)
00177 {
00178 float64_t val = CMath::sqrt(dot(i, this,i));
00179 if (val==0)
00180
00181 m_normalization_values[i]=1.0;
00182 else
00183 m_normalization_values[i]=val;
00184 }
00185
00186 }
00187
00188 CFeatures* CSparsePolyFeatures::duplicate() const
00189 {
00190 return new CSparsePolyFeatures(*this);
00191 }