Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include <shogun/features/BinnedDotFeatures.h>
00011 #include <shogun/base/Parameter.h>
00012
00013 using namespace shogun;
00014
00015 CBinnedDotFeatures::CBinnedDotFeatures(int32_t size)
00016 : CDotFeatures(size)
00017 {
00018 init();
00019 }
00020
00021
00022 CBinnedDotFeatures::CBinnedDotFeatures(const CBinnedDotFeatures & orig)
00023 : CDotFeatures(orig), m_bins(orig.m_bins), m_fill(orig.m_fill),
00024 m_norm_one(orig.m_norm_one)
00025 {
00026 init();
00027 }
00028
00029 CBinnedDotFeatures::CBinnedDotFeatures(CDenseFeatures<float64_t>* sf, SGMatrix<float64_t> bins)
00030 {
00031 init();
00032 set_simple_features(sf);
00033 set_bins(bins);
00034
00035 }
00036
00037 CBinnedDotFeatures::~CBinnedDotFeatures()
00038 {
00039 SG_UNREF(m_features);
00040 }
00041
00042 int32_t CBinnedDotFeatures::get_dim_feature_space() const
00043 {
00044 return m_bins.num_rows*m_bins.num_cols;
00045 }
00046
00047 float64_t CBinnedDotFeatures::dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2)
00048 {
00049 ASSERT(df);
00050 ASSERT(df->get_feature_type() == get_feature_type());
00051 ASSERT(df->get_feature_class() == get_feature_class());
00052
00053 float64_t result=0;
00054 double sum1=0;
00055 double sum2=0;
00056
00057 SGVector<float64_t> vec1=m_features->get_feature_vector(vec_idx1);
00058 SGVector<float64_t> vec2=((CBinnedDotFeatures*) df)->m_features->get_feature_vector(vec_idx2);
00059
00060 for (int32_t i=0; i<m_bins.num_cols; i++)
00061 {
00062 float64_t v1=vec1.vector[i];
00063 float64_t v2=vec2.vector[i];
00064 float64_t* col=m_bins.get_column_vector(i);
00065
00066 for (int32_t j=0; j<m_bins.num_rows; j++)
00067 {
00068 if (m_fill)
00069 {
00070 if (col[j]<=v1)
00071 {
00072 sum1+=1.0;
00073
00074 if (col[j]<=v2)
00075 {
00076 sum2+=1.0;
00077 result+=1.0;
00078 }
00079 }
00080 else
00081 {
00082 if (col[j]<=v2)
00083 sum2+=1.0;
00084 else
00085 break;
00086 }
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098 }
00099 else
00100 {
00101 if (col[j]<=v1 && (j+1)<m_bins.num_rows && col[j+1]>v1 &&
00102 col[j]<=v2 && (j+1)<m_bins.num_rows && col[j+1]>v2)
00103 {
00104 result+=1;
00105 break;
00106 }
00107 }
00108 }
00109 }
00110 m_features->free_feature_vector(vec1, vec_idx1);
00111 ((CBinnedDotFeatures*) df)->m_features->free_feature_vector(vec2, vec_idx2);
00112
00113 if (m_fill && m_norm_one && sum1!=0 && sum2!=0)
00114 result/=CMath::sqrt(sum1*sum2);
00115
00116 return result;
00117
00118 }
00119
00120 float64_t CBinnedDotFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00121 {
00122 assert_shape(vec2_len);
00123
00124 float64_t result=0;
00125 double sum=0;
00126
00127 SGVector<float64_t> vec1=m_features->get_feature_vector(vec_idx1);
00128
00129
00130 for (int32_t i=0; i<m_bins.num_cols; i++)
00131 {
00132 float64_t v=vec1.vector[i];
00133 float64_t* col=m_bins.get_column_vector(i);
00134 int32_t offs=i*m_bins.num_rows;
00135
00136 for (int32_t j=0; j<m_bins.num_rows; j++)
00137 {
00138 if (m_fill)
00139 {
00140 if (col[j]<=v)
00141 {
00142 result+=vec2[offs+j];
00143 sum+=1.0;
00144 }
00145 }
00146 else
00147 {
00148 if (col[j]<=v && (j+1)<m_bins.num_rows && col[j+1]>v)
00149 {
00150 result+=vec2[offs+j];
00151 break;
00152 }
00153 }
00154 }
00155 }
00156 m_features->free_feature_vector(vec1, vec_idx1);
00157
00158 if (m_fill && m_norm_one && sum!=0)
00159 result/=CMath::sqrt(sum);
00160
00161 return result;
00162 }
00163
00164 void CBinnedDotFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val)
00165 {
00166 assert_shape(vec2_len);
00167 SGVector<float64_t> vec1=m_features->get_feature_vector(vec_idx1);
00168
00169 if (m_fill && m_norm_one)
00170 {
00171 float64_t alpha_correction=0;
00172 for (int32_t i=0; i<m_bins.num_cols; i++)
00173 {
00174 float64_t v=vec1.vector[i];
00175 float64_t* col=m_bins.get_column_vector(i);
00176
00177 for (int32_t j=0; j<m_bins.num_rows; j++)
00178 {
00179 if (col[j]<=v)
00180 alpha_correction+=1.0;
00181 }
00182 }
00183
00184 if (alpha_correction==0.0)
00185 return;
00186
00187 alpha/=CMath::sqrt(alpha_correction);
00188 }
00189
00190 for (int32_t i=0; i<m_bins.num_cols; i++)
00191 {
00192 float64_t v=vec1.vector[i];
00193 float64_t* col=m_bins.get_column_vector(i);
00194 int32_t offs=i*m_bins.num_rows;
00195
00196 for (int32_t j=0; j<m_bins.num_rows; j++)
00197 {
00198 if (m_fill)
00199 {
00200 if (col[j]<=v)
00201 vec2[offs+j]+=alpha;
00202 }
00203 else
00204 {
00205 if (col[j]<=v && (j+1)<m_bins.num_rows && col[j+1]>v)
00206 {
00207 vec2[offs+j]+=alpha;
00208 break;
00209 }
00210 }
00211 }
00212 }
00213 m_features->free_feature_vector(vec1, vec_idx1);
00214 }
00215
00216 void CBinnedDotFeatures::assert_shape(int32_t vec2_len)
00217 {
00218 if (m_bins.num_cols*m_bins.num_rows != vec2_len)
00219 {
00220 SG_ERROR("Bin matrix has shape (%d,%d) = %d entries, not matching vector"
00221 " length %d\n", m_bins.num_cols,m_bins.num_rows,
00222 m_bins.num_cols*m_bins.num_rows,vec2_len);
00223 }
00224
00225 if (m_features && m_bins.num_cols != m_features->get_num_features())
00226 {
00227 SG_ERROR("Number of colums (%d) doesn't match number of features "
00228 "(%d)\n", m_bins.num_cols, m_features->get_num_features());
00229 }
00230
00231 }
00232
00233 int32_t CBinnedDotFeatures::get_nnz_features_for_vector(int32_t num)
00234 {
00235 if (m_fill)
00236 return m_bins.num_rows;
00237 else
00238 return 1;
00239 }
00240
00241 void* CBinnedDotFeatures::get_feature_iterator(int32_t vector_index)
00242 {
00243 SG_NOTIMPLEMENTED;
00244 return NULL;
00245 }
00246
00247 bool CBinnedDotFeatures::get_next_feature(int32_t& index, float64_t& value, void* iterator)
00248 {
00249 SG_NOTIMPLEMENTED;
00250 return false;
00251 }
00252
00253 void CBinnedDotFeatures::free_feature_iterator(void* iterator)
00254 {
00255 SG_NOTIMPLEMENTED;
00256 }
00257
00258 bool CBinnedDotFeatures::get_fill()
00259 {
00260 return m_fill;
00261 }
00262
00263 void CBinnedDotFeatures::set_fill(bool fill)
00264 {
00265 m_fill=fill;
00266 }
00267
00268 bool CBinnedDotFeatures::get_norm_one()
00269 {
00270 return m_fill;
00271 }
00272
00273 void CBinnedDotFeatures::set_norm_one(bool norm_one)
00274 {
00275 m_norm_one=norm_one;
00276 }
00277
00278 void CBinnedDotFeatures::set_bins(SGMatrix<float64_t> bins)
00279 {
00280 m_bins=bins;
00281 }
00282
00283 SGMatrix<float64_t> CBinnedDotFeatures::get_bins()
00284 {
00285 return m_bins;
00286 }
00287
00288 void CBinnedDotFeatures::set_simple_features(CDenseFeatures<float64_t>* features)
00289 {
00290 SG_REF(features);
00291 m_features=features;
00292 }
00293
00294 CDenseFeatures<float64_t>* CBinnedDotFeatures::get_simple_features()
00295 {
00296 SG_REF(m_features);
00297 return m_features;
00298 }
00299
00300 void CBinnedDotFeatures::init()
00301 {
00302 m_features=NULL;
00303 m_fill=true;
00304 m_norm_one=false;
00305 }
00306
00307 const char* CBinnedDotFeatures::get_name() const
00308 {
00309 return "BinnedDotFeatures";
00310 }
00311
00312 CFeatures* CBinnedDotFeatures::duplicate() const
00313 {
00314 return new CBinnedDotFeatures(*this);
00315 }
00316
00317 EFeatureType CBinnedDotFeatures::get_feature_type() const
00318 {
00319 return F_DREAL;
00320 }
00321
00322
00323 EFeatureClass CBinnedDotFeatures::get_feature_class() const
00324 {
00325 return C_BINNED_DOT;
00326 }
00327
00328 int32_t CBinnedDotFeatures::get_num_vectors() const
00329 {
00330 ASSERT(m_features);
00331 return m_features->get_num_vectors();
00332 }
00333
00334 int32_t CBinnedDotFeatures::get_size() const
00335 {
00336 return sizeof(float64_t);
00337 }
00338