SHOGUN: BinnedDotFeatures.cpp Source File

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Copyright (C) 2012 Soeren Sonnenburg
00008  */
00009 
00010 #include <shogun/features/BinnedDotFeatures.h>
00011 #include <shogun/base/Parameter.h>
00012 
00013 using namespace shogun;
00014 
00015 CBinnedDotFeatures::CBinnedDotFeatures(int32_t size)
00016     : CDotFeatures(size)
00017 {
00018     init();
00019 }
00020 
00021 
00022 CBinnedDotFeatures::CBinnedDotFeatures(const CBinnedDotFeatures & orig)
00023     : CDotFeatures(orig), m_bins(orig.m_bins), m_fill(orig.m_fill),
00024     m_norm_one(orig.m_norm_one)
00025 {
00026     init();
00027 }
00028 
00029 CBinnedDotFeatures::CBinnedDotFeatures(CDenseFeatures<float64_t>* sf, SGMatrix<float64_t> bins)
00030 {
00031     init();
00032     set_simple_features(sf);
00033     set_bins(bins);
00034 
00035 }
00036 
00037 CBinnedDotFeatures::~CBinnedDotFeatures()
00038 {
00039     SG_UNREF(m_features);
00040 }
00041 
00042 int32_t CBinnedDotFeatures::get_dim_feature_space() const
00043 {
00044     return m_bins.num_rows*m_bins.num_cols;
00045 }
00046 
00047 float64_t CBinnedDotFeatures::dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2)
00048 {
00049     ASSERT(df);
00050     ASSERT(df->get_feature_type() == get_feature_type());
00051     ASSERT(df->get_feature_class() == get_feature_class());
00052 
00053     float64_t result=0;
00054     double sum1=0;
00055     double sum2=0;
00056 
00057     SGVector<float64_t> vec1=m_features->get_feature_vector(vec_idx1);
00058     SGVector<float64_t> vec2=((CBinnedDotFeatures*) df)->m_features->get_feature_vector(vec_idx2);
00059 
00060     for (int32_t i=0; i<m_bins.num_cols; i++)
00061     {
00062         float64_t v1=vec1.vector[i];
00063         float64_t v2=vec2.vector[i];
00064         float64_t* col=m_bins.get_column_vector(i);
00065 
00066         for (int32_t j=0; j<m_bins.num_rows; j++)
00067         {
00068             if (m_fill)
00069             {
00070                 if (col[j]<=v1)
00071                 {
00072                     sum1+=1.0;
00073 
00074                     if (col[j]<=v2)
00075                     {
00076                         sum2+=1.0;
00077                         result+=1.0;
00078                     }
00079                 }
00080                 else
00081                 {
00082                     if (col[j]<=v2)
00083                         sum2+=1.0;
00084                     else
00085                         break;
00086                 }
00087 
00088                 /* the above is the fast version of
00089                 if (col[j]<=v1 && col[j]<=v2)
00090                     result+=1.0;
00091 
00092                 if (col[j]<=v1)
00093                     sum1+=1.0;
00094 
00095                 if (col[j]<=v2)
00096                     sum2+=1.0;
00097                 */
00098             }
00099             else
00100             {
00101                 if (col[j]<=v1 && (j+1)<m_bins.num_rows && col[j+1]>v1 &&
00102                         col[j]<=v2 && (j+1)<m_bins.num_rows && col[j+1]>v2)
00103                 {
00104                     result+=1;
00105                     break;
00106                 }
00107             }
00108         }
00109     }
00110     m_features->free_feature_vector(vec1, vec_idx1);
00111     ((CBinnedDotFeatures*) df)->m_features->free_feature_vector(vec2, vec_idx2);
00112 
00113     if (m_fill && m_norm_one && sum1!=0 && sum2!=0)
00114         result/=CMath::sqrt(sum1*sum2);
00115 
00116     return result;
00117 
00118 }
00119 
00120 float64_t CBinnedDotFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00121 {
00122     assert_shape(vec2_len);
00123 
00124     float64_t result=0;
00125     double sum=0;
00126 
00127     SGVector<float64_t> vec1=m_features->get_feature_vector(vec_idx1);
00128 
00129 
00130     for (int32_t i=0; i<m_bins.num_cols; i++)
00131     {
00132         float64_t v=vec1.vector[i];
00133         float64_t* col=m_bins.get_column_vector(i);
00134         int32_t offs=i*m_bins.num_rows;
00135 
00136         for (int32_t j=0; j<m_bins.num_rows; j++)
00137         {
00138             if (m_fill)
00139             {
00140                 if (col[j]<=v)
00141                 {
00142                     result+=vec2[offs+j];
00143                     sum+=1.0;
00144                 }
00145             }
00146             else
00147             {
00148                 if (col[j]<=v && (j+1)<m_bins.num_rows && col[j+1]>v)
00149                 {
00150                     result+=vec2[offs+j];
00151                     break;
00152                 }
00153             }
00154         }
00155     }
00156     m_features->free_feature_vector(vec1, vec_idx1);
00157 
00158     if (m_fill && m_norm_one && sum!=0)
00159         result/=CMath::sqrt(sum);
00160 
00161     return result;
00162 }
00163 
00164 void CBinnedDotFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val)
00165 {
00166     assert_shape(vec2_len);
00167     SGVector<float64_t> vec1=m_features->get_feature_vector(vec_idx1);
00168 
00169     if (m_fill && m_norm_one)
00170     {
00171         float64_t alpha_correction=0;
00172         for (int32_t i=0; i<m_bins.num_cols; i++)
00173         {
00174             float64_t v=vec1.vector[i];
00175             float64_t* col=m_bins.get_column_vector(i);
00176 
00177             for (int32_t j=0; j<m_bins.num_rows; j++)
00178             {
00179                 if (col[j]<=v)
00180                     alpha_correction+=1.0;
00181             }
00182         }
00183 
00184         if (alpha_correction==0.0)
00185             return;
00186 
00187         alpha/=CMath::sqrt(alpha_correction);
00188     }
00189 
00190     for (int32_t i=0; i<m_bins.num_cols; i++)
00191     {
00192         float64_t v=vec1.vector[i];
00193         float64_t* col=m_bins.get_column_vector(i);
00194         int32_t offs=i*m_bins.num_rows;
00195 
00196         for (int32_t j=0; j<m_bins.num_rows; j++)
00197         {
00198             if (m_fill)
00199             {
00200                 if (col[j]<=v)
00201                     vec2[offs+j]+=alpha;
00202             }
00203             else
00204             {
00205                 if (col[j]<=v && (j+1)<m_bins.num_rows && col[j+1]>v)
00206                 {
00207                     vec2[offs+j]+=alpha;
00208                     break;
00209                 }
00210             }
00211         }
00212     }
00213     m_features->free_feature_vector(vec1, vec_idx1);
00214 }
00215 
00216 void CBinnedDotFeatures::assert_shape(int32_t vec2_len)
00217 {
00218     if (m_bins.num_cols*m_bins.num_rows != vec2_len)
00219     {
00220         SG_ERROR("Bin matrix has shape (%d,%d) = %d entries, not matching vector"
00221                 " length %d\n", m_bins.num_cols,m_bins.num_rows,
00222                 m_bins.num_cols*m_bins.num_rows,vec2_len);
00223     }
00224 
00225     if (m_features && m_bins.num_cols != m_features->get_num_features())
00226     {
00227         SG_ERROR("Number of colums (%d) doesn't match number of features "
00228                 "(%d)\n", m_bins.num_cols, m_features->get_num_features());
00229     }
00230 
00231 }
00232 
00233 int32_t CBinnedDotFeatures::get_nnz_features_for_vector(int32_t num)
00234 {
00235     if (m_fill)
00236         return m_bins.num_rows;
00237     else
00238         return 1;
00239 }
00240 
00241 void* CBinnedDotFeatures::get_feature_iterator(int32_t vector_index)
00242 {
00243     SG_NOTIMPLEMENTED;
00244     return NULL;
00245 }
00246 
00247 bool CBinnedDotFeatures::get_next_feature(int32_t& index, float64_t& value, void* iterator)
00248 {
00249     SG_NOTIMPLEMENTED;
00250     return false;
00251 }
00252 
00253 void CBinnedDotFeatures::free_feature_iterator(void* iterator)
00254 {
00255     SG_NOTIMPLEMENTED;
00256 }
00257 
00258 bool CBinnedDotFeatures::get_fill()
00259 {
00260     return m_fill;
00261 }
00262 
00263 void CBinnedDotFeatures::set_fill(bool fill)
00264 {
00265     m_fill=fill;
00266 }
00267 
00268 bool CBinnedDotFeatures::get_norm_one()
00269 {
00270     return m_fill;
00271 }
00272 
00273 void CBinnedDotFeatures::set_norm_one(bool norm_one)
00274 {
00275     m_norm_one=norm_one;
00276 }
00277 
00278 void CBinnedDotFeatures::set_bins(SGMatrix<float64_t> bins)
00279 {
00280     m_bins=bins;
00281 }
00282 
00283 SGMatrix<float64_t> CBinnedDotFeatures::get_bins()
00284 {
00285     return m_bins;
00286 }
00287 
00288 void CBinnedDotFeatures::set_simple_features(CDenseFeatures<float64_t>* features)
00289 {
00290     SG_REF(features);
00291     m_features=features;
00292 }
00293 
00294 CDenseFeatures<float64_t>* CBinnedDotFeatures::get_simple_features()
00295 {
00296     SG_REF(m_features);
00297     return m_features;
00298 }
00299 
00300 void CBinnedDotFeatures::init()
00301 {
00302     m_features=NULL;
00303     m_fill=true;
00304     m_norm_one=false;
00305 }
00306 
00307 const char* CBinnedDotFeatures::get_name() const
00308 {
00309     return "BinnedDotFeatures";
00310 }
00311 
00312 CFeatures* CBinnedDotFeatures::duplicate() const
00313 {
00314     return new CBinnedDotFeatures(*this);
00315 }
00316 
00317 EFeatureType CBinnedDotFeatures::get_feature_type() const
00318 {
00319     return F_DREAL;
00320 }
00321 
00322 
00323 EFeatureClass CBinnedDotFeatures::get_feature_class() const
00324 {
00325     return C_BINNED_DOT;
00326 }
00327 
00328 int32_t CBinnedDotFeatures::get_num_vectors() const
00329 {
00330     ASSERT(m_features);
00331     return m_features->get_num_vectors();
00332 }
00333 
00334 int32_t CBinnedDotFeatures::get_size() const
00335 {
00336     return sizeof(float64_t);
00337 }
00338