SHOGUN  4.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
BinnedDotFeatures.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Copyright (C) 2012 Soeren Sonnenburg
8  */
9 
11 #include <shogun/base/Parameter.h>
12 
13 using namespace shogun;
14 
// Constructor tail: forwards `size` to the CDotFeatures base and then calls
// init(), which sets m_features=NULL, m_fill=true and m_norm_one=false.
// NOTE(review): the line carrying this constructor's signature is absent from
// this listing; confirm the parameter type against the class header.
16  : CDotFeatures(size)
17 {
18  init();
19 }
20 
21 
23  : CDotFeatures(orig), m_bins(orig.m_bins), m_fill(orig.m_fill),
24  m_norm_one(orig.m_norm_one)
25 {
26  init();
27 }
28 
// Constructor body: resets the members through init() and then stores the
// supplied bin matrix through set_bins(bins).
// NOTE(review): the signature line and the statement between init() and
// set_bins() are absent from this listing (numbering jumps 31 -> 33);
// presumably the missing statement attaches the wrapped features - confirm.
30 {
31  init();
33  set_bins(bins);
34 
35 }
36 
// NOTE(review): both the signature and the single body statement of this
// definition are absent from the listing (numbering jumps 38 -> 40). Its
// position right after the constructors suggests the destructor, presumably
// releasing m_features - confirm against the upstream file.
38 {
40 }
41 
// NOTE(review): signature and body statement are absent from the listing
// (numbering jumps 43 -> 45). Presumably get_dim_feature_space(), which the
// cross-reference list names - confirm against the upstream file.
43 {
45 }
46 
// Dot product between the binned representation of vector vec_idx1 of this
// object and vector vec_idx2 of `df`, which is cast to CBinnedDotFeatures.
//
// For every column i of m_bins the i-th raw value of each vector is compared
// against the limits col[0..num_rows-1]:
//  - fill mode (m_fill): every limit that is <= both values adds 1 to the
//    result; sum1 / sum2 count the limits that are <= v1 / v2 respectively.
//  - otherwise: the result grows by 1 when both values lie in the same
//    interval [col[j], col[j+1]); the last row can never match because
//    (j+1)<num_rows is required.
// With m_fill and m_norm_one set, and both sums non-zero, the result is
// divided by sqrt(sum1*sum2).
//
// NOTE(review): the listing lacks lines 50-51, 57 and 64. `vec1` and `col`
// are used below but their declarations are not visible; presumably they are
// this object's feature vector and the i-th column of m_bins - confirm.
// Only `ASSERT(df)` is visible as a check before the C-style cast.
47 float64_t CBinnedDotFeatures::dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2)
48 {
49  ASSERT(df)
52 
53  float64_t result=0;
54  double sum1=0;
55  double sum2=0;
56 
58  SGVector<float64_t> vec2=((CBinnedDotFeatures*) df)->m_features->get_feature_vector(vec_idx2);
59 
60  for (int32_t i=0; i<m_bins.num_cols; i++)
61  {
62  float64_t v1=vec1.vector[i];
63  float64_t v2=vec2.vector[i];
65 
66  for (int32_t j=0; j<m_bins.num_rows; j++)
67  {
68  if (m_fill)
69  {
70  if (col[j]<=v1)
71  {
72  sum1+=1.0;
73 
74  if (col[j]<=v2)
75  {
76  sum2+=1.0;
77  result+=1.0;
78  }
79  }
80  else
81  {
82  if (col[j]<=v2)
83  sum2+=1.0;
84  else
// Limit exceeds both values: stop scanning this column.
// NOTE(review): this matches the reference version in the comment
// below only if no later limit in the column is smaller, i.e.
// presumably each column is sorted ascending - confirm.
85  break;
86  }
87 
88  /* the above is the fast version of
89  if (col[j]<=v1 && col[j]<=v2)
90  result+=1.0;
91 
92  if (col[j]<=v1)
93  sum1+=1.0;
94 
95  if (col[j]<=v2)
96  sum2+=1.0;
97  */
98  }
99  else
100  {
// The (j+1)<m_bins.num_rows test appears twice in this condition; the
// second occurrence is redundant.
101  if (col[j]<=v1 && (j+1)<m_bins.num_rows && col[j+1]>v1 &&
102  col[j]<=v2 && (j+1)<m_bins.num_rows && col[j+1]>v2)
103  {
104  result+=1;
105  break;
106  }
107  }
108  }
109  }
// Hand both feature vectors back to the feature objects they came from.
110  m_features->free_feature_vector(vec1, vec_idx1);
111  ((CBinnedDotFeatures*) df)->m_features->free_feature_vector(vec2, vec_idx2);
112 
113  if (m_fill && m_norm_one && sum1!=0 && sum2!=0)
114  result/=CMath::sqrt(sum1*sum2);
115 
116  return result;
117 
118 }
119 
// Dot product between the binned representation of vector vec_idx1 and the
// dense vector vec2.
//
// assert_shape(vec2_len) first checks vec2_len against
// m_bins.num_cols*m_bins.num_rows. The entry for column i / row j of the bin
// matrix is read from vec2[i*num_rows + j].
//  - fill mode (m_fill): every limit col[j] <= v adds vec2[offs+j] to the
//    result and 1 to `sum`.
//  - otherwise: only the entry of the interval [col[j], col[j+1]) that
//    contains v is added; the last row can never match because
//    (j+1)<num_rows is required.
// With m_fill and m_norm_one set and sum non-zero, the result is divided by
// sqrt(sum).
//
// NOTE(review): the listing lacks lines 127 and 133. `vec1` and `col` are
// used below but their declarations are not visible; presumably they are this
// object's feature vector and the i-th column of m_bins - confirm.
120 float64_t CBinnedDotFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
121 {
122  assert_shape(vec2_len);
123 
124  float64_t result=0;
125  double sum=0;
126 
128 
129 
130  for (int32_t i=0; i<m_bins.num_cols; i++)
131  {
132  float64_t v=vec1.vector[i];
// Start of column i's entries inside the flattened dense vector.
134  int32_t offs=i*m_bins.num_rows;
135 
136  for (int32_t j=0; j<m_bins.num_rows; j++)
137  {
138  if (m_fill)
139  {
140  if (col[j]<=v)
141  {
142  result+=vec2[offs+j];
143  sum+=1.0;
144  }
145  }
146  else
147  {
148  if (col[j]<=v && (j+1)<m_bins.num_rows && col[j+1]>v)
149  {
150  result+=vec2[offs+j];
151  break;
152  }
153  }
154  }
155  }
// Hand the feature vector back to the wrapped feature object.
156  m_features->free_feature_vector(vec1, vec_idx1);
157 
158  if (m_fill && m_norm_one && sum!=0)
159  result/=CMath::sqrt(sum);
160 
161  return result;
162 }
163 
164 void CBinnedDotFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val)
165 {
166  assert_shape(vec2_len);
168 
169  if (m_fill && m_norm_one)
170  {
171  float64_t alpha_correction=0;
172  for (int32_t i=0; i<m_bins.num_cols; i++)
173  {
174  float64_t v=vec1.vector[i];
176 
177  for (int32_t j=0; j<m_bins.num_rows; j++)
178  {
179  if (col[j]<=v)
180  alpha_correction+=1.0;
181  }
182  }
183 
184  if (alpha_correction==0.0)
185  return;
186 
187  alpha/=CMath::sqrt(alpha_correction);
188  }
189 
190  for (int32_t i=0; i<m_bins.num_cols; i++)
191  {
192  float64_t v=vec1.vector[i];
194  int32_t offs=i*m_bins.num_rows;
195 
196  for (int32_t j=0; j<m_bins.num_rows; j++)
197  {
198  if (m_fill)
199  {
200  if (col[j]<=v)
201  vec2[offs+j]+=alpha;
202  }
203  else
204  {
205  if (col[j]<=v && (j+1)<m_bins.num_rows && col[j+1]>v)
206  {
207  vec2[offs+j]+=alpha;
208  break;
209  }
210  }
211  }
212  }
213  m_features->free_feature_vector(vec1, vec_idx1);
214 }
215 
// Validates a dense-vector length against the bin matrix: reports an error
// through SG_ERROR when m_bins.num_cols*m_bins.num_rows differs from vec2_len.
// A second check compares the number of columns with the number of features.
216 void CBinnedDotFeatures::assert_shape(int32_t vec2_len)
217 {
218  if (m_bins.num_cols*m_bins.num_rows != vec2_len)
219  {
// The message prints num_cols first, then num_rows.
220  SG_ERROR("Bin matrix has shape (%d,%d) = %d entries, not matching vector"
221  " length %d\n", m_bins.num_cols,m_bins.num_rows,
222  m_bins.num_cols*m_bins.num_rows,vec2_len);
223  }
224 
// NOTE(review): the condition guarding the next block (listing line 225) and
// the end of its SG_ERROR call (line 228) are absent from this listing.
// Presumably the column count is compared with the wrapped features'
// feature count - confirm against the upstream file.
226  {
227  SG_ERROR("Number of colums (%d) doesn't match number of features "
229  }
230 
231 }
232 
// Returns m_bins.num_rows in fill mode and 1 otherwise.
// NOTE(review): the signature line is absent from this listing; presumably
// this is get_nnz_features_for_vector(), named in the cross-reference list.
// The returned value does not involve m_bins.num_cols - verify this is the
// intended count.
234 {
235  if (m_fill)
236  return m_bins.num_rows;
237  else
238  return 1;
239 }
240 
// Feature iteration entry point; the visible code returns NULL.
// NOTE(review): listing line 243 is absent; SG_NOTIMPLEMENTED appears in the
// cross-reference list, so presumably it is invoked here - confirm.
241 void* CBinnedDotFeatures::get_feature_iterator(int32_t vector_index)
242 {
244  return NULL;
245 }
246 
// Iterator step; the visible code returns false and leaves `index` and
// `value` untouched.
// NOTE(review): listing line 249 is absent; presumably it invokes
// SG_NOTIMPLEMENTED like the other iterator functions - confirm.
247 bool CBinnedDotFeatures::get_next_feature(int32_t& index, float64_t& value, void* iterator)
248 {
250  return false;
251 }
252 
// NOTE(review): signature and body statement are absent from the listing
// (numbering jumps 254 -> 256). Presumably free_feature_iterator(), named in
// the cross-reference list - confirm against the upstream file.
254 {
256 }
257 
// Returns the fill flag m_fill.
// NOTE(review): the signature line is absent from this listing; presumably
// this is the getter paired with the m_fill setter that follows - confirm.
259 {
260  return m_fill;
261 }
262 
// Stores `fill` in m_fill.
// NOTE(review): the signature line is absent from this listing; presumably
// the m_fill setter taking a bool `fill` - confirm.
264 {
265  m_fill=fill;
266 }
267 
269 {
270  return m_fill;
271 }
272 
// Stores `norm_one` in m_norm_one. The flag is consulted together with m_fill
// by dot(), dense_dot() and add_to_dense_vec() to decide whether to normalise.
// NOTE(review): the signature line is absent from this listing; the
// cross-reference list gives it as set_norm_one(bool norm_one).
274 {
275  m_norm_one=norm_one;
276 }
277 
// Stores `bins` in m_bins. No shape check is performed here; assert_shape()
// does that when a dense vector is processed.
// NOTE(review): the signature line is absent from this listing; the
// cross-reference list gives it as set_bins(SGMatrix<float64_t> bins).
279 {
280  m_bins=bins;
281 }
282 
// Returns the bin matrix m_bins.
// NOTE(review): the signature line is absent from this listing; the
// cross-reference list gives it as SGMatrix<float64_t> get_bins().
284 {
285  return m_bins;
286 }
287 
289 {
290  SG_REF(features);
291  m_features=features;
292 }
293 
// Returns the wrapped feature object m_features.
// NOTE(review): the signature line and listing line 296 are absent;
// presumably the missing statement references m_features before it is
// returned - confirm against the upstream file.
295 {
297  return m_features;
298 }
299 
300 void CBinnedDotFeatures::init()
301 {
302  m_features=NULL;
303  m_fill=true;
304  m_norm_one=false;
305 }
306 
307 const char* CBinnedDotFeatures::get_name() const
308 {
309  return "BinnedDotFeatures";
310 }
311 
// Returns a newly allocated copy of this object, created with the copy
// constructor.
// NOTE(review): the signature line is absent from this listing; the
// cross-reference list gives it as CFeatures* duplicate() const.
313 {
314  return new CBinnedDotFeatures(*this);
315 }
316 
// Returns the constant F_DREAL.
// NOTE(review): the signature line is absent from this listing; presumably
// get_feature_type() const, named in the cross-reference list.
318 {
319  return F_DREAL;
320 }
321 
322 
// Returns the constant C_BINNED_DOT.
// NOTE(review): the signature line is absent from this listing; presumably
// get_feature_class() const, named in the cross-reference list.
324 {
325  return C_BINNED_DOT;
326 }
327 
// Forwards to the wrapped features and returns their number of vectors.
// NOTE(review): the signature line and listing line 330 are absent;
// presumably the missing statement guards against m_features being NULL,
// since the visible code dereferences it unconditionally - confirm.
329 {
331  return m_features->get_num_vectors();
332 }
CDenseFeatures< float64_t > * get_simple_features()
virtual float64_t dense_dot(int32_t vec_idx1, const float64_t *vec2, int32_t vec2_len)
ST * get_feature_vector(int32_t num, int32_t &len, bool &dofree)
int32_t get_num_features() const
void set_norm_one(bool norm_one)
#define SG_ERROR(...)
Definition: SGIO.h:129
#define SG_NOTIMPLEMENTED
Definition: SGIO.h:139
CDenseFeatures< float64_t > * m_features
underlying features
index_t num_cols
Definition: SGMatrix.h:378
void set_simple_features(CDenseFeatures< float64_t > *features)
Features that support dot products among other operations.
Definition: DotFeatures.h:44
virtual bool get_next_feature(int32_t &index, float64_t &value, void *iterator)
#define SG_REF(x)
Definition: SGObject.h:51
index_t num_rows
Definition: SGMatrix.h:376
EFeatureClass
shogun feature class
Definition: FeatureTypes.h:38
virtual EFeatureClass get_feature_class() const
The class BinnedDotFeatures contains a 0-1 conversion of features into bins.
#define ASSERT(x)
Definition: SGIO.h:201
virtual int32_t get_num_vectors() const
virtual int32_t get_nnz_features_for_vector(int32_t num)
double float64_t
Definition: common.h:50
SGMatrix< float64_t > m_bins
bins with limits
virtual void add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t *vec2, int32_t vec2_len, bool abs_val=false)
virtual EFeatureClass get_feature_class() const =0
T * get_column_vector(index_t col) const
Definition: SGMatrix.h:115
void set_bins(SGMatrix< float64_t > bins)
bool m_fill
fill up with 1's or flag just one column
virtual int32_t get_dim_feature_space() const
virtual CFeatures * duplicate() const
EFeatureType
shogun feature type
Definition: FeatureTypes.h:19
virtual const char * get_name() const
virtual void free_feature_iterator(void *iterator)
#define SG_UNREF(x)
Definition: SGObject.h:52
All classes and functions are contained in the shogun namespace.
Definition: class_list.h:18
void free_feature_vector(ST *feat_vec, int32_t num, bool dofree)
The class Features is the base class of all feature objects.
Definition: Features.h:68
bool m_norm_one
normalize vectors to have norm one
SGMatrix< float64_t > get_bins()
static float32_t sqrt(float32_t x)
Definition: Math.h:459
virtual float64_t dot(int32_t vec_idx1, CDotFeatures *df, int32_t vec_idx2)
virtual void * get_feature_iterator(int32_t vector_index)
virtual EFeatureType get_feature_type() const =0
virtual int32_t get_num_vectors() const
virtual EFeatureType get_feature_type() const

SHOGUN Machine Learning Toolbox - Documentation