SHOGUN  6.1.3
StreamingDenseFeatures.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2011 Shashwat Lal Das
8  * Written (W) 2012 Heiko Strathmann
9  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
10  */
11 
16 
17 namespace shogun
18 {
/* Fragment whose signature (listing lines 20-21) is not shown here;
 * presumably the default constructor -- TODO confirm against the header.
 * The visible body runs the argument-less init() and then tells the parser
 * not to free vectors after an example is released. */
19 template<class T>
22 {
24  init();
25  parser.set_free_vector_after_release(false);
26 }
27 
/* Constructor taking (among parameters on an unshown listing line) a labelled
 * flag and a size; the trailing ':' introduces a member-initialiser list that
 * is not visible in this listing (line 31).
 * The body forwards `file`, `is_labelled` and `size` to init(file, ...) and
 * then tells the parser not to free vectors after an example is released. */
28 template<class T>
30  bool is_labelled, int32_t size) :
32 {
33  init(file, is_labelled, size);
35  parser.set_free_vector_after_release(false);
36 }
37 
39  CDenseFeatures<T>* dense_features, float64_t* lab) :
41 {
    /* Constructor that streams from an existing CDenseFeatures<T> object.
     *
     * dense_features  source features; must be non-NULL (checked below)
     * lab             optional label array; a non-NULL pointer marks the
     *                 stream as labelled
     *
     * The opening of the signature and the initialiser list (listing lines
     * 38 and 40) as well as the declaration of `file` (listing line 44) are
     * not shown in this listing. */

    /* The format string contains a %s conversion, so the class name has to be
     * supplied; without an argument the message formatting is undefined
     * behaviour whenever the check fails. */
42  REQUIRE(dense_features, "%s::CStreamingDenseFeatures(): Features needed!\n", get_name())
43 
45  bool is_labelled;
46  int32_t size=1024;
47 
    /* pointer-to-bool conversion: labelled iff a label array was passed */
48  is_labelled=lab;
49  file=new CStreamingFileFromDenseFeatures<T>(dense_features, lab);
50  init(file, is_labelled, size);
    /* the parser must neither free vectors after release nor on destruction;
     * presumably because the data is owned by dense_features -- TODO confirm */
52  parser.set_free_vector_after_release(false);
53  parser.set_free_vectors_on_destruct(false);
    /* init(file, ...) resets seekable to false, so it is enabled afterwards */
54  seekable=true;
55 }
56 
58 {
    /* Destructor body (named by the debug messages below; the signature on
     * listing line 57 is not shown). It only detaches current_vector from its
     * buffer: the pointer is set to NULL and the length to 0. No memory is
     * released in the visible code. */
59  SG_DEBUG("entering %s::~CStreamingDenseFeatures()\n", get_name())
60  /* needed to prevent double free memory errors */
61  current_vector.vector=NULL;
62  current_vector.vlen=0;
63  SG_DEBUG("leaving %s::~CStreamingDenseFeatures()\n", get_name())
64 }
65 
67 {
    /* Restarts the parser on working_file, but only when the stream is marked
     * seekable; otherwise this is a no-op. The signature (listing line 66) is
     * not shown -- presumably reset_stream(), TODO confirm. Listing line 70 is
     * also not shown, so there may be one more statement inside the branch. */
68  if (seekable)
69  {
    /* the unbraced `if` guards only end_parser(); exit_parser() and everything
     * after it run unconditionally within the seekable branch */
71  if (parser.is_running())
72  parser.end_parser();
73  parser.exit_parser();
    /* re-initialise with 1 in the argument position that init(file, ...)
     * uses for `size` */
74  parser.init(working_file, has_labels, 1);
75  parser.set_free_vector_after_release(false);
76  parser.set_free_vectors_on_destruct(false);
77  parser.start_parser();
78  }
79 }
80 
82  const float32_t* vec2, int32_t vec2_len)
83 {
    /* Dot product of the current example with a raw dense array.
     *
     * vec2      array of vec2_len values
     * vec2_len  must equal current_vector.vlen (enforced with ASSERT)
     *
     * Returns the sum over i of current_vector[i]*vec2[i], accumulated in a
     * float32_t. The opening of the signature (listing line 81) is not shown
     * in this listing. */
84  ASSERT(vec2_len==current_vector.vlen)
85  float32_t result=0;
86 
87  for (int32_t i=0; i<current_vector.vlen; i++)
88  result+=current_vector[i]*vec2[i];
89 
90  return result;
91 }
92 
94  const float64_t* vec2, int32_t vec2_len)
95 {
    /* Double-precision counterpart of the float32_t dense dot product.
     *
     * vec2      array of vec2_len values
     * vec2_len  must equal current_vector.vlen (enforced with ASSERT)
     *
     * Returns the sum over i of current_vector[i]*vec2[i], accumulated in a
     * float64_t. The opening of the signature (listing line 93), including the
     * return type, is not shown in this listing. */
96  ASSERT(vec2_len==current_vector.vlen)
97  float64_t result=0;
98 
99  for (int32_t i=0; i<current_vector.vlen; i++)
100  result+=current_vector[i]*vec2[i];
101 
102  return result;
103 }
104 
106  float32_t alpha, float32_t* vec2, int32_t vec2_len, bool abs_val)
107 {
    /* Adds a scaled copy of the current example to vec2 in place.
     *
     * alpha     scaling factor
     * vec2      array of vec2_len values, modified in place
     * vec2_len  must equal current_vector.vlen (enforced with ASSERT)
     * abs_val   if true, alpha*|current_vector[i]| is added instead of
     *           alpha*current_vector[i]
     *
     * The opening of the signature (listing line 105) is not shown here. */
108  ASSERT(vec2_len==current_vector.vlen)
109 
110  if (abs_val)
111  {
112  for (int32_t i=0; i<current_vector.vlen; i++)
113  vec2[i]+=alpha*CMath::abs(current_vector[i]);
114  }
115  else
116  {
117  for (int32_t i=0; i<current_vector.vlen; i++)
118  vec2[i]+=alpha*current_vector[i];
119  }
120 }
121 
123  float64_t alpha, float64_t* vec2, int32_t vec2_len, bool abs_val)
124 {
    /* Double-precision counterpart: adds a scaled copy of the current example
     * to vec2 in place.
     *
     * alpha     scaling factor
     * vec2      array of vec2_len values, modified in place
     * vec2_len  must equal current_vector.vlen (enforced with ASSERT)
     * abs_val   if true, alpha*|current_vector[i]| is added instead of
     *           alpha*current_vector[i]
     *
     * The opening of the signature (listing line 122) is not shown here. */
125  ASSERT(vec2_len==current_vector.vlen)
126 
127  if (abs_val)
128  {
129  for (int32_t i=0; i<current_vector.vlen; i++)
130  vec2[i]+=alpha*CMath::abs(current_vector[i]);
131  }
132  else
133  {
134  for (int32_t i=0; i<current_vector.vlen; i++)
135  vec2[i]+=alpha*current_vector[i];
136  }
137 }
138 
140 {
    /* Returns the length of the current example vector. The signature
     * (listing line 139) is not shown in this listing, so the function's
     * name cannot be confirmed from here -- TODO confirm. */
141  return current_vector.vlen;
142 }
143 
/* Always reports 1, independent of how much data the stream holds;
 * presumably because only the current example is exposed at any time --
 * TODO confirm against the base-class contract. */
144 template<class T> int32_t CStreamingDenseFeatures<T>::get_num_vectors() const
145 {
146  return 1;
147 }
148 
/* Registers CStreamingFile::get_vector as the parser's function for reading
 * a vector. The signature (listing line 150) is not shown in this listing. */
149 template<class T>
151 {
152  parser.set_read_vector(&CStreamingFile::get_vector);
153 }
154 
/* Registers CStreamingFile::get_vector_and_label as the parser's function for
 * reading a vector together with its label. The signature (listing line 156)
 * is not shown in this listing. */
155 template<class T>
157 {
158  parser.set_read_vector_and_label(&CStreamingFile::get_vector_and_label);
159 }
160 
/* Generates one full specialisation of get_feature_type() per element type:
 * GET_FEATURE_TYPE(f_type, sg_type) makes
 * CStreamingDenseFeatures<sg_type>::get_feature_type() return f_type. */
161 #define GET_FEATURE_TYPE(f_type, sg_type) \
162 template<> EFeatureType CStreamingDenseFeatures<sg_type>::get_feature_type() const \
163 { \
164  return f_type; \
165 }
166 
/* Specialisations visible in this listing. The listing skips lines 167-168
 * and 177-179, so further invocations may exist that are not shown here.
 * Note that uint8_t and int8_t both map to F_BYTE. */
169 GET_FEATURE_TYPE(F_BYTE, uint8_t)
170 GET_FEATURE_TYPE(F_BYTE, int8_t)
171 GET_FEATURE_TYPE(F_SHORT, int16_t)
172 GET_FEATURE_TYPE(F_WORD, uint16_t)
173 GET_FEATURE_TYPE(F_INT, int32_t)
174 GET_FEATURE_TYPE(F_UINT, uint32_t)
175 GET_FEATURE_TYPE(F_LONG, int64_t)
176 GET_FEATURE_TYPE(F_ULONG, uint64_t)
/* the helper macro is only needed for the list above */
180 #undef GET_FEATURE_TYPE
181 
/* Puts the object into its initial state: no working file, not seekable, and
 * an empty current_vector (NULL buffer, length -1). Finishes by calling
 * set_generic<T>(). */
182 template<class T>
183 void CStreamingDenseFeatures<T>::init()
184 {
185  working_file=NULL;
186  seekable=false;
187 
188  /* needed to prevent double free memory errors */
189  current_vector.vector=NULL;
    /* -1 here, whereas the destructor resets the length to 0 */
190  current_vector.vlen=-1;
191 
192  set_generic<T>();
193 }
194 
/* Initialises the object for reading from a streaming file.
 *
 * file         streaming file to read from; stored in working_file
 * is_labelled  whether examples carry labels; stored in has_labels
 * size         forwarded as the third argument of parser.init()
 *
 * Runs the argument-less init() first. Listing line 202 (between the
 * working_file assignment and parser.init) is not shown here. */
195 template<class T>
196 void CStreamingDenseFeatures<T>::init(CStreamingFile* file, bool is_labelled,
197  int32_t size)
198 {
199  init();
200  has_labels=is_labelled;
201  working_file=file;
203  parser.init(file, is_labelled, size);
    /* init() above already cleared this flag; it is cleared again here */
204  seekable=false;
205 }
206 
/* Starts the parser unless it is already running. The signature (listing
 * line 208) is not shown in this listing. */
207 template<class T>
209 {
210  if (!parser.is_running())
211  parser.start_parser();
212 }
213 
/* Ends the parser unconditionally (no is_running() check, unlike the start
 * counterpart). The signature (listing line 215) is not shown here. */
214 template<class T>
216 {
217  parser.end_parser();
218 }
219 
/* Asks the parser for the next example and returns the parser's result
 * converted to bool. The signature (listing line 221) and the remaining
 * arguments of the parser call (listing line 226) are not shown here; the
 * first argument visible is current_vector.vector. */
220 template<class T>
222 {
223  SG_DEBUG("entering\n");
224  bool ret_value;
225  ret_value=(bool)parser.get_next_example(current_vector.vector,
227 
228  SG_DEBUG("leaving\n");
229  return ret_value;
230 }
231 
/* Returns current_vector. The signature (listing line 233) is not shown in
 * this listing. */
232 template<class T>
234 {
235  return current_vector;
236 }
237 
/* Returns current_label. The signature (listing line 239) and one statement
 * (listing line 241) are not shown in this listing. */
238 template<class T>
240 {
242 
243  return current_label;
244 }
245 
/* Tells the parser that the current example is finished by calling
 * finalize_example(). The signature (listing line 247) is not shown here. */
246 template<class T>
248 {
249  parser.finalize_example();
250 }
251 
/* Returns the length of the current example vector. The signature (listing
 * line 253) is not shown in this listing, so the function's name cannot be
 * confirmed from here -- TODO confirm. */
252 template<class T>
254 {
255  return current_vector.vlen;
256 }
257 
/* Dot product of the current example with the vector obtained from another
 * streaming features object. The signature (listing line 259) and listing
 * lines 262-264 -- which must contain the declaration of `sf` -- are not shown
 * here; only the non-NULL check on `df` is visible. */
258 template<class T>
260 {
261  ASSERT(df)
265 
266  SGVector<T> other_vector=sf->get_vector();
267 
268  return linalg::dot(current_vector, other_vector);
269 }
270 
/* Dot product of the current example with the vector `sgvec1`.
 * Reports an SG_ERROR naming both lengths when sgvec1.vlen differs from
 * current_vector.vlen, then returns linalg::dot of the two vectors.
 * The signature (listing line 272) is not shown in this listing. */
271 template<class T>
273 {
274  int32_t len1;
275  len1=sgvec1.vlen;
276 
277  if (len1!=current_vector.vlen)
278  SG_ERROR(
279  "Lengths %d and %d not equal while computing dot product!\n", len1, current_vector.vlen);
280 
281  return linalg::dot(current_vector, sgvec1);
282 }
283 
/* Returns the length of the current example vector. The signature (listing
 * line 285) is not shown in this listing, so the function's name cannot be
 * confirmed from here -- TODO confirm. */
284 template<class T>
286 {
287  return current_vector.vlen;
288 }
289 
/* Identifies this feature class as C_STREAMING_DENSE. The signature (listing
 * line 291) is not shown here; presumably get_feature_class() -- TODO
 * confirm. */
290 template<class T>
292 {
293  return C_STREAMING_DENSE;
294 }
295 
/* Reads up to num_elements examples from the stream and returns them as a
 * newly allocated CDenseFeatures<T> object.
 *
 * num_elements  number of examples requested; must be positive (REQUIRE)
 *
 * The matrix is allocated when the first example arrives, with
 * current_vector.vlen rows and num_elements columns. Example i is copied to
 * element offset current_vector.vlen*i. If the stream ends early, a warning is
 * logged and the matrix is replaced by a copy with only i columns. Every
 * example must have the same length as the first one (REQUIRE).
 *
 * The middle of the signature (listing line 297), including the return type,
 * is not shown in this listing. */
296 template<class T>
298  index_t num_elements)
299 {
300  SG_DEBUG("entering\n");
301  SG_DEBUG("Streaming %d elements\n", num_elements)
302 
303  REQUIRE(num_elements>0, "Requested number of feature vectors (%d) must be "
304  "positive\n", num_elements);
305 
306  /* init matrix empty, as we don't know the dimension yet */
307  SGMatrix<T> matrix;
308 
309  for (index_t i=0; i<num_elements; ++i)
310  {
311  /* check if we run out of data */
312  if (!get_next_example())
313  {
314  SG_WARNING("Ran out of streaming data, reallocating matrix and "
315  "returning!\n");
316 
317  /* allocating space for data so far, note this might be 0 bytes */
318  SGMatrix<T> so_far(matrix.num_rows, i);
319 
320  /* copy the i examples collected so far */
321  sg_memcpy(so_far.matrix, matrix.matrix,
322  so_far.num_rows*so_far.num_cols*sizeof(T));
323 
324  matrix=so_far;
325  break;
326  }
327  else
328  {
329  /* allocate matrix memory in first iteration */
330  if (!matrix.matrix)
331  {
332  SG_DEBUG("Allocating %dx%d matrix\n",
333  current_vector.vlen, num_elements);
334  matrix=SGMatrix<T>(current_vector.vlen, num_elements);
335  }
336 
337  /* get an example from stream and copy to feature matrix */
338  SGVector<T> vec=get_vector();
339 
340  /* check for inconsistent dimensions */
341  REQUIRE(vec.vlen==matrix.num_rows,
342  "Dimension of streamed vector (%d) does not match "
343  "dimensions of previous vectors (%d)\n",
344  vec.vlen, matrix.num_rows);
345 
346  /* copy vector into matrix */
347  sg_memcpy(&matrix.matrix[current_vector.vlen*i], vec.vector,
348  vec.vlen*sizeof(T));
349 
350  /* clean up */
351  release_example();
352  }
353 
354  }
355 
356  /* create new feature object from collected data */
357  CDenseFeatures<T>* result=new CDenseFeatures<T>(matrix);
358 
359  SG_DEBUG("leaving returning %dx%d matrix\n", matrix.num_rows,
360  matrix.num_cols);
361 
362  return result;
363 }
364 
/* Explicit instantiations of CStreamingDenseFeatures for the element types
 * listed below. The listing jumps from line 376 to 378, so one further
 * instantiation may exist that is not shown here. */
365 template class CStreamingDenseFeatures<bool> ;
366 template class CStreamingDenseFeatures<char> ;
367 template class CStreamingDenseFeatures<int8_t> ;
368 template class CStreamingDenseFeatures<uint8_t> ;
369 template class CStreamingDenseFeatures<int16_t> ;
370 template class CStreamingDenseFeatures<uint16_t> ;
371 template class CStreamingDenseFeatures<int32_t> ;
372 template class CStreamingDenseFeatures<uint32_t> ;
373 template class CStreamingDenseFeatures<int64_t> ;
374 template class CStreamingDenseFeatures<uint64_t> ;
375 template class CStreamingDenseFeatures<float32_t> ;
376 template class CStreamingDenseFeatures<float64_t> ;
378 }
virtual int32_t get_dim_feature_space() const
virtual CFeatures * get_streamed_features(index_t num_elements)
Class CStreamingFileFromDenseFeatures is a derived class of CStreamingFile which creates an input sou...
This class implements streaming features with dense feature vectors.
int32_t index_t
Definition: common.h:72
bool has_labels
Whether examples are labelled or not.
virtual void add_to_dense_vec(float32_t alpha, float32_t *vec2, int32_t vec2_len, bool abs_val=false)
virtual void get_vector(bool *&vector, int32_t &len)
#define SG_ERROR(...)
Definition: SGIO.h:128
#define REQUIRE(x,...)
Definition: SGIO.h:181
T dot(const SGVector< T > &a, const SGVector< T > &b)
SGVector< T > current_vector
The current example's feature vector as an SGVector<T>
#define GET_FEATURE_TYPE(f_type, sg_type)
virtual float32_t dense_dot(const float32_t *vec2, int32_t vec2_len)
virtual EFeatureClass get_feature_class() const
#define SG_REF(x)
Definition: SGObject.h:52
CStreamingFile * working_file
The StreamingFile object to read from.
EFeatureClass
shogun feature class
Definition: FeatureTypes.h:38
A Streaming File access class.
Definition: StreamingFile.h:34
#define ASSERT(x)
Definition: SGIO.h:176
CInputParser< T > parser
The parser object, which reads from input and returns parsed example objects.
double float64_t
Definition: common.h:60
long double floatmax_t
Definition: common.h:61
virtual float32_t dot(SGVector< T > vec)
virtual const char * get_name() const
index_t num_rows
Definition: SGMatrix.h:495
shogun vector
virtual EFeatureClass get_feature_class() const =0
index_t num_cols
Definition: SGMatrix.h:497
Streaming features that support dot products among other operations.
float64_t current_label
The current example's label.
float float32_t
Definition: common.h:59
shogun matrix
virtual void get_vector_and_label(bool *&vector, int32_t &len, float64_t &label)
#define SG_DEBUG(...)
Definition: SGIO.h:106
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
bool seekable
Whether the stream is seekable.
The class Features is the base class of all feature objects.
Definition: Features.h:69
virtual int32_t get_num_vectors() const
#define SG_WARNING(...)
Definition: SGIO.h:127
virtual EFeatureType get_feature_type() const =0
virtual EFeatureType get_feature_type() const
index_t vlen
Definition: SGVector.h:571
static T abs(T a)
Definition: Math.h:161

SHOGUN Machine Learning Toolbox - Documentation