SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
StreamingDenseFeatures.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2011 Shashwat Lal Das
8  * Written (W) 2012 Heiko Strathmann
9  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
10  */
11 
15 
16 namespace shogun
17 {
18 template<class T>
21 {
23  init();
24  parser.set_free_vector_after_release(false);
25 }
26 
27 template<class T>
29  bool is_labelled, int32_t size) :
31 {
32  init(file, is_labelled, size);
34  parser.set_free_vector_after_release(false);
35 }
36 
38  CDenseFeatures<T>* dense_features, float64_t* lab) :
40 {
41  REQUIRE(dense_features, "%s::CStreamingDenseFeatures(): Features needed!\n")
42 
44  bool is_labelled;
45  int32_t size=1024;
46 
47  is_labelled=lab;
48  file=new CStreamingFileFromDenseFeatures<T>(dense_features, lab);
49  init(file, is_labelled, size);
51  parser.set_free_vector_after_release(false);
52  parser.set_free_vectors_on_destruct(false);
53  seekable=true;
54 }
55 
57 {
58  SG_DEBUG("entering %s::~CStreamingDenseFeatures()\n", get_name())
59  /* needed to prevent double free memory errors */
60  current_vector.vector=NULL;
61  current_vector.vlen=0;
62  SG_DEBUG("leaving %s::~CStreamingDenseFeatures()\n", get_name())
63 }
64 
66 {
67  if (seekable)
68  {
69  ((CStreamingFileFromDenseFeatures<T>*)working_file)->reset_stream();
70  parser.exit_parser();
71  parser.init(working_file, has_labels, 1);
72  parser.set_free_vector_after_release(false);
73  parser.start_parser();
74  }
75 }
76 
78  const float32_t* vec2, int32_t vec2_len)
79 {
80  ASSERT(vec2_len==current_vector.vlen)
81  float32_t result=0;
82 
83  for (int32_t i=0; i<current_vector.vlen; i++)
84  result+=current_vector[i]*vec2[i];
85 
86  return result;
87 }
88 
90  const float64_t* vec2, int32_t vec2_len)
91 {
92  ASSERT(vec2_len==current_vector.vlen)
93  float64_t result=0;
94 
95  for (int32_t i=0; i<current_vector.vlen; i++)
96  result+=current_vector[i]*vec2[i];
97 
98  return result;
99 }
100 
102  float32_t alpha, float32_t* vec2, int32_t vec2_len, bool abs_val)
103 {
104  ASSERT(vec2_len==current_vector.vlen)
105 
106  if (abs_val)
107  {
108  for (int32_t i=0; i<current_vector.vlen; i++)
109  vec2[i]+=alpha*CMath::abs(current_vector[i]);
110  }
111  else
112  {
113  for (int32_t i=0; i<current_vector.vlen; i++)
114  vec2[i]+=alpha*current_vector[i];
115  }
116 }
117 
119  float64_t alpha, float64_t* vec2, int32_t vec2_len, bool abs_val)
120 {
121  ASSERT(vec2_len==current_vector.vlen)
122 
123  if (abs_val)
124  {
125  for (int32_t i=0; i<current_vector.vlen; i++)
126  vec2[i]+=alpha*CMath::abs(current_vector[i]);
127  }
128  else
129  {
130  for (int32_t i=0; i<current_vector.vlen; i++)
131  vec2[i]+=alpha*current_vector[i];
132  }
133 }
134 
136 {
137  return current_vector.vlen;
138 }
139 
141 {
142  return new CStreamingDenseFeatures<T>(*this);
143 }
144 
145 template<class T> int32_t CStreamingDenseFeatures<T>::get_num_vectors() const
146 {
147  return 1;
148 }
149 
150 template<class T>
152 {
153  parser.set_read_vector(&CStreamingFile::get_vector);
154 }
155 
156 template<class T>
158 {
159  parser.set_read_vector_and_label(&CStreamingFile::get_vector_and_label);
160 }
161 
/* Helper macro: expands to the explicit specialization of
 * CStreamingDenseFeatures<sg_type>::get_feature_type() which returns the
 * feature type constant f_type.
 *
 * f_type   value returned by the generated specialization
 * sg_type  element type the specialization is generated for
 */
162 #define GET_FEATURE_TYPE(f_type, sg_type) \
163 template<> EFeatureType CStreamingDenseFeatures<sg_type>::get_feature_type() const \
164 { \
165  return f_type; \
166 }
167 
/* One get_feature_type() specialization per element type, generated via
 * GET_FEATURE_TYPE. Note that uint8_t and int8_t both map to F_BYTE. */
170 GET_FEATURE_TYPE(F_BYTE, uint8_t)
171 GET_FEATURE_TYPE(F_BYTE, int8_t)
172 GET_FEATURE_TYPE(F_SHORT, int16_t)
173 GET_FEATURE_TYPE(F_WORD, uint16_t)
174 GET_FEATURE_TYPE(F_INT, int32_t)
175 GET_FEATURE_TYPE(F_UINT, uint32_t)
176 GET_FEATURE_TYPE(F_LONG, int64_t)
177 GET_FEATURE_TYPE(F_ULONG, uint64_t)
/* the helper macro is not used past this point */
181 #undef GET_FEATURE_TYPE
182 
183 template<class T>
184 void CStreamingDenseFeatures<T>::init()
185 {
186  working_file=NULL;
187  seekable=false;
188 
189  /* needed to prevent double free memory errors */
190  current_vector.vector=NULL;
191  current_vector.vlen=-1;
192 
193  set_generic<T>();
194 }
195 
196 template<class T>
197 void CStreamingDenseFeatures<T>::init(CStreamingFile* file, bool is_labelled,
198  int32_t size)
199 {
200  init();
201  has_labels=is_labelled;
202  working_file=file;
203  SG_REF(working_file);
204  parser.init(file, is_labelled, size);
205  seekable=false;
206 }
207 
208 template<class T>
210 {
211  if (!parser.is_running())
212  parser.start_parser();
213 }
214 
215 template<class T>
217 {
218  parser.end_parser();
219 }
220 
221 template<class T>
223 {
224  SG_DEBUG("entering\n");
225  bool ret_value;
226  ret_value=(bool)parser.get_next_example(current_vector.vector,
227  current_vector.vlen, current_label);
228 
229  SG_DEBUG("leaving\n");
230  return ret_value;
231 }
232 
233 template<class T>
235 {
236  return current_vector;
237 }
238 
239 template<class T>
241 {
242  ASSERT(has_labels)
243 
244  return current_label;
245 }
246 
247 template<class T>
249 {
250  parser.finalize_example();
251 }
252 
253 template<class T>
255 {
256  return current_vector.vlen;
257 }
258 
259 template<class T>
261 {
262  ASSERT(df)
263  ASSERT(df->get_feature_type() == get_feature_type())
264  ASSERT(df->get_feature_class() == get_feature_class())
266 
267  SGVector<T> other_vector=sf->get_vector();
268 
269  return SGVector<T>::dot(current_vector.vector, other_vector.vector, current_vector.vlen);
270 }
271 
272 template<class T>
274 {
275  int32_t len1;
276  len1=sgvec1.vlen;
277 
278  if (len1!=current_vector.vlen)
279  SG_ERROR(
280  "Lengths %d and %d not equal while computing dot product!\n", len1, current_vector.vlen);
281 
282  return SGVector<T>::dot(current_vector.vector, sgvec1.vector, len1);
283 }
284 
285 template<class T>
287 {
288  return current_vector.vlen;
289 }
290 
291 template<class T>
293 {
294  return C_STREAMING_DENSE;
295 }
296 
297 template<class T>
299  index_t num_elements)
300 {
301  SG_DEBUG("entering\n");
302  SG_DEBUG("Streaming %d elements\n", num_elements)
303 
304  REQUIRE(num_elements>0, "Requested number of feature vectors (%d) must be "
305  "positive\n", num_elements);
306 
307  /* init matrix empty, as we don't know the dimension yet */
308  SGMatrix<T> matrix;
309 
310  for (index_t i=0; i<num_elements; ++i)
311  {
312  /* check if we run out of data */
313  if (!get_next_example())
314  {
315  SG_WARNING("Ran out of streaming data, reallocating matrix and "
316  "returning!\n");
317 
318  /* allocating space for data so far, note this might be 0 bytes */
319  SGMatrix<T> so_far(matrix.num_rows, i);
320 
321  /* copy */
322  memcpy(so_far.matrix, matrix.matrix,
323  so_far.num_rows*so_far.num_cols*sizeof(T));
324 
325  matrix=so_far;
326  break;
327  }
328  else
329  {
330  /* allocate matrix memory in first iteration */
331  if (!matrix.matrix)
332  {
333  SG_DEBUG("Allocating %dx%d matrix\n",
334  current_vector.vlen, num_elements);
335  matrix=SGMatrix<T>(current_vector.vlen, num_elements);
336  }
337 
338  /* get an example from stream and copy to feature matrix */
339  SGVector<T> vec=get_vector();
340 
341  /* check for inconsistent dimensions */
342  REQUIRE(vec.vlen==matrix.num_rows,
343  "Dimension of streamed vector (%d) does not match "
344  "dimensions of previous vectors (%d)\n",
345  vec.vlen, matrix.num_rows);
346 
347  /* copy vector into matrix */
348  memcpy(&matrix.matrix[current_vector.vlen*i], vec.vector,
349  vec.vlen*sizeof(T));
350 
351  /* clean up */
352  release_example();
353  }
354 
355  }
356 
357  /* create new feature object from collected data */
358  CDenseFeatures<T>* result=new CDenseFeatures<T>(matrix);
359 
360  SG_DEBUG("leaving returning %dx%d matrix\n", matrix.num_rows,
361  matrix.num_cols);
362 
363  return result;
364 }
365 
/* Explicit instantiations of the class template for the element types
 * listed below, so that their member definitions from this file are
 * emitted in this translation unit. */
366 template class CStreamingDenseFeatures<bool> ;
367 template class CStreamingDenseFeatures<char> ;
368 template class CStreamingDenseFeatures<int8_t> ;
369 template class CStreamingDenseFeatures<uint8_t> ;
370 template class CStreamingDenseFeatures<int16_t> ;
371 template class CStreamingDenseFeatures<uint16_t> ;
372 template class CStreamingDenseFeatures<int32_t> ;
373 template class CStreamingDenseFeatures<uint32_t> ;
374 template class CStreamingDenseFeatures<int64_t> ;
375 template class CStreamingDenseFeatures<uint64_t> ;
376 template class CStreamingDenseFeatures<float32_t> ;
377 template class CStreamingDenseFeatures<float64_t> ;
379 }

SHOGUN Machine Learning Toolbox - Documentation