SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
StreamingDenseFeatures.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2011 Shashwat Lal Das
8  * Written (W) 2012 Heiko Strathmann
9  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
10  */
11 
15 
16 namespace shogun
17 {
/* Constructor fragment; its declaration line is not present in this listing
 * (presumably the argument-less constructor -- confirm against the header).
 * It puts the object into its default state via init() and then switches off
 * the parser's free-vector-after-release setting. */
18 template<class T>
21 {
23  init();
24  parser.set_free_vector_after_release(false);
25 }
26 
/* Constructor fragment taking `file`, `is_labelled` and `size`; its first
 * declaration line is not present in this listing. All three arguments are
 * forwarded to init(file, is_labelled, size), after which the parser's
 * free-vector-after-release setting is switched off. */
27 template<class T>
29  bool is_labelled, int32_t size) :
31 {
32  init(file, is_labelled, size);
34  parser.set_free_vector_after_release(false);
35 }
36 
38  CDenseFeatures<T>* dense_features, float64_t* lab) :
40 {
    /* Constructor fragment that streams from an in-memory CDenseFeatures<T>
     * object; `lab` may be a null pointer, in which case the stream is
     * treated as unlabelled. The first declaration line and the declaration
     * of `file` are not present in this listing. */

    /* The message contains one %s conversion, so the object name must be
     * passed along with it; without the argument the error path would read
     * a vararg that was never supplied. */
41  REQUIRE(dense_features, "%s::CStreamingDenseFeatures(): Features needed!\n", get_name())
42 
44  bool is_labelled;
    /* size handed on to init() below */
45  int32_t size=1024;
46 
    /* a non-null label pointer means the examples carry labels */
47  is_labelled=lab;
48  file=new CStreamingFileFromDenseFeatures<T>(dense_features, lab);
49  init(file, is_labelled, size);
    /* both parser free-settings are switched off for this kind of stream */
51  parser.set_free_vector_after_release(false);
52  parser.set_free_vectors_on_destruct(false);
    /* init() cleared the flag; this kind of stream can be rewound */
53  seekable=true;
54 }
55 
57 {
    /* Destructor body (per the log messages below; the declaration line is
     * not present in this listing). It logs entry and exit at debug level
     * and clears current_vector's pointer and length. */
58  SG_DEBUG("entering %s::~CStreamingDenseFeatures()\n", get_name())
59  /* needed to prevent double free memory errors */
60  current_vector.vector=NULL;
61  current_vector.vlen=0;
62  SG_DEBUG("leaving %s::~CStreamingDenseFeatures()\n", get_name())
63 }
64 
66 {
    /* Restarts streaming from the beginning when the stream is seekable;
     * for a non-seekable stream nothing happens. (The declaration line is
     * not present in this listing.) */
67  if (seekable)
68  {
    /* The cast relies on working_file being a
     * CStreamingFileFromDenseFeatures<T>. In the code visible here,
     * seekable is only set to true by the constructor that creates such a
     * file. */
69  ((CStreamingFileFromDenseFeatures<T>*)working_file)->reset_stream();
    /* shut the parser down, re-initialise it with a size of 1, restart */
70  parser.exit_parser();
71  parser.init(working_file, has_labels, 1);
72  parser.set_free_vector_after_release(false);
    /* NOTE(review): the constructor that enables seekable also calls
     * parser.set_free_vectors_on_destruct(false); that call is not repeated
     * after parser.init() here -- confirm whether init() keeps the setting. */
73  parser.start_parser();
74  }
75 }
76 
78  const float32_t* vec2, int32_t vec2_len)
79 {
    /* Dot product of the current example with the dense array vec2.
     * vec2_len must equal current_vector.vlen (enforced by ASSERT).
     * The sum is accumulated and returned as float32_t.
     * (The first declaration line is not present in this listing.) */
80  ASSERT(vec2_len==current_vector.vlen)
81  float32_t result=0;
82 
83  for (int32_t i=0; i<current_vector.vlen; i++)
84  result+=current_vector[i]*vec2[i];
85 
86  return result;
87 }
88 
90  const float64_t* vec2, int32_t vec2_len)
91 {
    /* Dot product of the current example with the dense array vec2.
     * vec2_len must equal current_vector.vlen (enforced by ASSERT).
     * The sum is accumulated and returned as float64_t.
     * (The first declaration line is not present in this listing.) */
92  ASSERT(vec2_len==current_vector.vlen)
93  float64_t result=0;
94 
95  for (int32_t i=0; i<current_vector.vlen; i++)
96  result+=current_vector[i]*vec2[i];
97 
98  return result;
99 }
100 
102  float32_t alpha, float32_t* vec2, int32_t vec2_len, bool abs_val)
103 {
    /* Adds alpha times the current example to vec2 in place:
     *   vec2[i] += alpha * x[i]      when abs_val is false
     *   vec2[i] += alpha * |x[i]|    when abs_val is true
     * vec2_len must equal current_vector.vlen (enforced by ASSERT).
     * (The first declaration line is not present in this listing.) */
104  ASSERT(vec2_len==current_vector.vlen)
105 
106  if (abs_val)
107  {
108  for (int32_t i=0; i<current_vector.vlen; i++)
109  vec2[i]+=alpha*CMath::abs(current_vector[i]);
110  }
111  else
112  {
113  for (int32_t i=0; i<current_vector.vlen; i++)
114  vec2[i]+=alpha*current_vector[i];
115  }
116 }
117 
119  float64_t alpha, float64_t* vec2, int32_t vec2_len, bool abs_val)
120 {
    /* Adds alpha times the current example to vec2 in place:
     *   vec2[i] += alpha * x[i]      when abs_val is false
     *   vec2[i] += alpha * |x[i]|    when abs_val is true
     * vec2_len must equal current_vector.vlen (enforced by ASSERT).
     * (The first declaration line is not present in this listing.) */
121  ASSERT(vec2_len==current_vector.vlen)
122 
123  if (abs_val)
124  {
125  for (int32_t i=0; i<current_vector.vlen; i++)
126  vec2[i]+=alpha*CMath::abs(current_vector[i]);
127  }
128  else
129  {
130  for (int32_t i=0; i<current_vector.vlen; i++)
131  vec2[i]+=alpha*current_vector[i];
132  }
133 }
134 
136 {
    /* Returns the length of the example currently held in current_vector.
     * (The declaration line is not present in this listing.) */
137  return current_vector.vlen;
138 }
139 
141 {
    /* Returns a heap-allocated copy of this object, created with the copy
     * constructor. (The declaration line is not present in this listing.) */
142  return new CStreamingDenseFeatures<T>(*this);
143 }
144 
145 template<class T> int32_t CStreamingDenseFeatures<T>::get_num_vectors() const
146 {
147  return 1;
148 }
149 
/* Registers CStreamingFile::get_vector with the parser as the function it
 * uses to read a vector. (The declaration line is not present in this
 * listing.) */
150 template<class T>
152 {
153  parser.set_read_vector(&CStreamingFile::get_vector);
154 }
155 
/* Registers CStreamingFile::get_vector_and_label with the parser as the
 * function it uses to read a vector together with its label. (The
 * declaration line is not present in this listing.) */
156 template<class T>
158 {
159  parser.set_read_vector_and_label(&CStreamingFile::get_vector_and_label);
160 }
161 
/* GET_FEATURE_TYPE(f_type, sg_type) expands to an explicit specialization of
 * CStreamingDenseFeatures<sg_type>::get_feature_type() that returns f_type.
 * The macro is only needed for the list of invocations that follows and is
 * undefined again right after it. */
162 #define GET_FEATURE_TYPE(f_type, sg_type) \
163 template<> EFeatureType CStreamingDenseFeatures<sg_type>::get_feature_type() const \
164 { \
165  return f_type; \
166 }
167 
/* One specialization per element type. Note that uint8_t and int8_t both
 * report F_BYTE.
 * NOTE(review): the listing numbering jumps before the first and after the
 * last invocation shown here, and the class is also instantiated for bool,
 * char, float32_t and float64_t at the end of the file -- confirm that
 * specializations for those types exist in the original file. */
170 GET_FEATURE_TYPE(F_BYTE, uint8_t)
171 GET_FEATURE_TYPE(F_BYTE, int8_t)
172 GET_FEATURE_TYPE(F_SHORT, int16_t)
173 GET_FEATURE_TYPE(F_WORD, uint16_t)
174 GET_FEATURE_TYPE(F_INT, int32_t)
175 GET_FEATURE_TYPE(F_UINT, uint32_t)
176 GET_FEATURE_TYPE(F_LONG, int64_t)
177 GET_FEATURE_TYPE(F_ULONG, uint64_t)
181 #undef GET_FEATURE_TYPE
182 
183 template<class T>
184 void CStreamingDenseFeatures<T>::init()
185 {
186  working_file=NULL;
187  seekable=false;
188 
189  /* needed to prevent double free memory errors */
190  current_vector.vector=NULL;
191  current_vector.vlen=-1;
192 
193  set_generic<T>();
194 }
195 
196 template<class T>
197 void CStreamingDenseFeatures<T>::init(CStreamingFile* file, bool is_labelled,
198  int32_t size)
199 {
200  init();
201  has_labels=is_labelled;
202  working_file=file;
203  SG_REF(working_file);
204  parser.init(file, is_labelled, size);
205  seekable=false;
206 }
207 
/* Starts the parser unless it is already running, so calling this more than
 * once does not start it twice. (The declaration line is not present in this
 * listing.) */
208 template<class T>
210 {
211  if (!parser.is_running())
212  parser.start_parser();
213 }
214 
/* Forwards to parser.end_parser(). (The declaration line is not present in
 * this listing.) */
215 template<class T>
217 {
218  parser.end_parser();
219 }
220 
/* Asks the parser for the next example. The parser fills in
 * current_vector.vector, current_vector.vlen and current_label; its return
 * value is converted to bool and returned. (The declaration line is not
 * present in this listing.) */
221 template<class T>
223 {
224  bool ret_value;
225  ret_value=(bool)parser.get_next_example(current_vector.vector,
226  current_vector.vlen, current_label);
227 
228  return ret_value;
229 }
230 
/* Returns current_vector, the example most recently obtained from the
 * parser. (The declaration line is not present in this listing.) */
231 template<class T>
233 {
234  return current_vector;
235 }
236 
/* Returns current_label. Only valid for labelled streams: has_labels is
 * enforced with ASSERT. (The declaration line is not present in this
 * listing.) */
237 template<class T>
239 {
240  ASSERT(has_labels)
241 
242  return current_label;
243 }
244 
/* Tells the parser that the current example is finished with by calling
 * parser.finalize_example(). (The declaration line is not present in this
 * listing.) */
245 template<class T>
247 {
248  parser.finalize_example();
249 }
250 
/* Returns the length of the example currently held in current_vector.
 * (The declaration line is not present in this listing.) */
251 template<class T>
253 {
254  return current_vector.vlen;
255 }
256 
/* Dot product between the current example of this object and the current
 * example of another feature object `df`. `df` must be non-null and must
 * report the same feature type and feature class as this object (all three
 * enforced with ASSERT). The declaration line and the line that declares
 * `sf` are not present in this listing; `sf` is presumably `df` cast to
 * this class -- confirm. */
257 template<class T>
259 {
260  ASSERT(df)
261  ASSERT(df->get_feature_type() == get_feature_type())
262  ASSERT(df->get_feature_class() == get_feature_class())
264 
265  SGVector<T> other_vector=sf->get_vector();
266 
    /* NOTE(review): the length used is current_vector.vlen; no comparison
     * with other_vector.vlen is visible here -- confirm both vectors are
     * guaranteed to have the same length. */
267  return SGVector<T>::dot(current_vector.vector, other_vector.vector, current_vector.vlen);
268 }
269 
/* Dot product between the current example and the vector sgvec1. The two
 * lengths must match; a mismatch is reported through SG_ERROR. (The
 * declaration line is not present in this listing.) */
270 template<class T>
272 {
273  int32_t len1;
274  len1=sgvec1.vlen;
275 
276  if (len1!=current_vector.vlen)
277  SG_ERROR(
278  "Lengths %d and %d not equal while computing dot product!\n", len1, current_vector.vlen);
279 
280  return SGVector<T>::dot(current_vector.vector, sgvec1.vector, len1);
281 }
282 
/* Returns the length of the example currently held in current_vector.
 * (The declaration line is not present in this listing.) */
283 template<class T>
285 {
286  return current_vector.vlen;
287 }
288 
/* Identifies this feature class: always returns C_STREAMING_DENSE. (The
 * declaration line is not present in this listing.) */
289 template<class T>
291 {
292  return C_STREAMING_DENSE;
293 }
294 
/* Reads up to num_elements examples from the stream and returns them as a
 * newly allocated CDenseFeatures<T> object. Each example becomes one column
 * of the result matrix; the number of rows is taken from the first example
 * read. If the stream ends early, the matrix is shrunk to the number of
 * examples actually read. All examples must have the same length, otherwise
 * SG_ERROR is raised. (The first declaration line is not present in this
 * listing.) */
295 template<class T>
297  index_t num_elements)
298 {
299  SG_DEBUG("entering %s(%p)::get_streamed_features(%d)\n", get_name(), this,
300  num_elements);
301 
302  /* init matrix empty since num_rows is not yet known */
303  SGMatrix<T> matrix;
304 
305  for (index_t i=0; i<num_elements; ++i)
306  {
307  /* check if we run out of data */
308  if (!get_next_example())
309  {
310  SG_WARNING("%s::get_streamed_features(): ran out of streaming "
311  "data, reallocating matrix and returning!\n", get_name());
312 
313  /* allocating space for data so far */
    /* i is the number of examples copied so far, i.e. the number of
     * columns that hold data */
314  SGMatrix<T> so_far(matrix.num_rows, i);
315 
316  /* copy */
    /* NOTE(review): if the very first read fails (i==0) no matrix has been
     * allocated yet; this presumably copies zero bytes, assuming a
     * default-constructed SGMatrix reports zero rows -- confirm. */
317  memcpy(so_far.matrix, matrix.matrix,
318  so_far.num_rows*so_far.num_cols*sizeof(T));
319 
320  matrix=so_far;
321  break;
322  }
323  else
324  {
325  /* allocate matrix memory during first run */
326  if (!matrix.matrix)
327  {
328  SG_DEBUG("%s::get_streamed_features(): allocating %dx%d matrix\n",
329  get_name(), current_vector.vlen, num_elements);
330  matrix=SGMatrix<T>(current_vector.vlen, num_elements);
331  }
332 
333  /* get an example from stream and copy to feature matrix */
334  SGVector<T> vec=get_vector();
335 
336  /* check for inconsistent dimensions */
337  if (vec.vlen!=matrix.num_rows)
338  {
    /* NOTE(review): release_example() further down is not reached on this
     * path if SG_ERROR does not return -- confirm that is acceptable. */
339  SG_ERROR("%s::get_streamed_features(): streamed vectors have "
340  "different dimensions. This is not allowed!\n",
341  get_name());
342  }
343 
344  /* copy vector into matrix */
    /* example i is written at element offset vlen*i, i.e. as column i */
345  memcpy(&matrix.matrix[current_vector.vlen*i], vec.vector,
346  vec.vlen*sizeof(T));
347 
348  /* if debug logging is enabled, print the vector */
349  if (sg_io->get_loglevel()==MSG_DEBUG)
350  {
351  SG_DEBUG("%d. ", i)
352  vec.display_vector("streamed vector");
353  }
354 
355  /* clean up */
356  release_example();
357  }
358 
359  }
360 
361  /* create new feature object from collected data */
362  CDenseFeatures<T>* result=new CDenseFeatures<T>(matrix);
363 
364  SG_DEBUG("leaving %s(%p)::get_streamed_features(%d) and returning %dx%d "
365  "matrix\n", get_name(), this, num_elements, matrix.num_rows,
366  matrix.num_cols);
367 
368  return result;
369 }
370 
371 template class CStreamingDenseFeatures<bool> ;
372 template class CStreamingDenseFeatures<char> ;
373 template class CStreamingDenseFeatures<int8_t> ;
374 template class CStreamingDenseFeatures<uint8_t> ;
375 template class CStreamingDenseFeatures<int16_t> ;
376 template class CStreamingDenseFeatures<uint16_t> ;
377 template class CStreamingDenseFeatures<int32_t> ;
378 template class CStreamingDenseFeatures<uint32_t> ;
379 template class CStreamingDenseFeatures<int64_t> ;
380 template class CStreamingDenseFeatures<uint64_t> ;
381 template class CStreamingDenseFeatures<float32_t> ;
382 template class CStreamingDenseFeatures<float64_t> ;
384 }

SHOGUN Machine Learning Toolbox - Documentation