SHOGUN  4.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
DenseFeatures.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2010 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Written (W) 2011-2013 Heiko Strathmann
10  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
11  * Copyright (C) 2010 Berlin Institute of Technology
12  */
13 
16 #include <shogun/io/SGIO.h>
17 #include <shogun/base/Parameter.h>
19 
20 #include <string.h>
21 
22 namespace shogun {
23 
24 template<class ST> CDenseFeatures<ST>::CDenseFeatures(int32_t size) : CDotFeatures(size)
25 {
26  init();
27 }
28 
/* Copy constructor: copy-constructs the CDotFeatures base from orig and then
 * resets this object's own members via init().
 *
 * NOTE(review): listing lines 33-34 and 38-40 are absent from this extract, so
 * the statements that follow init() and the body of the m_subset_stack branch
 * are not visible here -- consult the real source before changing this. */
29 template<class ST> CDenseFeatures<ST>::CDenseFeatures(const CDenseFeatures & orig) :
30  CDotFeatures(orig)
31 {
32  init();
35 
 /* only entered when the source object actually carries a subset stack */
36  if (orig.m_subset_stack != NULL)
37  {
41  }
42 }
43 
45  CDotFeatures()
46 {
47  init();
48  set_feature_matrix(matrix);
49 }
50 
51 template<class ST> CDenseFeatures<ST>::CDenseFeatures(ST* src, int32_t num_feat, int32_t num_vec) :
52  CDotFeatures()
53 {
54  init();
55  set_feature_matrix(SGMatrix<ST>(src, num_feat, num_vec));
56 }
57 template<class ST> CDenseFeatures<ST>::CDenseFeatures(CFile* loader) :
58  CDotFeatures()
59 {
60  init();
61  load(loader);
62 }
63 
64 template<class ST> CFeatures* CDenseFeatures<ST>::duplicate() const
65 {
66  return new CDenseFeatures<ST>(*this);
67 }
68 
70 {
71  free_features();
72 }
73 
74 template<class ST> void CDenseFeatures<ST>::free_features()
75 {
76  m_subset_stack->remove_all_subsets();
77  free_feature_matrix();
78  SG_UNREF(feature_cache);
79 }
80 
81 template<class ST> void CDenseFeatures<ST>::free_feature_matrix()
82 {
83  m_subset_stack->remove_all_subsets();
84  feature_matrix=SGMatrix<ST>();
85  num_vectors = 0;
86  num_features = 0;
87 }
88 
/* Return a pointer to feature vector `num`.
 *
 * num    - vector index as seen by the caller; it is mapped through the
 *          subset stack before the matrix or the cache is accessed
 * len    - out: set to num_features up front; compute_feature_vector() and
 *          the preprocessor path below may overwrite it
 * dofree - out: false when the pointer refers to the in-memory matrix or to a
 *          cache entry; true when no target buffer was available before the
 *          vector was computed
 *
 * Callers hand the pointer back through free_feature_vector().
 *
 * NOTE(review): listing lines 127 and 130 are missing from this extract; they
 * presumably declare `p` and `applied` used in the preprocessor loop. */
89 template<class ST> ST* CDenseFeatures<ST>::get_feature_vector(int32_t num, int32_t& len, bool& dofree)
90 {
91  /* index conversion for subset, only for array access */
92  int32_t real_num=m_subset_stack->subset_idx_conversion(num);
93 
94  len = num_features;
95 
 /* fast path: matrix is in memory, return a pointer into it (64-bit offset) */
96  if (feature_matrix.matrix)
97  {
98  dofree = false;
99  return &feature_matrix.matrix[real_num * int64_t(num_features)];
100  }
101 
102  ST* feat = NULL;
103  dofree = false;
104 
 /* no matrix: try the cache; a locked entry is returned directly, otherwise
  * set_entry() is asked for a slot for this vector */
105  if (feature_cache)
106  {
107  feat = feature_cache->lock_entry(real_num);
108 
109  if (feat)
110  return feat;
111  else
112  feat = feature_cache->set_entry(real_num);
113  }
114 
 /* no target buffer was obtained above, so the caller has to free the result */
115  if (!feat)
116  dofree = true;
 /* NOTE(review): the caller-visible index `num` is passed here, not real_num */
117  feat = compute_feature_vector(num, len, feat);
118 
 /* apply all attached preprocessors in sequence to the computed vector */
119  if (get_num_preprocessors())
120  {
121  int32_t tmp_len = len;
122  ST* tmp_feat_before = feat;
123  ST* tmp_feat_after = NULL;
124 
125  for (int32_t i = 0; i < get_num_preprocessors(); i++)
126  {
128  (CDensePreprocessor<ST>*) get_preprocessor(i);
129  // temporary hack: wrap the raw buffer in an SGVector for the preprocessor call
131  SGVector<ST>(tmp_feat_before, tmp_len));
132  tmp_feat_after = applied.vector;
133  SG_UNREF(p);
134 
135  if (i != 0) // delete the intermediate feature vector, except for the first one, i.e., feat
136  SG_FREE(tmp_feat_before);
137  tmp_feat_before = tmp_feat_after;
138  }
139 
140  // note: tmp_feat_after must be checked because it is the source of the memcpy below
 // NOTE(review): tmp_len is never updated in the visible lines, so a
 // preprocessor that changes the vector length is not reflected here -- confirm
141  if (tmp_feat_after)
142  {
143  memcpy(feat, tmp_feat_after, sizeof(ST) * tmp_len);
144  SG_FREE(tmp_feat_after);
145 
146  len = tmp_len;
147  }
148  }
149  return feat;
150 }
151 
152 template<class ST> void CDenseFeatures<ST>::set_feature_vector(SGVector<ST> vector, int32_t num)
153 {
154  /* index conversion for subset, only for array access */
155  int32_t real_num=m_subset_stack->subset_idx_conversion(num);
156 
157  if (num>=get_num_vectors())
158  {
159  SG_ERROR("Index out of bounds (number of vectors %d, you "
160  "requested %d)\n", get_num_vectors(), num);
161  }
162 
163  if (!feature_matrix.matrix)
164  SG_ERROR("Requires a in-memory feature matrix\n")
165 
166  if (vector.vlen != num_features)
167  SG_ERROR(
168  "Vector not of length %d (has %d)\n", num_features, vector.vlen);
169 
170  memcpy(&feature_matrix.matrix[real_num * int64_t(num_features)], vector.vector,
171  int64_t(num_features) * sizeof(ST));
172 }
173 
175 {
176  /* index conversion for subset, only for array access */
177  int32_t real_num=m_subset_stack->subset_idx_conversion(num);
178 
179  if (num >= get_num_vectors())
180  {
181  SG_ERROR("Index out of bounds (number of vectors %d, you "
182  "requested %d)\n", get_num_vectors(), real_num);
183  }
184 
185  int32_t vlen;
186  bool do_free;
187  ST* vector= get_feature_vector(num, vlen, do_free);
188  return SGVector<ST>(vector, vlen, do_free);
189 }
190 
191 template<class ST> void CDenseFeatures<ST>::free_feature_vector(ST* feat_vec, int32_t num, bool dofree)
192 {
193  if (feature_cache)
194  feature_cache->unlock_entry(m_subset_stack->subset_idx_conversion(num));
195 
196  if (dofree)
197  SG_FREE(feat_vec);
198 }
199 
200 template<class ST> void CDenseFeatures<ST>::free_feature_vector(SGVector<ST> vec, int32_t num)
201 {
202  free_feature_vector(vec.vector, num, false);
203  vec=SGVector<ST>();
204 }
205 
206 template<class ST> void CDenseFeatures<ST>::vector_subset(int32_t* idx, int32_t idx_len)
207 {
208  if (m_subset_stack->has_subsets())
209  SG_ERROR("A subset is set, cannot call vector_subset\n")
210 
211  ASSERT(feature_matrix.matrix)
212  ASSERT(idx_len<=num_vectors)
213 
214  int32_t num_vec = num_vectors;
215  num_vectors = idx_len;
216 
217  int32_t old_ii = -1;
218 
219  for (int32_t i = 0; i < idx_len; i++)
220  {
221  int32_t ii = idx[i];
222  ASSERT(old_ii<ii)
223 
224  if (ii < 0 || ii >= num_vec)
225  SG_ERROR("Index out of range: should be 0<%d<%d\n", ii, num_vec)
226 
227  if (i == ii)
228  continue;
229 
230  memcpy(&feature_matrix.matrix[int64_t(num_features) * i],
231  &feature_matrix.matrix[int64_t(num_features) * ii],
232  num_features * sizeof(ST));
233  old_ii = ii;
234  }
235 }
236 
237 template<class ST> void CDenseFeatures<ST>::feature_subset(int32_t* idx, int32_t idx_len)
238 {
239  if (m_subset_stack->has_subsets())
240  SG_ERROR("A subset is set, cannot call feature_subset\n")
241 
242  ASSERT(feature_matrix.matrix)
243  ASSERT(idx_len<=num_features)
244  int32_t num_feat = num_features;
245  num_features = idx_len;
246 
247  for (int32_t i = 0; i < num_vectors; i++)
248  {
249  ST* src = &feature_matrix.matrix[int64_t(num_feat) * i];
250  ST* dst = &feature_matrix.matrix[int64_t(num_features) * i];
251 
252  int32_t old_jj = -1;
253  for (int32_t j = 0; j < idx_len; j++)
254  {
255  int32_t jj = idx[j];
256  ASSERT(old_jj<jj)
257  if (jj < 0 || jj >= num_feat)
258  SG_ERROR(
259  "Index out of range: should be 0<%d<%d\n", jj, num_feat);
260 
261  dst[j] = src[jj];
262  old_jj = jj;
263  }
264  }
265 }
266 
268 {
269  if (!m_subset_stack->has_subsets())
270  return feature_matrix;
271 
272  SGMatrix<ST> submatrix(num_features, get_num_vectors());
273 
274  /* copy a subset vector wise */
275  for (int32_t i=0; i<submatrix.num_cols; ++i)
276  {
277  int32_t real_i = m_subset_stack->subset_idx_conversion(i);
278  memcpy(&submatrix.matrix[i*int64_t(num_features)],
279  &feature_matrix.matrix[real_i * int64_t(num_features)],
280  num_features * sizeof(ST));
281  }
282 
283  return submatrix;
284 }
285 
287 {
288  SGMatrix<ST> st_feature_matrix=feature_matrix;
289  m_subset_stack->remove_all_subsets();
290  SG_UNREF(feature_cache);
291  clean_preprocessors();
292  free_feature_matrix();
293  return st_feature_matrix;
294 }
295 
297 {
298  m_subset_stack->remove_all_subsets();
299  free_feature_matrix();
300  feature_matrix = matrix;
301  num_features = matrix.num_rows;
302  num_vectors = matrix.num_cols;
303 }
304 
305 template<class ST> ST* CDenseFeatures<ST>::get_feature_matrix(int32_t &num_feat, int32_t &num_vec)
306 {
307  num_feat = num_features;
308  num_vec = num_vectors;
309  return feature_matrix.matrix;
310 }
311 
313 {
314  int32_t num_feat;
315  int32_t num_vec;
316  ST* fm = get_transposed(num_feat, num_vec);
317 
318  return new CDenseFeatures<ST>(fm, num_feat, num_vec);
319 }
320 
321 template<class ST> ST* CDenseFeatures<ST>::get_transposed(int32_t &num_feat, int32_t &num_vec)
322 {
323  num_feat = get_num_vectors();
324  num_vec = num_features;
325 
326  int32_t old_num_vec=get_num_vectors();
327 
328  ST* fm = SG_MALLOC(ST, int64_t(num_feat) * num_vec);
329 
330  for (int32_t i=0; i<old_num_vec; i++)
331  {
332  SGVector<ST> vec=get_feature_vector(i);
333 
334  for (int32_t j=0; j<vec.vlen; j++)
335  fm[j*int64_t(old_num_vec)+i]=vec.vector[j];
336 
337  free_feature_vector(vec, i);
338  }
339 
340  return fm;
341 }
342 
344 {
345  if (m_subset_stack->has_subsets())
346  SG_ERROR("A subset is set, cannot call copy_feature_matrix\n")
347 
348  free_feature_matrix();
349  feature_matrix = src.clone();
350  num_features = src.num_rows;
351  num_vectors = src.num_cols;
352  initialize_cache();
353 }
354 
356 {
357  m_subset_stack->remove_all_subsets();
358 
359  int32_t num_feat = df->get_dim_feature_space();
360  int32_t num_vec = df->get_num_vectors();
361 
362  ASSERT(num_feat>0 && num_vec>0)
363 
364  free_feature_matrix();
365  feature_matrix = SGMatrix<ST>(num_feat, num_vec);
366 
367  for (int32_t i = 0; i < num_vec; i++)
368  {
370  ASSERT(num_feat==v.vlen)
371 
372  for (int32_t j = 0; j < num_feat; j++)
373  feature_matrix.matrix[i * int64_t(num_feat) + j] = (ST) v.vector[j];
374  }
375  num_features = num_feat;
376  num_vectors = num_vec;
377 }
378 
/* Apply the attached preprocessors to the in-memory feature matrix.
 *
 * force_preprocessing - when true, preprocessors already flagged as applied
 *                       are run again
 *
 * Returns true when a matrix and at least one preprocessor exist and no
 * preprocessor returned a NULL matrix; false otherwise. Raises SG_ERROR when
 * a subset is set, and in the else-branch when the matrix or the
 * preprocessors are missing.
 *
 * NOTE(review): listing line 393 is missing from this extract; it presumably
 * declares `p`, of which only the cast on line 394 is visible. */
379 template<class ST> bool CDenseFeatures<ST>::apply_preprocessor(bool force_preprocessing)
380 {
381  if (m_subset_stack->has_subsets())
382  SG_ERROR("A subset is set, cannot call apply_preproc\n")
383 
384  SG_DEBUG("force: %d\n", force_preprocessing)
385 
386  if (feature_matrix.matrix && get_num_preprocessors())
387  {
388  for (int32_t i = 0; i < get_num_preprocessors(); i++)
389  {
390  if ((!is_preprocessed(i) || force_preprocessing))
391  {
 /* the flag is set before the preprocessor runs, so it stays set
  * even when the application below fails */
392  set_preprocessed(i);
394  (CDensePreprocessor<ST>*) get_preprocessor(i);
395  SG_INFO("preprocessing using preproc %s\n", p->get_name())
396 
 /* a NULL result matrix is treated as failure; p is released on
  * both the failure and the success path */
397  if (p->apply_to_feature_matrix(this).matrix == NULL)
398  {
399  SG_UNREF(p);
400  return false;
401  }
402  SG_UNREF(p);
403 
404  }
405  }
406 
407  return true;
408  }
409  else
410  {
 /* report which of the two preconditions failed */
411  if (!feature_matrix.matrix)
412  SG_ERROR("no feature matrix\n")
413 
414  if (!get_num_preprocessors())
415  SG_ERROR("no preprocessors available\n")
416 
417  return false;
418  }
419 }
420 
421 template<class ST> int32_t CDenseFeatures<ST>::get_num_vectors() const
422 {
423  return m_subset_stack->has_subsets() ? m_subset_stack->get_size() : num_vectors;
424 }
425 
426 template<class ST> int32_t CDenseFeatures<ST>::get_num_features() const { return num_features; }
427 
428 template<class ST> void CDenseFeatures<ST>::set_num_features(int32_t num)
429 {
430  num_features = num;
431  initialize_cache();
432 }
433 
434 template<class ST> void CDenseFeatures<ST>::set_num_vectors(int32_t num)
435 {
436  if (m_subset_stack->has_subsets())
437  SG_ERROR("A subset is set, cannot call set_num_vectors\n")
438 
439  num_vectors = num;
440  initialize_cache();
441 }
442 
443 template<class ST> void CDenseFeatures<ST>::initialize_cache()
444 {
445  if (m_subset_stack->has_subsets())
446  SG_ERROR("A subset is set, cannot call initialize_cache\n")
447 
448  if (num_features && num_vectors)
449  {
450  SG_UNREF(feature_cache);
451  feature_cache = new CCache<ST>(get_cache_size(), num_features,
452  num_vectors);
453  SG_REF(feature_cache);
454  }
455 }
456 
457 template<class ST> EFeatureClass CDenseFeatures<ST>::get_feature_class() const { return C_DENSE; }
458 
459 template<class ST> bool CDenseFeatures<ST>::reshape(int32_t p_num_features, int32_t p_num_vectors)
460 {
461  if (m_subset_stack->has_subsets())
462  SG_ERROR("A subset is set, cannot call reshape\n")
463 
464  if (p_num_features * p_num_vectors
465  == this->num_features * this->num_vectors)
466  {
467  num_features = p_num_features;
468  num_vectors = p_num_vectors;
469  return true;
470  } else
471  return false;
472 }
473 
474 template<class ST> int32_t CDenseFeatures<ST>::get_dim_feature_space() const { return num_features; }
475 
/* Dot product between vector vec_idx1 of this object and vector vec_idx2 of
 * another feature object.
 *
 * vec_idx1 - index of the vector in this object
 * df       - other feature object; asserted to be non-NULL and to have the
 *            same feature type and feature class as this one
 * vec_idx2 - index of the vector in df
 *
 * Returns CMath::dot over the two vectors.
 *
 * NOTE(review): listing line 482 is missing from this extract; it presumably
 * declares `sf` as df cast to CDenseFeatures<ST>*. */
476 template<class ST> float64_t CDenseFeatures<ST>::dot(int32_t vec_idx1, CDotFeatures* df,
477  int32_t vec_idx2)
478 {
479  ASSERT(df)
480  ASSERT(df->get_feature_type() == get_feature_type())
481  ASSERT(df->get_feature_class() == get_feature_class())
483 
484  int32_t len1, len2;
485  bool free1, free2;
486 
487  ST* vec1 = get_feature_vector(vec_idx1, len1, free1);
488  ST* vec2 = sf->get_feature_vector(vec_idx2, len2, free2);
489 
 /* NOTE(review): only len1 is used; len2 is never compared against it in the
  * visible code, so differing lengths would read past the end of vec2 */
490  float64_t result = CMath::dot(vec1, vec2, len1);
491 
 /* each vector is handed back to the object it was obtained from */
492  free_feature_vector(vec1, vec_idx1, free1);
493  sf->free_feature_vector(vec2, vec_idx2, free2);
494 
495  return result;
496 }
497 
498 template<class ST> void CDenseFeatures<ST>::add_to_dense_vec(float64_t alpha, int32_t vec_idx1,
499  float64_t* vec2, int32_t vec2_len, bool abs_val)
500 {
501  ASSERT(vec2_len == num_features)
502 
503  int32_t vlen;
504  bool vfree;
505  ST* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
506 
507  ASSERT(vlen == num_features)
508 
509  if (abs_val)
510  {
511  for (int32_t i = 0; i < num_features; i++)
512  vec2[i] += alpha * CMath::abs(vec1[i]);
513  }
514  else
515  {
516  for (int32_t i = 0; i < num_features; i++)
517  vec2[i] += alpha * vec1[i];
518  }
519 
520  free_feature_vector(vec1, vec_idx1, vfree);
521 }
522 
523 template<>
525  float64_t* vec2, int32_t vec2_len, bool abs_val)
526 {
527  ASSERT(vec2_len == num_features)
528 
529  int32_t vlen;
530  bool vfree;
531  float64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
532 
533  ASSERT(vlen == num_features)
534 
535  if (abs_val)
536  {
537  for (int32_t i = 0; i < num_features; i++)
538  vec2[i] += alpha * CMath::abs(vec1[i]);
539  }
540  else
541  {
542  SGVector<float64_t>::vec1_plus_scalar_times_vec2(vec2, alpha, vec1, num_features);
543  }
544 
545  free_feature_vector(vec1, vec_idx1, vfree);
546 }
547 
548 template<class ST> int32_t CDenseFeatures<ST>::get_nnz_features_for_vector(int32_t num)
549 {
550  return num_features;
551 }
552 
553 template<class ST> void* CDenseFeatures<ST>::get_feature_iterator(int32_t vector_index)
554 {
555  if (vector_index>=get_num_vectors())
556  {
557  SG_ERROR("Index out of bounds (number of vectors %d, you "
558  "requested %d)\n", get_num_vectors(), vector_index);
559  }
560 
561  dense_feature_iterator* iterator = SG_MALLOC(dense_feature_iterator, 1);
562  iterator->vec = get_feature_vector(vector_index, iterator->vlen,
563  iterator->vfree);
564  iterator->vidx = vector_index;
565  iterator->index = 0;
566  return iterator;
567 }
568 
569 template<class ST> bool CDenseFeatures<ST>::get_next_feature(int32_t& index, float64_t& value,
570  void* iterator)
571 {
572  dense_feature_iterator* it = (dense_feature_iterator*) iterator;
573  if (!it || it->index >= it->vlen)
574  return false;
575 
576  index = it->index++;
577  value = (float64_t) it->vec[index];
578 
579  return true;
580 }
581 
582 template<class ST> void CDenseFeatures<ST>::free_feature_iterator(void* iterator)
583 {
584  if (!iterator)
585  return;
586 
587  dense_feature_iterator* it = (dense_feature_iterator*) iterator;
588  free_feature_vector(it->vec, it->vidx, it->vfree);
589  SG_FREE(it);
590 }
591 
593 {
594  SGMatrix<ST> feature_matrix_copy(num_features, indices.vlen);
595 
596  for (index_t i=0; i<indices.vlen; ++i)
597  {
598  index_t real_idx=m_subset_stack->subset_idx_conversion(indices.vector[i]);
599  memcpy(&feature_matrix_copy.matrix[i*num_features],
600  &feature_matrix.matrix[real_idx*num_features],
601  num_features*sizeof(ST));
602  }
603 
604  CFeatures* result=new CDenseFeatures(feature_matrix_copy);
605  SG_REF(result);
606  return result;
607 }
608 
609 template<class ST>
611 {
612  SG_DEBUG("Entering!\n");
613 
614  // sanity checks
615  index_t max=CMath::max(dims.vector, dims.vlen);
616  index_t min=CMath::min(dims.vector, dims.vlen);
617  REQUIRE(max<num_features && min>=0,
618  "Provided dimensions is in the range [%d, %d] but they "
619  "have to be within [0, %d]! But it \n", min, max, num_features);
620 
621  SGMatrix<ST> feature_matrix_copy(dims.vlen, get_num_vectors());
622 
623  for (index_t i=0; i<dims.vlen; ++i)
624  {
625  for (index_t j=0; j<get_num_vectors(); ++j)
626  {
627  index_t real_idx=m_subset_stack->subset_idx_conversion(j);
628  feature_matrix_copy(i, j)=feature_matrix(dims[i], real_idx);
629  }
630  }
631 
632  CFeatures* result=new CDenseFeatures(feature_matrix_copy);
633  SG_REF(result);
634 
635  SG_DEBUG("Leaving!\n");
636  return result;
637 }
638 
/* Compute feature vector `num` on the fly (called by get_feature_vector()
 * when neither the matrix nor a cached entry is available).
 *
 * num    - index of the vector to compute
 * len    - out: length of the computed vector
 * target - buffer passed in by the caller, may be NULL
 *
 * In the visible code this base implementation computes nothing: it sets len
 * to 0 and returns NULL.
 *
 * NOTE(review): listing line 642 is missing from this extract; the index at
 * the end of the listing references SG_NOTIMPLEMENTED, which is presumably
 * what stood there -- confirm against the real source. */
639 template<class ST> ST* CDenseFeatures<ST>::compute_feature_vector(int32_t num, int32_t& len,
640  ST* target)
641 {
643  len = 0;
644  return NULL;
645 }
646 
/* Common initialisation called by every constructor: resets the dimension
 * counters, the feature matrix and the cache pointer, sets the generic type
 * for ST and registers the three data members via SG_ADD. */
647 template<class ST> void CDenseFeatures<ST>::init()
648 {
649  num_vectors = 0;
650  num_features = 0;
651 
 /* start with an empty matrix and no cache */
652  feature_matrix = SGMatrix<ST>();
653  feature_cache = NULL;
654 
655  set_generic<ST>();
656 
657  /* the subset size is not stored; num_vectors is the full vector count */
 /* NOTE(review): the registration order below may matter to code that
  * consumes the registered parameters -- keep it unless verified otherwise */
658  SG_ADD(&num_vectors, "num_vectors", "Number of vectors.", MS_NOT_AVAILABLE);
659  SG_ADD(&num_features, "num_features", "Number of features.", MS_NOT_AVAILABLE);
660  SG_ADD(&feature_matrix, "feature_matrix",
661  "Matrix of feature vectors / 1 vector per column.", MS_NOT_AVAILABLE);
662 }
663 
/* Helper macro: defines the explicit specialisation of
 * CDenseFeatures<sg_type>::get_feature_type() so that it returns f_type.
 * It is undefined again right after the last use below.
 *
 * NOTE(review): listing lines 670-671 and 680-682 are missing from this
 * extract, so not every instantiated type has a visible mapping here.
 * NOTE(review): both uint8_t and int8_t map to F_BYTE in the visible lines --
 * confirm that this is intended. */
664 #define GET_FEATURE_TYPE(f_type, sg_type) \
665 template<> EFeatureType CDenseFeatures<sg_type>::get_feature_type() const \
666 { \
667  return f_type; \
668 }
669 
672 GET_FEATURE_TYPE(F_BYTE, uint8_t)
673 GET_FEATURE_TYPE(F_BYTE, int8_t)
674 GET_FEATURE_TYPE(F_SHORT, int16_t)
675 GET_FEATURE_TYPE(F_WORD, uint16_t)
676 GET_FEATURE_TYPE(F_INT, int32_t)
677 GET_FEATURE_TYPE(F_UINT, uint32_t)
678 GET_FEATURE_TYPE(F_LONG, int64_t)
679 GET_FEATURE_TYPE(F_ULONG, uint64_t)
683 #undef GET_FEATURE_TYPE
684 
685 template<> float64_t CDenseFeatures<bool>::dense_dot(int32_t vec_idx1,
686  const float64_t* vec2, int32_t vec2_len)
687 {
688  ASSERT(vec2_len == num_features)
689 
690  int32_t vlen;
691  bool vfree;
692  bool* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
693 
694  ASSERT(vlen == num_features)
695  float64_t result = 0;
696 
697  for (int32_t i = 0; i < num_features; i++)
698  result += vec1[i] ? vec2[i] : 0;
699 
700  free_feature_vector(vec1, vec_idx1, vfree);
701 
702  return result;
703 }
704 
705 template<> float64_t CDenseFeatures<char>::dense_dot(int32_t vec_idx1,
706  const float64_t* vec2, int32_t vec2_len)
707 {
708  ASSERT(vec2_len == num_features)
709 
710  int32_t vlen;
711  bool vfree;
712  char* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
713 
714  ASSERT(vlen == num_features)
715  float64_t result = 0;
716 
717  for (int32_t i = 0; i < num_features; i++)
718  result += vec1[i] * vec2[i];
719 
720  free_feature_vector(vec1, vec_idx1, vfree);
721 
722  return result;
723 }
724 
725 template<> float64_t CDenseFeatures<int8_t>::dense_dot(int32_t vec_idx1,
726  const float64_t* vec2, int32_t vec2_len)
727 {
728  ASSERT(vec2_len == num_features)
729 
730  int32_t vlen;
731  bool vfree;
732  int8_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
733 
734  ASSERT(vlen == num_features)
735  float64_t result = 0;
736 
737  for (int32_t i = 0; i < num_features; i++)
738  result += vec1[i] * vec2[i];
739 
740  free_feature_vector(vec1, vec_idx1, vfree);
741 
742  return result;
743 }
744 
746  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
747 {
748  ASSERT(vec2_len == num_features)
749 
750  int32_t vlen;
751  bool vfree;
752  uint8_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
753 
754  ASSERT(vlen == num_features)
755  float64_t result = 0;
756 
757  for (int32_t i = 0; i < num_features; i++)
758  result += vec1[i] * vec2[i];
759 
760  free_feature_vector(vec1, vec_idx1, vfree);
761 
762  return result;
763 }
764 
766  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
767 {
768  ASSERT(vec2_len == num_features)
769 
770  int32_t vlen;
771  bool vfree;
772  int16_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
773 
774  ASSERT(vlen == num_features)
775  float64_t result = 0;
776 
777  for (int32_t i = 0; i < num_features; i++)
778  result += vec1[i] * vec2[i];
779 
780  free_feature_vector(vec1, vec_idx1, vfree);
781 
782  return result;
783 }
784 
786  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
787 {
788  ASSERT(vec2_len == num_features)
789 
790  int32_t vlen;
791  bool vfree;
792  uint16_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
793 
794  ASSERT(vlen == num_features)
795  float64_t result = 0;
796 
797  for (int32_t i = 0; i < num_features; i++)
798  result += vec1[i] * vec2[i];
799 
800  free_feature_vector(vec1, vec_idx1, vfree);
801 
802  return result;
803 }
804 
806  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
807 {
808  ASSERT(vec2_len == num_features)
809 
810  int32_t vlen;
811  bool vfree;
812  int32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
813 
814  ASSERT(vlen == num_features)
815  float64_t result = 0;
816 
817  for (int32_t i = 0; i < num_features; i++)
818  result += vec1[i] * vec2[i];
819 
820  free_feature_vector(vec1, vec_idx1, vfree);
821 
822  return result;
823 }
824 
826  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
827 {
828  ASSERT(vec2_len == num_features)
829 
830  int32_t vlen;
831  bool vfree;
832  uint32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
833 
834  ASSERT(vlen == num_features)
835  float64_t result = 0;
836 
837  for (int32_t i = 0; i < num_features; i++)
838  result += vec1[i] * vec2[i];
839 
840  free_feature_vector(vec1, vec_idx1, vfree);
841 
842  return result;
843 }
844 
846  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
847 {
848  ASSERT(vec2_len == num_features)
849 
850  int32_t vlen;
851  bool vfree;
852  int64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
853 
854  ASSERT(vlen == num_features)
855  float64_t result = 0;
856 
857  for (int32_t i = 0; i < num_features; i++)
858  result += vec1[i] * vec2[i];
859 
860  free_feature_vector(vec1, vec_idx1, vfree);
861 
862  return result;
863 }
864 
866  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
867 {
868  ASSERT(vec2_len == num_features)
869 
870  int32_t vlen;
871  bool vfree;
872  uint64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
873 
874  ASSERT(vlen == num_features)
875  float64_t result = 0;
876 
877  for (int32_t i = 0; i < num_features; i++)
878  result += vec1[i] * vec2[i];
879 
880  free_feature_vector(vec1, vec_idx1, vfree);
881 
882  return result;
883 }
884 
886  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
887 {
888  ASSERT(vec2_len == num_features)
889 
890  int32_t vlen;
891  bool vfree;
892  float32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
893 
894  ASSERT(vlen == num_features)
895  float64_t result = 0;
896 
897  for (int32_t i = 0; i < num_features; i++)
898  result += vec1[i] * vec2[i];
899 
900  free_feature_vector(vec1, vec_idx1, vfree);
901 
902  return result;
903 }
904 
906  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
907 {
908  ASSERT(vec2_len == num_features)
909 
910  int32_t vlen;
911  bool vfree;
912  float64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
913 
914  ASSERT(vlen == num_features)
915  float64_t result = CMath::dot(vec1, vec2, num_features);
916 
917  free_feature_vector(vec1, vec_idx1, vfree);
918 
919  return result;
920 }
921 
923  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
924 {
925  ASSERT(vec2_len == num_features)
926 
927  int32_t vlen;
928  bool vfree;
929  floatmax_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
930 
931  ASSERT(vlen == num_features)
932  float64_t result = 0;
933 
934  for (int32_t i = 0; i < num_features; i++)
935  result += vec1[i] * vec2[i];
936 
937  free_feature_vector(vec1, vec_idx1, vfree);
938 
939  return result;
940 }
941 
942 template<class ST> bool CDenseFeatures<ST>::is_equal(CDenseFeatures* rhs)
943 {
944  if ( num_features != rhs->num_features || num_vectors != rhs->num_vectors )
945  return false;
946 
947  ST* vec1;
948  ST* vec2;
949  int32_t v1len, v2len;
950  bool v1free, v2free, stop = false;
951 
952  for (int32_t i = 0; i < num_vectors; i++)
953  {
954  vec1 = get_feature_vector(i, v1len, v1free);
955  vec2 = rhs->get_feature_vector(i, v2len, v2free);
956 
957  if (v1len!=v2len)
958  stop = true;
959 
960  for (int32_t j=0; j<v1len; j++)
961  {
962  if (vec1[j]!=vec2[j])
963  stop = true;
964  }
965 
966  free_feature_vector(vec1, i, v1free);
967  free_feature_vector(vec2, i, v2free);
968 
969  if (stop)
970  return false;
971  }
972 
973  return true;
974 }
975 
977  CList* others)
978 {
979  SG_DEBUG("entering %s::create_merged_copy()\n", get_name());
980 
981  if (!others)
982  return NULL;
983 
984  /* first, check other features and count number of elements */
985  CSGObject* other=others->get_first_element();
986  index_t num_vectors_merged=num_vectors;
987  while (other)
988  {
989  CDenseFeatures<ST>* casted=dynamic_cast<CDenseFeatures<ST>* >(other);
990 
991  if (!casted)
992  {
993  SG_ERROR("%s::create_merged_copy(): Could not cast object of %s to "
994  "same type as %s\n",get_name(), other->get_name(), get_name());
995  }
996 
997  if (get_feature_type()!=casted->get_feature_type() ||
998  get_feature_class()!=casted->get_feature_class() ||
999  strcmp(get_name(), casted->get_name()))
1000  {
1001  SG_ERROR("%s::create_merged_copy(): Features are of different type!\n",
1002  get_name());
1003  }
1004 
1005  if (num_features!=casted->num_features)
1006  {
1007  SG_ERROR("%s::create_merged_copy(): Provided feature object has "
1008  "different dimension than this one\n");
1009  }
1010 
1011  num_vectors_merged+=casted->get_num_vectors();
1012 
1013  /* check if reference counting is used */
1014  if (others->get_delete_data())
1015  SG_UNREF(other);
1016  other=others->get_next_element();
1017  }
1018 
1019  /* create new feature matrix and copy both instances data into it */
1020  SGMatrix<ST> data(num_features, num_vectors_merged);
1021 
1022  /* copy data of this instance */
1023  SG_DEBUG("copying matrix of this instance\n")
1024  memcpy(data.matrix, feature_matrix.matrix,
1025  num_features*num_vectors*sizeof(ST));
1026 
1027  /* count number of vectors (not elements) processed so far */
1028  index_t num_processed=num_vectors;
1029 
1030  /* now copy data of other features block wise */
1031  other=others->get_first_element();
1032  while (other)
1033  {
1034  /* cast is safe due to above check */
1035  CDenseFeatures<ST>* casted=(CDenseFeatures<ST>*)other;
1036 
1037  SG_DEBUG("copying matrix of provided instance\n")
1038  memcpy(&(data.matrix[num_processed*num_features]),
1039  casted->get_feature_matrix().matrix,
1040  num_features*casted->get_num_vectors()*sizeof(ST));
1041 
1042  /* update counting */
1043  num_processed+=casted->get_num_vectors();
1044 
1045  /* check if reference counting is used */
1046  if (others->get_delete_data())
1047  SG_UNREF(other);
1048  other=others->get_next_element();
1049  }
1050 
1051  /* create new instance and return */
1052  CDenseFeatures<ST>* result=new CDenseFeatures<ST>(data);
1053 
1054  SG_DEBUG("leaving %s::create_merged_copy()\n", get_name());
1055  return result;
1056 }
1057 
1059  CFeatures* other)
1060 {
1061  SG_DEBUG("entering %s::create_merged_copy()\n", get_name());
1062 
1063  /* create list with one element and call general method */
1064  CList* list=new CList();
1065  list->append_element(other);
1066  CFeatures* result=create_merged_copy(list);
1067  SG_UNREF(list);
1068 
1069  SG_DEBUG("leaving %s::create_merged_copy()\n", get_name());
1070  return result;
1071 }
1072 
1073 template<class ST>
1075 {
1076  SGMatrix<ST> matrix;
1077  matrix.load(loader);
1078  set_feature_matrix(matrix);
1079 }
1080 
1081 template<class ST>
1083 {
1084  feature_matrix.save(writer);
1085 }
1086 
1088 {
1089  REQUIRE(base_features->get_feature_class() == C_DENSE,
1090  "base_features must be of dynamic type CDenseFeatures\n")
1091 
1092  return (CDenseFeatures< ST >*) base_features;
1093 }
1094 
1095 template class CDenseFeatures<bool>;
1096 template class CDenseFeatures<char>;
1097 template class CDenseFeatures<int8_t>;
1098 template class CDenseFeatures<uint8_t>;
1099 template class CDenseFeatures<int16_t>;
1100 template class CDenseFeatures<uint16_t>;
1101 template class CDenseFeatures<int32_t>;
1102 template class CDenseFeatures<uint32_t>;
1103 template class CDenseFeatures<int64_t>;
1104 template class CDenseFeatures<uint64_t>;
1105 template class CDenseFeatures<float32_t>;
1106 template class CDenseFeatures<float64_t>;
1107 template class CDenseFeatures<floatmax_t>;
1108 }
virtual const char * get_name() const =0
CSubsetStack * m_subset_stack
Definition: Features.h:352
void set_feature_vector(SGVector< ST > vector, int32_t num)
virtual int32_t get_dim_feature_space() const
#define SG_INFO(...)
Definition: SGIO.h:118
CSGObject * get_next_element()
Definition: List.h:185
The class DenseFeatures implements dense feature matrices.
Definition: LDA.h:41
virtual void load(CFile *loader)
virtual void copy_feature_matrix(SGMatrix< ST > src)
ST * get_feature_vector(int32_t num, int32_t &len, bool &dofree)
int32_t get_num_features() const
int32_t index_t
Definition: common.h:62
CDenseFeatures(int32_t size=0)
Vector::Scalar dot(Vector a, Vector b)
Definition: Redux.h:56
virtual CFeatures * duplicate() const
SGMatrix< ST > get_feature_matrix()
void set_feature_matrix(SGMatrix< ST > matrix)
SGMatrix< T > clone()
Definition: SGMatrix.cpp:260
virtual int32_t get_num_vectors() const =0
#define SG_ERROR(...)
Definition: SGIO.h:129
#define REQUIRE(x,...)
Definition: SGIO.h:206
#define SG_NOTIMPLEMENTED
Definition: SGIO.h:139
index_t num_cols
Definition: SGMatrix.h:378
void vector_subset(int32_t *idx, int32_t idx_len)
bool get_delete_data()
Definition: List.h:575
Features that support dot products among other operations.
Definition: DotFeatures.h:44
#define SG_REF(x)
Definition: SGObject.h:51
int32_t num_features
number of features in cache
index_t num_rows
Definition: SGMatrix.h:376
EFeatureClass
shogun feature class
Definition: FeatureTypes.h:38
class to add subset support to another class. A CSubsetStack instance should be added and wrappe...
Definition: SubsetStack.h:37
virtual int32_t get_dim_feature_space() const =0
virtual void save(CFile *saver)
index_t vlen
Definition: SGVector.h:494
CSGObject * get_first_element()
Definition: List.h:151
#define ASSERT(x)
Definition: SGIO.h:201
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:112
virtual int32_t get_num_vectors() const
Template class DensePreprocessor, base class for preprocessors (cf. CPreprocessor) that apply to CDen...
shogun vector
double float64_t
Definition: common.h:50
void set_num_vectors(int32_t num)
long double floatmax_t
Definition: common.h:51
virtual float64_t dot(int32_t vec_idx1, CDotFeatures *df, int32_t vec_idx2)
A File access base class.
Definition: File.h:34
CDenseFeatures< ST > * get_transposed()
virtual EFeatureClass get_feature_class() const =0
void set_num_features(int32_t num)
SGMatrix< ST > feature_matrix
static float64_t dot(const bool *v1, const bool *v2, int32_t n)
Compute dot product between v1 and v2 (blas optimized)
Definition: Math.h:627
float float32_t
Definition: common.h:49
void feature_subset(int32_t *idx, int32_t idx_len)
#define SG_UNREF(x)
Definition: SGObject.h:52
#define SG_DEBUG(...)
Definition: SGIO.h:107
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
void free_feature_vector(ST *feat_vec, int32_t num, bool dofree)
virtual bool reshape(int32_t p_num_features, int32_t p_num_vectors)
virtual EFeatureClass get_feature_class() const
The class Features is the base class of all feature objects.
Definition: Features.h:68
virtual SGMatrix< ST > apply_to_feature_matrix(CFeatures *features)=0
bool append_element(CSGObject *data)
Definition: List.h:331
void obtain_from_dot(CDotFeatures *df)
int32_t num_vectors
number of vectors in cache
SGVector< float64_t > get_computed_dot_feature_vector(int32_t num)
virtual EFeatureType get_feature_type() const
virtual bool apply_preprocessor(bool force_preprocessing=false)
Matrix::Scalar max(Matrix m)
Definition: Redux.h:66
#define SG_ADD(...)
Definition: SGObject.h:81
SGMatrix< ST > steal_feature_matrix()
virtual SGVector< ST > apply_to_feature_vector(SGVector< ST > vector)=0
virtual const char * get_name() const
virtual void add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t *vec2, int32_t vec2_len, bool abs_val=false)
virtual EFeatureType get_feature_type() const =0
Class List implements a doubly connected list for low-level-objects.
Definition: List.h:84
#define GET_FEATURE_TYPE(f_type, sg_type)
static T abs(T a)
Definition: Math.h:179
void load(CFile *loader)
Definition: SGMatrix.cpp:1052

SHOGUN Machine Learning Toolbox - Documentation