SHOGUN  4.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
DenseFeatures.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2010 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Written (W) 2011-2013 Heiko Strathmann
10  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
11  * Copyright (C) 2010 Berlin Institute of Technology
12  */
13 
16 #include <shogun/io/SGIO.h>
17 #include <shogun/base/Parameter.h>
20 
21 #include <string.h>
22 
23 namespace shogun {
24 
25 template<class ST> CDenseFeatures<ST>::CDenseFeatures(int32_t size) : CDotFeatures(size)
26 {
27  init();
28 }
29 
/* Copy constructor: forwards orig to the CDotFeatures copy constructor and
 * resets this object's own members via init(). */
30 template<class ST> CDenseFeatures<ST>::CDenseFeatures(const CDenseFeatures & orig) :
31  CDotFeatures(orig)
32 {
33  init();
36 
/* NOTE(review): nothing visible here takes over orig's feature matrix, and the
 * branch below has an empty body - presumably the matrix and the subset stack
 * of orig are meant to be copied at these two places; confirm against the
 * upstream sources. */
37  if (orig.m_subset_stack != NULL)
38  {
42  }
43 }
44 
46  CDotFeatures()
47 {
48  init();
49  set_feature_matrix(matrix);
50 }
51 
52 template<class ST> CDenseFeatures<ST>::CDenseFeatures(ST* src, int32_t num_feat, int32_t num_vec) :
53  CDotFeatures()
54 {
55  init();
56  set_feature_matrix(SGMatrix<ST>(src, num_feat, num_vec));
57 }
58 template<class ST> CDenseFeatures<ST>::CDenseFeatures(CFile* loader) :
59  CDotFeatures()
60 {
61  init();
62  load(loader);
63 }
64 
65 template<class ST> CFeatures* CDenseFeatures<ST>::duplicate() const
66 {
67  return new CDenseFeatures<ST>(*this);
68 }
69 
71 {
72  free_features();
73 }
74 
75 template<class ST> void CDenseFeatures<ST>::free_features()
76 {
77  m_subset_stack->remove_all_subsets();
78  free_feature_matrix();
79  SG_UNREF(feature_cache);
80 }
81 
82 template<class ST> void CDenseFeatures<ST>::free_feature_matrix()
83 {
84  m_subset_stack->remove_all_subsets();
85  feature_matrix=SGMatrix<ST>();
86  num_vectors = 0;
87  num_features = 0;
88 }
89 
90 template<class ST> ST* CDenseFeatures<ST>::get_feature_vector(int32_t num, int32_t& len, bool& dofree)
91 {
92  /* index conversion for subset, only for array access */
93  int32_t real_num=m_subset_stack->subset_idx_conversion(num);
94 
95  len = num_features;
96 
97  if (feature_matrix.matrix)
98  {
99  dofree = false;
100  return &feature_matrix.matrix[real_num * int64_t(num_features)];
101  }
102 
103  ST* feat = NULL;
104  dofree = false;
105 
106  if (feature_cache)
107  {
108  feat = feature_cache->lock_entry(real_num);
109 
110  if (feat)
111  return feat;
112  else
113  feat = feature_cache->set_entry(real_num);
114  }
115 
116  if (!feat)
117  dofree = true;
118  feat = compute_feature_vector(num, len, feat);
119 
120  if (get_num_preprocessors())
121  {
122  int32_t tmp_len = len;
123  ST* tmp_feat_before = feat;
124  ST* tmp_feat_after = NULL;
125 
126  for (int32_t i = 0; i < get_num_preprocessors(); i++)
127  {
129  (CDensePreprocessor<ST>*) get_preprocessor(i);
130  // temporary hack
132  SGVector<ST>(tmp_feat_before, tmp_len));
133  tmp_feat_after = applied.vector;
134  SG_UNREF(p);
135 
136  if (i != 0) // delete feature vector, except for the the first one, i.e., feat
137  SG_FREE(tmp_feat_before);
138  tmp_feat_before = tmp_feat_after;
139  }
140 
141  // note: tmp_feat_after should be checked as it is used by memcpy
142  if (tmp_feat_after)
143  {
144  memcpy(feat, tmp_feat_after, sizeof(ST) * tmp_len);
145  SG_FREE(tmp_feat_after);
146 
147  len = tmp_len;
148  }
149  }
150  return feat;
151 }
152 
153 template<class ST> void CDenseFeatures<ST>::set_feature_vector(SGVector<ST> vector, int32_t num)
154 {
155  /* index conversion for subset, only for array access */
156  int32_t real_num=m_subset_stack->subset_idx_conversion(num);
157 
158  if (num>=get_num_vectors())
159  {
160  SG_ERROR("Index out of bounds (number of vectors %d, you "
161  "requested %d)\n", get_num_vectors(), num);
162  }
163 
164  if (!feature_matrix.matrix)
165  SG_ERROR("Requires a in-memory feature matrix\n")
166 
167  if (vector.vlen != num_features)
168  SG_ERROR(
169  "Vector not of length %d (has %d)\n", num_features, vector.vlen);
170 
171  memcpy(&feature_matrix.matrix[real_num * int64_t(num_features)], vector.vector,
172  int64_t(num_features) * sizeof(ST));
173 }
174 
176 {
177  /* index conversion for subset, only for array access */
178  int32_t real_num=m_subset_stack->subset_idx_conversion(num);
179 
180  if (num >= get_num_vectors())
181  {
182  SG_ERROR("Index out of bounds (number of vectors %d, you "
183  "requested %d)\n", get_num_vectors(), real_num);
184  }
185 
186  int32_t vlen;
187  bool do_free;
188  ST* vector= get_feature_vector(num, vlen, do_free);
189  return SGVector<ST>(vector, vlen, do_free);
190 }
191 
192 template<class ST> void CDenseFeatures<ST>::free_feature_vector(ST* feat_vec, int32_t num, bool dofree)
193 {
194  if (feature_cache)
195  feature_cache->unlock_entry(m_subset_stack->subset_idx_conversion(num));
196 
197  if (dofree)
198  SG_FREE(feat_vec);
199 }
200 
201 template<class ST> void CDenseFeatures<ST>::free_feature_vector(SGVector<ST> vec, int32_t num)
202 {
203  free_feature_vector(vec.vector, num, false);
204  vec=SGVector<ST>();
205 }
206 
207 template<class ST> void CDenseFeatures<ST>::vector_subset(int32_t* idx, int32_t idx_len)
208 {
209  if (m_subset_stack->has_subsets())
210  SG_ERROR("A subset is set, cannot call vector_subset\n")
211 
212  ASSERT(feature_matrix.matrix)
213  ASSERT(idx_len<=num_vectors)
214 
215  int32_t num_vec = num_vectors;
216  num_vectors = idx_len;
217 
218  int32_t old_ii = -1;
219 
220  for (int32_t i = 0; i < idx_len; i++)
221  {
222  int32_t ii = idx[i];
223  ASSERT(old_ii<ii)
224 
225  if (ii < 0 || ii >= num_vec)
226  SG_ERROR("Index out of range: should be 0<%d<%d\n", ii, num_vec)
227 
228  if (i == ii)
229  continue;
230 
231  memcpy(&feature_matrix.matrix[int64_t(num_features) * i],
232  &feature_matrix.matrix[int64_t(num_features) * ii],
233  num_features * sizeof(ST));
234  old_ii = ii;
235  }
236 }
237 
238 template<class ST> void CDenseFeatures<ST>::feature_subset(int32_t* idx, int32_t idx_len)
239 {
240  if (m_subset_stack->has_subsets())
241  SG_ERROR("A subset is set, cannot call feature_subset\n")
242 
243  ASSERT(feature_matrix.matrix)
244  ASSERT(idx_len<=num_features)
245  int32_t num_feat = num_features;
246  num_features = idx_len;
247 
248  for (int32_t i = 0; i < num_vectors; i++)
249  {
250  ST* src = &feature_matrix.matrix[int64_t(num_feat) * i];
251  ST* dst = &feature_matrix.matrix[int64_t(num_features) * i];
252 
253  int32_t old_jj = -1;
254  for (int32_t j = 0; j < idx_len; j++)
255  {
256  int32_t jj = idx[j];
257  ASSERT(old_jj<jj)
258  if (jj < 0 || jj >= num_feat)
259  SG_ERROR(
260  "Index out of range: should be 0<%d<%d\n", jj, num_feat);
261 
262  dst[j] = src[jj];
263  old_jj = jj;
264  }
265  }
266 }
267 
269 {
270  if (!m_subset_stack->has_subsets())
271  return feature_matrix;
272 
273  SGMatrix<ST> submatrix(num_features, get_num_vectors());
274 
275  /* copy a subset vector wise */
276  for (int32_t i=0; i<submatrix.num_cols; ++i)
277  {
278  int32_t real_i = m_subset_stack->subset_idx_conversion(i);
279  memcpy(&submatrix.matrix[i*int64_t(num_features)],
280  &feature_matrix.matrix[real_i * int64_t(num_features)],
281  num_features * sizeof(ST));
282  }
283 
284  return submatrix;
285 }
286 
288 {
289  SGMatrix<ST> st_feature_matrix=feature_matrix;
290  m_subset_stack->remove_all_subsets();
291  SG_UNREF(feature_cache);
292  clean_preprocessors();
293  free_feature_matrix();
294  return st_feature_matrix;
295 }
296 
298 {
299  m_subset_stack->remove_all_subsets();
300  free_feature_matrix();
301  feature_matrix = matrix;
302  num_features = matrix.num_rows;
303  num_vectors = matrix.num_cols;
304 }
305 
306 template<class ST> ST* CDenseFeatures<ST>::get_feature_matrix(int32_t &num_feat, int32_t &num_vec)
307 {
308  num_feat = num_features;
309  num_vec = num_vectors;
310  return feature_matrix.matrix;
311 }
312 
314 {
315  int32_t num_feat;
316  int32_t num_vec;
317  ST* fm = get_transposed(num_feat, num_vec);
318 
319  return new CDenseFeatures<ST>(fm, num_feat, num_vec);
320 }
321 
322 template<class ST> ST* CDenseFeatures<ST>::get_transposed(int32_t &num_feat, int32_t &num_vec)
323 {
324  num_feat = get_num_vectors();
325  num_vec = num_features;
326 
327  int32_t old_num_vec=get_num_vectors();
328 
329  ST* fm = SG_MALLOC(ST, int64_t(num_feat) * num_vec);
330 
331  for (int32_t i=0; i<old_num_vec; i++)
332  {
333  SGVector<ST> vec=get_feature_vector(i);
334 
335  for (int32_t j=0; j<vec.vlen; j++)
336  fm[j*int64_t(old_num_vec)+i]=vec.vector[j];
337 
338  free_feature_vector(vec, i);
339  }
340 
341  return fm;
342 }
343 
345 {
346  if (m_subset_stack->has_subsets())
347  SG_ERROR("A subset is set, cannot call copy_feature_matrix\n")
348 
349  free_feature_matrix();
350  feature_matrix = src.clone();
351  num_features = src.num_rows;
352  num_vectors = src.num_cols;
353  initialize_cache();
354 }
355 
357 {
358  m_subset_stack->remove_all_subsets();
359 
360  int32_t num_feat = df->get_dim_feature_space();
361  int32_t num_vec = df->get_num_vectors();
362 
363  ASSERT(num_feat>0 && num_vec>0)
364 
365  free_feature_matrix();
366  feature_matrix = SGMatrix<ST>(num_feat, num_vec);
367 
368  for (int32_t i = 0; i < num_vec; i++)
369  {
371  ASSERT(num_feat==v.vlen)
372 
373  for (int32_t j = 0; j < num_feat; j++)
374  feature_matrix.matrix[i * int64_t(num_feat) + j] = (ST) v.vector[j];
375  }
376  num_features = num_feat;
377  num_vectors = num_vec;
378 }
379 
380 template<class ST> bool CDenseFeatures<ST>::apply_preprocessor(bool force_preprocessing)
381 {
382  if (m_subset_stack->has_subsets())
383  SG_ERROR("A subset is set, cannot call apply_preproc\n")
384 
385  SG_DEBUG("force: %d\n", force_preprocessing)
386 
387  if (feature_matrix.matrix && get_num_preprocessors())
388  {
389  for (int32_t i = 0; i < get_num_preprocessors(); i++)
390  {
391  if ((!is_preprocessed(i) || force_preprocessing))
392  {
393  set_preprocessed(i);
395  (CDensePreprocessor<ST>*) get_preprocessor(i);
396  SG_INFO("preprocessing using preproc %s\n", p->get_name())
397 
398  if (p->apply_to_feature_matrix(this).matrix == NULL)
399  {
400  SG_UNREF(p);
401  return false;
402  }
403  SG_UNREF(p);
404 
405  }
406  }
407 
408  return true;
409  }
410  else
411  {
412  if (!feature_matrix.matrix)
413  SG_ERROR("no feature matrix\n")
414 
415  if (!get_num_preprocessors())
416  SG_ERROR("no preprocessors available\n")
417 
418  return false;
419  }
420 }
421 
422 template<class ST> int32_t CDenseFeatures<ST>::get_num_vectors() const
423 {
424  return m_subset_stack->has_subsets() ? m_subset_stack->get_size() : num_vectors;
425 }
426 
427 template<class ST> int32_t CDenseFeatures<ST>::get_num_features() const { return num_features; }
428 
429 template<class ST> void CDenseFeatures<ST>::set_num_features(int32_t num)
430 {
431  num_features = num;
432  initialize_cache();
433 }
434 
435 template<class ST> void CDenseFeatures<ST>::set_num_vectors(int32_t num)
436 {
437  if (m_subset_stack->has_subsets())
438  SG_ERROR("A subset is set, cannot call set_num_vectors\n")
439 
440  num_vectors = num;
441  initialize_cache();
442 }
443 
444 template<class ST> void CDenseFeatures<ST>::initialize_cache()
445 {
446  if (m_subset_stack->has_subsets())
447  SG_ERROR("A subset is set, cannot call initialize_cache\n")
448 
449  if (num_features && num_vectors)
450  {
451  SG_UNREF(feature_cache);
452  feature_cache = new CCache<ST>(get_cache_size(), num_features,
453  num_vectors);
454  SG_REF(feature_cache);
455  }
456 }
457 
458 template<class ST> EFeatureClass CDenseFeatures<ST>::get_feature_class() const { return C_DENSE; }
459 
460 template<class ST> bool CDenseFeatures<ST>::reshape(int32_t p_num_features, int32_t p_num_vectors)
461 {
462  if (m_subset_stack->has_subsets())
463  SG_ERROR("A subset is set, cannot call reshape\n")
464 
465  if (p_num_features * p_num_vectors
466  == this->num_features * this->num_vectors)
467  {
468  num_features = p_num_features;
469  num_vectors = p_num_vectors;
470  return true;
471  } else
472  return false;
473 }
474 
475 template<class ST> int32_t CDenseFeatures<ST>::get_dim_feature_space() const { return num_features; }
476 
477 template<class ST> float64_t CDenseFeatures<ST>::dot(int32_t vec_idx1, CDotFeatures* df,
478  int32_t vec_idx2)
479 {
480  ASSERT(df)
481  ASSERT(df->get_feature_type() == get_feature_type())
482  ASSERT(df->get_feature_class() == get_feature_class())
484 
485  int32_t len1, len2;
486  bool free1, free2;
487 
488  ST* vec1 = get_feature_vector(vec_idx1, len1, free1);
489  ST* vec2 = sf->get_feature_vector(vec_idx2, len2, free2);
490 
491  float64_t result = CMath::dot(vec1, vec2, len1);
492 
493  free_feature_vector(vec1, vec_idx1, free1);
494  sf->free_feature_vector(vec2, vec_idx2, free2);
495 
496  return result;
497 }
498 
499 template<class ST> void CDenseFeatures<ST>::add_to_dense_vec(float64_t alpha, int32_t vec_idx1,
500  float64_t* vec2, int32_t vec2_len, bool abs_val)
501 {
502  ASSERT(vec2_len == num_features)
503 
504  int32_t vlen;
505  bool vfree;
506  ST* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
507 
508  ASSERT(vlen == num_features)
509 
510  if (abs_val)
511  {
512  for (int32_t i = 0; i < num_features; i++)
513  vec2[i] += alpha * CMath::abs(vec1[i]);
514  }
515  else
516  {
517  for (int32_t i = 0; i < num_features; i++)
518  vec2[i] += alpha * vec1[i];
519  }
520 
521  free_feature_vector(vec1, vec_idx1, vfree);
522 }
523 
524 template<>
526  float64_t* vec2, int32_t vec2_len, bool abs_val)
527 {
528  ASSERT(vec2_len == num_features)
529 
530  int32_t vlen;
531  bool vfree;
532  float64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
533 
534  ASSERT(vlen == num_features)
535 
536  if (abs_val)
537  {
538  for (int32_t i = 0; i < num_features; i++)
539  vec2[i] += alpha * CMath::abs(vec1[i]);
540  }
541  else
542  {
543  SGVector<float64_t>::vec1_plus_scalar_times_vec2(vec2, alpha, vec1, num_features);
544  }
545 
546  free_feature_vector(vec1, vec_idx1, vfree);
547 }
548 
549 template<class ST> int32_t CDenseFeatures<ST>::get_nnz_features_for_vector(int32_t num)
550 {
551  return num_features;
552 }
553 
554 template<class ST> void* CDenseFeatures<ST>::get_feature_iterator(int32_t vector_index)
555 {
556  if (vector_index>=get_num_vectors())
557  {
558  SG_ERROR("Index out of bounds (number of vectors %d, you "
559  "requested %d)\n", get_num_vectors(), vector_index);
560  }
561 
562  dense_feature_iterator* iterator = SG_MALLOC(dense_feature_iterator, 1);
563  iterator->vec = get_feature_vector(vector_index, iterator->vlen,
564  iterator->vfree);
565  iterator->vidx = vector_index;
566  iterator->index = 0;
567  return iterator;
568 }
569 
570 template<class ST> bool CDenseFeatures<ST>::get_next_feature(int32_t& index, float64_t& value,
571  void* iterator)
572 {
573  dense_feature_iterator* it = (dense_feature_iterator*) iterator;
574  if (!it || it->index >= it->vlen)
575  return false;
576 
577  index = it->index++;
578  value = (float64_t) it->vec[index];
579 
580  return true;
581 }
582 
583 template<class ST> void CDenseFeatures<ST>::free_feature_iterator(void* iterator)
584 {
585  if (!iterator)
586  return;
587 
588  dense_feature_iterator* it = (dense_feature_iterator*) iterator;
589  free_feature_vector(it->vec, it->vidx, it->vfree);
590  SG_FREE(it);
591 }
592 
594 {
595  SGMatrix<ST> feature_matrix_copy(num_features, indices.vlen);
596 
597  for (index_t i=0; i<indices.vlen; ++i)
598  {
599  index_t real_idx=m_subset_stack->subset_idx_conversion(indices.vector[i]);
600  memcpy(&feature_matrix_copy.matrix[i*num_features],
601  &feature_matrix.matrix[real_idx*num_features],
602  num_features*sizeof(ST));
603  }
604 
605  CFeatures* result=new CDenseFeatures(feature_matrix_copy);
606  SG_REF(result);
607  return result;
608 }
609 
610 template<class ST>
612 {
613  SG_DEBUG("Entering!\n");
614 
615  // sanity checks
616  index_t max=CMath::max(dims.vector, dims.vlen);
617  index_t min=CMath::min(dims.vector, dims.vlen);
618  REQUIRE(max<num_features && min>=0,
619  "Provided dimensions is in the range [%d, %d] but they "
620  "have to be within [0, %d]! But it \n", min, max, num_features);
621 
622  SGMatrix<ST> feature_matrix_copy(dims.vlen, get_num_vectors());
623 
624  for (index_t i=0; i<dims.vlen; ++i)
625  {
626  for (index_t j=0; j<get_num_vectors(); ++j)
627  {
628  index_t real_idx=m_subset_stack->subset_idx_conversion(j);
629  feature_matrix_copy(i, j)=feature_matrix(dims[i], real_idx);
630  }
631  }
632 
633  CFeatures* result=new CDenseFeatures(feature_matrix_copy);
634  SG_REF(result);
635 
636  SG_DEBUG("Leaving!\n");
637  return result;
638 }
639 
640 template<class ST>
642 {
643  CFeatures* shallow_copy_features=NULL;
644 
645  SG_SDEBUG("Using underlying feature matrix with %d dimensions and %d feature vectors!\n", num_features, num_vectors);
646  SGMatrix<ST> shallow_copy_matrix(feature_matrix);
647  shallow_copy_features=new CDenseFeatures<ST>(shallow_copy_matrix);
648  SG_REF(shallow_copy_features);
649  if (m_subset_stack->has_subsets())
650  shallow_copy_features->add_subset(m_subset_stack->get_last_subset()->get_subset_idx());
651 
652  return shallow_copy_features;
653 }
654 
655 template<class ST> ST* CDenseFeatures<ST>::compute_feature_vector(int32_t num, int32_t& len,
656  ST* target)
657 {
659  len = 0;
660  return NULL;
661 }
662 
663 template<class ST> void CDenseFeatures<ST>::init()
664 {
665  num_vectors = 0;
666  num_features = 0;
667 
668  feature_matrix = SGMatrix<ST>();
669  feature_cache = NULL;
670 
671  set_generic<ST>();
672 
673  /* not store number of vectors in subset */
674  SG_ADD(&num_vectors, "num_vectors", "Number of vectors.", MS_NOT_AVAILABLE);
675  SG_ADD(&num_features, "num_features", "Number of features.", MS_NOT_AVAILABLE);
676  SG_ADD(&feature_matrix, "feature_matrix",
677  "Matrix of feature vectors / 1 vector per column.", MS_NOT_AVAILABLE);
678 }
679 
680 #define GET_FEATURE_TYPE(f_type, sg_type) \
681 template<> EFeatureType CDenseFeatures<sg_type>::get_feature_type() const \
682 { \
683  return f_type; \
684 }
685 
688 GET_FEATURE_TYPE(F_BYTE, uint8_t)
689 GET_FEATURE_TYPE(F_BYTE, int8_t)
690 GET_FEATURE_TYPE(F_SHORT, int16_t)
691 GET_FEATURE_TYPE(F_WORD, uint16_t)
692 GET_FEATURE_TYPE(F_INT, int32_t)
693 GET_FEATURE_TYPE(F_UINT, uint32_t)
694 GET_FEATURE_TYPE(F_LONG, int64_t)
695 GET_FEATURE_TYPE(F_ULONG, uint64_t)
699 #undef GET_FEATURE_TYPE
700 
701 template<> float64_t CDenseFeatures<bool>::dense_dot(int32_t vec_idx1,
702  const float64_t* vec2, int32_t vec2_len)
703 {
704  ASSERT(vec2_len == num_features)
705 
706  int32_t vlen;
707  bool vfree;
708  bool* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
709 
710  ASSERT(vlen == num_features)
711  float64_t result = 0;
712 
713  for (int32_t i = 0; i < num_features; i++)
714  result += vec1[i] ? vec2[i] : 0;
715 
716  free_feature_vector(vec1, vec_idx1, vfree);
717 
718  return result;
719 }
720 
721 template<> float64_t CDenseFeatures<char>::dense_dot(int32_t vec_idx1,
722  const float64_t* vec2, int32_t vec2_len)
723 {
724  ASSERT(vec2_len == num_features)
725 
726  int32_t vlen;
727  bool vfree;
728  char* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
729 
730  ASSERT(vlen == num_features)
731  float64_t result = 0;
732 
733  for (int32_t i = 0; i < num_features; i++)
734  result += vec1[i] * vec2[i];
735 
736  free_feature_vector(vec1, vec_idx1, vfree);
737 
738  return result;
739 }
740 
741 template<> float64_t CDenseFeatures<int8_t>::dense_dot(int32_t vec_idx1,
742  const float64_t* vec2, int32_t vec2_len)
743 {
744  ASSERT(vec2_len == num_features)
745 
746  int32_t vlen;
747  bool vfree;
748  int8_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
749 
750  ASSERT(vlen == num_features)
751  float64_t result = 0;
752 
753  for (int32_t i = 0; i < num_features; i++)
754  result += vec1[i] * vec2[i];
755 
756  free_feature_vector(vec1, vec_idx1, vfree);
757 
758  return result;
759 }
760 
762  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
763 {
764  ASSERT(vec2_len == num_features)
765 
766  int32_t vlen;
767  bool vfree;
768  uint8_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
769 
770  ASSERT(vlen == num_features)
771  float64_t result = 0;
772 
773  for (int32_t i = 0; i < num_features; i++)
774  result += vec1[i] * vec2[i];
775 
776  free_feature_vector(vec1, vec_idx1, vfree);
777 
778  return result;
779 }
780 
782  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
783 {
784  ASSERT(vec2_len == num_features)
785 
786  int32_t vlen;
787  bool vfree;
788  int16_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
789 
790  ASSERT(vlen == num_features)
791  float64_t result = 0;
792 
793  for (int32_t i = 0; i < num_features; i++)
794  result += vec1[i] * vec2[i];
795 
796  free_feature_vector(vec1, vec_idx1, vfree);
797 
798  return result;
799 }
800 
802  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
803 {
804  ASSERT(vec2_len == num_features)
805 
806  int32_t vlen;
807  bool vfree;
808  uint16_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
809 
810  ASSERT(vlen == num_features)
811  float64_t result = 0;
812 
813  for (int32_t i = 0; i < num_features; i++)
814  result += vec1[i] * vec2[i];
815 
816  free_feature_vector(vec1, vec_idx1, vfree);
817 
818  return result;
819 }
820 
822  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
823 {
824  ASSERT(vec2_len == num_features)
825 
826  int32_t vlen;
827  bool vfree;
828  int32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
829 
830  ASSERT(vlen == num_features)
831  float64_t result = 0;
832 
833  for (int32_t i = 0; i < num_features; i++)
834  result += vec1[i] * vec2[i];
835 
836  free_feature_vector(vec1, vec_idx1, vfree);
837 
838  return result;
839 }
840 
842  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
843 {
844  ASSERT(vec2_len == num_features)
845 
846  int32_t vlen;
847  bool vfree;
848  uint32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
849 
850  ASSERT(vlen == num_features)
851  float64_t result = 0;
852 
853  for (int32_t i = 0; i < num_features; i++)
854  result += vec1[i] * vec2[i];
855 
856  free_feature_vector(vec1, vec_idx1, vfree);
857 
858  return result;
859 }
860 
862  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
863 {
864  ASSERT(vec2_len == num_features)
865 
866  int32_t vlen;
867  bool vfree;
868  int64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
869 
870  ASSERT(vlen == num_features)
871  float64_t result = 0;
872 
873  for (int32_t i = 0; i < num_features; i++)
874  result += vec1[i] * vec2[i];
875 
876  free_feature_vector(vec1, vec_idx1, vfree);
877 
878  return result;
879 }
880 
882  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
883 {
884  ASSERT(vec2_len == num_features)
885 
886  int32_t vlen;
887  bool vfree;
888  uint64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
889 
890  ASSERT(vlen == num_features)
891  float64_t result = 0;
892 
893  for (int32_t i = 0; i < num_features; i++)
894  result += vec1[i] * vec2[i];
895 
896  free_feature_vector(vec1, vec_idx1, vfree);
897 
898  return result;
899 }
900 
902  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
903 {
904  ASSERT(vec2_len == num_features)
905 
906  int32_t vlen;
907  bool vfree;
908  float32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
909 
910  ASSERT(vlen == num_features)
911  float64_t result = 0;
912 
913  for (int32_t i = 0; i < num_features; i++)
914  result += vec1[i] * vec2[i];
915 
916  free_feature_vector(vec1, vec_idx1, vfree);
917 
918  return result;
919 }
920 
922  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
923 {
924  ASSERT(vec2_len == num_features)
925 
926  int32_t vlen;
927  bool vfree;
928  float64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
929 
930  ASSERT(vlen == num_features)
931  float64_t result = CMath::dot(vec1, vec2, num_features);
932 
933  free_feature_vector(vec1, vec_idx1, vfree);
934 
935  return result;
936 }
937 
939  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
940 {
941  ASSERT(vec2_len == num_features)
942 
943  int32_t vlen;
944  bool vfree;
945  floatmax_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
946 
947  ASSERT(vlen == num_features)
948  float64_t result = 0;
949 
950  for (int32_t i = 0; i < num_features; i++)
951  result += vec1[i] * vec2[i];
952 
953  free_feature_vector(vec1, vec_idx1, vfree);
954 
955  return result;
956 }
957 
958 template<class ST> bool CDenseFeatures<ST>::is_equal(CDenseFeatures* rhs)
959 {
960  if ( num_features != rhs->num_features || num_vectors != rhs->num_vectors )
961  return false;
962 
963  ST* vec1;
964  ST* vec2;
965  int32_t v1len, v2len;
966  bool v1free, v2free, stop = false;
967 
968  for (int32_t i = 0; i < num_vectors; i++)
969  {
970  vec1 = get_feature_vector(i, v1len, v1free);
971  vec2 = rhs->get_feature_vector(i, v2len, v2free);
972 
973  if (v1len!=v2len)
974  stop = true;
975 
976  for (int32_t j=0; j<v1len; j++)
977  {
978  if (vec1[j]!=vec2[j])
979  stop = true;
980  }
981 
982  free_feature_vector(vec1, i, v1free);
983  free_feature_vector(vec2, i, v2free);
984 
985  if (stop)
986  return false;
987  }
988 
989  return true;
990 }
991 
993  CList* others)
994 {
995  SG_DEBUG("entering %s::create_merged_copy()\n", get_name());
996 
997  if (!others)
998  return NULL;
999 
1000  /* first, check other features and count number of elements */
1001  CSGObject* other=others->get_first_element();
1002  index_t num_vectors_merged=num_vectors;
1003  while (other)
1004  {
1005  CDenseFeatures<ST>* casted=dynamic_cast<CDenseFeatures<ST>* >(other);
1006 
1007  if (!casted)
1008  {
1009  SG_ERROR("%s::create_merged_copy(): Could not cast object of %s to "
1010  "same type as %s\n",get_name(), other->get_name(), get_name());
1011  }
1012 
1013  if (get_feature_type()!=casted->get_feature_type() ||
1014  get_feature_class()!=casted->get_feature_class() ||
1015  strcmp(get_name(), casted->get_name()))
1016  {
1017  SG_ERROR("%s::create_merged_copy(): Features are of different type!\n",
1018  get_name());
1019  }
1020 
1021  if (num_features!=casted->num_features)
1022  {
1023  SG_ERROR("%s::create_merged_copy(): Provided feature object has "
1024  "different dimension than this one\n");
1025  }
1026 
1027  num_vectors_merged+=casted->get_num_vectors();
1028 
1029  /* check if reference counting is used */
1030  if (others->get_delete_data())
1031  SG_UNREF(other);
1032  other=others->get_next_element();
1033  }
1034 
1035  /* create new feature matrix and copy both instances data into it */
1036  SGMatrix<ST> data(num_features, num_vectors_merged);
1037 
1038  /* copy data of this instance */
1039  SG_DEBUG("copying matrix of this instance\n")
1040  memcpy(data.matrix, feature_matrix.matrix,
1041  num_features*num_vectors*sizeof(ST));
1042 
1043  /* count number of vectors (not elements) processed so far */
1044  index_t num_processed=num_vectors;
1045 
1046  /* now copy data of other features block wise */
1047  other=others->get_first_element();
1048  while (other)
1049  {
1050  /* cast is safe due to above check */
1051  CDenseFeatures<ST>* casted=(CDenseFeatures<ST>*)other;
1052 
1053  SG_DEBUG("copying matrix of provided instance\n")
1054  memcpy(&(data.matrix[num_processed*num_features]),
1055  casted->get_feature_matrix().matrix,
1056  num_features*casted->get_num_vectors()*sizeof(ST));
1057 
1058  /* update counting */
1059  num_processed+=casted->get_num_vectors();
1060 
1061  /* check if reference counting is used */
1062  if (others->get_delete_data())
1063  SG_UNREF(other);
1064  other=others->get_next_element();
1065  }
1066 
1067  /* create new instance and return */
1068  CDenseFeatures<ST>* result=new CDenseFeatures<ST>(data);
1069 
1070  SG_DEBUG("leaving %s::create_merged_copy()\n", get_name());
1071  return result;
1072 }
1073 
1075  CFeatures* other)
1076 {
1077  SG_DEBUG("entering %s::create_merged_copy()\n", get_name());
1078 
1079  /* create list with one element and call general method */
1080  CList* list=new CList();
1081  list->append_element(other);
1082  CFeatures* result=create_merged_copy(list);
1083  SG_UNREF(list);
1084 
1085  SG_DEBUG("leaving %s::create_merged_copy()\n", get_name());
1086  return result;
1087 }
1088 
1089 template<class ST>
1091 {
1092  SGMatrix<ST> matrix;
1093  matrix.load(loader);
1094  set_feature_matrix(matrix);
1095 }
1096 
1097 template<class ST>
1099 {
1100  feature_matrix.save(writer);
1101 }
1102 
1104 {
1105  REQUIRE(base_features->get_feature_class() == C_DENSE,
1106  "base_features must be of dynamic type CDenseFeatures\n")
1107 
1108  return (CDenseFeatures< ST >*) base_features;
1109 }
1110 
1111 template class CDenseFeatures<bool>;
1112 template class CDenseFeatures<char>;
1113 template class CDenseFeatures<int8_t>;
1114 template class CDenseFeatures<uint8_t>;
1115 template class CDenseFeatures<int16_t>;
1116 template class CDenseFeatures<uint16_t>;
1117 template class CDenseFeatures<int32_t>;
1118 template class CDenseFeatures<uint32_t>;
1119 template class CDenseFeatures<int64_t>;
1120 template class CDenseFeatures<uint64_t>;
1121 template class CDenseFeatures<float32_t>;
1122 template class CDenseFeatures<float64_t>;
1123 template class CDenseFeatures<floatmax_t>;
1124 }
virtual const char * get_name() const =0
CSubsetStack * m_subset_stack
Definition: Features.h:361
void set_feature_vector(SGVector< ST > vector, int32_t num)
virtual int32_t get_dim_feature_space() const
#define SG_INFO(...)
Definition: SGIO.h:118
CSGObject * get_next_element()
Definition: List.h:185
The class DenseFeatures implements dense feature matrices.
Definition: LDA.h:40
virtual void load(CFile *loader)
virtual void copy_feature_matrix(SGMatrix< ST > src)
ST * get_feature_vector(int32_t num, int32_t &len, bool &dofree)
int32_t get_num_features() const
int32_t index_t
Definition: common.h:62
CDenseFeatures(int32_t size=0)
Vector::Scalar dot(Vector a, Vector b)
Definition: Redux.h:58
virtual CFeatures * duplicate() const
SGMatrix< ST > get_feature_matrix()
void set_feature_matrix(SGMatrix< ST > matrix)
SGMatrix< T > clone()
Definition: SGMatrix.cpp:256
virtual int32_t get_num_vectors() const =0
#define SG_ERROR(...)
Definition: SGIO.h:129
#define REQUIRE(x,...)
Definition: SGIO.h:206
#define SG_NOTIMPLEMENTED
Definition: SGIO.h:139
index_t num_cols
Definition: SGMatrix.h:376
void vector_subset(int32_t *idx, int32_t idx_len)
bool get_delete_data()
Definition: List.h:575
Features that support dot products among other operations.
Definition: DotFeatures.h:44
#define SG_REF(x)
Definition: SGObject.h:54
int32_t num_features
number of features in cache
index_t num_rows
Definition: SGMatrix.h:374
EFeatureClass
shogun feature class
Definition: FeatureTypes.h:38
class to add subset support to another class. A CSubsetStack instance should be added and wrappe...
Definition: SubsetStack.h:37
virtual int32_t get_dim_feature_space() const =0
virtual void save(CFile *saver)
index_t vlen
Definition: SGVector.h:494
CSGObject * get_first_element()
Definition: List.h:151
#define ASSERT(x)
Definition: SGIO.h:201
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:115
virtual int32_t get_num_vectors() const
Template class DensePreprocessor, base class for preprocessors (cf. CPreprocessor) that apply to CDen...
shogun vector
double float64_t
Definition: common.h:50
void set_num_vectors(int32_t num)
long double floatmax_t
Definition: common.h:51
virtual float64_t dot(int32_t vec_idx1, CDotFeatures *df, int32_t vec_idx2)
A File access base class.
Definition: File.h:34
CDenseFeatures< ST > * get_transposed()
virtual EFeatureClass get_feature_class() const =0
void set_num_features(int32_t num)
SGMatrix< ST > feature_matrix
static float64_t dot(const bool *v1, const bool *v2, int32_t n)
Compute dot product between v1 and v2 (blas optimized)
Definition: Math.h:627
float float32_t
Definition: common.h:49
void feature_subset(int32_t *idx, int32_t idx_len)
#define SG_UNREF(x)
Definition: SGObject.h:55
#define SG_DEBUG(...)
Definition: SGIO.h:107
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
#define SG_SDEBUG(...)
Definition: SGIO.h:168
void free_feature_vector(ST *feat_vec, int32_t num, bool dofree)
virtual bool reshape(int32_t p_num_features, int32_t p_num_vectors)
virtual EFeatureClass get_feature_class() const
The class Features is the base class of all feature objects.
Definition: Features.h:68
virtual SGMatrix< ST > apply_to_feature_matrix(CFeatures *features)=0
bool append_element(CSGObject *data)
Definition: List.h:331
void obtain_from_dot(CDotFeatures *df)
int32_t num_vectors
number of vectors in cache
SGVector< float64_t > get_computed_dot_feature_vector(int32_t num)
virtual EFeatureType get_feature_type() const
virtual bool apply_preprocessor(bool force_preprocessing=false)
Matrix::Scalar max(Matrix m)
Definition: Redux.h:68
#define SG_ADD(...)
Definition: SGObject.h:84
SGMatrix< ST > steal_feature_matrix()
virtual SGVector< ST > apply_to_feature_vector(SGVector< ST > vector)=0
virtual const char * get_name() const
virtual void add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t *vec2, int32_t vec2_len, bool abs_val=false)
virtual void add_subset(SGVector< index_t > subset)
Definition: Features.cpp:310
virtual EFeatureType get_feature_type() const =0
Class List implements a doubly connected list for low-level-objects.
Definition: List.h:84
#define GET_FEATURE_TYPE(f_type, sg_type)
static T abs(T a)
Definition: Math.h:179
void load(CFile *loader)
Definition: SGMatrix.cpp:1048

SHOGUN Machine Learning Toolbox - Documentation