SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DenseFeatures.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2010 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Written (W) 2011-2013 Heiko Strathmann
10  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
11  * Copyright (C) 2010 Berlin Institute of Technology
12  */
13 
16 #include <shogun/io/SGIO.h>
17 #include <shogun/base/Parameter.h>
19 
20 #include <string.h>
21 
22 namespace shogun {
23 
24 template<class ST> CDenseFeatures<ST>::CDenseFeatures(int32_t size) : CDotFeatures(size)
25 {
26  init();
27 }
28 
29 template<class ST> CDenseFeatures<ST>::CDenseFeatures(const CDenseFeatures & orig) :
30  CDotFeatures(orig)
31 {
32  init();
35 
36  if (orig.m_subset_stack != NULL)
37  {
41  }
42 }
43 
45  CDotFeatures()
46 {
47  init();
48  set_feature_matrix(matrix);
49 }
50 
51 template<class ST> CDenseFeatures<ST>::CDenseFeatures(ST* src, int32_t num_feat, int32_t num_vec) :
52  CDotFeatures()
53 {
54  init();
55  set_feature_matrix(SGMatrix<ST>(src, num_feat, num_vec));
56 }
57 template<class ST> CDenseFeatures<ST>::CDenseFeatures(CFile* loader) :
58  CDotFeatures()
59 {
60  init();
61  load(loader);
62 }
63 
64 template<class ST> CFeatures* CDenseFeatures<ST>::duplicate() const
65 {
66  return new CDenseFeatures<ST>(*this);
67 }
68 
70 {
71  free_features();
72 }
73 
74 template<class ST> void CDenseFeatures<ST>::free_features()
75 {
76  m_subset_stack->remove_all_subsets();
77  free_feature_matrix();
78  SG_UNREF(feature_cache);
79 }
80 
81 template<class ST> void CDenseFeatures<ST>::free_feature_matrix()
82 {
83  m_subset_stack->remove_all_subsets();
84  feature_matrix=SGMatrix<ST>();
85  num_vectors = 0;
86  num_features = 0;
87 }
88 
89 template<class ST> ST* CDenseFeatures<ST>::get_feature_vector(int32_t num, int32_t& len, bool& dofree)
90 {
91  /* index conversion for subset, only for array access */
92  int32_t real_num=m_subset_stack->subset_idx_conversion(num);
93 
94  len = num_features;
95 
96  if (feature_matrix.matrix)
97  {
98  dofree = false;
99  return &feature_matrix.matrix[real_num * int64_t(num_features)];
100  }
101 
102  ST* feat = NULL;
103  dofree = false;
104 
105  if (feature_cache)
106  {
107  feat = feature_cache->lock_entry(real_num);
108 
109  if (feat)
110  return feat;
111  else
112  feat = feature_cache->set_entry(real_num);
113  }
114 
115  if (!feat)
116  dofree = true;
117  feat = compute_feature_vector(num, len, feat);
118 
119  if (get_num_preprocessors())
120  {
121  int32_t tmp_len = len;
122  ST* tmp_feat_before = feat;
123  ST* tmp_feat_after = NULL;
124 
125  for (int32_t i = 0; i < get_num_preprocessors(); i++)
126  {
128  (CDensePreprocessor<ST>*) get_preprocessor(i);
129  // temporary hack
131  SGVector<ST>(tmp_feat_before, tmp_len));
132  tmp_feat_after = applied.vector;
133  SG_UNREF(p);
134 
135  if (i != 0) // delete feature vector, except for the the first one, i.e., feat
136  SG_FREE(tmp_feat_before);
137  tmp_feat_before = tmp_feat_after;
138  }
139 
140  // note: tmp_feat_after should be checked as it is used by memcpy
141  if (tmp_feat_after)
142  {
143  memcpy(feat, tmp_feat_after, sizeof(ST) * tmp_len);
144  SG_FREE(tmp_feat_after);
145 
146  len = tmp_len;
147  }
148  }
149  return feat;
150 }
151 
152 template<class ST> void CDenseFeatures<ST>::set_feature_vector(SGVector<ST> vector, int32_t num)
153 {
154  /* index conversion for subset, only for array access */
155  int32_t real_num=m_subset_stack->subset_idx_conversion(num);
156 
157  if (num>=get_num_vectors())
158  {
159  SG_ERROR("Index out of bounds (number of vectors %d, you "
160  "requested %d)\n", get_num_vectors(), num);
161  }
162 
163  if (!feature_matrix.matrix)
164  SG_ERROR("Requires a in-memory feature matrix\n")
165 
166  if (vector.vlen != num_features)
167  SG_ERROR(
168  "Vector not of length %d (has %d)\n", num_features, vector.vlen);
169 
170  memcpy(&feature_matrix.matrix[real_num * int64_t(num_features)], vector.vector,
171  int64_t(num_features) * sizeof(ST));
172 }
173 
175 {
176  /* index conversion for subset, only for array access */
177  int32_t real_num=m_subset_stack->subset_idx_conversion(num);
178 
179  if (num >= get_num_vectors())
180  {
181  SG_ERROR("Index out of bounds (number of vectors %d, you "
182  "requested %d)\n", get_num_vectors(), real_num);
183  }
184 
185  int32_t vlen;
186  bool do_free;
187  ST* vector= get_feature_vector(num, vlen, do_free);
188  return SGVector<ST>(vector, vlen, do_free);
189 }
190 
191 template<class ST> void CDenseFeatures<ST>::free_feature_vector(ST* feat_vec, int32_t num, bool dofree)
192 {
193  if (feature_cache)
194  feature_cache->unlock_entry(m_subset_stack->subset_idx_conversion(num));
195 
196  if (dofree)
197  SG_FREE(feat_vec);
198 }
199 
200 template<class ST> void CDenseFeatures<ST>::free_feature_vector(SGVector<ST> vec, int32_t num)
201 {
202  free_feature_vector(vec.vector, num, false);
203  vec=SGVector<ST>();
204 }
205 
206 template<class ST> void CDenseFeatures<ST>::vector_subset(int32_t* idx, int32_t idx_len)
207 {
208  if (m_subset_stack->has_subsets())
209  SG_ERROR("A subset is set, cannot call vector_subset\n")
210 
211  ASSERT(feature_matrix.matrix)
212  ASSERT(idx_len<=num_vectors)
213 
214  int32_t num_vec = num_vectors;
215  num_vectors = idx_len;
216 
217  int32_t old_ii = -1;
218 
219  for (int32_t i = 0; i < idx_len; i++)
220  {
221  int32_t ii = idx[i];
222  ASSERT(old_ii<ii)
223 
224  if (ii < 0 || ii >= num_vec)
225  SG_ERROR("Index out of range: should be 0<%d<%d\n", ii, num_vec)
226 
227  if (i == ii)
228  continue;
229 
230  memcpy(&feature_matrix.matrix[int64_t(num_features) * i],
231  &feature_matrix.matrix[int64_t(num_features) * ii],
232  num_features * sizeof(ST));
233  old_ii = ii;
234  }
235 }
236 
237 template<class ST> void CDenseFeatures<ST>::feature_subset(int32_t* idx, int32_t idx_len)
238 {
239  if (m_subset_stack->has_subsets())
240  SG_ERROR("A subset is set, cannot call feature_subset\n")
241 
242  ASSERT(feature_matrix.matrix)
243  ASSERT(idx_len<=num_features)
244  int32_t num_feat = num_features;
245  num_features = idx_len;
246 
247  for (int32_t i = 0; i < num_vectors; i++)
248  {
249  ST* src = &feature_matrix.matrix[int64_t(num_feat) * i];
250  ST* dst = &feature_matrix.matrix[int64_t(num_features) * i];
251 
252  int32_t old_jj = -1;
253  for (int32_t j = 0; j < idx_len; j++)
254  {
255  int32_t jj = idx[j];
256  ASSERT(old_jj<jj)
257  if (jj < 0 || jj >= num_feat)
258  SG_ERROR(
259  "Index out of range: should be 0<%d<%d\n", jj, num_feat);
260 
261  dst[j] = src[jj];
262  old_jj = jj;
263  }
264  }
265 }
266 
268 {
269  if (!m_subset_stack->has_subsets())
270  return feature_matrix;
271 
272  SGMatrix<ST> submatrix(num_features, get_num_vectors());
273 
274  /* copy a subset vector wise */
275  for (int32_t i=0; i<submatrix.num_cols; ++i)
276  {
277  int32_t real_i = m_subset_stack->subset_idx_conversion(i);
278  memcpy(&submatrix.matrix[i*int64_t(num_features)],
279  &feature_matrix.matrix[real_i * int64_t(num_features)],
280  num_features * sizeof(ST));
281  }
282 
283  return submatrix;
284 }
285 
287 {
288  SGMatrix<ST> st_feature_matrix=feature_matrix;
289  m_subset_stack->remove_all_subsets();
290  SG_UNREF(feature_cache);
291  clean_preprocessors();
292  free_feature_matrix();
293  return st_feature_matrix;
294 }
295 
297 {
298  m_subset_stack->remove_all_subsets();
299  free_feature_matrix();
300  feature_matrix = matrix;
301  num_features = matrix.num_rows;
302  num_vectors = matrix.num_cols;
303 }
304 
305 template<class ST> ST* CDenseFeatures<ST>::get_feature_matrix(int32_t &num_feat, int32_t &num_vec)
306 {
307  num_feat = num_features;
308  num_vec = num_vectors;
309  return feature_matrix.matrix;
310 }
311 
313 {
314  int32_t num_feat;
315  int32_t num_vec;
316  ST* fm = get_transposed(num_feat, num_vec);
317 
318  return new CDenseFeatures<ST>(fm, num_feat, num_vec);
319 }
320 
321 template<class ST> ST* CDenseFeatures<ST>::get_transposed(int32_t &num_feat, int32_t &num_vec)
322 {
323  num_feat = get_num_vectors();
324  num_vec = num_features;
325 
326  int32_t old_num_vec=get_num_vectors();
327 
328  ST* fm = SG_MALLOC(ST, int64_t(num_feat) * num_vec);
329 
330  for (int32_t i=0; i<old_num_vec; i++)
331  {
332  SGVector<ST> vec=get_feature_vector(i);
333 
334  for (int32_t j=0; j<vec.vlen; j++)
335  fm[j*int64_t(old_num_vec)+i]=vec.vector[j];
336 
337  free_feature_vector(vec, i);
338  }
339 
340  return fm;
341 }
342 
344 {
345  if (m_subset_stack->has_subsets())
346  SG_ERROR("A subset is set, cannot call copy_feature_matrix\n")
347 
348  free_feature_matrix();
349  feature_matrix = src.clone();
350  num_features = src.num_rows;
351  num_vectors = src.num_cols;
352  initialize_cache();
353 }
354 
356 {
357  m_subset_stack->remove_all_subsets();
358 
359  int32_t num_feat = df->get_dim_feature_space();
360  int32_t num_vec = df->get_num_vectors();
361 
362  ASSERT(num_feat>0 && num_vec>0)
363 
364  free_feature_matrix();
365  feature_matrix = SGMatrix<ST>(num_feat, num_vec);
366 
367  for (int32_t i = 0; i < num_vec; i++)
368  {
370  ASSERT(num_feat==v.vlen)
371 
372  for (int32_t j = 0; j < num_feat; j++)
373  feature_matrix.matrix[i * int64_t(num_feat) + j] = (ST) v.vector[j];
374  }
375  num_features = num_feat;
376  num_vectors = num_vec;
377 }
378 
379 template<class ST> bool CDenseFeatures<ST>::apply_preprocessor(bool force_preprocessing)
380 {
381  if (m_subset_stack->has_subsets())
382  SG_ERROR("A subset is set, cannot call apply_preproc\n")
383 
384  SG_DEBUG("force: %d\n", force_preprocessing)
385 
386  if (feature_matrix.matrix && get_num_preprocessors())
387  {
388  for (int32_t i = 0; i < get_num_preprocessors(); i++)
389  {
390  if ((!is_preprocessed(i) || force_preprocessing))
391  {
392  set_preprocessed(i);
394  (CDensePreprocessor<ST>*) get_preprocessor(i);
395  SG_INFO("preprocessing using preproc %s\n", p->get_name())
396 
397  if (p->apply_to_feature_matrix(this).matrix == NULL)
398  {
399  SG_UNREF(p);
400  return false;
401  }
402  SG_UNREF(p);
403 
404  }
405  }
406 
407  return true;
408  }
409  else
410  {
411  if (!feature_matrix.matrix)
412  SG_ERROR("no feature matrix\n")
413 
414  if (!get_num_preprocessors())
415  SG_ERROR("no preprocessors available\n")
416 
417  return false;
418  }
419 }
420 
421 template<class ST> int32_t CDenseFeatures<ST>::get_num_vectors() const
422 {
423  return m_subset_stack->has_subsets() ? m_subset_stack->get_size() : num_vectors;
424 }
425 
426 template<class ST> int32_t CDenseFeatures<ST>::get_num_features() const { return num_features; }
427 
428 template<class ST> void CDenseFeatures<ST>::set_num_features(int32_t num)
429 {
430  num_features = num;
431  initialize_cache();
432 }
433 
434 template<class ST> void CDenseFeatures<ST>::set_num_vectors(int32_t num)
435 {
436  if (m_subset_stack->has_subsets())
437  SG_ERROR("A subset is set, cannot call set_num_vectors\n")
438 
439  num_vectors = num;
440  initialize_cache();
441 }
442 
443 template<class ST> void CDenseFeatures<ST>::initialize_cache()
444 {
445  if (m_subset_stack->has_subsets())
446  SG_ERROR("A subset is set, cannot call initialize_cache\n")
447 
448  if (num_features && num_vectors)
449  {
450  SG_UNREF(feature_cache);
451  feature_cache = new CCache<ST>(get_cache_size(), num_features,
452  num_vectors);
453  SG_REF(feature_cache);
454  }
455 }
456 
457 template<class ST> EFeatureClass CDenseFeatures<ST>::get_feature_class() const { return C_DENSE; }
458 
459 template<class ST> bool CDenseFeatures<ST>::reshape(int32_t p_num_features, int32_t p_num_vectors)
460 {
461  if (m_subset_stack->has_subsets())
462  SG_ERROR("A subset is set, cannot call reshape\n")
463 
464  if (p_num_features * p_num_vectors
465  == this->num_features * this->num_vectors)
466  {
467  num_features = p_num_features;
468  num_vectors = p_num_vectors;
469  return true;
470  } else
471  return false;
472 }
473 
474 template<class ST> int32_t CDenseFeatures<ST>::get_dim_feature_space() const { return num_features; }
475 
476 template<class ST> float64_t CDenseFeatures<ST>::dot(int32_t vec_idx1, CDotFeatures* df,
477  int32_t vec_idx2)
478 {
479  ASSERT(df)
480  ASSERT(df->get_feature_type() == get_feature_type())
481  ASSERT(df->get_feature_class() == get_feature_class())
483 
484  int32_t len1, len2;
485  bool free1, free2;
486 
487  ST* vec1 = get_feature_vector(vec_idx1, len1, free1);
488  ST* vec2 = sf->get_feature_vector(vec_idx2, len2, free2);
489 
490  float64_t result = SGVector<ST>::dot(vec1, vec2, len1);
491 
492  free_feature_vector(vec1, vec_idx1, free1);
493  sf->free_feature_vector(vec2, vec_idx2, free2);
494 
495  return result;
496 }
497 
498 template<class ST> void CDenseFeatures<ST>::add_to_dense_vec(float64_t alpha, int32_t vec_idx1,
499  float64_t* vec2, int32_t vec2_len, bool abs_val)
500 {
501  ASSERT(vec2_len == num_features)
502 
503  int32_t vlen;
504  bool vfree;
505  ST* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
506 
507  ASSERT(vlen == num_features)
508 
509  if (abs_val)
510  {
511  for (int32_t i = 0; i < num_features; i++)
512  vec2[i] += alpha * CMath::abs(vec1[i]);
513  }
514  else
515  {
516  for (int32_t i = 0; i < num_features; i++)
517  vec2[i] += alpha * vec1[i];
518  }
519 
520  free_feature_vector(vec1, vec_idx1, vfree);
521 }
522 
523 template<>
525  float64_t* vec2, int32_t vec2_len, bool abs_val)
526 {
527  ASSERT(vec2_len == num_features)
528 
529  int32_t vlen;
530  bool vfree;
531  float64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
532 
533  ASSERT(vlen == num_features)
534 
535  if (abs_val)
536  {
537  for (int32_t i = 0; i < num_features; i++)
538  vec2[i] += alpha * CMath::abs(vec1[i]);
539  }
540  else
541  {
542  SGVector<float64_t>::vec1_plus_scalar_times_vec2(vec2, alpha, vec1, num_features);
543  }
544 
545  free_feature_vector(vec1, vec_idx1, vfree);
546 }
547 
548 template<class ST> int32_t CDenseFeatures<ST>::get_nnz_features_for_vector(int32_t num)
549 {
550  return num_features;
551 }
552 
553 template<class ST> void* CDenseFeatures<ST>::get_feature_iterator(int32_t vector_index)
554 {
555  if (vector_index>=get_num_vectors())
556  {
557  SG_ERROR("Index out of bounds (number of vectors %d, you "
558  "requested %d)\n", get_num_vectors(), vector_index);
559  }
560 
561  dense_feature_iterator* iterator = SG_MALLOC(dense_feature_iterator, 1);
562  iterator->vec = get_feature_vector(vector_index, iterator->vlen,
563  iterator->vfree);
564  iterator->vidx = vector_index;
565  iterator->index = 0;
566  return iterator;
567 }
568 
569 template<class ST> bool CDenseFeatures<ST>::get_next_feature(int32_t& index, float64_t& value,
570  void* iterator)
571 {
572  dense_feature_iterator* it = (dense_feature_iterator*) iterator;
573  if (!it || it->index >= it->vlen)
574  return false;
575 
576  index = it->index++;
577  value = (float64_t) it->vec[index];
578 
579  return true;
580 }
581 
582 template<class ST> void CDenseFeatures<ST>::free_feature_iterator(void* iterator)
583 {
584  if (!iterator)
585  return;
586 
587  dense_feature_iterator* it = (dense_feature_iterator*) iterator;
588  free_feature_vector(it->vec, it->vidx, it->vfree);
589  SG_FREE(it);
590 }
591 
593 {
594  SGMatrix<ST> feature_matrix_copy(num_features, indices.vlen);
595 
596  for (index_t i=0; i<indices.vlen; ++i)
597  {
598  index_t real_idx=m_subset_stack->subset_idx_conversion(indices.vector[i]);
599  memcpy(&feature_matrix_copy.matrix[i*num_features],
600  &feature_matrix.matrix[real_idx*num_features],
601  num_features*sizeof(ST));
602  }
603 
604  CFeatures* result=new CDenseFeatures(feature_matrix_copy);
605  SG_REF(result);
606  return result;
607 }
608 
609 template<class ST> ST* CDenseFeatures<ST>::compute_feature_vector(int32_t num, int32_t& len,
610  ST* target)
611 {
613  len = 0;
614  return NULL;
615 }
616 
617 template<class ST> void CDenseFeatures<ST>::init()
618 {
619  num_vectors = 0;
620  num_features = 0;
621 
622  feature_matrix = SGMatrix<ST>();
623  feature_cache = NULL;
624 
625  set_generic<ST>();
626 
627  /* not store number of vectors in subset */
628  SG_ADD(&num_vectors, "num_vectors", "Number of vectors.", MS_NOT_AVAILABLE);
629  SG_ADD(&num_features, "num_features", "Number of features.", MS_NOT_AVAILABLE);
630  SG_ADD(&feature_matrix, "feature_matrix",
631  "Matrix of feature vectors / 1 vector per column.", MS_NOT_AVAILABLE);
632 }
633 
/* Expands to the get_feature_type() specialisation for one element type:
 * CDenseFeatures<elem_type>::get_feature_type() returns type_tag. */
#define GET_FEATURE_TYPE(type_tag, elem_type)					\
template<> EFeatureType CDenseFeatures<elem_type>::get_feature_type() const	\
{										\
	return type_tag;							\
}
639 
642 GET_FEATURE_TYPE(F_BYTE, uint8_t)
643 GET_FEATURE_TYPE(F_BYTE, int8_t)
644 GET_FEATURE_TYPE(F_SHORT, int16_t)
645 GET_FEATURE_TYPE(F_WORD, uint16_t)
646 GET_FEATURE_TYPE(F_INT, int32_t)
647 GET_FEATURE_TYPE(F_UINT, uint32_t)
648 GET_FEATURE_TYPE(F_LONG, int64_t)
649 GET_FEATURE_TYPE(F_ULONG, uint64_t)
653 #undef GET_FEATURE_TYPE
654 
655 template<> float64_t CDenseFeatures<bool>::dense_dot(int32_t vec_idx1,
656  const float64_t* vec2, int32_t vec2_len)
657 {
658  ASSERT(vec2_len == num_features)
659 
660  int32_t vlen;
661  bool vfree;
662  bool* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
663 
664  ASSERT(vlen == num_features)
665  float64_t result = 0;
666 
667  for (int32_t i = 0; i < num_features; i++)
668  result += vec1[i] ? vec2[i] : 0;
669 
670  free_feature_vector(vec1, vec_idx1, vfree);
671 
672  return result;
673 }
674 
675 template<> float64_t CDenseFeatures<char>::dense_dot(int32_t vec_idx1,
676  const float64_t* vec2, int32_t vec2_len)
677 {
678  ASSERT(vec2_len == num_features)
679 
680  int32_t vlen;
681  bool vfree;
682  char* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
683 
684  ASSERT(vlen == num_features)
685  float64_t result = 0;
686 
687  for (int32_t i = 0; i < num_features; i++)
688  result += vec1[i] * vec2[i];
689 
690  free_feature_vector(vec1, vec_idx1, vfree);
691 
692  return result;
693 }
694 
695 template<> float64_t CDenseFeatures<int8_t>::dense_dot(int32_t vec_idx1,
696  const float64_t* vec2, int32_t vec2_len)
697 {
698  ASSERT(vec2_len == num_features)
699 
700  int32_t vlen;
701  bool vfree;
702  int8_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
703 
704  ASSERT(vlen == num_features)
705  float64_t result = 0;
706 
707  for (int32_t i = 0; i < num_features; i++)
708  result += vec1[i] * vec2[i];
709 
710  free_feature_vector(vec1, vec_idx1, vfree);
711 
712  return result;
713 }
714 
716  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
717 {
718  ASSERT(vec2_len == num_features)
719 
720  int32_t vlen;
721  bool vfree;
722  uint8_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
723 
724  ASSERT(vlen == num_features)
725  float64_t result = 0;
726 
727  for (int32_t i = 0; i < num_features; i++)
728  result += vec1[i] * vec2[i];
729 
730  free_feature_vector(vec1, vec_idx1, vfree);
731 
732  return result;
733 }
734 
736  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
737 {
738  ASSERT(vec2_len == num_features)
739 
740  int32_t vlen;
741  bool vfree;
742  int16_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
743 
744  ASSERT(vlen == num_features)
745  float64_t result = 0;
746 
747  for (int32_t i = 0; i < num_features; i++)
748  result += vec1[i] * vec2[i];
749 
750  free_feature_vector(vec1, vec_idx1, vfree);
751 
752  return result;
753 }
754 
756  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
757 {
758  ASSERT(vec2_len == num_features)
759 
760  int32_t vlen;
761  bool vfree;
762  uint16_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
763 
764  ASSERT(vlen == num_features)
765  float64_t result = 0;
766 
767  for (int32_t i = 0; i < num_features; i++)
768  result += vec1[i] * vec2[i];
769 
770  free_feature_vector(vec1, vec_idx1, vfree);
771 
772  return result;
773 }
774 
776  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
777 {
778  ASSERT(vec2_len == num_features)
779 
780  int32_t vlen;
781  bool vfree;
782  int32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
783 
784  ASSERT(vlen == num_features)
785  float64_t result = 0;
786 
787  for (int32_t i = 0; i < num_features; i++)
788  result += vec1[i] * vec2[i];
789 
790  free_feature_vector(vec1, vec_idx1, vfree);
791 
792  return result;
793 }
794 
796  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
797 {
798  ASSERT(vec2_len == num_features)
799 
800  int32_t vlen;
801  bool vfree;
802  uint32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
803 
804  ASSERT(vlen == num_features)
805  float64_t result = 0;
806 
807  for (int32_t i = 0; i < num_features; i++)
808  result += vec1[i] * vec2[i];
809 
810  free_feature_vector(vec1, vec_idx1, vfree);
811 
812  return result;
813 }
814 
816  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
817 {
818  ASSERT(vec2_len == num_features)
819 
820  int32_t vlen;
821  bool vfree;
822  int64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
823 
824  ASSERT(vlen == num_features)
825  float64_t result = 0;
826 
827  for (int32_t i = 0; i < num_features; i++)
828  result += vec1[i] * vec2[i];
829 
830  free_feature_vector(vec1, vec_idx1, vfree);
831 
832  return result;
833 }
834 
836  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
837 {
838  ASSERT(vec2_len == num_features)
839 
840  int32_t vlen;
841  bool vfree;
842  uint64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
843 
844  ASSERT(vlen == num_features)
845  float64_t result = 0;
846 
847  for (int32_t i = 0; i < num_features; i++)
848  result += vec1[i] * vec2[i];
849 
850  free_feature_vector(vec1, vec_idx1, vfree);
851 
852  return result;
853 }
854 
856  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
857 {
858  ASSERT(vec2_len == num_features)
859 
860  int32_t vlen;
861  bool vfree;
862  float32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
863 
864  ASSERT(vlen == num_features)
865  float64_t result = 0;
866 
867  for (int32_t i = 0; i < num_features; i++)
868  result += vec1[i] * vec2[i];
869 
870  free_feature_vector(vec1, vec_idx1, vfree);
871 
872  return result;
873 }
874 
876  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
877 {
878  ASSERT(vec2_len == num_features)
879 
880  int32_t vlen;
881  bool vfree;
882  float64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
883 
884  ASSERT(vlen == num_features)
885  float64_t result = SGVector<float64_t>::dot(vec1, vec2, num_features);
886 
887  free_feature_vector(vec1, vec_idx1, vfree);
888 
889  return result;
890 }
891 
893  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
894 {
895  ASSERT(vec2_len == num_features)
896 
897  int32_t vlen;
898  bool vfree;
899  floatmax_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
900 
901  ASSERT(vlen == num_features)
902  float64_t result = 0;
903 
904  for (int32_t i = 0; i < num_features; i++)
905  result += vec1[i] * vec2[i];
906 
907  free_feature_vector(vec1, vec_idx1, vfree);
908 
909  return result;
910 }
911 
912 template<class ST> bool CDenseFeatures<ST>::is_equal(CDenseFeatures* rhs)
913 {
914  if ( num_features != rhs->num_features || num_vectors != rhs->num_vectors )
915  return false;
916 
917  ST* vec1;
918  ST* vec2;
919  int32_t v1len, v2len;
920  bool v1free, v2free, stop = false;
921 
922  for (int32_t i = 0; i < num_vectors; i++)
923  {
924  vec1 = get_feature_vector(i, v1len, v1free);
925  vec2 = rhs->get_feature_vector(i, v2len, v2free);
926 
927  if (v1len!=v2len)
928  stop = true;
929 
930  for (int32_t j=0; j<v1len; j++)
931  {
932  if (vec1[j]!=vec2[j])
933  stop = true;
934  }
935 
936  free_feature_vector(vec1, i, v1free);
937  free_feature_vector(vec2, i, v2free);
938 
939  if (stop)
940  return false;
941  }
942 
943  return true;
944 }
945 
947  CList* others)
948 {
949  SG_DEBUG("entering %s::create_merged_copy()\n", get_name());
950 
951  if (!others)
952  return NULL;
953 
954  /* first, check other features and count number of elements */
955  CSGObject* other=others->get_first_element();
956  index_t num_vectors_merged=num_vectors;
957  while (other)
958  {
959  CDenseFeatures<ST>* casted=dynamic_cast<CDenseFeatures<ST>* >(other);
960 
961  if (!casted)
962  {
963  SG_ERROR("%s::create_merged_copy(): Could not cast object of %s to "
964  "same type as %s\n",get_name(), other->get_name(), get_name());
965  }
966 
967  if (get_feature_type()!=casted->get_feature_type() ||
968  get_feature_class()!=casted->get_feature_class() ||
969  strcmp(get_name(), casted->get_name()))
970  {
971  SG_ERROR("%s::create_merged_copy(): Features are of different type!\n",
972  get_name());
973  }
974 
975  if (num_features!=casted->num_features)
976  {
977  SG_ERROR("%s::create_merged_copy(): Provided feature object has "
978  "different dimension than this one\n");
979  }
980 
981  num_vectors_merged+=casted->get_num_vectors();
982 
983  /* check if reference counting is used */
984  if (others->get_delete_data())
985  SG_UNREF(other);
986  other=others->get_next_element();
987  }
988 
989  /* create new feature matrix and copy both instances data into it */
990  SGMatrix<ST> data(num_features, num_vectors_merged);
991 
992  /* copy data of this instance */
993  SG_DEBUG("copying matrix of this instance\n")
994  memcpy(data.matrix, feature_matrix.matrix,
995  num_features*num_vectors*sizeof(ST));
996 
997  /* count number of vectors (not elements) processed so far */
998  index_t num_processed=num_vectors;
999 
1000  /* now copy data of other features block-wise */
1001  other=others->get_first_element();
1002  while (other)
1003  {
1004  /* cast is safe due to above check */
1005  CDenseFeatures<ST>* casted=(CDenseFeatures<ST>*)other;
1006 
1007  SG_DEBUG("copying matrix of provided instance\n")
1008  memcpy(&(data.matrix[num_processed*num_features]),
1009  casted->get_feature_matrix().matrix,
1010  num_features*casted->get_num_vectors()*sizeof(ST));
1011 
1012  /* update counting */
1013  num_processed+=casted->get_num_vectors();
1014 
1015  /* check if reference counting is used */
1016  if (others->get_delete_data())
1017  SG_UNREF(other);
1018  other=others->get_next_element();
1019  }
1020 
1021  /* create new instance and return */
1022  CDenseFeatures<ST>* result=new CDenseFeatures<ST>(data);
1023 
1024  SG_DEBUG("leaving %s::create_merged_copy()\n", get_name());
1025  return result;
1026 }
1027 
1029  CFeatures* other)
1030 {
1031  SG_DEBUG("entering %s::create_merged_copy()\n", get_name());
1032 
1033  /* create list with one element and call general method */
1034  CList* list=new CList();
1035  list->append_element(other);
1036  CFeatures* result=create_merged_copy(list);
1037  SG_UNREF(list);
1038 
1039  SG_DEBUG("leaving %s::create_merged_copy()\n", get_name());
1040  return result;
1041 }
1042 
1043 template<class ST>
1045 {
1046  SGMatrix<ST> matrix;
1047  matrix.load(loader);
1048  set_feature_matrix(matrix);
1049 }
1050 
1051 template<class ST>
1053 {
1054  feature_matrix.save(writer);
1055 }
1056 
1058 {
1059  REQUIRE(base_features->get_feature_class() == C_DENSE,
1060  "base_features must be of dynamic type CDenseFeatures\n")
1061 
1062  return (CDenseFeatures< ST >*) base_features;
1063 }
1064 
1065 template class CDenseFeatures<bool>;
1066 template class CDenseFeatures<char>;
1067 template class CDenseFeatures<int8_t>;
1068 template class CDenseFeatures<uint8_t>;
1069 template class CDenseFeatures<int16_t>;
1070 template class CDenseFeatures<uint16_t>;
1071 template class CDenseFeatures<int32_t>;
1072 template class CDenseFeatures<uint32_t>;
1073 template class CDenseFeatures<int64_t>;
1074 template class CDenseFeatures<uint64_t>;
1075 template class CDenseFeatures<float32_t>;
1076 template class CDenseFeatures<float64_t>;
1077 template class CDenseFeatures<floatmax_t>;
1078 }

SHOGUN Machine Learning Toolbox - Documentation