SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DenseFeatures.cpp
Go to the documentation of this file.
3 #include <shogun/io/SGIO.h>
6 
7 #include <string.h>
8 
9 namespace shogun {
10 
11 template<class ST> CDenseFeatures<ST>::CDenseFeatures(int32_t size) : CDotFeatures(size)
12 {
13  init();
14 }
15 
16 template<class ST> CDenseFeatures<ST>::CDenseFeatures(const CDenseFeatures & orig) :
17  CDotFeatures(orig)
18 {
19  init();
22 
23  if (orig.m_subset_stack != NULL)
24  {
28  }
29 }
30 
32  CDotFeatures()
33 {
34  init();
35  set_feature_matrix(matrix);
36 }
37 
38 template<class ST> CDenseFeatures<ST>::CDenseFeatures(ST* src, int32_t num_feat, int32_t num_vec) :
39  CDotFeatures()
40 {
41  init();
42  set_feature_matrix(SGMatrix<ST>(src, num_feat, num_vec));
43 }
44 template<class ST> CDenseFeatures<ST>::CDenseFeatures(CFile* loader) :
45  CDotFeatures(loader)
46 {
47  init();
48  load(loader);
49 }
50 
51 template<class ST> CFeatures* CDenseFeatures<ST>::duplicate() const
52 {
53  return new CDenseFeatures<ST>(*this);
54 }
55 
57 {
58  free_features();
59 }
60 
61 template<class ST> void CDenseFeatures<ST>::free_features()
62 {
63  m_subset_stack->remove_all_subsets();
64  free_feature_matrix();
65  SG_UNREF(feature_cache);
66 }
67 
68 template<class ST> void CDenseFeatures<ST>::free_feature_matrix()
69 {
70  m_subset_stack->remove_all_subsets();
71  feature_matrix=SGMatrix<ST>();
72  num_vectors = 0;
73  num_features = 0;
74 }
75 
76 template<class ST> ST* CDenseFeatures<ST>::get_feature_vector(int32_t num, int32_t& len, bool& dofree)
77 {
78  /* index conversion for subset, only for array access */
79  int32_t real_num=m_subset_stack->subset_idx_conversion(num);
80 
81  len = num_features;
82 
83  if (feature_matrix.matrix)
84  {
85  dofree = false;
86  return &feature_matrix.matrix[real_num * int64_t(num_features)];
87  }
88 
89  ST* feat = NULL;
90  dofree = false;
91 
92  if (feature_cache)
93  {
94  feat = feature_cache->lock_entry(real_num);
95 
96  if (feat)
97  return feat;
98  else
99  feat = feature_cache->set_entry(real_num);
100  }
101 
102  if (!feat)
103  dofree = true;
104  feat = compute_feature_vector(num, len, feat);
105 
106  if (get_num_preprocessors())
107  {
108  int32_t tmp_len = len;
109  ST* tmp_feat_before = feat;
110  ST* tmp_feat_after = NULL;
111 
112  for (int32_t i = 0; i < get_num_preprocessors(); i++)
113  {
115  (CDensePreprocessor<ST>*) get_preprocessor(i);
116  // temporary hack
118  SGVector<ST>(tmp_feat_before, tmp_len));
119  tmp_feat_after = applied.vector;
120  SG_UNREF(p);
121 
122  if (i != 0) // delete feature vector, except for the the first one, i.e., feat
123  SG_FREE(tmp_feat_before);
124  tmp_feat_before = tmp_feat_after;
125  }
126 
127  memcpy(feat, tmp_feat_after, sizeof(ST) * tmp_len);
128  SG_FREE(tmp_feat_after);
129 
130  len = tmp_len;
131  }
132  return feat;
133 }
134 
135 template<class ST> void CDenseFeatures<ST>::set_feature_vector(SGVector<ST> vector, int32_t num)
136 {
137  /* index conversion for subset, only for array access */
138  int32_t real_num=m_subset_stack->subset_idx_conversion(num);
139 
140  if (num>=get_num_vectors())
141  {
142  SG_ERROR("Index out of bounds (number of vectors %d, you "
143  "requested %d)\n", get_num_vectors(), num);
144  }
145 
146  if (!feature_matrix.matrix)
147  SG_ERROR("Requires a in-memory feature matrix\n");
148 
149  if (vector.vlen != num_features)
150  SG_ERROR(
151  "Vector not of length %d (has %d)\n", num_features, vector.vlen);
152 
153  memcpy(&feature_matrix.matrix[real_num * int64_t(num_features)], vector.vector,
154  int64_t(num_features) * sizeof(ST));
155 }
156 
158 {
159  /* index conversion for subset, only for array access */
160  int32_t real_num=m_subset_stack->subset_idx_conversion(num);
161 
162  if (num >= get_num_vectors())
163  {
164  SG_ERROR("Index out of bounds (number of vectors %d, you "
165  "requested %d)\n", get_num_vectors(), real_num);
166  }
167 
168  int32_t vlen;
169  bool do_free;
170  ST* vector= get_feature_vector(num, vlen, do_free);
171  return SGVector<ST>(vector, vlen, do_free);
172 }
173 
174 template<class ST> void CDenseFeatures<ST>::free_feature_vector(ST* feat_vec, int32_t num, bool dofree)
175 {
176  if (feature_cache)
177  feature_cache->unlock_entry(m_subset_stack->subset_idx_conversion(num));
178 
179  if (dofree)
180  SG_FREE(feat_vec);
181 }
182 
183 template<class ST> void CDenseFeatures<ST>::free_feature_vector(SGVector<ST> vec, int32_t num)
184 {
185  free_feature_vector(vec.vector, num, false);
186  vec=SGVector<ST>();
187 }
188 
189 template<class ST> void CDenseFeatures<ST>::vector_subset(int32_t* idx, int32_t idx_len)
190 {
191  if (m_subset_stack->has_subsets())
192  SG_ERROR("A subset is set, cannot call vector_subset\n");
193 
194  ASSERT(feature_matrix.matrix);
195  ASSERT(idx_len<=num_vectors);
196 
197  int32_t num_vec = num_vectors;
198  num_vectors = idx_len;
199 
200  int32_t old_ii = -1;
201 
202  for (int32_t i = 0; i < idx_len; i++)
203  {
204  int32_t ii = idx[i];
205  ASSERT(old_ii<ii);
206 
207  if (ii < 0 || ii >= num_vec)
208  SG_ERROR( "Index out of range: should be 0<%d<%d\n", ii, num_vec);
209 
210  if (i == ii)
211  continue;
212 
213  memcpy(&feature_matrix.matrix[int64_t(num_features) * i],
214  &feature_matrix.matrix[int64_t(num_features) * ii],
215  num_features * sizeof(ST));
216  old_ii = ii;
217  }
218 }
219 
220 template<class ST> void CDenseFeatures<ST>::feature_subset(int32_t* idx, int32_t idx_len)
221 {
222  if (m_subset_stack->has_subsets())
223  SG_ERROR("A subset is set, cannot call feature_subset\n");
224 
225  ASSERT(feature_matrix.matrix);
226  ASSERT(idx_len<=num_features);
227  int32_t num_feat = num_features;
228  num_features = idx_len;
229 
230  for (int32_t i = 0; i < num_vectors; i++)
231  {
232  ST* src = &feature_matrix.matrix[int64_t(num_feat) * i];
233  ST* dst = &feature_matrix.matrix[int64_t(num_features) * i];
234 
235  int32_t old_jj = -1;
236  for (int32_t j = 0; j < idx_len; j++)
237  {
238  int32_t jj = idx[j];
239  ASSERT(old_jj<jj);
240  if (jj < 0 || jj >= num_feat)
241  SG_ERROR(
242  "Index out of range: should be 0<%d<%d\n", jj, num_feat);
243 
244  dst[j] = src[jj];
245  old_jj = jj;
246  }
247  }
248 }
249 
251 {
252  if (!m_subset_stack->has_subsets())
253  return feature_matrix;
254 
255  SGMatrix<ST> submatrix(num_features, get_num_vectors());
256 
257  /* copy a subset vector wise */
258  for (int32_t i=0; i<submatrix.num_cols; ++i)
259  {
260  int32_t real_i = m_subset_stack->subset_idx_conversion(i);
261  memcpy(&submatrix.matrix[i*int64_t(num_features)],
262  &feature_matrix.matrix[real_i * int64_t(num_features)],
263  num_features * sizeof(ST));
264  }
265 
266  return submatrix;
267 }
268 
270 {
271  SGMatrix<ST> st_feature_matrix=feature_matrix;
272  m_subset_stack->remove_all_subsets();
273  SG_UNREF(feature_cache);
274  clean_preprocessors();
275  free_feature_matrix();
276  return st_feature_matrix;
277 }
278 
280 {
281  m_subset_stack->remove_all_subsets();
282  free_feature_matrix();
283  feature_matrix = matrix;
284  num_features = matrix.num_rows;
285  num_vectors = matrix.num_cols;
286 }
287 
288 template<class ST> ST* CDenseFeatures<ST>::get_feature_matrix(int32_t &num_feat, int32_t &num_vec)
289 {
290  num_feat = num_features;
291  num_vec = num_vectors;
292  return feature_matrix.matrix;
293 }
294 
296 {
297  int32_t num_feat;
298  int32_t num_vec;
299  ST* fm = get_transposed(num_feat, num_vec);
300 
301  return new CDenseFeatures<ST>(fm, num_feat, num_vec);
302 }
303 
304 template<class ST> ST* CDenseFeatures<ST>::get_transposed(int32_t &num_feat, int32_t &num_vec)
305 {
306  num_feat = get_num_vectors();
307  num_vec = num_features;
308 
309  int32_t old_num_vec=get_num_vectors();
310 
311  ST* fm = SG_MALLOC(ST, int64_t(num_feat) * num_vec);
312 
313  for (int32_t i=0; i<old_num_vec; i++)
314  {
315  SGVector<ST> vec=get_feature_vector(i);
316 
317  for (int32_t j=0; j<vec.vlen; j++)
318  fm[j*int64_t(old_num_vec)+i]=vec.vector[j];
319 
320  free_feature_vector(vec, i);
321  }
322 
323  return fm;
324 }
325 
327 {
328  if (m_subset_stack->has_subsets())
329  SG_ERROR("A subset is set, cannot call copy_feature_matrix\n");
330 
331  free_feature_matrix();
332  feature_matrix = src.clone();
333  num_features = src.num_rows;
334  num_vectors = src.num_cols;
335  initialize_cache();
336 }
337 
339 {
340  m_subset_stack->remove_all_subsets();
341 
342  int32_t num_feat = df->get_dim_feature_space();
343  int32_t num_vec = df->get_num_vectors();
344 
345  ASSERT(num_feat>0 && num_vec>0);
346 
347  free_feature_matrix();
348  feature_matrix = SGMatrix<ST>(num_feat, num_vec);
349 
350  for (int32_t i = 0; i < num_vec; i++)
351  {
353  ASSERT(num_feat==v.vlen);
354 
355  for (int32_t j = 0; j < num_feat; j++)
356  feature_matrix.matrix[i * int64_t(num_feat) + j] = (ST) v.vector[j];
357  }
358  num_features = num_feat;
359  num_vectors = num_vec;
360 }
361 
362 template<class ST> bool CDenseFeatures<ST>::apply_preprocessor(bool force_preprocessing)
363 {
364  if (m_subset_stack->has_subsets())
365  SG_ERROR("A subset is set, cannot call apply_preproc\n");
366 
367  SG_DEBUG( "force: %d\n", force_preprocessing);
368 
369  if (feature_matrix.matrix && get_num_preprocessors())
370  {
371  for (int32_t i = 0; i < get_num_preprocessors(); i++)
372  {
373  if ((!is_preprocessed(i) || force_preprocessing))
374  {
375  set_preprocessed(i);
377  (CDensePreprocessor<ST>*) get_preprocessor(i);
378  SG_INFO( "preprocessing using preproc %s\n", p->get_name());
379 
380  if (p->apply_to_feature_matrix(this).matrix == NULL)
381  {
382  SG_UNREF(p);
383  return false;
384  }
385  SG_UNREF(p);
386 
387  }
388  }
389 
390  return true;
391  }
392  else
393  {
394  if (!feature_matrix.matrix)
395  SG_ERROR( "no feature matrix\n");
396 
397  if (!get_num_preprocessors())
398  SG_ERROR( "no preprocessors available\n");
399 
400  return false;
401  }
402 }
403 
404 template<class ST> int32_t CDenseFeatures<ST>::get_size() const { return sizeof(ST); }
405 
406 template<class ST> int32_t CDenseFeatures<ST>::get_num_vectors() const
407 {
408  return m_subset_stack->has_subsets() ? m_subset_stack->get_size() : num_vectors;
409 }
410 
411 template<class ST> int32_t CDenseFeatures<ST>::get_num_features() { return num_features; }
412 
413 template<class ST> void CDenseFeatures<ST>::set_num_features(int32_t num)
414 {
415  num_features = num;
416  initialize_cache();
417 }
418 
419 template<class ST> void CDenseFeatures<ST>::set_num_vectors(int32_t num)
420 {
421  if (m_subset_stack->has_subsets())
422  SG_ERROR("A subset is set, cannot call set_num_vectors\n");
423 
424  num_vectors = num;
425  initialize_cache();
426 }
427 
428 template<class ST> void CDenseFeatures<ST>::initialize_cache()
429 {
430  if (m_subset_stack->has_subsets())
431  SG_ERROR("A subset is set, cannot call initialize_cache\n");
432 
433  if (num_features && num_vectors)
434  {
435  SG_UNREF(feature_cache);
436  feature_cache = new CCache<ST>(get_cache_size(), num_features,
437  num_vectors);
438  SG_REF(feature_cache);
439  }
440 }
441 
442 template<class ST> EFeatureClass CDenseFeatures<ST>::get_feature_class() const { return C_DENSE; }
443 
444 template<class ST> bool CDenseFeatures<ST>::reshape(int32_t p_num_features, int32_t p_num_vectors)
445 {
446  if (m_subset_stack->has_subsets())
447  SG_ERROR("A subset is set, cannot call reshape\n");
448 
449  if (p_num_features * p_num_vectors
450  == this->num_features * this->num_vectors)
451  {
452  num_features = p_num_features;
453  num_vectors = p_num_vectors;
454  return true;
455  } else
456  return false;
457 }
458 
459 template<class ST> int32_t CDenseFeatures<ST>::get_dim_feature_space() const { return num_features; }
460 
461 template<class ST> float64_t CDenseFeatures<ST>::dot(int32_t vec_idx1, CDotFeatures* df,
462  int32_t vec_idx2)
463 {
464  ASSERT(df);
465  ASSERT(df->get_feature_type() == get_feature_type());
466  ASSERT(df->get_feature_class() == get_feature_class());
468 
469  int32_t len1, len2;
470  bool free1, free2;
471 
472  ST* vec1 = get_feature_vector(vec_idx1, len1, free1);
473  ST* vec2 = sf->get_feature_vector(vec_idx2, len2, free2);
474 
475  float64_t result = SGVector<ST>::dot(vec1, vec2, len1);
476 
477  free_feature_vector(vec1, vec_idx1, free1);
478  sf->free_feature_vector(vec2, vec_idx2, free2);
479 
480  return result;
481 }
482 
483 template<class ST> void CDenseFeatures<ST>::add_to_dense_vec(float64_t alpha, int32_t vec_idx1,
484  float64_t* vec2, int32_t vec2_len, bool abs_val)
485 {
486  ASSERT(vec2_len == num_features);
487 
488  int32_t vlen;
489  bool vfree;
490  ST* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
491 
492  ASSERT(vlen == num_features);
493 
494  if (abs_val)
495  {
496  for (int32_t i = 0; i < num_features; i++)
497  vec2[i] += alpha * CMath::abs(vec1[i]);
498  }
499  else
500  {
501  for (int32_t i = 0; i < num_features; i++)
502  vec2[i] += alpha * vec1[i];
503  }
504 
505  free_feature_vector(vec1, vec_idx1, vfree);
506 }
507 
508 template<class ST> int32_t CDenseFeatures<ST>::get_nnz_features_for_vector(int32_t num)
509 {
510  return num_features;
511 }
512 
513 template<class ST> void* CDenseFeatures<ST>::get_feature_iterator(int32_t vector_index)
514 {
515  if (vector_index>=get_num_vectors())
516  {
517  SG_ERROR("Index out of bounds (number of vectors %d, you "
518  "requested %d)\n", get_num_vectors(), vector_index);
519  }
520 
521  dense_feature_iterator* iterator = SG_MALLOC(dense_feature_iterator, 1);
522  iterator->vec = get_feature_vector(vector_index, iterator->vlen,
523  iterator->vfree);
524  iterator->vidx = vector_index;
525  iterator->index = 0;
526  return iterator;
527 }
528 
529 template<class ST> bool CDenseFeatures<ST>::get_next_feature(int32_t& index, float64_t& value,
530  void* iterator)
531 {
532  dense_feature_iterator* it = (dense_feature_iterator*) iterator;
533  if (!it || it->index >= it->vlen)
534  return false;
535 
536  index = it->index++;
537  value = (float64_t) it->vec[index];
538 
539  return true;
540 }
541 
542 template<class ST> void CDenseFeatures<ST>::free_feature_iterator(void* iterator)
543 {
544  if (!iterator)
545  return;
546 
547  dense_feature_iterator* it = (dense_feature_iterator*) iterator;
548  free_feature_vector(it->vec, it->vidx, it->vfree);
549  SG_FREE(it);
550 }
551 
553 {
554  SGMatrix<ST> feature_matrix_copy(num_features, indices.vlen);
555 
556  for (index_t i=0; i<indices.vlen; ++i)
557  {
558  index_t real_idx=m_subset_stack->subset_idx_conversion(indices.vector[i]);
559  memcpy(&feature_matrix_copy.matrix[i*num_features],
560  &feature_matrix.matrix[real_idx*num_features],
561  num_features*sizeof(ST));
562  }
563 
564  CFeatures* result=new CDenseFeatures(feature_matrix_copy);
565  SG_REF(result);
566  return result;
567 }
568 
569 template<class ST> ST* CDenseFeatures<ST>::compute_feature_vector(int32_t num, int32_t& len,
570  ST* target)
571 {
573  len = 0;
574  return NULL;
575 }
576 
577 template<class ST> void CDenseFeatures<ST>::init()
578 {
579  num_vectors = 0;
580  num_features = 0;
581 
582  feature_matrix = SGMatrix<ST>();
583  feature_cache = NULL;
584 
585  set_generic<ST>();
586 
587  /* not store number of vectors in subset */
588  SG_ADD(&num_vectors, "num_vectors", "Number of vectors.", MS_NOT_AVAILABLE);
589  SG_ADD(&num_features, "num_features", "Number of features.", MS_NOT_AVAILABLE);
590  SG_ADD(&feature_matrix, "feature_matrix",
591  "Matrix of feature vectors / 1 vector per column.", MS_NOT_AVAILABLE);
592 }
593 
594 #define GET_FEATURE_TYPE(f_type, sg_type) \
595 template<> EFeatureType CDenseFeatures<sg_type>::get_feature_type() const \
596 { \
597  return f_type; \
598 }
599 
602 GET_FEATURE_TYPE(F_BYTE, uint8_t)
603 GET_FEATURE_TYPE(F_BYTE, int8_t)
604 GET_FEATURE_TYPE(F_SHORT, int16_t)
605 GET_FEATURE_TYPE(F_WORD, uint16_t)
606 GET_FEATURE_TYPE(F_INT, int32_t)
607 GET_FEATURE_TYPE(F_UINT, uint32_t)
608 GET_FEATURE_TYPE(F_LONG, int64_t)
609 GET_FEATURE_TYPE(F_ULONG, uint64_t)
613 #undef GET_FEATURE_TYPE
614 
615 template<> float64_t CDenseFeatures<bool>::dense_dot(int32_t vec_idx1,
616  const float64_t* vec2, int32_t vec2_len)
617 {
618  ASSERT(vec2_len == num_features);
619 
620  int32_t vlen;
621  bool vfree;
622  bool* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
623 
624  ASSERT(vlen == num_features);
625  float64_t result = 0;
626 
627  for (int32_t i = 0; i < num_features; i++)
628  result += vec1[i] ? vec2[i] : 0;
629 
630  free_feature_vector(vec1, vec_idx1, vfree);
631 
632  return result;
633 }
634 
635 template<> float64_t CDenseFeatures<char>::dense_dot(int32_t vec_idx1,
636  const float64_t* vec2, int32_t vec2_len)
637 {
638  ASSERT(vec2_len == num_features);
639 
640  int32_t vlen;
641  bool vfree;
642  char* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
643 
644  ASSERT(vlen == num_features);
645  float64_t result = 0;
646 
647  for (int32_t i = 0; i < num_features; i++)
648  result += vec1[i] * vec2[i];
649 
650  free_feature_vector(vec1, vec_idx1, vfree);
651 
652  return result;
653 }
654 
655 template<> float64_t CDenseFeatures<int8_t>::dense_dot(int32_t vec_idx1,
656  const float64_t* vec2, int32_t vec2_len)
657 {
658  ASSERT(vec2_len == num_features);
659 
660  int32_t vlen;
661  bool vfree;
662  int8_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
663 
664  ASSERT(vlen == num_features);
665  float64_t result = 0;
666 
667  for (int32_t i = 0; i < num_features; i++)
668  result += vec1[i] * vec2[i];
669 
670  free_feature_vector(vec1, vec_idx1, vfree);
671 
672  return result;
673 }
674 
676  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
677 {
678  ASSERT(vec2_len == num_features);
679 
680  int32_t vlen;
681  bool vfree;
682  uint8_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
683 
684  ASSERT(vlen == num_features);
685  float64_t result = 0;
686 
687  for (int32_t i = 0; i < num_features; i++)
688  result += vec1[i] * vec2[i];
689 
690  free_feature_vector(vec1, vec_idx1, vfree);
691 
692  return result;
693 }
694 
696  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
697 {
698  ASSERT(vec2_len == num_features);
699 
700  int32_t vlen;
701  bool vfree;
702  int16_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
703 
704  ASSERT(vlen == num_features);
705  float64_t result = 0;
706 
707  for (int32_t i = 0; i < num_features; i++)
708  result += vec1[i] * vec2[i];
709 
710  free_feature_vector(vec1, vec_idx1, vfree);
711 
712  return result;
713 }
714 
716  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
717 {
718  ASSERT(vec2_len == num_features);
719 
720  int32_t vlen;
721  bool vfree;
722  uint16_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
723 
724  ASSERT(vlen == num_features);
725  float64_t result = 0;
726 
727  for (int32_t i = 0; i < num_features; i++)
728  result += vec1[i] * vec2[i];
729 
730  free_feature_vector(vec1, vec_idx1, vfree);
731 
732  return result;
733 }
734 
736  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
737 {
738  ASSERT(vec2_len == num_features);
739 
740  int32_t vlen;
741  bool vfree;
742  int32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
743 
744  ASSERT(vlen == num_features);
745  float64_t result = 0;
746 
747  for (int32_t i = 0; i < num_features; i++)
748  result += vec1[i] * vec2[i];
749 
750  free_feature_vector(vec1, vec_idx1, vfree);
751 
752  return result;
753 }
754 
756  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
757 {
758  ASSERT(vec2_len == num_features);
759 
760  int32_t vlen;
761  bool vfree;
762  uint32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
763 
764  ASSERT(vlen == num_features);
765  float64_t result = 0;
766 
767  for (int32_t i = 0; i < num_features; i++)
768  result += vec1[i] * vec2[i];
769 
770  free_feature_vector(vec1, vec_idx1, vfree);
771 
772  return result;
773 }
774 
776  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
777 {
778  ASSERT(vec2_len == num_features);
779 
780  int32_t vlen;
781  bool vfree;
782  int64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
783 
784  ASSERT(vlen == num_features);
785  float64_t result = 0;
786 
787  for (int32_t i = 0; i < num_features; i++)
788  result += vec1[i] * vec2[i];
789 
790  free_feature_vector(vec1, vec_idx1, vfree);
791 
792  return result;
793 }
794 
796  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
797 {
798  ASSERT(vec2_len == num_features);
799 
800  int32_t vlen;
801  bool vfree;
802  uint64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
803 
804  ASSERT(vlen == num_features);
805  float64_t result = 0;
806 
807  for (int32_t i = 0; i < num_features; i++)
808  result += vec1[i] * vec2[i];
809 
810  free_feature_vector(vec1, vec_idx1, vfree);
811 
812  return result;
813 }
814 
816  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
817 {
818  ASSERT(vec2_len == num_features);
819 
820  int32_t vlen;
821  bool vfree;
822  float32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
823 
824  ASSERT(vlen == num_features);
825  float64_t result = 0;
826 
827  for (int32_t i = 0; i < num_features; i++)
828  result += vec1[i] * vec2[i];
829 
830  free_feature_vector(vec1, vec_idx1, vfree);
831 
832  return result;
833 }
834 
836  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
837 {
838  ASSERT(vec2_len == num_features);
839 
840  int32_t vlen;
841  bool vfree;
842  float64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
843 
844  ASSERT(vlen == num_features);
845  float64_t result = SGVector<float64_t>::dot(vec1, vec2, num_features);
846 
847  free_feature_vector(vec1, vec_idx1, vfree);
848 
849  return result;
850 }
851 
853  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
854 {
855  ASSERT(vec2_len == num_features);
856 
857  int32_t vlen;
858  bool vfree;
859  floatmax_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
860 
861  ASSERT(vlen == num_features);
862  float64_t result = 0;
863 
864  for (int32_t i = 0; i < num_features; i++)
865  result += vec1[i] * vec2[i];
866 
867  free_feature_vector(vec1, vec_idx1, vfree);
868 
869  return result;
870 }
871 
872 template<class ST> bool CDenseFeatures<ST>::is_equal(CDenseFeatures* rhs)
873 {
874  if ( num_features != rhs->num_features || num_vectors != rhs->num_vectors )
875  return false;
876 
877  ST* vec1;
878  ST* vec2;
879  int32_t v1len, v2len;
880  bool v1free, v2free, stop = false;
881 
882  for (int32_t i = 0; i < num_vectors; i++)
883  {
884  vec1 = get_feature_vector(i, v1len, v1free);
885  vec2 = rhs->get_feature_vector(i, v2len, v2free);
886 
887  if (v1len!=v2len)
888  stop = true;
889 
890  for (int32_t j=0; j<v1len; j++)
891  {
892  if (vec1[j]!=vec2[j])
893  stop = true;
894  }
895 
896  free_feature_vector(vec1, i, v1free);
897  free_feature_vector(vec2, i, v2free);
898 
899  if (stop)
900  return false;
901  }
902 
903  return true;
904 }
905 
907  CFeatures* other)
908 {
909  SG_DEBUG("entering %s::create_merged_copy()\n", get_name());
910  if (get_feature_type()!=other->get_feature_type() ||
911  get_feature_class()!=other->get_feature_class() ||
912  strcmp(get_name(), other->get_name()))
913  {
914  SG_ERROR("%s::create_merged_copy(): Features are of different type!\n",
915  get_name());
916  }
917 
918  CDenseFeatures<ST>* casted=dynamic_cast<CDenseFeatures<ST>* >(other);
919 
920  if (!casted)
921  {
922  SG_ERROR("%s::create_merged_copy(): Could not cast object of %s to "
923  "same type as %s\n",get_name(), other->get_name(), get_name());
924  }
925 
926  if (num_features!=casted->num_features)
927  {
928  SG_ERROR("%s::create_merged_copy(): Provided feature object has "
929  "different dimension than this one\n");
930  }
931 
932  /* create new feature matrix and copy both instances data into it */
933  SGMatrix<ST> data(num_features, num_vectors+casted->get_num_vectors());
934 
935  /* copy data of this instance */
936  SG_DEBUG("copying matrix of this instance\n");
937  memcpy(data.matrix, feature_matrix.matrix,
938  num_features*num_vectors*sizeof(ST));
939 
940  /* copy data of provided instance */
941  SG_DEBUG("copying matrix of provided instance\n");
942  memcpy(&data.matrix[num_vectors*num_features],
943  casted->feature_matrix.matrix,
944  casted->num_features*casted->num_vectors*sizeof(ST));
945 
946  /* create new instance and return */
947  CDenseFeatures<ST>* result=new CDenseFeatures<ST>(data);
948 
949  SG_DEBUG("leaving %s::create_merged_copy()\n", get_name());
950  return result;
951 }
952 
953 #define LOAD(f_load, sg_type) \
954 template<> void CDenseFeatures<sg_type>::load(CFile* loader) \
955 { \
956  SG_SET_LOCALE_C; \
957  ASSERT(loader); \
958  sg_type* matrix; \
959  int32_t num_feat; \
960  int32_t num_vec; \
961  loader->f_load(matrix, num_feat, num_vec); \
962  set_feature_matrix(SGMatrix<sg_type>(matrix, num_feat, num_vec)); \
963  SG_RESET_LOCALE; \
964 }
965 
966 LOAD(get_matrix, bool)
967 LOAD(get_matrix, char)
968 LOAD(get_int8_matrix, int8_t)
969 LOAD(get_matrix, uint8_t)
970 LOAD(get_matrix, int16_t)
971 LOAD(get_matrix, uint16_t)
972 LOAD(get_matrix, int32_t)
973 LOAD(get_uint_matrix, uint32_t)
974 LOAD(get_long_matrix, int64_t)
975 LOAD(get_ulong_matrix, uint64_t)
976 LOAD(get_matrix, float32_t)
977 LOAD(get_matrix, float64_t)
978 LOAD(get_longreal_matrix, floatmax_t)
979 #undef LOAD
980 
981 #define SAVE(f_write, sg_type) \
982 template<> void CDenseFeatures<sg_type>::save(CFile* writer) \
983 { \
984  SG_SET_LOCALE_C; \
985  ASSERT(writer); \
986  writer->f_write(feature_matrix.matrix, feature_matrix.num_rows, \
987  feature_matrix.num_cols); \
988  SG_RESET_LOCALE; \
989 }
990 
991 SAVE(set_matrix, bool)
992 SAVE(set_matrix, char)
993 SAVE(set_int8_matrix, int8_t)
994 SAVE(set_matrix, uint8_t)
995 SAVE(set_matrix, int16_t)
996 SAVE(set_matrix, uint16_t)
997 SAVE(set_matrix, int32_t)
998 SAVE(set_uint_matrix, uint32_t)
999 SAVE(set_long_matrix, int64_t)
1000 SAVE(set_ulong_matrix, uint64_t)
1001 SAVE(set_matrix, float32_t)
1002 SAVE(set_matrix, float64_t)
1003 SAVE(set_longreal_matrix, floatmax_t)
1004 #undef SAVE
1005 
1006 template class CDenseFeatures<bool>;
1007 template class CDenseFeatures<char>;
1008 template class CDenseFeatures<int8_t>;
1009 template class CDenseFeatures<uint8_t>;
1010 template class CDenseFeatures<int16_t>;
1011 template class CDenseFeatures<uint16_t>;
1012 template class CDenseFeatures<int32_t>;
1013 template class CDenseFeatures<uint32_t>;
1014 template class CDenseFeatures<int64_t>;
1015 template class CDenseFeatures<uint64_t>;
1016 template class CDenseFeatures<float32_t>;
1017 template class CDenseFeatures<float64_t>;
1018 template class CDenseFeatures<floatmax_t>;
1019 }

SHOGUN Machine Learning Toolbox - Documentation