SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
SGSparseVector.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2013,2014 Thoralf Klein
8  * Written (W) 2012 Fernando José Iglesias García
9  * Written (W) 2010,2012 Soeren Sonnenburg
10  * Copyright (C) 2010 Berlin Institute of Technology
11  * Copyright (C) 2012 Soeren Sonnenburg
12  */
13 
15 #include <shogun/lib/SGVector.h>
17 #include <shogun/io/File.h>
18 
19 namespace shogun
20 {
21 
22 template <class T>
24 {
25  init_data();
26 }
27 
28 template <class T>
30  bool ref_counting) :
31  SGReferencedData(ref_counting),
32  num_feat_entries(num_entries), features(feats)
33 {
34 }
35 
36 template <class T>
37 SGSparseVector<T>::SGSparseVector(index_t num_entries, bool ref_counting) :
38  SGReferencedData(ref_counting),
39  num_feat_entries(num_entries)
40 {
42 }
43 
44 template <class T>
46  SGReferencedData(orig)
47 {
48  copy_data(orig);
49 }
50 
51 template <class T>
53 {
54  unref();
55 }
56 
57 template <class T>
58 T SGSparseVector<T>::dense_dot(T alpha, T * vec, int32_t dim, T b)
59 {
60  ASSERT(vec)
61  T result = b;
62 
63  if (features)
64  {
65  for (int32_t i = 0; i < num_feat_entries; i++)
66  {
67  if (features[i].feat_index < dim)
68  {
69  result += alpha * vec[features[i].feat_index] * features[i].entry;
70  }
71  }
72  }
73 
74  return result;
75 }
76 
77 template <class T>
78 void SGSparseVector<T>::add_to_dense(T alpha, T * vec, int32_t dim, bool abs_val)
79 {
80  REQUIRE(vec, "vec must not be NULL\n");
81 
82  if (abs_val)
83  {
84  for (int32_t i = 0; i < num_feat_entries; i++)
85  {
86  vec[features[i].feat_index] += alpha*CMath::abs(features[i].entry);
87  }
88  }
89  else
90  {
91  for (int32_t i = 0; i < num_feat_entries; i++)
92  {
93  vec[features[i].feat_index] += alpha*features[i].entry;
94  }
95  }
96 }
97 
98 template <class T>
99 template <typename ST>
101 {
102  ASSERT(vec)
103  T result(0.0);
104 
105  if (features)
106  {
107  for (int32_t i = 0; i < num_feat_entries; i++)
108  {
109  if (features[i].feat_index < vec.vlen)
110  result += static_cast<T>(vec[features[i].feat_index])
111  * features[i].entry;
112  }
113  }
114 
115  return result;
116 }
117 
121 
122 template <class T>
124 {
125  return sparse_dot(*this, v);
126 }
127 
128 template <class T>
130 {
131  if (a.num_feat_entries == 0 || b.num_feat_entries == 0)
132  {
133  return 0;
134  }
135 
136  if (!a.is_sorted() || !b.is_sorted())
137  {
138  return dot_prod_expensive_unsorted(a, b);
139  }
140 
141  T dot_prod = 0;
142  index_t a_idx = 0, b_idx = 0;
143 
144  while (a_idx < a.num_feat_entries && b_idx < b.num_feat_entries)
145  {
146  if (a.features[a_idx].feat_index < b.features[b_idx].feat_index)
147  {
148  a_idx++;
149  }
150  else if (a.features[a_idx].feat_index > b.features[b_idx].feat_index)
151  {
152  b_idx++;
153  }
154  else
155  {
156  // a.features[a_idx].feat_index == b.features[b_idx].feat_index
157  dot_prod += a.features[a_idx].entry * b.features[b_idx].entry;
158  a_idx++;
159  b_idx++;
160  }
161  }
162 
163  return dot_prod;
164 }
165 
166 template<class T>
168 {
169  if (!features)
170  {
171  return 0;
172  }
173 
174  int32_t dimensions = -1;
175 
176  for (index_t i = 0; i < num_feat_entries; i++)
177  {
178  if (features[i].feat_index > dimensions)
179  {
180  dimensions = features[i].feat_index;
181  }
182  }
183 
184  return dimensions + 1;
185 }
186 
187 template<class T>
188 void SGSparseVector<T>::sort_features(bool stable_pointer)
189 {
190  if (!num_feat_entries)
191  {
192  return;
193  }
194 
195  // remember old pointer to enforce quarantee
196  const SGSparseVectorEntry<T> * old_features_ptr = features;
197 
198  int32_t * feat_idx = SG_MALLOC(int32_t, num_feat_entries);
199 
200  for (index_t j = 0; j < num_feat_entries; j++)
201  {
202  feat_idx[j] = features[j].feat_index;
203  }
204 
205  CMath::qsort_index(feat_idx, features, num_feat_entries);
206  SG_FREE(feat_idx);
207 
208  for (index_t j = 1; j < num_feat_entries; j++)
209  {
210  REQUIRE(features[j - 1].feat_index <= features[j].feat_index,
211  "sort_features(): failed sanity check %d <= %d after sorting (comparing indices features[%d] <= features[%d], features=%d)\n",
212  features[j - 1].feat_index, features[j].feat_index, j - 1, j, num_feat_entries);
213  }
214 
215  // compression: removing zero-entries and merging features with same index
216  int32_t last_index = 0;
217 
218  for (index_t j = 1; j < num_feat_entries; j++)
219  {
220  // always true, but kept for future changes
221  REQUIRE(last_index < j,
222  "sort_features(): target index %d must not exceed source index j=%d",
223  last_index, j);
224  REQUIRE(features[last_index].feat_index <= features[j].feat_index,
225  "sort_features(): failed sanity check %d = features[%d].feat_index <= features[%d].feat_index = %d\n",
226  features[last_index].feat_index, last_index, j, features[j].feat_index);
227 
228  // merging of features with same index
229  if (features[last_index].feat_index == features[j].feat_index)
230  {
231  features[last_index].entry += features[j].entry;
232  continue;
233  }
234 
235  // only skip to next element if current one is not zero
236  if (features[last_index].entry != 0.0)
237  {
238  last_index++;
239  }
240 
241  features[last_index] = features[j];
242  }
243 
244  // remove single first element if zero (not caught by loop)
245  if (features[last_index].entry == 0.0)
246  {
247  last_index--;
248  }
249 
250  int32_t new_feat_count = last_index + 1;
251  ASSERT(new_feat_count <= num_feat_entries);
252 
253  // shrinking vector
254  if (!stable_pointer)
255  {
256  SG_SINFO("shrinking vector from %d to %d\n", num_feat_entries, new_feat_count);
257  features = SG_REALLOC(SGSparseVectorEntry<T>, features, num_feat_entries, new_feat_count);
258  }
259 
260  num_feat_entries = new_feat_count;
261 
262  for (index_t j = 1; j < num_feat_entries; j++)
263  {
264  REQUIRE(features[j - 1].feat_index < features[j].feat_index,
265  "sort_features(): failed sanity check %d < %d after sorting (comparing indices features[%d] < features[%d], features=%d)\n",
266  features[j - 1].feat_index, features[j].feat_index, j - 1, j, num_feat_entries);
267  }
268 
269  // compare with old pointer to enforce quarantee
270  if (stable_pointer)
271  {
272  ASSERT(old_features_ptr == features);
273  }
274 
275  return;
276 }
277 
278 template<class T>
280 {
281  if (num_feat_entries == 0 || num_feat_entries == 1)
282  {
283  return true;
284  }
285 
286  for (index_t j = 1; j < num_feat_entries; j++)
287  {
288  if (features[j - 1].feat_index >= features[j].feat_index)
289  {
290  return false;
291  }
292  }
293 
294  return true;
295 }
296 
297 template<class T>
299 {
300  T ret = 0;
301 
302  if (features)
303  {
304  for (index_t i = 0; i < num_feat_entries; i++)
305  if (features[i].feat_index == index)
306  {
307  ret += features[i].entry ;
308  }
309  }
310 
311  return ret ;
312 }
313 
314 template<class T>
316 {
317  SGVector<T> dense;
318 
319  if (features)
320  {
321  dense.resize_vector(get_num_dimensions());
322  dense.zero();
323 
324  for (index_t i = 0; i < num_feat_entries; i++)
325  {
326  dense.vector[features[i].feat_index] += features[i].entry;
327  }
328  }
329 
330  return dense;
331 }
332 
333 template<class T>
335 {
336  SGVector<T> dense(dimension);
337  dense.zero();
338 
339  if (features)
340  {
341  REQUIRE(get_num_dimensions() <= dimension, "get_dense(dimension=%d): sparse dimension %d exceeds requested dimension\n",
342  dimension, get_num_dimensions());
343 
344  for (index_t i = 0; i < num_feat_entries; i++)
345  {
346  dense.vector[features[i].feat_index] += features[i].entry;
347  }
348  }
349 
350  return dense;
351 }
352 
353 template<class T>
355 {
356  SGSparseVectorEntry <T> * copy = SG_MALLOC(SGSparseVectorEntry <T>, num_feat_entries);
357  memcpy(copy, features, num_feat_entries * sizeof(SGSparseVectorEntry <T>));
358  return SGSparseVector<T>(copy, num_feat_entries);
359 }
360 
361 template<class T> void SGSparseVector<T>::load(CFile * loader)
362 {
363  ASSERT(loader)
364  unref();
365 
367  loader->get_sparse_vector(features, num_feat_entries);
369 }
370 
371 template<class T> void SGSparseVector<T>::save(CFile * saver)
372 {
373  ASSERT(saver)
374 
376  saver->set_sparse_vector(features, num_feat_entries);
378 }
379 
380 template <>
382 {
383  SG_SERROR("SGSparseVector::load():: Not supported for complex128_t\n");
384 }
385 
386 template <>
388 {
389  SG_SERROR("SGSparseVector::save():: Not supported for complex128_t\n");
390 }
391 
392 template <class T>
394 {
395  num_feat_entries = ((SGSparseVector *)(&orig))->num_feat_entries;
396  features = ((SGSparseVector *)(&orig))->features;
397 }
398 
399 template <class T>
401 {
402  num_feat_entries = 0;
403  features = NULL;
404 }
405 
406 template <class T>
408 {
409  num_feat_entries = 0;
410  SG_FREE(features);
411 }
412 
413 template <class T>
415 {
416  SG_SWARNING("Computing sparse_dot(a,b) on unsorted vectors is very expensive: O(n^2)\n");
417  SG_SWARNING("Using fallback to give correct results because upstream code does not sort.\n");
418 
419  T dot_prod = 0;
420 
421  for (index_t b_idx = 0; b_idx < b.num_feat_entries; ++b_idx)
422  {
423  const T tmp = b.features[b_idx].entry;
424 
425  for (index_t a_idx = 0; a_idx < a.num_feat_entries; ++a_idx)
426  {
427  if (a.features[a_idx].feat_index == b.features[b_idx].feat_index)
428  {
429  dot_prod += tmp * a.features[a_idx].entry;
430  }
431  }
432  }
433 
434  return dot_prod;
435 }
436 
437 template <>
438 void SGSparseVector<bool>::display_vector(const char * name, const char * prefix)
439 {
440  SG_SPRINT("%s%s=[", prefix, name);
441 
442  for (int32_t i = 0; i < num_feat_entries; i++)
443  {
444  SG_SPRINT("%s%s%d:%d", prefix, i == 0 ? "" : " ", features[i].feat_index, features[i].entry ? 1 : 0);
445  }
446 
447  SG_SPRINT("%s]\n", prefix);
448 }
449 
450 template <>
451 void SGSparseVector<char>::display_vector(const char * name, const char * prefix)
452 {
453  SG_SPRINT("%s%s=[", prefix, name);
454 
455  for (int32_t i = 0; i < num_feat_entries; i++)
456  {
457  SG_SPRINT("%s%s%d:%c", prefix, i == 0 ? "" : " ", features[i].feat_index, features[i].entry);
458  }
459 
460  SG_SPRINT("%s]\n", prefix);
461 }
462 
463 template <>
464 void SGSparseVector<int8_t>::display_vector(const char * name, const char * prefix)
465 {
466  SG_SPRINT("%s%s=[", prefix, name);
467 
468  for (int32_t i = 0; i < num_feat_entries; i++)
469  {
470  SG_SPRINT("%s%s%d:%d", prefix, i == 0 ? "" : " ", features[i].feat_index, features[i].entry);
471  }
472 
473  SG_SPRINT("%s]\n", prefix);
474 }
475 
476 template <>
477 void SGSparseVector<uint8_t>::display_vector(const char * name, const char * prefix)
478 {
479  SG_SPRINT("%s%s=[", prefix, name);
480 
481  for (int32_t i = 0; i < num_feat_entries; i++)
482  {
483  SG_SPRINT("%s%s%d:%u", prefix, i == 0 ? "" : " ", features[i].feat_index, features[i].entry);
484  }
485 
486  SG_SPRINT("%s]\n", prefix);
487 }
488 
489 template <>
490 void SGSparseVector<int16_t>::display_vector(const char * name, const char * prefix)
491 {
492  SG_SPRINT("%s%s=[", prefix, name);
493 
494  for (int32_t i = 0; i < num_feat_entries; i++)
495  {
496  SG_SPRINT("%s%s%d:%d", prefix, i == 0 ? "" : " ", features[i].feat_index, features[i].entry);
497  }
498 
499  SG_SPRINT("%s]\n", prefix);
500 }
501 
502 template <>
503 void SGSparseVector<uint16_t>::display_vector(const char * name, const char * prefix)
504 {
505  SG_SPRINT("%s%s=[", prefix, name);
506 
507  for (int32_t i = 0; i < num_feat_entries; i++)
508  {
509  SG_SPRINT("%s%s%d:%u", prefix, i == 0 ? "" : " ", features[i].feat_index, features[i].entry);
510  }
511 
512  SG_SPRINT("%s]\n", prefix);
513 }
514 
515 template <>
516 void SGSparseVector<int32_t>::display_vector(const char * name, const char * prefix)
517 {
518  SG_SPRINT("%s%s=[", prefix, name);
519 
520  for (int32_t i = 0; i < num_feat_entries; i++)
521  {
522  SG_SPRINT("%s%s%d:%d", prefix, i == 0 ? "" : " ", features[i].feat_index, features[i].entry);
523  }
524 
525  SG_SPRINT("%s]\n", prefix);
526 }
527 
528 template <>
529 void SGSparseVector<uint32_t>::display_vector(const char * name, const char * prefix)
530 {
531  SG_SPRINT("%s%s=[", prefix, name);
532 
533  for (int32_t i = 0; i < num_feat_entries; i++)
534  {
535  SG_SPRINT("%s%s%d:%u", prefix, i == 0 ? "" : " ", features[i].feat_index, features[i].entry);
536  }
537 
538  SG_SPRINT("%s]\n", prefix);
539 }
540 
541 template <>
542 void SGSparseVector<int64_t>::display_vector(const char * name, const char * prefix)
543 {
544  SG_SPRINT("%s%s=[", prefix, name);
545 
546  for (int32_t i = 0; i < num_feat_entries; i++)
547  {
548  SG_SPRINT("%s%s%d:%lld", prefix, i == 0 ? "" : " ", features[i].feat_index, features[i].entry);
549  }
550 
551  SG_SPRINT("%s]\n", prefix);
552 }
553 
554 template <>
555 void SGSparseVector<uint64_t>::display_vector(const char * name, const char * prefix)
556 {
557  SG_SPRINT("%s%s=[", prefix, name);
558 
559  for (int32_t i = 0; i < num_feat_entries; i++)
560  {
561  SG_SPRINT("%s%s%d:%llu ", prefix, i == 0 ? "" : " ", features[i].feat_index, features[i].entry);
562  }
563 
564  SG_SPRINT("%s]\n", prefix);
565 }
566 
567 template <>
568 void SGSparseVector<float32_t>::display_vector(const char * name, const char * prefix)
569 {
570  SG_SPRINT("%s%s=[", prefix, name);
571 
572  for (int32_t i = 0; i < num_feat_entries; i++)
573  {
574  SG_SPRINT("%s%s%d:%g", prefix, i == 0 ? "" : " ", features[i].feat_index, features[i].entry);
575  }
576 
577  SG_SPRINT("%s]\n", prefix);
578 }
579 
580 template <>
581 void SGSparseVector<float64_t>::display_vector(const char * name, const char * prefix)
582 {
583  SG_SPRINT("%s%s=[", prefix, name);
584 
585  for (int32_t i = 0; i < num_feat_entries; i++)
586  {
587  SG_SPRINT("%s%s%d:%.18g", prefix, i == 0 ? "" : " ", features[i].feat_index, features[i].entry);
588  }
589 
590  SG_SPRINT("%s]\n", prefix);
591 }
592 
593 template <>
594 void SGSparseVector<floatmax_t>::display_vector(const char * name, const char * prefix)
595 {
596  SG_SPRINT("%s%s=[", prefix, name);
597 
598  for (int32_t i = 0; i < num_feat_entries; i++)
599  {
600  SG_SPRINT("%s%s%d:%.36Lg", prefix, i == 0 ? "" : " ", features[i].feat_index, features[i].entry);
601  }
602 
603  SG_SPRINT("%s]\n", prefix);
604 }
605 
606 template <>
607 void SGSparseVector<complex128_t>::display_vector(const char * name, const char * prefix)
608 {
609  SG_SPRINT("%s%s=[", prefix, name);
610 
611  for (int32_t i = 0; i < num_feat_entries; i++)
612  SG_SPRINT("%s%s%d:(%.18lg+i%.18lg)", prefix, i == 0 ? "" : " ", features[i].feat_index,
613  features[i].entry.real(), features[i].entry.imag());
614 
615  SG_SPRINT("%s]\n", prefix);
616 }
617 
618 template class SGSparseVector<bool>;
619 template class SGSparseVector<char>;
620 template class SGSparseVector<int8_t>;
621 template class SGSparseVector<uint8_t>;
622 template class SGSparseVector<int16_t>;
623 template class SGSparseVector<uint16_t>;
624 template class SGSparseVector<int32_t>;
625 template class SGSparseVector<uint32_t>;
626 template class SGSparseVector<int64_t>;
627 template class SGSparseVector<uint64_t>;
628 template class SGSparseVector<float32_t>;
629 template class SGSparseVector<float64_t>;
630 template class SGSparseVector<floatmax_t>;
631 template class SGSparseVector<complex128_t>;
632 }

SHOGUN Machine Learning Toolbox - Documentation