SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
SGSparseVector.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2013,2014 Thoralf Klein
8  * Written (W) 2012 Fernando José Iglesias García
9  * Written (W) 2010,2012 Soeren Sonnenburg
10  * Copyright (C) 2010 Berlin Institute of Technology
11  * Copyright (C) 2012 Soeren Sonnenburg
12  */
13 
15 #include <shogun/lib/SGVector.h>
17 #include <shogun/io/File.h>
18 
19 namespace shogun
20 {
21 
22 template <class T>
24 {
25  init_data();
26 }
27 
28 template <class T>
30  bool ref_counting) :
31  SGReferencedData(ref_counting),
32  num_feat_entries(num_entries), features(feats)
33 {
34 }
35 
36 template <class T>
37 SGSparseVector<T>::SGSparseVector(index_t num_entries, bool ref_counting) :
38  SGReferencedData(ref_counting),
39  num_feat_entries(num_entries)
40 {
42 }
43 
44 template <class T>
46  SGReferencedData(orig)
47 {
48  copy_data(orig);
49 }
50 
51 template <class T>
53 {
54  unref();
55 }
56 
57 template <class T>
58 T SGSparseVector<T>::dense_dot(T alpha, T * vec, int32_t dim, T b)
59 {
60  ASSERT(vec)
61  T result = b;
62 
63  if (features)
64  {
65  for (int32_t i = 0; i < num_feat_entries; i++)
66  {
67  if (features[i].feat_index < dim)
68  {
69  result += alpha * vec[features[i].feat_index] * features[i].entry;
70  }
71  }
72  }
73 
74  return result;
75 }
76 
77 template <class T>
78 template <typename ST>
80 {
81  ASSERT(vec)
82  T result(0.0);
83 
84  if (features)
85  {
86  for (int32_t i = 0; i < num_feat_entries; i++)
87  {
88  if (features[i].feat_index < vec.vlen)
89  result += static_cast<T>(vec[features[i].feat_index])
90  * features[i].entry;
91  }
92  }
93 
94  return result;
95 }
96 
100 
101 template <class T>
103 {
104  return sparse_dot(*this, v);
105 }
106 
107 template <class T>
109 {
110  if (a.num_feat_entries == 0 || b.num_feat_entries == 0)
111  {
112  return 0;
113  }
114 
115  if (!a.is_sorted() || !b.is_sorted())
116  {
117  return dot_prod_expensive_unsorted(a, b);
118  }
119 
120  T dot_prod = 0;
121  index_t a_idx = 0, b_idx = 0;
122 
123  while (a_idx < a.num_feat_entries && b_idx < b.num_feat_entries)
124  {
125  if (a.features[a_idx].feat_index < b.features[b_idx].feat_index)
126  {
127  a_idx++;
128  }
129  else if (a.features[a_idx].feat_index > b.features[b_idx].feat_index)
130  {
131  b_idx++;
132  }
133  else
134  {
135  // a.features[a_idx].feat_index == b.features[b_idx].feat_index
136  dot_prod += a.features[a_idx].entry * b.features[b_idx].entry;
137  a_idx++;
138  b_idx++;
139  }
140  }
141 
142  return dot_prod;
143 }
144 
145 template<class T>
147 {
148  if (!features)
149  {
150  return 0;
151  }
152 
153  int32_t dimensions = -1;
154 
155  for (index_t i = 0; i < num_feat_entries; i++)
156  {
157  if (features[i].feat_index > dimensions)
158  {
159  dimensions = features[i].feat_index;
160  }
161  }
162 
163  return dimensions + 1;
164 }
165 
166 template<class T>
167 void SGSparseVector<T>::sort_features(bool stable_pointer)
168 {
169  if (!num_feat_entries)
170  {
171  return;
172  }
173 
174  // remember old pointer to enforce quarantee
175  const SGSparseVectorEntry<T> * old_features_ptr = features;
176 
177  int32_t * feat_idx = SG_MALLOC(int32_t, num_feat_entries);
178 
179  for (index_t j = 0; j < num_feat_entries; j++)
180  {
181  feat_idx[j] = features[j].feat_index;
182  }
183 
184  CMath::qsort_index(feat_idx, features, num_feat_entries);
185  SG_FREE(feat_idx);
186 
187  for (index_t j = 1; j < num_feat_entries; j++)
188  {
189  REQUIRE(features[j - 1].feat_index <= features[j].feat_index,
190  "sort_features(): failed sanity check %d <= %d after sorting (comparing indices features[%d] <= features[%d], features=%d)\n",
191  features[j - 1].feat_index, features[j].feat_index, j - 1, j, num_feat_entries);
192  }
193 
194  // compression: removing zero-entries and merging features with same index
195  int32_t last_index = 0;
196 
197  for (index_t j = 1; j < num_feat_entries; j++)
198  {
199  // always true, but kept for future changes
200  REQUIRE(last_index < j,
201  "sort_features(): target index %d must not exceed source index j=%d",
202  last_index, j);
203  REQUIRE(features[last_index].feat_index <= features[j].feat_index,
204  "sort_features(): failed sanity check %d = features[%d].feat_index <= features[%d].feat_index = %d\n",
205  features[last_index].feat_index, last_index, j, features[j].feat_index);
206 
207  // merging of features with same index
208  if (features[last_index].feat_index == features[j].feat_index)
209  {
210  features[last_index].entry += features[j].entry;
211  continue;
212  }
213 
214  // only skip to next element if current one is not zero
215  if (features[last_index].entry != 0.0)
216  {
217  last_index++;
218  }
219 
220  features[last_index] = features[j];
221  }
222 
223  // remove single first element if zero (not caught by loop)
224  if (features[last_index].entry == 0.0)
225  {
226  last_index--;
227  }
228 
229  int32_t new_feat_count = last_index + 1;
230  ASSERT(new_feat_count <= num_feat_entries);
231 
232  // shrinking vector
233  if (!stable_pointer)
234  {
235  SG_SINFO("shrinking vector from %d to %d\n", num_feat_entries, new_feat_count);
236  features = SG_REALLOC(SGSparseVectorEntry<T>, features, num_feat_entries, new_feat_count);
237  }
238 
239  num_feat_entries = new_feat_count;
240 
241  for (index_t j = 1; j < num_feat_entries; j++)
242  {
243  REQUIRE(features[j - 1].feat_index < features[j].feat_index,
244  "sort_features(): failed sanity check %d < %d after sorting (comparing indices features[%d] < features[%d], features=%d)\n",
245  features[j - 1].feat_index, features[j].feat_index, j - 1, j, num_feat_entries);
246  }
247 
248  // compare with old pointer to enforce quarantee
249  if (stable_pointer)
250  {
251  ASSERT(old_features_ptr == features);
252  }
253 
254  return;
255 }
256 
257 template<class T>
259 {
260  if (num_feat_entries == 0 || num_feat_entries == 1)
261  {
262  return true;
263  }
264 
265  for (index_t j = 1; j < num_feat_entries; j++)
266  {
267  if (features[j - 1].feat_index >= features[j].feat_index)
268  {
269  return false;
270  }
271  }
272 
273  return true;
274 }
275 
276 template<class T>
278 {
279  T ret = 0;
280 
281  if (features)
282  {
283  for (index_t i = 0; i < num_feat_entries; i++)
284  if (features[i].feat_index == index)
285  {
286  ret += features[i].entry ;
287  }
288  }
289 
290  return ret ;
291 }
292 
293 template<class T>
295 {
296  SGVector<T> dense;
297 
298  if (features)
299  {
300  dense.resize_vector(get_num_dimensions());
301  dense.zero();
302 
303  for (index_t i = 0; i < num_feat_entries; i++)
304  {
305  dense.vector[features[i].feat_index] += features[i].entry;
306  }
307  }
308 
309  return dense;
310 }
311 
312 template<class T>
314 {
315  SGVector<T> dense(dimension);
316  dense.zero();
317 
318  if (features)
319  {
320  REQUIRE(get_num_dimensions() <= dimension, "get_dense(dimension=%d): sparse dimension %d exceeds requested dimension\n",
321  dimension, get_num_dimensions());
322 
323  for (index_t i = 0; i < num_feat_entries; i++)
324  {
325  dense.vector[features[i].feat_index] += features[i].entry;
326  }
327  }
328 
329  return dense;
330 }
331 
332 template<class T>
334 {
335  SGSparseVectorEntry <T> * copy = SG_MALLOC(SGSparseVectorEntry <T>, num_feat_entries);
336  memcpy(copy, features, num_feat_entries * sizeof(SGSparseVectorEntry <T>));
337  return SGSparseVector<T>(copy, num_feat_entries);
338 }
339 
340 template<class T> void SGSparseVector<T>::load(CFile * loader)
341 {
342  ASSERT(loader)
343  unref();
344 
346  loader->get_sparse_vector(features, num_feat_entries);
348 }
349 
350 template<class T> void SGSparseVector<T>::save(CFile * saver)
351 {
352  ASSERT(saver)
353 
355  saver->set_sparse_vector(features, num_feat_entries);
357 }
358 
359 template <>
361 {
362  SG_SERROR("SGSparseVector::load():: Not supported for complex128_t\n");
363 }
364 
365 template <>
367 {
368  SG_SERROR("SGSparseVector::save():: Not supported for complex128_t\n");
369 }
370 
371 template <class T>
373 {
374  num_feat_entries = ((SGSparseVector *)(&orig))->num_feat_entries;
375  features = ((SGSparseVector *)(&orig))->features;
376 }
377 
378 template <class T>
380 {
381  num_feat_entries = 0;
382  features = NULL;
383 }
384 
385 template <class T>
387 {
388  num_feat_entries = 0;
389  SG_FREE(features);
390 }
391 
392 template <class T>
394 {
395  SG_SWARNING("Computing sparse_dot(a,b) on unsorted vectors is very expensive: O(n^2)\n");
396  SG_SWARNING("Using fallback to give correct results because upstream code does not sort.\n");
397 
398  T dot_prod = 0;
399 
400  for (index_t b_idx = 0; b_idx < b.num_feat_entries; ++b_idx)
401  {
402  const T tmp = b.features[b_idx].entry;
403 
404  for (index_t a_idx = 0; a_idx < a.num_feat_entries; ++a_idx)
405  {
406  if (a.features[a_idx].feat_index == b.features[b_idx].feat_index)
407  {
408  dot_prod += tmp * a.features[a_idx].entry;
409  }
410  }
411  }
412 
413  return dot_prod;
414 }
415 
416 template <>
417 void SGSparseVector<bool>::display_vector(const char * name, const char * prefix)
418 {
419  SG_SPRINT("%s%s=[", prefix, name);
420 
421  for (int32_t i = 0; i < num_feat_entries; i++)
422  {
423  SG_SPRINT("%s%s%d:%d", prefix, i == 0 ? "" : " ", features[i].feat_index, features[i].entry ? 1 : 0);
424  }
425 
426  SG_SPRINT("%s]\n", prefix);
427 }
428 
429 template <>
430 void SGSparseVector<char>::display_vector(const char * name, const char * prefix)
431 {
432  SG_SPRINT("%s%s=[", prefix, name);
433 
434  for (int32_t i = 0; i < num_feat_entries; i++)
435  {
436  SG_SPRINT("%s%s%d:%c", prefix, i == 0 ? "" : " ", features[i].feat_index, features[i].entry);
437  }
438 
439  SG_SPRINT("%s]\n", prefix);
440 }
441 
442 template <>
443 void SGSparseVector<int8_t>::display_vector(const char * name, const char * prefix)
444 {
445  SG_SPRINT("%s%s=[", prefix, name);
446 
447  for (int32_t i = 0; i < num_feat_entries; i++)
448  {
449  SG_SPRINT("%s%s%d:%d", prefix, i == 0 ? "" : " ", features[i].feat_index, features[i].entry);
450  }
451 
452  SG_SPRINT("%s]\n", prefix);
453 }
454 
455 template <>
456 void SGSparseVector<uint8_t>::display_vector(const char * name, const char * prefix)
457 {
458  SG_SPRINT("%s%s=[", prefix, name);
459 
460  for (int32_t i = 0; i < num_feat_entries; i++)
461  {
462  SG_SPRINT("%s%s%d:%u", prefix, i == 0 ? "" : " ", features[i].feat_index, features[i].entry);
463  }
464 
465  SG_SPRINT("%s]\n", prefix);
466 }
467 
468 template <>
469 void SGSparseVector<int16_t>::display_vector(const char * name, const char * prefix)
470 {
471  SG_SPRINT("%s%s=[", prefix, name);
472 
473  for (int32_t i = 0; i < num_feat_entries; i++)
474  {
475  SG_SPRINT("%s%s%d:%d", prefix, i == 0 ? "" : " ", features[i].feat_index, features[i].entry);
476  }
477 
478  SG_SPRINT("%s]\n", prefix);
479 }
480 
481 template <>
482 void SGSparseVector<uint16_t>::display_vector(const char * name, const char * prefix)
483 {
484  SG_SPRINT("%s%s=[", prefix, name);
485 
486  for (int32_t i = 0; i < num_feat_entries; i++)
487  {
488  SG_SPRINT("%s%s%d:%u", prefix, i == 0 ? "" : " ", features[i].feat_index, features[i].entry);
489  }
490 
491  SG_SPRINT("%s]\n", prefix);
492 }
493 
494 template <>
495 void SGSparseVector<int32_t>::display_vector(const char * name, const char * prefix)
496 {
497  SG_SPRINT("%s%s=[", prefix, name);
498 
499  for (int32_t i = 0; i < num_feat_entries; i++)
500  {
501  SG_SPRINT("%s%s%d:%d", prefix, i == 0 ? "" : " ", features[i].feat_index, features[i].entry);
502  }
503 
504  SG_SPRINT("%s]\n", prefix);
505 }
506 
507 template <>
508 void SGSparseVector<uint32_t>::display_vector(const char * name, const char * prefix)
509 {
510  SG_SPRINT("%s%s=[", prefix, name);
511 
512  for (int32_t i = 0; i < num_feat_entries; i++)
513  {
514  SG_SPRINT("%s%s%d:%u", prefix, i == 0 ? "" : " ", features[i].feat_index, features[i].entry);
515  }
516 
517  SG_SPRINT("%s]\n", prefix);
518 }
519 
520 template <>
521 void SGSparseVector<int64_t>::display_vector(const char * name, const char * prefix)
522 {
523  SG_SPRINT("%s%s=[", prefix, name);
524 
525  for (int32_t i = 0; i < num_feat_entries; i++)
526  {
527  SG_SPRINT("%s%s%d:%lld", prefix, i == 0 ? "" : " ", features[i].feat_index, features[i].entry);
528  }
529 
530  SG_SPRINT("%s]\n", prefix);
531 }
532 
533 template <>
534 void SGSparseVector<uint64_t>::display_vector(const char * name, const char * prefix)
535 {
536  SG_SPRINT("%s%s=[", prefix, name);
537 
538  for (int32_t i = 0; i < num_feat_entries; i++)
539  {
540  SG_SPRINT("%s%s%d:%llu ", prefix, i == 0 ? "" : " ", features[i].feat_index, features[i].entry);
541  }
542 
543  SG_SPRINT("%s]\n", prefix);
544 }
545 
546 template <>
547 void SGSparseVector<float32_t>::display_vector(const char * name, const char * prefix)
548 {
549  SG_SPRINT("%s%s=[", prefix, name);
550 
551  for (int32_t i = 0; i < num_feat_entries; i++)
552  {
553  SG_SPRINT("%s%s%d:%g", prefix, i == 0 ? "" : " ", features[i].feat_index, features[i].entry);
554  }
555 
556  SG_SPRINT("%s]\n", prefix);
557 }
558 
559 template <>
560 void SGSparseVector<float64_t>::display_vector(const char * name, const char * prefix)
561 {
562  SG_SPRINT("%s%s=[", prefix, name);
563 
564  for (int32_t i = 0; i < num_feat_entries; i++)
565  {
566  SG_SPRINT("%s%s%d:%.18g", prefix, i == 0 ? "" : " ", features[i].feat_index, features[i].entry);
567  }
568 
569  SG_SPRINT("%s]\n", prefix);
570 }
571 
572 template <>
573 void SGSparseVector<floatmax_t>::display_vector(const char * name, const char * prefix)
574 {
575  SG_SPRINT("%s%s=[", prefix, name);
576 
577  for (int32_t i = 0; i < num_feat_entries; i++)
578  {
579  SG_SPRINT("%s%s%d:%.36Lg", prefix, i == 0 ? "" : " ", features[i].feat_index, features[i].entry);
580  }
581 
582  SG_SPRINT("%s]\n", prefix);
583 }
584 
585 template <>
586 void SGSparseVector<complex128_t>::display_vector(const char * name, const char * prefix)
587 {
588  SG_SPRINT("%s%s=[", prefix, name);
589 
590  for (int32_t i = 0; i < num_feat_entries; i++)
591  SG_SPRINT("%s%s%d:(%.18lg+i%.18lg)", prefix, i == 0 ? "" : " ", features[i].feat_index,
592  features[i].entry.real(), features[i].entry.imag());
593 
594  SG_SPRINT("%s]\n", prefix);
595 }
596 
597 template class SGSparseVector<bool>;
598 template class SGSparseVector<char>;
599 template class SGSparseVector<int8_t>;
600 template class SGSparseVector<uint8_t>;
601 template class SGSparseVector<int16_t>;
602 template class SGSparseVector<uint16_t>;
603 template class SGSparseVector<int32_t>;
604 template class SGSparseVector<uint32_t>;
605 template class SGSparseVector<int64_t>;
606 template class SGSparseVector<uint64_t>;
607 template class SGSparseVector<float32_t>;
608 template class SGSparseVector<float64_t>;
609 template class SGSparseVector<floatmax_t>;
610 template class SGSparseVector<complex128_t>;
611 }

SHOGUN Machine Learning Toolbox - Documentation