SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
SGSparseVector.cpp
Go to the documentation of this file.
2 #include <shogun/lib/SGVector.h>
4 #include <shogun/io/File.h>
5 
6 namespace shogun {
7 
8 template <class T>
10 {
11  init_data();
12 }
13 
14 template <class T>
16  bool ref_counting) :
17  SGReferencedData(ref_counting),
18  num_feat_entries(num_entries), features(feats)
19 {
20 }
21 
22 template <class T>
23 SGSparseVector<T>::SGSparseVector(index_t num_entries, bool ref_counting) :
24  SGReferencedData(ref_counting),
25  num_feat_entries(num_entries)
26 {
28 }
29 
30 template <class T>
32  SGReferencedData(orig)
33 {
34  copy_data(orig);
35 }
36 
37 template <class T>
39 {
40  unref();
41 }
42 
43 template <class T>
44 T SGSparseVector<T>::dense_dot(T alpha, T* vec, int32_t dim, T b)
45 {
46  ASSERT(vec)
47  T result=b;
48 
49  if (features)
50  {
51  for (int32_t i=0; i<num_feat_entries; i++)
52  {
53  if (features[i].feat_index<dim)
54  result+=alpha*vec[features[i].feat_index]*features[i].entry;
55  }
56  }
57 
58  return result;
59 }
60 
61 template <class T>
62 template <typename ST>
64 {
65  ASSERT(vec)
66  T result(0.0);
67 
68  if (features)
69  {
70  for (int32_t i=0; i<num_feat_entries; i++)
71  {
72  if (features[i].feat_index<vec.vlen)
73  result+=static_cast<T>(vec[features[i].feat_index])
74  *features[i].entry;
75  }
76  }
77 
78  return result;
79 }
80 
84 
85 template <class T>
87 {
88  return sparse_dot(*this, v);
89 }
90 
91 template <class T>
93 {
94  if (a.num_feat_entries == 0 || b.num_feat_entries == 0)
95  return 0;
96 
97  int32_t cmp = cmp_dot_prod_symmetry_fast(a.num_feat_entries, b.num_feat_entries);
98 
99  if (cmp == 0) // symmetric
100  {
101  return dot_prod_symmetric(a, b);
102  }
103  else if (cmp > 0) // b has more element
104  {
105  return dot_prod_asymmetric(a, b);
106  }
107  else // a has more element
108  {
109  return dot_prod_asymmetric(b, a);
110  }
111 }
112 
113 template<class T>
115 {
116  if (!features)
117  return 0;
118 
119  int32_t dimensions = -1;
120  for (index_t i=0; i<num_feat_entries; i++)
121  {
122  if (features[i].feat_index > dimensions)
123  {
124  dimensions = features[i].feat_index;
125  }
126  }
127 
128  return dimensions+1;
129 }
130 
131 template<class T>
132 void SGSparseVector<T>::sort_features(bool stable_pointer)
133 {
134  if (!num_feat_entries)
135  return;
136 
137  // remember old pointer to enforce quarantee
138  const SGSparseVectorEntry<T>* old_features_ptr = features;
139 
140  int32_t* feat_idx=SG_MALLOC(int32_t, num_feat_entries);
141  for (index_t j=0; j<num_feat_entries; j++)
142  {
143  feat_idx[j]=features[j].feat_index;
144  }
145 
146  CMath::qsort_index(feat_idx, features, num_feat_entries);
147  SG_FREE(feat_idx);
148 
149  for (index_t j=1; j<num_feat_entries; j++)
150  {
151  REQUIRE(features[j-1].feat_index <= features[j].feat_index,
152  "sort_features(): failed sanity check %d <= %d after sorting (comparing indices features[%d] <= features[%d], features=%d)\n",
153  features[j-1].feat_index, features[j].feat_index, j-1, j, num_feat_entries);
154  }
155 
156  // compression: removing zero-entries and merging features with same index
157  int32_t last_index = 0;
158  for (index_t j=1; j<num_feat_entries; j++)
159  {
160  // always true, but kept for future changes
161  REQUIRE(last_index < j,
162  "sort_features(): target index %d must not exceed source index j=%d",
163  last_index, j);
164  REQUIRE(features[last_index].feat_index <= features[j].feat_index,
165  "sort_features(): failed sanity check %d = features[%d].feat_index <= features[%d].feat_index = %d\n",
166  features[last_index].feat_index, last_index, j, features[j].feat_index);
167 
168  // merging of features with same index
169  if (features[last_index].feat_index == features[j].feat_index)
170  {
171  features[last_index].entry += features[j].entry;
172  continue;
173  }
174 
175  // only skip to next element if current one is not zero
176  if (features[last_index].entry != 0.0)
177  {
178  last_index++;
179  }
180 
181  features[last_index] = features[j];
182  }
183 
184  // remove single first element if zero (not caught by loop)
185  if (features[last_index].entry == 0.0)
186  {
187  last_index--;
188  }
189 
190  int32_t new_feat_count = last_index+1;
191  ASSERT(new_feat_count <= num_feat_entries);
192 
193  // shrinking vector
194  if (!stable_pointer)
195  {
196  SG_SINFO("shrinking vector from %d to %d\n", num_feat_entries, new_feat_count);
197  features = SG_REALLOC(SGSparseVectorEntry<T>, features, num_feat_entries, new_feat_count);
198  }
199  num_feat_entries = new_feat_count;
200 
201  for (index_t j=1; j<num_feat_entries; j++)
202  {
203  REQUIRE(features[j-1].feat_index < features[j].feat_index,
204  "sort_features(): failed sanity check %d < %d after sorting (comparing indices features[%d] < features[%d], features=%d)\n",
205  features[j-1].feat_index, features[j].feat_index, j-1, j, num_feat_entries);
206  }
207 
208  // compare with old pointer to enforce quarantee
209  if (stable_pointer) {
210  ASSERT(old_features_ptr == features);
211  }
212  return;
213 }
214 
215 template<class T>
217 {
218  T ret = 0;
219  if (features)
220  {
221  for (index_t i=0; i<num_feat_entries; i++)
222  if (features[i].feat_index==index)
223  ret+=features[i].entry ;
224  }
225 
226  return ret ;
227 }
228 
229 template<class T>
231 {
232  SGVector<T> dense;
233 
234  if (features)
235  {
236  dense.resize_vector(get_num_dimensions());
237  dense.zero();
238 
239  for (index_t i=0; i<num_feat_entries; i++)
240  {
241  dense.vector[features[i].feat_index] += features[i].entry;
242  }
243  }
244 
245  return dense;
246 }
247 
248 template<class T>
250 {
251  SGVector<T> dense(dimension);
252  dense.zero();
253 
254  if (features)
255  {
256  REQUIRE(get_num_dimensions() <= dimension, "get_dense(dimension=%d): sparse dimension %d exceeds requested dimension\n",
257  dimension, get_num_dimensions());
258 
259  for (index_t i=0; i<num_feat_entries; i++)
260  {
261  dense.vector[features[i].feat_index] += features[i].entry;
262  }
263  }
264 
265  return dense;
266 }
267 
268 template<class T>
270 {
271  SGSparseVectorEntry <T> * copy = SG_MALLOC(SGSparseVectorEntry <T>, num_feat_entries);
272  memcpy(copy, features, num_feat_entries * sizeof(SGSparseVectorEntry <T>));
273  return SGSparseVector<T>(copy, num_feat_entries);
274 }
275 
276 template<class T> void SGSparseVector<T>::load(CFile* loader)
277 {
278  ASSERT(loader)
279  unref();
280 
282  loader->get_sparse_vector(features, num_feat_entries);
284 }
285 
286 template<class T> void SGSparseVector<T>::save(CFile* saver)
287 {
288  ASSERT(saver)
289 
291  saver->set_sparse_vector(features, num_feat_entries);
293 }
294 
295 template <>
297 {
298  SG_SERROR("SGSparseVector::load():: Not supported for complex128_t\n");
299 }
300 
301 template <>
303 {
304  SG_SERROR("SGSparseVector::save():: Not supported for complex128_t\n");
305 }
306 
307 template <class T>
309 {
310  num_feat_entries = ((SGSparseVector*)(&orig))->num_feat_entries;
311  features = ((SGSparseVector*)(&orig))->features;
312 }
313 
314 template <class T>
316 {
317  num_feat_entries = 0;
318  features = NULL;
319 }
320 
321 template <class T>
323 {
324  num_feat_entries = 0;
325  SG_FREE(features);
326 }
327 
328 template <class T>
330 {
331  if (alen > blen) // no need for floats here
332  {
333  return (blen * CMath::floor_log(alen) < alen) ? -1 : 0;
334  }
335  else // alen <= blen
336  {
337  return (alen * CMath::floor_log(blen) < blen) ? 1 : 0;
338  }
339 }
340 
341 template <class T>
343 {
344  T dot_prod = 0;
345  for(index_t b_idx = 0; b_idx < b.num_feat_entries; ++b_idx)
346  {
347  const T tmp = b.features[b_idx].entry;
348  if (a.features[a.num_feat_entries-1].feat_index < b.features[b_idx].feat_index)
349  break;
350  for (index_t a_idx = 0; a_idx < a.num_feat_entries; ++a_idx)
351  {
352  if (a.features[a_idx].feat_index == b.features[b_idx].feat_index)
353  dot_prod += tmp * a.features[a_idx].entry;
354  }
355  }
356  return dot_prod;
357 }
358 
359 template <class T>
361 {
363  T dot_prod = 0;
364  index_t a_idx = 0, b_idx = 0;
365  while (true)
366  {
367  if (a.features[a_idx].feat_index == b.features[b_idx].feat_index)
368  {
369  dot_prod += a.features[a_idx].entry * b.features[b_idx].entry;
370 
371  a_idx++;
372  if (a.num_feat_entries == a_idx)
373  break;
374  b_idx++;
375  if (b.num_feat_entries == b_idx)
376  break;
377  }
378  else if (a.features[a_idx].feat_index < b.features[b_idx].feat_index)
379  {
380  a_idx++;
381  if (a.num_feat_entries == a_idx)
382  break;
383  }
384  else
385  {
386  b_idx++;
387  if (b.num_feat_entries == b_idx)
388  break;
389  }
390  }
391  return dot_prod;
392 }
393 
394 template <>
395 void SGSparseVector<bool>::display_vector(const char* name, const char* prefix)
396 {
397  SG_SPRINT("%s%s=[", prefix, name);
398  for (int32_t i=0; i<num_feat_entries; i++)
399  SG_SPRINT("%s%s%d:%d", prefix, i==0 ? "" : " ", features[i].feat_index, features[i].entry ? 1 : 0);
400  SG_SPRINT("%s]\n", prefix);
401 }
402 
403 template <>
404 void SGSparseVector<char>::display_vector(const char* name, const char* prefix)
405 {
406  SG_SPRINT("%s%s=[", prefix, name);
407  for (int32_t i=0; i<num_feat_entries; i++)
408  SG_SPRINT("%s%s%d:%c", prefix, i==0 ? "" : " ", features[i].feat_index, features[i].entry);
409  SG_SPRINT("%s]\n", prefix);
410 }
411 
412 template <>
413 void SGSparseVector<int8_t>::display_vector(const char* name, const char* prefix)
414 {
415  SG_SPRINT("%s%s=[", prefix, name);
416  for (int32_t i=0; i<num_feat_entries; i++)
417  SG_SPRINT("%s%s%d:%d", prefix, i==0 ? "" : " ", features[i].feat_index, features[i].entry);
418  SG_SPRINT("%s]\n", prefix);
419 }
420 
421 template <>
422 void SGSparseVector<uint8_t>::display_vector(const char* name, const char* prefix)
423 {
424  SG_SPRINT("%s%s=[", prefix, name);
425  for (int32_t i=0; i<num_feat_entries; i++)
426  SG_SPRINT("%s%s%d:%u", prefix, i==0 ? "" : " ", features[i].feat_index, features[i].entry);
427  SG_SPRINT("%s]\n", prefix);
428 }
429 
430 template <>
431 void SGSparseVector<int16_t>::display_vector(const char* name, const char* prefix)
432 {
433  SG_SPRINT("%s%s=[", prefix, name);
434  for (int32_t i=0; i<num_feat_entries; i++)
435  SG_SPRINT("%s%s%d:%d", prefix, i==0 ? "" : " ", features[i].feat_index, features[i].entry);
436  SG_SPRINT("%s]\n", prefix);
437 }
438 
439 template <>
440 void SGSparseVector<uint16_t>::display_vector(const char* name, const char* prefix)
441 {
442  SG_SPRINT("%s%s=[", prefix, name);
443  for (int32_t i=0; i<num_feat_entries; i++)
444  SG_SPRINT("%s%s%d:%u", prefix, i==0 ? "" : " ", features[i].feat_index, features[i].entry);
445  SG_SPRINT("%s]\n", prefix);
446 }
447 
448 template <>
449 void SGSparseVector<int32_t>::display_vector(const char* name, const char* prefix)
450 {
451  SG_SPRINT("%s%s=[", prefix, name);
452  for (int32_t i=0; i<num_feat_entries; i++)
453  SG_SPRINT("%s%s%d:%d", prefix, i==0 ? "" : " ", features[i].feat_index, features[i].entry);
454  SG_SPRINT("%s]\n", prefix);
455 }
456 
457 template <>
458 void SGSparseVector<uint32_t>::display_vector(const char* name, const char* prefix)
459 {
460  SG_SPRINT("%s%s=[", prefix, name);
461  for (int32_t i=0; i<num_feat_entries; i++)
462  SG_SPRINT("%s%s%d:%u", prefix, i==0 ? "" : " ", features[i].feat_index, features[i].entry);
463  SG_SPRINT("%s]\n", prefix);
464 }
465 
466 template <>
467 void SGSparseVector<int64_t>::display_vector(const char* name, const char* prefix)
468 {
469  SG_SPRINT("%s%s=[", prefix, name);
470  for (int32_t i=0; i<num_feat_entries; i++)
471  SG_SPRINT("%s%s%d:%lld", prefix, i==0 ? "" : " ", features[i].feat_index, features[i].entry);
472  SG_SPRINT("%s]\n", prefix);
473 }
474 
475 template <>
476 void SGSparseVector<uint64_t>::display_vector(const char* name, const char* prefix)
477 {
478  SG_SPRINT("%s%s=[", prefix, name);
479  for (int32_t i=0; i<num_feat_entries; i++)
480  SG_SPRINT("%s%s%d:%llu ", prefix, i==0 ? "" : " ", features[i].feat_index, features[i].entry);
481  SG_SPRINT("%s]\n", prefix);
482 }
483 
484 template <>
485 void SGSparseVector<float32_t>::display_vector(const char* name, const char* prefix)
486 {
487  SG_SPRINT("%s%s=[", prefix, name);
488  for (int32_t i=0; i<num_feat_entries; i++)
489  SG_SPRINT("%s%s%d:%g", prefix, i==0 ? "" : " ", features[i].feat_index, features[i].entry);
490  SG_SPRINT("%s]\n", prefix);
491 }
492 
493 template <>
494 void SGSparseVector<float64_t>::display_vector(const char* name, const char* prefix)
495 {
496  SG_SPRINT("%s%s=[", prefix, name);
497  for (int32_t i=0; i<num_feat_entries; i++)
498  SG_SPRINT("%s%s%d:%.18g", prefix, i==0 ? "" : " ", features[i].feat_index, features[i].entry);
499  SG_SPRINT("%s]\n", prefix);
500 }
501 
502 template <>
503 void SGSparseVector<floatmax_t>::display_vector(const char* name, const char* prefix)
504 {
505  SG_SPRINT("%s%s=[", prefix, name);
506  for (int32_t i=0; i<num_feat_entries; i++)
507  SG_SPRINT("%s%s%d:%.36Lg", prefix, i==0 ? "" : " ", features[i].feat_index, features[i].entry);
508  SG_SPRINT("%s]\n", prefix);
509 }
510 
511 template <>
512 void SGSparseVector<complex128_t>::display_vector(const char* name, const char* prefix)
513 {
514  SG_SPRINT("%s%s=[", prefix, name);
515  for (int32_t i=0; i<num_feat_entries; i++)
516  SG_SPRINT("%s%s%d:(%.18lg+i%.18lg)", prefix, i==0 ? "" : " ", features[i].feat_index,
517  features[i].entry.real(), features[i].entry.imag());
518  SG_SPRINT("%s]\n", prefix);
519 }
520 
521 template class SGSparseVector<bool>;
522 template class SGSparseVector<char>;
523 template class SGSparseVector<int8_t>;
524 template class SGSparseVector<uint8_t>;
525 template class SGSparseVector<int16_t>;
526 template class SGSparseVector<uint16_t>;
527 template class SGSparseVector<int32_t>;
528 template class SGSparseVector<uint32_t>;
529 template class SGSparseVector<int64_t>;
530 template class SGSparseVector<uint64_t>;
531 template class SGSparseVector<float32_t>;
532 template class SGSparseVector<float64_t>;
533 template class SGSparseVector<floatmax_t>;
534 template class SGSparseVector<complex128_t>;
535 }
536 

SHOGUN Machine Learning Toolbox - Documentation