SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
SparseFeatures.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2010 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Written (W) 2011-2012 Heiko Strathmann
10  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
11  * Copyright (C) 2010 Berlin Institute of Technology
12  */
13 
14 #ifndef _SPARSEFEATURES__H__
15 #define _SPARSEFEATURES__H__
16 
17 #include <shogun/lib/common.h>
18 #include <shogun/lib/DataType.h>
20 #include <shogun/lib/Cache.h>
21 #include <shogun/io/File.h>
22 
27 
28 namespace shogun
29 {
30 
/*
 * Forward declarations of types named in this header.
 *
 * On the lines visible in this listing, CFile, CRegressionLabels and CFeatures
 * are used only behind pointers, for which a forward declaration is enough.
 * CDenseFeatures and SGSparseMatrix are not referenced on any visible line.
 *
 * NOTE(review): CDotFeatures is also the base class of CSparseFeatures below.
 * A base class must be a complete type at that point, so this forward
 * declaration alone is not sufficient. Presumably the definition comes from an
 * #include on one of the lines missing from this listing -- confirm against the
 * original header.
 */
31 class CFile;
32 class CRegressionLabels;
33 class CFeatures;
34 class CDotFeatures;
35 template <class ST> class CDenseFeatures;
36 template <class ST> class SGSparseMatrix;
37 
/**
 * Class template CSparseFeatures<ST>: a feature container parameterised on the
 * element type ST that derives publicly from CDotFeatures.
 *
 * Only declarations are visible here; apart from get_name() and the iterator's
 * print_info(), no member function body appears in this listing, so the notes
 * below describe signatures and hedge everything that is inferred from names.
 *
 * NOTE(review): this listing has gaps (the embedded source line numbers are not
 * contiguous). A few declarations are therefore visible only in part; each such
 * fragment is flagged where it occurs. Recover the missing lines from the
 * original header before relying on them.
 */
56 template <class ST> class CSparseFeatures : public CDotFeatures
57 {
58  public:
 /**
  * Constructor taking one integer, size, which defaults to 0 (so this also
  * serves as the default constructor). It is not marked explicit, so it permits
  * implicit conversion from int32_t.
  */
63  CSparseFeatures(int32_t size=0);
64 
 /*
  * NOTE(review): the next line is the tail of a declaration whose first line is
  * missing from this listing (presumably another constructor, judging by its
  * position -- confirm). The visible part declares the parameters num_feat,
  * num_vec and copy, with copy defaulting to false.
  */
74  int32_t num_feat, int32_t num_vec,bool copy=false);
75 
82 
89 
 /** Copy constructor. */
91  CSparseFeatures(const CSparseFeatures & orig);
92 
 /**
  * Constructor taking a CFile*; presumably reads the features through loader --
  * confirm against the implementation. Not marked explicit.
  */
97  CSparseFeatures(CFile* loader);
98 
 /** Virtual destructor. */
100  virtual ~CSparseFeatures();
101 
107 
 /**
  * Takes no arguments and returns nothing. Presumably releases the stored
  * sparse data -- confirm against the implementation.
  */
112  void free_sparse_features();
113 
 /**
  * Virtual, const-qualified; returns a CFeatures*. Presumably a copy of this
  * object; ownership of the returned pointer is not visible here.
  */
118  virtual CFeatures* duplicate() const;
119 
 /**
  * Returns a value of type ST for the pair (num, index). Presumably entry
  * `index` of vector `num` -- confirm.
  */
129  ST get_feature(int32_t num, int32_t index);
130 
 /**
  * Returns an ST* for vector num. len is a non-const reference, so the function
  * can write a length back to the caller. Who owns or frees the returned buffer
  * is not visible here.
  */
139  ST* get_full_feature_vector(int32_t num, int32_t& len);
140 
147 
 /**
  * Virtual; returns an int32_t for vector num. Presumably the number of
  * non-zero entries in that vector -- confirm.
  */
153  virtual int32_t get_nnz_features_for_vector(int32_t num);
154 
165 
 /**
  * Static member. Takes a scalar alpha and two (pointer, length) pairs of
  * SGSparseVectorEntry<ST>, and returns an ST. Presumably alpha times the dot
  * product of the two sparse vectors -- confirm.
  */
176  static ST sparse_dot(ST alpha, SGSparseVectorEntry<ST>* avec, int32_t alen,
177  SGSparseVectorEntry<ST>* bvec, int32_t blen);
178 
 /**
  * Non-virtual overload that works in ST: takes alpha, a vector number num, a
  * pointer vec with a dimension dim, and b; returns an ST.
  * A second, virtual dense_dot with a different parameter list (float64_t
  * based) is declared further down; the two are distinct overloads.
  */
191  ST dense_dot(ST alpha, int32_t num, ST* vec, int32_t dim, ST b);
192 
 /**
  * Takes alpha, a vector number num, a mutable float64_t* vec with dimension
  * dim, and abs_val (default false); returns nothing. Presumably accumulates
  * alpha times vector num into vec -- confirm.
  */
204  void add_to_dense_vec(float64_t alpha, int32_t num,
205  float64_t* vec, int32_t dim, bool abs_val=false);
206 
 /** Takes a vector number and returns nothing; presumably a release hook -- confirm. */
213  void free_sparse_feature_vector(int32_t num);
214 
 /**
  * Returns an SGSparseVector<ST>*. num_feat and num_vec are non-const
  * references, so the function can report two counts back to the caller.
  * Ownership of the returned pointer is not visible here.
  */
224  SGSparseVector<ST>* get_sparse_feature_matrix(int32_t &num_feat, int32_t &num_vec);
225 
234 
242 
 /**
  * Same shape as get_sparse_feature_matrix: returns an SGSparseVector<ST>* and
  * takes num_feat / num_vec by non-const reference. Presumably yields the
  * transposed data -- confirm, including who frees the result.
  */
254  SGSparseVector<ST>* get_transposed(int32_t &num_feat, int32_t &num_vec);
255 
264 
272 
 /** Virtual; takes an SGMatrix<ST> by value and returns a bool. */
282  virtual bool set_full_feature_matrix(SGMatrix<ST> full);
283 
 /** Virtual; force_preprocessing defaults to false; returns a bool. */
291  virtual bool apply_preprocessor(bool force_preprocessing=false);
292 
 /** Virtual, const-qualified; returns an int32_t. */
297  virtual int32_t get_size() const;
298 
307 
 /** Virtual, const-qualified; returns an int32_t. */
312  virtual int32_t get_num_vectors() const;
313 
 /** Non-virtual and, unlike get_num_vectors(), not const-qualified. */
318  int32_t get_num_features();
319 
 /**
  * Takes an int32_t and returns an int32_t (not void); the meaning of the
  * returned value is not visible here.
  */
331  int32_t set_num_features(int32_t num);
332 
 /** Virtual, const-qualified; returns an EFeatureClass. */
337  virtual EFeatureClass get_feature_class() const;
338 
 /** Virtual, const-qualified; returns an EFeatureType. */
343  virtual EFeatureType get_feature_type() const;
344 
 /** Takes a vector number and returns nothing. */
351  void free_feature_vector(int32_t num);
352 
 /** Returns a 64-bit count (int64_t), wider than the int32_t used elsewhere in this class. */
357  int64_t get_num_nonzero_entries();
358 
367 
 /*
  * NOTE(review): the next two lines are the tail of a declaration whose name
  * and return type are missing from this listing; a line between them is
  * missing as well. The visible parameters are sq_lhs, idx_a and idx_b.
  */
383  float64_t* sq_lhs, int32_t idx_a,
385  int32_t idx_b);
386 
 /** Takes a CFile* and returns nothing; presumably reads this object's data from it -- confirm. */
393  void load(CFile* loader);
394 
 /** Takes a CFile* and returns nothing; presumably writes this object's data to it -- confirm. */
401  void save(CFile* writer);
402 
 /**
  * Returns a CRegressionLabels*; takes a non-const char* file name and
  * do_sort_features, which defaults to true. Ownership of the returned labels
  * is not visible here.
  */
412  CRegressionLabels* load_svmlight_file(char* fname, bool do_sort_features=true);
413 
 /** Takes no arguments and returns nothing. */
419  void sort_features();
420 
 /** Takes a non-const char* file name and a CRegressionLabels*; returns a bool. */
429  bool write_svmlight_file(char* fname, CRegressionLabels* label);
430 
 /** Virtual, const-qualified; returns an int32_t. */
438  virtual int32_t get_dim_feature_space() const;
439 
 /**
  * Virtual; takes an index vec_idx1, a CDotFeatures* df and an index vec_idx2,
  * and returns a float64_t. Presumably the dot product of vector vec_idx1 of
  * this object with vector vec_idx2 of df -- confirm.
  */
449  virtual float64_t dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2);
450 
 /**
  * Virtual float64_t overload of dense_dot: takes an index vec_idx1 and a
  * read-only float64_t array vec2 of length vec2_len. Distinct from the
  * non-virtual ST-typed dense_dot declared above.
  */
459  virtual float64_t dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len);
460 
 /* The iterator struct is compiled only when DOXYGEN_SHOULD_SKIP_THIS is not defined. */
461  #ifndef DOXYGEN_SHOULD_SKIP_THIS
462 
 /**
  * State record for iterating over features.
  *
  * NOTE(review): print_info() reads a member named sv (sv.features and
  * sv.num_feat_entries), but no declaration of sv appears on the visible lines;
  * it is presumably on a line missing from this listing -- confirm.
  */
463  struct sparse_feature_iterator
464  {
467 
469  int32_t vector_index;
470 
472  int32_t index;
473 
 /**
  * Prints sv.features (as a pointer), vector_index, sv.num_feat_entries and
  * index on one line through SG_SPRINT.
  */
475  void print_info()
476  {
477  SG_SPRINT("sv=%p, vidx=%d, num_feat_entries=%d, index=%d\n",
478  sv.features, vector_index, sv.num_feat_entries, index);
479  }
480  };
481  #endif
482 
 /**
  * Virtual; returns an untyped void* handle for vector_index. Presumably the
  * handle points to a sparse_feature_iterator -- confirm.
  */
494  virtual void* get_feature_iterator(int32_t vector_index);
495 
 /**
  * Virtual; takes the untyped iterator handle and returns a bool. index and
  * value are non-const references, so the function can write both back to the
  * caller.
  */
506  virtual bool get_next_feature(int32_t& index, float64_t& value, void* iterator);
507 
 /** Virtual; takes the untyped iterator handle and returns nothing. */
513  virtual void free_feature_iterator(void* iterator);
514 
 /** Virtual; takes an SGVector<index_t> by value and returns a CFeatures*. */
521  virtual CFeatures* copy_subset(SGVector<index_t> indices);
522 
 /**
  * Returns the string literal "SparseFeatures". The inline keyword is redundant
  * here because the function is defined inside the class body.
  */
524  inline virtual const char* get_name() const { return "SparseFeatures"; }
525 
526  protected:
 /*
  * NOTE(review): the next line is the tail of a protected declaration whose
  * name and leading parameters are missing from this listing. The visible part
  * declares len (a non-const int32_t reference) and target, which defaults to
  * NULL.
  */
538  int32_t& len, SGSparseVectorEntry<ST>* target=NULL);
539 
540  private:
 /** Private helper taking no arguments; presumably shared constructor set-up -- confirm. */
541  void init();
542 
543  protected:
544 
 /* Protected int32_t data member. */
546  int32_t num_vectors;
547 
 /* Protected int32_t data member. */
549  int32_t num_features;
550 
 /*
  * NOTE(review): the embedded line numbers jump from 550 to 553 and then to 556,
  * so further member declarations were presumably lost from this listing --
  * confirm against the original header.
  */
553 
556 };
557 }
558 #endif /* _SPARSEFEATURES__H__ */

SHOGUN Machine Learning Toolbox - Documentation