SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
SparseFeatures.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2010 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Written (W) 2011-2012 Heiko Strathmann
10  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
11  * Copyright (C) 2010 Berlin Institute of Technology
12  */
13 
14 #ifndef _SPARSEFEATURES__H__
15 #define _SPARSEFEATURES__H__
16 
17 #include <shogun/lib/common.h>
18 #include <shogun/lib/DataType.h>
20 #include <shogun/lib/Cache.h>
21 #include <shogun/io/File.h>
22 #include <shogun/io/LibSVMFile.h>
23 
28 
29 namespace shogun
30 {
31 
/*
 * Forward declarations of classes and class templates in namespace shogun.
 *
 * In the visible part of this header, CFile, CLibSVMFile, CFeatures and
 * CDotFeatures appear in the CSparseFeatures declaration; CRegressionLabels,
 * CDenseFeatures and SGSparseMatrix are not referenced on any visible line.
 *
 * NOTE(review): CDotFeatures is used as a base class further down, which
 * needs a complete type rather than a forward declaration. SGSparseVector,
 * SGMatrix, SGVector and SGSparseVectorEntry are also used without a visible
 * include or forward declaration. The embedded line numbering skips 19 and
 * 24-27, so the corresponding #include lines are presumably missing from
 * this listing - confirm against the original header.
 */
32 class CFile;
33 class CLibSVMFile;
34 class CRegressionLabels;
35 class CFeatures;
36 class CDotFeatures;
37 template <class ST> class CDenseFeatures;
38 template <class ST> class SGSparseMatrix;
39 
/**
 * Class template CSparseFeatures, parameterised on the entry type ST and
 * publicly derived from CDotFeatures.
 *
 * Only declarations are visible in this listing, so the notes below describe
 * the signatures and mark anything about behaviour as an assumption.
 *
 * NOTE(review): this text looks like a documentation-page rendering of the
 * header: every line starts with its original line number, and that numbering
 * has gaps (for example 66 -> 73 -> 80). Comments and possibly some
 * declarations are therefore missing here; consult the original header before
 * treating this listing as the full interface.
 *
 * NOTE(review): CDotFeatures is only forward-declared in the visible part of
 * the file, but a base class must be a complete type, so its defining header
 * is presumably included on a line that is missing from this listing.
 */
58 template <class ST> class CSparseFeatures : public CDotFeatures
59 {
60  public:
    /**
     * Constructor taking an int32_t size that defaults to 0.
     * NOTE(review): what size measures is not visible here (presumably a
     * cache size, given the Cache.h include) - confirm in the implementation.
     */
65  CSparseFeatures(int32_t size=0);
66 
73 
80 
    /** Copy constructor; takes the source object by const reference. */
82  CSparseFeatures(const CSparseFeatures & orig);
83 
    /**
     * Constructor taking a CFile pointer.
     * Presumably reads the features from loader, like load() - confirm.
     */
88  CSparseFeatures(CFile* loader);
89 
    /** Virtual destructor. */
91  virtual ~CSparseFeatures();
92 
98 
    /**
     * Takes no arguments and returns nothing.
     * By its name, presumably releases the stored sparse feature data -
     * the implementation is not visible here.
     */
103  void free_sparse_features();
104 
    /**
     * Virtual, const. Returns a CFeatures pointer.
     * Presumably a copy of this object; who owns the result is not visible
     * here - confirm.
     */
109  virtual CFeatures* duplicate() const;
110 
    /**
     * Returns a single value of type ST selected by two int32_t arguments.
     * Presumably num selects the vector and index the feature dimension -
     * confirm; valid ranges are not visible here.
     */
120  ST get_feature(int32_t num, int32_t index);
121 
128 
    /**
     * Virtual. Returns an int32_t for the vector selected by num.
     * By its name, presumably the count of non-zero entries in that vector.
     */
134  virtual int32_t get_nnz_features_for_vector(int32_t num);
135 
146 
    /**
     * Overload of dense_dot working in the entry type ST.
     * Takes a scalar alpha, a vector number num, a pointer vec with a length
     * dim, and a scalar b; returns ST.
     * NOTE(review): presumably alpha * (vector num dotted with vec) + b -
     * confirm against the implementation.
     */
159  ST dense_dot(ST alpha, int32_t num, ST* vec, int32_t dim, ST b);
160 
    /**
     * Takes a float64_t scalar alpha, a vector number num, a float64_t
     * pointer vec with a length dim, and a flag abs_val (default false).
     * NOTE(review): presumably adds alpha times vector num into vec, using
     * absolute values when abs_val is set - confirm.
     */
172  void add_to_dense_vec(float64_t alpha, int32_t num,
173  float64_t* vec, int32_t dim, bool abs_val=false);
174 
    /**
     * Takes a vector number num and returns nothing.
     * Presumably releases a vector previously handed out for num - confirm.
     */
181  void free_sparse_feature_vector(int32_t num);
182 
    /**
     * Returns a pointer to SGSparseVector<ST>.
     * num_feat and num_vec are non-const references, so presumably outputs
     * that receive the matrix dimensions - confirm. Ownership of the
     * returned pointer is not visible here.
     */
192  SGSparseVector<ST>* get_sparse_feature_matrix(int32_t &num_feat, int32_t &num_vec);
193 
202 
210 
    /**
     * Same signature shape as get_sparse_feature_matrix(): returns a pointer
     * to SGSparseVector<ST> and takes two int32_t references.
     * By its name, presumably yields the transposed matrix - confirm,
     * including who frees the result.
     */
222  SGSparseVector<ST>* get_transposed(int32_t &num_feat, int32_t &num_vec);
223 
232 
240 
    /**
     * Virtual. Takes an SGMatrix<ST> by value.
     * Presumably converts the dense matrix full into the sparse
     * representation held by this object - confirm.
     */
250  virtual void set_full_feature_matrix(SGMatrix<ST> full);
251 
    /**
     * Virtual. Takes a flag force_preprocessing (default false) and returns
     * bool.
     * The meaning of the return value is not visible here (presumably
     * success) - confirm.
     */
259  virtual bool apply_preprocessor(bool force_preprocessing=false);
260 
268 
    /** Virtual, const. Returns an int32_t; by its name, the number of vectors. */
273  virtual int32_t get_num_vectors() const;
274 
    /** Const. Returns an int32_t; by its name, the number of features. */
279  int32_t get_num_features() const;
280 
    /**
     * Takes an int32_t num and returns an int32_t.
     * NOTE(review): the meaning of the return value is not visible here -
     * confirm in the implementation.
     */
292  int32_t set_num_features(int32_t num);
293 
    /** Virtual, const. Returns an EFeatureClass value. */
298  virtual EFeatureClass get_feature_class() const;
299 
    /** Virtual, const. Returns an EFeatureType value. */
304  virtual EFeatureType get_feature_type() const;
305 
    /**
     * Takes a vector number num and returns nothing.
     * Presumably the counterpart of free_sparse_feature_vector() for another
     * way of obtaining vectors - confirm.
     */
312  void free_feature_vector(int32_t num);
313 
    /**
     * Returns an int64_t; by its name, the total number of non-zero entries.
     * Not declared const.
     */
318  int64_t get_num_nonzero_entries();
319 
328 
    /**
     * NOTE(review): the next two lines are the tail of a parameter list whose
     * beginning (return type, function name, leading parameters) is not in
     * this listing; the embedded numbering jumps from 328 to 344 and skips
     * 345. The visible parameters are a float64_t pointer sq_lhs and two
     * int32_t indices idx_a and idx_b. Restore the full declaration from the
     * original header.
     */
344  float64_t* sq_lhs, int32_t idx_a,
346  int32_t idx_b);
347 
    /** Takes a CFile pointer; by its name, presumably reads the features from it. */
354  void load(CFile* loader);
355 
364 
    /** Takes a CFile pointer; by its name, presumably writes the features to it. */
371  void save(CFile* writer);
372 
    /**
     * Takes a CLibSVMFile pointer and an SGVector<float64_t> of labels.
     * Presumably writes features together with labels - confirm; how labels
     * line up with vectors is not visible here.
     */
380  void save_with_labels(CLibSVMFile* writer, SGVector<float64_t> labels);
381 
    /**
     * Takes no arguments and returns nothing.
     * NOTE(review): the sort key and order are not visible here - confirm.
     */
387  void sort_features();
388 
    /** Virtual, const. Returns an int32_t; by its name, the feature space dimension. */
396  virtual int32_t get_dim_feature_space() const;
397 
    /**
     * Virtual. Takes an index vec_idx1, a CDotFeatures pointer df and an
     * index vec_idx2; returns float64_t.
     * Presumably the dot product of this object's vector vec_idx1 with
     * vector vec_idx2 of df - confirm.
     */
407  virtual float64_t dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2);
408 
    /**
     * Virtual overload of dense_dot working in float64_t.
     * Takes an index vec_idx1 and a const float64_t pointer vec2 with a
     * length vec2_len; returns float64_t.
     * Presumably the dot product of vector vec_idx1 with vec2 - confirm.
     */
417  virtual float64_t dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len);
418 
    /** The struct below is compiled only when DOXYGEN_SHOULD_SKIP_THIS is not defined. */
419  #ifndef DOXYGEN_SHOULD_SKIP_THIS
420 
    /**
     * State record holding two int32_t fields plus the member sv used by
     * print_info(). Presumably the object behind the void pointers of
     * get_feature_iterator(), get_next_feature() and
     * free_feature_iterator() - confirm.
     *
     * NOTE(review): print_info() reads sv.features and sv.num_feat_entries,
     * but no member named sv is declared in the visible lines; the embedded
     * numbering skips 423, 424 and 426, so its declaration is presumably
     * missing from this listing.
     */
421  struct sparse_feature_iterator
422  {
425 
    /** An int32_t; by its name, presumably the index of the vector being iterated. */
427  int32_t vector_index;
428 
    /** An int32_t; presumably the current position within the vector - confirm. */
430  int32_t index;
431 
    /**
     * Prints sv.features, vector_index, sv.num_feat_entries and index
     * through SG_SPRINT.
     */
433  void print_info()
434  {
435  SG_SPRINT("sv=%p, vidx=%d, num_feat_entries=%d, index=%d\n",
436  sv.features, vector_index, sv.num_feat_entries, index);
437  }
438  };
439  #endif
440 
    /**
     * Virtual. Takes a vector index and returns an untyped pointer.
     * Presumably points at a sparse_feature_iterator - confirm.
     */
452  virtual void* get_feature_iterator(int32_t vector_index);
453 
    /**
     * Virtual. Takes two non-const references, index and value, plus an
     * untyped iterator pointer; returns bool.
     * Presumably fills index and value with the next entry and returns
     * false when the vector is exhausted - confirm.
     */
464  virtual bool get_next_feature(int32_t& index, float64_t& value, void* iterator);
465 
    /**
     * Virtual. Takes an untyped iterator pointer and returns nothing.
     * Presumably releases an iterator from get_feature_iterator() - confirm.
     */
471  virtual void free_feature_iterator(void* iterator);
472 
    /**
     * Virtual. Takes an SGVector<index_t> by value and returns a CFeatures
     * pointer.
     * Presumably a new object holding only the vectors listed in indices -
     * confirm, including ownership of the result.
     */
479  virtual CFeatures* copy_subset(SGVector<index_t> indices);
480 
    /** Virtual, const. Returns the string literal "SparseFeatures". */
482  virtual const char* get_name() const { return "SparseFeatures"; }
483 
484  protected:
    /**
     * NOTE(review): the next line is the tail of a parameter list whose
     * beginning is not in this listing; the embedded numbering jumps from
     * 484 to 496. The visible parameters are an int32_t reference len and an
     * SGSparseVectorEntry<ST> pointer target that defaults to NULL. Restore
     * the full declaration from the original header.
     */
496  int32_t& len, SGSparseVectorEntry<ST>* target=NULL);
497 
498  private:
    /**
     * Private helper with no arguments and no return value.
     * Presumably shared set-up called by the constructors - confirm.
     */
499  void init();
500 
    /**
     * NOTE(review): no members are visible under this access specifier; the
     * embedded numbering skips 503-504 and 506-507, so the data members are
     * presumably missing from this listing.
     */
501  protected:
502 
505 
508 };
509 }
510 #endif /* _SPARSEFEATURES__H__ */

SHOGUN Machine Learning Toolbox - Documentation