SHOGUN  4.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
SparseFeatures.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2010 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Written (W) 2011-2012 Heiko Strathmann
10  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
11  * Copyright (C) 2010 Berlin Institute of Technology
12  */
13 
14 #ifndef _SPARSEFEATURES__H__
15 #define _SPARSEFEATURES__H__
16 
17 #include <shogun/lib/config.h>
18 
19 #include <shogun/lib/common.h>
22 #include <shogun/lib/SGMatrix.h>
24 #include <shogun/lib/SGVector.h>
26 
27 namespace shogun
28 {
29 
30 class CFile;
31 class CLibSVMFile;
32 class CFeatures;
33 template <class ST> class CDenseFeatures;
34 template <class T> class CCache;
35 
54 template <class ST> class CSparseFeatures : public CDotFeatures
55 {
56  public:
61  CSparseFeatures(int32_t size=0);
62 
69 
76 
78  CSparseFeatures(const CSparseFeatures & orig);
79 
84  CSparseFeatures(CFile* loader);
85 
87  virtual ~CSparseFeatures();
88 
94 
99  void free_sparse_features();
100 
105  virtual CFeatures* duplicate() const;
106 
116  ST get_feature(int32_t num, int32_t index);
117 
124 
130  virtual int32_t get_nnz_features_for_vector(int32_t num);
131 
142 
155  ST dense_dot(ST alpha, int32_t num, ST* vec, int32_t dim, ST b);
156 
168  void add_to_dense_vec(float64_t alpha, int32_t num,
169  float64_t* vec, int32_t dim, bool abs_val=false);
170 
177  void free_sparse_feature_vector(int32_t num);
178 
188  SGSparseVector<ST>* get_sparse_feature_matrix(int32_t &num_feat, int32_t &num_vec);
189 
198 
206 
218  SGSparseVector<ST>* get_transposed(int32_t &num_feat, int32_t &num_vec);
219 
228 
236 
246  virtual void set_full_feature_matrix(SGMatrix<ST> full);
247 
255  virtual bool apply_preprocessor(bool force_preprocessing=false);
256 
264 
269  virtual int32_t get_num_vectors() const;
270 
275  int32_t get_num_features() const;
276 
288  int32_t set_num_features(int32_t num);
289 
294  virtual EFeatureClass get_feature_class() const;
295 
300  virtual EFeatureType get_feature_type() const;
301 
308  void free_feature_vector(int32_t num);
309 
314  int64_t get_num_nonzero_entries();
315 
324 
340  float64_t* sq_lhs, int32_t idx_a,
342  int32_t idx_b);
343 
350  void load(CFile* loader);
351 
360 
367  void save(CFile* writer);
368 
376  void save_with_labels(CLibSVMFile* writer, SGVector<float64_t> labels);
377 
383  void sort_features();
384 
392  virtual int32_t get_dim_feature_space() const;
393 
403  virtual float64_t dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2);
404 
413  virtual float64_t dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len);
414 
415  #ifndef DOXYGEN_SHOULD_SKIP_THIS
416 
417  struct sparse_feature_iterator
418  {
421 
423  int32_t vector_index;
424 
426  int32_t index;
427 
429  void print_info()
430  {
431  SG_SPRINT("sv=%p, vidx=%d, num_feat_entries=%d, index=%d\n",
432  sv.features, vector_index, sv.num_feat_entries, index);
433  }
434  };
435  #endif
436 
448  virtual void* get_feature_iterator(int32_t vector_index);
449 
460  virtual bool get_next_feature(int32_t& index, float64_t& value, void* iterator);
461 
467  virtual void free_feature_iterator(void* iterator);
468 
475  virtual CFeatures* copy_subset(SGVector<index_t> indices);
476 
478  virtual const char* get_name() const { return "SparseFeatures"; }
479 
480  protected:
492  int32_t& len, SGSparseVectorEntry<ST>* target=NULL);
493 
494  private:
495  void init();
496 
497  protected:
498 
501 
504 };
505 }
506 #endif /* _SPARSEFEATURES__H__ */
CSparseFeatures(int32_t size=0)
The class DenseFeatures implements dense feature matrices.
Definition: LDA.h:41
ST dense_dot(ST alpha, int32_t num, ST *vec, int32_t dim, ST b)
int32_t set_num_features(int32_t num)
CFeatures(int32_t size=0)
Definition: Features.cpp:23
virtual const char * get_name() const
Template class SparseFeatures implements sparse matrices.
virtual CFeatures * duplicate() const
Features that support dot products among other operations.
Definition: DotFeatures.h:44
EFeatureClass
shogun feature class
Definition: FeatureTypes.h:38
float64_t compute_squared_norm(CSparseFeatures< float64_t > *lhs, float64_t *sq_lhs, int32_t idx_a, CSparseFeatures< float64_t > *rhs, float64_t *sq_rhs, int32_t idx_b)
ST get_feature(int32_t num, int32_t index)
SGSparseMatrix< ST > get_sparse_feature_matrix()
void set_sparse_feature_matrix(SGSparseMatrix< ST > sm)
#define SG_SPRINT(...)
Definition: SGIO.h:180
virtual SGSparseVectorEntry< ST > * compute_sparse_feature_vector(int32_t num, int32_t &len, SGSparseVectorEntry< ST > *target=NULL)
CSparseFeatures< ST > * get_transposed()
int32_t get_num_features() const
SGSparseMatrix< ST > sparse_feature_matrix
array of sparse vectors of size num_vectors
shogun vector
virtual bool get_next_feature(int32_t &index, float64_t &value, void *iterator)
virtual EFeatureClass get_feature_class() const
SGVector< ST > get_full_feature_vector(int32_t num)
double float64_t
Definition: common.h:50
CCache< SGSparseVectorEntry< ST > > * feature_cache
virtual void free_feature_iterator(void *iterator)
A File access base class.
Definition: File.h:34
virtual void set_full_feature_matrix(SGMatrix< ST > full)
void obtain_from_simple(CDenseFeatures< ST > *sf)
void save_with_labels(CLibSVMFile *writer, SGVector< float64_t > labels)
virtual void * get_feature_iterator(int32_t vector_index)
void free_feature_vector(int32_t num)
void load(CFile *loader)
virtual EFeatureType get_feature_type() const
SGSparseVectorEntry< T > * features
void free_sparse_feature_vector(int32_t num)
SGSparseVector< ST > get_sparse_feature_vector(int32_t num)
EFeatureType
shogun feature type
Definition: FeatureTypes.h:19
virtual bool apply_preprocessor(bool force_preprocessing=false)
virtual int32_t get_dim_feature_space() const
all classes and functions are contained in the shogun namespace
Definition: class_list.h:18
virtual float64_t dot(int32_t vec_idx1, CDotFeatures *df, int32_t vec_idx2)
read sparse real-valued features in SVMlight format, e.g. "-1 1:10.0 2:100.2 1000:1.3", where -1 is the (optional) label, followed by dim 1 - value 10.0, dim 2 - value 100.2, dim 1000 - value 1.3
Definition: LibSVMFile.h:34
Template class Cache implements a simple cache.
The class Features is the base class of all feature objects.
Definition: Features.h:68
void save(CFile *writer)
float64_t * compute_squared(float64_t *sq)
void add_to_dense_vec(float64_t alpha, int32_t num, float64_t *vec, int32_t dim, bool abs_val=false)
virtual CFeatures * copy_subset(SGVector< index_t > indices)
SGVector< float64_t > load_with_labels(CLibSVMFile *loader)
SGMatrix< ST > get_full_feature_matrix()
virtual int32_t get_num_vectors() const
virtual int32_t get_nnz_features_for_vector(int32_t num)

SHOGUN Machine Learning Toolbox - Documentation