Go to the documentation of this file.
00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 #ifndef _SPARSEFEATURES__H__
00015 #define _SPARSEFEATURES__H__
00016
00017 #include <shogun/lib/common.h>
00018 #include <shogun/lib/DataType.h>
00019 #include <shogun/lib/SGSparseMatrix.h>
00020 #include <shogun/lib/Cache.h>
00021 #include <shogun/io/File.h>
00022
00023 #include <shogun/labels/RegressionLabels.h>
00024 #include <shogun/features/Features.h>
00025 #include <shogun/features/DotFeatures.h>
00026 #include <shogun/features/DenseFeatures.h>
00027
00028 namespace shogun
00029 {
00030
/*
 * Forward declarations of the types named in the CSparseFeatures declaration
 * that follows in this namespace.
 *
 * NOTE(review): CDotFeatures is used as a base class of CSparseFeatures, which
 * needs a complete type; that presumably comes from the
 * <shogun/features/DotFeatures.h> include above, so the forward declaration
 * alone would not be sufficient -- confirm before removing any include.
 */
00031 class CFile;
00032 class CRegressionLabels;
00033 class CFeatures;
00034 class CDotFeatures;
00035 template <class ST> class CDenseFeatures;
00036 template <class ST> class SGSparseMatrix;
00037
/**
 * Class template CSparseFeatures<ST>, publicly derived from CDotFeatures.
 *
 * The state declared at the bottom of the class is an array of
 * SGSparseVector<ST> (sparse_feature_matrix), two counters (num_vectors,
 * num_features) and a CCache of SGSparseVectorEntry<ST> (feature_cache).
 * ST parameterises the SGSparseVector / SGSparseVectorEntry / SGMatrix types
 * used in the member signatures.
 *
 * Only declarations are visible here (apart from get_name() and the
 * iterator's print_info()); the member definitions live elsewhere. The notes
 * on individual members are therefore derived from names and signatures and
 * are hedged wherever the behaviour cannot be confirmed from this listing.
 *
 * NOTE(review): the five-digit number at the start of every line is a listing
 * line number, not C++ code. Gaps in the numbering (e.g. 00037 -> 00056)
 * presumably mark documentation comments that were stripped from the listing.
 */
00056 template <class ST> class CSparseFeatures : public CDotFeatures
00057 {
00058 public:
/** Constructor taking an integer `size` (default 0).
 *  Presumably the feature-cache size -- TODO confirm in the definition. */
00063 CSparseFeatures(int32_t size=0);
00064
/** Constructor from a raw array of sparse vectors plus feature/vector counts.
 *  `copy` defaults to false; presumably it selects copying `src` rather than
 *  referencing it -- confirm the ownership rules in the definition. */
00073 CSparseFeatures(SGSparseVector<ST>* src,
00074 int32_t num_feat, int32_t num_vec,bool copy=false);
00075
/** Constructor from an SGSparseMatrix<ST> passed by value. */
00081 CSparseFeatures(SGSparseMatrix<ST> sparse);
00082
/** Constructor from a dense SGMatrix<ST> passed by value
 *  (presumably converted to the sparse representation -- confirm). */
00088 CSparseFeatures(SGMatrix<ST> dense);
00089
/** Copy constructor. */
00091 CSparseFeatures(const CSparseFeatures & orig);
00092
/** Constructor from a CFile pointer (presumably loads the features from it,
 *  cf. load() below -- confirm). */
00097 CSparseFeatures(CFile* loader);
00098
/** Virtual destructor. */
00100 virtual ~CSparseFeatures();
00101
/** Releases the sparse feature matrix (exact scope of what is freed is not
 *  visible here). */
00106 void free_sparse_feature_matrix();
00107
/** Releases the sparse features; presumably a superset of
 *  free_sparse_feature_matrix() (e.g. also the cache) -- TODO confirm. */
00112 void free_sparse_features();
00113
/** Returns a CFeatures pointer; by its name, a duplicate of this object.
 *  Ownership of the returned pointer is not visible here. */
00118 virtual CFeatures* duplicate() const;
00119
/** Returns a single ST value for vector number `num` and position `index`
 *  (presumably the feature value at that index -- confirm). */
00129 ST get_feature(int32_t num, int32_t index);
00130
/** Raw-pointer variant: returns an ST array for vector `num` and reports its
 *  length through the reference parameter `len`.
 *  NOTE(review): who frees the returned buffer is not visible here. */
00139 ST* get_full_feature_vector(int32_t num, int32_t& len);
00140
/** SGVector variant of the overload above: returns the vector for `num`
 *  wrapped in an SGVector<ST>. */
00146 SGVector<ST> get_full_feature_vector(int32_t num);
00147
/** Returns an int32_t count for vector `num`; by its name, the number of
 *  non-zero features of that vector. */
00153 virtual int32_t get_nnz_features_for_vector(int32_t num);
00154
/** Returns vector `num` as an SGSparseVector<ST>. See also
 *  free_sparse_feature_vector(), which presumably has to be paired with this
 *  call -- confirm. */
00164 SGSparseVector<ST> get_sparse_feature_vector(int32_t num);
00165
/** Non-virtual dense_dot overload working in ST: takes a scale `alpha`,
 *  a vector number `num`, a dense array `vec` of length `dim` and an offset
 *  `b`. Presumably computes alpha * <sparse vector num, vec> + b -- confirm.
 *  Distinct from the virtual float64_t dense_dot() declared further down. */
00178 ST dense_dot(ST alpha, int32_t num, ST* vec, int32_t dim, ST b);
00179
/** Takes a float64_t scale `alpha`, vector number `num` and a dense buffer
 *  `vec` of length `dim`; `abs_val` defaults to false. Presumably adds
 *  alpha * (sparse vector num) into vec, using absolute values when abs_val
 *  is set -- confirm. */
00191 void add_to_dense_vec(float64_t alpha, int32_t num,
00192 float64_t* vec, int32_t dim, bool abs_val=false);
00193
/** Releases the sparse vector with number `num` (presumably the counterpart
 *  of get_sparse_feature_vector()). */
00200 void free_sparse_feature_vector(int32_t num);
00201
/** Raw accessor: returns an SGSparseVector<ST> array and writes two counts
 *  through the reference parameters `num_feat` and `num_vec`. */
00211 SGSparseVector<ST>* get_sparse_feature_matrix(int32_t &num_feat, int32_t &num_vec);
00212
/** Same accessor returning an SGSparseMatrix<ST> by value. */
00220 SGSparseMatrix<ST> get_sparse_feature_matrix();
00221
/** Returns a CSparseFeatures<ST> pointer; by its name, a transposed copy of
 *  these features. Ownership of the result is not visible here. */
00228 CSparseFeatures<ST>* get_transposed();
00229
/** Raw variant of get_transposed(): returns an SGSparseVector<ST> array and
 *  writes two counts through `num_feat` and `num_vec`. */
00241 SGSparseVector<ST>* get_transposed(int32_t &num_feat, int32_t &num_vec);
00242
/** Sets the sparse feature matrix from an SGSparseMatrix<ST> passed by value. */
00250 void set_sparse_feature_matrix(SGSparseMatrix<ST> sm);
00251
/** Returns the features as a dense SGMatrix<ST>. */
00258 SGMatrix<ST> get_full_feature_matrix();
00259
/** Sets the features from a dense SGMatrix<ST>; returns a bool
 *  (presumably success -- confirm). */
00269 virtual bool set_full_feature_matrix(SGMatrix<ST> full);
00270
/** Applies preprocessing; `force_preprocessing` defaults to false. Returns a
 *  bool (presumably success -- confirm). */
00278 virtual bool apply_preprocessor(bool force_preprocessing=false);
00279
/** Returns an int32_t size. What it measures (e.g. sizeof(ST)) is not visible
 *  here -- TODO confirm. */
00284 virtual int32_t get_size() const;
00285
/** Takes a CDenseFeatures<ST> pointer and returns a bool; by its name,
 *  populates this object from the dense features. */
00293 bool obtain_from_simple(CDenseFeatures<ST>* sf);
00294
/** Returns the number of vectors (const). */
00299 virtual int32_t get_num_vectors() const;
00300
/** Returns the number of features.
 *  NOTE(review): unlike get_num_vectors() this getter is not declared const. */
00305 int32_t get_num_features();
00306
/** Sets the number of features to `num` and returns an int32_t
 *  (presumably the previous value -- confirm). */
00318 int32_t set_num_features(int32_t num);
00319
/** Returns the EFeatureClass value identifying this feature class. */
00324 virtual EFeatureClass get_feature_class() const;
00325
/** Returns the EFeatureType value identifying the feature type
 *  (presumably depends on ST -- confirm). */
00330 virtual EFeatureType get_feature_type() const;
00331
/** Releases the feature vector with number `num`. */
00338 void free_feature_vector(int32_t num);
00339
/** Returns an int64_t; by its name, the total number of non-zero entries. */
00344 int64_t get_num_nonzero_entries();
00345
/** Takes and returns a float64_t pointer `sq`. Presumably fills `sq` with one
 *  squared value per vector and returns the same pointer -- confirm. */
00353 float64_t* compute_squared(float64_t* sq);
00354
/** Takes two CSparseFeatures<float64_t> operands (lhs/rhs), an array of
 *  squared values for each (sq_lhs/sq_rhs) and one index into each
 *  (idx_a/idx_b); returns a float64_t. Presumably the squared distance between
 *  the two selected vectors -- confirm.
 *  Note that the operands are fixed to float64_t regardless of ST. */
00369 float64_t compute_squared_norm(CSparseFeatures<float64_t>* lhs,
00370 float64_t* sq_lhs, int32_t idx_a,
00371 CSparseFeatures<float64_t>* rhs, float64_t* sq_rhs,
00372 int32_t idx_b);
00373
/** Loads the features through the given CFile. */
00380 void load(CFile* loader);
00381
/** Saves the features through the given CFile. */
00388 void save(CFile* writer);
00389
/** Takes a file name and a `do_sort_features` flag (default true); returns a
 *  CRegressionLabels pointer. By its name, reads an SVMlight-format file and
 *  returns the labels found in it. Ownership of the result is not visible
 *  here. */
00399 CRegressionLabels* load_svmlight_file(char* fname, bool do_sort_features=true);
00400
/** Sorts the features (sort key and order are not visible here; presumably
 *  by feature index within each vector -- confirm). */
00406 void sort_features();
00407
/** Takes a file name and a CRegressionLabels pointer; returns a bool
 *  (presumably success). By its name, writes an SVMlight-format file. */
00416 bool write_svmlight_file(char* fname, CRegressionLabels* label);
00417
/** Returns the dimensionality of the feature space as an int32_t. */
00425 virtual int32_t get_dim_feature_space() const;
00426
/** Dot product between vector `vec_idx1` of this object and vector `vec_idx2`
 *  of the CDotFeatures object `df`; returns a float64_t. */
00436 virtual float64_t dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2);
00437
/** Virtual dense_dot overload: dot product of vector `vec_idx1` with the
 *  dense array `vec2` of length `vec2_len`; returns a float64_t. */
00446 virtual float64_t dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len);
00447
00448 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00449
/* Iterator state handed out as void* by get_feature_iterator() below
 * (presumably -- the cast itself is not visible here). */
00450 struct sparse_feature_iterator
00451 {
/* the sparse vector being iterated */
00453 SGSparseVector<ST> sv;
00454
/* number of the vector (`vidx` in print_info) */
00456 int32_t vector_index;
00457
/* current position (presumably an offset into sv.features -- confirm) */
00459 int32_t index;
00460
/* Debug helper: prints sv.features (as a pointer), vector_index,
 * sv.num_feat_entries and index via SG_SPRINT. */
00462 void print_info()
00463 {
00464 SG_SPRINT("sv=%p, vidx=%d, num_feat_entries=%d, index=%d\n",
00465 sv.features, vector_index, sv.num_feat_entries, index);
00466 }
00467 };
00468 #endif
00469
/** Returns an opaque iterator (void*) for vector `vector_index`; to be used
 *  with get_next_feature() and released with free_feature_iterator(). */
00481 virtual void* get_feature_iterator(int32_t vector_index);
00482
/** Advances `iterator`, writing the next feature's index and value through
 *  the reference parameters. Returns a bool (presumably false once the vector
 *  is exhausted -- confirm). */
00493 virtual bool get_next_feature(int32_t& index, float64_t& value, void* iterator);
00494
/** Releases an iterator obtained from get_feature_iterator(). */
00500 virtual void free_feature_iterator(void* iterator);
00501
/** Takes a vector of indices and returns a CFeatures pointer; by its name,
 *  a copy restricted to those indices. */
00508 virtual CFeatures* copy_subset(SGVector<index_t> indices);
00509
/** Returns the constant class name string "SparseFeatures". */
00511 virtual const char* get_name() const { return "SparseFeatures"; }
00512
00513 protected:
/** Overridable hook taking a vector number `num`, a length out-parameter
 *  `len` and an optional `target` buffer (default NULL); returns a pointer
 *  to SGSparseVectorEntry<ST>. Presumably computes vector `num` on demand
 *  into `target` when one is supplied -- confirm. */
00524 virtual SGSparseVectorEntry<ST>* compute_sparse_feature_vector(int32_t num,
00525 int32_t& len, SGSparseVectorEntry<ST>* target=NULL);
00526
00527 private:
/* Private initialisation helper (presumably shared by the constructors). */
00528 void init();
00529
00530 protected:
00531
/* total number of vectors */
00533 int32_t num_vectors;
00534
/* total number of features */
00536 int32_t num_features;
00537
/* array of sparse vectors (presumably num_vectors entries -- confirm) */
00539 SGSparseVector<ST>* sparse_feature_matrix;
00540
/* cache of sparse vector entries; may presumably be NULL when unused */
00542 CCache< SGSparseVectorEntry<ST> >* feature_cache;
00543 };
00544 }
00545 #endif