Go to the documentation of this file.
00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 #ifndef _SPARSEFEATURES__H__
00015 #define _SPARSEFEATURES__H__
00016
00017 #include <shogun/lib/common.h>
00018 #include <shogun/lib/Cache.h>
00019 #include <shogun/io/File.h>
00020
00021 #include <shogun/features/Labels.h>
00022 #include <shogun/features/Features.h>
00023 #include <shogun/features/DotFeatures.h>
00024 #include <shogun/features/SimpleFeatures.h>
00025
00026 namespace shogun
00027 {
00028
/* Forward declarations of the types named in the CSparseFeatures interface
 * declared in this header (CFile*, CLabels*, CFeatures*, CDotFeatures*,
 * CSimpleFeatures<ST>*). CDotFeatures is also used as the base class, which
 * needs its full definition; that is expected to come from the
 * <shogun/features/DotFeatures.h> include above. */
00029 class CFile;
00030 class CLabels;
00031 class CFeatures;
00032 class CDotFeatures;
00033 template <class ST> class CSimpleFeatures;
00034
/**
 * Feature container, templated on the entry type ST, that derives from
 * CDotFeatures and stores its data as an array of SGSparseVector<ST>
 * (sparse_feature_matrix) together with a vector count, a feature count and
 * a pointer to a CCache of SGSparseVectorEntry<ST>.
 *
 * NOTE(review): only declarations are visible in this header listing. The
 * notes below describe what the signatures establish; anything about
 * ownership, units or exact arithmetic is marked as an assumption and must be
 * confirmed against the implementation.
 */
00052 template <class ST> class CSparseFeatures : public CDotFeatures
00053 {
00054 public:
/** Constructor taking a single integer (default 0), so it also serves as the
 *  default constructor.
 *  NOTE(review): `size` is presumably the feature cache size - confirm. */
00059 CSparseFeatures(int32_t size=0);
00060
/** Construct from a raw array of sparse vectors.
 *  @param src      array of sparse vectors
 *  @param num_feat feature count (presumably the dimensionality - confirm)
 *  @param num_vec  vector count (presumably the length of src - confirm)
 *  @param copy     defaults to false; presumably selects copying src versus
 *                  referencing it - TODO confirm who owns src afterwards */
00069 CSparseFeatures(SGSparseVector<ST>* src,
00070 int32_t num_feat, int32_t num_vec,bool copy=false);
00071
/** Construct from an SGSparseMatrix<ST>, passed by value. */
00077 CSparseFeatures(SGSparseMatrix<ST> sparse);
00078
/** Construct from a dense SGMatrix<ST>, passed by value. */
00084 CSparseFeatures(SGMatrix<ST> dense);
00085
/** Copy constructor. */
00087 CSparseFeatures(const CSparseFeatures & orig);
00088
/** Construct from a CFile.
 *  NOTE(review): presumably reads the features from `loader` - confirm. */
00093 CSparseFeatures(CFile* loader);
00094
/** Virtual destructor. */
00096 virtual ~CSparseFeatures();
00097
/** Release helper for the sparse feature matrix.
 *  NOTE(review): how this differs from free_sparse_features() (e.g. whether
 *  the cache is also released) is not visible here - confirm. */
00102 void free_sparse_feature_matrix();
00103
/** Release helper for the sparse features; see the note on
 *  free_sparse_feature_matrix() regarding the difference between the two. */
00108 void free_sparse_features();
00109
/** Return a CFeatures* duplicate of this object (const member).
 *  NOTE(review): presumably a newly allocated copy - confirm ownership. */
00114 virtual CFeatures* duplicate() const;
00115
/** Return one value of type ST addressed by (num, index).
 *  NOTE(review): presumably num = vector index, index = feature index. */
00125 ST get_feature(int32_t num, int32_t index);
00126
/** Return a raw ST* for vector `num` and report its length through `len`.
 *  NOTE(review): presumably a dense, caller-owned buffer - confirm. */
00135 ST* get_full_feature_vector(int32_t num, int32_t& len);
00136
/** SGVector<ST> overload of get_full_feature_vector for vector `num`. */
00142 SGVector<ST> get_full_feature_vector(int32_t num);
00143
/** Return an int32_t count for vector `num`.
 *  NOTE(review): by its name, the number of non-zero entries - confirm. */
00149 virtual int32_t get_nnz_features_for_vector(int32_t num);
00150
/** Return vector `num` as an SGSparseVector<ST>.
 *  NOTE(review): presumably paired with free_sparse_feature_vector() /
 *  free_feature_vector() - confirm the required release protocol. */
00160 SGSparseVector<ST> get_sparse_feature_vector(int32_t num);
00161
/** Static helper operating on two arrays of sparse entries.
 *  @param alpha scalar of type ST
 *  @param avec  first entry array, with `alen` entries
 *  @param bvec  second entry array, with `blen` entries
 *  NOTE(review): presumably returns alpha * <avec, bvec>, and may require
 *  both arrays to be sorted by feature index - confirm. */
00172 static ST sparse_dot(ST alpha, SGSparseVectorEntry<ST>* avec, int32_t alen,
00173 SGSparseVectorEntry<ST>* bvec, int32_t blen);
00174
/** ST-typed dense_dot overload (distinct from the virtual float64_t
 *  dense_dot declared further down).
 *  @param alpha scalar
 *  @param num   vector index
 *  @param vec   dense array with `dim` elements
 *  @param b     scalar
 *  NOTE(review): presumably alpha * <x_num, vec> + b - confirm. */
00187 ST dense_dot(ST alpha, int32_t num, ST* vec, int32_t dim, ST b);
00188
/** Update the non-const dense array `vec` (with `dim` elements) using
 *  vector `num`.
 *  NOTE(review): presumably vec += alpha * x_num, with abs_val (default
 *  false) selecting absolute values - confirm. */
00200 void add_to_dense_vec(float64_t alpha, int32_t num,
00201 float64_t* vec, int32_t dim, bool abs_val=false);
00202
/** Release a sparse vector belonging to index `num`. This has the same
 *  parameter list as free_feature_vector() below. */
00210 void free_sparse_feature_vector(SGSparseVector<ST> vec, int32_t num);
00211
/** Return the sparse vector array as a raw pointer and report the counts
 *  through the two reference parameters.
 *  NOTE(review): presumably exposes the internal array without copying it. */
00221 SGSparseVector<ST>* get_sparse_feature_matrix(int32_t &num_feat, int32_t &num_vec);
00222
/** SGSparseMatrix<ST> overload of get_sparse_feature_matrix. */
00230 SGSparseMatrix<ST> get_sparse_feature_matrix();
00231
/** Static cleanup helper for an array of `num_vec` sparse vectors.
 *  NOTE(review): presumably frees arrays returned by the raw-pointer
 *  get_transposed() overload - confirm. */
00237 static void clean_tsparse(SGSparseVector<ST>* sfm, int32_t num_vec);
00238
/** Return the transposed data as a CSparseFeatures<ST>*.
 *  NOTE(review): presumably a newly allocated object - confirm ownership. */
00245 CSparseFeatures<ST>* get_transposed();
00246
/** Raw-array overload of get_transposed; reports the counts of the result
 *  through the two reference parameters. */
00258 SGSparseVector<ST>* get_transposed(int32_t &num_feat, int32_t &num_vec);
00259
/** Set the data from an SGSparseMatrix<ST>, passed by value. */
00267 void set_sparse_feature_matrix(SGSparseMatrix<ST> sm);
00268
/** Return the data as a dense SGMatrix<ST>. */
00275 SGMatrix<ST> get_full_feature_matrix();
00276
/** Set the data from a dense SGMatrix<ST>; returns bool.
 *  NOTE(review): presumably true on success - confirm. */
00286 virtual bool set_full_feature_matrix(SGMatrix<ST> full);
00287
/** Apply preprocessing; returns bool.
 *  @param force_preprocessing defaults to false; presumably re-applies
 *         preprocessors that were already applied - confirm. */
00295 virtual bool apply_preprocessor(bool force_preprocessing=false);
00296
/** Return an int32_t size.
 *  NOTE(review): presumably sizeof(ST) - confirm. */
00301 virtual int32_t get_size();
00302
/** Fill this object from a CSimpleFeatures<ST>; returns bool. */
00310 bool obtain_from_simple(CSimpleFeatures<ST>* sf);
00311
/** Return the number of vectors (const member). */
00316 virtual int32_t get_num_vectors() const;
00317
/** Return the number of features (non-const, non-virtual member). */
00322 int32_t get_num_features();
00323
/** Set the number of features to `num`; returns an int32_t.
 *  NOTE(review): the meaning of the return value (previous or new count) is
 *  not visible here - confirm. */
00335 int32_t set_num_features(int32_t num);
00336
/** Return the EFeatureClass of this container. */
00341 virtual EFeatureClass get_feature_class();
00342
/** Return the EFeatureType of this container.
 *  NOTE(review): presumably specialised per ST in the implementation. */
00347 virtual EFeatureType get_feature_type();
00348
/** Release a sparse vector belonging to index `num`. This has the same
 *  parameter list as free_sparse_feature_vector() above. */
00356 void free_feature_vector(SGSparseVector<ST> vec, int32_t num);
00357
/** Return the number of non-zero entries as a 64-bit count.
 *  NOTE(review): presumably summed over all vectors - confirm. */
00362 int64_t get_num_nonzero_entries();
00363
/** Takes a float64_t array `sq` and returns a float64_t*.
 *  NOTE(review): presumably fills sq with one squared norm per vector and
 *  returns sq - confirm the required length. */
00371 float64_t* compute_squared(float64_t* sq);
00372
/** Compute a float64_t from two CSparseFeatures<float64_t> operands. The
 *  operand type is fixed to float64_t regardless of ST.
 *  @param lhs, sq_lhs, idx_a left features, float64_t array, vector index
 *  @param rhs, sq_rhs, idx_b right features, float64_t array, vector index
 *  NOTE(review): presumably ||a-b||^2 computed from precomputed squared
 *  norms in sq_lhs / sq_rhs - confirm. */
00387 float64_t compute_squared_norm(CSparseFeatures<float64_t>* lhs,
00388 float64_t* sq_lhs, int32_t idx_a,
00389 CSparseFeatures<float64_t>* rhs, float64_t* sq_rhs,
00390 int32_t idx_b);
00391
/** Load using the given CFile. */
00398 void load(CFile* loader);
00399
/** Save using the given CFile. */
00406 void save(CFile* writer);
00407
/** Load from the file named `fname` and return a CLabels*.
 *  @param do_sort_features defaults to true; presumably calls
 *         sort_features() after loading - confirm.
 *  NOTE(review): `fname` is a non-const char*; ownership of the returned
 *  labels is not visible here. */
00417 CLabels* load_svmlight_file(char* fname, bool do_sort_features=true);
00418
/** Sort the features.
 *  NOTE(review): presumably orders the entries of each vector by feature
 *  index - confirm. */
00424 void sort_features();
00425
/** Write the features and the given labels to the file named `fname`;
 *  returns bool.
 *  NOTE(review): presumably true on success - confirm. */
00434 bool write_svmlight_file(char* fname, CLabels* label);
00435
/** Return the dimensionality of the feature space (const member). */
00443 virtual int32_t get_dim_feature_space() const;
00444
/** Dot product between vector `vec_idx1` of this object and vector
 *  `vec_idx2` of `df`, returned as float64_t. */
00454 virtual float64_t dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2);
00455
/** Dot product between vector `vec_idx1` and the dense array `vec2`, which
 *  has `vec2_len` elements. */
00464 virtual float64_t dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len);
00465
/* The iterator struct below is excluded from generated documentation by the
 * DOXYGEN_SHOULD_SKIP_THIS guard. */
00466 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00467
/** Iteration state: a sparse vector plus an integer position. */
00468 struct sparse_feature_iterator
00469 {
/** The sparse vector being iterated. */
00471 SGSparseVector<ST> sv;
00472
/** Integer position; presumably the index of the next entry in sv. */
00474 int32_t index;
00475
/** Print the features pointer, vec_index and num_feat_entries of sv, and
 *  index, via SG_SPRINT. */
00477 void print_info()
00478 {
00479 SG_SPRINT("sv=%p, vidx=%d, num_feat_entries=%d, index=%d\n",
00480 sv.features, sv.vec_index, sv.num_feat_entries, index);
00481 }
00482 };
00483 #endif
00484
/** Create an opaque iterator for vector `vector_index`.
 *  NOTE(review): presumably a heap-allocated sparse_feature_iterator that
 *  must be released with free_feature_iterator() - confirm. */
00496 virtual void* get_feature_iterator(int32_t vector_index);
00497
/** Advance `iterator`, writing the next entry into `index` and `value`.
 *  NOTE(review): presumably returns false once the vector is exhausted. */
00508 virtual bool get_next_feature(int32_t& index, float64_t& value, void* iterator);
00509
/** Release an iterator obtained from get_feature_iterator(). */
00515 virtual void free_feature_iterator(void* iterator);
00516
/** Return a CFeatures* built from the given index subset.
 *  NOTE(review): presumably a newly allocated copy - confirm ownership. */
00523 virtual CFeatures* copy_subset(SGVector<index_t> indices);
00524
/** Return the class name string "SparseFeatures". */
00526 inline virtual const char* get_name() const { return "SparseFeatures"; }
00527
00528 protected:
/** Overridable hook that produces the sparse entries of vector `num`.
 *  @param num    vector index
 *  @param len    receives an entry count through the reference
 *  @param target optional destination buffer, defaults to NULL
 *  NOTE(review): presumably allocates when target is NULL - confirm. */
00539 virtual SGSparseVectorEntry<ST>* compute_sparse_feature_vector(int32_t num,
00540 int32_t& len, SGSparseVectorEntry<ST>* target=NULL);
00541
00542 private:
/** Private initialisation helper.
 *  NOTE(review): presumably shared by the constructors - confirm. */
00543 void init();
00544
00545 protected:
00546
/** Number of vectors. */
00548 int32_t num_vectors;
00549
/** Number of features. */
00551 int32_t num_features;
00552
/** Array of sparse vectors holding the feature data. */
00554 SGSparseVector<ST>* sparse_feature_matrix;
00555
/** Pointer to a cache of sparse vector entries. */
00557 CCache< SGSparseVectorEntry<ST> >* feature_cache;
00558 };
00559 }
00560 #endif