SparseFeatures.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2010 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Written (W) 2011-2012 Heiko Strathmann
00010  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00011  * Copyright (C) 2010 Berlin Institute of Technology
00012  */
00013 
00014 #ifndef _SPARSEFEATURES__H__
00015 #define _SPARSEFEATURES__H__
00016 
00017 #include <shogun/lib/common.h>
00018 #include <shogun/lib/DataType.h>
00019 #include <shogun/lib/SGSparseMatrix.h>
00020 #include <shogun/lib/Cache.h>
00021 #include <shogun/io/File.h>
00022 
00023 #include <shogun/labels/RegressionLabels.h>
00024 #include <shogun/features/Features.h>
00025 #include <shogun/features/DotFeatures.h>
00026 #include <shogun/features/DenseFeatures.h>
00027 
00028 namespace shogun
00029 {
00030 
/* Forward declarations for the types named in the CSparseFeatures
 * declaration that follows (constructor/loader arguments, label return
 * type, base class, dense counterpart and sparse matrix wrapper). */
00031 class CFile;
00032 class CRegressionLabels;
00033 class CFeatures;
00034 class CDotFeatures;
00035 template <class ST> class CDenseFeatures;
00036 template <class ST> class SGSparseMatrix;
00037 
/**
 * Feature container template that derives from CDotFeatures and keeps its
 * data as an array of SGSparseVector<ST> (see the protected members
 * sparse_feature_matrix, num_vectors, num_features and feature_cache).
 *
 * This header only declares the interface; the member definitions are not
 * visible here. NOTE(review): remarks worded "presumably" are inferred from
 * names and signatures only - confirm them against the implementation.
 *
 * @tparam ST element type used for the stored sparse entries
 */
00056 template <class ST> class CSparseFeatures : public CDotFeatures
00057 {
00058     public:
              /** Constructor.
               *
               * @param size defaults to 0; presumably the size of the feature
               *        cache - TODO confirm
               */
00063         CSparseFeatures(int32_t size=0);
00064 
              /** Constructor taking a raw array of sparse vectors.
               *
               * @param src pointer to the sparse vectors
               * @param num_feat presumably the number of features (dimensions)
               * @param num_vec presumably the number of vectors in src
               * @param copy defaults to false; presumably selects whether src
               *        is copied rather than referenced - TODO confirm
               */
00073         CSparseFeatures(SGSparseVector<ST>* src,
00074                 int32_t num_feat, int32_t num_vec,bool copy=false);
00075 
              /** Constructor taking a sparse matrix (passed by value).
               *
               * @param sparse sparse matrix to build the features from
               */
00081         CSparseFeatures(SGSparseMatrix<ST> sparse);
00082 
              /** Constructor taking a dense matrix (passed by value).
               *
               * @param dense dense matrix; presumably converted into the
               *        sparse representation - TODO confirm
               */
00088         CSparseFeatures(SGMatrix<ST> dense);
00089 
              /** Copy constructor.
               *
               * @param orig object to copy from
               */
00091         CSparseFeatures(const CSparseFeatures & orig);
00092 
              /** Constructor taking a file object.
               *
               * @param loader CFile instance; presumably the features are
               *        read from it - TODO confirm
               */
00097         CSparseFeatures(CFile* loader);
00098 
              /** Virtual destructor. */
00100         virtual ~CSparseFeatures();
00101 
              /** Presumably releases sparse_feature_matrix only.
               * NOTE(review): exact difference to free_sparse_features() is
               * not visible in this header.
               */
00106         void free_sparse_feature_matrix();
00107 
              /** Presumably releases the sparse feature data held by this
               * object (possibly including the cache) - TODO confirm.
               */
00112         void free_sparse_features();
00113 
              /** Duplicate this feature object.
               *
               * @return pointer to a CFeatures object; ownership semantics are
               *         not visible here
               */
00118         virtual CFeatures* duplicate() const;
00119 
              /** Get a single feature value.
               *
               * @param num presumably the index of the vector
               * @param index presumably the index of the feature inside that
               *        vector
               * @return value of type ST
               */
00129         ST get_feature(int32_t num, int32_t index);
00130 
              /** Get a feature vector as a raw ST array.
               *
               * @param num presumably the index of the vector
               * @param len output parameter (non-const reference); presumably
               *        receives the length of the returned array
               * @return pointer to ST; NOTE(review): ownership of the buffer
               *         is not visible here
               */
00139         ST* get_full_feature_vector(int32_t num, int32_t& len);
00140 
              /** Get a feature vector as an SGVector<ST>.
               *
               * @param num presumably the index of the vector
               * @return the vector, returned by value
               */
00146         SGVector<ST> get_full_feature_vector(int32_t num);
00147 
              /** Get the number of non-zero features of one vector
               * (as suggested by the name - TODO confirm).
               *
               * @param num presumably the index of the vector
               * @return count as int32_t
               */
00153         virtual int32_t get_nnz_features_for_vector(int32_t num);
00154 
              /** Get one vector in sparse form.
               *
               * @param num presumably the index of the vector
               * @return SGSparseVector<ST> by value; presumably to be released
               *         with free_sparse_feature_vector() - TODO confirm
               */
00164         SGSparseVector<ST> get_sparse_feature_vector(int32_t num);
00165 
              /** Dot-product style helper operating on values of type ST.
               * Non-virtual overload; distinct from the virtual
               * float64_t dense_dot() declared further below.
               *
               * @param alpha scalar of type ST; presumably a scaling factor
               * @param num presumably the index of the sparse vector
               * @param vec dense array of ST
               * @param dim presumably the length of vec
               * @param b scalar of type ST; presumably an additive bias
               * @return result of type ST
               */
00178         ST dense_dot(ST alpha, int32_t num, ST* vec, int32_t dim, ST b);
00179 
              /** Presumably adds alpha times sparse vector num onto the dense
               * array vec - TODO confirm.
               *
               * @param alpha scalar factor
               * @param num presumably the index of the sparse vector
               * @param vec dense float64_t array that is written to
               * @param dim presumably the length of vec
               * @param abs_val defaults to false; presumably selects adding
               *        absolute values
               */
00191         void add_to_dense_vec(float64_t alpha, int32_t num,
00192                 float64_t* vec, int32_t dim, bool abs_val=false);
00193 
              /** Presumably the counterpart of get_sparse_feature_vector().
               *
               * @param num presumably the index of the vector to release
               */
00200         void free_sparse_feature_vector(int32_t num);
00201 
              /** Get the sparse feature matrix as a raw pointer.
               *
               * @param num_feat output parameter (reference); presumably
               *        receives the number of features
               * @param num_vec output parameter (reference); presumably
               *        receives the number of vectors
               * @return pointer to SGSparseVector<ST>; NOTE(review): whether
               *         this is the internal array or a copy is not visible
               */
00211         SGSparseVector<ST>* get_sparse_feature_matrix(int32_t &num_feat, int32_t &num_vec);
00212 
              /** Get the sparse feature matrix wrapped as SGSparseMatrix<ST>.
               *
               * @return sparse matrix, returned by value
               */
00220         SGSparseMatrix<ST> get_sparse_feature_matrix();
00221 
              /** Get a transposed version as a feature object.
               *
               * @return pointer to CSparseFeatures<ST>; presumably a newly
               *         created object - TODO confirm ownership
               */
00228         CSparseFeatures<ST>* get_transposed();
00229 
              /** Get a transposed version as a raw sparse vector array.
               *
               * @param num_feat output parameter (reference)
               * @param num_vec output parameter (reference)
               * @return pointer to SGSparseVector<ST>; NOTE(review): ownership
               *         of the returned array is not visible here
               */
00241         SGSparseVector<ST>* get_transposed(int32_t &num_feat, int32_t &num_vec);
00242 
              /** Set the sparse feature matrix.
               *
               * @param sm sparse matrix, passed by value
               */
00250         void set_sparse_feature_matrix(SGSparseMatrix<ST> sm);
00251 
              /** Get the features as a dense matrix.
               *
               * @return SGMatrix<ST> by value
               */
00258         SGMatrix<ST> get_full_feature_matrix();
00259 
              /** Set the features from a dense matrix.
               *
               * @param full dense matrix, passed by value
               * @return bool; presumably true on success
               */
00269         virtual bool set_full_feature_matrix(SGMatrix<ST> full);
00270 
              /** Apply preprocessor(s) to the features (per the name).
               *
               * @param force_preprocessing defaults to false; presumably
               *        forces re-application - TODO confirm
               * @return bool; presumably true on success
               */
00278         virtual bool apply_preprocessor(bool force_preprocessing=false);
00279 
              /** Get a size value.
               *
               * @return int32_t; NOTE(review): presumably the memory size of
               *         one feature element - confirm against definition
               */
00284         virtual int32_t get_size() const;
00285 
              /** Obtain the sparse features from dense features.
               *
               * @param sf pointer to dense features of the same element type
               * @return bool; presumably true on success
               */
00293         bool obtain_from_simple(CDenseFeatures<ST>* sf);
00294 
              /** Get the number of feature vectors.
               *
               * @return int32_t; presumably the num_vectors member
               */
00299         virtual int32_t  get_num_vectors() const;
00300 
              /** Get the number of features.
               * NOTE(review): not const-qualified, unlike get_num_vectors().
               *
               * @return int32_t; presumably the num_features member
               */
00305         int32_t  get_num_features();
00306 
              /** Set the number of features.
               *
               * @param num new number of features
               * @return int32_t; NOTE(review): meaning of the return value is
               *         not visible here (possibly the previous number)
               */
00318         int32_t set_num_features(int32_t num);
00319 
              /** Get the feature class identifier.
               *
               * @return value of type EFeatureClass
               */
00324         virtual EFeatureClass get_feature_class() const;
00325 
              /** Get the feature type identifier.
               *
               * @return value of type EFeatureType
               */
00330         virtual EFeatureType get_feature_type() const;
00331 
              /** Free a feature vector.
               *
               * @param num presumably the index of the vector to release
               */
00338         void free_feature_vector(int32_t num);
00339 
              /** Get the total number of non-zero entries (per the name).
               *
               * @return count as int64_t
               */
00344         int64_t get_num_nonzero_entries();
00345 
              /** Compute squared values into / from the array sq.
               *
               * @param sq float64_t array; presumably filled with one squared
               *        norm per vector - TODO confirm
               * @return float64_t pointer; presumably sq itself
               */
00353         float64_t* compute_squared(float64_t* sq);
00354 
              /** Compute a squared norm from two feature objects.
               * Note that the arguments are fixed to
               * CSparseFeatures<float64_t> regardless of ST.
               *
               * @param lhs left-hand side features
               * @param sq_lhs float64_t array associated with lhs; presumably
               *        precomputed squared values
               * @param idx_a presumably the vector index into lhs
               * @param rhs right-hand side features
               * @param sq_rhs float64_t array associated with rhs
               * @param idx_b presumably the vector index into rhs
               * @return float64_t result
               */
00369         float64_t compute_squared_norm(CSparseFeatures<float64_t>* lhs,
00370                 float64_t* sq_lhs, int32_t idx_a,
00371                 CSparseFeatures<float64_t>* rhs, float64_t* sq_rhs,
00372                 int32_t idx_b);
00373 
              /** Load features through a file object.
               *
               * @param loader CFile instance to read from
               */
00380         void load(CFile* loader);
00381 
              /** Save features through a file object.
               *
               * @param writer CFile instance to write to
               */
00388         void save(CFile* writer);
00389 
              /** Load features from an svmlight file (per the name).
               *
               * @param fname file name
               * @param do_sort_features defaults to true; presumably triggers
               *        sort_features() after loading - TODO confirm
               * @return pointer to CRegressionLabels; presumably the labels
               *         read from the file
               */
00399         CRegressionLabels* load_svmlight_file(char* fname, bool do_sort_features=true);
00400 
              /** Sort the features; NOTE(review): sort key (presumably the
               * feature index within each vector) is not visible here.
               */
00406         void sort_features();
00407 
              /** Write features to an svmlight file (per the name).
               *
               * @param fname file name
               * @param label labels passed alongside the features
               * @return bool; presumably true on success
               */
00416         bool write_svmlight_file(char* fname, CRegressionLabels* label);
00417 
              /** Get the dimensionality of the feature space.
               *
               * @return int32_t dimension
               */
00425         virtual int32_t get_dim_feature_space() const;
00426 
              /** Dot product between a vector of this object and a vector of
               * another CDotFeatures object (as suggested by the signature).
               *
               * @param vec_idx1 presumably the vector index in this object
               * @param df the other feature object
               * @param vec_idx2 presumably the vector index in df
               * @return float64_t result
               */
00436         virtual float64_t dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2);
00437 
              /** Dot product between a vector of this object and a dense
               * float64_t array (as suggested by the signature).
               *
               * @param vec_idx1 presumably the vector index in this object
               * @param vec2 dense array (read-only)
               * @param vec2_len presumably the length of vec2
               * @return float64_t result
               */
00446         virtual float64_t dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len);
00447 
00448         #ifndef DOXYGEN_SHOULD_SKIP_THIS
00449 
              /** Iterator state record; compiled only when
               * DOXYGEN_SHOULD_SKIP_THIS is not defined. Presumably the
               * object behind the void* handed out by get_feature_iterator().
               */
00450         struct sparse_feature_iterator
00451         {
                  /** sparse vector associated with the iterator */
00453             SGSparseVector<ST> sv;
00454 
                  /** index of the vector (per the name) */
00456             int32_t vector_index;
00457 
                  /** index; presumably the current position inside sv */
00459             int32_t index;
00460 
                  /** Print sv.features, vector_index, sv.num_feat_entries and
                   * index through SG_SPRINT.
                   */
00462             void print_info()
00463             {
00464                 SG_SPRINT("sv=%p, vidx=%d, num_feat_entries=%d, index=%d\n",
00465                         sv.features, vector_index, sv.num_feat_entries, index);
00466             }
00467         };
00468         #endif
00469 
              /** Create a feature iterator for one vector.
               *
               * @param vector_index index of the vector to iterate over
               * @return opaque void* handle; presumably to be passed to
               *         get_next_feature() and free_feature_iterator()
               */
00481         virtual void* get_feature_iterator(int32_t vector_index);
00482 
              /** Advance an iterator.
               *
               * @param index output parameter (reference); presumably the
               *        feature index of the current entry
               * @param value output parameter (reference); presumably the
               *        value of the current entry
               * @param iterator handle obtained from get_feature_iterator()
               * @return bool; presumably false once no entry is left
               */
00493         virtual bool get_next_feature(int32_t& index, float64_t& value, void* iterator);
00494 
              /** Release an iterator.
               *
               * @param iterator handle obtained from get_feature_iterator()
               */
00500         virtual void free_feature_iterator(void* iterator);
00501 
              /** Create a feature object from a subset of indices.
               *
               * @param indices index vector, passed by value
               * @return pointer to CFeatures; presumably a new object holding
               *         only the selected vectors - TODO confirm
               */
00508         virtual CFeatures* copy_subset(SGVector<index_t> indices);
00509 
              /** @return the fixed object name "SparseFeatures" */
00511         virtual const char* get_name() const { return "SparseFeatures"; }
00512 
00513     protected:
              /** Compute one sparse feature vector; virtual hook for
               * subclasses.
               *
               * @param num presumably the index of the vector
               * @param len output parameter (reference); presumably the
               *        number of entries produced
               * @param target defaults to NULL; presumably an optional
               *        destination buffer - TODO confirm
               * @return pointer to SGSparseVectorEntry<ST>
               */
00524         virtual SGSparseVectorEntry<ST>* compute_sparse_feature_vector(int32_t num,
00525             int32_t& len, SGSparseVectorEntry<ST>* target=NULL);
00526 
00527     private:
              /** Private initialisation helper; presumably shared by the
               * constructors - TODO confirm.
               */
00528         void init();
00529 
00530     protected:
00531 
              /** number of vectors (per the name) */
00533         int32_t num_vectors;
00534 
              /** number of features (per the name) */
00536         int32_t num_features;
00537 
              /** raw array of sparse vectors holding the feature data */
00539         SGSparseVector<ST>* sparse_feature_matrix;
00540 
              /** pointer to a cache of sparse vector entries */
00542         CCache< SGSparseVectorEntry<ST> >* feature_cache;
00543 };
00544 }
00545 #endif /* _SPARSEFEATURES__H__ */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation