SparseFeatures.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2010 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Subset support written (W) 2011 Heiko Strathmann
00010  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00011  * Copyright (C) 2010 Berlin Institute of Technology
00012  */
00013 
00014 #ifndef _SPARSEFEATURES__H__
00015 #define _SPARSEFEATURES__H__
00016 
00017 #include <shogun/lib/common.h>
00018 #include <shogun/lib/Cache.h>
00019 #include <shogun/io/File.h>
00020 
00021 #include <shogun/features/Labels.h>
00022 #include <shogun/features/Features.h>
00023 #include <shogun/features/DotFeatures.h>
00024 #include <shogun/features/SimpleFeatures.h>
00025 
00026 namespace shogun
00027 {
00028 
/* Forward declarations of the types that appear in the CSparseFeatures
 * interface declared below (file loader, labels, feature base classes and
 * the dense feature template). */
00029 class CFile;
00030 class CLabels;
00031 class CFeatures;
00032 class CDotFeatures;
00033 template <class ST> class CSimpleFeatures;
00034 
/** @brief Feature container, derived from CDotFeatures, whose data is held as
 * an array of sparse vectors (SGSparseVector<ST>) plus the counters
 * num_vectors and num_features. ST is the type of a single feature value.
 *
 * NOTE(review): only the declarations are visible in this header listing; the
 * member notes below are derived from the signatures. Statements marked
 * "presumably" are inferred from names and must be confirmed against the
 * implementation.
 */
00052 template <class ST> class CSparseFeatures : public CDotFeatures
00053 {
00054     public:
              /** Constructor.
               *
               * @param size defaults to 0; presumably the size of the feature
               *        cache - TODO confirm
               */
00059         CSparseFeatures(int32_t size=0);
00060 
              /** Constructor taking an existing array of sparse vectors.
               *
               * @param src array of sparse vectors
               * @param num_feat number of features
               * @param num_vec number of vectors
               * @param copy defaults to false; presumably selects whether src
               *        is deep-copied rather than referenced - TODO confirm
               */
00069         CSparseFeatures(SGSparseVector<ST>* src,
00070                 int32_t num_feat, int32_t num_vec,bool copy=false);
00071 
              /** Constructor taking a sparse matrix.
               *
               * @param sparse sparse matrix, passed by value
               */
00077         CSparseFeatures(SGSparseMatrix<ST> sparse);
00078 
              /** Constructor taking a dense matrix.
               *
               * @param dense dense matrix, passed by value; presumably
               *        converted to the sparse representation - TODO confirm
               */
00084         CSparseFeatures(SGMatrix<ST> dense);
00085 
              /** Copy constructor. */
00087         CSparseFeatures(const CSparseFeatures & orig);
00088 
              /** Constructor taking a file object.
               *
               * @param loader file object; presumably the features are read
               *        from it - TODO confirm
               */
00093         CSparseFeatures(CFile* loader);
00094 
              /** Virtual destructor. */
00096         virtual ~CSparseFeatures();
00097 
              /** Presumably releases the sparse feature matrix only
               * - TODO confirm how it differs from free_sparse_features().
               */
00102         void free_sparse_feature_matrix();
00103 
              /** Presumably releases the sparse feature matrix together with
               * any cache - TODO confirm against implementation.
               */
00108         void free_sparse_features();
00109 
              /** @return a CFeatures pointer; presumably a newly allocated
               *         copy of this object - TODO confirm ownership
               */
00114         virtual CFeatures* duplicate() const;
00115 
              /** @param num presumably the vector index
               * @param index presumably the feature index within that vector
               * @return a single value of type ST
               */
00125         ST get_feature(int32_t num, int32_t index);
00126 
              /** Raw-pointer variant of get_full_feature_vector().
               *
               * @param num presumably the vector index
               * @param len output, passed by reference; presumably receives
               *        the length of the returned array
               * @return pointer to ST values; NOTE(review): ownership of the
               *         returned buffer is not visible here - confirm
               */
00135         ST* get_full_feature_vector(int32_t num, int32_t& len);
00136 
              /** SGVector variant of get_full_feature_vector().
               *
               * @param num presumably the vector index
               * @return the vector as SGVector<ST>
               */
00142         SGVector<ST> get_full_feature_vector(int32_t num);
00143 
              /** @param num presumably the vector index
               * @return presumably the number of non-zero entries of that
               *         vector
               */
00149         virtual int32_t get_nnz_features_for_vector(int32_t num);
00150 
              /** @param num presumably the vector index
               * @return the vector as SGSparseVector<ST>; see
               *         free_sparse_feature_vector() for the matching release
               *         call (pairing inferred from names - confirm)
               */
00160         SGSparseVector<ST> get_sparse_feature_vector(int32_t num);
00161 
              /** Static helper operating on two arrays of sparse entries.
               *
               * @param alpha scalar; presumably scales the result
               * @param avec first array of sparse entries
               * @param alen number of entries in avec
               * @param bvec second array of sparse entries
               * @param blen number of entries in bvec
               * @return value of type ST; presumably alpha times the dot
               *         product of the two vectors - TODO confirm
               */
00172         static ST sparse_dot(ST alpha, SGSparseVectorEntry<ST>* avec, int32_t alen,
00173                 SGSparseVectorEntry<ST>* bvec, int32_t blen);
00174 
              /** Dot product of a stored vector with a dense array of ST.
               *
               * @param alpha scalar; presumably scales the dot product
               * @param num presumably the index of the stored vector
               * @param vec dense array
               * @param dim length of vec
               * @param b scalar; presumably a bias added to the result
               * @return value of type ST
               */
00187         ST dense_dot(ST alpha, int32_t num, ST* vec, int32_t dim, ST b);
00188 
              /** @param alpha scalar; presumably scales the stored vector
               * @param num presumably the index of the stored vector
               * @param vec dense float64_t array that is presumably updated
               *        in place
               * @param dim length of vec
               * @param abs_val defaults to false; presumably adds absolute
               *        values when true - TODO confirm
               */
00200         void add_to_dense_vec(float64_t alpha, int32_t num,
00201                 float64_t* vec, int32_t dim, bool abs_val=false);
00202 
              /** @param vec sparse vector, presumably one obtained from
               *        get_sparse_feature_vector()
               * @param num presumably the index that vector was fetched with
               */
00210         void free_sparse_feature_vector(SGSparseVector<ST> vec, int32_t num);
00211 
              /** Raw-pointer accessor for the sparse feature matrix.
               *
               * @param num_feat output, passed by reference
               * @param num_vec output, passed by reference
               * @return pointer to an array of sparse vectors; NOTE(review):
               *         whether this is the internal array or a copy is not
               *         visible here
               */
00221         SGSparseVector<ST>* get_sparse_feature_matrix(int32_t &num_feat, int32_t &num_vec);
00222 
              /** @return the feature matrix as SGSparseMatrix<ST> */
00230         SGSparseMatrix<ST> get_sparse_feature_matrix();
00231 
              /** Static helper; presumably frees an array of sparse vectors
               * such as one returned by get_transposed() - TODO confirm.
               *
               * @param sfm array of sparse vectors
               * @param num_vec number of vectors in sfm
               */
00237         static void clean_tsparse(SGSparseVector<ST>* sfm, int32_t num_vec);
00238 
              /** @return pointer to a CSparseFeatures<ST>; presumably a new
               *         object holding the transposed matrix - TODO confirm
               */
00245         CSparseFeatures<ST>* get_transposed();
00246 
              /** Raw-pointer variant of get_transposed().
               *
               * @param num_feat output, passed by reference
               * @param num_vec output, passed by reference
               * @return pointer to an array of sparse vectors
               */
00258         SGSparseVector<ST>* get_transposed(int32_t &num_feat, int32_t &num_vec);
00259 
              /** @param sm sparse matrix, passed by value; presumably becomes
               *        the stored feature matrix
               */
00267         void set_sparse_feature_matrix(SGSparseMatrix<ST> sm);
00268 
              /** @return the features as a dense SGMatrix<ST> */
00275         SGMatrix<ST> get_full_feature_matrix();
00276 
              /** @param full dense matrix, passed by value
               * @return bool; presumably true on success - TODO confirm
               */
00286         virtual bool set_full_feature_matrix(SGMatrix<ST> full);
00287 
              /** @param force_preprocessing defaults to false
               * @return bool; presumably whether preprocessing was applied
               */
00295         virtual bool apply_preprocessor(bool force_preprocessing=false);
00296 
              /** @return int32_t; presumably the size in bytes of one feature
               *         value - TODO confirm
               */
00301         virtual int32_t get_size();
00302 
              /** @param sf dense features with the same element type ST
               * @return bool; presumably true on success
               */
00310         bool obtain_from_simple(CSimpleFeatures<ST>* sf);
00311 
              /** @return number of vectors */
00316         virtual int32_t  get_num_vectors() const;
00317 
              /** @return number of features */
00322         int32_t  get_num_features();
00323 
              /** @param num new number of features
               * @return int32_t; NOTE(review): whether this is the previous
               *         or the new value is not visible here - confirm
               */
00335         int32_t set_num_features(int32_t num);
00336 
              /** @return the feature class as EFeatureClass */
00341         virtual EFeatureClass get_feature_class();
00342 
              /** @return the feature type as EFeatureType */
00347         virtual EFeatureType get_feature_type();
00348 
              /** Same parameter list as free_sparse_feature_vector();
               * NOTE(review): relationship between the two is not visible
               * here.
               *
               * @param vec sparse vector
               * @param num presumably the vector index
               */
00356         void free_feature_vector(SGSparseVector<ST> vec, int32_t num);
00357 
              /** @return 64-bit count; presumably the total number of
               *         non-zero entries over all vectors
               */
00362         int64_t get_num_nonzero_entries();
00363 
              /** @param sq float64_t array; presumably filled with one squared
               *        norm per vector - TODO confirm required length
               * @return pointer to float64_t; presumably sq itself
               */
00371         float64_t* compute_squared(float64_t* sq);
00372 
              /** Operates on float64_t sparse features regardless of ST.
               *
               * @param lhs left-hand side features
               * @param sq_lhs presumably precomputed squared norms of lhs
               * @param idx_a presumably the vector index into lhs
               * @param rhs right-hand side features
               * @param sq_rhs presumably precomputed squared norms of rhs
               * @param idx_b presumably the vector index into rhs
               * @return float64_t; presumably the squared distance between
               *         the two selected vectors - TODO confirm
               */
00387         float64_t compute_squared_norm(CSparseFeatures<float64_t>* lhs,
00388                 float64_t* sq_lhs, int32_t idx_a,
00389                 CSparseFeatures<float64_t>* rhs, float64_t* sq_rhs,
00390                 int32_t idx_b);
00391 
              /** @param loader file object to load from */
00398         void load(CFile* loader);
00399 
              /** @param writer file object to save to */
00406         void save(CFile* writer);
00407 
              /** @param fname file name; presumably a file in svmlight format
               * @param do_sort_features defaults to true; presumably sorts
               *        the loaded features as sort_features() would
               * @return pointer to CLabels; presumably the labels read from
               *         the file - TODO confirm ownership
               */
00417         CLabels* load_svmlight_file(char* fname, bool do_sort_features=true);
00418 
              /** Presumably orders the entries of each sparse vector by
               * feature index - TODO confirm.
               */
00424         void sort_features();
00425 
              /** @param fname file name to write to
               * @param label labels; presumably written alongside the
               *        features
               * @return bool; presumably true on success
               */
00434         bool write_svmlight_file(char* fname, CLabels* label);
00435 
              /** @return dimensionality of the feature space as int32_t */
00443         virtual int32_t get_dim_feature_space() const;
00444 
              /** @param vec_idx1 presumably the vector index in this object
               * @param df other dot-feature object
               * @param vec_idx2 presumably the vector index in df
               * @return dot product as float64_t
               */
00454         virtual float64_t dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2);
00455 
              /** float64_t dense dot product (distinct overload from the
               * five-argument dense_dot() above).
               *
               * @param vec_idx1 presumably the index of the stored vector
               * @param vec2 dense array, not modified (const)
               * @param vec2_len length of vec2
               * @return dot product as float64_t
               */
00464         virtual float64_t dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len);
00465 
00466         #ifndef DOXYGEN_SHOULD_SKIP_THIS
00467 
              /** Iterator state: a sparse vector plus a position index.
               * Presumably the concrete type behind the void* handle used by
               * get_feature_iterator() and friends - TODO confirm.
               */
00468         struct sparse_feature_iterator
00469         {
                  /** sparse vector being iterated */
00471             SGSparseVector<ST> sv;
00472 
                  /** position index; presumably the current entry within sv */
00474             int32_t index;
00475 
                  /** Prints the address of sv.features, sv.vec_index,
                   * sv.num_feat_entries and index through SG_SPRINT.
                   */
00477             void print_info()
00478             {
00479                 SG_SPRINT("sv=%p, vidx=%d, num_feat_entries=%d, index=%d\n",
00480                         sv.features, sv.vec_index, sv.num_feat_entries, index);
00481             }
00482         };
00483         #endif
00484 
              /** @param vector_index index of the vector to iterate over
               * @return opaque iterator handle; presumably to be passed to
               *         get_next_feature() and released with
               *         free_feature_iterator()
               */
00496         virtual void* get_feature_iterator(int32_t vector_index);
00497 
              /** @param index output, passed by reference; presumably the
               *        feature index of the entry
               * @param value output, passed by reference; presumably the
               *        entry's value
               * @param iterator handle from get_feature_iterator()
               * @return bool; presumably false once the vector is exhausted
               */
00508         virtual bool get_next_feature(int32_t& index, float64_t& value, void* iterator);
00509 
              /** @param iterator handle from get_feature_iterator() */
00515         virtual void free_feature_iterator(void* iterator);
00516 
              /** @param indices vector of indices, passed by value
               * @return pointer to CFeatures; presumably a new object
               *         containing only the selected vectors - TODO confirm
               */
00523         virtual CFeatures* copy_subset(SGVector<index_t> indices);
00524 
              /** @return the constant string "SparseFeatures" */
00526         inline virtual const char* get_name() const { return "SparseFeatures"; }
00527 
00528     protected:
              /** Overridable hook for producing the entries of one vector.
               *
               * @param num presumably the vector index
               * @param len output, passed by reference; presumably the number
               *        of entries produced
               * @param target defaults to NULL; presumably an optional
               *        caller-supplied output buffer - TODO confirm
               * @return pointer to an array of sparse entries
               */
00539         virtual SGSparseVectorEntry<ST>* compute_sparse_feature_vector(int32_t num,
00540             int32_t& len, SGSparseVectorEntry<ST>* target=NULL);
00541 
00542     private:
              /** Private helper; presumably shared initialisation used by the
               * constructors - TODO confirm.
               */
00543         void init();
00544 
00545     protected:
00546 
              /** number of vectors */
00548         int32_t num_vectors;
00549 
              /** number of features */
00551         int32_t num_features;
00552 
              /** array of sparse vectors holding the feature data */
00554         SGSparseVector<ST>* sparse_feature_matrix;
00555 
              /** cache of sparse vector entries */
00557         CCache< SGSparseVectorEntry<ST> >* feature_cache;
00558 };
00559 }
00560 #endif /* _SPARSEFEATURES__H__ */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation