Go to the documentation of this file.
00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #ifndef _STREAMING_SPARSEFEATURES__H__
00011 #define _STREAMING_SPARSEFEATURES__H__
00012
00013 #include <shogun/lib/common.h>
00014 #include <shogun/mathematics/Math.h>
00015 #include <shogun/features/streaming/StreamingDotFeatures.h>
00016 #include <shogun/lib/DataType.h>
00017 #include <shogun/io/streaming/InputParser.h>
00018
00019 namespace shogun
00020 {
/** @brief Class template CStreamingSparseFeatures<T>, publicly derived from
 * CStreamingDotFeatures.
 *
 * The class owns a CInputParser over SGSparseVectorEntry<T> plus a set of
 * "current_*" members (vector, entries pointer, index, label, length, feature
 * count) describing one example at a time.
 *
 * NOTE(review): only declarations are visible here; the behavioural notes
 * below that are marked "presumably" are inferred from names and signatures
 * and must be confirmed against the implementation file.
 *
 * @tparam T element type of the sparse entries (used in SGSparseVector<T>
 *           and SGSparseVectorEntry<T>)
 */
00043 template <class T> class CStreamingSparseFeatures : public CStreamingDotFeatures
00044 {
00045 public:
00046
/** Default constructor (takes no arguments).
 * Presumably delegates to the private init() -- TODO confirm.
 */
00054 CStreamingSparseFeatures();
00055
/** Constructor taking the same three arguments as the private
 * init(file, is_labelled, size) overload.
 *
 * @param file        streaming file object (CStreamingFile*)
 * @param is_labelled presumably whether the examples carry labels -- confirm
 * @param size        presumably a buffer/ring size for the parser -- confirm
 */
00064 CStreamingSparseFeatures(CStreamingFile* file,
00065 bool is_labelled,
00066 int32_t size);
00067
/** Virtual destructor. */
00073 virtual ~CStreamingSparseFeatures();
00074
/** Virtual hook; presumably selects the reader used for unlabelled
 * vectors -- confirm against the implementation.
 */
00084 virtual void set_vector_reader();
00085
/** Virtual hook; presumably selects the reader used for vectors that are
 * accompanied by a label -- confirm against the implementation.
 */
00095 virtual void set_vector_and_label_reader();
00096
/** Virtual; presumably starts the member `parser` -- confirm. */
00102 virtual void start_parser();
00103
/** Virtual; presumably stops the member `parser` -- confirm. */
00109 virtual void end_parser();
00110
/** Virtual; presumably fetches the next example into the current_* members.
 *
 * @return bool; presumably true when an example was obtained -- confirm
 */
00119 virtual bool get_next_example();
00120
/** Non-virtual accessor for a single feature value.
 *
 * @param index feature index (int32_t); valid range not visible here
 * @return value of type T
 */
00127 T get_feature(int32_t index);
00128
/** Non-virtual accessor returning an SGSparseVector<T> by value.
 * Presumably returns the member `current_sgvector` -- confirm.
 */
00134 SGSparseVector<T> get_vector();
00135
/** Virtual accessor returning a float64_t label.
 * Presumably returns the member `current_label` -- confirm.
 */
00143 virtual float64_t get_label();
00144
/** Virtual; presumably signals that the current example is no longer
 * needed so the parser can reuse its storage -- confirm.
 */
00151 virtual void release_example();
00152
/** Virtual; presumably rewinds the underlying stream -- confirm. */
00157 virtual void reset_stream();
00158
/** Non-virtual setter for the number of features.
 *
 * @param num new number of features
 * @return int32_t; meaning of the returned value is not visible here
 *         (presumably the previous or resulting count -- confirm)
 */
00170 int32_t set_num_features(int32_t num);
00171
/** Virtual, const.
 *
 * @return dimensionality of the feature space as int32_t
 */
00179 virtual int32_t get_dim_feature_space() const;
00180
/** Virtual, float32_t overload. Both arguments are non-const references, so
 * the implementation is able to replace the buffer pointer and update its
 * length.
 *
 * @param vec reference to a float32_t buffer pointer
 * @param len reference to the buffer length
 */
00189 virtual void expand_if_required(float32_t*& vec, int32_t &len);
00190
/** Virtual, float64_t overload of expand_if_required(); same parameter
 * shape as the float32_t version.
 *
 * @param vec reference to a float64_t buffer pointer
 * @param len reference to the buffer length
 */
00199 virtual void expand_if_required(float64_t*& vec, int32_t &len);
00200
/** Virtual dot product against another streaming feature object.
 *
 * @param df other CStreamingDotFeatures instance
 * @return result as float32_t
 */
00211 virtual float32_t dot(CStreamingDotFeatures *df);
00212
/** Static helper operating on two arrays of sparse entries.
 * Presumably computes alpha times the dot product of the two sparse
 * vectors -- confirm.
 *
 * @param alpha scalar of type T
 * @param avec  first array of SGSparseVectorEntry<T>
 * @param alen  number of entries in avec
 * @param bvec  second array of SGSparseVectorEntry<T>
 * @param blen  number of entries in bvec
 * @return value of type T
 */
00223 static T sparse_dot(T alpha, SGSparseVectorEntry<T>* avec, int32_t alen, SGSparseVectorEntry<T>* bvec, int32_t blen);
00224
/** Non-virtual dense dot overload working in type T.
 * Presumably evaluates alpha * <current vector, vec> + b -- confirm.
 *
 * @param alpha scalar of type T
 * @param vec   dense array of T
 * @param dim   length of vec
 * @param b     scalar of type T (presumably an additive bias -- confirm)
 * @return value of type T
 */
00234 T dense_dot(T alpha, T* vec, int32_t dim, T b);
00235
/** Virtual dense dot overload for float64_t input.
 *
 * @param vec2     read-only dense array
 * @param vec2_len length of vec2
 * @return result as float64_t
 */
00244 virtual float64_t dense_dot(const float64_t* vec2, int32_t vec2_len);
00245
/** Virtual dense dot overload for float32_t input.
 *
 * @param vec2     read-only dense array
 * @param vec2_len length of vec2
 * @return result as float32_t
 */
00254 virtual float32_t dense_dot(const float32_t* vec2, int32_t vec2_len);
00255
/** Virtual, float64_t overload. vec2 is a non-const pointer, i.e. the dense
 * vector is writable by the implementation.
 *
 * @param alpha    scalar multiplier
 * @param vec2     dense array that may be written to
 * @param vec2_len length of vec2
 * @param abs_val  defaults to false; presumably adds absolute values of the
 *                 entries when true -- confirm
 */
00265 virtual void add_to_dense_vec(float64_t alpha, float64_t* vec2, int32_t vec2_len, bool abs_val=false);
00266
/** Virtual, float32_t overload of add_to_dense_vec(); same parameter shape
 * as the float64_t version.
 *
 * @param alpha    scalar multiplier
 * @param vec2     dense array that may be written to
 * @param vec2_len length of vec2
 * @param abs_val  defaults to false; presumably adds absolute values of the
 *                 entries when true -- confirm
 */
00276 virtual void add_to_dense_vec(float32_t alpha, float32_t* vec2, int32_t vec2_len, bool abs_val=false);
00277
/** Non-virtual.
 *
 * @return number of non-zero entries as int64_t (scope -- current vector or
 *         whole stream -- is not visible here)
 */
00283 int64_t get_num_nonzero_entries();
00284
/** Non-virtual.
 *
 * @return float32_t; presumably the squared norm of the current vector --
 *         confirm
 */
00290 float32_t compute_squared();
00291
/** Non-virtual; presumably sorts the entries of the current vector by
 * feature index -- confirm.
 */
00297 void sort_features();
00298
/** Virtual, non-const.
 *
 * @return number of features as int32_t (presumably the member
 *         `current_num_features` -- confirm)
 */
00304 virtual int32_t get_num_features();
00305
/** Virtual, non-const.
 *
 * @return int32_t; presumably the number of non-zero features of the
 *         current vector -- confirm
 */
00311 virtual int32_t get_nnz_features_for_vector();
00312
/** Virtual, const.
 *
 * @return the EFeatureType of this object
 */
00318 virtual EFeatureType get_feature_type() const;
00319
/** Virtual, const.
 *
 * @return the EFeatureClass of this object
 */
00325 virtual EFeatureClass get_feature_class() const;
00326
/** Virtual, const.
 *
 * @return pointer to a CFeatures object; presumably a copy of this
 *         object -- confirm ownership against the implementation
 */
00332 virtual CFeatures* duplicate() const;
00333
/** Virtual, const; defined inline.
 *
 * @return the string literal "StreamingSparseFeatures"
 */
00339 virtual const char* get_name() const { return "StreamingSparseFeatures"; }
00340
/** Virtual, const.
 *
 * @return number of vectors as int32_t (what this means for a stream is
 *         not visible here -- confirm)
 */
00346 virtual int32_t get_num_vectors() const;
00347
/** Virtual, const.
 *
 * @return int32_t size; units are not visible here (presumably the size of
 *         one element in bytes -- confirm)
 */
00353 virtual int32_t get_size() const;
00354
00355 private:
/** Private virtual initialiser without arguments; presumably sets the
 * members to default values -- confirm.
 */
00360 virtual void init();
00361
/** Private virtual initialiser taking the same arguments as the
 * three-argument constructor.
 *
 * @param file        streaming file object (CStreamingFile*)
 * @param is_labelled presumably whether the examples carry labels -- confirm
 * @param size        presumably a buffer/ring size for the parser -- confirm
 */
00369 virtual void init(CStreamingFile *file, bool is_labelled, int32_t size);
00370
00371 protected:
/** Input parser instantiated for SGSparseVectorEntry<T> elements. */
00373 CInputParser< SGSparseVectorEntry<T> > parser;
00374
/** Current example held as an SGSparseVector<T>. */
00376 SGSparseVector<T> current_sgvector;
00377
/** Pointer to an array of sparse entries for the current example
 * (ownership is not visible here).
 */
00379 SGSparseVectorEntry<T>* current_vector;
00380
/** Index value (index_t) associated with the current vector. */
00382 index_t current_vec_index;
00383
/** Label of the current example, stored as float64_t. */
00385 float64_t current_label;
00386
/** Length value for the current example; presumably the number of entries
 * in `current_vector` -- confirm.
 */
00388 int32_t current_length;
00389
/** Feature count tracked by this object; presumably the largest feature
 * dimension seen so far -- confirm.
 */
00391 int32_t current_num_features;
00392 };
00393
00394 }
00395 #endif // _STREAMING_SPARSEFEATURES__H__