StreamingSparseFeatures.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2011 Shashwat Lal Das
00008  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
00009  */
00010 #ifndef _STREAMING_SPARSEFEATURES__H__
00011 #define _STREAMING_SPARSEFEATURES__H__
00012 
00013 #include <shogun/lib/common.h>
00014 #include <shogun/mathematics/Math.h>
00015 #include <shogun/features/streaming/StreamingDotFeatures.h>
00016 #include <shogun/lib/DataType.h>
00017 #include <shogun/io/streaming/InputParser.h>
00018 
00019 namespace shogun
00020 {
      /**
       * Class template, parameterised on the feature value type T, that derives
       * publicly from CStreamingDotFeatures.
       *
       * Its protected state consists of a CInputParser over
       * SGSparseVectorEntry<T> plus fields describing a single "current"
       * example (vector, label, length, feature count).
       *
       * NOTE(review): only declarations are visible in this header; every
       * remark below that is worded "presumably" is inferred from names and
       * signatures and must be confirmed against the implementation.
       */
00043 template <class T> class CStreamingSparseFeatures : public CStreamingDotFeatures
00044 {
00045 public:
00046 
          /** Default constructor; takes no arguments. */
00054     CStreamingSparseFeatures();
00055 
          /**
           * Constructor taking the same three arguments as the private
           * init(CStreamingFile*, bool, int32_t) overload.
           *
           * @param file        pointer to a CStreamingFile; presumably the
           *                    input source - confirm
           * @param is_labelled presumably whether examples carry labels - confirm
           * @param size        presumably a parser buffer size; unit is not
           *                    visible here - TODO confirm
           */
00064     CStreamingSparseFeatures(CStreamingFile* file,
00065                  bool is_labelled,
00066                  int32_t size);
00067 
          /** Virtual destructor. */
00073     virtual ~CStreamingSparseFeatures();
00074 
          /**
           * Presumably selects the parser callback used for unlabelled
           * examples - confirm against the implementation.
           */
00084     virtual void set_vector_reader();
00085 
          /**
           * Presumably selects the parser callback used for labelled
           * examples - confirm against the implementation.
           */
00095     virtual void set_vector_and_label_reader();
00096 
          /** Presumably starts the parser member - confirm. */
00102     virtual void start_parser();
00103 
          /** Presumably stops the parser member - confirm. */
00109     virtual void end_parser();
00110 
          /**
           * Presumably advances to the next example and updates the current_*
           * members - confirm.
           *
           * @return bool; presumably true when an example was obtained
           */
00119     virtual bool get_next_example();
00120 
          /**
           * Non-virtual accessor returning a value of type T for an index.
           *
           * @param index presumably a position within the current vector;
           *              whether it is bounds-checked is not visible here
           * @return value of type T
           */
00127     T get_feature(int32_t index);
00128 
          /**
           * Returns an SGSparseVector<T> by value; presumably the current
           * example's vector (cf. current_sgvector) - confirm.
           */
00134     SGSparseVector<T> get_vector();
00135 
          /**
           * Returns a float64_t; presumably the current example's label
           * (cf. current_label) - confirm.
           */
00143     virtual float64_t get_label();
00144 
          /**
           * Presumably signals that the current example is no longer needed
           * so the parser can reuse its slot - confirm.
           */
00151     virtual void release_example();
00152 
          /** Presumably rewinds the underlying stream - confirm. */
00157     virtual void reset_stream();
00158 
          /**
           * Non-virtual setter for the feature count.
           *
           * @param num requested number of features
           * @return int32_t; meaning (old vs. new value) is not visible
           *         here - TODO confirm
           */
00170     int32_t set_num_features(int32_t num);
00171 
          /** Const accessor returning the feature-space dimension as int32_t. */
00179     virtual int32_t get_dim_feature_space() const;
00180 
          /**
           * float32_t overload. Both arguments are taken by reference, so the
           * callee is able to replace the caller's pointer and length.
           *
           * @param vec dense vector pointer (in/out)
           * @param len length of vec (in/out)
           *
           * NOTE(review): presumably grows vec when the current example needs
           * more entries than len - confirm, including who owns the memory.
           */
00189     virtual void expand_if_required(float32_t*& vec, int32_t &len);
00190 
          /**
           * float64_t overload of expand_if_required(); same parameter shape
           * as the float32_t version.
           *
           * @param vec dense vector pointer (in/out)
           * @param len length of vec (in/out)
           */
00199     virtual void expand_if_required(float64_t*& vec, int32_t &len);
00200 
          /**
           * Dot product against another streaming-features object.
           *
           * @param df the other CStreamingDotFeatures object; presumably
           *           expected to be of this same sparse type - confirm
           * @return result as float32_t
           */
00211     virtual float32_t dot(CStreamingDotFeatures *df);
00212 
          /**
           * Static helper operating on two arrays of sparse entries; it uses
           * no instance state because it is declared static.
           *
           * @param alpha presumably a scaling factor applied to the result
           * @param avec  first array of sparse entries
           * @param alen  number of entries in avec
           * @param bvec  second array of sparse entries
           * @param blen  number of entries in bvec
           * @return value of type T
           *
           * NOTE(review): whether the entries must be sorted by feature index
           * is not visible here - confirm.
           */
00223     static T sparse_dot(T alpha, SGSparseVectorEntry<T>* avec, int32_t alen, SGSparseVectorEntry<T>* bvec, int32_t blen);
00224 
          /**
           * Non-virtual, T-typed dense dot product.
           *
           * @param alpha presumably a scaling factor
           * @param vec   dense vector of T
           * @param dim   length of vec
           * @param b     presumably a bias/offset added to the result
           * @return value of type T; exact formula to be confirmed
           */
00234     T dense_dot(T alpha, T* vec, int32_t dim, T b);
00235 
          /**
           * float64_t dense dot product against a read-only dense vector.
           *
           * @param vec2     dense vector (const)
           * @param vec2_len length of vec2
           * @return result as float64_t
           */
00244     virtual float64_t dense_dot(const float64_t* vec2, int32_t vec2_len);
00245 
          /**
           * float32_t dense dot product against a read-only dense vector.
           *
           * @param vec2     dense vector (const)
           * @param vec2_len length of vec2
           * @return result as float32_t
           */
00254     virtual float32_t dense_dot(const float32_t* vec2, int32_t vec2_len);
00255 
          /**
           * float64_t overload; vec2 is non-const, so it is the output target.
           *
           * @param alpha    presumably a scaling factor for the added values
           * @param vec2     dense vector to be modified
           * @param vec2_len length of vec2
           * @param abs_val  defaults to false; presumably adds absolute
           *                 values when true - confirm
           */
00265     virtual void add_to_dense_vec(float64_t alpha, float64_t* vec2, int32_t vec2_len, bool abs_val=false);
00266 
          /**
           * float32_t overload of add_to_dense_vec(); same parameter shape as
           * the float64_t version, abs_val again defaulting to false.
           */
00276     virtual void add_to_dense_vec(float32_t alpha, float32_t* vec2, int32_t vec2_len, bool abs_val=false);
00277 
          /**
           * Returns a count of non-zero entries as int64_t.
           * NOTE(review): scope (current vector vs. whole stream) is not
           * visible here - confirm.
           */
00283     int64_t get_num_nonzero_entries();
00284 
          /**
           * Returns a float32_t; presumably the squared norm of the current
           * vector - confirm.
           */
00290     float32_t compute_squared();
00291 
          /**
           * Presumably sorts the current vector's entries by feature index -
           * confirm the ordering and whether it is done in place.
           */
00297     void sort_features();
00298 
          /** Returns the number of features as int32_t (non-const). */
00304     virtual int32_t get_num_features();
00305 
          /**
           * Returns an int32_t; presumably the number of non-zero features in
           * the current vector - confirm.
           */
00311     virtual int32_t get_nnz_features_for_vector();
00312 
          /** Const accessor returning the EFeatureType of this object. */
00318     virtual EFeatureType get_feature_type() const;
00319 
          /** Const accessor returning the EFeatureClass of this object. */
00325     virtual EFeatureClass get_feature_class() const;
00326  
          /**
           * Returns a CFeatures pointer; presumably a copy of this object.
           * NOTE(review): ownership of the returned pointer is not visible
           * here - confirm.
           */
00332     virtual CFeatures* duplicate() const;
00333 
          /** Returns the fixed string "StreamingSparseFeatures". */
00339     virtual const char* get_name() const { return "StreamingSparseFeatures"; }
00340 
          /**
           * Const accessor returning an int32_t vector count.
           * NOTE(review): what this means for a stream is not visible here -
           * confirm.
           */
00346     virtual int32_t get_num_vectors() const;
00347 
          /**
           * Const accessor returning an int32_t size; presumably the size of
           * one feature value in bytes - confirm.
           */
00353     virtual int32_t get_size() const;
00354 
00355 private:
          /**
           * Argument-less initialiser, private but declared virtual.
           * Presumably sets the members to default values - confirm.
           */
00360     virtual void init();
00361 
          /**
           * Initialiser taking the same arguments as the three-argument
           * constructor.
           *
           * @param file        pointer to a CStreamingFile
           * @param is_labelled presumably whether examples carry labels
           * @param size        presumably a parser buffer size - TODO confirm
           */
00369     virtual void init(CStreamingFile *file, bool is_labelled, int32_t size);
00370 
00371 protected:
          /** Parser instantiated for SGSparseVectorEntry<T> elements. */
00373     CInputParser< SGSparseVectorEntry<T> > parser;
00374 
          /** Current example held as an SGSparseVector<T>. */
00376     SGSparseVector<T> current_sgvector;
00377 
          /**
           * Raw pointer to sparse entries for the current example.
           * NOTE(review): ownership is not visible here - confirm.
           */
00379     SGSparseVectorEntry<T>* current_vector;
00380 
          /** Index (index_t) associated with the current vector. */
00382     index_t current_vec_index;
00383 
          /** Label associated with the current example. */
00385     float64_t current_label;
00386 
          /**
           * Length associated with the current vector; presumably its number
           * of entries - confirm.
           */
00388     int32_t current_length;
00389 
          /**
           * Feature count tracked for the stream; presumably the largest
           * dimension seen so far - confirm.
           */
00391     int32_t current_num_features;
00392 };
00393 
00394 }
00395 #endif // _STREAMING_SPARSEFEATURES__H__
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation