StreamingVwFeatures.h

Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2009 Yahoo! Inc.  All rights reserved.  The copyrights
00003  * embodied in the content of this file are licensed under the BSD
00004  * (revised) open source license.
00005  *
00006  * This program is free software; you can redistribute it and/or modify
00007  * it under the terms of the GNU General Public License as published by
00008  * the Free Software Foundation; either version 3 of the License, or
00009  * (at your option) any later version.
00010  *
00011  * Written (W) 2011 Shashwat Lal Das
00012  * Adaptation of Vowpal Wabbit v5.1.
00013  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society.
00014  */
00015 
00016 #ifndef _STREAMING_VWFEATURES__H__
00017 #define _STREAMING_VWFEATURES__H__
00018 
00019 #include <shogun/lib/common.h>
00020 #include <shogun/lib/DataType.h>
00021 #include <shogun/mathematics/Math.h>
00022 
00023 #include <shogun/io/InputParser.h>
00024 #include <shogun/io/StreamingVwFile.h>
00025 #include <shogun/io/StreamingVwCacheFile.h>
00026 #include <shogun/features/StreamingDotFeatures.h>
00027 #include <shogun/classifier/vw/vw_common.h>
00028 #include <shogun/classifier/vw/vw_math.h>
00029 
00030 namespace shogun
00031 {
/**
 * Streaming feature class for examples of type VwExample.
 *
 * Derives from CStreamingDotFeatures and owns a CInputParser<VwExample>
 * together with a pointer to a CVwEnvironment and to the example that is
 * currently being processed. The file header states that this code is an
 * adaptation of Vowpal Wabbit v5.1.
 *
 * NOTE(review): only declarations are visible in this header; the notes
 * marked "presumably" below are inferred from names and signatures and
 * should be confirmed against the implementation file.
 */
00039 class CStreamingVwFeatures : public CStreamingDotFeatures
00040 {
00041 public:
00042 
          /** Default constructor (takes no arguments). */
00050     CStreamingVwFeatures();
00051 
          /**
           * Constructor taking a CStreamingVwFile as input source.
           *
           * @param file        input file object
           * @param is_labelled labelled flag (presumably whether examples
           *                    carry labels - confirm)
           * @param size        size argument (presumably the number of examples
           *                    buffered by the parser - confirm)
           */
00060     CStreamingVwFeatures(CStreamingVwFile* file,
00061                  bool is_labelled, int32_t size);
00062 
          /**
           * Constructor taking a CStreamingVwCacheFile as input source.
           *
           * @param file        input cache file object
           * @param is_labelled labelled flag (presumably whether examples
           *                    carry labels - confirm)
           * @param size        size argument (presumably the number of examples
           *                    buffered by the parser - confirm)
           */
00071     CStreamingVwFeatures(CStreamingVwCacheFile* file,
00072                  bool is_labelled, int32_t size);
00073 
          /** Destructor. */
00079     ~CStreamingVwFeatures();
00080 
          /**
           * Duplicate this object.
           *
           * @return pointer to a CFeatures object (presumably a copy of this
           *         instance; ownership of the result is not visible here)
           */
00086     CFeatures* duplicate() const;
00087 
          /**
           * Select the reader used for vectors only.
           * NOTE(review): presumably configures `parser` for unlabelled
           * examples - confirm.
           */
00097     virtual void set_vector_reader();
00098 
          /**
           * Select the reader used for vectors together with labels.
           * NOTE(review): presumably configures `parser` for labelled
           * examples - confirm.
           */
00108     virtual void set_vector_and_label_reader();
00109 
          /** Start the parser (presumably delegates to `parser` - confirm). */
00115     virtual void start_parser();
00116 
          /** End the parser (presumably delegates to `parser` - confirm). */
00122     virtual void end_parser();
00123 
          /** Reset the input stream. */
00128     virtual void reset_stream();
00129 
          /**
           * Get the VW environment.
           *
           * @return CVwEnvironment pointer (presumably the `env` member;
           *         reference counting, if any, is not visible here)
           */
00134     virtual CVwEnvironment* get_env();
00135 
          /**
           * Set the VW environment.
           *
           * @param vw_env environment to use
           */
00141     virtual void set_env(CVwEnvironment* vw_env);
00142 
          /**
           * Advance to the next example.
           *
           * @return bool status (presumably true when an example was
           *         obtained - confirm)
           */
00151     virtual bool get_next_example();
00152 
          /**
           * Get the current example.
           *
           * @return VwExample pointer (presumably `current_example`)
           */
00158     virtual VwExample* get_example();
00159 
          /**
           * Get the label of the current example.
           *
           * @return label as float64_t (presumably `current_label`)
           */
00167     virtual float64_t get_label();
00168 
          /**
           * Release the current example.
           * NOTE(review): presumably must be paired with get_next_example()
           * - confirm the required call order.
           */
00175     virtual void release_example();
00176 
          /**
           * Expand a float32_t vector if required.
           *
           * Both arguments are passed by non-const reference, so the pointer
           * and the length may be modified by the call.
           *
           * @param vec vector (in/out)
           * @param len length of the vector (in/out)
           */
00185     virtual void expand_if_required(float32_t*& vec, int32_t& len);
00186 
          /**
           * Expand a float64_t vector if required.
           *
           * Both arguments are passed by non-const reference, so the pointer
           * and the length may be modified by the call.
           *
           * @param vec vector (in/out)
           * @param len length of the vector (in/out)
           */
00195     virtual void expand_if_required(float64_t*& vec, int32_t& len);
00196 
          /**
           * Get the dimensionality of the feature space.
           *
           * @return dimension as int32_t
           */
00204     virtual int32_t get_dim_feature_space() const;
00205 
          /**
           * Compute a weight from a raw weight and a gravity value.
           * NOTE(review): presumably the truncated ("real") weight as used in
           * Vowpal Wabbit - confirm the formula in the implementation.
           *
           * @param w       weight
           * @param gravity gravity value
           * @return resulting weight
           */
00214     virtual float32_t real_weight(float32_t w, float32_t gravity);
00215 
          /**
           * Dot product with another streaming dot-features object.
           *
           * @param df the other features object
           * @return dot product as float32_t
           */
00226     virtual float32_t dot(CStreamingDotFeatures *df);
00227 
          /**
           * Dot product of a given example with a dense vector.
           *
           * @param ex   example (pointer passed by reference)
           * @param vec2 dense vector
           * @return dot product
           */
00236     virtual float32_t dense_dot(VwExample* &ex, const float32_t* vec2);
00237 
          /**
           * Dot product with a dense vector of given length.
           * NOTE(review): no example argument is taken; presumably operates
           * on `current_example` - confirm.
           *
           * @param vec2     dense vector
           * @param vec2_len length of vec2
           * @return dot product
           */
00247     virtual float32_t dense_dot(const float32_t* vec2, int32_t vec2_len);
00248 
          /**
           * Dot product of a sparse vector with a dense vector.
           *
           * @param vec1 sparse vector
           * @param vec2 dense vector
           * @return dot product
           */
00258     virtual float32_t dense_dot(SGSparseVector<float32_t>* vec1, const float32_t* vec2);
00259 
          /**
           * Dense dot product variant taking a gravity value.
           * NOTE(review): presumably applies truncation via real_weight()
           * - confirm.
           *
           * @param vec2    dense vector
           * @param ex      example (pointer passed by reference)
           * @param gravity gravity value
           * @return dot product
           */
00270     virtual float32_t dense_dot_truncated(const float32_t* vec2, VwExample* &ex, float32_t gravity);
00271 
          /**
           * Add a scaled example to a dense vector.
           *
           * @param alpha    scaling factor
           * @param ex       example (pointer passed by reference)
           * @param vec2     dense vector that is written to (non-const)
           * @param vec2_len length of vec2
           * @param abs_val  flag, defaults to false (presumably selects use of
           *                 absolute values - confirm)
           */
00282     virtual void add_to_dense_vec(float32_t alpha, VwExample* &ex,
00283             float32_t* vec2, int32_t vec2_len, bool abs_val = false);
00284 
          /**
           * Add a scaled vector to a dense vector.
           * NOTE(review): no example argument is taken; presumably operates
           * on `current_example` - confirm.
           *
           * @param alpha    scaling factor
           * @param vec2     dense vector that is written to (non-const)
           * @param vec2_len length of vec2
           * @param abs_val  flag, defaults to false (presumably selects use of
           *                 absolute values - confirm)
           */
00294     virtual void add_to_dense_vec(float32_t alpha,
00295             float32_t* vec2, int32_t vec2_len, bool abs_val = false);
00296 
          /**
           * Get the number of non-zero features for the vector.
           *
           * @return count as int32_t
           */
00301     virtual int32_t get_nnz_features_for_vector();
00302 
          /**
           * Get the number of features.
           *
           * @return count as int32_t
           */
00308     virtual int32_t get_num_features();
00309 
          /**
           * Get the feature type.
           * NOTE(review): declared `inline`, but no definition is visible in
           * this header - confirm that a definition is available to every
           * translation unit that uses this function.
           *
           * @return EFeatureType value
           */
00315     virtual inline EFeatureType get_feature_type();
00316 
          /**
           * Get the feature class.
           *
           * @return EFeatureClass value
           */
00322     virtual EFeatureClass get_feature_class();
00323 
          /**
           * Get the name of this class.
           *
           * @return the string "StreamingVwFeatures"
           */
00329     inline virtual const char* get_name() const { return "StreamingVwFeatures"; }
00330 
          /**
           * Get the number of vectors.
           * NOTE(review): declared `inline`, but no definition is visible in
           * this header - confirm that a definition is available to every
           * translation unit that uses this function.
           *
           * @return count as int32_t
           */
00336     inline virtual int32_t get_num_vectors() const;
00337 
          /**
           * Get the size.
           * NOTE(review): what is measured (e.g. size of one feature element)
           * is not visible here - confirm.
           *
           * @return size as int32_t
           */
00343     virtual int32_t get_size();
00344 
00345 private:
          /** Initialisation helper without arguments; mirrors the default constructor. */
00350     virtual void init();
00351 
          /**
           * Initialisation helper whose parameters mirror the
           * CStreamingVwFile constructor.
           *
           * @param file        input file object
           * @param is_labelled labelled flag
           * @param size        size argument
           */
00359     virtual void init(CStreamingVwFile *file, bool is_labelled, int32_t size);
00360 
          /**
           * Initialisation helper whose parameters mirror the
           * CStreamingVwCacheFile constructor.
           *
           * @param file        input cache file object
           * @param is_labelled labelled flag
           * @param size        size argument
           */
00368     virtual void init(CStreamingVwCacheFile *file, bool is_labelled, int32_t size);
00369 
          /**
           * Set up a given example.
           * NOTE(review): the exact preparation performed is defined in the
           * implementation file - confirm there.
           *
           * @param ae example to set up
           */
00376     virtual void setup_example(VwExample* ae);
00377 
00378 protected:
00379 
          /** Input parser instantiated for VwExample objects. */
00381     CInputParser<VwExample> parser;
00382 
          /** Example counter (presumably number of examples seen so far - confirm). */
00384     vw_size_t example_count;
00385 
          /** Label of the current example. */
00387     float64_t current_label;
00388 
          /** Length associated with the current example (exact meaning to be confirmed). */
00390     int32_t current_length;
00391 
          /** VW environment in use. */
00393     CVwEnvironment* env;
00394 
          /** Example currently being processed. */
00396     VwExample* current_example;
00397 };
00398 }
00399 #endif // _STREAMING_VWFEATURES__H__
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation