en/latest/StreamingVwFeatures_8cpp_source.html

 /*

  * Copyright (c) 2009 Yahoo! Inc.  All rights reserved.  The copyrights

  * embodied in the content of this file are licensed under the BSD

  * (revised) open source license.

  *

  * This program is free software; you can redistribute it and/or modify

  * it under the terms of the GNU General Public License as published by

  * the Free Software Foundation; either version 3 of the License, or

  * (at your option) any later version.

  *

  * Written (W) 2011 Shashwat Lal Das

  * Adaptation of Vowpal Wabbit v5.1.

  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society.

  */


 #include <shogun/features/streaming/StreamingVwFeatures.h>


 using namespace shogun;


 /**
  * Default constructor.
  *
  * Resets every member through init() (no file, no environment, no
  * current example) and then calls set_read_functions().
  */
 CStreamingVwFeatures::CStreamingVwFeatures()
     : CStreamingDotFeatures()
 {
     init();
     set_read_functions();
 }


 /**
  * Constructs the features object on top of a CStreamingVwFile.
  *
  * @param file        streaming file handed to the parser
  * @param is_labelled whether the examples are labelled
  * @param size        forwarded unchanged to parser.init()
  */
 CStreamingVwFeatures::CStreamingVwFeatures(CStreamingVwFile* file, bool is_labelled, int32_t size)
     : CStreamingDotFeatures()
 {
     // Bind the file/parser/environment first, then install the readers.
     init(file, is_labelled, size);
     set_read_functions();
 }


 /**
  * Constructs the features object on top of a CStreamingVwCacheFile.
  *
  * @param file        cache file handed to the parser
  * @param is_labelled whether the examples are labelled
  * @param size        forwarded unchanged to parser.init()
  */
 CStreamingVwFeatures::CStreamingVwFeatures(CStreamingVwCacheFile* file, bool is_labelled, int32_t size)
     : CStreamingDotFeatures()
 {
     // Bind the file/parser/environment first, then install the readers.
     init(file, is_labelled, size);
     set_read_functions();
 }


 /**
  * Destructor: ends the parser if it is still running and releases the
  * reference on the VW environment.
  */
 CStreamingVwFeatures::~CStreamingVwFeatures()
 {
     const bool parser_active = parser.is_running();
     if (parser_active)
     {
         parser.end_parser();
     }

     SG_UNREF(env);
 }


 /**
  * Creates a copy of this object with the copy constructor.
  *
  * @return newly allocated copy, returned through the CFeatures base pointer
  */
 CFeatures* CStreamingVwFeatures::duplicate() const
 {
     CStreamingVwFeatures* copy = new CStreamingVwFeatures(*this);
     return copy;
 }


 /**
  * Passes CStreamingFile::get_vector to parser.set_read_vector(), i.e.
  * installs it as the parser's read-vector function.
  */
 void CStreamingVwFeatures::set_vector_reader()

 {

     parser.set_read_vector(&CStreamingFile::get_vector);

 }


 /**
  * Passes CStreamingFile::get_vector_and_label to
  * parser.set_read_vector_and_label(), i.e. installs it as the parser's
  * read-vector-and-label function.
  */
 void CStreamingVwFeatures::set_vector_and_label_reader()

 {

     parser.set_read_vector_and_label(&CStreamingFile::get_vector_and_label);

 }


 /**
  * Rewinds the input and restarts the parser.
  *
  * Seekability is queried from working_file itself (not from the
  * `seekable` member). If the file is not seekable, an error is raised
  * through SG_ERROR and nothing else is done.
  */
 void CStreamingVwFeatures::reset_stream()
 {
     if (!working_file->is_seekable())
     {
         SG_ERROR("The input cannot be reset! Please use 1 pass.\n")
         return;
     }

     working_file->reset_stream();

     // Tear the parser down and bring it back up on the rewound file,
     // keeping the ring size it was using before.
     parser.exit_parser();
     parser.init(working_file, has_labels, parser.get_ring_size());
     parser.set_free_vector_after_release(false);
     parser.start_parser();
 }


 /**
  * Returns the VW environment pointer held by this object.
  *
  * The pointer is returned as-is; this function does not change its
  * reference count. It is NULL if no environment has been set.
  */
 CVwEnvironment* CStreamingVwFeatures::get_env()

 {

     return env;

 }


 /**
  * Replaces the VW environment used by this object.
  *
  * @param vw_env new environment; if it is the one already held, nothing
  *               happens
  */
 void CStreamingVwFeatures::set_env(CVwEnvironment* vw_env)
 {
     if (vw_env == env)
         return;

     // Reference the new environment before releasing the old one.
     SG_REF(vw_env);
     SG_UNREF(env);
     env = vw_env;
 }


 /**
  * Grows a float32_t vector to 2^(env->num_bits) entries if it is shorter.
  *
  * @param vec vector to grow; reallocated in place via SG_REALLOC
  * @param len current length; set to the new length when the vector grows
  *
  * Entries added by the growth are zero-filled. A vector that is already
  * long enough is left untouched.
  */
 void CStreamingVwFeatures::expand_if_required(float32_t*& vec, int32_t& len)
 {
     const int32_t dim = 1 << env->num_bits;
     if (dim <= len)
         return;

     vec = SG_REALLOC(float32_t, vec, len, dim);
     memset(vec + len, 0, (dim - len) * sizeof(float32_t));
     len = dim;
 }


 /**
  * Grows a float64_t vector to 2^(env->num_bits) entries if it is shorter.
  *
  * @param vec vector to grow; reallocated in place via SG_REALLOC
  * @param len current length; set to the new length when the vector grows
  *
  * Entries added by the growth are zero-filled. A vector that is already
  * long enough is left untouched.
  */
 void CStreamingVwFeatures::expand_if_required(float64_t*& vec, int32_t& len)
 {
     const int32_t dim = 1 << env->num_bits;
     if (dim <= len)
         return;

     vec = SG_REALLOC(float64_t, vec, len, dim);
     memset(vec + len, 0, (dim - len) * sizeof(float64_t));
     len = dim;
 }


 /**
  * Shrinks a weight towards zero by `gravity`.
  *
  * @param w       weight value
  * @param gravity amount subtracted from |w|
  * @return sign(w) * (|w| - gravity) when gravity < |w|, otherwise 0
  */
 float32_t CStreamingVwFeatures::real_weight(float32_t w, float32_t gravity)
 {
     const float32_t magnitude = fabsf(w);

     // Written as a negated '<' so that a NaN operand still yields 0.
     if (!(gravity < magnitude))
         return 0;

     return CMath::sign(w)*(magnitude - gravity);
 }


 /**
  * Returns current_length, the number of features in the current example.
  */
 int32_t CStreamingVwFeatures::get_nnz_features_for_vector()

 {

     return current_length;

 }


 /**
  * Number of vectors currently available.
  *
  * @return 1 if current_example is non-NULL, otherwise 0
  */
 int32_t CStreamingVwFeatures::get_num_vectors() const
 {
     return current_example ? 1 : 0;
 }


 /**
  * Returns the feature type of this class, which is always F_DREAL.
  */
 EFeatureType CStreamingVwFeatures::get_feature_type() const

 {

     return F_DREAL;

 }


 /**
  * Puts all members into their empty state: no input file, no VW
  * environment, no current example, and an example counter of zero.
  */
 void CStreamingVwFeatures::init()
 {
     // Input source and environment
     working_file = NULL;
     env = NULL;
     seekable = false;

     // Current-example bookkeeping
     current_example = NULL;
     current_length = -1;
     example_count = 0;
 }


 /**
  * Binds this object to a CStreamingVwFile.
  *
  * @param file        file the parser will read from; also supplies the
  *                    VW environment
  * @param is_labelled whether the examples are labelled
  * @param size        forwarded unchanged to parser.init()
  *
  * The stream is marked as not seekable.
  */
 void CStreamingVwFeatures::init(CStreamingVwFile* file, bool is_labelled, int32_t size)
 {
     init();

     working_file = file;
     has_labels = is_labelled;

     parser.init(file, is_labelled, size);
     parser.set_free_vector_after_release(false);
     seekable = false;

     // Share the file's environment and take a reference on it
     env = file->get_env();
     SG_REF(env);
 }


 /**
  * Binds this object to a CStreamingVwCacheFile.
  *
  * @param file        cache file the parser will read from; also supplies
  *                    the VW environment
  * @param is_labelled whether the examples are labelled
  * @param size        forwarded unchanged to parser.init()
  *
  * The stream is marked as seekable.
  */
 void CStreamingVwFeatures::init(CStreamingVwCacheFile* file, bool is_labelled, int32_t size)
 {
     init();

     working_file = file;
     has_labels = is_labelled;

     parser.init(file, is_labelled, size);
     parser.set_free_vector_after_release(false);
     seekable = true;

     // Share the cache file's environment and take a reference on it
     env = file->get_env();
     SG_REF(env);
 }


 /**
  * Prepares a freshly parsed example for use by the learner.
  *
  * Steps, in order:
  *  - stamps the example with pass number, running example counter,
  *    global weight and the updated env->t;
  *  - if env->ignore_some is set, drops every namespace flagged in
  *    env->ignore and erases its features;
  *  - appends the constant feature (value 1, index constant_hash & mask)
  *    under namespace 128;
  *  - if env->stride != 1, multiplies every weight index by the stride;
  *  - accumulates num_features and total_sum_feat_sq over all namespaces
  *    and over every namespace pair listed in env->pairs.
  *
  * @param ae example to set up; modified in place
  */
 void CStreamingVwFeatures::setup_example(VwExample* ae)
 {
     ae->pass = env->passes_complete;
     ae->num_features = 0;
     ae->total_sum_feat_sq = 1;
     ae->example_counter = ++example_count;
     ae->global_weight = ae->ld->weight;
     env->t += ae->global_weight;
     ae->example_t = env->t;

     // If some namespaces should be ignored, remove them.
     // The surviving indices are compacted in a single pass: this keeps
     // their relative order, never forms a pointer before indices.begin,
     // and avoids one memmove per removed namespace.
     if (env->ignore_some)
     {
         vw_size_t* keep = ae->indices.begin;
         for (vw_size_t* i = ae->indices.begin; i != ae->indices.end; i++)
         {
             if (env->ignore[*i])
                 ae->atomics[*i].erase();
             else
                 *keep++ = *i;
         }
         ae->indices.end = keep;
     }

     // Add constant feature
     vw_size_t constant_namespace = 128;
     VwFeature temp = {1,constant_hash & env->mask};
     ae->indices.push(constant_namespace);
     ae->atomics[constant_namespace].push(temp);
     ae->sum_feat_sq[constant_namespace] = 0;

     if(env->stride != 1)
     {
         // Make room for per-feature information.
         vw_size_t stride = env->stride;
         for (vw_size_t* i = ae->indices.begin; i != ae->indices.end; i++)
             for(VwFeature* j = ae->atomics[*i].begin; j != ae->atomics[*i].end; j++)
                 j->weight_index = j->weight_index*stride;
     }

     for (vw_size_t* i = ae->indices.begin; i != ae->indices.end; i++)
     {
         ae->num_features += ae->atomics[*i].end - ae->atomics[*i].begin;
         ae->total_sum_feat_sq += ae->sum_feat_sq[*i];
     }

     // For quadratic features
     for (int32_t k = 0; k < env->pairs.get_num_elements(); k++)
     {
         char* pair = env->pairs.get_element(k);

         // Namespace ids index 256-entry arrays. Go through unsigned char
         // so that a byte >= 0x80 cannot become a negative index on
         // platforms where plain char is signed.
         const int32_t first = static_cast<unsigned char>(pair[0]);
         const int32_t second = static_cast<unsigned char>(pair[1]);

         ae->num_features
             += (ae->atomics[first].end - ae->atomics[first].begin)
             *(ae->atomics[second].end - ae->atomics[second].begin);

         ae->total_sum_feat_sq += ae->sum_feat_sq[first]*ae->sum_feat_sq[second];
     }
 }


 /**
  * Starts the parser unless it is already running.
  */
 void CStreamingVwFeatures::start_parser()
 {
     if (parser.is_running())
         return;

     parser.start_parser();
 }


 /**
  * Ends the parser by calling parser.end_parser().
  *
  * Unlike the destructor, this does not check parser.is_running() first.
  */
 void CStreamingVwFeatures::end_parser()

 {

     parser.end_parser();

 }


 /**
  * Fetches the next example from the parser and prepares it.
  *
  * On success the example has been passed through setup_example(), and
  * current_label / current_length have been refreshed from it.
  *
  * @return true if an example is available; false if the parser returned
  *         none or reported a length below 1
  */
 bool CStreamingVwFeatures::get_next_example()
 {
     const bool fetched = (bool) parser.get_next_example(current_example,
                                                         current_length,
                                                         current_label);

     if (current_length < 1 || !fetched)
         return false;

     setup_example(current_example);

     // setup_example() recomputes num_features, so read both values
     // back from the example afterwards.
     current_label = current_example->ld->label;
     current_length = current_example->num_features;

     return true;
 }


 /**
  * Returns the example currently being processed (current_example).
  * The pointer is NULL after init() and until one has been fetched.
  */
 VwExample* CStreamingVwFeatures::get_example()

 {

     return current_example;

 }


 /**
  * Returns the label of the current example (current_label).
  *
  * Asserts that has_labels is set, i.e. that the examples are labelled.
  */
 float64_t CStreamingVwFeatures::get_label()

 {

     ASSERT(has_labels)


     return current_label;

 }


 /**
  * Folds the current example into the environment's running statistics
  * and hands it back to the parser.
  *
  * Updates example_number, total_features, sum_loss, weighted_examples
  * and weighted_labels on env, then resets the example's members and
  * calls parser.finalize_example().
  */
 void CStreamingVwFeatures::release_example()
 {
     env->example_number++;
     env->total_features += current_example->num_features;
     env->sum_loss += current_example->loss;
     env->weighted_examples += current_example->ld->weight;

     // A label equal to FLT_MAX adds nothing to the weighted label sum.
     // NOTE(review): presumably FLT_MAX marks an unlabelled example - confirm.
     if (current_example->ld->label != FLT_MAX)
         env->weighted_labels += current_example->ld->label * current_example->ld->weight;
     else
         env->weighted_labels += 0;

     current_example->reset_members();
     parser.finalize_example();
 }


 /**
  * Returns current_length, the number of features in the current example.
  *
  * NOTE(review): this is not the 1 << env->num_bits dimension that
  * expand_if_required() uses - confirm callers expect the per-example
  * count here.
  */
 int32_t CStreamingVwFeatures::get_dim_feature_space() const

 {

     return current_length;

 }


 /**
  * Dot product with another streaming feature object: not implemented.
  *
  * Invokes SG_NOTIMPLEMENTED; the statement after it returns CMath::INFTY.
  * The argument df is not used.
  */
 float32_t CStreamingVwFeatures::dot(CStreamingDotFeatures* df)

 {

     SG_NOTIMPLEMENTED

     return CMath::INFTY;

 }


 /**
  * Dot product between an example and a dense weight vector.
  *
  * @param ex   example whose features are used
  * @param vec2 dense vector, indexed by weight_index & env->thread_mask
  * @return sum over all features of vec2[index] * feature value
  */
 float32_t CStreamingVwFeatures::dense_dot(VwExample* &ex, const float32_t* vec2)
 {
     float32_t sum = 0.;

     vw_size_t* ns = ex->indices.begin;
     while (ns != ex->indices.end)
     {
         // Walk every feature stored under this namespace.
         VwFeature* feat = ex->atomics[*ns].begin;
         while (feat != ex->atomics[*ns].end)
         {
             sum += vec2[feat->weight_index & env->thread_mask] * feat->x;
             ++feat;
         }
         ++ns;
     }

     return sum;
 }


 /**
  * Dot product between the current example and a dense vector.
  *
  * Forwards to dense_dot(current_example, vec2). vec2_len is not used,
  * so no bounds check is made against it here.
  */
 float32_t CStreamingVwFeatures::dense_dot(const float32_t* vec2, int32_t vec2_len)

 {

     return dense_dot(current_example, vec2);

 }


 /**
  * Dot product between a sparse vector and a dense vector.
  *
  * @param vec1 sparse vector; each entry's feat_index is masked with
  *             env->mask before indexing vec2
  * @param vec2 dense vector
  * @return sum over all sparse entries of entry * vec2[masked index]
  */
 float32_t CStreamingVwFeatures::dense_dot(SGSparseVector<float32_t>* vec1, const float32_t* vec2)
 {
     float32_t sum = 0.;

     int32_t idx = 0;
     while (idx < vec1->num_feat_entries)
     {
         sum += vec1->features[idx].entry * vec2[vec1->features[idx].feat_index & env->mask];
         ++idx;
     }

     return sum;
 }


 /**
  * Dot product between an example and a dense weight vector in which
  * every weight is first passed through real_weight() with `gravity`.
  *
  * @param vec2    dense vector, indexed by weight_index & env->thread_mask
  * @param ex      example whose features are used
  * @param gravity second argument given to real_weight() for every weight
  * @return sum over all features of real_weight(weight, gravity) * value
  */
 float32_t CStreamingVwFeatures::dense_dot_truncated(const float32_t* vec2, VwExample* &ex, float32_t gravity)
 {
     float32_t total = 0.;

     vw_size_t* ns = ex->indices.begin;
     while (ns != ex->indices.end)
     {
         VwFeature* feat = ex->atomics[*ns].begin;
         while (feat != ex->atomics[*ns].end)
         {
             total += real_weight(vec2[feat->weight_index & env->thread_mask], gravity)*feat->x;
             ++feat;
         }
         ++ns;
     }

     return total;
 }


 /**
  * Adds alpha times an example's feature values to a dense vector.
  *
  * @param alpha    scaling factor
  * @param ex       example whose features are added
  * @param vec2     dense vector, indexed by weight_index & env->thread_mask;
  *                 updated in place
  * @param vec2_len not used by this function
  * @param abs_val  if true, the absolute value of each feature is added
  */
 void CStreamingVwFeatures::add_to_dense_vec(float32_t alpha, VwExample* &ex, float32_t* vec2, int32_t vec2_len, bool abs_val)
 {
     for (vw_size_t* i = ex->indices.begin; i != ex->indices.end; i++)
     {
         for (VwFeature* f = ex->atomics[*i].begin; f != ex->atomics[*i].end; f++)
         {
             // fabsf rather than unqualified abs(): abs() can resolve to
             // the C int overload and truncate the float feature value.
             const float32_t value = abs_val ? fabsf(f->x) : f->x;
             vec2[f->weight_index & env->thread_mask] += alpha * value;
         }
     }
 }


 /**
  * Adds alpha times the current example's feature values to a dense
  * vector by forwarding all arguments, together with current_example,
  * to the overload that takes an explicit example.
  */
 void CStreamingVwFeatures::add_to_dense_vec(float32_t alpha, float32_t* vec2, int32_t vec2_len, bool abs_val)

 {

     add_to_dense_vec(alpha, current_example, vec2, vec2_len, abs_val);

 }


 /**
  * Returns current_length, the number of features in the current example.
  */
 int32_t CStreamingVwFeatures::get_num_features()

 {

     return current_length;

 }


 /**
  * Returns the feature class of this object, which is always C_STREAMING_VW.
  */
 EFeatureClass CStreamingVwFeatures::get_feature_class() const

 {

     return C_STREAMING_VW;

 }

shogun::CStreamingFile::reset_stream
virtual void reset_stream()
Definition: StreamingFile.h:69

shogun::vw_size_t
uint32_t vw_size_t
vw_size_t typedef to work across platforms
Definition: vw_constants.h:26

shogun::CStreamingVwFeatures::set_env
virtual void set_env(CVwEnvironment *vw_env)
Definition: StreamingVwFeatures.cpp:83

shogun::DynArray::get_element
T get_element(int32_t index) const
Definition: DynArray.h:142

shogun::CVwEnvironment::weighted_examples
float64_t weighted_examples
Weighted examples.
Definition: VwEnvironment.h:171

shogun::v_array::end
T * end
Pointer to last set element in the array.
Definition: v_array.h:160

shogun::CStreamingVwFeatures::set_vector_and_label_reader
virtual void set_vector_and_label_reader()
Definition: StreamingVwFeatures.cpp:59

shogun::CStreamingVwFeatures::get_feature_class
virtual EFeatureClass get_feature_class() const
Definition: StreamingVwFeatures.cpp:380

shogun::C_STREAMING_VW
Definition: FeatureTypes.h:53

shogun::CStreamingVwFeatures::release_example
virtual void release_example()
Definition: StreamingVwFeatures.cpp:281

shogun::CStreamingVwFeatures::dot
virtual float32_t dot(CStreamingDotFeatures *df)
Definition: StreamingVwFeatures.cpp:303

shogun::v_array::begin
T * begin
Pointer to first element of the array.
Definition: v_array.h:157

shogun::CMath::INFTY
static const float64_t INFTY
infinity
Definition: Math.h:2048

shogun::CStreamingVwFeatures::env
CVwEnvironment * env
Environment for VW.
Definition: StreamingVwFeatures.h:388

shogun::CVwEnvironment
Class CVwEnvironment is the environment used by VW.
Definition: VwEnvironment.h:41

shogun::CStreamingVwFeatures::add_to_dense_vec
virtual void add_to_dense_vec(float32_t alpha, VwExample *&ex, float32_t *vec2, int32_t vec2_len, bool abs_val=false)
Definition: StreamingVwFeatures.cpp:350

shogun::CStreamingVwFeatures::current_example
VwExample * current_example
Example currently being processed.
Definition: StreamingVwFeatures.h:391

shogun::CStreamingFile::is_seekable
virtual bool is_seekable()
Definition: StreamingFile.h:64

shogun::CStreamingVwFeatures::get_num_vectors
virtual int32_t get_num_vectors() const
Definition: StreamingVwFeatures.cpp:128

shogun::CStreamingFeatures::has_labels
bool has_labels
Whether examples are labelled or not.
Definition: StreamingFeatures.h:201

shogun::VwExample::num_features
vw_size_t num_features
Number of features.
Definition: vw_example.h:89

shogun::CStreamingVwFeatures::get_example
virtual VwExample * get_example()
Definition: StreamingVwFeatures.cpp:269

shogun::CVwEnvironment::example_number
int64_t example_number
Example number.
Definition: VwEnvironment.h:169

shogun::VwExample::total_sum_feat_sq
float32_t total_sum_feat_sq
Total sum of square of features.
Definition: vw_example.h:106

shogun::CStreamingVwFeatures::start_parser
virtual void start_parser()
Definition: StreamingVwFeatures.cpp:238

shogun::CStreamingFile::get_vector
virtual void get_vector(bool *&vector, int32_t &len)

SG_ERROR
#define SG_ERROR(...)
Definition: SGIO.h:129

SG_NOTIMPLEMENTED
#define SG_NOTIMPLEMENTED
Definition: SGIO.h:139

shogun::CStreamingVwFeatures::get_num_features
virtual int32_t get_num_features()
Definition: StreamingVwFeatures.cpp:375

shogun::CVwEnvironment::num_bits
vw_size_t num_bits
log_2 of the number of features
Definition: VwEnvironment.h:126

shogun::CStreamingVwFeatures::get_dim_feature_space
virtual int32_t get_dim_feature_space() const
Definition: StreamingVwFeatures.cpp:298

shogun::DynArray::get_num_elements
int32_t get_num_elements() const
Definition: DynArray.h:130

shogun::CStreamingVwFeatures::get_feature_type
virtual EFeatureType get_feature_type() const
Definition: StreamingVwFeatures.cpp:136

shogun::VwExample::sum_feat_sq
float64_t sum_feat_sq[256]
Sum of square of features.
Definition: vw_example.h:104

SG_REF
#define SG_REF(x)
Definition: SGObject.h:54

shogun::SGSparseVector::num_feat_entries
index_t num_feat_entries
Definition: SGSparseVector.h:212

shogun::v_array::push
void push(const T &new_elem)
Definition: v_array.h:168

shogun::CStreamingFeatures::working_file
CStreamingFile * working_file
The StreamingFile object to read from.
Definition: StreamingFeatures.h:204

shogun::EFeatureClass
EFeatureClass
shogun feature class
Definition: FeatureTypes.h:38

shogun::VwExample::loss
float32_t loss
Loss.
Definition: vw_example.h:95

shogun::VwLabel::label
float32_t label
Label value.
Definition: vw_label.h:92

shogun::VwExample::pass
vw_size_t pass
Pass.
Definition: vw_example.h:91

shogun::CStreamingVwFeatures::reset_stream
virtual void reset_stream()
Definition: StreamingVwFeatures.cpp:64

shogun::VwExample::indices
v_array< vw_size_t > indices
Array of namespaces.
Definition: vw_example.h:84

shogun::VwExample::reset_members
void reset_members()
Definition: vw_example.cpp:35

shogun::CStreamingVwFeatures::dense_dot_truncated
virtual float32_t dense_dot_truncated(const float32_t *vec2, VwExample *&ex, float32_t gravity)
Definition: StreamingVwFeatures.cpp:334

shogun::CStreamingVwFeatures::current_label
float64_t current_label
The current example's label.
Definition: StreamingVwFeatures.h:382

shogun::CStreamingVwFeatures::~CStreamingVwFeatures
~CStreamingVwFeatures()
Definition: StreamingVwFeatures.cpp:42

shogun::CStreamingVwFeatures::get_env
virtual CVwEnvironment * get_env()
Definition: StreamingVwFeatures.cpp:78

shogun::F_DREAL
Definition: FeatureTypes.h:32

shogun::VwLabel::weight
float32_t weight
Weight of example.
Definition: vw_label.h:94

ASSERT
#define ASSERT(x)
Definition: SGIO.h:201

shogun::CStreamingVwFeatures::CStreamingVwFeatures
CStreamingVwFeatures()
Definition: StreamingVwFeatures.cpp:20

shogun::CVwEnvironment::weighted_labels
float64_t weighted_labels
Weighted labels.
Definition: VwEnvironment.h:175

shogun::CStreamingVwFeatures::get_label
virtual float64_t get_label()
Definition: StreamingVwFeatures.cpp:274

shogun::CVwEnvironment::ignore_some
bool ignore_some
Whether some namespaces are ignored.
Definition: VwEnvironment.h:191

float64_t
double float64_t
Definition: common.h:50

shogun::CVwEnvironment::pairs
DynArray< char * > pairs
Pairs of features to cross for quadratic updates.
Definition: VwEnvironment.h:196

StreamingVwFeatures.h

shogun::constant_hash
const int32_t constant_hash
Constant used to access the constant feature.
Definition: vw_constants.h:32

shogun::CVwEnvironment::stride
vw_size_t stride
Number of elements in weight vector per feature.
Definition: VwEnvironment.h:134

shogun::CStreamingVwFeatures::get_nnz_features_for_vector
virtual int32_t get_nnz_features_for_vector()
Definition: StreamingVwFeatures.cpp:123

shogun::CStreamingVwFeatures::set_vector_reader
virtual void set_vector_reader()
Definition: StreamingVwFeatures.cpp:54

shogun::VwExample::example_counter
vw_size_t example_counter
Example counter.
Definition: vw_example.h:109

shogun::CVwEnvironment::t
float32_t t
Value of t.
Definition: VwEnvironment.h:162

shogun::CStreamingVwFeatures::real_weight
virtual float32_t real_weight(float32_t w, float32_t gravity)
Definition: StreamingVwFeatures.cpp:115

shogun::VwExample
Example class for VW.
Definition: vw_example.h:58

shogun::CStreamingDotFeatures
Streaming features that support dot products among other operations.
Definition: StreamingDotFeatures.h:47

shogun::VwExample::example_t
float32_t example_t
t value for this example
Definition: vw_example.h:101

shogun::SGSparseVector::features
SGSparseVectorEntry< T > * features
Definition: SGSparseVector.h:215

shogun::CVwEnvironment::mask
vw_size_t mask
Mask used for hashing.
Definition: VwEnvironment.h:130

shogun::CStreamingVwCacheFile
Class StreamingVwCacheFile to read vector-by-vector from VW cache files.
Definition: StreamingVwCacheFile.h:32

shogun::CVwEnvironment::total_features
vw_size_t total_features
Total number of features.
Definition: VwEnvironment.h:177

shogun::CStreamingVwFeatures::get_next_example
virtual bool get_next_example()
Definition: StreamingVwFeatures.cpp:249

float32_t
float float32_t
Definition: common.h:49

shogun::EFeatureType
EFeatureType
shogun feature type
Definition: FeatureTypes.h:19

shogun::CStreamingVwFile
Class StreamingVwFile to read vector-by-vector from Vowpal Wabbit data files. It reads the example an...
Definition: StreamingVwFile.h:28

shogun::VwExample::global_weight
float32_t global_weight
Global weight.
Definition: vw_example.h:99

shogun::CStreamingFile::get_vector_and_label
virtual void get_vector_and_label(bool *&vector, int32_t &len, float64_t &label)

shogun::VwFeature
One feature in VW.
Definition: vw_example.h:34

SG_UNREF
#define SG_UNREF(x)
Definition: SGObject.h:55

shogun
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18

shogun::CStreamingFeatures::set_read_functions
void set_read_functions()
Definition: StreamingFeatures.cpp:29

shogun::CStreamingVwFeatures::current_length
int32_t current_length
Number of features in current example.
Definition: StreamingVwFeatures.h:385

shogun::CMath::sign
static T sign(T a)
Definition: Math.h:426

shogun::CStreamingVwFeatures::end_parser
virtual void end_parser()
Definition: StreamingVwFeatures.cpp:244

shogun::CStreamingFeatures::seekable
bool seekable
Whether the stream is seekable.
Definition: StreamingFeatures.h:207

shogun::CFeatures
The class Features is the base class of all feature objects.
Definition: Features.h:68

shogun::VwExample::ld
VwLabel * ld
Label object.
Definition: vw_example.h:79

shogun::CStreamingVwFeatures::duplicate
CFeatures * duplicate() const
Definition: StreamingVwFeatures.cpp:49

shogun::SGSparseVector
template class SGSparseVector The assumption is that the stored SGSparseVectorEntry* vector is orde...
Definition: base/Parameter.h:29

shogun::CVwEnvironment::thread_mask
vw_size_t thread_mask
Mask used by regressor for learning.
Definition: VwEnvironment.h:132

shogun::CStreamingVwFeatures::dense_dot
virtual float32_t dense_dot(VwExample *&ex, const float32_t *vec2)
Definition: StreamingVwFeatures.cpp:309

shogun::CVwEnvironment::passes_complete
vw_size_t passes_complete
Number of passes complete.
Definition: VwEnvironment.h:181

shogun::CVwEnvironment::ignore
bool ignore[256]
Which namespaces to ignore.
Definition: VwEnvironment.h:193

shogun::CStreamingVwFeatures::example_count
vw_size_t example_count
Number of examples processed at a point of time.
Definition: StreamingVwFeatures.h:379

shogun::CStreamingVwFeatures::parser
CInputParser< VwExample > parser
The parser object, which reads from input and returns parsed example objects.
Definition: StreamingVwFeatures.h:376

shogun::CStreamingVwFeatures::expand_if_required
virtual void expand_if_required(float32_t *&vec, int32_t &len)
Definition: StreamingVwFeatures.cpp:93

shogun::CVwEnvironment::sum_loss
float64_t sum_loss
Sum of losses.
Definition: VwEnvironment.h:179

shogun::VwExample::atomics
v_array< VwFeature > atomics[256]
Array of features.
Definition: vw_example.h:86