18 bool is_labelled, int32_t size,
CTokenizer* tzer, int32_t bits)
21 init(file, is_labelled, size, tzer, bits,
true, 1, 0);
26 init(NULL,
false, 0, NULL, 0,
false, 1, 0);
35 bool is_labelled = (lab != NULL);
38 init(file, is_labelled, size, tzer, bits,
true, 1, 0);
40 parser.set_free_vectors_on_destruct(
false);
43 void CStreamingHashedDocDotFeatures::init(
CStreamingFile* file,
bool is_labelled,
44 int32_t size,
CTokenizer* tzer, int32_t bits,
bool normalize, int32_t n_grams, int32_t skips)
66 parser.init(file, is_labelled, size);
73 parser.set_free_vector_after_release(
false);
109 int32_t vec2_len,
bool abs_val)
124 return "StreamingHashedDocDotFeatures";
169 parser.finalize_example();
virtual const char * get_name() const =0
virtual int32_t get_num_vectors() const
This class implements streaming features for a document collection. Like in the standard Bag-of-Words...
virtual void release_example()
virtual void get_string(bool *&vector, int32_t &len)
T sparse_dot(const SGSparseVector< T > &v)
virtual int32_t get_dim_feature_space() const
virtual const char * get_name() const
virtual float64_t get_label()
virtual CFeatures * duplicate() const
bool has_labels
Whether examples are labelled or not.
virtual bool get_next_example()
CStreamingFile * working_file
The StreamingFile object to read from.
EFeatureClass
shogun feature class
A Streaming File access class.
virtual void get_string_and_label(bool *&vector, int32_t &len, float64_t &label)
virtual EFeatureType get_feature_type() const
virtual void add_to_dense_vec(float32_t alpha, float32_t *vec2, int32_t vec2_len, bool abs_val=false)
This class can be used to convert a document collection contained in a CStringFeatures object w...
virtual EFeatureClass get_feature_class() const
virtual void set_vector_reader()
Class SGObject is the base class of all shogun objects.
SGSparseVector< float64_t > get_vector()
virtual float32_t dense_dot(const float32_t *vec2, int32_t vec2_len)
virtual void set_vector_and_label_reader()
The class CTokenizer acts as a base class in order to implement tokenizers. Sub-classes must implemen...
SGSparseVector< float64_t > current_vector
Class CStreamingFileFromStringFeatures is derived from CStreamingFile and provides an input source fo...
virtual void start_parser()
virtual int32_t get_num_features()
CHashedDocConverter * converter
Streaming features that support dot products among other operations.
SGSparseVectorEntry< T > * features
EFeatureType
shogun feature type
virtual ~CStreamingHashedDocDotFeatures()
All classes and functions are contained in the shogun namespace.
void set_read_functions()
bool seekable
Whether the stream is seekable.
The class Features is the base class of all feature objects.
void set_k_skip_n_grams(int32_t k, int32_t n)
void set_normalization(bool normalize)
CStreamingHashedDocDotFeatures()
virtual CFeatures * apply(CFeatures *features)
void set_k_skip_n_grams(int32_t k, int32_t n)
CInputParser< char > parser
void set_normalization(bool normalize)
static int32_t pow(bool x, int32_t n)
virtual void end_parser()
virtual float32_t dot(CStreamingDotFeatures *df)