SHOGUN
v3.0.0
|
This class implements streaming features for a document collection. Like in the standard Bag-of-Words representation, this class considers each document as a collection of tokens, which are then hashed into a new feature space of a specified dimension. This class is very flexible and allows the user to specify the tokenizer used to tokenize each document, whether the results should be normalized with regard to the square root of the document size, and whether different tokens should be combined. The latter implements a k-skip n-grams approach, meaning that you can combine up to n tokens, while skipping up to k. E.g. for the tokens ["a", "b", "c", "d"], with n_grams = 2 and skips = 2, one would get the following combinations: ["a", "ab", "ac" (skipped 1), "ad" (skipped 2), "b", "bc", "bd" (skipped 1), "c", "cd", "d"].
The current example is stored as a combination of current_vector and current_label. Call get_next_example() followed by get_current_vector() to iterate through the stream.
Definition at line 40 of file StreamingHashedDocDotFeatures.h.
Public Member Functions | |
CStreamingHashedDocDotFeatures () | |
CStreamingHashedDocDotFeatures (CStreamingFile *file, bool is_labelled, int32_t size, CTokenizer *tzer, int32_t bits=20) | |
CStreamingHashedDocDotFeatures (CStringFeatures< char > *dot_features, CTokenizer *tzer, int32_t bits=20, float64_t *lab=NULL) | |
virtual | ~CStreamingHashedDocDotFeatures () |
virtual float32_t | dot (CStreamingDotFeatures *df) |
virtual float32_t | dense_dot (const float32_t *vec2, int32_t vec2_len) |
virtual void | add_to_dense_vec (float32_t alpha, float32_t *vec2, int32_t vec2_len, bool abs_val=false) |
virtual int32_t | get_dim_feature_space () const |
virtual const char * | get_name () const |
virtual int32_t | get_num_vectors () const |
virtual CFeatures * | duplicate () const |
virtual void | set_vector_reader () |
virtual void | set_vector_and_label_reader () |
virtual EFeatureType | get_feature_type () const |
virtual EFeatureClass | get_feature_class () const |
virtual void | start_parser () |
virtual void | end_parser () |
virtual float64_t | get_label () |
virtual bool | get_next_example () |
virtual void | release_example () |
virtual int32_t | get_num_features () |
SGSparseVector< float64_t > | get_vector () |
void | set_normalization (bool normalize) |
void | set_k_skip_n_grams (int32_t k, int32_t n) |
virtual void | dense_dot_range (float32_t *output, float32_t *alphas, float32_t *vec, int32_t dim, float32_t b, int32_t num_vec=0) |
virtual void | expand_if_required (float32_t *&vec, int32_t &len) |
virtual void | expand_if_required (float64_t *&vec, int32_t &len) |
virtual void * | get_feature_iterator () |
virtual int32_t | get_nnz_features_for_vector () |
virtual bool | get_next_feature (int32_t &index, float32_t &value, void *iterator) |
virtual void | free_feature_iterator (void *iterator) |
void | set_read_functions () |
virtual bool | get_has_labels () |
virtual bool | is_seekable () |
virtual void | reset_stream () |
virtual CFeatures * | get_streamed_features (index_t num_elements) |
virtual void | add_preprocessor (CPreprocessor *p) |
virtual void | del_preprocessor (int32_t num) |
CPreprocessor * | get_preprocessor (int32_t num) const |
void | set_preprocessed (int32_t num) |
bool | is_preprocessed (int32_t num) const |
int32_t | get_num_preprocessed () const |
int32_t | get_num_preprocessors () const |
void | clean_preprocessors () |
void | list_preprocessors () |
int32_t | get_cache_size () const |
virtual bool | reshape (int32_t num_features, int32_t num_vectors) |
void | list_feature_obj () const |
virtual void | load (CFile *loader) |
virtual void | save (CFile *writer) |
bool | check_feature_compatibility (CFeatures *f) const |
bool | has_property (EFeatureProperty p) const |
void | set_property (EFeatureProperty p) |
void | unset_property (EFeatureProperty p) |
virtual CFeatures * | create_merged_copy (CList *others) |
virtual CFeatures * | create_merged_copy (CFeatures *other) |
virtual void | add_subset (SGVector< index_t > subset) |
virtual void | remove_subset () |
virtual void | remove_all_subsets () |
virtual CSubsetStack * | get_subset_stack () |
virtual void | subset_changed_post () |
virtual CFeatures * | copy_subset (SGVector< index_t > indices) |
virtual CSGObject * | shallow_copy () const |
virtual CSGObject * | deep_copy () const |
virtual bool | is_generic (EPrimitiveType *generic) const |
template<class T > | |
void | set_generic () |
void | unset_generic () |
virtual void | print_serializable (const char *prefix="") |
virtual bool | save_serializable (CSerializableFile *file, const char *prefix="", int32_t param_version=Version::get_version_parameter()) |
virtual bool | load_serializable (CSerializableFile *file, const char *prefix="", int32_t param_version=Version::get_version_parameter()) |
DynArray< TParameter * > * | load_file_parameters (const SGParamInfo *param_info, int32_t file_version, CSerializableFile *file, const char *prefix="") |
DynArray< TParameter * > * | load_all_file_parameters (int32_t file_version, int32_t current_version, CSerializableFile *file, const char *prefix="") |
void | map_parameters (DynArray< TParameter * > *param_base, int32_t &base_version, DynArray< const SGParamInfo * > *target_param_infos) |
void | set_global_io (SGIO *io) |
SGIO * | get_global_io () |
void | set_global_parallel (Parallel *parallel) |
Parallel * | get_global_parallel () |
void | set_global_version (Version *version) |
Version * | get_global_version () |
SGStringList< char > | get_modelsel_names () |
void | print_modsel_params () |
char * | get_modsel_param_descr (const char *param_name) |
index_t | get_modsel_param_index (const char *param_name) |
void | build_gradient_parameter_dictionary (CMap< TParameter *, CSGObject * > *dict) |
virtual bool | update_parameter_hash () |
virtual bool | equals (CSGObject *other, float64_t accuracy=0.0) |
virtual CSGObject * | clone () |
Public Attributes | |
SGIO * | io |
Parallel * | parallel |
Version * | version |
Parameter * | m_parameters |
Parameter * | m_model_selection_parameters |
Parameter * | m_gradient_parameters |
ParameterMap * | m_parameter_map |
uint32_t | m_hash |
Protected Member Functions | |
virtual TParameter * | migrate (DynArray< TParameter * > *param_base, const SGParamInfo *target) |
virtual void | one_to_one_migration_prepare (DynArray< TParameter * > *param_base, const SGParamInfo *target, TParameter *&replacement, TParameter *&to_migrate, char *old_name=NULL) |
virtual void | load_serializable_pre () throw (ShogunException) |
virtual void | load_serializable_post () throw (ShogunException) |
virtual void | save_serializable_pre () throw (ShogunException) |
virtual void | save_serializable_post () throw (ShogunException) |
Protected Attributes | |
int32_t | num_bits |
SGSparseVector< float64_t > | current_vector |
CTokenizer * | tokenizer |
CHashedDocConverter * | converter |
CInputParser< char > | parser |
float64_t | current_label |
float32_t | combined_weight |
feature weighting in combined dot features | |
bool | has_labels |
Whether examples are labelled or not. | |
CStreamingFile * | working_file |
The StreamingFile object to read from. | |
bool | seekable |
Whether the stream is seekable. | |
CSubsetStack * | m_subset_stack |
Constructor
Definition at line 24 of file StreamingHashedDocDotFeatures.cpp.
CStreamingHashedDocDotFeatures | ( | CStreamingFile * | file, |
bool | is_labelled, | ||
int32_t | size, | ||
CTokenizer * | tzer, | ||
int32_t | bits = 20 |
||
) |
Constructor with input information passed. Will use normalization and no quadratic features by default; the user should use the set_normalization() and set_k_skip_n_grams() methods to change that.
file | CStreamingFile to take input from. |
is_labelled | Whether examples are labelled or not. |
size | Number of examples to be held in the parser's "ring". |
tzer | the tokenizer to use on the document collection |
bits | the number of bits of the new dimension (means a dimension of size 2^bits) |
Definition at line 17 of file StreamingHashedDocDotFeatures.cpp.
CStreamingHashedDocDotFeatures | ( | CStringFeatures< char > * | dot_features, |
CTokenizer * | tzer, | ||
int32_t | bits = 20 , |
||
float64_t * | lab = NULL |
||
) |
Constructor taking a CStringFeatures< char > object and, optionally, labels as arguments. Will use normalization and no quadratic features by default; the user should use the set_normalization() and set_k_skip_n_grams() methods to change that.
The derived class should implement it so that the Streaming*Features class uses the DotFeatures object as the input, getting examples one by one from the DotFeatures object (and labels, if applicable).
dot_features | CStringFeatures< char > object holding the document collection |
tzer | the tokenizer to use on the document collection |
bits | the number of bits of the new dimension (means a dimension of size 2^bits) |
lab | labels (optional) |
Definition at line 29 of file StreamingHashedDocDotFeatures.cpp.
|
virtual |
Destructor
Definition at line 76 of file StreamingHashedDocDotFeatures.cpp.
|
virtualinherited |
Adds a subset of indices on top of the current subsets (possibly a subset of a subset). Calls subset_changed_post() afterwards.
subset | subset of indices to add |
Reimplemented in CCombinedFeatures.
Definition at line 307 of file Features.cpp.
|
virtual |
add current vector multiplied with alpha to dense vector, 'vec'
alpha | scalar alpha |
vec2 | real valued vector to add to |
vec2_len | length of vector |
abs_val | if true add the absolute value |
Implements CStreamingDotFeatures.
Definition at line 108 of file StreamingHashedDocDotFeatures.cpp.
|
inherited |
Builds a dictionary of all parameters in SGObject as well of those of SGObjects that are parameters of this object. Dictionary maps parameters to the objects that own them.
dict | dictionary of parameters to be built. |
Definition at line 1196 of file SGObject.cpp.
|
inherited |
check feature compatibility
f | features to check for compatibility |
Definition at line 280 of file Features.cpp.
|
inherited |
clears all preprocs
Definition at line 113 of file Features.cpp.
|
virtualinherited |
Creates a clone of the current object. This is done via recursively traversing all parameters, which corresponds to a deep copy. Calling equals on the cloned object always returns true although none of the memory of both objects overlaps.
Definition at line 1313 of file SGObject.cpp.
Creates a new CFeatures instance containing copies of the elements which are specified by the provided indices.
This method is needed for a KernelMachine to store its model data. NOT IMPLEMENTED!
indices | indices of feature elements to copy |
Reimplemented in CStringFeatures< ST >, CStringFeatures< T >, CStringFeatures< uint8_t >, CStringFeatures< char >, CStringFeatures< uint16_t >, CSparseFeatures< ST >, CSparseFeatures< float64_t >, CSparseFeatures< T >, CDenseFeatures< ST >, CDenseFeatures< uint32_t >, CDenseFeatures< float64_t >, CDenseFeatures< T >, CDenseFeatures< uint16_t >, and CCombinedFeatures.
Definition at line 330 of file Features.cpp.
Takes a list of feature instances and returns a new instance that is a concatenation of a copy of this instance's data and the given instances' data. Note that the feature types have to be equal.
NOT IMPLEMENTED!
others | list of feature objects to append |
Reimplemented in CDenseFeatures< ST >, CDenseFeatures< uint32_t >, CDenseFeatures< float64_t >, CDenseFeatures< T >, and CDenseFeatures< uint16_t >.
Definition at line 229 of file Features.h.
Convenience method for method with same name and list as parameter.
NOT IMPLEMENTED!
other | feature object to append |
Reimplemented in CDenseFeatures< ST >, CDenseFeatures< uint32_t >, CDenseFeatures< float64_t >, CDenseFeatures< T >, CDenseFeatures< uint16_t >, and CCombinedFeatures.
Definition at line 243 of file Features.h.
|
virtualinherited |
A deep copy. All the instance variables will also be copied.
Definition at line 160 of file SGObject.h.
|
virtualinherited |
delete preprocessor from list
num | index of preprocessor in list |
Definition at line 119 of file Features.cpp.
compute dot product between current vector and a dense vector
vec2 | real valued vector |
vec2_len | length of vector |
Implements CStreamingDotFeatures.
Definition at line 95 of file StreamingHashedDocDotFeatures.cpp.
|
virtualinherited |
Compute the dot product for all vectors. This function makes use of dense_dot to compute alphas[i] * sparse[i]^T * w + b.
output | result for the given vector range |
alphas | scalars to multiply with, may be NULL |
vec | dense vector to compute dot product with |
dim | length of the dense vector |
b | bias |
num_vec | number of vectors to operate on (indices 0 to num_vec-1) |
If num_vec == 0 or left to its default value, the function attempts to return dot product for all vectors. However, the given output vector must be preallocated!
note that the result will be written to output[0...(num_vec-1)] except when num_vec = 0
Definition at line 30 of file StreamingDotFeatures.cpp.
|
virtual |
compute dot product between vectors of two StreamingDotFeatures objects.
df | StreamingDotFeatures (of same kind) to compute dot product with |
Implements CStreamingDotFeatures.
Definition at line 85 of file StreamingHashedDocDotFeatures.cpp.
|
virtual |
Duplicate the object.
Implements CFeatures.
Definition at line 127 of file StreamingHashedDocDotFeatures.cpp.
|
virtual |
End the parser. Wait for the parsing thread to complete.
Implements CStreamingFeatures.
Definition at line 148 of file StreamingHashedDocDotFeatures.cpp.
Recursively compares the current SGObject to another one. Compares all registered numerical parameters, recursion upon complex (SGObject) parameters. Does not compare pointers!
May be overwritten but please do with care! Should not be necessary in most cases.
other | object to compare with |
accuracy | accuracy to use for comparison (optional) |
Definition at line 1217 of file SGObject.cpp.
|
virtualinherited |
Expand the vector passed so that its length is equal to the dimensionality of the features. The previous values are kept intact through realloc, and the new ones are set to zero.
vec | float32_t* vector |
len | length of the vector |
Reimplemented in CStreamingSparseFeatures< T >, and CStreamingVwFeatures.
Definition at line 53 of file StreamingDotFeatures.cpp.
|
virtualinherited |
Expand the vector passed so that its length is equal to the dimensionality of the features. The previous values are kept intact through realloc, and the new ones are set to zero.
vec | float64_t* vector |
len | length of the vector |
Reimplemented in CStreamingSparseFeatures< T >, and CStreamingVwFeatures.
Definition at line 64 of file StreamingDotFeatures.cpp.
|
virtualinherited |
Clean up the iterator. Call this function with the iterator returned by get_feature_iterator.
iterator | as returned by get_feature_iterator |
Definition at line 93 of file StreamingDotFeatures.cpp.
|
inherited |
|
virtual |
obtain the dimensionality of the feature space
(do not mix this up with the dimensionality of the input space, usually obtained via get_num_features())
Implements CStreamingDotFeatures.
Definition at line 117 of file StreamingHashedDocDotFeatures.cpp.
|
virtual |
Return the feature class
Implements CFeatures.
Definition at line 137 of file StreamingHashedDocDotFeatures.cpp.
|
virtualinherited |
iterate over the non-zero features
call get_feature_iterator first, followed by get_next_feature and free_feature_iterator to cleanup
Definition at line 75 of file StreamingDotFeatures.cpp.
|
virtual |
Return the feature type, depending on T.
Implements CFeatures.
Definition at line 132 of file StreamingHashedDocDotFeatures.cpp.
|
inherited |
|
inherited |
|
inherited |
|
virtualinherited |
Return whether the examples are labelled or not.
Definition at line 35 of file StreamingFeatures.cpp.
|
virtual |
Return the label of the current example.
Raise an error if the input has been specified as unlabelled.
Implements CStreamingFeatures.
Definition at line 177 of file StreamingHashedDocDotFeatures.cpp.
|
inherited |
Definition at line 1100 of file SGObject.cpp.
|
inherited |
Returns description of a given parameter string, if it exists. SG_ERROR otherwise
param_name | name of the parameter |
Definition at line 1124 of file SGObject.cpp.
|
inherited |
Returns the index of the model selection parameter with the provided name
param_name | name of model selection parameter |
Definition at line 1137 of file SGObject.cpp.
|
virtual |
Return the name.
Implements CSGObject.
Definition at line 122 of file StreamingHashedDocDotFeatures.cpp.
|
virtual |
Indicate to the parser that it must fetch the next example.
Implements CStreamingFeatures.
Definition at line 153 of file StreamingHashedDocDotFeatures.cpp.
|
virtualinherited |
iterate over the non-zero features
call this function with the iterator returned by get_feature_iterator and call free_feature_iterator to cleanup
index | is returned by reference (-1 when not available) |
value | is returned by reference |
iterator | as returned by get_feature_iterator |
Definition at line 87 of file StreamingDotFeatures.cpp.
|
virtualinherited |
get number of non-zero features in vector
(in case accurate estimates are too expensive overestimating is OK)
Reimplemented in CStreamingSparseFeatures< T >, CStreamingVwFeatures, CStreamingDenseFeatures< T >, CStreamingDenseFeatures< float64_t >, and CStreamingDenseFeatures< float32_t >.
Definition at line 81 of file StreamingDotFeatures.cpp.
|
virtual |
Get the number of features in the current example.
Implements CStreamingFeatures.
Definition at line 172 of file StreamingHashedDocDotFeatures.cpp.
|
inherited |
get the number of applied preprocs
Definition at line 100 of file Features.cpp.
|
inherited |
get number of preprocessors
Definition at line 152 of file Features.cpp.
|
virtual |
Return the number of vectors stored in this object.
Implements CFeatures.
Definition at line 182 of file StreamingHashedDocDotFeatures.cpp.
|
inherited |
get specified preprocessor
num | index of preprocessor in list |
Definition at line 90 of file Features.cpp.
Returns a CFeatures instance which contains num_elements elements from the underlying stream
num_elements | num elements to save from stream |
NOT IMPLEMENTED!
Reimplemented in CStreamingDenseFeatures< T >, CStreamingDenseFeatures< float64_t >, and CStreamingDenseFeatures< float32_t >.
Definition at line 188 of file StreamingFeatures.h.
|
virtualinherited |
SGSparseVector< float64_t > get_vector | ( | ) |
Get the current example
Definition at line 197 of file StreamingHashedDocDotFeatures.cpp.
|
inherited |
check if features have given property
p | feature property |
Definition at line 292 of file Features.cpp.
|
virtualinherited |
If the SGSerializable is a class template then TRUE will be returned and GENERIC is set to the type of the generic.
generic | set to the type of the generic if returning TRUE |
Definition at line 268 of file SGObject.cpp.
|
inherited |
get whether specified preprocessor was already applied
num | index of preprocessor in list |
Definition at line 146 of file Features.cpp.
|
virtualinherited |
Whether the stream is seekable (to check if multiple epochs are possible), i.e., whether we can process examples in a batch fashion.
A stream is usually seekable when it comes from a file or when it comes from another conventional CFeatures object.
Definition at line 40 of file StreamingFeatures.cpp.
|
inherited |
list feature object
Definition at line 168 of file Features.cpp.
|
inherited |
print preprocessors
Definition at line 128 of file Features.cpp.
|
virtualinherited |
load features from file
loader | File object via which data shall be loaded |
Reimplemented in CDenseFeatures< ST >, CDenseFeatures< uint32_t >, CDenseFeatures< float64_t >, CDenseFeatures< T >, CDenseFeatures< uint16_t >, CSparseFeatures< ST >, CSparseFeatures< float64_t >, CSparseFeatures< T >, CStringFeatures< ST >, CStringFeatures< T >, CStringFeatures< uint8_t >, CStringFeatures< char >, and CStringFeatures< uint16_t >.
Definition at line 266 of file Features.cpp.
|
inherited |
maps all parameters of this instance to the provided file version and loads all parameter data from the file into an array, which is sorted (basically calls load_file_parameter(...) for all parameters and puts all results into a sorted array)
file_version | parameter version of the file |
current_version | version from which mapping begins (you want to use Version::get_version_parameter() for this in most cases) |
file | file to load from |
prefix | prefix for members |
Definition at line 673 of file SGObject.cpp.
|
inherited |
Loads some specified parameters from a file with a specified version. The provided parameter info has a version which is recursively mapped until the file parameter version is reached. Note that there may be multiple parameters in the mapping; therefore, a set of TParameter instances is returned.
param_info | information of parameter |
file_version | parameter version of the file, must be <= provided parameter version |
file | file to load from |
prefix | prefix for members |
Definition at line 514 of file SGObject.cpp.
|
virtualinherited |
Load this object from file. If it will fail (returning FALSE) then this object will contain inconsistent data and should not be used!
file | where to load from |
prefix | prefix for members |
param_version | (optional) a parameter version different from the default, Version::get_version_parameter() (this is mainly for testing, better do not use) |
Definition at line 345 of file SGObject.cpp.
|
protectedvirtualinherited |
Can (optionally) be overridden to post-initialize some member variables which are not PARAMETER::ADD'ed. Make sure that at first the overridden method BASE_CLASS::LOAD_SERIALIZABLE_POST is called.
ShogunException | Will be thrown if an error occurs. |
Reimplemented in CKernel, CWeightedDegreePositionStringKernel, CList, CAlphabet, CLinearHMM, CGaussianKernel, CInverseMultiQuadricKernel, CCircularKernel, and CExponentialKernel.
Definition at line 1029 of file SGObject.cpp.
|
protectedvirtualinherited |
Can (optionally) be overridden to pre-initialize some member variables which are not PARAMETER::ADD'ed. Make sure that at first the overridden method BASE_CLASS::LOAD_SERIALIZABLE_PRE is called.
ShogunException | Will be thrown if an error occurs. |
Reimplemented in CDynamicArray< T >, CDynamicArray< float64_t >, CDynamicArray< float32_t >, CDynamicArray< int32_t >, CDynamicArray< char >, CDynamicArray< bool >, CDynamicArray< uint64_t >, and CDynamicObjectArray.
Definition at line 1024 of file SGObject.cpp.
|
inherited |
Takes a set of TParameter instances (base) with a certain version and a set of target parameter infos and recursively maps the base level wise to the current version using CSGObject::migrate(...). The base is replaced. After this call, the base version containing parameters should be of same version/type as the initial target parameter infos. Note for this to work, the migrate methods and all the internal parameter mappings have to match
param_base | set of TParameter instances that are mapped to the provided target parameter infos |
base_version | version of the parameter base |
target_param_infos | set of SGParamInfo instances that specify the target parameter base |
Definition at line 711 of file SGObject.cpp.
|
protectedvirtualinherited |
Creates a new TParameter instance, which contains migrated data from the version that is provided. The provided parameter data base is used for migration; this base is a collection of all parameter data of the previous version. Migration is done FROM the data in param_base TO the provided param info. Migration is always one version step. The method has to be implemented in subclasses; if no match is found, the base method has to be called.
If there is an element in the param_base which equals the target, a copy of the element is returned. This represents the case when nothing has changed and therefore, the migrate method is not overloaded in a subclass
param_base | set of TParameter instances to use for migration |
target | parameter info for the resulting TParameter |
Definition at line 918 of file SGObject.cpp.
|
protectedvirtualinherited |
This method prepares everything for a one-to-one parameter migration. One to one here means that only ONE element of the parameter base is needed for the migration (the one with the same name as the target). Data is allocated for the target (in the type as provided in the target SGParamInfo), and a corresponding new TParameter instance is written to replacement. The to_migrate pointer points to the single needed TParameter instance needed for migration. If a name change happened, the old name may be specified by old_name. In addition, the m_delete_data flag of to_migrate is set to true. So if you want to migrate data, the only thing to do after this call is converting the data in the m_parameter fields. If unsure how to use - have a look into an example for this. (base_migration_type_conversion.cpp for example)
param_base | set of TParameter instances to use for migration |
target | parameter info for the resulting TParameter |
replacement | (used as output) here the TParameter instance which is returned by migration is created into |
to_migrate | the only source that is used for migration |
old_name | with this parameter, a name change may be specified |
Definition at line 858 of file SGObject.cpp.
|
inherited |
prints all parameter registered for model selection and their type
Definition at line 1076 of file SGObject.cpp.
|
virtualinherited |
prints registered parameters out
prefix | prefix for members |
Definition at line 280 of file SGObject.cpp.
|
virtual |
Indicate that processing of the current example is done. The parser then considers it safe to dispose of that example and replace it with another one.
Implements CStreamingFeatures.
Definition at line 167 of file StreamingHashedDocDotFeatures.cpp.
|
virtualinherited |
removes all subsets Calls subset_changed_post() afterwards
Reimplemented in CCombinedFeatures.
Definition at line 319 of file Features.cpp.
|
virtualinherited |
Removes the last added subset from the subset stack, if existing. Calls subset_changed_post() afterwards.
Reimplemented in CCombinedFeatures.
Definition at line 313 of file Features.cpp.
|
virtualinherited |
Function to reset the stream (if possible).
Reimplemented in CStreamingSparseFeatures< T >, CStreamingVwFeatures, CStreamingDenseFeatures< T >, CStreamingDenseFeatures< float64_t >, and CStreamingDenseFeatures< float32_t >.
Definition at line 45 of file StreamingFeatures.cpp.
|
virtualinherited |
in case there is a feature matrix allow for reshaping
NOT IMPLEMENTED!
num_features | new number of features |
num_vectors | new number of vectors |
Reimplemented in CDenseFeatures< ST >, CDenseFeatures< uint32_t >, CDenseFeatures< float64_t >, CDenseFeatures< T >, and CDenseFeatures< uint16_t >.
Definition at line 162 of file Features.cpp.
|
virtualinherited |
save features to file
writer | File object via which data shall be saved |
Reimplemented in CStringFeatures< ST >, CStringFeatures< T >, CStringFeatures< uint8_t >, CStringFeatures< char >, CStringFeatures< uint16_t >, CDenseFeatures< ST >, CDenseFeatures< uint32_t >, CDenseFeatures< float64_t >, CDenseFeatures< T >, CDenseFeatures< uint16_t >, CSparseFeatures< ST >, CSparseFeatures< float64_t >, and CSparseFeatures< T >.
Definition at line 273 of file Features.cpp.
|
virtualinherited |
Save this object to file.
file | where to save the object; will be closed during returning if PREFIX is an empty string. |
prefix | prefix for members |
param_version | (optional) a parameter version different from the default, Version::get_version_parameter() (this is mainly for testing, better do not use) |
Definition at line 286 of file SGObject.cpp.
|
protectedvirtualinherited |
Can (optionally) be overridden to post-initialize some member variables which are not PARAMETER::ADD'ed. Make sure that at first the overridden method BASE_CLASS::SAVE_SERIALIZABLE_POST is called.
ShogunException | Will be thrown if an error occurs. |
Reimplemented in CKernel.
Definition at line 1039 of file SGObject.cpp.
|
protectedvirtualinherited |
Can (optionally) be overridden to pre-initialize some member variables which are not PARAMETER::ADD'ed. Make sure that at first the overridden method BASE_CLASS::SAVE_SERIALIZABLE_PRE is called.
ShogunException | Will be thrown if an error occurs. |
Reimplemented in CKernel, CDynamicArray< T >, CDynamicArray< float64_t >, CDynamicArray< float32_t >, CDynamicArray< int32_t >, CDynamicArray< char >, CDynamicArray< bool >, CDynamicArray< uint64_t >, and CDynamicObjectArray.
Definition at line 1034 of file SGObject.cpp.
|
inherited |
set generic type to T
Definition at line 41 of file SGObject.cpp.
|
inherited |
|
inherited |
set the parallel object
parallel | parallel object to use |
Definition at line 220 of file SGObject.cpp.
|
inherited |
set the version object
version | version object to use |
Definition at line 255 of file SGObject.cpp.
void set_k_skip_n_grams | ( | int32_t | k, |
int32_t | n | ||
) |
Method used to specify the parameters for the quadratic approach of k-skip n-grams. See class description for more details and an example.
k | the max number of allowed skips |
n | the max number of tokens to combine |
Definition at line 207 of file StreamingHashedDocDotFeatures.cpp.
void set_normalization | ( | bool | normalize | ) |
specify whether hashed vector should be normalized or not
normalize | whether to normalize |
Definition at line 202 of file StreamingHashedDocDotFeatures.cpp.
|
inherited |
set applied flag for preprocessor
num | index of preprocessor in list |
Definition at line 140 of file Features.cpp.
|
inherited |
|
inherited |
Set the vector reading functions.
The functions are implemented specific to the type in the derived class.
Definition at line 29 of file StreamingFeatures.cpp.
|
virtual |
Sets the read function (in case the examples are labelled) to get_*_vector_and_label from CStreamingFile.
The exact function depends on type T.
The parser uses the function set by this while reading labelled examples.
Implements CStreamingFeatures.
Definition at line 192 of file StreamingHashedDocDotFeatures.cpp.
|
virtual |
Sets the read function (in case the examples are unlabelled) to get_*_vector() from CStreamingFile.
The exact function depends on type T.
The parser uses the function set by this while reading unlabelled examples.
Implements CStreamingFeatures.
Definition at line 187 of file StreamingHashedDocDotFeatures.cpp.
|
virtualinherited |
A shallow copy. All the SGObject instance variables will be simply assigned and SG_REF-ed.
Reimplemented in CGaussianKernel.
Definition at line 151 of file SGObject.h.
|
virtual |
Start the parser. It stores parsed examples from the input in a separate thread.
Implements CStreamingFeatures.
Definition at line 142 of file StreamingHashedDocDotFeatures.cpp.
|
virtualinherited |
method may be overwritten to update things that depend on subset
Reimplemented in CStringFeatures< ST >, CStringFeatures< T >, CStringFeatures< uint8_t >, CStringFeatures< char >, and CStringFeatures< uint16_t >.
Definition at line 271 of file Features.h.
|
inherited |
unset generic type
this has to be called in classes specializing a template class
Definition at line 275 of file SGObject.cpp.
|
inherited |
|
virtualinherited |
Updates the hash of current parameter combination.
Definition at line 227 of file SGObject.cpp.
|
protectedinherited |
feature weighting in combined dot features
Definition at line 186 of file StreamingDotFeatures.h.
|
protected |
Converter
Definition at line 251 of file StreamingHashedDocDotFeatures.h.
|
protected |
The current example's label
Definition at line 257 of file StreamingHashedDocDotFeatures.h.
|
protected |
Current example
Definition at line 245 of file StreamingHashedDocDotFeatures.h.
|
protectedinherited |
Whether examples are labelled or not.
Definition at line 198 of file StreamingFeatures.h.
|
inherited |
io
Definition at line 514 of file SGObject.h.
|
inherited |
parameters wrt which we can compute gradients
Definition at line 529 of file SGObject.h.
|
inherited |
Hash of parameter values
Definition at line 535 of file SGObject.h.
|
inherited |
model selection parameters
Definition at line 526 of file SGObject.h.
|
inherited |
map for different parameter versions
Definition at line 532 of file SGObject.h.
|
inherited |
parameters
Definition at line 523 of file SGObject.h.
|
protectedinherited |
subset used for index transformations
Definition at line 302 of file Features.h.
|
protected |
number of bits for the target dimension
Definition at line 242 of file StreamingHashedDocDotFeatures.h.
|
inherited |
parallel
Definition at line 517 of file SGObject.h.
|
protected |
The parser
Definition at line 254 of file StreamingHashedDocDotFeatures.h.
|
protectedinherited |
Whether the stream is seekable.
Definition at line 204 of file StreamingFeatures.h.
|
protected |
Definition at line 248 of file StreamingHashedDocDotFeatures.h.
|
inherited |
version
Definition at line 520 of file SGObject.h.
|
protectedinherited |
The StreamingFile object to read from.
Definition at line 201 of file StreamingFeatures.h.