SHOGUN  4.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
List of all members | Public Member Functions | Static Public Member Functions | Public Attributes | Protected Member Functions | Protected Attributes
CStringFileFeatures< ST > Class Template Reference

Detailed Description

template<class ST>
class shogun::CStringFileFeatures< ST >

File based string features.

StringFeatures that are file based. Underneath memory mapped files are used. Derived from CStringFeatures thus transparently enabling all of the StringFeature functionality.

The supported file format contains one string per line; lines of variable length are supported and must be separated by '\n'.

Definition at line 36 of file StringFileFeatures.h.

Inheritance diagram for CStringFileFeatures< ST >:
[legend]

Public Member Functions

 CStringFileFeatures ()
 
 CStringFileFeatures (const char *fname, EAlphabet alpha)
 
virtual ~CStringFileFeatures ()
 
virtual const char * get_name () const
 
virtual void cleanup_feature_vectors (int32_t start, int32_t stop)
 
virtual EFeatureClass get_feature_class () const
 
virtual EFeatureType get_feature_type () const
 
template<>
EFeatureType get_feature_type () const
 
template<>
EFeatureType get_feature_type () const
 
template<>
EFeatureType get_feature_type () const
 
template<>
EFeatureType get_feature_type () const
 
template<>
EFeatureType get_feature_type () const
 
template<>
EFeatureType get_feature_type () const
 
template<>
EFeatureType get_feature_type () const
 
template<>
EFeatureType get_feature_type () const
 
template<>
EFeatureType get_feature_type () const
 
template<>
EFeatureType get_feature_type () const
 
template<>
EFeatureType get_feature_type () const
 
template<>
EFeatureType get_feature_type () const
 
CAlphabet * get_alphabet ()
 
virtual CFeatures * duplicate () const
 
SGVector< ST > get_feature_vector (int32_t num)
 
ST * get_feature_vector (int32_t num, int32_t &len, bool &dofree)
 
void set_feature_vector (SGVector< ST > vector, int32_t num)
 
virtual void set_feature_vector (int32_t num, ST *string, int32_t len)
 
void enable_on_the_fly_preprocessing ()
 
void disable_on_the_fly_preprocessing ()
 
CStringFeatures< ST > * get_transposed ()
 
SGString< ST > * get_transposed (int32_t &num_feat, int32_t &num_vec)
 
void free_feature_vector (ST *feat_vec, int32_t num, bool dofree)
 
void free_feature_vector (SGVector< ST > feat_vec, int32_t num)
 
virtual ST get_feature (int32_t vec_num, int32_t feat_num)
 
virtual int32_t get_vector_length (int32_t vec_num)
 
virtual int32_t get_max_vector_length ()
 
virtual int32_t get_num_vectors () const
 
floatmax_t get_num_symbols ()
 
floatmax_t get_max_num_symbols ()
 
floatmax_t get_original_num_symbols ()
 
int32_t get_order ()
 
ST get_masked_symbols (ST symbol, uint8_t mask)
 
template<>
bool get_masked_symbols (bool symbol, uint8_t mask)
 
template<>
float32_t get_masked_symbols (float32_t symbol, uint8_t mask)
 
template<>
float64_t get_masked_symbols (float64_t symbol, uint8_t mask)
 
template<>
floatmax_t get_masked_symbols (floatmax_t symbol, uint8_t mask)
 
ST shift_offset (ST offset, int32_t amount)
 
template<>
bool shift_offset (bool symbol, int32_t amount)
 
template<>
float32_t shift_offset (float32_t symbol, int32_t amount)
 
template<>
float64_t shift_offset (float64_t symbol, int32_t amount)
 
template<>
floatmax_t shift_offset (floatmax_t symbol, int32_t amount)
 
ST shift_symbol (ST symbol, int32_t amount)
 
template<>
bool shift_symbol (bool symbol, int32_t amount)
 
template<>
float32_t shift_symbol (float32_t symbol, int32_t amount)
 
template<>
float64_t shift_symbol (float64_t symbol, int32_t amount)
 
template<>
floatmax_t shift_symbol (floatmax_t symbol, int32_t amount)
 
virtual void load (CFile *loader)
 
void load_ascii_file (char *fname, bool remap_to_bin=true, EAlphabet ascii_alphabet=DNA, EAlphabet binary_alphabet=RAWDNA)
 
bool load_fasta_file (const char *fname, bool ignore_invalid=false)
 
bool load_fastq_file (const char *fname, bool ignore_invalid=false, bool bitremap_in_single_string=false)
 
bool load_from_directory (char *dirname)
 
void set_features (SGStringList< ST > feats)
 
bool set_features (SGString< ST > *p_features, int32_t p_num_vectors, int32_t p_max_string_length)
 
bool append_features (CStringFeatures< ST > *sf)
 
bool append_features (SGString< ST > *p_features, int32_t p_num_vectors, int32_t p_max_string_length)
 
SGStringList< ST > get_features ()
 
virtual SGString< ST > * get_features (int32_t &num_str, int32_t &max_str_len)
 
virtual void get_features (SGString< ST > **dst, int32_t *num_str)
 
virtual SGString< ST > * copy_features (int32_t &num_str, int32_t &max_str_len)
 
virtual void save (CFile *writer)
 
virtual bool load_compressed (char *src, bool decompress)
 
virtual bool save_compressed (char *dest, E_COMPRESSION_TYPE compression, int level)
 
virtual bool apply_preprocessor (bool force_preprocessing=false)
 
int32_t obtain_by_sliding_window (int32_t window_size, int32_t step_size, int32_t skip=0)
 
int32_t obtain_by_position_list (int32_t window_size, CDynamicArray< int32_t > *positions, int32_t skip=0)
 
bool obtain_from_char (CStringFeatures< char > *sf, int32_t start, int32_t p_order, int32_t gap, bool rev)
 
template<class CT >
bool obtain_from_char_features (CStringFeatures< CT > *sf, int32_t start, int32_t p_order, int32_t gap, bool rev)
 
template<>
bool obtain_from_char_features (CStringFeatures< CT > *sf, int32_t start, int32_t p_order, int32_t gap, bool rev)
 
template<>
bool obtain_from_char_features (CStringFeatures< CT > *sf, int32_t start, int32_t p_order, int32_t gap, bool rev)
 
template<>
bool obtain_from_char_features (CStringFeatures< CT > *sf, int32_t start, int32_t p_order, int32_t gap, bool rev)
 
bool have_same_length (int32_t len=-1)
 
void embed_features (int32_t p_order)
 
template<>
void embed_features (int32_t p_order)
 
template<>
void embed_features (int32_t p_order)
 
template<>
void embed_features (int32_t p_order)
 
void compute_symbol_mask_table (int64_t max_val)
 
template<>
void compute_symbol_mask_table (int64_t max_val)
 
template<>
void compute_symbol_mask_table (int64_t max_val)
 
template<>
void compute_symbol_mask_table (int64_t max_val)
 
void unembed_word (ST word, uint8_t *seq, int32_t len)
 
template<>
void unembed_word (float32_t word, uint8_t *seq, int32_t len)
 
template<>
void unembed_word (float64_t word, uint8_t *seq, int32_t len)
 
template<>
void unembed_word (floatmax_t word, uint8_t *seq, int32_t len)
 
ST embed_word (ST *seq, int32_t len)
 
template<>
float32_t embed_word (float32_t *seq, int32_t len)
 
template<>
float64_t embed_word (float64_t *seq, int32_t len)
 
template<>
floatmax_t embed_word (floatmax_t *seq, int32_t len)
 
void determine_maximum_string_length ()
 
virtual void get_histogram (float64_t **hist, int32_t *rows, int32_t *cols, bool normalize=true)
 
virtual void create_random (float64_t *hist, int32_t rows, int32_t cols, int32_t num_vec)
 
virtual CFeatures * copy_subset (SGVector< index_t > indices)
 
virtual void subset_changed_post ()
 
virtual void add_preprocessor (CPreprocessor *p)
 
virtual void del_preprocessor (int32_t num)
 
CPreprocessor * get_preprocessor (int32_t num) const
 
void set_preprocessed (int32_t num)
 
bool is_preprocessed (int32_t num) const
 
int32_t get_num_preprocessed () const
 
int32_t get_num_preprocessors () const
 
void clean_preprocessors ()
 
void list_preprocessors ()
 
int32_t get_cache_size () const
 
virtual bool reshape (int32_t num_features, int32_t num_vectors)
 
void list_feature_obj () const
 
bool check_feature_compatibility (CFeatures *f) const
 
bool has_property (EFeatureProperty p) const
 
void set_property (EFeatureProperty p)
 
void unset_property (EFeatureProperty p)
 
virtual CFeatures * create_merged_copy (CList *others)
 
virtual CFeatures * create_merged_copy (CFeatures *other)
 
virtual void add_subset (SGVector< index_t > subset)
 
virtual void add_subset_in_place (SGVector< index_t > subset)
 
virtual void remove_subset ()
 
virtual void remove_all_subsets ()
 
virtual CSubsetStack * get_subset_stack ()
 
virtual CFeatures * copy_dimension_subset (SGVector< index_t > dims)
 
virtual bool support_compatible_class () const
 
virtual bool get_feature_class_compatibility (EFeatureClass rhs) const
 
virtual CFeatures * shallow_subset_copy ()
 
virtual CSGObject * shallow_copy () const
 
virtual CSGObject * deep_copy () const
 
virtual bool is_generic (EPrimitiveType *generic) const
 
template<class T >
void set_generic ()
 
template<>
void set_generic ()
 
template<>
void set_generic ()
 
template<>
void set_generic ()
 
template<>
void set_generic ()
 
template<>
void set_generic ()
 
template<>
void set_generic ()
 
template<>
void set_generic ()
 
template<>
void set_generic ()
 
template<>
void set_generic ()
 
template<>
void set_generic ()
 
template<>
void set_generic ()
 
template<>
void set_generic ()
 
template<>
void set_generic ()
 
template<>
void set_generic ()
 
template<>
void set_generic ()
 
void unset_generic ()
 
virtual void print_serializable (const char *prefix="")
 
virtual bool save_serializable (CSerializableFile *file, const char *prefix="")
 
virtual bool load_serializable (CSerializableFile *file, const char *prefix="")
 
void set_global_io (SGIO *io)
 
SGIO * get_global_io ()
 
void set_global_parallel (Parallel *parallel)
 
Parallel * get_global_parallel ()
 
void set_global_version (Version *version)
 
Version * get_global_version ()
 
SGStringList< char > get_modelsel_names ()
 
void print_modsel_params ()
 
char * get_modsel_param_descr (const char *param_name)
 
index_t get_modsel_param_index (const char *param_name)
 
void build_gradient_parameter_dictionary (CMap< TParameter *, CSGObject * > *dict)
 
bool has (const std::string &name) const
 
template<typename T >
bool has (const Tag< T > &tag) const
 
template<typename T , typename U = void>
bool has (const std::string &name) const
 
template<typename T >
void set (const Tag< T > &_tag, const T &value)
 
template<typename T , typename U = void>
void set (const std::string &name, const T &value)
 
template<typename T >
T get (const Tag< T > &_tag) const
 
template<typename T , typename U = void>
T get (const std::string &name) const
 
virtual void update_parameter_hash ()
 
virtual bool parameter_hash_changed ()
 
virtual bool equals (CSGObject *other, float64_t accuracy=0.0, bool tolerant=false)
 
virtual CSGObject * clone ()
 

Static Public Member Functions

static ST * get_zero_terminated_string_copy (SGString< ST > str)
 

Public Attributes

SGIO * io
 
Parallel * parallel
 
Version * version
 
Parameter * m_parameters
 
Parameter * m_model_selection_parameters
 
Parameter * m_gradient_parameters
 
uint32_t m_hash
 

Protected Member Functions

ST * get_line (uint64_t &len, uint64_t &offs, int32_t &line_nr, uint64_t file_length)
 
virtual void cleanup ()
 
virtual void cleanup_feature_vector (int32_t num)
 
void fetch_meta_info_from_file (int32_t granularity=1048576)
 
virtual ST * compute_feature_vector (int32_t num, int32_t &len)
 
virtual void load_serializable_pre () throw (ShogunException)
 
virtual void load_serializable_post () throw (ShogunException)
 
virtual void save_serializable_pre () throw (ShogunException)
 
virtual void save_serializable_post () throw (ShogunException)
 
template<typename T >
void register_param (Tag< T > &_tag, const T &value)
 
template<typename T >
void register_param (const std::string &name, const T &value)
 

Protected Attributes

CMemoryMappedFile< ST > * file
 
CAlphabet * alphabet
 
int32_t num_vectors
 
SGString< ST > * features
 
ST * single_string
 
int32_t length_of_single_string
 length of prior single string More...
 
int32_t max_string_length
 
floatmax_t num_symbols
 number of used symbols More...
 
floatmax_t original_num_symbols
 original number of used symbols (before higher order mapping) More...
 
int32_t order
 order used in higher order mapping More...
 
ST * symbol_mask_table
 symbol mask table, required to access bit-based symbols More...
 
int32_t symbol_mask_table_len
 length of the symbol mask table More...
 
bool preprocess_on_get
 preprocess on-the-fly? More...
 
CCache< ST > * feature_cache
 
CSubsetStack * m_subset_stack
 

Constructor & Destructor Documentation

CStringFileFeatures ( )

default constructor

Definition at line 6 of file StringFileFeatures.cpp.

CStringFileFeatures ( const char *  fname,
EAlphabet  alpha 
)

constructor

Parameters
fname: filename of the file containing line based features
alpha: alphabet (type) to use for string features

Definition at line 10 of file StringFileFeatures.cpp.

~CStringFileFeatures ( )
virtual

default destructor

Definition at line 17 of file StringFileFeatures.cpp.

Member Function Documentation

void add_preprocessor ( CPreprocessor * p)
virtual inherited

add preprocessor

Parameters
p: preprocessor to set

Definition at line 85 of file Features.cpp.

void add_subset ( SGVector< index_t > subset)
virtual inherited

Adds a subset of indices on top of the current subsets (possibly subset of subset). Every call causes a new active index vector to be stored. Added subsets can be removed one-by-one. If this is not needed, add_subset_in_place() should be used (does not store intermediate index vectors)

Calls subset_changed_post() afterwards

Parameters
subset: subset of indices to add

Reimplemented in CCombinedFeatures.

Definition at line 310 of file Features.cpp.

void add_subset_in_place ( SGVector< index_t > subset)
virtual inherited

Sets/changes latest added subset. This allows to add multiple subsets with in-place memory requirements. They cannot be removed one-by-one afterwards, only the latest active can. If this is needed, use add_subset(). If no subset is active, this just adds.

Calls subset_changed_post() afterwards

Parameters
subset: subset of indices to replace the latest one with.

Definition at line 316 of file Features.cpp.

bool append_features ( CStringFeatures< ST > *  sf)
inherited

append features. If the given string features have a subset, only this will be copied

not possible with subset

Parameters
sf: features to append
Returns
if setting was successful

Definition at line 899 of file StringFeatures.cpp.

bool append_features ( SGString< ST > *  p_features,
int32_t  p_num_vectors,
int32_t  p_max_string_length 
)
inherited

append features

not possible with subset

Parameters
p_features: features to append
p_num_vectors: number of vectors
p_max_string_length: maximum string length

note that p_features will be SG_FREE()'d on success

Returns
if setting was successful

Definition at line 921 of file StringFeatures.cpp.

bool apply_preprocessor ( bool  force_preprocessing = false)
virtual inherited

apply preprocessor

Parameters
force_preprocessing: if preprocessing shall be forced
Returns
if applying was successful

Definition at line 1177 of file StringFeatures.cpp.

void build_gradient_parameter_dictionary ( CMap< TParameter *, CSGObject * > *  dict)
inherited

Builds a dictionary of all parameters in SGObject as well of those of SGObjects that are parameters of this object. Dictionary maps parameters to the objects that own them.

Parameters
dict: dictionary of parameters to be built.

Definition at line 630 of file SGObject.cpp.

bool check_feature_compatibility ( CFeatures * f) const
inherited

check feature compatibility

Parameters
f: features to check for compatibility
Returns
if features are compatible

Definition at line 283 of file Features.cpp.

void clean_preprocessors ( )
inherited

clears all preprocs

Definition at line 116 of file Features.cpp.

void cleanup ( )
protected virtual

cleanup string features

Reimplemented from CStringFeatures< ST >.

Definition at line 53 of file StringFileFeatures.cpp.

void cleanup_feature_vector ( int32_t  num)
protected virtual

cleanup a single feature vector

Reimplemented from CStringFeatures< ST >.

Definition at line 71 of file StringFileFeatures.cpp.

void cleanup_feature_vectors ( int32_t  start,
int32_t  stop 
)
virtual inherited

cleanup multiple feature vectors

possible with subset

Parameters
start: index of first vector to be cleaned
stop: index of the last vector to be cleaned

Definition at line 190 of file StringFeatures.cpp.

CSGObject * clone ( )
virtual inherited

Creates a clone of the current object. This is done via recursively traversing all parameters, which corresponds to a deep copy. Calling equals on the cloned object always returns true although none of the memory of both objects overlaps.

Returns
an identical copy of the given object, which is disjoint in memory. NULL if the clone fails. Note that the returned object is SG_REF'ed

Definition at line 747 of file SGObject.cpp.

ST * compute_feature_vector ( int32_t  num,
int32_t &  len 
)
protected virtual inherited

compute feature vector for sample num; if target is set, the vector is written to target; len is returned by reference

possible with subset

Parameters
num: which vector
len: length of vector
Returns
feature vector

Definition at line 1651 of file StringFeatures.cpp.

void compute_symbol_mask_table ( int64_t  max_val)
inherited

compute symbol mask table

required to access bit-based symbols

not implemented for subset

Definition at line 1374 of file StringFeatures.cpp.

void compute_symbol_mask_table ( int64_t  max_val)
inherited

Definition at line 1889 of file StringFeatures.cpp.

void compute_symbol_mask_table ( int64_t  max_val)
inherited

Definition at line 1892 of file StringFeatures.cpp.

void compute_symbol_mask_table ( int64_t  max_val)
inherited

Definition at line 1895 of file StringFeatures.cpp.

CFeatures * copy_dimension_subset ( SGVector< index_t > dims)
virtual inherited

Creates a new CFeatures instance containing only the dimensions of the feature vector which are specified by the provided indices.

This method is needed for feature selection tasks. NOT IMPLEMENTED!

Parameters
dims: indices of feature dimensions to copy
Returns
new CFeatures instance with copies of specified features

Reimplemented in CDenseFeatures< ST >, CDenseFeatures< uint32_t >, CDenseFeatures< float64_t >, CDenseFeatures< T >, and CDenseFeatures< uint16_t >.

Definition at line 348 of file Features.cpp.

SGString< ST > * copy_features ( int32_t &  num_str,
int32_t &  max_str_len 
)
virtual inherited

copy_features

possible with subset

Parameters
num_str: number of strings (returned)
max_str_len: maximal string length (returned)
Returns
string features

Definition at line 992 of file StringFeatures.cpp.

CFeatures * copy_subset ( SGVector< index_t > indices)
virtual inherited

Creates a new CFeatures instance containing copies of the elements which are specified by the provided indices.

possible with subset

Parameters
indices: indices of feature elements to copy
Returns
new CFeatures instance with copies of feature data

Reimplemented from CFeatures.

Definition at line 1610 of file StringFeatures.cpp.

virtual CFeatures* create_merged_copy ( CList * others)
virtual inherited

Takes a list of feature instances and returns a new instance being a concatenation of a copy of this instance's data and the given instances' data. Note that the feature types have to be equal.

NOT IMPLEMENTED!

Parameters
others: list of feature objects to append
Returns
new feature object which contains copy of data of this instance and given ones

Reimplemented in CDenseFeatures< ST >, CDenseFeatures< uint32_t >, CDenseFeatures< float64_t >, CDenseFeatures< T >, and CDenseFeatures< uint16_t >.

Definition at line 235 of file Features.h.

virtual CFeatures* create_merged_copy ( CFeatures * other)
virtual inherited

Convenience method for method with same name and list as parameter.

NOT IMPLEMENTED!

Parameters
other: feature object to append
Returns
new feature object which contains copy of data of this instance and of given one

Reimplemented in CDenseFeatures< ST >, CDenseFeatures< uint32_t >, CDenseFeatures< float64_t >, CDenseFeatures< T >, CDenseFeatures< uint16_t >, and CCombinedFeatures.

Definition at line 249 of file Features.h.

void create_random ( float64_t * hist,
int32_t  rows,
int32_t  cols,
int32_t  num_vec 
)
virtual inherited

create some random strings based on normalized histogram

not possible with subset

Definition at line 1508 of file StringFeatures.cpp.

CSGObject * deep_copy ( ) const
virtual inherited

A deep copy. All the instance variables will also be copied.

Definition at line 231 of file SGObject.cpp.

void del_preprocessor ( int32_t  num)
virtual inherited

delete preprocessor from list

Parameters
num: index of preprocessor in list

Definition at line 122 of file Features.cpp.

void determine_maximum_string_length ( )
inherited

determine new maximum string length

possible with subset

Definition at line 1431 of file StringFeatures.cpp.

void disable_on_the_fly_preprocessing ( )
inherited

call this to disable on the fly feature preprocessing upon call to get_feature_vector. Useful when you manually apply preprocessors.

Definition at line 270 of file StringFeatures.cpp.

CFeatures * duplicate ( ) const
virtual inherited

duplicate feature object

Returns
feature object

Implements CFeatures.

Definition at line 218 of file StringFeatures.cpp.

void embed_features ( int32_t  p_order)
inherited

embed string features in bit representation in-place

not implemented for subset

Definition at line 1320 of file StringFeatures.cpp.

void embed_features ( int32_t  p_order)
inherited

Definition at line 1879 of file StringFeatures.cpp.

void embed_features ( int32_t  p_order)
inherited

Definition at line 1882 of file StringFeatures.cpp.

void embed_features ( int32_t  p_order)
inherited

Definition at line 1885 of file StringFeatures.cpp.

ST embed_word ( ST *  seq,
int32_t  len 
)
inherited

embed a single word

Parameters
seq: sequence of size len in a bitfield
len

Definition at line 1418 of file StringFeatures.cpp.

float32_t embed_word ( float32_t * seq,
int32_t  len 
)
inherited

Definition at line 1899 of file StringFeatures.cpp.

float64_t embed_word ( float64_t * seq,
int32_t  len 
)
inherited

Definition at line 1903 of file StringFeatures.cpp.

floatmax_t embed_word ( floatmax_t * seq,
int32_t  len 
)
inherited

Definition at line 1907 of file StringFeatures.cpp.

void enable_on_the_fly_preprocessing ( )
inherited

call this to preprocess string features upon call to get_feature_vector

Definition at line 265 of file StringFeatures.cpp.

bool equals ( CSGObject * other,
float64_t  accuracy = 0.0,
bool  tolerant = false 
)
virtual inherited

Recursively compares the current SGObject to another one. Compares all registered numerical parameters, recursion upon complex (SGObject) parameters. Does not compare pointers!

May be overwritten but please do with care! Should not be necessary in most cases.

Parameters
other: object to compare with
accuracy: accuracy to use for comparison (optional)
tolerant: allows lenient check on float equality (within accuracy)
Returns
true if all parameters were equal, false if not

Definition at line 651 of file SGObject.cpp.

void fetch_meta_info_from_file ( int32_t  granularity = 1048576)
protected

obtain meta information from file

i.e., determine number of strings and their lengths

Definition at line 77 of file StringFileFeatures.cpp.

void free_feature_vector ( ST *  feat_vec,
int32_t  num,
bool  dofree 
)
inherited

free feature vector

possible with subset

Parameters
feat_vec: feature vector to free
num: index in feature cache, possibly from subset
dofree: if vector should be really deleted

Definition at line 357 of file StringFeatures.cpp.

void free_feature_vector ( SGVector< ST >  feat_vec,
int32_t  num 
)
inherited

free feature vector

possible with subset

Parameters
feat_vec: feature vector to free
num: index in feature cache, possibly from subset

Definition at line 375 of file StringFeatures.cpp.

T get ( const Tag< T > &  _tag) const
inherited

Getter for a class parameter, identified by a Tag. Throws an exception if the class does not have such a parameter.

Parameters
_tag: name and type information of parameter
Returns
value of the parameter identified by the input tag

Definition at line 367 of file SGObject.h.

T get ( const std::string &  name) const
inherited

Getter for a class parameter, identified by a name. Throws an exception if the class does not have such a parameter.

Parameters
name: name of the parameter
Returns
value of the parameter corresponding to the input name and type

Definition at line 388 of file SGObject.h.

CAlphabet * get_alphabet ( )
inherited

get alphabet used in string features

Returns
alphabet

Definition at line 212 of file StringFeatures.cpp.

int32_t get_cache_size ( ) const
inherited

get cache size

Returns
cache size

Definition at line 160 of file Features.cpp.

ST get_feature ( int32_t  vec_num,
int32_t  feat_num 
)
virtual inherited

get feature

possible with subset

Parameters
vec_num: which vector
feat_num: which feature, possibly from subset
Returns
feature

Definition at line 390 of file StringFeatures.cpp.

EFeatureClass get_feature_class ( ) const
virtual inherited

get feature class

Returns
feature class STRING

Implements CFeatures.

Definition at line 208 of file StringFeatures.cpp.

bool get_feature_class_compatibility ( EFeatureClass  rhs) const
virtual inherited

Given a class in right hand side, does this class support compatible computation?

for example, is this->dot(rhs_prt) valid, where rhs_prt is the class in right hand side

Parameters
rhs: the class in right hand side
Returns
whether this class supports compatible computation

Reimplemented in CDenseSubSamplesFeatures< ST >.

Definition at line 355 of file Features.cpp.

EFeatureType get_feature_type ( ) const
virtual inherited

get feature type

Returns
templated feature type

Implements CFeatures.

Definition at line 210 of file StringFeatures.cpp.

EFeatureType get_feature_type ( ) const
virtual inherited

get feature type the char feature can deal with

Returns
feature type char

Implements CFeatures.

Definition at line 1709 of file StringFeatures.cpp.

EFeatureType get_feature_type ( ) const
virtual inherited

get feature type the char feature can deal with

Returns
feature type char

Implements CFeatures.

Definition at line 1718 of file StringFeatures.cpp.

EFeatureType get_feature_type ( ) const
virtual inherited

get feature type the BYTE feature can deal with

Returns
feature type BYTE

Implements CFeatures.

Definition at line 1727 of file StringFeatures.cpp.

EFeatureType get_feature_type ( ) const
virtual inherited

get feature type the SHORT feature can deal with

Returns
feature type SHORT

Implements CFeatures.

Definition at line 1736 of file StringFeatures.cpp.

EFeatureType get_feature_type ( ) const
virtual inherited

get feature type the WORD feature can deal with

Returns
feature type WORD

Implements CFeatures.

Definition at line 1745 of file StringFeatures.cpp.

EFeatureType get_feature_type ( ) const
virtual inherited

get feature type the INT feature can deal with

Returns
feature type INT

Implements CFeatures.

Definition at line 1754 of file StringFeatures.cpp.

EFeatureType get_feature_type ( ) const
virtual inherited

get feature type the INT feature can deal with

Returns
feature type INT

Implements CFeatures.

Definition at line 1763 of file StringFeatures.cpp.

EFeatureType get_feature_type ( ) const
virtual inherited

get feature type the LONG feature can deal with

Returns
feature type LONG

Implements CFeatures.

Definition at line 1772 of file StringFeatures.cpp.

EFeatureType get_feature_type ( ) const
virtual inherited

get feature type the ULONG feature can deal with

Returns
feature type ULONG

Implements CFeatures.

Definition at line 1781 of file StringFeatures.cpp.

EFeatureType get_feature_type ( ) const
virtual inherited

get feature type the SHORTREAL feature can deal with

Returns
feature type SHORTREAL

Implements CFeatures.

Definition at line 1790 of file StringFeatures.cpp.

EFeatureType get_feature_type ( ) const
virtual inherited

get feature type the DREAL feature can deal with

Returns
feature type DREAL

Implements CFeatures.

Definition at line 1799 of file StringFeatures.cpp.

EFeatureType get_feature_type ( ) const
virtual inherited

get feature type the LONGREAL feature can deal with

Returns
feature type LONGREAL

Implements CFeatures.

Definition at line 1808 of file StringFeatures.cpp.

SGVector< ST > get_feature_vector ( int32_t  num)
inherited

get string for selected example num

possible with subset

Parameters
num: index of the string
Returns
the selected string

Definition at line 223 of file StringFeatures.cpp.

ST * get_feature_vector ( int32_t  num,
int32_t &  len,
bool &  dofree 
)
inherited

get feature vector for sample num

possible with subset

Parameters
num: index of feature vector
len: length is returned by reference
dofree: whether returned vector must be freed by caller via free_feature_vector
Returns
feature vector for sample num

Definition at line 275 of file StringFeatures.cpp.

SGStringList< ST > get_features ( )
inherited

get_features

Returns
features

Definition at line 974 of file StringFeatures.cpp.

SGString< ST > * get_features ( int32_t &  num_str,
int32_t &  max_str_len 
)
virtual inherited

get_features

not possible with subset

Parameters
num_str: number of strings (returned)
max_str_len: maximal string length (returned)
Returns
string features

Definition at line 982 of file StringFeatures.cpp.

void get_features ( SGString< ST > **  dst,
int32_t *  num_str 
)
virtual inherited

get_features (swig compatible)

possible with subset

Parameters
dst: string features (returned)
num_str: number of strings (returned)

Definition at line 1014 of file StringFeatures.cpp.

SGIO * get_global_io ( )
inherited

get the io object

Returns
io object

Definition at line 268 of file SGObject.cpp.

Parallel * get_global_parallel ( )
inherited

get the parallel object

Returns
parallel object

Definition at line 310 of file SGObject.cpp.

Version * get_global_version ( )
inherited

get the version object

Returns
version object

Definition at line 323 of file SGObject.cpp.

void get_histogram ( float64_t **  hist,
int32_t *  rows,
int32_t *  cols,
bool  normalize = true 
)
virtual inherited

compute histogram over strings

possible with subset

Definition at line 1466 of file StringFeatures.cpp.

ST * get_line ( uint64_t &  len,
uint64_t &  offs,
int32_t &  line_nr,
uint64_t  file_length 
)
protected

get next line from file

The returned line may be modified in case the file was opened read/write. It is otherwise read-only.

Parameters
len: length of line (returned via reference)
offs: offset to be passed for reading next line, should be 0 initially (returned via reference)
line_nr: used to indicate errors (returned as reference, should be 0 initially)
file_length: total length of the file (for error checking)
Returns
line (NOT ZERO TERMINATED)

Definition at line 23 of file StringFileFeatures.cpp.

ST get_masked_symbols ( ST  symbol,
uint8_t  mask 
)
inherited

a higher order mapped symbol will be shaped such that the symbols specified by bits in the mask will be returned.

Parameters
symbol: symbol to mask
mask: mask to apply
Returns
masked symbol

Definition at line 433 of file StringFeatures.cpp.

bool get_masked_symbols ( bool  symbol,
uint8_t  mask 
)
inherited

Definition at line 1813 of file StringFeatures.cpp.

float32_t get_masked_symbols ( float32_t  symbol,
uint8_t  mask 
)
inherited

Definition at line 1817 of file StringFeatures.cpp.

float64_t get_masked_symbols ( float64_t  symbol,
uint8_t  mask 
)
inherited

Definition at line 1821 of file StringFeatures.cpp.

floatmax_t get_masked_symbols ( floatmax_t  symbol,
uint8_t  mask 
)
inherited

Definition at line 1825 of file StringFeatures.cpp.

floatmax_t get_max_num_symbols ( )
inherited

get maximum number of symbols

Note: floatmax_t sounds weird, but int64_t is not long enough (and there is no int128_t type)

Returns
maximum number of symbols

Definition at line 427 of file StringFeatures.cpp.

int32_t get_max_vector_length ( )
virtual inherited

get maximum vector length

this one is updated when a subset is set

Returns
maximum vector/string length

Definition at line 415 of file StringFeatures.cpp.

SGStringList< char > get_modelsel_names ( )
inherited
Returns
vector of names of all parameters which are registered for model selection

Definition at line 531 of file SGObject.cpp.

char * get_modsel_param_descr ( const char *  param_name)
inherited

Returns description of a given parameter string, if it exists. SG_ERROR otherwise

Parameters
param_namename of the parameter
Returns
description of the parameter

Definition at line 555 of file SGObject.cpp.

index_t get_modsel_param_index ( const char *  param_name)
inherited

Returns index of model selection parameter with provided name

Parameters
param_namename of model selection parameter
Returns
index of model selection parameter with provided name, -1 if there is no such

Definition at line 568 of file SGObject.cpp.

virtual const char* get_name ( ) const
virtual

Returns the name of the SGSerializable instance.

Returns
name of the SGSerializable

Reimplemented from CStringFeatures< ST >.

Definition at line 61 of file StringFileFeatures.h.

int32_t get_num_preprocessed ( ) const
inherited

get the number of applied preprocs

Returns
number of applied preprocessors

Definition at line 103 of file Features.cpp.

int32_t get_num_preprocessors ( ) const
inherited

get number of preprocessors

Returns
number of preprocessors

Definition at line 155 of file Features.cpp.

floatmax_t get_num_symbols ( )
inherited

get number of symbols

Note: floatmax_t sounds weird, but LONG is not long enough

Returns
number of symbols

Definition at line 425 of file StringFeatures.cpp.

int32_t get_num_vectors ( ) const
virtualinherited
Returns
number of vectors, possibly of subset

Implements CFeatures.

Definition at line 420 of file StringFeatures.cpp.

int32_t get_order ( )
inherited

order used for higher order mapping

Returns
order

Definition at line 431 of file StringFeatures.cpp.

floatmax_t get_original_num_symbols ( )
inherited

number of symbols before higher order mapping

Returns
original number of symbols

Definition at line 429 of file StringFeatures.cpp.

CPreprocessor * get_preprocessor ( int32_t  num) const
inherited

get specified preprocessor

Parameters
numindex of preprocessor in list

Definition at line 93 of file Features.cpp.

CSubsetStack * get_subset_stack ( )
virtualinherited

returns subset stack

Returns
subset stack

Definition at line 334 of file Features.cpp.

CStringFeatures< ST > * get_transposed ( )
inherited

get a transposed copy of the features

possible with subset

Returns
transposed copy

Definition at line 313 of file StringFeatures.cpp.

SGString< ST > * get_transposed ( int32_t &  num_feat,
int32_t &  num_vec 
)
inherited

compute and return the transpose of the string features matrix, which will be preprocessed. num_feat and num_vec are returned by reference; the caller has to clean up

note that strings all have to have same length

possible with subset

Parameters
num_featnumber of features in matrix
num_vecnumber of vectors in matrix
Returns
transposed string features

Definition at line 326 of file StringFeatures.cpp.

int32_t get_vector_length ( int32_t  vec_num)
virtualinherited

get vector length

possible with subset

Parameters
vec_numwhich vector, possibly from subset
Returns
length of vector

Definition at line 404 of file StringFeatures.cpp.

ST * get_zero_terminated_string_copy ( SGString< ST >  str)
staticinherited

get a zero terminated copy of the string

Parameters
strthe string to copy
Returns
zero terminated copy of str

note that this function is only sensible for character strings

Definition at line 1443 of file StringFeatures.cpp.

bool has ( const std::string &  name) const
inherited

Checks if object has a class parameter identified by a name.

Parameters
namename of the parameter
Returns
true if the parameter exists with the input name

Definition at line 289 of file SGObject.h.

bool has ( const Tag< T > &  tag) const
inherited

Checks if object has a class parameter identified by a Tag.

Parameters
tagtag of the parameter containing name and type information
Returns
true if the parameter exists with the input tag

Definition at line 301 of file SGObject.h.

bool has ( const std::string &  name) const
inherited

Checks if a type exists for a class parameter identified by a name.

Parameters
namename of the parameter
Returns
true if the parameter exists with the input name and type

Definition at line 312 of file SGObject.h.

bool has_property ( EFeatureProperty  p) const
inherited

check if features have given property

Parameters
pfeature property
Returns
if features have given property

Definition at line 295 of file Features.cpp.

bool have_same_length ( int32_t  len = -1)
inherited

check if length of each vector in this feature object equals the given length. if existent, only subset is checked

possible for subset

Parameters
lenvector length to check against
Returns
if length of each vector in this feature object equals the given length.

Definition at line 1301 of file StringFeatures.cpp.

bool is_generic ( EPrimitiveType *  generic) const
virtualinherited

If the SGSerializable is a class template then TRUE will be returned and GENERIC is set to the type of the generic.

Parameters
genericset to the type of the generic if returning TRUE
Returns
TRUE if a class template.

Definition at line 329 of file SGObject.cpp.

bool is_preprocessed ( int32_t  num) const
inherited

get whether specified preprocessor was already applied

Parameters
numindex of preprocessor in list

Definition at line 149 of file Features.cpp.

void list_feature_obj ( ) const
inherited

list feature object

Definition at line 171 of file Features.cpp.

void list_preprocessors ( )
inherited

print preprocessors

Definition at line 131 of file Features.cpp.

virtual void load ( CFile loader)
virtualinherited

load features from file

Parameters
loaderFile object via which to load data

Reimplemented from CFeatures.

void load_ascii_file ( char *  fname,
bool  remap_to_bin = true,
EAlphabet  ascii_alphabet = DNA,
EAlphabet  binary_alphabet = RAWDNA 
)
inherited

load ascii line-based string features from file.

any subset is removed before

Parameters
fnamefilename to load from
remap_to_binif translation to other binary alphabet should be performed
ascii_alphabetsrc alphabet
binary_alphabetalphabet to translate to

Definition at line 451 of file StringFeatures.cpp.

bool load_compressed ( char *  src,
bool  decompress 
)
virtualinherited

load compressed features from file

any subset is removed before

Parameters
srcfilename to load from
decompresswhether to decompress on loading
Returns
if loading was successful

Definition at line 1022 of file StringFeatures.cpp.

bool load_fasta_file ( const char *  fname,
bool  ignore_invalid = false 
)
inherited

load fasta file as string features

any subset is removed before

Parameters
fnamefilename to load from
ignore_invalidif set to true, characters other than A,C,G,T are converted to A
Returns
if loading was successful

Definition at line 591 of file StringFeatures.cpp.

bool load_fastq_file ( const char *  fname,
bool  ignore_invalid = false,
bool  bitremap_in_single_string = false 
)
inherited

load fastq file as string features

removes subset beforehand

Parameters
fnamefilename to load from
ignore_invalidif set to true, characters other than A,C,G,T are converted to A
bitremap_in_single_stringif set to true, do binary embedding of symbols
Returns
if loading was successful

Definition at line 684 of file StringFeatures.cpp.

bool load_from_directory ( char *  dirname)
inherited

load features from directory

removes subset before

Parameters
dirnamedirectory name to load from
Returns
if loading was successful

Definition at line 785 of file StringFeatures.cpp.

bool load_serializable ( CSerializableFile file,
const char *  prefix = "" 
)
virtualinherited

Load this object from file. If it fails (returning FALSE) then this object will contain inconsistent data and should not be used!

Parameters
filewhere to load from
prefixprefix for members
Returns
TRUE if done, otherwise FALSE

Definition at line 402 of file SGObject.cpp.

void load_serializable_post ( )
throw (ShogunException
)
protectedvirtualinherited

Can (optionally) be overridden to post-initialize some member variables which are not PARAMETER::ADD'ed. Make sure that at first the overridden method BASE_CLASS::LOAD_SERIALIZABLE_POST is called.

Exceptions
ShogunExceptionwill be thrown if an error occurs.

Reimplemented in CKernel, CWeightedDegreePositionStringKernel, CList, CAlphabet, CLinearHMM, CGaussianKernel, CInverseMultiQuadricKernel, CCircularKernel, and CExponentialKernel.

Definition at line 459 of file SGObject.cpp.

void load_serializable_pre ( )
throw (ShogunException
)
protectedvirtualinherited

Can (optionally) be overridden to pre-initialize some member variables which are not PARAMETER::ADD'ed. Make sure that at first the overridden method BASE_CLASS::LOAD_SERIALIZABLE_PRE is called.

Exceptions
ShogunExceptionwill be thrown if an error occurs.

Reimplemented in CDynamicArray< T >, CDynamicArray< float64_t >, CDynamicArray< float32_t >, CDynamicArray< int32_t >, CDynamicArray< char >, CDynamicArray< bool >, and CDynamicObjectArray.

Definition at line 454 of file SGObject.cpp.

int32_t obtain_by_position_list ( int32_t  window_size,
CDynamicArray< int32_t > *  positions,
int32_t  skip = 0 
)
inherited

extracts windows of size window_size from first string using the positions in list

not implemented for subset

Parameters
window_sizewindow size
positionspositions
skipskip
Returns
something inty

Definition at line 1238 of file StringFeatures.cpp.

int32_t obtain_by_sliding_window ( int32_t  window_size,
int32_t  step_size,
int32_t  skip = 0 
)
inherited

slides a window of size window_size over the current single string; step_size is the amount by which the window is shifted. Creates (string_len-window_size)/step_size many feature objects. If skip is nonzero, the first 'skip' characters of each string are skipped.

not implemented for subset

Parameters
window_sizewindow size
step_sizestep size
skipskip
Returns
something inty

Definition at line 1201 of file StringFeatures.cpp.

bool obtain_from_char ( CStringFeatures< char > *  sf,
int32_t  start,
int32_t  p_order,
int32_t  gap,
bool  rev 
)
inherited

obtain string features from char features

wrapper for template method

any subset is removed before, subset of parameter sf is possible

Parameters
sfstring features
startstart
p_orderorder
gapgap
revreverse
Returns
if obtaining was successful

Definition at line 1296 of file StringFeatures.cpp.

bool obtain_from_char_features ( CStringFeatures< CT > *  sf,
int32_t  start,
int32_t  p_order,
int32_t  gap,
bool  rev 
)
inherited

template obtain from char features

any subset is removed before, subset of parameter sf is possible

Parameters
sfstring features
startstart
p_orderorder
gapgap
revreverse
Returns
if obtaining was successful

Definition at line 1977 of file StringFeatures.cpp.

bool obtain_from_char_features ( CStringFeatures< CT > *  sf,
int32_t  start,
int32_t  p_order,
int32_t  gap,
bool  rev 
)
inherited

Definition at line 1865 of file StringFeatures.cpp.

bool obtain_from_char_features ( CStringFeatures< CT > *  sf,
int32_t  start,
int32_t  p_order,
int32_t  gap,
bool  rev 
)
inherited

Definition at line 1869 of file StringFeatures.cpp.

bool obtain_from_char_features ( CStringFeatures< CT > *  sf,
int32_t  start,
int32_t  p_order,
int32_t  gap,
bool  rev 
)
inherited

Definition at line 1873 of file StringFeatures.cpp.

bool parameter_hash_changed ( )
virtualinherited
Returns
whether parameter combination has changed since last update

Definition at line 295 of file SGObject.cpp.

void print_modsel_params ( )
inherited

prints all parameters registered for model selection and their types

Definition at line 507 of file SGObject.cpp.

void print_serializable ( const char *  prefix = "")
virtualinherited

prints registered parameters out

Parameters
prefixprefix for members

Definition at line 341 of file SGObject.cpp.

void register_param ( Tag< T > &  _tag,
const T &  value 
)
protectedinherited

Registers a class parameter which is identified by a tag. This enables the parameter to be modified by set() and retrieved by get(). Parameters can be registered in the constructor of the class.

Parameters
_tagname and type information of parameter
valuevalue of the parameter

Definition at line 439 of file SGObject.h.

void register_param ( const std::string &  name,
const T &  value 
)
protectedinherited

Registers a class parameter which is identified by a name. This enables the parameter to be modified by set() and retrieved by get(). Parameters can be registered in the constructor of the class.

Parameters
namename of the parameter
valuevalue of the parameter along with type information

Definition at line 452 of file SGObject.h.

void remove_all_subsets ( )
virtualinherited

removes all subsets. Calls subset_changed_post() afterwards

Reimplemented in CCombinedFeatures.

Definition at line 328 of file Features.cpp.

void remove_subset ( )
virtualinherited

removes the last added subset from the subset stack, if one exists. Calls subset_changed_post() afterwards

Reimplemented in CCombinedFeatures.

Definition at line 322 of file Features.cpp.

bool reshape ( int32_t  num_features,
int32_t  num_vectors 
)
virtualinherited

in case there is a feature matrix allow for reshaping

NOT IMPLEMENTED!

Parameters
num_featuresnew number of features
num_vectorsnew number of vectors
Returns
if reshaping was successful

Reimplemented in CDenseFeatures< ST >, CDenseFeatures< uint32_t >, CDenseFeatures< float64_t >, CDenseFeatures< T >, and CDenseFeatures< uint16_t >.

Definition at line 165 of file Features.cpp.

virtual void save ( CFile writer)
virtualinherited

save features to file

not possible with subset

Parameters
writerFile object via which to save data

Reimplemented from CFeatures.

bool save_compressed ( char *  dest,
E_COMPRESSION_TYPE  compression,
int  level 
)
virtualinherited

save compressed features to file

not possible with subset

Parameters
destfilename to save to
compressioncompressor to use
levelcompression level to use (1-9)
Returns
if saving was successful

Definition at line 1117 of file StringFeatures.cpp.

bool save_serializable ( CSerializableFile file,
const char *  prefix = "" 
)
virtualinherited

Save this object to file.

Parameters
filewhere to save the object; will be closed during returning if PREFIX is an empty string.
prefixprefix for members
Returns
TRUE if done, otherwise FALSE

Definition at line 347 of file SGObject.cpp.

void save_serializable_post ( )
throw (ShogunException
)
protectedvirtualinherited

Can (optionally) be overridden to post-initialize some member variables which are not PARAMETER::ADD'ed. Make sure that at first the overridden method BASE_CLASS::SAVE_SERIALIZABLE_POST is called.

Exceptions
ShogunExceptionwill be thrown if an error occurs.

Reimplemented in CKernel.

Definition at line 469 of file SGObject.cpp.

void save_serializable_pre ( )
throw (ShogunException
)
protectedvirtualinherited

Can (optionally) be overridden to pre-initialize some member variables which are not PARAMETER::ADD'ed. Make sure that at first the overridden method BASE_CLASS::SAVE_SERIALIZABLE_PRE is called.

Exceptions
ShogunExceptionwill be thrown if an error occurs.

Reimplemented in CKernel, CDynamicArray< T >, CDynamicArray< float64_t >, CDynamicArray< float32_t >, CDynamicArray< int32_t >, CDynamicArray< char >, CDynamicArray< bool >, and CDynamicObjectArray.

Definition at line 464 of file SGObject.cpp.

void set ( const Tag< T > &  _tag,
const T &  value 
)
inherited

Setter for a class parameter, identified by a Tag. Throws an exception if the class does not have such a parameter.

Parameters
_tagname and type information of parameter
valuevalue of the parameter

Definition at line 328 of file SGObject.h.

void set ( const std::string &  name,
const T &  value 
)
inherited

Setter for a class parameter, identified by a name. Throws an exception if the class does not have such a parameter.

Parameters
namename of the parameter
valuevalue of the parameter along with type information

Definition at line 354 of file SGObject.h.

void set_feature_vector ( SGVector< ST >  vector,
int32_t  num 
)
inherited

set string for selected example num

not possible with subset

Parameters
vectorstring to set
numindex of the string

Definition at line 241 of file StringFeatures.cpp.

void set_feature_vector ( int32_t  num,
ST *  string,
int32_t  len 
)
virtualinherited

set feature vector for sample num

possible with subset

Parameters
numindex of feature vector
stringstring with the feature vector's content
lenlength of the string

Definition at line 1452 of file StringFeatures.cpp.

void set_features ( SGStringList< ST >  feats)
inherited

set features

not possible with subset

Definition at line 855 of file StringFeatures.cpp.

bool set_features ( SGString< ST > *  p_features,
int32_t  p_num_vectors,
int32_t  p_max_string_length 
)
inherited

set features

not possible with subset

Parameters
p_featuresnew features
p_num_vectorsnumber of vectors
p_max_string_lengthmaximum string length
Returns
if setting was successful

Definition at line 860 of file StringFeatures.cpp.

void set_generic ( )
inherited

Definition at line 74 of file SGObject.cpp.

void set_generic ( )
inherited

Definition at line 79 of file SGObject.cpp.

void set_generic ( )
inherited

Definition at line 84 of file SGObject.cpp.

void set_generic ( )
inherited

Definition at line 89 of file SGObject.cpp.

void set_generic ( )
inherited

Definition at line 94 of file SGObject.cpp.

void set_generic ( )
inherited

Definition at line 99 of file SGObject.cpp.

void set_generic ( )
inherited

Definition at line 104 of file SGObject.cpp.

void set_generic ( )
inherited

Definition at line 109 of file SGObject.cpp.

void set_generic ( )
inherited

Definition at line 114 of file SGObject.cpp.

void set_generic ( )
inherited

Definition at line 119 of file SGObject.cpp.

void set_generic ( )
inherited

Definition at line 124 of file SGObject.cpp.

void set_generic ( )
inherited

Definition at line 129 of file SGObject.cpp.

void set_generic ( )
inherited

Definition at line 134 of file SGObject.cpp.

void set_generic ( )
inherited

Definition at line 139 of file SGObject.cpp.

void set_generic ( )
inherited

Definition at line 144 of file SGObject.cpp.

void set_generic ( )
inherited

set generic type to T

void set_global_io ( SGIO io)
inherited

set the io object

Parameters
ioio object to use

Definition at line 261 of file SGObject.cpp.

void set_global_parallel ( Parallel parallel)
inherited

set the parallel object

Parameters
parallelparallel object to use

Definition at line 274 of file SGObject.cpp.

void set_global_version ( Version version)
inherited

set the version object

Parameters
versionversion object to use

Definition at line 316 of file SGObject.cpp.

void set_preprocessed ( int32_t  num)
inherited

set applied flag for preprocessor

Parameters
numindex of preprocessor in list

Definition at line 143 of file Features.cpp.

void set_property ( EFeatureProperty  p)
inherited

set property

Parameters
p: feature property to set

Definition at line 300 of file Features.cpp.

CSGObject * shallow_copy ( ) const
virtualinherited

A shallow copy. All the SGObject instance variables will be simply assigned and SG_REF-ed.

Reimplemented in CGaussianKernel.

Definition at line 225 of file SGObject.cpp.

virtual CFeatures* shallow_subset_copy ( )
virtualinherited
ST shift_offset ( ST  offset,
int32_t  amount 
)
inherited

shift offset to the left by amount

Parameters
offsetoffset to shift
amountamount to shift the offset
Returns
shifted offset

Definition at line 439 of file StringFeatures.cpp.

bool shift_offset ( bool  symbol,
int32_t  amount 
)
inherited

Definition at line 1830 of file StringFeatures.cpp.

float32_t shift_offset ( float32_t  symbol,
int32_t  amount 
)
inherited

Definition at line 1834 of file StringFeatures.cpp.

float64_t shift_offset ( float64_t  symbol,
int32_t  amount 
)
inherited

Definition at line 1838 of file StringFeatures.cpp.

floatmax_t shift_offset ( floatmax_t  symbol,
int32_t  amount 
)
inherited

Definition at line 1842 of file StringFeatures.cpp.

ST shift_symbol ( ST  symbol,
int32_t  amount 
)
inherited

shift symbol to the right by amount (taking care of custom symbol sizes)

Parameters
symbolsymbol to shift
amountamount to shift the symbol
Returns
shifted symbol

Definition at line 445 of file StringFeatures.cpp.

bool shift_symbol ( bool  symbol,
int32_t  amount 
)
inherited

Definition at line 1847 of file StringFeatures.cpp.

float32_t shift_symbol ( float32_t  symbol,
int32_t  amount 
)
inherited

Definition at line 1851 of file StringFeatures.cpp.

float64_t shift_symbol ( float64_t  symbol,
int32_t  amount 
)
inherited

Definition at line 1855 of file StringFeatures.cpp.

floatmax_t shift_symbol ( floatmax_t  symbol,
int32_t  amount 
)
inherited

Definition at line 1859 of file StringFeatures.cpp.

void subset_changed_post ( )
virtualinherited

post method when subset is changed

Reimplemented from CFeatures.

Definition at line 1645 of file StringFeatures.cpp.

virtual bool support_compatible_class ( ) const
virtualinherited

does this class support compatible computation between different classes? for example, in this->dot(rhs_prt), can rhs_prt be an instance of a different class?

Returns
whether this class supports compatible computation

Reimplemented in CDenseSubSamplesFeatures< ST >.

Definition at line 323 of file Features.h.

void unembed_word ( ST  word,
uint8_t *  seq,
int32_t  len 
)
inherited

remap bit-based word to character sequence

Parameters
wordword to remap
seqsequence of size len that remapped characters are written to
lenlength of sequence and word

Definition at line 1402 of file StringFeatures.cpp.

void unembed_word ( float32_t  word,
uint8_t *  seq,
int32_t  len 
)
inherited

Definition at line 1912 of file StringFeatures.cpp.

void unembed_word ( float64_t  word,
uint8_t *  seq,
int32_t  len 
)
inherited

Definition at line 1915 of file StringFeatures.cpp.

void unembed_word ( floatmax_t  word,
uint8_t *  seq,
int32_t  len 
)
inherited

Definition at line 1918 of file StringFeatures.cpp.

void unset_generic ( )
inherited

unset generic type

this has to be called in classes specializing a template class

Definition at line 336 of file SGObject.cpp.

void unset_property ( EFeatureProperty  p)
inherited

unset property

Parameters
p: feature property to unset

Definition at line 305 of file Features.cpp.

void update_parameter_hash ( )
virtualinherited

Updates the hash of current parameter combination

Definition at line 281 of file SGObject.cpp.

Member Data Documentation

CAlphabet* alphabet
protectedinherited

alphabet

Definition at line 674 of file StringFeatures.h.

CCache<ST>* feature_cache
protectedinherited

feature cache

Definition at line 710 of file StringFeatures.h.

SGString<ST>* features
protectedinherited

this contains the array of features

Definition at line 680 of file StringFeatures.h.

CMemoryMappedFile<ST>* file
protected

memory mapped file

Definition at line 94 of file StringFileFeatures.h.

SGIO* io
inherited

io

Definition at line 537 of file SGObject.h.

int32_t length_of_single_string
protectedinherited

length of prior single string

Definition at line 686 of file StringFeatures.h.

Parameter* m_gradient_parameters
inherited

parameters wrt which we can compute gradients

Definition at line 552 of file SGObject.h.

uint32_t m_hash
inherited

Hash of parameter values

Definition at line 555 of file SGObject.h.

Parameter* m_model_selection_parameters
inherited

model selection parameters

Definition at line 549 of file SGObject.h.

Parameter* m_parameters
inherited

parameters

Definition at line 546 of file SGObject.h.

CSubsetStack* m_subset_stack
protectedinherited

subset used for index transformations

Definition at line 361 of file Features.h.

int32_t max_string_length
protectedinherited

length of longest string (for subset, is updated)

Definition at line 689 of file StringFeatures.h.

floatmax_t num_symbols
protectedinherited

number of used symbols

Definition at line 692 of file StringFeatures.h.

int32_t num_vectors
protectedinherited

number of string vectors (for subset, is updated)

Definition at line 677 of file StringFeatures.h.

int32_t order
protectedinherited

order used in higher order mapping

Definition at line 698 of file StringFeatures.h.

floatmax_t original_num_symbols
protectedinherited

original number of used symbols (before higher order mapping)

Definition at line 695 of file StringFeatures.h.

Parallel* parallel
inherited

parallel

Definition at line 540 of file SGObject.h.

bool preprocess_on_get
protectedinherited

preprocess on-the-fly?

Definition at line 707 of file StringFeatures.h.

ST* single_string
protectedinherited

true when single string / created by sliding window

Definition at line 683 of file StringFeatures.h.

ST* symbol_mask_table
protectedinherited

symbol mask table used in higher order mapping

Definition at line 701 of file StringFeatures.h.

int32_t symbol_mask_table_len
protectedinherited

length of the symbol mask table used in higher order mapping

Definition at line 704 of file StringFeatures.h.

Version* version
inherited

version

Definition at line 543 of file SGObject.h.


The documentation for this class was generated from the following files:

SHOGUN Machine Learning Toolbox - Documentation