SHOGUN
4.1.0
|
File based string features.
StringFeatures that are file based. Underneath memory mapped files are used. Derived from CStringFeatures thus transparently enabling all of the StringFeature functionality.
Supported file format contains one string per line; lines of variable length are supported and must be separated by '\n'.
Definition at line 36 of file StringFileFeatures.h.
Public Member Functions | |
CStringFileFeatures () | |
CStringFileFeatures (const char *fname, EAlphabet alpha) | |
virtual | ~CStringFileFeatures () |
virtual const char * | get_name () const |
virtual void | cleanup_feature_vectors (int32_t start, int32_t stop) |
virtual EFeatureClass | get_feature_class () const |
virtual EFeatureType | get_feature_type () const |
template<> | |
EFeatureType | get_feature_type () const |
template<> | |
EFeatureType | get_feature_type () const |
template<> | |
EFeatureType | get_feature_type () const |
template<> | |
EFeatureType | get_feature_type () const |
template<> | |
EFeatureType | get_feature_type () const |
template<> | |
EFeatureType | get_feature_type () const |
template<> | |
EFeatureType | get_feature_type () const |
template<> | |
EFeatureType | get_feature_type () const |
template<> | |
EFeatureType | get_feature_type () const |
template<> | |
EFeatureType | get_feature_type () const |
template<> | |
EFeatureType | get_feature_type () const |
template<> | |
EFeatureType | get_feature_type () const |
CAlphabet * | get_alphabet () |
virtual CFeatures * | duplicate () const |
SGVector< ST > | get_feature_vector (int32_t num) |
ST * | get_feature_vector (int32_t num, int32_t &len, bool &dofree) |
void | set_feature_vector (SGVector< ST > vector, int32_t num) |
virtual void | set_feature_vector (int32_t num, ST *string, int32_t len) |
void | enable_on_the_fly_preprocessing () |
void | disable_on_the_fly_preprocessing () |
CStringFeatures< ST > * | get_transposed () |
SGString< ST > * | get_transposed (int32_t &num_feat, int32_t &num_vec) |
void | free_feature_vector (ST *feat_vec, int32_t num, bool dofree) |
void | free_feature_vector (SGVector< ST > feat_vec, int32_t num) |
virtual ST | get_feature (int32_t vec_num, int32_t feat_num) |
virtual int32_t | get_vector_length (int32_t vec_num) |
virtual int32_t | get_max_vector_length () |
virtual int32_t | get_num_vectors () const |
floatmax_t | get_num_symbols () |
floatmax_t | get_max_num_symbols () |
floatmax_t | get_original_num_symbols () |
int32_t | get_order () |
ST | get_masked_symbols (ST symbol, uint8_t mask) |
template<> | |
bool | get_masked_symbols (bool symbol, uint8_t mask) |
template<> | |
float32_t | get_masked_symbols (float32_t symbol, uint8_t mask) |
template<> | |
float64_t | get_masked_symbols (float64_t symbol, uint8_t mask) |
template<> | |
floatmax_t | get_masked_symbols (floatmax_t symbol, uint8_t mask) |
ST | shift_offset (ST offset, int32_t amount) |
template<> | |
bool | shift_offset (bool symbol, int32_t amount) |
template<> | |
float32_t | shift_offset (float32_t symbol, int32_t amount) |
template<> | |
float64_t | shift_offset (float64_t symbol, int32_t amount) |
template<> | |
floatmax_t | shift_offset (floatmax_t symbol, int32_t amount) |
ST | shift_symbol (ST symbol, int32_t amount) |
template<> | |
bool | shift_symbol (bool symbol, int32_t amount) |
template<> | |
float32_t | shift_symbol (float32_t symbol, int32_t amount) |
template<> | |
float64_t | shift_symbol (float64_t symbol, int32_t amount) |
template<> | |
floatmax_t | shift_symbol (floatmax_t symbol, int32_t amount) |
virtual void | load (CFile *loader) |
void | load_ascii_file (char *fname, bool remap_to_bin=true, EAlphabet ascii_alphabet=DNA, EAlphabet binary_alphabet=RAWDNA) |
bool | load_fasta_file (const char *fname, bool ignore_invalid=false) |
bool | load_fastq_file (const char *fname, bool ignore_invalid=false, bool bitremap_in_single_string=false) |
bool | load_from_directory (char *dirname) |
void | set_features (SGStringList< ST > feats) |
bool | set_features (SGString< ST > *p_features, int32_t p_num_vectors, int32_t p_max_string_length) |
bool | append_features (CStringFeatures< ST > *sf) |
bool | append_features (SGString< ST > *p_features, int32_t p_num_vectors, int32_t p_max_string_length) |
SGStringList< ST > | get_features () |
virtual SGString< ST > * | get_features (int32_t &num_str, int32_t &max_str_len) |
virtual void | get_features (SGString< ST > **dst, int32_t *num_str) |
virtual SGString< ST > * | copy_features (int32_t &num_str, int32_t &max_str_len) |
virtual void | save (CFile *writer) |
virtual bool | load_compressed (char *src, bool decompress) |
virtual bool | save_compressed (char *dest, E_COMPRESSION_TYPE compression, int level) |
virtual bool | apply_preprocessor (bool force_preprocessing=false) |
int32_t | obtain_by_sliding_window (int32_t window_size, int32_t step_size, int32_t skip=0) |
int32_t | obtain_by_position_list (int32_t window_size, CDynamicArray< int32_t > *positions, int32_t skip=0) |
bool | obtain_from_char (CStringFeatures< char > *sf, int32_t start, int32_t p_order, int32_t gap, bool rev) |
template<class CT > | |
bool | obtain_from_char_features (CStringFeatures< CT > *sf, int32_t start, int32_t p_order, int32_t gap, bool rev) |
template<> | |
bool | obtain_from_char_features (CStringFeatures< CT > *sf, int32_t start, int32_t p_order, int32_t gap, bool rev) |
template<> | |
bool | obtain_from_char_features (CStringFeatures< CT > *sf, int32_t start, int32_t p_order, int32_t gap, bool rev) |
template<> | |
bool | obtain_from_char_features (CStringFeatures< CT > *sf, int32_t start, int32_t p_order, int32_t gap, bool rev) |
bool | have_same_length (int32_t len=-1) |
void | embed_features (int32_t p_order) |
template<> | |
void | embed_features (int32_t p_order) |
template<> | |
void | embed_features (int32_t p_order) |
template<> | |
void | embed_features (int32_t p_order) |
void | compute_symbol_mask_table (int64_t max_val) |
template<> | |
void | compute_symbol_mask_table (int64_t max_val) |
template<> | |
void | compute_symbol_mask_table (int64_t max_val) |
template<> | |
void | compute_symbol_mask_table (int64_t max_val) |
void | unembed_word (ST word, uint8_t *seq, int32_t len) |
template<> | |
void | unembed_word (float32_t word, uint8_t *seq, int32_t len) |
template<> | |
void | unembed_word (float64_t word, uint8_t *seq, int32_t len) |
template<> | |
void | unembed_word (floatmax_t word, uint8_t *seq, int32_t len) |
ST | embed_word (ST *seq, int32_t len) |
template<> | |
float32_t | embed_word (float32_t *seq, int32_t len) |
template<> | |
float64_t | embed_word (float64_t *seq, int32_t len) |
template<> | |
floatmax_t | embed_word (floatmax_t *seq, int32_t len) |
void | determine_maximum_string_length () |
virtual void | get_histogram (float64_t **hist, int32_t *rows, int32_t *cols, bool normalize=true) |
virtual void | create_random (float64_t *hist, int32_t rows, int32_t cols, int32_t num_vec) |
virtual CFeatures * | copy_subset (SGVector< index_t > indices) |
virtual void | subset_changed_post () |
virtual void | add_preprocessor (CPreprocessor *p) |
virtual void | del_preprocessor (int32_t num) |
CPreprocessor * | get_preprocessor (int32_t num) const |
void | set_preprocessed (int32_t num) |
bool | is_preprocessed (int32_t num) const |
int32_t | get_num_preprocessed () const |
int32_t | get_num_preprocessors () const |
void | clean_preprocessors () |
void | list_preprocessors () |
int32_t | get_cache_size () const |
virtual bool | reshape (int32_t num_features, int32_t num_vectors) |
void | list_feature_obj () const |
bool | check_feature_compatibility (CFeatures *f) const |
bool | has_property (EFeatureProperty p) const |
void | set_property (EFeatureProperty p) |
void | unset_property (EFeatureProperty p) |
virtual CFeatures * | create_merged_copy (CList *others) |
virtual CFeatures * | create_merged_copy (CFeatures *other) |
virtual void | add_subset (SGVector< index_t > subset) |
virtual void | add_subset_in_place (SGVector< index_t > subset) |
virtual void | remove_subset () |
virtual void | remove_all_subsets () |
virtual CSubsetStack * | get_subset_stack () |
virtual CFeatures * | copy_dimension_subset (SGVector< index_t > dims) |
virtual bool | support_compatible_class () const |
virtual bool | get_feature_class_compatibility (EFeatureClass rhs) const |
virtual CSGObject * | shallow_copy () const |
virtual CSGObject * | deep_copy () const |
virtual bool | is_generic (EPrimitiveType *generic) const |
template<class T > | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
void | unset_generic () |
virtual void | print_serializable (const char *prefix="") |
virtual bool | save_serializable (CSerializableFile *file, const char *prefix="") |
virtual bool | load_serializable (CSerializableFile *file, const char *prefix="") |
void | set_global_io (SGIO *io) |
SGIO * | get_global_io () |
void | set_global_parallel (Parallel *parallel) |
Parallel * | get_global_parallel () |
void | set_global_version (Version *version) |
Version * | get_global_version () |
SGStringList< char > | get_modelsel_names () |
void | print_modsel_params () |
char * | get_modsel_param_descr (const char *param_name) |
index_t | get_modsel_param_index (const char *param_name) |
void | build_gradient_parameter_dictionary (CMap< TParameter *, CSGObject * > *dict) |
virtual void | update_parameter_hash () |
virtual bool | parameter_hash_changed () |
virtual bool | equals (CSGObject *other, float64_t accuracy=0.0, bool tolerant=false) |
virtual CSGObject * | clone () |
Static Public Member Functions | |
static ST * | get_zero_terminated_string_copy (SGString< ST > str) |
Public Attributes | |
SGIO * | io |
Parallel * | parallel |
Version * | version |
Parameter * | m_parameters |
Parameter * | m_model_selection_parameters |
Parameter * | m_gradient_parameters |
uint32_t | m_hash |
Protected Member Functions | |
ST * | get_line (uint64_t &len, uint64_t &offs, int32_t &line_nr, uint64_t file_length) |
virtual void | cleanup () |
virtual void | cleanup_feature_vector (int32_t num) |
void | fetch_meta_info_from_file (int32_t granularity=1048576) |
virtual ST * | compute_feature_vector (int32_t num, int32_t &len) |
virtual void | load_serializable_pre () throw (ShogunException) |
virtual void | load_serializable_post () throw (ShogunException) |
virtual void | save_serializable_pre () throw (ShogunException) |
virtual void | save_serializable_post () throw (ShogunException) |
Protected Attributes | |
CMemoryMappedFile< ST > * | file |
CAlphabet * | alphabet |
int32_t | num_vectors |
SGString< ST > * | features |
ST * | single_string |
int32_t | length_of_single_string |
length of prior single string More... | |
int32_t | max_string_length |
floatmax_t | num_symbols |
number of used symbols More... | |
floatmax_t | original_num_symbols |
original number of used symbols (before higher order mapping) More... | |
int32_t | order |
order used in higher order mapping More... | |
ST * | symbol_mask_table |
symbol mask table (required to access bit-based symbols) More... | 
int32_t | symbol_mask_table_len |
length of the symbol mask table More... | 
bool | preprocess_on_get |
preprocess on-the-fly? More... | |
CCache< ST > * | feature_cache |
CSubsetStack * | m_subset_stack |
default constructor
Definition at line 6 of file StringFileFeatures.cpp.
CStringFileFeatures | ( | const char * | fname, |
EAlphabet | alpha | ||
) |
constructor
fname | filename of the file containing line based features |
alpha | alphabet (type) to use for string features |
Definition at line 10 of file StringFileFeatures.cpp.
|
virtual |
default destructor
Definition at line 17 of file StringFileFeatures.cpp.
|
virtualinherited |
Adds a subset of indices on top of the current subsets (possibly subset of subset). Every call causes a new active index vector to be stored. Added subsets can be removed one-by-one. If this is not needed, add_subset_in_place() should be used (does not store intermediate index vectors)
Calls subset_changed_post() afterwards
subset | subset of indices to add |
Reimplemented in CCombinedFeatures.
Definition at line 310 of file Features.cpp.
Sets/changes the latest added subset. This allows adding multiple subsets with in-place memory requirements. They cannot be removed one-by-one afterwards, only the latest active one can. If this is needed, use add_subset(). If no subset is active, this just adds.
Calls subset_changed_post() afterwards
subset | subset of indices to replace the latest one with. |
Definition at line 316 of file Features.cpp.
|
inherited |
append features. If the given string features have a subset, only this will be copied
not possible with subset
sf | features to append |
Definition at line 899 of file StringFeatures.cpp.
|
inherited |
append features
not possible with subset
p_features | features to append |
p_num_vectors | number of vectors |
p_max_string_length | maximum string length |
note that p_features will be SG_FREE()'d on success
Definition at line 921 of file StringFeatures.cpp.
|
virtualinherited |
apply preprocessor
force_preprocessing | if preprocessing shall be forced |
Definition at line 1177 of file StringFeatures.cpp.
|
inherited |
Builds a dictionary of all parameters in SGObject as well of those of SGObjects that are parameters of this object. Dictionary maps parameters to the objects that own them.
dict | dictionary of parameters to be built. |
Definition at line 597 of file SGObject.cpp.
|
inherited |
check feature compatibility
f | features to check for compatibility |
Definition at line 283 of file Features.cpp.
|
inherited |
clears all preprocs
Definition at line 116 of file Features.cpp.
|
protectedvirtual |
cleanup string features
Reimplemented from CStringFeatures< ST >.
Definition at line 53 of file StringFileFeatures.cpp.
|
protectedvirtual |
cleanup a single feature vector
Reimplemented from CStringFeatures< ST >.
Definition at line 71 of file StringFileFeatures.cpp.
|
virtualinherited |
cleanup multiple feature vectors
possible with subset
start | index of first vector to be cleaned |
stop | index of the last vector to be cleaned |
Definition at line 190 of file StringFeatures.cpp.
|
virtualinherited |
Creates a clone of the current object. This is done via recursively traversing all parameters, which corresponds to a deep copy. Calling equals on the cloned object always returns true although none of the memory of both objects overlaps.
Definition at line 714 of file SGObject.cpp.
|
protectedvirtualinherited |
compute feature vector for sample num; if target is set, the vector is written to target; len is returned by reference
possible with subset
num | which vector |
len | length of vector |
Definition at line 1651 of file StringFeatures.cpp.
|
inherited |
compute symbol mask table
required to access bit-based symbols
not implemented for subset
Definition at line 1374 of file StringFeatures.cpp.
|
inherited |
Definition at line 1889 of file StringFeatures.cpp.
|
inherited |
Definition at line 1892 of file StringFeatures.cpp.
|
inherited |
Definition at line 1895 of file StringFeatures.cpp.
Creates a new CFeatures instance containing only the dimensions of the feature vector which are specified by the provided indices.
This method is needed for feature selection tasks. NOT IMPLEMENTED!
dims | indices of feature dimensions to copy |
Reimplemented in CDenseFeatures< ST >, CDenseFeatures< uint32_t >, CDenseFeatures< float64_t >, CDenseFeatures< T >, and CDenseFeatures< uint16_t >.
Definition at line 348 of file Features.cpp.
|
virtualinherited |
copy_features
possible with subset
num_str | number of strings (returned) |
max_str_len | maximal string length (returned) |
Definition at line 992 of file StringFeatures.cpp.
Creates a new CFeatures instance containing copies of the elements which are specified by the provided indices.
possible with subset
indices | indices of feature elements to copy |
Reimplemented from CFeatures.
Definition at line 1610 of file StringFeatures.cpp.
Takes a list of feature instances and returns a new instance being a concatenation of a copy of this instance's data and the given instances' data. Note that the feature types have to be equal.
NOT IMPLEMENTED!
others | list of feature objects to append |
Reimplemented in CDenseFeatures< ST >, CDenseFeatures< uint32_t >, CDenseFeatures< float64_t >, CDenseFeatures< T >, and CDenseFeatures< uint16_t >.
Definition at line 235 of file Features.h.
Convenience method for method with same name and list as parameter.
NOT IMPLEMENTED!
other | feature object to append |
Reimplemented in CDenseFeatures< ST >, CDenseFeatures< uint32_t >, CDenseFeatures< float64_t >, CDenseFeatures< T >, CDenseFeatures< uint16_t >, and CCombinedFeatures.
Definition at line 249 of file Features.h.
|
virtualinherited |
create some random strings based on normalized histogram
not possible with subset
Definition at line 1508 of file StringFeatures.cpp.
|
virtualinherited |
A deep copy. All the instance variables will also be copied.
Definition at line 198 of file SGObject.cpp.
|
virtualinherited |
delete preprocessor from list
num | index of preprocessor in list |
Definition at line 122 of file Features.cpp.
|
inherited |
determine new maximum string length
possible with subset
Definition at line 1431 of file StringFeatures.cpp.
|
inherited |
call this to disable on the fly feature preprocessing upon call to get_feature_vector. Useful when you manually apply preprocessors.
Definition at line 270 of file StringFeatures.cpp.
|
virtualinherited |
duplicate feature object
Implements CFeatures.
Definition at line 218 of file StringFeatures.cpp.
|
inherited |
embed string features in bit representation in-place
not implemented for subset
Definition at line 1320 of file StringFeatures.cpp.
|
inherited |
Definition at line 1879 of file StringFeatures.cpp.
|
inherited |
Definition at line 1882 of file StringFeatures.cpp.
|
inherited |
Definition at line 1885 of file StringFeatures.cpp.
|
inherited |
embed a single word
seq | sequence of size len in a bitfield |
len |
Definition at line 1418 of file StringFeatures.cpp.
Definition at line 1899 of file StringFeatures.cpp.
Definition at line 1903 of file StringFeatures.cpp.
|
inherited |
Definition at line 1907 of file StringFeatures.cpp.
|
inherited |
call this to preprocess string features upon call to get_feature_vector
Definition at line 265 of file StringFeatures.cpp.
Recursively compares the current SGObject to another one. Compares all registered numerical parameters, recursion upon complex (SGObject) parameters. Does not compare pointers!
May be overwritten but please do with care! Should not be necessary in most cases.
other | object to compare with |
accuracy | accuracy to use for comparison (optional) |
tolerant | allows lenient check on float equality (within accuracy) |
Definition at line 618 of file SGObject.cpp.
|
protected |
obtain meta information from file
i.e., determine number of strings and their lengths
Definition at line 77 of file StringFileFeatures.cpp.
|
inherited |
free feature vector
possible with subset
feat_vec | feature vector to free |
num | index in feature cache, possibly from subset |
dofree | if vector should be really deleted |
Definition at line 357 of file StringFeatures.cpp.
|
inherited |
free feature vector
possible with subset
feat_vec | feature vector to free |
num | index in feature cache, possibly from subset |
Definition at line 375 of file StringFeatures.cpp.
|
inherited |
get alphabet used in string features
Definition at line 212 of file StringFeatures.cpp.
|
inherited |
|
virtualinherited |
get feature
possible with subset
vec_num | which vector |
feat_num | which feature, possibly from subset |
Definition at line 390 of file StringFeatures.cpp.
|
virtualinherited |
get feature class
Implements CFeatures.
Definition at line 208 of file StringFeatures.cpp.
|
virtualinherited |
Given a class in right hand side, does this class support compatible computation?
for example, is this->dot(rhs_prt) valid, where rhs_prt is the class in right hand side
rhs | the class in right hand side |
Reimplemented in CDenseSubSamplesFeatures< ST >.
Definition at line 355 of file Features.cpp.
|
virtualinherited |
get feature type
Implements CFeatures.
Definition at line 210 of file StringFeatures.cpp.
|
virtualinherited |
get feature type the char feature can deal with
Implements CFeatures.
Definition at line 1709 of file StringFeatures.cpp.
|
virtualinherited |
get feature type the char feature can deal with
Implements CFeatures.
Definition at line 1718 of file StringFeatures.cpp.
|
virtualinherited |
get feature type the BYTE feature can deal with
Implements CFeatures.
Definition at line 1727 of file StringFeatures.cpp.
|
virtualinherited |
get feature type the SHORT feature can deal with
Implements CFeatures.
Definition at line 1736 of file StringFeatures.cpp.
|
virtualinherited |
get feature type the WORD feature can deal with
Implements CFeatures.
Definition at line 1745 of file StringFeatures.cpp.
|
virtualinherited |
get feature type the INT feature can deal with
Implements CFeatures.
Definition at line 1754 of file StringFeatures.cpp.
|
virtualinherited |
get feature type the INT feature can deal with
Implements CFeatures.
Definition at line 1763 of file StringFeatures.cpp.
|
virtualinherited |
get feature type the LONG feature can deal with
Implements CFeatures.
Definition at line 1772 of file StringFeatures.cpp.
|
virtualinherited |
get feature type the ULONG feature can deal with
Implements CFeatures.
Definition at line 1781 of file StringFeatures.cpp.
|
virtualinherited |
get feature type the SHORTREAL feature can deal with
Implements CFeatures.
Definition at line 1790 of file StringFeatures.cpp.
|
virtualinherited |
get feature type the DREAL feature can deal with
Implements CFeatures.
Definition at line 1799 of file StringFeatures.cpp.
|
virtualinherited |
get feature type the LONGREAL feature can deal with
Implements CFeatures.
Definition at line 1808 of file StringFeatures.cpp.
|
inherited |
get string for selected example num
possible with subset
num | index of the string |
Definition at line 223 of file StringFeatures.cpp.
|
inherited |
get feature vector for sample num
possible with subset
num | index of feature vector |
len | length is returned by reference |
dofree | whether returned vector must be freed by caller via free_feature_vector |
Definition at line 275 of file StringFeatures.cpp.
|
inherited |
|
virtualinherited |
get_features
not possible with subset
num_str | number of strings (returned) |
max_str_len | maximal string length (returned) |
Definition at line 982 of file StringFeatures.cpp.
|
virtualinherited |
get_features (swig compatible)
possible with subset
dst | string features (returned) |
num_str | number of strings (returned) |
Definition at line 1014 of file StringFeatures.cpp.
|
inherited |
|
inherited |
|
inherited |
|
virtualinherited |
compute histogram over strings
possible with subset
Definition at line 1466 of file StringFeatures.cpp.
|
protected |
get next line from file
The returned line may be modified in case the file was opened read/write. It is otherwise read-only.
len | length of line (returned via reference) |
offs | offset to be passed for reading next line, should be 0 initially (returned via reference) |
line_nr | used to indicate errors (returned as reference should be 0 initially) |
file_length | total length of the file (for error checking) |
Definition at line 23 of file StringFileFeatures.cpp.
|
inherited |
a higher order mapped symbol will be shaped such that the symbols specified by bits in the mask will be returned.
symbol | symbol to mask |
mask | mask to apply |
Definition at line 433 of file StringFeatures.cpp.
|
inherited |
Definition at line 1813 of file StringFeatures.cpp.
Definition at line 1817 of file StringFeatures.cpp.
Definition at line 1821 of file StringFeatures.cpp.
|
inherited |
Definition at line 1825 of file StringFeatures.cpp.
|
inherited |
get maximum number of symbols
Note: floatmax_t sounds weird, but int64_t is not long enough (and there is no int128_t type)
Definition at line 427 of file StringFeatures.cpp.
|
virtualinherited |
get maximum vector length
this one is updated when a subset is set
Definition at line 415 of file StringFeatures.cpp.
|
inherited |
Definition at line 498 of file SGObject.cpp.
|
inherited |
Returns description of a given parameter string, if it exists. SG_ERROR otherwise
param_name | name of the parameter |
Definition at line 522 of file SGObject.cpp.
|
inherited |
Returns index of model selection parameter with provided name
param_name | name of model selection parameter |
Definition at line 535 of file SGObject.cpp.
|
virtual |
Returns the name of the SGSerializable instance.
Reimplemented from CStringFeatures< ST >.
Definition at line 61 of file StringFileFeatures.h.
|
inherited |
get the number of applied preprocs
Definition at line 103 of file Features.cpp.
|
inherited |
get number of preprocessors
Definition at line 155 of file Features.cpp.
|
inherited |
get number of symbols
Note: floatmax_t sounds weird, but LONG is not long enough
Definition at line 425 of file StringFeatures.cpp.
|
virtualinherited |
Implements CFeatures.
Definition at line 420 of file StringFeatures.cpp.
|
inherited |
|
inherited |
number of symbols before higher order mapping
Definition at line 429 of file StringFeatures.cpp.
|
inherited |
get specified preprocessor
num | index of preprocessor in list |
Definition at line 93 of file Features.cpp.
|
virtualinherited |
|
inherited |
get a transposed copy of the features
possible with subset
Definition at line 313 of file StringFeatures.cpp.
|
inherited |
compute and return the transpose of string features matrix which will be preprocessed. num_feat, num_vec are returned by reference; caller has to clean up
note that strings all have to have same length
possible with subset
num_feat | number of features in matrix |
num_vec | number of vectors in matrix |
Definition at line 326 of file StringFeatures.cpp.
|
virtualinherited |
get vector length
possible with subset
vec_num | which vector, possibly from subset |
Definition at line 404 of file StringFeatures.cpp.
|
staticinherited |
get a zero terminated copy of the string
str | the string to copy |
note that this function is only sensible for character strings
Definition at line 1443 of file StringFeatures.cpp.
|
inherited |
check if features have given property
p | feature property |
Definition at line 295 of file Features.cpp.
|
inherited |
check if length of each vector in this feature object equals the given length. if existent, only subset is checked
possible for subset
len | vector length to check against |
Definition at line 1301 of file StringFeatures.cpp.
|
virtualinherited |
If the SGSerializable is a class template then TRUE will be returned and GENERIC is set to the type of the generic.
generic | set to the type of the generic if returning TRUE |
Definition at line 296 of file SGObject.cpp.
|
inherited |
get whether specified preprocessor was already applied
num | index of preprocessor in list |
Definition at line 149 of file Features.cpp.
|
inherited |
list feature object
Definition at line 171 of file Features.cpp.
|
inherited |
print preprocessors
Definition at line 131 of file Features.cpp.
|
virtualinherited |
load features from file
loader | File object via which to load data |
Reimplemented from CFeatures.
|
inherited |
load ascii line-based string features from file.
any subset is removed before
fname | filename to load from |
remap_to_bin | if translation to other binary alphabet should be performed |
ascii_alphabet | src alphabet |
binary_alphabet | alphabet to translate to |
Definition at line 451 of file StringFeatures.cpp.
|
virtualinherited |
load compressed features from file
any subset is removed before
src | filename to load from |
decompress | whether to decompress on loading |
Definition at line 1022 of file StringFeatures.cpp.
|
inherited |
load fasta file as string features
any subset is removed before
fname | filename to load from |
ignore_invalid | if set to true, characters other than A,C,G,T are converted to A |
Definition at line 591 of file StringFeatures.cpp.
|
inherited |
load fastq file as string features
removes subset beforehand
fname | filename to load from |
ignore_invalid | if set to true, characters other than A,C,G,T are converted to A |
bitremap_in_single_string | if set to true, do binary embedding of symbols |
Definition at line 684 of file StringFeatures.cpp.
|
inherited |
load features from directory
removes subset before
dirname | directory name to load from |
Definition at line 785 of file StringFeatures.cpp.
|
virtualinherited |
Load this object from file. If it fails (returning FALSE) then this object will contain inconsistent data and should not be used!
file | where to load from |
prefix | prefix for members |
Definition at line 369 of file SGObject.cpp.
|
protectedvirtualinherited |
Can (optionally) be overridden to post-initialize some member variables which are not PARAMETER::ADD'ed. Make sure that at first the overridden method BASE_CLASS::LOAD_SERIALIZABLE_POST is called.
ShogunException | will be thrown if an error occurs. |
Reimplemented in CKernel, CWeightedDegreePositionStringKernel, CList, CAlphabet, CLinearHMM, CGaussianKernel, CInverseMultiQuadricKernel, CCircularKernel, and CExponentialKernel.
Definition at line 426 of file SGObject.cpp.
|
protectedvirtualinherited |
Can (optionally) be overridden to pre-initialize some member variables which are not PARAMETER::ADD'ed. Make sure that at first the overridden method BASE_CLASS::LOAD_SERIALIZABLE_PRE is called.
ShogunException | will be thrown if an error occurs. |
Reimplemented in CDynamicArray< T >, CDynamicArray< float64_t >, CDynamicArray< float32_t >, CDynamicArray< int32_t >, CDynamicArray< char >, CDynamicArray< bool >, and CDynamicObjectArray.
Definition at line 421 of file SGObject.cpp.
|
inherited |
extracts windows of size window_size from first string using the positions in list
not implemented for subset
window_size | window size |
positions | positions |
skip | skip |
Definition at line 1238 of file StringFeatures.cpp.
|
inherited |
slides a window of size window_size over the current single string. step_size is the amount by which the window is shifted. creates (string_len-window_size)/step_size many feature obj. if skip is nonzero, skip the first 'skip' characters of each string
not implemented for subset
window_size | window size |
step_size | step size |
skip | skip |
Definition at line 1201 of file StringFeatures.cpp.
|
inherited |
obtain string features from char features
wrapper for template method
any subset is removed before, subset of parameter sf is possible
sf | string features |
start | start |
p_order | order |
gap | gap |
rev | reverse |
Definition at line 1296 of file StringFeatures.cpp.
|
inherited |
template obtain from char features
any subset is removed before, subset of parameter sf is possible
sf | string features |
start | start |
p_order | order |
gap | gap |
rev | reverse |
Definition at line 1977 of file StringFeatures.cpp.
|
inherited |
Definition at line 1865 of file StringFeatures.cpp.
|
inherited |
Definition at line 1869 of file StringFeatures.cpp.
|
inherited |
Definition at line 1873 of file StringFeatures.cpp.
|
virtualinherited |
Definition at line 262 of file SGObject.cpp.
|
inherited |
prints all parameters registered for model selection and their types
Definition at line 474 of file SGObject.cpp.
|
virtualinherited |
prints registered parameters out
prefix | prefix for members |
Definition at line 308 of file SGObject.cpp.
|
virtualinherited |
removes all subsets. Calls subset_changed_post() afterwards.
Reimplemented in CCombinedFeatures.
Definition at line 328 of file Features.cpp.
|
virtualinherited |
removes the last added subset from the subset stack, if existing. Calls subset_changed_post() afterwards.
Reimplemented in CCombinedFeatures.
Definition at line 322 of file Features.cpp.
|
virtualinherited |
in case there is a feature matrix allow for reshaping
NOT IMPLEMENTED!
num_features | new number of features |
num_vectors | new number of vectors |
Reimplemented in CDenseFeatures< ST >, CDenseFeatures< uint32_t >, CDenseFeatures< float64_t >, CDenseFeatures< T >, and CDenseFeatures< uint16_t >.
Definition at line 165 of file Features.cpp.
|
virtualinherited |
save features to file
not possible with subset
writer | File object via which to save data |
Reimplemented from CFeatures.
|
virtualinherited |
save compressed features to file
not possible with subset
dest | filename to save to |
compression | compressor to use |
level | compression level to use (1-9) |
Definition at line 1117 of file StringFeatures.cpp.
|
virtualinherited |
Save this object to file.
file | where to save the object; will be closed during returning if PREFIX is an empty string. |
prefix | prefix for members |
Definition at line 314 of file SGObject.cpp.
|
protectedvirtualinherited |
Can (optionally) be overridden to post-initialize some member variables which are not PARAMETER::ADD'ed. Make sure that at first the overridden method BASE_CLASS::SAVE_SERIALIZABLE_POST is called.
ShogunException | will be thrown if an error occurs. |
Reimplemented in CKernel.
Definition at line 436 of file SGObject.cpp.
|
protectedvirtualinherited |
Can (optionally) be overridden to pre-initialize some member variables which are not PARAMETER::ADD'ed. Make sure that at first the overridden method BASE_CLASS::SAVE_SERIALIZABLE_PRE is called.
ShogunException | will be thrown if an error occurs. |
Reimplemented in CKernel, CDynamicArray< T >, CDynamicArray< float64_t >, CDynamicArray< float32_t >, CDynamicArray< int32_t >, CDynamicArray< char >, CDynamicArray< bool >, and CDynamicObjectArray.
Definition at line 431 of file SGObject.cpp.
|
inherited |
set string for selected example num
not possible with subset
vector | string to set |
num | index of the string |
Definition at line 241 of file StringFeatures.cpp.
|
virtualinherited |
set feature vector for sample num
possible with subset
num | index of feature vector |
string | string with the feature vector's content |
len | length of the string |
Definition at line 1452 of file StringFeatures.cpp.
|
inherited |
|
inherited |
set features
not possible with subset
p_features | new features |
p_num_vectors | number of vectors |
p_max_string_length | maximum string length |
Definition at line 860 of file StringFeatures.cpp.
|
inherited |
Definition at line 41 of file SGObject.cpp.
|
inherited |
Definition at line 46 of file SGObject.cpp.
|
inherited |
Definition at line 51 of file SGObject.cpp.
|
inherited |
Definition at line 56 of file SGObject.cpp.
|
inherited |
Definition at line 61 of file SGObject.cpp.
|
inherited |
Definition at line 66 of file SGObject.cpp.
|
inherited |
Definition at line 71 of file SGObject.cpp.
|
inherited |
Definition at line 76 of file SGObject.cpp.
|
inherited |
Definition at line 81 of file SGObject.cpp.
|
inherited |
Definition at line 86 of file SGObject.cpp.
|
inherited |
Definition at line 91 of file SGObject.cpp.
|
inherited |
Definition at line 96 of file SGObject.cpp.
|
inherited |
Definition at line 101 of file SGObject.cpp.
|
inherited |
Definition at line 106 of file SGObject.cpp.
|
inherited |
Definition at line 111 of file SGObject.cpp.
|
inherited |
set generic type to T
|
inherited |
|
inherited |
set the parallel object
parallel | parallel object to use |
Definition at line 241 of file SGObject.cpp.
|
inherited |
set the version object
version | version object to use |
Definition at line 283 of file SGObject.cpp.
|
inherited |
set applied flag for preprocessor
num | index of preprocessor in list |
Definition at line 143 of file Features.cpp.
|
inherited |
|
virtualinherited |
A shallow copy. All the SGObject instance variables will be simply assigned and SG_REF-ed.
Reimplemented in CGaussianKernel.
Definition at line 192 of file SGObject.cpp.
|
inherited |
shift offset to the left by amount
offset | offset to shift |
amount | amount to shift the offset |
Definition at line 439 of file StringFeatures.cpp.
|
inherited |
Definition at line 1830 of file StringFeatures.cpp.
Definition at line 1834 of file StringFeatures.cpp.
Definition at line 1838 of file StringFeatures.cpp.
|
inherited |
Definition at line 1842 of file StringFeatures.cpp.
|
inherited |
shift symbol to the right by amount (taking care of custom symbol sizes)
symbol | symbol to shift |
amount | amount to shift the symbol |
Definition at line 445 of file StringFeatures.cpp.
|
inherited |
Definition at line 1847 of file StringFeatures.cpp.
Definition at line 1851 of file StringFeatures.cpp.
Definition at line 1855 of file StringFeatures.cpp.
|
inherited |
Definition at line 1859 of file StringFeatures.cpp.
|
virtualinherited |
post method when subset is changed
Reimplemented from CFeatures.
Definition at line 1645 of file StringFeatures.cpp.
|
virtualinherited |
does this class support compatible computation between different classes? For example, in this->dot(rhs_prt), can rhs_prt be an instance of a different class?
Reimplemented in CDenseSubSamplesFeatures< ST >.
Definition at line 323 of file Features.h.
|
inherited |
remap bit-based word to character sequence
word | word to remap |
seq | sequence of size len that remapped characters are written to |
len | length of sequence and word |
Definition at line 1402 of file StringFeatures.cpp.
|
inherited |
Definition at line 1912 of file StringFeatures.cpp.
|
inherited |
Definition at line 1915 of file StringFeatures.cpp.
|
inherited |
Definition at line 1918 of file StringFeatures.cpp.
|
inherited |
unset generic type
this has to be called in classes specializing a template class
Definition at line 303 of file SGObject.cpp.
|
inherited |
|
virtualinherited |
Updates the hash of current parameter combination
Definition at line 248 of file SGObject.cpp.
|
protectedinherited |
alphabet
Definition at line 674 of file StringFeatures.h.
|
protectedinherited |
feature cache
Definition at line 710 of file StringFeatures.h.
|
protectedinherited |
this contains the array of features
Definition at line 680 of file StringFeatures.h.
|
protected |
memory mapped file
Definition at line 94 of file StringFileFeatures.h.
|
inherited |
io
Definition at line 369 of file SGObject.h.
|
protectedinherited |
length of prior single string
Definition at line 686 of file StringFeatures.h.
|
inherited |
parameters wrt which we can compute gradients
Definition at line 384 of file SGObject.h.
|
inherited |
Hash of parameter values
Definition at line 387 of file SGObject.h.
|
inherited |
model selection parameters
Definition at line 381 of file SGObject.h.
|
inherited |
parameters
Definition at line 378 of file SGObject.h.
|
protectedinherited |
subset used for index transformations
Definition at line 352 of file Features.h.
|
protectedinherited |
length of longest string (for subset, is updated)
Definition at line 689 of file StringFeatures.h.
|
protectedinherited |
number of used symbols
Definition at line 692 of file StringFeatures.h.
|
protectedinherited |
number of string vectors (for subset, is updated)
Definition at line 677 of file StringFeatures.h.
|
protectedinherited |
order used in higher order mapping
Definition at line 698 of file StringFeatures.h.
|
protectedinherited |
original number of used symbols (before higher order mapping)
Definition at line 695 of file StringFeatures.h.
|
inherited |
parallel
Definition at line 372 of file SGObject.h.
|
protectedinherited |
preprocess on-the-fly?
Definition at line 707 of file StringFeatures.h.
|
protectedinherited |
true when single string / created by sliding window
Definition at line 683 of file StringFeatures.h.
|
protectedinherited |
order used in higher order mapping
Definition at line 701 of file StringFeatures.h.
|
protectedinherited |
order used in higher order mapping
Definition at line 704 of file StringFeatures.h.
|
inherited |
version
Definition at line 375 of file SGObject.h.