13 #ifndef _CSTRINGFEATURES__H__
14 #define _CSTRINGFEATURES__H__
31 template <
class T>
class CDynamicArray;
33 template <
class T>
class SGString;
34 template <
class T>
class SGStringList;
36 #ifndef DOXYGEN_SHOULD_SKIP_THIS
37 struct SSKDoubleFeature
44 struct SSKTripleFeature
76 template <
class ST>
class CStringFeatures :
public CFeatures
231 SGString<ST>*
get_transposed(int32_t &num_feat, int32_t &num_vec);
260 virtual ST
get_feature(int32_t vec_num, int32_t feat_num);
342 virtual void load(CFile* loader);
377 bool ignore_invalid=
false,
bool bitremap_in_single_string=
false);
404 bool set_features(SGString<ST>* p_features, int32_t p_num_vectors,
405 int32_t p_max_string_length);
430 int32_t p_max_string_length);
445 virtual SGString<ST>*
get_features(int32_t& num_str, int32_t& max_str_len);
455 virtual SGString<ST>*
copy_features(int32_t& num_str, int32_t& max_str_len);
464 virtual void get_features(SGString<ST>** dst, int32_t* num_str);
472 virtual void save(CFile* writer);
543 int32_t p_order, int32_t gap,
bool rev);
558 int32_t p_order, int32_t gap,
bool rev);
631 bool normalize=
true);
/** @return the name of this class, the string literal "StringFeatures" */
651 virtual const char*
get_name()
const {
return "StringFeatures"; }
713 #endif // _CSTRINGFEATURES__H__
void set_feature_vector(SGVector< ST > vector, int32_t num)
int32_t length_of_single_string
length of prior single string
int32_t obtain_by_sliding_window(int32_t window_size, int32_t step_size, int32_t skip=0)
virtual int32_t get_max_vector_length()
SGVector< ST > get_feature_vector(int32_t num)
void enable_on_the_fly_preprocessing()
virtual void load(CFile *loader)
virtual CFeatures * duplicate() const
floatmax_t num_symbols
number of used symbols
bool load_from_directory(char *dirname)
bool obtain_from_char(CStringFeatures< char > *sf, int32_t start, int32_t p_order, int32_t gap, bool rev)
virtual CFeatures * copy_subset(SGVector< index_t > indices)
ST shift_offset(ST offset, int32_t amount)
int32_t max_string_length
RAWDNA - letters 0,1,2,3.
virtual void subset_changed_post()
void unembed_word(ST word, uint8_t *seq, int32_t len)
EAlphabet
Alphabet of charfeatures/observations.
virtual SGString< ST > * copy_features(int32_t &num_str, int32_t &max_str_len)
floatmax_t get_max_num_symbols()
bool load_fasta_file(const char *fname, bool ignore_invalid=false)
SGString< ST > * features
virtual int32_t get_num_vectors() const
bool append_features(CStringFeatures< ST > *sf)
CFeatures(int32_t size=0)
virtual void create_random(float64_t *hist, int32_t rows, int32_t cols, int32_t num_vec)
The class Alphabet implements an alphabet and alphabet utility functions.
CCache< ST > * feature_cache
void free_feature_vector(ST *feat_vec, int32_t num, bool dofree)
floatmax_t get_original_num_symbols()
bool preprocess_on_get
preprocess on-the-fly?
virtual bool load_compressed(char *src, bool decompress)
EFeatureClass
shogun feature class
void disable_on_the_fly_preprocessing()
void load_ascii_file(char *fname, bool remap_to_bin=true, EAlphabet ascii_alphabet=DNA, EAlphabet binary_alphabet=RAWDNA)
virtual bool apply_preprocessor(bool force_preprocessing=false)
ST get_masked_symbols(ST symbol, uint8_t mask)
floatmax_t get_num_symbols()
int32_t symbol_mask_table_len
length of the symbol mask table (symbol_mask_table)
CStringFeatures< ST > * get_transposed()
int32_t order
order used in higher order mapping
ST embed_word(ST *seq, int32_t len)
int32_t obtain_by_position_list(int32_t window_size, CDynamicArray< int32_t > *positions, int32_t skip=0)
bool load_fastq_file(const char *fname, bool ignore_invalid=false, bool bitremap_in_single_string=false)
floatmax_t original_num_symbols
original number of used symbols (before higher order mapping)
virtual EFeatureClass get_feature_class() const
virtual bool save_compressed(char *dest, E_COMPRESSION_TYPE compression, int level)
virtual ST * compute_feature_vector(int32_t num, int32_t &len)
virtual ST get_feature(int32_t vec_num, int32_t feat_num)
CAlphabet * get_alphabet()
void compute_symbol_mask_table(int64_t max_val)
virtual EFeatureType get_feature_type() const
void determine_maximum_string_length()
EFeatureType
shogun feature type
bool have_same_length(int32_t len=-1)
virtual void cleanup_feature_vector(int32_t num)
virtual const char * get_name() const
All classes and functions are contained in the shogun namespace.
virtual ~CStringFeatures()
virtual void save(CFile *writer)
SGStringList< ST > get_features()
ST shift_symbol(ST symbol, int32_t amount)
void embed_features(int32_t p_order)
virtual void get_histogram(float64_t **hist, int32_t *rows, int32_t *cols, bool normalize=true)
virtual void cleanup_feature_vectors(int32_t start, int32_t stop)
bool obtain_from_char_features(CStringFeatures< CT > *sf, int32_t start, int32_t p_order, int32_t gap, bool rev)
ST * symbol_mask_table
symbol mask table (see compute_symbol_mask_table)
void set_features(SGStringList< ST > feats)
virtual int32_t get_vector_length(int32_t vec_num)
static ST * get_zero_terminated_string_copy(SGString< ST > str)