21 init(file, is_labelled, size);
29 if (parser.is_running())
41 num_symbols=alphabet->get_num_symbols();
51 num_symbols=alphabet->get_num_symbols();
58 alpha_ascii=
new CAlphabet(ascii_alphabet);
66 alpha_ascii=
new CAlphabet(ascii_alphabet);
100 return current_length;
110 parser.set_read_vector_and_label
114 #define GET_FEATURE_TYPE(f_type, sg_type) \
115 template<> EFeatureType CStreamingStringFeatures<sg_type>::get_feature_type() const \
133 #undef GET_FEATURE_TYPE
137 void CStreamingStringFeatures<T>::init()
140 alphabet=
new CAlphabet();
144 current_sgstring.string=current_string;
145 current_sgstring.slen=current_length;
151 void CStreamingStringFeatures<T>::init(CStreamingFile* file,
156 has_labels=is_labelled;
158 parser.init(file, is_labelled, size);
159 parser.set_free_vector_after_release(
false);
160 parser.set_free_vectors_on_destruct(
false);
167 alpha_ascii=alphabet;
169 if (!parser.is_running())
170 parser.start_parser();
184 ret_value = (bool) parser.get_next_example(current_string,
194 alpha_ascii->add_string_to_histogram(current_string, current_length);
196 for (i=0; i<current_length; i++)
197 current_string[i]=alpha_ascii->remap_to_bin(current_string[i]);
198 alpha_bin->add_string_to_histogram(current_string, current_length);
202 alpha_ascii->add_string_to_histogram(current_string, current_length);
206 if ( !(alpha_ascii->check_alphabet_size() && alpha_ascii->check_alphabet()) )
208 SG_ERROR(
"StreamingStringFeatures: The given input was found to be incompatible with the alphabet!\n")
217 alphabet=alpha_ascii;
220 num_symbols=alphabet->get_num_symbols();
228 current_sgstring.string=current_string;
229 current_sgstring.slen=current_length;
231 return current_sgstring;
239 return current_label;
245 parser.finalize_example();
251 return current_length;
virtual CFeatures * duplicate() const
void use_alphabet(EAlphabet alpha)
virtual void get_string(bool *&vector, int32_t &len)
CStreamingStringFeatures()
virtual float64_t get_label()
EAlphabet
Alphabet of charfeatures/observations.
SGString< T > get_vector()
virtual int32_t get_num_features()
The class Alphabet implements an alphabet and alphabet utility functions.
virtual int32_t get_num_vectors() const
EFeatureClass
shogun feature class
A Streaming File access class.
virtual int32_t get_vector_length()
virtual void get_string_and_label(bool *&vector, int32_t &len, float64_t &label)
virtual EFeatureClass get_feature_class() const
virtual void end_parser()
bool remap_to_bin
Whether remapping must be done.
floatmax_t get_num_symbols()
This class implements streaming features as strings.
virtual bool get_next_example()
virtual void start_parser()
all of classes and functions are contained in the shogun namespace
virtual void set_vector_reader()
void set_read_functions()
#define GET_FEATURE_TYPE(f_type, sg_type)
The class Features is the base class of all feature objects.
virtual ~CStreamingStringFeatures()
Streaming features are features which are used for online algorithms.
virtual void set_vector_and_label_reader()
virtual void release_example()
void set_remap(CAlphabet *ascii_alphabet, CAlphabet *binary_alphabet)
CAlphabet * get_alphabet()