SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
StreamingStringFeatures.cpp
Go to the documentation of this file.
2 
3 namespace shogun
4 {
5 
6 
// Runs the argument-less init() and switches ASCII-to-binary remapping off.
// NOTE(review): the signature line (and listing line 11) is missing from this
// extract; presumably the default constructor - confirm against the original source.
7 template <class T>
9 {
10  init();
12  remap_to_bin=false;
13 }
14 
// Forwards file, is_labelled and size to init(file, is_labelled, size) and
// switches ASCII-to-binary remapping off.
// NOTE(review): the first signature line (which declares `file`) and listing
// lines 19 and 22 are missing from this extract; presumably the constructor
// taking a streaming file - confirm against the original source.
15 template <class T>
17  bool is_labelled,
18  int32_t size)
20 {
21  init(file, is_labelled, size);
23  remap_to_bin=false;
24 }
25 
// Teardown: ends the parser if it is still running, then drops the reference
// held on `alphabet`.
// NOTE(review): signature line missing from this extract; presumably the
// destructor - confirm. alpha_ascii / alpha_bin are not released here.
26 template <class T>
28 {
29  if (parser.is_running())
30  parser.end_parser();
31  SG_UNREF(alphabet);
32 }
33 
// Replaces the current alphabet: releases the old one, builds a new CAlphabet
// from `alpha`, takes a reference on it and refreshes num_symbols from it.
// NOTE(review): signature line missing from this extract, so the type of
// `alpha` is not visible here; presumably one of two alphabet-setter
// overloads - confirm against the original source.
34 template <class T>
36 {
37  SG_UNREF(alphabet);
38 
39  alphabet=new CAlphabet(alpha);
40  SG_REF(alphabet);
41  num_symbols=alphabet->get_num_symbols();
42 }
43 
// Replaces the current alphabet: releases the old one, builds a new CAlphabet
// from `alpha`, takes a reference on it and refreshes num_symbols from it.
// NOTE(review): signature line missing from this extract; the body is
// identical to the preceding block, so this is presumably the second overload
// with a different type for `alpha` - confirm against the original source.
44 template <class T>
46 {
47  SG_UNREF(alphabet);
48 
49  alphabet=new CAlphabet(alpha);
50  SG_REF(alphabet);
51  num_symbols=alphabet->get_num_symbols();
52 }
53 
// Enables remapping and installs fresh source/target alphabets.
//
// ascii_alphabet:  alphabet used to build the new alpha_ascii object.
// binary_alphabet: alphabet used to build the new alpha_bin object.
//
// NOTE(review): unlike the alphabet setters in this file, the previous
// alpha_ascii / alpha_bin values are not released and no SG_REF is taken on
// the new objects - confirm the intended ownership.
54 template <class T>
55 void CStreamingStringFeatures<T>::set_remap(CAlphabet* ascii_alphabet, CAlphabet* binary_alphabet)
56 {
57  remap_to_bin=true;
58  alpha_ascii=new CAlphabet(ascii_alphabet);
59  alpha_bin=new CAlphabet(binary_alphabet);
60 }
61 
// Enables remapping and installs fresh source/target alphabets, each built
// from an EAlphabet value.
//
// ascii_alphabet:  EAlphabet value used to build the new alpha_ascii object.
// binary_alphabet: EAlphabet value used to build the new alpha_bin object.
//
// NOTE(review): unlike the alphabet setters in this file, the previous
// alpha_ascii / alpha_bin values are not released and no SG_REF is taken on
// the new objects - confirm the intended ownership.
62 template <class T>
63 void CStreamingStringFeatures<T>::set_remap(EAlphabet ascii_alphabet, EAlphabet binary_alphabet)
64 {
65  remap_to_bin=true;
66  alpha_ascii=new CAlphabet(ascii_alphabet);
67  alpha_bin=new CAlphabet(binary_alphabet);
68 }
69 
// Returns the current `alphabet` after taking an additional reference on it.
// NOTE(review): signature line missing from this extract; presumably the
// alphabet getter, with the caller expected to SG_UNREF the result - confirm.
70 template <class T>
72 {
73  SG_REF(alphabet);
74  return alphabet;
75 }
76 
// Returns the cached num_symbols value (refreshed whenever the alphabet is
// replaced elsewhere in this file).
// NOTE(review): signature line missing from this extract - confirm name and
// return type against the original source.
77 template <class T>
79 {
80  return num_symbols;
81 }
82 
// Returns a newly allocated CStreamingStringFeatures<T> created with the copy
// constructor from *this.
// NOTE(review): signature line missing from this extract; presumably the
// object-duplication method - confirm.
83 template <class T>
85 {
86  return new CStreamingStringFeatures<T>(*this);
87 }
88 
// Returns 1 when current_string is non-null and 0 otherwise.
// NOTE(review): signature line missing from this extract; presumably reports
// how many examples are currently held - confirm.
89 template <class T>
91 {
92  if (current_string)
93  return 1;
94  return 0;
95 }
96 
// Returns current_length (init() sets it to -1 before any example is read).
// NOTE(review): signature line missing from this extract - confirm name and
// return type against the original source.
97 template <class T>
99 {
100  return current_length;
101 }
102 
// Registers CStreamingFile::get_string as the parser's read-vector function.
// NOTE(review): the signature line (listing line 103) is missing from this
// extract; presumably the reader setup for unlabelled input - confirm.
104 {
105  parser.set_read_vector(&CStreamingFile::get_string);
106 }
107 
// Registers the parser's read-vector-and-label function.
// NOTE(review): the signature line (listing line 108) and the argument line of
// the call (listing line 111) are missing from this extract, so the statement
// below is incomplete as shown - restore both from the original source.
109 {
110  parser.set_read_vector_and_label
112 }
113 
// Helper macro: emits a full specialization of
// CStreamingStringFeatures<sg_type>::get_feature_type() that returns f_type.
//   f_type:  EFeatureType value to return.
//   sg_type: element type the specialization is generated for.
114 #define GET_FEATURE_TYPE(f_type, sg_type) \
115 template<> EFeatureType CStreamingStringFeatures<sg_type>::get_feature_type() const \
116 { \
117  return f_type; \
118 }
119 
// get_feature_type() specializations per element type. Both uint8_t and int8_t
// map to F_BYTE. The macro is undefined afterwards so it does not leak further.
// NOTE(review): listing lines 120-121 and 130-132 are missing from this
// extract, so some instantiations are presumably not shown - confirm.
122 GET_FEATURE_TYPE(F_BYTE, uint8_t)
123 GET_FEATURE_TYPE(F_BYTE, int8_t)
124 GET_FEATURE_TYPE(F_SHORT, int16_t)
125 GET_FEATURE_TYPE(F_WORD, uint16_t)
126 GET_FEATURE_TYPE(F_INT, int32_t)
127 GET_FEATURE_TYPE(F_UINT, uint32_t)
128 GET_FEATURE_TYPE(F_LONG, int64_t)
129 GET_FEATURE_TYPE(F_ULONG, uint64_t)
133 #undef GET_FEATURE_TYPE
134 
135 
// Resets the object to its empty state: no working file, a default-constructed
// alphabet, no current string (length -1), and a current_sgstring that mirrors
// those values. Finally calls set_generic<T>().
//
// NOTE(review): no SG_REF is taken on the new alphabet here, whereas the
// alphabet setters in this file do take one - confirm the ref-counting intent.
// alpha_ascii, alpha_bin and num_symbols are not assigned in this function.
136 template <class T>
137 void CStreamingStringFeatures<T>::init()
138 {
139  working_file=NULL;
140  alphabet=new CAlphabet();
141 
142  current_string=NULL;
143  current_length=-1;
144  current_sgstring.string=current_string;
145  current_sgstring.slen=current_length;
146 
147  set_generic<T>();
148 }
149 
// Initializes the object for reading from a streaming file.
//
// file:        stored in working_file and passed to the parser.
// is_labelled: stored in has_labels and passed to the parser.
// size:        passed through to parser.init(); its meaning is defined by the
//              parser (presumably a buffer size - confirm).
//
// Runs the argument-less init() first, then configures the parser so that it
// neither frees vectors after release nor frees them on destruction.
150 template <class T>
151 void CStreamingStringFeatures<T>::init(CStreamingFile* file,
152  bool is_labelled,
153  int32_t size)
154 {
155  init();
156  has_labels=is_labelled;
157  working_file=file;
158  parser.init(file, is_labelled, size);
159  parser.set_free_vector_after_release(false);
160  parser.set_free_vectors_on_destruct(false);
161 }
162 
// When remapping is disabled, makes alpha_ascii point at the current alphabet
// (no reference is taken), then starts the parser unless it is already running.
// NOTE(review): signature line missing from this extract; presumably the
// parser start method - confirm.
163 template <class T>
165 {
166  if (!remap_to_bin)
167  alpha_ascii=alphabet;
168 
169  if (!parser.is_running())
170  parser.start_parser();
171 }
172 
// Ends the parser unconditionally (no is_running() check, unlike the teardown
// block earlier in this file).
// NOTE(review): signature line missing from this extract - confirm name.
173 template <class T>
175 {
176  parser.end_parser();
177 }
178 
// Fetches the next example from the parser into current_string /
// current_length / current_label, updates the alphabet histograms, optionally
// remaps the string in place, validates it, and re-points `alphabet`.
//
// Returns false when the parser delivers no example; otherwise returns the
// parser's result converted to bool.
//
// NOTE(review): signature line missing from this extract; presumably the
// "get next example" method - confirm against the original source.
179 template <class T>
181 {
182  bool ret_value;
183 
184  ret_value = (bool) parser.get_next_example(current_string,
185  current_length,
186  current_label);
187 
188  if (!ret_value)
189  return false;
190 
191  int32_t i;
192  if (remap_to_bin)
193  {
	// Record the raw symbols first, then overwrite each symbol in place
	// with its remapped value and record the result in alpha_bin.
194  alpha_ascii->add_string_to_histogram(current_string, current_length);
195 
196  for (i=0; i<current_length; i++)
197  current_string[i]=alpha_ascii->remap_to_bin(current_string[i]);
198  alpha_bin->add_string_to_histogram(current_string, current_length);
199  }
200  else
201  {
202  alpha_ascii->add_string_to_histogram(current_string, current_length);
203  }
204 
205  /* Check the input using src alphabet, alpha_ascii */
206  if ( !(alpha_ascii->check_alphabet_size() && alpha_ascii->check_alphabet()) )
207  {
208  SG_ERROR("StreamingStringFeatures: The given input was found to be incompatible with the alphabet!\n")
	// NOTE(review): returns 0 here while the other exits return bool values;
	// whether control reaches this line after SG_ERROR is not visible here.
209  return 0;
210  }
211 
	// The ref-count adjustments around the re-pointing below are commented
	// out, so `alphabet` is reassigned without releasing the previous object.
212  //SG_UNREF(alphabet);
213 
214  if (remap_to_bin)
215  alphabet=alpha_bin;
216  else
217  alphabet=alpha_ascii;
218 
219  //SG_REF(alphabet);
220  num_symbols=alphabet->get_num_symbols();
221 
222  return ret_value;
223 }
224 
// Syncs current_sgstring with current_string / current_length and returns it.
// NOTE(review): signature line missing from this extract, so the return type
// (value vs. reference) is not visible here - confirm.
225 template <class T>
227 {
228  current_sgstring.string=current_string;
229  current_sgstring.slen=current_length;
230 
231  return current_sgstring;
232 }
233 
// Returns current_label; asserts that the object was set up with labels
// (has_labels) before doing so.
// NOTE(review): signature line missing from this extract - confirm name and
// return type against the original source.
234 template <class T>
236 {
237  ASSERT(has_labels)
238 
239  return current_label;
240 }
241 
// Tells the parser that the current example is finished with
// (parser.finalize_example()).
// NOTE(review): signature line missing from this extract; presumably the
// example-release method - confirm.
242 template <class T>
244 {
245  parser.finalize_example();
246 }
247 
// Returns current_length, the length stored for the current example.
// NOTE(review): signature line missing from this extract - confirm name and
// return type against the original source.
248 template <class T>
250 {
251  return current_length;
252 }
253 
// Returns the constant C_STREAMING_STRING.
// NOTE(review): signature line missing from this extract; presumably the
// feature-class identifier getter - confirm.
254 template <class T>
256 {
257  return C_STREAMING_STRING;
258 }
259 
// Explicit instantiations of the class template for the supported element
// types, so the member definitions above are emitted in this translation unit.
// NOTE(review): listing lines 265, 267 and 269-272 are missing from this
// extract, so further instantiations are presumably not shown - confirm.
260 template class CStreamingStringFeatures<bool>;
261 template class CStreamingStringFeatures<char>;
262 template class CStreamingStringFeatures<int8_t>;
263 template class CStreamingStringFeatures<uint8_t>;
264 template class CStreamingStringFeatures<int16_t>;
266 template class CStreamingStringFeatures<int32_t>;
268 template class CStreamingStringFeatures<int64_t>;
273 
274 }

SHOGUN Machine Learning Toolbox - Documentation