SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
StreamingStringFeatures.cpp
Go to the documentation of this file.
2 
3 namespace shogun
4 {
5 
6 
7 template <class T>
9 {
10  init();
12  remap_to_bin=false;
13 }
14 
15 template <class T>
17  bool is_labelled,
18  int32_t size)
20 {
21  init(file, is_labelled, size);
23  remap_to_bin=false;
24 }
25 
26 template <class T>
28 {
29  parser.end_parser();
30  SG_UNREF(alphabet);
31 }
32 
33 template <class T>
35 {
36  SG_UNREF(alphabet);
37 
38  alphabet=new CAlphabet(alpha);
39  SG_REF(alphabet);
40  num_symbols=alphabet->get_num_symbols();
41 }
42 
43 template <class T>
45 {
46  SG_UNREF(alphabet);
47 
48  alphabet=new CAlphabet(alpha);
49  SG_REF(alphabet);
50  num_symbols=alphabet->get_num_symbols();
51 }
52 
53 template <class T>
54 void CStreamingStringFeatures<T>::set_remap(CAlphabet* ascii_alphabet, CAlphabet* binary_alphabet)
55 {
56  remap_to_bin=true;
57  alpha_ascii=new CAlphabet(ascii_alphabet);
58  alpha_bin=new CAlphabet(binary_alphabet);
59 }
60 
61 template <class T>
62 void CStreamingStringFeatures<T>::set_remap(EAlphabet ascii_alphabet, EAlphabet binary_alphabet)
63 {
64  remap_to_bin=true;
65  alpha_ascii=new CAlphabet(ascii_alphabet);
66  alpha_bin=new CAlphabet(binary_alphabet);
67 }
68 
69 template <class T>
71 {
72  SG_REF(alphabet);
73  return alphabet;
74 }
75 
76 template <class T>
78 {
79  return num_symbols;
80 }
81 
82 template <class T>
84 {
85  return new CStreamingStringFeatures<T>(*this);
86 }
87 
88 template <class T>
90 {
91  if (current_string)
92  return 1;
93  return 0;
94 }
95 
96 template <class T>
98 {
99  return sizeof(T);
100 }
101 
102 template <class T>
104 {
105  return current_length;
106 }
107 
109 {
110  parser.set_read_vector(&CStreamingFile::get_string);
111 }
112 
114 {
115  parser.set_read_vector_and_label
117 }
118 
// Generates the explicit specialization of
// CStreamingStringFeatures<ELEM_T>::get_feature_type() that simply returns
// FEATURE_ID. Invoked once per supported element type below.
#define GET_FEATURE_TYPE(FEATURE_ID, ELEM_T)								\
template<>																	\
EFeatureType CStreamingStringFeatures<ELEM_T>::get_feature_type() const	\
{																			\
	return FEATURE_ID;														\
}
124 
127 GET_FEATURE_TYPE(F_BYTE, uint8_t)
128 GET_FEATURE_TYPE(F_BYTE, int8_t)
129 GET_FEATURE_TYPE(F_SHORT, int16_t)
130 GET_FEATURE_TYPE(F_WORD, uint16_t)
131 GET_FEATURE_TYPE(F_INT, int32_t)
132 GET_FEATURE_TYPE(F_UINT, uint32_t)
133 GET_FEATURE_TYPE(F_LONG, int64_t)
134 GET_FEATURE_TYPE(F_ULONG, uint64_t)
138 #undef GET_FEATURE_TYPE
139 
140 
141 template <class T>
142 void CStreamingStringFeatures<T>::init()
143 {
144  working_file=NULL;
145  alphabet=new CAlphabet();
146 
147  current_string=NULL;
148  current_length=-1;
149  current_sgstring.string=current_string;
150  current_sgstring.slen=current_length;
151 }
152 
153 template <class T>
154 void CStreamingStringFeatures<T>::init(CStreamingFile* file,
155  bool is_labelled,
156  int32_t size)
157 {
158  init();
159  has_labels=is_labelled;
160  working_file=file;
161  parser.init(file, is_labelled, size);
162  parser.set_free_vector_after_release(false);
163  parser.set_free_vectors_on_destruct(false);
164 }
165 
166 template <class T>
168 {
169  if (!remap_to_bin)
170  alpha_ascii=alphabet;
171 
172  if (!parser.is_running())
173  parser.start_parser();
174 }
175 
176 template <class T>
178 {
179  parser.end_parser();
180 }
181 
182 template <class T>
184 {
185  bool ret_value;
186 
187  ret_value = (bool) parser.get_next_example(current_string,
188  current_length,
189  current_label);
190 
191  if (!ret_value)
192  return false;
193 
194  int32_t i;
195  if (remap_to_bin)
196  {
197  alpha_ascii->add_string_to_histogram(current_string, current_length);
198 
199  for (i=0; i<current_length; i++)
200  current_string[i]=alpha_ascii->remap_to_bin(current_string[i]);
201  alpha_bin->add_string_to_histogram(current_string, current_length);
202  }
203  else
204  {
205  alpha_ascii->add_string_to_histogram(current_string, current_length);
206  }
207 
208  /* Check the input using src alphabet, alpha_ascii */
209  if ( !(alpha_ascii->check_alphabet_size() && alpha_ascii->check_alphabet()) )
210  {
211  SG_ERROR("StreamingStringFeatures: The given input was found to be incompatible with the alphabet!\n");
212  return 0;
213  }
214 
215  //SG_UNREF(alphabet);
216 
217  if (remap_to_bin)
218  alphabet=alpha_bin;
219  else
220  alphabet=alpha_ascii;
221 
222  //SG_REF(alphabet);
223  num_symbols=alphabet->get_num_symbols();
224 
225  return ret_value;
226 }
227 
228 template <class T>
230 {
231  current_sgstring.string=current_string;
232  current_sgstring.slen=current_length;
233 
234  return current_sgstring;
235 }
236 
237 template <class T>
239 {
240  ASSERT(has_labels);
241 
242  return current_label;
243 }
244 
245 template <class T>
247 {
248  parser.finalize_example();
249 }
250 
251 template <class T>
253 {
254  return current_length;
255 }
256 
257 template <class T>
259 {
260  return C_STREAMING_STRING;
261 }
262 
263 template class CStreamingStringFeatures<bool>;
264 template class CStreamingStringFeatures<char>;
265 template class CStreamingStringFeatures<int8_t>;
266 template class CStreamingStringFeatures<uint8_t>;
267 template class CStreamingStringFeatures<int16_t>;
269 template class CStreamingStringFeatures<int32_t>;
271 template class CStreamingStringFeatures<int64_t>;
276 
277 }

SHOGUN Machine Learning Toolbox - Documentation