16 using namespace shogun;
// Invokes the SG_UNSTABLE macro with the default constructor's qualified name
// and a newline string. The enclosing constructor header is not shown here.
21 SG_UNSTABLE(
"CStreamingAsciiFile::CStreamingAsciiFile()",
"\n");
/*
 * GET_VECTOR(fname, conv, sg_type)
 *
 * Generates CStreamingAsciiFile::get_vector(sg_type*& vector, int32_t& num_feat)
 * for one element type. Going by the lines shown here, the generated method:
 *
 *  - remembers the incoming num_feat as old_len, then reads one line from
 *    `buf` with read_line(), keeping the returned byte count in bytes_read;
 *  - walks the line with ptr_data. The branch conditions test isblank() on
 *    the current character together with whether ptr_item is set, so
 *    ptr_item presumably marks the start of the token in progress. A '\n' or
 *    reaching bytes_read is the end-of-line condition. Tokens are handed to
 *    append_item() together with the `items` array;
 *  - logs num_feat with SG_DEBUG;
 *  - grows `vector` with SG_REALLOC only when old_len < num_feat, so a
 *    caller-supplied buffer that is already large enough is reused;
 *  - stores conv(items->get_element(i)) into vector[i] for i in [0, num_feat).
 *
 * `fname` is not referenced in the visible lines; the generated method is
 * named get_vector whatever is passed for it.
 *
 * NOTE(review): the assignment that derives num_feat from `items` is not
 * visible here - confirm it happens before the SG_REALLOC check.
 * NOTE(review): `items` is allocated with new; its release (and that of the
 * strings it holds) is not visible here - confirm both are freed.
 *
 * The invocation after the macro body instantiates the bool overload, using
 * str_to_bool as the converter.
 */
35 #define GET_VECTOR(fname, conv, sg_type) \
36 void CStreamingAsciiFile::get_vector(sg_type*& vector, int32_t& num_feat) \
38 char* buffer = NULL; \
40 int32_t old_len = num_feat; \
42 bytes_read = buf->read_line(buffer); \
55 char* ptr_item=NULL; \
56 char* ptr_data=buffer; \
57 DynArray<char*>* items=new DynArray<char*>(); \
61 if ((*ptr_data=='\n') || \
62 (ptr_data - buffer >= bytes_read)) \
67 append_item(items, ptr_data, ptr_item); \
74 else if (!isblank(*ptr_data) && !ptr_item) \
78 else if (isblank(*ptr_data) && ptr_item) \
80 append_item(items, ptr_data, ptr_item); \
88 SG_DEBUG("num_feat %d\n", num_feat); \
91 if (old_len < num_feat) \
92 vector=SG_REALLOC(sg_type, vector, num_feat); \
94 for (int32_t i=0; i<num_feat; i++) \
96 char* item=items->get_element(i); \
97 vector[i]=conv(item); \
103 GET_VECTOR(get_bool_vector, str_to_bool,
bool)
/*
 * GET_FLOAT_VECTOR(sg_type)
 *
 * Generates CStreamingAsciiFile::get_vector(sg_type*& vector, int32_t& len)
 * using the substring tokenizer instead of the per-item string copies made by
 * GET_VECTOR. Going by the lines shown here, the generated method:
 *
 *  - reads one line from `buf` into `line`, keeping the character count in
 *    num_chars, and remembers the incoming len as old_len;
 *  - takes a separate branch when num_chars == 0 (branch body not shown);
 *  - wraps the line as the substring {line, line + num_chars} and splits it
 *    on ' ' with CAsciiFile::tokenize() into `words`;
 *  - sets len to words.index() and points feature_start at words[0];
 *  - resizes `vector` to len elements with SG_REALLOC;
 *  - walks from feature_start to words.end, storing float_of_substring(*i)
 *    into vector[j++].
 *
 * NOTE(review): old_len is captured but the condition guarding SG_REALLOC is
 * not visible here - presumably the reallocation only happens when the buffer
 * must grow; confirm.
 *
 * The macro is removed again with #undef once its instantiations (not shown)
 * have been emitted.
 */
116 #define GET_FLOAT_VECTOR(sg_type) \
117 void CStreamingAsciiFile::get_vector(sg_type*& vector, int32_t& len)\
120 int32_t num_chars = buf->read_line(line); \
121 int32_t old_len = len; \
123 if (num_chars == 0) \
129 substring example_string = {line, line + num_chars}; \
131 CAsciiFile::tokenize(' ', example_string, words); \
133 len = words.index(); \
134 substring* feature_start = &words[0]; \
137 vector = SG_REALLOC(sg_type, vector, len); \
140 for (substring* i = feature_start; i != words.end; i++) \
142 vector[j++] = float_of_substring(*i); \
148 #undef GET_FLOAT_VECTOR
/*
 * GET_VECTOR_AND_LABEL(fname, conv, sg_type)
 *
 * Generates CStreamingAsciiFile::get_vector_and_label(sg_type*& vector,
 * int32_t& num_feat, float64_t& label). The visible lines tokenize the line
 * with the same conditions as GET_VECTOR (isblank() on the current character
 * combined with whether ptr_item is set; '\n' or reaching bytes_read as the
 * end-of-line condition; tokens passed to append_item()), and then:
 *
 *  - parse the first item, items->get_element(0), with atof() into `label`;
 *  - grow `vector` with SG_REALLOC to num_feat-1 elements only when
 *    old_len < num_feat - 1;
 *  - convert items 1 .. num_feat-1 with `conv` into vector[0 .. num_feat-2].
 *
 * `fname` is not referenced in the visible lines.
 *
 * NOTE(review): inside the visible lines num_feat counts the label item as
 * well as the features; whether it is reduced by one before returning to the
 * caller is not visible here - confirm.
 * NOTE(review): `items` is allocated with new; its release is not visible
 * here - confirm it is freed.
 *
 * The macro is removed again with #undef once its instantiations (not shown)
 * have been emitted.
 */
152 #define GET_VECTOR_AND_LABEL(fname, conv, sg_type) \
153 void CStreamingAsciiFile::get_vector_and_label(sg_type*& vector, int32_t& num_feat, float64_t& label) \
155 char* buffer = NULL; \
156 ssize_t bytes_read; \
157 int32_t old_len = num_feat; \
159 bytes_read = buf->read_line(buffer); \
172 char* ptr_item=NULL; \
173 char* ptr_data=buffer; \
174 DynArray<char*>* items=new DynArray<char*>(); \
178 if ((*ptr_data=='\n') || \
179 (ptr_data - buffer >= bytes_read)) \
184 append_item(items, ptr_data, ptr_item); \
191 else if (!isblank(*ptr_data) && !ptr_item) \
195 else if (isblank(*ptr_data) && ptr_item) \
197 append_item(items, ptr_data, ptr_item); \
205 SG_DEBUG("num_feat %d\n", num_feat); \
207 label=atof(items->get_element(0)); \
209 if (old_len < num_feat - 1) \
210 vector=SG_REALLOC(sg_type, vector, num_feat-1); \
212 for (int32_t i=1; i<num_feat; i++) \
214 char* item=items->get_element(i); \
215 vector[i-1]=conv(item); \
233 #undef GET_VECTOR_AND_LABEL
/*
 * GET_FLOAT_VECTOR_AND_LABEL(sg_type)
 *
 * Generates CStreamingAsciiFile::get_vector_and_label(sg_type*& vector,
 * int32_t& len, float64_t& label) on top of the substring tokenizer. Going by
 * the lines shown here, the generated method:
 *
 *  - reads one line from `buf` into `line` (num_chars characters) and
 *    remembers the incoming len as old_len;
 *  - takes a separate branch when num_chars == 0 (branch body not shown);
 *  - splits {line, line + num_chars} on ' ' with CAsciiFile::tokenize();
 *  - parses words[0] with float_of_substring() into `label`;
 *  - sets len to words.index() - 1 and points feature_start at words[1], so
 *    the label token is excluded from the feature count;
 *  - resizes `vector` to len elements with SG_REALLOC and fills it with
 *    float_of_substring(*i) for every token from feature_start to words.end.
 *
 * NOTE(review): words[0] and words[1] are accessed with no visible check that
 * the tokenizer produced any tokens - confirm a blank line cannot reach this
 * point.
 * NOTE(review): the condition guarding SG_REALLOC (old_len is captured above)
 * is not visible here - confirm.
 *
 * The macro is removed again with #undef once its instantiations (not shown)
 * have been emitted.
 */
235 #define GET_FLOAT_VECTOR_AND_LABEL(sg_type) \
236 void CStreamingAsciiFile::get_vector_and_label(sg_type*& vector, int32_t& len, float64_t& label) \
239 int32_t num_chars = buf->read_line(line); \
240 int32_t old_len = len; \
242 if (num_chars == 0) \
248 substring example_string = {line, line + num_chars}; \
250 CAsciiFile::tokenize(' ', example_string, words); \
252 label = float_of_substring(words[0]); \
254 len = words.index() - 1; \
255 substring* feature_start = &words[1]; \
258 vector = SG_REALLOC(sg_type, vector, len); \
261 for (substring* i = feature_start; i != words.end; i++) \
263 vector[j++] = float_of_substring(*i); \
269 #undef GET_FLOAT_VECTOR_AND_LABEL
/*
 * GET_STRING(fname, conv, sg_type)
 *
 * Generates CStreamingAsciiFile::get_string(sg_type*& vector, int32_t& len).
 * Going by the lines shown here, the generated method:
 *
 *  - reads one line from `buf` with read_line() (bytes_read bytes);
 *  - logs the raw line with SG_DEBUG;
 *  - when the last byte read is '\n', overwrites it with '\0';
 *  - sets `vector` to the line buffer itself, cast to sg_type*. No copy is
 *    made in the visible lines, so `vector` aliases whatever storage
 *    read_line() returned.
 *
 * Neither `fname` nor `conv` is referenced in the visible lines, and the
 * assignment to `len` is not shown.
 *
 * NOTE(review): buffer[bytes_read-1] requires bytes_read > 0; the guard for
 * an empty read is not visible here - confirm it exists.
 *
 * The invocation after the macro body instantiates the bool overload.
 */
273 #define GET_STRING(fname, conv, sg_type) \
274 void CStreamingAsciiFile::get_string(sg_type*& vector, int32_t& len) \
276 char* buffer = NULL; \
277 ssize_t bytes_read; \
279 bytes_read = buf->read_line(buffer); \
288 SG_DEBUG("Line read from the file:\n%s\n", buffer); \
290 if (buffer[bytes_read-1]=='\n') \
293 buffer[bytes_read-1]='\0'; \
297 vector=(sg_type *) buffer; \
300 GET_STRING(get_bool_string, str_to_bool,
bool)
/*
 * GET_STRING_AND_LABEL(fname, conv, sg_type)
 *
 * Generates CStreamingAsciiFile::get_string_and_label(sg_type*& vector,
 * int32_t& len, float64_t& label). Going by the lines shown here, the
 * generated method:
 *
 *  - reads one line from `buf` with read_line() (bytes_read bytes);
 *  - scans the line for a ' ' character and parses the label from the start
 *    of the buffer with atoi(); str_start_pos starts at -1 and is presumably
 *    set to the position following that space (the assignment is not shown);
 *  - takes a separate branch when str_start_pos is still -1 after the scan
 *    (branch body not shown);
 *  - when the last byte is '\n', replaces it with '\0' and sets len to
 *    bytes_read - str_start_pos - 1; the other visible assignment sets len
 *    to bytes_read - str_start_pos;
 *  - points `vector` at &buffer[str_start_pos], i.e. into the line buffer
 *    rather than at a copy.
 *
 * Neither `fname` nor `conv` is referenced in the visible lines.
 *
 * NOTE(review): `label` is a float64_t but is parsed with atoi(), which
 * yields an int; the other label-reading macros in this file use atof().
 * Confirm whether fractional labels are meant to be truncated here.
 *
 * The macro is removed again with #undef once its instantiations (not shown)
 * have been emitted.
 */
317 #define GET_STRING_AND_LABEL(fname, conv, sg_type) \
318 void CStreamingAsciiFile::get_string_and_label(sg_type*& vector, int32_t& len, float64_t& label) \
320 char* buffer = NULL; \
321 ssize_t bytes_read; \
323 bytes_read = buf->read_line(buffer); \
332 int32_t str_start_pos=-1; \
334 for (int32_t i=0; i<bytes_read; i++) \
336 if (buffer[i] == ' ') \
339 label=atoi(buffer); \
346 if (str_start_pos == -1) \
353 if (buffer[bytes_read-1]=='\n') \
355 buffer[bytes_read-1]='\0'; \
356 len=bytes_read-str_start_pos-1; \
359 len=bytes_read-str_start_pos; \
361 vector=(sg_type*) &buffer[str_start_pos]; \
377 #undef GET_STRING_AND_LABEL
/*
 * GET_SPARSE_VECTOR(fname, conv, sg_type)
 *
 * Generates CStreamingAsciiFile::get_sparse_vector(
 * SGSparseVectorEntry<sg_type>*& vector, int32_t& len). Going by the lines
 * shown here, the generated method:
 *
 *  - reads one line from `buf` with read_line() (bytes_read bytes);
 *  - when the last byte is '\n', sets num_chars to bytes_read-1 and
 *    terminates the buffer there with '\0'; the other visible assignment
 *    sets num_chars to bytes_read;
 *  - makes a first pass over the line testing for ':' with num_dims declared
 *    just before it - presumably counting one entry per ':' (the increment
 *    is not shown);
 *  - assigns `vector` a fresh SG_MALLOC of num_dims entries on every call;
 *  - makes a second pass: at each ':' the text starting at index_start_pos is
 *    parsed with atoi() and stored, minus one, as feat_index (presumably
 *    mapping 1-based file indices to 0-based); index_start_pos is reset to
 *    -1; i is advanced to the next ' ' or to num_chars; and the text starting
 *    at feature_start_pos is converted with `conv` into `entry`.
 *
 * `fname` is not referenced in the visible lines, and the assignment to
 * `len` is not shown.
 *
 * NOTE(review): the inner while tests buffer[i] before i<num_chars, so
 * buffer[num_chars] is read once at the end of the line. That byte is the
 * '\0' written above when a newline was stripped; otherwise confirm the
 * buffer is readable at that offset.
 * NOTE(review): buffer[bytes_read-1] requires bytes_read > 0; the guard is
 * not visible here - confirm.
 *
 * The macro is removed again with #undef once its instantiations (not shown)
 * have been emitted.
 */
381 #define GET_SPARSE_VECTOR(fname, conv, sg_type) \
382 void CStreamingAsciiFile::get_sparse_vector(SGSparseVectorEntry<sg_type>*& vector, int32_t& len) \
384 char* buffer = NULL; \
385 ssize_t bytes_read; \
387 bytes_read = buf->read_line(buffer); \
398 if (buffer[bytes_read-1]=='\n') \
400 num_chars=bytes_read-1; \
401 buffer[num_chars]='\0'; \
404 num_chars=bytes_read; \
406 int32_t num_dims=0; \
407 for (int32_t i=0; i<num_chars; i++) \
409 if (buffer[i]==':') \
415 int32_t index_start_pos=-1; \
416 int32_t feature_start_pos; \
417 int32_t current_feat=0; \
418 vector=SG_MALLOC(SGSparseVectorEntry<sg_type>, num_dims); \
419 for (int32_t i=0; i<num_chars; i++) \
421 if (buffer[i]==':') \
424 vector[current_feat].feat_index=(int32_t) atoi(buffer+index_start_pos)-1; \
426 index_start_pos=-1; \
428 feature_start_pos=i+1; \
429 while ((buffer[i]!=' ') && (i<num_chars)) \
435 vector[current_feat].entry=(sg_type) conv(buffer+feature_start_pos); \
439 else if (buffer[i]==' ') \
446 if (index_start_pos == -1) \
467 #undef GET_SPARSE_VECTOR
/*
 * GET_SPARSE_VECTOR_AND_LABEL(fname, conv, sg_type)
 *
 * Generates CStreamingAsciiFile::get_sparse_vector_and_label(
 * SGSparseVectorEntry<sg_type>*& vector, int32_t& len, float64_t& label).
 * Going by the lines shown here, the generated method:
 *
 *  - reads one line from `buf`, strips a trailing '\n' (writing '\0' in its
 *    place) and records the usable length in num_chars;
 *  - makes a first pass over the whole line testing for ':' with num_dims
 *    declared just before it - presumably counting entries (the increment is
 *    not shown) - and assigns `vector` a fresh SG_MALLOC of num_dims entries;
 *  - scans from i=1, testing for ':' and for a ' ' whose predecessor is not
 *    a ' '; the label is parsed from the start of the buffer with atof().
 *    label_pos starts at -1 and is presumably set at that space (assignment
 *    not shown);
 *  - reports "No label found!" through SG_ERROR (the triggering condition is
 *    not shown);
 *  - advances `buffer` by label_pos+1 and shrinks num_chars by the same
 *    amount, so the pair-parsing pass below sees only the text after the
 *    label;
 *  - parses the pairs exactly as the visible lines of GET_SPARSE_VECTOR do:
 *    atoi() of the text at index_start_pos, minus one, into feat_index, and
 *    conv() of the text at feature_start_pos into `entry`.
 *
 * `fname` is not referenced in the visible lines, and the assignment to
 * `len` is not shown.
 *
 * NOTE(review): the inner while tests buffer[i] before i<num_chars, so one
 * byte past the logical end of the line is read; that byte is the '\0'
 * written above only when a newline was stripped - confirm otherwise.
 * NOTE(review): `buffer` is modified in place by the += above; if the
 * original pointer is needed later (e.g. to release it), confirm it is
 * retained elsewhere.
 *
 * The macro is removed again with #undef once its instantiations (not shown)
 * have been emitted.
 */
471 #define GET_SPARSE_VECTOR_AND_LABEL(fname, conv, sg_type) \
472 void CStreamingAsciiFile::get_sparse_vector_and_label(SGSparseVectorEntry<sg_type>*& vector, int32_t& len, float64_t& label) \
474 char* buffer = NULL; \
475 ssize_t bytes_read; \
477 bytes_read = buf->read_line(buffer); \
488 if (buffer[bytes_read-1]=='\n') \
490 num_chars=bytes_read-1; \
491 buffer[num_chars]='\0'; \
494 num_chars=bytes_read; \
496 int32_t num_dims=0; \
497 for (int32_t i=0; i<num_chars; i++) \
499 if (buffer[i]==':') \
505 int32_t index_start_pos=-1; \
506 int32_t feature_start_pos; \
507 int32_t current_feat=0; \
508 int32_t label_pos=-1; \
509 vector=SG_MALLOC(SGSparseVectorEntry<sg_type>, num_dims); \
511 for (int32_t i=1; i<num_chars; i++) \
513 if (buffer[i]==':') \
517 if ( (buffer[i]==' ') && (buffer[i-1]!=' ') ) \
521 label=atof(buffer); \
527 SG_ERROR("No label found!\n"); \
529 buffer+=label_pos+1; \
530 num_chars-=label_pos+1; \
531 for (int32_t i=0; i<num_chars; i++) \
533 if (buffer[i]==':') \
536 vector[current_feat].feat_index=(int32_t) atoi(buffer+index_start_pos)-1; \
538 index_start_pos=-1; \
540 feature_start_pos=i+1; \
541 while ((buffer[i]!=' ') && (i<num_chars)) \
547 vector[current_feat].entry=(sg_type) conv(buffer+feature_start_pos); \
551 else if (buffer[i]==' ') \
558 if (index_start_pos == -1) \
579 #undef GET_SPARSE_VECTOR_AND_LABEL
// Copies the token spanning [ptr_item, ptr_data) into a zero-filled,
// NUL-terminated buffer `item` and logs it.
//
// items    - array the token is collected into (the append itself is not
//            visible in the lines shown here)
// ptr_data - current scan position; the token ends just before it
// ptr_item - start of the token
//
// The allocation of `item` is also not visible in the lines shown here.
582 void CStreamingAsciiFile::append_item(
583 DynArray<T>* items,
char* ptr_data,
char* ptr_item)
// Token length in characters: the distance between the two pointers.
585 size_t len=(ptr_data-ptr_item)/
sizeof(
char);
// Zero len+1 bytes first so that the len characters copied below are always
// followed by a terminating NUL (strncpy does not add one by itself here).
587 memset(item, 0,
sizeof(
char)*(len+1));
588 item=strncpy(item, ptr_item, len);
// NOTE(review): `len` is a size_t but is printed with %d - confirm the
// format specifier is acceptable on all supported platforms.
590 SG_DEBUG(
"current %c, len %d, item %s\n", *ptr_data, len, item);