16 using namespace shogun;
21 SG_UNSTABLE(
"CStreamingAsciiFile::CStreamingAsciiFile()",
"\n")
/*
 * GET_VECTOR(fname, conv, sg_type)
 *
 * Expands to CStreamingAsciiFile::get_vector(sg_type*& vector,
 * int32_t& num_feat). The generated function reads one line from `buf`
 * with read_line(), walks it through ptr_data and hands runs of
 * non-blank characters (as classified by isblank) to append_item(),
 * which collects them in `items`. An item is also appended when a
 * newline is seen or ptr_data has advanced bytes_read characters.
 * Every collected item is then converted with `conv` and stored in
 * vector[i].
 *
 * `vector` is grown with SG_REALLOC only when the num_feat passed in
 * (saved as old_len) is smaller than the new num_feat.
 *
 * NOTE(review): `fname` is not referenced by any line visible here.
 * NOTE(review): where num_feat is assigned and where `items` and its
 * strings are released is not visible in this excerpt -- confirm.
 *
 * The invocation following the macro instantiates the overload for
 * bool, converting with str_to_bool.
 */
37 #define GET_VECTOR(fname, conv, sg_type) \
38 void CStreamingAsciiFile::get_vector(sg_type*& vector, int32_t& num_feat) \
40 char* buffer = NULL; \
42 int32_t old_len = num_feat; \
45 bytes_read = buf->read_line(buffer); \
59 char* ptr_item=NULL; \
60 char* ptr_data=buffer; \
61 DynArray<char*>* items=new DynArray<char*>(); \
65 if ((*ptr_data=='\n') || \
66 (ptr_data - buffer >= bytes_read)) \
71 append_item(items, ptr_data, ptr_item); \
78 else if (!isblank(*ptr_data) && !ptr_item) \
82 else if (isblank(*ptr_data) && ptr_item) \
84 append_item(items, ptr_data, ptr_item); \
92 SG_DEBUG("num_feat %d\n", num_feat) \
95 if (old_len < num_feat) \
96 vector=SG_REALLOC(sg_type, vector, old_len, num_feat); \
98 for (int32_t i=0; i<num_feat; i++) \
100 char* item=items->get_element(i); \
101 vector[i]=conv(item); \
108 GET_VECTOR(get_bool_vector, str_to_bool,
bool)
/*
 * GET_FLOAT_VECTOR(sg_type)
 *
 * Expands to CStreamingAsciiFile::get_vector(sg_type*& vector,
 * int32_t& len) for floating point element types. The generated
 * function reads one line from `buf`, wraps it in a substring spanning
 * [line, line + num_chars) and splits it with CCSVFile::tokenize()
 * using m_delimiter. `len` is set to the number of tokens
 * (words.index()), `vector` is resized with SG_REALLOC from the
 * incoming length (old_len) to `len`, and each token is converted with
 * SGIO::float_of_substring().
 *
 * NOTE(review): the handling of an empty line (num_chars == 0) and any
 * condition guarding the SG_REALLOC are not visible in this excerpt.
 */
121 #define GET_FLOAT_VECTOR(sg_type) \
122 void CStreamingAsciiFile::get_vector(sg_type*& vector, int32_t& len)\
126 int32_t num_chars = buf->read_line(line); \
127 int32_t old_len = len; \
129 if (num_chars == 0) \
136 substring example_string = {line, line + num_chars}; \
138 CCSVFile::tokenize(m_delimiter, example_string, words); \
140 len = words.index(); \
141 substring* feature_start = &words[0]; \
144 vector = SG_REALLOC(sg_type, vector, old_len, len); \
147 for (substring* i = feature_start; i != words.end; i++) \
149 vector[j++] = SGIO::float_of_substring(*i); \
156 #undef GET_FLOAT_VECTOR
/*
 * GET_VECTOR_AND_LABEL(fname, conv, sg_type)
 *
 * Expands to CStreamingAsciiFile::get_vector_and_label(sg_type*& vector,
 * int32_t& num_feat, float64_t& label). The generated function reads
 * one line from `buf` and splits it into blank-separated items using
 * isblank() and append_item(), the same way GET_VECTOR does.
 *
 * The first item (index 0) is parsed with atof() into `label`; items
 * 1 .. num_feat-1 are converted with `conv` and stored in
 * vector[0 .. num_feat-2]. `vector` is grown with SG_REALLOC only when
 * the incoming num_feat (saved as old_len) is smaller than
 * num_feat - 1.
 *
 * NOTE(review): `fname` is not referenced by any line visible here.
 * NOTE(review): whether num_feat is reduced by one before returning,
 * and what happens for a line without any item, is not visible in
 * this excerpt -- confirm.
 */
160 #define GET_VECTOR_AND_LABEL(fname, conv, sg_type) \
161 void CStreamingAsciiFile::get_vector_and_label(sg_type*& vector, int32_t& num_feat, float64_t& label) \
163 char* buffer = NULL; \
164 ssize_t bytes_read; \
165 int32_t old_len = num_feat; \
168 bytes_read = buf->read_line(buffer); \
182 char* ptr_item=NULL; \
183 char* ptr_data=buffer; \
184 DynArray<char*>* items=new DynArray<char*>(); \
188 if ((*ptr_data=='\n') || \
189 (ptr_data - buffer >= bytes_read)) \
194 append_item(items, ptr_data, ptr_item); \
201 else if (!isblank(*ptr_data) && !ptr_item) \
205 else if (isblank(*ptr_data) && ptr_item) \
207 append_item(items, ptr_data, ptr_item); \
215 SG_DEBUG("num_feat %d\n", num_feat) \
217 label=atof(items->get_element(0)); \
219 if (old_len < num_feat - 1) \
220 vector=SG_REALLOC(sg_type, vector, old_len, num_feat-1); \
222 for (int32_t i=1; i<num_feat; i++) \
224 char* item=items->get_element(i); \
225 vector[i-1]=conv(item); \
244 #undef GET_VECTOR_AND_LABEL
/*
 * GET_FLOAT_VECTOR_AND_LABEL(sg_type)
 *
 * Expands to CStreamingAsciiFile::get_vector_and_label(sg_type*& vector,
 * int32_t& len, float64_t& label) for floating point element types.
 * The generated function reads one line from `buf` and tokenizes it
 * with CCSVFile::tokenize() using m_delimiter. The first token
 * (words[0]) is parsed into `label`; `len` becomes the token count
 * minus one and the remaining tokens, starting at words[1], are
 * converted with SGIO::float_of_substring() into `vector`, which is
 * resized with SG_REALLOC from the incoming length (old_len) to `len`.
 *
 * NOTE(review): the handling of an empty line (num_chars == 0) and any
 * condition guarding the SG_REALLOC are not visible in this excerpt.
 */
246 #define GET_FLOAT_VECTOR_AND_LABEL(sg_type) \
247 void CStreamingAsciiFile::get_vector_and_label(sg_type*& vector, int32_t& len, float64_t& label) \
251 int32_t num_chars = buf->read_line(line); \
252 int32_t old_len = len; \
254 if (num_chars == 0) \
261 substring example_string = {line, line + num_chars}; \
263 CCSVFile::tokenize(m_delimiter, example_string, words); \
265 label = SGIO::float_of_substring(words[0]); \
267 len = words.index() - 1; \
268 substring* feature_start = &words[1]; \
271 vector = SG_REALLOC(sg_type, vector, old_len, len); \
274 for (substring* i = feature_start; i != words.end; i++) \
276 vector[j++] = SGIO::float_of_substring(*i); \
283 #undef GET_FLOAT_VECTOR_AND_LABEL
/*
 * GET_STRING(fname, conv, sg_type)
 *
 * Expands to CStreamingAsciiFile::get_string(sg_type*& vector,
 * int32_t& len). The generated function reads one line from `buf`,
 * logs it at debug level, replaces a trailing newline with a NUL byte
 * and then points `vector` directly at the line buffer (cast to
 * sg_type*); the line is not copied.
 *
 * NOTE(review): `fname` and `conv` are not referenced by any line
 * visible here, and the assignment of `len` as well as the handling of
 * a failed/empty read are not visible in this excerpt -- confirm.
 *
 * The invocation following the macro instantiates the overload for
 * bool.
 */
287 #define GET_STRING(fname, conv, sg_type) \
288 void CStreamingAsciiFile::get_string(sg_type*& vector, int32_t& len) \
290 char* buffer = NULL; \
291 ssize_t bytes_read; \
294 bytes_read = buf->read_line(buffer); \
304 SG_DEBUG("Line read from the file:\n%s\n", buffer) \
306 if (buffer[bytes_read-1]=='\n') \
309 buffer[bytes_read-1]='\0'; \
313 vector=(sg_type *) buffer; \
317 GET_STRING(get_bool_string, str_to_bool,
bool)
/*
 * GET_STRING_AND_LABEL(fname, conv, sg_type)
 *
 * Expands to CStreamingAsciiFile::get_string_and_label(sg_type*& vector,
 * int32_t& len, float64_t& label). The generated function reads one
 * line from `buf` and scans it for a space character; the text in
 * front of it is parsed as the label. `vector` is pointed into the
 * line buffer at str_start_pos (no copy is made). If the line ends in
 * a newline it is replaced by a NUL byte and excluded from `len`.
 *
 * The label is parsed with atof() rather than atoi(): `label` is a
 * float64_t and the other *_and_label readers in this file parse it as
 * a floating point number, so fractional labels must not be truncated
 * to an integer here.
 *
 * NOTE(review): `fname` and `conv` are not referenced by any line
 * visible here; the assignment of str_start_pos and the handling of
 * str_start_pos == -1 are not visible in this excerpt -- confirm.
 */
334 #define GET_STRING_AND_LABEL(fname, conv, sg_type) \
335 void CStreamingAsciiFile::get_string_and_label(sg_type*& vector, int32_t& len, float64_t& label) \
337 char* buffer = NULL; \
338 ssize_t bytes_read; \
341 bytes_read = buf->read_line(buffer); \
351 int32_t str_start_pos=-1; \
353 for (int32_t i=0; i<bytes_read; i++) \
355 if (buffer[i] == ' ') \
358 label=atof(buffer); \
365 if (str_start_pos == -1) \
372 if (buffer[bytes_read-1]=='\n') \
374 buffer[bytes_read-1]='\0'; \
375 len=bytes_read-str_start_pos-1; \
378 len=bytes_read-str_start_pos; \
380 vector=(sg_type*) &buffer[str_start_pos]; \
397 #undef GET_STRING_AND_LABEL
/*
 * GET_SPARSE_VECTOR(fname, conv, sg_type)
 *
 * Expands to CStreamingAsciiFile::get_sparse_vector(
 * SGSparseVectorEntry<sg_type>*& vector, int32_t& len). The generated
 * function reads one line from `buf`, strips a trailing newline, and
 * makes a first pass over the line looking at ':' characters to
 * determine num_dims. `vector` is grown with SG_REALLOC only when the
 * incoming `len` is smaller than num_dims.
 *
 * A second pass parses the entries: at each ':' the text starting at
 * index_start_pos is parsed with atoi() and stored, minus one, as
 * feat_index; the text after the ':' up to the next space (or the end
 * of the line) is converted with `conv` and stored as entry.
 *
 * The value-scanning loop checks i < num_chars before it reads
 * buffer[i], so the buffer is never read at or past num_chars.
 *
 * NOTE(review): `fname` is not referenced by any line visible here;
 * the final assignment of `len` is not visible in this excerpt.
 */
401 #define GET_SPARSE_VECTOR(fname, conv, sg_type) \
402 void CStreamingAsciiFile::get_sparse_vector(SGSparseVectorEntry<sg_type>*& vector, int32_t& len) \
404 char* buffer = NULL; \
405 ssize_t bytes_read; \
408 bytes_read = buf->read_line(buffer); \
420 if (buffer[bytes_read-1]=='\n') \
422 num_chars=bytes_read-1; \
423 buffer[num_chars]='\0'; \
426 num_chars=bytes_read; \
428 int32_t num_dims=0; \
429 for (int32_t i=0; i<num_chars; i++) \
431 if (buffer[i]==':') \
437 int32_t index_start_pos=-1; \
438 int32_t feature_start_pos; \
439 int32_t current_feat=0; \
440 if (len < num_dims) \
441 vector=SG_REALLOC(SGSparseVectorEntry<sg_type>, vector, len, num_dims); \
442 for (int32_t i=0; i<num_chars; i++) \
444 if (buffer[i]==':') \
447 vector[current_feat].feat_index=(int32_t) atoi(buffer+index_start_pos)-1; \
449 index_start_pos=-1; \
451 feature_start_pos=i+1; \
/* bound check first: never dereference buffer[i] once i == num_chars */ \
452 while ((i<num_chars) && (buffer[i]!=' ')) \
458 vector[current_feat].entry=(sg_type) conv(buffer+feature_start_pos); \
462 else if (buffer[i]==' ') \
469 if (index_start_pos == -1) \
491 #undef GET_SPARSE_VECTOR
/*
 * GET_SPARSE_VECTOR_AND_LABEL(fname, conv, sg_type)
 *
 * Expands to CStreamingAsciiFile::get_sparse_vector_and_label(
 * SGSparseVectorEntry<sg_type>*& vector, int32_t& len,
 * float64_t& label). The generated function reads one line from
 * `buf`, strips a trailing newline, and makes a first pass over the
 * line looking at ':' characters to determine num_dims. `vector` is
 * grown with SG_REALLOC only when the incoming `len` is smaller than
 * num_dims.
 *
 * The label is located by scanning from index 1 for a space that is
 * preceded by a non-space character and is parsed with atof() from the
 * start of the line; SG_ERROR("No label found!") is raised on the
 * failure path. `buffer` and num_chars are then advanced past the
 * label (label_pos + 1) and the remaining "index:value" entries are
 * parsed: feat_index is atoi() of the index text minus one, entry is
 * `conv` applied to the value text.
 *
 * The value-scanning loop checks i < num_chars before it reads
 * buffer[i], so the buffer is never read at or past num_chars.
 *
 * NOTE(review): `fname` is not referenced by any line visible here;
 * the assignment of label_pos and the final assignment of `len` are
 * not visible in this excerpt.
 */
495 #define GET_SPARSE_VECTOR_AND_LABEL(fname, conv, sg_type) \
496 void CStreamingAsciiFile::get_sparse_vector_and_label(SGSparseVectorEntry<sg_type>*& vector, int32_t& len, float64_t& label) \
498 char* buffer = NULL; \
499 ssize_t bytes_read; \
502 bytes_read = buf->read_line(buffer); \
514 if (buffer[bytes_read-1]=='\n') \
516 num_chars=bytes_read-1; \
517 buffer[num_chars]='\0'; \
520 num_chars=bytes_read; \
522 int32_t num_dims=0; \
523 for (int32_t i=0; i<num_chars; i++) \
525 if (buffer[i]==':') \
531 int32_t index_start_pos=-1; \
532 int32_t feature_start_pos; \
533 int32_t current_feat=0; \
534 int32_t label_pos=-1; \
535 if (len < num_dims) \
536 vector=SG_REALLOC(SGSparseVectorEntry<sg_type>, vector, len, num_dims); \
538 for (int32_t i=1; i<num_chars; i++) \
540 if (buffer[i]==':') \
544 if ( (buffer[i]==' ') && (buffer[i-1]!=' ') ) \
548 label=atof(buffer); \
554 SG_ERROR("No label found!\n") \
556 buffer+=label_pos+1; \
557 num_chars-=label_pos+1; \
558 for (int32_t i=0; i<num_chars; i++) \
560 if (buffer[i]==':') \
563 vector[current_feat].feat_index=(int32_t) atoi(buffer+index_start_pos)-1; \
565 index_start_pos=-1; \
567 feature_start_pos=i+1; \
/* bound check first: never dereference buffer[i] once i == num_chars */ \
568 while ((i<num_chars) && (buffer[i]!=' ')) \
574 vector[current_feat].entry=(sg_type) conv(buffer+feature_start_pos); \
578 else if (buffer[i]==' ') \
585 if (index_start_pos == -1) \
607 #undef GET_SPARSE_VECTOR_AND_LABEL
/*
 * Copies the characters in [ptr_item, ptr_data) into a newly allocated,
 * zero-initialised buffer of len+1 bytes, so the copy is always
 * NUL-terminated, and logs it at debug level.
 *
 * items     array the new string is destined for
 *           (NOTE(review): the append itself is not visible in this
 *           excerpt -- confirm)
 * ptr_data  one past the last character of the item; also the
 *           character printed as "current" in the debug message
 * ptr_item  first character of the item
 *
 * `len` is a size_t, so it is converted explicitly before being passed
 * to the "%d" conversion of the printf-style debug message; passing a
 * size_t where "%d" expects an int is an argument type mismatch.
 */
610 void CStreamingAsciiFile::append_item(
611 DynArray<T>* items,
char* ptr_data,
char* ptr_item)
613 size_t len=(ptr_data-ptr_item)/
sizeof(
char);
614 char* item=SG_MALLOC(
char, len+1);
615 memset(item, 0,
sizeof(
char)*(len+1));
616 item=strncpy(item, ptr_item, len);
618 SG_DEBUG(
"current %c, len %d, item %s\n", *ptr_data, static_cast<int32_t>(len), item)
624 m_delimiter = delimiter;