23 SG_UNSTABLE(
"CStreamingAsciiFile::CStreamingAsciiFile()",
"\n")
/**
 * GET_VECTOR(fname, conv, sg_type): generates the dense-vector reader
 * CStreamingAsciiFile::get_vector(sg_type*& vector, int32_t& num_feat).
 * In the lines shown here the generated function is always named get_vector;
 * fname is not referenced.
 *
 * Visible steps:
 *  - one line is fetched from buf with read_line();
 *  - the line is walked character by character, stopping at '\n' or once
 *    bytes_read characters were consumed; isblank() decides where an item
 *    starts and ends, and each finished item is passed to append_item();
 *  - vector is grown with SG_REALLOC only when the incoming num_feat
 *    (saved as old_len) is smaller than the new num_feat;
 *  - every collected item is converted with conv and stored in vector.
 *
 * NOTE(review): this listing has gaps - the statements that assign num_feat,
 * react to an empty read and release `items` are not visible; confirm them
 * against the complete source.
 */
39 #define GET_VECTOR(fname, conv, sg_type) \
40 void CStreamingAsciiFile::get_vector(sg_type*& vector, int32_t& num_feat) \
42 char* buffer = NULL; \
44 int32_t old_len = num_feat; \
47 bytes_read = buf->read_line(buffer); \
61 char* ptr_item=NULL; \
62 char* ptr_data=buffer; \
63 DynArray<char*>* items=new DynArray<char*>(); \
67 if ((*ptr_data=='\n') || \
68 (ptr_data - buffer >= bytes_read)) \
73 append_item(items, ptr_data, ptr_item); \
80 else if (!isblank(*ptr_data) && !ptr_item) \
84 else if (isblank(*ptr_data) && ptr_item) \
86 append_item(items, ptr_data, ptr_item); \
94 SG_DEBUG("num_feat %d\n", num_feat) \
97 if (old_len < num_feat) \
98 vector=SG_REALLOC(sg_type, vector, old_len, num_feat); \
100 for (int32_t i=0; i<num_feat; i++) \
102 char* item=items->get_element(i); \
103 vector[i]=conv(item); \
110 GET_VECTOR(get_bool_vector, str_to_bool,
bool)
/**
 * GET_FLOAT_VECTOR(sg_type): generates
 * CStreamingAsciiFile::get_vector(sg_type*& vector, int32_t& len) for
 * floating point element types.
 *
 * Visible steps:
 *  - read_line() yields the line and its character count num_chars;
 *    num_chars == 0 is tested explicitly (its handling is not shown here);
 *  - the line is wrapped in a substring {line, line + num_chars} and split
 *    with tokenize() using m_delimiter;
 *  - len becomes words.index();
 *  - vector is resized with SG_REALLOC from old_len to len
 *    (NOTE(review): the guarding condition, if any, is not visible);
 *  - every token from &words[0] up to words.end is converted with
 *    SGIO::float_of_substring() and written to vector in order.
 */
123 #define GET_FLOAT_VECTOR(sg_type) \
124 void CStreamingAsciiFile::get_vector(sg_type*& vector, int32_t& len)\
128 int32_t num_chars = buf->read_line(line); \
129 int32_t old_len = len; \
131 if (num_chars == 0) \
138 substring example_string = {line, line + num_chars}; \
140 tokenize(m_delimiter, example_string, words); \
142 len = words.index(); \
143 substring* feature_start = &words[0]; \
146 vector = SG_REALLOC(sg_type, vector, old_len, len); \
149 for (substring* i = feature_start; i != words.end; i++) \
151 vector[j++] = SGIO::float_of_substring(*i); \
158 #undef GET_FLOAT_VECTOR
/**
 * GET_VECTOR_AND_LABEL(fname, conv, sg_type): generates
 * CStreamingAsciiFile::get_vector_and_label(sg_type*& vector,
 * int32_t& num_feat, float64_t& label).
 *
 * Same line scanning as the unlabelled dense reader (read_line(), isblank()
 * based item boundaries, append_item()), with these differences visible:
 *  - the first item is parsed with atof() and stored in label;
 *  - the remaining items 1..num_feat-1 are converted with conv and stored at
 *    vector[0..num_feat-2];
 *  - vector is grown with SG_REALLOC only when old_len < num_feat - 1.
 *
 * NOTE(review): the statements that assign num_feat (and whether it is
 * reduced by one before returning) are not part of this listing; confirm
 * against the complete source.
 */
162 #define GET_VECTOR_AND_LABEL(fname, conv, sg_type) \
163 void CStreamingAsciiFile::get_vector_and_label(sg_type*& vector, int32_t& num_feat, float64_t& label) \
165 char* buffer = NULL; \
166 ssize_t bytes_read; \
167 int32_t old_len = num_feat; \
170 bytes_read = buf->read_line(buffer); \
184 char* ptr_item=NULL; \
185 char* ptr_data=buffer; \
186 DynArray<char*>* items=new DynArray<char*>(); \
190 if ((*ptr_data=='\n') || \
191 (ptr_data - buffer >= bytes_read)) \
196 append_item(items, ptr_data, ptr_item); \
203 else if (!isblank(*ptr_data) && !ptr_item) \
207 else if (isblank(*ptr_data) && ptr_item) \
209 append_item(items, ptr_data, ptr_item); \
217 SG_DEBUG("num_feat %d\n", num_feat) \
219 label=atof(items->get_element(0)); \
221 if (old_len < num_feat - 1) \
222 vector=SG_REALLOC(sg_type, vector, old_len, num_feat-1); \
224 for (int32_t i=1; i<num_feat; i++) \
226 char* item=items->get_element(i); \
227 vector[i-1]=conv(item); \
246 #undef GET_VECTOR_AND_LABEL
/**
 * GET_FLOAT_VECTOR_AND_LABEL(sg_type): generates
 * CStreamingAsciiFile::get_vector_and_label(sg_type*& vector, int32_t& len,
 * float64_t& label) for floating point element types.
 *
 * Visible steps:
 *  - read_line() yields the line and num_chars; num_chars == 0 is tested
 *    explicitly (its handling is not shown here);
 *  - the line is split with tokenize() using m_delimiter;
 *  - label is SGIO::float_of_substring(words[0]);
 *  - len becomes words.index() - 1 and the features start at words[1];
 *  - vector is resized with SG_REALLOC from old_len to len
 *    (NOTE(review): the guarding condition, if any, is not visible);
 *  - each remaining token is converted with SGIO::float_of_substring().
 *
 * NOTE(review): nothing visible here guards against a line that produces no
 * tokens before words[0] is read - verify in the complete source.
 */
248 #define GET_FLOAT_VECTOR_AND_LABEL(sg_type) \
249 void CStreamingAsciiFile::get_vector_and_label(sg_type*& vector, int32_t& len, float64_t& label) \
253 int32_t num_chars = buf->read_line(line); \
254 int32_t old_len = len; \
256 if (num_chars == 0) \
263 substring example_string = {line, line + num_chars}; \
265 tokenize(m_delimiter, example_string, words); \
267 label = SGIO::float_of_substring(words[0]); \
269 len = words.index() - 1; \
270 substring* feature_start = &words[1]; \
273 vector = SG_REALLOC(sg_type, vector, old_len, len); \
276 for (substring* i = feature_start; i != words.end; i++) \
278 vector[j++] = SGIO::float_of_substring(*i); \
285 #undef GET_FLOAT_VECTOR_AND_LABEL
/**
 * GET_STRING(fname, conv, sg_type): generates
 * CStreamingAsciiFile::get_string(sg_type*& vector, int32_t& len).
 * In the lines shown here neither fname nor conv is referenced.
 *
 * Visible steps:
 *  - one line is fetched from buf with read_line() and logged via SG_DEBUG;
 *  - a trailing '\n' (checked at buffer[bytes_read-1]) is overwritten
 *    with '\0';
 *  - vector is set to the read buffer itself, cast to sg_type*; no copy is
 *    made in the visible lines.
 *
 * NOTE(review): the handling of a failed/empty read and the assignment of
 * len are not part of this listing; confirm against the complete source.
 */
289 #define GET_STRING(fname, conv, sg_type) \
290 void CStreamingAsciiFile::get_string(sg_type*& vector, int32_t& len) \
292 char* buffer = NULL; \
293 ssize_t bytes_read; \
296 bytes_read = buf->read_line(buffer); \
306 SG_DEBUG("Line read from the file:\n%s\n", buffer) \
308 if (buffer[bytes_read-1]=='\n') \
311 buffer[bytes_read-1]='\0'; \
315 vector=(sg_type *) buffer; \
319 GET_STRING(get_bool_string, str_to_bool,
bool)
/**
 * GET_STRING_AND_LABEL(fname, conv, sg_type): generates
 * CStreamingAsciiFile::get_string_and_label(sg_type*& vector, int32_t& len,
 * float64_t& label).
 *
 * Visible steps:
 *  - one line is fetched from buf with read_line();
 *  - the first bytes_read characters are searched for a ' '; the text at the
 *    start of the buffer is parsed as the label;
 *  - str_start_pos == -1 (no separator found) is tested explicitly (its
 *    handling is not shown here);
 *  - if the line ends in '\n' that character is overwritten with '\0' and
 *    len is bytes_read-str_start_pos-1, otherwise len is
 *    bytes_read-str_start_pos;
 *  - vector points into the read buffer at str_start_pos (no copy is made in
 *    the visible lines).
 *
 * The label is parsed with atof() rather than atoi(): label is a float64_t
 * and the other *_and_label readers in this file parse it as a floating
 * point value, so atoi() silently truncated fractional labels.
 *
 * NOTE(review): this listing has gaps; the statement assigning
 * str_start_pos is not visible - confirm against the complete source.
 */
336 #define GET_STRING_AND_LABEL(fname, conv, sg_type) \
337 void CStreamingAsciiFile::get_string_and_label(sg_type*& vector, int32_t& len, float64_t& label) \
339 char* buffer = NULL; \
340 ssize_t bytes_read; \
343 bytes_read = buf->read_line(buffer); \
353 int32_t str_start_pos=-1; \
355 for (int32_t i=0; i<bytes_read; i++) \
357 if (buffer[i] == ' ') \
360 label=atof(buffer); \
367 if (str_start_pos == -1) \
374 if (buffer[bytes_read-1]=='\n') \
376 buffer[bytes_read-1]='\0'; \
377 len=bytes_read-str_start_pos-1; \
380 len=bytes_read-str_start_pos; \
382 vector=(sg_type*) &buffer[str_start_pos]; \
399 #undef GET_STRING_AND_LABEL
/**
 * GET_SPARSE_VECTOR(fname, conv, sg_type): generates
 * CStreamingAsciiFile::get_sparse_vector(SGSparseVectorEntry<sg_type>*& vector,
 * int32_t& len) for lines made of "index:value" pairs separated by ' '.
 *
 * Visible steps:
 *  - one line is fetched from buf with read_line(); a trailing '\n' is
 *    replaced by '\0' and excluded from num_chars;
 *  - a first pass looks at every ':' (presumably counting them into
 *    num_dims - the increment is not visible in this listing);
 *  - vector is grown with SG_REALLOC only when len < num_dims;
 *  - a second pass handles each ':': feat_index is atoi() of the text at
 *    index_start_pos minus one, the scan then advances to the next ' ' or
 *    the end of the line, and entry is conv() of the text at
 *    feature_start_pos.
 *
 * The inner while tests i<num_chars before reading buffer[i]; the previous
 * order read buffer[num_chars], which lies past the data returned by
 * read_line() when the line has no trailing newline.
 *
 * NOTE(review): this listing has gaps (loop bodies, updates of
 * index_start_pos/current_feat and the final len assignment are missing);
 * confirm against the complete source.
 */
403 #define GET_SPARSE_VECTOR(fname, conv, sg_type) \
404 void CStreamingAsciiFile::get_sparse_vector(SGSparseVectorEntry<sg_type>*& vector, int32_t& len) \
406 char* buffer = NULL; \
407 ssize_t bytes_read; \
410 bytes_read = buf->read_line(buffer); \
422 if (buffer[bytes_read-1]=='\n') \
424 num_chars=bytes_read-1; \
425 buffer[num_chars]='\0'; \
428 num_chars=bytes_read; \
430 int32_t num_dims=0; \
431 for (int32_t i=0; i<num_chars; i++) \
433 if (buffer[i]==':') \
439 int32_t index_start_pos=-1; \
440 int32_t feature_start_pos; \
441 int32_t current_feat=0; \
442 if (len < num_dims) \
443 vector=SG_REALLOC(SGSparseVectorEntry<sg_type>, vector, len, num_dims); \
444 for (int32_t i=0; i<num_chars; i++) \
446 if (buffer[i]==':') \
449 vector[current_feat].feat_index=(int32_t) atoi(buffer+index_start_pos)-1; \
451 index_start_pos=-1; \
453 feature_start_pos=i+1; \
454 while ((i<num_chars) && (buffer[i]!=' ')) \
460 vector[current_feat].entry=(sg_type) conv(buffer+feature_start_pos); \
464 else if (buffer[i]==' ') \
471 if (index_start_pos == -1) \
493 #undef GET_SPARSE_VECTOR
/**
 * GET_SPARSE_VECTOR_AND_LABEL(fname, conv, sg_type): generates
 * CStreamingAsciiFile::get_sparse_vector_and_label(
 * SGSparseVectorEntry<sg_type>*& vector, int32_t& len, float64_t& label) for
 * lines of the form "label index:value index:value ...".
 *
 * Visible steps:
 *  - one line is fetched from buf with read_line(); a trailing '\n' is
 *    replaced by '\0' and excluded from num_chars;
 *  - a first pass looks at every ':' (presumably counting them into
 *    num_dims - the increment is not visible in this listing);
 *  - vector is grown with SG_REALLOC only when len < num_dims;
 *  - the label is located by scanning from index 1 for a ' ' that follows a
 *    non-space character; label is atof() of the buffer start, and
 *    SG_ERROR("No label found!\n") is raised on the failure path;
 *  - buffer and num_chars are advanced past label_pos, then each ':' is
 *    handled as in the unlabelled sparse reader: feat_index is atoi() of the
 *    index text minus one, entry is conv() of the value text.
 *
 * The inner while tests i<num_chars before reading buffer[i]; the previous
 * order read buffer[num_chars], which lies past the data returned by
 * read_line() when the line has no trailing newline.
 *
 * NOTE(review): this listing has gaps (loop bodies, the assignment of
 * label_pos and the final len assignment are missing); confirm against the
 * complete source.
 */
497 #define GET_SPARSE_VECTOR_AND_LABEL(fname, conv, sg_type) \
498 void CStreamingAsciiFile::get_sparse_vector_and_label(SGSparseVectorEntry<sg_type>*& vector, int32_t& len, float64_t& label) \
500 char* buffer = NULL; \
501 ssize_t bytes_read; \
504 bytes_read = buf->read_line(buffer); \
516 if (buffer[bytes_read-1]=='\n') \
518 num_chars=bytes_read-1; \
519 buffer[num_chars]='\0'; \
522 num_chars=bytes_read; \
524 int32_t num_dims=0; \
525 for (int32_t i=0; i<num_chars; i++) \
527 if (buffer[i]==':') \
533 int32_t index_start_pos=-1; \
534 int32_t feature_start_pos; \
535 int32_t current_feat=0; \
536 int32_t label_pos=-1; \
537 if (len < num_dims) \
538 vector=SG_REALLOC(SGSparseVectorEntry<sg_type>, vector, len, num_dims); \
540 for (int32_t i=1; i<num_chars; i++) \
542 if (buffer[i]==':') \
546 if ( (buffer[i]==' ') && (buffer[i-1]!=' ') ) \
550 label=atof(buffer); \
556 SG_ERROR("No label found!\n") \
558 buffer+=label_pos+1; \
559 num_chars-=label_pos+1; \
560 for (int32_t i=0; i<num_chars; i++) \
562 if (buffer[i]==':') \
565 vector[current_feat].feat_index=(int32_t) atoi(buffer+index_start_pos)-1; \
567 index_start_pos=-1; \
569 feature_start_pos=i+1; \
570 while ((i<num_chars) && (buffer[i]!=' ')) \
576 vector[current_feat].entry=(sg_type) conv(buffer+feature_start_pos); \
580 else if (buffer[i]==' ') \
587 if (index_start_pos == -1) \
609 #undef GET_SPARSE_VECTOR_AND_LABEL
/**
 * Copies the characters in [ptr_item, ptr_data) into a freshly allocated,
 * zero-terminated char array.
 *
 * @param items    array of collected items (NOTE(review): the statement that
 *                 stores the new item in it is not visible in this listing)
 * @param ptr_data points one past the last character of the item
 * @param ptr_item points at the first character of the item
 *
 * Both pointers must be non-NULL; this is enforced with REQUIRE.
 *
 * The buffer is allocated with len+1 chars and zero-filled first, so the
 * result stays terminated although strncpy() copies exactly len characters.
 *
 * len is a size_t; it is converted explicitly for the "%d" conversion in the
 * debug message, because passing a size_t where an int is expected is a
 * varargs type mismatch.
 */
612 void CStreamingAsciiFile::append_item(
613 DynArray<T>* items,
char* ptr_data,
char* ptr_item)
615 REQUIRE(ptr_data && ptr_item,
"Data and Item to append should not be NULL\n");
617 size_t len=(ptr_data-ptr_item)/
sizeof(
char);
618 char* item=SG_MALLOC(
char, len+1);
619 memset(item, 0,
sizeof(
char)*(len+1));
620 item=strncpy(item, ptr_item, len);
622 SG_DEBUG(
"current %c, len %d, item %s\n", *ptr_data, static_cast<int32_t>(len), item)
628 m_delimiter = delimiter;
633 char *last = s.
start;
636 if (*s.
start == delim)
#define GET_SPARSE_VECTOR(fname, conv, sg_type)
#define GET_STRING(fname, conv, sg_type)
void set_delimiter(char delimiter)
bool append_element(T element)
#define GET_VECTOR(fname, conv, sg_type)
virtual ~CStreamingAsciiFile()
Class v_array taken directly from JL's implementation.
struct Substring, specified by start position and end position.
void push(const T &new_elem)
A Streaming File access class.
#define GET_VECTOR_AND_LABEL(fname, conv, sg_type)
#define GET_FLOAT_VECTOR(sg_type)
Template Dynamic array class that creates an array that can be used like a list or an array...
#define GET_FLOAT_VECTOR_AND_LABEL(sg_type)
all of classes and functions are contained in the shogun namespace
#define GET_STRING_AND_LABEL(fname, conv, sg_type)
#define SG_UNSTABLE(func,...)
#define GET_SPARSE_VECTOR_AND_LABEL(fname, conv, sg_type)