StreamingAsciiFile.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2011 Shashwat Lal Das
00008  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
00009  */
00010 
#include <shogun/io/streaming/StreamingAsciiFile.h>
#include <shogun/mathematics/Math.h>

#include <ctype.h>
#include <stdlib.h>
00015 
00016 using namespace shogun;
00017 
00018 CStreamingAsciiFile::CStreamingAsciiFile()
00019         : CStreamingFile()
00020 {
00021         SG_UNSTABLE("CStreamingAsciiFile::CStreamingAsciiFile()", "\n");
00022 }
00023 
00024 CStreamingAsciiFile::CStreamingAsciiFile(const char* fname, char rw)
00025         : CStreamingFile(fname, rw)
00026 {
00027 }
00028 
00029 CStreamingAsciiFile::~CStreamingAsciiFile()
00030 {
00031 }
00032 
00033 /* Methods for reading dense vectors from an ascii file */
00034 
00035 #define GET_VECTOR(fname, conv, sg_type)                                    \
00036 void CStreamingAsciiFile::get_vector(sg_type*& vector, int32_t& num_feat)   \
00037 {                                                                           \
00038         char* buffer = NULL;                                                \
00039         ssize_t bytes_read;                                                 \
00040         int32_t old_len = num_feat;                                         \
00041                                                                             \
00042         bytes_read = buf->read_line(buffer);                                \
00043                                                                             \
00044         if (bytes_read<=0)                                                  \
00045         {                                                                   \
00046                 vector=NULL;                                                \
00047                 num_feat=-1;                                                \
00048                 return;                                                     \
00049         }                                                                   \
00050                                                                             \
00051         /* determine num_feat, populate dynamic array */                    \
00052         int32_t nf=0;                                                       \
00053         num_feat=0;                                                         \
00054                                                                             \
00055         char* ptr_item=NULL;                                                \
00056         char* ptr_data=buffer;                                              \
00057         DynArray<char*>* items=new DynArray<char*>();                       \
00058                                                                             \
00059         while (*ptr_data)                                                   \
00060         {                                                                   \
00061                 if ((*ptr_data=='\n') ||                                    \
00062                     (ptr_data - buffer >= bytes_read))                      \
00063                 {                                                           \
00064                         if (ptr_item)                                       \
00065                                 nf++;                                       \
00066                                                                             \
00067                         append_item(items, ptr_data, ptr_item);             \
00068                         num_feat=nf;                                        \
00069                                                                             \
00070                         nf=0;                                               \
00071                         ptr_item=NULL;                                      \
00072                         break;                                              \
00073                 }                                                           \
00074                 else if (!isblank(*ptr_data) && !ptr_item)                  \
00075                 {                                                           \
00076                         ptr_item=ptr_data;                                  \
00077                 }                                                           \
00078                 else if (isblank(*ptr_data) && ptr_item)                    \
00079                 {                                                           \
00080                         append_item(items, ptr_data, ptr_item);             \
00081                         ptr_item=NULL;                                      \
00082                         nf++;                                               \
00083                 }                                                           \
00084                                                                             \
00085                 ptr_data++;                                                 \
00086         }                                                                   \
00087                                                                             \
00088         SG_DEBUG("num_feat %d\n", num_feat);                                \
00089                                                                             \
00090         /* now copy data into vector */                                     \
00091         if (old_len < num_feat)                                             \
00092                 vector=SG_REALLOC(sg_type, vector, num_feat);               \
00093                                                                             \
00094         for (int32_t i=0; i<num_feat; i++)                                  \
00095         {                                                                   \
00096                 char* item=items->get_element(i);                           \
00097                 vector[i]=conv(item);                                       \
00098                 SG_FREE(item);                                              \
00099         }                                                                   \
00100         delete items;                                                       \
00101 }
00102 
00103 GET_VECTOR(get_bool_vector, str_to_bool, bool)
00104 GET_VECTOR(get_byte_vector, atoi, uint8_t)
00105 GET_VECTOR(get_char_vector, atoi, char)
00106 GET_VECTOR(get_int_vector, atoi, int32_t)
00107 GET_VECTOR(get_short_vector, atoi, int16_t)
00108 GET_VECTOR(get_word_vector, atoi, uint16_t)
00109 GET_VECTOR(get_int8_vector, atoi, int8_t)
00110 GET_VECTOR(get_uint_vector, atoi, uint32_t)
00111 GET_VECTOR(get_long_vector, atoi, int64_t)
00112 GET_VECTOR(get_ulong_vector, atoi, uint64_t)
00113 GET_VECTOR(get_longreal_vector, atoi, floatmax_t)
00114 #undef GET_VECTOR
00115 
00116 #define GET_FLOAT_VECTOR(sg_type)                                           \
00117         void CStreamingAsciiFile::get_vector(sg_type*& vector, int32_t& len)\
00118         {                                                                   \
00119                 char *line=NULL;                                            \
00120                 int32_t num_chars = buf->read_line(line);                   \
00121                 int32_t old_len = len;                                      \
00122                                                                             \
00123                 if (num_chars == 0)                                         \
00124                 {                                                           \
00125                         len = -1;                                           \
00126                         return;                                             \
00127                 }                                                           \
00128                                                                             \
00129                 substring example_string = {line, line + num_chars};        \
00130                                                                             \
00131                 CAsciiFile::tokenize(' ', example_string, words);           \
00132                                                                             \
00133                 len = words.index();                                        \
00134                 substring* feature_start = &words[0];                       \
00135                                                                             \
00136                 if (len > old_len)                                          \
00137                         vector = SG_REALLOC(sg_type, vector, len);          \
00138                                                                             \
00139                 int32_t j=0;                                                \
00140                 for (substring* i = feature_start; i != words.end; i++)     \
00141                 {                                                           \
00142                         vector[j++] = float_of_substring(*i);               \
00143                 }                                                           \
00144         }
00145 
00146 GET_FLOAT_VECTOR(float32_t)
00147 GET_FLOAT_VECTOR(float64_t)
00148 #undef GET_FLOAT_VECTOR
00149 
00150 /* Methods for reading a dense vector and a label from an ascii file */
00151 
00152 #define GET_VECTOR_AND_LABEL(fname, conv, sg_type)                      \
00153         void CStreamingAsciiFile::get_vector_and_label(sg_type*& vector, int32_t& num_feat, float64_t& label) \
00154         {                                                               \
00155                 char* buffer = NULL;                                    \
00156                 ssize_t bytes_read;                                     \
00157                 int32_t old_len = num_feat;                             \
00158                                                                         \
00159                 bytes_read = buf->read_line(buffer);                    \
00160                                                                         \
00161                 if (bytes_read<=0)                                      \
00162                 {                                                       \
00163                         vector=NULL;                                    \
00164                         num_feat=-1;                                    \
00165                         return;                                         \
00166                 }                                                       \
00167                                                                         \
00168                 /* determine num_feat, populate dynamic array */        \
00169                 int32_t nf=0;                                           \
00170                 num_feat=0;                                             \
00171                                                                         \
00172                 char* ptr_item=NULL;                                    \
00173                 char* ptr_data=buffer;                                  \
00174                 DynArray<char*>* items=new DynArray<char*>();           \
00175                                                                         \
00176                 while (*ptr_data)                                       \
00177                 {                                                       \
00178                         if ((*ptr_data=='\n') ||                        \
00179                             (ptr_data - buffer >= bytes_read))          \
00180                         {                                               \
00181                                 if (ptr_item)                           \
00182                                         nf++;                           \
00183                                                                         \
00184                                 append_item(items, ptr_data, ptr_item); \
00185                                 num_feat=nf;                            \
00186                                                                         \
00187                                 nf=0;                                   \
00188                                 ptr_item=NULL;                          \
00189                                 break;                                  \
00190                         }                                               \
00191                         else if (!isblank(*ptr_data) && !ptr_item)      \
00192                         {                                               \
00193                                 ptr_item=ptr_data;                      \
00194                         }                                               \
00195                         else if (isblank(*ptr_data) && ptr_item)        \
00196                         {                                               \
00197                                 append_item(items, ptr_data, ptr_item); \
00198                                 ptr_item=NULL;                          \
00199                                 nf++;                                   \
00200                         }                                               \
00201                                                                         \
00202                         ptr_data++;                                     \
00203                 }                                                       \
00204                                                                         \
00205                 SG_DEBUG("num_feat %d\n", num_feat);                    \
00206                 /* The first element is the label */                    \
00207                 label=atof(items->get_element(0));                      \
00208                 /* now copy rest of the data into vector */             \
00209                 if (old_len < num_feat - 1)                             \
00210                         vector=SG_REALLOC(sg_type, vector, num_feat-1); \
00211                                                                         \
00212                 for (int32_t i=1; i<num_feat; i++)                      \
00213                 {                                                       \
00214                         char* item=items->get_element(i);               \
00215                         vector[i-1]=conv(item);                         \
00216                         SG_FREE(item);                                  \
00217                 }                                                       \
00218                 delete items;                                           \
00219                 num_feat--;                                             \
00220         }
00221 
00222 GET_VECTOR_AND_LABEL(get_bool_vector_and_label, str_to_bool, bool)
00223 GET_VECTOR_AND_LABEL(get_byte_vector_and_label, atoi, uint8_t)
00224 GET_VECTOR_AND_LABEL(get_char_vector_and_label, atoi, char)
00225 GET_VECTOR_AND_LABEL(get_int_vector_and_label, atoi, int32_t)
00226 GET_VECTOR_AND_LABEL(get_short_vector_and_label, atoi, int16_t)
00227 GET_VECTOR_AND_LABEL(get_word_vector_and_label, atoi, uint16_t)
00228 GET_VECTOR_AND_LABEL(get_int8_vector_and_label, atoi, int8_t)
00229 GET_VECTOR_AND_LABEL(get_uint_vector_and_label, atoi, uint32_t)
00230 GET_VECTOR_AND_LABEL(get_long_vector_and_label, atoi, int64_t)
00231 GET_VECTOR_AND_LABEL(get_ulong_vector_and_label, atoi, uint64_t)
00232 GET_VECTOR_AND_LABEL(get_longreal_vector_and_label, atoi, floatmax_t)
00233 #undef GET_VECTOR_AND_LABEL
00234 
00235 #define GET_FLOAT_VECTOR_AND_LABEL(sg_type)                             \
00236         void CStreamingAsciiFile::get_vector_and_label(sg_type*& vector, int32_t& len, float64_t& label) \
00237         {                                                               \
00238                 char *line=NULL;                                        \
00239                 int32_t num_chars = buf->read_line(line);               \
00240                 int32_t old_len = len;                                  \
00241                                                                         \
00242                 if (num_chars == 0)                                     \
00243                 {                                                       \
00244                         len = -1;                                       \
00245                         return;                                         \
00246                 }                                                       \
00247                                                                         \
00248                 substring example_string = {line, line + num_chars};    \
00249                                                                         \
00250                 CAsciiFile::tokenize(' ', example_string, words);       \
00251                                                                         \
00252                 label = float_of_substring(words[0]);                   \
00253                                                                         \
00254                 len = words.index() - 1;                                \
00255                 substring* feature_start = &words[1];                   \
00256                                                                         \
00257                 if (len > old_len)                                      \
00258                         vector = SG_REALLOC(sg_type, vector, len);      \
00259                                                                         \
00260                 int32_t j=0;                                            \
00261                 for (substring* i = feature_start; i != words.end; i++) \
00262                 {                                                       \
00263                         vector[j++] = float_of_substring(*i);           \
00264                 }                                                       \
00265         }
00266 
00267 GET_FLOAT_VECTOR_AND_LABEL(float32_t)
00268 GET_FLOAT_VECTOR_AND_LABEL(float64_t)
00269 #undef GET_FLOAT_VECTOR_AND_LABEL
00270 
00271 /* Methods for reading a string vector from an ascii file (see StringFeatures) */
00272 
00273 #define GET_STRING(fname, conv, sg_type)                                \
00274 void CStreamingAsciiFile::get_string(sg_type*& vector, int32_t& len)    \
00275 {                                                                       \
00276         char* buffer = NULL;                                            \
00277         ssize_t bytes_read;                                             \
00278                                                                         \
00279         bytes_read = buf->read_line(buffer);                            \
00280                                                                         \
00281         if (bytes_read<=1)                                              \
00282         {                                                               \
00283                 vector=NULL;                                            \
00284                 len=-1;                                                 \
00285                 return;                                                 \
00286         }                                                               \
00287                                                                         \
00288         SG_DEBUG("Line read from the file:\n%s\n", buffer);             \
00289         /* Remove the terminating \n */                                 \
00290         if (buffer[bytes_read-1]=='\n')                                 \
00291         {                                                               \
00292                 len=bytes_read-1;                                       \
00293                 buffer[bytes_read-1]='\0';                              \
00294         }                                                               \
00295         else                                                            \
00296                 len=bytes_read;                                         \
00297         vector=(sg_type *) buffer;                                      \
00298 }
00299 
00300 GET_STRING(get_bool_string, str_to_bool, bool)
00301 GET_STRING(get_byte_string, atoi, uint8_t)
00302 GET_STRING(get_char_string, atoi, char)
00303 GET_STRING(get_int_string, atoi, int32_t)
00304 GET_STRING(get_shortreal_string, atof, float32_t)
00305 GET_STRING(get_real_string, atof, float64_t)
00306 GET_STRING(get_short_string, atoi, int16_t)
00307 GET_STRING(get_word_string, atoi, uint16_t)
00308 GET_STRING(get_int8_string, atoi, int8_t)
00309 GET_STRING(get_uint_string, atoi, uint32_t)
00310 GET_STRING(get_long_string, atoi, int64_t)
00311 GET_STRING(get_ulong_string, atoi, uint64_t)
00312 GET_STRING(get_longreal_string, atoi, floatmax_t)
00313 #undef GET_STRING
00314 
00315 /* Methods for reading a string vector and a label from an ascii file */
00316 
00317 #define GET_STRING_AND_LABEL(fname, conv, sg_type)                      \
00318 void CStreamingAsciiFile::get_string_and_label(sg_type*& vector, int32_t& len, float64_t& label) \
00319 {                                                                       \
00320         char* buffer = NULL;                                            \
00321         ssize_t bytes_read;                                             \
00322                                                                         \
00323         bytes_read = buf->read_line(buffer);                            \
00324                                                                         \
00325         if (bytes_read<=1)                                              \
00326         {                                                               \
00327                 vector=NULL;                                            \
00328                 len=-1;                                                 \
00329                 return;                                                 \
00330         }                                                               \
00331                                                                         \
00332         int32_t str_start_pos=-1;                                       \
00333                                                                         \
00334         for (int32_t i=0; i<bytes_read; i++)                            \
00335         {                                                               \
00336                 if (buffer[i] == ' ')                                   \
00337                 {                                                       \
00338                         buffer[i]='\0';                                 \
00339                         label=atoi(buffer);                             \
00340                         buffer[i]=' ';                                  \
00341                         str_start_pos=i+1;                              \
00342                         break;                                          \
00343                 }                                                       \
00344         }                                                               \
00345         /* If no label found, set vector=NULL and length=-1 */          \
00346         if (str_start_pos == -1)                                        \
00347         {                                                               \
00348                 vector=NULL;                                            \
00349                 len=-1;                                                 \
00350                 return;                                                 \
00351         }                                                               \
00352         /* Remove terminating \n */                                     \
00353         if (buffer[bytes_read-1]=='\n')                                 \
00354         {                                                               \
00355                 buffer[bytes_read-1]='\0';                              \
00356                 len=bytes_read-str_start_pos-1;                         \
00357         }                                                               \
00358         else                                                            \
00359                 len=bytes_read-str_start_pos;                           \
00360                                                                         \
00361         vector=(sg_type*) &buffer[str_start_pos];                       \
00362 }
00363 
00364 GET_STRING_AND_LABEL(get_bool_string_and_label, str_to_bool, bool)
00365 GET_STRING_AND_LABEL(get_byte_string_and_label, atoi, uint8_t)
00366 GET_STRING_AND_LABEL(get_char_string_and_label, atoi, char)
00367 GET_STRING_AND_LABEL(get_int_string_and_label, atoi, int32_t)
00368 GET_STRING_AND_LABEL(get_shortreal_string_and_label, atof, float32_t)
00369 GET_STRING_AND_LABEL(get_real_string_and_label, atof, float64_t)
00370 GET_STRING_AND_LABEL(get_short_string_and_label, atoi, int16_t)
00371 GET_STRING_AND_LABEL(get_word_string_and_label, atoi, uint16_t)
00372 GET_STRING_AND_LABEL(get_int8_string_and_label, atoi, int8_t)
00373 GET_STRING_AND_LABEL(get_uint_string_and_label, atoi, uint32_t)
00374 GET_STRING_AND_LABEL(get_long_string_and_label, atoi, int64_t)
00375 GET_STRING_AND_LABEL(get_ulong_string_and_label, atoi, uint64_t)
00376 GET_STRING_AND_LABEL(get_longreal_string_and_label, atoi, floatmax_t)
00377 #undef GET_STRING_AND_LABEL
00378 
00379 /* Methods for reading a sparse vector from an ascii file */
00380 
00381 #define GET_SPARSE_VECTOR(fname, conv, sg_type)                         \
00382 void CStreamingAsciiFile::get_sparse_vector(SGSparseVectorEntry<sg_type>*& vector, int32_t& len) \
00383 {                                                                       \
00384         char* buffer = NULL;                                            \
00385         ssize_t bytes_read;                                             \
00386                                                                         \
00387         bytes_read = buf->read_line(buffer);                            \
00388                                                                         \
00389         if (bytes_read<=1)                                              \
00390         {                                                               \
00391                 vector=NULL;                                            \
00392                 len=-1;                                                 \
00393                 return;                                                 \
00394         }                                                               \
00395                                                                         \
00396         /* Remove terminating \n */                                     \
00397         int32_t num_chars;                                              \
00398         if (buffer[bytes_read-1]=='\n')                                 \
00399           {                                                             \
00400             num_chars=bytes_read-1;                                     \
00401             buffer[num_chars]='\0';                                     \
00402           }                                                             \
00403         else                                                            \
00404           num_chars=bytes_read;                                         \
00405                                                                         \
00406         int32_t num_dims=0;                                             \
00407         for (int32_t i=0; i<num_chars; i++)                             \
00408         {                                                               \
00409                 if (buffer[i]==':')                                     \
00410                 {                                                       \
00411                         num_dims++;                                     \
00412                 }                                                       \
00413         }                                                               \
00414                                                                         \
00415         int32_t index_start_pos=-1;                                     \
00416         int32_t feature_start_pos;                                      \
00417         int32_t current_feat=0;                                         \
00418         vector=SG_MALLOC(SGSparseVectorEntry<sg_type>, num_dims);       \
00419         for (int32_t i=0; i<num_chars; i++)                             \
00420         {                                                               \
00421                 if (buffer[i]==':')                                     \
00422                 {                                                       \
00423                         buffer[i]='\0';                                 \
00424                         vector[current_feat].feat_index=(int32_t) atoi(buffer+index_start_pos)-1; \
00425                         /* Unset index_start_pos */                     \
00426                         index_start_pos=-1;                             \
00427                                                                         \
00428                         feature_start_pos=i+1;                          \
00429                         while ((buffer[i]!=' ') && (i<num_chars))       \
00430                         {                                               \
00431                                 i++;                                    \
00432                         }                                               \
00433                                                                         \
00434                         buffer[i]='\0';                                 \
00435                         vector[current_feat].entry=(sg_type) conv(buffer+feature_start_pos); \
00436                                                                         \
00437                         current_feat++;                                 \
00438                 }                                                       \
00439                 else if (buffer[i]==' ')                                \
00440                   i++;                                                  \
00441                 else                                                    \
00442                   {                                                     \
00443                     /* Set index_start_pos if not set already */        \
00444                     /* if already set, it means the index is  */        \
00445                     /* more than one digit long.              */        \
00446                     if (index_start_pos == -1)                          \
00447                         index_start_pos=i;                              \
00448                   }                                                     \
00449         }                                                               \
00450                                                                         \
00451         len=current_feat;                                               \
00452 }
00453 
00454 GET_SPARSE_VECTOR(get_bool_sparse_vector, str_to_bool, bool)
00455 GET_SPARSE_VECTOR(get_byte_sparse_vector, atoi, uint8_t)
00456 GET_SPARSE_VECTOR(get_char_sparse_vector, atoi, char)
00457 GET_SPARSE_VECTOR(get_int_sparse_vector, atoi, int32_t)
00458 GET_SPARSE_VECTOR(get_shortreal_sparse_vector, atof, float32_t)
00459 GET_SPARSE_VECTOR(get_real_sparse_vector, atof, float64_t)
00460 GET_SPARSE_VECTOR(get_short_sparse_vector, atoi, int16_t)
00461 GET_SPARSE_VECTOR(get_word_sparse_vector, atoi, uint16_t)
00462 GET_SPARSE_VECTOR(get_int8_sparse_vector, atoi, int8_t)
00463 GET_SPARSE_VECTOR(get_uint_sparse_vector, atoi, uint32_t)
00464 GET_SPARSE_VECTOR(get_long_sparse_vector, atoi, int64_t)
00465 GET_SPARSE_VECTOR(get_ulong_sparse_vector, atoi, uint64_t)
00466 GET_SPARSE_VECTOR(get_longreal_sparse_vector, atoi, floatmax_t)
00467 #undef GET_SPARSE_VECTOR
00468 
00469 /* Methods for reading a sparse vector and a label from an ascii file */
00470 
/**
 * Defines CStreamingAsciiFile::get_sparse_vector_and_label() for one entry
 * type.
 *
 * The generated method reads one line from buf and parses it as
 *
 *     <label> <index>:<value> <index>:<value> ...
 *
 * The label is converted with atof(). Each index is converted with atoi()
 * and stored, minus one, in feat_index. Each value is converted with conv
 * and cast to sg_type. The line buffer is modified in place: separators are
 * overwritten with NUL so that the tokens can be converted where they are.
 *
 * On return, vector points to an array obtained from SG_MALLOC with room
 * for one entry per ':' in the line, and len is the number of entries that
 * were filled in. If read_line() yields at most one byte, vector is set to
 * NULL and len to -1 instead. A line without a label is reported through
 * SG_ERROR. An entry that has no index text in front of its ':' is skipped.
 *
 * @param fname   not used by the macro body; labels the instantiation
 * @param conv    function turning a NUL-terminated string into a value
 * @param sg_type type of the entries stored in the sparse vector
 */
#define GET_SPARSE_VECTOR_AND_LABEL(fname, conv, sg_type)               \
void CStreamingAsciiFile::get_sparse_vector_and_label(SGSparseVectorEntry<sg_type>*& vector, int32_t& len, float64_t& label) \
{                                                                       \
        char* buffer = NULL;                                            \
        ssize_t bytes_read = buf->read_line(buffer);                    \
                                                                        \
        /* Too little input to hold an example */                       \
        if (bytes_read<=1)                                              \
        {                                                               \
                vector=NULL;                                            \
                len=-1;                                                 \
                return;                                                 \
        }                                                               \
                                                                        \
        /* Strip the terminating newline, if there is one */            \
        int32_t num_chars=bytes_read;                                   \
        if (buffer[bytes_read-1]=='\n')                                 \
        {                                                               \
                num_chars=bytes_read-1;                                 \
                buffer[num_chars]='\0';                                 \
        }                                                               \
                                                                        \
        /* The label is the text in front of the first blank that   */ \
        /* follows a non-blank. Meeting ':' first means there is no */ \
        /* label on this line.                                      */ \
        int32_t label_pos=-1;                                           \
        for (int32_t i=1; i<num_chars; i++)                             \
        {                                                               \
                if (buffer[i]==':')                                     \
                        break;                                          \
                                                                        \
                if ( (buffer[i]==' ') && (buffer[i-1]!=' ') )           \
                {                                                       \
                        buffer[i]='\0';                                 \
                        label_pos=i;                                    \
                        label=atof(buffer);                             \
                        break;                                          \
                }                                                       \
        }                                                               \
                                                                        \
        if (label_pos==-1)                                              \
        {                                                               \
                SG_ERROR("No label found!\n");                          \
        }                                                               \
                                                                        \
        /* Every feature contains one ':', so their count bounds the */ \
        /* number of entries. The array is allocated only after the  */ \
        /* label check, so the error path above has nothing to       */ \
        /* strand.                                                   */ \
        int32_t num_dims=0;                                             \
        for (int32_t i=0; i<num_chars; i++)                             \
        {                                                               \
                if (buffer[i]==':')                                     \
                        num_dims++;                                     \
        }                                                               \
        vector=SG_MALLOC(SGSparseVectorEntry<sg_type>, num_dims);       \
                                                                        \
        /* Continue right behind the label */                           \
        buffer+=label_pos+1;                                            \
        num_chars-=label_pos+1;                                         \
                                                                        \
        int32_t index_start_pos=-1;                                     \
        int32_t current_feat=0;                                         \
        for (int32_t i=0; i<num_chars; i++)                             \
        {                                                               \
                if (buffer[i]==':')                                     \
                {                                                       \
                        /* Without index text there is nothing in   */ \
                        /* front of the ':' to hand to atoi(); such */ \
                        /* an entry is consumed but not stored.     */ \
                        const bool has_index=(index_start_pos!=-1);     \
                                                                        \
                        buffer[i]='\0';                                 \
                        if (has_index)                                  \
                                vector[current_feat].feat_index=(int32_t) atoi(buffer+index_start_pos)-1; \
                        index_start_pos=-1;                             \
                                                                        \
                        /* The value runs up to the next blank or   */ \
                        /* the end of the line. The bound is tested */ \
                        /* before buffer[i] is read.                */ \
                        const int32_t feature_start_pos=i+1;            \
                        while ((i<num_chars) && (buffer[i]!=' '))       \
                                i++;                                    \
                                                                        \
                        buffer[i]='\0';                                 \
                        if (has_index)                                  \
                        {                                               \
                                vector[current_feat].entry=(sg_type) conv(buffer+feature_start_pos); \
                                current_feat++;                         \
                        }                                               \
                }                                                       \
                else if (buffer[i]==' ')                                \
                {                                                       \
                        /* A blank ends whatever token was pending. */ \
                        /* i is not advanced here: the loop header  */ \
                        /* already does that, and advancing twice   */ \
                        /* would drop the first character of the    */ \
                        /* next index.                              */ \
                        index_start_pos=-1;                             \
                }                                                       \
                else if (index_start_pos==-1)                           \
                {                                                       \
                        /* First character of a new index token;    */ \
                        /* further characters leave the start as is */ \
                        index_start_pos=i;                              \
                }                                                       \
        }                                                               \
                                                                        \
        len=current_feat;                                               \
}
00565 
00566 GET_SPARSE_VECTOR_AND_LABEL(get_bool_sparse_vector_and_label, str_to_bool, bool)
00567 GET_SPARSE_VECTOR_AND_LABEL(get_byte_sparse_vector_and_label, atoi, uint8_t)
00568 GET_SPARSE_VECTOR_AND_LABEL(get_char_sparse_vector_and_label, atoi, char)
00569 GET_SPARSE_VECTOR_AND_LABEL(get_int_sparse_vector_and_label, atoi, int32_t)
00570 GET_SPARSE_VECTOR_AND_LABEL(get_shortreal_sparse_vector_and_label, atof, float32_t)
00571 GET_SPARSE_VECTOR_AND_LABEL(get_real_sparse_vector_and_label, atof, float64_t)
00572 GET_SPARSE_VECTOR_AND_LABEL(get_short_sparse_vector_and_label, atoi, int16_t)
00573 GET_SPARSE_VECTOR_AND_LABEL(get_word_sparse_vector_and_label, atoi, uint16_t)
00574 GET_SPARSE_VECTOR_AND_LABEL(get_int8_sparse_vector_and_label, atoi, int8_t)
00575 GET_SPARSE_VECTOR_AND_LABEL(get_uint_sparse_vector_and_label, atoi, uint32_t)
00576 GET_SPARSE_VECTOR_AND_LABEL(get_long_sparse_vector_and_label, atoi, int64_t)
00577 GET_SPARSE_VECTOR_AND_LABEL(get_ulong_sparse_vector_and_label, atoi, uint64_t)
00578 GET_SPARSE_VECTOR_AND_LABEL(get_longreal_sparse_vector_and_label, atoi, floatmax_t)
00579 #undef GET_SPARSE_VECTOR_AND_LABEL
00580 
00581 template <class T>
00582 void CStreamingAsciiFile::append_item(
00583         DynArray<T>* items, char* ptr_data, char* ptr_item)
00584 {
00585         size_t len=(ptr_data-ptr_item)/sizeof(char);
00586         char* item=SG_MALLOC(char, len+1);
00587         memset(item, 0, sizeof(char)*(len+1));
00588         item=strncpy(item, ptr_item, len);
00589 
00590         SG_DEBUG("current %c, len %d, item %s\n", *ptr_data, len, item);
00591         items->append_element(item);
00592 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation