00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 #ifndef __ASCII_FILE_H__
00015 #define __ASCII_FILE_H__
00016
00017 #include <shogun/lib/config.h>
00018 #include <shogun/base/DynArray.h>
00019 #include <shogun/lib/common.h>
00020 #include <shogun/io/File.h>
00021 #include <shogun/io/SGIO.h>
00022 #include <shogun/io/IOBuffer.h>
00023
00024 namespace shogun
00025 {
/**
 * @brief File class derived from CFile that declares typed readers
 * (get_ functions) and writers (set_ functions) for vectors, matrices,
 * n-dimensional arrays, sparse matrices and string lists, plus static
 * helpers for reading delimited lines and splitting them into tokens.
 *
 * NOTE(review): judging by the class name and by get_name(), the on-disk
 * representation is plain ASCII text. The exact format is defined by the
 * implementation file, which is not visible from this header - confirm
 * there before documenting it further.
 */
00035 class CAsciiFile: public CFile
00036 {
00037 public:
/** Default constructor. */
00039 CAsciiFile();
00040
/**
 * Constructor taking a C stdio stream.
 *
 * @param f    FILE handle to operate on (presumably already opened by
 *             the caller - TODO confirm who closes it)
 * @param name optional name, defaults to NULL (presumably forwarded to
 *             CFile - TODO confirm its meaning)
 */
00046 CAsciiFile(FILE* f, const char* name=NULL);
00047
/**
 * Constructor taking a file name.
 *
 * @param fname name of the file to operate on
 * @param rw    single-character access mode, defaults to 'r'
 *              (presumably 'r' = read, 'w' = write - verify against CFile)
 * @param name  optional name, defaults to NULL (presumably forwarded to
 *              CFile - TODO confirm its meaning)
 */
00054 CAsciiFile(const char* fname, char rw='r', const char* name=NULL);
00055
/** Virtual destructor. */
00057 virtual ~CAsciiFile();
00058
/**
 * @name Vector access functions
 *
 * One overload per element type. Both arguments are non-const
 * references, so the callee can set the data pointer and the length.
 * NOTE(review): presumably the buffer is allocated by the callee and
 * handed over to the caller - confirm in the implementation.
 *
 * @param vector reference to the pointer that receives the data
 * @param len    reference that receives the number of elements
 */
//@{
00066 virtual void get_vector(int8_t*& vector, int32_t& len);
00067 virtual void get_vector(uint8_t*& vector, int32_t& len);
00068 virtual void get_vector(char*& vector, int32_t& len);
00069 virtual void get_vector(int32_t*& vector, int32_t& len);
00070 virtual void get_vector(uint32_t*& vector, int32_t& len);
00071 virtual void get_vector(float64_t*& vector, int32_t& len);
00072 virtual void get_vector(float32_t*& vector, int32_t& len);
00073 virtual void get_vector(floatmax_t*& vector, int32_t& len);
00074 virtual void get_vector(int16_t*& vector, int32_t& len);
00075 virtual void get_vector(uint16_t*& vector, int32_t& len);
00076 virtual void get_vector(int64_t*& vector, int32_t& len);
00077 virtual void get_vector(uint64_t*& vector, int32_t& len);
//@}
00079
/**
 * @name Matrix access functions
 *
 * One function per element type. Note that the naming is not uniform:
 * most are overloads of get_matrix, while the int8_t, uint32_t, int64_t,
 * uint64_t and floatmax_t variants carry the type in the function name.
 * NOTE(review): presumably num_feat is the number of rows and num_vec
 * the number of columns of the returned matrix; the memory layout is
 * not visible from this header - TODO confirm.
 *
 * @param matrix   reference to the pointer that receives the data
 * @param num_feat reference that receives the number of features
 * @param num_vec  reference that receives the number of vectors
 */
//@{
00088 virtual void get_matrix(
00089 uint8_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00090 virtual void get_int8_matrix(
00091 int8_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00092 virtual void get_matrix(
00093 char*& matrix, int32_t& num_feat, int32_t& num_vec);
00094 virtual void get_matrix(
00095 int32_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00096 virtual void get_uint_matrix(
00097 uint32_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00098 virtual void get_long_matrix(
00099 int64_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00100 virtual void get_ulong_matrix(
00101 uint64_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00102 virtual void get_matrix(
00103 float32_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00104 virtual void get_matrix(
00105 float64_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00106 virtual void get_longreal_matrix(
00107 floatmax_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00108 virtual void get_matrix(
00109 int16_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00110 virtual void get_matrix(
00111 uint16_t*& matrix, int32_t& num_feat, int32_t& num_vec);
//@}
00113
/**
 * @name N-dimensional array access functions
 *
 * One function per element type, following the same mixed naming scheme
 * as the matrix readers (get_ndarray overloads plus type-named variants).
 *
 * @param array    reference to the pointer that receives the data
 * @param dims     reference to the pointer that receives the extent of
 *                 each dimension (presumably num_dims entries - confirm)
 * @param num_dims reference that receives the number of dimensions
 */
//@{
00122 virtual void get_ndarray(
00123 uint8_t*& array, int32_t*& dims, int32_t& num_dims);
00124 virtual void get_int8_ndarray(
00125 int8_t*& array, int32_t*& dims, int32_t& num_dims);
00126 virtual void get_ndarray(
00127 char*& array, int32_t*& dims, int32_t& num_dims);
00128 virtual void get_ndarray(
00129 int32_t*& array, int32_t*& dims, int32_t& num_dims);
00130 virtual void get_uint_ndarray(
00131 uint32_t*& array, int32_t*& dims, int32_t& num_dims);
00132 virtual void get_long_ndarray(
00133 int64_t*& array, int32_t*& dims, int32_t& num_dims);
00134 virtual void get_ulong_ndarray(
00135 uint64_t*& array, int32_t*& dims, int32_t& num_dims);
00136 virtual void get_ndarray(
00137 float32_t*& array, int32_t*& dims, int32_t& num_dims);
00138 virtual void get_ndarray(
00139 float64_t*& array, int32_t*& dims, int32_t& num_dims);
00140 virtual void get_longreal_ndarray(
00141 floatmax_t*& array, int32_t*& dims, int32_t& num_dims);
00142 virtual void get_ndarray(
00143 int16_t*& array, int32_t*& dims, int32_t& num_dims);
00144 virtual void get_ndarray(
00145 uint16_t*& array, int32_t*& dims, int32_t& num_dims);
//@}
00147
/**
 * @name Sparse matrix access functions
 *
 * One function per element type (including bool). The matrix is handed
 * back as a pointer to SGSparseVector<T> elements; the type-named
 * variants are spelled get_..._sparsematrix (no underscore before
 * "matrix"), unlike the get_sparse_matrix overloads.
 *
 * @param matrix   reference to the pointer that receives the sparse
 *                 vectors (presumably num_vec of them - TODO confirm)
 * @param num_feat reference that receives the number of features
 * @param num_vec  reference that receives the number of vectors
 */
//@{
00156 virtual void get_sparse_matrix(
00157 SGSparseVector<bool>*& matrix, int32_t& num_feat, int32_t& num_vec);
00158 virtual void get_sparse_matrix(
00159 SGSparseVector<uint8_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00160 virtual void get_int8_sparsematrix(
00161 SGSparseVector<int8_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00162 virtual void get_sparse_matrix(
00163 SGSparseVector<char>*& matrix, int32_t& num_feat, int32_t& num_vec);
00164 virtual void get_sparse_matrix(
00165 SGSparseVector<int32_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00166 virtual void get_uint_sparsematrix(
00167 SGSparseVector<uint32_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00168 virtual void get_long_sparsematrix(
00169 SGSparseVector<int64_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00170 virtual void get_ulong_sparsematrix(
00171 SGSparseVector<uint64_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00172 virtual void get_sparse_matrix(
00173 SGSparseVector<int16_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00174 virtual void get_sparse_matrix(
00175 SGSparseVector<uint16_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00176 virtual void get_sparse_matrix(
00177 SGSparseVector<float32_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00178 virtual void get_sparse_matrix(
00179 SGSparseVector<float64_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00180 virtual void get_longreal_sparsematrix(
00181 SGSparseVector<floatmax_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
//@}
00183
00184
/**
 * @name String list access functions
 *
 * One function per element type, handing back a pointer to SGString<T>
 * elements together with two counters.
 *
 * @param strings        reference to the pointer that receives the
 *                       strings (presumably num_str of them - confirm)
 * @param num_str        reference that receives the number of strings
 * @param max_string_len reference that receives the maximum string
 *                       length (presumably the longest string read)
 */
//@{
00193 virtual void get_string_list(
00194 SGString<uint8_t>*& strings, int32_t& num_str,
00195 int32_t& max_string_len);
00196 virtual void get_int8_string_list(
00197 SGString<int8_t>*& strings, int32_t& num_str,
00198 int32_t& max_string_len);
00199 virtual void get_string_list(
00200 SGString<char>*& strings, int32_t& num_str,
00201 int32_t& max_string_len);
00202 virtual void get_string_list(
00203 SGString<int32_t>*& strings, int32_t& num_str,
00204 int32_t& max_string_len);
00205 virtual void get_uint_string_list(
00206 SGString<uint32_t>*& strings, int32_t& num_str,
00207 int32_t& max_string_len);
00208 virtual void get_string_list(
00209 SGString<int16_t>*& strings, int32_t& num_str,
00210 int32_t& max_string_len);
00211 virtual void get_string_list(
00212 SGString<uint16_t>*& strings, int32_t& num_str,
00213 int32_t& max_string_len);
00214 virtual void get_long_string_list(
00215 SGString<int64_t>*& strings, int32_t& num_str,
00216 int32_t& max_string_len);
00217 virtual void get_ulong_string_list(
00218 SGString<uint64_t>*& strings, int32_t& num_str,
00219 int32_t& max_string_len);
00220 virtual void get_string_list(
00221 SGString<float32_t>*& strings, int32_t& num_str,
00222 int32_t& max_string_len);
00223 virtual void get_string_list(
00224 SGString<float64_t>*& strings, int32_t& num_str,
00225 int32_t& max_string_len);
00226 virtual void get_longreal_string_list(
00227 SGString<floatmax_t>*& strings, int32_t& num_str,
00228 int32_t& max_string_len);
//@}
00230
/**
 * @name Vector setter functions
 *
 * One overload per element type. The data is taken through a pointer to
 * const and the length by value, so the caller's buffer is not modified
 * through this interface.
 *
 * @param vector pointer to the elements to be written
 * @param len    number of elements in vector
 */
//@{
00238 virtual void set_vector(const int8_t* vector, int32_t len);
00239 virtual void set_vector(const uint8_t* vector, int32_t len);
00240 virtual void set_vector(const char* vector, int32_t len);
00241 virtual void set_vector(const int32_t* vector, int32_t len);
00242 virtual void set_vector(const uint32_t* vector, int32_t len);
00243 virtual void set_vector(const float32_t* vector, int32_t len);
00244 virtual void set_vector(const float64_t* vector, int32_t len);
00245 virtual void set_vector(const floatmax_t* vector, int32_t len);
00246 virtual void set_vector(const int16_t* vector, int32_t len);
00247 virtual void set_vector(const uint16_t* vector, int32_t len);
00248 virtual void set_vector(const int64_t* vector, int32_t len);
00249 virtual void set_vector(const uint64_t* vector, int32_t len);
//@}
00251
00252
/**
 * @name Matrix setter functions
 *
 * Counterparts of the matrix readers, with the same mixed naming scheme
 * (set_matrix overloads plus type-named variants).
 *
 * @param matrix   pointer to the matrix elements to be written
 * @param num_feat number of features (presumably rows - TODO confirm)
 * @param num_vec  number of vectors (presumably columns - TODO confirm)
 */
//@{
00260 virtual void set_matrix(
00261 const uint8_t* matrix, int32_t num_feat, int32_t num_vec);
00262 virtual void set_int8_matrix(
00263 const int8_t* matrix, int32_t num_feat, int32_t num_vec);
00264 virtual void set_matrix(
00265 const char* matrix, int32_t num_feat, int32_t num_vec);
00266 virtual void set_matrix(
00267 const int32_t* matrix, int32_t num_feat, int32_t num_vec);
00268 virtual void set_uint_matrix(
00269 const uint32_t* matrix, int32_t num_feat, int32_t num_vec);
00270 virtual void set_long_matrix(
00271 const int64_t* matrix, int32_t num_feat, int32_t num_vec);
00272 virtual void set_ulong_matrix(
00273 const uint64_t* matrix, int32_t num_feat, int32_t num_vec);
00274 virtual void set_matrix(
00275 const float32_t* matrix, int32_t num_feat, int32_t num_vec);
00276 virtual void set_matrix(
00277 const float64_t* matrix, int32_t num_feat, int32_t num_vec);
00278 virtual void set_longreal_matrix(
00279 const floatmax_t* matrix, int32_t num_feat, int32_t num_vec);
00280 virtual void set_matrix(
00281 const int16_t* matrix, int32_t num_feat, int32_t num_vec);
00282 virtual void set_matrix(
00283 const uint16_t* matrix, int32_t num_feat, int32_t num_vec);
//@}
00285
/**
 * @name N-dimensional array setter functions
 *
 * Counterparts of the n-dimensional array readers. Note that dims is
 * declared as a pointer to non-const int32_t, unlike the data pointer.
 *
 * @param array    pointer to the array elements to be written
 * @param dims     pointer to the extent of each dimension (presumably
 *                 num_dims entries - TODO confirm)
 * @param num_dims number of dimensions
 */
//@{
00293 virtual void set_ndarray(
00294 const uint8_t* array, int32_t* dims, int32_t num_dims);
00295 virtual void set_int8_ndarray(
00296 const int8_t* array, int32_t* dims, int32_t num_dims);
00297 virtual void set_ndarray(
00298 const char* array, int32_t* dims, int32_t num_dims);
00299 virtual void set_ndarray(
00300 const int32_t* array, int32_t* dims, int32_t num_dims);
00301 virtual void set_uint_ndarray(
00302 const uint32_t* array, int32_t* dims, int32_t num_dims);
00303 virtual void set_long_ndarray(
00304 const int64_t* array, int32_t* dims, int32_t num_dims);
00305 virtual void set_ulong_ndarray(
00306 const uint64_t* array, int32_t* dims, int32_t num_dims);
00307 virtual void set_ndarray(
00308 const float32_t* array, int32_t* dims, int32_t num_dims);
00309 virtual void set_ndarray(
00310 const float64_t* array, int32_t* dims, int32_t num_dims);
00311 virtual void set_longreal_ndarray(
00312 const floatmax_t* array, int32_t* dims, int32_t num_dims);
00313 virtual void set_ndarray(
00314 const int16_t* array, int32_t* dims, int32_t num_dims);
00315 virtual void set_ndarray(
00316 const uint16_t* array, int32_t* dims, int32_t num_dims);
//@}
00318
/**
 * @name Sparse matrix setter functions
 *
 * Counterparts of the sparse matrix readers, one per element type
 * (including bool).
 *
 * @param matrix   pointer to the sparse vectors to be written
 *                 (presumably num_vec of them - TODO confirm)
 * @param num_feat number of features
 * @param num_vec  number of vectors
 */
//@{
00326 virtual void set_sparse_matrix(
00327 const SGSparseVector<bool>* matrix, int32_t num_feat, int32_t num_vec);
00328 virtual void set_sparse_matrix(
00329 const SGSparseVector<uint8_t>* matrix, int32_t num_feat, int32_t num_vec);
00330 virtual void set_int8_sparsematrix(
00331 const SGSparseVector<int8_t>* matrix, int32_t num_feat, int32_t num_vec);
00332 virtual void set_sparse_matrix(
00333 const SGSparseVector<char>* matrix, int32_t num_feat, int32_t num_vec);
00334 virtual void set_sparse_matrix(
00335 const SGSparseVector<int32_t>* matrix, int32_t num_feat, int32_t num_vec);
00336 virtual void set_uint_sparsematrix(
00337 const SGSparseVector<uint32_t>* matrix, int32_t num_feat, int32_t num_vec);
00338 virtual void set_long_sparsematrix(
00339 const SGSparseVector<int64_t>* matrix, int32_t num_feat, int32_t num_vec);
00340 virtual void set_ulong_sparsematrix(
00341 const SGSparseVector<uint64_t>* matrix, int32_t num_feat, int32_t num_vec);
00342 virtual void set_sparse_matrix(
00343 const SGSparseVector<int16_t>* matrix, int32_t num_feat, int32_t num_vec);
00344 virtual void set_sparse_matrix(
00345 const SGSparseVector<uint16_t>* matrix, int32_t num_feat, int32_t num_vec);
00346 virtual void set_sparse_matrix(
00347 const SGSparseVector<float32_t>* matrix, int32_t num_feat, int32_t num_vec);
00348 virtual void set_sparse_matrix(
00349 const SGSparseVector<float64_t>* matrix, int32_t num_feat, int32_t num_vec);
00350 virtual void set_longreal_sparsematrix(
00351 const SGSparseVector<floatmax_t>* matrix, int32_t num_feat, int32_t num_vec);
//@}
00353
00354
/**
 * @name String list setter functions
 *
 * Counterparts of the string list readers. Unlike the readers, no
 * maximum string length is passed.
 *
 * @param strings pointer to the strings to be written
 * @param num_str number of strings in strings
 */
//@{
00363 virtual void set_string_list(
00364 const SGString<uint8_t>* strings, int32_t num_str);
00365 virtual void set_int8_string_list(
00366 const SGString<int8_t>* strings, int32_t num_str);
00367 virtual void set_string_list(
00368 const SGString<char>* strings, int32_t num_str);
00369 virtual void set_string_list(
00370 const SGString<int32_t>* strings, int32_t num_str);
00371 virtual void set_uint_string_list(
00372 const SGString<uint32_t>* strings, int32_t num_str);
00373 virtual void set_string_list(
00374 const SGString<int16_t>* strings, int32_t num_str);
00375 virtual void set_string_list(
00376 const SGString<uint16_t>* strings, int32_t num_str);
00377 virtual void set_long_string_list(
00378 const SGString<int64_t>* strings, int32_t num_str);
00379 virtual void set_ulong_string_list(
00380 const SGString<uint64_t>* strings, int32_t num_str);
00381 virtual void set_string_list(
00382 const SGString<float32_t>* strings, int32_t num_str);
00383 virtual void set_string_list(
00384 const SGString<float64_t>* strings, int32_t num_str);
00385 virtual void set_longreal_string_list(
00386 const SGString<floatmax_t>* strings, int32_t num_str);
//@}
00388
/** @return the constant string "AsciiFile" identifying this class */
00390 virtual const char* get_name() const { return "AsciiFile"; }
00391
/**
 * Static helper reading from a stream up to a delimiter character.
 *
 * NOTE(review): the parameter set resembles POSIX getdelim, except that
 * the delimiter is typed char and placed before the stream. Presumably
 * the contract is the same (buffer in *lineptr grown as needed, *n
 * updated, -1 returned on failure or end of file) - confirm against the
 * implementation before relying on it.
 *
 * @param lineptr   address of the line buffer pointer
 * @param n         address of the buffer size
 * @param delimiter character at which reading stops
 * @param stream    stdio stream to read from
 * @return signed count (presumably characters read, or -1 on failure)
 */
00407 static ssize_t getdelim(char **lineptr, size_t *n, char delimiter, FILE* stream);
00408
/**
 * Static helper reading one line from a stream.
 *
 * NOTE(review): the signature matches POSIX getline; presumably this is
 * getdelim with a newline delimiter - confirm in the implementation.
 *
 * @param lineptr address of the line buffer pointer
 * @param n       address of the buffer size
 * @param stream  stdio stream to read from
 * @return signed count (presumably characters read, or -1 on failure)
 */
00419 static ssize_t getline(char **lineptr, size_t *n, FILE *stream);
00420
/**
 * Static helper splitting a substring at a delimiter character.
 *
 * @param delim delimiter character
 * @param s     substring to split (passed by value)
 * @param ret   array passed by non-const reference; presumably receives
 *              the resulting tokens (whether it is cleared first is not
 *              visible here - TODO confirm)
 */
00429 static void tokenize(char delim, substring s, v_array<substring> &ret);
00430
00431 private:
/**
 * Private template helper for element type T.
 *
 * NOTE(review): presumably converts the text referenced by ptr_data and
 * ptr_item into a T and appends it to items; the exact roles of the two
 * char pointers are not visible from this header - TODO confirm.
 *
 * @param items    dynamic array the helper operates on
 * @param ptr_data char pointer (role to be confirmed)
 * @param ptr_item char pointer (role to be confirmed)
 */
00438 template <class T> void append_item(DynArray<T>* items, char* ptr_data, char* ptr_item);
00439
00440 protected:
00441
/** I/O buffer member; not used within this header (presumably used by
 * the reading routines in the implementation - TODO confirm). */
00443 CIOBuffer buf;
00444 };
00445 }
00446 #endif //__ASCII_FILE_H__