00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 #ifndef __ASCII_FILE_H__
00015 #define __ASCII_FILE_H__
00016
00017 #include <shogun/lib/config.h>
00018 #include <shogun/base/DynArray.h>
00019 #include <shogun/lib/common.h>
00020 #include <shogun/io/File.h>
00021 #include <shogun/io/SGIO.h>
00022 #include <shogun/io/IOBuffer.h>
00023
00024 namespace shogun
00025 {
/**
 * File class that declares ASCII-file versions of the vector, matrix,
 * ndarray, sparse-matrix and string-list accessors; derives publicly from
 * CFile.
 *
 * Only declarations are visible in this header, so the comments below
 * describe signatures. Anything said about runtime behavior is marked as an
 * assumption to be confirmed against the implementation file.
 *
 * NOTE(review): every line carries a five-digit listing number (e.g.
 * "00035") ahead of the code, which looks like residue from a generated
 * source listing -- confirm and strip it before compiling this header.
 */
00035 class CAsciiFile: public CFile
00036 {
00037 public:
/** Default constructor; takes no arguments. */
00039 CAsciiFile();
00040
/**
 * Construct from a C stdio stream.
 *
 * @param f stream handle (NOTE(review): presumably already open; whether
 *        this object takes ownership is not visible here -- confirm)
 * @param name optional name, defaults to NULL (presumably forwarded to
 *        CFile -- confirm)
 */
00046 CAsciiFile(FILE* f, const char* name=NULL);
00047
/**
 * Construct from a file name.
 *
 * @param fname file name (presumably the path to open -- confirm)
 * @param rw single-character mode flag, defaults to 'r' (presumably
 *        selects reading vs. writing -- confirm accepted values)
 * @param name optional name, defaults to NULL
 */
00054 CAsciiFile(char* fname, char rw='r', const char* name=NULL);
00055
/** Virtual destructor. */
00057 virtual ~CAsciiFile();
00058
/**
 * get_vector overloads, one per element type: uint8_t, char, int32_t,
 * float64_t, float32_t, int16_t and uint16_t.
 *
 * @param vector pointer taken by reference, so the callee can reseat it
 *        (presumably set to a newly allocated buffer; who releases it is
 *        not visible here -- confirm)
 * @param len taken by reference (presumably receives the number of
 *        elements -- confirm)
 */
00066 virtual void get_vector(uint8_t*& vector, int32_t& len);
00067 virtual void get_vector(char*& vector, int32_t& len);
00068 virtual void get_vector(int32_t*& vector, int32_t& len);
00069 virtual void get_vector(float64_t*& vector, int32_t& len);
00070 virtual void get_vector(float32_t*& vector, int32_t& len);
00071 virtual void get_vector(int16_t*& vector, int32_t& len);
00072 virtual void get_vector(uint16_t*& vector, int32_t& len);
00074
/**
 * Dense matrix getters. uint8_t, char, int32_t, float32_t, float64_t,
 * int16_t and uint16_t share the overloaded name get_matrix; int8_t,
 * uint32_t, int64_t, uint64_t and floatmax_t use distinctly named methods
 * (get_int8_matrix, get_uint_matrix, get_long_matrix, get_ulong_matrix,
 * get_longreal_matrix).
 *
 * @param matrix pointer taken by reference (presumably receives the
 *        matrix data; memory layout is not visible here -- confirm)
 * @param num_feat taken by reference (presumably the number of features)
 * @param num_vec taken by reference (presumably the number of vectors)
 */
00083 virtual void get_matrix(
00084 uint8_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00085 virtual void get_int8_matrix(
00086 int8_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00087 virtual void get_matrix(
00088 char*& matrix, int32_t& num_feat, int32_t& num_vec);
00089 virtual void get_matrix(
00090 int32_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00091 virtual void get_uint_matrix(
00092 uint32_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00093 virtual void get_long_matrix(
00094 int64_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00095 virtual void get_ulong_matrix(
00096 uint64_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00097 virtual void get_matrix(
00098 float32_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00099 virtual void get_matrix(
00100 float64_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00101 virtual void get_longreal_matrix(
00102 floatmax_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00103 virtual void get_matrix(
00104 int16_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00105 virtual void get_matrix(
00106 uint16_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00108
/**
 * N-dimensional array getters; same split between the overloaded name
 * get_ndarray and the type-specific names as the matrix getters above.
 *
 * @param array pointer taken by reference (presumably receives the
 *        array data -- confirm)
 * @param dims pointer taken by reference (presumably receives one extent
 *        per dimension -- confirm)
 * @param num_dims taken by reference (presumably the number of entries
 *        in dims -- confirm)
 */
00117 virtual void get_ndarray(
00118 uint8_t*& array, int32_t*& dims, int32_t& num_dims);
00119 virtual void get_int8_ndarray(
00120 int8_t*& array, int32_t*& dims, int32_t& num_dims);
00121 virtual void get_ndarray(
00122 char*& array, int32_t*& dims, int32_t& num_dims);
00123 virtual void get_ndarray(
00124 int32_t*& array, int32_t*& dims, int32_t& num_dims);
00125 virtual void get_uint_ndarray(
00126 uint32_t*& array, int32_t*& dims, int32_t& num_dims);
00127 virtual void get_long_ndarray(
00128 int64_t*& array, int32_t*& dims, int32_t& num_dims);
00129 virtual void get_ulong_ndarray(
00130 uint64_t*& array, int32_t*& dims, int32_t& num_dims);
00131 virtual void get_ndarray(
00132 float32_t*& array, int32_t*& dims, int32_t& num_dims);
00133 virtual void get_ndarray(
00134 float64_t*& array, int32_t*& dims, int32_t& num_dims);
00135 virtual void get_longreal_ndarray(
00136 floatmax_t*& array, int32_t*& dims, int32_t& num_dims);
00137 virtual void get_ndarray(
00138 int16_t*& array, int32_t*& dims, int32_t& num_dims);
00139 virtual void get_ndarray(
00140 uint16_t*& array, int32_t*& dims, int32_t& num_dims);
00142
/**
 * Sparse matrix getters, declared for SGSparseVector<T> with T in bool,
 * uint8_t, int8_t, char, int32_t, uint32_t, int64_t, uint64_t, int16_t,
 * uint16_t, float32_t, float64_t and floatmax_t. The type-specific names
 * here end in "_sparsematrix" (no underscore before "matrix"), unlike the
 * overloaded name get_sparse_matrix.
 *
 * @param matrix pointer to SGSparseVector<T> taken by reference
 *        (presumably receives an array with one entry per vector)
 * @param num_feat taken by reference (presumably the number of features)
 * @param num_vec taken by reference (presumably the number of vectors)
 */
00151 virtual void get_sparse_matrix(
00152 SGSparseVector<bool>*& matrix, int32_t& num_feat, int32_t& num_vec);
00153 virtual void get_sparse_matrix(
00154 SGSparseVector<uint8_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00155 virtual void get_int8_sparsematrix(
00156 SGSparseVector<int8_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00157 virtual void get_sparse_matrix(
00158 SGSparseVector<char>*& matrix, int32_t& num_feat, int32_t& num_vec);
00159 virtual void get_sparse_matrix(
00160 SGSparseVector<int32_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00161 virtual void get_uint_sparsematrix(
00162 SGSparseVector<uint32_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00163 virtual void get_long_sparsematrix(
00164 SGSparseVector<int64_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00165 virtual void get_ulong_sparsematrix(
00166 SGSparseVector<uint64_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00167 virtual void get_sparse_matrix(
00168 SGSparseVector<int16_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00169 virtual void get_sparse_matrix(
00170 SGSparseVector<uint16_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00171 virtual void get_sparse_matrix(
00172 SGSparseVector<float32_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00173 virtual void get_sparse_matrix(
00174 SGSparseVector<float64_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00175 virtual void get_longreal_sparsematrix(
00176 SGSparseVector<floatmax_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00178
00179
/**
 * String list getters, declared for SGString<T> with T in uint8_t, int8_t,
 * char, int32_t, uint32_t, int16_t, uint16_t, int64_t, uint64_t,
 * float32_t, float64_t and floatmax_t.
 *
 * @param strings pointer to SGString<T> taken by reference (presumably
 *        receives an array of strings -- confirm)
 * @param num_str taken by reference (presumably the number of strings)
 * @param max_string_len taken by reference (presumably the length of the
 *        longest string -- confirm)
 */
00188 virtual void get_string_list(
00189 SGString<uint8_t>*& strings, int32_t& num_str,
00190 int32_t& max_string_len);
00191 virtual void get_int8_string_list(
00192 SGString<int8_t>*& strings, int32_t& num_str,
00193 int32_t& max_string_len);
00194 virtual void get_string_list(
00195 SGString<char>*& strings, int32_t& num_str,
00196 int32_t& max_string_len);
00197 virtual void get_string_list(
00198 SGString<int32_t>*& strings, int32_t& num_str,
00199 int32_t& max_string_len);
00200 virtual void get_uint_string_list(
00201 SGString<uint32_t>*& strings, int32_t& num_str,
00202 int32_t& max_string_len);
00203 virtual void get_string_list(
00204 SGString<int16_t>*& strings, int32_t& num_str,
00205 int32_t& max_string_len);
00206 virtual void get_string_list(
00207 SGString<uint16_t>*& strings, int32_t& num_str,
00208 int32_t& max_string_len);
00209 virtual void get_long_string_list(
00210 SGString<int64_t>*& strings, int32_t& num_str,
00211 int32_t& max_string_len);
00212 virtual void get_ulong_string_list(
00213 SGString<uint64_t>*& strings, int32_t& num_str,
00214 int32_t& max_string_len);
00215 virtual void get_string_list(
00216 SGString<float32_t>*& strings, int32_t& num_str,
00217 int32_t& max_string_len);
00218 virtual void get_string_list(
00219 SGString<float64_t>*& strings, int32_t& num_str,
00220 int32_t& max_string_len);
00221 virtual void get_longreal_string_list(
00222 SGString<floatmax_t>*& strings, int32_t& num_str,
00223 int32_t& max_string_len);
00225
/**
 * set_vector overloads for the same seven element types as get_vector.
 * The data pointer is const and the length is passed by value, so the
 * caller's arguments are inputs only.
 *
 * @param vector pointer to const elements (presumably the data to be
 *        written to the file -- confirm)
 * @param len passed by value (presumably the number of elements)
 */
00233 virtual void set_vector(const uint8_t* vector, int32_t len);
00234 virtual void set_vector(const char* vector, int32_t len);
00235 virtual void set_vector(const int32_t* vector, int32_t len);
00236 virtual void set_vector( const float32_t* vector, int32_t len);
00237 virtual void set_vector(const float64_t* vector, int32_t len);
00238 virtual void set_vector(const int16_t* vector, int32_t len);
00239 virtual void set_vector(const uint16_t* vector, int32_t len);
00241
00242
/**
 * Dense matrix setters; the split between the overloaded name set_matrix
 * and the type-specific names mirrors the matrix getters.
 *
 * @param matrix pointer to const elements (presumably the data to be
 *        written; memory layout is not visible here -- confirm)
 * @param num_feat passed by value (presumably the number of features)
 * @param num_vec passed by value (presumably the number of vectors)
 */
00250 virtual void set_matrix(
00251 const uint8_t* matrix, int32_t num_feat, int32_t num_vec);
00252 virtual void set_int8_matrix(
00253 const int8_t* matrix, int32_t num_feat, int32_t num_vec);
00254 virtual void set_matrix(
00255 const char* matrix, int32_t num_feat, int32_t num_vec);
00256 virtual void set_matrix(
00257 const int32_t* matrix, int32_t num_feat, int32_t num_vec);
00258 virtual void set_uint_matrix(
00259 const uint32_t* matrix, int32_t num_feat, int32_t num_vec);
00260 virtual void set_long_matrix(
00261 const int64_t* matrix, int32_t num_feat, int32_t num_vec);
00262 virtual void set_ulong_matrix(
00263 const uint64_t* matrix, int32_t num_feat, int32_t num_vec);
00264 virtual void set_matrix(
00265 const float32_t* matrix, int32_t num_feat, int32_t num_vec);
00266 virtual void set_matrix(
00267 const float64_t* matrix, int32_t num_feat, int32_t num_vec);
00268 virtual void set_longreal_matrix(
00269 const floatmax_t* matrix, int32_t num_feat, int32_t num_vec);
00270 virtual void set_matrix(
00271 const int16_t* matrix, int32_t num_feat, int32_t num_vec);
00272 virtual void set_matrix(
00273 const uint16_t* matrix, int32_t num_feat, int32_t num_vec);
00275
/**
 * N-dimensional array setters.
 *
 * @param array pointer to const elements (presumably the data to be
 *        written -- confirm)
 * @param dims pointer to int32_t; unlike array it is not const-qualified
 *        (presumably one extent per dimension and not modified -- confirm)
 * @param num_dims passed by value (presumably the number of entries in
 *        dims -- confirm)
 */
00283 virtual void set_ndarray(
00284 const uint8_t* array, int32_t* dims, int32_t num_dims);
00285 virtual void set_int8_ndarray(
00286 const int8_t* array, int32_t* dims, int32_t num_dims);
00287 virtual void set_ndarray(
00288 const char* array, int32_t* dims, int32_t num_dims);
00289 virtual void set_ndarray(
00290 const int32_t* array, int32_t* dims, int32_t num_dims);
00291 virtual void set_uint_ndarray(
00292 const uint32_t* array, int32_t* dims, int32_t num_dims);
00293 virtual void set_long_ndarray(
00294 const int64_t* array, int32_t* dims, int32_t num_dims);
00295 virtual void set_ulong_ndarray(
00296 const uint64_t* array, int32_t* dims, int32_t num_dims);
00297 virtual void set_ndarray(
00298 const float32_t* array, int32_t* dims, int32_t num_dims);
00299 virtual void set_ndarray(
00300 const float64_t* array, int32_t* dims, int32_t num_dims);
00301 virtual void set_longreal_ndarray(
00302 const floatmax_t* array, int32_t* dims, int32_t num_dims);
00303 virtual void set_ndarray(
00304 const int16_t* array, int32_t* dims, int32_t num_dims);
00305 virtual void set_ndarray(
00306 const uint16_t* array, int32_t* dims, int32_t num_dims);
00308
/**
 * Sparse matrix setters, declared for the same thirteen element types as
 * the sparse matrix getters.
 *
 * @param matrix pointer to const SGSparseVector<T> (presumably an array
 *        with one entry per vector -- confirm)
 * @param num_feat passed by value (presumably the number of features)
 * @param num_vec passed by value (presumably the number of vectors)
 */
00316 virtual void set_sparse_matrix(
00317 const SGSparseVector<bool>* matrix, int32_t num_feat, int32_t num_vec);
00318 virtual void set_sparse_matrix(
00319 const SGSparseVector<uint8_t>* matrix, int32_t num_feat, int32_t num_vec);
00320 virtual void set_int8_sparsematrix(
00321 const SGSparseVector<int8_t>* matrix, int32_t num_feat, int32_t num_vec);
00322 virtual void set_sparse_matrix(
00323 const SGSparseVector<char>* matrix, int32_t num_feat, int32_t num_vec);
00324 virtual void set_sparse_matrix(
00325 const SGSparseVector<int32_t>* matrix, int32_t num_feat, int32_t num_vec);
00326 virtual void set_uint_sparsematrix(
00327 const SGSparseVector<uint32_t>* matrix, int32_t num_feat, int32_t num_vec);
00328 virtual void set_long_sparsematrix(
00329 const SGSparseVector<int64_t>* matrix, int32_t num_feat, int32_t num_vec);
00330 virtual void set_ulong_sparsematrix(
00331 const SGSparseVector<uint64_t>* matrix, int32_t num_feat, int32_t num_vec);
00332 virtual void set_sparse_matrix(
00333 const SGSparseVector<int16_t>* matrix, int32_t num_feat, int32_t num_vec);
00334 virtual void set_sparse_matrix(
00335 const SGSparseVector<uint16_t>* matrix, int32_t num_feat, int32_t num_vec);
00336 virtual void set_sparse_matrix(
00337 const SGSparseVector<float32_t>* matrix, int32_t num_feat, int32_t num_vec);
00338 virtual void set_sparse_matrix(
00339 const SGSparseVector<float64_t>* matrix, int32_t num_feat, int32_t num_vec);
00340 virtual void set_longreal_sparsematrix(
00341 const SGSparseVector<floatmax_t>* matrix, int32_t num_feat, int32_t num_vec);
00343
00344
/**
 * String list setters, declared for the same twelve element types as the
 * string list getters. Unlike the getters there is no max_string_len
 * parameter.
 *
 * @param strings pointer to const SGString<T> (presumably an array of
 *        strings to be written -- confirm)
 * @param num_str passed by value (presumably the number of strings)
 */
00353 virtual void set_string_list(
00354 const SGString<uint8_t>* strings, int32_t num_str);
00355 virtual void set_int8_string_list(
00356 const SGString<int8_t>* strings, int32_t num_str);
00357 virtual void set_string_list(
00358 const SGString<char>* strings, int32_t num_str);
00359 virtual void set_string_list(
00360 const SGString<int32_t>* strings, int32_t num_str);
00361 virtual void set_uint_string_list(
00362 const SGString<uint32_t>* strings, int32_t num_str);
00363 virtual void set_string_list(
00364 const SGString<int16_t>* strings, int32_t num_str);
00365 virtual void set_string_list(
00366 const SGString<uint16_t>* strings, int32_t num_str);
00367 virtual void set_long_string_list(
00368 const SGString<int64_t>* strings, int32_t num_str);
00369 virtual void set_ulong_string_list(
00370 const SGString<uint64_t>* strings, int32_t num_str);
00371 virtual void set_string_list(
00372 const SGString<float32_t>* strings, int32_t num_str);
00373 virtual void set_string_list(
00374 const SGString<float64_t>* strings, int32_t num_str);
00375 virtual void set_longreal_string_list(
00376 const SGString<floatmax_t>* strings, int32_t num_str);
00378
/** @return the string literal "AsciiFile" */
00380 inline virtual const char* get_name() const { return "AsciiFile"; }
00381
/**
 * Static helper whose parameter list resembles POSIX getdelim(), except
 * that the delimiter is a char rather than an int.
 *
 * NOTE(review): the implementation is not visible here; buffer growth and
 * the meaning of the return value are presumably those of the POSIX
 * function -- confirm.
 *
 * @param lineptr address of the line buffer pointer
 * @param n address of the buffer size
 * @param delimiter delimiter character
 * @param stream C stdio stream
 * @return an ssize_t (presumably the number of characters read, or a
 *         negative value on failure -- confirm)
 */
00397 static ssize_t getdelim(char **lineptr, size_t *n, char delimiter, FILE* stream);
00398
/**
 * Static helper with the same parameter list as POSIX getline().
 *
 * NOTE(review): presumably equivalent to getdelim() above with a newline
 * delimiter -- confirm.
 *
 * @param lineptr address of the line buffer pointer
 * @param n address of the buffer size
 * @param stream C stdio stream
 * @return an ssize_t (presumably the number of characters read, or a
 *         negative value on failure -- confirm)
 */
00409 static ssize_t getline(char **lineptr, size_t *n, FILE *stream);
00410
/**
 * Static tokenizer helper.
 *
 * @param delim delimiter character
 * @param s substring passed by value (presumably the text to split)
 * @param ret v_array of substrings taken by non-const reference
 *        (presumably receives the tokens; whether it is cleared first is
 *        not visible here -- confirm)
 */
00419 static void tokenize(char delim, substring s, v_array<substring> &ret);
00420
00421 private:
/**
 * Private member template taking a DynArray<T> and two char pointers.
 *
 * NOTE(review): presumably converts the text between ptr_item and
 * ptr_data to a T and appends it to items -- confirm the pointer roles in
 * the implementation.
 *
 * @param items dynamic array of T
 * @param ptr_data char pointer
 * @param ptr_item char pointer
 */
00428 template <class T> void append_item(DynArray<T>* items, char* ptr_data, char* ptr_item);
00429
00430 protected:
00431
/** CIOBuffer member, accessible to subclasses (presumably buffers I/O on the underlying file -- confirm). */
00433 CIOBuffer buf;
00434 };
00435 }
00436 #endif //__ASCII_FILE_H__