AsciiFile.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Parts of this code are copyright (c) 2009 Yahoo! Inc.
00008  * All rights reserved.  The copyrights embodied in the content of
00009  * this file are licensed under the BSD (revised) open source license.
00010  *
00011  * Written (W) 2010 Soeren Sonnenburg
00012  * Copyright (C) 2010 Berlin Institute of Technology
00013  */
00014 #ifndef __ASCII_FILE_H__
00015 #define __ASCII_FILE_H__
00016 
00017 #include <shogun/lib/config.h>
00018 #include <shogun/base/DynArray.h>
00019 #include <shogun/lib/common.h>
00020 #include <shogun/io/File.h>
00021 #include <shogun/io/SGIO.h>
00022 #include <shogun/io/IOBuffer.h>
00023 
00024 namespace shogun
00025 {
/**
 * CAsciiFile publicly derives from CFile and declares typed getters and
 * setters for five kinds of data -- vectors, matrices, n-dimensional arrays,
 * sparse matrices and string lists -- together with static line-reading and
 * tokenizing helpers. It owns one CIOBuffer member (buf).
 *
 * Naming pattern visible throughout: most element types share an overloaded
 * name (get_matrix, set_ndarray, ...), while some element types use a
 * type-suffixed name instead (get_int8_matrix, get_uint_matrix,
 * get_long_matrix, get_ulong_matrix, get_longreal_matrix and their
 * ndarray / sparsematrix / string_list / set_ counterparts).
 *
 * NOTE(review): the class name suggests plain-text (ASCII) storage, and the
 * virtual accessors presumably override declarations in CFile; neither the
 * file format nor the base class is visible in this header -- confirm in the
 * implementation and in shogun/io/File.h.
 */
00035 class CAsciiFile: public CFile
00036 {
00037 public:
    /** Default constructor; takes no arguments. */
00039     CAsciiFile();
00040 
    /**
     * Constructs the object from a C stdio stream handle.
     *
     * @param f    stream handle
     * @param name optional C string; NULL when omitted
     *
     * NOTE(review): whether the object takes ownership of f (and closes it)
     * and what name designates are not visible here -- confirm.
     */
00046     CAsciiFile(FILE* f, const char* name=NULL);
00047 
    /**
     * Constructs the object from a file name.
     *
     * @param fname file name as a C string
     * @param rw    single mode character; 'r' when omitted
     * @param name  optional C string; NULL when omitted
     *
     * NOTE(review): presumably 'r' selects reading and another character
     * (e.g. 'w') selects writing; the accepted values are not visible in
     * this header -- confirm.
     */
00054     CAsciiFile(const char* fname, char rw='r', const char* name=NULL);
00055 
    /** Virtual destructor. */
00057     virtual ~CAsciiFile();
00058 
    /**
     * get_vector overloads, one per element type (int8_t, uint8_t, char,
     * int32_t, uint32_t, float64_t, float32_t, floatmax_t, int16_t,
     * uint16_t, int64_t, uint64_t).
     *
     * @param vector element pointer, passed by non-const reference so the
     *               callee can repoint it
     * @param len    element count, passed by non-const reference so the
     *               callee can update it
     *
     * NOTE(review): presumably each overload reads one vector from the file
     * and returns a newly allocated buffer through vector; allocation and
     * ownership are not visible in this header -- confirm.
     */
00066     virtual void get_vector(int8_t*& vector, int32_t& len);
00067     virtual void get_vector(uint8_t*& vector, int32_t& len);
00068     virtual void get_vector(char*& vector, int32_t& len);
00069     virtual void get_vector(int32_t*& vector, int32_t& len);
00070     virtual void get_vector(uint32_t*& vector, int32_t& len);
00071     virtual void get_vector(float64_t*& vector, int32_t& len);
00072     virtual void get_vector(float32_t*& vector, int32_t& len);
00073     virtual void get_vector(floatmax_t*& vector, int32_t& len);
00074     virtual void get_vector(int16_t*& vector, int32_t& len);
00075     virtual void get_vector(uint16_t*& vector, int32_t& len);
00076     virtual void get_vector(int64_t*& vector, int32_t& len);
00077     virtual void get_vector(uint64_t*& vector, int32_t& len);
00079 
    /**
     * Matrix getters, one per element type. The int8_t, uint32_t, int64_t,
     * uint64_t and floatmax_t variants carry a type-suffixed name; the
     * remaining element types overload get_matrix.
     *
     * @param matrix   element pointer, passed by non-const reference
     * @param num_feat passed by non-const reference
     * @param num_vec  passed by non-const reference
     *
     * NOTE(review): presumably num_feat is the number of features (rows) and
     * num_vec the number of vectors (columns); the memory layout of matrix
     * and who frees it are not visible here -- confirm.
     */
00088     virtual void get_matrix(
00089             uint8_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00090     virtual void get_int8_matrix(
00091             int8_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00092     virtual void get_matrix(
00093             char*& matrix, int32_t& num_feat, int32_t& num_vec);
00094     virtual void get_matrix(
00095             int32_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00096     virtual void get_uint_matrix(
00097             uint32_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00098     virtual void get_long_matrix(
00099             int64_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00100     virtual void get_ulong_matrix(
00101             uint64_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00102     virtual void get_matrix(
00103             float32_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00104     virtual void get_matrix(
00105             float64_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00106     virtual void get_longreal_matrix(
00107             floatmax_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00108     virtual void get_matrix(
00109             int16_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00110     virtual void get_matrix(
00111             uint16_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00113 
    /**
     * N-dimensional array getters, one per element type, following the same
     * overloaded / type-suffixed naming split as the matrix getters.
     *
     * @param array    element pointer, passed by non-const reference
     * @param dims     int32_t pointer, passed by non-const reference
     * @param num_dims passed by non-const reference
     *
     * NOTE(review): presumably dims receives an array of num_dims extents
     * describing array; element ordering and ownership of both buffers are
     * not visible here -- confirm.
     */
00122     virtual void get_ndarray(
00123                         uint8_t*& array, int32_t*& dims, int32_t& num_dims);
00124     virtual void get_int8_ndarray(
00125             int8_t*& array, int32_t*& dims, int32_t& num_dims);
00126     virtual void get_ndarray(
00127             char*& array, int32_t*& dims, int32_t& num_dims);
00128     virtual void get_ndarray(
00129             int32_t*& array, int32_t*& dims, int32_t& num_dims);
00130     virtual void get_uint_ndarray(
00131             uint32_t*& array, int32_t*& dims, int32_t& num_dims);
00132     virtual void get_long_ndarray(
00133             int64_t*& array, int32_t*& dims, int32_t& num_dims);
00134     virtual void get_ulong_ndarray(
00135             uint64_t*& array, int32_t*& dims, int32_t& num_dims);
00136     virtual void get_ndarray(
00137             float32_t*& array, int32_t*& dims, int32_t& num_dims);
00138     virtual void get_ndarray(
00139                         float64_t*& array, int32_t*& dims, int32_t& num_dims);
00140     virtual void get_longreal_ndarray(
00141                         floatmax_t*& array, int32_t*& dims, int32_t& num_dims);
00142     virtual void get_ndarray(
00143             int16_t*& array, int32_t*& dims, int32_t& num_dims);
00144     virtual void get_ndarray(
00145             uint16_t*& array, int32_t*& dims, int32_t& num_dims);
00147 
    /**
     * Sparse matrix getters, one per element type (this group additionally
     * has a bool variant). Type-suffixed variants are spelled
     * get_<type>_sparsematrix, without an underscore between "sparse" and
     * "matrix".
     *
     * @param matrix   pointer to SGSparseVector<T>, passed by non-const
     *                 reference
     * @param num_feat passed by non-const reference
     * @param num_vec  passed by non-const reference
     *
     * NOTE(review): presumably matrix receives an array of num_vec sparse
     * vectors; SGSparseVector is declared elsewhere and the ownership rules
     * are not visible here -- confirm.
     */
00156     virtual void get_sparse_matrix(
00157             SGSparseVector<bool>*& matrix, int32_t& num_feat, int32_t& num_vec);
00158     virtual void get_sparse_matrix(
00159             SGSparseVector<uint8_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00160     virtual void get_int8_sparsematrix(
00161             SGSparseVector<int8_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00162     virtual void get_sparse_matrix(
00163             SGSparseVector<char>*& matrix, int32_t& num_feat, int32_t& num_vec);
00164     virtual void get_sparse_matrix(
00165             SGSparseVector<int32_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00166     virtual void get_uint_sparsematrix(
00167             SGSparseVector<uint32_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00168     virtual void get_long_sparsematrix(
00169             SGSparseVector<int64_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00170     virtual void get_ulong_sparsematrix(
00171             SGSparseVector<uint64_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00172     virtual void get_sparse_matrix(
00173             SGSparseVector<int16_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00174     virtual void get_sparse_matrix(
00175             SGSparseVector<uint16_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00176     virtual void get_sparse_matrix(
00177             SGSparseVector<float32_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00178     virtual void get_sparse_matrix(
00179             SGSparseVector<float64_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00180     virtual void get_longreal_sparsematrix(
00181             SGSparseVector<floatmax_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00183 
00184 
    /**
     * String list getters, one per element type, with the same overloaded /
     * type-suffixed naming split as the other getter groups.
     *
     * @param strings        pointer to SGString<T>, passed by non-const
     *                       reference
     * @param num_str        passed by non-const reference
     * @param max_string_len passed by non-const reference
     *
     * NOTE(review): presumably strings receives an array of num_str strings
     * and max_string_len the length of the longest one; SGString is declared
     * elsewhere and ownership is not visible here -- confirm.
     */
00193     virtual void get_string_list(
00194             SGString<uint8_t>*& strings, int32_t& num_str,
00195             int32_t& max_string_len);
00196     virtual void get_int8_string_list(
00197             SGString<int8_t>*& strings, int32_t& num_str,
00198             int32_t& max_string_len);
00199     virtual void get_string_list(
00200             SGString<char>*& strings, int32_t& num_str,
00201             int32_t& max_string_len);
00202     virtual void get_string_list(
00203             SGString<int32_t>*& strings, int32_t& num_str,
00204             int32_t& max_string_len);
00205     virtual void get_uint_string_list(
00206             SGString<uint32_t>*& strings, int32_t& num_str,
00207             int32_t& max_string_len);
00208     virtual void get_string_list(
00209             SGString<int16_t>*& strings, int32_t& num_str,
00210             int32_t& max_string_len);
00211     virtual void get_string_list(
00212             SGString<uint16_t>*& strings, int32_t& num_str,
00213             int32_t& max_string_len);
00214     virtual void get_long_string_list(
00215             SGString<int64_t>*& strings, int32_t& num_str,
00216             int32_t& max_string_len);
00217     virtual void get_ulong_string_list(
00218             SGString<uint64_t>*& strings, int32_t& num_str,
00219             int32_t& max_string_len);
00220     virtual void get_string_list(
00221             SGString<float32_t>*& strings, int32_t& num_str,
00222             int32_t& max_string_len);
00223     virtual void get_string_list(
00224             SGString<float64_t>*& strings, int32_t& num_str,
00225             int32_t& max_string_len);
00226     virtual void get_longreal_string_list(
00227             SGString<floatmax_t>*& strings, int32_t& num_str,
00228             int32_t& max_string_len);
00230 
    /**
     * set_vector overloads, one per element type; all twelve share the same
     * name.
     *
     * @param vector pointer to const elements
     * @param len    count, passed by value
     *
     * NOTE(review): presumably each overload writes len elements of vector
     * to the file; the output format is not visible here -- confirm.
     */
00238     virtual void set_vector(const int8_t* vector, int32_t len);
00239     virtual void set_vector(const uint8_t* vector, int32_t len);
00240     virtual void set_vector(const char* vector, int32_t len);
00241     virtual void set_vector(const int32_t* vector, int32_t len);
00242     virtual void set_vector(const uint32_t* vector, int32_t len);
00243     virtual void set_vector(const float32_t* vector, int32_t len);
00244     virtual void set_vector(const float64_t* vector, int32_t len);
00245     virtual void set_vector(const floatmax_t* vector, int32_t len);
00246     virtual void set_vector(const int16_t* vector, int32_t len);
00247     virtual void set_vector(const uint16_t* vector, int32_t len);
00248     virtual void set_vector(const int64_t* vector, int32_t len);
00249     virtual void set_vector(const uint64_t* vector, int32_t len);
00251 
00252 
    /**
     * Matrix setters, mirroring the matrix getters name for name
     * (set_matrix overloads plus set_int8_matrix, set_uint_matrix,
     * set_long_matrix, set_ulong_matrix, set_longreal_matrix).
     *
     * @param matrix   pointer to const elements
     * @param num_feat passed by value
     * @param num_vec  passed by value
     *
     * NOTE(review): presumably matrix holds num_feat * num_vec elements;
     * the expected memory layout is not visible here -- confirm.
     */
00260     virtual void set_matrix(
00261             const uint8_t* matrix, int32_t num_feat, int32_t num_vec);
00262     virtual void set_int8_matrix(
00263             const int8_t* matrix, int32_t num_feat, int32_t num_vec);
00264     virtual void set_matrix(
00265             const char* matrix, int32_t num_feat, int32_t num_vec);
00266     virtual void set_matrix(
00267             const int32_t* matrix, int32_t num_feat, int32_t num_vec);
00268     virtual void set_uint_matrix(
00269             const uint32_t* matrix, int32_t num_feat, int32_t num_vec);
00270     virtual void set_long_matrix(
00271             const int64_t* matrix, int32_t num_feat, int32_t num_vec);
00272     virtual void set_ulong_matrix(
00273             const uint64_t* matrix, int32_t num_feat, int32_t num_vec);
00274     virtual void set_matrix(
00275             const float32_t* matrix, int32_t num_feat, int32_t num_vec);
00276     virtual void set_matrix(
00277             const float64_t* matrix, int32_t num_feat, int32_t num_vec);
00278     virtual void set_longreal_matrix(
00279             const floatmax_t* matrix, int32_t num_feat, int32_t num_vec);
00280     virtual void set_matrix(
00281             const int16_t* matrix, int32_t num_feat, int32_t num_vec);
00282     virtual void set_matrix(
00283             const uint16_t* matrix, int32_t num_feat, int32_t num_vec);
00285 
    /**
     * N-dimensional array setters, mirroring the ndarray getters name for
     * name.
     *
     * @param array    pointer to const elements
     * @param dims     pointer to int32_t; unlike array it is not declared
     *                 const in these signatures
     * @param num_dims passed by value
     *
     * NOTE(review): presumably dims points at num_dims extents and is only
     * read; the header does not guarantee that -- confirm.
     */
00293         virtual void set_ndarray(
00294                         const uint8_t* array, int32_t* dims, int32_t num_dims);
00295     virtual void set_int8_ndarray(
00296             const int8_t* array, int32_t* dims, int32_t num_dims);
00297     virtual void set_ndarray(
00298             const char* array, int32_t* dims, int32_t num_dims);
00299     virtual void set_ndarray(
00300             const int32_t* array, int32_t* dims, int32_t num_dims);
00301     virtual void set_uint_ndarray(
00302             const uint32_t* array, int32_t* dims, int32_t num_dims);
00303     virtual void set_long_ndarray(
00304             const int64_t* array, int32_t* dims, int32_t num_dims);
00305     virtual void set_ulong_ndarray(
00306             const uint64_t* array, int32_t* dims, int32_t num_dims);
00307     virtual void set_ndarray(
00308             const float32_t* array, int32_t* dims, int32_t num_dims);
00309     virtual void set_ndarray(
00310                        const  float64_t* array, int32_t* dims, int32_t num_dims);
00311     virtual void set_longreal_ndarray(
00312                         const floatmax_t* array, int32_t* dims, int32_t num_dims);
00313     virtual void set_ndarray(
00314             const int16_t* array, int32_t* dims, int32_t num_dims);
00315     virtual void set_ndarray(
00316             const uint16_t* array, int32_t* dims, int32_t num_dims);
00318 
    /**
     * Sparse matrix setters, mirroring the sparse matrix getters name for
     * name (including the bool variant and the set_<type>_sparsematrix
     * spelling).
     *
     * @param matrix   pointer to const SGSparseVector<T>
     * @param num_feat passed by value
     * @param num_vec  passed by value
     *
     * NOTE(review): presumably matrix points at num_vec sparse vectors --
     * confirm against the implementation.
     */
00326     virtual void set_sparse_matrix(
00327             const SGSparseVector<bool>* matrix, int32_t num_feat, int32_t num_vec);
00328     virtual void set_sparse_matrix(
00329             const SGSparseVector<uint8_t>* matrix, int32_t num_feat, int32_t num_vec);
00330     virtual void set_int8_sparsematrix(
00331             const SGSparseVector<int8_t>* matrix, int32_t num_feat, int32_t num_vec);
00332     virtual void set_sparse_matrix(
00333             const SGSparseVector<char>* matrix, int32_t num_feat, int32_t num_vec);
00334     virtual void set_sparse_matrix(
00335             const SGSparseVector<int32_t>* matrix, int32_t num_feat, int32_t num_vec);
00336     virtual void set_uint_sparsematrix(
00337             const SGSparseVector<uint32_t>* matrix, int32_t num_feat, int32_t num_vec);
00338     virtual void set_long_sparsematrix(
00339             const SGSparseVector<int64_t>* matrix, int32_t num_feat, int32_t num_vec);
00340     virtual void set_ulong_sparsematrix(
00341             const SGSparseVector<uint64_t>* matrix, int32_t num_feat, int32_t num_vec);
00342     virtual void set_sparse_matrix(
00343             const SGSparseVector<int16_t>* matrix, int32_t num_feat, int32_t num_vec);
00344     virtual void set_sparse_matrix(
00345             const SGSparseVector<uint16_t>* matrix, int32_t num_feat, int32_t num_vec);
00346     virtual void set_sparse_matrix(
00347             const SGSparseVector<float32_t>* matrix, int32_t num_feat, int32_t num_vec);
00348     virtual void set_sparse_matrix(
00349             const SGSparseVector<float64_t>* matrix, int32_t num_feat, int32_t num_vec);
00350     virtual void set_longreal_sparsematrix(
00351             const SGSparseVector<floatmax_t>* matrix, int32_t num_feat, int32_t num_vec);
00353 
00354 
    /**
     * String list setters, mirroring the string list getters name for name.
     * Unlike the getters they take no max_string_len argument.
     *
     * @param strings pointer to const SGString<T>
     * @param num_str passed by value
     *
     * NOTE(review): presumably strings points at num_str strings -- confirm
     * against the implementation.
     */
00363     virtual void set_string_list(
00364             const SGString<uint8_t>* strings, int32_t num_str);
00365     virtual void set_int8_string_list(
00366             const SGString<int8_t>* strings, int32_t num_str);
00367     virtual void set_string_list(
00368             const SGString<char>* strings, int32_t num_str);
00369     virtual void set_string_list(
00370             const SGString<int32_t>* strings, int32_t num_str);
00371     virtual void set_uint_string_list(
00372             const SGString<uint32_t>* strings, int32_t num_str);
00373     virtual void set_string_list(
00374             const SGString<int16_t>* strings, int32_t num_str);
00375     virtual void set_string_list(
00376             const SGString<uint16_t>* strings, int32_t num_str);
00377     virtual void set_long_string_list(
00378             const SGString<int64_t>* strings, int32_t num_str);
00379     virtual void set_ulong_string_list(
00380             const SGString<uint64_t>* strings, int32_t num_str);
00381     virtual void set_string_list(
00382             const SGString<float32_t>* strings, int32_t num_str);
00383     virtual void set_string_list(
00384             const SGString<float64_t>* strings, int32_t num_str);
00385     virtual void set_longreal_string_list(
00386             const SGString<floatmax_t>* strings, int32_t num_str);
00388 
    /** @return the string literal "AsciiFile" */
00390     virtual const char* get_name() const { return "AsciiFile"; }
00391 
    /**
     * Static helper taking a line buffer, its size, a delimiter character
     * and a stdio stream.
     *
     * @param lineptr   pointer to a char* buffer
     * @param n         pointer to a size_t
     * @param delimiter delimiter character
     * @param stream    stdio stream
     * @return a ssize_t value
     *
     * NOTE(review): apart from the char delimiter, the signature matches
     * POSIX getdelim(3); presumably the contract is the same (buffer grown
     * as needed, number of characters read returned, -1 on failure) --
     * confirm in the implementation.
     */
00407     static ssize_t getdelim(char **lineptr, size_t *n, char delimiter, FILE* stream);
00408 
    /**
     * Static helper with the same parameters as getdelim above, minus the
     * delimiter.
     *
     * @param lineptr pointer to a char* buffer
     * @param n       pointer to a size_t
     * @param stream  stdio stream
     * @return a ssize_t value
     *
     * NOTE(review): the signature matches POSIX getline(3); presumably it
     * reads up to and including a newline -- confirm in the implementation.
     */
00419     static ssize_t getline(char **lineptr, size_t *n, FILE *stream);
00420 
    /**
     * Static helper taking a delimiter, a substring (by value) and a
     * v_array<substring> (by non-const reference).
     *
     * @param delim delimiter character
     * @param s     input substring
     * @param ret   array passed by reference so the callee can modify it
     *
     * NOTE(review): presumably splits s at each delim and stores the pieces
     * in ret; whether ret is cleared first is not visible here -- confirm.
     * substring and v_array are declared elsewhere.
     */
00429     static void tokenize(char delim, substring s, v_array<substring> &ret);
00430 
00431 private:
    /**
     * Private member template, parameterized on the element type T of the
     * DynArray it is given.
     *
     * @param items    pointer to a DynArray<T>
     * @param ptr_data char pointer
     * @param ptr_item char pointer
     *
     * NOTE(review): presumably converts the text delimited by ptr_item /
     * ptr_data to a T and appends it to items; the exact roles of the two
     * pointers are not visible here -- confirm in the implementation.
     */
00438     template <class T> void append_item(DynArray<T>* items, char* ptr_data, char* ptr_item);
00439 
00440 protected:
00441 
    /**
     * CIOBuffer instance held by value.
     * NOTE(review): presumably the I/O buffer the read/write methods go
     * through -- confirm.
     */
00443     CIOBuffer buf;
00444 };
00445 }
00446 #endif //__ASCII_FILE_H__
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation