AsciiFile.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Parts of this code are copyright (c) 2009 Yahoo! Inc.
00008  * All rights reserved.  The copyrights embodied in the content of
00009  * this file are licensed under the BSD (revised) open source license.
00010  *
00011  * Written (W) 2010 Soeren Sonnenburg
00012  * Copyright (C) 2010 Berlin Institute of Technology
00013  */
00014 #ifndef __ASCII_FILE_H__
00015 #define __ASCII_FILE_H__
00016 
00017 #include <shogun/lib/config.h>
00018 #include <shogun/base/DynArray.h>
00019 #include <shogun/lib/common.h>
00020 #include <shogun/io/File.h>
00021 #include <shogun/io/SGIO.h>
00022 #include <shogun/io/IOBuffer.h>
00023 
00024 namespace shogun
00025 {
/**
 * CAsciiFile: a CFile subclass that declares typed get_ and set_ accessors
 * for vectors, dense matrices, n-dimensional arrays, sparse matrices and
 * string lists, plus static line-reading and tokenizing helpers.
 *
 * Only declarations are visible in this header; the on-disk format and the
 * memory-ownership rules are defined by the implementation file.
 *
 * NOTE(review): presumably the plain-text (ASCII) backend of CFile, going by
 * the class name and the string returned by get_name() -- confirm against
 * the implementation.
 * NOTE(review): the five-digit prefix on each line is the Doxygen listing
 * line number, not part of the C++ source.
 */
00035 class CAsciiFile: public CFile
00036 {
00037 public:
          /** Default constructor; takes neither a file handle nor a name. */
00039     CAsciiFile();
00040 
          /**
           * Construct from a C stdio FILE handle.
           *
           * @param f    stdio stream handle
           * @param name optional name, defaults to NULL
           *             (NOTE(review): what the name identifies is not
           *             visible here -- confirm against CFile)
           */
00046     CAsciiFile(FILE* f, const char* name=NULL);
00047 
          /**
           * Construct from a file name.
           *
           * @param fname file name
           * @param rw    single-character mode flag, defaults to 'r'
           *              (NOTE(review): presumably 'r' = read, 'w' = write --
           *              confirm the accepted values in the implementation)
           * @param name  optional name, defaults to NULL
           */
00054     CAsciiFile(char* fname, char rw='r', const char* name=NULL);
00055 
          /** Virtual destructor. */
00057     virtual ~CAsciiFile();
00058 
          /**
           * Vector getters, one overload per element type.
           *
           * @param vector pointer taken by non-const reference, so the callee
           *               can assign it (NOTE(review): presumably receives a
           *               newly allocated buffer; ownership -- TODO confirm)
           * @param len    taken by non-const reference (presumably receives
           *               the number of elements)
           */
00066     virtual void get_vector(uint8_t*& vector, int32_t& len);
00067     virtual void get_vector(char*& vector, int32_t& len);
00068     virtual void get_vector(int32_t*& vector, int32_t& len);
00069     virtual void get_vector(float64_t*& vector, int32_t& len);
00070     virtual void get_vector(float32_t*& vector, int32_t& len);
00071     virtual void get_vector(int16_t*& vector, int32_t& len);
00072     virtual void get_vector(uint16_t*& vector, int32_t& len);
00074 
          /**
           * Dense matrix getters.
           *
           * uint8_t, char, int32_t, float32_t, float64_t, int16_t and uint16_t
           * are overloads of get_matrix(); int8_t, uint32_t, int64_t, uint64_t
           * and floatmax_t use distinctly named methods instead.
           *
           * @param matrix   pointer taken by non-const reference (presumably
           *                 receives the matrix data; layout and ownership
           *                 are not visible here -- TODO confirm)
           * @param num_feat taken by reference (presumably the feature count)
           * @param num_vec  taken by reference (presumably the vector count)
           */
00083     virtual void get_matrix(
00084             uint8_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00085     virtual void get_int8_matrix(
00086             int8_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00087     virtual void get_matrix(
00088             char*& matrix, int32_t& num_feat, int32_t& num_vec);
00089     virtual void get_matrix(
00090             int32_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00091     virtual void get_uint_matrix(
00092             uint32_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00093     virtual void get_long_matrix(
00094             int64_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00095     virtual void get_ulong_matrix(
00096             uint64_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00097     virtual void get_matrix(
00098             float32_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00099     virtual void get_matrix(
00100             float64_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00101     virtual void get_longreal_matrix(
00102             floatmax_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00103     virtual void get_matrix(
00104             int16_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00105     virtual void get_matrix(
00106             uint16_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00108 
          /**
           * N-dimensional array getters; same overloaded/named split per
           * element type as the dense matrix getters.
           *
           * @param array    pointer taken by non-const reference (presumably
           *                 receives the array data -- TODO confirm)
           * @param dims     pointer taken by non-const reference (presumably
           *                 receives the size of each dimension)
           * @param num_dims taken by reference (presumably the number of
           *                 dimensions)
           */
00117     virtual void get_ndarray(
00118                         uint8_t*& array, int32_t*& dims, int32_t& num_dims);
00119     virtual void get_int8_ndarray(
00120             int8_t*& array, int32_t*& dims, int32_t& num_dims);
00121     virtual void get_ndarray(
00122             char*& array, int32_t*& dims, int32_t& num_dims);
00123     virtual void get_ndarray(
00124             int32_t*& array, int32_t*& dims, int32_t& num_dims);
00125     virtual void get_uint_ndarray(
00126             uint32_t*& array, int32_t*& dims, int32_t& num_dims);
00127     virtual void get_long_ndarray(
00128             int64_t*& array, int32_t*& dims, int32_t& num_dims);
00129     virtual void get_ulong_ndarray(
00130             uint64_t*& array, int32_t*& dims, int32_t& num_dims);
00131     virtual void get_ndarray(
00132             float32_t*& array, int32_t*& dims, int32_t& num_dims);
00133     virtual void get_ndarray(
00134                         float64_t*& array, int32_t*& dims, int32_t& num_dims);
00135     virtual void get_longreal_ndarray(
00136                         floatmax_t*& array, int32_t*& dims, int32_t& num_dims);
00137     virtual void get_ndarray(
00138             int16_t*& array, int32_t*& dims, int32_t& num_dims);
00139     virtual void get_ndarray(
00140             uint16_t*& array, int32_t*& dims, int32_t& num_dims);
00142 
          /**
           * Sparse matrix getters, expressed as a pointer to SGSparseVector<T>.
           *
           * int8_t, uint32_t, int64_t, uint64_t and floatmax_t use the
           * get_*_sparsematrix names; the remaining element types (including
           * bool) are overloads of get_sparse_matrix().
           *
           * @param matrix   pointer taken by non-const reference (presumably
           *                 receives an array of sparse vectors -- TODO confirm)
           * @param num_feat taken by reference (presumably the feature count)
           * @param num_vec  taken by reference (presumably the vector count)
           */
00151     virtual void get_sparse_matrix(
00152             SGSparseVector<bool>*& matrix, int32_t& num_feat, int32_t& num_vec);
00153     virtual void get_sparse_matrix(
00154             SGSparseVector<uint8_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00155     virtual void get_int8_sparsematrix(
00156             SGSparseVector<int8_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00157     virtual void get_sparse_matrix(
00158             SGSparseVector<char>*& matrix, int32_t& num_feat, int32_t& num_vec);
00159     virtual void get_sparse_matrix(
00160             SGSparseVector<int32_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00161     virtual void get_uint_sparsematrix(
00162             SGSparseVector<uint32_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00163     virtual void get_long_sparsematrix(
00164             SGSparseVector<int64_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00165     virtual void get_ulong_sparsematrix(
00166             SGSparseVector<uint64_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00167     virtual void get_sparse_matrix(
00168             SGSparseVector<int16_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00169     virtual void get_sparse_matrix(
00170             SGSparseVector<uint16_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00171     virtual void get_sparse_matrix(
00172             SGSparseVector<float32_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00173     virtual void get_sparse_matrix(
00174             SGSparseVector<float64_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00175     virtual void get_longreal_sparsematrix(
00176             SGSparseVector<floatmax_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00178 
00179 
          /**
           * String list getters, expressed as a pointer to SGString<T>.
           *
           * @param strings        pointer taken by non-const reference
           *                       (presumably receives an array of strings --
           *                       TODO confirm)
           * @param num_str        taken by reference (presumably the number
           *                       of strings)
           * @param max_string_len taken by reference (presumably the length
           *                       of the longest string)
           */
00188     virtual void get_string_list(
00189             SGString<uint8_t>*& strings, int32_t& num_str,
00190             int32_t& max_string_len);
00191     virtual void get_int8_string_list(
00192             SGString<int8_t>*& strings, int32_t& num_str,
00193             int32_t& max_string_len);
00194     virtual void get_string_list(
00195             SGString<char>*& strings, int32_t& num_str,
00196             int32_t& max_string_len);
00197     virtual void get_string_list(
00198             SGString<int32_t>*& strings, int32_t& num_str,
00199             int32_t& max_string_len);
00200     virtual void get_uint_string_list(
00201             SGString<uint32_t>*& strings, int32_t& num_str,
00202             int32_t& max_string_len);
00203     virtual void get_string_list(
00204             SGString<int16_t>*& strings, int32_t& num_str,
00205             int32_t& max_string_len);
00206     virtual void get_string_list(
00207             SGString<uint16_t>*& strings, int32_t& num_str,
00208             int32_t& max_string_len);
00209     virtual void get_long_string_list(
00210             SGString<int64_t>*& strings, int32_t& num_str,
00211             int32_t& max_string_len);
00212     virtual void get_ulong_string_list(
00213             SGString<uint64_t>*& strings, int32_t& num_str,
00214             int32_t& max_string_len);
00215     virtual void get_string_list(
00216             SGString<float32_t>*& strings, int32_t& num_str,
00217             int32_t& max_string_len);
00218     virtual void get_string_list(
00219             SGString<float64_t>*& strings, int32_t& num_str,
00220             int32_t& max_string_len);
00221     virtual void get_longreal_string_list(
00222             SGString<floatmax_t>*& strings, int32_t& num_str,
00223             int32_t& max_string_len);
00225 
          /**
           * Vector setters, one overload per element type.
           *
           * @param vector pointer to const elements
           * @param len    passed by value (presumably the number of elements
           *               in vector)
           */
00233     virtual void set_vector(const uint8_t* vector, int32_t len);
00234     virtual void set_vector(const char* vector, int32_t len);
00235     virtual void set_vector(const int32_t* vector, int32_t len);
00236     virtual void set_vector( const float32_t* vector, int32_t len);
00237     virtual void set_vector(const float64_t* vector, int32_t len);
00238     virtual void set_vector(const int16_t* vector, int32_t len);
00239     virtual void set_vector(const uint16_t* vector, int32_t len);
00241 
00242 
          /**
           * Dense matrix setters; same overloaded/named split per element
           * type as the dense matrix getters.
           *
           * @param matrix   pointer to const elements
           * @param num_feat passed by value (presumably the feature count)
           * @param num_vec  passed by value (presumably the vector count)
           */
00250     virtual void set_matrix(
00251             const uint8_t* matrix, int32_t num_feat, int32_t num_vec);
00252     virtual void set_int8_matrix(
00253             const int8_t* matrix, int32_t num_feat, int32_t num_vec);
00254     virtual void set_matrix(
00255             const char* matrix, int32_t num_feat, int32_t num_vec);
00256     virtual void set_matrix(
00257             const int32_t* matrix, int32_t num_feat, int32_t num_vec);
00258     virtual void set_uint_matrix(
00259             const uint32_t* matrix, int32_t num_feat, int32_t num_vec);
00260     virtual void set_long_matrix(
00261             const int64_t* matrix, int32_t num_feat, int32_t num_vec);
00262     virtual void set_ulong_matrix(
00263             const uint64_t* matrix, int32_t num_feat, int32_t num_vec);
00264     virtual void set_matrix(
00265             const float32_t* matrix, int32_t num_feat, int32_t num_vec);
00266     virtual void set_matrix(
00267             const float64_t* matrix, int32_t num_feat, int32_t num_vec);
00268     virtual void set_longreal_matrix(
00269             const floatmax_t* matrix, int32_t num_feat, int32_t num_vec);
00270     virtual void set_matrix(
00271             const int16_t* matrix, int32_t num_feat, int32_t num_vec);
00272     virtual void set_matrix(
00273             const uint16_t* matrix, int32_t num_feat, int32_t num_vec);
00275 
          /**
           * N-dimensional array setters.
           *
           * @param array    pointer to const elements
           * @param dims     pointer to non-const int32_t (unlike array, it is
           *                 not const-qualified; presumably the size of each
           *                 dimension -- TODO confirm it is only read)
           * @param num_dims passed by value (presumably the number of
           *                 dimensions)
           */
00283         virtual void set_ndarray(
00284                         const uint8_t* array, int32_t* dims, int32_t num_dims);
00285     virtual void set_int8_ndarray(
00286             const int8_t* array, int32_t* dims, int32_t num_dims);
00287     virtual void set_ndarray(
00288             const char* array, int32_t* dims, int32_t num_dims);
00289     virtual void set_ndarray(
00290             const int32_t* array, int32_t* dims, int32_t num_dims);
00291     virtual void set_uint_ndarray(
00292             const uint32_t* array, int32_t* dims, int32_t num_dims);
00293     virtual void set_long_ndarray(
00294             const int64_t* array, int32_t* dims, int32_t num_dims);
00295     virtual void set_ulong_ndarray(
00296             const uint64_t* array, int32_t* dims, int32_t num_dims);
00297     virtual void set_ndarray(
00298             const float32_t* array, int32_t* dims, int32_t num_dims);
00299     virtual void set_ndarray(
00300                        const  float64_t* array, int32_t* dims, int32_t num_dims);
00301     virtual void set_longreal_ndarray(
00302                         const floatmax_t* array, int32_t* dims, int32_t num_dims);
00303     virtual void set_ndarray(
00304             const int16_t* array, int32_t* dims, int32_t num_dims);
00305     virtual void set_ndarray(
00306             const uint16_t* array, int32_t* dims, int32_t num_dims);
00308 
          /**
           * Sparse matrix setters; same overloaded/named split per element
           * type as the sparse matrix getters.
           *
           * @param matrix   pointer to const SGSparseVector<T>
           * @param num_feat passed by value (presumably the feature count)
           * @param num_vec  passed by value (presumably the vector count)
           */
00316     virtual void set_sparse_matrix(
00317             const SGSparseVector<bool>* matrix, int32_t num_feat, int32_t num_vec);
00318     virtual void set_sparse_matrix(
00319             const SGSparseVector<uint8_t>* matrix, int32_t num_feat, int32_t num_vec);
00320     virtual void set_int8_sparsematrix(
00321             const SGSparseVector<int8_t>* matrix, int32_t num_feat, int32_t num_vec);
00322     virtual void set_sparse_matrix(
00323             const SGSparseVector<char>* matrix, int32_t num_feat, int32_t num_vec);
00324     virtual void set_sparse_matrix(
00325             const SGSparseVector<int32_t>* matrix, int32_t num_feat, int32_t num_vec);
00326     virtual void set_uint_sparsematrix(
00327             const SGSparseVector<uint32_t>* matrix, int32_t num_feat, int32_t num_vec);
00328     virtual void set_long_sparsematrix(
00329             const SGSparseVector<int64_t>* matrix, int32_t num_feat, int32_t num_vec);
00330     virtual void set_ulong_sparsematrix(
00331             const SGSparseVector<uint64_t>* matrix, int32_t num_feat, int32_t num_vec);
00332     virtual void set_sparse_matrix(
00333             const SGSparseVector<int16_t>* matrix, int32_t num_feat, int32_t num_vec);
00334     virtual void set_sparse_matrix(
00335             const SGSparseVector<uint16_t>* matrix, int32_t num_feat, int32_t num_vec); 
00336     virtual void set_sparse_matrix(
00337             const SGSparseVector<float32_t>* matrix, int32_t num_feat, int32_t num_vec);
00338     virtual void set_sparse_matrix(
00339             const SGSparseVector<float64_t>* matrix, int32_t num_feat, int32_t num_vec);
00340     virtual void set_longreal_sparsematrix(
00341             const SGSparseVector<floatmax_t>* matrix, int32_t num_feat, int32_t num_vec);
00343 
00344 
          /**
           * String list setters; same overloaded/named split per element
           * type as the string list getters. Unlike the getters, there is no
           * max_string_len parameter.
           *
           * @param strings pointer to const SGString<T>
           * @param num_str passed by value (presumably the number of strings)
           */
00353     virtual void set_string_list(
00354             const SGString<uint8_t>* strings, int32_t num_str);
00355     virtual void set_int8_string_list(
00356             const SGString<int8_t>* strings, int32_t num_str);
00357     virtual void set_string_list(
00358             const SGString<char>* strings, int32_t num_str);
00359     virtual void set_string_list(
00360             const SGString<int32_t>* strings, int32_t num_str);
00361     virtual void set_uint_string_list(
00362             const SGString<uint32_t>* strings, int32_t num_str);
00363     virtual void set_string_list(
00364             const SGString<int16_t>* strings, int32_t num_str);
00365     virtual void set_string_list(
00366             const SGString<uint16_t>* strings, int32_t num_str);
00367     virtual void set_long_string_list(
00368             const SGString<int64_t>* strings, int32_t num_str);
00369     virtual void set_ulong_string_list(
00370             const SGString<uint64_t>* strings, int32_t num_str);
00371     virtual void set_string_list(
00372             const SGString<float32_t>* strings, int32_t num_str);
00373     virtual void set_string_list(
00374             const SGString<float64_t>* strings, int32_t num_str);
00375     virtual void set_longreal_string_list(
00376             const SGString<floatmax_t>* strings, int32_t num_str);
00378 
          /** @return the string literal "AsciiFile" */
00380     inline virtual const char* get_name() const { return "AsciiFile"; }
00381 
          /**
           * Static helper with the same parameter order as POSIX getdelim(),
           * except that the delimiter is a char rather than an int.
           * NOTE(review): presumably a portable stand-in for the POSIX
           * function with the same buffer-growing contract -- confirm
           * against the implementation.
           *
           * @param lineptr   pointer to the caller's line buffer pointer
           * @param n         pointer to the caller's buffer size
           * @param delimiter delimiter character
           * @param stream    stdio stream
           * @return ssize_t result (presumably characters read, or a
           *         negative value on failure -- TODO confirm)
           */
00397     static ssize_t getdelim(char **lineptr, size_t *n, char delimiter, FILE* stream);
00398 
          /**
           * Static helper with the same parameter list as POSIX getline().
           * NOTE(review): presumably getdelim() with a newline delimiter --
           * confirm against the implementation.
           *
           * @param lineptr pointer to the caller's line buffer pointer
           * @param n       pointer to the caller's buffer size
           * @param stream  stdio stream
           * @return ssize_t result (see getdelim())
           */
00409     static ssize_t getline(char **lineptr, size_t *n, FILE *stream);
00410 
          /**
           * Static tokenizer.
           *
           * @param delim delimiter character
           * @param s     substring, passed by value
           * @param ret   v_array<substring> taken by non-const reference
           *              (presumably receives the tokens; whether it is
           *              cleared first is not visible here -- TODO confirm)
           */
00419     static void tokenize(char delim, substring s, v_array<substring> &ret);
00420 
00421 private:
          /**
           * Private helper template, declared here and defined elsewhere.
           * NOTE(review): presumably converts one textual item delimited by
           * the two char pointers and appends it to items -- confirm.
           *
           * @param items    dynamic array of T
           * @param ptr_data char pointer (role not visible in this header)
           * @param ptr_item char pointer (role not visible in this header)
           */
00428     template <class T> void append_item(DynArray<T>* items, char* ptr_data, char* ptr_item);
00429 
00430 protected:
00431 
          /**
           * CIOBuffer member, visible to subclasses.
           * NOTE(review): presumably the buffer backing file I/O -- confirm.
           */
00433     CIOBuffer buf;
00434 };
00435 }
00436 #endif //__ASCII_FILE_H__
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation