SHOGUN  4.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
CSVFile.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2013 Evgeniy Andreev (gsomix)
8  */
9 
10 #ifndef __CSVFILE_H__
11 #define __CSVFILE_H__
12 
13 #include <shogun/lib/config.h>
14 
15 #include <shogun/lib/common.h>
16 #include <shogun/io/File.h>
17 
18 namespace shogun
19 {
20 class CDelimiterTokenizer;
21 class CLineReader;
22 class CParser;
23 template <class ST> class SGString;
24 template <class T> class SGSparseVector;
25 
/**
 * File class for comma-separated values (CSV) data, derived from CFile.
 *
 * The generated documentation for this header describes it as the class
 * "used to read data from comma-separated values (CSV) files"; the set_*
 * overloads declared below additionally provide the writing counterpart
 * of each get_* family. All non-inline members are only declared here;
 * their definitions are in CSVFile.cpp and are not visible in this header.
 */
29 class CCSVFile : public CFile
30 {
31 public:
    /** Default constructor. */
33  CCSVFile();
34 
    /**
     * Construct from a C stdio FILE handle.
     *
     * @param f    stdio stream to operate on
     * @param name optional name, NULL by default
     *             (NOTE(review): meaning is defined by CFile - confirm there)
     */
40  CCSVFile(FILE* f, const char* name=NULL);
41 
42 #ifdef HAVE_FDOPEN
43 
    /**
     * Construct from a file descriptor. Only declared when HAVE_FDOPEN is
     * defined.
     *
     * @param fd   file descriptor
     * @param mode mode string (presumably in fdopen() format - TODO confirm)
     * @param name optional name, NULL by default
     */
49  CCSVFile(int fd, const char* mode, const char* name=NULL);
50 #endif
51 
    /**
     * Construct from a file name.
     *
     * @param fname name of the file
     * @param rw    access mode character, 'r' by default
     *              (NOTE(review): other accepted values are not visible here)
     * @param name  optional name, NULL by default
     */
58  CCSVFile(const char* fname, char rw='r', const char* name=NULL);
59 
    /** Virtual destructor. */
61  virtual ~CCSVFile();
62 
    /**
     * Set the transpose flag.
     *
     * @param value new flag value (presumably stored in is_data_transposed;
     *              how it affects reading/writing is decided in CSVFile.cpp)
     */
67  void set_transpose(bool value);
68 
    /**
     * Set the delimiter character.
     *
     * @param delimiter delimiter to use (presumably the field separator kept
     *                  in m_delimiter - TODO confirm in CSVFile.cpp)
     */
73  void set_delimiter(char delimiter);
74 
    /**
     * Set the number of lines to skip.
     *
     * @param num_lines number of lines (presumably kept in m_num_to_skip and
     *                  applied to the start of the file - TODO confirm)
     */
79  void set_lines_to_skip(int32_t num_lines);
80 
    /**
     * Query statistics about the file.
     *
     * @param num_tokens output parameter, written through the reference
     * @return an int32_t statistic
     *         (NOTE(review): presumably the line count, with num_tokens the
     *         tokens per line - verify against CSVFile.cpp)
     */
86  int32_t get_stats(int32_t& num_tokens);
87 
    /* Everything up to the matching #endif is hidden from SWIG. */
88 #ifndef SWIG
89 
    /**
     * Read a vector; one overload per element type.
     *
     * @param vector output: pointer to the data that was read
     * @param len    output: number of elements
     *
     * NOTE(review): buffer allocation/ownership is not visible in this header.
     */
96  virtual void get_vector(int8_t*& vector, int32_t& len);
97  virtual void get_vector(uint8_t*& vector, int32_t& len);
98  virtual void get_vector(char*& vector, int32_t& len);
99  virtual void get_vector(int32_t*& vector, int32_t& len);
100  virtual void get_vector(uint32_t*& vector, int32_t& len);
101  virtual void get_vector(float64_t*& vector, int32_t& len);
102  virtual void get_vector(float32_t*& vector, int32_t& len);
103  virtual void get_vector(floatmax_t*& vector, int32_t& len);
104  virtual void get_vector(int16_t*& vector, int32_t& len);
105  virtual void get_vector(uint16_t*& vector, int32_t& len);
106  virtual void get_vector(int64_t*& vector, int32_t& len);
107  virtual void get_vector(uint64_t*& vector, int32_t& len);
109 
    /**
     * Read a matrix; one overload per element type.
     *
     * @param matrix   output: pointer to the data that was read
     * @param num_feat output: number of features
     * @param num_vec  output: number of vectors
     *
     * NOTE(review): memory layout and ownership are not visible here.
     */
118  virtual void get_matrix(
119  uint8_t*& matrix, int32_t& num_feat, int32_t& num_vec);
120  virtual void get_matrix(
121  int8_t*& matrix, int32_t& num_feat, int32_t& num_vec);
122  virtual void get_matrix(
123  char*& matrix, int32_t& num_feat, int32_t& num_vec);
124  virtual void get_matrix(
125  int32_t*& matrix, int32_t& num_feat, int32_t& num_vec);
126  virtual void get_matrix(
127  uint32_t*& matrix, int32_t& num_feat, int32_t& num_vec);
128  virtual void get_matrix(
129  int64_t*& matrix, int32_t& num_feat, int32_t& num_vec);
130  virtual void get_matrix(
131  uint64_t*& matrix, int32_t& num_feat, int32_t& num_vec);
132  virtual void get_matrix(
133  float32_t*& matrix, int32_t& num_feat, int32_t& num_vec);
134  virtual void get_matrix(
135  float64_t*& matrix, int32_t& num_feat, int32_t& num_vec);
136  virtual void get_matrix(
137  floatmax_t*& matrix, int32_t& num_feat, int32_t& num_vec);
138  virtual void get_matrix(
139  int16_t*& matrix, int32_t& num_feat, int32_t& num_vec);
140  virtual void get_matrix(
141  uint16_t*& matrix, int32_t& num_feat, int32_t& num_vec);
143 
    /**
     * Read an n-dimensional array. Only seven element types are declared
     * for this family (uint8_t, char, int32_t, float32_t, float64_t,
     * int16_t, uint16_t).
     *
     * @param array    output: pointer to the data that was read
     * @param dims     output: pointer to the per-dimension sizes
     * @param num_dims output: number of dimensions
     *
     * NOTE(review): whether CSV actually supports this is not visible here;
     * check the definitions in CSVFile.cpp.
     */
152  virtual void get_ndarray(
153  uint8_t*& array, int32_t*& dims, int32_t& num_dims);
154  virtual void get_ndarray(
155  char*& array, int32_t*& dims, int32_t& num_dims);
156  virtual void get_ndarray(
157  int32_t*& array, int32_t*& dims, int32_t& num_dims);
158  virtual void get_ndarray(
159  float32_t*& array, int32_t*& dims, int32_t& num_dims);
160  virtual void get_ndarray(
161  float64_t*& array, int32_t*& dims, int32_t& num_dims);
162  virtual void get_ndarray(
163  int16_t*& array, int32_t*& dims, int32_t& num_dims);
164  virtual void get_ndarray(
165  uint16_t*& array, int32_t*& dims, int32_t& num_dims);
167 
    /**
     * Read a sparse matrix as an array of SGSparseVector; one overload per
     * element type (including bool).
     *
     * @param matrix   output: pointer to the sparse vectors that were read
     * @param num_feat output: number of features
     * @param num_vec  output: number of vectors
     *
     * NOTE(review): whether CSV actually supports this is not visible here.
     */
176  virtual void get_sparse_matrix(
177  SGSparseVector<bool>*& matrix, int32_t& num_feat, int32_t& num_vec);
178  virtual void get_sparse_matrix(
179  SGSparseVector<uint8_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
180  virtual void get_sparse_matrix(
181  SGSparseVector<int8_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
182  virtual void get_sparse_matrix(
183  SGSparseVector<char>*& matrix, int32_t& num_feat, int32_t& num_vec);
184  virtual void get_sparse_matrix(
185  SGSparseVector<int32_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
186  virtual void get_sparse_matrix(
187  SGSparseVector<uint32_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
188  virtual void get_sparse_matrix(
189  SGSparseVector<int64_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
190  virtual void get_sparse_matrix(
191  SGSparseVector<uint64_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
192  virtual void get_sparse_matrix(
193  SGSparseVector<int16_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
194  virtual void get_sparse_matrix(
195  SGSparseVector<uint16_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
196  virtual void get_sparse_matrix(
197  SGSparseVector<float32_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
198  virtual void get_sparse_matrix(
199  SGSparseVector<float64_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
200  virtual void get_sparse_matrix(
201  SGSparseVector<floatmax_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
203 
    /**
     * Read a list of strings as an array of SGString; one overload per
     * element type.
     *
     * @param strings        output: pointer to the strings that were read
     * @param num_str        output: number of strings
     * @param max_string_len output: length of the longest string
     */
212  virtual void get_string_list(
213  SGString<uint8_t>*& strings, int32_t& num_str,
214  int32_t& max_string_len);
215  virtual void get_string_list(
216  SGString<int8_t>*& strings, int32_t& num_str,
217  int32_t& max_string_len);
218  virtual void get_string_list(
219  SGString<char>*& strings, int32_t& num_str,
220  int32_t& max_string_len);
221  virtual void get_string_list(
222  SGString<int32_t>*& strings, int32_t& num_str,
223  int32_t& max_string_len);
224  virtual void get_string_list(
225  SGString<uint32_t>*& strings, int32_t& num_str,
226  int32_t& max_string_len);
227  virtual void get_string_list(
228  SGString<int16_t>*& strings, int32_t& num_str,
229  int32_t& max_string_len);
230  virtual void get_string_list(
231  SGString<uint16_t>*& strings, int32_t& num_str,
232  int32_t& max_string_len);
233  virtual void get_string_list(
234  SGString<int64_t>*& strings, int32_t& num_str,
235  int32_t& max_string_len);
236  virtual void get_string_list(
237  SGString<uint64_t>*& strings, int32_t& num_str,
238  int32_t& max_string_len);
239  virtual void get_string_list(
240  SGString<float32_t>*& strings, int32_t& num_str,
241  int32_t& max_string_len);
242  virtual void get_string_list(
243  SGString<float64_t>*& strings, int32_t& num_str,
244  int32_t& max_string_len);
245  virtual void get_string_list(
246  SGString<floatmax_t>*& strings, int32_t& num_str,
247  int32_t& max_string_len);
249 
    /* Type-erased get_vector variant; intentionally left commented out. */
251  /*virtual void get_vector(void*& vector, int32_t& len, DataType& dtype);*/
252 
    /**
     * Write a vector; one overload per element type.
     *
     * @param vector data to write (not modified: pointer to const)
     * @param len    number of elements
     */
260  virtual void set_vector(const int8_t* vector, int32_t len);
261  virtual void set_vector(const uint8_t* vector, int32_t len);
262  virtual void set_vector(const char* vector, int32_t len);
263  virtual void set_vector(const int32_t* vector, int32_t len);
264  virtual void set_vector(const uint32_t* vector, int32_t len);
265  virtual void set_vector(const float32_t* vector, int32_t len);
266  virtual void set_vector(const float64_t* vector, int32_t len);
267  virtual void set_vector(const floatmax_t* vector, int32_t len);
268  virtual void set_vector(const int16_t* vector, int32_t len);
269  virtual void set_vector(const uint16_t* vector, int32_t len);
270  virtual void set_vector(const int64_t* vector, int32_t len);
271  virtual void set_vector(const uint64_t* vector, int32_t len);
273 
    /**
     * Write a matrix; one overload per element type.
     *
     * @param matrix   data to write (not modified: pointer to const)
     * @param num_feat number of features
     * @param num_vec  number of vectors
     */
281  virtual void set_matrix(
282  const uint8_t* matrix, int32_t num_feat, int32_t num_vec);
283  virtual void set_matrix(
284  const int8_t* matrix, int32_t num_feat, int32_t num_vec);
285  virtual void set_matrix(
286  const char* matrix, int32_t num_feat, int32_t num_vec);
287  virtual void set_matrix(
288  const int32_t* matrix, int32_t num_feat, int32_t num_vec);
289  virtual void set_matrix(
290  const uint32_t* matrix, int32_t num_feat, int32_t num_vec);
291  virtual void set_matrix(
292  const int64_t* matrix, int32_t num_feat, int32_t num_vec);
293  virtual void set_matrix(
294  const uint64_t* matrix, int32_t num_feat, int32_t num_vec);
295  virtual void set_matrix(
296  const float32_t* matrix, int32_t num_feat, int32_t num_vec);
297  virtual void set_matrix(
298  const float64_t* matrix, int32_t num_feat, int32_t num_vec);
299  virtual void set_matrix(
300  const floatmax_t* matrix, int32_t num_feat, int32_t num_vec);
301  virtual void set_matrix(
302  const int16_t* matrix, int32_t num_feat, int32_t num_vec);
303  virtual void set_matrix(
304  const uint16_t* matrix, int32_t num_feat, int32_t num_vec);
306 
    /**
     * Write a sparse matrix given as an array of SGSparseVector; one
     * overload per element type (including bool).
     *
     * @param matrix   sparse vectors to write (not modified: pointer to const)
     * @param num_feat number of features
     * @param num_vec  number of vectors
     *
     * NOTE(review): whether CSV actually supports this is not visible here.
     */
314  virtual void set_sparse_matrix(
315  const SGSparseVector<bool>* matrix, int32_t num_feat, int32_t num_vec);
316  virtual void set_sparse_matrix(
317  const SGSparseVector<uint8_t>* matrix, int32_t num_feat, int32_t num_vec);
318  virtual void set_sparse_matrix(
319  const SGSparseVector<int8_t>* matrix, int32_t num_feat, int32_t num_vec);
320  virtual void set_sparse_matrix(
321  const SGSparseVector<char>* matrix, int32_t num_feat, int32_t num_vec);
322  virtual void set_sparse_matrix(
323  const SGSparseVector<int32_t>* matrix, int32_t num_feat, int32_t num_vec);
324  virtual void set_sparse_matrix(
325  const SGSparseVector<uint32_t>* matrix, int32_t num_feat, int32_t num_vec);
326  virtual void set_sparse_matrix(
327  const SGSparseVector<int64_t>* matrix, int32_t num_feat, int32_t num_vec);
328  virtual void set_sparse_matrix(
329  const SGSparseVector<uint64_t>* matrix, int32_t num_feat, int32_t num_vec);
330  virtual void set_sparse_matrix(
331  const SGSparseVector<int16_t>* matrix, int32_t num_feat, int32_t num_vec);
332  virtual void set_sparse_matrix(
333  const SGSparseVector<uint16_t>* matrix, int32_t num_feat, int32_t num_vec);
334  virtual void set_sparse_matrix(
335  const SGSparseVector<float32_t>* matrix, int32_t num_feat, int32_t num_vec);
336  virtual void set_sparse_matrix(
337  const SGSparseVector<float64_t>* matrix, int32_t num_feat, int32_t num_vec);
338  virtual void set_sparse_matrix(
339  const SGSparseVector<floatmax_t>* matrix, int32_t num_feat, int32_t num_vec);
341 
    /**
     * Write a list of strings given as an array of SGString; one overload
     * per element type.
     *
     * @param strings strings to write (not modified: pointer to const)
     * @param num_str number of strings
     */
350  virtual void set_string_list(
351  const SGString<uint8_t>* strings, int32_t num_str);
352  virtual void set_string_list(
353  const SGString<int8_t>* strings, int32_t num_str);
354  virtual void set_string_list(
355  const SGString<char>* strings, int32_t num_str);
356  virtual void set_string_list(
357  const SGString<int32_t>* strings, int32_t num_str);
358  virtual void set_string_list(
359  const SGString<uint32_t>* strings, int32_t num_str);
360  virtual void set_string_list(
361  const SGString<int16_t>* strings, int32_t num_str);
362  virtual void set_string_list(
363  const SGString<uint16_t>* strings, int32_t num_str);
364  virtual void set_string_list(
365  const SGString<int64_t>* strings, int32_t num_str);
366  virtual void set_string_list(
367  const SGString<uint64_t>* strings, int32_t num_str);
368  virtual void set_string_list(
369  const SGString<float32_t>* strings, int32_t num_str);
370  virtual void set_string_list(
371  const SGString<float64_t>* strings, int32_t num_str);
372  virtual void set_string_list(
373  const SGString<floatmax_t>* strings, int32_t num_str);
375 #endif // #ifndef SWIG
376 
    /** @return the object name, the string literal "CSVFile" */
377  virtual const char* get_name() const { return "CSVFile"; }
378 
379 private:
    /** Initialisation helper (presumably shared by the constructors - TODO confirm). */
381  void init();
382 
    /** Initialisation helper (presumably applies default settings - TODO confirm). */
384  void init_with_defaults();
385 
    /**
     * Skip lines of the input.
     *
     * @param num_lines number of lines to skip
     */
387  void skip_lines(int32_t num_lines);
388 
379 private:
    /** Line reader; CLineReader is documented as buffered reading from an ASCII file. */
391  CLineReader* m_line_reader;
392 
    /** Parser; CParser is documented as a class for reading from a string. */
394  CParser* m_parser;
395 
    /** Tokenizer (presumably splits the input into lines - TODO confirm). */
397  CDelimiterTokenizer* m_line_tokenizer;
398 
    /** Tokenizer (presumably splits a line into fields at m_delimiter - TODO confirm). */
400  CDelimiterTokenizer* m_tokenizer;
401 
    /** Transpose flag (presumably the value passed to set_transpose()). */
403  bool is_data_transposed;
404 
    /** Delimiter character (presumably the value passed to set_delimiter()). */
406  char m_delimiter;
407 
    /** Number of lines to skip (presumably the value passed to set_lines_to_skip()). */
409  int32_t m_num_to_skip;
410 };
411 
412 }
413 
414 #endif
virtual ~CCSVFile()
Definition: CSVFile.cpp:48
void set_delimiter(char delimiter)
Definition: CSVFile.cpp:61
virtual void get_matrix(uint8_t *&matrix, int32_t &num_feat, int32_t &num_vec)
void set_transpose(bool value)
Definition: CSVFile.cpp:56
virtual void set_vector(const int8_t *vector, int32_t len)
virtual void set_matrix(const uint8_t *matrix, int32_t num_feat, int32_t num_vec)
virtual void set_string_list(const SGString< uint8_t > *strings, int32_t num_str)
Class for buffered reading from an ASCII file.
Definition: LineReader.h:24
void set_lines_to_skip(int32_t num_lines)
Definition: CSVFile.cpp:71
int32_t get_stats(int32_t &num_tokens)
Definition: CSVFile.cpp:76
Class CSVFile is used to read data from comma-separated values (CSV) files. See http://en.wikipedia.org/wiki/Comma-separated_values.
Definition: CSVFile.h:29
Class for reading from a string.
Definition: Parser.h:23
double float64_t
Definition: common.h:50
virtual void get_vector(int8_t *&vector, int32_t &len)
long double floatmax_t
Definition: common.h:51
A File access base class.
Definition: File.h:34
virtual void get_sparse_matrix(SGSparseVector< bool > *&matrix, int32_t &num_feat, int32_t &num_vec)
float float32_t
Definition: common.h:49
All classes and functions are contained in the shogun namespace.
Definition: class_list.h:18
The class CDelimiterTokenizer is used to tokenize a SGVector into tokens using custom chars as ...
virtual void set_sparse_matrix(const SGSparseVector< bool > *matrix, int32_t num_feat, int32_t num_vec)
template class SGSparseVector The assumption is that the stored SGSparseVectorEntry* vector is orde...
virtual const char * get_name() const
Definition: CSVFile.h:377
virtual void get_string_list(SGString< uint8_t > *&strings, int32_t &num_str, int32_t &max_string_len)
virtual void get_ndarray(uint8_t *&array, int32_t *&dims, int32_t &num_dims)

SHOGUN Machine Learning Toolbox - Documentation