SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
LibSVMFile.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2014 Jiaolong Xu
8  * Written (W) 2013 Evgeniy Andreev (gsomix)
9  * Written (W) 2010 Soeren Sonnenburg
10  */
11 
12 #ifndef __LIBSVMFILE_H__
13 #define __LIBSVMFILE_H__
14 
15 #include <shogun/lib/config.h>
16 #include <shogun/io/File.h>
17 
18 namespace shogun
19 {
20 
/* Forward declarations: within this header these types appear only behind
 * pointers or references (tokenizer/reader/parser members and the
 * SGString / SGSparseVector parameters), so their full definitions are not
 * needed here. */
21 class CDelimiterTokenizer;
22 class CLineReader;
23 class CParser;
24 template <class ST> class SGString;
25 template <class T> class SGSparseVector;
26 
/**
 * File class derived from CFile that reports its name as "LibSVMFile".
 *
 * What this header shows:
 *  - the sparse-matrix getters/setters (plain, with labels, with
 *    multilabels) are only declared here; their definitions live elsewhere;
 *  - every dense accessor (get/set_vector, get/set_matrix, get_ndarray,
 *    get/set_string_list) is defined inline with an empty body, i.e. calling
 *    it does nothing and leaves its arguments untouched.
 *
 * NOTE(review): presumably reads/writes the LibSVM sparse text format
 * (a label followed by index:value pairs) -- confirm against the
 * implementation file.
 * NOTE(review): SGVector is used below but is neither forward-declared nor
 * included directly in this header; presumably it arrives via
 * shogun/io/File.h -- confirm.
 */
34 class CLibSVMFile : public CFile
35 {
36 public:
    /** Default constructor (takes no arguments; defined out of line). */
38  CLibSVMFile();
39 
    /** Construct from a C stdio FILE handle.
     *
     * @param f    FILE handle to operate on
     * @param name optional name, defaults to NULL (meaning is defined by
     *             the implementation / base class, not visible here)
     */
45  CLibSVMFile(FILE* f, const char* name=NULL);
46 
    /** Construct from a file name.
     *
     * @param fname name of the file
     * @param rw    access mode character, defaults to 'r'
     *              (presumably 'r' = read -- confirm against CFile)
     * @param name  optional name, defaults to NULL
     */
53  CLibSVMFile(const char* fname, char rw='r', const char* name=NULL);
54 
    /** Virtual destructor (defined out of line). */
56  virtual ~CLibSVMFile();
57 
58 #ifndef SWIG // SWIG should skip this part
59 
    /** Dense vector getters.
     *
     * All overloads have an empty inline body: the call is a no-op and
     * neither `vector` nor `len` is modified.
     */
66  virtual void get_vector(int8_t*& vector, int32_t& len) { };
67  virtual void get_vector(uint8_t*& vector, int32_t& len) { };
68  virtual void get_vector(char*& vector, int32_t& len) { };
69  virtual void get_vector(int32_t*& vector, int32_t& len) { };
70  virtual void get_vector(uint32_t*& vector, int32_t& len) { };
71  virtual void get_vector(float64_t*& vector, int32_t& len) { };
72  virtual void get_vector(float32_t*& vector, int32_t& len) { };
73  virtual void get_vector(floatmax_t*& vector, int32_t& len) { };
74  virtual void get_vector(int16_t*& vector, int32_t& len) { };
75  virtual void get_vector(uint16_t*& vector, int32_t& len) { };
76  virtual void get_vector(int64_t*& vector, int32_t& len) { };
77  virtual void get_vector(uint64_t*& vector, int32_t& len) { };
79 
    /** Dense matrix getters.
     *
     * All overloads have an empty inline body: the call is a no-op and
     * `matrix`, `num_feat` and `num_vec` are left unmodified.
     */
88  virtual void get_matrix(
89  uint8_t*& matrix, int32_t& num_feat, int32_t& num_vec) { };
90  virtual void get_matrix(
91  int8_t*& matrix, int32_t& num_feat, int32_t& num_vec) { };
92  virtual void get_matrix(
93  char*& matrix, int32_t& num_feat, int32_t& num_vec) { };
94  virtual void get_matrix(
95  int32_t*& matrix, int32_t& num_feat, int32_t& num_vec) { };
96  virtual void get_matrix(
97  uint32_t*& matrix, int32_t& num_feat, int32_t& num_vec) { };
98  virtual void get_matrix(
99  int64_t*& matrix, int32_t& num_feat, int32_t& num_vec) { };
100  virtual void get_matrix(
101  uint64_t*& matrix, int32_t& num_feat, int32_t& num_vec) { };
102  virtual void get_matrix(
103  float32_t*& matrix, int32_t& num_feat, int32_t& num_vec) { };
104  virtual void get_matrix(
105  float64_t*& matrix, int32_t& num_feat, int32_t& num_vec) { };
106  virtual void get_matrix(
107  floatmax_t*& matrix, int32_t& num_feat, int32_t& num_vec) { };
108  virtual void get_matrix(
109  int16_t*& matrix, int32_t& num_feat, int32_t& num_vec) { };
110  virtual void get_matrix(
111  uint16_t*& matrix, int32_t& num_feat, int32_t& num_vec) { };
113 
    /** N-dimensional array getters.
     *
     * All overloads have an empty inline body: the call is a no-op and
     * `array`, `dims` and `num_dims` are left unmodified.
     */
122  virtual void get_ndarray(
123  uint8_t*& array, int32_t*& dims, int32_t& num_dims) { };
124  virtual void get_ndarray(
125  char*& array, int32_t*& dims, int32_t& num_dims) { };
126  virtual void get_ndarray(
127  int32_t*& array, int32_t*& dims, int32_t& num_dims) { };
128  virtual void get_ndarray(
129  float32_t*& array, int32_t*& dims, int32_t& num_dims){ };
130  virtual void get_ndarray(
131  float64_t*& array, int32_t*& dims, int32_t& num_dims){ };
132  virtual void get_ndarray(
133  int16_t*& array, int32_t*& dims, int32_t& num_dims){ };
134  virtual void get_ndarray(
135  uint16_t*& array, int32_t*& dims, int32_t& num_dims){ };
137 
    /** Sparse matrix getters (declared here, defined elsewhere).
     *
     * One overload per element type of SGSparseVector.
     *
     * @param matrix   output, passed by reference to pointer
     * @param num_feat output; presumably the number of features -- confirm
     * @param num_vec  output; presumably the number of vectors -- confirm
     */
146  virtual void get_sparse_matrix(
147  SGSparseVector<bool>*& matrix, int32_t& num_feat, int32_t& num_vec);
148  virtual void get_sparse_matrix(
149  SGSparseVector<uint8_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
150  virtual void get_sparse_matrix(
151  SGSparseVector<int8_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
152  virtual void get_sparse_matrix(
153  SGSparseVector<char>*& matrix, int32_t& num_feat, int32_t& num_vec);
154  virtual void get_sparse_matrix(
155  SGSparseVector<int32_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
156  virtual void get_sparse_matrix(
157  SGSparseVector<uint32_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
158  virtual void get_sparse_matrix(
159  SGSparseVector<int64_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
160  virtual void get_sparse_matrix(
161  SGSparseVector<uint64_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
162  virtual void get_sparse_matrix(
163  SGSparseVector<int16_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
164  virtual void get_sparse_matrix(
165  SGSparseVector<uint16_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
166  virtual void get_sparse_matrix(
167  SGSparseVector<float32_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
168  virtual void get_sparse_matrix(
169  SGSparseVector<float64_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
170  virtual void get_sparse_matrix(
171  SGSparseVector<floatmax_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
173 
    /** Sparse matrix getters with an additional label output
     * (declared here, defined elsewhere).
     *
     * @param matrix      output, passed by reference to pointer
     * @param num_feat    output; presumably the number of features -- confirm
     * @param num_vec     output; presumably the number of vectors -- confirm
     * @param labels      output array of float64_t labels
     * @param load_labels defaults to true; presumably controls whether
     *                    `labels` is filled -- confirm in the implementation
     */
182  virtual void get_sparse_matrix(
183  SGSparseVector<bool>*& matrix, int32_t& num_feat, int32_t& num_vec,
184  float64_t*& labels, bool load_labels=true);
185  virtual void get_sparse_matrix(
186  SGSparseVector<uint8_t>*& matrix, int32_t& num_feat, int32_t& num_vec,
187  float64_t*& labels, bool load_labels=true);
188  virtual void get_sparse_matrix(
189  SGSparseVector<int8_t>*& matrix, int32_t& num_feat, int32_t& num_vec,
190  float64_t*& labels, bool load_labels=true);
191  virtual void get_sparse_matrix(
192  SGSparseVector<char>*& matrix, int32_t& num_feat, int32_t& num_vec,
193  float64_t*& labels, bool load_labels=true);
194  virtual void get_sparse_matrix(
195  SGSparseVector<int32_t>*& matrix, int32_t& num_feat, int32_t& num_vec,
196  float64_t*& labels, bool load_labels=true);
197  virtual void get_sparse_matrix(
198  SGSparseVector<uint32_t>*& matrix, int32_t& num_feat, int32_t& num_vec,
199  float64_t*& labels, bool load_labels=true);
200  virtual void get_sparse_matrix(
201  SGSparseVector<int64_t>*& matrix, int32_t& num_feat, int32_t& num_vec,
202  float64_t*& labels, bool load_labels=true);
203  virtual void get_sparse_matrix(
204  SGSparseVector<uint64_t>*& matrix, int32_t& num_feat, int32_t& num_vec,
205  float64_t*& labels, bool load_labels=true);
206  virtual void get_sparse_matrix(
207  SGSparseVector<int16_t>*& matrix, int32_t& num_feat, int32_t& num_vec,
208  float64_t*& labels, bool load_labels=true);
209  virtual void get_sparse_matrix(
210  SGSparseVector<uint16_t>*& matrix, int32_t& num_feat, int32_t& num_vec,
211  float64_t*& labels, bool load_labels=true);
212  virtual void get_sparse_matrix(
213  SGSparseVector<float32_t>*& matrix, int32_t& num_feat, int32_t& num_vec,
214  float64_t*& labels, bool load_labels=true);
215  virtual void get_sparse_matrix(
216  SGSparseVector<float64_t>*& matrix, int32_t& num_feat, int32_t& num_vec,
217  float64_t*& labels, bool load_labels=true);
218  virtual void get_sparse_matrix(
219  SGSparseVector<floatmax_t>*& matrix, int32_t& num_feat, int32_t& num_vec,
220  float64_t*& labels, bool load_labels=true);
221 
    /** String list getters.
     *
     * All overloads have an empty inline body: the call is a no-op and
     * `strings`, `num_str` and `max_string_len` are left unmodified.
     */
230  virtual void get_string_list(
231  SGString<uint8_t>*& strings, int32_t& num_str,
232  int32_t& max_string_len) { };
233  virtual void get_string_list(
234  SGString<int8_t>*& strings, int32_t& num_str,
235  int32_t& max_string_len) { };
236  virtual void get_string_list(
237  SGString<char>*& strings, int32_t& num_str,
238  int32_t& max_string_len) { };
239  virtual void get_string_list(
240  SGString<int32_t>*& strings, int32_t& num_str,
241  int32_t& max_string_len) { };
242  virtual void get_string_list(
243  SGString<uint32_t>*& strings, int32_t& num_str,
244  int32_t& max_string_len) { };
245  virtual void get_string_list(
246  SGString<int16_t>*& strings, int32_t& num_str,
247  int32_t& max_string_len) { };
248  virtual void get_string_list(
249  SGString<uint16_t>*& strings, int32_t& num_str,
250  int32_t& max_string_len) { };
251  virtual void get_string_list(
252  SGString<int64_t>*& strings, int32_t& num_str,
253  int32_t& max_string_len) { };
254  virtual void get_string_list(
255  SGString<uint64_t>*& strings, int32_t& num_str,
256  int32_t& max_string_len) { };
257  virtual void get_string_list(
258  SGString<float32_t>*& strings, int32_t& num_str,
259  int32_t& max_string_len) { };
260  virtual void get_string_list(
261  SGString<float64_t>*& strings, int32_t& num_str,
262  int32_t& max_string_len) { };
263  virtual void get_string_list(
264  SGString<floatmax_t>*& strings, int32_t& num_str,
265  int32_t& max_string_len) { };
267 
    /** Sparse matrix getters with multilabel output
     * (declared here, defined elsewhere).
     *
     * Unlike the overloads above these are NOT declared virtual.
     *
     * @param matrix_feat output feature matrix, passed by reference to pointer
     * @param num_feat    output; presumably the number of features -- confirm
     * @param num_vec     output; presumably the number of vectors -- confirm
     * @param multilabel  output, pointer to SGVector<float64_t>; presumably
     *                    one label vector per example -- confirm
     * @param num_classes output; presumably the number of distinct classes
     *                    -- confirm
     * @param load_labels defaults to true; presumably controls whether the
     *                    labels are loaded -- confirm in the implementation
     */
276  void get_sparse_matrix(
277  SGSparseVector<bool>*& matrix_feat, int32_t & num_feat, int32_t & num_vec,
278  SGVector<float64_t>*& multilabel, int32_t & num_classes, bool load_labels=true);
279  void get_sparse_matrix(
280  SGSparseVector<uint8_t>*& matrix_feat, int32_t & num_feat, int32_t & num_vec,
281  SGVector<float64_t>*& multilabel, int32_t & num_classes, bool load_labels=true);
282  void get_sparse_matrix(
283  SGSparseVector<int8_t>*& matrix_feat, int32_t & num_feat, int32_t & num_vec,
284  SGVector<float64_t>*& multilabel, int32_t & num_classes, bool load_labels=true);
285  void get_sparse_matrix(
286  SGSparseVector<char>*& matrix_feat, int32_t & num_feat, int32_t & num_vec,
287  SGVector<float64_t>*& multilabel, int32_t & num_classes, bool load_labels=true);
288  void get_sparse_matrix(
289  SGSparseVector<int32_t>*& matrix_feat, int32_t & num_feat, int32_t & num_vec,
290  SGVector<float64_t>*& multilabel, int32_t & num_classes, bool load_labels=true);
291  void get_sparse_matrix(
292  SGSparseVector<uint32_t>*& matrix_feat, int32_t & num_feat, int32_t & num_vec,
293  SGVector<float64_t>*& multilabel, int32_t & num_classes, bool load_labels=true);
294  void get_sparse_matrix(
295  SGSparseVector<int64_t>*& matrix_feat, int32_t & num_feat, int32_t & num_vec,
296  SGVector<float64_t>*& multilabel, int32_t & num_classes, bool load_labels=true);
297  void get_sparse_matrix(
298  SGSparseVector<uint64_t>*& matrix_feat, int32_t & num_feat, int32_t & num_vec,
299  SGVector<float64_t>*& multilabel, int32_t & num_classes, bool load_labels=true);
300  void get_sparse_matrix(
301  SGSparseVector<int16_t>*& matrix_feat, int32_t & num_feat, int32_t & num_vec,
302  SGVector<float64_t>*& multilabel, int32_t & num_classes, bool load_labels=true);
303  void get_sparse_matrix(
304  SGSparseVector<uint16_t>*& matrix_feat, int32_t & num_feat, int32_t & num_vec,
305  SGVector<float64_t>*& multilabel, int32_t & num_classes, bool load_labels=true);
306  void get_sparse_matrix(
307  SGSparseVector<float32_t>*& matrix_feat, int32_t & num_feat, int32_t & num_vec,
308  SGVector<float64_t>*& multilabel, int32_t & num_classes, bool load_labels=true);
309  void get_sparse_matrix(
310  SGSparseVector<float64_t>*& matrix_feat, int32_t & num_feat, int32_t & num_vec,
311  SGVector<float64_t>*& multilabel, int32_t & num_classes, bool load_labels=true);
312  void get_sparse_matrix(
313  SGSparseVector<floatmax_t>*& matrix_feat, int32_t & num_feat, int32_t & num_vec,
314  SGVector<float64_t>*& multilabel, int32_t & num_classes, bool load_labels=true);
316 
    /* The type-erased get_vector overload below is commented out, so it is
     * not part of this class' interface. */
318  /*virtual void get_vector(void*& vector, int32_t& len, DataType& dtype);*/
319 
    /** Dense vector setters.
     *
     * All overloads have an empty inline body: the call is a no-op and
     * nothing is written.
     */
327  virtual void set_vector(const int8_t* vector, int32_t len) { };
328  virtual void set_vector(const uint8_t* vector, int32_t len) { };
329  virtual void set_vector(const char* vector, int32_t len) { };
330  virtual void set_vector(const int32_t* vector, int32_t len) { };
331  virtual void set_vector(const uint32_t* vector, int32_t len) { };
332  virtual void set_vector(const float32_t* vector, int32_t len) { };
333  virtual void set_vector(const float64_t* vector, int32_t len) { };
334  virtual void set_vector(const floatmax_t* vector, int32_t len) { };
335  virtual void set_vector(const int16_t* vector, int32_t len) { };
336  virtual void set_vector(const uint16_t* vector, int32_t len) { };
337  virtual void set_vector(const int64_t* vector, int32_t len) { };
338  virtual void set_vector(const uint64_t* vector, int32_t len) { };
340 
    /** Dense matrix setters.
     *
     * All overloads have an empty inline body: the call is a no-op and
     * nothing is written.
     */
348  virtual void set_matrix(
349  const uint8_t* matrix, int32_t num_feat, int32_t num_vec) { };
350  virtual void set_matrix(
351  const int8_t* matrix, int32_t num_feat, int32_t num_vec) { };
352  virtual void set_matrix(
353  const char* matrix, int32_t num_feat, int32_t num_vec) { };
354  virtual void set_matrix(
355  const int32_t* matrix, int32_t num_feat, int32_t num_vec) { };
356  virtual void set_matrix(
357  const uint32_t* matrix, int32_t num_feat, int32_t num_vec) { };
358  virtual void set_matrix(
359  const int64_t* matrix, int32_t num_feat, int32_t num_vec) { };
360  virtual void set_matrix(
361  const uint64_t* matrix, int32_t num_feat, int32_t num_vec) { };
362  virtual void set_matrix(
363  const float32_t* matrix, int32_t num_feat, int32_t num_vec) { };
364  virtual void set_matrix(
365  const float64_t* matrix, int32_t num_feat, int32_t num_vec) { };
366  virtual void set_matrix(
367  const floatmax_t* matrix, int32_t num_feat, int32_t num_vec) { };
368  virtual void set_matrix(
369  const int16_t* matrix, int32_t num_feat, int32_t num_vec) { };
370  virtual void set_matrix(
371  const uint16_t* matrix, int32_t num_feat, int32_t num_vec) { };
373 
    /** Sparse matrix setters (declared here, defined elsewhere).
     *
     * One overload per element type of SGSparseVector.
     *
     * @param matrix   input sparse vectors (pointer to const)
     * @param num_feat presumably the number of features -- confirm
     * @param num_vec  presumably the number of vectors in `matrix` -- confirm
     */
381  virtual void set_sparse_matrix(
382  const SGSparseVector<bool>* matrix, int32_t num_feat, int32_t num_vec);
383  virtual void set_sparse_matrix(
384  const SGSparseVector<uint8_t>* matrix, int32_t num_feat, int32_t num_vec);
385  virtual void set_sparse_matrix(
386  const SGSparseVector<int8_t>* matrix, int32_t num_feat, int32_t num_vec);
387  virtual void set_sparse_matrix(
388  const SGSparseVector<char>* matrix, int32_t num_feat, int32_t num_vec);
389  virtual void set_sparse_matrix(
390  const SGSparseVector<int32_t>* matrix, int32_t num_feat, int32_t num_vec);
391  virtual void set_sparse_matrix(
392  const SGSparseVector<uint32_t>* matrix, int32_t num_feat, int32_t num_vec);
393  virtual void set_sparse_matrix(
394  const SGSparseVector<int64_t>* matrix, int32_t num_feat, int32_t num_vec);
395  virtual void set_sparse_matrix(
396  const SGSparseVector<uint64_t>* matrix, int32_t num_feat, int32_t num_vec);
397  virtual void set_sparse_matrix(
398  const SGSparseVector<int16_t>* matrix, int32_t num_feat, int32_t num_vec);
399  virtual void set_sparse_matrix(
400  const SGSparseVector<uint16_t>* matrix, int32_t num_feat, int32_t num_vec);
401  virtual void set_sparse_matrix(
402  const SGSparseVector<float32_t>* matrix, int32_t num_feat, int32_t num_vec);
403  virtual void set_sparse_matrix(
404  const SGSparseVector<float64_t>* matrix, int32_t num_feat, int32_t num_vec);
405  virtual void set_sparse_matrix(
406  const SGSparseVector<floatmax_t>* matrix, int32_t num_feat, int32_t num_vec);
408 
    /** Sparse matrix setters with an additional label input
     * (declared here, defined elsewhere).
     *
     * @param matrix   input sparse vectors (pointer to const)
     * @param num_feat presumably the number of features -- confirm
     * @param num_vec  presumably the number of vectors in `matrix` -- confirm
     * @param labels   input array of float64_t labels; presumably one per
     *                 vector -- confirm
     */
416  virtual void set_sparse_matrix(
417  const SGSparseVector<bool>* matrix, int32_t num_feat, int32_t num_vec,
418  const float64_t* labels);
419  virtual void set_sparse_matrix(
420  const SGSparseVector<uint8_t>* matrix, int32_t num_feat, int32_t num_vec,
421  const float64_t* labels);
422  virtual void set_sparse_matrix(
423  const SGSparseVector<int8_t>* matrix, int32_t num_feat, int32_t num_vec,
424  const float64_t* labels);
425  virtual void set_sparse_matrix(
426  const SGSparseVector<char>* matrix, int32_t num_feat, int32_t num_vec,
427  const float64_t* labels);
428  virtual void set_sparse_matrix(
429  const SGSparseVector<int32_t>* matrix, int32_t num_feat, int32_t num_vec,
430  const float64_t* labels);
431  virtual void set_sparse_matrix(
432  const SGSparseVector<uint32_t>* matrix, int32_t num_feat, int32_t num_vec,
433  const float64_t* labels);
434  virtual void set_sparse_matrix(
435  const SGSparseVector<int64_t>* matrix, int32_t num_feat, int32_t num_vec,
436  const float64_t* labels);
437  virtual void set_sparse_matrix(
438  const SGSparseVector<uint64_t>* matrix, int32_t num_feat, int32_t num_vec,
439  const float64_t* labels);
440  virtual void set_sparse_matrix(
441  const SGSparseVector<int16_t>* matrix, int32_t num_feat, int32_t num_vec,
442  const float64_t* labels);
443  virtual void set_sparse_matrix(
444  const SGSparseVector<uint16_t>* matrix, int32_t num_feat, int32_t num_vec,
445  const float64_t* labels);
446  virtual void set_sparse_matrix(
447  const SGSparseVector<float32_t>* matrix, int32_t num_feat, int32_t num_vec,
448  const float64_t* labels);
449  virtual void set_sparse_matrix(
450  const SGSparseVector<float64_t>* matrix, int32_t num_feat, int32_t num_vec,
451  const float64_t* labels);
452  virtual void set_sparse_matrix(
453  const SGSparseVector<floatmax_t>* matrix, int32_t num_feat, int32_t num_vec,
454  const float64_t* labels);
456 
    /** Sparse matrix setters with multilabel input
     * (declared here, defined elsewhere).
     *
     * Unlike the overloads above these are NOT declared virtual.
     *
     * @param matrix     input sparse vectors (pointer to const)
     * @param num_feat   presumably the number of features -- confirm
     * @param num_vec    presumably the number of vectors in `matrix` -- confirm
     * @param multilabel pointer to const SGVector<float64_t>; presumably one
     *                   label vector per example -- confirm
     */
464  void set_sparse_matrix(
465  const SGSparseVector<bool>* matrix, int32_t num_feat, int32_t num_vec,
466  const SGVector<float64_t>* multilabel);
467  void set_sparse_matrix(
468  const SGSparseVector<uint8_t>* matrix, int32_t num_feat, int32_t num_vec,
469  const SGVector<float64_t>* multilabel);
470  void set_sparse_matrix(
471  const SGSparseVector<int8_t>* matrix, int32_t num_feat, int32_t num_vec,
472  const SGVector<float64_t>* multilabel);
473  void set_sparse_matrix(
474  const SGSparseVector<char>* matrix, int32_t num_feat, int32_t num_vec,
475  const SGVector<float64_t>* multilabel);
476  void set_sparse_matrix(
477  const SGSparseVector<int32_t>* matrix, int32_t num_feat, int32_t num_vec,
478  const SGVector<float64_t>* multilabel);
479  void set_sparse_matrix(
480  const SGSparseVector<uint32_t>* matrix, int32_t num_feat, int32_t num_vec,
481  const SGVector<float64_t>* multilabel);
482  void set_sparse_matrix(
483  const SGSparseVector<int64_t>* matrix, int32_t num_feat, int32_t num_vec,
484  const SGVector<float64_t>* multilabel);
485  void set_sparse_matrix(
486  const SGSparseVector<uint64_t>* matrix, int32_t num_feat, int32_t num_vec,
487  const SGVector<float64_t>* multilabel);
488  void set_sparse_matrix(
489  const SGSparseVector<int16_t>* matrix, int32_t num_feat, int32_t num_vec,
490  const SGVector<float64_t>* multilabel);
491  void set_sparse_matrix(
492  const SGSparseVector<uint16_t>* matrix, int32_t num_feat, int32_t num_vec,
493  const SGVector<float64_t>* multilabel);
494  void set_sparse_matrix(
495  const SGSparseVector<float32_t>* matrix, int32_t num_feat, int32_t num_vec,
496  const SGVector<float64_t>* multilabel);
497  void set_sparse_matrix(
498  const SGSparseVector<float64_t>* matrix, int32_t num_feat, int32_t num_vec,
499  const SGVector<float64_t>* multilabel);
500  void set_sparse_matrix(
501  const SGSparseVector<floatmax_t>* matrix, int32_t num_feat, int32_t num_vec,
502  const SGVector<float64_t>* multilabel);
504 
    /** String list setters.
     *
     * All overloads have an empty inline body: the call is a no-op and
     * nothing is written.
     */
513  virtual void set_string_list(
514  const SGString<uint8_t>* strings, int32_t num_str) { };
515  virtual void set_string_list(
516  const SGString<int8_t>* strings, int32_t num_str) { };
517  virtual void set_string_list(
518  const SGString<char>* strings, int32_t num_str) { };
519  virtual void set_string_list(
520  const SGString<int32_t>* strings, int32_t num_str) { };
521  virtual void set_string_list(
522  const SGString<uint32_t>* strings, int32_t num_str) { };
523  virtual void set_string_list(
524  const SGString<int16_t>* strings, int32_t num_str) { };
525  virtual void set_string_list(
526  const SGString<uint16_t>* strings, int32_t num_str) { };
527  virtual void set_string_list(
528  const SGString<int64_t>* strings, int32_t num_str) { };
529  virtual void set_string_list(
530  const SGString<uint64_t>* strings, int32_t num_str) { };
531  virtual void set_string_list(
532  const SGString<float32_t>* strings, int32_t num_str) { };
533  virtual void set_string_list(
534  const SGString<float64_t>* strings, int32_t num_str) { };
535  virtual void set_string_list(
536  const SGString<floatmax_t>* strings, int32_t num_str) { };
538 #endif // #ifndef SWIG // SWIG should skip this part
539 
    /** @return the constant string "LibSVMFile" */
540  virtual const char* get_name() const { return "LibSVMFile"; }
541 
542 private:
    /** Internal initialisation helper (defined elsewhere; what it sets up
     * is not visible in this header). */
544  void init();
545 
    /** Internal helper; presumably applies default delimiter/tokenizer
     * settings -- confirm in the implementation. */
547  void init_with_defaults();
548 
    /** @return an int32_t line count; presumably the number of lines in the
     * underlying file -- confirm in the implementation. */
550  int32_t get_num_lines();
551 
    /** Predicate over a single character entry (taken by value).
     * Presumably tells feature entries apart from label entries -- confirm.
     *
     * @param entry characters of the entry to test
     * @return bool result of the check
     */
553  bool is_feat_entry(const SGVector<char> entry);
    /* NOTE: second consecutive `private:` label; it is redundant but
     * harmless, the members below are private either way. */
554 private:
    /** single-character delimiter; presumably separates a feature index
     * from its value -- confirm */
556  char m_delimiter_feat;
557 
    /** single-character delimiter; presumably separates multiple labels
     * -- confirm */
559  char m_delimiter_label;
560 
    /** line reader (pointer; ownership/lifetime is handled in the
     * implementation, not visible here) */
562  CLineReader* m_line_reader;
563 
    /** parser (pointer; see implementation for ownership) */
565  CParser* m_parser;
566 
    /** tokenizer; presumably splits the input into lines -- confirm */
568  CDelimiterTokenizer* m_line_tokenizer;
569 
    /** tokenizer; presumably splits a line on whitespace -- confirm */
571  CDelimiterTokenizer* m_whitespace_tokenizer;
572 
    /** tokenizer; presumably splits on m_delimiter_feat -- confirm */
574  CDelimiterTokenizer* m_delimiter_feat_tokenizer;
575 
    /** tokenizer; presumably splits on m_delimiter_label -- confirm */
577  CDelimiterTokenizer* m_delimiter_label_tokenizer;
578  };
579 
580 }
581 
582 #endif

SHOGUN Machine Learning Toolbox - Documentation