SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ProtobufFile.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2013 Evgeniy Andreev (gsomix)
8  */
9 
10 #ifdef HAVE_PROTOBUF
11 
12 #ifndef __PROTOBUFFILE_H__
13 #define __PROTOBUFFILE_H__
14 
15 #include <shogun/io/File.h>
16 #include <shogun/io/protobuf/ShogunVersion.pb.h>
17 #include <shogun/io/protobuf/Headers.pb.h>
18 #include <shogun/io/protobuf/Chunks.pb.h>
19 
20 #include <google/protobuf/message.h>
21 
22 namespace shogun
23 {
24 
/**
 * File class derived from CFile that declares typed readers (get_*) and
 * writers (set_*) for vectors, matrices, N-d arrays, sparse matrices and
 * string lists, one overload per element type.
 *
 * The private helpers operate on google::protobuf::Message objects and on the
 * VectorHeader / MatrixHeader / SparseMatrixHeader / StringListHeader types.
 * Only declarations are visible in this header; the actual on-disk layout and
 * the memory-ownership rules live in the implementation file.
 */
35 class CProtobufFile : public CFile
36 {
37 public:
 /** Default constructor. */
39  CProtobufFile();
40 
 /**
  * Construct from a C stdio stream.
  *
  * @param f    FILE handle to work on
  * @param name optional name, NULL by default
  *             (NOTE(review): meaning is defined by CFile - confirm there)
  */
46  CProtobufFile(FILE* f, const char* name=NULL);
47 
 /**
  * Construct from a file name.
  *
  * @param fname name of the file
  * @param rw    access mode character, 'r' by default
  *              (NOTE(review): presumably 'r' = read / 'w' = write - confirm
  *              against CFile)
  * @param name  optional name, NULL by default
  */
54  CProtobufFile(const char* fname, char rw='r', const char* name=NULL);
55 
 /** Virtual destructor. */
57  virtual ~CProtobufFile();
58 
 /**
  * Vector readers, one overload per element type.
  *
  * Both arguments are non-const references so the callee can set them.
  * NOTE(review): `vector` presumably receives a newly allocated buffer and
  * `len` its element count - ownership is not visible here, confirm in the
  * implementation.
  */
66  virtual void get_vector(int8_t*& vector, int32_t& len);
67  virtual void get_vector(uint8_t*& vector, int32_t& len);
68  virtual void get_vector(char*& vector, int32_t& len);
69  virtual void get_vector(int32_t*& vector, int32_t& len);
70  virtual void get_vector(uint32_t*& vector, int32_t& len);
71  virtual void get_vector(float64_t*& vector, int32_t& len);
72  virtual void get_vector(float32_t*& vector, int32_t& len);
73  virtual void get_vector(floatmax_t*& vector, int32_t& len);
74  virtual void get_vector(int16_t*& vector, int32_t& len);
75  virtual void get_vector(uint16_t*& vector, int32_t& len);
76  virtual void get_vector(int64_t*& vector, int32_t& len);
77  virtual void get_vector(uint64_t*& vector, int32_t& len);
79 
 /**
  * Dense matrix readers, one overload per element type.
  *
  * All three arguments are non-const references set by the callee.
  * NOTE(review): `num_feat` / `num_vec` are presumably the two matrix
  * dimensions (features x vectors); storage order is not visible here.
  */
88  virtual void get_matrix(
89  uint8_t*& matrix, int32_t& num_feat, int32_t& num_vec);
90  virtual void get_matrix(
91  int8_t*& matrix, int32_t& num_feat, int32_t& num_vec);
92  virtual void get_matrix(
93  char*& matrix, int32_t& num_feat, int32_t& num_vec);
94  virtual void get_matrix(
95  int32_t*& matrix, int32_t& num_feat, int32_t& num_vec);
96  virtual void get_matrix(
97  uint32_t*& matrix, int32_t& num_feat, int32_t& num_vec);
98  virtual void get_matrix(
99  int64_t*& matrix, int32_t& num_feat, int32_t& num_vec);
100  virtual void get_matrix(
101  uint64_t*& matrix, int32_t& num_feat, int32_t& num_vec);
102  virtual void get_matrix(
103  float32_t*& matrix, int32_t& num_feat, int32_t& num_vec);
104  virtual void get_matrix(
105  float64_t*& matrix, int32_t& num_feat, int32_t& num_vec);
106  virtual void get_matrix(
107  floatmax_t*& matrix, int32_t& num_feat, int32_t& num_vec);
108  virtual void get_matrix(
109  int16_t*& matrix, int32_t& num_feat, int32_t& num_vec);
110  virtual void get_matrix(
111  uint16_t*& matrix, int32_t& num_feat, int32_t& num_vec);
113 
 /**
  * N-dimensional array readers.
  *
  * Declared for seven element types only (uint8_t, char, int32_t, float32_t,
  * float64_t, int16_t, uint16_t), and this class declares no matching
  * set_ndarray.
  * NOTE(review): `dims` presumably receives an array of `num_dims` extents -
  * confirm in the implementation.
  */
122  virtual void get_ndarray(
123  uint8_t*& array, int32_t*& dims, int32_t& num_dims);
124  virtual void get_ndarray(
125  char*& array, int32_t*& dims, int32_t& num_dims);
126  virtual void get_ndarray(
127  int32_t*& array, int32_t*& dims, int32_t& num_dims);
128  virtual void get_ndarray(
129  float32_t*& array, int32_t*& dims, int32_t& num_dims);
130  virtual void get_ndarray(
131  float64_t*& array, int32_t*& dims, int32_t& num_dims);
132  virtual void get_ndarray(
133  int16_t*& array, int32_t*& dims, int32_t& num_dims);
134  virtual void get_ndarray(
135  uint16_t*& array, int32_t*& dims, int32_t& num_dims);
137 
 /**
  * Sparse matrix readers, one overload per element type (including bool).
  *
  * `matrix` is a reference to a pointer to SGSparseVector<T>.
  * NOTE(review): presumably set to an array of `num_vec` sparse vectors -
  * confirm in the implementation.
  */
146  virtual void get_sparse_matrix(
147  SGSparseVector<bool>*& matrix, int32_t& num_feat, int32_t& num_vec);
148  virtual void get_sparse_matrix(
149  SGSparseVector<uint8_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
150  virtual void get_sparse_matrix(
151  SGSparseVector<int8_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
152  virtual void get_sparse_matrix(
153  SGSparseVector<char>*& matrix, int32_t& num_feat, int32_t& num_vec);
154  virtual void get_sparse_matrix(
155  SGSparseVector<int32_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
156  virtual void get_sparse_matrix(
157  SGSparseVector<uint32_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
158  virtual void get_sparse_matrix(
159  SGSparseVector<int64_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
160  virtual void get_sparse_matrix(
161  SGSparseVector<uint64_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
162  virtual void get_sparse_matrix(
163  SGSparseVector<int16_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
164  virtual void get_sparse_matrix(
165  SGSparseVector<uint16_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
166  virtual void get_sparse_matrix(
167  SGSparseVector<float32_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
168  virtual void get_sparse_matrix(
169  SGSparseVector<float64_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
170  virtual void get_sparse_matrix(
171  SGSparseVector<floatmax_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
173 
 /**
  * String list readers, one overload per element type.
  *
  * `strings`, `num_str` and `max_string_len` are all non-const references
  * set by the callee.
  * NOTE(review): presumably `strings` receives `num_str` SGString<T> entries
  * and `max_string_len` the length of the longest one - confirm.
  */
182  virtual void get_string_list(
183  SGString<uint8_t>*& strings, int32_t& num_str,
184  int32_t& max_string_len);
185  virtual void get_string_list(
186  SGString<int8_t>*& strings, int32_t& num_str,
187  int32_t& max_string_len);
188  virtual void get_string_list(
189  SGString<char>*& strings, int32_t& num_str,
190  int32_t& max_string_len);
191  virtual void get_string_list(
192  SGString<int32_t>*& strings, int32_t& num_str,
193  int32_t& max_string_len);
194  virtual void get_string_list(
195  SGString<uint32_t>*& strings, int32_t& num_str,
196  int32_t& max_string_len);
197  virtual void get_string_list(
198  SGString<int16_t>*& strings, int32_t& num_str,
199  int32_t& max_string_len);
200  virtual void get_string_list(
201  SGString<uint16_t>*& strings, int32_t& num_str,
202  int32_t& max_string_len);
203  virtual void get_string_list(
204  SGString<int64_t>*& strings, int32_t& num_str,
205  int32_t& max_string_len);
206  virtual void get_string_list(
207  SGString<uint64_t>*& strings, int32_t& num_str,
208  int32_t& max_string_len);
209  virtual void get_string_list(
210  SGString<float32_t>*& strings, int32_t& num_str,
211  int32_t& max_string_len);
212  virtual void get_string_list(
213  SGString<float64_t>*& strings, int32_t& num_str,
214  int32_t& max_string_len);
215  virtual void get_string_list(
216  SGString<floatmax_t>*& strings, int32_t& num_str,
217  int32_t& max_string_len);
219 
 /* Type-erased vector reader; intentionally left disabled (commented out). */
221  /*virtual void get_vector(void*& vector, int32_t& len, DataType& dtype);*/
222 
 /**
  * Vector writers, one overload per element type.
  *
  * @param vector pointer to the data; const, so the data is not modified
  * @param len    passed by value (NOTE(review): presumably the number of
  *               elements in `vector` - confirm)
  */
230  virtual void set_vector(const int8_t* vector, int32_t len);
231  virtual void set_vector(const uint8_t* vector, int32_t len);
232  virtual void set_vector(const char* vector, int32_t len);
233  virtual void set_vector(const int32_t* vector, int32_t len);
234  virtual void set_vector(const uint32_t* vector, int32_t len);
235  virtual void set_vector(const float32_t* vector, int32_t len);
236  virtual void set_vector(const float64_t* vector, int32_t len);
237  virtual void set_vector(const floatmax_t* vector, int32_t len);
238  virtual void set_vector(const int16_t* vector, int32_t len);
239  virtual void set_vector(const uint16_t* vector, int32_t len);
240  virtual void set_vector(const int64_t* vector, int32_t len);
241  virtual void set_vector(const uint64_t* vector, int32_t len);
243 
 /**
  * Dense matrix writers, one overload per element type.
  *
  * @param matrix   pointer to the const matrix data
  * @param num_feat first dimension argument (NOTE(review): presumably the
  *                 number of features / rows - confirm)
  * @param num_vec  second dimension argument (NOTE(review): presumably the
  *                 number of vectors / columns - confirm)
  */
251  virtual void set_matrix(
252  const uint8_t* matrix, int32_t num_feat, int32_t num_vec);
253  virtual void set_matrix(
254  const int8_t* matrix, int32_t num_feat, int32_t num_vec);
255  virtual void set_matrix(
256  const char* matrix, int32_t num_feat, int32_t num_vec);
257  virtual void set_matrix(
258  const int32_t* matrix, int32_t num_feat, int32_t num_vec);
259  virtual void set_matrix(
260  const uint32_t* matrix, int32_t num_feat, int32_t num_vec);
261  virtual void set_matrix(
262  const int64_t* matrix, int32_t num_feat, int32_t num_vec);
263  virtual void set_matrix(
264  const uint64_t* matrix, int32_t num_feat, int32_t num_vec);
265  virtual void set_matrix(
266  const float32_t* matrix, int32_t num_feat, int32_t num_vec);
267  virtual void set_matrix(
268  const float64_t* matrix, int32_t num_feat, int32_t num_vec);
269  virtual void set_matrix(
270  const floatmax_t* matrix, int32_t num_feat, int32_t num_vec);
271  virtual void set_matrix(
272  const int16_t* matrix, int32_t num_feat, int32_t num_vec);
273  virtual void set_matrix(
274  const uint16_t* matrix, int32_t num_feat, int32_t num_vec);
276 
 /**
  * Sparse matrix writers, one overload per element type (including bool).
  *
  * @param matrix   pointer to const SGSparseVector<T>
  *                 (NOTE(review): presumably an array of `num_vec` entries)
  * @param num_feat feature-count argument, passed by value
  * @param num_vec  vector-count argument, passed by value
  */
284  virtual void set_sparse_matrix(
285  const SGSparseVector<bool>* matrix, int32_t num_feat, int32_t num_vec);
286  virtual void set_sparse_matrix(
287  const SGSparseVector<uint8_t>* matrix, int32_t num_feat, int32_t num_vec);
288  virtual void set_sparse_matrix(
289  const SGSparseVector<int8_t>* matrix, int32_t num_feat, int32_t num_vec);
290  virtual void set_sparse_matrix(
291  const SGSparseVector<char>* matrix, int32_t num_feat, int32_t num_vec);
292  virtual void set_sparse_matrix(
293  const SGSparseVector<int32_t>* matrix, int32_t num_feat, int32_t num_vec);
294  virtual void set_sparse_matrix(
295  const SGSparseVector<uint32_t>* matrix, int32_t num_feat, int32_t num_vec);
296  virtual void set_sparse_matrix(
297  const SGSparseVector<int64_t>* matrix, int32_t num_feat, int32_t num_vec);
298  virtual void set_sparse_matrix(
299  const SGSparseVector<uint64_t>* matrix, int32_t num_feat, int32_t num_vec);
300  virtual void set_sparse_matrix(
301  const SGSparseVector<int16_t>* matrix, int32_t num_feat, int32_t num_vec);
302  virtual void set_sparse_matrix(
303  const SGSparseVector<uint16_t>* matrix, int32_t num_feat, int32_t num_vec);
304  virtual void set_sparse_matrix(
305  const SGSparseVector<float32_t>* matrix, int32_t num_feat, int32_t num_vec);
306  virtual void set_sparse_matrix(
307  const SGSparseVector<float64_t>* matrix, int32_t num_feat, int32_t num_vec);
308  virtual void set_sparse_matrix(
309  const SGSparseVector<floatmax_t>* matrix, int32_t num_feat, int32_t num_vec);
311 
 /**
  * String list writers, one overload per element type.
  *
  * @param strings pointer to const SGString<T>
  *                (NOTE(review): presumably an array of `num_str` entries)
  * @param num_str string-count argument, passed by value
  */
320  virtual void set_string_list(
321  const SGString<uint8_t>* strings, int32_t num_str);
322  virtual void set_string_list(
323  const SGString<int8_t>* strings, int32_t num_str);
324  virtual void set_string_list(
325  const SGString<char>* strings, int32_t num_str);
326  virtual void set_string_list(
327  const SGString<int32_t>* strings, int32_t num_str);
328  virtual void set_string_list(
329  const SGString<uint32_t>* strings, int32_t num_str);
330  virtual void set_string_list(
331  const SGString<int16_t>* strings, int32_t num_str);
332  virtual void set_string_list(
333  const SGString<uint16_t>* strings, int32_t num_str);
334  virtual void set_string_list(
335  const SGString<int64_t>* strings, int32_t num_str);
336  virtual void set_string_list(
337  const SGString<uint64_t>* strings, int32_t num_str);
338  virtual void set_string_list(
339  const SGString<float32_t>* strings, int32_t num_str);
340  virtual void set_string_list(
341  const SGString<float64_t>* strings, int32_t num_str);
342  virtual void set_string_list(
343  const SGString<floatmax_t>* strings, int32_t num_str);
345 
 /** @return the constant string "ProtobufFile" */
346  virtual const char* get_name() const { return "ProtobufFile"; }
347 
 /* Private helper methods. */
348 private:
 /** Initialisation helper (NOTE(review): presumably shared by the constructors - confirm). */
350  void init();
351 
 /**
  * Going by the name, stores `number` into `array` in big-endian byte order.
  * NOTE(review): `size` is presumably the number of bytes to write - confirm.
  */
353  void write_big_endian_uint(uint32_t number, uint8_t* array, uint32_t size);
354 
 /**
  * Counterpart of write_big_endian_uint: going by the name, decodes a
  * big-endian unsigned integer from `array` and returns it.
  */
356  uint32_t read_big_endian_uint(uint8_t* array, uint32_t size);
357 
 /**
  * Const helper returning a message count for `len` and `sizeof_type`.
  * NOTE(review): presumably the number of chunk messages needed for `len`
  * elements of `sizeof_type` bytes each - confirm the formula in the
  * implementation.
  */
359  int32_t compute_num_messages(uint64_t len, int32_t sizeof_type) const;
360 
 /**
  * Going by the name, reads the file-level header and validates it against
  * the expected `type`. How a mismatch is reported is not visible here.
  */
362  void read_and_validate_global_header(ShogunVersion_SGDataType type);
363 
 /** Going by the name, writes the file-level header for data of kind `type`. */
365  void write_global_header(ShogunVersion_SGDataType type);
366 
 /* Per-container header readers; each returns its header object by value. */
369  VectorHeader read_vector_header();
370  MatrixHeader read_matrix_header();
371  SparseMatrixHeader read_sparse_matrix_header();
372  StringListHeader read_string_list_header();
374 
 /* Header writers for dense data; `num_messages` is passed in by the caller. */
377  void write_vector_header(int32_t len, int32_t num_messages);
378  void write_matrix_header(int32_t num_feat, int32_t num_vec, int32_t num_messages);
380 
 /**
  * Sparse matrix header writers, one overload per element type.
  *
  * Unlike the dense header writers these take the matrix itself.
  * NOTE(review): presumably because the header records per-vector sizes -
  * confirm in the implementation.
  */
385  void write_sparse_matrix_header(
386  const SGSparseVector<bool>* matrix, int32_t num_feat, int32_t num_vec);
387  void write_sparse_matrix_header(
388  const SGSparseVector<uint8_t>* matrix, int32_t num_feat, int32_t num_vec);
389  void write_sparse_matrix_header(
390  const SGSparseVector<int8_t>* matrix, int32_t num_feat, int32_t num_vec);
391  void write_sparse_matrix_header(
392  const SGSparseVector<char>* matrix, int32_t num_feat, int32_t num_vec);
393  void write_sparse_matrix_header(
394  const SGSparseVector<int32_t>* matrix, int32_t num_feat, int32_t num_vec);
395  void write_sparse_matrix_header(
396  const SGSparseVector<uint32_t>* matrix, int32_t num_feat, int32_t num_vec);
397  void write_sparse_matrix_header(
398  const SGSparseVector<float64_t>* matrix, int32_t num_feat, int32_t num_vec);
399  void write_sparse_matrix_header(
400  const SGSparseVector<float32_t>* matrix, int32_t num_feat, int32_t num_vec);
401  void write_sparse_matrix_header(
402  const SGSparseVector<floatmax_t>* matrix, int32_t num_feat, int32_t num_vec);
403  void write_sparse_matrix_header(
404  const SGSparseVector<int16_t>* matrix, int32_t num_feat, int32_t num_vec);
405  void write_sparse_matrix_header(
406  const SGSparseVector<uint16_t>* matrix, int32_t num_feat, int32_t num_vec);
407  void write_sparse_matrix_header(
408  const SGSparseVector<int64_t>* matrix, int32_t num_feat, int32_t num_vec);
409  void write_sparse_matrix_header(
410  const SGSparseVector<uint64_t>* matrix, int32_t num_feat, int32_t num_vec);
412 
 /**
  * String list header writers, one overload per element type.
  *
  * These take the string list itself.
  * NOTE(review): presumably because the header records per-string lengths -
  * confirm in the implementation.
  */
417  void write_string_list_header(
418  const SGString<uint8_t>* strings, int32_t num_str);
419  void write_string_list_header(
420  const SGString<int8_t>* strings, int32_t num_str);
421  void write_string_list_header(
422  const SGString<char>* strings, int32_t num_str);
423  void write_string_list_header(
424  const SGString<int32_t>* strings, int32_t num_str);
425  void write_string_list_header(
426  const SGString<uint32_t>* strings, int32_t num_str);
427  void write_string_list_header(
428  const SGString<float64_t>* strings, int32_t num_str);
429  void write_string_list_header(
430  const SGString<float32_t>* strings, int32_t num_str);
431  void write_string_list_header(
432  const SGString<floatmax_t>* strings, int32_t num_str);
433  void write_string_list_header(
434  const SGString<int16_t>* strings, int32_t num_str);
435  void write_string_list_header(
436  const SGString<uint16_t>* strings, int32_t num_str);
437  void write_string_list_header(
438  const SGString<int64_t>* strings, int32_t num_str);
439  void write_string_list_header(
440  const SGString<uint64_t>* strings, int32_t num_str);
442 
 /**
  * Reads one protobuf message into `message` (non-const reference).
  * NOTE(review): framing (e.g. a length prefix) is not visible here.
  */
444  void read_message(google::protobuf::Message& message);
445 
 /** Writes one protobuf message; `message` is taken by const reference. */
447  void write_message(const google::protobuf::Message& message);
448 
 /**
  * Raw block readers, one overload per element type.
  *
  * `vector` is a reference to the destination pointer; `len` and
  * `num_messages` are passed by value.
  * NOTE(review): whether `vector` must be pre-allocated by the caller, and
  * whether `len` counts elements or bytes, is not visible here - confirm.
  */
454  void read_memory_block(uint8_t*& vector, uint64_t len, int32_t num_messages);
455  void read_memory_block(int8_t*& vector, uint64_t len, int32_t num_messages);
456  void read_memory_block(char*& vector, uint64_t len, int32_t num_messages);
457  void read_memory_block(int32_t*& vector, uint64_t len, int32_t num_messages);
458  void read_memory_block(uint32_t*& vector, uint64_t len, int32_t num_messages);
459  void read_memory_block(float64_t*& vector, uint64_t len, int32_t num_messages);
460  void read_memory_block(float32_t*& vector, uint64_t len, int32_t num_messages);
461  void read_memory_block(floatmax_t*& vector, uint64_t len, int32_t num_messages);
462  void read_memory_block(int16_t*& vector, uint64_t len, int32_t num_messages);
463  void read_memory_block(uint16_t*& vector, uint64_t len, int32_t num_messages);
464  void read_memory_block(int64_t*& vector, uint64_t len, int32_t num_messages);
465  void read_memory_block(uint64_t*& vector, uint64_t len, int32_t num_messages);
467 
 /**
  * Raw block writers, one overload per element type.
  *
  * `vector` points to const data; `len` and `num_messages` are passed by
  * value.
  * NOTE(review): presumably the data is split over `num_messages` messages -
  * confirm in the implementation.
  */
473  void write_memory_block(const int8_t* vector, uint64_t len, int32_t num_messages);
474  void write_memory_block(const uint8_t* vector, uint64_t len, int32_t num_messages);
475  void write_memory_block(const char* vector, uint64_t len, int32_t num_messages);
476  void write_memory_block(const int32_t* vector, uint64_t len, int32_t num_messages);
477  void write_memory_block(const uint32_t* vector, uint64_t len, int32_t num_messages);
478  void write_memory_block(const float32_t* vector, uint64_t len, int32_t num_messages);
479  void write_memory_block(const float64_t* vector, uint64_t len, int32_t num_messages);
480  void write_memory_block(const floatmax_t* vector, uint64_t len, int32_t num_messages);
481  void write_memory_block(const int16_t* vector, uint64_t len, int32_t num_messages);
482  void write_memory_block(const uint16_t* vector, uint64_t len, int32_t num_messages);
483  void write_memory_block(const int64_t* vector, uint64_t len, int32_t num_messages);
484  void write_memory_block(const uint64_t* vector, uint64_t len, int32_t num_messages);
486 
 /**
  * Sparse matrix body readers, one overload per element type.
  *
  * Each takes a SparseMatrixHeader by const reference (the type returned by
  * read_sparse_matrix_header()) and a reference to the destination pointer.
  */
488  void read_sparse_matrix(SGSparseVector<bool>*& matrix,
489  const SparseMatrixHeader& data_header);
490  void read_sparse_matrix(SGSparseVector<uint8_t>*& matrix,
491  const SparseMatrixHeader& data_header);
492  void read_sparse_matrix(SGSparseVector<int8_t>*& matrix,
493  const SparseMatrixHeader& data_header);
494  void read_sparse_matrix(SGSparseVector<char>*& matrix,
495  const SparseMatrixHeader& data_header);
496  void read_sparse_matrix(SGSparseVector<int32_t>*& matrix,
497  const SparseMatrixHeader& data_header);
498  void read_sparse_matrix(SGSparseVector<uint32_t>*& matrix,
499  const SparseMatrixHeader& data_header);
500  void read_sparse_matrix(SGSparseVector<int16_t>*& matrix,
501  const SparseMatrixHeader& data_header);
502  void read_sparse_matrix(SGSparseVector<uint16_t>*& matrix,
503  const SparseMatrixHeader& data_header);
504  void read_sparse_matrix(SGSparseVector<int64_t>*& matrix,
505  const SparseMatrixHeader& data_header);
506  void read_sparse_matrix(SGSparseVector<uint64_t>*& matrix,
507  const SparseMatrixHeader& data_header);
508  void read_sparse_matrix(SGSparseVector<float32_t>*& matrix,
509  const SparseMatrixHeader& data_header);
510  void read_sparse_matrix(SGSparseVector<float64_t>*& matrix,
511  const SparseMatrixHeader& data_header);
512  void read_sparse_matrix(SGSparseVector<floatmax_t>*& matrix,
513  const SparseMatrixHeader& data_header);
515 
 /**
  * Sparse matrix body writers, one overload per element type.
  *
  * Takes only the matrix and `num_vec`; there is no `num_feat` argument here,
  * unlike write_sparse_matrix_header().
  */
519  void write_sparse_matrix(
520  const SGSparseVector<bool>* matrix, int32_t num_vec);
521  void write_sparse_matrix(
522  const SGSparseVector<uint8_t>* matrix, int32_t num_vec);
523  void write_sparse_matrix(
524  const SGSparseVector<int8_t>* matrix, int32_t num_vec);
525  void write_sparse_matrix(
526  const SGSparseVector<char>* matrix, int32_t num_vec);
527  void write_sparse_matrix(
528  const SGSparseVector<int32_t>* matrix, int32_t num_vec);
529  void write_sparse_matrix(
530  const SGSparseVector<uint32_t>* matrix, int32_t num_vec);
531  void write_sparse_matrix(
532  const SGSparseVector<int16_t>* matrix, int32_t num_vec);
533  void write_sparse_matrix(
534  const SGSparseVector<uint16_t>* matrix, int32_t num_vec);
535  void write_sparse_matrix(
536  const SGSparseVector<int64_t>* matrix, int32_t num_vec);
537  void write_sparse_matrix(
538  const SGSparseVector<uint64_t>* matrix, int32_t num_vec);
539  void write_sparse_matrix(
540  const SGSparseVector<float32_t>* matrix, int32_t num_vec);
541  void write_sparse_matrix(
542  const SGSparseVector<float64_t>* matrix, int32_t num_vec);
543  void write_sparse_matrix(
544  const SGSparseVector<floatmax_t>* matrix, int32_t num_vec);
546 
 /**
  * String list body readers, one overload per element type.
  *
  * Each takes a StringListHeader by const reference (the type returned by
  * read_string_list_header()) and a reference to the destination pointer.
  */
548  void read_string_list(SGString<uint8_t>*& strings,
549  const StringListHeader& data_header);
550  void read_string_list(SGString<int8_t>*& strings,
551  const StringListHeader& data_header);
552  void read_string_list(SGString<char>*& strings,
553  const StringListHeader& data_header);
554  void read_string_list(SGString<int32_t>*& strings,
555  const StringListHeader& data_header);
556  void read_string_list(SGString<uint32_t>*& strings,
557  const StringListHeader& data_header);
558  void read_string_list(SGString<int16_t>*& strings,
559  const StringListHeader& data_header);
560  void read_string_list(SGString<uint16_t>*& strings,
561  const StringListHeader& data_header);
562  void read_string_list(SGString<int64_t>*& strings,
563  const StringListHeader& data_header);
564  void read_string_list(SGString<uint64_t>*& strings,
565  const StringListHeader& data_header);
566  void read_string_list(SGString<float32_t>*& strings,
567  const StringListHeader& data_header);
568  void read_string_list(SGString<float64_t>*& strings,
569  const StringListHeader& data_header);
570  void read_string_list(SGString<floatmax_t>*& strings,
571  const StringListHeader& data_header);
573 
 /** String list body writers, one overload per element type. */
577  void write_string_list(
578  const SGString<uint8_t>* strings, int32_t num_str);
579  void write_string_list(
580  const SGString<int8_t>* strings, int32_t num_str);
581  void write_string_list(
582  const SGString<char>* strings, int32_t num_str);
583  void write_string_list(
584  const SGString<int32_t>* strings, int32_t num_str);
585  void write_string_list(
586  const SGString<uint32_t>* strings, int32_t num_str);
587  void write_string_list(
588  const SGString<int16_t>* strings, int32_t num_str);
589  void write_string_list(
590  const SGString<uint16_t>* strings, int32_t num_str);
591  void write_string_list(
592  const SGString<int64_t>* strings, int32_t num_str);
593  void write_string_list(
594  const SGString<uint64_t>* strings, int32_t num_str);
595  void write_string_list(
596  const SGString<float32_t>* strings, int32_t num_str);
597  void write_string_list(
598  const SGString<float64_t>* strings, int32_t num_str);
599  void write_string_list(
600  const SGString<floatmax_t>* strings, int32_t num_str);
602 
 /* Second private section: data members. */
603 private:
 /** Version number (NOTE(review): presumably the file-format version - confirm). */
605  int32_t version;
606 
 /** Message size (NOTE(review): presumably the size limit of one chunk message; units not visible here). */
608  int32_t message_size;
609 
 /** Raw byte buffer pointer (NOTE(review): presumably scratch space for serialized messages; ownership not visible here). */
611  uint8_t* buffer;
612 
 /** Fixed 4-byte array (NOTE(review): presumably scratch space for the big-endian uint helpers - confirm). */
614  uint8_t uint_buffer[4];
615 };
616 
617 }
618 
619 #endif
621 #endif

SHOGUN Machine Learning Toolbox - Documentation