SHOGUN  4.1.0
 全部  命名空间 文件 函数 变量 类型定义 枚举 枚举值 友元 宏定义  
SerializableHdf5Reader00.cpp
浏览该文件的文档.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2010 Soeren Sonnenburg
8  * Copyright (C) 2010 Berlin Institute of Technology
9  */
10 
11 #include <shogun/lib/config.h>
12 #ifdef HAVE_HDF5
13 
15 
16 using namespace shogun;
17 
18 SerializableHdf5Reader00::SerializableHdf5Reader00(
19  CSerializableHdf5File* file) { m_file = file; }
20 
21 SerializableHdf5Reader00::~SerializableHdf5Reader00() {}
22 
23 bool
24 SerializableHdf5Reader00::read_scalar_wrapped(
25  const TSGDataType* type, void* param)
26 {
27  /* note: param may well be NULL. This doesnt hurt if m->y or m->x are -1 */
28  ASSERT(type);
29 
30  CSerializableHdf5File::type_item_t* m
31  = m_file->m_stack_type.back();
32 
33  switch (type->m_stype) {
34  case ST_NONE:
35  if (m->y != 0 || m->x != 0) return true;
36  break;
37  case ST_STRING:
38  if (m->y == -1 || m->x == -1) break;
39 
40  if (m->sub_y != 0) return true;
41 
42  ASSERT(param);
43  memcpy(param, m->vltype[m->x*m->dims[1] + m->y].p,
44  m->vltype[m->x*m->dims[1] + m->y].len
45  *type->sizeof_ptype());
46 
47  return true;
48  case ST_SPARSE:
49  if (m->sub_y != 0) return true;
50  break;
51  case ST_UNDEFINED:
52  return false;
53  }
54 
55  hid_t mem_type_id;
56  if ((mem_type_id = CSerializableHdf5File::new_stype2hdf5(
57  type->m_stype, type->m_ptype)) < 0) return false;
58 
59  switch (type->m_stype) {
60  case ST_NONE:
61  if (H5Dread(m->dset, mem_type_id, H5S_ALL, H5S_ALL,
62  H5P_DEFAULT, param) < 0) return false;
63  break;
64  case ST_STRING:
65  if (H5Dread(m->dset, mem_type_id, H5S_ALL, H5S_ALL,
66  H5P_DEFAULT, m->vltype) < 0) return false;
67  break;
68  case ST_SPARSE:
69  if (H5Dread(m->dset, m->dtype, H5S_ALL, H5S_ALL,
70  H5P_DEFAULT, m->sparse_ptr) < 0) return false;
71  break;
72  case ST_UNDEFINED:
73  return false;
74  }
75 
76  if (H5Tclose(mem_type_id) < 0) return false;
77 
78  return true;
79 }
80 
81 bool
82 SerializableHdf5Reader00::read_cont_begin_wrapped(
83  const TSGDataType* type, index_t* len_read_y, index_t* len_read_x)
84 {
85  CSerializableHdf5File::type_item_t* m
86  = m_file->m_stack_type.back();
87 
88  if (type->m_ptype != PT_SGOBJECT) {
89  switch (type->m_ctype) {
90  case CT_NDARRAY:
92  case CT_SCALAR:
93  SG_ERROR("read_cont_begin_wrapped(): Implementation error"
94  " during writing Hdf5File (0)!");
95  return false;
96  case CT_VECTOR: case CT_SGVECTOR: *len_read_y = m->dims[0]; break;
97  case CT_MATRIX: case CT_SGMATRIX:
98  *len_read_x = m->dims[0]; *len_read_y = m->dims[1];
99  break;
100  default: return false;
101  }
102 
103  return true;
104  }
105 
106  if (!m_file->attr_exists(STR_IS_CONT)) return false;
107 
108  string_t ctype_buf, buf;
109  type->to_string(ctype_buf, STRING_LEN);
110  if (!m_file->attr_read_string(STR_CTYPE_NAME, buf, STRING_LEN))
111  return false;
112  if (strcmp(ctype_buf, buf) != 0) return false;
113 
114  switch (type->m_ctype) {
115  case CT_NDARRAY:
117  case CT_SCALAR:
118  SG_ERROR("read_cont_begin_wrapped(): Implementation error"
119  " during writing Hdf5File (1)!");
120  return false;
121  case CT_MATRIX: case CT_SGMATRIX:
122  if (!m_file->attr_read_scalar(TYPE_INDEX, STR_LENGTH_X,
123  len_read_x))
124  return false;
125  /* break; */
126  case CT_VECTOR: case CT_SGVECTOR:
127  if (!m_file->attr_read_scalar(TYPE_INDEX, STR_LENGTH_Y,
128  len_read_y))
129  return false;
130  break;
131  default: return false;
132  }
133 
134  return true;
135 }
136 
137 bool
138 SerializableHdf5Reader00::read_cont_end_wrapped(
139  const TSGDataType* type, index_t len_read_y, index_t len_read_x)
140 {
141  return true;
142 }
143 
144 bool
145 SerializableHdf5Reader00::read_string_begin_wrapped(
146  const TSGDataType* type, index_t* length)
147 {
148  CSerializableHdf5File::type_item_t* m
149  = m_file->m_stack_type.back();
150 
151  if (m->y == 0 && m->x == 0) {
152  m->y = -1; m->x = -1;
153  read_scalar_wrapped(type, NULL);
154  m->y = 0; m->x = 0;
155  }
156 
157  *length = m->vltype[m->x*m->dims[1] + m->y].len;
158 
159  return true;
160 }
161 
162 bool
163 SerializableHdf5Reader00::read_string_end_wrapped(
164  const TSGDataType* type, index_t length)
165 {
166  return true;
167 }
168 
169 bool
170 SerializableHdf5Reader00::read_stringentry_begin_wrapped(
171  const TSGDataType* type, index_t y)
172 {
173  CSerializableHdf5File::type_item_t* m
174  = m_file->m_stack_type.back();
175 
176  m->sub_y = y;
177 
178  return true;
179 }
180 
181 bool
182 SerializableHdf5Reader00::read_stringentry_end_wrapped(
183  const TSGDataType* type, index_t y)
184 {
185  return true;
186 }
187 
188 bool
189 SerializableHdf5Reader00::read_sparse_begin_wrapped(
190  const TSGDataType* type, index_t* length)
191 {
192  CSerializableHdf5File::type_item_t* m_prev
193  = m_file->m_stack_type.back();
194 
195  if(!m_file->dspace_select(type->m_ctype, m_prev->y, m_prev->x))
196  return false;
197 
198  CSerializableHdf5File::type_item_t* m = new CSerializableHdf5File
199  ::type_item_t(m_prev->name);
200  m_file->m_stack_type.push_back(m);
201 
202  /* ************************************************************ */
203 
204  if (!m_file->group_open(m->name, STR_GROUP_PREFIX)) return false;
205  if (!m_file->attr_exists(STR_IS_SPARSE)) return false;
206 
207  string_t name;
208  CSerializableHdf5File::index2string(
209  name, STRING_LEN, type->m_ctype, m_prev->y, m_prev->x);
210  if ((m->dset = H5Dopen2(m_file->m_stack_h5stream.back(), name,
211  H5P_DEFAULT)) < 0)
212  return false;
213 
214  if ((m->dtype = H5Dget_type(m->dset)) < 0) return false;
215  if (!CSerializableHdf5File::isequal_stype2hdf5(
216  type->m_stype, type->m_ptype, m->dtype)) return false;
217 
218  if ((m->dspace = H5Dget_space(m->dset)) < 0) return false;
219  if (H5Sget_simple_extent_ndims(m->dspace) != 1) return false;
220 
221 
222  if ((m->rank = H5Sget_simple_extent_dims(m->dspace, m->dims, NULL)
223  ) < 0) return false;
224 
225  if (H5Sget_simple_extent_type(m->dspace) != H5S_NULL
226  && m->rank != 1) return false;
227 
228  *length = m->dims[0];
229 
230  /* ************************************************************ */
231 
232  char* buf = SG_MALLOC(char, CSerializableHdf5File::sizeof_sparsetype());
233 
234  hid_t mem_type_id;
235  if ((mem_type_id = CSerializableHdf5File::new_sparsetype()) < 0)
236  return false;
237 
238  hid_t mem_space_id;
239  if ((mem_space_id = H5Screate_simple(0, NULL, NULL)) < 0)
240  return false;
241 
242  if (H5Dread(m_prev->dset, mem_type_id, mem_space_id,
243  m_prev->dspace, H5P_DEFAULT, buf) < 0) return false;
244 
245  if (H5Sclose(mem_space_id) < 0) return false;
246  if (H5Tclose(mem_type_id) < 0) return false;
247 
248  delete buf;
249 
250  return true;
251 }
252 
253 bool
254 SerializableHdf5Reader00::read_sparse_end_wrapped(
255  const TSGDataType* type, index_t length)
256 {
257  if (!m_file->group_close()) return false;
258 
259  delete m_file->m_stack_type.back();
260  m_file->m_stack_type.pop_back();
261 
262  return true;
263 }
264 
265 bool
266 SerializableHdf5Reader00::read_sparseentry_begin_wrapped(
267  const TSGDataType* type, SGSparseVectorEntry<char>* first_entry,
268  index_t* feat_index, index_t y)
269 {
270  CSerializableHdf5File::type_item_t* m
271  = m_file->m_stack_type.back();
272 
273  m->sparse_ptr = first_entry;
274  m->sub_y = y;
275 
276  return true;
277 }
278 
279 bool
280 SerializableHdf5Reader00::read_sparseentry_end_wrapped(
281  const TSGDataType* type, SGSparseVectorEntry<char>* first_entry,
282  index_t* feat_index, index_t y)
283 {
284  return true;
285 }
286 
287 bool
288 SerializableHdf5Reader00::read_item_begin_wrapped(
289  const TSGDataType* type, index_t y, index_t x)
290 {
291  CSerializableHdf5File::type_item_t* m
292  = m_file->m_stack_type.back();
293  m->y = y; m->x = x;
294 
295  if (type->m_ptype != PT_SGOBJECT) return true;
296 
297  string_t name;
298  if (!CSerializableHdf5File::index2string(
299  name, STRING_LEN, type->m_ctype, y, x)) return false;
300  if (!m_file->group_open(name, "")) return false;
301 
302  return true;
303 }
304 
305 bool
306 SerializableHdf5Reader00::read_item_end_wrapped(
307  const TSGDataType* type, index_t y, index_t x)
308 {
309  if (type->m_ptype == PT_SGOBJECT)
310  if (!m_file->group_close()) return false;
311 
312  return true;
313 }
314 
315 bool
316 SerializableHdf5Reader00::read_sgserializable_begin_wrapped(
317  const TSGDataType* type, char* sgserializable_name,
318  EPrimitiveType* generic)
319 {
320  if (!m_file->attr_exists(STR_IS_SGSERIALIZABLE)) return false;
321 
322  if (m_file->attr_exists(STR_IS_NULL)) {
323  *sgserializable_name = '\0'; return true;
324  }
325 
326  if (!m_file->attr_read_string(
327  STR_INSTANCE_NAME, sgserializable_name, STRING_LEN))
328  return false;
329 
330  if (m_file->attr_exists(STR_GENERIC_NAME)) {
331  string_t buf;
332  if (!m_file->attr_read_string(
333  STR_GENERIC_NAME, buf, STRING_LEN)) return false;
334  if (!TSGDataType::string_to_ptype(generic, buf))
335  return false;
336  }
337 
338  return true;
339 }
340 
341 bool
342 SerializableHdf5Reader00::read_sgserializable_end_wrapped(
343  const TSGDataType* type, const char* sgserializable_name,
344  EPrimitiveType generic)
345 {
346  return true;
347 }
348 
349 bool
350 SerializableHdf5Reader00::read_type_begin_wrapped(
351  const TSGDataType* type, const char* name, const char* prefix)
352 {
353  CSerializableHdf5File::type_item_t* m = new CSerializableHdf5File
354  ::type_item_t(name);
355  m_file->m_stack_type.push_back(m);
356 
357  if (type->m_ptype == PT_SGOBJECT) {
358  if (!m_file->group_open(name, "")) return false;
359  return true;
360  }
361 
362  if ((m->dset = H5Dopen2(m_file->m_stack_h5stream.back(), name,
363  H5P_DEFAULT)) < 0)
364  return false;
365 
366  if ((m->dtype = H5Dget_type(m->dset)) < 0) return false;
367  if (!CSerializableHdf5File::isequal_stype2hdf5(
368  type->m_stype, type->m_ptype, m->dtype)) return false;
369 
370  if ((m->dspace = H5Dget_space(m->dset)) < 0) return false;
371 
372  if (H5Sget_simple_extent_ndims(m->dspace) > 2) return false;
373  if ((m->rank = H5Sget_simple_extent_dims(m->dspace, m->dims, NULL)
374  ) < 0) return false;
375 
376  switch (type->m_ctype) {
377  case CT_NDARRAY:
379  case CT_SCALAR:
380  if (m->rank != 0) return false;
381  if (type->m_stype == ST_STRING) m->vltype = SG_MALLOC(hvl_t, 1);
382  break;
383  case CT_VECTOR: case CT_SGVECTOR:
384  if (H5Sget_simple_extent_type(m->dspace) != H5S_NULL
385  && m->rank != 1) return false;
386  if (type->m_stype == ST_STRING)
387  m->vltype = SG_MALLOC(hvl_t, m->dims[0]);
388  break;
389  case CT_MATRIX: case CT_SGMATRIX:
390  if (H5Sget_simple_extent_type(m->dspace) != H5S_NULL
391  && m->rank != 2) return false;
392  if (type->m_stype == ST_STRING)
393  m->vltype = SG_MALLOC(hvl_t, m->dims[0] *m->dims[1]);
394  break;
395  default: return false;
396  }
397 
398  return true;
399 }
400 
401 bool
402 SerializableHdf5Reader00::read_type_end_wrapped(
403  const TSGDataType* type, const char* name, const char* prefix)
404 {
405  if (type->m_ptype == PT_SGOBJECT)
406  if (!m_file->group_close()) return false;
407 
408  delete m_file->m_stack_type.back();
409  m_file->m_stack_type.pop_back();
410  return true;
411 }
412 
413 #endif /* HAVE_HDF5 */
EStructType m_stype
Definition: DataType.h:73
int32_t index_t
Definition: common.h:62
static bool string_to_ptype(EPrimitiveType *ptype, const char *str)
Definition: DataType.cpp:393
#define SG_ERROR(...)
Definition: SGIO.h:129
#define SG_NOTIMPLEMENTED
Definition: SGIO.h:139
size_t sizeof_ptype() const
Definition: DataType.cpp:183
Datatypes that shogun supports.
Definition: DataType.h:68
#define ASSERT(x)
Definition: SGIO.h:201
#define STRING_LEN
Definition: common.h:55
void to_string(char *dest, size_t n) const
Definition: DataType.cpp:145
EContainerType m_ctype
Definition: DataType.h:71
All classes and functions are contained in the shogun namespace.
Definition: class_list.h:18
template class SGSparseVectorEntry
Definition: File.h:23
char string_t[STRING_LEN]
Definition: common.h:57
EPrimitiveType m_ptype
Definition: DataType.h:75

SHOGUN 机器学习工具包 - 项目文档