25 using namespace shogun;
27 CHDF5File::CHDF5File()
/*
 * Construct an HDF5-backed file object for the file at `fname`.
 *
 * Visible behaviour:
 *  - H5Eset_auto2(H5E_DEFAULT, NULL, NULL) installs a NULL handler on the
 *    default HDF5 error stack, so failures are reported through SG_ERROR
 *    below rather than by HDF5's automatic error printing.
 *  - `name` is stored through set_variable_name().
 *  - `h5file` is obtained from exactly one of: H5Fopen read-only,
 *    H5Fcreate with truncation, or H5Fopen read-write (with a second
 *    H5Fcreate/truncate call after it).
 *  - "Could not open file" is raised via SG_ERROR with `fname`.
 *
 * NOTE(review): the conditions selecting between the open/create calls are
 * not visible here; presumably they switch on `rw` and the second H5Fcreate
 * is a fallback when the read-write open fails -- confirm.
 */
35 CHDF5File::CHDF5File(
char* fname,
char rw, const
char* name) :
CFile()
38 H5Eset_auto2(H5E_DEFAULT, NULL, NULL);
41 set_variable_name(name);
46 h5file = H5Fopen(fname, H5F_ACC_RDONLY, H5P_DEFAULT);
49 h5file = H5Fcreate(fname, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
52 h5file = H5Fopen(fname, H5F_ACC_RDWR, H5P_DEFAULT);
54 h5file = H5Fcreate(fname, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
61 SG_ERROR(
"Could not open file '%s'\n", fname)
64 CHDF5File::~CHDF5File()
/*
 * GET_VECTOR(fname, sg_type, datatype)
 *
 * Generates `void CHDF5File::fname(sg_type*& vec, int32_t& len)`, a reader
 * that loads the dataset named `variable_name` from `h5file` into a newly
 * SG_MALLOC'ed buffer of `nelements` values of `sg_type`.
 *
 *  fname    - name of the generated member function
 *  sg_type  - C++ element type of the output buffer
 *  datatype - parenthesised TSGDataType initializer; it is pasted directly
 *             after `TSGDataType t` and passed to get_compatible_type() to
 *             pick the HDF5 memory type handed to H5Dread
 *
 * Accepted dataset shapes (per the check below): 2-D (nelements x 1),
 * 2-D (1 x nelements), or 1-D (nelements); anything else is an SG_ERROR.
 *
 * NOTE(review): "No compatible datatype found" is reported with SG_INFO,
 * unlike the surrounding SG_ERROR checks; its guarding condition is not
 * visible here -- confirm that continuing after it is intended.
 * NOTE(review): how `len` is assigned and whether `dims`/`dataset` are
 * released is not visible in this excerpt.
 */
69 #define GET_VECTOR(fname, sg_type, datatype) \
70 void CHDF5File::fname(sg_type*& vec, int32_t& len) \
73 SG_ERROR("File invalid.\n") \
78 hid_t dataset = H5Dopen2(h5file, variable_name, H5P_DEFAULT); \
80 SG_ERROR("Error opening data set\n") \
81 hid_t dtype = H5Dget_type(dataset); \
82 H5T_class_t t_class=H5Tget_class(dtype); \
83 TSGDataType t datatype; hid_t h5_type=get_compatible_type(t_class, &t); \
87 SG_INFO("No compatible datatype found\n") \
89 get_dims(dataset, dims, ndims, nelements); \
90 if (!((ndims==2 && dims[0]==nelements && dims[1]==1) || \
91 (ndims==2 && dims[0]==1 && dims[1]==nelements) || \
92 (ndims==1 && dims[0]==nelements))) \
93 SG_ERROR("Error not a 1-dimensional vector (ndims=%d, dims[0]=%d)\n", ndims, dims[0]) \
94 vec=SG_MALLOC(sg_type, nelements); \
96 herr_t status = H5Dread(dataset, h5_type, H5S_ALL, \
97 H5S_ALL, H5P_DEFAULT, vec); \
104 SG_ERROR("Error reading dataset\n") \
/*
 * Instantiate get_vector() for each supported element type.
 *
 * The third argument is the TSGDataType initializer; its primitive tag is
 * what get_compatible_type() maps to the HDF5 memory type passed to H5Dread.
 * The tag must therefore match the signedness and width of the C++ element
 * type: a signed tag on an unsigned buffer makes HDF5 convert the data as
 * signed values. (The uint16_t overload previously used PT_INT16.)
 */
108 GET_VECTOR(get_vector,
bool, (CT_VECTOR, ST_NONE, PT_BOOL))
109 GET_VECTOR(get_vector, int8_t, (CT_VECTOR, ST_NONE, PT_INT8))
110 GET_VECTOR(get_vector, uint8_t, (CT_VECTOR, ST_NONE, PT_UINT8))
111 GET_VECTOR(get_vector,
char, (CT_VECTOR, ST_NONE, PT_CHAR))
112 GET_VECTOR(get_vector, int32_t, (CT_VECTOR, ST_NONE, PT_INT32))
113 GET_VECTOR(get_vector, uint32_t, (CT_VECTOR, ST_NONE, PT_UINT32))
117 GET_VECTOR(get_vector, int16_t, (CT_VECTOR, ST_NONE, PT_INT16))
118 GET_VECTOR(get_vector, uint16_t, (CT_VECTOR, ST_NONE, PT_UINT16))
119 GET_VECTOR(get_vector, int64_t, (CT_VECTOR, ST_NONE, PT_INT64))
120 GET_VECTOR(get_vector, uint64_t, (CT_VECTOR, ST_NONE, PT_UINT64))
/*
 * GET_MATRIX(fname, sg_type, datatype)
 *
 * Generates
 *   void CHDF5File::fname(sg_type*& matrix, int32_t& num_feat, int32_t& num_vec)
 * which reads the dataset named `variable_name` from `h5file` into a newly
 * SG_MALLOC'ed buffer of `nelements` values of `sg_type`.
 *
 *  fname    - name of the generated member function
 *  sg_type  - C++ element type of the output buffer
 *  datatype - parenthesised TSGDataType initializer, pasted after
 *             `TSGDataType t` and passed to get_compatible_type() to select
 *             the HDF5 memory type used by H5Dread
 *
 * Datasets that are not 2-dimensional are rejected with SG_ERROR.
 *
 * NOTE(review): "No compatible datatype found" uses SG_INFO rather than
 * SG_ERROR; its guarding condition is not visible here -- confirm intent.
 * NOTE(review): how num_feat/num_vec are derived from `dims` is not visible
 * in this excerpt.
 */
123 #define GET_MATRIX(fname, sg_type, datatype) \
124 void CHDF5File::fname(sg_type*& matrix, int32_t& num_feat, int32_t& num_vec) \
127 SG_ERROR("File invalid.\n") \
132 hid_t dataset = H5Dopen2(h5file, variable_name, H5P_DEFAULT); \
134 SG_ERROR("Error opening data set\n") \
135 hid_t dtype = H5Dget_type(dataset); \
136 H5T_class_t t_class=H5Tget_class(dtype); \
137 TSGDataType t datatype; hid_t h5_type=get_compatible_type(t_class, &t); \
141 SG_INFO("No compatible datatype found\n") \
143 get_dims(dataset, dims, ndims, nelements); \
145 SG_ERROR("Error not a 2-dimensional matrix\n") \
146 matrix=SG_MALLOC(sg_type, nelements); \
149 herr_t status = H5Dread(dataset, h5_type, H5S_ALL, \
150 H5S_ALL, H5P_DEFAULT, matrix); \
157 SG_ERROR("Error reading dataset\n") \
/*
 * Instantiate get_matrix() for each supported element type.
 *
 * The third argument is the TSGDataType initializer; its primitive tag is
 * what get_compatible_type() maps to the HDF5 memory type passed to H5Dread.
 * The tag must match the signedness and width of the C++ element type, or
 * HDF5 converts the stored values as the wrong type. (The uint32_t, uint64_t
 * and uint16_t overloads previously carried the signed tags PT_INT32,
 * PT_INT64 and PT_INT16.)
 */
161 GET_MATRIX(get_matrix,
bool, (CT_MATRIX, ST_NONE, PT_BOOL))
162 GET_MATRIX(get_matrix,
char, (CT_MATRIX, ST_NONE, PT_CHAR))
163 GET_MATRIX(get_matrix, uint8_t, (CT_MATRIX, ST_NONE, PT_UINT8))
164 GET_MATRIX(get_matrix, int32_t, (CT_MATRIX, ST_NONE, PT_INT32))
165 GET_MATRIX(get_matrix, uint32_t, (CT_MATRIX, ST_NONE, PT_UINT32))
166 GET_MATRIX(get_matrix, int64_t, (CT_MATRIX, ST_NONE, PT_INT64))
167 GET_MATRIX(get_matrix, uint64_t, (CT_MATRIX, ST_NONE, PT_UINT64))
168 GET_MATRIX(get_matrix, int16_t, (CT_MATRIX, ST_NONE, PT_INT16))
169 GET_MATRIX(get_matrix, uint16_t, (CT_MATRIX, ST_NONE, PT_UINT16))
170 GET_MATRIX(get_matrix, float32_t, (CT_MATRIX, ST_NONE, PT_FLOAT32))
171 GET_MATRIX(get_matrix, float64_t, (CT_MATRIX, ST_NONE, PT_FLOAT64))
172 GET_MATRIX(get_matrix, floatmax_t, (CT_MATRIX, ST_NONE, PT_FLOATMAX))
175 void CHDF5File::get_ndarray(uint8_t*& array, int32_t*& dims, int32_t& num_dims)
179 void CHDF5File::get_ndarray(
char*& array, int32_t*& dims, int32_t& num_dims)
183 void CHDF5File::get_ndarray(int32_t*& array, int32_t*& dims, int32_t& num_dims)
187 void CHDF5File::get_ndarray(float32_t*& array, int32_t*& dims, int32_t& num_dims)
191 void CHDF5File::get_ndarray(float64_t*& array, int32_t*& dims, int32_t& num_dims)
195 void CHDF5File::get_ndarray(int16_t*& array, int32_t*& dims, int32_t& num_dims)
199 void CHDF5File::get_ndarray(uint16_t*& array, int32_t*& dims, int32_t& num_dims)
/*
 * GET_SPARSEMATRIX(fname, sg_type, datatype)
 *
 * Generates
 *   void CHDF5File::fname(SGSparseVector<sg_type>*& matrix,
 *                         int32_t& num_feat, int32_t& num_vec)
 * The generated function raises SG_ERROR("File invalid.") from a validity
 * check. The macro is removed again with #undef once it is no longer needed.
 */
203 #define GET_SPARSEMATRIX(fname, sg_type, datatype) \
204 void CHDF5File::fname(SGSparseVector<sg_type>*& matrix, int32_t& num_feat, int32_t& num_vec) \
207 SG_ERROR("File invalid.\n") \
222 #undef GET_SPARSEMATRIX
/*
 * GET_STRING_LIST(fname, sg_type, datatype)
 *
 * Generates
 *   void CHDF5File::fname(SGString<sg_type>*& strings,
 *                         int32_t& num_str, int32_t& max_string_len)
 * The macro is removed again with #undef once it is no longer needed.
 */
225 #define GET_STRING_LIST(fname, sg_type, datatype) \
226 void CHDF5File::fname(SGString<sg_type>*& strings, int32_t& num_str, int32_t& max_string_len) \
243 #undef GET_STRING_LIST
/*
 * SET_VECTOR(fname, sg_type, dtype, h5type)
 *
 * Generates `void CHDF5File::fname(const sg_type* vec, int32_t len)`, which
 * writes `len` elements of `vec` as a 1-dimensional dataset named
 * `variable_name` in `h5file`.
 *
 *  fname   - name of the generated member function
 *  sg_type - C++ element type of the input buffer
 *  dtype   - not referenced by the statements visible here
 *  h5type  - HDF5 type id, used both as the dataset's type in H5Dcreate2 and
 *            as the memory type in H5Dwrite
 *
 * Steps: reject an invalid file handle (h5file<0) or null `vec`; call
 * create_group_hierarchy(); create a rank-1 dataspace of extent `len`;
 * create the dataset (failure is reported with a hint that the dataset may
 * already exist); write the data; close the dataspace.
 *
 * NOTE(review): `status` is declared as hid_t although H5Dwrite returns
 * herr_t -- harmless for a <0 check, but worth tidying.
 * NOTE(review): a matching H5Dclose(dataset) is not visible in this
 * excerpt -- confirm the dataset handle is released.
 */
247 #define SET_VECTOR(fname, sg_type, dtype, h5type) \
248 void CHDF5File::fname(const sg_type* vec, int32_t len) \
250 if (h5file<0 || !vec) \
251 SG_ERROR("File or vector invalid.\n") \
253 create_group_hierarchy(); \
255 hsize_t dims=(hsize_t) len; \
256 hid_t dataspace, dataset, status; \
257 dataspace=H5Screate_simple(1, &dims, NULL); \
259 SG_ERROR("Could not create hdf5 dataspace\n") \
260 dataset=H5Dcreate2(h5file, variable_name, h5type, dataspace, H5P_DEFAULT,\
261 H5P_DEFAULT, H5P_DEFAULT); \
264 SG_ERROR("Could not create hdf5 dataset - does" \
265 " dataset '%s' already exist?\n", variable_name); \
267 status=H5Dwrite(dataset, h5type, H5S_ALL, H5S_ALL, H5P_DEFAULT, vec); \
269 SG_ERROR("Failed to write hdf5 dataset\n") \
271 H5Sclose(dataspace); \
/*
 * Instantiate set_vector() for each supported element type.
 *
 * The fourth argument is the HDF5 type id that SET_VECTOR passes to both
 * H5Dcreate2 (type of the stored dataset) and H5Dwrite (type of the memory
 * buffer). `boolean_type` is a variable rather than an HDF5 constant; it is
 * assigned in get_boolean_type().
 *
 * NOTE(review): int8_t and uint8_t share the DT_VECTOR_BYTE tag, and the
 * 64-bit overloads use H5T_NATIVE_LLONG/ULLONG whereas set_matrix uses
 * H5T_NATIVE_INT64/UINT64 -- confirm both are intentional.
 */
273 SET_VECTOR(set_vector,
bool, DT_VECTOR_BOOL, boolean_type)
274 SET_VECTOR(set_vector, int8_t, DT_VECTOR_BYTE, H5T_NATIVE_INT8)
275 SET_VECTOR(set_vector, uint8_t, DT_VECTOR_BYTE, H5T_NATIVE_UINT8)
276 SET_VECTOR(set_vector,
char, DT_VECTOR_CHAR, H5T_NATIVE_CHAR)
277 SET_VECTOR(set_vector, int32_t, DT_VECTOR_INT, H5T_NATIVE_INT32)
278 SET_VECTOR(set_vector, uint32_t, DT_VECTOR_UINT, H5T_NATIVE_UINT32)
279 SET_VECTOR(set_vector, float32_t, DT_VECTOR_SHORTREAL, H5T_NATIVE_FLOAT)
280 SET_VECTOR(set_vector, float64_t, DT_VECTOR_REAL, H5T_NATIVE_DOUBLE)
281 SET_VECTOR(set_vector, floatmax_t, DT_VECTOR_LONGREAL, H5T_NATIVE_LDOUBLE)
282 SET_VECTOR(set_vector, int16_t, DT_VECTOR_SHORT, H5T_NATIVE_INT16)
283 SET_VECTOR(set_vector, uint16_t, DT_VECTOR_WORD, H5T_NATIVE_UINT16)
284 SET_VECTOR(set_vector, int64_t, DT_VECTOR_LONG, H5T_NATIVE_LLONG)
285 SET_VECTOR(set_vector, uint64_t, DT_VECTOR_ULONG, H5T_NATIVE_ULLONG)
/*
 * SET_MATRIX(fname, sg_type, dtype, h5type)
 *
 * Generates
 *   void CHDF5File::fname(const sg_type* matrix, int32_t num_feat, int32_t num_vec)
 * which writes `matrix` as a 2-dimensional dataset named `variable_name` in
 * `h5file`, with dataspace extents {num_feat, num_vec}.
 *
 *  fname   - name of the generated member function
 *  sg_type - C++ element type of the input buffer
 *  dtype   - not referenced by the statements visible here
 *  h5type  - HDF5 type id, used both as the dataset's type in H5Dcreate2 and
 *            as the memory type in H5Dwrite
 *
 * Steps: reject an invalid file handle (h5file<0) or null `matrix`; call
 * create_group_hierarchy(); create the rank-2 dataspace; create the dataset
 * (failure is reported with a hint that the dataset may already exist);
 * write the data; close the dataspace.
 *
 * NOTE(review): `status` is declared as hid_t although H5Dwrite returns
 * herr_t.
 * NOTE(review): a matching H5Dclose(dataset) is not visible in this
 * excerpt -- confirm the dataset handle is released.
 */
288 #define SET_MATRIX(fname, sg_type, dtype, h5type) \
289 void CHDF5File::fname(const sg_type* matrix, int32_t num_feat, int32_t num_vec) \
291 if (h5file<0 || !matrix) \
292 SG_ERROR("File or matrix invalid.\n") \
294 create_group_hierarchy(); \
296 hsize_t dims[2]={(hsize_t) num_feat, (hsize_t) num_vec}; \
297 hid_t dataspace, dataset, status; \
298 dataspace=H5Screate_simple(2, dims, NULL); \
300 SG_ERROR("Could not create hdf5 dataspace\n") \
301 dataset=H5Dcreate2(h5file, variable_name, h5type, dataspace, H5P_DEFAULT, \
302 H5P_DEFAULT, H5P_DEFAULT); \
305 SG_ERROR("Could not create hdf5 dataset - does" \
306 " dataset '%s' already exist?\n", variable_name); \
308 status=H5Dwrite(dataset, h5type, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix); \
310 SG_ERROR("Failed to write hdf5 dataset\n") \
312 H5Sclose(dataspace); \
/*
 * Instantiate set_matrix() for each supported element type.
 *
 * The fourth argument is the HDF5 type id that SET_MATRIX passes to both
 * H5Dcreate2 (type of the stored dataset) and H5Dwrite (type of the memory
 * buffer). `boolean_type` is a variable rather than an HDF5 constant; it is
 * assigned in get_boolean_type().
 *
 * NOTE(review): int8_t and uint8_t share the DT_DENSE_BYTE tag -- confirm
 * this is intentional.
 */
314 SET_MATRIX(set_matrix,
bool, DT_DENSE_BOOL, boolean_type)
315 SET_MATRIX(set_matrix,
char, DT_DENSE_CHAR, H5T_NATIVE_CHAR)
316 SET_MATRIX(set_matrix, int8_t, DT_DENSE_BYTE, H5T_NATIVE_INT8)
317 SET_MATRIX(set_matrix, uint8_t, DT_DENSE_BYTE, H5T_NATIVE_UINT8)
318 SET_MATRIX(set_matrix, int32_t, DT_DENSE_INT, H5T_NATIVE_INT32)
319 SET_MATRIX(set_matrix, uint32_t, DT_DENSE_UINT, H5T_NATIVE_UINT32)
320 SET_MATRIX(set_matrix, int64_t, DT_DENSE_LONG, H5T_NATIVE_INT64)
321 SET_MATRIX(set_matrix, uint64_t, DT_DENSE_ULONG, H5T_NATIVE_UINT64)
322 SET_MATRIX(set_matrix, int16_t, DT_DENSE_SHORT, H5T_NATIVE_INT16)
323 SET_MATRIX(set_matrix, uint16_t, DT_DENSE_WORD, H5T_NATIVE_UINT16)
324 SET_MATRIX(set_matrix, float32_t, DT_DENSE_SHORTREAL, H5T_NATIVE_FLOAT)
325 SET_MATRIX(set_matrix, float64_t, DT_DENSE_REAL, H5T_NATIVE_DOUBLE)
326 SET_MATRIX(set_matrix, floatmax_t, DT_DENSE_LONGREAL, H5T_NATIVE_LDOUBLE)
/*
 * SET_SPARSEMATRIX(fname, sg_type, dtype)
 *
 * Generates
 *   void CHDF5File::fname(const SGSparseVector<sg_type>* matrix,
 *                         int32_t num_feat, int32_t num_vec)
 * The generated function raises SG_ERROR when either `file` or `matrix` is
 * null/false. The macro is removed again with #undef once it is no longer
 * needed.
 *
 * NOTE(review): this guard tests `file`, whereas SET_VECTOR/SET_MATRIX test
 * `h5file<0`; confirm `file` is the handle that should be validated here.
 */
329 #define SET_SPARSEMATRIX(fname, sg_type, dtype) \
330 void CHDF5File::fname(const SGSparseVector<sg_type>* matrix, \
331 int32_t num_feat, int32_t num_vec) \
333 if (!(file && matrix)) \
334 SG_ERROR("File or matrix invalid.\n") \
350 #undef SET_SPARSEMATRIX
/*
 * SET_STRING_LIST(fname, sg_type, dtype)
 *
 * Generates
 *   void CHDF5File::fname(const SGString<sg_type>* strings, int32_t num_str)
 * The generated function raises SG_ERROR when either `file` or `strings` is
 * null/false. The macro is removed again with #undef once it is no longer
 * needed.
 *
 * NOTE(review): this guard tests `file`, whereas SET_VECTOR/SET_MATRIX test
 * `h5file<0`; confirm `file` is the handle that should be validated here.
 */
352 #define SET_STRING_LIST(fname, sg_type, dtype) \
353 void CHDF5File::fname(const SGString<sg_type>* strings, int32_t num_str) \
355 if (!(file && strings)) \
356 SG_ERROR("File or strings invalid.\n") \
372 #undef SET_STRING_LIST
/*
 * Choose the HDF5 native type used to represent C++ `bool` and store it in
 * `boolean_type`.
 *
 * `boolean_type` starts as H5T_NATIVE_UCHAR and is then set, by a switch on
 * sizeof(bool), to one of H5T_NATIVE_UCHAR, H5T_NATIVE_UINT16,
 * H5T_NATIVE_UINT32 or H5T_NATIVE_UINT64; the remaining branch raises
 * SG_ERROR because no matching type exists on the platform.
 *
 * NOTE(review): the case labels are not visible here; presumably they are
 * 1, 2, 4 and 8 bytes respectively -- confirm.
 */
374 void CHDF5File::get_boolean_type()
376 boolean_type=H5T_NATIVE_UCHAR;
377 switch (
sizeof(
bool))
380 boolean_type = H5T_NATIVE_UCHAR;
383 boolean_type = H5T_NATIVE_UINT16;
386 boolean_type = H5T_NATIVE_UINT32;
389 boolean_type = H5T_NATIVE_UINT64;
392 SG_ERROR(
"Boolean type not supported on this platform\n")
/*
 * Map a primitive type tag to the HDF5 native type id used as the memory
 * type for dataset I/O.
 *
 * Mapping (from the case labels below):
 *   PT_BOOL -> boolean_type (set by get_boolean_type()), PT_CHAR ->
 *   H5T_NATIVE_CHAR, PT_INT8/UINT8/INT16/UINT16/INT32/UINT32/INT64/UINT64 ->
 *   the H5T_NATIVE_* type of the same width and signedness, PT_FLOAT32 ->
 *   H5T_NATIVE_FLOAT, PT_FLOAT64 -> H5T_NATIVE_DOUBLE, PT_FLOATMAX ->
 *   H5T_NATIVE_LDOUBLE.
 *
 * SG_ERROR is raised for complex128_t, for variable length containers and
 * for array containers, none of which are supported.
 *
 * NOTE(review): how `t_class` selects between these branches is not visible
 * in this excerpt.
 */
396 hid_t CHDF5File::get_compatible_type(H5T_class_t t_class,
405 case PT_BOOL:
return boolean_type;
406 case PT_CHAR:
return H5T_NATIVE_CHAR;
407 case PT_INT8:
return H5T_NATIVE_INT8;
408 case PT_UINT8:
return H5T_NATIVE_UINT8;
409 case PT_INT16:
return H5T_NATIVE_INT16;
410 case PT_UINT16:
return H5T_NATIVE_UINT16;
411 case PT_INT32:
return H5T_NATIVE_INT32;
412 case PT_UINT32:
return H5T_NATIVE_UINT32;
413 case PT_INT64:
return H5T_NATIVE_INT64;
414 case PT_UINT64:
return H5T_NATIVE_UINT64;
415 case PT_FLOAT32:
return H5T_NATIVE_FLOAT;
416 case PT_FLOAT64:
return H5T_NATIVE_DOUBLE;
417 case PT_FLOATMAX:
return H5T_NATIVE_LDOUBLE;
419 SG_ERROR(
"complex128_t not compatible with HDF5File!");
422 SG_ERROR(
"Implementation error during writing "
430 SG_ERROR("Variable length containers currently not supported")
433 SG_ERROR("Array containers currently not supported")
/*
 * Query the shape of `dataset`.
 *
 * @param dataset        open HDF5 dataset handle
 * @param dims           out: newly SG_MALLOC'ed array of `ndims` int32_t
 *                       extents
 * @param ndims          out: rank reported by H5Sget_simple_extent_ndims
 * @param total_elements out: element count reported by
 *                       H5Sget_simple_extent_npoints
 *
 * Raises SG_ERROR if the dataspace cannot be obtained. The extents are
 * fetched into a temporary hsize_t array (`dims_out`) and then processed
 * per dimension in the loop.
 *
 * NOTE(review): the loop body is not visible here; presumably it narrows
 * each hsize_t extent into the int32_t `dims` array -- confirm, and confirm
 * that `dims_out` and `dataspace` are released afterwards.
 */
441 void CHDF5File::get_dims(hid_t dataset, int32_t*& dims, int32_t& ndims, int64_t& total_elements)
443 hid_t dataspace = H5Dget_space(dataset);
445 SG_ERROR(
"Error obtaining hdf5 dataspace\n")
447 ndims = H5Sget_simple_extent_ndims(dataspace);
448 total_elements=H5Sget_simple_extent_npoints(dataspace);
449 hsize_t* dims_out=SG_MALLOC(hsize_t, ndims);
450 dims=SG_MALLOC(int32_t, ndims);
451 H5Sget_simple_extent_dims(dataspace, dims_out, NULL);
452 for (int32_t i=0; i<ndims; i++)
/*
 * Make sure the HDF5 groups along the path in `variable_name` exist.
 *
 * Works on a private copy of `variable_name` (get_strdup) and scans it
 * character by character. At every '/' other than one at index 0 it tries
 * to open the group with H5Gopen2 and falls back to H5Gcreate2; a failure to
 * create raises SG_ERROR naming the group.
 *
 * NOTE(review): the statements between the visible lines are not shown
 * here; presumably the copy is temporarily terminated at each '/' so that
 * `vname` names the prefix path, and the group handle and the copy are
 * released afterwards -- confirm.
 */
458 void CHDF5File::create_group_hierarchy()
460 char* vname=get_strdup(variable_name);
461 int32_t vlen=strlen(vname);
462 for (int32_t i=0; i<vlen; i++)
464 if (i!=0 && vname[i]==
'/')
467 hid_t g = H5Gopen2(h5file, vname, H5P_DEFAULT);
470 g=H5Gcreate2(h5file, vname, H5P_DEFAULT, H5P_DEFAULT,
473 SG_ERROR(
"Error creating group '%s'\n", vname)