24 using namespace shogun;
/** Default constructor.
 *
 * NOTE(review): confirm that h5file is left as an invalid handle here, since
 * the accessors treat a negative h5file as "file invalid".
 */
26 CHDF5File::CHDF5File()
/** Opens or creates the HDF5 file `fname` and remembers the variable name.
 *
 * @param fname path of the HDF5 file handed to H5Fopen()/H5Fcreate()
 * @param rw    access mode selector
 *              (NOTE(review): presumably 'r', 'w' and 'a' - confirm against
 *              the mode dispatch)
 * @param name  variable (dataset) name forwarded to set_variable_name()
 *
 * Raises SG_ERROR with the file name when the file could not be opened.
 */
34 CHDF5File::CHDF5File(
char* fname,
char rw,
const char* name) :
CFile()
// Install a NULL handler on the default error stack so the HDF5 library does
// not print its own diagnostics; failures are reported through SG_ERROR.
37 H5Eset_auto2(H5E_DEFAULT, NULL, NULL);
40 set_variable_name(name);
// Open an existing file read-only.
45 h5file = H5Fopen(fname, H5F_ACC_RDONLY, H5P_DEFAULT);
// Create the file, truncating any existing content.
48 h5file = H5Fcreate(fname, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
// Open an existing file read-write.
51 h5file = H5Fopen(fname, H5F_ACC_RDWR, H5P_DEFAULT);
// NOTE(review): presumably the fallback when the read-write open fails -
// confirm, because H5F_ACC_TRUNC discards existing content.
53 h5file = H5Fcreate(fname, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
60 SG_ERROR(
"Could not open file '%s'\n", fname);
/** Destructor.
 *
 * NOTE(review): presumably releases the HDF5 file handle h5file - confirm.
 */
63 CHDF5File::~CHDF5File()
/** Generates the reader method `CHDF5File::fname(sg_type*& vec, int32_t& len)`.
 *
 * The generated method opens the dataset named by variable_name in h5file,
 * takes the dataset's type class and asks get_compatible_type() for the
 * memory type matching the shogun descriptor `datatype`, queries the extents
 * with get_dims(), accepts only the shapes (n), (n,1) and (1,n), allocates
 * `vec` with SG_MALLOC for all elements and fills it with H5Dread().
 *
 * Failures (invalid file, dataset cannot be opened, wrong shape, read error)
 * are reported with SG_ERROR.
 *
 * NOTE(review): a missing compatible datatype is only logged with SG_INFO,
 * unlike every other failure in this macro - confirm that continuing is
 * intended.
 *
 * @param fname    name of the method to generate
 * @param sg_type  element type of the output buffer
 * @param datatype parenthesised constructor arguments for the TSGDataType
 *                 describing sg_type
 */
68 #define GET_VECTOR(fname, sg_type, datatype) \
69 void CHDF5File::fname(sg_type*& vec, int32_t& len) \
72 SG_ERROR("File invalid.\n"); \
77 hid_t dataset = H5Dopen2(h5file, variable_name, H5P_DEFAULT); \
79 SG_ERROR("Error opening data set\n"); \
80 hid_t dtype = H5Dget_type(dataset); \
81 H5T_class_t t_class=H5Tget_class(dtype); \
82 TSGDataType t datatype; hid_t h5_type=get_compatible_type(t_class, &t); \
86 SG_INFO("No compatible datatype found\n"); \
88 get_dims(dataset, dims, ndims, nelements); \
89 if (!((ndims==2 && dims[0]==nelements && dims[1]==1) || \
90 (ndims==2 && dims[0]==1 && dims[1]==nelements) || \
91 (ndims==1 && dims[0]==nelements))) \
92 SG_ERROR("Error not a 1-dimensional vector (ndims=%d, dims[0]=%d)\n", ndims, dims[0]); \
93 vec=SG_MALLOC(sg_type, nelements); \
95 herr_t status = H5Dread(dataset, h5_type, H5S_ALL, \
96 H5S_ALL, H5P_DEFAULT, vec); \
103 SG_ERROR("Error reading dataset\n"); \
/* Dense vector readers, one get_vector() overload per supported element
 * type. The third argument is the TSGDataType descriptor that
 * get_compatible_type() turns into the HDF5 memory type used by H5Dread(). */
107 GET_VECTOR(get_vector, bool, (CT_VECTOR, ST_NONE, PT_BOOL))
108 GET_VECTOR(get_vector, uint8_t, (CT_VECTOR, ST_NONE, PT_UINT8))
109 GET_VECTOR(get_vector, char, (CT_VECTOR, ST_NONE, PT_CHAR))
110 GET_VECTOR(get_vector, int32_t, (CT_VECTOR, ST_NONE, PT_INT32))
113 GET_VECTOR(get_vector, int16_t, (CT_VECTOR, ST_NONE, PT_INT16))
/* uint16_t has to be described as PT_UINT16: get_compatible_type() maps it to
 * H5T_NATIVE_UINT16, the type set_vector() writes. Describing it as PT_INT16
 * made H5Dread() convert to a signed 16-bit memory type, so values above
 * INT16_MAX could not be read back faithfully. */
114 GET_VECTOR(get_vector, uint16_t, (CT_VECTOR, ST_NONE, PT_UINT16))
/** Generates the reader method
 * `CHDF5File::fname(sg_type*& matrix, int32_t& num_feat, int32_t& num_vec)`.
 *
 * The generated method opens the dataset named by variable_name in h5file,
 * takes the dataset's type class and asks get_compatible_type() for the
 * memory type matching the shogun descriptor `datatype`, queries the extents
 * with get_dims(), allocates `matrix` with SG_MALLOC for all elements and
 * fills it with H5Dread().
 *
 * Failures (invalid file, dataset cannot be opened, data that is not a
 * 2-dimensional matrix, read error) are reported with SG_ERROR.
 *
 * NOTE(review): a missing compatible datatype is only logged with SG_INFO,
 * unlike every other failure in this macro - confirm that continuing is
 * intended.
 *
 * @param fname    name of the method to generate
 * @param sg_type  element type of the output buffer
 * @param datatype parenthesised constructor arguments for the TSGDataType
 *                 describing sg_type
 */
117 #define GET_MATRIX(fname, sg_type, datatype) \
118 void CHDF5File::fname(sg_type*& matrix, int32_t& num_feat, int32_t& num_vec) \
121 SG_ERROR("File invalid.\n"); \
126 hid_t dataset = H5Dopen2(h5file, variable_name, H5P_DEFAULT); \
128 SG_ERROR("Error opening data set\n"); \
129 hid_t dtype = H5Dget_type(dataset); \
130 H5T_class_t t_class=H5Tget_class(dtype); \
131 TSGDataType t datatype; hid_t h5_type=get_compatible_type(t_class, &t); \
135 SG_INFO("No compatible datatype found\n"); \
137 get_dims(dataset, dims, ndims, nelements); \
139 SG_ERROR("Error not a 2-dimensional matrix\n"); \
140 matrix=SG_MALLOC(sg_type, nelements); \
143 herr_t status = H5Dread(dataset, h5_type, H5S_ALL, \
144 H5S_ALL, H5P_DEFAULT, matrix); \
151 SG_ERROR("Error reading dataset\n"); \
/* Dense matrix readers, one per supported element type. The third argument
 * is the TSGDataType descriptor that get_compatible_type() turns into the
 * HDF5 memory type used by H5Dread().
 *
 * The unsigned element types are described with the unsigned primitive types
 * (PT_UINT16/PT_UINT32/PT_UINT64), which get_compatible_type() maps to
 * H5T_NATIVE_UINT16/UINT32/UINT64 - the same types the matching set_*matrix()
 * writers use. Describing them with the signed PT_INT* values made H5Dread()
 * convert to a signed memory type of the same width, so values above the
 * signed maximum could not be read back faithfully. */
155 GET_MATRIX(get_matrix, bool, (CT_MATRIX, ST_NONE, PT_BOOL))
156 GET_MATRIX(get_matrix, char, (CT_MATRIX, ST_NONE, PT_CHAR))
157 GET_MATRIX(get_matrix, uint8_t, (CT_MATRIX, ST_NONE, PT_UINT8))
158 GET_MATRIX(get_matrix, int32_t, (CT_MATRIX, ST_NONE, PT_INT32))
159 GET_MATRIX(get_uint_matrix, uint32_t, (CT_MATRIX, ST_NONE, PT_UINT32))
160 GET_MATRIX(get_long_matrix, int64_t, (CT_MATRIX, ST_NONE, PT_INT64))
161 GET_MATRIX(get_ulong_matrix, uint64_t, (CT_MATRIX, ST_NONE, PT_UINT64))
162 GET_MATRIX(get_matrix, int16_t, (CT_MATRIX, ST_NONE, PT_INT16))
163 GET_MATRIX(get_matrix, uint16_t, (CT_MATRIX, ST_NONE, PT_UINT16))
164 GET_MATRIX(get_matrix, float32_t, (CT_MATRIX, ST_NONE, PT_FLOAT32))
165 GET_MATRIX(get_matrix, float64_t, (CT_MATRIX, ST_NONE, PT_FLOAT64))
/* get_ndarray() overloads for the element types uint8_t, char, int32_t,
 * float32_t, float64_t, int16_t and uint16_t. Each takes the data buffer,
 * the dimension array and the number of dimensions by reference.
 * NOTE(review): confirm whether these are implemented for HDF5 or are
 * placeholders required by the CFile interface. */
169 void CHDF5File::get_ndarray(uint8_t*& array, int32_t*& dims, int32_t& num_dims)
173 void CHDF5File::get_ndarray(
char*& array, int32_t*& dims, int32_t& num_dims)
177 void CHDF5File::get_ndarray(int32_t*& array, int32_t*& dims, int32_t& num_dims)
181 void CHDF5File::get_ndarray(float32_t*& array, int32_t*& dims, int32_t& num_dims)
185 void CHDF5File::get_ndarray(float64_t*& array, int32_t*& dims, int32_t& num_dims)
189 void CHDF5File::get_ndarray(int16_t*& array, int32_t*& dims, int32_t& num_dims)
193 void CHDF5File::get_ndarray(uint16_t*& array, int32_t*& dims, int32_t& num_dims)
/** Generates sparse-matrix reader methods of the form
 * `CHDF5File::fname(SGSparseVector<sg_type>*& matrix, int32_t& num_feat,
 * int32_t& num_vec)`. The generated method reports "File invalid." through
 * SG_ERROR; the macro is undefined again once it is no longer needed.
 *
 * @param fname    name of the method to generate
 * @param sg_type  element type of the sparse vectors
 * @param datatype type descriptor for sg_type
 */
197 #define GET_SPARSEMATRIX(fname, sg_type, datatype) \
198 void CHDF5File::fname(SGSparseVector<sg_type>*& matrix, int32_t& num_feat, int32_t& num_vec) \
201 SG_ERROR("File invalid.\n"); \
215 #undef GET_SPARSEMATRIX
/** Generates string-list reader methods of the form
 * `CHDF5File::fname(SGString<sg_type>*& strings, int32_t& num_str,
 * int32_t& max_string_len)`. It is instantiated for floatmax_t as
 * get_longreal_string_list and undefined again afterwards.
 *
 * @param fname    name of the method to generate
 * @param sg_type  character/element type of the strings
 * @param datatype type descriptor for sg_type
 */
218 #define GET_STRING_LIST(fname, sg_type, datatype) \
219 void CHDF5File::fname(SGString<sg_type>*& strings, int32_t& num_str, int32_t& max_string_len) \
234 GET_STRING_LIST(get_longreal_string_list, floatmax_t, DT_STRING_LONGREAL)
235 #undef GET_STRING_LIST
/** Generates the writer method
 * `CHDF5File::fname(const sg_type* vec, int32_t len)`.
 *
 * The generated method rejects a negative h5file or a null `vec`, makes sure
 * the groups on the path exist via create_group_hierarchy(), creates a
 * one-dimensional dataspace of `len` elements, creates the dataset named by
 * variable_name with element type `h5type`, writes `vec` with H5Dwrite() and
 * closes the dataspace. Every failure is reported with SG_ERROR; a failed
 * dataset creation hints that the dataset may already exist.
 *
 * NOTE(review): `status` receives the herr_t result of H5Dwrite() but is
 * declared as hid_t together with the handles - confirm this is harmless.
 *
 * @param fname   name of the method to generate
 * @param sg_type element type of the input buffer
 * @param dtype   shogun data type tag (not referenced by the visible body)
 * @param h5type  HDF5 datatype used both as file and memory type
 */
239 #define SET_VECTOR(fname, sg_type, dtype, h5type) \
240 void CHDF5File::fname(const sg_type* vec, int32_t len) \
242 if (h5file<0 || !vec) \
243 SG_ERROR("File or vector invalid.\n"); \
245 create_group_hierarchy(); \
247 hsize_t dims=(hsize_t) len; \
248 hid_t dataspace, dataset, status; \
249 dataspace=H5Screate_simple(1, &dims, NULL); \
251 SG_ERROR("Could not create hdf5 dataspace\n"); \
252 dataset=H5Dcreate2(h5file, variable_name, h5type, dataspace, H5P_DEFAULT,\
253 H5P_DEFAULT, H5P_DEFAULT); \
256 SG_ERROR("Could not create hdf5 dataset - does" \
257 " dataset '%s' already exist?\n", variable_name); \
259 status=H5Dwrite(dataset, h5type, H5S_ALL, H5S_ALL, H5P_DEFAULT, vec); \
261 SG_ERROR("Failed to write hdf5 dataset\n"); \
263 H5Sclose(dataspace); \
/* Dense vector writers, one set_vector() overload per supported element
 * type. The last argument is the HDF5 datatype handed to H5Dcreate2() and
 * H5Dwrite(); bool uses the member boolean_type. */
265 SET_VECTOR(set_vector,
bool, DT_VECTOR_BOOL, boolean_type)
266 SET_VECTOR(set_vector, uint8_t, DT_VECTOR_BYTE, H5T_NATIVE_UINT8)
267 SET_VECTOR(set_vector,
char, DT_VECTOR_CHAR, H5T_NATIVE_CHAR)
268 SET_VECTOR(set_vector, int32_t, DT_VECTOR_INT, H5T_NATIVE_INT32)
269 SET_VECTOR(set_vector, float32_t, DT_VECTOR_SHORTREAL, H5T_NATIVE_FLOAT)
270 SET_VECTOR(set_vector, float64_t, DT_VECTOR_REAL, H5T_NATIVE_DOUBLE)
271 SET_VECTOR(set_vector, int16_t, DT_VECTOR_SHORT, H5T_NATIVE_INT16)
272 SET_VECTOR(set_vector, uint16_t, DT_VECTOR_WORD, H5T_NATIVE_UINT16)
/** Generates the writer method
 * `CHDF5File::fname(const sg_type* matrix, int32_t num_feat, int32_t num_vec)`.
 *
 * The generated method rejects a negative h5file or a null `matrix`, makes
 * sure the groups on the path exist via create_group_hierarchy(), creates a
 * two-dimensional dataspace with extents {num_feat, num_vec}, creates the
 * dataset named by variable_name with element type `h5type`, writes `matrix`
 * with H5Dwrite() and closes the dataspace. Every failure is reported with
 * SG_ERROR; a failed dataset creation hints that the dataset may already
 * exist.
 *
 * NOTE(review): `status` receives the herr_t result of H5Dwrite() but is
 * declared as hid_t together with the handles - confirm this is harmless.
 *
 * @param fname   name of the method to generate
 * @param sg_type element type of the input buffer
 * @param dtype   shogun data type tag (not referenced by the visible body)
 * @param h5type  HDF5 datatype used both as file and memory type
 */
275 #define SET_MATRIX(fname, sg_type, dtype, h5type) \
276 void CHDF5File::fname(const sg_type* matrix, int32_t num_feat, int32_t num_vec) \
278 if (h5file<0 || !matrix) \
279 SG_ERROR("File or matrix invalid.\n"); \
281 create_group_hierarchy(); \
283 hsize_t dims[2]={(hsize_t) num_feat, (hsize_t) num_vec}; \
284 hid_t dataspace, dataset, status; \
285 dataspace=H5Screate_simple(2, dims, NULL); \
287 SG_ERROR("Could not create hdf5 dataspace\n"); \
288 dataset=H5Dcreate2(h5file, variable_name, h5type, dataspace, H5P_DEFAULT, \
289 H5P_DEFAULT, H5P_DEFAULT); \
292 SG_ERROR("Could not create hdf5 dataset - does" \
293 " dataset '%s' already exist?\n", variable_name); \
295 status=H5Dwrite(dataset, h5type, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix); \
297 SG_ERROR("Failed to write hdf5 dataset\n"); \
299 H5Sclose(dataspace); \
/* Dense matrix writers, one per supported element type. The last argument is
 * the HDF5 datatype handed to H5Dcreate2() and H5Dwrite(); bool uses the
 * member boolean_type. */
301 SET_MATRIX(set_matrix,
bool, DT_DENSE_BOOL, boolean_type)
302 SET_MATRIX(set_matrix,
char, DT_DENSE_CHAR, H5T_NATIVE_CHAR)
303 SET_MATRIX(set_matrix, uint8_t, DT_DENSE_BYTE, H5T_NATIVE_UINT8)
304 SET_MATRIX(set_matrix, int32_t, DT_DENSE_INT, H5T_NATIVE_INT32)
305 SET_MATRIX(set_uint_matrix, uint32_t, DT_DENSE_UINT, H5T_NATIVE_UINT32)
306 SET_MATRIX(set_long_matrix, int64_t, DT_DENSE_LONG, H5T_NATIVE_INT64)
307 SET_MATRIX(set_ulong_matrix, uint64_t, DT_DENSE_ULONG, H5T_NATIVE_UINT64)
308 SET_MATRIX(set_matrix, int16_t, DT_DENSE_SHORT, H5T_NATIVE_INT16)
309 SET_MATRIX(set_matrix, uint16_t, DT_DENSE_WORD, H5T_NATIVE_UINT16)
310 SET_MATRIX(set_matrix, float32_t, DT_DENSE_SHORTREAL, H5T_NATIVE_FLOAT)
311 SET_MATRIX(set_matrix, float64_t, DT_DENSE_REAL, H5T_NATIVE_DOUBLE)
312 SET_MATRIX(set_longreal_matrix, floatmax_t, DT_DENSE_LONGREAL, H5T_NATIVE_LDOUBLE)
/** Generates sparse-matrix writer methods of the form
 * `CHDF5File::fname(const SGSparseVector<sg_type>* matrix, int32_t num_feat,
 * int32_t num_vec)`. The generated method raises SG_ERROR unless both `file`
 * and `matrix` are non-null; the macro is undefined again afterwards.
 *
 * NOTE(review): this checks `file`, whereas the dense writers check
 * `h5file<0` - confirm which handle is the right one for HDF5.
 *
 * @param fname   name of the method to generate
 * @param sg_type element type of the sparse vectors
 * @param dtype   shogun data type tag
 */
315 #define SET_SPARSEMATRIX(fname, sg_type, dtype) \
316 void CHDF5File::fname(const SGSparseVector<sg_type>* matrix, \
317 int32_t num_feat, int32_t num_vec) \
319 if (!(file && matrix)) \
320 SG_ERROR("File or matrix invalid.\n"); \
335 #undef SET_SPARSEMATRIX
/** Generates string-list writer methods of the form
 * `CHDF5File::fname(const SGString<sg_type>* strings, int32_t num_str)`.
 * The generated method raises SG_ERROR unless both `file` and `strings` are
 * non-null. It is instantiated for floatmax_t as set_longreal_string_list
 * and undefined again afterwards.
 *
 * NOTE(review): this checks `file`, whereas the dense writers check
 * `h5file<0` - confirm which handle is the right one for HDF5.
 *
 * @param fname   name of the method to generate
 * @param sg_type character/element type of the strings
 * @param dtype   shogun data type tag
 */
337 #define SET_STRING_LIST(fname, sg_type, dtype) \
338 void CHDF5File::fname(const SGString<sg_type>* strings, int32_t num_str) \
340 if (!(file && strings)) \
341 SG_ERROR("File or strings invalid.\n"); \
355 SET_STRING_LIST(set_longreal_string_list, floatmax_t, DT_STRING_LONGREAL)
356 #undef SET_STRING_LIST
/** Selects the HDF5 native type used to store `bool` and saves it in the
 * member boolean_type.
 *
 * The choice depends on sizeof(bool): the candidates are the unsigned native
 * types UCHAR, UINT16, UINT32 and UINT64. An unsupported size is reported
 * with SG_ERROR.
 */
358 void CHDF5File::get_boolean_type()
// Start from the one-byte type before inspecting the actual size.
360 boolean_type=H5T_NATIVE_UCHAR;
361 switch (
sizeof(
bool))
364 boolean_type = H5T_NATIVE_UCHAR;
367 boolean_type = H5T_NATIVE_UINT16;
370 boolean_type = H5T_NATIVE_UINT32;
373 boolean_type = H5T_NATIVE_UINT64;
376 SG_ERROR(
"Boolean type not supported on this platform\n");
/** Returns the HDF5 native type to use as memory type for a shogun
 * primitive type.
 *
 * PT_BOOL maps to the member boolean_type; every other primitive type maps
 * to the H5T_NATIVE_* type of the same width and signedness (for example
 * PT_UINT16 to H5T_NATIVE_UINT16, PT_FLOATMAX to H5T_NATIVE_LDOUBLE).
 * Variable length and array containers are rejected with SG_ERROR.
 *
 * @param t_class HDF5 type class of the dataset
 *                (NOTE(review): confirm how t_class selects between the
 *                primitive mapping and the unsupported container branches)
 */
380 hid_t CHDF5File::get_compatible_type(H5T_class_t t_class,
389 case PT_BOOL:
return boolean_type;
390 case PT_CHAR:
return H5T_NATIVE_CHAR;
391 case PT_INT8:
return H5T_NATIVE_INT8;
392 case PT_UINT8:
return H5T_NATIVE_UINT8;
393 case PT_INT16:
return H5T_NATIVE_INT16;
394 case PT_UINT16:
return H5T_NATIVE_UINT16;
395 case PT_INT32:
return H5T_NATIVE_INT32;
396 case PT_UINT32:
return H5T_NATIVE_UINT32;
397 case PT_INT64:
return H5T_NATIVE_INT64;
398 case PT_UINT64:
return H5T_NATIVE_UINT64;
399 case PT_FLOAT32:
return H5T_NATIVE_FLOAT;
400 case PT_FLOAT64:
return H5T_NATIVE_DOUBLE;
401 case PT_FLOATMAX:
return H5T_NATIVE_LDOUBLE;
403 SG_ERROR(
"Implementation error during writing "
411 SG_ERROR(
"Variable length containers currently not supported");
414 SG_ERROR(
"Array containers currently not supported");
/** Queries the shape of an open dataset.
 *
 * @param dataset        handle of the dataset to inspect
 * @param dims           receives the extents, one entry per dimension
 *                       (NOTE(review): confirm who allocates and who frees
 *                       this array)
 * @param ndims          receives the number of dimensions of the dataspace
 * @param total_elements receives the total number of elements
 *
 * Raises SG_ERROR when the dataspace cannot be obtained.
 */
422 void CHDF5File::get_dims(hid_t dataset, int32_t*& dims, int32_t& ndims, int64_t& total_elements)
424 hid_t dataspace = H5Dget_space(dataset);
426 SG_ERROR(
"Error obtaining hdf5 dataspace\n");
428 ndims = H5Sget_simple_extent_ndims(dataspace);
429 total_elements=H5Sget_simple_extent_npoints(dataspace);
// HDF5 reports extents as hsize_t, so they are fetched into a temporary
// buffer first.
// NOTE(review): confirm that dims_out and dataspace are released afterwards.
430 hsize_t* dims_out=
SG_MALLOC(hsize_t, ndims);
432 H5Sget_simple_extent_dims(dataspace, dims_out, NULL);
433 for (int32_t i=0; i<ndims; i++)
/** Makes sure the groups named on the path in variable_name exist.
 *
 * Works on a strdup()ed copy of variable_name and stops at every '/' that is
 * not the leading character. At each stop the group is opened with
 * H5Gopen2() and created with H5Gcreate2(); a failure to create it is
 * reported with SG_ERROR.
 *
 * NOTE(review): presumably the group is only created when opening it fails,
 * and the copy is truncated at the separator before each call - confirm.
 * Also confirm that the group handles and the copied string are released.
 */
439 void CHDF5File::create_group_hierarchy()
441 char* vname=strdup(variable_name);
442 int32_t vlen=strlen(vname);
443 for (int32_t i=0; i<vlen; i++)
// Skip index 0 so that a leading '/' (the root group) is not treated as a
// group to create.
445 if (i!=0 && vname[i]==
'/')
448 hid_t g = H5Gopen2(h5file, vname, H5P_DEFAULT);
451 g=H5Gcreate2(h5file, vname, H5P_DEFAULT, H5P_DEFAULT,
454 SG_ERROR(
"Error creating group '%s'\n", vname);