12 #if defined(HAVE_HDF5) && defined( HAVE_CURL)
18 #include <curl/curl.h>
25 using namespace shogun;
27 CMLDataHDF5File::CMLDataHDF5File()
29 SG_UNSTABLE(
"CMLDataHDF5File::CMLDataHDF5File()",
"\n")
35 size_t write_data(
void *ptr,
size_t size,
size_t nmemb, FILE *stream) {
36 size_t written = fwrite(ptr, size, nmemb, stream);
40 CMLDataHDF5File::CMLDataHDF5File(
char* data_name,
42 const char* url_prefix) :
CFile()
45 H5Eset_auto2(H5E_DEFAULT, NULL, NULL);
48 set_variable_name(name);
53 mldata_url = SG_CALLOC(
char, strlen(url_prefix)+strlen(data_name)+1);
54 strcat(mldata_url, url_prefix);
55 strcat(mldata_url, data_name);
57 fname = SG_CALLOC(
char, strlen((
char*)
"/tmp/")+strlen(data_name)+strlen((
char*)
".h5")+1);
58 strcat(fname, (
char*)
"/tmp/");
59 strcat(fname, data_name);
60 strcat(fname, (
char*)
".h5");
62 curl = curl_easy_init();
63 fp = fopen(fname,
"wb");
67 SG_ERROR(
"Could not open file '%s'\n", fname)
72 curl_easy_setopt(curl, CURLOPT_URL, mldata_url);
73 curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &write_data);
74 curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp);
75 curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
76 curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
77 curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
78 curl_easy_perform(curl);
79 curl_easy_cleanup(curl);
85 h5file = H5Fopen(fname, H5F_ACC_RDONLY, H5P_DEFAULT);
88 SG_ERROR(
"Could not open data repository '%s'\n", data_name)
91 CMLDataHDF5File::~CMLDataHDF5File()
99 #define GET_VECTOR(fname, sg_type, datatype) \
100 void CMLDataHDF5File::fname(sg_type*& vec, int32_t& len) \
103 SG_ERROR("File invalid.\n") \
108 hid_t dataset=H5Dopen2(h5file, variable_name, H5P_DEFAULT); \
110 SG_ERROR("Error opening data set\n") \
111 hid_t dtype=H5Dget_type(dataset); \
112 H5T_class_t t_class=H5Tget_class(dtype); \
113 TSGDataType t datatype; hid_t h5_type=get_compatible_type(t_class, &t); \
117 SG_INFO("No compatible datatype found\n") \
119 get_dims(dataset, dims, ndims, nelements); \
120 if (!((ndims==2 && dims[0]==nelements && dims[1]==1) || \
121 (ndims==2 && dims[0]==1 && dims[1]==nelements) || \
122 (ndims==1 && dims[0]==nelements))) \
123 SG_ERROR("Error not a 1-dimensional vector (ndims=%d, dims[0]=%d)\n", ndims, dims[0]) \
124 vec=SG_MALLOC(sg_type, nelements); \
126 herr_t status = H5Dread(dataset, h5_type, H5S_ALL, \
127 H5S_ALL, H5P_DEFAULT, vec); \
134 SG_ERROR("Error reading dataset\n") \
138 GET_VECTOR(get_vector,
bool, (CT_VECTOR, ST_NONE, PT_BOOL))
139 GET_VECTOR(get_vector, int8_t, (CT_VECTOR, ST_NONE, PT_INT8))
140 GET_VECTOR(get_vector, uint8_t, (CT_VECTOR, ST_NONE, PT_UINT8))
141 GET_VECTOR(get_vector,
char, (CT_VECTOR, ST_NONE, PT_CHAR))
142 GET_VECTOR(get_vector, int32_t, (CT_VECTOR, ST_NONE, PT_INT32))
143 GET_VECTOR(get_vector, uint32_t, (CT_VECTOR, ST_NONE, PT_UINT32))
147 GET_VECTOR(get_vector, int16_t, (CT_VECTOR, ST_NONE, PT_INT16))
148 GET_VECTOR(get_vector, uint16_t, (CT_VECTOR, ST_NONE, PT_INT16))
149 GET_VECTOR(get_vector, int64_t, (CT_VECTOR, ST_NONE, PT_INT64))
150 GET_VECTOR(get_vector, uint64_t, (CT_VECTOR, ST_NONE, PT_UINT64))
153 #define GET_MATRIX(fname, sg_type, datatype) \
154 void CMLDataHDF5File::fname(sg_type*& matrix, int32_t& num_feat, int32_t& num_vec) \
157 SG_ERROR("File invalid.\n") \
162 hid_t dataset = H5Dopen2(h5file, variable_name, H5P_DEFAULT); \
164 SG_ERROR("Error opening data set\n") \
165 hid_t dtype = H5Dget_type(dataset); \
166 H5T_class_t t_class=H5Tget_class(dtype); \
167 TSGDataType t datatype; hid_t h5_type=get_compatible_type(t_class, &t); \
171 SG_INFO("No compatible datatype found\n") \
173 get_dims(dataset, dims, ndims, nelements); \
175 SG_ERROR("Error not a 2-dimensional matrix\n") \
176 matrix=SG_MALLOC(sg_type, nelements); \
179 herr_t status = H5Dread(dataset, h5_type, H5S_ALL, \
180 H5S_ALL, H5P_DEFAULT, matrix); \
187 SG_ERROR("Error reading dataset\n") \
191 GET_MATRIX(get_matrix,
bool, (CT_MATRIX, ST_NONE, PT_BOOL))
192 GET_MATRIX(get_matrix,
char, (CT_MATRIX, ST_NONE, PT_CHAR))
193 GET_MATRIX(get_matrix, uint8_t, (CT_MATRIX, ST_NONE, PT_UINT8))
194 GET_MATRIX(get_matrix, int32_t, (CT_MATRIX, ST_NONE, PT_INT32))
195 GET_MATRIX(get_matrix, uint32_t, (CT_MATRIX, ST_NONE, PT_INT32))
196 GET_MATRIX(get_matrix, int64_t, (CT_MATRIX, ST_NONE, PT_INT64))
197 GET_MATRIX(get_matrix, uint64_t, (CT_MATRIX, ST_NONE, PT_INT64))
198 GET_MATRIX(get_matrix, int16_t, (CT_MATRIX, ST_NONE, PT_INT16))
199 GET_MATRIX(get_matrix, uint16_t, (CT_MATRIX, ST_NONE, PT_INT16))
200 GET_MATRIX(get_matrix, float32_t, (CT_MATRIX, ST_NONE, PT_FLOAT32))
201 GET_MATRIX(get_matrix, float64_t, (CT_MATRIX, ST_NONE, PT_FLOAT64))
202 GET_MATRIX(get_matrix, floatmax_t, (CT_MATRIX, ST_NONE, PT_FLOATMAX))
205 void CMLDataHDF5File::get_ndarray(uint8_t*& array, int32_t*& dims, int32_t& num_dims)
209 void CMLDataHDF5File::get_ndarray(
char*& array, int32_t*& dims, int32_t& num_dims)
213 void CMLDataHDF5File::get_ndarray(int32_t*& array, int32_t*& dims, int32_t& num_dims)
217 void CMLDataHDF5File::get_ndarray(float32_t*& array, int32_t*& dims, int32_t& num_dims)
221 void CMLDataHDF5File::get_ndarray(float64_t*& array, int32_t*& dims, int32_t& num_dims)
225 void CMLDataHDF5File::get_ndarray(int16_t*& array, int32_t*& dims, int32_t& num_dims)
229 void CMLDataHDF5File::get_ndarray(uint16_t*& array, int32_t*& dims, int32_t& num_dims)
233 #define GET_SPARSEMATRIX(fname, sg_type, datatype) \
234 void CMLDataHDF5File::fname(SGSparseVector<sg_type>*& matrix, int32_t& num_feat, int32_t& num_vec) \
237 SG_ERROR("File invalid.\n") \
252 #undef GET_SPARSEMATRIX
255 #define GET_STRING_LIST(fname, sg_type, datatype) \
256 void CMLDataHDF5File::fname(SGString<sg_type>*& strings, int32_t& num_str, int32_t& max_string_len) \
273 #undef GET_STRING_LIST
275 void CMLDataHDF5File::get_boolean_type()
277 boolean_type=H5T_NATIVE_UCHAR;
278 switch (
sizeof(
bool))
281 boolean_type = H5T_NATIVE_UCHAR;
284 boolean_type = H5T_NATIVE_UINT16;
287 boolean_type = H5T_NATIVE_UINT32;
290 boolean_type = H5T_NATIVE_UINT64;
293 SG_ERROR(
"Boolean type not supported on this platform\n")
297 hid_t CMLDataHDF5File::get_compatible_type(H5T_class_t t_class,
306 case PT_BOOL:
return boolean_type;
307 case PT_CHAR:
return H5T_NATIVE_CHAR;
308 case PT_INT8:
return H5T_NATIVE_INT8;
309 case PT_UINT8:
return H5T_NATIVE_UINT8;
310 case PT_INT16:
return H5T_NATIVE_INT16;
311 case PT_UINT16:
return H5T_NATIVE_UINT16;
312 case PT_INT32:
return H5T_NATIVE_INT32;
313 case PT_UINT32:
return H5T_NATIVE_UINT32;
314 case PT_INT64:
return H5T_NATIVE_INT64;
315 case PT_UINT64:
return H5T_NATIVE_UINT64;
316 case PT_FLOAT32:
return H5T_NATIVE_FLOAT;
317 case PT_FLOAT64:
return H5T_NATIVE_DOUBLE;
318 case PT_FLOATMAX:
return H5T_NATIVE_LDOUBLE;
320 SG_ERROR(
"complex128_t not compatible with HDF5File!");
323 SG_ERROR(
"Implementation error during writing "
331 SG_ERROR("Variable length containers currently not supported")
334 SG_ERROR("Array containers currently not supported")
342 void CMLDataHDF5File::get_dims(hid_t dataset, int32_t*& dims, int32_t& ndims, int64_t& total_elements)
344 hid_t dataspace = H5Dget_space(dataset);
346 SG_ERROR(
"Error obtaining hdf5 dataspace\n")
348 ndims = H5Sget_simple_extent_ndims(dataspace);
349 total_elements=H5Sget_simple_extent_npoints(dataspace);
350 hsize_t* dims_out=SG_MALLOC(hsize_t, ndims);
351 dims=SG_MALLOC(int32_t, ndims);
352 H5Sget_simple_extent_dims(dataspace, dims_out, NULL);
353 for (int32_t i=0; i<ndims; i++)
359 void CMLDataHDF5File::create_group_hierarchy()
361 char* vname=get_strdup(variable_name);
362 int32_t vlen=strlen(vname);
363 for (int32_t i=0; i<vlen; i++)
365 if (i!=0 && vname[i]==
'/')
368 hid_t g = H5Gopen2(h5file, vname, H5P_DEFAULT);
371 g=H5Gcreate2(h5file, vname, H5P_DEFAULT, H5P_DEFAULT,
374 SG_ERROR(
"Error creating group '%s'\n", vname)
382 #endif // HAVE_CURL && HAVE_HDF5