12 #if defined(HAVE_HDF5) && defined( HAVE_CURL)
17 #include <curl/curl.h>
/** Default constructor.
 *
 * The only statement visible in this listing reports the constructor through
 * SG_UNSTABLE, passing its own qualified name and a newline.
 */
23 CMLDataHDF5File::CMLDataHDF5File()
25 SG_UNSTABLE(
"CMLDataHDF5File::CMLDataHDF5File()",
"\n")
/** Write callback that stores downloaded bytes in a stdio stream.
 *
 * The downloading constructor in this file registers it with libcurl through
 * CURLOPT_WRITEFUNCTION and supplies the stream through CURLOPT_WRITEDATA.
 *
 * @param ptr    buffer holding the received data
 * @param size   size of one element in bytes
 * @param nmemb  number of elements in the buffer
 * @param stream destination stream handed to fwrite
 * @return not visible in this listing; presumably `written`, the element
 *         count reported by fwrite -- TODO confirm
 */
31 size_t write_data(
void *ptr,
size_t size,
size_t nmemb, FILE *stream) {
32 size_t written = fwrite(ptr, size, nmemb, stream);
/** Constructor that downloads a data set and opens it as an HDF5 file.
 *
 * Builds the URL as url_prefix + data_name, downloads it with libcurl into
 * "/tmp/" + data_name + ".h5" and opens that file read-only with H5Fopen.
 *
 * @param data_name  name of the data set; appended to url_prefix and used as
 *                   the base name of the temporary file
 * @param url_prefix string prepended to data_name to form the download URL
 *
 * NOTE(review): `name`, passed to set_variable_name() below, is not declared
 * on any line visible in this listing -- presumably a further parameter.
 */
36 CMLDataHDF5File::CMLDataHDF5File(
char* data_name,
38 const char* url_prefix) :
CFile()
// A NULL handler turns off HDF5's automatic error-stack printing (per the
// H5Eset_auto2 documentation).
41 H5Eset_auto2(H5E_DEFAULT, NULL, NULL);
44 set_variable_name(name);
// URL buffer: both parts plus the terminating NUL. The strcat calls rely on
// SG_CALLOC returning zero-filled memory (assumed calloc semantics).
49 mldata_url = SG_CALLOC(
char, strlen(url_prefix)+strlen(data_name)+1);
50 strcat(mldata_url, url_prefix);
51 strcat(mldata_url, data_name);
// Temporary file path: "/tmp/" + data_name + ".h5".
// NOTE(review): data_name is used verbatim, so a name containing '/' or ".."
// yields a path outside /tmp, and the location is predictable -- confirm that
// callers only pass trusted names.
53 fname = SG_CALLOC(
char, strlen((
char*)
"/tmp/")+strlen(data_name)+strlen((
char*)
".h5")+1);
54 strcat(fname, (
char*)
"/tmp/");
55 strcat(fname, data_name);
56 strcat(fname, (
char*)
".h5");
58 curl = curl_easy_init();
59 fp = fopen(fname,
"wb");
// The condition guarding this error is not visible in this listing.
63 SG_ERROR(
"Could not open file '%s'\n", fname)
68 curl_easy_setopt(curl, CURLOPT_URL, mldata_url);
69 curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &write_data);
70 curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp);
// NOTE(review): both TLS checks are switched off, so the server's identity
// and certificate are never validated for https URLs.
71 curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
72 curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
73 curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
// NOTE(review): the CURLcode returned by curl_easy_perform is discarded; a
// failed transfer is only noticed if H5Fopen below rejects the file.
74 curl_easy_perform(curl);
75 curl_easy_cleanup(curl);
// NOTE(review): no fclose(fp) is visible before this point -- confirm the
// stream is closed (and thereby flushed) before the file is reopened.
81 h5file = H5Fopen(fname, H5F_ACC_RDONLY, H5P_DEFAULT);
84 SG_ERROR(
"Could not open data repository '%s'\n", data_name)
/** Destructor. */
87 CMLDataHDF5File::~CMLDataHDF5File()
/** Generates a reader `void CMLDataHDF5File::fname(sg_type*& vec, int32_t& len)`.
 *
 * Steps visible in this listing: open the data set named by variable_name in
 * h5file, build a TSGDataType from `datatype` and map it to an HDF5 memory
 * type with get_compatible_type(), query the extents with get_dims(), accept
 * only the shapes (n), (n,1) or (1,n), allocate nelements entries with
 * SG_MALLOC and fill them with H5Dread.
 *
 * @param fname    name of the generated member function
 * @param sg_type  element type of the output buffer
 * @param datatype parenthesised TSGDataType constructor arguments; the
 *                 primitive type named here selects the H5Dread memory type,
 *                 so it has to match sg_type
 *
 * NOTE(review): "No compatible datatype found" is reported with SG_INFO while
 * every other failure in this macro uses SG_ERROR -- confirm that is intended.
 */
95 #define GET_VECTOR(fname, sg_type, datatype) \
96 void CMLDataHDF5File::fname(sg_type*& vec, int32_t& len) \
99 SG_ERROR("File invalid.\n") \
104 hid_t dataset=H5Dopen2(h5file, variable_name, H5P_DEFAULT); \
106 SG_ERROR("Error opening data set\n") \
107 hid_t dtype=H5Dget_type(dataset); \
108 H5T_class_t t_class=H5Tget_class(dtype); \
109 TSGDataType t datatype; hid_t h5_type=get_compatible_type(t_class, &t); \
113 SG_INFO("No compatible datatype found\n") \
115 get_dims(dataset, dims, ndims, nelements); \
116 if (!((ndims==2 && dims[0]==nelements && dims[1]==1) || \
117 (ndims==2 && dims[0]==1 && dims[1]==nelements) || \
118 (ndims==1 && dims[0]==nelements))) \
119 SG_ERROR("Error not a 1-dimensional vector (ndims=%d, dims[0]=%d)\n", ndims, dims[0]) \
120 vec=SG_MALLOC(sg_type, nelements); \
122 herr_t status = H5Dread(dataset, h5_type, H5S_ALL, \
123 H5S_ALL, H5P_DEFAULT, vec); \
130 SG_ERROR("Error reading dataset\n") \
134 GET_VECTOR(get_vector,
bool, (CT_VECTOR, ST_NONE, PT_BOOL))
135 GET_VECTOR(get_vector, int8_t, (CT_VECTOR, ST_NONE, PT_INT8))
136 GET_VECTOR(get_vector, uint8_t, (CT_VECTOR, ST_NONE, PT_UINT8))
137 GET_VECTOR(get_vector,
char, (CT_VECTOR, ST_NONE, PT_CHAR))
138 GET_VECTOR(get_vector, int32_t, (CT_VECTOR, ST_NONE, PT_INT32))
139 GET_VECTOR(get_vector, uint32_t, (CT_VECTOR, ST_NONE, PT_UINT32))
143 GET_VECTOR(get_vector, int16_t, (CT_VECTOR, ST_NONE, PT_INT16))
144 GET_VECTOR(get_vector, uint16_t, (CT_VECTOR, ST_NONE, PT_INT16))
145 GET_VECTOR(get_vector, int64_t, (CT_VECTOR, ST_NONE, PT_INT64))
146 GET_VECTOR(get_vector, uint64_t, (CT_VECTOR, ST_NONE, PT_UINT64))
/** Generates a reader
 * `void CMLDataHDF5File::fname(sg_type*& matrix, int32_t& num_feat, int32_t& num_vec)`.
 *
 * Steps visible in this listing: open the data set named by variable_name in
 * h5file, build a TSGDataType from `datatype` and map it to an HDF5 memory
 * type with get_compatible_type(), query the extents with get_dims(), reject
 * data that is not a 2-dimensional matrix (the test itself is not visible),
 * allocate nelements entries with SG_MALLOC and fill them with H5Dread.
 *
 * @param fname    name of the generated member function
 * @param sg_type  element type of the output buffer
 * @param datatype parenthesised TSGDataType constructor arguments; the
 *                 primitive type named here selects the H5Dread memory type,
 *                 so it has to match sg_type
 *
 * NOTE(review): "No compatible datatype found" is reported with SG_INFO while
 * every other failure in this macro uses SG_ERROR -- confirm that is intended.
 */
149 #define GET_MATRIX(fname, sg_type, datatype) \
150 void CMLDataHDF5File::fname(sg_type*& matrix, int32_t& num_feat, int32_t& num_vec) \
153 SG_ERROR("File invalid.\n") \
158 hid_t dataset = H5Dopen2(h5file, variable_name, H5P_DEFAULT); \
160 SG_ERROR("Error opening data set\n") \
161 hid_t dtype = H5Dget_type(dataset); \
162 H5T_class_t t_class=H5Tget_class(dtype); \
163 TSGDataType t datatype; hid_t h5_type=get_compatible_type(t_class, &t); \
167 SG_INFO("No compatible datatype found\n") \
169 get_dims(dataset, dims, ndims, nelements); \
171 SG_ERROR("Error not a 2-dimensional matrix\n") \
172 matrix=SG_MALLOC(sg_type, nelements); \
175 herr_t status = H5Dread(dataset, h5_type, H5S_ALL, \
176 H5S_ALL, H5P_DEFAULT, matrix); \
183 SG_ERROR("Error reading dataset\n") \
187 GET_MATRIX(get_matrix,
bool, (CT_MATRIX, ST_NONE, PT_BOOL))
188 GET_MATRIX(get_matrix,
char, (CT_MATRIX, ST_NONE, PT_CHAR))
189 GET_MATRIX(get_matrix, uint8_t, (CT_MATRIX, ST_NONE, PT_UINT8))
190 GET_MATRIX(get_matrix, int32_t, (CT_MATRIX, ST_NONE, PT_INT32))
191 GET_MATRIX(get_matrix, uint32_t, (CT_MATRIX, ST_NONE, PT_INT32))
192 GET_MATRIX(get_matrix, int64_t, (CT_MATRIX, ST_NONE, PT_INT64))
193 GET_MATRIX(get_matrix, uint64_t, (CT_MATRIX, ST_NONE, PT_INT64))
194 GET_MATRIX(get_matrix, int16_t, (CT_MATRIX, ST_NONE, PT_INT16))
195 GET_MATRIX(get_matrix, uint16_t, (CT_MATRIX, ST_NONE, PT_INT16))
196 GET_MATRIX(get_matrix, float32_t, (CT_MATRIX, ST_NONE, PT_FLOAT32))
197 GET_MATRIX(get_matrix, float64_t, (CT_MATRIX, ST_NONE, PT_FLOAT64))
198 GET_MATRIX(get_matrix, floatmax_t, (CT_MATRIX, ST_NONE, PT_FLOATMAX))
/** Template for sparse-matrix readers with the signature
 * `void CMLDataHDF5File::fname(SGSparseVector<sg_type>*& matrix, int32_t& num_feat, int32_t& num_vec)`.
 *
 * Only the "File invalid." error is visible from the body, and no
 * instantiation appears in this listing before the macro is undefined again.
 */
201 #define GET_SPARSEMATRIX(fname, sg_type, datatype) \
202 void CMLDataHDF5File::fname(SGSparseVector<sg_type>*& matrix, int32_t& num_feat, int32_t& num_vec) \
205 SG_ERROR("File invalid.\n") \
220 #undef GET_SPARSEMATRIX
/** Template for string-list readers with the signature
 * `void CMLDataHDF5File::fname(SGString<sg_type>*& strings, int32_t& num_str, int32_t& max_string_len)`.
 *
 * The body is not visible, and no instantiation appears in this listing
 * before the macro is undefined again.
 */
223 #define GET_STRING_LIST(fname, sg_type, datatype) \
224 void CMLDataHDF5File::fname(SGString<sg_type>*& strings, int32_t& num_str, int32_t& max_string_len) \
241 #undef GET_STRING_LIST
/** Chooses the HDF5 native type used for `bool` and stores it in boolean_type.
 *
 * boolean_type starts as H5T_NATIVE_UCHAR and is then selected by a switch on
 * sizeof(bool) among H5T_NATIVE_UCHAR, H5T_NATIVE_UINT16, H5T_NATIVE_UINT32
 * and H5T_NATIVE_UINT64. The case labels are not visible in this listing;
 * presumably they are 1, 2, 4 and 8 bytes -- TODO confirm. Any other size is
 * reported through SG_ERROR.
 */
243 void CMLDataHDF5File::get_boolean_type()
245 boolean_type=H5T_NATIVE_UCHAR;
246 switch (
sizeof(
bool))
249 boolean_type = H5T_NATIVE_UCHAR;
252 boolean_type = H5T_NATIVE_UINT16;
255 boolean_type = H5T_NATIVE_UINT32;
258 boolean_type = H5T_NATIVE_UINT64;
261 SG_ERROR(
"Boolean type not supported on this platform\n")
/** Maps a Shogun primitive type tag to the matching HDF5 native type.
 *
 * PT_BOOL yields boolean_type; every other visible PT_* tag yields the
 * H5T_NATIVE_* type of the same width and signedness (PT_FLOATMAX yields
 * H5T_NATIVE_LDOUBLE). complex128_t, variable-length containers and array
 * containers are reported through SG_ERROR.
 *
 * @param t_class HDF5 class of the stored data; how it steers the lookup is
 *                not visible in this listing
 *
 * NOTE(review): the rest of the parameter list and the enclosing switch
 * structure are not visible here. The GET_VECTOR/GET_MATRIX macros pass the
 * address of a TSGDataType as second argument.
 */
265 hid_t CMLDataHDF5File::get_compatible_type(H5T_class_t t_class,
274 case PT_BOOL:
return boolean_type;
275 case PT_CHAR:
return H5T_NATIVE_CHAR;
276 case PT_INT8:
return H5T_NATIVE_INT8;
277 case PT_UINT8:
return H5T_NATIVE_UINT8;
278 case PT_INT16:
return H5T_NATIVE_INT16;
279 case PT_UINT16:
return H5T_NATIVE_UINT16;
280 case PT_INT32:
return H5T_NATIVE_INT32;
281 case PT_UINT32:
return H5T_NATIVE_UINT32;
282 case PT_INT64:
return H5T_NATIVE_INT64;
283 case PT_UINT64:
return H5T_NATIVE_UINT64;
284 case PT_FLOAT32:
return H5T_NATIVE_FLOAT;
285 case PT_FLOAT64:
return H5T_NATIVE_DOUBLE;
286 case PT_FLOATMAX:
return H5T_NATIVE_LDOUBLE;
288 SG_ERROR(
"complex128_t not compatible with HDF5File!");
292 SG_ERROR(
"Implementation error during writing "
300 SG_ERROR("Variable length containers currently not supported")
303 SG_ERROR("Array containers currently not supported")
/** Queries the extents of an HDF5 data set.
 *
 * @param dataset        handle of the data set to inspect
 * @param dims           output; allocated here with SG_MALLOC to hold ndims
 *                       entries. No release is visible in this function, so
 *                       presumably the caller frees it -- TODO confirm
 * @param ndims          output; number of dimensions of the dataspace
 * @param total_elements output; number of elements in the dataspace
 *
 * Failure to obtain the dataspace is reported through SG_ERROR.
 *
 * NOTE(review): the body of the final loop is not visible; presumably it
 * copies the hsize_t extents in dims_out into the int32_t array dims. The
 * visible lines use ndims as an allocation size without checking it.
 */
311 void CMLDataHDF5File::get_dims(hid_t dataset, int32_t*& dims, int32_t& ndims, int64_t& total_elements)
313 hid_t dataspace = H5Dget_space(dataset);
315 SG_ERROR(
"Error obtaining hdf5 dataspace\n")
317 ndims = H5Sget_simple_extent_ndims(dataspace);
318 total_elements=H5Sget_simple_extent_npoints(dataspace);
319 hsize_t* dims_out=SG_MALLOC(hsize_t, ndims);
320 dims=SG_MALLOC(int32_t, ndims);
321 H5Sget_simple_extent_dims(dataspace, dims_out, NULL);
322 for (int32_t i=0; i<ndims; i++)
/** Creates the HDF5 groups along the path stored in variable_name.
 *
 * Works on a copy of variable_name and scans it character by character. At
 * every '/' other than one in the first position it tries H5Gopen2 on vname
 * and, on the visible evidence, falls back to H5Gcreate2; a failed creation
 * is reported through SG_ERROR with the group name.
 *
 * NOTE(review): how vname is shortened to the current prefix, the conditions
 * between the open and create calls, and the release of the group handles and
 * of vname are not visible in this listing.
 */
328 void CMLDataHDF5File::create_group_hierarchy()
330 char* vname=get_strdup(variable_name);
331 int32_t vlen=strlen(vname);
332 for (int32_t i=0; i<vlen; i++)
334 if (i!=0 && vname[i]==
'/')
337 hid_t g = H5Gopen2(h5file, vname, H5P_DEFAULT);
340 g=H5Gcreate2(h5file, vname, H5P_DEFAULT, H5P_DEFAULT,
343 SG_ERROR(
"Error creating group '%s'\n", vname)
351 #endif // HAVE_CURL && HAVE_HDF5
#define GET_MATRIX(fname, sg_type, datatype)
#define GET_STRING_LIST(fname, sg_type, datatype)
Datatypes that shogun supports.
A File access base class.
All classes and functions are contained in the shogun namespace.
#define GET_VECTOR(fname, sg_type, datatype)
#define SG_UNSTABLE(func,...)
#define GET_SPARSEMATRIX(fname, sg_type, datatype)