SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DataType.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2010 Soeren Sonnenburg
8  * Written (W) 2011-2013 Heiko Strathmann
9  * Copyright (C) 2010 Berlin Institute of Technology
10  */
11 
12 #include <string.h>
13 
14 #include <shogun/base/SGObject.h>
15 #include <shogun/lib/DataType.h>
16 #include <shogun/lib/SGString.h>
18 #include <shogun/io/SGIO.h>
19 
20 using namespace shogun;
21 
22 TSGDataType::TSGDataType(EContainerType ctype, EStructType stype,
23  EPrimitiveType ptype)
24 {
25  m_ctype = ctype, m_stype = stype, m_ptype = ptype;
26  m_length_y = m_length_x = NULL;
27 }
28 
29 TSGDataType::TSGDataType(EContainerType ctype, EStructType stype,
30  EPrimitiveType ptype, index_t* length)
31 {
32  m_ctype = ctype, m_stype = stype, m_ptype = ptype;
33  m_length_y = length, m_length_x = NULL;
34 }
35 
36 TSGDataType::TSGDataType(EContainerType ctype, EStructType stype,
37  EPrimitiveType ptype, index_t* length_y,
38  index_t* length_x)
39 {
40  m_ctype = ctype, m_stype = stype, m_ptype = ptype;
41  m_length_y = length_y, m_length_x = length_x;
42 }
43 
/* Equality test of this descriptor against another descriptor `a`.
 *
 * NOTE(review): the declarator line (embedded listing line 45) is missing from
 * this listing; the name and exact parameter type of `a` must be confirmed
 * against the class header.
 *
 * Returns true when container, structure and primitive types match and both
 * length slots agree.
 */
44 bool
46 {
47  /* handle CT_SG* and SG_* ambiguity */
/* CT_VECTOR/CT_SGVECTOR and CT_MATRIX/CT_SGMATRIX are treated as the same
 * container type, in either direction */
48  bool ctype_equal=false;
49  if ((m_ctype==CT_VECTOR && a.m_ctype==CT_SGVECTOR) ||
50  (m_ctype==CT_SGVECTOR && a.m_ctype==CT_VECTOR) ||
51  (m_ctype==CT_MATRIX && a.m_ctype==CT_SGMATRIX) ||
52  (m_ctype==CT_SGMATRIX && a.m_ctype==CT_MATRIX) ||
53  (m_ctype==a.m_ctype))
54  ctype_equal=true;
55 
56  bool result = ctype_equal && m_stype == a.m_stype
57  && m_ptype == a.m_ptype;
58 
/* lengths: when both pointers are set the pointed-to values are compared;
 * otherwise the pointers themselves are compared, which only succeeds when
 * they are identical (e.g. both NULL) */
59  result &= m_length_y != NULL && a.m_length_y != NULL
60  ? *m_length_y == *a.m_length_y: m_length_y == a.m_length_y;
61  result &= m_length_x != NULL && a.m_length_x != NULL
62  ? *m_length_x == *a.m_length_x: m_length_x == a.m_length_x;
63 
64  return result;
65 }
66 
/* Body of the type-only comparison; the debug messages name it
 * TSGDataType::equals_without_length().
 *
 * NOTE(review): the declarator line (embedded listing line 67) is missing from
 * this listing; the return type and the exact type of `other` must be
 * confirmed against the class header.
 *
 * Compares container, structure and primitive type with plain inequality
 * tests and ignores both length slots. Returns false at the first mismatch
 * (logging which field differed), true otherwise.
 */
68 {
/* container types must match exactly here */
69  if (m_ctype!=other.m_ctype)
70  {
71  SG_SDEBUG("leaving TSGDataType::equals_without_length(): container types are "
72  "different\n");
73  return false;
74  }
75 
76  if (m_stype!=other.m_stype)
77  {
78  SG_SDEBUG("leaving TSGDataType::equals_without_length(): struct types are "
79  "different\n");
80  return false;
81  }
82 
83  if (m_ptype!=other.m_ptype)
84  {
85  SG_SDEBUG("leaving TSGDataType::equals_without_length(): primitive types are "
86  "different\n");
87  return false;
88  }
89 
90  SG_SDEBUG("leaving TSGDataType::equals_without_length(): data types "
91  "without lengths are equal\n");
92  return true;
93 }
94 
/* Body of the full comparison; the debug messages name it
 * TSGDataType::equals().
 *
 * NOTE(review): the declarator line (embedded listing line 95) is missing from
 * this listing; the return type and the exact type of `other` must be
 * confirmed against the class header.
 *
 * First delegates the type comparison to equals_without_length(), then
 * compares the y and x length slots. Every early exit logs its reason.
 */
96 {
97  SG_SDEBUG("entering TSGDataType::equals()\n");
98 
99  if (!equals_without_length(other))
100  {
101  SG_SDEBUG("leaving TSGDataType::equals(): Data types without lengths "
102  "are not equal\n");
103  return false;
104  }
105 
/* exactly one of the two y-length pointers is set: not equal */
106  if ((!m_length_y && other.m_length_y) || (m_length_y && !other.m_length_y))
107  {
108  SG_SDEBUG("leaving TSGDataType::equals(): length_y is at %p while "
109  "other's length_y is at %p\n", m_length_y, other.m_length_y);
110  return false;
111  }
112 
/* both y-length pointers are set: compare the values they point to */
113  if (m_length_y && other.m_length_y)
114  {
115  if (*m_length_y!=*other.m_length_y)
116  {
117  SG_SDEBUG("leaving TSGDataType::equals(): length_y=%d while "
118  "other's length_y=%d\n", *m_length_y, *other.m_length_y);
119  return false;
120  }
121  }
122 
/* same two-step check for the x-length */
123  if ((!m_length_x && other.m_length_x) || (m_length_x && !other.m_length_x))
124  {
125  SG_SDEBUG("leaving TSGDataType::equals(): m_length_x is at %p while "
126  "other's m_length_x is at %p\n", m_length_x, other.m_length_x);
127  return false;
128  }
129 
130  if (m_length_x && other.m_length_x)
131  {
132  if (*m_length_x!=*other.m_length_x)
133  {
134  SG_SDEBUG("leaving TSGDataType::equals(): m_length_x=%d while "
135  "other's m_length_x=%d\n", *m_length_x, *other.m_length_x);
136  return false;
137  }
138  }
139 
140  SG_SDEBUG("leaving TSGDataType::equals(): datatypes are equal\n");
141  return true;
142 }
143 
144 void
145 TSGDataType::to_string(char* dest, size_t n) const
146 {
147  char* p = dest;
148 
149  switch (m_ctype) {
150  case CT_SCALAR: strncpy(p, "", n); break;
151  case CT_VECTOR: strncpy(p, "Vector<", n); break;
152  case CT_SGVECTOR: strncpy(p, "SGVector<", n); break;
153  case CT_MATRIX: strncpy(p, "Matrix<", n); break;
154  case CT_SGMATRIX: strncpy(p, "SGMatrix<", n); break;
155  case CT_NDARRAY: strncpy(p, "N-Dimensional Array<", n); break;
156  case CT_UNDEFINED: default: strncpy(p, "Undefined", n); break;
157  }
158 
159  if (m_ctype != CT_UNDEFINED)
160  {
161  size_t np = strlen(p);
162  stype_to_string(p + np, m_stype, m_ptype, n - np - 2);
163  }
164 
165  switch (m_ctype) {
166  case CT_SCALAR: break;
167  case CT_VECTOR:
168  case CT_SGVECTOR:
169  case CT_MATRIX:
170  case CT_SGMATRIX:
171  case CT_NDARRAY: strcat(p, ">"); break;
172  case CT_UNDEFINED: default: break;
173  }
174 }
175 
/* Returns sizeof_stype() evaluated for this object's own m_stype and m_ptype.
 *
 * NOTE(review): the declarator line (embedded listing line 177) is missing
 * from this listing; presumably the no-argument member overload of
 * sizeof_stype - confirm against the class header.
 */
176 size_t
178 {
179  return sizeof_stype(m_stype, m_ptype);
180 }
181 
/* Returns sizeof_ptype() evaluated for this object's own m_ptype.
 *
 * NOTE(review): the declarator line (embedded listing line 183) is missing
 * from this listing; presumably the no-argument member overload of
 * sizeof_ptype - confirm against the class header.
 */
182 size_t
184 {
185  return sizeof_ptype(m_ptype);
186 }
187 
188 size_t
189 TSGDataType::sizeof_stype(EStructType stype, EPrimitiveType ptype)
190 {
191  switch (stype) {
192  case ST_NONE: return sizeof_ptype(ptype);
193  case ST_STRING:
194  switch (ptype) {
195  case PT_BOOL: return sizeof (SGString<bool>);
196  case PT_CHAR: return sizeof (SGString<char>);
197  case PT_INT8: return sizeof (SGString<int8_t>);
198  case PT_UINT8: return sizeof (SGString<uint8_t>);
199  case PT_INT16: return sizeof (SGString<int16_t>);
200  case PT_UINT16: return sizeof (SGString<uint16_t>);
201  case PT_INT32: return sizeof (SGString<int32_t>);
202  case PT_UINT32: return sizeof (SGString<uint32_t>);
203  case PT_INT64: return sizeof (SGString<int64_t>);
204  case PT_UINT64: return sizeof (SGString<uint64_t>);
205  case PT_FLOAT32: return sizeof (SGString<float32_t>);
206  case PT_FLOAT64: return sizeof (SGString<float64_t>);
207  case PT_FLOATMAX: return sizeof (SGString<floatmax_t>);
208  case PT_COMPLEX128:
209  SG_SWARNING("TGSDataType::sizeof_stype(): Strings are"
210  " not supported for complex128_t\n");
211  return -1;
212  case PT_SGOBJECT:
213  SG_SWARNING("TGSDataType::sizeof_stype(): Strings are"
214  " not supported for SGObject\n");
215  return -1;
216  case PT_UNDEFINED: default:
217  SG_SERROR("Implementation error: undefined primitive type\n");
218  break;
219  }
220  break;
221  case ST_SPARSE:
222  switch (ptype) {
223  case PT_BOOL: return sizeof (SGSparseVector<bool>);
224  case PT_CHAR: return sizeof (SGSparseVector<char>);
225  case PT_INT8: return sizeof (SGSparseVector<int8_t>);
226  case PT_UINT8: return sizeof (SGSparseVector<uint8_t>);
227  case PT_INT16: return sizeof (SGSparseVector<int16_t>);
228  case PT_UINT16: return sizeof (SGSparseVector<uint16_t>);
229  case PT_INT32: return sizeof (SGSparseVector<int32_t>);
230  case PT_UINT32: return sizeof (SGSparseVector<uint32_t>);
231  case PT_INT64: return sizeof (SGSparseVector<int64_t>);
232  case PT_UINT64: return sizeof (SGSparseVector<uint64_t>);
233  case PT_FLOAT32: return sizeof (SGSparseVector<float32_t>);
234  case PT_FLOAT64: return sizeof (SGSparseVector<float64_t>);
235  case PT_FLOATMAX: return sizeof (SGSparseVector<floatmax_t>);
236  case PT_COMPLEX128: return sizeof (SGSparseVector<complex128_t>);
237  case PT_SGOBJECT: return -1;
238  case PT_UNDEFINED: default:
239  SG_SERROR("Implementation error: undefined primitive type\n");
240  break;
241  }
242  break;
243  case ST_UNDEFINED: default:
244  SG_SERROR("Implementation error: undefined structure type\n");
245  break;
246  }
247 
248  return -1;
249 }
250 
251 size_t
252 TSGDataType::sizeof_ptype(EPrimitiveType ptype)
253 {
254  switch (ptype) {
255  case PT_BOOL: return sizeof (bool);
256  case PT_CHAR: return sizeof (char);
257  case PT_INT8: return sizeof (int8_t);
258  case PT_UINT8: return sizeof (uint8_t);
259  case PT_INT16: return sizeof (int16_t);
260  case PT_UINT16: return sizeof (uint16_t);
261  case PT_INT32: return sizeof (int32_t);
262  case PT_UINT32: return sizeof (uint32_t);
263  case PT_INT64: return sizeof (int64_t);
264  case PT_UINT64: return sizeof (uint64_t);
265  case PT_FLOAT32: return sizeof (float32_t);
266  case PT_FLOAT64: return sizeof (float64_t);
267  case PT_FLOATMAX: return sizeof (floatmax_t);
268  case PT_COMPLEX128: return sizeof (complex128_t);
269  case PT_SGOBJECT: return sizeof (CSGObject*);
270  case PT_UNDEFINED: default:
271  SG_SERROR("Implementation error: undefined primitive type\n");
272  break;
273  }
274 
275  return -1;
276 }
277 
278 size_t
279 TSGDataType::sizeof_sparseentry(EPrimitiveType ptype)
280 {
281  switch (ptype) {
282  case PT_BOOL: return sizeof (SGSparseVectorEntry<bool>);
283  case PT_CHAR: return sizeof (SGSparseVectorEntry<char>);
284  case PT_INT8: return sizeof (SGSparseVectorEntry<int8_t>);
285  case PT_UINT8: return sizeof (SGSparseVectorEntry<uint8_t>);
286  case PT_INT16: return sizeof (SGSparseVectorEntry<int16_t>);
287  case PT_UINT16: return sizeof (SGSparseVectorEntry<uint16_t>);
288  case PT_INT32: return sizeof (SGSparseVectorEntry<int32_t>);
289  case PT_UINT32: return sizeof (SGSparseVectorEntry<uint32_t>);
290  case PT_INT64: return sizeof (SGSparseVectorEntry<int64_t>);
291  case PT_UINT64: return sizeof (SGSparseVectorEntry<uint64_t>);
292  case PT_FLOAT32: return sizeof (SGSparseVectorEntry<float32_t>);
293  case PT_FLOAT64: return sizeof (SGSparseVectorEntry<float64_t>);
294  case PT_FLOATMAX: return sizeof (SGSparseVectorEntry<floatmax_t>);
295  case PT_COMPLEX128: return sizeof (SGSparseVectorEntry<complex128_t>);
296  case PT_SGOBJECT: return -1;
297  case PT_UNDEFINED: default:
298  SG_SERROR("Implementation error: undefined primitive type\n");
299  break;
300  }
301 
302  return -1;
303 }
304 
305 #define ENTRY_OFFSET(k, type) \
306  ((char*) &((SGSparseVectorEntry<type>*) (k))->entry - (char*) (k))
307 size_t
308 TSGDataType::offset_sparseentry(EPrimitiveType ptype)
309 {
310  size_t result = -1; void* x = &result;
311 
312  switch (ptype) {
313  case PT_BOOL: result = ENTRY_OFFSET(x, bool); break;
314  case PT_CHAR: result = ENTRY_OFFSET(x, char); break;
315  case PT_INT8: result = ENTRY_OFFSET(x, int8_t); break;
316  case PT_UINT8: result = ENTRY_OFFSET(x, uint8_t); break;
317  case PT_INT16: result = ENTRY_OFFSET(x, int16_t); break;
318  case PT_UINT16: result = ENTRY_OFFSET(x, uint16_t); break;
319  case PT_INT32: result = ENTRY_OFFSET(x, int32_t); break;
320  case PT_UINT32: result = ENTRY_OFFSET(x, uint32_t); break;
321  case PT_INT64: result = ENTRY_OFFSET(x, int64_t); break;
322  case PT_UINT64: result = ENTRY_OFFSET(x, uint64_t); break;
323  case PT_FLOAT32: result = ENTRY_OFFSET(x, float32_t); break;
324  case PT_FLOAT64: result = ENTRY_OFFSET(x, float64_t); break;
325  case PT_FLOATMAX: result = ENTRY_OFFSET(x, floatmax_t); break;
326  case PT_COMPLEX128: result = ENTRY_OFFSET(x, complex128_t); break;
327  case PT_SGOBJECT: return -1;
328  case PT_UNDEFINED: default:
329  SG_SERROR("Implementation error: undefined primitive type\n");
330  break;
331  }
332 
333  return result;
334 }
335 
336 void
337 TSGDataType::stype_to_string(char* dest, EStructType stype,
338  EPrimitiveType ptype, size_t n)
339 {
340  char* p = dest;
341 
342  switch (stype) {
343  case ST_NONE: strncpy(p, "", n); break;
344  case ST_STRING: strncpy(p, "String<", n); break;
345  case ST_SPARSE: strncpy(p, "Sparse<", n); break;
346  case ST_UNDEFINED: default:
347  SG_SERROR("Implementation error: undefined structure type\n");
348  break;
349  }
350 
351  size_t np = strlen(p);
352  ptype_to_string(p + np, ptype, n - np - 2);
353 
354  switch (stype) {
355  case ST_NONE: break;
356  case ST_STRING: case ST_SPARSE:
357  strcat(p, ">"); break;
358  case ST_UNDEFINED: default:
359  SG_SERROR("Implementation error: undefined structure type\n");
360  break;
361  }
362 }
363 
364 void
365 TSGDataType::ptype_to_string(char* dest, EPrimitiveType ptype,
366  size_t n)
367 {
368  char* p = dest;
369 
370  switch (ptype) {
371  case PT_BOOL: strncpy(p, "bool", n); break;
372  case PT_CHAR: strncpy(p, "char", n); break;
373  case PT_INT8: strncpy(p, "int8", n); break;
374  case PT_UINT8: strncpy(p, "uint8", n); break;
375  case PT_INT16: strncpy(p, "int16", n); break;
376  case PT_UINT16: strncpy(p, "uint16", n); break;
377  case PT_INT32: strncpy(p, "int32", n); break;
378  case PT_UINT32: strncpy(p, "uint32", n); break;
379  case PT_INT64: strncpy(p, "int64", n); break;
380  case PT_UINT64: strncpy(p, "uint64", n); break;
381  case PT_FLOAT32: strncpy(p, "float32", n); break;
382  case PT_FLOAT64: strncpy(p, "float64", n); break;
383  case PT_FLOATMAX: strncpy(p, "floatmax", n); break;
384  case PT_COMPLEX128: strncpy(p, "complex128", n); break;
385  case PT_SGOBJECT: strncpy(p, "SGSerializable*", n); break;
386  case PT_UNDEFINED: default:
387  SG_SERROR("Implementation error: undefined primitive type\n");
388  break;
389  }
390 }
391 
392 bool
393 TSGDataType::string_to_ptype(EPrimitiveType* ptype, const char* str)
394 {
395  if (strcmp(str, "bool") == 0) {
396  *ptype = PT_BOOL; return true; }
397  if (strcmp(str, "char") == 0) {
398  *ptype = PT_CHAR; return true; }
399  if (strcmp(str, "int8") == 0) {
400  *ptype = PT_INT8; return true; }
401  if (strcmp(str, "uint8") == 0) {
402  *ptype = PT_UINT8; return true; }
403  if (strcmp(str, "int16") == 0) {
404  *ptype = PT_INT16; return true; }
405  if (strcmp(str, "uint16") == 0) {
406  *ptype = PT_UINT16; return true; }
407  if (strcmp(str, "int32") == 0) {
408  *ptype = PT_INT32; return true; }
409  if (strcmp(str, "uint32") == 0) {
410  *ptype = PT_UINT32; return true; }
411  if (strcmp(str, "int64") == 0) {
412  *ptype = PT_INT64; return true; }
413  if (strcmp(str, "uint64") == 0) {
414  *ptype = PT_UINT64; return true; }
415  if (strcmp(str, "float32") == 0) {
416  *ptype = PT_FLOAT32; return true; }
417  if (strcmp(str, "float64") == 0) {
418  *ptype = PT_FLOAT64; return true; }
419  if (strcmp(str, "floatmax") == 0) {
420  *ptype = PT_FLOATMAX; return true; }
421  if (strcmp(str, "complex128") == 0) {
422  *ptype = PT_COMPLEX128; return true; }
423  if (strcmp(str, "SGSerializable*") == 0) {
424  *ptype = PT_SGOBJECT; return true; }
425 
426  /* Make sure that the compiler will warn at this position. */
427  switch (*ptype) {
428  case PT_BOOL: case PT_CHAR: case PT_INT8: case PT_UINT8:
429  case PT_INT16: case PT_UINT16: case PT_INT32: case PT_UINT32:
430  case PT_INT64: case PT_UINT64: case PT_FLOAT32: case PT_FLOAT64:
431  case PT_FLOATMAX: case PT_COMPLEX128: case PT_SGOBJECT: break;
432  case PT_UNDEFINED: default:
433  SG_SERROR("Implementation error: undefined primitive type\n");
434  break;
435  }
436 
437  return false;
438 }
439 
/* Body that computes a total size from the element count and the
 * per-element size, depending on m_stype.
 *
 * NOTE(review): the declarator line (embedded listing line 440) is missing
 * from this listing; name, return type and qualifiers must be confirmed
 * against the class header.
 *
 * Returns 0 for object-typed string/sparse data and after the error branch.
 */
441 {
442  switch (m_stype)
443  {
444  case ST_NONE:
/* plain data: number of elements times the primitive size */
445  return get_num_elements()*sizeof_ptype();
446  case ST_STRING:
447  if (m_ptype==PT_SGOBJECT)
448  return 0;
449 
450  return get_num_elements()*sizeof_stype();
451  case ST_SPARSE:
452  if (m_ptype==PT_SGOBJECT)
453  return 0;
454 
/* NOTE(review): the embedded numbering skips 455 here, so a statement for
 * non-object sparse data may have been lost from this listing. As shown,
 * control falls through into the undefined-type error branch below -
 * verify against the original file. */
456  case ST_UNDEFINED: default:
457  SG_SERROR("Implementation error: undefined structure type\n");
458  break;
459  }
460 
461  return 0;
462 }
463 
/* Body that derives the number of elements from m_ctype and the length
 * pointers.
 *
 * NOTE(review): the declarator line (embedded listing line 464) is missing
 * from this listing; name, return type and qualifiers must be confirmed
 * against the class header.
 *
 * The vector and matrix branches dereference the length pointers without a
 * NULL check. Returns 0 after the error branch.
 */
465 {
466  switch (m_ctype)
467  {
468  case CT_SCALAR:
469  return 1;
470  case CT_VECTOR: case CT_SGVECTOR:
471  /* length_y contains the length for vectors */
472  return *m_length_y;
473  case CT_MATRIX: case CT_SGMATRIX:
/* matrices: product of both lengths */
474  return (*m_length_y)*(*m_length_x);
475  case CT_NDARRAY:
/* NOTE(review): the embedded numbering skips 476 here, so a statement for
 * the N-dimensional array case may have been lost from this listing. As
 * shown, control falls through into the undefined-type error branch -
 * verify against the original file. */
477  case CT_UNDEFINED: default:
478  SG_SERROR("Implementation error: undefined container type\n");
479  break;
480  }
481  return 0;
482 }

SHOGUN Machine Learning Toolbox - Documentation