DataType.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2010 Soeren Sonnenburg
00008  * Copyright (C) 2010 Berlin Institute of Technology
00009  */
00010 
00011 #ifndef __DATATYPE_H__
00012 #define __DATATYPE_H__
00013 
00014 #include <shogun/lib/common.h>
00015 //#include <shogun/mathematics/Math.h>
00016 #include <shogun/io/SGIO.h>
00017 
00018 #define PT_NOT_GENERIC  PT_SGOBJECT
00019 #define PT_LONGEST      floatmax_t
00020 
00021 namespace shogun
00022 {
00023 
00024 //class CMath;
00025 template<class T> class CCache;
00026 
/** index type: the 32-bit signed integer used for lengths, dimensions and
 * element indices by the container classes declared in this header */
00028 typedef int32_t index_t;
00029 
00031 template<class T> class SGVector
00032 {
00033     public:
00035         SGVector() : vector(NULL), vlen(0), do_free(false) { }
00036 
00038         SGVector(T* v, index_t len, bool free_vec=false)
00039             : vector(v), vlen(len), do_free(free_vec) { }
00040 
00042         SGVector(index_t len, bool free_vec=false)
00043             : vlen(len), do_free(free_vec)
00044         {
00045             vector=SG_MALLOC(T, len);
00046         }
00047 
00049         SGVector(const SGVector &orig)
00050             : vector(orig.vector), vlen(orig.vlen), do_free(orig.do_free) { }
00051 
00053         virtual ~SGVector()
00054         {
00055         }
00056 
00061         static SGVector get_vector(SGVector &src, bool own=true)
00062         {
00063             if (!own)
00064                 return src;
00065 
00066             src.do_free=false;
00067             return SGVector(src.vector, src.vlen);
00068         }
00069 
00071         void zero()
00072         {
00073             if (vector && vlen)
00074                 set_const(0);
00075         }
00076 
00078         void set_const(T const_elem)
00079         {
00080             for (index_t i=0; i<vlen; i++)
00081                 vector[i]=const_elem ;
00082         }
00083 
00085         void range_fill(T start=0)
00086         {
00087             range_fill_vector(vector, vlen, start);
00088         }
00089 
00091         void random(T min_value, T max_value)
00092         {
00093             random_vector(vector, vlen, min_value, max_value);
00094         }
00095 
00097         void randperm()
00098         {
00099             randperm(vector, vlen);
00100         }
00101 
00103         template <class VT>
00104         static VT* clone_vector(const VT* vec, int32_t len)
00105         {
00106             VT* result = SG_MALLOC(VT, len);
00107             for (int32_t i=0; i<len; i++)
00108                 result[i]=vec[i];
00109 
00110             return result;
00111         }
00112 
00114         template <class VT>
00115         static void fill_vector(VT* vec, int32_t len, VT value)
00116         {
00117             for (int32_t i=0; i<len; i++)
00118                 vec[i]=value;
00119         }
00120 
00122         template <class VT>
00123         static void range_fill_vector(VT* vec, int32_t len, VT start=0)
00124         {
00125             for (int32_t i=0; i<len; i++)
00126                 vec[i]=i+start;
00127         }
00128 
00130         template <class VT>
00131         static void random_vector(VT* vec, int32_t len, VT min_value, VT max_value)
00132         {
00133             //FIXME for (int32_t i=0; i<len; i++)
00134             //FIXME     vec[i]=CMath::random(min_value, max_value);
00135         }
00136 
00138         template <class VT>
00139         static void randperm(VT* perm, int32_t n)
00140         {
00141             for (int32_t i = 0; i < n; i++)
00142                 perm[i] = i;
00143             permute(perm,n);
00144         }
00145 
00147         template <class VT>
00148         static void permute(VT* perm, int32_t n)
00149         {
00150             //FIXME for (int32_t i = 0; i < n; i++)
00151             //FIXME     CMath::swap(perm[random(0, n - 1)], perm[i]);
00152         }
00153 
00159         const T& get_element(index_t index)
00160         {
00161             ASSERT(vector && (index>=0) && (index<vlen));
00162             return vector[index];
00163         }
00164 
00171         void set_element(const T& p_element, index_t index)
00172         {
00173             ASSERT(vector && (index>=0) && (index<vlen));
00174             vector[index]=p_element;
00175         }
00176 
00182         void resize_vector(int32_t n)
00183         {
00184             vector=SG_REALLOC(T, vector, n);
00185 
00186             if (n > vlen)
00187                 memset(&vector[vlen], 0, (n-vlen)*sizeof(T));
00188             vlen=n;
00189         }
00190 
00196         inline const T& operator[](index_t index) const
00197         {
00198             return vector[index];
00199         }
00200 
00206         inline T& operator[](index_t index)
00207         {
00208             return vector[index];
00209         }
00210 
00212         virtual void free_vector()
00213         {
00214             if (do_free)
00215                 SG_FREE(vector);
00216 
00217             vector=NULL;
00218             do_free=false;
00219             vlen=0;
00220         }
00221 
00223         virtual void destroy_vector()
00224         {
00225             do_free=true;
00226             free_vector();
00227         }
00228 
00230         void display_size() const
00231         {
00232             SG_SPRINT("SGVector '%p' of size: %d\n", vector, vlen);
00233         }
00234 
00236         void display_vector() const
00237         {
00238             display_size();
00239             for (int32_t i=0; i<vlen; i++)
00240                 SG_SPRINT("%10.10g,", (float64_t) vector[i]);
00241             SG_SPRINT("\n");
00242         }
00243 
00244     public:
00246         T* vector;
00248         index_t vlen;
00250         bool do_free;
00251 };
00252 
00253 //template<class T> class SGCachedVector : public SGVector<T>
00254 //{
00255 //  public:
00256 //      /** default constructor */
00257 //      SGCachedVector(CCache<T>* c, index_t i)
00258 //          : SGVector<T>(), cache(c), idx(i)
00259 //      {
00260 //      }
00261 //
00262 //      /** constructor for setting params */
00263 //      SGCachedVector(CCache<T>* c, index_t i,
00264 //              T* v, index_t len, bool free_vec=false)
00265 //          : SGVector<T>(v, len, free_vec), cache(c), idx(i)
00266 //      {
00267 //      }
00268 //
00269 //      /** constructor to create new vector in memory */
00270 //      SGCachedVector(CCache<T>* c, index_t i, index_t len, bool free_vec=false) :
00271 //          SGVector<T>(len, free_vec), cache(c), idx(i)
00272 //      {
00273 //      }
00274 //
00275 //      /** free vector */
00276 //      virtual void free_vector()
00277 //      {
00278 //          //clean up cache fixme
00279 //          SGVector<T>::free_vector();
00280 //      }
00281 //
00282 //      /** destroy vector */
00283 //      virtual void destroy_vector()
00284 //      {
00285 //          //clean up cache fixme
00286 //          SGVector<T>::destroy_vector();
00287 //          if (cache)
00288 //              cache->unlock_entry(idx);
00289 //      }
00290 //
00291 //  public:
00292 //      /** idx */
00293 //      index_t idx;
00294 //
00295 //      /** cache */
00296 //      CCache<T>* cache;
00297 //};
00298 
00300 template<class T> class SGMatrix
00301 {
00302     public:
00304         SGMatrix() : matrix(NULL), num_rows(0), num_cols(0), do_free(false) { }
00305 
00307         SGMatrix(T* m, index_t nrows, index_t ncols, bool free_mat=false)
00308             : matrix(m), num_rows(nrows), num_cols(ncols), do_free(free_mat) { }
00309 
00311         SGMatrix(index_t nrows, index_t ncols, bool free_mat=false)
00312             : num_rows(nrows), num_cols(ncols), do_free(free_mat)
00313         {
00314             matrix=SG_MALLOC(T, nrows*ncols);
00315         }
00316 
00318         SGMatrix(const SGMatrix &orig)
00319             : matrix(orig.matrix), num_rows(orig.num_rows),
00320             num_cols(orig.num_cols), do_free(orig.do_free) { }
00321 
00323         virtual ~SGMatrix()
00324         {
00325         }
00326 
00328         virtual void free_matrix()
00329         {
00330             if (do_free)
00331                 SG_FREE(matrix);
00332 
00333             matrix=NULL;
00334             do_free=false;
00335             num_rows=0;
00336             num_cols=0;
00337         }
00338 
00340         virtual void destroy_matrix()
00341         {
00342             do_free=true;
00343             free_matrix();
00344         }
00345 
00349         inline const T& operator[](index_t index) const
00350         {
00351             return matrix[index];
00352         }
00353 
00357         inline T& operator[](index_t index)
00358         {
00359             return matrix[index];
00360         }
00361 
00362     public:
00364         T* matrix;
00366         index_t num_rows;
00368         index_t num_cols;
00370         bool do_free;
00371 };
00372 
00374 template<class T> class SGNDArray
00375 {
00376     public:
00378         SGNDArray() : array(NULL), dims(NULL), num_dims(0) { }
00379 
00381         SGNDArray(T* a, index_t* d, index_t nd)
00382             : array(a), dims(d), num_dims(nd) { }
00383 
00385         SGNDArray(const SGNDArray &orig)
00386             : array(orig.array), dims(orig.dims), num_dims(orig.num_dims) { }
00387 
00388     public:
00390         T* array;
00392         index_t* dims;
00394         index_t num_dims;
00395 };
00396 
00398 template<class T> class SGString
00399 {
00400 public:
00402     SGString() : string(NULL), slen(0), do_free(false) { }
00403 
00405     SGString(T* s, index_t l, bool free_s=false)
00406         : string(s), slen(l), do_free(free_s) { }
00407 
00409     SGString(SGVector<T> v)
00410         : string(v.vector), slen(v.vlen), do_free(v.do_free) { }
00411 
00413     SGString(index_t len, bool free_s=false) :
00414         slen(len), do_free(free_s)
00415     {
00416         string=SG_MALLOC(T, len);
00417     }
00418 
00420     SGString(const SGString &orig)
00421         : string(orig.string), slen(orig.slen), do_free(orig.do_free) { }
00422 
00424     void free_string()
00425     {
00426         if (do_free)
00427             SG_FREE(string);
00428 
00429         string=NULL;
00430         do_free=false;
00431         slen=0;
00432     }
00433 
00435     void destroy_string()
00436     {
00437         do_free=true;
00438         free_string();
00439     }
00440 
00441 public:
00443     T* string;
00445     index_t slen;
00447     bool do_free;
00448 };
00449 
00451 template <class T> struct SGStringList
00452 {
00453 public:
00455     SGStringList() : num_strings(0), max_string_length(0), strings(NULL), 
00456         do_free(false) { }
00457 
00459     SGStringList(SGString<T>* s, index_t num_s, index_t max_length,
00460             bool free_strings=false) : num_strings(num_s),
00461             max_string_length(max_length), strings(s), do_free(free_strings) { }
00462 
00464     SGStringList(index_t num_s, index_t max_length, bool free_strings=false)
00465         : num_strings(num_s), max_string_length(max_length),
00466           do_free(free_strings)
00467     {
00468         strings=SG_MALLOC(SGString<T>, num_strings);
00469     }
00470 
00472     SGStringList(const SGStringList &orig) :
00473         num_strings(orig.num_strings),
00474         max_string_length(orig.max_string_length),
00475         strings(orig.strings), do_free(orig.do_free) { }
00476 
00478     void free_list()
00479     {
00480         if (do_free)
00481             SG_FREE(strings);
00482 
00483         strings=NULL;
00484         do_free=false;
00485         num_strings=0;
00486         max_string_length=0;
00487     }
00488 
00490     void destroy_list()
00491     {
00492         do_free=true;
00493         free_list();
00494     }
00495 
00496 public:
00498     index_t num_strings;
00499 
00501     index_t max_string_length;
00502 
00504     SGString<T>* strings;
00505 
00507     bool do_free;
00508 };
00509 
00511 template <class T> struct SGSparseVectorEntry
00512 {
00514     index_t feat_index;
00516     T entry;
00517 };
00518 
00520 template <class T> class SGSparseVector
00521 {
00522 public:
00524     SGSparseVector() :
00525         vec_index(0), num_feat_entries(0), features(NULL), do_free(false) {}
00526 
00528     SGSparseVector(SGSparseVectorEntry<T>* feats, index_t num_entries,
00529             index_t index, bool free_v=false) :
00530             vec_index(index), num_feat_entries(num_entries), features(feats),
00531             do_free(free_v) {}
00532 
00534     SGSparseVector(index_t num_entries, index_t index, bool free_v=false) :
00535         vec_index(index), num_feat_entries(num_entries), do_free(free_v)
00536     {
00537         features=SG_MALLOC(SGSparseVectorEntry<T>, num_feat_entries);
00538     }
00539 
00541     SGSparseVector(const SGSparseVector& orig) :
00542             vec_index(orig.vec_index), num_feat_entries(orig.num_feat_entries),
00543             features(orig.features), do_free(orig.do_free) {}
00544 
00546     void free_vector()
00547     {
00548         if (do_free)
00549             SG_FREE(features);
00550 
00551         features=NULL;
00552         do_free=false;
00553         vec_index=0;
00554         num_feat_entries=0;
00555     }
00556 
00558     void destroy_vector()
00559     {
00560         do_free=true;
00561         free_vector();
00562     }
00563 
00564 public:
00566     index_t vec_index;
00567 
00569     index_t num_feat_entries;
00570 
00572     SGSparseVectorEntry<T>* features;
00573 
00575     bool do_free;
00576 };
00577 
00579 template <class T> class SGSparseMatrix
00580 {
00581     public:
00583         SGSparseMatrix() :
00584             num_vectors(0), num_features(0), sparse_matrix(NULL),
00585             do_free(false) { }
00586 
00587 
00589         SGSparseMatrix(SGSparseVector<T>* vecs, index_t num_feat,
00590                 index_t num_vec, bool free_m=false) :
00591             num_vectors(num_vec), num_features(num_feat),
00592             sparse_matrix(vecs), do_free(free_m) { }
00593 
00595         SGSparseMatrix(index_t num_vec, index_t num_feat, bool free_m=false) :
00596             num_vectors(num_vec), num_features(num_feat), do_free(free_m)
00597         {
00598             sparse_matrix=SG_MALLOC(SGSparseVector<T>, num_vectors);
00599         }
00600 
00602         SGSparseMatrix(const SGSparseMatrix &orig) :
00603             num_vectors(orig.num_vectors), num_features(orig.num_features),
00604             sparse_matrix(orig.sparse_matrix), do_free(orig.do_free) { }
00605 
00607         void free_matrix()
00608         {
00609             if (do_free)
00610                 SG_FREE(sparse_matrix);
00611 
00612             sparse_matrix=NULL;
00613             do_free=false;
00614             num_vectors=0;
00615             num_features=0;
00616         }
00617 
00619         void own_matrix()
00620         {
00621             for (index_t i=0; i<num_vectors; i++)
00622                 sparse_matrix[i].do_free=false;
00623 
00624             do_free=false;
00625         }
00626 
00628         void destroy_matrix()
00629         {
00630             do_free=true;
00631             free_matrix();
00632         }
00633 
00634     public:
00636     index_t num_vectors;
00637 
00639     index_t num_features;
00640 
00642     SGSparseVector<T>* sparse_matrix;
00643 
00645     bool do_free;
00646 };
00647 
00648 #ifndef DOXYGEN_SHOULD_SKIP_THIS
/** container type tag, stored in TSGDataType::m_ctype; the numeric values
 * are spelled out explicitly and must stay as they are */
enum EContainerType
{
    CT_SCALAR   = 0,
    CT_VECTOR   = 1,
    CT_MATRIX   = 2,
    CT_NDARRAY  = 3,
    CT_SGVECTOR = 4,
    CT_SGMATRIX = 5
};
00658 
/** struct type tag, stored in TSGDataType::m_stype; the numeric values are
 * spelled out explicitly and must stay as they are */
enum EStructType
{
    ST_NONE   = 0,
    ST_STRING = 1,
    ST_SPARSE = 2
};
00665 
/** primitive type tag, stored in TSGDataType::m_ptype; the numeric values
 * are spelled out explicitly and must stay as they are.
 *
 * PT_SGOBJECT is the value the PT_NOT_GENERIC macro expands to.
 */
enum EPrimitiveType
{
    PT_BOOL     = 0,
    PT_CHAR     = 1,
    PT_INT8     = 2,
    PT_UINT8    = 3,
    PT_INT16    = 4,
    PT_UINT16   = 5,
    PT_INT32    = 6,
    PT_UINT32   = 7,
    PT_INT64    = 8,
    PT_UINT64   = 9,
    PT_FLOAT32  = 10,
    PT_FLOAT64  = 11,
    PT_FLOATMAX = 12,
    PT_SGOBJECT = 13
};
00683 #endif
00684 
00686 struct TSGDataType
00687 {
00689     EContainerType m_ctype;
00691     EStructType m_stype;
00693     EPrimitiveType m_ptype;
00694 
00696     index_t *m_length_y;
00698     index_t *m_length_x;
00699 
00705     explicit TSGDataType(EContainerType ctype, EStructType stype,
00706                          EPrimitiveType ptype);
00713     explicit TSGDataType(EContainerType ctype, EStructType stype,
00714                          EPrimitiveType ptype, index_t* length);
00722     explicit TSGDataType(EContainerType ctype, EStructType stype,
00723                          EPrimitiveType ptype, index_t* length_y,
00724                          index_t* length_x);
00725 
00727     bool operator==(const TSGDataType& a);
00731     inline bool operator!=(const TSGDataType& a)
00732     {
00733         return !(*this == a);
00734     }
00735 
00740     void to_string(char* dest, size_t n) const;
00741 
00743     size_t sizeof_stype() const;
00745     size_t sizeof_ptype() const;
00746 
00750     static size_t sizeof_sparseentry(EPrimitiveType ptype);
00751 
00755     static size_t offset_sparseentry(EPrimitiveType ptype);
00756 
00763     static void stype_to_string(char* dest, EStructType stype,
00764                                 EPrimitiveType ptype, size_t n);
00770     static void ptype_to_string(char* dest, EPrimitiveType ptype,
00771                                 size_t n);
00776     static bool string_to_ptype(EPrimitiveType* ptype,
00777                                 const char* str);
00778 
00782     size_t get_size();
00783 
00787     index_t get_num_elements();
00788 };
00789 }
00790 #endif /* __DATATYPE_H__  */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation