SimpleFeatures.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2010 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  * Copyright (C) 2010 Berlin Institute of Technology
00011  */
00012 
00013 #ifndef _SIMPLEFEATURES__H__
00014 #define _SIMPLEFEATURES__H__
00015 
00016 #include <shogun/lib/common.h>
00017 #include <shogun/mathematics/Math.h>
00018 #include <shogun/io/SGIO.h>
00019 #include <shogun/lib/Cache.h>
00020 #include <shogun/io/File.h>
00021 #include <shogun/preprocessor/SimplePreprocessor.h>
00022 #include <shogun/features/DotFeatures.h>
00023 #include <shogun/features/StringFeatures.h>
00024 #include <shogun/base/Parameter.h>
00025 #include <shogun/lib/DataType.h>
00026 
00027 #include <string.h>
00028 
00029 namespace shogun {
/* Forward declarations of the classes and class templates referenced by this header. */
class CDotFeatures;
template<class ST> struct SGMatrix;
template<class ST> class CStringFeatures;
template<class ST> class CSimplePreprocessor;
template<class ST> class CSimpleFeatures;
00035 
00065 template<class ST> class CSimpleFeatures: public CDotFeatures
00066 {
00067 public:
00072     CSimpleFeatures(int32_t size = 0) : CDotFeatures(size) { init(); }
00073 
00075     CSimpleFeatures(const CSimpleFeatures & orig) :
00076             CDotFeatures(orig)
00077     {
00078         copy_feature_matrix(SGMatrix<ST>(orig.feature_matrix,
00079                                          orig.num_features,
00080                                          orig.num_vectors));
00081         initialize_cache();
00082         m_subset=orig.m_subset->duplicate();
00083     }
00084 
00089     CSimpleFeatures(SGMatrix<ST> matrix) :
00090             CDotFeatures()
00091     {
00092         init();
00093         set_feature_matrix(matrix);
00094     }
00095 
00102     CSimpleFeatures(ST* src, int32_t num_feat, int32_t num_vec) :
00103             CDotFeatures()
00104     {
00105         init();
00106         set_feature_matrix(src, num_feat, num_vec);
00107     }
00108 
00113     CSimpleFeatures(CFile* loader) :
00114             CDotFeatures(loader)
00115     {
00116         init();
00117         load(loader);
00118     }
00119 
00124     virtual CFeatures* duplicate() const
00125     {
00126         return new CSimpleFeatures<ST>(*this);
00127     }
00128 
00129     virtual ~CSimpleFeatures() { free_features(); }
00130 
00135     void free_feature_matrix()
00136     {
00137         remove_subset();
00138         SG_FREE(feature_matrix);
00139         feature_matrix = NULL;
00140         feature_matrix_num_features = num_features;
00141         feature_matrix_num_vectors = num_vectors;
00142         num_vectors = 0;
00143         num_features = 0;
00144     }
00145 
00150     void free_features()
00151     {
00152         remove_subset();
00153         free_feature_matrix();
00154         SG_UNREF(feature_cache);
00155     }
00156 
00168     ST* get_feature_vector(int32_t num, int32_t& len, bool& dofree)
00169     {
00170         /* index conversion for subset, only for array access */
00171         int32_t real_num=subset_idx_conversion(num);
00172 
00173         len = num_features;
00174 
00175         if (feature_matrix)
00176         {
00177             dofree = false;
00178             return &feature_matrix[real_num * int64_t(num_features)];
00179         }
00180 
00181         ST* feat = NULL;
00182         dofree = false;
00183 
00184         if (feature_cache)
00185         {
00186             feat = feature_cache->lock_entry(num);
00187 
00188             if (feat)
00189                 return feat;
00190             else 
00191                 feat = feature_cache->set_entry(real_num);
00192         }
00193 
00194         if (!feat)
00195             dofree = true;
00196         feat = compute_feature_vector(num, len, feat);
00197 
00198         if (get_num_preprocessors())
00199         {
00200             int32_t tmp_len = len;
00201             ST* tmp_feat_before = feat;
00202             ST* tmp_feat_after = NULL;
00203 
00204             for (int32_t i = 0; i < get_num_preprocessors(); i++)
00205             {
00206                 CSimplePreprocessor<ST>* p =
00207                         (CSimplePreprocessor<ST>*) get_preprocessor(i);
00208                 // temporary hack
00209                 SGVector<ST> applied = p->apply_to_feature_vector(
00210                         SGVector<ST>(tmp_feat_before, tmp_len));
00211                 tmp_feat_after = applied.vector;
00212                 SG_UNREF(p);
00213 
00214                 if (i != 0) // delete feature vector, except for the the first one, i.e., feat
00215                     SG_FREE(tmp_feat_before);
00216                 tmp_feat_before = tmp_feat_after;
00217             }
00218 
00219             memcpy(feat, tmp_feat_after, sizeof(ST) * tmp_len);
00220             SG_FREE(tmp_feat_after);
00221 
00222             len = tmp_len;
00223         }
00224         return feat;
00225     }
00226 
00234     void set_feature_vector(SGVector<ST> vector, int32_t num)
00235     {
00236         /* index conversion for subset, only for array access */
00237         int32_t real_num=subset_idx_conversion(num);
00238 
00239         if (num>=get_num_vectors())
00240         {
00241             SG_ERROR("Index out of bounds (number of vectors %d, you "
00242             "requested %d)\n", get_num_vectors(), num);
00243         }
00244 
00245         if (!feature_matrix)
00246             SG_ERROR("Requires a in-memory feature matrix\n");
00247 
00248         if (vector.vlen != num_features)
00249             SG_ERROR(
00250                     "Vector not of length %d (has %d)\n", num_features, vector.vlen);
00251 
00252         memcpy(&feature_matrix[real_num * int64_t(num_features)], vector.vector,
00253                 int64_t(num_features) * sizeof(ST));
00254     }
00255 
00263     SGVector<ST> get_feature_vector(int32_t num)
00264     {
00265         /* index conversion for subset, only for array access */
00266         int32_t real_num=subset_idx_conversion(num);
00267 
00268         if (num >= get_num_vectors())
00269         {
00270             SG_ERROR("Index out of bounds (number of vectors %d, you "
00271             "requested %d)\n", get_num_vectors(), real_num);
00272         }
00273 
00274         SGVector<ST> vec;
00275         vec.vector = get_feature_vector(num, vec.vlen, vec.do_free);
00276         return vec;
00277     }
00278 
00287     void free_feature_vector(ST* feat_vec, int32_t num, bool dofree)
00288     {
00289         if (feature_cache)
00290             feature_cache->unlock_entry(subset_idx_conversion(num));
00291 
00292         if (dofree)
00293             SG_FREE(feat_vec);
00294     }
00295 
00303     void free_feature_vector(SGVector<ST> vec, int32_t num)
00304     {
00305         free_feature_vector(vec.vector, num, vec.do_free);
00306     }
00307 
00321     void vector_subset(int32_t* idx, int32_t idx_len)
00322     {
00323         if (m_subset)
00324             SG_ERROR("A subset is set, cannot call vector_subset\n");
00325 
00326         ASSERT(feature_matrix);
00327         ASSERT(idx_len<=num_vectors);
00328 
00329         int32_t num_vec = num_vectors;
00330         num_vectors = idx_len;
00331 
00332         int32_t old_ii = -1;
00333 
00334         for (int32_t i = 0; i < idx_len; i++)
00335         {
00336             int32_t ii = idx[i];
00337             ASSERT(old_ii<ii);
00338 
00339             if (ii < 0 || ii >= num_vec)
00340                 SG_ERROR( "Index out of range: should be 0<%d<%d\n", ii, num_vec);
00341 
00342             if (i == ii)
00343                 continue;
00344 
00345             memcpy(&feature_matrix[int64_t(num_features) * i],
00346                     &feature_matrix[int64_t(num_features) * ii],
00347                     num_features * sizeof(ST));
00348             old_ii = ii;
00349         }
00350     }
00351 
00365     void feature_subset(int32_t* idx, int32_t idx_len)
00366     {
00367         if (m_subset)
00368             SG_ERROR("A subset is set, cannot call feature_subset\n");
00369 
00370         ASSERT(feature_matrix);
00371         ASSERT(idx_len<=num_features);
00372         int32_t num_feat = num_features;
00373         num_features = idx_len;
00374 
00375         for (int32_t i = 0; i < num_vectors; i++)
00376         {
00377             ST* src = &feature_matrix[int64_t(num_feat) * i];
00378             ST* dst = &feature_matrix[int64_t(num_features) * i];
00379 
00380             int32_t old_jj = -1;
00381             for (int32_t j = 0; j < idx_len; j++)
00382             {
00383                 int32_t jj = idx[j];
00384                 ASSERT(old_jj<jj);
00385                 if (jj < 0 || jj >= num_feat)
00386                     SG_ERROR(
00387                             "Index out of range: should be 0<%d<%d\n", jj, num_feat);
00388 
00389                 dst[j] = src[jj];
00390                 old_jj = jj;
00391             }
00392         }
00393     }
00394 
00404     void get_feature_matrix(ST** dst, int32_t* num_feat, int32_t* num_vec)
00405     {
00406         ASSERT(feature_matrix);
00407 
00408         int64_t num = int64_t(num_features) * get_num_vectors();
00409         *num_feat = num_features;
00410         *num_vec = get_num_vectors();
00411         *dst = SG_MALLOC(ST, num);
00412 
00413         /* copying depends on whether a subset is used */
00414         if (m_subset)
00415         {
00416             /* copy vector wise */
00417             for (int32_t i = 0; i < *num_vec; ++i)
00418             {
00419                 int32_t real_i = m_subset->subset_idx_conversion(i);
00420                 memcpy(*dst, &feature_matrix[real_i * int64_t(num_features)],
00421                         num_features * sizeof(ST));
00422             }
00423         }
00424         else
00425         {
00426             /* copy complete matrix */
00427             memcpy(*dst, feature_matrix, num * sizeof(ST));
00428         }
00429     }
00430 
00437     SGMatrix<ST> get_feature_matrix()
00438     {
00439         return SGMatrix<ST>(feature_matrix, num_features, num_vectors);
00440     }
00441 
00448     void set_feature_matrix(SGMatrix<ST> matrix)
00449     {
00450         remove_subset();
00451         free_feature_matrix();
00452         feature_matrix = matrix.matrix;
00453         num_features = matrix.num_rows;
00454         num_vectors = matrix.num_cols;
00455         feature_matrix_num_vectors = num_vectors;
00456         feature_matrix_num_features = num_features;
00457     }
00458 
00468     ST* get_feature_matrix(int32_t &num_feat, int32_t &num_vec)
00469     {
00470         num_feat = num_features;
00471         num_vec = num_vectors;
00472         return feature_matrix;
00473     }
00474 
00481     CSimpleFeatures<ST>* get_transposed()
00482     {
00483         int32_t num_feat;
00484         int32_t num_vec;
00485         ST* fm = get_transposed(num_feat, num_vec);
00486 
00487         return new CSimpleFeatures<ST>(fm, num_feat, num_vec);
00488     }
00489 
00501     ST* get_transposed(int32_t &num_feat, int32_t &num_vec)
00502     {
00503         num_feat = get_num_vectors();
00504         num_vec = num_features;
00505 
00506         int32_t old_num_vec=get_num_vectors();
00507 
00508         ST* fm = SG_MALLOC(ST, int64_t(num_feat) * num_vec);
00509 
00510         for (int32_t i=0; i<old_num_vec; i++)
00511         {
00512             SGVector<ST> vec=get_feature_vector(i);
00513 
00514             for (int32_t j=0; j<vec.vlen; j++)
00515                 fm[j*int64_t(old_num_vec)+i]=vec.vector[j];
00516 
00517             free_feature_vector(vec, i);
00518         }
00519 
00520         return fm;
00521     }
00522 
00535     virtual void set_feature_matrix(ST* fm, int32_t num_feat, int32_t num_vec)
00536     {
00537         if (m_subset)
00538             SG_ERROR("A subset is set, cannot call set_feature_matrix\n");
00539 
00540         free_feature_matrix();
00541         feature_matrix = fm;
00542         feature_matrix_num_features = num_feat;
00543         feature_matrix_num_vectors = num_vec;
00544 
00545         num_features = num_feat;
00546         num_vectors = num_vec;
00547         initialize_cache();
00548     }
00549 
00559     virtual void copy_feature_matrix(SGMatrix<ST> src)
00560     {
00561         if (m_subset)
00562             SG_ERROR("A subset is set, cannot call copy_feature_matrix\n");
00563 
00564         free_feature_matrix();
00565         int32_t num_feat = src.num_rows;
00566         int32_t num_vec = src.num_cols;
00567         feature_matrix = SG_MALLOC(ST, ((int64_t) num_feat) * num_vec);
00568         feature_matrix_num_features = num_feat;
00569         feature_matrix_num_vectors = num_vec;
00570 
00571         memcpy(feature_matrix, src.matrix,
00572                 (sizeof(ST) * ((int64_t) num_feat) * num_vec));
00573 
00574         num_features = num_feat;
00575         num_vectors = num_vec;
00576         initialize_cache();
00577     }
00578 
00585     void obtain_from_dot(CDotFeatures* df)
00586     {
00587         remove_subset();
00588 
00589         int32_t num_feat = df->get_dim_feature_space();
00590         int32_t num_vec = df->get_num_vectors();
00591 
00592         ASSERT(num_feat>0 && num_vec>0);
00593 
00594         free_feature_matrix();
00595         feature_matrix = SG_MALLOC(ST, ((int64_t) num_feat) * num_vec);
00596         feature_matrix_num_features = num_feat;
00597         feature_matrix_num_vectors = num_vec;
00598 
00599         for (int32_t i = 0; i < num_vec; i++)
00600         {
00601             SGVector<float64_t> v = df->get_computed_dot_feature_vector(i);
00602             ASSERT(num_feat==v.vlen);
00603 
00604             for (int32_t j = 0; j < num_feat; j++)
00605                 feature_matrix[i * int64_t(num_feat) + j] = (ST) v.vector[j];
00606 
00607             v.free_vector();
00608         }
00609         num_features = num_feat;
00610         num_vectors = num_vec;
00611     }
00612 
00623     virtual bool apply_preprocessor(bool force_preprocessing = false)
00624     {
00625         if (m_subset)
00626             SG_ERROR("A subset is set, cannot call apply_preproc\n");
00627 
00628         SG_DEBUG( "force: %d\n", force_preprocessing);
00629 
00630         if (feature_matrix && get_num_preprocessors())
00631         {
00632             for (int32_t i = 0; i < get_num_preprocessors(); i++)
00633             {
00634                 if ((!is_preprocessed(i) || force_preprocessing))
00635                 {
00636                     set_preprocessed(i);
00637                     CSimplePreprocessor<ST>* p =
00638                             (CSimplePreprocessor<ST>*) get_preprocessor(i);
00639                     SG_INFO( "preprocessing using preproc %s\n", p->get_name());
00640 
00641                     if (p->apply_to_feature_matrix(this).matrix == NULL)
00642                     {
00643                         SG_UNREF(p);
00644                         return false;
00645                     }SG_UNREF(p);
00646 
00647                 }
00648             }
00649 
00650             return true;
00651         }
00652         else
00653         {
00654             if (!feature_matrix)
00655                 SG_ERROR( "no feature matrix\n");
00656 
00657             if (!get_num_preprocessors())
00658                 SG_ERROR( "no preprocessors available\n");
00659 
00660             return false;
00661         }
00662     }
00663 
00668     virtual int32_t get_size() { return sizeof(ST); }
00669 
00674     virtual inline int32_t get_num_vectors() const
00675     {
00676         return m_subset ? m_subset->get_size() : num_vectors;
00677     }
00678 
00683     inline int32_t get_num_features() { return num_features; }
00684 
00689     inline void set_num_features(int32_t num)
00690     {
00691         num_features = num;
00692         initialize_cache();
00693     }
00694 
00701     inline void set_num_vectors(int32_t num)
00702     {
00703         if (m_subset)
00704             SG_ERROR("A subset is set, cannot call set_num_vectors\n");
00705 
00706         num_vectors = num;
00707         initialize_cache();
00708     }
00709 
00714     inline void initialize_cache()
00715     {
00716         if (m_subset)
00717             SG_ERROR("A subset is set, cannot call initialize_cache\n");
00718 
00719         if (num_features && num_vectors)
00720         {
00721             SG_UNREF(feature_cache);
00722             feature_cache = new CCache<ST>(get_cache_size(), num_features,
00723                     num_vectors);
00724             SG_REF(feature_cache);
00725         }
00726     }
00727 
00732     inline virtual EFeatureClass get_feature_class() { return C_SIMPLE; }
00733 
00738     inline virtual EFeatureType get_feature_type();
00739 
00748     virtual bool reshape(int32_t p_num_features, int32_t p_num_vectors)
00749     {
00750         if (m_subset)
00751             SG_ERROR("A subset is set, cannot call reshape\n");
00752 
00753         if (p_num_features * p_num_vectors
00754                 == this->num_features * this->num_vectors)
00755         {
00756             num_features = p_num_features;
00757             num_vectors = p_num_vectors;
00758             return true;
00759         } else
00760             return false;
00761     }
00762 
00770     virtual int32_t get_dim_feature_space() const { return num_features; }
00771 
00781     virtual float64_t dot(int32_t vec_idx1, CDotFeatures* df,
00782             int32_t vec_idx2)
00783     {
00784         ASSERT(df);
00785         ASSERT(df->get_feature_type() == get_feature_type());
00786         ASSERT(df->get_feature_class() == get_feature_class());
00787         CSimpleFeatures<ST>* sf = (CSimpleFeatures<ST>*) df;
00788 
00789         int32_t len1, len2;
00790         bool free1, free2;
00791 
00792         ST* vec1 = get_feature_vector(vec_idx1, len1, free1);
00793         ST* vec2 = sf->get_feature_vector(vec_idx2, len2, free2);
00794 
00795         float64_t result = CMath::dot(vec1, vec2, len1);
00796 
00797         free_feature_vector(vec1, vec_idx1, free1);
00798         sf->free_feature_vector(vec2, vec_idx2, free2);
00799 
00800         return result;
00801     }
00802 
00811     virtual float64_t dense_dot(int32_t vec_idx1, const float64_t* vec2,
00812             int32_t vec2_len);
00813 
00824     virtual void add_to_dense_vec(float64_t alpha, int32_t vec_idx1,
00825             float64_t* vec2, int32_t vec2_len, bool abs_val = false)
00826     {
00827         ASSERT(vec2_len == num_features);
00828 
00829         int32_t vlen;
00830         bool vfree;
00831         ST* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00832 
00833         ASSERT(vlen == num_features);
00834 
00835         if (abs_val)
00836         {
00837             for (int32_t i = 0; i < num_features; i++)
00838                 vec2[i] += alpha * CMath::abs(vec1[i]);
00839         }
00840         else
00841         {
00842             for (int32_t i = 0; i < num_features; i++)
00843                 vec2[i] += alpha * vec1[i];
00844         }
00845 
00846         free_feature_vector(vec1, vec_idx1, vfree);
00847     }
00848 
00854     virtual inline int32_t get_nnz_features_for_vector(int32_t num)
00855     {
00856         /* H.Strathmann: TODO fix according to Soerens mail */
00857         return num_features;
00858     }
00859 
00867     virtual inline bool Align_char_features(CStringFeatures<char>* cf,
00868             CStringFeatures<char>* Ref, float64_t gapCost)
00869     {
00870         return false;
00871     }
00872 
00877     virtual inline void load(CFile* loader);
00878 
00883     virtual inline void save(CFile* saver);
00884 
00885     #ifndef DOXYGEN_SHOULD_SKIP_THIS
00886 
00887     struct simple_feature_iterator
00888     {
00890         ST* vec;
00892         int32_t vidx;
00894         int32_t vlen;
00896         bool vfree;
00897 
00899         int32_t index;
00900     };
00901     #endif
00902 
00914     virtual void* get_feature_iterator(int32_t vector_index)
00915     {
00916         if (vector_index>=get_num_vectors())
00917         {
00918             SG_ERROR("Index out of bounds (number of vectors %d, you "
00919             "requested %d)\n", get_num_vectors(), vector_index);
00920         }
00921 
00922         simple_feature_iterator* iterator = SG_MALLOC(simple_feature_iterator, 1);
00923         iterator->vec = get_feature_vector(vector_index, iterator->vlen,
00924                 iterator->vfree);
00925         iterator->vidx = vector_index;
00926         iterator->index = 0;
00927         return iterator;
00928     }
00929 
00942     virtual bool get_next_feature(int32_t& index, float64_t& value,
00943             void* iterator)
00944     {
00945         simple_feature_iterator* it = (simple_feature_iterator*) iterator;
00946         if (!it || it->index >= it->vlen)
00947             return false;
00948 
00949         index = it->index++;
00950         value = (float64_t) it->vec[index];
00951 
00952         return true;
00953     }
00954 
00960     virtual void free_feature_iterator(void* iterator)
00961     {
00962         if (!iterator)
00963             return;
00964 
00965         simple_feature_iterator* it = (simple_feature_iterator*) iterator;
00966         free_feature_vector(it->vec, it->vidx, it->vfree);
00967         SG_FREE(it);
00968     }
00969 
00978     virtual CFeatures* copy_subset(SGVector<index_t> indices)
00979     {
00980         SGMatrix<ST> feature_matrix_copy(num_features, indices.vlen);
00981 
00982         for (index_t i=0; i<indices.vlen; ++i)
00983         {
00984             index_t real_idx=subset_idx_conversion(indices.vector[i]);
00985             memcpy(&feature_matrix_copy.matrix[i*num_features],
00986                     &feature_matrix[real_idx*num_features],
00987                     num_features*sizeof(ST));
00988         }
00989 
00990         return new CSimpleFeatures(feature_matrix_copy);
00991     }
00992 
00994     inline virtual const char* get_name() const { return "SimpleFeatures"; }
00995 
00996 protected:
01008     virtual ST* compute_feature_vector(int32_t num, int32_t& len, ST* target =
01009             NULL)
01010     {
01011         SG_NOTIMPLEMENTED;
01012         len = 0;
01013         return NULL;
01014     }
01015 
01016 private:
01017     void init()
01018     {
01019         num_vectors = 0;
01020         num_features = 0;
01021 
01022         feature_matrix = NULL;
01023         feature_matrix_num_vectors = 0;
01024         feature_matrix_num_features = 0;
01025 
01026         feature_cache = NULL;
01027 
01028         set_generic<ST>();
01029         /* not store number of vectors in subset */
01030         m_parameters->add(&num_vectors, "num_vectors",
01031                 "Number of vectors.");
01032         m_parameters->add(&num_features, "num_features", "Number of features.");
01033         m_parameters->add_matrix(&feature_matrix, &feature_matrix_num_features,
01034                 &feature_matrix_num_vectors, "feature_matrix",
01035                 "Matrix of feature vectors / 1 vector per column.");
01036     }
01037 
01038 protected:
01040     int32_t num_vectors;
01041 
01043     int32_t num_features;
01044 
01049     ST* feature_matrix;
01050 
01052     int32_t feature_matrix_num_vectors;
01053 
01055     int32_t feature_matrix_num_features;
01056 
01058     CCache<ST>* feature_cache;
01059 };
01060 
01061 #ifndef DOXYGEN_SHOULD_SKIP_THIS
01062 
01063 #define GET_FEATURE_TYPE(f_type, sg_type)   \
01064 template<> inline EFeatureType CSimpleFeatures<sg_type>::get_feature_type() \
01065 {                                                                           \
01066     return f_type;                                                          \
01067 }
01068 
01069 GET_FEATURE_TYPE(F_BOOL, bool)
01070 GET_FEATURE_TYPE(F_CHAR, char)
01071 GET_FEATURE_TYPE(F_BYTE, uint8_t)
01072 GET_FEATURE_TYPE(F_BYTE, int8_t)
01073 GET_FEATURE_TYPE(F_SHORT, int16_t)
01074 GET_FEATURE_TYPE(F_WORD, uint16_t)
01075 GET_FEATURE_TYPE(F_INT, int32_t)
01076 GET_FEATURE_TYPE(F_UINT, uint32_t)
01077 GET_FEATURE_TYPE(F_LONG, int64_t)
01078 GET_FEATURE_TYPE(F_ULONG, uint64_t)
01079 GET_FEATURE_TYPE(F_SHORTREAL, float32_t)
01080 GET_FEATURE_TYPE(F_DREAL, float64_t)
01081 GET_FEATURE_TYPE(F_LONGREAL, floatmax_t)
01082 #undef GET_FEATURE_TYPE
01083 
01092 template<> inline bool CSimpleFeatures<float64_t>::Align_char_features(
01093         CStringFeatures<char>* cf, CStringFeatures<char>* Ref,
01094         float64_t gapCost)
01095 {
01096     ASSERT(cf);
01097     /*num_vectors=cf->get_num_vectors();
01098      num_features=Ref->get_num_vectors();
01099 
01100      int64_t len=((int64_t) num_vectors)*num_features;
01101      free_feature_matrix();
01102      feature_matrix=SG_MALLOC(float64_t, len);
01103      int32_t num_cf_feat=0;
01104      int32_t num_cf_vec=0;
01105      int32_t num_ref_feat=0;
01106      int32_t num_ref_vec=0;
01107      char* fm_cf=NULL; //cf->get_feature_matrix(num_cf_feat, num_cf_vec);
01108      char* fm_ref=NULL; //Ref->get_feature_matrix(num_ref_feat, num_ref_vec);
01109 
01110      ASSERT(num_cf_vec==num_vectors);
01111      ASSERT(num_ref_vec==num_features);
01112 
01113      SG_INFO( "computing aligments of %i vectors to %i reference vectors: ", num_cf_vec, num_ref_vec) ;
01114      for (int32_t i=0; i< num_ref_vec; i++)
01115      {
01116      SG_PROGRESS(i, num_ref_vec) ;
01117      for (int32_t j=0; j<num_cf_vec; j++)
01118      feature_matrix[i+j*num_features] = CMath::Align(&fm_cf[j*num_cf_feat], &fm_ref[i*num_ref_feat], num_cf_feat, num_ref_feat, gapCost);
01119      } ;
01120 
01121      SG_INFO( "created %i x %i matrix (0x%p)\n", num_features, num_vectors, feature_matrix) ;*/
01122     return true;
01123 }
01124 
01125 template<> inline float64_t CSimpleFeatures<bool>::dense_dot(int32_t vec_idx1,
01126         const float64_t* vec2, int32_t vec2_len)
01127 {
01128     ASSERT(vec2_len == num_features);
01129 
01130     int32_t vlen;
01131     bool vfree;
01132     bool* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
01133 
01134     ASSERT(vlen == num_features);
01135     float64_t result = 0;
01136 
01137     for (int32_t i = 0; i < num_features; i++)
01138         result += vec1[i] ? vec2[i] : 0;
01139 
01140     free_feature_vector(vec1, vec_idx1, vfree);
01141 
01142     return result;
01143 }
01144 
01145 template<> inline float64_t CSimpleFeatures<char>::dense_dot(int32_t vec_idx1,
01146         const float64_t* vec2, int32_t vec2_len)
01147 {
01148     ASSERT(vec2_len == num_features);
01149 
01150     int32_t vlen;
01151     bool vfree;
01152     char* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
01153 
01154     ASSERT(vlen == num_features);
01155     float64_t result = 0;
01156 
01157     for (int32_t i = 0; i < num_features; i++)
01158         result += vec1[i] * vec2[i];
01159 
01160     free_feature_vector(vec1, vec_idx1, vfree);
01161 
01162     return result;
01163 }
01164 
01165 template<> inline float64_t CSimpleFeatures<int8_t>::dense_dot(int32_t vec_idx1,
01166         const float64_t* vec2, int32_t vec2_len)
01167 {
01168     ASSERT(vec2_len == num_features);
01169 
01170     int32_t vlen;
01171     bool vfree;
01172     int8_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
01173 
01174     ASSERT(vlen == num_features);
01175     float64_t result = 0;
01176 
01177     for (int32_t i = 0; i < num_features; i++)
01178         result += vec1[i] * vec2[i];
01179 
01180     free_feature_vector(vec1, vec_idx1, vfree);
01181 
01182     return result;
01183 }
01184 
01185 template<> inline float64_t CSimpleFeatures<uint8_t>::dense_dot(
01186         int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
01187 {
01188     ASSERT(vec2_len == num_features);
01189 
01190     int32_t vlen;
01191     bool vfree;
01192     uint8_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
01193 
01194     ASSERT(vlen == num_features);
01195     float64_t result = 0;
01196 
01197     for (int32_t i = 0; i < num_features; i++)
01198         result += vec1[i] * vec2[i];
01199 
01200     free_feature_vector(vec1, vec_idx1, vfree);
01201 
01202     return result;
01203 }
01204 
01205 template<> inline float64_t CSimpleFeatures<int16_t>::dense_dot(
01206         int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
01207 {
01208     ASSERT(vec2_len == num_features);
01209 
01210     int32_t vlen;
01211     bool vfree;
01212     int16_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
01213 
01214     ASSERT(vlen == num_features);
01215     float64_t result = 0;
01216 
01217     for (int32_t i = 0; i < num_features; i++)
01218         result += vec1[i] * vec2[i];
01219 
01220     free_feature_vector(vec1, vec_idx1, vfree);
01221 
01222     return result;
01223 }
01224 
01225 template<> inline float64_t CSimpleFeatures<uint16_t>::dense_dot(
01226         int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
01227 {
01228     ASSERT(vec2_len == num_features);
01229 
01230     int32_t vlen;
01231     bool vfree;
01232     uint16_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
01233 
01234     ASSERT(vlen == num_features);
01235     float64_t result = 0;
01236 
01237     for (int32_t i = 0; i < num_features; i++)
01238         result += vec1[i] * vec2[i];
01239 
01240     free_feature_vector(vec1, vec_idx1, vfree);
01241 
01242     return result;
01243 }
01244 
01245 template<> inline float64_t CSimpleFeatures<int32_t>::dense_dot(
01246         int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
01247 {
01248     ASSERT(vec2_len == num_features);
01249 
01250     int32_t vlen;
01251     bool vfree;
01252     int32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
01253 
01254     ASSERT(vlen == num_features);
01255     float64_t result = 0;
01256 
01257     for (int32_t i = 0; i < num_features; i++)
01258         result += vec1[i] * vec2[i];
01259 
01260     free_feature_vector(vec1, vec_idx1, vfree);
01261 
01262     return result;
01263 }
01264 
01265 template<> inline float64_t CSimpleFeatures<uint32_t>::dense_dot(
01266         int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
01267 {
01268     ASSERT(vec2_len == num_features);
01269 
01270     int32_t vlen;
01271     bool vfree;
01272     uint32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
01273 
01274     ASSERT(vlen == num_features);
01275     float64_t result = 0;
01276 
01277     for (int32_t i = 0; i < num_features; i++)
01278         result += vec1[i] * vec2[i];
01279 
01280     free_feature_vector(vec1, vec_idx1, vfree);
01281 
01282     return result;
01283 }
01284 
01285 template<> inline float64_t CSimpleFeatures<int64_t>::dense_dot(
01286         int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
01287 {
01288     ASSERT(vec2_len == num_features);
01289 
01290     int32_t vlen;
01291     bool vfree;
01292     int64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
01293 
01294     ASSERT(vlen == num_features);
01295     float64_t result = 0;
01296 
01297     for (int32_t i = 0; i < num_features; i++)
01298         result += vec1[i] * vec2[i];
01299 
01300     free_feature_vector(vec1, vec_idx1, vfree);
01301 
01302     return result;
01303 }
01304 
01305 template<> inline float64_t CSimpleFeatures<uint64_t>::dense_dot(
01306         int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
01307 {
01308     ASSERT(vec2_len == num_features);
01309 
01310     int32_t vlen;
01311     bool vfree;
01312     uint64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
01313 
01314     ASSERT(vlen == num_features);
01315     float64_t result = 0;
01316 
01317     for (int32_t i = 0; i < num_features; i++)
01318         result += vec1[i] * vec2[i];
01319 
01320     free_feature_vector(vec1, vec_idx1, vfree);
01321 
01322     return result;
01323 }
01324 
01325 template<> inline float64_t CSimpleFeatures<float32_t>::dense_dot(
01326         int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
01327 {
01328     ASSERT(vec2_len == num_features);
01329 
01330     int32_t vlen;
01331     bool vfree;
01332     float32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
01333 
01334     ASSERT(vlen == num_features);
01335     float64_t result = 0;
01336 
01337     for (int32_t i = 0; i < num_features; i++)
01338         result += vec1[i] * vec2[i];
01339 
01340     free_feature_vector(vec1, vec_idx1, vfree);
01341 
01342     return result;
01343 }
01344 
01345 template<> inline float64_t CSimpleFeatures<float64_t>::dense_dot(
01346         int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
01347 {
01348     ASSERT(vec2_len == num_features);
01349 
01350     int32_t vlen;
01351     bool vfree;
01352     float64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
01353 
01354     ASSERT(vlen == num_features);
01355     float64_t result = CMath::dot(vec1, vec2, num_features);
01356 
01357     free_feature_vector(vec1, vec_idx1, vfree);
01358 
01359     return result;
01360 }
01361 
01362 template<> inline float64_t CSimpleFeatures<floatmax_t>::dense_dot(
01363         int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
01364 {
01365     ASSERT(vec2_len == num_features);
01366 
01367     int32_t vlen;
01368     bool vfree;
01369     floatmax_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
01370 
01371     ASSERT(vlen == num_features);
01372     float64_t result = 0;
01373 
01374     for (int32_t i = 0; i < num_features; i++)
01375         result += vec1[i] * vec2[i];
01376 
01377     free_feature_vector(vec1, vec_idx1, vfree);
01378 
01379     return result;
01380 }
01381 
01382 #define LOAD(f_load, sg_type)                                               \
01383 template<> inline void CSimpleFeatures<sg_type>::load(CFile* loader)        \
01384 {                                                                           \
01385     SG_SET_LOCALE_C;                                                    \
01386     ASSERT(loader);                                                         \
01387     sg_type* matrix;                                                        \
01388     int32_t num_feat;                                                       \
01389     int32_t num_vec;                                                        \
01390     loader->f_load(matrix, num_feat, num_vec);                              \
01391     set_feature_matrix(matrix, num_feat, num_vec);                          \
01392     SG_RESET_LOCALE;                                                    \
01393 }
01394 
01395 LOAD(get_matrix, bool)
01396 LOAD(get_matrix, char)
01397 LOAD(get_int8_matrix, int8_t)
01398 LOAD(get_matrix, uint8_t)
01399 LOAD(get_matrix, int16_t)
01400 LOAD(get_matrix, uint16_t)
01401 LOAD(get_matrix, int32_t)
01402 LOAD(get_uint_matrix, uint32_t)
01403 LOAD(get_long_matrix, int64_t)
01404 LOAD(get_ulong_matrix, uint64_t)
01405 LOAD(get_matrix, float32_t)
01406 LOAD(get_matrix, float64_t)
01407 LOAD(get_longreal_matrix, floatmax_t)
01408 #undef LOAD
01409 
01410 #define SAVE(f_write, sg_type)                                              \
01411 template<> inline void CSimpleFeatures<sg_type>::save(CFile* writer)        \
01412 {                                                                           \
01413     SG_SET_LOCALE_C;                                                    \
01414     ASSERT(writer);                                                         \
01415     writer->f_write(feature_matrix, num_features, num_vectors);             \
01416     SG_RESET_LOCALE;                                                    \
01417 }
01418 
01419 SAVE(set_matrix, bool)
01420 SAVE(set_matrix, char)
01421 SAVE(set_int8_matrix, int8_t)
01422 SAVE(set_matrix, uint8_t)
01423 SAVE(set_matrix, int16_t)
01424 SAVE(set_matrix, uint16_t)
01425 SAVE(set_matrix, int32_t)
01426 SAVE(set_uint_matrix, uint32_t)
01427 SAVE(set_long_matrix, int64_t)
01428 SAVE(set_ulong_matrix, uint64_t)
01429 SAVE(set_matrix, float32_t)
01430 SAVE(set_matrix, float64_t)
01431 SAVE(set_longreal_matrix, floatmax_t)
01432 #undef SAVE
01433 
01434 #endif // DOXYGEN_SHOULD_SKIP_THIS
01435 }
01436 #endif // _SIMPLEFEATURES__H__
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation