00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #ifndef _SIMPLEFEATURES__H__
00014 #define _SIMPLEFEATURES__H__
00015
00016 #include <shogun/lib/common.h>
00017 #include <shogun/mathematics/Math.h>
00018 #include <shogun/io/SGIO.h>
00019 #include <shogun/lib/Cache.h>
00020 #include <shogun/io/File.h>
00021 #include <shogun/preprocessor/SimplePreprocessor.h>
00022 #include <shogun/features/DotFeatures.h>
00023 #include <shogun/features/StringFeatures.h>
00024 #include <shogun/base/Parameter.h>
00025 #include <shogun/lib/DataType.h>
00026
00027 #include <string.h>
00028
00029 namespace shogun {
00030 template<class ST> class CStringFeatures;
00031 template<class ST> class CSimpleFeatures;
00032 template<class ST> class CSimplePreprocessor;
00033 template<class ST> struct SGMatrix;
00034 class CDotFeatures;
00035
00065 template<class ST> class CSimpleFeatures: public CDotFeatures
00066 {
00067 public:
00072 CSimpleFeatures(int32_t size = 0) : CDotFeatures(size) { init(); }
00073
00075 CSimpleFeatures(const CSimpleFeatures & orig) :
00076 CDotFeatures(orig)
00077 {
00078 copy_feature_matrix(SGMatrix<ST>(orig.feature_matrix,
00079 orig.num_features,
00080 orig.num_vectors));
00081 initialize_cache();
00082 m_subset=orig.m_subset->duplicate();
00083 }
00084
00089 CSimpleFeatures(SGMatrix<ST> matrix) :
00090 CDotFeatures()
00091 {
00092 init();
00093 set_feature_matrix(matrix);
00094 }
00095
00102 CSimpleFeatures(ST* src, int32_t num_feat, int32_t num_vec) :
00103 CDotFeatures()
00104 {
00105 init();
00106 set_feature_matrix(src, num_feat, num_vec);
00107 }
00108
00113 CSimpleFeatures(CFile* loader) :
00114 CDotFeatures(loader)
00115 {
00116 init();
00117 load(loader);
00118 }
00119
00124 virtual CFeatures* duplicate() const
00125 {
00126 return new CSimpleFeatures<ST>(*this);
00127 }
00128
00129 virtual ~CSimpleFeatures() { free_features(); }
00130
00135 void free_feature_matrix()
00136 {
00137 remove_subset();
00138 SG_FREE(feature_matrix);
00139 feature_matrix = NULL;
00140 feature_matrix_num_features = num_features;
00141 feature_matrix_num_vectors = num_vectors;
00142 num_vectors = 0;
00143 num_features = 0;
00144 }
00145
00150 void free_features()
00151 {
00152 remove_subset();
00153 free_feature_matrix();
00154 SG_UNREF(feature_cache);
00155 }
00156
00168 ST* get_feature_vector(int32_t num, int32_t& len, bool& dofree)
00169 {
00170
00171 int32_t real_num=subset_idx_conversion(num);
00172
00173 len = num_features;
00174
00175 if (feature_matrix)
00176 {
00177 dofree = false;
00178 return &feature_matrix[real_num * int64_t(num_features)];
00179 }
00180
00181 ST* feat = NULL;
00182 dofree = false;
00183
00184 if (feature_cache)
00185 {
00186 feat = feature_cache->lock_entry(num);
00187
00188 if (feat)
00189 return feat;
00190 else
00191 feat = feature_cache->set_entry(real_num);
00192 }
00193
00194 if (!feat)
00195 dofree = true;
00196 feat = compute_feature_vector(num, len, feat);
00197
00198 if (get_num_preprocessors())
00199 {
00200 int32_t tmp_len = len;
00201 ST* tmp_feat_before = feat;
00202 ST* tmp_feat_after = NULL;
00203
00204 for (int32_t i = 0; i < get_num_preprocessors(); i++)
00205 {
00206 CSimplePreprocessor<ST>* p =
00207 (CSimplePreprocessor<ST>*) get_preprocessor(i);
00208
00209 SGVector<ST> applied = p->apply_to_feature_vector(
00210 SGVector<ST>(tmp_feat_before, tmp_len));
00211 tmp_feat_after = applied.vector;
00212 SG_UNREF(p);
00213
00214 if (i != 0)
00215 SG_FREE(tmp_feat_before);
00216 tmp_feat_before = tmp_feat_after;
00217 }
00218
00219 memcpy(feat, tmp_feat_after, sizeof(ST) * tmp_len);
00220 SG_FREE(tmp_feat_after);
00221
00222 len = tmp_len;
00223 }
00224 return feat;
00225 }
00226
00234 void set_feature_vector(SGVector<ST> vector, int32_t num)
00235 {
00236
00237 int32_t real_num=subset_idx_conversion(num);
00238
00239 if (num>=get_num_vectors())
00240 {
00241 SG_ERROR("Index out of bounds (number of vectors %d, you "
00242 "requested %d)\n", get_num_vectors(), num);
00243 }
00244
00245 if (!feature_matrix)
00246 SG_ERROR("Requires a in-memory feature matrix\n");
00247
00248 if (vector.vlen != num_features)
00249 SG_ERROR(
00250 "Vector not of length %d (has %d)\n", num_features, vector.vlen);
00251
00252 memcpy(&feature_matrix[real_num * int64_t(num_features)], vector.vector,
00253 int64_t(num_features) * sizeof(ST));
00254 }
00255
00263 SGVector<ST> get_feature_vector(int32_t num)
00264 {
00265
00266 int32_t real_num=subset_idx_conversion(num);
00267
00268 if (num >= get_num_vectors())
00269 {
00270 SG_ERROR("Index out of bounds (number of vectors %d, you "
00271 "requested %d)\n", get_num_vectors(), real_num);
00272 }
00273
00274 SGVector<ST> vec;
00275 vec.vector = get_feature_vector(num, vec.vlen, vec.do_free);
00276 return vec;
00277 }
00278
00287 void free_feature_vector(ST* feat_vec, int32_t num, bool dofree)
00288 {
00289 if (feature_cache)
00290 feature_cache->unlock_entry(subset_idx_conversion(num));
00291
00292 if (dofree)
00293 SG_FREE(feat_vec);
00294 }
00295
00303 void free_feature_vector(SGVector<ST> vec, int32_t num)
00304 {
00305 free_feature_vector(vec.vector, num, vec.do_free);
00306 }
00307
00321 void vector_subset(int32_t* idx, int32_t idx_len)
00322 {
00323 if (m_subset)
00324 SG_ERROR("A subset is set, cannot call vector_subset\n");
00325
00326 ASSERT(feature_matrix);
00327 ASSERT(idx_len<=num_vectors);
00328
00329 int32_t num_vec = num_vectors;
00330 num_vectors = idx_len;
00331
00332 int32_t old_ii = -1;
00333
00334 for (int32_t i = 0; i < idx_len; i++)
00335 {
00336 int32_t ii = idx[i];
00337 ASSERT(old_ii<ii);
00338
00339 if (ii < 0 || ii >= num_vec)
00340 SG_ERROR( "Index out of range: should be 0<%d<%d\n", ii, num_vec);
00341
00342 if (i == ii)
00343 continue;
00344
00345 memcpy(&feature_matrix[int64_t(num_features) * i],
00346 &feature_matrix[int64_t(num_features) * ii],
00347 num_features * sizeof(ST));
00348 old_ii = ii;
00349 }
00350 }
00351
00365 void feature_subset(int32_t* idx, int32_t idx_len)
00366 {
00367 if (m_subset)
00368 SG_ERROR("A subset is set, cannot call feature_subset\n");
00369
00370 ASSERT(feature_matrix);
00371 ASSERT(idx_len<=num_features);
00372 int32_t num_feat = num_features;
00373 num_features = idx_len;
00374
00375 for (int32_t i = 0; i < num_vectors; i++)
00376 {
00377 ST* src = &feature_matrix[int64_t(num_feat) * i];
00378 ST* dst = &feature_matrix[int64_t(num_features) * i];
00379
00380 int32_t old_jj = -1;
00381 for (int32_t j = 0; j < idx_len; j++)
00382 {
00383 int32_t jj = idx[j];
00384 ASSERT(old_jj<jj);
00385 if (jj < 0 || jj >= num_feat)
00386 SG_ERROR(
00387 "Index out of range: should be 0<%d<%d\n", jj, num_feat);
00388
00389 dst[j] = src[jj];
00390 old_jj = jj;
00391 }
00392 }
00393 }
00394
00404 void get_feature_matrix(ST** dst, int32_t* num_feat, int32_t* num_vec)
00405 {
00406 ASSERT(feature_matrix);
00407
00408 int64_t num = int64_t(num_features) * get_num_vectors();
00409 *num_feat = num_features;
00410 *num_vec = get_num_vectors();
00411 *dst = SG_MALLOC(ST, num);
00412
00413
00414 if (m_subset)
00415 {
00416
00417 for (int32_t i = 0; i < *num_vec; ++i)
00418 {
00419 int32_t real_i = m_subset->subset_idx_conversion(i);
00420 memcpy(*dst, &feature_matrix[real_i * int64_t(num_features)],
00421 num_features * sizeof(ST));
00422 }
00423 }
00424 else
00425 {
00426
00427 memcpy(*dst, feature_matrix, num * sizeof(ST));
00428 }
00429 }
00430
00437 SGMatrix<ST> get_feature_matrix()
00438 {
00439 return SGMatrix<ST>(feature_matrix, num_features, num_vectors);
00440 }
00441
00448 void set_feature_matrix(SGMatrix<ST> matrix)
00449 {
00450 remove_subset();
00451 free_feature_matrix();
00452 feature_matrix = matrix.matrix;
00453 num_features = matrix.num_rows;
00454 num_vectors = matrix.num_cols;
00455 feature_matrix_num_vectors = num_vectors;
00456 feature_matrix_num_features = num_features;
00457 }
00458
00468 ST* get_feature_matrix(int32_t &num_feat, int32_t &num_vec)
00469 {
00470 num_feat = num_features;
00471 num_vec = num_vectors;
00472 return feature_matrix;
00473 }
00474
00481 CSimpleFeatures<ST>* get_transposed()
00482 {
00483 int32_t num_feat;
00484 int32_t num_vec;
00485 ST* fm = get_transposed(num_feat, num_vec);
00486
00487 return new CSimpleFeatures<ST>(fm, num_feat, num_vec);
00488 }
00489
00501 ST* get_transposed(int32_t &num_feat, int32_t &num_vec)
00502 {
00503 num_feat = get_num_vectors();
00504 num_vec = num_features;
00505
00506 int32_t old_num_vec=get_num_vectors();
00507
00508 ST* fm = SG_MALLOC(ST, int64_t(num_feat) * num_vec);
00509
00510 for (int32_t i=0; i<old_num_vec; i++)
00511 {
00512 SGVector<ST> vec=get_feature_vector(i);
00513
00514 for (int32_t j=0; j<vec.vlen; j++)
00515 fm[j*int64_t(old_num_vec)+i]=vec.vector[j];
00516
00517 free_feature_vector(vec, i);
00518 }
00519
00520 return fm;
00521 }
00522
00535 virtual void set_feature_matrix(ST* fm, int32_t num_feat, int32_t num_vec)
00536 {
00537 if (m_subset)
00538 SG_ERROR("A subset is set, cannot call set_feature_matrix\n");
00539
00540 free_feature_matrix();
00541 feature_matrix = fm;
00542 feature_matrix_num_features = num_feat;
00543 feature_matrix_num_vectors = num_vec;
00544
00545 num_features = num_feat;
00546 num_vectors = num_vec;
00547 initialize_cache();
00548 }
00549
00559 virtual void copy_feature_matrix(SGMatrix<ST> src)
00560 {
00561 if (m_subset)
00562 SG_ERROR("A subset is set, cannot call copy_feature_matrix\n");
00563
00564 free_feature_matrix();
00565 int32_t num_feat = src.num_rows;
00566 int32_t num_vec = src.num_cols;
00567 feature_matrix = SG_MALLOC(ST, ((int64_t) num_feat) * num_vec);
00568 feature_matrix_num_features = num_feat;
00569 feature_matrix_num_vectors = num_vec;
00570
00571 memcpy(feature_matrix, src.matrix,
00572 (sizeof(ST) * ((int64_t) num_feat) * num_vec));
00573
00574 num_features = num_feat;
00575 num_vectors = num_vec;
00576 initialize_cache();
00577 }
00578
00585 void obtain_from_dot(CDotFeatures* df)
00586 {
00587 remove_subset();
00588
00589 int32_t num_feat = df->get_dim_feature_space();
00590 int32_t num_vec = df->get_num_vectors();
00591
00592 ASSERT(num_feat>0 && num_vec>0);
00593
00594 free_feature_matrix();
00595 feature_matrix = SG_MALLOC(ST, ((int64_t) num_feat) * num_vec);
00596 feature_matrix_num_features = num_feat;
00597 feature_matrix_num_vectors = num_vec;
00598
00599 for (int32_t i = 0; i < num_vec; i++)
00600 {
00601 SGVector<float64_t> v = df->get_computed_dot_feature_vector(i);
00602 ASSERT(num_feat==v.vlen);
00603
00604 for (int32_t j = 0; j < num_feat; j++)
00605 feature_matrix[i * int64_t(num_feat) + j] = (ST) v.vector[j];
00606
00607 v.free_vector();
00608 }
00609 num_features = num_feat;
00610 num_vectors = num_vec;
00611 }
00612
00623 virtual bool apply_preprocessor(bool force_preprocessing = false)
00624 {
00625 if (m_subset)
00626 SG_ERROR("A subset is set, cannot call apply_preproc\n");
00627
00628 SG_DEBUG( "force: %d\n", force_preprocessing);
00629
00630 if (feature_matrix && get_num_preprocessors())
00631 {
00632 for (int32_t i = 0; i < get_num_preprocessors(); i++)
00633 {
00634 if ((!is_preprocessed(i) || force_preprocessing))
00635 {
00636 set_preprocessed(i);
00637 CSimplePreprocessor<ST>* p =
00638 (CSimplePreprocessor<ST>*) get_preprocessor(i);
00639 SG_INFO( "preprocessing using preproc %s\n", p->get_name());
00640
00641 if (p->apply_to_feature_matrix(this).matrix == NULL)
00642 {
00643 SG_UNREF(p);
00644 return false;
00645 }SG_UNREF(p);
00646
00647 }
00648 }
00649
00650 return true;
00651 }
00652 else
00653 {
00654 if (!feature_matrix)
00655 SG_ERROR( "no feature matrix\n");
00656
00657 if (!get_num_preprocessors())
00658 SG_ERROR( "no preprocessors available\n");
00659
00660 return false;
00661 }
00662 }
00663
00668 virtual int32_t get_size() { return sizeof(ST); }
00669
00674 virtual inline int32_t get_num_vectors() const
00675 {
00676 return m_subset ? m_subset->get_size() : num_vectors;
00677 }
00678
00683 inline int32_t get_num_features() { return num_features; }
00684
00689 inline void set_num_features(int32_t num)
00690 {
00691 num_features = num;
00692 initialize_cache();
00693 }
00694
00701 inline void set_num_vectors(int32_t num)
00702 {
00703 if (m_subset)
00704 SG_ERROR("A subset is set, cannot call set_num_vectors\n");
00705
00706 num_vectors = num;
00707 initialize_cache();
00708 }
00709
00714 inline void initialize_cache()
00715 {
00716 if (m_subset)
00717 SG_ERROR("A subset is set, cannot call initialize_cache\n");
00718
00719 if (num_features && num_vectors)
00720 {
00721 SG_UNREF(feature_cache);
00722 feature_cache = new CCache<ST>(get_cache_size(), num_features,
00723 num_vectors);
00724 SG_REF(feature_cache);
00725 }
00726 }
00727
00732 inline virtual EFeatureClass get_feature_class() { return C_SIMPLE; }
00733
00738 inline virtual EFeatureType get_feature_type();
00739
00748 virtual bool reshape(int32_t p_num_features, int32_t p_num_vectors)
00749 {
00750 if (m_subset)
00751 SG_ERROR("A subset is set, cannot call reshape\n");
00752
00753 if (p_num_features * p_num_vectors
00754 == this->num_features * this->num_vectors)
00755 {
00756 num_features = p_num_features;
00757 num_vectors = p_num_vectors;
00758 return true;
00759 } else
00760 return false;
00761 }
00762
00770 virtual int32_t get_dim_feature_space() const { return num_features; }
00771
00781 virtual float64_t dot(int32_t vec_idx1, CDotFeatures* df,
00782 int32_t vec_idx2)
00783 {
00784 ASSERT(df);
00785 ASSERT(df->get_feature_type() == get_feature_type());
00786 ASSERT(df->get_feature_class() == get_feature_class());
00787 CSimpleFeatures<ST>* sf = (CSimpleFeatures<ST>*) df;
00788
00789 int32_t len1, len2;
00790 bool free1, free2;
00791
00792 ST* vec1 = get_feature_vector(vec_idx1, len1, free1);
00793 ST* vec2 = sf->get_feature_vector(vec_idx2, len2, free2);
00794
00795 float64_t result = CMath::dot(vec1, vec2, len1);
00796
00797 free_feature_vector(vec1, vec_idx1, free1);
00798 sf->free_feature_vector(vec2, vec_idx2, free2);
00799
00800 return result;
00801 }
00802
00811 virtual float64_t dense_dot(int32_t vec_idx1, const float64_t* vec2,
00812 int32_t vec2_len);
00813
00824 virtual void add_to_dense_vec(float64_t alpha, int32_t vec_idx1,
00825 float64_t* vec2, int32_t vec2_len, bool abs_val = false)
00826 {
00827 ASSERT(vec2_len == num_features);
00828
00829 int32_t vlen;
00830 bool vfree;
00831 ST* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00832
00833 ASSERT(vlen == num_features);
00834
00835 if (abs_val)
00836 {
00837 for (int32_t i = 0; i < num_features; i++)
00838 vec2[i] += alpha * CMath::abs(vec1[i]);
00839 }
00840 else
00841 {
00842 for (int32_t i = 0; i < num_features; i++)
00843 vec2[i] += alpha * vec1[i];
00844 }
00845
00846 free_feature_vector(vec1, vec_idx1, vfree);
00847 }
00848
00854 virtual inline int32_t get_nnz_features_for_vector(int32_t num)
00855 {
00856
00857 return num_features;
00858 }
00859
00867 virtual inline bool Align_char_features(CStringFeatures<char>* cf,
00868 CStringFeatures<char>* Ref, float64_t gapCost)
00869 {
00870 return false;
00871 }
00872
00877 virtual inline void load(CFile* loader);
00878
00883 virtual inline void save(CFile* saver);
00884
00885 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00886
00887 struct simple_feature_iterator
00888 {
00890 ST* vec;
00892 int32_t vidx;
00894 int32_t vlen;
00896 bool vfree;
00897
00899 int32_t index;
00900 };
00901 #endif
00902
00914 virtual void* get_feature_iterator(int32_t vector_index)
00915 {
00916 if (vector_index>=get_num_vectors())
00917 {
00918 SG_ERROR("Index out of bounds (number of vectors %d, you "
00919 "requested %d)\n", get_num_vectors(), vector_index);
00920 }
00921
00922 simple_feature_iterator* iterator = SG_MALLOC(simple_feature_iterator, 1);
00923 iterator->vec = get_feature_vector(vector_index, iterator->vlen,
00924 iterator->vfree);
00925 iterator->vidx = vector_index;
00926 iterator->index = 0;
00927 return iterator;
00928 }
00929
00942 virtual bool get_next_feature(int32_t& index, float64_t& value,
00943 void* iterator)
00944 {
00945 simple_feature_iterator* it = (simple_feature_iterator*) iterator;
00946 if (!it || it->index >= it->vlen)
00947 return false;
00948
00949 index = it->index++;
00950 value = (float64_t) it->vec[index];
00951
00952 return true;
00953 }
00954
00960 virtual void free_feature_iterator(void* iterator)
00961 {
00962 if (!iterator)
00963 return;
00964
00965 simple_feature_iterator* it = (simple_feature_iterator*) iterator;
00966 free_feature_vector(it->vec, it->vidx, it->vfree);
00967 SG_FREE(it);
00968 }
00969
00978 virtual CFeatures* copy_subset(SGVector<index_t> indices)
00979 {
00980 SGMatrix<ST> feature_matrix_copy(num_features, indices.vlen);
00981
00982 for (index_t i=0; i<indices.vlen; ++i)
00983 {
00984 index_t real_idx=subset_idx_conversion(indices.vector[i]);
00985 memcpy(&feature_matrix_copy.matrix[i*num_features],
00986 &feature_matrix[real_idx*num_features],
00987 num_features*sizeof(ST));
00988 }
00989
00990 return new CSimpleFeatures(feature_matrix_copy);
00991 }
00992
00994 inline virtual const char* get_name() const { return "SimpleFeatures"; }
00995
00996 protected:
01008 virtual ST* compute_feature_vector(int32_t num, int32_t& len, ST* target =
01009 NULL)
01010 {
01011 SG_NOTIMPLEMENTED;
01012 len = 0;
01013 return NULL;
01014 }
01015
01016 private:
01017 void init()
01018 {
01019 num_vectors = 0;
01020 num_features = 0;
01021
01022 feature_matrix = NULL;
01023 feature_matrix_num_vectors = 0;
01024 feature_matrix_num_features = 0;
01025
01026 feature_cache = NULL;
01027
01028 set_generic<ST>();
01029
01030 m_parameters->add(&num_vectors, "num_vectors",
01031 "Number of vectors.");
01032 m_parameters->add(&num_features, "num_features", "Number of features.");
01033 m_parameters->add_matrix(&feature_matrix, &feature_matrix_num_features,
01034 &feature_matrix_num_vectors, "feature_matrix",
01035 "Matrix of feature vectors / 1 vector per column.");
01036 }
01037
01038 protected:
01040 int32_t num_vectors;
01041
01043 int32_t num_features;
01044
01049 ST* feature_matrix;
01050
01052 int32_t feature_matrix_num_vectors;
01053
01055 int32_t feature_matrix_num_features;
01056
01058 CCache<ST>* feature_cache;
01059 };
01060
01061 #ifndef DOXYGEN_SHOULD_SKIP_THIS
01062
01063 #define GET_FEATURE_TYPE(f_type, sg_type) \
01064 template<> inline EFeatureType CSimpleFeatures<sg_type>::get_feature_type() \
01065 { \
01066 return f_type; \
01067 }
01068
01069 GET_FEATURE_TYPE(F_BOOL, bool)
01070 GET_FEATURE_TYPE(F_CHAR, char)
01071 GET_FEATURE_TYPE(F_BYTE, uint8_t)
01072 GET_FEATURE_TYPE(F_BYTE, int8_t)
01073 GET_FEATURE_TYPE(F_SHORT, int16_t)
01074 GET_FEATURE_TYPE(F_WORD, uint16_t)
01075 GET_FEATURE_TYPE(F_INT, int32_t)
01076 GET_FEATURE_TYPE(F_UINT, uint32_t)
01077 GET_FEATURE_TYPE(F_LONG, int64_t)
01078 GET_FEATURE_TYPE(F_ULONG, uint64_t)
01079 GET_FEATURE_TYPE(F_SHORTREAL, float32_t)
01080 GET_FEATURE_TYPE(F_DREAL, float64_t)
01081 GET_FEATURE_TYPE(F_LONGREAL, floatmax_t)
01082 #undef GET_FEATURE_TYPE
01083
01092 template<> inline bool CSimpleFeatures<float64_t>::Align_char_features(
01093 CStringFeatures<char>* cf, CStringFeatures<char>* Ref,
01094 float64_t gapCost)
01095 {
01096 ASSERT(cf);
01097
01098
01099
01100
01101
01102
01103
01104
01105
01106
01107
01108
01109
01110
01111
01112
01113
01114
01115
01116
01117
01118
01119
01120
01121
01122 return true;
01123 }
01124
01125 template<> inline float64_t CSimpleFeatures<bool>::dense_dot(int32_t vec_idx1,
01126 const float64_t* vec2, int32_t vec2_len)
01127 {
01128 ASSERT(vec2_len == num_features);
01129
01130 int32_t vlen;
01131 bool vfree;
01132 bool* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
01133
01134 ASSERT(vlen == num_features);
01135 float64_t result = 0;
01136
01137 for (int32_t i = 0; i < num_features; i++)
01138 result += vec1[i] ? vec2[i] : 0;
01139
01140 free_feature_vector(vec1, vec_idx1, vfree);
01141
01142 return result;
01143 }
01144
01145 template<> inline float64_t CSimpleFeatures<char>::dense_dot(int32_t vec_idx1,
01146 const float64_t* vec2, int32_t vec2_len)
01147 {
01148 ASSERT(vec2_len == num_features);
01149
01150 int32_t vlen;
01151 bool vfree;
01152 char* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
01153
01154 ASSERT(vlen == num_features);
01155 float64_t result = 0;
01156
01157 for (int32_t i = 0; i < num_features; i++)
01158 result += vec1[i] * vec2[i];
01159
01160 free_feature_vector(vec1, vec_idx1, vfree);
01161
01162 return result;
01163 }
01164
01165 template<> inline float64_t CSimpleFeatures<int8_t>::dense_dot(int32_t vec_idx1,
01166 const float64_t* vec2, int32_t vec2_len)
01167 {
01168 ASSERT(vec2_len == num_features);
01169
01170 int32_t vlen;
01171 bool vfree;
01172 int8_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
01173
01174 ASSERT(vlen == num_features);
01175 float64_t result = 0;
01176
01177 for (int32_t i = 0; i < num_features; i++)
01178 result += vec1[i] * vec2[i];
01179
01180 free_feature_vector(vec1, vec_idx1, vfree);
01181
01182 return result;
01183 }
01184
01185 template<> inline float64_t CSimpleFeatures<uint8_t>::dense_dot(
01186 int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
01187 {
01188 ASSERT(vec2_len == num_features);
01189
01190 int32_t vlen;
01191 bool vfree;
01192 uint8_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
01193
01194 ASSERT(vlen == num_features);
01195 float64_t result = 0;
01196
01197 for (int32_t i = 0; i < num_features; i++)
01198 result += vec1[i] * vec2[i];
01199
01200 free_feature_vector(vec1, vec_idx1, vfree);
01201
01202 return result;
01203 }
01204
01205 template<> inline float64_t CSimpleFeatures<int16_t>::dense_dot(
01206 int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
01207 {
01208 ASSERT(vec2_len == num_features);
01209
01210 int32_t vlen;
01211 bool vfree;
01212 int16_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
01213
01214 ASSERT(vlen == num_features);
01215 float64_t result = 0;
01216
01217 for (int32_t i = 0; i < num_features; i++)
01218 result += vec1[i] * vec2[i];
01219
01220 free_feature_vector(vec1, vec_idx1, vfree);
01221
01222 return result;
01223 }
01224
01225 template<> inline float64_t CSimpleFeatures<uint16_t>::dense_dot(
01226 int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
01227 {
01228 ASSERT(vec2_len == num_features);
01229
01230 int32_t vlen;
01231 bool vfree;
01232 uint16_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
01233
01234 ASSERT(vlen == num_features);
01235 float64_t result = 0;
01236
01237 for (int32_t i = 0; i < num_features; i++)
01238 result += vec1[i] * vec2[i];
01239
01240 free_feature_vector(vec1, vec_idx1, vfree);
01241
01242 return result;
01243 }
01244
01245 template<> inline float64_t CSimpleFeatures<int32_t>::dense_dot(
01246 int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
01247 {
01248 ASSERT(vec2_len == num_features);
01249
01250 int32_t vlen;
01251 bool vfree;
01252 int32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
01253
01254 ASSERT(vlen == num_features);
01255 float64_t result = 0;
01256
01257 for (int32_t i = 0; i < num_features; i++)
01258 result += vec1[i] * vec2[i];
01259
01260 free_feature_vector(vec1, vec_idx1, vfree);
01261
01262 return result;
01263 }
01264
01265 template<> inline float64_t CSimpleFeatures<uint32_t>::dense_dot(
01266 int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
01267 {
01268 ASSERT(vec2_len == num_features);
01269
01270 int32_t vlen;
01271 bool vfree;
01272 uint32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
01273
01274 ASSERT(vlen == num_features);
01275 float64_t result = 0;
01276
01277 for (int32_t i = 0; i < num_features; i++)
01278 result += vec1[i] * vec2[i];
01279
01280 free_feature_vector(vec1, vec_idx1, vfree);
01281
01282 return result;
01283 }
01284
01285 template<> inline float64_t CSimpleFeatures<int64_t>::dense_dot(
01286 int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
01287 {
01288 ASSERT(vec2_len == num_features);
01289
01290 int32_t vlen;
01291 bool vfree;
01292 int64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
01293
01294 ASSERT(vlen == num_features);
01295 float64_t result = 0;
01296
01297 for (int32_t i = 0; i < num_features; i++)
01298 result += vec1[i] * vec2[i];
01299
01300 free_feature_vector(vec1, vec_idx1, vfree);
01301
01302 return result;
01303 }
01304
01305 template<> inline float64_t CSimpleFeatures<uint64_t>::dense_dot(
01306 int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
01307 {
01308 ASSERT(vec2_len == num_features);
01309
01310 int32_t vlen;
01311 bool vfree;
01312 uint64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
01313
01314 ASSERT(vlen == num_features);
01315 float64_t result = 0;
01316
01317 for (int32_t i = 0; i < num_features; i++)
01318 result += vec1[i] * vec2[i];
01319
01320 free_feature_vector(vec1, vec_idx1, vfree);
01321
01322 return result;
01323 }
01324
01325 template<> inline float64_t CSimpleFeatures<float32_t>::dense_dot(
01326 int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
01327 {
01328 ASSERT(vec2_len == num_features);
01329
01330 int32_t vlen;
01331 bool vfree;
01332 float32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
01333
01334 ASSERT(vlen == num_features);
01335 float64_t result = 0;
01336
01337 for (int32_t i = 0; i < num_features; i++)
01338 result += vec1[i] * vec2[i];
01339
01340 free_feature_vector(vec1, vec_idx1, vfree);
01341
01342 return result;
01343 }
01344
01345 template<> inline float64_t CSimpleFeatures<float64_t>::dense_dot(
01346 int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
01347 {
01348 ASSERT(vec2_len == num_features);
01349
01350 int32_t vlen;
01351 bool vfree;
01352 float64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
01353
01354 ASSERT(vlen == num_features);
01355 float64_t result = CMath::dot(vec1, vec2, num_features);
01356
01357 free_feature_vector(vec1, vec_idx1, vfree);
01358
01359 return result;
01360 }
01361
01362 template<> inline float64_t CSimpleFeatures<floatmax_t>::dense_dot(
01363 int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
01364 {
01365 ASSERT(vec2_len == num_features);
01366
01367 int32_t vlen;
01368 bool vfree;
01369 floatmax_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
01370
01371 ASSERT(vlen == num_features);
01372 float64_t result = 0;
01373
01374 for (int32_t i = 0; i < num_features; i++)
01375 result += vec1[i] * vec2[i];
01376
01377 free_feature_vector(vec1, vec_idx1, vfree);
01378
01379 return result;
01380 }
01381
01382 #define LOAD(f_load, sg_type) \
01383 template<> inline void CSimpleFeatures<sg_type>::load(CFile* loader) \
01384 { \
01385 SG_SET_LOCALE_C; \
01386 ASSERT(loader); \
01387 sg_type* matrix; \
01388 int32_t num_feat; \
01389 int32_t num_vec; \
01390 loader->f_load(matrix, num_feat, num_vec); \
01391 set_feature_matrix(matrix, num_feat, num_vec); \
01392 SG_RESET_LOCALE; \
01393 }
01394
01395 LOAD(get_matrix, bool)
01396 LOAD(get_matrix, char)
01397 LOAD(get_int8_matrix, int8_t)
01398 LOAD(get_matrix, uint8_t)
01399 LOAD(get_matrix, int16_t)
01400 LOAD(get_matrix, uint16_t)
01401 LOAD(get_matrix, int32_t)
01402 LOAD(get_uint_matrix, uint32_t)
01403 LOAD(get_long_matrix, int64_t)
01404 LOAD(get_ulong_matrix, uint64_t)
01405 LOAD(get_matrix, float32_t)
01406 LOAD(get_matrix, float64_t)
01407 LOAD(get_longreal_matrix, floatmax_t)
01408 #undef LOAD
01409
01410 #define SAVE(f_write, sg_type) \
01411 template<> inline void CSimpleFeatures<sg_type>::save(CFile* writer) \
01412 { \
01413 SG_SET_LOCALE_C; \
01414 ASSERT(writer); \
01415 writer->f_write(feature_matrix, num_features, num_vectors); \
01416 SG_RESET_LOCALE; \
01417 }
01418
01419 SAVE(set_matrix, bool)
01420 SAVE(set_matrix, char)
01421 SAVE(set_int8_matrix, int8_t)
01422 SAVE(set_matrix, uint8_t)
01423 SAVE(set_matrix, int16_t)
01424 SAVE(set_matrix, uint16_t)
01425 SAVE(set_matrix, int32_t)
01426 SAVE(set_uint_matrix, uint32_t)
01427 SAVE(set_long_matrix, int64_t)
01428 SAVE(set_ulong_matrix, uint64_t)
01429 SAVE(set_matrix, float32_t)
01430 SAVE(set_matrix, float64_t)
01431 SAVE(set_longreal_matrix, floatmax_t)
01432 #undef SAVE
01433
01434 #endif // DOXYGEN_SHOULD_SKIP_THIS
01435 }
01436 #endif // _SIMPLEFEATURES__H__