SimpleFeatures.cpp

Go to the documentation of this file.
00001 #include <shogun/features/SimpleFeatures.h>
00002 #include <shogun/preprocessor/SimplePreprocessor.h>
00003 #include <shogun/io/SGIO.h>
00004 #include <shogun/base/Parameter.h>
00005 #include <shogun/mathematics/Math.h>
00006 
00007 #include <string.h>
00008 
00009 namespace shogun {
00010 
00011 template<class ST> CSimpleFeatures<ST>::CSimpleFeatures(int32_t size) : CDotFeatures(size)
00012 {
00013     init();
00014 }
00015 
00016 template<class ST> CSimpleFeatures<ST>::CSimpleFeatures(const CSimpleFeatures & orig) :
00017         CDotFeatures(orig)
00018 {
00019     copy_feature_matrix(SGMatrix<ST>(orig.feature_matrix,
00020                                      orig.num_features,
00021                                      orig.num_vectors));
00022     initialize_cache();
00023     m_subset=orig.m_subset->duplicate();
00024 }
00025 
00026 template<class ST> CSimpleFeatures<ST>::CSimpleFeatures(SGMatrix<ST> matrix) :
00027         CDotFeatures()
00028 {
00029     init();
00030     set_feature_matrix(matrix);
00031 }
00032 template<class ST> CSimpleFeatures<ST>::CSimpleFeatures(ST* src, int32_t num_feat, int32_t num_vec) :
00033         CDotFeatures()
00034 {
00035     init();
00036     set_feature_matrix(src, num_feat, num_vec);
00037 }
00038 template<class ST> CSimpleFeatures<ST>::CSimpleFeatures(CFile* loader) :
00039         CDotFeatures(loader)
00040 {
00041     init();
00042     load(loader);
00043 }
00044 template<class ST> CFeatures* CSimpleFeatures<ST>::duplicate() const
00045 {
00046     return new CSimpleFeatures<ST>(*this);
00047 }
00048 
00049 template<class ST> CSimpleFeatures<ST>::~CSimpleFeatures() { free_features(); }
00050 
00051 template<class ST> void CSimpleFeatures<ST>::free_features()
00052 {
00053     remove_subset();
00054     free_feature_matrix();
00055     SG_UNREF(feature_cache);
00056 }
00057 
00058 template<class ST> void CSimpleFeatures<ST>::free_feature_matrix()
00059 {
00060     remove_subset();
00061     SG_FREE(feature_matrix);
00062     feature_matrix = NULL;
00063     feature_matrix_num_features = num_features;
00064     feature_matrix_num_vectors = num_vectors;
00065     num_vectors = 0;
00066     num_features = 0;
00067 }
00068 
00069 template<class ST> ST* CSimpleFeatures<ST>::get_feature_vector(int32_t num, int32_t& len, bool& dofree)
00070 {
00071     /* index conversion for subset, only for array access */
00072     int32_t real_num=subset_idx_conversion(num);
00073 
00074     len = num_features;
00075 
00076     if (feature_matrix)
00077     {
00078         dofree = false;
00079         return &feature_matrix[real_num * int64_t(num_features)];
00080     }
00081 
00082     ST* feat = NULL;
00083     dofree = false;
00084 
00085     if (feature_cache)
00086     {
00087         feat = feature_cache->lock_entry(num);
00088 
00089         if (feat)
00090             return feat;
00091         else 
00092             feat = feature_cache->set_entry(real_num);
00093     }
00094 
00095     if (!feat)
00096         dofree = true;
00097     feat = compute_feature_vector(num, len, feat);
00098 
00099     if (get_num_preprocessors())
00100     {
00101         int32_t tmp_len = len;
00102         ST* tmp_feat_before = feat;
00103         ST* tmp_feat_after = NULL;
00104 
00105         for (int32_t i = 0; i < get_num_preprocessors(); i++)
00106         {
00107             CSimplePreprocessor<ST>* p =
00108                     (CSimplePreprocessor<ST>*) get_preprocessor(i);
00109             // temporary hack
00110             SGVector<ST> applied = p->apply_to_feature_vector(
00111                     SGVector<ST>(tmp_feat_before, tmp_len));
00112             tmp_feat_after = applied.vector;
00113             SG_UNREF(p);
00114 
00115             if (i != 0) // delete feature vector, except for the the first one, i.e., feat
00116                 SG_FREE(tmp_feat_before);
00117             tmp_feat_before = tmp_feat_after;
00118         }
00119 
00120         memcpy(feat, tmp_feat_after, sizeof(ST) * tmp_len);
00121         SG_FREE(tmp_feat_after);
00122 
00123         len = tmp_len;
00124     }
00125     return feat;
00126 }
00127 
00128 template<class ST> void CSimpleFeatures<ST>::set_feature_vector(SGVector<ST> vector, int32_t num)
00129 {
00130     /* index conversion for subset, only for array access */
00131     int32_t real_num=subset_idx_conversion(num);
00132 
00133     if (num>=get_num_vectors())
00134     {
00135         SG_ERROR("Index out of bounds (number of vectors %d, you "
00136         "requested %d)\n", get_num_vectors(), num);
00137     }
00138 
00139     if (!feature_matrix)
00140         SG_ERROR("Requires a in-memory feature matrix\n");
00141 
00142     if (vector.vlen != num_features)
00143         SG_ERROR(
00144                 "Vector not of length %d (has %d)\n", num_features, vector.vlen);
00145 
00146     memcpy(&feature_matrix[real_num * int64_t(num_features)], vector.vector,
00147             int64_t(num_features) * sizeof(ST));
00148 }
00149 
00150 template<class ST> SGVector<ST> CSimpleFeatures<ST>::get_feature_vector(int32_t num)
00151 {
00152     /* index conversion for subset, only for array access */
00153     int32_t real_num=subset_idx_conversion(num);
00154 
00155     if (num >= get_num_vectors())
00156     {
00157         SG_ERROR("Index out of bounds (number of vectors %d, you "
00158         "requested %d)\n", get_num_vectors(), real_num);
00159     }
00160 
00161     SGVector<ST> vec;
00162     vec.vector = get_feature_vector(num, vec.vlen, vec.do_free);
00163     return vec;
00164 }
00165 
00166 template<class ST> void CSimpleFeatures<ST>::free_feature_vector(ST* feat_vec, int32_t num, bool dofree)
00167 {
00168     if (feature_cache)
00169         feature_cache->unlock_entry(subset_idx_conversion(num));
00170 
00171     if (dofree)
00172         SG_FREE(feat_vec);
00173 }
00174 
00175 template<class ST> void CSimpleFeatures<ST>::free_feature_vector(SGVector<ST> vec, int32_t num)
00176 {
00177     free_feature_vector(vec.vector, num, vec.do_free);
00178 }
00179 
00180 template<class ST> void CSimpleFeatures<ST>::vector_subset(int32_t* idx, int32_t idx_len)
00181 {
00182     if (m_subset)
00183         SG_ERROR("A subset is set, cannot call vector_subset\n");
00184 
00185     ASSERT(feature_matrix);
00186     ASSERT(idx_len<=num_vectors);
00187 
00188     int32_t num_vec = num_vectors;
00189     num_vectors = idx_len;
00190 
00191     int32_t old_ii = -1;
00192 
00193     for (int32_t i = 0; i < idx_len; i++)
00194     {
00195         int32_t ii = idx[i];
00196         ASSERT(old_ii<ii);
00197 
00198         if (ii < 0 || ii >= num_vec)
00199             SG_ERROR( "Index out of range: should be 0<%d<%d\n", ii, num_vec);
00200 
00201         if (i == ii)
00202             continue;
00203 
00204         memcpy(&feature_matrix[int64_t(num_features) * i],
00205                 &feature_matrix[int64_t(num_features) * ii],
00206                 num_features * sizeof(ST));
00207         old_ii = ii;
00208     }
00209 }
00210 
00211 template<class ST> void CSimpleFeatures<ST>::feature_subset(int32_t* idx, int32_t idx_len)
00212 {
00213     if (m_subset)
00214         SG_ERROR("A subset is set, cannot call feature_subset\n");
00215 
00216     ASSERT(feature_matrix);
00217     ASSERT(idx_len<=num_features);
00218     int32_t num_feat = num_features;
00219     num_features = idx_len;
00220 
00221     for (int32_t i = 0; i < num_vectors; i++)
00222     {
00223         ST* src = &feature_matrix[int64_t(num_feat) * i];
00224         ST* dst = &feature_matrix[int64_t(num_features) * i];
00225 
00226         int32_t old_jj = -1;
00227         for (int32_t j = 0; j < idx_len; j++)
00228         {
00229             int32_t jj = idx[j];
00230             ASSERT(old_jj<jj);
00231             if (jj < 0 || jj >= num_feat)
00232                 SG_ERROR(
00233                         "Index out of range: should be 0<%d<%d\n", jj, num_feat);
00234 
00235             dst[j] = src[jj];
00236             old_jj = jj;
00237         }
00238     }
00239 }
00240 
00241 template<class ST> void CSimpleFeatures<ST>::get_feature_matrix(ST** dst, int32_t* num_feat, int32_t* num_vec)
00242 {
00243     ASSERT(feature_matrix);
00244 
00245     int64_t num = int64_t(num_features) * get_num_vectors();
00246     *num_feat = num_features;
00247     *num_vec = get_num_vectors();
00248     *dst = SG_MALLOC(ST, num);
00249 
00250     /* copying depends on whether a subset is used */
00251     if (m_subset)
00252     {
00253         /* copy vector wise */
00254         for (int32_t i = 0; i < *num_vec; ++i)
00255         {
00256             int32_t real_i = m_subset->subset_idx_conversion(i);
00257             memcpy(*dst, &feature_matrix[real_i * int64_t(num_features)],
00258                     num_features * sizeof(ST));
00259         }
00260     }
00261     else
00262     {
00263         /* copy complete matrix */
00264         memcpy(*dst, feature_matrix, num * sizeof(ST));
00265     }
00266 }
00267 
00268 template<class ST> SGMatrix<ST> CSimpleFeatures<ST>::get_feature_matrix()
00269 {
00270     return SGMatrix<ST>(feature_matrix, num_features, num_vectors);
00271 }
00272 
00273 template<class ST> SGMatrix<ST> CSimpleFeatures<ST>::steal_feature_matrix()
00274 {
00275     SGMatrix<ST> st_feature_matrix(feature_matrix, num_features, num_vectors);
00276     remove_subset();
00277     SG_UNREF(feature_cache);
00278     clean_preprocessors();
00279 
00280     feature_matrix = NULL;
00281     feature_matrix_num_vectors = 0;
00282     feature_matrix_num_features = 0;
00283     num_features = 0;
00284     num_vectors = 0;
00285     return st_feature_matrix;
00286 }
00287 
00288 template<class ST> void CSimpleFeatures<ST>::set_feature_matrix(SGMatrix<ST> matrix)
00289 {
00290     remove_subset();
00291     free_feature_matrix();
00292     feature_matrix = matrix.matrix;
00293     num_features = matrix.num_rows;
00294     num_vectors = matrix.num_cols;
00295     feature_matrix_num_vectors = num_vectors;
00296     feature_matrix_num_features = num_features;
00297 }
00298 
00299 template<class ST> ST* CSimpleFeatures<ST>::get_feature_matrix(int32_t &num_feat, int32_t &num_vec)
00300 {
00301     num_feat = num_features;
00302     num_vec = num_vectors;
00303     return feature_matrix;
00304 }
00305 
00306 template<class ST> CSimpleFeatures<ST>* CSimpleFeatures<ST>::get_transposed()
00307 {
00308     int32_t num_feat;
00309     int32_t num_vec;
00310     ST* fm = get_transposed(num_feat, num_vec);
00311 
00312     return new CSimpleFeatures<ST>(fm, num_feat, num_vec);
00313 }
00314 
00315 template<class ST> ST* CSimpleFeatures<ST>::get_transposed(int32_t &num_feat, int32_t &num_vec)
00316 {
00317     num_feat = get_num_vectors();
00318     num_vec = num_features;
00319 
00320     int32_t old_num_vec=get_num_vectors();
00321 
00322     ST* fm = SG_MALLOC(ST, int64_t(num_feat) * num_vec);
00323 
00324     for (int32_t i=0; i<old_num_vec; i++)
00325     {
00326         SGVector<ST> vec=get_feature_vector(i);
00327 
00328         for (int32_t j=0; j<vec.vlen; j++)
00329             fm[j*int64_t(old_num_vec)+i]=vec.vector[j];
00330 
00331         free_feature_vector(vec, i);
00332     }
00333 
00334     return fm;
00335 }
00336 
00337 template<class ST> void CSimpleFeatures<ST>::set_feature_matrix(ST* fm, int32_t num_feat, int32_t num_vec)
00338 {
00339     if (m_subset)
00340         SG_ERROR("A subset is set, cannot call set_feature_matrix\n");
00341 
00342     free_feature_matrix();
00343     feature_matrix = fm;
00344     feature_matrix_num_features = num_feat;
00345     feature_matrix_num_vectors = num_vec;
00346 
00347     num_features = num_feat;
00348     num_vectors = num_vec;
00349     initialize_cache();
00350 }
00351 
00352 template<class ST> void CSimpleFeatures<ST>::copy_feature_matrix(SGMatrix<ST> src)
00353 {
00354     if (m_subset)
00355         SG_ERROR("A subset is set, cannot call copy_feature_matrix\n");
00356 
00357     free_feature_matrix();
00358     int32_t num_feat = src.num_rows;
00359     int32_t num_vec = src.num_cols;
00360     feature_matrix = SG_MALLOC(ST, ((int64_t) num_feat) * num_vec);
00361     feature_matrix_num_features = num_feat;
00362     feature_matrix_num_vectors = num_vec;
00363 
00364     memcpy(feature_matrix, src.matrix,
00365             (sizeof(ST) * ((int64_t) num_feat) * num_vec));
00366 
00367     num_features = num_feat;
00368     num_vectors = num_vec;
00369     initialize_cache();
00370 }
00371 
00372 template<class ST> void CSimpleFeatures<ST>::obtain_from_dot(CDotFeatures* df)
00373 {
00374     remove_subset();
00375 
00376     int32_t num_feat = df->get_dim_feature_space();
00377     int32_t num_vec = df->get_num_vectors();
00378 
00379     ASSERT(num_feat>0 && num_vec>0);
00380 
00381     free_feature_matrix();
00382     feature_matrix = SG_MALLOC(ST, ((int64_t) num_feat) * num_vec);
00383     feature_matrix_num_features = num_feat;
00384     feature_matrix_num_vectors = num_vec;
00385 
00386     for (int32_t i = 0; i < num_vec; i++)
00387     {
00388         SGVector<float64_t> v = df->get_computed_dot_feature_vector(i);
00389         ASSERT(num_feat==v.vlen);
00390 
00391         for (int32_t j = 0; j < num_feat; j++)
00392             feature_matrix[i * int64_t(num_feat) + j] = (ST) v.vector[j];
00393 
00394         v.free_vector();
00395     }
00396     num_features = num_feat;
00397     num_vectors = num_vec;
00398 }
00399 
00400 template<class ST> bool CSimpleFeatures<ST>::apply_preprocessor(bool force_preprocessing)
00401 {
00402     if (m_subset)
00403         SG_ERROR("A subset is set, cannot call apply_preproc\n");
00404 
00405     SG_DEBUG( "force: %d\n", force_preprocessing);
00406 
00407     if (feature_matrix && get_num_preprocessors())
00408     {
00409         for (int32_t i = 0; i < get_num_preprocessors(); i++)
00410         {
00411             if ((!is_preprocessed(i) || force_preprocessing))
00412             {
00413                 set_preprocessed(i);
00414                 CSimplePreprocessor<ST>* p =
00415                         (CSimplePreprocessor<ST>*) get_preprocessor(i);
00416                 SG_INFO( "preprocessing using preproc %s\n", p->get_name());
00417 
00418                 if (p->apply_to_feature_matrix(this).matrix == NULL)
00419                 {
00420                     SG_UNREF(p);
00421                     return false;
00422                 }SG_UNREF(p);
00423 
00424             }
00425         }
00426 
00427         return true;
00428     }
00429     else
00430     {
00431         if (!feature_matrix)
00432             SG_ERROR( "no feature matrix\n");
00433 
00434         if (!get_num_preprocessors())
00435             SG_ERROR( "no preprocessors available\n");
00436 
00437         return false;
00438     }
00439 }
00440 
00441 template<class ST> int32_t CSimpleFeatures<ST>::get_size() { return sizeof(ST); }
00442 
00443 template<class ST> int32_t CSimpleFeatures<ST>::get_num_vectors() const
00444 {
00445     return m_subset ? m_subset->get_size() : num_vectors;
00446 }
00447 
00448 template<class ST> int32_t CSimpleFeatures<ST>::get_num_features() { return num_features; }
00449 
00450 template<class ST> void CSimpleFeatures<ST>::set_num_features(int32_t num)
00451 {
00452     num_features = num;
00453     initialize_cache();
00454 }
00455 
00456 template<class ST> void CSimpleFeatures<ST>::set_num_vectors(int32_t num)
00457 {
00458     if (m_subset)
00459         SG_ERROR("A subset is set, cannot call set_num_vectors\n");
00460 
00461     num_vectors = num;
00462     initialize_cache();
00463 }
00464 
00465 template<class ST> void CSimpleFeatures<ST>::initialize_cache()
00466 {
00467     if (m_subset)
00468         SG_ERROR("A subset is set, cannot call initialize_cache\n");
00469 
00470     if (num_features && num_vectors)
00471     {
00472         SG_UNREF(feature_cache);
00473         feature_cache = new CCache<ST>(get_cache_size(), num_features,
00474                 num_vectors);
00475         SG_REF(feature_cache);
00476     }
00477 }
00478 
00479 template<class ST> EFeatureClass CSimpleFeatures<ST>::get_feature_class() { return C_SIMPLE; }
00480 
00481 template<class ST> bool CSimpleFeatures<ST>::reshape(int32_t p_num_features, int32_t p_num_vectors)
00482 {
00483     if (m_subset)
00484         SG_ERROR("A subset is set, cannot call reshape\n");
00485 
00486     if (p_num_features * p_num_vectors
00487             == this->num_features * this->num_vectors)
00488     {
00489         num_features = p_num_features;
00490         num_vectors = p_num_vectors;
00491         return true;
00492     } else
00493         return false;
00494 }
00495 
00496 template<class ST> int32_t CSimpleFeatures<ST>::get_dim_feature_space() const { return num_features; }
00497 
00498 template<class ST> float64_t CSimpleFeatures<ST>::dot(int32_t vec_idx1, CDotFeatures* df,
00499         int32_t vec_idx2)
00500 {
00501     ASSERT(df);
00502     ASSERT(df->get_feature_type() == get_feature_type());
00503     ASSERT(df->get_feature_class() == get_feature_class());
00504     CSimpleFeatures<ST>* sf = (CSimpleFeatures<ST>*) df;
00505 
00506     int32_t len1, len2;
00507     bool free1, free2;
00508 
00509     ST* vec1 = get_feature_vector(vec_idx1, len1, free1);
00510     ST* vec2 = sf->get_feature_vector(vec_idx2, len2, free2);
00511 
00512     float64_t result = CMath::dot(vec1, vec2, len1);
00513 
00514     free_feature_vector(vec1, vec_idx1, free1);
00515     sf->free_feature_vector(vec2, vec_idx2, free2);
00516 
00517     return result;
00518 }
00519 
00520 template<class ST> void CSimpleFeatures<ST>::add_to_dense_vec(float64_t alpha, int32_t vec_idx1,
00521         float64_t* vec2, int32_t vec2_len, bool abs_val)
00522 {
00523     ASSERT(vec2_len == num_features);
00524 
00525     int32_t vlen;
00526     bool vfree;
00527     ST* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00528 
00529     ASSERT(vlen == num_features);
00530 
00531     if (abs_val)
00532     {
00533         for (int32_t i = 0; i < num_features; i++)
00534             vec2[i] += alpha * CMath::abs(vec1[i]);
00535     }
00536     else
00537     {
00538         for (int32_t i = 0; i < num_features; i++)
00539             vec2[i] += alpha * vec1[i];
00540     }
00541 
00542     free_feature_vector(vec1, vec_idx1, vfree);
00543 }
00544 
00545 template<class ST> int32_t CSimpleFeatures<ST>::get_nnz_features_for_vector(int32_t num)
00546 {
00547     /* H.Strathmann: TODO fix according to Soerens mail */
00548     return num_features;
00549 }
00550 
00551 template<class ST> bool CSimpleFeatures<ST>::Align_char_features(CStringFeatures<char>* cf,
00552         CStringFeatures<char>* Ref, float64_t gapCost)
00553 {
00554     return false;
00555 }
00556 
00557 template<class ST> void* CSimpleFeatures<ST>::get_feature_iterator(int32_t vector_index)
00558 {
00559     if (vector_index>=get_num_vectors())
00560     {
00561         SG_ERROR("Index out of bounds (number of vectors %d, you "
00562         "requested %d)\n", get_num_vectors(), vector_index);
00563     }
00564 
00565     simple_feature_iterator* iterator = SG_MALLOC(simple_feature_iterator, 1);
00566     iterator->vec = get_feature_vector(vector_index, iterator->vlen,
00567             iterator->vfree);
00568     iterator->vidx = vector_index;
00569     iterator->index = 0;
00570     return iterator;
00571 }
00572 
00573 template<class ST> bool CSimpleFeatures<ST>::get_next_feature(int32_t& index, float64_t& value,
00574         void* iterator)
00575 {
00576     simple_feature_iterator* it = (simple_feature_iterator*) iterator;
00577     if (!it || it->index >= it->vlen)
00578         return false;
00579 
00580     index = it->index++;
00581     value = (float64_t) it->vec[index];
00582 
00583     return true;
00584 }
00585 
00586 template<class ST> void CSimpleFeatures<ST>::free_feature_iterator(void* iterator)
00587 {
00588     if (!iterator)
00589         return;
00590 
00591     simple_feature_iterator* it = (simple_feature_iterator*) iterator;
00592     free_feature_vector(it->vec, it->vidx, it->vfree);
00593     SG_FREE(it);
00594 }
00595 
00596 template<class ST> CFeatures* CSimpleFeatures<ST>::copy_subset(SGVector<index_t> indices)
00597 {
00598     SGMatrix<ST> feature_matrix_copy(num_features, indices.vlen);
00599 
00600     for (index_t i=0; i<indices.vlen; ++i)
00601     {
00602         index_t real_idx=subset_idx_conversion(indices.vector[i]);
00603         memcpy(&feature_matrix_copy.matrix[i*num_features],
00604                 &feature_matrix[real_idx*num_features],
00605                 num_features*sizeof(ST));
00606     }
00607 
00608     return new CSimpleFeatures(feature_matrix_copy);
00609 }
00610 
00611 template<class ST> ST* CSimpleFeatures<ST>::compute_feature_vector(int32_t num, int32_t& len,
00612         ST* target)
00613 {
00614     SG_NOTIMPLEMENTED;
00615     len = 0;
00616     return NULL;
00617 }
00618 
00619 template<class ST> void CSimpleFeatures<ST>::init()
00620 {
00621     num_vectors = 0;
00622     num_features = 0;
00623 
00624     feature_matrix = NULL;
00625     feature_matrix_num_vectors = 0;
00626     feature_matrix_num_features = 0;
00627 
00628     feature_cache = NULL;
00629 
00630     set_generic<ST>();
00631     /* not store number of vectors in subset */
00632     m_parameters->add(&num_vectors, "num_vectors",
00633             "Number of vectors.");
00634     m_parameters->add(&num_features, "num_features", "Number of features.");
00635     m_parameters->add_matrix(&feature_matrix, &feature_matrix_num_features,
00636             &feature_matrix_num_vectors, "feature_matrix",
00637             "Matrix of feature vectors / 1 vector per column.");
00638 }
00639 
00640 #define GET_FEATURE_TYPE(f_type, sg_type)   \
00641 template<> EFeatureType CSimpleFeatures<sg_type>::get_feature_type() \
00642 {                                                                           \
00643     return f_type;                                                          \
00644 }
00645 
00646 GET_FEATURE_TYPE(F_BOOL, bool)
00647 GET_FEATURE_TYPE(F_CHAR, char)
00648 GET_FEATURE_TYPE(F_BYTE, uint8_t)
00649 GET_FEATURE_TYPE(F_BYTE, int8_t)
00650 GET_FEATURE_TYPE(F_SHORT, int16_t)
00651 GET_FEATURE_TYPE(F_WORD, uint16_t)
00652 GET_FEATURE_TYPE(F_INT, int32_t)
00653 GET_FEATURE_TYPE(F_UINT, uint32_t)
00654 GET_FEATURE_TYPE(F_LONG, int64_t)
00655 GET_FEATURE_TYPE(F_ULONG, uint64_t)
00656 GET_FEATURE_TYPE(F_SHORTREAL, float32_t)
00657 GET_FEATURE_TYPE(F_DREAL, float64_t)
00658 GET_FEATURE_TYPE(F_LONGREAL, floatmax_t)
00659 #undef GET_FEATURE_TYPE
00660 
00669 template<> bool CSimpleFeatures<float64_t>::Align_char_features(
00670         CStringFeatures<char>* cf, CStringFeatures<char>* Ref,
00671         float64_t gapCost)
00672 {
00673     ASSERT(cf);
00674     /*num_vectors=cf->get_num_vectors();
00675      num_features=Ref->get_num_vectors();
00676 
00677      int64_t len=((int64_t) num_vectors)*num_features;
00678      free_feature_matrix();
00679      feature_matrix=SG_MALLOC(float64_t, len);
00680      int32_t num_cf_feat=0;
00681      int32_t num_cf_vec=0;
00682      int32_t num_ref_feat=0;
00683      int32_t num_ref_vec=0;
00684      char* fm_cf=NULL; //cf->get_feature_matrix(num_cf_feat, num_cf_vec);
00685      char* fm_ref=NULL; //Ref->get_feature_matrix(num_ref_feat, num_ref_vec);
00686 
00687      ASSERT(num_cf_vec==num_vectors);
00688      ASSERT(num_ref_vec==num_features);
00689 
00690      SG_INFO( "computing aligments of %i vectors to %i reference vectors: ", num_cf_vec, num_ref_vec) ;
00691      for (int32_t i=0; i< num_ref_vec; i++)
00692      {
00693      SG_PROGRESS(i, num_ref_vec) ;
00694      for (int32_t j=0; j<num_cf_vec; j++)
00695      feature_matrix[i+j*num_features] = CMath::Align(&fm_cf[j*num_cf_feat], &fm_ref[i*num_ref_feat], num_cf_feat, num_ref_feat, gapCost);
00696      } ;
00697 
00698      SG_INFO( "created %i x %i matrix (0x%p)\n", num_features, num_vectors, feature_matrix) ;*/
00699     return true;
00700 }
00701 
00702 template<> float64_t CSimpleFeatures<bool>::dense_dot(int32_t vec_idx1,
00703         const float64_t* vec2, int32_t vec2_len)
00704 {
00705     ASSERT(vec2_len == num_features);
00706 
00707     int32_t vlen;
00708     bool vfree;
00709     bool* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00710 
00711     ASSERT(vlen == num_features);
00712     float64_t result = 0;
00713 
00714     for (int32_t i = 0; i < num_features; i++)
00715         result += vec1[i] ? vec2[i] : 0;
00716 
00717     free_feature_vector(vec1, vec_idx1, vfree);
00718 
00719     return result;
00720 }
00721 
00722 template<> float64_t CSimpleFeatures<char>::dense_dot(int32_t vec_idx1,
00723         const float64_t* vec2, int32_t vec2_len)
00724 {
00725     ASSERT(vec2_len == num_features);
00726 
00727     int32_t vlen;
00728     bool vfree;
00729     char* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00730 
00731     ASSERT(vlen == num_features);
00732     float64_t result = 0;
00733 
00734     for (int32_t i = 0; i < num_features; i++)
00735         result += vec1[i] * vec2[i];
00736 
00737     free_feature_vector(vec1, vec_idx1, vfree);
00738 
00739     return result;
00740 }
00741 
00742 template<> float64_t CSimpleFeatures<int8_t>::dense_dot(int32_t vec_idx1,
00743         const float64_t* vec2, int32_t vec2_len)
00744 {
00745     ASSERT(vec2_len == num_features);
00746 
00747     int32_t vlen;
00748     bool vfree;
00749     int8_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00750 
00751     ASSERT(vlen == num_features);
00752     float64_t result = 0;
00753 
00754     for (int32_t i = 0; i < num_features; i++)
00755         result += vec1[i] * vec2[i];
00756 
00757     free_feature_vector(vec1, vec_idx1, vfree);
00758 
00759     return result;
00760 }
00761 
00762 template<> float64_t CSimpleFeatures<uint8_t>::dense_dot(
00763         int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00764 {
00765     ASSERT(vec2_len == num_features);
00766 
00767     int32_t vlen;
00768     bool vfree;
00769     uint8_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00770 
00771     ASSERT(vlen == num_features);
00772     float64_t result = 0;
00773 
00774     for (int32_t i = 0; i < num_features; i++)
00775         result += vec1[i] * vec2[i];
00776 
00777     free_feature_vector(vec1, vec_idx1, vfree);
00778 
00779     return result;
00780 }
00781 
00782 template<> float64_t CSimpleFeatures<int16_t>::dense_dot(
00783         int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00784 {
00785     ASSERT(vec2_len == num_features);
00786 
00787     int32_t vlen;
00788     bool vfree;
00789     int16_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00790 
00791     ASSERT(vlen == num_features);
00792     float64_t result = 0;
00793 
00794     for (int32_t i = 0; i < num_features; i++)
00795         result += vec1[i] * vec2[i];
00796 
00797     free_feature_vector(vec1, vec_idx1, vfree);
00798 
00799     return result;
00800 }
00801 
00802 template<> float64_t CSimpleFeatures<uint16_t>::dense_dot(
00803         int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00804 {
00805     ASSERT(vec2_len == num_features);
00806 
00807     int32_t vlen;
00808     bool vfree;
00809     uint16_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00810 
00811     ASSERT(vlen == num_features);
00812     float64_t result = 0;
00813 
00814     for (int32_t i = 0; i < num_features; i++)
00815         result += vec1[i] * vec2[i];
00816 
00817     free_feature_vector(vec1, vec_idx1, vfree);
00818 
00819     return result;
00820 }
00821 
00822 template<> float64_t CSimpleFeatures<int32_t>::dense_dot(
00823         int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00824 {
00825     ASSERT(vec2_len == num_features);
00826 
00827     int32_t vlen;
00828     bool vfree;
00829     int32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00830 
00831     ASSERT(vlen == num_features);
00832     float64_t result = 0;
00833 
00834     for (int32_t i = 0; i < num_features; i++)
00835         result += vec1[i] * vec2[i];
00836 
00837     free_feature_vector(vec1, vec_idx1, vfree);
00838 
00839     return result;
00840 }
00841 
00842 template<> float64_t CSimpleFeatures<uint32_t>::dense_dot(
00843         int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00844 {
00845     ASSERT(vec2_len == num_features);
00846 
00847     int32_t vlen;
00848     bool vfree;
00849     uint32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00850 
00851     ASSERT(vlen == num_features);
00852     float64_t result = 0;
00853 
00854     for (int32_t i = 0; i < num_features; i++)
00855         result += vec1[i] * vec2[i];
00856 
00857     free_feature_vector(vec1, vec_idx1, vfree);
00858 
00859     return result;
00860 }
00861 
00862 template<> float64_t CSimpleFeatures<int64_t>::dense_dot(
00863         int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00864 {
00865     ASSERT(vec2_len == num_features);
00866 
00867     int32_t vlen;
00868     bool vfree;
00869     int64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00870 
00871     ASSERT(vlen == num_features);
00872     float64_t result = 0;
00873 
00874     for (int32_t i = 0; i < num_features; i++)
00875         result += vec1[i] * vec2[i];
00876 
00877     free_feature_vector(vec1, vec_idx1, vfree);
00878 
00879     return result;
00880 }
00881 
00882 template<> float64_t CSimpleFeatures<uint64_t>::dense_dot(
00883         int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00884 {
00885     ASSERT(vec2_len == num_features);
00886 
00887     int32_t vlen;
00888     bool vfree;
00889     uint64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00890 
00891     ASSERT(vlen == num_features);
00892     float64_t result = 0;
00893 
00894     for (int32_t i = 0; i < num_features; i++)
00895         result += vec1[i] * vec2[i];
00896 
00897     free_feature_vector(vec1, vec_idx1, vfree);
00898 
00899     return result;
00900 }
00901 
00902 template<> float64_t CSimpleFeatures<float32_t>::dense_dot(
00903         int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00904 {
00905     ASSERT(vec2_len == num_features);
00906 
00907     int32_t vlen;
00908     bool vfree;
00909     float32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00910 
00911     ASSERT(vlen == num_features);
00912     float64_t result = 0;
00913 
00914     for (int32_t i = 0; i < num_features; i++)
00915         result += vec1[i] * vec2[i];
00916 
00917     free_feature_vector(vec1, vec_idx1, vfree);
00918 
00919     return result;
00920 }
00921 
00922 template<> float64_t CSimpleFeatures<float64_t>::dense_dot(
00923         int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00924 {
00925     ASSERT(vec2_len == num_features);
00926 
00927     int32_t vlen;
00928     bool vfree;
00929     float64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00930 
00931     ASSERT(vlen == num_features);
00932     float64_t result = CMath::dot(vec1, vec2, num_features);
00933 
00934     free_feature_vector(vec1, vec_idx1, vfree);
00935 
00936     return result;
00937 }
00938 
00939 template<> float64_t CSimpleFeatures<floatmax_t>::dense_dot(
00940         int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00941 {
00942     ASSERT(vec2_len == num_features);
00943 
00944     int32_t vlen;
00945     bool vfree;
00946     floatmax_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00947 
00948     ASSERT(vlen == num_features);
00949     float64_t result = 0;
00950 
00951     for (int32_t i = 0; i < num_features; i++)
00952         result += vec1[i] * vec2[i];
00953 
00954     free_feature_vector(vec1, vec_idx1, vfree);
00955 
00956     return result;
00957 }
00958 
00959 #define LOAD(f_load, sg_type)                                               \
00960 template<> void CSimpleFeatures<sg_type>::load(CFile* loader)       \
00961 {                                                                           \
00962     SG_SET_LOCALE_C;                                                    \
00963     ASSERT(loader);                                                         \
00964     sg_type* matrix;                                                        \
00965     int32_t num_feat;                                                       \
00966     int32_t num_vec;                                                        \
00967     loader->f_load(matrix, num_feat, num_vec);                              \
00968     set_feature_matrix(matrix, num_feat, num_vec);                          \
00969     SG_RESET_LOCALE;                                                    \
00970 }
00971 
00972 LOAD(get_matrix, bool)
00973 LOAD(get_matrix, char)
00974 LOAD(get_int8_matrix, int8_t)
00975 LOAD(get_matrix, uint8_t)
00976 LOAD(get_matrix, int16_t)
00977 LOAD(get_matrix, uint16_t)
00978 LOAD(get_matrix, int32_t)
00979 LOAD(get_uint_matrix, uint32_t)
00980 LOAD(get_long_matrix, int64_t)
00981 LOAD(get_ulong_matrix, uint64_t)
00982 LOAD(get_matrix, float32_t)
00983 LOAD(get_matrix, float64_t)
00984 LOAD(get_longreal_matrix, floatmax_t)
00985 #undef LOAD
00986 
00987 #define SAVE(f_write, sg_type)                                              \
00988 template<> void CSimpleFeatures<sg_type>::save(CFile* writer)       \
00989 {                                                                           \
00990     SG_SET_LOCALE_C;                                                    \
00991     ASSERT(writer);                                                         \
00992     writer->f_write(feature_matrix, num_features, num_vectors);             \
00993     SG_RESET_LOCALE;                                                    \
00994 }
00995 
00996 SAVE(set_matrix, bool)
00997 SAVE(set_matrix, char)
00998 SAVE(set_int8_matrix, int8_t)
00999 SAVE(set_matrix, uint8_t)
01000 SAVE(set_matrix, int16_t)
01001 SAVE(set_matrix, uint16_t)
01002 SAVE(set_matrix, int32_t)
01003 SAVE(set_uint_matrix, uint32_t)
01004 SAVE(set_long_matrix, int64_t)
01005 SAVE(set_ulong_matrix, uint64_t)
01006 SAVE(set_matrix, float32_t)
01007 SAVE(set_matrix, float64_t)
01008 SAVE(set_longreal_matrix, floatmax_t)
01009 #undef SAVE
01010 
01011 template class CSimpleFeatures<bool>;
01012 template class CSimpleFeatures<char>;
01013 template class CSimpleFeatures<int8_t>;
01014 template class CSimpleFeatures<uint8_t>;
01015 template class CSimpleFeatures<int16_t>;
01016 template class CSimpleFeatures<uint16_t>;
01017 template class CSimpleFeatures<int32_t>;
01018 template class CSimpleFeatures<uint32_t>;
01019 template class CSimpleFeatures<int64_t>;
01020 template class CSimpleFeatures<uint64_t>;
01021 template class CSimpleFeatures<float32_t>;
01022 template class CSimpleFeatures<float64_t>;
01023 template class CSimpleFeatures<floatmax_t>;
01024 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation