DenseFeatures.cpp

Go to the documentation of this file.
00001 #include <shogun/features/DenseFeatures.h>
00002 #include <shogun/preprocessor/DensePreprocessor.h>
00003 #include <shogun/io/SGIO.h>
00004 #include <shogun/base/Parameter.h>
00005 #include <shogun/mathematics/Math.h>
00006 
00007 #include <string.h>
00008 
00009 namespace shogun {
00010 
00011 template<class ST> CDenseFeatures<ST>::CDenseFeatures(int32_t size) : CDotFeatures(size)
00012 {
00013     init();
00014 }
00015 
00016 template<class ST> CDenseFeatures<ST>::CDenseFeatures(const CDenseFeatures & orig) :
00017         CDotFeatures(orig)
00018 {
00019     init();
00020     set_feature_matrix(orig.feature_matrix);
00021     initialize_cache();
00022 
00023     if (orig.m_subset_stack != NULL)
00024     {
00025         SG_UNREF(m_subset_stack);
00026         m_subset_stack=new CSubsetStack(*orig.m_subset_stack);
00027         SG_REF(m_subset_stack);
00028     }
00029 }
00030 
00031 template<class ST> CDenseFeatures<ST>::CDenseFeatures(SGMatrix<ST> matrix) :
00032         CDotFeatures()
00033 {
00034     init();
00035     set_feature_matrix(matrix);
00036 }
00037 
00038 template<class ST> CDenseFeatures<ST>::CDenseFeatures(ST* src, int32_t num_feat, int32_t num_vec) :
00039         CDotFeatures()
00040 {
00041     init();
00042     set_feature_matrix(SGMatrix<ST>(src, num_feat, num_vec));
00043 }
00044 template<class ST> CDenseFeatures<ST>::CDenseFeatures(CFile* loader) :
00045         CDotFeatures(loader)
00046 {
00047     init();
00048     load(loader);
00049 }
00050 
00051 template<class ST> CFeatures* CDenseFeatures<ST>::duplicate() const
00052 {
00053     return new CDenseFeatures<ST>(*this);
00054 }
00055 
00056 template<class ST> CDenseFeatures<ST>::~CDenseFeatures()
00057 {
00058     free_features();
00059 }
00060 
00061 template<class ST> void CDenseFeatures<ST>::free_features()
00062 {
00063     m_subset_stack->remove_all_subsets();
00064     free_feature_matrix();
00065     SG_UNREF(feature_cache);
00066 }
00067 
00068 template<class ST> void CDenseFeatures<ST>::free_feature_matrix()
00069 {
00070     m_subset_stack->remove_all_subsets();
00071     feature_matrix=SGMatrix<ST>();
00072     num_vectors = 0;
00073     num_features = 0;
00074 }
00075 
00076 template<class ST> ST* CDenseFeatures<ST>::get_feature_vector(int32_t num, int32_t& len, bool& dofree)
00077 {
00078     /* index conversion for subset, only for array access */
00079     int32_t real_num=m_subset_stack->subset_idx_conversion(num);
00080 
00081     len = num_features;
00082 
00083     if (feature_matrix.matrix)
00084     {
00085         dofree = false;
00086         return &feature_matrix.matrix[real_num * int64_t(num_features)];
00087     }
00088 
00089     ST* feat = NULL;
00090     dofree = false;
00091 
00092     if (feature_cache)
00093     {
00094         feat = feature_cache->lock_entry(real_num);
00095 
00096         if (feat)
00097             return feat;
00098         else
00099             feat = feature_cache->set_entry(real_num);
00100     }
00101 
00102     if (!feat)
00103         dofree = true;
00104     feat = compute_feature_vector(num, len, feat);
00105 
00106     if (get_num_preprocessors())
00107     {
00108         int32_t tmp_len = len;
00109         ST* tmp_feat_before = feat;
00110         ST* tmp_feat_after = NULL;
00111 
00112         for (int32_t i = 0; i < get_num_preprocessors(); i++)
00113         {
00114             CDensePreprocessor<ST>* p =
00115                     (CDensePreprocessor<ST>*) get_preprocessor(i);
00116             // temporary hack
00117             SGVector<ST> applied = p->apply_to_feature_vector(
00118                     SGVector<ST>(tmp_feat_before, tmp_len));
00119             tmp_feat_after = applied.vector;
00120             SG_UNREF(p);
00121 
00122             if (i != 0) // delete feature vector, except for the the first one, i.e., feat
00123                 SG_FREE(tmp_feat_before);
00124             tmp_feat_before = tmp_feat_after;
00125         }
00126 
00127         memcpy(feat, tmp_feat_after, sizeof(ST) * tmp_len);
00128         SG_FREE(tmp_feat_after);
00129 
00130         len = tmp_len;
00131     }
00132     return feat;
00133 }
00134 
00135 template<class ST> void CDenseFeatures<ST>::set_feature_vector(SGVector<ST> vector, int32_t num)
00136 {
00137     /* index conversion for subset, only for array access */
00138     int32_t real_num=m_subset_stack->subset_idx_conversion(num);
00139 
00140     if (num>=get_num_vectors())
00141     {
00142         SG_ERROR("Index out of bounds (number of vectors %d, you "
00143         "requested %d)\n", get_num_vectors(), num);
00144     }
00145 
00146     if (!feature_matrix.matrix)
00147         SG_ERROR("Requires a in-memory feature matrix\n");
00148 
00149     if (vector.vlen != num_features)
00150         SG_ERROR(
00151                 "Vector not of length %d (has %d)\n", num_features, vector.vlen);
00152 
00153     memcpy(&feature_matrix.matrix[real_num * int64_t(num_features)], vector.vector,
00154             int64_t(num_features) * sizeof(ST));
00155 }
00156 
00157 template<class ST> SGVector<ST> CDenseFeatures<ST>::get_feature_vector(int32_t num)
00158 {
00159     /* index conversion for subset, only for array access */
00160     int32_t real_num=m_subset_stack->subset_idx_conversion(num);
00161 
00162     if (num >= get_num_vectors())
00163     {
00164         SG_ERROR("Index out of bounds (number of vectors %d, you "
00165         "requested %d)\n", get_num_vectors(), real_num);
00166     }
00167 
00168     int32_t vlen;
00169     bool do_free;
00170     ST* vector= get_feature_vector(num, vlen, do_free);
00171     return SGVector<ST>(vector, vlen, do_free);
00172 }
00173 
00174 template<class ST> void CDenseFeatures<ST>::free_feature_vector(ST* feat_vec, int32_t num, bool dofree)
00175 {
00176     if (feature_cache)
00177         feature_cache->unlock_entry(m_subset_stack->subset_idx_conversion(num));
00178 
00179     if (dofree)
00180         SG_FREE(feat_vec);
00181 }
00182 
00183 template<class ST> void CDenseFeatures<ST>::free_feature_vector(SGVector<ST> vec, int32_t num)
00184 {
00185     free_feature_vector(vec.vector, num, false);
00186     vec=SGVector<ST>();
00187 }
00188 
00189 template<class ST> void CDenseFeatures<ST>::vector_subset(int32_t* idx, int32_t idx_len)
00190 {
00191     if (m_subset_stack->has_subsets())
00192         SG_ERROR("A subset is set, cannot call vector_subset\n");
00193 
00194     ASSERT(feature_matrix.matrix);
00195     ASSERT(idx_len<=num_vectors);
00196 
00197     int32_t num_vec = num_vectors;
00198     num_vectors = idx_len;
00199 
00200     int32_t old_ii = -1;
00201 
00202     for (int32_t i = 0; i < idx_len; i++)
00203     {
00204         int32_t ii = idx[i];
00205         ASSERT(old_ii<ii);
00206 
00207         if (ii < 0 || ii >= num_vec)
00208             SG_ERROR( "Index out of range: should be 0<%d<%d\n", ii, num_vec);
00209 
00210         if (i == ii)
00211             continue;
00212 
00213         memcpy(&feature_matrix.matrix[int64_t(num_features) * i],
00214                 &feature_matrix.matrix[int64_t(num_features) * ii],
00215                 num_features * sizeof(ST));
00216         old_ii = ii;
00217     }
00218 }
00219 
00220 template<class ST> void CDenseFeatures<ST>::feature_subset(int32_t* idx, int32_t idx_len)
00221 {
00222     if (m_subset_stack->has_subsets())
00223         SG_ERROR("A subset is set, cannot call feature_subset\n");
00224 
00225     ASSERT(feature_matrix.matrix);
00226     ASSERT(idx_len<=num_features);
00227     int32_t num_feat = num_features;
00228     num_features = idx_len;
00229 
00230     for (int32_t i = 0; i < num_vectors; i++)
00231     {
00232         ST* src = &feature_matrix.matrix[int64_t(num_feat) * i];
00233         ST* dst = &feature_matrix.matrix[int64_t(num_features) * i];
00234 
00235         int32_t old_jj = -1;
00236         for (int32_t j = 0; j < idx_len; j++)
00237         {
00238             int32_t jj = idx[j];
00239             ASSERT(old_jj<jj);
00240             if (jj < 0 || jj >= num_feat)
00241                 SG_ERROR(
00242                         "Index out of range: should be 0<%d<%d\n", jj, num_feat);
00243 
00244             dst[j] = src[jj];
00245             old_jj = jj;
00246         }
00247     }
00248 }
00249 
00250 template<class ST> SGMatrix<ST> CDenseFeatures<ST>::get_feature_matrix()
00251 {
00252     if (!m_subset_stack->has_subsets())
00253         return feature_matrix;
00254 
00255     SGMatrix<ST> submatrix(num_features, get_num_vectors());
00256 
00257     /* copy a subset vector wise */
00258     for (int32_t i=0; i<submatrix.num_cols; ++i)
00259     {
00260         int32_t real_i = m_subset_stack->subset_idx_conversion(i);
00261         memcpy(&submatrix.matrix[i*int64_t(num_features)],
00262                 &feature_matrix.matrix[real_i * int64_t(num_features)],
00263                 num_features * sizeof(ST));
00264     }
00265 
00266     return submatrix;
00267 }
00268 
00269 template<class ST> SGMatrix<ST> CDenseFeatures<ST>::steal_feature_matrix()
00270 {
00271     SGMatrix<ST> st_feature_matrix=feature_matrix;
00272     m_subset_stack->remove_all_subsets();
00273     SG_UNREF(feature_cache);
00274     clean_preprocessors();
00275     free_feature_matrix();
00276     return st_feature_matrix;
00277 }
00278 
00279 template<class ST> void CDenseFeatures<ST>::set_feature_matrix(SGMatrix<ST> matrix)
00280 {
00281     m_subset_stack->remove_all_subsets();
00282     free_feature_matrix();
00283     feature_matrix = matrix;
00284     num_features = matrix.num_rows;
00285     num_vectors = matrix.num_cols;
00286 }
00287 
00288 template<class ST> ST* CDenseFeatures<ST>::get_feature_matrix(int32_t &num_feat, int32_t &num_vec)
00289 {
00290     num_feat = num_features;
00291     num_vec = num_vectors;
00292     return feature_matrix.matrix;
00293 }
00294 
00295 template<class ST> CDenseFeatures<ST>* CDenseFeatures<ST>::get_transposed()
00296 {
00297     int32_t num_feat;
00298     int32_t num_vec;
00299     ST* fm = get_transposed(num_feat, num_vec);
00300 
00301     return new CDenseFeatures<ST>(fm, num_feat, num_vec);
00302 }
00303 
00304 template<class ST> ST* CDenseFeatures<ST>::get_transposed(int32_t &num_feat, int32_t &num_vec)
00305 {
00306     num_feat = get_num_vectors();
00307     num_vec = num_features;
00308 
00309     int32_t old_num_vec=get_num_vectors();
00310 
00311     ST* fm = SG_MALLOC(ST, int64_t(num_feat) * num_vec);
00312 
00313     for (int32_t i=0; i<old_num_vec; i++)
00314     {
00315         SGVector<ST> vec=get_feature_vector(i);
00316 
00317         for (int32_t j=0; j<vec.vlen; j++)
00318             fm[j*int64_t(old_num_vec)+i]=vec.vector[j];
00319 
00320         free_feature_vector(vec, i);
00321     }
00322 
00323     return fm;
00324 }
00325 
00326 template<class ST> void CDenseFeatures<ST>::copy_feature_matrix(SGMatrix<ST> src)
00327 {
00328     if (m_subset_stack->has_subsets())
00329         SG_ERROR("A subset is set, cannot call copy_feature_matrix\n");
00330 
00331     free_feature_matrix();
00332     feature_matrix = src.clone();
00333     num_features = src.num_rows;
00334     num_vectors = src.num_cols;
00335     initialize_cache();
00336 }
00337 
00338 template<class ST> void CDenseFeatures<ST>::obtain_from_dot(CDotFeatures* df)
00339 {
00340     m_subset_stack->remove_all_subsets();
00341 
00342     int32_t num_feat = df->get_dim_feature_space();
00343     int32_t num_vec = df->get_num_vectors();
00344 
00345     ASSERT(num_feat>0 && num_vec>0);
00346 
00347     free_feature_matrix();
00348     feature_matrix = SGMatrix<ST>(num_feat, num_vec);
00349 
00350     for (int32_t i = 0; i < num_vec; i++)
00351     {
00352         SGVector<float64_t> v = df->get_computed_dot_feature_vector(i);
00353         ASSERT(num_feat==v.vlen);
00354 
00355         for (int32_t j = 0; j < num_feat; j++)
00356             feature_matrix.matrix[i * int64_t(num_feat) + j] = (ST) v.vector[j];
00357     }
00358     num_features = num_feat;
00359     num_vectors = num_vec;
00360 }
00361 
00362 template<class ST> bool CDenseFeatures<ST>::apply_preprocessor(bool force_preprocessing)
00363 {
00364     if (m_subset_stack->has_subsets())
00365         SG_ERROR("A subset is set, cannot call apply_preproc\n");
00366 
00367     SG_DEBUG( "force: %d\n", force_preprocessing);
00368 
00369     if (feature_matrix.matrix && get_num_preprocessors())
00370     {
00371         for (int32_t i = 0; i < get_num_preprocessors(); i++)
00372         {
00373             if ((!is_preprocessed(i) || force_preprocessing))
00374             {
00375                 set_preprocessed(i);
00376                 CDensePreprocessor<ST>* p =
00377                         (CDensePreprocessor<ST>*) get_preprocessor(i);
00378                 SG_INFO( "preprocessing using preproc %s\n", p->get_name());
00379 
00380                 if (p->apply_to_feature_matrix(this).matrix == NULL)
00381                 {
00382                     SG_UNREF(p);
00383                     return false;
00384                 }
00385                 SG_UNREF(p);
00386 
00387             }
00388         }
00389 
00390         return true;
00391     }
00392     else
00393     {
00394         if (!feature_matrix.matrix)
00395             SG_ERROR( "no feature matrix\n");
00396 
00397         if (!get_num_preprocessors())
00398             SG_ERROR( "no preprocessors available\n");
00399 
00400         return false;
00401     }
00402 }
00403 
00404 template<class ST> int32_t CDenseFeatures<ST>::get_size() const  { return sizeof(ST); }
00405 
00406 template<class ST> int32_t CDenseFeatures<ST>::get_num_vectors() const
00407 {
00408     return m_subset_stack->has_subsets() ? m_subset_stack->get_size() : num_vectors;
00409 }
00410 
00411 template<class ST> int32_t CDenseFeatures<ST>::get_num_features() { return num_features; }
00412 
00413 template<class ST> void CDenseFeatures<ST>::set_num_features(int32_t num)
00414 {
00415     num_features = num;
00416     initialize_cache();
00417 }
00418 
00419 template<class ST> void CDenseFeatures<ST>::set_num_vectors(int32_t num)
00420 {
00421     if (m_subset_stack->has_subsets())
00422         SG_ERROR("A subset is set, cannot call set_num_vectors\n");
00423 
00424     num_vectors = num;
00425     initialize_cache();
00426 }
00427 
00428 template<class ST> void CDenseFeatures<ST>::initialize_cache()
00429 {
00430     if (m_subset_stack->has_subsets())
00431         SG_ERROR("A subset is set, cannot call initialize_cache\n");
00432 
00433     if (num_features && num_vectors)
00434     {
00435         SG_UNREF(feature_cache);
00436         feature_cache = new CCache<ST>(get_cache_size(), num_features,
00437                 num_vectors);
00438         SG_REF(feature_cache);
00439     }
00440 }
00441 
00442 template<class ST> EFeatureClass CDenseFeatures<ST>::get_feature_class() const  { return C_DENSE; }
00443 
00444 template<class ST> bool CDenseFeatures<ST>::reshape(int32_t p_num_features, int32_t p_num_vectors)
00445 {
00446     if (m_subset_stack->has_subsets())
00447         SG_ERROR("A subset is set, cannot call reshape\n");
00448 
00449     if (p_num_features * p_num_vectors
00450             == this->num_features * this->num_vectors)
00451     {
00452         num_features = p_num_features;
00453         num_vectors = p_num_vectors;
00454         return true;
00455     } else
00456         return false;
00457 }
00458 
00459 template<class ST> int32_t CDenseFeatures<ST>::get_dim_feature_space() const { return num_features; }
00460 
00461 template<class ST> float64_t CDenseFeatures<ST>::dot(int32_t vec_idx1, CDotFeatures* df,
00462         int32_t vec_idx2)
00463 {
00464     ASSERT(df);
00465     ASSERT(df->get_feature_type() == get_feature_type());
00466     ASSERT(df->get_feature_class() == get_feature_class());
00467     CDenseFeatures<ST>* sf = (CDenseFeatures<ST>*) df;
00468 
00469     int32_t len1, len2;
00470     bool free1, free2;
00471 
00472     ST* vec1 = get_feature_vector(vec_idx1, len1, free1);
00473     ST* vec2 = sf->get_feature_vector(vec_idx2, len2, free2);
00474 
00475     float64_t result = SGVector<ST>::dot(vec1, vec2, len1);
00476 
00477     free_feature_vector(vec1, vec_idx1, free1);
00478     sf->free_feature_vector(vec2, vec_idx2, free2);
00479 
00480     return result;
00481 }
00482 
00483 template<class ST> void CDenseFeatures<ST>::add_to_dense_vec(float64_t alpha, int32_t vec_idx1,
00484         float64_t* vec2, int32_t vec2_len, bool abs_val)
00485 {
00486     ASSERT(vec2_len == num_features);
00487 
00488     int32_t vlen;
00489     bool vfree;
00490     ST* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00491 
00492     ASSERT(vlen == num_features);
00493 
00494     if (abs_val)
00495     {
00496         for (int32_t i = 0; i < num_features; i++)
00497             vec2[i] += alpha * CMath::abs(vec1[i]);
00498     }
00499     else
00500     {
00501         for (int32_t i = 0; i < num_features; i++)
00502             vec2[i] += alpha * vec1[i];
00503     }
00504 
00505     free_feature_vector(vec1, vec_idx1, vfree);
00506 }
00507 
00508 template<class ST> int32_t CDenseFeatures<ST>::get_nnz_features_for_vector(int32_t num)
00509 {
00510     return num_features;
00511 }
00512 
00513 template<class ST> void* CDenseFeatures<ST>::get_feature_iterator(int32_t vector_index)
00514 {
00515     if (vector_index>=get_num_vectors())
00516     {
00517         SG_ERROR("Index out of bounds (number of vectors %d, you "
00518         "requested %d)\n", get_num_vectors(), vector_index);
00519     }
00520 
00521     dense_feature_iterator* iterator = SG_MALLOC(dense_feature_iterator, 1);
00522     iterator->vec = get_feature_vector(vector_index, iterator->vlen,
00523             iterator->vfree);
00524     iterator->vidx = vector_index;
00525     iterator->index = 0;
00526     return iterator;
00527 }
00528 
00529 template<class ST> bool CDenseFeatures<ST>::get_next_feature(int32_t& index, float64_t& value,
00530         void* iterator)
00531 {
00532     dense_feature_iterator* it = (dense_feature_iterator*) iterator;
00533     if (!it || it->index >= it->vlen)
00534         return false;
00535 
00536     index = it->index++;
00537     value = (float64_t) it->vec[index];
00538 
00539     return true;
00540 }
00541 
00542 template<class ST> void CDenseFeatures<ST>::free_feature_iterator(void* iterator)
00543 {
00544     if (!iterator)
00545         return;
00546 
00547     dense_feature_iterator* it = (dense_feature_iterator*) iterator;
00548     free_feature_vector(it->vec, it->vidx, it->vfree);
00549     SG_FREE(it);
00550 }
00551 
00552 template<class ST> CFeatures* CDenseFeatures<ST>::copy_subset(SGVector<index_t> indices)
00553 {
00554     SGMatrix<ST> feature_matrix_copy(num_features, indices.vlen);
00555 
00556     for (index_t i=0; i<indices.vlen; ++i)
00557     {
00558         index_t real_idx=m_subset_stack->subset_idx_conversion(indices.vector[i]);
00559         memcpy(&feature_matrix_copy.matrix[i*num_features],
00560                 &feature_matrix.matrix[real_idx*num_features],
00561                 num_features*sizeof(ST));
00562     }
00563 
00564     CFeatures* result=new CDenseFeatures(feature_matrix_copy);
00565     SG_REF(result);
00566     return result;
00567 }
00568 
00569 template<class ST> ST* CDenseFeatures<ST>::compute_feature_vector(int32_t num, int32_t& len,
00570         ST* target)
00571 {
00572     SG_NOTIMPLEMENTED;
00573     len = 0;
00574     return NULL;
00575 }
00576 
00577 template<class ST> void CDenseFeatures<ST>::init()
00578 {
00579     num_vectors = 0;
00580     num_features = 0;
00581 
00582     feature_matrix = SGMatrix<ST>();
00583     feature_cache = NULL;
00584 
00585     set_generic<ST>();
00586 
00587     /* not store number of vectors in subset */
00588     SG_ADD(&num_vectors, "num_vectors", "Number of vectors.", MS_NOT_AVAILABLE);
00589     SG_ADD(&num_features, "num_features", "Number of features.", MS_NOT_AVAILABLE);
00590     SG_ADD(&feature_matrix, "feature_matrix",
00591             "Matrix of feature vectors / 1 vector per column.", MS_NOT_AVAILABLE);
00592 }
00593 
00594 #define GET_FEATURE_TYPE(f_type, sg_type)   \
00595 template<> EFeatureType CDenseFeatures<sg_type>::get_feature_type() const \
00596 {                                                                           \
00597     return f_type;                                                          \
00598 }
00599 
00600 GET_FEATURE_TYPE(F_BOOL, bool)
00601 GET_FEATURE_TYPE(F_CHAR, char)
00602 GET_FEATURE_TYPE(F_BYTE, uint8_t)
00603 GET_FEATURE_TYPE(F_BYTE, int8_t)
00604 GET_FEATURE_TYPE(F_SHORT, int16_t)
00605 GET_FEATURE_TYPE(F_WORD, uint16_t)
00606 GET_FEATURE_TYPE(F_INT, int32_t)
00607 GET_FEATURE_TYPE(F_UINT, uint32_t)
00608 GET_FEATURE_TYPE(F_LONG, int64_t)
00609 GET_FEATURE_TYPE(F_ULONG, uint64_t)
00610 GET_FEATURE_TYPE(F_SHORTREAL, float32_t)
00611 GET_FEATURE_TYPE(F_DREAL, float64_t)
00612 GET_FEATURE_TYPE(F_LONGREAL, floatmax_t)
00613 #undef GET_FEATURE_TYPE
00614 
00615 template<> float64_t CDenseFeatures<bool>::dense_dot(int32_t vec_idx1,
00616         const float64_t* vec2, int32_t vec2_len)
00617 {
00618     ASSERT(vec2_len == num_features);
00619 
00620     int32_t vlen;
00621     bool vfree;
00622     bool* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00623 
00624     ASSERT(vlen == num_features);
00625     float64_t result = 0;
00626 
00627     for (int32_t i = 0; i < num_features; i++)
00628         result += vec1[i] ? vec2[i] : 0;
00629 
00630     free_feature_vector(vec1, vec_idx1, vfree);
00631 
00632     return result;
00633 }
00634 
00635 template<> float64_t CDenseFeatures<char>::dense_dot(int32_t vec_idx1,
00636         const float64_t* vec2, int32_t vec2_len)
00637 {
00638     ASSERT(vec2_len == num_features);
00639 
00640     int32_t vlen;
00641     bool vfree;
00642     char* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00643 
00644     ASSERT(vlen == num_features);
00645     float64_t result = 0;
00646 
00647     for (int32_t i = 0; i < num_features; i++)
00648         result += vec1[i] * vec2[i];
00649 
00650     free_feature_vector(vec1, vec_idx1, vfree);
00651 
00652     return result;
00653 }
00654 
00655 template<> float64_t CDenseFeatures<int8_t>::dense_dot(int32_t vec_idx1,
00656         const float64_t* vec2, int32_t vec2_len)
00657 {
00658     ASSERT(vec2_len == num_features);
00659 
00660     int32_t vlen;
00661     bool vfree;
00662     int8_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00663 
00664     ASSERT(vlen == num_features);
00665     float64_t result = 0;
00666 
00667     for (int32_t i = 0; i < num_features; i++)
00668         result += vec1[i] * vec2[i];
00669 
00670     free_feature_vector(vec1, vec_idx1, vfree);
00671 
00672     return result;
00673 }
00674 
00675 template<> float64_t CDenseFeatures<uint8_t>::dense_dot(
00676         int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00677 {
00678     ASSERT(vec2_len == num_features);
00679 
00680     int32_t vlen;
00681     bool vfree;
00682     uint8_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00683 
00684     ASSERT(vlen == num_features);
00685     float64_t result = 0;
00686 
00687     for (int32_t i = 0; i < num_features; i++)
00688         result += vec1[i] * vec2[i];
00689 
00690     free_feature_vector(vec1, vec_idx1, vfree);
00691 
00692     return result;
00693 }
00694 
00695 template<> float64_t CDenseFeatures<int16_t>::dense_dot(
00696         int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00697 {
00698     ASSERT(vec2_len == num_features);
00699 
00700     int32_t vlen;
00701     bool vfree;
00702     int16_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00703 
00704     ASSERT(vlen == num_features);
00705     float64_t result = 0;
00706 
00707     for (int32_t i = 0; i < num_features; i++)
00708         result += vec1[i] * vec2[i];
00709 
00710     free_feature_vector(vec1, vec_idx1, vfree);
00711 
00712     return result;
00713 }
00714 
00715 template<> float64_t CDenseFeatures<uint16_t>::dense_dot(
00716         int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00717 {
00718     ASSERT(vec2_len == num_features);
00719 
00720     int32_t vlen;
00721     bool vfree;
00722     uint16_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00723 
00724     ASSERT(vlen == num_features);
00725     float64_t result = 0;
00726 
00727     for (int32_t i = 0; i < num_features; i++)
00728         result += vec1[i] * vec2[i];
00729 
00730     free_feature_vector(vec1, vec_idx1, vfree);
00731 
00732     return result;
00733 }
00734 
00735 template<> float64_t CDenseFeatures<int32_t>::dense_dot(
00736         int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00737 {
00738     ASSERT(vec2_len == num_features);
00739 
00740     int32_t vlen;
00741     bool vfree;
00742     int32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00743 
00744     ASSERT(vlen == num_features);
00745     float64_t result = 0;
00746 
00747     for (int32_t i = 0; i < num_features; i++)
00748         result += vec1[i] * vec2[i];
00749 
00750     free_feature_vector(vec1, vec_idx1, vfree);
00751 
00752     return result;
00753 }
00754 
00755 template<> float64_t CDenseFeatures<uint32_t>::dense_dot(
00756         int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00757 {
00758     ASSERT(vec2_len == num_features);
00759 
00760     int32_t vlen;
00761     bool vfree;
00762     uint32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00763 
00764     ASSERT(vlen == num_features);
00765     float64_t result = 0;
00766 
00767     for (int32_t i = 0; i < num_features; i++)
00768         result += vec1[i] * vec2[i];
00769 
00770     free_feature_vector(vec1, vec_idx1, vfree);
00771 
00772     return result;
00773 }
00774 
00775 template<> float64_t CDenseFeatures<int64_t>::dense_dot(
00776         int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00777 {
00778     ASSERT(vec2_len == num_features);
00779 
00780     int32_t vlen;
00781     bool vfree;
00782     int64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00783 
00784     ASSERT(vlen == num_features);
00785     float64_t result = 0;
00786 
00787     for (int32_t i = 0; i < num_features; i++)
00788         result += vec1[i] * vec2[i];
00789 
00790     free_feature_vector(vec1, vec_idx1, vfree);
00791 
00792     return result;
00793 }
00794 
00795 template<> float64_t CDenseFeatures<uint64_t>::dense_dot(
00796         int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00797 {
00798     ASSERT(vec2_len == num_features);
00799 
00800     int32_t vlen;
00801     bool vfree;
00802     uint64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00803 
00804     ASSERT(vlen == num_features);
00805     float64_t result = 0;
00806 
00807     for (int32_t i = 0; i < num_features; i++)
00808         result += vec1[i] * vec2[i];
00809 
00810     free_feature_vector(vec1, vec_idx1, vfree);
00811 
00812     return result;
00813 }
00814 
00815 template<> float64_t CDenseFeatures<float32_t>::dense_dot(
00816         int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00817 {
00818     ASSERT(vec2_len == num_features);
00819 
00820     int32_t vlen;
00821     bool vfree;
00822     float32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00823 
00824     ASSERT(vlen == num_features);
00825     float64_t result = 0;
00826 
00827     for (int32_t i = 0; i < num_features; i++)
00828         result += vec1[i] * vec2[i];
00829 
00830     free_feature_vector(vec1, vec_idx1, vfree);
00831 
00832     return result;
00833 }
00834 
00835 template<> float64_t CDenseFeatures<float64_t>::dense_dot(
00836         int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00837 {
00838     ASSERT(vec2_len == num_features);
00839 
00840     int32_t vlen;
00841     bool vfree;
00842     float64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00843 
00844     ASSERT(vlen == num_features);
00845     float64_t result = SGVector<float64_t>::dot(vec1, vec2, num_features);
00846 
00847     free_feature_vector(vec1, vec_idx1, vfree);
00848 
00849     return result;
00850 }
00851 
00852 template<> float64_t CDenseFeatures<floatmax_t>::dense_dot(
00853         int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00854 {
00855     ASSERT(vec2_len == num_features);
00856 
00857     int32_t vlen;
00858     bool vfree;
00859     floatmax_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00860 
00861     ASSERT(vlen == num_features);
00862     float64_t result = 0;
00863 
00864     for (int32_t i = 0; i < num_features; i++)
00865         result += vec1[i] * vec2[i];
00866 
00867     free_feature_vector(vec1, vec_idx1, vfree);
00868 
00869     return result;
00870 }
00871 
00872 template<class ST> bool CDenseFeatures<ST>::is_equal(CDenseFeatures* rhs)
00873 {
00874     if ( num_features != rhs->num_features || num_vectors != rhs->num_vectors )
00875         return false;
00876 
00877     ST* vec1;
00878     ST* vec2;
00879     int32_t v1len, v2len;
00880     bool v1free, v2free, stop = false;
00881 
00882     for (int32_t i = 0; i < num_vectors; i++)
00883     {
00884         vec1 = get_feature_vector(i, v1len, v1free);
00885         vec2 = rhs->get_feature_vector(i, v2len, v2free);
00886 
00887         if (v1len!=v2len)
00888             stop = true;
00889 
00890         for (int32_t j=0; j<v1len; j++)
00891         {
00892             if (vec1[j]!=vec2[j])
00893                 stop = true;
00894         }
00895 
00896         free_feature_vector(vec1, i, v1free);
00897         free_feature_vector(vec2, i, v2free);
00898 
00899         if (stop)
00900             return false;
00901     }
00902 
00903     return true;
00904 }
00905 
00906 template<class ST> CFeatures* CDenseFeatures<ST>::create_merged_copy(
00907         CFeatures* other)
00908 {
00909     SG_DEBUG("entering %s::create_merged_copy()\n", get_name());
00910     if (get_feature_type()!=other->get_feature_type() ||
00911             get_feature_class()!=other->get_feature_class() ||
00912             strcmp(get_name(), other->get_name()))
00913     {
00914         SG_ERROR("%s::create_merged_copy(): Features are of different type!\n",
00915                 get_name());
00916     }
00917 
00918     CDenseFeatures<ST>* casted=dynamic_cast<CDenseFeatures<ST>* >(other);
00919 
00920     if (!casted)
00921     {
00922         SG_ERROR("%s::create_merged_copy(): Could not cast object of %s to "
00923                 "same type as %s\n",get_name(), other->get_name(), get_name());
00924     }
00925 
00926     if (num_features!=casted->num_features)
00927     {
00928         SG_ERROR("%s::create_merged_copy(): Provided feature object has "
00929                 "different dimension than this one\n");
00930     }
00931 
00932     /* create new feature matrix and copy both instances data into it */
00933     SGMatrix<ST> data(num_features, num_vectors+casted->get_num_vectors());
00934 
00935     /* copy data of this instance */
00936     SG_DEBUG("copying matrix of this instance\n");
00937     memcpy(data.matrix, feature_matrix.matrix,
00938             num_features*num_vectors*sizeof(ST));
00939 
00940     /* copy data of provided instance */
00941     SG_DEBUG("copying matrix of provided instance\n");
00942     memcpy(&data.matrix[num_vectors*num_features],
00943             casted->feature_matrix.matrix,
00944             casted->num_features*casted->num_vectors*sizeof(ST));
00945 
00946     /* create new instance and return */
00947     CDenseFeatures<ST>* result=new CDenseFeatures<ST>(data);
00948 
00949     SG_DEBUG("leaving %s::create_merged_copy()\n", get_name());
00950     return result;
00951 }
00952 
00953 #define LOAD(f_load, sg_type)                                               \
00954 template<> void CDenseFeatures<sg_type>::load(CFile* loader)                \
00955 {                                                                           \
00956     SG_SET_LOCALE_C;                                                        \
00957     ASSERT(loader);                                                         \
00958     sg_type* matrix;                                                        \
00959     int32_t num_feat;                                                       \
00960     int32_t num_vec;                                                        \
00961     loader->f_load(matrix, num_feat, num_vec);                              \
00962     set_feature_matrix(SGMatrix<sg_type>(matrix, num_feat, num_vec));       \
00963     SG_RESET_LOCALE;                                                        \
00964 }
00965 
00966 LOAD(get_matrix, bool)
00967 LOAD(get_matrix, char)
00968 LOAD(get_int8_matrix, int8_t)
00969 LOAD(get_matrix, uint8_t)
00970 LOAD(get_matrix, int16_t)
00971 LOAD(get_matrix, uint16_t)
00972 LOAD(get_matrix, int32_t)
00973 LOAD(get_uint_matrix, uint32_t)
00974 LOAD(get_long_matrix, int64_t)
00975 LOAD(get_ulong_matrix, uint64_t)
00976 LOAD(get_matrix, float32_t)
00977 LOAD(get_matrix, float64_t)
00978 LOAD(get_longreal_matrix, floatmax_t)
00979 #undef LOAD
00980 
00981 #define SAVE(f_write, sg_type)                                              \
00982 template<> void CDenseFeatures<sg_type>::save(CFile* writer)                \
00983 {                                                                           \
00984     SG_SET_LOCALE_C;                                                        \
00985     ASSERT(writer);                                                         \
00986     writer->f_write(feature_matrix.matrix, feature_matrix.num_rows,         \
00987             feature_matrix.num_cols);                                       \
00988     SG_RESET_LOCALE;                                                        \
00989 }
00990 
00991 SAVE(set_matrix, bool)
00992 SAVE(set_matrix, char)
00993 SAVE(set_int8_matrix, int8_t)
00994 SAVE(set_matrix, uint8_t)
00995 SAVE(set_matrix, int16_t)
00996 SAVE(set_matrix, uint16_t)
00997 SAVE(set_matrix, int32_t)
00998 SAVE(set_uint_matrix, uint32_t)
00999 SAVE(set_long_matrix, int64_t)
01000 SAVE(set_ulong_matrix, uint64_t)
01001 SAVE(set_matrix, float32_t)
01002 SAVE(set_matrix, float64_t)
01003 SAVE(set_longreal_matrix, floatmax_t)
01004 #undef SAVE
01005 
01006 template class CDenseFeatures<bool>;
01007 template class CDenseFeatures<char>;
01008 template class CDenseFeatures<int8_t>;
01009 template class CDenseFeatures<uint8_t>;
01010 template class CDenseFeatures<int16_t>;
01011 template class CDenseFeatures<uint16_t>;
01012 template class CDenseFeatures<int32_t>;
01013 template class CDenseFeatures<uint32_t>;
01014 template class CDenseFeatures<int64_t>;
01015 template class CDenseFeatures<uint64_t>;
01016 template class CDenseFeatures<float32_t>;
01017 template class CDenseFeatures<float64_t>;
01018 template class CDenseFeatures<floatmax_t>;
01019 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation