00001 #include <shogun/features/DenseFeatures.h>
00002 #include <shogun/preprocessor/DensePreprocessor.h>
00003 #include <shogun/io/SGIO.h>
00004 #include <shogun/base/Parameter.h>
00005 #include <shogun/mathematics/Math.h>
00006
00007 #include <string.h>
00008
00009 namespace shogun {
00010
00011 template<class ST> CDenseFeatures<ST>::CDenseFeatures(int32_t size) : CDotFeatures(size)
00012 {
00013 init();
00014 }
00015
00016 template<class ST> CDenseFeatures<ST>::CDenseFeatures(const CDenseFeatures & orig) :
00017 CDotFeatures(orig)
00018 {
00019 init();
00020 set_feature_matrix(orig.feature_matrix);
00021 initialize_cache();
00022
00023 if (orig.m_subset_stack != NULL)
00024 {
00025 SG_UNREF(m_subset_stack);
00026 m_subset_stack=new CSubsetStack(*orig.m_subset_stack);
00027 SG_REF(m_subset_stack);
00028 }
00029 }
00030
00031 template<class ST> CDenseFeatures<ST>::CDenseFeatures(SGMatrix<ST> matrix) :
00032 CDotFeatures()
00033 {
00034 init();
00035 set_feature_matrix(matrix);
00036 }
00037
00038 template<class ST> CDenseFeatures<ST>::CDenseFeatures(ST* src, int32_t num_feat, int32_t num_vec) :
00039 CDotFeatures()
00040 {
00041 init();
00042 set_feature_matrix(SGMatrix<ST>(src, num_feat, num_vec));
00043 }
00044 template<class ST> CDenseFeatures<ST>::CDenseFeatures(CFile* loader) :
00045 CDotFeatures(loader)
00046 {
00047 init();
00048 load(loader);
00049 }
00050
00051 template<class ST> CFeatures* CDenseFeatures<ST>::duplicate() const
00052 {
00053 return new CDenseFeatures<ST>(*this);
00054 }
00055
00056 template<class ST> CDenseFeatures<ST>::~CDenseFeatures()
00057 {
00058 free_features();
00059 }
00060
00061 template<class ST> void CDenseFeatures<ST>::free_features()
00062 {
00063 m_subset_stack->remove_all_subsets();
00064 free_feature_matrix();
00065 SG_UNREF(feature_cache);
00066 }
00067
00068 template<class ST> void CDenseFeatures<ST>::free_feature_matrix()
00069 {
00070 m_subset_stack->remove_all_subsets();
00071 feature_matrix=SGMatrix<ST>();
00072 num_vectors = 0;
00073 num_features = 0;
00074 }
00075
00076 template<class ST> ST* CDenseFeatures<ST>::get_feature_vector(int32_t num, int32_t& len, bool& dofree)
00077 {
00078
00079 int32_t real_num=m_subset_stack->subset_idx_conversion(num);
00080
00081 len = num_features;
00082
00083 if (feature_matrix.matrix)
00084 {
00085 dofree = false;
00086 return &feature_matrix.matrix[real_num * int64_t(num_features)];
00087 }
00088
00089 ST* feat = NULL;
00090 dofree = false;
00091
00092 if (feature_cache)
00093 {
00094 feat = feature_cache->lock_entry(real_num);
00095
00096 if (feat)
00097 return feat;
00098 else
00099 feat = feature_cache->set_entry(real_num);
00100 }
00101
00102 if (!feat)
00103 dofree = true;
00104 feat = compute_feature_vector(num, len, feat);
00105
00106 if (get_num_preprocessors())
00107 {
00108 int32_t tmp_len = len;
00109 ST* tmp_feat_before = feat;
00110 ST* tmp_feat_after = NULL;
00111
00112 for (int32_t i = 0; i < get_num_preprocessors(); i++)
00113 {
00114 CDensePreprocessor<ST>* p =
00115 (CDensePreprocessor<ST>*) get_preprocessor(i);
00116
00117 SGVector<ST> applied = p->apply_to_feature_vector(
00118 SGVector<ST>(tmp_feat_before, tmp_len));
00119 tmp_feat_after = applied.vector;
00120 SG_UNREF(p);
00121
00122 if (i != 0)
00123 SG_FREE(tmp_feat_before);
00124 tmp_feat_before = tmp_feat_after;
00125 }
00126
00127 memcpy(feat, tmp_feat_after, sizeof(ST) * tmp_len);
00128 SG_FREE(tmp_feat_after);
00129
00130 len = tmp_len;
00131 }
00132 return feat;
00133 }
00134
00135 template<class ST> void CDenseFeatures<ST>::set_feature_vector(SGVector<ST> vector, int32_t num)
00136 {
00137
00138 int32_t real_num=m_subset_stack->subset_idx_conversion(num);
00139
00140 if (num>=get_num_vectors())
00141 {
00142 SG_ERROR("Index out of bounds (number of vectors %d, you "
00143 "requested %d)\n", get_num_vectors(), num);
00144 }
00145
00146 if (!feature_matrix.matrix)
00147 SG_ERROR("Requires a in-memory feature matrix\n");
00148
00149 if (vector.vlen != num_features)
00150 SG_ERROR(
00151 "Vector not of length %d (has %d)\n", num_features, vector.vlen);
00152
00153 memcpy(&feature_matrix.matrix[real_num * int64_t(num_features)], vector.vector,
00154 int64_t(num_features) * sizeof(ST));
00155 }
00156
00157 template<class ST> SGVector<ST> CDenseFeatures<ST>::get_feature_vector(int32_t num)
00158 {
00159
00160 int32_t real_num=m_subset_stack->subset_idx_conversion(num);
00161
00162 if (num >= get_num_vectors())
00163 {
00164 SG_ERROR("Index out of bounds (number of vectors %d, you "
00165 "requested %d)\n", get_num_vectors(), real_num);
00166 }
00167
00168 int32_t vlen;
00169 bool do_free;
00170 ST* vector= get_feature_vector(num, vlen, do_free);
00171 return SGVector<ST>(vector, vlen, do_free);
00172 }
00173
00174 template<class ST> void CDenseFeatures<ST>::free_feature_vector(ST* feat_vec, int32_t num, bool dofree)
00175 {
00176 if (feature_cache)
00177 feature_cache->unlock_entry(m_subset_stack->subset_idx_conversion(num));
00178
00179 if (dofree)
00180 SG_FREE(feat_vec);
00181 }
00182
00183 template<class ST> void CDenseFeatures<ST>::free_feature_vector(SGVector<ST> vec, int32_t num)
00184 {
00185 free_feature_vector(vec.vector, num, false);
00186 vec=SGVector<ST>();
00187 }
00188
00189 template<class ST> void CDenseFeatures<ST>::vector_subset(int32_t* idx, int32_t idx_len)
00190 {
00191 if (m_subset_stack->has_subsets())
00192 SG_ERROR("A subset is set, cannot call vector_subset\n");
00193
00194 ASSERT(feature_matrix.matrix);
00195 ASSERT(idx_len<=num_vectors);
00196
00197 int32_t num_vec = num_vectors;
00198 num_vectors = idx_len;
00199
00200 int32_t old_ii = -1;
00201
00202 for (int32_t i = 0; i < idx_len; i++)
00203 {
00204 int32_t ii = idx[i];
00205 ASSERT(old_ii<ii);
00206
00207 if (ii < 0 || ii >= num_vec)
00208 SG_ERROR( "Index out of range: should be 0<%d<%d\n", ii, num_vec);
00209
00210 if (i == ii)
00211 continue;
00212
00213 memcpy(&feature_matrix.matrix[int64_t(num_features) * i],
00214 &feature_matrix.matrix[int64_t(num_features) * ii],
00215 num_features * sizeof(ST));
00216 old_ii = ii;
00217 }
00218 }
00219
00220 template<class ST> void CDenseFeatures<ST>::feature_subset(int32_t* idx, int32_t idx_len)
00221 {
00222 if (m_subset_stack->has_subsets())
00223 SG_ERROR("A subset is set, cannot call feature_subset\n");
00224
00225 ASSERT(feature_matrix.matrix);
00226 ASSERT(idx_len<=num_features);
00227 int32_t num_feat = num_features;
00228 num_features = idx_len;
00229
00230 for (int32_t i = 0; i < num_vectors; i++)
00231 {
00232 ST* src = &feature_matrix.matrix[int64_t(num_feat) * i];
00233 ST* dst = &feature_matrix.matrix[int64_t(num_features) * i];
00234
00235 int32_t old_jj = -1;
00236 for (int32_t j = 0; j < idx_len; j++)
00237 {
00238 int32_t jj = idx[j];
00239 ASSERT(old_jj<jj);
00240 if (jj < 0 || jj >= num_feat)
00241 SG_ERROR(
00242 "Index out of range: should be 0<%d<%d\n", jj, num_feat);
00243
00244 dst[j] = src[jj];
00245 old_jj = jj;
00246 }
00247 }
00248 }
00249
00250 template<class ST> SGMatrix<ST> CDenseFeatures<ST>::get_feature_matrix()
00251 {
00252 if (!m_subset_stack->has_subsets())
00253 return feature_matrix;
00254
00255 SGMatrix<ST> submatrix(num_features, get_num_vectors());
00256
00257
00258 for (int32_t i=0; i<submatrix.num_cols; ++i)
00259 {
00260 int32_t real_i = m_subset_stack->subset_idx_conversion(i);
00261 memcpy(&submatrix.matrix[i*int64_t(num_features)],
00262 &feature_matrix.matrix[real_i * int64_t(num_features)],
00263 num_features * sizeof(ST));
00264 }
00265
00266 return submatrix;
00267 }
00268
00269 template<class ST> SGMatrix<ST> CDenseFeatures<ST>::steal_feature_matrix()
00270 {
00271 SGMatrix<ST> st_feature_matrix=feature_matrix;
00272 m_subset_stack->remove_all_subsets();
00273 SG_UNREF(feature_cache);
00274 clean_preprocessors();
00275 free_feature_matrix();
00276 return st_feature_matrix;
00277 }
00278
00279 template<class ST> void CDenseFeatures<ST>::set_feature_matrix(SGMatrix<ST> matrix)
00280 {
00281 m_subset_stack->remove_all_subsets();
00282 free_feature_matrix();
00283 feature_matrix = matrix;
00284 num_features = matrix.num_rows;
00285 num_vectors = matrix.num_cols;
00286 }
00287
00288 template<class ST> ST* CDenseFeatures<ST>::get_feature_matrix(int32_t &num_feat, int32_t &num_vec)
00289 {
00290 num_feat = num_features;
00291 num_vec = num_vectors;
00292 return feature_matrix.matrix;
00293 }
00294
00295 template<class ST> CDenseFeatures<ST>* CDenseFeatures<ST>::get_transposed()
00296 {
00297 int32_t num_feat;
00298 int32_t num_vec;
00299 ST* fm = get_transposed(num_feat, num_vec);
00300
00301 return new CDenseFeatures<ST>(fm, num_feat, num_vec);
00302 }
00303
00304 template<class ST> ST* CDenseFeatures<ST>::get_transposed(int32_t &num_feat, int32_t &num_vec)
00305 {
00306 num_feat = get_num_vectors();
00307 num_vec = num_features;
00308
00309 int32_t old_num_vec=get_num_vectors();
00310
00311 ST* fm = SG_MALLOC(ST, int64_t(num_feat) * num_vec);
00312
00313 for (int32_t i=0; i<old_num_vec; i++)
00314 {
00315 SGVector<ST> vec=get_feature_vector(i);
00316
00317 for (int32_t j=0; j<vec.vlen; j++)
00318 fm[j*int64_t(old_num_vec)+i]=vec.vector[j];
00319
00320 free_feature_vector(vec, i);
00321 }
00322
00323 return fm;
00324 }
00325
00326 template<class ST> void CDenseFeatures<ST>::copy_feature_matrix(SGMatrix<ST> src)
00327 {
00328 if (m_subset_stack->has_subsets())
00329 SG_ERROR("A subset is set, cannot call copy_feature_matrix\n");
00330
00331 free_feature_matrix();
00332 feature_matrix = src.clone();
00333 num_features = src.num_rows;
00334 num_vectors = src.num_cols;
00335 initialize_cache();
00336 }
00337
00338 template<class ST> void CDenseFeatures<ST>::obtain_from_dot(CDotFeatures* df)
00339 {
00340 m_subset_stack->remove_all_subsets();
00341
00342 int32_t num_feat = df->get_dim_feature_space();
00343 int32_t num_vec = df->get_num_vectors();
00344
00345 ASSERT(num_feat>0 && num_vec>0);
00346
00347 free_feature_matrix();
00348 feature_matrix = SGMatrix<ST>(num_feat, num_vec);
00349
00350 for (int32_t i = 0; i < num_vec; i++)
00351 {
00352 SGVector<float64_t> v = df->get_computed_dot_feature_vector(i);
00353 ASSERT(num_feat==v.vlen);
00354
00355 for (int32_t j = 0; j < num_feat; j++)
00356 feature_matrix.matrix[i * int64_t(num_feat) + j] = (ST) v.vector[j];
00357 }
00358 num_features = num_feat;
00359 num_vectors = num_vec;
00360 }
00361
00362 template<class ST> bool CDenseFeatures<ST>::apply_preprocessor(bool force_preprocessing)
00363 {
00364 if (m_subset_stack->has_subsets())
00365 SG_ERROR("A subset is set, cannot call apply_preproc\n");
00366
00367 SG_DEBUG( "force: %d\n", force_preprocessing);
00368
00369 if (feature_matrix.matrix && get_num_preprocessors())
00370 {
00371 for (int32_t i = 0; i < get_num_preprocessors(); i++)
00372 {
00373 if ((!is_preprocessed(i) || force_preprocessing))
00374 {
00375 set_preprocessed(i);
00376 CDensePreprocessor<ST>* p =
00377 (CDensePreprocessor<ST>*) get_preprocessor(i);
00378 SG_INFO( "preprocessing using preproc %s\n", p->get_name());
00379
00380 if (p->apply_to_feature_matrix(this).matrix == NULL)
00381 {
00382 SG_UNREF(p);
00383 return false;
00384 }
00385 SG_UNREF(p);
00386
00387 }
00388 }
00389
00390 return true;
00391 }
00392 else
00393 {
00394 if (!feature_matrix.matrix)
00395 SG_ERROR( "no feature matrix\n");
00396
00397 if (!get_num_preprocessors())
00398 SG_ERROR( "no preprocessors available\n");
00399
00400 return false;
00401 }
00402 }
00403
00404 template<class ST> int32_t CDenseFeatures<ST>::get_size() const { return sizeof(ST); }
00405
00406 template<class ST> int32_t CDenseFeatures<ST>::get_num_vectors() const
00407 {
00408 return m_subset_stack->has_subsets() ? m_subset_stack->get_size() : num_vectors;
00409 }
00410
00411 template<class ST> int32_t CDenseFeatures<ST>::get_num_features() { return num_features; }
00412
00413 template<class ST> void CDenseFeatures<ST>::set_num_features(int32_t num)
00414 {
00415 num_features = num;
00416 initialize_cache();
00417 }
00418
00419 template<class ST> void CDenseFeatures<ST>::set_num_vectors(int32_t num)
00420 {
00421 if (m_subset_stack->has_subsets())
00422 SG_ERROR("A subset is set, cannot call set_num_vectors\n");
00423
00424 num_vectors = num;
00425 initialize_cache();
00426 }
00427
00428 template<class ST> void CDenseFeatures<ST>::initialize_cache()
00429 {
00430 if (m_subset_stack->has_subsets())
00431 SG_ERROR("A subset is set, cannot call initialize_cache\n");
00432
00433 if (num_features && num_vectors)
00434 {
00435 SG_UNREF(feature_cache);
00436 feature_cache = new CCache<ST>(get_cache_size(), num_features,
00437 num_vectors);
00438 SG_REF(feature_cache);
00439 }
00440 }
00441
00442 template<class ST> EFeatureClass CDenseFeatures<ST>::get_feature_class() const { return C_DENSE; }
00443
00444 template<class ST> bool CDenseFeatures<ST>::reshape(int32_t p_num_features, int32_t p_num_vectors)
00445 {
00446 if (m_subset_stack->has_subsets())
00447 SG_ERROR("A subset is set, cannot call reshape\n");
00448
00449 if (p_num_features * p_num_vectors
00450 == this->num_features * this->num_vectors)
00451 {
00452 num_features = p_num_features;
00453 num_vectors = p_num_vectors;
00454 return true;
00455 } else
00456 return false;
00457 }
00458
00459 template<class ST> int32_t CDenseFeatures<ST>::get_dim_feature_space() const { return num_features; }
00460
00461 template<class ST> float64_t CDenseFeatures<ST>::dot(int32_t vec_idx1, CDotFeatures* df,
00462 int32_t vec_idx2)
00463 {
00464 ASSERT(df);
00465 ASSERT(df->get_feature_type() == get_feature_type());
00466 ASSERT(df->get_feature_class() == get_feature_class());
00467 CDenseFeatures<ST>* sf = (CDenseFeatures<ST>*) df;
00468
00469 int32_t len1, len2;
00470 bool free1, free2;
00471
00472 ST* vec1 = get_feature_vector(vec_idx1, len1, free1);
00473 ST* vec2 = sf->get_feature_vector(vec_idx2, len2, free2);
00474
00475 float64_t result = SGVector<ST>::dot(vec1, vec2, len1);
00476
00477 free_feature_vector(vec1, vec_idx1, free1);
00478 sf->free_feature_vector(vec2, vec_idx2, free2);
00479
00480 return result;
00481 }
00482
00483 template<class ST> void CDenseFeatures<ST>::add_to_dense_vec(float64_t alpha, int32_t vec_idx1,
00484 float64_t* vec2, int32_t vec2_len, bool abs_val)
00485 {
00486 ASSERT(vec2_len == num_features);
00487
00488 int32_t vlen;
00489 bool vfree;
00490 ST* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00491
00492 ASSERT(vlen == num_features);
00493
00494 if (abs_val)
00495 {
00496 for (int32_t i = 0; i < num_features; i++)
00497 vec2[i] += alpha * CMath::abs(vec1[i]);
00498 }
00499 else
00500 {
00501 for (int32_t i = 0; i < num_features; i++)
00502 vec2[i] += alpha * vec1[i];
00503 }
00504
00505 free_feature_vector(vec1, vec_idx1, vfree);
00506 }
00507
00508 template<class ST> int32_t CDenseFeatures<ST>::get_nnz_features_for_vector(int32_t num)
00509 {
00510 return num_features;
00511 }
00512
00513 template<class ST> void* CDenseFeatures<ST>::get_feature_iterator(int32_t vector_index)
00514 {
00515 if (vector_index>=get_num_vectors())
00516 {
00517 SG_ERROR("Index out of bounds (number of vectors %d, you "
00518 "requested %d)\n", get_num_vectors(), vector_index);
00519 }
00520
00521 dense_feature_iterator* iterator = SG_MALLOC(dense_feature_iterator, 1);
00522 iterator->vec = get_feature_vector(vector_index, iterator->vlen,
00523 iterator->vfree);
00524 iterator->vidx = vector_index;
00525 iterator->index = 0;
00526 return iterator;
00527 }
00528
00529 template<class ST> bool CDenseFeatures<ST>::get_next_feature(int32_t& index, float64_t& value,
00530 void* iterator)
00531 {
00532 dense_feature_iterator* it = (dense_feature_iterator*) iterator;
00533 if (!it || it->index >= it->vlen)
00534 return false;
00535
00536 index = it->index++;
00537 value = (float64_t) it->vec[index];
00538
00539 return true;
00540 }
00541
00542 template<class ST> void CDenseFeatures<ST>::free_feature_iterator(void* iterator)
00543 {
00544 if (!iterator)
00545 return;
00546
00547 dense_feature_iterator* it = (dense_feature_iterator*) iterator;
00548 free_feature_vector(it->vec, it->vidx, it->vfree);
00549 SG_FREE(it);
00550 }
00551
00552 template<class ST> CFeatures* CDenseFeatures<ST>::copy_subset(SGVector<index_t> indices)
00553 {
00554 SGMatrix<ST> feature_matrix_copy(num_features, indices.vlen);
00555
00556 for (index_t i=0; i<indices.vlen; ++i)
00557 {
00558 index_t real_idx=m_subset_stack->subset_idx_conversion(indices.vector[i]);
00559 memcpy(&feature_matrix_copy.matrix[i*num_features],
00560 &feature_matrix.matrix[real_idx*num_features],
00561 num_features*sizeof(ST));
00562 }
00563
00564 CFeatures* result=new CDenseFeatures(feature_matrix_copy);
00565 SG_REF(result);
00566 return result;
00567 }
00568
00569 template<class ST> ST* CDenseFeatures<ST>::compute_feature_vector(int32_t num, int32_t& len,
00570 ST* target)
00571 {
00572 SG_NOTIMPLEMENTED;
00573 len = 0;
00574 return NULL;
00575 }
00576
00577 template<class ST> void CDenseFeatures<ST>::init()
00578 {
00579 num_vectors = 0;
00580 num_features = 0;
00581
00582 feature_matrix = SGMatrix<ST>();
00583 feature_cache = NULL;
00584
00585 set_generic<ST>();
00586
00587
00588 SG_ADD(&num_vectors, "num_vectors", "Number of vectors.", MS_NOT_AVAILABLE);
00589 SG_ADD(&num_features, "num_features", "Number of features.", MS_NOT_AVAILABLE);
00590 SG_ADD(&feature_matrix, "feature_matrix",
00591 "Matrix of feature vectors / 1 vector per column.", MS_NOT_AVAILABLE);
00592 }
00593
00594 #define GET_FEATURE_TYPE(f_type, sg_type) \
00595 template<> EFeatureType CDenseFeatures<sg_type>::get_feature_type() const \
00596 { \
00597 return f_type; \
00598 }
00599
00600 GET_FEATURE_TYPE(F_BOOL, bool)
00601 GET_FEATURE_TYPE(F_CHAR, char)
00602 GET_FEATURE_TYPE(F_BYTE, uint8_t)
00603 GET_FEATURE_TYPE(F_BYTE, int8_t)
00604 GET_FEATURE_TYPE(F_SHORT, int16_t)
00605 GET_FEATURE_TYPE(F_WORD, uint16_t)
00606 GET_FEATURE_TYPE(F_INT, int32_t)
00607 GET_FEATURE_TYPE(F_UINT, uint32_t)
00608 GET_FEATURE_TYPE(F_LONG, int64_t)
00609 GET_FEATURE_TYPE(F_ULONG, uint64_t)
00610 GET_FEATURE_TYPE(F_SHORTREAL, float32_t)
00611 GET_FEATURE_TYPE(F_DREAL, float64_t)
00612 GET_FEATURE_TYPE(F_LONGREAL, floatmax_t)
00613 #undef GET_FEATURE_TYPE
00614
00615 template<> float64_t CDenseFeatures<bool>::dense_dot(int32_t vec_idx1,
00616 const float64_t* vec2, int32_t vec2_len)
00617 {
00618 ASSERT(vec2_len == num_features);
00619
00620 int32_t vlen;
00621 bool vfree;
00622 bool* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00623
00624 ASSERT(vlen == num_features);
00625 float64_t result = 0;
00626
00627 for (int32_t i = 0; i < num_features; i++)
00628 result += vec1[i] ? vec2[i] : 0;
00629
00630 free_feature_vector(vec1, vec_idx1, vfree);
00631
00632 return result;
00633 }
00634
00635 template<> float64_t CDenseFeatures<char>::dense_dot(int32_t vec_idx1,
00636 const float64_t* vec2, int32_t vec2_len)
00637 {
00638 ASSERT(vec2_len == num_features);
00639
00640 int32_t vlen;
00641 bool vfree;
00642 char* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00643
00644 ASSERT(vlen == num_features);
00645 float64_t result = 0;
00646
00647 for (int32_t i = 0; i < num_features; i++)
00648 result += vec1[i] * vec2[i];
00649
00650 free_feature_vector(vec1, vec_idx1, vfree);
00651
00652 return result;
00653 }
00654
00655 template<> float64_t CDenseFeatures<int8_t>::dense_dot(int32_t vec_idx1,
00656 const float64_t* vec2, int32_t vec2_len)
00657 {
00658 ASSERT(vec2_len == num_features);
00659
00660 int32_t vlen;
00661 bool vfree;
00662 int8_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00663
00664 ASSERT(vlen == num_features);
00665 float64_t result = 0;
00666
00667 for (int32_t i = 0; i < num_features; i++)
00668 result += vec1[i] * vec2[i];
00669
00670 free_feature_vector(vec1, vec_idx1, vfree);
00671
00672 return result;
00673 }
00674
00675 template<> float64_t CDenseFeatures<uint8_t>::dense_dot(
00676 int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00677 {
00678 ASSERT(vec2_len == num_features);
00679
00680 int32_t vlen;
00681 bool vfree;
00682 uint8_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00683
00684 ASSERT(vlen == num_features);
00685 float64_t result = 0;
00686
00687 for (int32_t i = 0; i < num_features; i++)
00688 result += vec1[i] * vec2[i];
00689
00690 free_feature_vector(vec1, vec_idx1, vfree);
00691
00692 return result;
00693 }
00694
00695 template<> float64_t CDenseFeatures<int16_t>::dense_dot(
00696 int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00697 {
00698 ASSERT(vec2_len == num_features);
00699
00700 int32_t vlen;
00701 bool vfree;
00702 int16_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00703
00704 ASSERT(vlen == num_features);
00705 float64_t result = 0;
00706
00707 for (int32_t i = 0; i < num_features; i++)
00708 result += vec1[i] * vec2[i];
00709
00710 free_feature_vector(vec1, vec_idx1, vfree);
00711
00712 return result;
00713 }
00714
00715 template<> float64_t CDenseFeatures<uint16_t>::dense_dot(
00716 int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00717 {
00718 ASSERT(vec2_len == num_features);
00719
00720 int32_t vlen;
00721 bool vfree;
00722 uint16_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00723
00724 ASSERT(vlen == num_features);
00725 float64_t result = 0;
00726
00727 for (int32_t i = 0; i < num_features; i++)
00728 result += vec1[i] * vec2[i];
00729
00730 free_feature_vector(vec1, vec_idx1, vfree);
00731
00732 return result;
00733 }
00734
00735 template<> float64_t CDenseFeatures<int32_t>::dense_dot(
00736 int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00737 {
00738 ASSERT(vec2_len == num_features);
00739
00740 int32_t vlen;
00741 bool vfree;
00742 int32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00743
00744 ASSERT(vlen == num_features);
00745 float64_t result = 0;
00746
00747 for (int32_t i = 0; i < num_features; i++)
00748 result += vec1[i] * vec2[i];
00749
00750 free_feature_vector(vec1, vec_idx1, vfree);
00751
00752 return result;
00753 }
00754
00755 template<> float64_t CDenseFeatures<uint32_t>::dense_dot(
00756 int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00757 {
00758 ASSERT(vec2_len == num_features);
00759
00760 int32_t vlen;
00761 bool vfree;
00762 uint32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00763
00764 ASSERT(vlen == num_features);
00765 float64_t result = 0;
00766
00767 for (int32_t i = 0; i < num_features; i++)
00768 result += vec1[i] * vec2[i];
00769
00770 free_feature_vector(vec1, vec_idx1, vfree);
00771
00772 return result;
00773 }
00774
00775 template<> float64_t CDenseFeatures<int64_t>::dense_dot(
00776 int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00777 {
00778 ASSERT(vec2_len == num_features);
00779
00780 int32_t vlen;
00781 bool vfree;
00782 int64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00783
00784 ASSERT(vlen == num_features);
00785 float64_t result = 0;
00786
00787 for (int32_t i = 0; i < num_features; i++)
00788 result += vec1[i] * vec2[i];
00789
00790 free_feature_vector(vec1, vec_idx1, vfree);
00791
00792 return result;
00793 }
00794
00795 template<> float64_t CDenseFeatures<uint64_t>::dense_dot(
00796 int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00797 {
00798 ASSERT(vec2_len == num_features);
00799
00800 int32_t vlen;
00801 bool vfree;
00802 uint64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00803
00804 ASSERT(vlen == num_features);
00805 float64_t result = 0;
00806
00807 for (int32_t i = 0; i < num_features; i++)
00808 result += vec1[i] * vec2[i];
00809
00810 free_feature_vector(vec1, vec_idx1, vfree);
00811
00812 return result;
00813 }
00814
00815 template<> float64_t CDenseFeatures<float32_t>::dense_dot(
00816 int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00817 {
00818 ASSERT(vec2_len == num_features);
00819
00820 int32_t vlen;
00821 bool vfree;
00822 float32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00823
00824 ASSERT(vlen == num_features);
00825 float64_t result = 0;
00826
00827 for (int32_t i = 0; i < num_features; i++)
00828 result += vec1[i] * vec2[i];
00829
00830 free_feature_vector(vec1, vec_idx1, vfree);
00831
00832 return result;
00833 }
00834
00835 template<> float64_t CDenseFeatures<float64_t>::dense_dot(
00836 int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00837 {
00838 ASSERT(vec2_len == num_features);
00839
00840 int32_t vlen;
00841 bool vfree;
00842 float64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00843
00844 ASSERT(vlen == num_features);
00845 float64_t result = SGVector<float64_t>::dot(vec1, vec2, num_features);
00846
00847 free_feature_vector(vec1, vec_idx1, vfree);
00848
00849 return result;
00850 }
00851
00852 template<> float64_t CDenseFeatures<floatmax_t>::dense_dot(
00853 int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00854 {
00855 ASSERT(vec2_len == num_features);
00856
00857 int32_t vlen;
00858 bool vfree;
00859 floatmax_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00860
00861 ASSERT(vlen == num_features);
00862 float64_t result = 0;
00863
00864 for (int32_t i = 0; i < num_features; i++)
00865 result += vec1[i] * vec2[i];
00866
00867 free_feature_vector(vec1, vec_idx1, vfree);
00868
00869 return result;
00870 }
00871
00872 template<class ST> bool CDenseFeatures<ST>::is_equal(CDenseFeatures* rhs)
00873 {
00874 if ( num_features != rhs->num_features || num_vectors != rhs->num_vectors )
00875 return false;
00876
00877 ST* vec1;
00878 ST* vec2;
00879 int32_t v1len, v2len;
00880 bool v1free, v2free, stop = false;
00881
00882 for (int32_t i = 0; i < num_vectors; i++)
00883 {
00884 vec1 = get_feature_vector(i, v1len, v1free);
00885 vec2 = rhs->get_feature_vector(i, v2len, v2free);
00886
00887 if (v1len!=v2len)
00888 stop = true;
00889
00890 for (int32_t j=0; j<v1len; j++)
00891 {
00892 if (vec1[j]!=vec2[j])
00893 stop = true;
00894 }
00895
00896 free_feature_vector(vec1, i, v1free);
00897 free_feature_vector(vec2, i, v2free);
00898
00899 if (stop)
00900 return false;
00901 }
00902
00903 return true;
00904 }
00905
00906 template<class ST> CFeatures* CDenseFeatures<ST>::create_merged_copy(
00907 CFeatures* other)
00908 {
00909 SG_DEBUG("entering %s::create_merged_copy()\n", get_name());
00910 if (get_feature_type()!=other->get_feature_type() ||
00911 get_feature_class()!=other->get_feature_class() ||
00912 strcmp(get_name(), other->get_name()))
00913 {
00914 SG_ERROR("%s::create_merged_copy(): Features are of different type!\n",
00915 get_name());
00916 }
00917
00918 CDenseFeatures<ST>* casted=dynamic_cast<CDenseFeatures<ST>* >(other);
00919
00920 if (!casted)
00921 {
00922 SG_ERROR("%s::create_merged_copy(): Could not cast object of %s to "
00923 "same type as %s\n",get_name(), other->get_name(), get_name());
00924 }
00925
00926 if (num_features!=casted->num_features)
00927 {
00928 SG_ERROR("%s::create_merged_copy(): Provided feature object has "
00929 "different dimension than this one\n");
00930 }
00931
00932
00933 SGMatrix<ST> data(num_features, num_vectors+casted->get_num_vectors());
00934
00935
00936 SG_DEBUG("copying matrix of this instance\n");
00937 memcpy(data.matrix, feature_matrix.matrix,
00938 num_features*num_vectors*sizeof(ST));
00939
00940
00941 SG_DEBUG("copying matrix of provided instance\n");
00942 memcpy(&data.matrix[num_vectors*num_features],
00943 casted->feature_matrix.matrix,
00944 casted->num_features*casted->num_vectors*sizeof(ST));
00945
00946
00947 CDenseFeatures<ST>* result=new CDenseFeatures<ST>(data);
00948
00949 SG_DEBUG("leaving %s::create_merged_copy()\n", get_name());
00950 return result;
00951 }
00952
00953 #define LOAD(f_load, sg_type) \
00954 template<> void CDenseFeatures<sg_type>::load(CFile* loader) \
00955 { \
00956 SG_SET_LOCALE_C; \
00957 ASSERT(loader); \
00958 sg_type* matrix; \
00959 int32_t num_feat; \
00960 int32_t num_vec; \
00961 loader->f_load(matrix, num_feat, num_vec); \
00962 set_feature_matrix(SGMatrix<sg_type>(matrix, num_feat, num_vec)); \
00963 SG_RESET_LOCALE; \
00964 }
00965
00966 LOAD(get_matrix, bool)
00967 LOAD(get_matrix, char)
00968 LOAD(get_int8_matrix, int8_t)
00969 LOAD(get_matrix, uint8_t)
00970 LOAD(get_matrix, int16_t)
00971 LOAD(get_matrix, uint16_t)
00972 LOAD(get_matrix, int32_t)
00973 LOAD(get_uint_matrix, uint32_t)
00974 LOAD(get_long_matrix, int64_t)
00975 LOAD(get_ulong_matrix, uint64_t)
00976 LOAD(get_matrix, float32_t)
00977 LOAD(get_matrix, float64_t)
00978 LOAD(get_longreal_matrix, floatmax_t)
00979 #undef LOAD
00980
00981 #define SAVE(f_write, sg_type) \
00982 template<> void CDenseFeatures<sg_type>::save(CFile* writer) \
00983 { \
00984 SG_SET_LOCALE_C; \
00985 ASSERT(writer); \
00986 writer->f_write(feature_matrix.matrix, feature_matrix.num_rows, \
00987 feature_matrix.num_cols); \
00988 SG_RESET_LOCALE; \
00989 }
00990
00991 SAVE(set_matrix, bool)
00992 SAVE(set_matrix, char)
00993 SAVE(set_int8_matrix, int8_t)
00994 SAVE(set_matrix, uint8_t)
00995 SAVE(set_matrix, int16_t)
00996 SAVE(set_matrix, uint16_t)
00997 SAVE(set_matrix, int32_t)
00998 SAVE(set_uint_matrix, uint32_t)
00999 SAVE(set_long_matrix, int64_t)
01000 SAVE(set_ulong_matrix, uint64_t)
01001 SAVE(set_matrix, float32_t)
01002 SAVE(set_matrix, float64_t)
01003 SAVE(set_longreal_matrix, floatmax_t)
01004 #undef SAVE
01005
01006 template class CDenseFeatures<bool>;
01007 template class CDenseFeatures<char>;
01008 template class CDenseFeatures<int8_t>;
01009 template class CDenseFeatures<uint8_t>;
01010 template class CDenseFeatures<int16_t>;
01011 template class CDenseFeatures<uint16_t>;
01012 template class CDenseFeatures<int32_t>;
01013 template class CDenseFeatures<uint32_t>;
01014 template class CDenseFeatures<int64_t>;
01015 template class CDenseFeatures<uint64_t>;
01016 template class CDenseFeatures<float32_t>;
01017 template class CDenseFeatures<float64_t>;
01018 template class CDenseFeatures<floatmax_t>;
01019 }