Template class SparseFeatures implements sparse matrices.
Features are an array of TSparse, sorted w.r.t. vec_index (increasing) and, within the same vec_index, w.r.t. feat_index (increasing).
Sparse feature vectors can be accessed via get_sparse_feature_vector() and should be freed (this operation is a NOP in most cases) via free_sparse_feature_vector().
As this is a template class it can directly be used for different data types like sparse matrices of real valued, integer, byte etc type.
Definition at line 55 of file SparseFeatures.h.

Classes | |
| struct | sparse_feature_iterator |
Public Member Functions | |
| CSparseFeatures (int32_t size=0) | |
| CSparseFeatures (TSparse< ST > *src, int32_t num_feat, int32_t num_vec, bool copy=false) | |
| CSparseFeatures (ST *src, int32_t num_feat, int32_t num_vec) | |
| CSparseFeatures (const CSparseFeatures &orig) | |
| CSparseFeatures (CFile *loader) | |
| virtual | ~CSparseFeatures () |
| void | free_sparse_feature_matrix () |
| void | free_sparse_features () |
| virtual CFeatures * | duplicate () const |
| ST | get_feature (int32_t num, int32_t index) |
| ST * | get_full_feature_vector (int32_t num, int32_t &len) |
| void | get_full_feature_vector (ST **dst, int32_t *len, int32_t num) |
| virtual int32_t | get_nnz_features_for_vector (int32_t num) |
| TSparseEntry< ST > * | get_sparse_feature_vector (int32_t num, int32_t &len, bool &vfree) |
| ST | dense_dot (ST alpha, int32_t num, ST *vec, int32_t dim, ST b) |
| void | add_to_dense_vec (float64_t alpha, int32_t num, float64_t *vec, int32_t dim, bool abs_val=false) |
| void | free_sparse_feature_vector (TSparseEntry< ST > *feat_vec, int32_t num, bool free) |
| TSparse< ST > * | get_sparse_feature_matrix (int32_t &num_feat, int32_t &num_vec) |
| void | get_sparse_feature_matrix (TSparse< ST > **dst, int32_t *num_feat, int32_t *num_vec, int64_t *nnz) |
| void | clean_tsparse (TSparse< ST > *sfm, int32_t num_vec) |
| CSparseFeatures< ST > * | get_transposed () |
| TSparse< ST > * | get_transposed (int32_t &num_feat, int32_t &num_vec) |
| virtual void | set_sparse_feature_matrix (TSparse< ST > *src, int32_t num_feat, int32_t num_vec) |
| ST * | get_full_feature_matrix (int32_t &num_feat, int32_t &num_vec) |
| void | get_full_feature_matrix (ST **dst, int32_t *num_feat, int32_t *num_vec) |
| virtual bool | set_full_feature_matrix (ST *src, int32_t num_feat, int32_t num_vec) |
| virtual bool | apply_preproc (bool force_preprocessing=false) |
| virtual int32_t | get_size () |
| bool | obtain_from_simple (CSimpleFeatures< ST > *sf) |
| virtual int32_t | get_num_vectors () |
| int32_t | get_num_features () |
| int32_t | set_num_features (int32_t num) |
| virtual EFeatureClass | get_feature_class () |
| virtual EFeatureType | get_feature_type () |
| void | free_feature_vector (TSparseEntry< ST > *feat_vec, int32_t num, bool free) |
| int64_t | get_num_nonzero_entries () |
| float64_t * | compute_squared (float64_t *sq) |
| float64_t | compute_squared_norm (CSparseFeatures< float64_t > *lhs, float64_t *sq_lhs, int32_t idx_a, CSparseFeatures< float64_t > *rhs, float64_t *sq_rhs, int32_t idx_b) |
| void | load (CFile *loader) |
| void | save (CFile *writer) |
| CLabels * | load_svmlight_file (char *fname, bool do_sort_features=true) |
| void | sort_features () |
| bool | write_svmlight_file (char *fname, CLabels *label) |
| virtual int32_t | get_dim_feature_space () |
| virtual float64_t | dot (int32_t vec_idx1, CDotFeatures *df, int32_t vec_idx2) |
| virtual float64_t | dense_dot (int32_t vec_idx1, const float64_t *vec2, int32_t vec2_len) |
| virtual void * | get_feature_iterator (int32_t vector_index) |
| virtual bool | get_next_feature (int32_t &index, float64_t &value, void *iterator) |
| virtual void | free_feature_iterator (void *iterator) |
| virtual const char * | get_name () const |
Static Public Member Functions | |
| static ST | sparse_dot (ST alpha, TSparseEntry< ST > *avec, int32_t alen, TSparseEntry< ST > *bvec, int32_t blen) |
Protected Member Functions | |
| virtual TSparseEntry< ST > * | compute_sparse_feature_vector (int32_t num, int32_t &len, TSparseEntry< ST > *target=NULL) |
Protected Attributes | |
| int32_t | num_vectors |
| total number of vectors | |
| int32_t | num_features |
| total number of features | |
| TSparse< ST > * | sparse_feature_matrix |
| array of sparse vectors of size num_vectors | |
| CCache< TSparseEntry< ST > > * | feature_cache |
| CSparseFeatures | ( | int32_t | size = 0 |
) |
| CSparseFeatures | ( | TSparse< ST > * | src, | |
| int32_t | num_feat, | |||
| int32_t | num_vec, | |||
| bool | copy = false | |||
| ) |
convenience constructor that creates sparse features from the ones passed as argument
| src | sparse feature matrix | |
| num_feat | number of features | |
| num_vec | number of vectors | |
| copy | true to copy feature matrix |
Definition at line 85 of file SparseFeatures.h.
| CSparseFeatures | ( | ST * | src, | |
| int32_t | num_feat, | |||
| int32_t | num_vec | |||
| ) |
convenience constructor that creates sparse features from dense features
| src | dense feature matrix | |
| num_feat | number of features | |
| num_vec | number of vectors |
Definition at line 113 of file SparseFeatures.h.
| CSparseFeatures | ( | const CSparseFeatures< ST > & | orig | ) |
copy constructor
Definition at line 123 of file SparseFeatures.h.
| CSparseFeatures | ( | CFile * | loader | ) |
constructor loading features from file
| loader | File object to load data from |
Definition at line 149 of file SparseFeatures.h.
| virtual ~CSparseFeatures | ( | ) | [virtual] |
default destructor
Definition at line 159 of file SparseFeatures.h.
| void add_to_dense_vec | ( | float64_t | alpha, | |
| int32_t | num, | |||
| float64_t * | vec, | |||
| int32_t | dim, | |||
| bool | abs_val = false | |||
| ) | [virtual] |
add a sparse feature vector onto a dense one: dense += alpha * sparse
| alpha | scalar to multiply with | |
| num | index of feature vector | |
| vec | dense vector | |
| dim | length of the dense vector | |
| abs_val | if true, do dense+=alpha*abs(sparse) |
Implements CDotFeatures.
Definition at line 471 of file SparseFeatures.h.
| virtual bool apply_preproc | ( | bool | force_preprocessing = false |
) | [virtual] |
apply preprocessor
| force_preprocessing | if preprocessing shall be forced |
Definition at line 823 of file SparseFeatures.h.
| void clean_tsparse | ( | TSparse< ST > * | sfm, | |
| int32_t | num_vec | |||
| ) |
clean TSparse
| sfm | sparse feature matrix | |
| num_vec | number of vectors in matrix |
Definition at line 553 of file SparseFeatures.h.
| virtual TSparseEntry<ST>* compute_sparse_feature_vector | ( | int32_t | num, | |
| int32_t & | len, | |||
| TSparseEntry< ST > * | target = NULL | |||
| ) | [protected, virtual] |
compute the feature vector for sample num; if target is set, the vector is written to target; len is returned by reference
NOT IMPLEMENTED!
| num | num | |
| len | len | |
| target | target |
Definition at line 1476 of file SparseFeatures.h.
compute a^2 on all feature vectors
| sq | the square for each vector is stored in here |
Definition at line 946 of file SparseFeatures.h.
| float64_t compute_squared_norm | ( | CSparseFeatures< float64_t > * | lhs, | |
| float64_t * | sq_lhs, | |||
| int32_t | idx_a, | |||
| CSparseFeatures< float64_t > * | rhs, | |||
| float64_t * | sq_rhs, | |||
| int32_t | idx_b | |||
| ) |
compute (a-b)^2 (== a^2+b^2-2ab); usually called by the compute functions of kernels/distances. It works on two feature vectors although it is a member of a single feature object, so it can be called on either lhs or rhs.
| lhs | left-hand side features | |
| sq_lhs | squared values of left-hand side | |
| idx_a | index of left-hand side's vector to compute | |
| rhs | right-hand side features | |
| sq_rhs | squared values of right-hand side | |
| idx_b | index of right-hand side's vector to compute |
Definition at line 979 of file SparseFeatures.h.
| virtual float64_t dense_dot | ( | int32_t | vec_idx1, | |
| const float64_t * | vec2, | |||
| int32_t | vec2_len | |||
| ) | [virtual] |
compute dot product between vector1 and a dense vector
| vec_idx1 | index of first vector | |
| vec2 | pointer to real valued vector | |
| vec2_len | length of real valued vector |
Implements CDotFeatures.
Definition at line 1347 of file SparseFeatures.h.
| ST dense_dot | ( | ST | alpha, | |
| int32_t | num, | |||
| ST * | vec, | |||
| int32_t | dim, | |||
| ST | b | |||
| ) |
compute the dot product between dense weights and a sparse feature vector: alpha * sparse^T * w + b
| alpha | scalar to multiply with | |
| num | index of feature vector | |
| vec | dense vector to compute dot product with | |
| dim | length of the dense vector | |
| b | bias |
Definition at line 442 of file SparseFeatures.h.
| virtual float64_t dot | ( | int32_t | vec_idx1, | |
| CDotFeatures * | df, | |||
| int32_t | vec_idx2 | |||
| ) | [virtual] |
compute dot product between vector1 and vector2, appointed by their indices
| vec_idx1 | index of first vector | |
| df | DotFeatures (of same kind) to compute dot product with | |
| vec_idx2 | index of second vector |
Implements CDotFeatures.
Definition at line 1321 of file SparseFeatures.h.
| virtual CFeatures* duplicate | ( | ) | const [virtual] |
duplicate feature object
Implements CFeatures.
Definition at line 189 of file SparseFeatures.h.
| virtual void free_feature_iterator | ( | void * | iterator | ) | [virtual] |
clean up iterator; call this function with the iterator returned by get_feature_iterator
| iterator | as returned by get_feature_iterator |
Implements CDotFeatures.
Definition at line 1452 of file SparseFeatures.h.
| void free_feature_vector | ( | TSparseEntry< ST > * | feat_vec, | |
| int32_t | num, | |||
| bool | free | |||
| ) |
free feature vector
| feat_vec | feature vector to free | |
| num | index of vector in cache | |
| free | if vector really should be deleted |
Definition at line 919 of file SparseFeatures.h.
| void free_sparse_feature_matrix | ( | ) |
free sparse feature matrix
Definition at line 167 of file SparseFeatures.h.
| void free_sparse_feature_vector | ( | TSparseEntry< ST > * | feat_vec, | |
| int32_t | num, | |||
| bool | free | |||
| ) |
free sparse feature vector
| feat_vec | feature vector to free | |
| num | index of this vector in the cache | |
| free | if vector should be really deleted |
Definition at line 507 of file SparseFeatures.h.
| void free_sparse_features | ( | ) |
free sparse feature matrix and cache
Definition at line 178 of file SparseFeatures.h.
| virtual int32_t get_dim_feature_space | ( | ) | [virtual] |
obtain the dimensionality of the feature space
(do not confuse this with the dimensionality of the input space, usually obtained via get_num_features())
Implements CDotFeatures.
Definition at line 1309 of file SparseFeatures.h.
| ST get_feature | ( | int32_t | num, | |
| int32_t | index | |||
| ) |
get a single feature
| num | number of feature vector to retrieve | |
| index | index of feature in this vector |
Definition at line 201 of file SparseFeatures.h.
| virtual EFeatureClass get_feature_class | ( | ) | [virtual] |
get feature class
Implements CFeatures.
Definition at line 905 of file SparseFeatures.h.
| virtual void* get_feature_iterator | ( | int32_t | vector_index | ) | [virtual] |
iterate over the non-zero features
call get_feature_iterator first, followed by get_next_feature and free_feature_iterator to cleanup
| vector_index | the index of the vector over whose components to iterate over |
Implements CDotFeatures.
Definition at line 1404 of file SparseFeatures.h.
| virtual EFeatureType get_feature_type | ( | ) | [virtual] |
| ST* get_full_feature_matrix | ( | int32_t & | num_feat, | |
| int32_t & | num_vec | |||
| ) |
gets a copy of the full feature matrix; num_feat and num_vec are returned by reference
| num_feat | number of features in matrix | |
| num_vec | number of vectors in matrix |
Definition at line 664 of file SparseFeatures.h.
| void get_full_feature_matrix | ( | ST ** | dst, | |
| int32_t * | num_feat, | |||
| int32_t * | num_vec | |||
| ) |
gets a copy of the full feature matrix (swig compatible); num_feat and num_vec are returned by reference
| dst | full feature matrix | |
| num_feat | number of features in matrix | |
| num_vec | number of vectors in matrix |
Definition at line 699 of file SparseFeatures.h.
| ST* get_full_feature_vector | ( | int32_t | num, | |
| int32_t & | len | |||
| ) |
converts a sparse feature vector into a dense one (the preprocessed result of compute_feature_vector); the caller cleans up
| num | index of feature vector | |
| len | length is returned by reference |
Definition at line 233 of file SparseFeatures.h.
| void get_full_feature_vector | ( | ST ** | dst, | |
| int32_t * | len, | |||
| int32_t | num | |||
| ) |
get the fully expanded dense feature vector num
| dst | feature vector | |
| len | length is returned by reference | |
| num | index of feature vector |
Definition at line 265 of file SparseFeatures.h.
| virtual const char* get_name | ( | void | ) | const [virtual] |
| virtual bool get_next_feature | ( | int32_t & | index, | |
| float64_t & | value, | |||
| void * | iterator | |||
| ) | [virtual] |
iterate over the non-zero features
call this function with the iterator returned by get_feature_iterator and call free_feature_iterator to cleanup
| index | is returned by reference (-1 when not available) | |
| value | is returned by reference | |
| iterator | as returned by get_feature_iterator |
Implements CDotFeatures.
Definition at line 1433 of file SparseFeatures.h.
| virtual int32_t get_nnz_features_for_vector | ( | int32_t | num | ) | [virtual] |
get number of non-zero features in vector
| num | which vector |
Implements CDotFeatures.
Definition at line 297 of file SparseFeatures.h.
| int32_t get_num_features | ( | ) |
| int64_t get_num_nonzero_entries | ( | ) |
get number of non-zero entries in sparse feature matrix
Definition at line 932 of file SparseFeatures.h.
| virtual int32_t get_num_vectors | ( | ) | [virtual] |
get number of feature vectors
Implements CFeatures.
Definition at line 874 of file SparseFeatures.h.
| virtual int32_t get_size | ( | ) | [virtual] |
get memory footprint of one feature
Implements CFeatures.
Definition at line 853 of file SparseFeatures.h.
| TSparse<ST>* get_sparse_feature_matrix | ( | int32_t & | num_feat, | |
| int32_t & | num_vec | |||
| ) |
get the pointer to the sparse feature matrix; num_feat and num_vec are returned by reference
| num_feat | number of features in matrix | |
| num_vec | number of vectors in matrix |
Definition at line 523 of file SparseFeatures.h.
| void get_sparse_feature_matrix | ( | TSparse< ST > ** | dst, | |
| int32_t * | num_feat, | |||
| int32_t * | num_vec, | |||
| int64_t * | nnz | |||
| ) |
get the pointer to the sparse feature matrix (swig compatible); num_feat and num_vec are returned by reference
| dst | feature matrix | |
| num_feat | number of features in matrix | |
| num_vec | number of vectors in matrix | |
| nnz | number of nonzero elements |
Definition at line 539 of file SparseFeatures.h.
| TSparseEntry<ST>* get_sparse_feature_vector | ( | int32_t | num, | |
| int32_t & | len, | |||
| bool & | vfree | |||
| ) |
get the sparse feature vector for sample num: taken from the matrix as it is if the matrix is initialized, else the preprocessed result of compute_feature_vector is returned
| num | index of feature vector | |
| len | number of sparse entries is returned by reference | |
| vfree | whether returned vector must be freed by caller via free_sparse_feature_vector |
Definition at line 316 of file SparseFeatures.h.
| TSparse<ST>* get_transposed | ( | int32_t & | num_feat, | |
| int32_t & | num_vec | |||
| ) |
compute and return the transpose of the sparse feature matrix, which will be preprocessed. num_feat and num_vec are returned by reference; the caller has to clean up
| num_feat | number of features in matrix | |
| num_vec | number of vectors in matrix |
Definition at line 585 of file SparseFeatures.h.
| CSparseFeatures<ST>* get_transposed | ( | ) |
get a transposed copy of the features
Definition at line 568 of file SparseFeatures.h.
| void load | ( | CFile * | loader | ) | [virtual] |
load features from file
| loader | File object to load data from |
Reimplemented from CFeatures.
| CLabels* load_svmlight_file | ( | char * | fname, | |
| bool | do_sort_features = true | |||
| ) |
load features from file
| fname | filename to load from | |
| do_sort_features | if true features will be sorted to ensure they are in ascending order |
Definition at line 1054 of file SparseFeatures.h.
| bool obtain_from_simple | ( | CSimpleFeatures< ST > * | sf | ) |
obtain sparse features from simple features
| sf | simple features |
Definition at line 860 of file SparseFeatures.h.
| void save | ( | CFile * | writer | ) | [virtual] |
| virtual bool set_full_feature_matrix | ( | ST * | src, | |
| int32_t | num_feat, | |||
| int32_t | num_vec | |||
| ) | [virtual] |
creates a sparse feature matrix from a full dense feature matrix. It is necessary to set feature_matrix, num_features and num_vectors, where num_features is the column offset and columns are linear in memory (see above for the definition of sparse_feature_matrix)
| src | full feature matrix | |
| num_feat | number of features in matrix | |
| num_vec | number of vectors in matrix |
Definition at line 734 of file SparseFeatures.h.
| int32_t set_num_features | ( | int32_t | num | ) |
set number of features
Sometimes when loading sparse features not all possible dimensions are used. This may pose a problem to classifiers when being applied to higher dimensional test-data. This function allows to artificially explode the feature space
| num | the number of features, must be larger than the current number of features |
Definition at line 893 of file SparseFeatures.h.
| virtual void set_sparse_feature_matrix | ( | TSparse< ST > * | src, | |
| int32_t | num_feat, | |||
| int32_t | num_vec | |||
| ) | [virtual] |
set feature matrix. It is necessary to set feature_matrix, num_features and num_vectors, where num_features is the column offset and columns are linear in memory (see below for the definition of feature_matrix)
| src | new sparse feature matrix | |
| num_feat | number of features in matrix | |
| num_vec | number of vectors in matrix |
Definition at line 648 of file SparseFeatures.h.
| void sort_features | ( | ) |
ensure that features occur in ascending order, only call when no preprocessors are attached
Definition at line 1221 of file SparseFeatures.h.
| static ST sparse_dot | ( | ST | alpha, | |
| TSparseEntry< ST > * | avec, | |||
| int32_t | alen, | |||
| TSparseEntry< ST > * | bvec, | |||
| int32_t | blen | |||
| ) | [static] |
compute the dot product between two sparse feature vectors: alpha * avec^T * bvec
| alpha | scalar to multiply with | |
| avec | first sparse feature vector | |
| alen | avec's length | |
| bvec | second sparse feature vector | |
| blen | bvec's length |
Definition at line 384 of file SparseFeatures.h.
| bool write_svmlight_file | ( | char * | fname, | |
| CLabels * | label | |||
| ) |
write features to file using svm light format
| fname | filename to write to | |
| label | Label object (number of labels must correspond to number of features) |
Definition at line 1269 of file SparseFeatures.h.
CCache< TSparseEntry<ST> >* feature_cache [protected] |
feature cache
Definition at line 1494 of file SparseFeatures.h.
int32_t num_features [protected] |
total number of features
Definition at line 1488 of file SparseFeatures.h.
int32_t num_vectors [protected] |
total number of vectors
Definition at line 1485 of file SparseFeatures.h.
TSparse<ST>* sparse_feature_matrix [protected] |
array of sparse vectors of size num_vectors
Definition at line 1491 of file SparseFeatures.h.