The CommUlongString kernel may be used to compute the spectrum kernel from strings that have been mapped into unsigned 64bit integers.
These 64bit integers correspond to k-mers. To be applicable in this kernel they need to be sorted (e.g. via the SortUlongString pre-processor).
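It basically uses the algorithm of the Unix "comm" command (hence the name) to compute:
\[ k(\mathbf{x}, \mathbf{x'}) = \Phi_k(\mathbf{x}) \cdot \Phi_k(\mathbf{x'}) \]
where \(\Phi_k\) maps a sequence \(\mathbf{x}\) that consists of letters in \(\Sigma\) to a feature vector of size \(|\Sigma|^k\). In this feature vector each entry denotes how often the corresponding k-mer appears in \(\mathbf{x}\).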
Note that this representation enables spectrum kernels of order up to 8 for 8-bit alphabets (like binaries) and order up to 32 for 2-bit alphabets like DNA, since each k-mer has to fit into a single 64-bit integer.
For this kernel the linadd speedups are implemented (though there is room for improvement here when a whole set of sequences is ADDed) using sorted lists.
Definition at line 48 of file CommUlongStringKernel.h.

Public Member Functions | |
| CCommUlongStringKernel (int32_t size=10, bool use_sign=false) | |
| CCommUlongStringKernel (CStringFeatures< uint64_t > *l, CStringFeatures< uint64_t > *r, bool use_sign=false, int32_t size=10) | |
| virtual | ~CCommUlongStringKernel () |
| virtual bool | init (CFeatures *l, CFeatures *r) |
| virtual void | cleanup () |
| virtual EKernelType | get_kernel_type () |
| virtual const char * | get_name () const |
| virtual bool | init_optimization (int32_t count, int32_t *IDX, float64_t *weights) |
| virtual bool | delete_optimization () |
| virtual float64_t | compute_optimized (int32_t idx) |
| void | merge_dictionaries (int32_t &t, int32_t j, int32_t &k, uint64_t *vec, uint64_t *dic, float64_t *dic_weights, float64_t weight, int32_t vec_idx) |
| virtual void | add_to_normal (int32_t idx, float64_t weight) |
| virtual void | clear_normal () |
| virtual void | remove_lhs () |
| virtual void | remove_rhs () |
| virtual EFeatureType | get_feature_type () |
| void | get_dictionary (int32_t &dsize, uint64_t *&dict, float64_t *&dweights) |
Protected Member Functions | |
| float64_t | compute (int32_t idx_a, int32_t idx_b) |
Protected Attributes | |
| CDynamicArray< uint64_t > | dictionary |
| CDynamicArray< float64_t > | dictionary_weights |
| bool | use_sign |
| CCommUlongStringKernel | ( | int32_t | size = 10, | |
| bool | use_sign = false | |||
| ) |
constructor
| size | cache size | |
| use_sign | if sign shall be used |
Definition at line 19 of file CommUlongStringKernel.cpp.
| CCommUlongStringKernel | ( | CStringFeatures< uint64_t > * | l, | |
| CStringFeatures< uint64_t > * | r, | |||
| bool | use_sign = false, | |||
| int32_t | size = 10 | |||
| ) |
constructor
| l | features of left-hand side | |
| r | features of right-hand side | |
| use_sign | if sign shall be used | |
| size | cache size |
Definition at line 28 of file CommUlongStringKernel.cpp.
| ~CCommUlongStringKernel | ( | ) | [virtual] |
Definition at line 39 of file CommUlongStringKernel.cpp.
| void add_to_normal | ( | int32_t | idx, | |
| float64_t | weight | |||
| ) | [virtual] |
add to normal
| idx | where to add | |
| weight | what to add |
Reimplemented from CKernel.
Definition at line 145 of file CommUlongStringKernel.cpp.
| void cleanup | ( | ) | [virtual] |
clean up kernel
Reimplemented from CKernel.
Definition at line 73 of file CommUlongStringKernel.cpp.
| void clear_normal | ( | ) | [virtual] |
| float64_t compute | ( | int32_t | idx_a, | |
| int32_t | idx_b | |||
| ) | [protected, virtual] |
compute the kernel function for features a and b; idx_{a,b} denote the indices of the feature vectors in the corresponding feature object
| idx_a | index a | |
| idx_b | index b |
Implements CKernel.
Definition at line 80 of file CommUlongStringKernel.cpp.
| float64_t compute_optimized | ( | int32_t | idx | ) | [virtual] |
compute optimized
| idx | index to compute |
Reimplemented from CKernel.
Definition at line 257 of file CommUlongStringKernel.cpp.
| bool delete_optimization | ( | ) | [virtual] |
delete optimization
Reimplemented from CKernel.
Definition at line 248 of file CommUlongStringKernel.cpp.
| void get_dictionary | ( | int32_t & | dsize, | |
| uint64_t *& | dict, | |||
| float64_t *& | dweights | |||
| ) |
get dictionary
| dsize | dictionary size will be stored in here | |
| dict | dictionary will be stored in here | |
| dweights | dictionary weights will be stored in here |
Definition at line 183 of file CommUlongStringKernel.h.
| virtual EFeatureType get_feature_type | ( | ) | [virtual] |
return feature type the kernel can deal with
Reimplemented from CStringKernel< uint64_t >.
Definition at line 175 of file CommUlongStringKernel.h.
| virtual EKernelType get_kernel_type | ( | ) | [virtual] |
return what type of kernel we are
Implements CStringKernel< uint64_t >.
Definition at line 87 of file CommUlongStringKernel.h.
| virtual const char* get_name | ( | void | ) | const [virtual] |
return the kernel's name
Reimplemented from CStringKernel< uint64_t >.
Definition at line 93 of file CommUlongStringKernel.h.
| bool init | ( | CFeatures * | l, | |
| CFeatures * | r | |||
| ) | [virtual] |
initialize kernel
| l | features of left-hand side | |
| r | features of right-hand side |
Reimplemented from CStringKernel< uint64_t >.
Definition at line 67 of file CommUlongStringKernel.cpp.
| bool init_optimization | ( | int32_t | count, | |
| int32_t * | IDX, | |||
| float64_t * | weights | |||
| ) | [virtual] |
initialize optimization
| count | count | |
| IDX | index | |
| weights | weights |
Reimplemented from CKernel.
Definition at line 220 of file CommUlongStringKernel.cpp.
| void merge_dictionaries | ( | int32_t & | t, | |
| int32_t | j, | |||
| int32_t & | k, | |||
| uint64_t * | vec, | |||
| uint64_t * | dic, | |||
| float64_t * | dic_weights, | |||
| float64_t | weight, | |||
| int32_t | vec_idx | |||
| ) |
merge dictionaries
| t | t | |
| j | j | |
| k | k | |
| vec | vector | |
| dic | dictionary | |
| dic_weights | dictionary weights | |
| weight | weight | |
| vec_idx | vector index |
Definition at line 129 of file CommUlongStringKernel.h.
| void remove_lhs | ( | ) | [virtual] |
remove lhs from kernel
Reimplemented from CKernel.
Definition at line 44 of file CommUlongStringKernel.cpp.
| void remove_rhs | ( | ) | [virtual] |
remove rhs from kernel
Reimplemented from CKernel.
Definition at line 57 of file CommUlongStringKernel.cpp.
CDynamicArray<uint64_t> dictionary [protected] |
dictionary
Definition at line 204 of file CommUlongStringKernel.h.
CDynamicArray<float64_t> dictionary_weights [protected] |
dictionary weights
Definition at line 206 of file CommUlongStringKernel.h.
bool use_sign [protected] |
if sign shall be used
Definition at line 209 of file CommUlongStringKernel.h.