00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef _WEIGHTEDDEGREESTRINGKERNEL_H___
00013 #define _WEIGHTEDDEGREESTRINGKERNEL_H___
00014
00015 #include "lib/common.h"
00016 #include "lib/Trie.h"
00017 #include "kernel/StringKernel.h"
00018 #include "kernel/MultitaskKernelMklNormalizer.h"
00019 #include "features/StringFeatures.h"
00020
00021 namespace shogun
00022 {
00023
/** Weighting types selectable for CWeightedDegreeStringKernel.
 *
 * The numeric value of every enumerator is fixed explicitly, so the values
 * stay stable even if enumerators are reordered or new ones are inserted.
 *
 * NOTE(review): the E_BLOCK_* names mirror the init_block_weights_*()
 * members of CWeightedDegreeStringKernel; the exact weighting formula behind
 * each one lives in the out-of-line implementation -- confirm there.
 */
enum EWDKernType
{
	/** plain weighted degree weighting */
	E_WD=0,
	/** presumably: weights supplied by the caller -- TODO confirm */
	E_EXTERNAL=1,

	/** block weighting variants */
	E_BLOCK_CONST=2,
	E_BLOCK_LINEAR=3,
	E_BLOCK_SQPOLY=4,
	E_BLOCK_CUBICPOLY=5,
	E_BLOCK_EXP=6,
	/* no comma after the last enumerator: a trailing comma is only
	 * well-formed from C++11 onwards */
	E_BLOCK_LOG=7
};
00036
00037
00052 class CWeightedDegreeStringKernel: public CStringKernel<char>
00053 {
00054 public:
00055
00059 CWeightedDegreeStringKernel();
00060
00061
00067 CWeightedDegreeStringKernel(int32_t degree, EWDKernType type=E_WD);
00068
00074 CWeightedDegreeStringKernel(float64_t* weights, int32_t degree);
00075
00082 CWeightedDegreeStringKernel(
00083 CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t degree);
00084
00085 virtual ~CWeightedDegreeStringKernel();
00086
00093 virtual bool init(CFeatures* l, CFeatures* r);
00094
00096 virtual void cleanup();
00097
00105 EWDKernType get_type() const
00106 {
00107 return type;
00108 }
00109
00114 int32_t get_degree() const
00115 {
00116 return degree;
00117 }
00118
00124 int32_t get_max_mismatch() const
00125 {
00126 return max_mismatch;
00127 }
00128
00133 virtual EKernelType get_kernel_type() { return K_WEIGHTEDDEGREE; }
00134
00139 virtual const char* get_name() const {
00140 return "WeightedDegreeStringKernel";
00141 }
00142
00150 inline virtual bool init_optimization(
00151 int32_t count, int32_t *IDX, float64_t* alphas)
00152 {
00153 return init_optimization(count, IDX, alphas, -1);
00154 }
00155
00166 virtual bool init_optimization(
00167 int32_t count, int32_t *IDX, float64_t* alphas, int32_t tree_num);
00168
00173 virtual bool delete_optimization();
00174
00180 virtual float64_t compute_optimized(int32_t idx)
00181 {
00182 if (get_is_initialized())
00183 return compute_by_tree(idx);
00184
00185 SG_ERROR( "CWeightedDegreeStringKernel optimization not initialized\n");
00186 return 0;
00187 }
00188
00193 static void* compute_batch_helper(void* p);
00194
00205 virtual void compute_batch(
00206 int32_t num_vec, int32_t* vec_idx, float64_t* target,
00207 int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
00208 float64_t factor=1.0);
00209
00213 inline virtual void clear_normal()
00214 {
00215 if (get_is_initialized())
00216 {
00217
00218 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00219 SG_ERROR("not implemented");
00220
00221 tries->delete_trees(max_mismatch==0);
00222 set_is_initialized(false);
00223 }
00224 }
00225
00231 inline virtual void add_to_normal(int32_t idx, float64_t weight)
00232 {
00233
00234 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00235 SG_ERROR("not implemented");
00236
00237 if (max_mismatch==0)
00238 add_example_to_tree(idx, weight);
00239 else
00240 add_example_to_tree_mismatch(idx, weight);
00241
00242 set_is_initialized(true);
00243 }
00244
00249 inline virtual int32_t get_num_subkernels()
00250 {
00251 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00252 return ((CMultitaskKernelMklNormalizer*)normalizer)->get_num_betas();
00253 if (position_weights!=NULL)
00254 return (int32_t) ceil(1.0*seq_length/mkl_stepsize) ;
00255 if (length==0)
00256 return (int32_t) ceil(1.0*get_degree()/mkl_stepsize);
00257 return (int32_t) ceil(1.0*get_degree()*length/mkl_stepsize) ;
00258 }
00259
00265 inline void compute_by_subkernel(
00266 int32_t idx, float64_t * subkernel_contrib)
00267 {
00268
00269 if (get_is_initialized())
00270 {
00271
00272 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00273 SG_ERROR("not implemented");
00274
00275 compute_by_tree(idx, subkernel_contrib);
00276 return ;
00277 }
00278
00279 SG_ERROR( "CWeightedDegreeStringKernel optimization not initialized\n");
00280 }
00281
00287 inline const float64_t* get_subkernel_weights(int32_t& num_weights)
00288 {
00289
00290 num_weights = get_num_subkernels();
00291
00292 delete[] weights_buffer ;
00293 weights_buffer = new float64_t[num_weights];
00294
00295 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00296 for (int32_t i=0; i<num_weights; i++)
00297 weights_buffer[i] = ((CMultitaskKernelMklNormalizer*)normalizer)->get_beta(i);
00298 else if (position_weights!=NULL)
00299 for (int32_t i=0; i<num_weights; i++)
00300 weights_buffer[i] = position_weights[i*mkl_stepsize];
00301 else
00302 for (int32_t i=0; i<num_weights; i++)
00303 weights_buffer[i] = weights[i*mkl_stepsize];
00304
00305 return weights_buffer;
00306 }
00307
00313 inline void set_subkernel_weights(
00314 float64_t* weights2, int32_t num_weights2)
00315 {
00316 int32_t num_weights = get_num_subkernels();
00317 if (num_weights!=num_weights2)
00318 SG_ERROR( "number of weights do not match\n");
00319
00320
00321 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00322 for (int32_t i=0; i<num_weights; i++)
00323 ((CMultitaskKernelMklNormalizer*)normalizer)->set_beta(i, weights2[i]);
00324 else if (position_weights!=NULL)
00325 {
00326 for (int32_t i=0; i<num_weights; i++)
00327 {
00328 for (int32_t j=0; j<mkl_stepsize; j++)
00329 {
00330 if (i*mkl_stepsize+j<seq_length)
00331 position_weights[i*mkl_stepsize+j] = weights2[i];
00332 }
00333 }
00334 }
00335 else if (length==0)
00336 {
00337 for (int32_t i=0; i<num_weights; i++)
00338 {
00339 for (int32_t j=0; j<mkl_stepsize; j++)
00340 {
00341 if (i*mkl_stepsize+j<get_degree())
00342 weights[i*mkl_stepsize+j] = weights2[i];
00343 }
00344 }
00345 }
00346 else
00347 {
00348 for (int32_t i=0; i<num_weights; i++)
00349 {
00350 for (int32_t j=0; j<mkl_stepsize; j++)
00351 {
00352 if (i*mkl_stepsize+j<get_degree()*length)
00353 weights[i*mkl_stepsize+j] = weights2[i];
00354 }
00355 }
00356 }
00357 }
00358
00363 virtual bool set_normalizer(CKernelNormalizer* normalizer_) {
00364
00365 if (normalizer_ && strcmp(normalizer_->get_name(),"MultitaskKernelTreeNormalizer")==0) {
00366 unset_property(KP_LINADD);
00367 unset_property(KP_BATCHEVALUATION);
00368 }
00369 else
00370 {
00371 set_property(KP_LINADD);
00372 set_property(KP_BATCHEVALUATION);
00373 }
00374
00375
00376 return CStringKernel<char>::set_normalizer(normalizer_);
00377
00378 }
00379
00380
00386 float64_t *compute_abs_weights(int32_t & len);
00387
00394 void compute_by_tree(int32_t idx, float64_t *LevelContrib);
00395
00400 bool is_tree_initialized() { return tree_initialized; }
00401
00407 inline float64_t *get_degree_weights(int32_t& d, int32_t& len)
00408 {
00409 d=degree;
00410 len=length;
00411 return weights;
00412 }
00413
00419 inline float64_t *get_weights(int32_t& num_weights)
00420 {
00421
00422 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00423 SG_ERROR("not implemented");
00424
00425 if (position_weights!=NULL)
00426 {
00427 num_weights = seq_length ;
00428 return position_weights ;
00429 }
00430 if (length==0)
00431 num_weights = degree ;
00432 else
00433 num_weights = degree*length ;
00434 return weights;
00435 }
00436
00442 inline float64_t *get_position_weights(int32_t& len)
00443 {
00444 len=seq_length;
00445 return position_weights;
00446 }
00447
00453 bool set_wd_weights_by_type(EWDKernType type);
00454
00461 void set_wd_weights(float64_t* p_weights, int32_t d)
00462 {
00463 set_weights(p_weights,d,0);
00464 }
00465
00472 bool set_weights(float64_t* weights, int32_t d, int32_t len);
00473
00480 bool set_position_weights(float64_t* pws, int32_t len=0);
00481
00486 bool init_block_weights();
00487
00492 bool init_block_weights_from_wd();
00493
00498 bool init_block_weights_from_wd_external();
00499
00504 bool init_block_weights_const();
00505
00510 bool init_block_weights_linear();
00511
00516 bool init_block_weights_sqpoly();
00517
00522 bool init_block_weights_cubicpoly();
00523
00528 bool init_block_weights_exp();
00529
00534 bool init_block_weights_log();
00535
00540 bool delete_position_weights()
00541 {
00542 delete[] position_weights;
00543 position_weights=NULL;
00544 return true;
00545 }
00546
00552 bool set_max_mismatch(int32_t max);
00553
00558 inline int32_t get_max_mismatch() { return max_mismatch; }
00559
00565 inline bool set_degree(int32_t deg) { degree=deg; return true; }
00566
00571 inline int32_t get_degree() { return degree; }
00572
00578 inline bool set_use_block_computation(bool block)
00579 {
00580 block_computation=block;
00581 return true;
00582 }
00583
00588 inline bool get_use_block_computation() { return block_computation; }
00589
00595 inline bool set_mkl_stepsize(int32_t step)
00596 {
00597 if (step<1)
00598 SG_ERROR("Stepsize must be a positive integer\n");
00599 mkl_stepsize=step;
00600 return true;
00601 }
00602
00607 inline int32_t get_mkl_stepsize() { return mkl_stepsize; }
00608
00614 inline bool set_which_degree(int32_t which)
00615 {
00616 which_degree=which;
00617 return true;
00618 }
00619
00624 inline int32_t get_which_degree() { return which_degree; }
00625
00626 protected:
00628 void create_empty_tries();
00629
00635 void add_example_to_tree(int32_t idx, float64_t weight);
00636
00643 void add_example_to_single_tree(
00644 int32_t idx, float64_t weight, int32_t tree_num);
00645
00651 void add_example_to_tree_mismatch(int32_t idx, float64_t weight);
00652
00659 void add_example_to_single_tree_mismatch(
00660 int32_t idx, float64_t weight, int32_t tree_num);
00661
00667 float64_t compute_by_tree(int32_t idx);
00668
00677 float64_t compute(int32_t idx_a, int32_t idx_b);
00678
00687 float64_t compute_with_mismatch(
00688 char* avec, int32_t alen, char* bvec, int32_t blen);
00689
00698 float64_t compute_without_mismatch(
00699 char* avec, int32_t alen, char* bvec, int32_t blen);
00700
00709 float64_t compute_without_mismatch_matrix(
00710 char* avec, int32_t alen, char* bvec, int32_t blen);
00711
00720 float64_t compute_using_block(char* avec, int32_t alen,
00721 char* bvec, int32_t blen);
00722
00724 virtual void remove_lhs();
00725
00726 private:
00729 void init();
00730
00731 protected:
00735 float64_t* weights;
00737 int32_t weights_degree;
00739 int32_t weights_length;
00740
00741
00743 float64_t* position_weights;
00745 int32_t position_weights_len;
00747 float64_t* weights_buffer;
00749 int32_t mkl_stepsize;
00751 int32_t degree;
00753 int32_t length;
00754
00756 int32_t max_mismatch;
00758 int32_t seq_length;
00759
00761 bool initialized;
00762
00764 bool block_computation;
00765
00767 float64_t* block_weights;
00769 EWDKernType type;
00771 int32_t which_degree;
00772
00774 CTrie<DNATrie>* tries;
00775
00777 bool tree_initialized;
00778
00780 CAlphabet* alphabet;
00781 };
00782
00783 }
00784
00785 #endif