00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef _WEIGHTEDDEGREESTRINGKERNEL_H___
00013 #define _WEIGHTEDDEGREESTRINGKERNEL_H___
00014
00015 #include <shogun/lib/common.h>
00016 #include <shogun/lib/Trie.h>
00017 #include <shogun/kernel/string/StringKernel.h>
00018 #include <shogun/transfer/multitask/MultitaskKernelMklNormalizer.h>
00019 #include <shogun/features/StringFeatures.h>
00020
00021 namespace shogun
00022 {
00023
/** Weighted degree kernel type.
 *
 * The numeric values are spelled out explicitly and must stay stable;
 * the E_BLOCK_* names mirror the init_block_weights_*() members declared
 * by CWeightedDegreeStringKernel (presumably each selects the matching
 * block-weight initialisation -- confirm against the implementation file).
 */
enum EWDKernType
{
    /** plain weighted degree weighting */
    E_WD=0,
    /** externally supplied weighting */
    E_EXTERNAL=1,

    /** block weighting: const */
    E_BLOCK_CONST=2,
    /** block weighting: linear */
    E_BLOCK_LINEAR=3,
    /** block weighting: squared polynomial */
    E_BLOCK_SQPOLY=4,
    /** block weighting: cubic polynomial */
    E_BLOCK_CUBICPOLY=5,
    /** block weighting: exponential */
    E_BLOCK_EXP=6,
    /** block weighting: logarithmic (no trailing comma: not valid before C++11) */
    E_BLOCK_LOG=7
};
00037
00038
00053 class CWeightedDegreeStringKernel: public CStringKernel<char>
00054 {
00055 public:
00056
00060 CWeightedDegreeStringKernel();
00061
00062
00068 CWeightedDegreeStringKernel(int32_t degree, EWDKernType type=E_WD);
00069
00074 CWeightedDegreeStringKernel(SGVector<float64_t> weights);
00075
00082 CWeightedDegreeStringKernel(
00083 CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t degree);
00084
00085 virtual ~CWeightedDegreeStringKernel();
00086
00093 virtual bool init(CFeatures* l, CFeatures* r);
00094
00096 virtual void cleanup();
00097
00105 EWDKernType get_type() const
00106 {
00107 return type;
00108 }
00109
00114 virtual EKernelType get_kernel_type() { return K_WEIGHTEDDEGREE; }
00115
00120 virtual const char* get_name() const {
00121 return "WeightedDegreeStringKernel";
00122 }
00123
00131 virtual bool init_optimization(
00132 int32_t count, int32_t *IDX, float64_t* alphas)
00133 {
00134 return init_optimization(count, IDX, alphas, -1);
00135 }
00136
00147 virtual bool init_optimization(
00148 int32_t count, int32_t *IDX, float64_t* alphas, int32_t tree_num);
00149
00154 virtual bool delete_optimization();
00155
00161 virtual float64_t compute_optimized(int32_t idx)
00162 {
00163 if (get_is_initialized())
00164 return compute_by_tree(idx);
00165
00166 SG_ERROR( "CWeightedDegreeStringKernel optimization not initialized\n");
00167 return 0;
00168 }
00169
00174 static void* compute_batch_helper(void* p);
00175
00186 virtual void compute_batch(
00187 int32_t num_vec, int32_t* vec_idx, float64_t* target,
00188 int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
00189 float64_t factor=1.0);
00190
00194 virtual void clear_normal()
00195 {
00196 if (get_is_initialized())
00197 {
00198
00199 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00200 SG_ERROR("not implemented");
00201
00202 tries->delete_trees(max_mismatch==0);
00203 set_is_initialized(false);
00204 }
00205 }
00206
00212 virtual void add_to_normal(int32_t idx, float64_t weight)
00213 {
00214
00215 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00216 SG_ERROR("not implemented");
00217
00218 if (max_mismatch==0)
00219 add_example_to_tree(idx, weight);
00220 else
00221 add_example_to_tree_mismatch(idx, weight);
00222
00223 set_is_initialized(true);
00224 }
00225
00230 virtual int32_t get_num_subkernels()
00231 {
00232 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00233 return ((CMultitaskKernelMklNormalizer*)normalizer)->get_num_betas();
00234 if (position_weights!=NULL)
00235 return (int32_t) ceil(1.0*seq_length/mkl_stepsize) ;
00236 if (length==0)
00237 return (int32_t) ceil(1.0*get_degree()/mkl_stepsize);
00238 return (int32_t) ceil(1.0*get_degree()*length/mkl_stepsize) ;
00239 }
00240
00246 inline void compute_by_subkernel(
00247 int32_t idx, float64_t * subkernel_contrib)
00248 {
00249
00250 if (get_is_initialized())
00251 {
00252
00253 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00254 SG_ERROR("not implemented");
00255
00256 compute_by_tree(idx, subkernel_contrib);
00257 return ;
00258 }
00259
00260 SG_ERROR( "CWeightedDegreeStringKernel optimization not initialized\n");
00261 }
00262
00268 inline const float64_t* get_subkernel_weights(int32_t& num_weights)
00269 {
00270
00271 num_weights = get_num_subkernels();
00272
00273 SG_FREE(weights_buffer);
00274 weights_buffer = SG_MALLOC(float64_t, num_weights);
00275
00276 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00277 for (int32_t i=0; i<num_weights; i++)
00278 weights_buffer[i] = ((CMultitaskKernelMklNormalizer*)normalizer)->get_beta(i);
00279 else if (position_weights!=NULL)
00280 for (int32_t i=0; i<num_weights; i++)
00281 weights_buffer[i] = position_weights[i*mkl_stepsize];
00282 else
00283 for (int32_t i=0; i<num_weights; i++)
00284 weights_buffer[i] = weights[i*mkl_stepsize];
00285
00286 return weights_buffer;
00287 }
00288
00293 virtual void set_subkernel_weights(SGVector<float64_t> w)
00294 {
00295 float64_t* weights2=w.vector;
00296 int32_t num_weights2=w.vlen;
00297 int32_t num_weights = get_num_subkernels();
00298 if (num_weights!=num_weights2)
00299 SG_ERROR( "number of weights do not match\n");
00300
00301
00302 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00303 for (int32_t i=0; i<num_weights; i++)
00304 ((CMultitaskKernelMklNormalizer*)normalizer)->set_beta(i, weights2[i]);
00305 else if (position_weights!=NULL)
00306 {
00307 for (int32_t i=0; i<num_weights; i++)
00308 {
00309 for (int32_t j=0; j<mkl_stepsize; j++)
00310 {
00311 if (i*mkl_stepsize+j<seq_length)
00312 position_weights[i*mkl_stepsize+j] = weights2[i];
00313 }
00314 }
00315 }
00316 else if (length==0)
00317 {
00318 for (int32_t i=0; i<num_weights; i++)
00319 {
00320 for (int32_t j=0; j<mkl_stepsize; j++)
00321 {
00322 if (i*mkl_stepsize+j<get_degree())
00323 weights[i*mkl_stepsize+j] = weights2[i];
00324 }
00325 }
00326 }
00327 else
00328 {
00329 for (int32_t i=0; i<num_weights; i++)
00330 {
00331 for (int32_t j=0; j<mkl_stepsize; j++)
00332 {
00333 if (i*mkl_stepsize+j<get_degree()*length)
00334 weights[i*mkl_stepsize+j] = weights2[i];
00335 }
00336 }
00337 }
00338 }
00339
00344 virtual bool set_normalizer(CKernelNormalizer* normalizer_) {
00345
00346 if (normalizer_ && strcmp(normalizer_->get_name(),"MultitaskKernelTreeNormalizer")==0) {
00347 unset_property(KP_LINADD);
00348 unset_property(KP_BATCHEVALUATION);
00349 }
00350 else
00351 {
00352 set_property(KP_LINADD);
00353 set_property(KP_BATCHEVALUATION);
00354 }
00355
00356
00357 return CStringKernel<char>::set_normalizer(normalizer_);
00358
00359 }
00360
00361
00367 float64_t *compute_abs_weights(int32_t & len);
00368
00375 void compute_by_tree(int32_t idx, float64_t *LevelContrib);
00376
00381 bool is_tree_initialized() { return tree_initialized; }
00382
00388 inline float64_t *get_degree_weights(int32_t& d, int32_t& len)
00389 {
00390 d=degree;
00391 len=length;
00392 return weights;
00393 }
00394
00400 inline float64_t *get_weights(int32_t& num_weights)
00401 {
00402
00403 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00404 SG_ERROR("not implemented");
00405
00406 if (position_weights!=NULL)
00407 {
00408 num_weights = seq_length ;
00409 return position_weights ;
00410 }
00411 if (length==0)
00412 num_weights = degree ;
00413 else
00414 num_weights = degree*length ;
00415 return weights;
00416 }
00417
00423 inline float64_t *get_position_weights(int32_t& len)
00424 {
00425 len=seq_length;
00426 return position_weights;
00427 }
00428
00434 bool set_wd_weights_by_type(EWDKernType type);
00435
00440 inline void set_wd_weights(SGVector<float64_t> new_weights)
00441 {
00442 SGMatrix<float64_t> matrix = SGMatrix<float64_t>(new_weights.vector,new_weights.vlen,0);
00443 set_weights(matrix);
00444 matrix.matrix = NULL;
00445 }
00446
00451 bool set_weights(SGMatrix<float64_t> new_weights);
00452
00459 bool set_position_weights(float64_t* pws, int32_t len);
00460
00465 bool init_block_weights();
00466
00471 bool init_block_weights_from_wd();
00472
00477 bool init_block_weights_from_wd_external();
00478
00483 bool init_block_weights_const();
00484
00489 bool init_block_weights_linear();
00490
00495 bool init_block_weights_sqpoly();
00496
00501 bool init_block_weights_cubicpoly();
00502
00507 bool init_block_weights_exp();
00508
00513 bool init_block_weights_log();
00514
00519 bool delete_position_weights()
00520 {
00521 SG_FREE(position_weights);
00522 position_weights=NULL;
00523 return true;
00524 }
00525
00531 bool set_max_mismatch(int32_t max);
00532
00537 inline int32_t get_max_mismatch() const { return max_mismatch; }
00538
00544 inline bool set_degree(int32_t deg) { degree=deg; return true; }
00545
00550 inline int32_t get_degree() const { return degree; }
00551
00557 inline bool set_use_block_computation(bool block)
00558 {
00559 block_computation=block;
00560 return true;
00561 }
00562
00567 inline bool get_use_block_computation() { return block_computation; }
00568
00574 inline bool set_mkl_stepsize(int32_t step)
00575 {
00576 if (step<1)
00577 SG_ERROR("Stepsize must be a positive integer\n");
00578 mkl_stepsize=step;
00579 return true;
00580 }
00581
00586 inline int32_t get_mkl_stepsize() { return mkl_stepsize; }
00587
00593 inline bool set_which_degree(int32_t which)
00594 {
00595 which_degree=which;
00596 return true;
00597 }
00598
00603 inline int32_t get_which_degree() { return which_degree; }
00604
00605 protected:
00607 void create_empty_tries();
00608
00614 void add_example_to_tree(int32_t idx, float64_t weight);
00615
00622 void add_example_to_single_tree(
00623 int32_t idx, float64_t weight, int32_t tree_num);
00624
00630 void add_example_to_tree_mismatch(int32_t idx, float64_t weight);
00631
00638 void add_example_to_single_tree_mismatch(
00639 int32_t idx, float64_t weight, int32_t tree_num);
00640
00646 float64_t compute_by_tree(int32_t idx);
00647
00656 float64_t compute(int32_t idx_a, int32_t idx_b);
00657
00666 float64_t compute_with_mismatch(
00667 char* avec, int32_t alen, char* bvec, int32_t blen);
00668
00677 float64_t compute_without_mismatch(
00678 char* avec, int32_t alen, char* bvec, int32_t blen);
00679
00688 float64_t compute_without_mismatch_matrix(
00689 char* avec, int32_t alen, char* bvec, int32_t blen);
00690
00699 float64_t compute_using_block(char* avec, int32_t alen,
00700 char* bvec, int32_t blen);
00701
00703 virtual void remove_lhs();
00704
00705 private:
00708 void init();
00709
00710 protected:
00714 float64_t* weights;
00716 int32_t weights_degree;
00718 int32_t weights_length;
00719
00720
00722 float64_t* position_weights;
00724 int32_t position_weights_len;
00726 float64_t* weights_buffer;
00728 int32_t mkl_stepsize;
00730 int32_t degree;
00732 int32_t length;
00733
00735 int32_t max_mismatch;
00737 int32_t seq_length;
00738
00740 bool initialized;
00741
00743 bool block_computation;
00744
00746 float64_t* block_weights;
00748 EWDKernType type;
00750 int32_t which_degree;
00751
00753 CTrie<DNATrie>* tries;
00754
00756 bool tree_initialized;
00757
00759 CAlphabet* alphabet;
00760 };
00761
00762 }
00763
00764 #endif