WeightedDegreeStringKernel.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #ifndef _WEIGHTEDDEGREESTRINGKERNEL_H___
00013 #define _WEIGHTEDDEGREESTRINGKERNEL_H___
00014 
00015 #include <shogun/lib/common.h>
00016 #include <shogun/lib/Trie.h>
00017 #include <shogun/kernel/StringKernel.h>
00018 #include <shogun/kernel/MultitaskKernelMklNormalizer.h>
00019 #include <shogun/features/StringFeatures.h>
00020 
00021 namespace shogun
00022 {
00023 
/** Type tag for the weighted degree string kernel.
 *
 * A value of this enum is accepted by the (degree, type) constructor of
 * CWeightedDegreeStringKernel and by set_wd_weights_by_type(), is stored in
 * the kernel's `type` member and is returned by get_type().
 *
 * The numeric values are spelled out explicitly. What each tag computes is
 * decided in the implementation file, which is not part of this listing.
 * The E_BLOCK_* names mirror the init_block_weights_*() members declared in
 * the class, so presumably each one selects the matching block-weight
 * initialiser -- TODO confirm against the .cpp.
 *
 * NOTE(review): the comma after the last enumerator is only accepted by the
 * language from C++11 on; older pedantic builds will warn about it.
 */
00025 enum EWDKernType
00026 {
00027     E_WD=0,
00028     E_EXTERNAL=1,
00029 
    /* block-weight variants, see the init_block_weights_*() declarations */
00030     E_BLOCK_CONST=2,
00031     E_BLOCK_LINEAR=3,
00032     E_BLOCK_SQPOLY=4,
00033     E_BLOCK_CUBICPOLY=5,
00034     E_BLOCK_EXP=6,
00035     E_BLOCK_LOG=7,
00036 };
00037 
00038 
/** Weighted degree string kernel on char strings (a CStringKernel<char>).
 *
 * What is visible in this header:
 *  - pairwise evaluation is declared as compute(idx_a, idx_b);
 *  - an "optimization" interface (init_optimization(), add_to_normal(),
 *    clear_normal(), compute_optimized(), compute_by_subkernel()) whose
 *    inline parts forward to the tree routines declared further down and to
 *    the `tries` member;
 *  - a sub-kernel weight interface (get_num_subkernels(),
 *    get_subkernel_weights(), set_subkernel_weights()) that groups
 *    consecutive weights in chunks of `mkl_stepsize`;
 *  - when the installed normalizer reports N_MULTITASK the sub-kernel
 *    weights are the normalizer's betas, and the tree based routines report
 *    "not implemented".
 *
 * Everything that is only declared here is implemented in a source file that
 * is not part of this listing; comments on those members are hedged.
 */
00053 class CWeightedDegreeStringKernel: public CStringKernel<char>
00054 {
00055     public:
00056 
        /** Default constructor (defined out of line). */
00060         CWeightedDegreeStringKernel();
00061 
00062 
        /** Construct from a degree and a weighting type.
         *
         * @param degree kernel degree -- presumably ends up in the `degree`
         *               member, TODO confirm in the implementation
         * @param type   weighting type, defaults to E_WD
         */
00068         CWeightedDegreeStringKernel(int32_t degree, EWDKernType type=E_WD);
00069 
        /** Construct from an explicit weight array and a degree.
         *
         * @param weights caller supplied weights -- whether they are copied
         *                or adopted is not visible here, verify before
         *                freeing the array
         * @param degree  kernel degree
         */
00075         CWeightedDegreeStringKernel(float64_t* weights, int32_t degree);
00076 
        /** Construct from two string feature objects and a degree.
         *
         * @param l      features named `l` -- presumably the left-hand side
         * @param r      features named `r` -- presumably the right-hand side
         * @param degree kernel degree
         */
00083         CWeightedDegreeStringKernel(
00084             CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t degree);
00085 
        /** Virtual destructor (defined out of line). */
00086         virtual ~CWeightedDegreeStringKernel();
00087 
        /** Initialise the kernel with two feature objects.
         *
         * @param l features named `l` -- presumably the left-hand side
         * @param r features named `r` -- presumably the right-hand side
         * @return bool; its meaning is defined by the implementation
         */
00094         virtual bool init(CFeatures* l, CFeatures* r);
00095 
        /** Clean-up hook (defined out of line). */
00097         virtual void cleanup();
00098 
        /** @return the value of the `type` member */
00106         EWDKernType get_type() const
00107         {
00108             return type;
00109         }
00110 
        /** @return the constant K_WEIGHTEDDEGREE */
00115         virtual EKernelType get_kernel_type() { return K_WEIGHTEDDEGREE; }
00116 
        /** @return the string literal "WeightedDegreeStringKernel" */
00121         virtual const char* get_name() const {
00122             return "WeightedDegreeStringKernel";
00123         }
00124 
        /** Three-argument convenience overload of init_optimization().
         *
         * Forwards to the four-argument overload with tree_num = -1.
         * NOTE(review): -1 presumably means "no single tree selected";
         * confirm in the implementation.
         *
         * @param count  forwarded unchanged
         * @param IDX    forwarded unchanged
         * @param alphas forwarded unchanged
         * @return result of the four-argument overload
         */
00132         inline virtual bool init_optimization(
00133             int32_t count, int32_t *IDX, float64_t* alphas)
00134         {
00135             return init_optimization(count, IDX, alphas, -1);
00136         }
00137 
        /** Initialise the optimization (defined out of line).
         *
         * @param count    presumably the number of entries in IDX / alphas
         * @param IDX      index array
         * @param alphas   coefficient array
         * @param tree_num tree selector; the 3-argument overload passes -1
         * @return bool; its meaning is defined by the implementation
         */
00148         virtual bool init_optimization(
00149             int32_t count, int32_t *IDX, float64_t* alphas, int32_t tree_num);
00150 
        /** Tear down the optimization (defined out of line).
         *
         * @return bool; its meaning is defined by the implementation
         */
00155         virtual bool delete_optimization();
00156 
        /** Evaluate the optimized kernel for one example.
         *
         * @param idx example index, handed to compute_by_tree()
         * @return compute_by_tree(idx) when get_is_initialized() is true;
         *         otherwise SG_ERROR is invoked and, should it return,
         *         the result is 0
         */
00162         virtual float64_t compute_optimized(int32_t idx)
00163         {
00164             if (get_is_initialized())
00165                 return compute_by_tree(idx);
00166 
00167             SG_ERROR( "CWeightedDegreeStringKernel optimization not initialized\n");
00168             return 0;
00169         }
00170 
        /** Static helper taking and returning void*.
         *
         * NOTE(review): the signature has the shape of a thread entry
         * point and the name suggests it is the worker of compute_batch();
         * the layout expected behind `p` is not visible here -- confirm.
         */
00175         static void* compute_batch_helper(void* p);
00176 
        /** Batch evaluation (defined out of line).
         *
         * @param num_vec     presumably the number of entries in vec_idx
         * @param vec_idx     index array
         * @param target      output array -- presumably num_vec entries
         * @param num_suppvec presumably the number of entries in IDX / alphas
         * @param IDX         index array
         * @param alphas      coefficient array
         * @param factor      scalar, defaults to 1.0
         */
00187         virtual void compute_batch(
00188             int32_t num_vec, int32_t* vec_idx, float64_t* target,
00189             int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
00190             float64_t factor=1.0);
00191 
        /** Drop the tree state built by add_to_normal().
         *
         * Does nothing unless get_is_initialized() is true. Otherwise it
         * reports "not implemented" for an N_MULTITASK normalizer, calls
         * tries->delete_trees(max_mismatch==0) and clears the initialized
         * flag.
         */
00195         inline virtual void clear_normal()
00196         {
00197             if (get_is_initialized())
00198             {
00199 
00200                 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00201                     SG_ERROR("not implemented");
00202 
00203                 tries->delete_trees(max_mismatch==0);
00204                 set_is_initialized(false);
00205             }
00206         }
00207 
        /** Add one weighted example to the trees.
         *
         * Reports "not implemented" for an N_MULTITASK normalizer. Picks
         * add_example_to_tree() when max_mismatch is 0 and
         * add_example_to_tree_mismatch() otherwise, then sets the
         * initialized flag.
         *
         * @param idx    example index
         * @param weight weight passed along with the example
         */
00213         inline virtual void add_to_normal(int32_t idx, float64_t weight)
00214         {
00215 
00216             if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00217                 SG_ERROR("not implemented");
00218 
00219             if (max_mismatch==0)
00220                 add_example_to_tree(idx, weight);
00221             else
00222                 add_example_to_tree_mismatch(idx, weight);
00223 
00224             set_is_initialized(true);
00225         }
00226 
        /** Number of sub-kernel weights.
         *
         * Checked in this order:
         *  - N_MULTITASK normalizer: the normalizer's get_num_betas();
         *  - position_weights set:   ceil(seq_length / mkl_stepsize);
         *  - length == 0:            ceil(degree / mkl_stepsize);
         *  - otherwise:              ceil(degree * length / mkl_stepsize).
         *
         * @return the count selected above
         */
00231         inline virtual int32_t get_num_subkernels()
00232         {
00233             if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00234                 return ((CMultitaskKernelMklNormalizer*)normalizer)->get_num_betas();
            /* the leading 1.0* makes the divisions below floating point, so ceil() rounds up */
00235             if (position_weights!=NULL)
00236                 return (int32_t) ceil(1.0*seq_length/mkl_stepsize) ;
00237             if (length==0)
00238                 return (int32_t) ceil(1.0*get_degree()/mkl_stepsize);
00239             return (int32_t) ceil(1.0*get_degree()*length/mkl_stepsize) ;
00240         }
00241 
        /** Per sub-kernel evaluation for one example.
         *
         * When get_is_initialized() is true this reports "not implemented"
         * for an N_MULTITASK normalizer and then forwards to
         * compute_by_tree(idx, subkernel_contrib). When it is false,
         * SG_ERROR is invoked instead.
         *
         * @param idx               example index
         * @param subkernel_contrib array handed to compute_by_tree();
         *                          presumably one slot per sub-kernel --
         *                          TODO confirm the required size
         */
00247         inline void compute_by_subkernel(
00248             int32_t idx, float64_t * subkernel_contrib)
00249         {
00250 
00251             if (get_is_initialized())
00252             {
00253 
00254                 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00255                     SG_ERROR("not implemented");
00256 
00257                 compute_by_tree(idx, subkernel_contrib);
00258                 return ;
00259             }
00260 
00261             SG_ERROR( "CWeightedDegreeStringKernel optimization not initialized\n");
00262         }
00263 
        /** Snapshot of the current sub-kernel weights.
         *
         * The result lives in `weights_buffer`, which is freed and
         * re-allocated on every call; a pointer obtained earlier is
         * therefore stale after the next call, and the caller must not
         * free the returned array.
         *
         * @param num_weights set to get_num_subkernels()
         * @return weights_buffer filled with, per sub-kernel i, either the
         *         normalizer's get_beta(i) (N_MULTITASK),
         *         position_weights[i*mkl_stepsize] (position weights set)
         *         or weights[i*mkl_stepsize]
         */
00269         inline const float64_t* get_subkernel_weights(int32_t& num_weights)
00270         {
00271 
00272             num_weights = get_num_subkernels();
00273 
00274             SG_FREE(weights_buffer);
00275             weights_buffer = SG_MALLOC(float64_t, num_weights);
00276 
            /* only the first weight of each mkl_stepsize chunk is reported */
00277             if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00278                 for (int32_t i=0; i<num_weights; i++)
00279                     weights_buffer[i] = ((CMultitaskKernelMklNormalizer*)normalizer)->get_beta(i);
00280             else if (position_weights!=NULL)
00281                 for (int32_t i=0; i<num_weights; i++)
00282                     weights_buffer[i] = position_weights[i*mkl_stepsize];
00283             else
00284                 for (int32_t i=0; i<num_weights; i++)
00285                     weights_buffer[i] = weights[i*mkl_stepsize];
00286 
00287             return weights_buffer;
00288         }
00289 
        /** Overwrite the sub-kernel weights.
         *
         * Invokes SG_ERROR when num_weights2 differs from
         * get_num_subkernels(). For an N_MULTITASK normalizer each value is
         * passed to set_beta(i, ...). Otherwise weights2[i] is written to
         * the mkl_stepsize consecutive slots starting at i*mkl_stepsize of
         * position_weights (if set) or of weights, never beyond
         * seq_length, degree (length == 0) or degree*length respectively.
         *
         * @param weights2     new sub-kernel weights
         * @param num_weights2 number of entries in weights2
         */
00295         inline void set_subkernel_weights(
00296             float64_t* weights2, int32_t num_weights2)
00297         {
00298             int32_t num_weights = get_num_subkernels();
00299             if (num_weights!=num_weights2)
00300                 SG_ERROR( "number of weights do not match\n");
00301 
00302 
00303             if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00304                 for (int32_t i=0; i<num_weights; i++)
00305                     ((CMultitaskKernelMklNormalizer*)normalizer)->set_beta(i, weights2[i]);
00306             else if (position_weights!=NULL)
00307             {
00308                 for (int32_t i=0; i<num_weights; i++)
00309                 {
00310                     for (int32_t j=0; j<mkl_stepsize; j++)
00311                     {
                        /* the last chunk may be shorter than mkl_stepsize */
00312                         if (i*mkl_stepsize+j<seq_length)
00313                             position_weights[i*mkl_stepsize+j] = weights2[i];
00314                     }
00315                 }
00316             }
00317             else if (length==0)
00318             {
00319                 for (int32_t i=0; i<num_weights; i++)
00320                 {
00321                     for (int32_t j=0; j<mkl_stepsize; j++)
00322                     {
00323                         if (i*mkl_stepsize+j<get_degree())
00324                             weights[i*mkl_stepsize+j] = weights2[i];
00325                     }
00326                 }
00327             }
00328             else
00329             {
00330                 for (int32_t i=0; i<num_weights; i++)
00331                 {
00332                     for (int32_t j=0; j<mkl_stepsize; j++)
00333                     {
00334                         if (i*mkl_stepsize+j<get_degree()*length)
00335                             weights[i*mkl_stepsize+j] = weights2[i];
00336                     }
00337                 }
00338             }
00339         }
00340 
        /** Install a normalizer and adjust the kernel properties to it.
         *
         * A non-NULL normalizer whose get_name() equals
         * "MultitaskKernelTreeNormalizer" switches the KP_LINADD and
         * KP_BATCHEVALUATION properties off; any other argument, NULL
         * included, switches both on. The call is then delegated to
         * CStringKernel<char>::set_normalizer().
         *
         * @param normalizer_ normalizer to install
         * @return result of the base class call
         */
00345         virtual bool set_normalizer(CKernelNormalizer* normalizer_) {
00346 
00347             if (normalizer_ && strcmp(normalizer_->get_name(),"MultitaskKernelTreeNormalizer")==0) {
00348                 unset_property(KP_LINADD);
00349                 unset_property(KP_BATCHEVALUATION);
00350             }
00351             else
00352             {
00353                 set_property(KP_LINADD);
00354                 set_property(KP_BATCHEVALUATION);
00355             }
00356 
00357 
00358             return CStringKernel<char>::set_normalizer(normalizer_);
00359 
00360         }
00361 
00362         // other kernel tree operations
        /** Defined out of line.
         *
         * @param len output reference -- presumably the number of returned
         *            entries, TODO confirm
         * @return float64_t array; ownership is not visible here, verify
         *         before freeing
         */
00368         float64_t *compute_abs_weights(int32_t & len);
00369 
        /** Tree evaluation with per-level output (defined out of line).
         *
         * Called by compute_by_subkernel().
         *
         * @param idx          example index
         * @param LevelContrib output array supplied by the caller
         */
00376         void compute_by_tree(int32_t idx, float64_t *LevelContrib);
00377 
        /** @return the value of the `tree_initialized` member */
00382         bool is_tree_initialized() { return tree_initialized; }
00383 
        /** Expose the weight array together with degree and length.
         *
         * @param d   set to the `degree` member
         * @param len set to the `length` member
         * @return the `weights` pointer itself; no copy is made
         */
00389         inline float64_t *get_degree_weights(int32_t& d, int32_t& len)
00390         {
00391             d=degree;
00392             len=length;
00393             return weights;
00394         }
00395 
        /** Expose whichever weight array is currently in use.
         *
         * Reports "not implemented" for an N_MULTITASK normalizer.
         *
         * @param num_weights set to seq_length when position_weights is
         *                    set, else to degree (length == 0) or
         *                    degree*length
         * @return position_weights when it is set, otherwise weights; the
         *         member pointer itself is returned, no copy is made
         */
00401         inline float64_t *get_weights(int32_t& num_weights)
00402         {
00403 
00404             if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00405                 SG_ERROR("not implemented");
00406 
00407             if (position_weights!=NULL)
00408             {
00409                 num_weights = seq_length ;
00410                 return position_weights ;
00411             }
00412             if (length==0)
00413                 num_weights = degree ;
00414             else
00415                 num_weights = degree*length ;
00416             return weights;
00417         }
00418 
        /** Expose the position weight array.
         *
         * NOTE(review): the reported length is seq_length, not the
         * position_weights_len member -- confirm the two always agree.
         *
         * @param len set to seq_length
         * @return the `position_weights` pointer, which may be NULL
         */
00424         inline float64_t *get_position_weights(int32_t& len)
00425         {
00426             len=seq_length;
00427             return position_weights;
00428         }
00429 
        /** Defined out of line; presumably fills the weights according to
         * the given type -- TODO confirm.
         *
         * @param type weighting type
         * @return bool; its meaning is defined by the implementation
         */
00435         bool set_wd_weights_by_type(EWDKernType type);
00436 
        /** Set the weights from a vector.
         *
         * Wraps the vector's storage in an SGMatrix built with
         * (vector, vlen, 0) and passes it to set_weights(); the bool that
         * set_weights() returns is discarded.
         *
         * @param new_weights weight vector
         */
00441         inline void set_wd_weights(SGVector<float64_t> new_weights)
00442         {
00443             set_weights(SGMatrix<float64_t>(new_weights.vector,new_weights.vlen,0));
00444         }
00445 
        /** Defined out of line.
         *
         * @param new_weights weight matrix
         * @return bool; its meaning is defined by the implementation
         */
00450         bool set_weights(SGMatrix<float64_t> new_weights);
00451 
        /** Defined out of line.
         *
         * @param pws position weights -- presumably `len` entries
         * @param len number of weights
         * @return bool; its meaning is defined by the implementation
         */
00458         bool set_position_weights(float64_t* pws, int32_t len);
00459 
        /* The init_block_weights*() family below is defined out of line.
         * Presumably each fills the `block_weights` member for the scheme
         * named in its suffix and the unsuffixed one dispatches on `type`
         * -- TODO confirm in the implementation. */
00464         bool init_block_weights();
00465 
00470         bool init_block_weights_from_wd();
00471 
00476         bool init_block_weights_from_wd_external();
00477 
00482         bool init_block_weights_const();
00483 
00488         bool init_block_weights_linear();
00489 
00494         bool init_block_weights_sqpoly();
00495 
00500         bool init_block_weights_cubicpoly();
00501 
00506         bool init_block_weights_exp();
00507 
00512         bool init_block_weights_log();
00513 
        /** Free the position weights and reset the pointer to NULL.
         *
         * NOTE(review): position_weights_len is not touched here.
         *
         * @return always true
         */
00518         bool delete_position_weights()
00519         {
00520             SG_FREE(position_weights);
00521             position_weights=NULL;
00522             return true;
00523         }
00524 
        /** Defined out of line.
         *
         * @param max new value -- presumably for the `max_mismatch` member
         * @return bool; its meaning is defined by the implementation
         */
00530         bool set_max_mismatch(int32_t max);
00531 
        /** @return the value of the `max_mismatch` member */
00536         inline int32_t get_max_mismatch() const { return max_mismatch; }
00537 
        /** Assign the `degree` member; nothing else is updated here.
         *
         * @param deg new degree
         * @return always true
         */
00543         inline bool set_degree(int32_t deg) { degree=deg; return true; }
00544 
        /** @return the value of the `degree` member */
00549         inline int32_t get_degree() const { return degree; }
00550 
        /** Assign the `block_computation` member.
         *
         * @param block new flag value
         * @return always true
         */
00556         inline bool set_use_block_computation(bool block)
00557         {
00558             block_computation=block;
00559             return true;
00560         }
00561 
        /** @return the value of the `block_computation` member */
00566         inline bool get_use_block_computation() { return block_computation; }
00567 
        /** Assign the `mkl_stepsize` member.
         *
         * A step below 1 is reported through SG_ERROR; the assignment
         * follows unconditionally, so it still takes place should SG_ERROR
         * return.
         *
         * @param step new step size, expected to be at least 1
         * @return always true
         */
00573         inline bool set_mkl_stepsize(int32_t step)
00574         {
00575             if (step<1)
00576                 SG_ERROR("Stepsize must be a positive integer\n");
00577             mkl_stepsize=step;
00578             return true;
00579         }
00580 
        /** @return the value of the `mkl_stepsize` member */
00585         inline int32_t get_mkl_stepsize() { return mkl_stepsize; }
00586 
        /** Assign the `which_degree` member.
         *
         * @param which new value
         * @return always true
         */
00592         inline bool set_which_degree(int32_t which)
00593         {
00594             which_degree=which;
00595             return true;
00596         }
00597 
        /** @return the value of the `which_degree` member */
00602         inline int32_t get_which_degree() { return which_degree; }
00603 
00604     protected:
        /** Defined out of line; presumably (re)creates the `tries` member
         * -- TODO confirm. */
00606         void create_empty_tries();
00607 
        /** Defined out of line; used by add_to_normal() when max_mismatch
         * is 0.
         *
         * @param idx    example index
         * @param weight example weight
         */
00613         void add_example_to_tree(int32_t idx, float64_t weight);
00614 
        /** Defined out of line.
         *
         * @param idx      example index
         * @param weight   example weight
         * @param tree_num tree selector
         */
00621         void add_example_to_single_tree(
00622             int32_t idx, float64_t weight, int32_t tree_num);
00623 
        /** Defined out of line; used by add_to_normal() when max_mismatch
         * is not 0.
         *
         * @param idx    example index
         * @param weight example weight
         */
00629         void add_example_to_tree_mismatch(int32_t idx, float64_t weight);
00630 
        /** Defined out of line.
         *
         * @param idx      example index
         * @param weight   example weight
         * @param tree_num tree selector
         */
00637         void add_example_to_single_tree_mismatch(
00638             int32_t idx, float64_t weight, int32_t tree_num);
00639 
        /** Tree evaluation for one example (defined out of line); its
         * result is what compute_optimized() returns.
         *
         * @param idx example index
         * @return kernel value
         */
00645         float64_t compute_by_tree(int32_t idx);
00646 
        /** Kernel evaluation for a pair of indices (defined out of line).
         *
         * @param idx_a first index -- presumably into the left-hand side
         * @param idx_b second index -- presumably into the right-hand side
         * @return kernel value
         */
00655         float64_t compute(int32_t idx_a, int32_t idx_b);
00656 
        /* The four helpers below are defined out of line. Each takes two
         * char arrays with their lengths and returns a float64_t;
         * presumably compute() picks one of them depending on
         * max_mismatch, length and block_computation -- TODO confirm. */
00665         float64_t compute_with_mismatch(
00666             char* avec, int32_t alen, char* bvec, int32_t blen);
00667 
00676         float64_t compute_without_mismatch(
00677             char* avec, int32_t alen, char* bvec, int32_t blen);
00678 
00687         float64_t compute_without_mismatch_matrix(
00688             char* avec, int32_t alen, char* bvec, int32_t blen);
00689 
00698         float64_t compute_using_block(char* avec, int32_t alen,
00699             char* bvec, int32_t blen);
00700 
        /** Defined out of line. */
00702         virtual void remove_lhs();
00703 
00704     private:
        /** Private parameterless init(), distinct from the public
         * init(CFeatures*, CFeatures*); presumably shared constructor
         * set-up -- TODO confirm. */
00707         void init();
00708 
00709     protected:
        /** weight array handed out by get_weights() / get_degree_weights();
         * set_subkernel_weights() writes up to degree entries when length
         * is 0 and up to degree*length entries otherwise */
00713         float64_t* weights;
        /** not referenced in this header */
00715         int32_t weights_degree;
        /** not referenced in this header */
00717         int32_t weights_length;
00718 
00719 
        /** optional position weights; NULL means "not in use" for the
         * inline code in this header */
00721         float64_t* position_weights;
        /** not referenced in this header (the inline code uses seq_length) */
00723         int32_t position_weights_len;
        /** buffer returned by get_subkernel_weights(), re-allocated on each call */
00725         float64_t* weights_buffer;
        /** number of consecutive weights that share one sub-kernel weight */
00727         int32_t mkl_stepsize;
        /** kernel degree, see get_degree() / set_degree() */
00729         int32_t degree;
        /** 0 selects the degree-only weight layout; otherwise the weight
         * count is degree*length */
00731         int32_t length;
00732 
        /** 0 selects the non-mismatch tree routines in add_to_normal() */
00734         int32_t max_mismatch;
        /** bound used when indexing position_weights */
00736         int32_t seq_length;
00737 
        /** not referenced in this header (the inline code goes through
         * get_is_initialized() / set_is_initialized()) */
00739         bool initialized;
00740 
        /** flag behind get/set_use_block_computation() */
00742         bool block_computation;
00743 
        /** not referenced by the inline code in this header */
00745         float64_t* block_weights;
        /** weighting type returned by get_type() */
00747         EWDKernType type;
        /** value behind get/set_which_degree() */
00749         int32_t which_degree;
00750 
        /** tries used by clear_normal() through delete_trees() */
00752         CTrie<DNATrie>* tries;
00753 
        /** flag returned by is_tree_initialized() */
00755         bool tree_initialized;
00756 
        /** not referenced by the inline code in this header */
00758         CAlphabet* alphabet;
00759 };
00760 
00761 }
00762 
00763 #endif /* _WEIGHTEDDEGREESTRINGKERNEL_H___ */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation