WeightedDegreeStringKernel.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #ifndef _WEIGHTEDDEGREESTRINGKERNEL_H___
00013 #define _WEIGHTEDDEGREESTRINGKERNEL_H___
00014 
00015 #include <shogun/lib/common.h>
00016 #include <shogun/lib/Trie.h>
00017 #include <shogun/kernel/string/StringKernel.h>
00018 #include <shogun/transfer/multitask/MultitaskKernelMklNormalizer.h>
00019 #include <shogun/features/StringFeatures.h>
00020 
00021 namespace shogun
00022 {
00023 
/** Weighting scheme selector for the weighted degree kernel.
 *
 * The numeric values are assigned explicitly and are part of the
 * interface; do not renumber them.
 *
 * NOTE(review): the enumerator names mirror the
 * CWeightedDegreeStringKernel::init_block_weights_*() family (from_wd,
 * from_wd_external, const, linear, sqpoly, cubicpoly, exp, log), so each
 * value presumably selects the corresponding block-weight initialisation
 * -- confirm against the implementation file.
 */
enum EWDKernType
{
    /** weighted degree weighting */
    E_WD=0,
    /** externally supplied weighting */
    E_EXTERNAL=1,

    /** block weighting: constant */
    E_BLOCK_CONST=2,
    /** block weighting: linear */
    E_BLOCK_LINEAR=3,
    /** block weighting: squared polynomial */
    E_BLOCK_SQPOLY=4,
    /** block weighting: cubic polynomial */
    E_BLOCK_CUBICPOLY=5,
    /** block weighting: exponential */
    E_BLOCK_EXP=6,
    /** block weighting: logarithmic */
    E_BLOCK_LOG=7
};
00037 
00038 
00053 class CWeightedDegreeStringKernel: public CStringKernel<char>
00054 {
00055     public:
00056 
00060         CWeightedDegreeStringKernel();
00061 
00062 
00068         CWeightedDegreeStringKernel(int32_t degree, EWDKernType type=E_WD);
00069 
00074         CWeightedDegreeStringKernel(SGVector<float64_t> weights);
00075 
00082         CWeightedDegreeStringKernel(
00083             CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t degree);
00084 
00085         virtual ~CWeightedDegreeStringKernel();
00086 
00093         virtual bool init(CFeatures* l, CFeatures* r);
00094 
00096         virtual void cleanup();
00097 
00105         EWDKernType get_type() const
00106         {
00107             return type;
00108         }
00109 
00114         virtual EKernelType get_kernel_type() { return K_WEIGHTEDDEGREE; }
00115 
00120         virtual const char* get_name() const {
00121             return "WeightedDegreeStringKernel";
00122         }
00123 
00131         virtual bool init_optimization(
00132             int32_t count, int32_t *IDX, float64_t* alphas)
00133         {
00134             return init_optimization(count, IDX, alphas, -1);
00135         }
00136 
00147         virtual bool init_optimization(
00148             int32_t count, int32_t *IDX, float64_t* alphas, int32_t tree_num);
00149 
00154         virtual bool delete_optimization();
00155 
00161         virtual float64_t compute_optimized(int32_t idx)
00162         {
00163             if (get_is_initialized())
00164                 return compute_by_tree(idx);
00165 
00166             SG_ERROR( "CWeightedDegreeStringKernel optimization not initialized\n");
00167             return 0;
00168         }
00169 
00174         static void* compute_batch_helper(void* p);
00175 
00186         virtual void compute_batch(
00187             int32_t num_vec, int32_t* vec_idx, float64_t* target,
00188             int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
00189             float64_t factor=1.0);
00190 
00194         virtual void clear_normal()
00195         {
00196             if (get_is_initialized())
00197             {
00198 
00199                 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00200                     SG_ERROR("not implemented");
00201 
00202                 tries->delete_trees(max_mismatch==0);
00203                 set_is_initialized(false);
00204             }
00205         }
00206 
00212         virtual void add_to_normal(int32_t idx, float64_t weight)
00213         {
00214 
00215             if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00216                 SG_ERROR("not implemented");
00217 
00218             if (max_mismatch==0)
00219                 add_example_to_tree(idx, weight);
00220             else
00221                 add_example_to_tree_mismatch(idx, weight);
00222 
00223             set_is_initialized(true);
00224         }
00225 
00230         virtual int32_t get_num_subkernels()
00231         {
00232             if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00233                 return ((CMultitaskKernelMklNormalizer*)normalizer)->get_num_betas();
00234             if (position_weights!=NULL)
00235                 return (int32_t) ceil(1.0*seq_length/mkl_stepsize) ;
00236             if (length==0)
00237                 return (int32_t) ceil(1.0*get_degree()/mkl_stepsize);
00238             return (int32_t) ceil(1.0*get_degree()*length/mkl_stepsize) ;
00239         }
00240 
00246         inline void compute_by_subkernel(
00247             int32_t idx, float64_t * subkernel_contrib)
00248         {
00249 
00250             if (get_is_initialized())
00251             {
00252 
00253                 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00254                     SG_ERROR("not implemented");
00255 
00256                 compute_by_tree(idx, subkernel_contrib);
00257                 return ;
00258             }
00259 
00260             SG_ERROR( "CWeightedDegreeStringKernel optimization not initialized\n");
00261         }
00262 
00268         inline const float64_t* get_subkernel_weights(int32_t& num_weights)
00269         {
00270 
00271             num_weights = get_num_subkernels();
00272 
00273             SG_FREE(weights_buffer);
00274             weights_buffer = SG_MALLOC(float64_t, num_weights);
00275 
00276             if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00277                 for (int32_t i=0; i<num_weights; i++)
00278                     weights_buffer[i] = ((CMultitaskKernelMklNormalizer*)normalizer)->get_beta(i);
00279             else if (position_weights!=NULL)
00280                 for (int32_t i=0; i<num_weights; i++)
00281                     weights_buffer[i] = position_weights[i*mkl_stepsize];
00282             else
00283                 for (int32_t i=0; i<num_weights; i++)
00284                     weights_buffer[i] = weights[i*mkl_stepsize];
00285 
00286             return weights_buffer;
00287         }
00288 
00293         virtual void set_subkernel_weights(SGVector<float64_t> w)
00294         {
00295             float64_t* weights2=w.vector;
00296             int32_t num_weights2=w.vlen;
00297             int32_t num_weights = get_num_subkernels();
00298             if (num_weights!=num_weights2)
00299                 SG_ERROR( "number of weights do not match\n");
00300 
00301 
00302             if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00303                 for (int32_t i=0; i<num_weights; i++)
00304                     ((CMultitaskKernelMklNormalizer*)normalizer)->set_beta(i, weights2[i]);
00305             else if (position_weights!=NULL)
00306             {
00307                 for (int32_t i=0; i<num_weights; i++)
00308                 {
00309                     for (int32_t j=0; j<mkl_stepsize; j++)
00310                     {
00311                         if (i*mkl_stepsize+j<seq_length)
00312                             position_weights[i*mkl_stepsize+j] = weights2[i];
00313                     }
00314                 }
00315             }
00316             else if (length==0)
00317             {
00318                 for (int32_t i=0; i<num_weights; i++)
00319                 {
00320                     for (int32_t j=0; j<mkl_stepsize; j++)
00321                     {
00322                         if (i*mkl_stepsize+j<get_degree())
00323                             weights[i*mkl_stepsize+j] = weights2[i];
00324                     }
00325                 }
00326             }
00327             else
00328             {
00329                 for (int32_t i=0; i<num_weights; i++)
00330                 {
00331                     for (int32_t j=0; j<mkl_stepsize; j++)
00332                     {
00333                         if (i*mkl_stepsize+j<get_degree()*length)
00334                             weights[i*mkl_stepsize+j] = weights2[i];
00335                     }
00336                 }
00337             }
00338         }
00339 
00344         virtual bool set_normalizer(CKernelNormalizer* normalizer_) {
00345 
00346             if (normalizer_ && strcmp(normalizer_->get_name(),"MultitaskKernelTreeNormalizer")==0) {
00347                 unset_property(KP_LINADD);
00348                 unset_property(KP_BATCHEVALUATION);
00349             }
00350             else
00351             {
00352                 set_property(KP_LINADD);
00353                 set_property(KP_BATCHEVALUATION);
00354             }
00355 
00356 
00357             return CStringKernel<char>::set_normalizer(normalizer_);
00358 
00359         }
00360 
00361         // other kernel tree operations
00367         float64_t *compute_abs_weights(int32_t & len);
00368 
00375         void compute_by_tree(int32_t idx, float64_t *LevelContrib);
00376 
00381         bool is_tree_initialized() { return tree_initialized; }
00382 
00388         inline float64_t *get_degree_weights(int32_t& d, int32_t& len)
00389         {
00390             d=degree;
00391             len=length;
00392             return weights;
00393         }
00394 
00400         inline float64_t *get_weights(int32_t& num_weights)
00401         {
00402 
00403             if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00404                 SG_ERROR("not implemented");
00405 
00406             if (position_weights!=NULL)
00407             {
00408                 num_weights = seq_length ;
00409                 return position_weights ;
00410             }
00411             if (length==0)
00412                 num_weights = degree ;
00413             else
00414                 num_weights = degree*length ;
00415             return weights;
00416         }
00417 
00423         inline float64_t *get_position_weights(int32_t& len)
00424         {
00425             len=seq_length;
00426             return position_weights;
00427         }
00428 
00434         bool set_wd_weights_by_type(EWDKernType type);
00435 
00440         inline void set_wd_weights(SGVector<float64_t> new_weights)
00441         {
00442             SGMatrix<float64_t> matrix = SGMatrix<float64_t>(new_weights.vector,new_weights.vlen,0);
00443             set_weights(matrix);
00444             matrix.matrix = NULL;
00445         }
00446 
00451         bool set_weights(SGMatrix<float64_t> new_weights);
00452 
00459         bool set_position_weights(float64_t* pws, int32_t len);
00460 
00465         bool init_block_weights();
00466 
00471         bool init_block_weights_from_wd();
00472 
00477         bool init_block_weights_from_wd_external();
00478 
00483         bool init_block_weights_const();
00484 
00489         bool init_block_weights_linear();
00490 
00495         bool init_block_weights_sqpoly();
00496 
00501         bool init_block_weights_cubicpoly();
00502 
00507         bool init_block_weights_exp();
00508 
00513         bool init_block_weights_log();
00514 
00519         bool delete_position_weights()
00520         {
00521             SG_FREE(position_weights);
00522             position_weights=NULL;
00523             return true;
00524         }
00525 
00531         bool set_max_mismatch(int32_t max);
00532 
00537         inline int32_t get_max_mismatch() const { return max_mismatch; }
00538 
00544         inline bool set_degree(int32_t deg) { degree=deg; return true; }
00545 
00550         inline int32_t get_degree() const { return degree; }
00551 
00557         inline bool set_use_block_computation(bool block)
00558         {
00559             block_computation=block;
00560             return true;
00561         }
00562 
00567         inline bool get_use_block_computation() { return block_computation; }
00568 
00574         inline bool set_mkl_stepsize(int32_t step)
00575         {
00576             if (step<1)
00577                 SG_ERROR("Stepsize must be a positive integer\n");
00578             mkl_stepsize=step;
00579             return true;
00580         }
00581 
00586         inline int32_t get_mkl_stepsize() { return mkl_stepsize; }
00587 
00593         inline bool set_which_degree(int32_t which)
00594         {
00595             which_degree=which;
00596             return true;
00597         }
00598 
00603         inline int32_t get_which_degree() { return which_degree; }
00604 
00605     protected:
00607         void create_empty_tries();
00608 
00614         void add_example_to_tree(int32_t idx, float64_t weight);
00615 
00622         void add_example_to_single_tree(
00623             int32_t idx, float64_t weight, int32_t tree_num);
00624 
00630         void add_example_to_tree_mismatch(int32_t idx, float64_t weight);
00631 
00638         void add_example_to_single_tree_mismatch(
00639             int32_t idx, float64_t weight, int32_t tree_num);
00640 
00646         float64_t compute_by_tree(int32_t idx);
00647 
00656         float64_t compute(int32_t idx_a, int32_t idx_b);
00657 
00666         float64_t compute_with_mismatch(
00667             char* avec, int32_t alen, char* bvec, int32_t blen);
00668 
00677         float64_t compute_without_mismatch(
00678             char* avec, int32_t alen, char* bvec, int32_t blen);
00679 
00688         float64_t compute_without_mismatch_matrix(
00689             char* avec, int32_t alen, char* bvec, int32_t blen);
00690 
00699         float64_t compute_using_block(char* avec, int32_t alen,
00700             char* bvec, int32_t blen);
00701 
00703         virtual void remove_lhs();
00704 
00705     private:
00708         void init();
00709 
00710     protected:
00714         float64_t* weights;
00716         int32_t weights_degree;
00718         int32_t weights_length;
00719 
00720 
00722         float64_t* position_weights;
00724         int32_t position_weights_len;
00726         float64_t* weights_buffer;
00728         int32_t mkl_stepsize;
00730         int32_t degree;
00732         int32_t length;
00733 
00735         int32_t max_mismatch;
00737         int32_t seq_length;
00738 
00740         bool initialized;
00741 
00743         bool block_computation;
00744 
00746         float64_t* block_weights;
00748         EWDKernType type;
00750         int32_t which_degree;
00751 
00753         CTrie<DNATrie>* tries;
00754 
00756         bool tree_initialized;
00757 
00759         CAlphabet* alphabet;
00760 };
00761 
00762 }
00763 
00764 #endif /* _WEIGHTEDDEGREESTRINGKERNEL_H___ */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation