WeightedDegreeStringKernel.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #ifndef _WEIGHTEDDEGREESTRINGKERNEL_H___
00013 #define _WEIGHTEDDEGREESTRINGKERNEL_H___
00014 
00015 #include "lib/common.h"
00016 #include "lib/Trie.h"
00017 #include "kernel/StringKernel.h"
00018 #include "kernel/MultitaskKernelMklNormalizer.h"
00019 #include "features/StringFeatures.h"
00020 
00021 namespace shogun
00022 {
00023 
/** Weighting scheme selector for the weighted degree string kernel.
 *
 * Values of this type are accepted by the CWeightedDegreeStringKernel
 * constructor and by set_wd_weights_by_type(), and are reported back by
 * get_type(). The numeric values are fixed explicitly and must not be
 * renumbered.
 *
 * NOTE(review): the E_BLOCK_* names mirror the init_block_weights_*()
 * members of CWeightedDegreeStringKernel (const, linear, sqpoly,
 * cubicpoly, exp, log); confirm the exact mapping in the implementation.
 */
enum EWDKernType
{
    /** plain weighted degree weighting (the constructor's default type) */
    E_WD=0,
    /** NOTE(review): presumably externally supplied weights - confirm */
    E_EXTERNAL=1,

    /** block weighting: constant */
    E_BLOCK_CONST=2,
    /** block weighting: linear */
    E_BLOCK_LINEAR=3,
    /** block weighting: squared polynomial */
    E_BLOCK_SQPOLY=4,
    /** block weighting: cubic polynomial */
    E_BLOCK_CUBICPOLY=5,
    /** block weighting: exponential */
    E_BLOCK_EXP=6,
    /** block weighting: logarithmic */
    E_BLOCK_LOG=7
};
00036 
00037 
00052 class CWeightedDegreeStringKernel: public CStringKernel<char>
00053 {
00054     public:
00055 
00059         CWeightedDegreeStringKernel();
00060 
00061 
00067         CWeightedDegreeStringKernel(int32_t degree, EWDKernType type=E_WD);
00068 
00074         CWeightedDegreeStringKernel(float64_t* weights, int32_t degree);
00075 
00082         CWeightedDegreeStringKernel(
00083             CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t degree);
00084 
00085         virtual ~CWeightedDegreeStringKernel();
00086 
00093         virtual bool init(CFeatures* l, CFeatures* r);
00094 
00096         virtual void cleanup();
00097 
00105         EWDKernType get_type() const
00106         {
00107             return type;
00108         }
00109 
00114         int32_t get_degree() const
00115         {
00116             return degree;
00117         }
00118 
00124         int32_t get_max_mismatch() const
00125         {
00126             return max_mismatch;
00127         }
00128 
00133         virtual EKernelType get_kernel_type() { return K_WEIGHTEDDEGREE; }
00134 
00139         virtual const char* get_name() const {
00140             return "WeightedDegreeStringKernel";
00141         }
00142 
00150         inline virtual bool init_optimization(
00151             int32_t count, int32_t *IDX, float64_t* alphas)
00152         {
00153             return init_optimization(count, IDX, alphas, -1);
00154         }
00155 
00166         virtual bool init_optimization(
00167             int32_t count, int32_t *IDX, float64_t* alphas, int32_t tree_num);
00168 
00173         virtual bool delete_optimization();
00174 
00180         virtual float64_t compute_optimized(int32_t idx)
00181         {
00182             if (get_is_initialized())
00183                 return compute_by_tree(idx);
00184 
00185             SG_ERROR( "CWeightedDegreeStringKernel optimization not initialized\n");
00186             return 0;
00187         }
00188 
00193         static void* compute_batch_helper(void* p);
00194 
00205         virtual void compute_batch(
00206             int32_t num_vec, int32_t* vec_idx, float64_t* target,
00207             int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
00208             float64_t factor=1.0);
00209 
00213         inline virtual void clear_normal()
00214         {
00215             if (get_is_initialized())
00216             {
00217 
00218                 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00219                     SG_ERROR("not implemented");
00220 
00221                 tries->delete_trees(max_mismatch==0);
00222                 set_is_initialized(false);
00223             }
00224         }
00225 
00231         inline virtual void add_to_normal(int32_t idx, float64_t weight)
00232         {
00233 
00234             if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00235                 SG_ERROR("not implemented");
00236 
00237             if (max_mismatch==0)
00238                 add_example_to_tree(idx, weight);
00239             else
00240                 add_example_to_tree_mismatch(idx, weight);
00241 
00242             set_is_initialized(true);
00243         }
00244 
00249         inline virtual int32_t get_num_subkernels()
00250         {
00251             if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00252                 return ((CMultitaskKernelMklNormalizer*)normalizer)->get_num_betas();
00253             if (position_weights!=NULL)
00254                 return (int32_t) ceil(1.0*seq_length/mkl_stepsize) ;
00255             if (length==0)
00256                 return (int32_t) ceil(1.0*get_degree()/mkl_stepsize);
00257             return (int32_t) ceil(1.0*get_degree()*length/mkl_stepsize) ;
00258         }
00259 
00265         inline void compute_by_subkernel(
00266             int32_t idx, float64_t * subkernel_contrib)
00267         {
00268 
00269             if (get_is_initialized())
00270             {
00271 
00272                 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00273                     SG_ERROR("not implemented");
00274 
00275                 compute_by_tree(idx, subkernel_contrib);
00276                 return ;
00277             }
00278 
00279             SG_ERROR( "CWeightedDegreeStringKernel optimization not initialized\n");
00280         }
00281 
00287         inline const float64_t* get_subkernel_weights(int32_t& num_weights)
00288         {
00289 
00290             num_weights = get_num_subkernels();
00291 
00292             delete[] weights_buffer ;
00293             weights_buffer = new float64_t[num_weights];
00294 
00295             if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00296                 for (int32_t i=0; i<num_weights; i++)
00297                     weights_buffer[i] = ((CMultitaskKernelMklNormalizer*)normalizer)->get_beta(i);
00298             else if (position_weights!=NULL)
00299                 for (int32_t i=0; i<num_weights; i++)
00300                     weights_buffer[i] = position_weights[i*mkl_stepsize];
00301             else
00302                 for (int32_t i=0; i<num_weights; i++)
00303                     weights_buffer[i] = weights[i*mkl_stepsize];
00304 
00305             return weights_buffer;
00306         }
00307 
00313         inline void set_subkernel_weights(
00314             float64_t* weights2, int32_t num_weights2)
00315         {
00316             int32_t num_weights = get_num_subkernels();
00317             if (num_weights!=num_weights2)
00318                 SG_ERROR( "number of weights do not match\n");
00319 
00320 
00321             if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00322                 for (int32_t i=0; i<num_weights; i++)
00323                     ((CMultitaskKernelMklNormalizer*)normalizer)->set_beta(i, weights2[i]);
00324             else if (position_weights!=NULL)
00325             {
00326                 for (int32_t i=0; i<num_weights; i++)
00327                 {
00328                     for (int32_t j=0; j<mkl_stepsize; j++)
00329                     {
00330                         if (i*mkl_stepsize+j<seq_length)
00331                             position_weights[i*mkl_stepsize+j] = weights2[i];
00332                     }
00333                 }
00334             }
00335             else if (length==0)
00336             {
00337                 for (int32_t i=0; i<num_weights; i++)
00338                 {
00339                     for (int32_t j=0; j<mkl_stepsize; j++)
00340                     {
00341                         if (i*mkl_stepsize+j<get_degree())
00342                             weights[i*mkl_stepsize+j] = weights2[i];
00343                     }
00344                 }
00345             }
00346             else
00347             {
00348                 for (int32_t i=0; i<num_weights; i++)
00349                 {
00350                     for (int32_t j=0; j<mkl_stepsize; j++)
00351                     {
00352                         if (i*mkl_stepsize+j<get_degree()*length)
00353                             weights[i*mkl_stepsize+j] = weights2[i];
00354                     }
00355                 }
00356             }
00357         }
00358 
00363         virtual bool set_normalizer(CKernelNormalizer* normalizer_) {
00364 
00365             if (normalizer_ && strcmp(normalizer_->get_name(),"MultitaskKernelTreeNormalizer")==0) {
00366                 unset_property(KP_LINADD);
00367                 unset_property(KP_BATCHEVALUATION);
00368             }
00369             else
00370             {
00371                 set_property(KP_LINADD);
00372                 set_property(KP_BATCHEVALUATION);
00373             }
00374 
00375 
00376             return CStringKernel<char>::set_normalizer(normalizer_);
00377 
00378         }
00379 
00380         // other kernel tree operations
00386         float64_t *compute_abs_weights(int32_t & len);
00387 
00394         void compute_by_tree(int32_t idx, float64_t *LevelContrib);
00395 
00400         bool is_tree_initialized() { return tree_initialized; }
00401 
00407         inline float64_t *get_degree_weights(int32_t& d, int32_t& len)
00408         {
00409             d=degree;
00410             len=length;
00411             return weights;
00412         }
00413 
00419         inline float64_t *get_weights(int32_t& num_weights)
00420         {
00421 
00422             if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK)
00423                 SG_ERROR("not implemented");
00424 
00425             if (position_weights!=NULL)
00426             {
00427                 num_weights = seq_length ;
00428                 return position_weights ;
00429             }
00430             if (length==0)
00431                 num_weights = degree ;
00432             else
00433                 num_weights = degree*length ;
00434             return weights;
00435         }
00436 
00442         inline float64_t *get_position_weights(int32_t& len)
00443         {
00444             len=seq_length;
00445             return position_weights;
00446         }
00447 
00453         bool set_wd_weights_by_type(EWDKernType type);
00454 
00461         void set_wd_weights(float64_t* p_weights, int32_t d)
00462         {
00463             set_weights(p_weights,d,0);
00464         }
00465 
00472         bool set_weights(float64_t* weights, int32_t d, int32_t len);
00473 
00480         bool set_position_weights(float64_t* pws, int32_t len=0);
00481 
00486         bool init_block_weights();
00487 
00492         bool init_block_weights_from_wd();
00493 
00498         bool init_block_weights_from_wd_external();
00499 
00504         bool init_block_weights_const();
00505 
00510         bool init_block_weights_linear();
00511 
00516         bool init_block_weights_sqpoly();
00517 
00522         bool init_block_weights_cubicpoly();
00523 
00528         bool init_block_weights_exp();
00529 
00534         bool init_block_weights_log();
00535 
00540         bool delete_position_weights()
00541         {
00542             delete[] position_weights;
00543             position_weights=NULL;
00544             return true;
00545         }
00546 
00552         bool set_max_mismatch(int32_t max);
00553 
00558         inline int32_t get_max_mismatch() { return max_mismatch; }
00559 
00565         inline bool set_degree(int32_t deg) { degree=deg; return true; }
00566 
00571         inline int32_t get_degree() { return degree; }
00572 
00578         inline bool set_use_block_computation(bool block)
00579         {
00580             block_computation=block;
00581             return true;
00582         }
00583 
00588         inline bool get_use_block_computation() { return block_computation; }
00589 
00595         inline bool set_mkl_stepsize(int32_t step)
00596         {
00597             if (step<1)
00598                 SG_ERROR("Stepsize must be a positive integer\n");
00599             mkl_stepsize=step;
00600             return true;
00601         }
00602 
00607         inline int32_t get_mkl_stepsize() { return mkl_stepsize; }
00608 
00614         inline bool set_which_degree(int32_t which)
00615         {
00616             which_degree=which;
00617             return true;
00618         }
00619 
00624         inline int32_t get_which_degree() { return which_degree; }
00625 
00626     protected:
00628         void create_empty_tries();
00629 
00635         void add_example_to_tree(int32_t idx, float64_t weight);
00636 
00643         void add_example_to_single_tree(
00644             int32_t idx, float64_t weight, int32_t tree_num);
00645 
00651         void add_example_to_tree_mismatch(int32_t idx, float64_t weight);
00652 
00659         void add_example_to_single_tree_mismatch(
00660             int32_t idx, float64_t weight, int32_t tree_num);
00661 
00667         float64_t compute_by_tree(int32_t idx);
00668 
00677         float64_t compute(int32_t idx_a, int32_t idx_b);
00678 
00687         float64_t compute_with_mismatch(
00688             char* avec, int32_t alen, char* bvec, int32_t blen);
00689 
00698         float64_t compute_without_mismatch(
00699             char* avec, int32_t alen, char* bvec, int32_t blen);
00700 
00709         float64_t compute_without_mismatch_matrix(
00710             char* avec, int32_t alen, char* bvec, int32_t blen);
00711 
00720         float64_t compute_using_block(char* avec, int32_t alen,
00721             char* bvec, int32_t blen);
00722 
00724         virtual void remove_lhs();
00725 
00726     private:
00729         void init();
00730 
00731     protected:
00735         float64_t* weights;
00737         int32_t weights_degree;
00739         int32_t weights_length;
00740 
00741 
00743         float64_t* position_weights;
00745         int32_t position_weights_len;
00747         float64_t* weights_buffer;
00749         int32_t mkl_stepsize;
00751         int32_t degree;
00753         int32_t length;
00754 
00756         int32_t max_mismatch;
00758         int32_t seq_length;
00759 
00761         bool initialized;
00762 
00764         bool block_computation;
00765 
00767         float64_t* block_weights;
00769         EWDKernType type;
00771         int32_t which_degree;
00772 
00774         CTrie<DNATrie>* tries;
00775 
00777         bool tree_initialized;
00778 
00780         CAlphabet* alphabet;
00781 };
00782 
00783 }
00784 
00785 #endif /* _WEIGHTEDDEGREESTRINGKERNEL_H___ */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation