SimpleLocalityImprovedStringKernel.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Gunnar Raetsch
00008  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #include <shogun/lib/common.h>
00012 #include <shogun/io/SGIO.h>
00013 #include <shogun/kernel/string/SimpleLocalityImprovedStringKernel.h>
00014 #include <shogun/features/Features.h>
00015 #include <shogun/features/StringFeatures.h>
00016 
00017 using namespace shogun;
00018 
00019 CSimpleLocalityImprovedStringKernel::CSimpleLocalityImprovedStringKernel()
00020 : CStringKernel<char>()
00021 {
00022     SG_UNSTABLE("SimpleLocalityImprovedStringKernel");
00023     init();
00024 }
00025 
00026 CSimpleLocalityImprovedStringKernel::CSimpleLocalityImprovedStringKernel(
00027     int32_t size, int32_t l, int32_t id, int32_t od)
00028 : CStringKernel<char>(size)
00029 {
00030     SG_UNSTABLE("SimpleLocalityImprovedStringKernel");
00031     init();
00032 
00033     length=l;
00034     inner_degree=id;
00035     outer_degree=od;
00036 }
00037 
00038 CSimpleLocalityImprovedStringKernel::CSimpleLocalityImprovedStringKernel(
00039     CStringFeatures<char>* l, CStringFeatures<char>* r,
00040     int32_t len, int32_t id, int32_t od)
00041 : CStringKernel<char>()
00042 {
00043     SG_UNSTABLE("SimpleLocalityImprovedStringKernel");
00044     init();
00045 
00046     length=len;
00047     inner_degree=id;
00048     outer_degree=od;
00049 
00050     init(l, r);
00051 }
00052 
00053 CSimpleLocalityImprovedStringKernel::~CSimpleLocalityImprovedStringKernel()
00054 {
00055     cleanup();
00056 }
00057 
00058 bool CSimpleLocalityImprovedStringKernel::init(CFeatures* l, CFeatures* r)
00059 {
00060     bool result = CStringKernel<char>::init(l,r);
00061 
00062     if (!result)
00063         return false;
00064     const int32_t num_features = ((CStringFeatures<char>*) l)->get_max_vector_length();
00065     const int32_t PYRAL = 2 * length - 1; // total window length
00066     const int32_t pyra_len  = num_features-PYRAL+1;
00067     const int32_t pyra_len2 = (int32_t) pyra_len/2;
00068 
00069     SG_FREE(pyramid_weights);
00070 
00071     pyramid_weights = SG_MALLOC(float64_t, pyra_len);
00072     num_pyramid_weights=pyra_len;
00073 
00074     SG_INFO("initializing pyramid weights: size=%ld length=%i\n",
00075         num_features, length);
00076 
00077     float64_t PYRAL_pot;
00078     int32_t DEGREE1_1  = (inner_degree & 0x1)==0;
00079     int32_t DEGREE1_1n = (inner_degree & ~0x1)!=0;
00080     int32_t DEGREE1_2  = (inner_degree & 0x2)!=0;
00081     int32_t DEGREE1_3  = (inner_degree & ~0x3)!=0;
00082     int32_t DEGREE1_4  = (inner_degree & 0x4)!=0;
00083     {
00084     float64_t PYRAL_ = PYRAL;
00085     PYRAL_pot = DEGREE1_1 ? 1.0 : PYRAL_;
00086     if (DEGREE1_1n)
00087     {
00088         PYRAL_ *= PYRAL_;
00089         if (DEGREE1_2)
00090             PYRAL_pot *= PYRAL_;
00091         if (DEGREE1_3)
00092         {
00093             PYRAL_ *= PYRAL_;
00094             if (DEGREE1_4)
00095                 PYRAL_pot *= PYRAL_;
00096         }
00097     }
00098     }
00099 
00100     {
00101     int32_t j;
00102     for (j = 0; j < pyra_len; j++)
00103         pyramid_weights[j] = 4*((float64_t)((j < pyra_len2)? j+1 : pyra_len-j))/((float64_t)pyra_len);
00104     for (j = 0; j < pyra_len; j++)
00105         pyramid_weights[j] /= PYRAL_pot;
00106     }
00107 
00108     return init_normalizer();
00109 }
00110 
00111 void CSimpleLocalityImprovedStringKernel::cleanup()
00112 {
00113     SG_FREE(pyramid_weights);
00114     pyramid_weights = NULL;
00115     num_pyramid_weights = 0;
00116 
00117     CKernel::cleanup();
00118 }
00119 
00120 float64_t CSimpleLocalityImprovedStringKernel::dot_pyr (const char* const x1,
00121          const char* const x2, const int32_t NOF_NTS, const int32_t NTWIDTH,
00122          const int32_t DEGREE1, const int32_t DEGREE2, float64_t *pyra)
00123 {
00124     const int32_t PYRAL = 2*NTWIDTH-1; // total window length
00125     int32_t pyra_len, pyra_len2;
00126     float64_t pot, PYRAL_pot;
00127     float64_t sum;
00128     int32_t DEGREE1_1 = (DEGREE1 & 0x1)==0;
00129     int32_t DEGREE1_1n = (DEGREE1 & ~0x1)!=0;
00130     int32_t DEGREE1_2 = (DEGREE1 & 0x2)!=0;
00131     int32_t DEGREE1_3 = (DEGREE1 & ~0x3)!=0;
00132     int32_t DEGREE1_4 = (DEGREE1 & 0x4)!=0;
00133     {
00134     float64_t PYRAL_ = PYRAL;
00135     PYRAL_pot = DEGREE1_1 ? 1.0 : PYRAL_;
00136     if (DEGREE1_1n)
00137     {
00138         PYRAL_ *= PYRAL_;
00139         if (DEGREE1_2) PYRAL_pot *= PYRAL_;
00140         if (DEGREE1_3)
00141         {
00142             PYRAL_ *= PYRAL_;
00143             if (DEGREE1_4) PYRAL_pot *= PYRAL_;
00144         }
00145     }
00146     }
00147 
00148     ASSERT((DEGREE1 & ~0x7) == 0);
00149     ASSERT((DEGREE2 & ~0x7) == 0);
00150 
00151     pyra_len = NOF_NTS-PYRAL+1;
00152     pyra_len2 = (int32_t) pyra_len/2;
00153     {
00154     int32_t j;
00155     for (j = 0; j < pyra_len; j++)
00156         pyra[j] = 4*((float64_t)((j < pyra_len2) ? j+1 : pyra_len-j))/((float64_t)pyra_len);
00157     for (j = 0; j < pyra_len; j++)
00158         pyra[j] /= PYRAL_pot;
00159     }
00160 
00161     register int32_t conv;
00162     register int32_t i;
00163     register int32_t j;
00164 
00165     sum = 0.0;
00166     conv = 0;
00167     for (j = 0; j < PYRAL; j++)
00168         conv += (x1[j] == x2[j]) ? 1 : 0;
00169 
00170     for (i = 0; i < NOF_NTS-PYRAL+1; i++)
00171     {
00172         register float64_t pot2;
00173         if (i>0)
00174             conv += ((x1[i+PYRAL-1] == x2[i+PYRAL-1]) ? 1 : 0 ) -
00175                 ((x1[i-1] == x2[i-1]) ? 1 : 0);
00176         { /* potencing of conv -- float64_t is faster*/
00177         register float64_t conv2 = conv;
00178         pot2 = (DEGREE1_1) ? 1.0 : conv2;
00179             if (DEGREE1_1n)
00180             {
00181                 conv2 *= conv2;
00182                 if (DEGREE1_2)
00183                     pot2 *= conv2;
00184                 if (DEGREE1_3 && DEGREE1_4)
00185                     pot2 *= conv2*conv2;
00186             }
00187         }
00188         sum += pot2*pyra[i];
00189     }
00190 
00191     pot = ((DEGREE2 & 0x1) == 0) ? 1.0 : sum;
00192     if ((DEGREE2 & ~0x1) != 0)
00193     {
00194         sum *= sum;
00195         if ((DEGREE2 & 0x2) != 0)
00196             pot *= sum;
00197         if ((DEGREE2 & ~0x3) != 0)
00198         {
00199             sum *= sum;
00200             if ((DEGREE2 & 0x4) != 0)
00201                 pot *= sum;
00202         }
00203     }
00204     return pot;
00205 }
00206 
00207 float64_t CSimpleLocalityImprovedStringKernel::compute(
00208     int32_t idx_a, int32_t idx_b)
00209 {
00210     int32_t alen, blen;
00211     bool free_avec, free_bvec;
00212 
00213     char* avec = ((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen, free_avec);
00214     char* bvec = ((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen, free_bvec);
00215 
00216     // can only deal with strings of same length
00217     ASSERT(alen==blen);
00218 
00219     float64_t dpt;
00220 
00221     dpt = dot_pyr(avec, bvec, alen, length, inner_degree, outer_degree, pyramid_weights);
00222     dpt = dpt / pow((float64_t) alen, (float64_t) outer_degree);
00223 
00224     ((CStringFeatures<char>*) lhs)->free_feature_vector(avec, idx_a, free_avec);
00225     ((CStringFeatures<char>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec);
00226     return (float64_t) dpt;
00227 }
00228 
00229 void CSimpleLocalityImprovedStringKernel::init()
00230 {
00231     length = 3;
00232     inner_degree = 3;
00233     outer_degree = 1;
00234     pyramid_weights=NULL;
00235     num_pyramid_weights=0;
00236 
00237     SG_ADD(&length, "length", "Window Length.", MS_AVAILABLE);
00238     SG_ADD(&inner_degree, "inner_degree", "Inner degree.", MS_AVAILABLE);
00239     SG_ADD(&outer_degree, "outer_degree", "Outer degree.", MS_AVAILABLE);
00240 
00241     m_parameters->add_vector(&pyramid_weights, &num_pyramid_weights,
00242             "pyramid_weights", "Pyramid weights.");
00243 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation