DistantSegmentsKernel.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2011 Heiko Strathmann
00008  * DS-Kernel implementation Written (W) 2008 Sébastien Boisvert under GPLv3
00009  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
00010  */
00011 
00012 #include <shogun/kernel/DistantSegmentsKernel.h>
00013 #include <string>
00014 
00015 using namespace shogun;
00016 
00017 CDistantSegmentsKernel::CDistantSegmentsKernel() : CStringKernel<char>(),
00018     m_delta(0), m_theta(0)
00019 {
00020     init();
00021 }
00022 
00023 CDistantSegmentsKernel::CDistantSegmentsKernel(int32_t size, int32_t delta,
00024         int32_t theta) : CStringKernel<char>(), m_delta(delta), m_theta(theta)
00025 {
00026     init();
00027 }
00028 
00029 CDistantSegmentsKernel::CDistantSegmentsKernel(CStringFeatures<char>* l,
00030         CStringFeatures<char>* r, int32_t size, int32_t delta, int32_t theta) :
00031     CStringKernel<char>(), m_delta(delta), m_theta(theta)
00032 {
00033     init();
00034     CStringKernel<char>::init(l, r);
00035 }
00036 
00037 bool CDistantSegmentsKernel::init(CFeatures* l, CFeatures* r)
00038 {
00039     CKernel::init(l, r);
00040     return init_normalizer();
00041 }
00042 
00043 void CDistantSegmentsKernel::init()
00044 {
00045     SG_ADD(&m_delta, "delta", "Delta parameter of the DS-Kernel", MS_AVAILABLE);
00046     SG_ADD(&m_theta, "theta", "Theta parameter of the DS-Kernel", MS_AVAILABLE);
00047 }
00048 
00049 float64_t CDistantSegmentsKernel::compute(int32_t idx_a, int32_t idx_b)
00050 {
00051     bool free_a, free_b;
00052     int32_t aLength=0, bLength=0;
00053     char* a=((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, aLength,
00054             free_a);
00055     char* b=((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, bLength,
00056             free_b);
00057     ASSERT(a && b);
00058 
00059     if ((aLength<1)||(bLength<1))
00060         SG_ERROR("Empty sequences");
00061 
00062     float64_t result=compute(a, aLength, b, bLength, m_delta, m_theta);
00063 
00064     ((CStringFeatures<char>*) lhs)->free_feature_vector(a, idx_a, free_a);
00065     ((CStringFeatures<char>*) rhs)->free_feature_vector(b, idx_b, free_b);
00066 
00067     return result;
00068 }
00069 
00070 int32_t CDistantSegmentsKernel::bin(int32_t j, int32_t i)
00071 {
00072     if (i>j)
00073         return 0;
00074     if (i==3 && j>=3)
00075     {
00076         return j*(j-1)*(j-2)/6;
00077     }
00078     else if (i==2 && j>=2)
00079     {
00080         return j*(j-1)/2;
00081     }
00082     return 0;
00083 }
00084 
00085 int32_t CDistantSegmentsKernel::compute(char* s, int32_t sLength, char* t,
00086         int32_t tLength, int32_t delta_m, int32_t theta_m)
00087 {
00088     int32_t c=0;
00089     int32_t* i_=SG_MALLOC(int32_t, delta_m+1);
00090     int32_t* l_=SG_MALLOC(int32_t, delta_m+1);
00091     for (int32_t j_s=0; j_s<=(int32_t) sLength-1; j_s++)
00092     {
00093         for (int32_t j_t=0; j_t<=(int32_t) tLength-1; j_t++)
00094         {
00095             if (s[j_s-1+1]==t[j_t-1+1])
00096             {
00097                 int32_t n=CMath::min(CMath::min(sLength-j_s, tLength-j_t), delta_m);
00098                 int32_t k=-1;
00099                 int32_t i=1;
00100                 while (i<=n)
00101                 {
00102                     k++;
00103                     i_[2*k]=i;
00104                     i++;
00105                     while (i<=n&&s[j_s-1+i]==t[j_t-1+i])
00106                         i++;
00107                     i_[2*k+1]=i;
00108                     l_[k]=i_[2*k+1]-i_[2*k]+1;
00109                     i++;
00110                     while (i<=n&&s[j_s-1+i]!=t[j_t-1+i])
00111                         i++;
00112                 }
00113                 c+=bin(l_[0], 3)-2*bin(l_[0]-theta_m, 3)
00114                         +bin(l_[0]-2*theta_m, 3);
00115                 int32_t c1=0;
00116                 for (int32_t r=1; r<=k; r++)
00117                 {
00118                     c1+=bin(l_[r], 2)-bin(l_[r]-theta_m, 2);
00119                 }
00120                 c+=CMath::min(theta_m, i_[1]-i_[0])*c1;
00121             }
00122         }
00123     }
00124     delete l_;
00125     delete i_;
00126     return c;
00127 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation