DistantSegmentsKernel.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2011 Heiko Strathmann
00008  * DS-Kernel implementation Written (W) 2008 Sébastien Boisvert under GPLv3
00009  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
00010  */
00011 
00012 #include <shogun/kernel/string/DistantSegmentsKernel.h>
00013 #include <string>
00014 
00015 using namespace shogun;
00016 
00017 CDistantSegmentsKernel::CDistantSegmentsKernel() : CStringKernel<char>(),
00018     m_delta(0), m_theta(0)
00019 {
00020     init();
00021 }
00022 
00023 CDistantSegmentsKernel::CDistantSegmentsKernel(int32_t size, int32_t delta,
00024         int32_t theta) : CStringKernel<char>(size), m_delta(delta),
00025         m_theta(theta)
00026 {
00027     init();
00028 }
00029 
00030 CDistantSegmentsKernel::CDistantSegmentsKernel(CStringFeatures<char>* l,
00031         CStringFeatures<char>* r, int32_t size, int32_t delta, int32_t theta) :
00032     CStringKernel<char>(size), m_delta(delta), m_theta(theta)
00033 {
00034     init();
00035     CStringKernel<char>::init(l, r);
00036 }
00037 
00038 bool CDistantSegmentsKernel::init(CFeatures* l, CFeatures* r)
00039 {
00040     CKernel::init(l, r);
00041     return init_normalizer();
00042 }
00043 
00044 void CDistantSegmentsKernel::init()
00045 {
00046     SG_ADD(&m_delta, "delta", "Delta parameter of the DS-Kernel", MS_AVAILABLE);
00047     SG_ADD(&m_theta, "theta", "Theta parameter of the DS-Kernel", MS_AVAILABLE);
00048 }
00049 
00050 float64_t CDistantSegmentsKernel::compute(int32_t idx_a, int32_t idx_b)
00051 {
00052     bool free_a, free_b;
00053     int32_t aLength=0, bLength=0;
00054     char* a=((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, aLength,
00055             free_a);
00056     char* b=((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, bLength,
00057             free_b);
00058     ASSERT(a && b);
00059 
00060     if ((aLength<1)||(bLength<1))
00061         SG_ERROR("Empty sequences");
00062 
00063     float64_t result=compute(a, aLength, b, bLength, m_delta, m_theta);
00064 
00065     ((CStringFeatures<char>*) lhs)->free_feature_vector(a, idx_a, free_a);
00066     ((CStringFeatures<char>*) rhs)->free_feature_vector(b, idx_b, free_b);
00067 
00068     return result;
00069 }
00070 
00071 int32_t CDistantSegmentsKernel::bin(int32_t j, int32_t i)
00072 {
00073     if (i>j)
00074         return 0;
00075     if (i==3 && j>=3)
00076     {
00077         return j*(j-1)*(j-2)/6;
00078     }
00079     else if (i==2 && j>=2)
00080     {
00081         return j*(j-1)/2;
00082     }
00083     return 0;
00084 }
00085 
00086 int32_t CDistantSegmentsKernel::compute(char* s, int32_t sLength, char* t,
00087         int32_t tLength, int32_t delta_m, int32_t theta_m)
00088 {
00089     int32_t c=0;
00090     int32_t* i_=SG_MALLOC(int32_t, delta_m+1);
00091     int32_t* l_=SG_MALLOC(int32_t, delta_m+1);
00092     for (int32_t j_s=0; j_s<=(int32_t) sLength-1; j_s++)
00093     {
00094         for (int32_t j_t=0; j_t<=(int32_t) tLength-1; j_t++)
00095         {
00096             if (s[j_s-1+1]==t[j_t-1+1])
00097             {
00098                 int32_t n=CMath::min(CMath::min(sLength-j_s, tLength-j_t), delta_m);
00099                 int32_t k=-1;
00100                 int32_t i=1;
00101                 while (i<=n)
00102                 {
00103                     k++;
00104                     i_[2*k]=i;
00105                     i++;
00106                     while (i<=n&&s[j_s-1+i]==t[j_t-1+i])
00107                         i++;
00108                     i_[2*k+1]=i;
00109                     l_[k]=i_[2*k+1]-i_[2*k]+1;
00110                     i++;
00111                     while (i<=n&&s[j_s-1+i]!=t[j_t-1+i])
00112                         i++;
00113                 }
00114                 c+=bin(l_[0], 3)-2*bin(l_[0]-theta_m, 3)
00115                         +bin(l_[0]-2*theta_m, 3);
00116                 int32_t c1=0;
00117                 for (int32_t r=1; r<=k; r++)
00118                 {
00119                     c1+=bin(l_[r], 2)-bin(l_[r]-theta_m, 2);
00120                 }
00121                 c+=CMath::min(theta_m, i_[1]-i_[0])*c1;
00122             }
00123         }
00124     }
00125     SG_FREE(l_);
00126     SG_FREE(i_);
00127     return c;
00128 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation