SHOGUN  4.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
SubsequenceStringKernel.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2014 Soumyajit De
8  */
9 
13 
14 using namespace shogun;
15 
17 : CStringKernel<char>(0), m_maxlen(1), m_lambda(1.0)
18 {
21 }
22 
24  float64_t lambda)
25 : CStringKernel<char>(size), m_maxlen(maxlen), m_lambda(lambda)
26 {
29 }
30 
32  CStringFeatures<char>* r, int32_t maxlen, float64_t lambda)
33 : CStringKernel<char>(10), m_maxlen(maxlen), m_lambda(lambda)
34 {
36  init(l, r);
38 }
39 
41 {
42  cleanup();
43 }
44 
45 bool CSubsequenceStringKernel::init(CFeatures* l, CFeatures* r)
46 {
48  return init_normalizer();
49 }
50 
52 {
54 }
55 
56 float64_t CSubsequenceStringKernel::compute(int32_t idx_a, int32_t idx_b)
57 {
58  // sanity check
59  REQUIRE(lhs, "lhs feature vector is not set!\n")
60  REQUIRE(rhs, "rhs feature vector is not set!\n")
61 
62  int32_t alen, blen;
63  bool free_avec, free_bvec;
64 
65  char* avec=dynamic_cast<CStringFeatures<char>*>(lhs)
66  ->get_feature_vector(idx_a, alen, free_avec);
67  char* bvec=dynamic_cast<CStringFeatures<char>*>(rhs)
68  ->get_feature_vector(idx_b, blen, free_bvec);
69 
70  REQUIRE(avec, "Feature vector for lhs is NULL!\n");
71  REQUIRE(bvec, "Feature vector for rhs is NULL!\n");
72 
73  // allocating memory for computing K' (Kp)
74  float64_t ***Kp=SG_MALLOC(float64_t**, m_maxlen+1);
75  for (index_t i=0; i<m_maxlen+1; ++i)
76  {
77  Kp[i]=SG_MALLOC(float64_t*, alen);
78  for (index_t j=0; j<alen; ++j)
79  Kp[i][j]=SG_CALLOC(float64_t, blen);
80  }
81 
82  // initialize for 0 subsequence length for both the strings
83  for (index_t j=0; j<alen; j++)
84  for (index_t k=0; k<blen; ++k)
85  Kp[0][j][k]=1.0;
86 
87  // computing of the K' (Kp) function using equations
88  // shown in Lodhi et. al. See the class documentation for
89  // definitions of Kp and Kpp
90  for (index_t i=0; i<m_maxlen; i++)
91  {
92  for (index_t j=0; j<alen-1; j++)
93  {
94  float64_t Kpp=0.0;
95  for (index_t k=0; k<blen-1; k++)
96  {
97  Kpp=m_lambda*(Kpp+m_lambda*(avec[j]==bvec[k])
98  *Kp[i][j][k]);
99  Kp[i+1][j+1][k+1]=m_lambda*Kp[i+1][j][k+1]+Kpp;
100  }
101  }
102  }
103 
104  // compute the kernel function
105  float64_t K=0.0;
106  for (index_t i=0; i<m_maxlen; i++)
107  {
108  for (index_t j=0; j<alen; j++)
109  {
110  for (index_t k=0; k<blen; k++)
111  {
112  K+=m_lambda*m_lambda*(avec[j]==bvec[k])
113  *Kp[i][j][k];
114  }
115  }
116  }
117 
118  // cleanup
119  dynamic_cast<CStringFeatures<char>*>(lhs)->free_feature_vector(avec, idx_a,
120  free_avec);
121  dynamic_cast<CStringFeatures<char>*>(rhs)->free_feature_vector(bvec, idx_b,
122  free_bvec);
123 
124  for (index_t i=0; i<m_maxlen+1; ++i)
125  {
126  for (index_t j=0; j<alen; ++j)
127  SG_FREE(Kp[i][j]);
128  SG_FREE(Kp[i]);
129  }
130  SG_FREE(Kp);
131 
132  return K;
133 }
134 
136 {
137  SG_ADD(&m_maxlen, "m_maxlen", "maximum length of common subsequences", MS_AVAILABLE);
138  SG_ADD(&m_lambda, "m_lambda", "gap penalty", MS_AVAILABLE);
139 }
virtual void cleanup()
Definition: Kernel.cpp:173
int32_t index_t
Definition: common.h:62
virtual bool init(CFeatures *lhs, CFeatures *rhs)
virtual bool set_normalizer(CKernelNormalizer *normalizer)
Definition: Kernel.cpp:150
#define REQUIRE(x,...)
Definition: SGIO.h:206
virtual float64_t compute(int32_t idx_a, int32_t idx_b)
double float64_t
Definition: common.h:50
virtual bool init_normalizer()
Definition: Kernel.cpp:168
CFeatures * rhs
feature vectors to occur on right hand side
Definition: Kernel.h:1061
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
CFeatures * lhs
feature vectors to occur on left hand side
Definition: Kernel.h:1059
The class Features is the base class of all feature objects.
Definition: Features.h:68
#define SG_ADD(...)
Definition: SGObject.h:81
friend class CSqrtDiagKernelNormalizer
Definition: Kernel.h:161
Template class StringKernel, is the base class of all String Kernels.
Definition: StringKernel.h:26

SHOGUN Machine Learning Toolbox - Documentation