SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
LocalityImprovedStringKernel.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2008 Gunnar Raetsch
8  * Written (W) 1999-2008 Soeren Sonnenburg
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #include <shogun/lib/common.h>
13 #include <shogun/io/SGIO.h>
17 
18 using namespace shogun;
19 
21 : CStringKernel<char>()
22 {
    // Constructor body: the CStringKernel<char> base is default-constructed
    // by the initializer list above.
    // NOTE(review): the declarator line (listing line 20) is not present in
    // this listing.
    // The zero-argument init() resets length, inner_degree and outer_degree
    // to 0 and passes each of them to SG_ADD.
23  init();
24 }
25 
27  int32_t size, int32_t l, int32_t id, int32_t od)
28 : CStringKernel<char>(size)
29 {
    // Constructor taking explicit kernel settings.
    // NOTE(review): the first declarator line (listing line 26) is not
    // present in this listing.
    //   size - forwarded unchanged to the CStringKernel<char> constructor
    //          (presumably a cache size - confirm against the base class)
    //   l    - stored in `length`
    //   id   - stored in `inner_degree`
    //   od   - stored in `outer_degree`
    // init() runs first so that its zeroing of the three members does not
    // overwrite the values assigned below.
30  init();
31 
32  length=l;
33  inner_degree=id;
34  outer_degree=od;
35 
    // Debug trace of the values the kernel was created with.
36  SG_DEBUG("LIK with parms: l=%d, id=%d, od=%d created!\n", l, id, od)
37 }
38 
40  CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t len,
41  int32_t id, int32_t od)
42 : CStringKernel<char>()
43 {
    // Constructor taking kernel settings plus the two feature objects.
    // NOTE(review): the first declarator line (listing line 39) is not
    // present in this listing.
    //   l, r - feature objects handed to init(l, r) at the end
    //   len  - stored in `length`
    //   id   - stored in `inner_degree`
    //   od   - stored in `outer_degree`
    // The base class is default-constructed here (no size argument).
    // The zero-argument init() runs first so that its zeroing of the three
    // members does not overwrite the values assigned below.
44  init();
45 
46  length=len;
47  inner_degree=id;
48  outer_degree=od;
49 
    // Debug trace of the values the kernel was created with.
50  SG_DEBUG("LIK with parms: l=%d, id=%d, od=%d created!\n", len, id, od)
51 
    // Two-argument init; its bool result is not checked here.
52  init(l, r);
53 }
54 
56 {
    // This body only calls cleanup().
    // NOTE(review): the declarator line (listing line 55) is not present in
    // this listing; presumably this is the destructor - confirm.
57  cleanup();
58 }
59 
// Two-argument initialisation taking the left-hand (l) and right-hand (r)
// feature objects. Returns whatever init_normalizer() returns.
// NOTE(review): listing line 62 is not present in this listing; in the
// visible code neither l nor r is used, so the statement that consumes them
// (presumably a call into the base-class init - confirm) is missing here.
60 bool CLocalityImprovedStringKernel::init(CFeatures* l, CFeatures* r)
61 {
63  return init_normalizer();
64 }
65 
67 {
    // Kernel evaluation for the string at index idx_a of lhs and the string
    // at index idx_b of rhs.
    // NOTE(review): the declarator line (listing line 66) is not present in
    // this listing; idx_a and idx_b come from it.
    //
    // Steps performed by the visible code:
    //   1. Build a per-position match table (1 where the characters agree,
    //      0 otherwise).
    //   2. For every window start t in [0, alen-length), form a weighted sum
    //      of matches around the centre position t+length, divide it by
    //      (2*length+1)*(length+1) and raise it to the power inner_degree+1.
    //   3. Add up the window values and return that total raised to the
    //      power outer_degree+1.
    //
    // NOTE(review): `length` is not validated in this body. With a negative
    // length, match[t+length] would index before the start of the buffer for
    // small t (and length == -1 makes the divisor zero).
68  int32_t alen, blen;
69  bool free_avec, free_bvec;
70 
    // alen/blen and the free_* flags are uninitialised before these calls, so
    // they are presumably filled in by get_feature_vector - confirm.
71  char* avec = ((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen, free_avec);
72  char* bvec = ((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen, free_bvec);
73  // can only deal with strings of same length
74  ASSERT(alen==blen && alen>0)
75 
76  int32_t i,t;
    // One float64_t per string position; returned to SG_FREE after the loop.
77  float64_t* match=SG_MALLOC(float64_t, alen);
78 
79  // initialize match table 1 -> match; 0 -> no match
80  for (i = 0; i<alen; i++)
81  match[i] = (avec[i] == bvec[i])? 1 : 0;
82 
83  float64_t outer_sum = 0;
84 
    // t is the first position of the window; its centre is t+length, which
    // stays below alen because t < alen-length.
85  for (t = 0; t<alen-length; t++)
86  {
87  float64_t sum = 0;
    // Left half: position t+i gets weight i+1 (1..length, rising towards the
    // centre). Right half: position t+i+length+1 gets weight length-i
    // (length..1, falling away from the centre). The second loop condition
    // stops the summation once the right-hand index would reach alen, so
    // windows near the end of the string contribute fewer terms.
88  for (i = 0; i<length && t+i+length+1<alen; i++)
89  sum += (i+1)*match[t+i]+(length-i)*match[t+i+length+1];
    // The centre element gets weight length+1.
    // NOTE(review): the weights used above total (length+1)^2 for a full
    // window, while the divisor is (2*length+1)*(length+1); a full match
    // therefore does not normalise to 1 unless length is 0 - confirm intent.
90  //add middle element + normalize with sum_i=0^2l+1 i = (2l+1)(l+1)
91  float64_t inner_sum = (sum + (length+1)*match[t+length]) / ((2*length+1)*(length+1));
92  inner_sum = pow(inner_sum, inner_degree + 1);
93  outer_sum += inner_sum;
94  }
95  SG_FREE(match);
96 
    // Hand both vectors back together with the flags set when they were
    // fetched.
97  ((CStringFeatures<char>*) lhs)->free_feature_vector(avec, idx_a, free_avec);
98  ((CStringFeatures<char>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec);
99  return pow(outer_sum, outer_degree + 1);
100 }
101 
// Zero-argument initialisation shared by the constructors: resets the three
// kernel settings to 0 and passes each of them to SG_ADD together with a
// name, a description and MS_AVAILABLE.
// NOTE(review): listing line 104 is not present in this listing, so one
// statement at the top of the body is not shown here.
102 void CLocalityImprovedStringKernel::init()
103 {
105 
106  length = 0;
107  inner_degree = 0;
108  outer_degree = 0;
109 
110  SG_ADD(&length, "length", "Window Length.", MS_AVAILABLE);
111  SG_ADD(&inner_degree, "inner_degree", "Inner degree.", MS_AVAILABLE);
112  SG_ADD(&outer_degree, "outer_degree", "Outer degree.", MS_AVAILABLE);
113 }

SHOGUN Machine Learning Toolbox - Documentation