SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
LocalityImprovedStringKernel.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2008 Gunnar Raetsch
8  * Written (W) 1999-2008 Soeren Sonnenburg
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #include <shogun/lib/common.h>
13 #include <shogun/io/SGIO.h>
16 
17 using namespace shogun;
18 
// Constructor body; its declaration line (listing line 19) is absent from
// this extract, so the parameter list is not visible here (presumably the
// default constructor - confirm against the header).
// Passes 0 to the CStringKernel<char> base and lets init() set length,
// inner_degree and outer_degree to 0 and register them.
20 : CStringKernel<char>(0)
21 {
22  init();
23 }
24 
// Constructor taking (size, l, id, od); the first line of its declaration
// (listing line 25) is absent from this extract.
//   size - forwarded unchanged to the CStringKernel<char> base
//          (presumably the kernel cache size - confirm against the base class)
//   l    - stored in length
//   id   - stored in inner_degree
//   od   - stored in outer_degree
26  int32_t size, int32_t l, int32_t id, int32_t od)
27 : CStringKernel<char>(size)
28 {
29  init(); // must run first: it resets the three members to 0 and registers them
30 
    // overwrite the defaults set by init() with the caller's values
31  length=l;
32  inner_degree=id;
33  outer_degree=od;
34 
    // log the chosen parameters
35  SG_INFO( "LIK with parms: l=%d, id=%d, od=%d created!\n", l, id, od);
36 }
37 
// Constructor taking (l, r, len, id, od); the first line of its declaration
// (listing line 38) is absent from this extract.
//   l, r - string feature objects, passed on to init(l, r) at the end
//   len  - stored in length
//   id   - stored in inner_degree
//   od   - stored in outer_degree
// The CStringKernel<char> base is constructed with a hard-coded 10
// (presumably a default cache size - confirm against the base class).
39  CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t len,
40  int32_t id, int32_t od)
41 : CStringKernel<char>(10)
42 {
43  init(); // must run first: it resets the three members to 0 and registers them
44 
    // overwrite the defaults set by init() with the caller's values
45  length=len;
46  inner_degree=id;
47  outer_degree=od;
48 
    // log the chosen parameters
49  SG_INFO( "LIK with parms: l=%d, id=%d, od=%d created!\n", len, id, od);
50 
    // two-argument init with the supplied features; its bool result is ignored here
51  init(l, r);
52 }
53 
// Body of what is presumably the destructor; its declaration line (listing
// line 54) is absent from this extract - confirm against the header.
// The only action is a call to cleanup(), which is not defined in the lines
// shown here.
55 {
56  cleanup();
57 }
58 
// Two-argument init: as shown here, returns the result of init_normalizer().
// The parameters l and r are not referenced in the visible lines.
// NOTE(review): listing line 61 is absent from this extract, so a statement
// that consumes l and r (e.g. a call into the base class) may have been
// dropped - verify against the upstream file before relying on this body.
59 bool CLocalityImprovedStringKernel::init(CFeatures* l, CFeatures* r)
60 {
62  return init_normalizer();
63 }
64 
// Kernel evaluation for the pair of strings selected by idx_a (in lhs) and
// idx_b (in rhs). The declaration line (listing line 65) is absent from this
// extract, so the function name and exact signature are not visible here.
//
// Outline:
//   1. fetch both strings; they must have the same, non-zero length
//   2. build a per-position match table (1 where the characters agree, else 0)
//   3. for every window start t, take a weighted sum of the matches at
//      positions t .. t+2*length: weights rise 1..length on the left, are
//      length+1 at the middle position t+length, and fall length..1 on the right
//   4. divide by (2*length+1)*(length+1), raise to the power inner_degree+1
//      and accumulate over all t
//   5. return the accumulated value raised to the power outer_degree+1
66 {
67  int32_t alen, blen;
68  bool free_avec, free_bvec;
69 
    // alen/blen and free_avec/free_bvec are filled in by get_feature_vector
70  char* avec = ((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen, free_avec);
71  char* bvec = ((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen, free_bvec);
72  // can only deal with strings of same length
    // NOTE(review): if ASSERT does not return on failure, avec/bvec are never
    // handed back via free_feature_vector - confirm ASSERT semantics
73  ASSERT(alen==blen && alen>0);
74 
75  int32_t i,t;
76  float64_t* match=SG_MALLOC(float64_t, alen);
77 
78  // initialize match table 1 -> match; 0 -> no match
79  for (i = 0; i<alen; i++)
80  match[i] = (avec[i] == bvec[i])? 1 : 0;
81 
82  float64_t outer_sum = 0;
83 
    // t < alen-length keeps the middle index t+length inside the match table;
    // when alen <= length the loop body never runs and outer_sum stays 0
84  for (t = 0; t<alen-length; t++)
85  {
86  float64_t sum = 0;
    // the second condition keeps the right-hand index t+i+length+1 in range,
    // so windows close to the end of the string are truncated
87  for (i = 0; i<length && t+i+length+1<alen; i++)
88  sum += (i+1)*match[t+i]+(length-i)*match[t+i+length+1];
89  // add the middle element (weight length+1), then divide by
    // (2*length+1)*(length+1), which equals sum_{i=0}^{2*length+1} i.
    // NOTE(review): the weights actually applied above add up to (length+1)^2,
    // so a fully matching, untruncated window yields (length+1)/(2*length+1)
    // rather than 1 - confirm this scaling is intended.
90  float64_t inner_sum = (sum + (length+1)*match[t+length]) / ((2*length+1)*(length+1));
91  inner_sum = pow(inner_sum, inner_degree + 1);
92  outer_sum += inner_sum;
93  }
94  SG_FREE(match);
95 
    // hand both vectors back, passing the flags set by get_feature_vector
96  ((CStringFeatures<char>*) lhs)->free_feature_vector(avec, idx_a, free_avec);
97  ((CStringFeatures<char>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec);
98  return pow(outer_sum, outer_degree + 1);
99 }
100 
// Argument-less init: sets length, inner_degree and outer_degree to 0 and
// registers each of them through SG_ADD under the names "length",
// "inner_degree" and "outer_degree".
// It is called at the start of every constructor body visible in this file,
// before any caller-supplied values are assigned to the members.
101 void CLocalityImprovedStringKernel::init()
102 {
    // defaults; constructors that take parameters overwrite these afterwards
103  length = 0;
104  inner_degree = 0;
105  outer_degree = 0;
106 
    // register the members (address, name, description, flag); MS_AVAILABLE
    // presumably marks them as usable for model selection - confirm
107  SG_ADD(&length, "length", "Window Length.", MS_AVAILABLE);
108  SG_ADD(&inner_degree, "inner_degree", "Inner degree.", MS_AVAILABLE);
109  SG_ADD(&outer_degree, "outer_degree", "Outer degree.", MS_AVAILABLE);
110 }

SHOGUN Machine Learning Toolbox - Documentation