SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
SimpleLocalityImprovedStringKernel.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2008 Gunnar Raetsch
8  * Written (W) 2013 Soeren Sonnenburg
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  * Copyright (C) 2013 Soeren Sonnenburg
11  */
12 
13 #include <shogun/lib/common.h>
14 #include <shogun/io/SGIO.h>
19 
20 using namespace shogun;
21 
23 : CStringKernel<char>()
24 {
25  init();
26 }
27 
29  int32_t size, int32_t l, int32_t id, int32_t od)
30 : CStringKernel<char>(size)
31 {
32  init();
33 
34  length=l;
35  inner_degree=id;
36  outer_degree=od;
37 }
38 
41  int32_t len, int32_t id, int32_t od)
42 : CStringKernel<char>()
43 {
44  init();
45 
46  length=len;
47  inner_degree=id;
48  outer_degree=od;
49 
50  init(l, r);
51 }
52 
54 {
55  cleanup();
56 }
57 
58 bool CSimpleLocalityImprovedStringKernel::init(CFeatures* l, CFeatures* r)
59 {
60  bool result = CStringKernel<char>::init(l,r);
61 
62  if (!result)
63  return false;
64  const int32_t num_features = ((CStringFeatures<char>*) l)->get_max_vector_length();
65  const int32_t PYRAL = 2 * length - 1; // total window length
66  const int32_t pyra_len = num_features-PYRAL+1;
67  const int32_t pyra_len2 = (int32_t) pyra_len/2;
68 
70 
71  SG_DEBUG("initializing pyramid weights: size=%ld length=%i\n",
72  num_features, length);
73 
74  float64_t PYRAL_pot;
75  int32_t DEGREE1_1 = (inner_degree & 0x1)==0;
76  int32_t DEGREE1_1n = (inner_degree & ~0x1)!=0;
77  int32_t DEGREE1_2 = (inner_degree & 0x2)!=0;
78  int32_t DEGREE1_3 = (inner_degree & ~0x3)!=0;
79  int32_t DEGREE1_4 = (inner_degree & 0x4)!=0;
80  {
81  float64_t PYRAL_ = PYRAL;
82  PYRAL_pot = DEGREE1_1 ? 1.0 : PYRAL_;
83  if (DEGREE1_1n)
84  {
85  PYRAL_ *= PYRAL_;
86  if (DEGREE1_2)
87  PYRAL_pot *= PYRAL_;
88  if (DEGREE1_3)
89  {
90  PYRAL_ *= PYRAL_;
91  if (DEGREE1_4)
92  PYRAL_pot *= PYRAL_;
93  }
94  }
95  }
96 
97  {
98  int32_t j;
99  for (j = 0; j < pyra_len; j++)
100  pyramid_weights[j] = 4*((float64_t)((j < pyra_len2)? j+1 : pyra_len-j))/((float64_t)pyra_len);
101  for (j = 0; j < pyra_len; j++)
102  pyramid_weights[j] /= PYRAL_pot;
103  }
104 
105  return init_normalizer();
106 }
107 
109 {
112 }
113 
114 float64_t CSimpleLocalityImprovedStringKernel::dot_pyr (const char* const x1,
115  const char* const x2, const int32_t NOF_NTS, const int32_t NTWIDTH,
116  const int32_t DEGREE1, const int32_t DEGREE2, float64_t *pyra)
117 {
118  const int32_t PYRAL = 2*NTWIDTH-1; // total window length
119  float64_t pot;
120  float64_t sum;
121  int32_t DEGREE1_1 = (DEGREE1 & 0x1)==0;
122  int32_t DEGREE1_1n = (DEGREE1 & ~0x1)!=0;
123  int32_t DEGREE1_2 = (DEGREE1 & 0x2)!=0;
124  int32_t DEGREE1_3 = (DEGREE1 & ~0x3)!=0;
125  int32_t DEGREE1_4 = (DEGREE1 & 0x4)!=0;
126 
127  ASSERT((DEGREE1 & ~0x7) == 0)
128  ASSERT((DEGREE2 & ~0x7) == 0)
129 
130  register int32_t conv;
131  register int32_t i;
132  register int32_t j;
133 
134  sum = 0.0;
135  conv = 0;
136  for (j = 0; j < PYRAL; j++)
137  conv += (x1[j] == x2[j]) ? 1 : 0;
138 
139  for (i = 0; i < NOF_NTS-PYRAL+1; i++)
140  {
141  register float64_t pot2;
142  if (i>0)
143  conv += ((x1[i+PYRAL-1] == x2[i+PYRAL-1]) ? 1 : 0 ) -
144  ((x1[i-1] == x2[i-1]) ? 1 : 0);
145  { /* potencing of conv -- float64_t is faster*/
146  register float64_t conv2 = conv;
147  pot2 = (DEGREE1_1) ? 1.0 : conv2;
148  if (DEGREE1_1n)
149  {
150  conv2 *= conv2;
151  if (DEGREE1_2)
152  pot2 *= conv2;
153  if (DEGREE1_3 && DEGREE1_4)
154  pot2 *= conv2*conv2;
155  }
156  }
157  sum += pot2*pyra[i];
158  }
159 
160  pot = ((DEGREE2 & 0x1) == 0) ? 1.0 : sum;
161  if ((DEGREE2 & ~0x1) != 0)
162  {
163  sum *= sum;
164  if ((DEGREE2 & 0x2) != 0)
165  pot *= sum;
166  if ((DEGREE2 & ~0x3) != 0)
167  {
168  sum *= sum;
169  if ((DEGREE2 & 0x4) != 0)
170  pot *= sum;
171  }
172  }
173  return pot;
174 }
175 
177  int32_t idx_a, int32_t idx_b)
178 {
179  int32_t alen, blen;
180  bool free_avec, free_bvec;
181 
182  char* avec = ((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen, free_avec);
183  char* bvec = ((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen, free_bvec);
184 
185  // can only deal with strings of same length
186  ASSERT(alen==blen)
187 
188  float64_t dpt;
189 
190  dpt = dot_pyr(avec, bvec, alen, length, inner_degree, outer_degree, pyramid_weights);
191  dpt = dpt / pow((float64_t) alen, (float64_t) outer_degree);
192 
193  ((CStringFeatures<char>*) lhs)->free_feature_vector(avec, idx_a, free_avec);
194  ((CStringFeatures<char>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec);
195  return (float64_t) dpt;
196 }
197 
198 void CSimpleLocalityImprovedStringKernel::init()
199 {
201 
202  length = 3;
203  inner_degree = 3;
204  outer_degree = 1;
205 
206  SG_ADD(&length, "length", "Window Length.", MS_AVAILABLE);
207  SG_ADD(&inner_degree, "inner_degree", "Inner degree.", MS_AVAILABLE);
208  SG_ADD(&outer_degree, "outer_degree", "Outer degree.", MS_AVAILABLE);
209  SG_ADD(&pyramid_weights,"pyramid_weights", "Pyramid weights.", MS_AVAILABLE);
210 }

SHOGUN Machine Learning Toolbox - Documentation