SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
SimpleLocalityImprovedStringKernel.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2008 Gunnar Raetsch
8  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
9  */
10 
11 #include <shogun/lib/common.h>
12 #include <shogun/io/SGIO.h>
16 
17 using namespace shogun;
18 
20 : CStringKernel<char>() // NOTE(review): the signature line (listing line 19) is missing from this extract; the base class is default-constructed here
21 {
22  SG_UNSTABLE("SimpleLocalityImprovedStringKernel"); // macro defined elsewhere; presumably flags this class as unstable - confirm
23  init(); // parameterless init(): sets length=3, inner_degree=3, outer_degree=1, pyramid_weights=NULL and registers the parameters
24 }
25 
27  int32_t size, int32_t l, int32_t id, int32_t od) // NOTE(review): first signature line (listing line 26) is missing from this extract
28 : CStringKernel<char>(size) // size is handed to the base class unchanged
29 {
30  SG_UNSTABLE("SimpleLocalityImprovedStringKernel");
31  init(); // establish the defaults and register the parameters first ...
32 
33  length=l; // ... then override the defaults: l is stored as the window length
34  inner_degree=id; // id is stored as the inner degree
35  outer_degree=od; // od is stored as the outer degree
36 }
37 
40  int32_t len, int32_t id, int32_t od) // NOTE(review): the signature lines declaring l and r (listing lines 38-39) are missing from this extract
41 : CStringKernel<char>()
42 {
43  SG_UNSTABLE("SimpleLocalityImprovedStringKernel");
44  init(); // parameterless init(): defaults + parameter registration
45 
46  length=len; // override the default window length
47  inner_degree=id; // override the default inner degree
48  outer_degree=od; // override the default outer degree
49 
50  init(l, r); // two-argument init(): allocates and fills pyramid_weights for l; its bool result is not checked here
51 }
52 
54 { // NOTE(review): signature line (listing line 53) is missing from this extract; presumably the destructor - confirm
55  cleanup(); // the whole body delegates to cleanup()
56 }
57 
58 bool CSimpleLocalityImprovedStringKernel::init(CFeatures* l, CFeatures* r)
59 {
60  bool result = CStringKernel<char>::init(l,r);
61 
62  if (!result)
63  return false;
64  const int32_t num_features = ((CStringFeatures<char>*) l)->get_max_vector_length();
65  const int32_t PYRAL = 2 * length - 1; // total window length
66  const int32_t pyra_len = num_features-PYRAL+1;
67  const int32_t pyra_len2 = (int32_t) pyra_len/2;
68 
70 
71  pyramid_weights = SG_MALLOC(float64_t, pyra_len);
72  num_pyramid_weights=pyra_len;
73 
74  SG_INFO("initializing pyramid weights: size=%ld length=%i\n",
75  num_features, length);
76 
77  float64_t PYRAL_pot;
78  int32_t DEGREE1_1 = (inner_degree & 0x1)==0;
79  int32_t DEGREE1_1n = (inner_degree & ~0x1)!=0;
80  int32_t DEGREE1_2 = (inner_degree & 0x2)!=0;
81  int32_t DEGREE1_3 = (inner_degree & ~0x3)!=0;
82  int32_t DEGREE1_4 = (inner_degree & 0x4)!=0;
83  {
84  float64_t PYRAL_ = PYRAL;
85  PYRAL_pot = DEGREE1_1 ? 1.0 : PYRAL_;
86  if (DEGREE1_1n)
87  {
88  PYRAL_ *= PYRAL_;
89  if (DEGREE1_2)
90  PYRAL_pot *= PYRAL_;
91  if (DEGREE1_3)
92  {
93  PYRAL_ *= PYRAL_;
94  if (DEGREE1_4)
95  PYRAL_pot *= PYRAL_;
96  }
97  }
98  }
99 
100  {
101  int32_t j;
102  for (j = 0; j < pyra_len; j++)
103  pyramid_weights[j] = 4*((float64_t)((j < pyra_len2)? j+1 : pyra_len-j))/((float64_t)pyra_len);
104  for (j = 0; j < pyra_len; j++)
105  pyramid_weights[j] /= PYRAL_pot;
106  }
107 
108  return init_normalizer();
109 }
110 
112 { // NOTE(review): the signature (listing line 111) and the statements on listing lines 113, 115 and 117 are missing from this extract; presumably cleanup() - confirm
114  pyramid_weights = NULL; // forget the weight buffer; any release of it is not visible in this extract
116 
118 }
119 
120 float64_t CSimpleLocalityImprovedStringKernel::dot_pyr (const char* const x1,
121  const char* const x2, const int32_t NOF_NTS, const int32_t NTWIDTH,
122  const int32_t DEGREE1, const int32_t DEGREE2, float64_t *pyra)
123 {
124  const int32_t PYRAL = 2*NTWIDTH-1; // total window length
125  int32_t pyra_len, pyra_len2;
126  float64_t pot, PYRAL_pot;
127  float64_t sum;
128  int32_t DEGREE1_1 = (DEGREE1 & 0x1)==0;
129  int32_t DEGREE1_1n = (DEGREE1 & ~0x1)!=0;
130  int32_t DEGREE1_2 = (DEGREE1 & 0x2)!=0;
131  int32_t DEGREE1_3 = (DEGREE1 & ~0x3)!=0;
132  int32_t DEGREE1_4 = (DEGREE1 & 0x4)!=0;
133  {
134  float64_t PYRAL_ = PYRAL;
135  PYRAL_pot = DEGREE1_1 ? 1.0 : PYRAL_;
136  if (DEGREE1_1n)
137  {
138  PYRAL_ *= PYRAL_;
139  if (DEGREE1_2) PYRAL_pot *= PYRAL_;
140  if (DEGREE1_3)
141  {
142  PYRAL_ *= PYRAL_;
143  if (DEGREE1_4) PYRAL_pot *= PYRAL_;
144  }
145  }
146  }
147 
148  ASSERT((DEGREE1 & ~0x7) == 0);
149  ASSERT((DEGREE2 & ~0x7) == 0);
150 
151  pyra_len = NOF_NTS-PYRAL+1;
152  pyra_len2 = (int32_t) pyra_len/2;
153  {
154  int32_t j;
155  for (j = 0; j < pyra_len; j++)
156  pyra[j] = 4*((float64_t)((j < pyra_len2) ? j+1 : pyra_len-j))/((float64_t)pyra_len);
157  for (j = 0; j < pyra_len; j++)
158  pyra[j] /= PYRAL_pot;
159  }
160 
161  register int32_t conv;
162  register int32_t i;
163  register int32_t j;
164 
165  sum = 0.0;
166  conv = 0;
167  for (j = 0; j < PYRAL; j++)
168  conv += (x1[j] == x2[j]) ? 1 : 0;
169 
170  for (i = 0; i < NOF_NTS-PYRAL+1; i++)
171  {
172  register float64_t pot2;
173  if (i>0)
174  conv += ((x1[i+PYRAL-1] == x2[i+PYRAL-1]) ? 1 : 0 ) -
175  ((x1[i-1] == x2[i-1]) ? 1 : 0);
176  { /* potencing of conv -- float64_t is faster*/
177  register float64_t conv2 = conv;
178  pot2 = (DEGREE1_1) ? 1.0 : conv2;
179  if (DEGREE1_1n)
180  {
181  conv2 *= conv2;
182  if (DEGREE1_2)
183  pot2 *= conv2;
184  if (DEGREE1_3 && DEGREE1_4)
185  pot2 *= conv2*conv2;
186  }
187  }
188  sum += pot2*pyra[i];
189  }
190 
191  pot = ((DEGREE2 & 0x1) == 0) ? 1.0 : sum;
192  if ((DEGREE2 & ~0x1) != 0)
193  {
194  sum *= sum;
195  if ((DEGREE2 & 0x2) != 0)
196  pot *= sum;
197  if ((DEGREE2 & ~0x3) != 0)
198  {
199  sum *= sum;
200  if ((DEGREE2 & 0x4) != 0)
201  pot *= sum;
202  }
203  }
204  return pot;
205 }
206 
208  int32_t idx_a, int32_t idx_b) // NOTE(review): first signature line (listing line 207) is missing from this extract; idx_a indexes lhs, idx_b indexes rhs
209 {
210  int32_t alen, blen;
211  bool free_avec, free_bvec;
212 
213  char* avec = ((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen, free_avec); // alen and free_avec are filled in by the call
214  char* bvec = ((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen, free_bvec); // blen and free_bvec are filled in by the call
215 
216  // can only deal with strings of same length
217  ASSERT(alen==blen);
218 
219  float64_t dpt;
220 
221  dpt = dot_pyr(avec, bvec, alen, length, inner_degree, outer_degree, pyramid_weights); // pyramid_weights is passed as the weight buffer, which dot_pyr overwrites
222  dpt = dpt / pow((float64_t) alen, (float64_t) outer_degree); // scale the score by alen^outer_degree
223 
224  ((CStringFeatures<char>*) lhs)->free_feature_vector(avec, idx_a, free_avec); // hand both vectors back together with the flags received above
225  ((CStringFeatures<char>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec);
226  return (float64_t) dpt;
227 }
228 
229 void CSimpleLocalityImprovedStringKernel::init() // sets the member defaults and registers the members as parameters
230 {
231  length = 3; // default window length
232  inner_degree = 3; // default inner degree
233  outer_degree = 1; // default outer degree
234  pyramid_weights=NULL; // no weight buffer yet; the two-argument init() allocates it
236 
237  SG_ADD(&length, "length", "Window Length.", MS_AVAILABLE); // register each member with a name and a description
238  SG_ADD(&inner_degree, "inner_degree", "Inner degree.", MS_AVAILABLE);
239  SG_ADD(&outer_degree, "outer_degree", "Outer degree.", MS_AVAILABLE);
240 
242  "pyramid_weights", "Pyramid weights."); // NOTE(review): tail of a statement whose first line (listing line 241) is missing from this extract; listing line 235 is missing as well
243 }

SHOGUN Machine Learning Toolbox - Documentation