SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CommUlongStringKernel.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
9  */
10 
11 #include <shogun/lib/common.h>
15 #include <shogun/io/SGIO.h>
16 
17 using namespace shogun;
18 
20 : CStringKernel<uint64_t>(size), use_sign(us)
21 {
23  clear_normal();
24 
26 }
27 
30  int32_t size)
31 : CStringKernel<uint64_t>(size), use_sign(us)
32 {
34  clear_normal();
36  init(l,r);
37 }
38 
40 {
41  cleanup();
42 }
43 
45 {
47 
48 #ifdef SVMLIGHT
49  if (lhs)
50  cache_reset();
51 #endif
52 
53  lhs = NULL ;
54  rhs = NULL ;
55 }
56 
58 {
59 #ifdef SVMLIGHT
60  if (rhs)
61  cache_reset();
62 #endif
63 
64  rhs = lhs;
65 }
66 
67 bool CCommUlongStringKernel::init(CFeatures* l, CFeatures* r)
68 {
70  return init_normalizer();
71 }
72 
74 {
76  clear_normal();
78 }
79 
80 float64_t CCommUlongStringKernel::compute(int32_t idx_a, int32_t idx_b)
81 {
82  int32_t alen, blen;
83  bool free_avec, free_bvec;
84  uint64_t* avec=((CStringFeatures<uint64_t>*) lhs)->get_feature_vector(idx_a, alen, free_avec);
85  uint64_t* bvec=((CStringFeatures<uint64_t>*) rhs)->get_feature_vector(idx_b, blen, free_bvec);
86 
87  float64_t result=0;
88 
89  int32_t left_idx=0;
90  int32_t right_idx=0;
91 
92  if (use_sign)
93  {
94  while (left_idx < alen && right_idx < blen)
95  {
96  if (avec[left_idx]==bvec[right_idx])
97  {
98  uint64_t sym=avec[left_idx];
99 
100  while (left_idx< alen && avec[left_idx]==sym)
101  left_idx++;
102 
103  while (right_idx< blen && bvec[right_idx]==sym)
104  right_idx++;
105 
106  result++;
107  }
108  else if (avec[left_idx]<bvec[right_idx])
109  left_idx++;
110  else
111  right_idx++;
112  }
113  }
114  else
115  {
116  while (left_idx < alen && right_idx < blen)
117  {
118  if (avec[left_idx]==bvec[right_idx])
119  {
120  int32_t old_left_idx=left_idx;
121  int32_t old_right_idx=right_idx;
122 
123  uint64_t sym=avec[left_idx];
124 
125  while (left_idx< alen && avec[left_idx]==sym)
126  left_idx++;
127 
128  while (right_idx< blen && bvec[right_idx]==sym)
129  right_idx++;
130 
131  result+=((float64_t) (left_idx-old_left_idx)) * ((float64_t) (right_idx-old_right_idx));
132  }
133  else if (avec[left_idx]<bvec[right_idx])
134  left_idx++;
135  else
136  right_idx++;
137  }
138  }
139  ((CStringFeatures<uint64_t>*) lhs)->free_feature_vector(avec, idx_a, free_avec);
140  ((CStringFeatures<uint64_t>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec);
141 
142  return result;
143 }
144 
146 {
147  int32_t t=0;
148  int32_t j=0;
149  int32_t k=0;
150  int32_t last_j=0;
151  int32_t len=-1;
152  bool free_vec;
153  uint64_t* vec=((CStringFeatures<uint64_t>*) lhs)->get_feature_vector(vec_idx, len, free_vec);
154 
155  if (vec && len>0)
156  {
157  uint64_t* dic= SG_MALLOC(uint64_t, len+dictionary.get_num_elements());
159 
160  if (use_sign)
161  {
162  for (j=1; j<len; j++)
163  {
164  if (vec[j]==vec[j-1])
165  continue;
166 
167  merge_dictionaries(t, j, k, vec, dic, dic_weights, weight, vec_idx);
168  }
169 
170  merge_dictionaries(t, j, k, vec, dic, dic_weights, weight, vec_idx);
171 
172  while (k<dictionary.get_num_elements())
173  {
174  dic[t]=dictionary[k];
175  dic_weights[t]=dictionary_weights[k];
176  t++;
177  k++;
178  }
179  }
180  else
181  {
182  for (j=1; j<len; j++)
183  {
184  if (vec[j]==vec[j-1])
185  continue;
186 
187  merge_dictionaries(t, j, k, vec, dic, dic_weights, weight*(j-last_j), vec_idx);
188  last_j = j;
189  }
190 
191  merge_dictionaries(t, j, k, vec, dic, dic_weights, weight*(j-last_j), vec_idx);
192 
193  while (k<dictionary.get_num_elements())
194  {
195  dic[t]=dictionary[k];
196  dic_weights[t]=dictionary_weights[k];
197  t++;
198  k++;
199  }
200  }
201 
204  }
205  ((CStringFeatures<uint64_t>*) lhs)->free_feature_vector(vec, vec_idx, free_vec);
206 
207  set_is_initialized(true);
208 }
209 
211 {
214  set_is_initialized(false);
215 }
216 
218  int32_t count, int32_t *IDX, float64_t * weights)
219 {
220  clear_normal();
221 
222  if (count<=0)
223  {
224  set_is_initialized(true);
225  SG_DEBUG( "empty set of SVs\n");
226  return true;
227  }
228 
229  SG_DEBUG( "initializing CCommUlongStringKernel optimization\n");
230 
231  for (int32_t i=0; i<count; i++)
232  {
233  if ( (i % (count/10+1)) == 0)
234  SG_PROGRESS(i, 0, count);
235 
236  add_to_normal(IDX[i], weights[i]);
237  }
238 
239  SG_PRINT( "Done. \n");
240 
241  set_is_initialized(true);
242  return true;
243 }
244 
246 {
247  SG_DEBUG( "deleting CCommUlongStringKernel optimization\n");
248  clear_normal();
249  return true;
250 }
251 
252 // binary search for each feature. trick: as features are sorted save last found idx in old_idx and
253 // only search in the remainder of the dictionary
255 {
256  float64_t result = 0;
257  int32_t j, last_j=0;
258  int32_t old_idx = 0;
259 
260  if (!get_is_initialized())
261  {
262  SG_ERROR( "CCommUlongStringKernel optimization not initialized\n");
263  return 0 ;
264  }
265 
266 
267 
268  int32_t alen = -1;
269  bool free_avec;
270  uint64_t* avec=((CStringFeatures<uint64_t>*) rhs)->
271  get_feature_vector(i, alen, free_avec);
272 
273  if (avec && alen>0)
274  {
275  if (use_sign)
276  {
277  for (j=1; j<alen; j++)
278  {
279  if (avec[j]==avec[j-1])
280  continue;
281 
282  int32_t idx = CMath::binary_search_max_lower_equal(&(dictionary.get_array()[old_idx]), dictionary.get_num_elements()-old_idx, avec[j-1]);
283 
284  if (idx!=-1)
285  {
286  if (dictionary[idx+old_idx] == avec[j-1])
287  result += dictionary_weights[idx+old_idx];
288 
289  old_idx+=idx;
290  }
291  }
292 
293  int32_t idx = CMath::binary_search(&(dictionary.get_array()[old_idx]), dictionary.get_num_elements()-old_idx, avec[alen-1]);
294  if (idx!=-1)
295  result += dictionary_weights[idx+old_idx];
296  }
297  else
298  {
299  for (j=1; j<alen; j++)
300  {
301  if (avec[j]==avec[j-1])
302  continue;
303 
304  int32_t idx = CMath::binary_search_max_lower_equal(&(dictionary.get_array()[old_idx]), dictionary.get_num_elements()-old_idx, avec[j-1]);
305 
306  if (idx!=-1)
307  {
308  if (dictionary[idx+old_idx] == avec[j-1])
309  result += dictionary_weights[idx+old_idx]*(j-last_j);
310 
311  old_idx+=idx;
312  }
313 
314  last_j = j;
315  }
316 
317  int32_t idx = CMath::binary_search(&(dictionary.get_array()[old_idx]), dictionary.get_num_elements()-old_idx, avec[alen-1]);
318  if (idx!=-1)
319  result += dictionary_weights[idx+old_idx]*(alen-last_j);
320  }
321  }
322 
323  ((CStringFeatures<uint64_t>*) rhs)->free_feature_vector(avec, i, free_avec);
324 
325  return normalizer->normalize_rhs(result, i);
326 }

SHOGUN Machine Learning Toolbox - Documentation