SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
HistogramWordStringKernel.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #include <shogun/lib/common.h>
17 #include <shogun/io/SGIO.h>
18 
19 using namespace shogun;
20 
22 : CStringKernel<uint16_t>()
23 {
24  init();
25 }
26 
28 : CStringKernel<uint16_t>(size)
29 {
30  init();
31  SG_REF(pie);
32  estimate=pie;
33 
34 }
35 
38 : CStringKernel<uint16_t>()
39 {
40  init();
41  SG_REF(pie);
42  estimate=pie;
43  init(l, r);
44 }
45 
47 {
49 
50  SG_FREE(variance);
51  SG_FREE(mean);
53  SG_FREE(sqrtdiag_rhs);
54  SG_FREE(sqrtdiag_lhs);
56  SG_FREE(ld_mean_rhs);
57  SG_FREE(ld_mean_lhs);
58  if (plo_lhs!=plo_rhs)
59  SG_FREE(plo_rhs);
60  SG_FREE(plo_lhs);
61 }
62 
63 bool CHistogramWordStringKernel::init(CFeatures* p_l, CFeatures* p_r)
64 {
68  ASSERT(l)
69  ASSERT(r)
70 
71  SG_DEBUG("init: lhs: %ld rhs: %ld\n", l, r)
72  int32_t i;
73  initialized=false;
74 
76  SG_FREE(sqrtdiag_rhs);
77  sqrtdiag_rhs=NULL ;
78  SG_FREE(sqrtdiag_lhs);
79  sqrtdiag_lhs=NULL ;
81  SG_FREE(ld_mean_rhs);
82  ld_mean_rhs=NULL ;
83  SG_FREE(ld_mean_lhs);
84  ld_mean_lhs=NULL ;
85  if (plo_lhs!=plo_rhs)
86  SG_FREE(plo_rhs);
87  plo_rhs=NULL ;
88  SG_FREE(plo_lhs);
89  plo_lhs=NULL ;
90 
91  sqrtdiag_lhs= SG_MALLOC(float64_t, l->get_num_vectors());
92  ld_mean_lhs = SG_MALLOC(float64_t, l->get_num_vectors());
93  plo_lhs = SG_MALLOC(float64_t, l->get_num_vectors());
94 
95  for (i=0; i<l->get_num_vectors(); i++)
96  sqrtdiag_lhs[i]=1;
97 
98  if (l==r)
99  {
103  }
104  else
105  {
106  sqrtdiag_rhs=SG_MALLOC(float64_t, r->get_num_vectors());
107  for (i=0; i<r->get_num_vectors(); i++)
108  sqrtdiag_rhs[i]=1;
109 
110  ld_mean_rhs=SG_MALLOC(float64_t, r->get_num_vectors());
111  plo_rhs=SG_MALLOC(float64_t, r->get_num_vectors());
112  }
113 
114  float64_t* l_plo_lhs=plo_lhs;
115  float64_t* l_plo_rhs=plo_rhs;
116  float64_t* l_ld_mean_lhs=ld_mean_lhs;
117  float64_t* l_ld_mean_rhs=ld_mean_rhs;
118 
119  //from our knowledge first normalize variance to 1 and then norm=1 does the job
120  if (!initialized)
121  {
122  int32_t num_vectors=l->get_num_vectors();
123  num_symbols=(int32_t) l->get_num_symbols();
124  int32_t llen=l->get_vector_length(0);
125  int32_t rlen=r->get_vector_length(0);
126  num_params=llen*((int32_t) l->get_num_symbols());
127  num_params2=llen*((int32_t) l->get_num_symbols())+rlen*((int32_t) r->get_num_symbols());
128 
129  if ((!estimate) || (!estimate->check_models()))
130  {
131  SG_ERROR("no estimate available\n")
132  return false ;
133  } ;
134  if (num_params2!=estimate->get_num_params())
135  {
136  SG_ERROR("number of parameters of estimate and feature representation do not match\n")
137  return false ;
138  } ;
139 
140  //add 1 as we have the 'bias' also in this vector
141  num_params2++;
142 
143  SG_FREE(mean);
144  mean=SG_MALLOC(float64_t, num_params2);
145  SG_FREE(variance);
146  variance=SG_MALLOC(float64_t, num_params2);
147 
148  for (i=0; i<num_params2; i++)
149  {
150  mean[i]=0;
151  variance[i]=0;
152  }
153 
154  // compute mean
155  for (i=0; i<num_vectors; i++)
156  {
157  int32_t len;
158  bool free_vec;
159  uint16_t* vec=l->get_feature_vector(i, len, free_vec);
160 
161  mean[0]+=estimate->posterior_log_odds_obsolete(vec, len)/num_vectors;
162 
163  for (int32_t j=0; j<len; j++)
164  {
165  int32_t idx=compute_index(j, vec[j]);
166  mean[idx] += estimate->log_derivative_pos_obsolete(vec[j], j)/num_vectors;
167  mean[idx+num_params] += estimate->log_derivative_neg_obsolete(vec[j], j)/num_vectors;
168  }
169 
170  l->free_feature_vector(vec, i, free_vec);
171  }
172 
173  // compute variance
174  for (i=0; i<num_vectors; i++)
175  {
176  int32_t len;
177  bool free_vec;
178  uint16_t* vec=l->get_feature_vector(i, len, free_vec);
179 
180  variance[0] += CMath::sq(estimate->posterior_log_odds_obsolete(vec, len)-mean[0])/num_vectors;
181 
182  for (int32_t j=0; j<len; j++)
183  {
184  for (int32_t k=0; k<4; k++)
185  {
186  int32_t idx=compute_index(j, k);
187  if (k!=vec[j])
188  {
189  variance[idx]+=mean[idx]*mean[idx]/num_vectors;
190  variance[idx+num_params]+=mean[idx+num_params]*mean[idx+num_params]/num_vectors;
191  }
192  else
193  {
195  -mean[idx])/num_vectors;
197  -mean[idx+num_params])/num_vectors;
198  }
199  }
200  }
201 
202  l->free_feature_vector(vec, i, free_vec);
203  }
204 
205 
206  // compute sum_i m_i^2/s_i^2
207  sum_m2_s2=0 ;
208  for (i=1; i<num_params2; i++)
209  {
210  if (variance[i]<1e-14) // then it is likely to be numerical inaccuracy
211  variance[i]=1 ;
212 
213  //fprintf(stderr, "%i: mean=%1.2e std=%1.2e\n", i, mean[i], std[i]) ;
214  sum_m2_s2 += mean[i]*mean[i]/(variance[i]) ;
215  } ;
216  }
217 
218  // compute sum of
219  //result -= estimate->log_derivative_pos(avec[i], i)*mean[a_idx]/variance[a_idx] ;
220  //result -= estimate->log_derivative_neg(avec[i], i)*mean[a_idx+num_params]/variance[a_idx+num_params] ;
221  for (i=0; i<l->get_num_vectors(); i++)
222  {
223  int32_t alen;
224  bool free_avec;
225  uint16_t* avec = l->get_feature_vector(i, alen, free_avec);
226 
227  float64_t result=0 ;
228  for (int32_t j=0; j<alen; j++)
229  {
230  int32_t a_idx = compute_index(j, avec[j]);
231  result -= estimate->log_derivative_pos_obsolete(avec[j], j)*mean[a_idx]/variance[a_idx] ;
232  result -= estimate->log_derivative_neg_obsolete(avec[j], j)*mean[a_idx+num_params]/variance[a_idx+num_params] ;
233  }
234  ld_mean_lhs[i]=result ;
235 
236  // precompute posterior-log-odds
237  plo_lhs[i] = estimate->posterior_log_odds_obsolete(avec, alen)-mean[0] ;
238  l->free_feature_vector(avec, i, free_avec);
239  } ;
240 
241  if (ld_mean_lhs!=ld_mean_rhs)
242  {
243  // compute sum of
244  //result -= estimate->log_derivative_pos(bvec[i], i)*mean[b_idx]/variance[b_idx] ;
245  //result -= estimate->log_derivative_neg(bvec[i], i)*mean[b_idx+num_params]/variance[b_idx+num_params] ;
246  for (i=0; i < r->get_num_vectors(); i++)
247  {
248  int32_t alen;
249  bool free_avec;
250  uint16_t* avec=r->get_feature_vector(i, alen, free_avec);
251 
252  float64_t result=0 ;
253  for (int32_t j=0; j<alen; j++)
254  {
255  int32_t a_idx = compute_index(j, avec[j]) ;
256  result -= estimate->log_derivative_pos_obsolete(avec[j], j)*mean[a_idx]/variance[a_idx] ;
257  result -= estimate->log_derivative_neg_obsolete(avec[j], j)*mean[a_idx+num_params]/variance[a_idx+num_params] ;
258  }
259  ld_mean_rhs[i]=result ;
260 
261  // precompute posterior-log-odds
262  plo_rhs[i] = estimate->posterior_log_odds_obsolete(avec, alen)-mean[0] ;
263  r->free_feature_vector(avec, i, free_avec);
264  } ;
265  } ;
266 
267  //warning hacky
268  //
269  this->lhs=l;
270  this->rhs=l;
271  plo_lhs = l_plo_lhs ;
272  plo_rhs = l_plo_lhs ;
273  ld_mean_lhs = l_ld_mean_lhs ;
274  ld_mean_rhs = l_ld_mean_lhs ;
275 
276  //compute normalize to 1 values
277  for (i=0; i<l->get_num_vectors(); i++)
278  {
279  sqrtdiag_lhs[i]=sqrt(compute(i,i));
280 
281  //trap divide by zero exception
282  if (sqrtdiag_lhs[i]==0)
283  sqrtdiag_lhs[i]=1e-16;
284  }
285 
286  // if lhs is different from rhs (train/test data)
287  // compute also the normalization for rhs
289  {
290  this->lhs=r;
291  this->rhs=r;
292  plo_lhs = l_plo_rhs ;
293  plo_rhs = l_plo_rhs ;
294  ld_mean_lhs = l_ld_mean_rhs ;
295  ld_mean_rhs = l_ld_mean_rhs ;
296 
297  //compute normalize to 1 values
298  for (i=0; i<r->get_num_vectors(); i++)
299  {
300  sqrtdiag_rhs[i]=sqrt(compute(i,i));
301 
302  //trap divide by zero exception
303  if (sqrtdiag_rhs[i]==0)
304  sqrtdiag_rhs[i]=1e-16;
305  }
306  }
307 
308  this->lhs=l;
309  this->rhs=r;
310  plo_lhs = l_plo_lhs ;
311  plo_rhs = l_plo_rhs ;
312  ld_mean_lhs = l_ld_mean_lhs ;
313  ld_mean_rhs = l_ld_mean_rhs ;
314 
315  initialized = true ;
316  return init_normalizer();
317 }
318 
320 {
321  SG_FREE(variance);
322  variance=NULL;
323 
324  SG_FREE(mean);
325  mean=NULL;
326 
327  if (sqrtdiag_lhs != sqrtdiag_rhs)
328  SG_FREE(sqrtdiag_rhs);
329  sqrtdiag_rhs=NULL;
330 
331  SG_FREE(sqrtdiag_lhs);
332  sqrtdiag_lhs=NULL;
333 
335  SG_FREE(ld_mean_rhs);
336  ld_mean_rhs=NULL;
337 
338  SG_FREE(ld_mean_lhs);
339  ld_mean_lhs=NULL;
340 
341  if (plo_lhs!=plo_rhs)
342  SG_FREE(plo_rhs);
343  plo_rhs=NULL;
344 
345  SG_FREE(plo_lhs);
346  plo_lhs=NULL;
347 
348  num_params2=0;
349  num_params=0;
350  num_symbols=0;
351  sum_m2_s2=0;
352  initialized = false;
353 
355 }
356 
357 float64_t CHistogramWordStringKernel::compute(int32_t idx_a, int32_t idx_b)
358 {
359  int32_t alen, blen;
360  bool free_avec, free_bvec;
361  uint16_t* avec=((CStringFeatures<uint16_t>*) lhs)->get_feature_vector(idx_a, alen, free_avec);
362  uint16_t* bvec=((CStringFeatures<uint16_t>*) rhs)->get_feature_vector(idx_b, blen, free_bvec);
363  // can only deal with strings of same length
364  ASSERT(alen==blen)
365 
366  float64_t result = plo_lhs[idx_a]*plo_rhs[idx_b]/variance[0];
367  result+= sum_m2_s2 ; // does not contain 0-th element
368 
369  for (int32_t i=0; i<alen; i++)
370  {
371  if (avec[i]==bvec[i])
372  {
373  int32_t a_idx = compute_index(i, avec[i]) ;
375  result += dd*dd/variance[a_idx] ;
376  dd = estimate->log_derivative_neg_obsolete(avec[i], i) ;
377  result += dd*dd/variance[a_idx+num_params] ;
378  } ;
379  }
380  result += ld_mean_lhs[idx_a] + ld_mean_rhs[idx_b] ;
381 
382  if (initialized)
383  result /= (sqrtdiag_lhs[idx_a]*sqrtdiag_rhs[idx_b]) ;
384 
385 #ifdef DEBUG_HWSK_COMPUTATION
386  float64_t result2 = compute_slow(idx_a, idx_b) ;
387  if (fabs(result - result2)>1e-10)
388  SG_ERROR("new=%e old = %e diff = %e\n", result, result2, result - result2)
389 #endif
390  ((CStringFeatures<uint16_t>*) lhs)->free_feature_vector(avec, idx_a, free_avec);
391  ((CStringFeatures<uint16_t>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec);
392  return result;
393 }
394 
395 void CHistogramWordStringKernel::init()
396 {
397  estimate=NULL;
398  mean=NULL;
399  variance=NULL;
400 
401  sqrtdiag_lhs=NULL;
402  sqrtdiag_rhs=NULL;
403 
404  ld_mean_lhs=NULL;
405  ld_mean_rhs=NULL;
406 
407  plo_lhs=NULL;
408  plo_rhs=NULL;
409  num_params=0;
410  num_params2=0;
411 
412  num_symbols=0;
413  sum_m2_s2=0;
414  initialized=false;
415 
416  SG_ADD(&initialized, "initialized", "If kernel is initalized.",
418  m_parameters->add_vector(&plo_lhs, &num_lhs, "plo_lhs");
419  m_parameters->add_vector(&plo_rhs, &num_rhs, "plo_rhs");
420  m_parameters->add_vector(&ld_mean_lhs, &num_lhs, "ld_mean_lhs");
421  m_parameters->add_vector(&ld_mean_rhs, &num_rhs, "ld_mean_rhs");
422  m_parameters->add_vector(&sqrtdiag_lhs, &num_lhs, "sqrtdiag_lhs");
423  m_parameters->add_vector(&sqrtdiag_rhs, &num_rhs, "sqrtdiag_rhs");
425  m_parameters->add_vector(&variance, &num_params2, "variance");
426 
427  SG_ADD((CSGObject**) &estimate, "estimate", "Plugin Estimate.",
429 }
430 
431 #ifdef DEBUG_HWSK_COMPUTATION
432 float64_t CHistogramWordStringKernel::compute_slow(int32_t idx_a, int32_t idx_b)
433 {
434  int32_t alen, blen;
435  bool free_avec, free_bvec;
436  uint16_t* avec=((CStringFeatures<uint16_t>*) lhs)->get_feature_vector(idx_a, alen, free_avec);
437  uint16_t* bvec=((CStringFeatures<uint16_t>*) rhs)->get_feature_vector(idx_b, blen, free_bvec);
438  // can only deal with strings of same length
439  ASSERT(alen==blen)
440 
441  float64_t result=(estimate->posterior_log_odds_obsolete(avec, alen)-mean[0])*
442  (estimate->posterior_log_odds_obsolete(bvec, blen)-mean[0])/(variance[0]);
443  result+= sum_m2_s2 ; // does not contain 0-th element
444 
445  for (int32_t i=0; i<alen; i++)
446  {
447  int32_t a_idx = compute_index(i, avec[i]) ;
448  int32_t b_idx = compute_index(i, bvec[i]) ;
449 
450  if (avec[i]==bvec[i])
451  {
453  result += dd*dd/variance[a_idx] ;
454  dd = estimate->log_derivative_neg_obsolete(avec[i], i) ;
455  result += dd*dd/variance[a_idx+num_params] ;
456  } ;
457 
458  result -= estimate->log_derivative_pos_obsolete(avec[i], i)*mean[a_idx]/variance[a_idx] ;
459  result -= estimate->log_derivative_pos_obsolete(bvec[i], i)*mean[b_idx]/variance[b_idx] ;
460  result -= estimate->log_derivative_neg_obsolete(avec[i], i)*mean[a_idx+num_params]/variance[a_idx+num_params] ;
461  result -= estimate->log_derivative_neg_obsolete(bvec[i], i)*mean[b_idx+num_params]/variance[b_idx+num_params] ;
462  }
463 
464  if (initialized)
465  result /= (sqrtdiag_lhs[idx_a]*sqrtdiag_rhs[idx_b]) ;
466 
467  ((CStringFeatures<uint16_t>*) lhs)->free_feature_vector(avec, idx_a, free_avec);
468  ((CStringFeatures<uint16_t>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec);
469  return result;
470 }
471 
472 #endif

SHOGUN Machine Learning Toolbox - Documentation