SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
HSIC.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2012-2013 Heiko Strathmann
8  */
9 
10 #include <shogun/statistics/HSIC.h>
13 #include <shogun/kernel/Kernel.h>
15 
16 using namespace shogun;
17 
20 {
21  init();
22 }
23 
24 CHSIC::CHSIC(CKernel* kernel_p, CKernel* kernel_q, CFeatures* p_and_q,
25  index_t m) :
26  CKernelIndependenceTestStatistic(kernel_p, kernel_q, m_p_and_q, m)
27 {
28  if (p_and_q && p_and_q->get_num_vectors()/2!=m)
29  {
30  SG_ERROR("%s: Only features with equal number of vectors are currently "
31  "possible\n", get_name());
32  }
33 
34  init();
35 }
36 
37 CHSIC::CHSIC(CKernel* kernel_p, CKernel* kernel_q, CFeatures* p,
38  CFeatures* q) :
39  CKernelIndependenceTestStatistic(kernel_p, kernel_q, p, q)
40 {
41  if (p && q && p->get_num_vectors()!=q->get_num_vectors())
42  {
43  SG_ERROR("%s: Only features with equal number of vectors are currently "
44  "possible\n", get_name());
45  }
46 
47  init();
48 }
49 
51 {
52 
53 }
54 
55 void CHSIC::init()
56 {
57 
58 }
59 
61 {
62  REQUIRE(m_kernel_p && m_kernel_q, "%s::fit_null_gamma(): No or only one "
63  "kernel specified!\n", get_name());
64 
65  REQUIRE(m_p_and_q, "%s::compute_statistic: features needed!\n", get_name())
66 
67  /* compute kernel matrices */
70 
71  /* center matrices (MATLAB: Kc=H*K*H) */
72  K.center();
73 
74  /* compute MATLAB: sum(sum(Kc' .* (L))), which is biased HSIC */
75  index_t m=m_m;
76  float64_t result=0;
77  for (index_t i=0; i<m; ++i)
78  {
79  for (index_t j=0; j<m; ++j)
80  result+=K(j, i)*L(i, j);
81  }
82 
83  /* return m times statistic */
84  result/=m;
85 
86  return result;
87 }
88 
90 {
91  float64_t result=0;
93  {
94  case HSIC_GAMMA:
95  {
96  /* fit gamma and return cdf at statistic */
98  result=CStatistics::gamma_cdf(statistic, params[0], params[1]);
99  break;
100  }
101 
102  default:
103  /* bootstrapping is handled there */
105  break;
106  }
107 
108  return result;
109 }
110 
112 {
113  float64_t result=0;
115  {
116  case HSIC_GAMMA:
117  {
118  /* fit gamma and return inverse cdf at statistic */
120  result=CStatistics::inverse_gamma_cdf(alpha, params[0], params[1]);
121  break;
122  }
123 
124  default:
125  /* bootstrapping is handled there */
127  break;
128  }
129 
130  return result;
131 }
132 
134 {
135  REQUIRE(m_kernel_p && m_kernel_q, "%s::fit_null_gamma(): No or only one "
136  "kernel specified!\n", get_name());
137 
138  REQUIRE(m_p_and_q, "%s::fit_null_gamma: features needed!\n", get_name())
139 
140  index_t m=m_m;
141 
142  /* compute kernel matrices */
145 
146  /* compute sum and trace of uncentered kernel matrices, needed later */
147  float64_t trace_K=0;
148  float64_t trace_L=0;
149  float64_t sum_K=0;
150  float64_t sum_L=0;
151  for (index_t i=0; i<m; ++i)
152  {
153  trace_K+=K(i,i);
154  trace_L+=L(i,i);
155  for (index_t j=0; j<m; ++j)
156  {
157  sum_K+=K(i,j);
158  sum_L+=L(i,j);
159  }
160  }
161  SG_DEBUG("sum_K: %f, sum_L: %f, trace_K: %f, trace_L: %f\n", sum_K, sum_L,
162  trace_K, trace_L);
163 
164  /* center both matrices: K=H*K*H, L=H*L*H in MATLAB */
165  K.center();
166  L.center();
167 
168  /* compute the trace of MATLAB: (1/6 * Kc.*Lc).^2 Ü */
169  float64_t trace=0;
170  for (index_t i=0; i<m; ++i)
171  trace+=CMath::pow(K(i,i)*L(i,i), 2);
172 
173  trace/=36.0;
174  SG_DEBUG("trace %f\n", trace)
175 
176  /* compute sum of elements of MATLAB: (1/6 * Kc.*Lc).^2 */
177  float64_t sum=0;
178  for (index_t i=0; i<m; ++i)
179  {
180  for (index_t j=0; j<m; ++j)
181  sum+=CMath::pow(K(i,j)*L(i,j), 2);
182  }
183  sum/=36.0;
184  SG_DEBUG("sum %f\n", sum)
185 
186  /* compute MATLAB: 1/m/(m-1)*(sum(sum(varHSIC)) - sum(diag(varHSIC))),
187  * second term is bias correction */
188  float64_t var_hsic=1.0/m/(m-1)*(sum-trace);
189  SG_DEBUG("1.0/m/(m-1)*(sum-trace): %f\n", var_hsic)
190 
191  /* finally, compute variance of hsic under H0
192  * MATLAB: varHSIC = 72*(m-4)*(m-5)/m/(m-1)/(m-2)/(m-3) * varHSIC */
193  var_hsic=72.0*(m-4)*(m-5)/m/(m-1)/(m-2)/(m-3)*var_hsic;
194  SG_DEBUG("var_hsic: %f\n", var_hsic)
195 
196  /* compute mean of matrices with diagonal elements zero on the base of sums
197  * and trace from K and L which were computed above */
198  float64_t mu_x=1.0/m/(m-1)*(sum_K-trace_K);
199  float64_t mu_y=1.0/m/(m-1)*(sum_L-trace_L);
200  SG_DEBUG("mu_x: %f, mu_y: %f\n", mu_x, mu_y)
201 
202  /* compute mean under H0, MATLAB: 1/m * ( 1 +muX*muY - muX - muY ) */
203  float64_t m_hsic=1.0/m*(1+mu_x*mu_y-mu_x-mu_y);
204  SG_DEBUG("m_hsic: %f\n", m_hsic)
205 
206  /* finally, compute parameters of gamma distirbution */
207  float64_t a=CMath::pow(m_hsic, 2)/var_hsic;
208  float64_t b=var_hsic*m/m_hsic;
209  SG_DEBUG("a: %f, b: %f\n", a, b)
210 
211  SGVector<float64_t> result(2);
212  result[0]=a;
213  result[1]=b;
214 
215  SG_DEBUG(("leaving %s::fit_null_gamma()\n"), get_name())
216  return result;
217 }
218 
220 {
222 
223  /* subset for selecting only data from one distribution */
224  SGVector<index_t> subset(m_m);
225  subset.range_fill();
226 
227  /* distinguish between custom and normal kernels */
229  {
230  /* custom kernels need to to be initialised when a subset is added */
231  CCustomKernel* custom_kernel_p=(CCustomKernel*)m_kernel_p;
232  custom_kernel_p->add_row_subset(subset);
233  custom_kernel_p->add_col_subset(subset);
234 
235  K=custom_kernel_p->get_kernel_matrix();
236 
237  custom_kernel_p->remove_row_subset();
238  custom_kernel_p->remove_col_subset();
239  }
240  else
241  {
242  m_p_and_q->add_subset(subset);
246 
247  /* re-init kernel since subset was changed */
249  }
250 
251  return K;
252 }
253 
255 {
257 
258  /* subset for selecting only data from one distribution */
259  SGVector<index_t> subset(m_m);
260  subset.range_fill();
261  subset.add(m_m);
262 
263  /* now second half of data for L */
265  {
266  /* custom kernels need to to be initialised when a subset is added */
267  CCustomKernel* custom_kernel_q=(CCustomKernel*)m_kernel_q;
268  custom_kernel_q->add_row_subset(subset);
269  custom_kernel_q->add_col_subset(subset);
270 
271  L=custom_kernel_q->get_kernel_matrix();
272 
273  custom_kernel_q->remove_row_subset();
274  custom_kernel_q->remove_col_subset();
275  }
276  else
277  {
278  m_p_and_q->add_subset(subset);
282 
283  /* re-init kernel since subset was changed */
285  }
286 
287  return L;
288 }
289 
291 {
292  SG_DEBUG("entering CHSIC::bootstrap_null()\n")
293 
294  /* replace current kernel via precomputed custom kernel and call superclass
295  * method */
296 
297  /* backup references to old kernels */
298  CKernel* kernel_p=m_kernel_p;
299  CKernel* kernel_q=m_kernel_q;
300 
301  /* init kernels before to be sure that everything is fine */
304 
305  /* precompute kernel matrices */
306  CCustomKernel* precomputed_p=new CCustomKernel(m_kernel_p);
307  CCustomKernel* precomputed_q=new CCustomKernel(m_kernel_q);
308  SG_REF(precomputed_p);
309  SG_REF(precomputed_q);
310 
311  /* temporarily replace own kernels */
312  m_kernel_p=precomputed_p;
313  m_kernel_q=precomputed_q;
314 
315  /* use superclass bootstrapping which permutes custom kernels */
316  SGVector<float64_t> null_samples=
318 
319  /* restore kernels */
320  m_kernel_p=kernel_p;
321  m_kernel_q=kernel_q;
322 
323  SG_UNREF(precomputed_p);
324  SG_UNREF(precomputed_q);
325 
326 
327  SG_DEBUG("leaving CHSIC::bootstrap_null()\n")
328  return null_samples;
329 }

SHOGUN Machine Learning Toolbox - Documentation