SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
LinearTimeMMD.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2012 Heiko Strathmann
8  */
9 
15 
17 
18 using namespace shogun;
19 
22 {
23  init();
24 }
25 
27  index_t q_start) :
28  CKernelTwoSampleTestStatistic(kernel, p_and_q, q_start)
29 {
30  init();
31 
32  if (p_and_q && q_start!=p_and_q->get_num_vectors()/2)
33  {
34  SG_ERROR("CLinearTimeMMD: Only features with equal number of vectors "
35  "are currently possible\n");
36  }
37 }
38 
40  CKernelTwoSampleTestStatistic(kernel, p, q)
41 {
42  init();
43 
44  if (p->get_num_vectors()!=q->get_num_vectors())
45  {
46  SG_ERROR("CLinearTimeMMD: Only features with equal number of vectors "
47  "are currently possible\n");
48  }
49 }
50 
52 {
53 
54 }
55 
56 void CLinearTimeMMD::init()
57 {
58  SG_ADD(&m_opt_max_iterations, "opt_max_iterations", "Maximum number of "
59  "iterations for qp solver", MS_NOT_AVAILABLE);
60  SG_ADD(&m_opt_epsilon, "opt_epsilon", "Stopping criterion for qp solver",
62  SG_ADD(&m_opt_low_cut, "opt_low_cut", "Low cut value for optimization "
63  "kernel weights", MS_NOT_AVAILABLE);
64  SG_ADD(&m_opt_regularization_eps, "opt_regularization_eps", "Regularization"
65  " value that is added to diagonal of Q matrix", MS_NOT_AVAILABLE);
66 
68  m_opt_epsilon=10E-15;
69  m_opt_low_cut=10E-7;
71 }
72 
74 {
75  SG_DEBUG("entering CLinearTimeMMD::compute_statistic()\n");
76 
77  REQUIRE(m_p_and_q, "%s::compute_statistic: features needed!\n", get_name());
78 
79  REQUIRE(m_kernel, "%s::compute_statistic: kernel needed!\n", get_name());
80 
81  /* m is number of samples from each distribution, m_2 is half of it
82  * using names from JLMR paper (see class documentation) */
84  index_t m_2=m/2;
85 
86  SG_DEBUG("m_q_start=%d\n", m_q_start);
87 
88  /* compute traces of kernel matrices for linear MMD */
90 
91  float64_t pp=0;
92  float64_t qq=0;
93  float64_t pq=0;
94  float64_t qp=0;
95 
96  /* compute traces */
97  for (index_t i=0; i<m_2; ++i)
98  {
99  pp+=m_kernel->kernel(i, m_2+i);
100  qq+=m_kernel->kernel(m+i, m+m_2+i);
101  pq+=m_kernel->kernel(i, m+m_2+i);
102  qp+=m_kernel->kernel(m_2+i, m+i);
103  }
104 
105  SG_DEBUG("returning: 1/%d*(%f+%f-%f-%f)\n", m_2, pp, qq, pq, qp);
106 
107  /* mean of sum all traces is linear time mmd */
108  SG_DEBUG("leaving CLinearTimeMMD::compute_statistic()\n");
109  return 1.0/m_2*(pp+qq-pq-qp);
110 }
111 
113 {
114  float64_t result=0;
115 
117  {
118  case MMD1_GAUSSIAN:
119  {
120  /* compute variance and use to estimate Gaussian distribution */
122  result=1.0-CStatistics::normal_cdf(statistic, std_dev);
123  }
124  break;
125 
126  default:
127  /* bootstrapping is handled here */
129  break;
130  }
131 
132  return result;
133 }
134 
136 {
137  float64_t result=0;
138 
140  {
141  case MMD1_GAUSSIAN:
142  {
143  /* compute variance and use to estimate Gaussian distribution */
145  result=1.0-CStatistics::inverse_normal_cdf(1-alpha, 0, std_dev);
146  }
147  break;
148 
149  default:
150  /* bootstrapping is handled here */
152  break;
153  }
154 
155  return result;
156 }
157 
159 {
160  REQUIRE(m_p_and_q, "%s::compute_variance_estimate: features needed!\n",
161  get_name());
162 
163  REQUIRE(m_kernel, "%s::compute_variance_estimate: kernel needed!\n",
164  get_name());
165 
166  if (m_p_and_q->get_num_vectors()<1000)
167  {
168  SG_WARNING("%s::compute_variance_estimate: The number of samples"
169  " should be very large (at least 1000) in order to get a"
170  " good Gaussian approximation using MMD1_GAUSSIAN.\n",
171  get_name());
172  }
173 
174  /* this corresponds to computing the statistic itself, however, the
175  * difference is that all terms (of the traces) have to be stored */
176  index_t m=m_q_start;
177  index_t m_2=m/2;
178 
180 
181  /* allocate memory for traces */
182  SGVector<float64_t> traces(m_2);
183 
184  /* sum up diagonals of all kernel matrices */
185  for (index_t i=0; i<m_2; ++i)
186  {
187  /* init for code beauty :) */
188  traces[i]=0;
189 
190  /* p and p */
191  traces[i]+=m_kernel->kernel(i, m_2+i);
192 
193  /* q and q */
194  traces[i]+=m_kernel->kernel(m+i, m+m_2+i);
195 
196  /* p and q */
197  traces[i]-=m_kernel->kernel(i, m+m_2+i);
198 
199  /* q and p */
200  traces[i]-=m_kernel->kernel(m_2+i, m+i);
201  }
202 
203  /* return linear time variance estimate */
204  return CStatistics::variance(traces)/m_2;
205 }
206 
207 #ifdef HAVE_LAPACK
209 {
213  {
214  SG_ERROR("CLinearTimeMMD::optimize_kernel_weights(): Only possible "
215  "with a combined kernel!\n");
216  }
217 
219  {
220  SG_ERROR("CLinearTimeMMD::optimize_kernel_weights(): Only possible "
221  "with combined features!\n");
222  }
223 
224  /* think about casting and types of different kernels/features here */
225  CCombinedFeatures* combined_p_and_q=
226  dynamic_cast<CCombinedFeatures*>(m_p_and_q);
227  CCombinedKernel* combined_kernel=dynamic_cast<CCombinedKernel*>(m_kernel);
228  ASSERT(combined_p_and_q);
229  ASSERT(combined_kernel);
230 
231  if (combined_kernel->get_num_subkernels()!=
232  combined_p_and_q->get_num_feature_obj())
233  {
234  SG_ERROR("CLinearTimeMMD::optimize_kernel_weights(): Only possible "
235  "when number of sub-kernels (%d) equal number of sub-features "
236  "(%d)\n", combined_kernel->get_num_subkernels(),
237  combined_p_and_q->get_num_feature_obj());
238  }
239 
240  /* init kernel with features */
242 
243  /* number of kernels and data */
244  index_t num_kernels=combined_kernel->get_num_subkernels();
245  index_t m2=m_q_start/2;
246 
247  /* matrix with all h entries for all kernels and data */
248  SGMatrix<float64_t> hs(m2, num_kernels);
249 
250  /* mmds are needed and are means of columns of hs */
251  SGVector<float64_t> mmds(num_kernels);
252 
253  float64_t pp;
254  float64_t qq;
255  float64_t pq;
256  float64_t qp;
257  /* compute all h entries */
258  for (index_t i=0; i<num_kernels; ++i)
259  {
260  CKernel* current=combined_kernel->get_kernel(i);
261  mmds[i]=0;
262  for (index_t j=0; j<m2; ++j)
263  {
264  pp=current->kernel(j, m2+j);
265  qq=current->kernel(m_q_start+j, m_q_start+m2+j);
266  pq=current->kernel(j, m_q_start+m2+j);
267  qp=current->kernel(m2+j, m_q_start+j);
268  hs(j, i)=pp+qq-pq-qp;
269  mmds[i]+=hs(j, i);
270  }
271 
272  /* mmd is simply mean. This is the unbiased linear time estimate */
273  mmds[i]/=m2;
274 
275  SG_UNREF(current);
276  }
277 
278  /* compute covariance matrix of h vector, in place is safe now since h
279  * is not needed anymore */
281 
282  /* evtl regularize to avoid numerical problems (ratio of MMD and std-dev
283  * blows up when variance is small */
285  {
286  SG_DEBUG("regularizing matrix Q by adding %f to diagonal\n",
288  for (index_t i=0; i<num_kernels; ++i)
290  }
291 
292  if (sg_io->get_loglevel()==MSG_DEBUG)
293  {
294  m_Q.display_matrix("(evtl. regularized) Q");
295  mmds.display_vector("mmds");
296  }
297 
298  /* compute sum of mmds to generate feasible point for convex program */
299  float64_t sum_mmds=0;
300  for (index_t i=0; i<mmds.vlen; ++i)
301  sum_mmds+=mmds[i];
302 
303  /* QP: 0.5*x'*Q*x + f'*x
304  * subject to
305  * mmds'*x = b
306  * LB[i] <= x[i] <= UB[i] for all i=1..n */
307  SGVector<float64_t> Q_diag(num_kernels);
308  SGVector<float64_t> f(num_kernels);
309  SGVector<float64_t> lb(num_kernels);
310  SGVector<float64_t> ub(num_kernels);
311  SGVector<float64_t> x(num_kernels);
312 
313  /* init everything, there are two cases possible: i) at least one mmd is
314  * is positive, ii) all mmds are negative */
315  bool one_pos;
316  for (index_t i=0; i<mmds.vlen; ++i)
317  {
318  if (mmds[i]>0)
319  {
320  SG_DEBUG("found at least one positive MMD\n");
321  one_pos=true;
322  break;
323  }
324  one_pos=false;
325  }
326 
327  if (!one_pos)
328  {
329  SG_WARNING("All mmd estimates are negative. This is techical possible,"
330  " although extremely rare. Current problem might bad\n");
331 
332  /* if no element is positive, Q has to be replaced by -Q */
333  for (index_t i=0; i<num_kernels*num_kernels; ++i)
334  m_Q.matrix[i]=-m_Q.matrix[i];
335  }
336 
337  /* init vectors */
338  for (index_t i=0; i<num_kernels; ++i)
339  {
340  Q_diag[i]=m_Q(i,i);
341  f[i]=0;
342  lb[i]=0;
343  ub[i]=CMath::INFTY;
344 
345  /* initial point has to be feasible, i.e. mmds'*x = b */
346  x[i]=1.0/sum_mmds;
347  }
348 
349  /* start libqp solver with desired parameters */
350  SG_DEBUG("starting libqp optimization\n");
351  libqp_state_T qp_exitflag=libqp_gsmo_solver(&get_Q_col, Q_diag.vector,
352  f.vector, mmds.vector,
353  one_pos ? 1 : -1,
354  lb.vector, ub.vector,
355  x.vector, num_kernels, m_opt_max_iterations,
357 
358  SG_DEBUG("libqp returns: nIts=%d, exit_flag: %d\n", qp_exitflag.nIter,
359  qp_exitflag.exitflag);
360 
361  /* set really small entries to zero and sum up for normalization */
362  float64_t sum_weights=0;
363  for (index_t i=0; i<x.vlen; ++i)
364  {
365  if (x[i]<m_opt_low_cut)
366  {
367  SG_DEBUG("lowcut: weight[%i]=%f<%f; setting to zero\n", i, x[i],
368  m_opt_low_cut);
369  x[i]=0;
370  }
371 
372  sum_weights+=x[i];
373  }
374 
375  /* normalize (allowed since problem is scale invariant) */
376  for (index_t i=0; i<x.vlen; ++i)
377  x[i]/=sum_weights;
378 
379  /* set weights to kernel */
381 }
382 
384 
386 {
387  return &m_Q[m_Q.num_rows*i];
388 }
389 
390 void CLinearTimeMMD::print_state(libqp_state_T state)
391 {
392  SG_SDEBUG("libqp state: primal=%f\n", state.QP);
393 }
394 
395 #endif //HAVE_LAPACK
396 

SHOGUN Machine Learning Toolbox - Documentation