SHOGUN 3.2.1
LaplacianInferenceMethod.cpp
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2013 Roman Votyakov
 * Copyright (C) 2012 Jacob Walker
 * Copyright (C) 2013 Roman Votyakov
 *
 * Code adapted from the Gaussian Process Machine Learning Toolbox
 * http://www.gaussianprocess.org/gpml/code/matlab/doc/
 * This code is specifically adapted from infLaplace.m
 */

#include <shogun/machine/gp/LaplacianInferenceMethod.h>

#ifdef HAVE_EIGEN3

#include <shogun/machine/gp/StudentsTLikelihood.h>
#include <shogun/mathematics/Math.h>
#include <shogun/lib/external/brent.h>
#include <shogun/mathematics/eigen3.h>

using namespace shogun;
using namespace Eigen;

namespace shogun
{

#ifndef DOXYGEN_SHOULD_SKIP_THIS

/** Wrapper class used for the Brent minimizer */
class CPsiLine : public func_base
{
public:
    float64_t scale;
    MatrixXd K;
    VectorXd dalpha;
    VectorXd start_alpha;
    Map<VectorXd>* alpha;
    SGVector<float64_t>* dlp;
    SGVector<float64_t>* W;
    SGVector<float64_t>* f;
    SGVector<float64_t>* m;
    CLikelihoodModel* lik;
    CLabels* lab;

    virtual double operator() (double x)
    {
        Map<VectorXd> eigen_f(f->vector, f->vlen);
        Map<VectorXd> eigen_m(m->vector, m->vlen);

        // compute alpha=alpha+x*dalpha and f=K*alpha+m
        (*alpha)=start_alpha+x*dalpha;
        eigen_f=K*(*alpha)*CMath::sq(scale)+eigen_m;

        // get first and second derivatives of log likelihood
        (*dlp)=lik->get_log_probability_derivative_f(lab, (*f), 1);

        (*W)=lik->get_log_probability_derivative_f(lab, (*f), 2);
        W->scale(-1.0);

        // compute psi=alpha'*(f-m)/2-lp
        float64_t result = (*alpha).dot(eigen_f-eigen_m)/2.0-
            SGVector<float64_t>::sum(lik->get_log_probability_f(lab, *f));

        return result;
    }
};

#endif /* DOXYGEN_SHOULD_SKIP_THIS */
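
/* Note on CPsiLine: for a step size x along the Newton direction dalpha,
 * the functor evaluates (a sketch, following infLaplace.m)
 *
 *   alpha = start_alpha + x*dalpha,  f = K*alpha*scale^2 + m,
 *   psi(x) = alpha'*(f-m)/2 - sum(log p(y|f)),
 *
 * i.e. the (unnormalized) negative log posterior as a function of the
 * step size, which Brent's method minimizes inside update_alpha(). */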

CLaplacianInferenceMethod::CLaplacianInferenceMethod() : CInferenceMethod()
{
    init();
}

CLaplacianInferenceMethod::CLaplacianInferenceMethod(CKernel* kern,
    CFeatures* feat, CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod)
    : CInferenceMethod(kern, feat, m, lab, mod)
{
    init();
}

void CLaplacianInferenceMethod::init()
{
    m_iter=20;
    m_tolerance=1e-6;
    m_opt_tolerance=1e-10;
    m_opt_max=10;
}

CLaplacianInferenceMethod::~CLaplacianInferenceMethod()
{
}

void CLaplacianInferenceMethod::update()
{
    SG_DEBUG("entering\n");

    CInferenceMethod::update();
    update_alpha();
    update_chol();
    update_approx_cov();
    update_deriv();
    update_parameter_hash();

    SG_DEBUG("leaving\n");
}

SGVector<float64_t> CLaplacianInferenceMethod::get_diagonal_vector()
{
    if (parameter_hash_changed())
        update();

    return SGVector<float64_t>(sW);
}

float64_t CLaplacianInferenceMethod::get_negative_log_marginal_likelihood()
{
    if (parameter_hash_changed())
        update();

    // create eigen representations alpha, f, W, L
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
    Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);
    Map<VectorXd> eigen_W(W.vector, W.vlen);
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);

    // get mean vector and create eigen representation of it
    SGVector<float64_t> mean=m_mean->get_mean_vector(m_features);
    Map<VectorXd> eigen_mean(mean.vector, mean.vlen);

    // get log likelihood
    float64_t lp=SGVector<float64_t>::sum(m_model->get_log_probability_f(m_labels,
        m_mu));

    float64_t result;

    if (eigen_W.minCoeff()<0)
    {
        Map<VectorXd> eigen_sW(sW.vector, sW.vlen);
        Map<MatrixXd> eigen_ktrtr(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);

        FullPivLU<MatrixXd> lu(MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols)+
            eigen_ktrtr*CMath::sq(m_scale)*eigen_sW.asDiagonal());

        result=(eigen_alpha.dot(eigen_mu-eigen_mean))/2.0-
            lp+log(lu.determinant())/2.0;
    }
    else
    {
        result=eigen_alpha.dot(eigen_mu-eigen_mean)/2.0-lp+
            eigen_L.diagonal().array().log().sum();
    }

    return result;
}

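/* For reference, the quantity assembled above is the approximate negative
 * log marginal likelihood of the Laplace approximation (cf. infLaplace.m
 * and ch. 3 of Rasmussen & Williams, "Gaussian Processes for Machine
 * Learning"):
 *
 *   -log Z = alpha'*(f-m)/2 - sum(log p(y|f)) + log|I + sW*K*sW|/2,
 *
 * where the log-determinant is read off the diagonal of the upper Cholesky
 * factor stored in m_L when W is non-negative, and is computed via a
 * full-pivoting LU decomposition otherwise. */
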
SGVector<float64_t> CLaplacianInferenceMethod::get_alpha()
{
    if (parameter_hash_changed())
        update();

    SGVector<float64_t> result(m_alpha);
    return result;
}

SGMatrix<float64_t> CLaplacianInferenceMethod::get_cholesky()
{
    if (parameter_hash_changed())
        update();

    return SGMatrix<float64_t>(m_L);
}

SGVector<float64_t> CLaplacianInferenceMethod::get_posterior_mean()
{
    if (parameter_hash_changed())
        update();

    return SGVector<float64_t>(m_mu);
}

SGMatrix<float64_t> CLaplacianInferenceMethod::get_posterior_covariance()
{
    if (parameter_hash_changed())
        update();

    return SGMatrix<float64_t>(m_Sigma);
}

void CLaplacianInferenceMethod::update_approx_cov()
{
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<VectorXd> eigen_sW(sW.vector, sW.vlen);

    m_Sigma=SGMatrix<float64_t>(m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows, m_Sigma.num_cols);

    // compute V = L^(-1) * W^(1/2) * K, using upper triangular factor L^T
    MatrixXd eigen_V=eigen_L.triangularView<Upper>().adjoint().solve(
        eigen_sW.asDiagonal()*eigen_K*CMath::sq(m_scale));

    // compute covariance matrix of the posterior:
    // Sigma = K - K * W^(1/2) * (L * L^T)^(-1) * W^(1/2) * K =
    // K - (K * W^(1/2)) * (L^T)^(-1) * L^(-1) * W^(1/2) * K =
    // K - (W^(1/2) * K)^T * (L^(-1))^T * L^(-1) * W^(1/2) * K = K - V^T * V
    eigen_Sigma=eigen_K*CMath::sq(m_scale)-eigen_V.adjoint()*eigen_V;
}

void CLaplacianInferenceMethod::update_chol()
{
    Map<VectorXd> eigen_W(W.vector, W.vlen);

    // create eigen representation of kernel matrix
    Map<MatrixXd> eigen_ktrtr(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);

    // create shogun and eigen representation of posterior cholesky
    m_L=SGMatrix<float64_t>(m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);

    if (eigen_W.minCoeff() < 0)
    {
        // compute inverse of diagonal noise: iW = 1/W
        VectorXd eigen_iW = (VectorXd::Ones(W.vlen)).cwiseQuotient(eigen_W);

        FullPivLU<MatrixXd> lu(
            eigen_ktrtr*CMath::sq(m_scale)+MatrixXd(eigen_iW.asDiagonal()));

        // compute cholesky: L = -(K + iW)^-1
        eigen_L = -lu.inverse();
    }
    else
    {
        Map<VectorXd> eigen_sW(sW.vector, sW.vlen);

        // compute cholesky: L = chol(sW * sW' .* K + I)
        LLT<MatrixXd> L(
            (eigen_sW*eigen_sW.transpose()).cwiseProduct(eigen_ktrtr*CMath::sq(m_scale))+
            MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols));

        eigen_L = L.matrixU();
    }
}
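
/* A note on the two branches above: for log-concave likelihoods W is
 * non-negative, so B = I + sW*K*sW is symmetric positive definite and its
 * upper Cholesky factor can be stored in m_L. For non-log-concave models
 * (e.g. Student's t) W may contain negative entries and B can be
 * indefinite, so -(K + inv(W))^-1 is stored instead, computed via a
 * full-pivoting LU decomposition. */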

void CLaplacianInferenceMethod::update_alpha()
{
    float64_t Psi_Old = CMath::INFTY;
    float64_t Psi_New;
    float64_t Psi_Def;

    // get mean vector and create eigen representation of it
    SGVector<float64_t> mean=m_mean->get_mean_vector(m_features);
    Map<VectorXd> eigen_mean(mean.vector, mean.vlen);

    // create eigen representation of kernel matrix
    Map<MatrixXd> eigen_ktrtr(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);

    // create shogun and eigen representation of function vector
    m_mu=SGVector<float64_t>(mean.vlen);
    Map<VectorXd> eigen_mu(m_mu, m_mu.vlen);

    if (m_alpha.vlen!=m_labels->get_num_labels())
    {
        // set alpha to a zero vector
        m_alpha=SGVector<float64_t>(m_labels->get_num_labels());
        m_alpha.zero();

        // f = mean, if lengths of alpha and y don't match
        eigen_mu=eigen_mean;

        // compute W = -d2lp
        W=m_model->get_log_probability_derivative_f(m_labels, m_mu, 2);
        W.scale(-1.0);

        Psi_New=-SGVector<float64_t>::sum(m_model->get_log_probability_f(
            m_labels, m_mu));
    }
    else
    {
        Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

        // compute f = K * alpha + m
        eigen_mu=eigen_ktrtr*CMath::sq(m_scale)*eigen_alpha+eigen_mean;

        // compute W = -d2lp
        W=m_model->get_log_probability_derivative_f(m_labels, m_mu, 2);
        W.scale(-1.0);

        Psi_New=eigen_alpha.dot(eigen_mu-eigen_mean)/2.0-
            SGVector<float64_t>::sum(m_model->get_log_probability_f(m_labels, m_mu));

        Psi_Def=-SGVector<float64_t>::sum(m_model->get_log_probability_f(m_labels, mean));

        // if default is better, then use it
        if (Psi_Def < Psi_New)
        {
            m_alpha.zero();
            eigen_mu=eigen_mean;
            Psi_New=-SGVector<float64_t>::sum(m_model->get_log_probability_f(
                m_labels, m_mu));
        }
    }

    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

    // get first derivative of log probability function
    dlp=m_model->get_log_probability_derivative_f(m_labels, m_mu, 1);

    // create shogun and eigen representation of sW
    sW=SGVector<float64_t>(W.vlen);
    Map<VectorXd> eigen_sW(sW.vector, sW.vlen);

    index_t iter=0;

    while (Psi_Old-Psi_New>m_tolerance && iter<m_iter)
    {
        Map<VectorXd> eigen_W(W.vector, W.vlen);
        Map<VectorXd> eigen_dlp(dlp.vector, dlp.vlen);

        Psi_Old = Psi_New;
        iter++;

        if (eigen_W.minCoeff() < 0)
        {
            // Suggested by Vanhatalo et al.,
            // Gaussian Process Regression with Student's t likelihood, NIPS 2009
            // Quoted from infLaplace.m
            float64_t df;

            if (m_model->get_model_type()==LT_STUDENTST)
            {
                CStudentsTLikelihood* lik=CStudentsTLikelihood::obtain_from_generic(m_model);
                df=lik->get_degrees_freedom();
                SG_UNREF(lik);
            }
            else
                df=1;

            eigen_W+=(2.0/df)*eigen_dlp.cwiseProduct(eigen_dlp);
        }

        // compute sW = sqrt(W)
        eigen_sW=eigen_W.cwiseSqrt();

        LLT<MatrixXd> L((eigen_sW*eigen_sW.transpose()).cwiseProduct(eigen_ktrtr*CMath::sq(m_scale))+
            MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols));

        VectorXd b=eigen_W.cwiseProduct(eigen_mu - eigen_mean)+eigen_dlp;

        VectorXd dalpha=b-eigen_sW.cwiseProduct(
            L.solve(eigen_sW.cwiseProduct(eigen_ktrtr*b*CMath::sq(m_scale))))-eigen_alpha;

        // perform Brent's optimization
        CPsiLine func;

        func.scale=m_scale;
        func.K=eigen_ktrtr;
        func.dalpha=dalpha;
        func.start_alpha=eigen_alpha;
        func.alpha=&eigen_alpha;
        func.dlp=&dlp;
        func.f=&m_mu;
        func.m=&mean;
        func.W=&W;
        func.lik=m_model;
        func.lab=m_labels;

        float64_t x;
        Psi_New=local_min(0, m_opt_max, m_opt_tolerance, func, x);
    }

    // compute f = K * alpha + m
    eigen_mu=eigen_ktrtr*CMath::sq(m_scale)*eigen_alpha+eigen_mean;

    // get log probability derivatives
    dlp=m_model->get_log_probability_derivative_f(m_labels, m_mu, 1);
    d2lp=m_model->get_log_probability_derivative_f(m_labels, m_mu, 2);
    d3lp=m_model->get_log_probability_derivative_f(m_labels, m_mu, 3);

    // W = -d2lp
    W=d2lp.clone();
    W.scale(-1.0);

    // compute sW
    Map<VectorXd> eigen_W(W.vector, W.vlen);

    if (eigen_W.minCoeff()>0)
        eigen_sW=eigen_W.cwiseSqrt();
    else
        eigen_sW.setZero();
}
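
/* The Newton direction used above can be sketched as follows: with
 *
 *   b = W*(f-m) + dlp  and  B = I + sW*K*sW,
 *
 * the matrix inversion lemma gives (inv(K) + W)^-1 = K - K*sW*inv(B)*sW*K,
 * so the full Newton step in alpha-space is
 *
 *   dalpha = b - sW*(B \ (sW*(K*b))) - alpha,
 *
 * computed above with the Cholesky solve L.solve(...). Brent's line search
 * along dalpha then guards against overshooting when the likelihood is not
 * well approximated by its local quadratic expansion. */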

void CLaplacianInferenceMethod::update_deriv()
{
    // create eigen representation of W, sW, dlp, d3lp, K, alpha and L
    Map<VectorXd> eigen_W(W.vector, W.vlen);
    Map<VectorXd> eigen_sW(sW.vector, sW.vlen);
    Map<VectorXd> eigen_dlp(dlp.vector, dlp.vlen);
    Map<VectorXd> eigen_d3lp(d3lp.vector, d3lp.vlen);
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);

    // create shogun and eigen representation of matrix Z
    m_Z=SGMatrix<float64_t>(m_L.num_rows, m_L.num_cols);
    Map<MatrixXd> eigen_Z(m_Z.matrix, m_Z.num_rows, m_Z.num_cols);

    // create shogun and eigen representation of the vector g
    m_g=SGVector<float64_t>(m_Z.num_rows);
    Map<VectorXd> eigen_g(m_g.vector, m_g.vlen);

    if (eigen_W.minCoeff()<0)
    {
        eigen_Z=-eigen_L;

        // compute iA = (I + K * diag(W))^-1
        FullPivLU<MatrixXd> lu(MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols)+
            eigen_K*CMath::sq(m_scale)*eigen_W.asDiagonal());
        MatrixXd iA=lu.inverse();

        // compute derivative ln|L'*L| wrt W: g=sum(iA.*K,2)/2
        eigen_g=(iA.cwiseProduct(eigen_K*CMath::sq(m_scale))).rowwise().sum()/2.0;
    }
    else
    {
        // solve L'*L*Z=diag(sW) and compute Z=diag(sW)*Z
        eigen_Z=eigen_L.triangularView<Upper>().adjoint().solve(
            MatrixXd(eigen_sW.asDiagonal()));
        eigen_Z=eigen_L.triangularView<Upper>().solve(eigen_Z);
        eigen_Z=eigen_sW.asDiagonal()*eigen_Z;

        // solve L'*C=diag(sW)*K
        MatrixXd C=eigen_L.triangularView<Upper>().adjoint().solve(
            eigen_sW.asDiagonal()*eigen_K*CMath::sq(m_scale));

        // compute derivative ln|L'*L| wrt W: g=(diag(K)-sum(C.^2,1)')/2
        eigen_g=(eigen_K.diagonal()*CMath::sq(m_scale)-
            (C.cwiseProduct(C)).colwise().sum().adjoint())/2.0;
    }

    // create shogun and eigen representation of the vector dfhat
    m_dfhat=SGVector<float64_t>(m_g.vlen);
    Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);

    // compute derivative of nlZ wrt fhat
    eigen_dfhat=eigen_g.cwiseProduct(eigen_d3lp);
}
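
/* The vector dfhat computed above is the implicit part of the gradient:
 * the posterior mode fhat itself depends on the hyperparameters, and each
 * get_derivative_wrt_* method below adds a correction term of the form
 * dfhat'*(b - K*(Z*b)) on top of the explicit derivative, with b depending
 * on the parameter being differentiated. */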

SGVector<float64_t> CLaplacianInferenceMethod::get_derivative_wrt_inference_method(
    const TParameter* param)
{
    REQUIRE(!strcmp(param->m_name, "scale"), "Can't compute derivative of "
        "the negative log marginal likelihood wrt %s.%s parameter\n",
        get_name(), param->m_name)

    // create eigen representation of K, Z, dfhat, dlp and alpha
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<MatrixXd> eigen_Z(m_Z.matrix, m_Z.num_rows, m_Z.num_cols);
    Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);
    Map<VectorXd> eigen_dlp(dlp.vector, dlp.vlen);
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

    SGVector<float64_t> result(1);

    // compute derivative of K wrt scale
    MatrixXd dK=eigen_K*m_scale*2.0;

    // compute dnlZ=sum(sum(Z.*dK))/2-alpha'*dK*alpha/2
    result[0]=(eigen_Z.cwiseProduct(dK)).sum()/2.0-
        (eigen_alpha.adjoint()*dK).dot(eigen_alpha)/2.0;

    // compute b=dK*dlp
    VectorXd b=dK*eigen_dlp;

    // compute dnlZ=dnlZ-dfhat'*(b-K*(Z*b))
    result[0]=result[0]-eigen_dfhat.dot(b-eigen_K*CMath::sq(m_scale)*(eigen_Z*b));

    return result;
}
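
/* A quick check of dK above: the effective training covariance is
 * scale^2*K, hence d(scale^2*K)/d(scale) = 2*scale*K, which is exactly the
 * dK entering the explicit term sum(sum(Z.*dK))/2 - alpha'*dK*alpha/2. */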

SGVector<float64_t> CLaplacianInferenceMethod::get_derivative_wrt_likelihood_model(
    const TParameter* param)
{
    // create eigen representation of K, Z, g and dfhat
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<MatrixXd> eigen_Z(m_Z.matrix, m_Z.num_rows, m_Z.num_cols);
    Map<VectorXd> eigen_g(m_g.vector, m_g.vlen);
    Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);

    // get derivatives wrt likelihood model parameters
    SGVector<float64_t> lp_dhyp=m_model->get_first_derivative(m_labels,
        m_mu, param);
    SGVector<float64_t> dlp_dhyp=m_model->get_second_derivative(m_labels,
        m_mu, param);
    SGVector<float64_t> d2lp_dhyp=m_model->get_third_derivative(m_labels,
        m_mu, param);

    // create eigen representation of the derivatives
    Map<VectorXd> eigen_lp_dhyp(lp_dhyp.vector, lp_dhyp.vlen);
    Map<VectorXd> eigen_dlp_dhyp(dlp_dhyp.vector, dlp_dhyp.vlen);
    Map<VectorXd> eigen_d2lp_dhyp(d2lp_dhyp.vector, d2lp_dhyp.vlen);

    SGVector<float64_t> result(1);

    // compute b vector
    VectorXd b=eigen_K*eigen_dlp_dhyp;

    // compute dnlZ=-g'*d2lp_dhyp-sum(lp_dhyp)-dfhat'*(b-K*(Z*b))
    result[0]=-eigen_g.dot(eigen_d2lp_dhyp)-eigen_lp_dhyp.sum()-
        eigen_dfhat.dot(b-eigen_K*CMath::sq(m_scale)*(eigen_Z*b));

    return result;
}

SGVector<float64_t> CLaplacianInferenceMethod::get_derivative_wrt_kernel(
    const TParameter* param)
{
    // create eigen representation of K, Z, dfhat, dlp and alpha
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<MatrixXd> eigen_Z(m_Z.matrix, m_Z.num_rows, m_Z.num_cols);
    Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);
    Map<VectorXd> eigen_dlp(dlp.vector, dlp.vlen);
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

    SGVector<float64_t> result;

    if (param->m_datatype.m_ctype==CT_VECTOR ||
        param->m_datatype.m_ctype==CT_SGVECTOR)
    {
        REQUIRE(param->m_datatype.m_length_y,
            "Length of the parameter %s should not be NULL\n", param->m_name)
        result=SGVector<float64_t>(*(param->m_datatype.m_length_y));
    }
    else
    {
        result=SGVector<float64_t>(1);
    }

    for (index_t i=0; i<result.vlen; i++)
    {
        SGMatrix<float64_t> dK;

        if (result.vlen==1)
            dK=m_kernel->get_parameter_gradient(param);
        else
            dK=m_kernel->get_parameter_gradient(param, i);

        Map<MatrixXd> eigen_dK(dK.matrix, dK.num_rows, dK.num_cols);

        // compute dnlZ=sum(sum(Z.*dK))/2-alpha'*dK*alpha/2
        result[i]=(eigen_Z.cwiseProduct(eigen_dK)).sum()/2.0-
            (eigen_alpha.adjoint()*eigen_dK).dot(eigen_alpha)/2.0;

        // compute b=dK*dlp
        VectorXd b=eigen_dK*eigen_dlp;

        // compute dnlZ=dnlZ-dfhat'*(b-K*(Z*b))
        result[i]=result[i]-eigen_dfhat.dot(b-eigen_K*CMath::sq(m_scale)*
            (eigen_Z*b));
    }

    return result;
}

SGVector<float64_t> CLaplacianInferenceMethod::get_derivative_wrt_mean(
    const TParameter* param)
{
    // create eigen representation of K, Z, dfhat and alpha
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<MatrixXd> eigen_Z(m_Z.matrix, m_Z.num_rows, m_Z.num_cols);
    Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

    SGVector<float64_t> result;

    if (param->m_datatype.m_ctype==CT_VECTOR ||
        param->m_datatype.m_ctype==CT_SGVECTOR)
    {
        REQUIRE(param->m_datatype.m_length_y,
            "Length of the parameter %s should not be NULL\n", param->m_name)

        result=SGVector<float64_t>(*(param->m_datatype.m_length_y));
    }
    else
    {
        result=SGVector<float64_t>(1);
    }

    for (index_t i=0; i<result.vlen; i++)
    {
        SGVector<float64_t> dmu;

        if (result.vlen==1)
            dmu=m_mean->get_parameter_derivative(m_features, param);
        else
            dmu=m_mean->get_parameter_derivative(m_features, param, i);

        Map<VectorXd> eigen_dmu(dmu.vector, dmu.vlen);

        // compute dnlZ=-alpha'*dmu-dfhat'*(dmu-K*(Z*dmu))
        result[i]=-eigen_alpha.dot(eigen_dmu)-eigen_dfhat.dot(eigen_dmu-
            eigen_K*CMath::sq(m_scale)*(eigen_Z*eigen_dmu));
    }

    return result;
}
}

#endif /* HAVE_EIGEN3 */
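
A minimal usage sketch (not part of the file above): the snippet below
assumes a Shogun 3.2 build with HAVE_EIGEN3 and uses CGaussianKernel,
CZeroMean, CLogitLikelihood, CDenseFeatures and CBinaryLabels, which are
standard Shogun classes but are not referenced in this file. It shows how
the inference method defined above could be constructed and queried.

    #include <shogun/features/DenseFeatures.h>
    #include <shogun/labels/BinaryLabels.h>
    #include <shogun/kernel/GaussianKernel.h>
    #include <shogun/machine/gp/ZeroMean.h>
    #include <shogun/machine/gp/LogitLikelihood.h>
    #include <shogun/machine/gp/LaplacianInferenceMethod.h>

    using namespace shogun;

    // feats: training features; labels: binary labels with +1/-1 targets
    void laplace_example(CDenseFeatures<float64_t>* feats, CBinaryLabels* labels)
    {
        CGaussianKernel* kernel=new CGaussianKernel(10, 2.0);
        CZeroMean* mean=new CZeroMean();
        CLogitLikelihood* lik=new CLogitLikelihood();

        CLaplacianInferenceMethod* inf=new CLaplacianInferenceMethod(
            kernel, feats, mean, labels, lik);

        // negative log marginal likelihood under the Laplace approximation
        float64_t nlZ=inf->get_negative_log_marginal_likelihood();
        SG_SPRINT("nlZ=%f\n", nlZ);

        // Gaussian approximation to the posterior: mean and covariance
        SGVector<float64_t> mu=inf->get_posterior_mean();
        SGMatrix<float64_t> Sigma=inf->get_posterior_covariance();

        SG_UNREF(inf);
    }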
