SHOGUN  v3.0.0
LaplacianInferenceMethod.cpp
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2013 Roman Votyakov
 * Copyright (C) 2012 Jacob Walker
 * Copyright (C) 2013 Roman Votyakov
 *
 * Code adapted from Gaussian Process Machine Learning Toolbox
 * http://www.gaussianprocess.org/gpml/code/matlab/doc/
 * This code specifically adapted from infLaplace.m
 */
#include <shogun/machine/gp/LaplacianInferenceMethod.h>

#ifdef HAVE_EIGEN3

#include <shogun/machine/gp/StudentsTLikelihood.h>
#include <shogun/mathematics/Math.h>
#include <shogun/lib/external/brent.h>
#include <shogun/mathematics/eigen3.h>

using namespace shogun;
using namespace Eigen;

namespace shogun
{
#ifndef DOXYGEN_SHOULD_SKIP_THIS

/** Wrapper class used for the Brent minimizer */
class CPsiLine : public func_base
{
public:
    float64_t scale;
    MatrixXd K;
    VectorXd dalpha;
    VectorXd start_alpha;
    Map<VectorXd>* alpha;
    SGVector<float64_t>* dlp;
    SGVector<float64_t>* W;
    SGVector<float64_t>* f;
    SGVector<float64_t>* m;
    CLikelihoodModel* lik;
    CLabels* lab;

    virtual double operator() (double x)
    {
        Map<VectorXd> eigen_f(f->vector, f->vlen);
        Map<VectorXd> eigen_m(m->vector, m->vlen);

        // compute alpha=alpha+x*dalpha and f=K*alpha+m
        (*alpha)=start_alpha+x*dalpha;
        eigen_f=K*(*alpha)*CMath::sq(scale)+eigen_m;

        // get first and second derivatives of log likelihood
        (*dlp)=lik->get_log_probability_derivative_f(lab, (*f), 1);

        (*W)=lik->get_log_probability_derivative_f(lab, (*f), 2);
        W->scale(-1.0);

        // compute psi=alpha'*(f-m)/2-lp
        float64_t result = (*alpha).dot(eigen_f-eigen_m)/2.0-
            SGVector<float64_t>::sum(lik->get_log_probability_f(lab, *f));

        return result;
    }
};

#endif /* DOXYGEN_SHOULD_SKIP_THIS */
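/* Note: CPsiLine evaluates the objective of the Brent line search used in
 * update_alpha() below. Along the Newton direction dalpha it computes
 *
 *   psi(x) = alpha'*(f-m)/2 - sum(log p(y|f)),
 *
 * with alpha = start_alpha + x*dalpha and f = K*alpha*scale^2 + m, and it
 * refreshes f, dlp and W as side effects for the caller.
 */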
CLaplacianInferenceMethod::CLaplacianInferenceMethod() : CInferenceMethod()
{
    init();
}

CLaplacianInferenceMethod::CLaplacianInferenceMethod(CKernel* kern,
        CFeatures* feat, CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod)
        : CInferenceMethod(kern, feat, m, lab, mod)
{
    init();
}
void CLaplacianInferenceMethod::init()
{
    m_iter=20;             // maximum number of Newton iterations
    m_tolerance=1e-6;      // convergence tolerance of the Newton iteration
    m_opt_tolerance=1e-10; // tolerance of the Brent line search
    m_opt_max=10;          // upper bound of the Brent search interval
}
CLaplacianInferenceMethod::~CLaplacianInferenceMethod()
{
}
void CLaplacianInferenceMethod::update()
{
    CInferenceMethod::update();
    update_alpha();
    update_chol();
    update_approx_cov();
    update_deriv();
}
SGVector<float64_t> CLaplacianInferenceMethod::get_diagonal_vector()
{
    if (update_parameter_hash())
        update();

    // sW holds the square root of W, i.e. of the negated log-likelihood Hessian
    return SGVector<float64_t>(sW);
}
float64_t CLaplacianInferenceMethod::get_negative_marginal_likelihood()
{
    if (update_parameter_hash())
        update();

    // create eigen representations alpha, f, W, L
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
    Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);
    Map<VectorXd> eigen_W(W.vector, W.vlen);
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);

    // get mean vector and create eigen representation of it
    SGVector<float64_t> mean=m_mean->get_mean_vector(m_features);
    Map<VectorXd> eigen_mean(mean.vector, mean.vlen);

    // get log likelihood
    float64_t lp=SGVector<float64_t>::sum(m_model->get_log_probability_f(m_labels,
        m_mu));

    float64_t result;

    if (eigen_W.minCoeff()<0)
    {
        Map<VectorXd> eigen_sW(sW.vector, sW.vlen);
        Map<MatrixXd> eigen_ktrtr(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);

        FullPivLU<MatrixXd> lu(MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols)+
            eigen_ktrtr*CMath::sq(m_scale)*eigen_sW.asDiagonal());

        result=(eigen_alpha.dot(eigen_mu-eigen_mean))/2.0-
            lp+log(lu.determinant())/2.0;
    }
    else
    {
        result=eigen_alpha.dot(eigen_mu-eigen_mean)/2.0-lp+
            eigen_L.diagonal().array().log().sum();
    }

    return result;
}
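/* For log-concave likelihoods (all entries of W nonnegative) the value
 * computed above is the Laplace approximation of the negative log marginal
 * likelihood (Rasmussen & Williams 2006, eq. 3.32):
 *
 *   -log Z = alpha'*(f-m)/2 - sum(log p(y|f)) + sum(log(diag(L))),
 *
 * where L'*L = B = I + sW*K*sW, so sum(log(diag(L))) = log(det(B))/2. The
 * first branch covers non-log-concave likelihoods, where B may be
 * indefinite, by computing the log determinant through an LU decomposition
 * instead of the Cholesky factor.
 */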
SGVector<float64_t> CLaplacianInferenceMethod::get_alpha()
{
    if (update_parameter_hash())
        update();

    SGVector<float64_t> result(m_alpha);
    return result;
}
SGMatrix<float64_t> CLaplacianInferenceMethod::get_cholesky()
{
    if (update_parameter_hash())
        update();

    return SGMatrix<float64_t>(m_L);
}
SGVector<float64_t> CLaplacianInferenceMethod::get_posterior_mean()
{
    if (update_parameter_hash())
        update();

    return SGVector<float64_t>(m_mu);
}
SGMatrix<float64_t> CLaplacianInferenceMethod::get_posterior_covariance()
{
    if (update_parameter_hash())
        update();

    return SGMatrix<float64_t>(m_Sigma);
}
void CLaplacianInferenceMethod::update_approx_cov()
{
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<VectorXd> eigen_sW(sW.vector, sW.vlen);

    m_Sigma=SGMatrix<float64_t>(m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows, m_Sigma.num_cols);

    // compute V = L^(-1) * W^(1/2) * K, using upper triangular factor L^T
    MatrixXd eigen_V=eigen_L.triangularView<Upper>().adjoint().solve(
        eigen_sW.asDiagonal()*eigen_K*CMath::sq(m_scale));

    // compute covariance matrix of the posterior:
    // Sigma = K - K * W^(1/2) * (L * L^T)^(-1) * W^(1/2) * K =
    // K - (K * W^(1/2)) * (L^T)^(-1) * L^(-1) * W^(1/2) * K =
    // K - (W^(1/2) * K)^T * (L^(-1))^T * L^(-1) * W^(1/2) * K = K - V^T * V
    eigen_Sigma=eigen_K*CMath::sq(m_scale)-eigen_V.adjoint()*eigen_V;
}
void CLaplacianInferenceMethod::update_chol()
{
    Map<VectorXd> eigen_W(W.vector, W.vlen);

    // create eigen representation of kernel matrix
    Map<MatrixXd> eigen_ktrtr(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);

    // create shogun and eigen representation of posterior cholesky
    m_L=SGMatrix<float64_t>(m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);

    if (eigen_W.minCoeff() < 0)
    {
        // compute inverse of diagonal noise: iW = 1/W
        VectorXd eigen_iW = (VectorXd::Ones(W.vlen)).cwiseQuotient(eigen_W);

        FullPivLU<MatrixXd> lu(
            eigen_ktrtr*CMath::sq(m_scale)+MatrixXd(eigen_iW.asDiagonal()));

        // compute cholesky: L = -(K + iW)^-1
        eigen_L = -lu.inverse();
    }
    else
    {
        Map<VectorXd> eigen_sW(sW.vector, sW.vlen);

        // compute cholesky: L = chol(sW * sW' .* K + I)
        LLT<MatrixXd> L(
            (eigen_sW*eigen_sW.transpose()).cwiseProduct(eigen_ktrtr*CMath::sq(m_scale))+
            MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols));

        eigen_L = L.matrixU();
    }
}
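/* The sign of W selects the factorization, mirroring infLaplace.m: for
 * log-concave likelihoods B = I + sW*K*sW is symmetric positive definite
 * and m_L holds its upper Cholesky factor, while for likelihoods such as
 * Student's t, W may contain negative entries and m_L instead holds
 * -(K + W^-1)^-1 obtained from a full-pivoting LU decomposition.
 */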
void CLaplacianInferenceMethod::update_alpha()
{
    float64_t Psi_Old = CMath::INFTY;
    float64_t Psi_New;
    float64_t Psi_Def;

    // get mean vector and create eigen representation of it
    SGVector<float64_t> mean=m_mean->get_mean_vector(m_features);
    Map<VectorXd> eigen_mean(mean.vector, mean.vlen);

    // create eigen representation of kernel matrix
    Map<MatrixXd> eigen_ktrtr(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);

    // create shogun and eigen representation of function vector
    m_mu=SGVector<float64_t>(mean.vlen);
    Map<VectorXd> eigen_mu(m_mu, m_mu.vlen);

    if (m_alpha.vlen!=m_labels->get_num_labels())
    {
        // set alpha a zero vector
        m_alpha=SGVector<float64_t>(m_labels->get_num_labels());
        m_alpha.zero();

        // f = mean, if length of alpha and length of y doesn't match
        eigen_mu=eigen_mean;

        // compute W = -d2lp
        W=m_model->get_log_probability_derivative_f(m_labels, m_mu, 2);
        W.scale(-1.0);

        Psi_New=-SGVector<float64_t>::sum(m_model->get_log_probability_f(
            m_labels, m_mu));
    }
    else
    {
        Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

        // compute f = K * alpha + m
        eigen_mu=eigen_ktrtr*CMath::sq(m_scale)*eigen_alpha+eigen_mean;

        // compute W = -d2lp
        W=m_model->get_log_probability_derivative_f(m_labels, m_mu, 2);
        W.scale(-1.0);

        Psi_New=eigen_alpha.dot(eigen_mu-eigen_mean)/2.0-
            SGVector<float64_t>::sum(m_model->get_log_probability_f(m_labels, m_mu));

        Psi_Def=-SGVector<float64_t>::sum(m_model->get_log_probability_f(m_labels, mean));

        // if default is better, then use it
        if (Psi_Def < Psi_New)
        {
            m_alpha.zero();
            eigen_mu=eigen_mean;
            Psi_New=-SGVector<float64_t>::sum(m_model->get_log_probability_f(
                m_labels, m_mu));
        }
    }

    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

    // get first derivative of log probability function
    dlp=m_model->get_log_probability_derivative_f(m_labels, m_mu, 1);

    // create shogun and eigen representation of sW
    sW=SGVector<float64_t>(W.vlen);
    Map<VectorXd> eigen_sW(sW.vector, sW.vlen);

    index_t iter=0;

    while (Psi_Old-Psi_New>m_tolerance && iter<m_iter)
    {
        Map<VectorXd> eigen_W(W.vector, W.vlen);
        Map<VectorXd> eigen_dlp(dlp.vector, dlp.vlen);

        Psi_Old = Psi_New;
        iter++;

        if (eigen_W.minCoeff() < 0)
        {
            // Suggested by Vanhatalo et. al.,
            // Gaussian Process Regression with Student's t likelihood, NIPS 2009
            // Quoted from infLaplace.m
            float64_t df;

            if (m_model->get_model_type()==LT_STUDENTST)
            {
                CStudentsTLikelihood* lik=CStudentsTLikelihood::obtain_from_generic(m_model);
                df=lik->get_degrees_freedom();
                SG_UNREF(lik);
            }
            else
                df=1;

            eigen_W+=(2.0/df)*eigen_dlp.cwiseProduct(eigen_dlp);
        }

        // compute sW = sqrt(W)
        eigen_sW=eigen_W.cwiseSqrt();

        LLT<MatrixXd> L((eigen_sW*eigen_sW.transpose()).cwiseProduct(eigen_ktrtr*CMath::sq(m_scale))+
            MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols));

        VectorXd b=eigen_W.cwiseProduct(eigen_mu - eigen_mean)+eigen_dlp;

        VectorXd dalpha=b-eigen_sW.cwiseProduct(
            L.solve(eigen_sW.cwiseProduct(eigen_ktrtr*b*CMath::sq(m_scale))))-eigen_alpha;

        // perform Brent's optimization
        CPsiLine func;

        func.scale=m_scale;
        func.K=eigen_ktrtr;
        func.dalpha=dalpha;
        func.start_alpha=eigen_alpha;
        func.alpha=&eigen_alpha;
        func.dlp=&dlp;
        func.f=&m_mu;
        func.m=&mean;
        func.W=&W;
        func.lik=m_model;
        func.lab=m_labels;

        float64_t x;
        Psi_New=local_min(0, m_opt_max, m_opt_tolerance, func, x);
    }

    // compute f = K * alpha + m
    eigen_mu=eigen_ktrtr*CMath::sq(m_scale)*eigen_alpha+eigen_mean;

    // get log probability derivatives
    dlp=m_model->get_log_probability_derivative_f(m_labels, m_mu, 1);
    d2lp=m_model->get_log_probability_derivative_f(m_labels, m_mu, 2);
    d3lp=m_model->get_log_probability_derivative_f(m_labels, m_mu, 3);

    // W = -d2lp
    W=d2lp.clone();
    W.scale(-1.0);

    // compute sW
    Map<VectorXd> eigen_W(W.vector, W.vlen);

    if (eigen_W.minCoeff()>0)
        eigen_sW=eigen_W.cwiseSqrt();
    else
        eigen_sW.setZero();
}
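/* Each pass of the loop above is one Newton step for finding the mode of
 * the posterior, following Algorithm 3.1 of Rasmussen & Williams (2006):
 * with B = I + sW*K*sW = L'*L and b = W*(f-m) + dlp, the candidate
 *
 *   alpha_new = b - sW*(B^-1*(sW*(K*b)))
 *
 * satisfies K*alpha_new = (K^-1 + W)^-1*b = f_new - m by the matrix
 * inversion lemma, and the Brent line search selects the damping x in
 * alpha + x*(alpha_new - alpha).
 */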
void CLaplacianInferenceMethod::update_deriv()
{
    // create eigen representation of W, sW, dlp, d3lp, K, alpha and L
    Map<VectorXd> eigen_W(W.vector, W.vlen);
    Map<VectorXd> eigen_sW(sW.vector, sW.vlen);
    Map<VectorXd> eigen_dlp(dlp.vector, dlp.vlen);
    Map<VectorXd> eigen_d3lp(d3lp.vector, d3lp.vlen);
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);

    // create shogun and eigen representation of matrix Z
    m_Z=SGMatrix<float64_t>(m_L.num_rows, m_L.num_cols);
    Map<MatrixXd> eigen_Z(m_Z.matrix, m_Z.num_rows, m_Z.num_cols);

    // create shogun and eigen representation of the vector g
    m_g=SGVector<float64_t>(m_Z.num_rows);
    Map<VectorXd> eigen_g(m_g.vector, m_g.vlen);

    if (eigen_W.minCoeff()<0)
    {
        eigen_Z=-eigen_L;

        // compute iA = (I + K * diag(W))^-1
        FullPivLU<MatrixXd> lu(MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols)+
            eigen_K*CMath::sq(m_scale)*eigen_W.asDiagonal());
        MatrixXd iA=lu.inverse();

        // compute derivative ln|L'*L| wrt W: g=sum(iA.*K,2)/2
        eigen_g=(iA.cwiseProduct(eigen_K*CMath::sq(m_scale))).rowwise().sum()/2.0;
    }
    else
    {
        // solve L'*L*Z=diag(sW) and compute Z=diag(sW)*Z
        eigen_Z=eigen_L.triangularView<Upper>().adjoint().solve(
            MatrixXd(eigen_sW.asDiagonal()));
        eigen_Z=eigen_L.triangularView<Upper>().solve(eigen_Z);
        eigen_Z=eigen_sW.asDiagonal()*eigen_Z;

        // solve L'*C=diag(sW)*K
        MatrixXd C=eigen_L.triangularView<Upper>().adjoint().solve(
            eigen_sW.asDiagonal()*eigen_K*CMath::sq(m_scale));

        // compute derivative ln|L'*L| wrt W: g=(diag(K)-sum(C.^2,1)')/2
        eigen_g=(eigen_K.diagonal()*CMath::sq(m_scale)-
            (C.cwiseProduct(C)).colwise().sum().adjoint())/2.0;
    }

    // create shogun and eigen representation of the vector dfhat
    m_dfhat=SGVector<float64_t>(m_g.vlen);
    Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);

    // compute derivative of nlZ wrt fhat
    eigen_dfhat=eigen_g.cwiseProduct(eigen_d3lp);
}
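/* The cached quantities implement the chain rule for the hyperparameter
 * gradients computed below: Z holds (K + W^-1)^-1, g(i) is the derivative
 * of log(det(B))/2 with respect to W(i), and dfhat = g.*d3lp is the
 * derivative of the approximate marginal likelihood with respect to the
 * posterior mode. The get_derivative_wrt_* methods combine these implicit
 * terms with the explicit ones for each parameter (see Section 5.5.1 of
 * Rasmussen & Williams 2006).
 */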
SGVector<float64_t> CLaplacianInferenceMethod::get_derivative_wrt_inference_method(
    const TParameter* param)
{
    REQUIRE(!strcmp(param->m_name, "scale"), "Can't compute derivative of "
        "the negative log marginal likelihood wrt %s.%s parameter\n",
        get_name(), param->m_name)

    // create eigen representation of K, Z, dfhat, dlp and alpha
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<MatrixXd> eigen_Z(m_Z.matrix, m_Z.num_rows, m_Z.num_cols);
    Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);
    Map<VectorXd> eigen_dlp(dlp.vector, dlp.vlen);
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

    SGVector<float64_t> result(1);

    // compute derivative K wrt scale
    MatrixXd dK=eigen_K*m_scale*2.0;

    // compute dnlZ=sum(sum(Z.*dK))/2-alpha'*dK*alpha/2
    result[0]=(eigen_Z.cwiseProduct(dK)).sum()/2.0-
        (eigen_alpha.adjoint()*dK).dot(eigen_alpha)/2.0;

    // compute b=dK*dlp
    VectorXd b=dK*eigen_dlp;

    // compute dnlZ=dnlZ-dfhat'*(b-K*(Z*b))
    result[0]=result[0]-eigen_dfhat.dot(b-eigen_K*CMath::sq(m_scale)*(eigen_Z*b));

    return result;
}
SGVector<float64_t> CLaplacianInferenceMethod::get_derivative_wrt_likelihood_model(
    const TParameter* param)
{
    // create eigen representation of K, Z, g and dfhat
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<MatrixXd> eigen_Z(m_Z.matrix, m_Z.num_rows, m_Z.num_cols);
    Map<VectorXd> eigen_g(m_g.vector, m_g.vlen);
    Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);

    // get derivatives wrt likelihood model parameters
    SGVector<float64_t> lp_dhyp=m_model->get_first_derivative(m_labels,
        m_mu, param);
    SGVector<float64_t> dlp_dhyp=m_model->get_second_derivative(m_labels,
        m_mu, param);
    SGVector<float64_t> d2lp_dhyp=m_model->get_third_derivative(m_labels,
        m_mu, param);

    // create eigen representation of the derivatives
    Map<VectorXd> eigen_lp_dhyp(lp_dhyp.vector, lp_dhyp.vlen);
    Map<VectorXd> eigen_dlp_dhyp(dlp_dhyp.vector, dlp_dhyp.vlen);
    Map<VectorXd> eigen_d2lp_dhyp(d2lp_dhyp.vector, d2lp_dhyp.vlen);

    SGVector<float64_t> result(1);

    // compute b vector
    VectorXd b=eigen_K*eigen_dlp_dhyp;

    // compute dnlZ=-g'*d2lp_dhyp-sum(lp_dhyp)-dfhat'*(b-K*(Z*b))
    result[0]=-eigen_g.dot(eigen_d2lp_dhyp)-eigen_lp_dhyp.sum()-
        eigen_dfhat.dot(b-eigen_K*CMath::sq(m_scale)*(eigen_Z*b));

    return result;
}
SGVector<float64_t> CLaplacianInferenceMethod::get_derivative_wrt_kernel(
    const TParameter* param)
{
    // create eigen representation of K, Z, dfhat, dlp and alpha
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<MatrixXd> eigen_Z(m_Z.matrix, m_Z.num_rows, m_Z.num_cols);
    Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);
    Map<VectorXd> eigen_dlp(dlp.vector, dlp.vlen);
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

    SGVector<float64_t> result;

    if (param->m_datatype.m_ctype==CT_VECTOR ||
        param->m_datatype.m_ctype==CT_SGVECTOR)
    {
        REQUIRE(param->m_datatype.m_length_y,
            "Length of the parameter %s should not be NULL\n", param->m_name)
        result=SGVector<float64_t>(*(param->m_datatype.m_length_y));
    }
    else
    {
        result=SGVector<float64_t>(1);
    }

    for (index_t i=0; i<result.vlen; i++)
    {
        SGMatrix<float64_t> dK;

        if (result.vlen==1)
            dK=m_kernel->get_parameter_gradient(param);
        else
            dK=m_kernel->get_parameter_gradient(param, i);

        Map<MatrixXd> eigen_dK(dK.matrix, dK.num_rows, dK.num_cols);

        // compute dnlZ=sum(sum(Z.*dK))/2-alpha'*dK*alpha/2
        result[i]=(eigen_Z.cwiseProduct(eigen_dK)).sum()/2.0-
            (eigen_alpha.adjoint()*eigen_dK).dot(eigen_alpha)/2.0;

        // compute b=dK*dlp
        VectorXd b=eigen_dK*eigen_dlp;

        // compute dnlZ=dnlZ-dfhat'*(b-K*(Z*b))
        result[i]=result[i]-eigen_dfhat.dot(b-eigen_K*CMath::sq(m_scale)*
            (eigen_Z*b));
    }

    return result;
}
SGVector<float64_t> CLaplacianInferenceMethod::get_derivative_wrt_mean(
    const TParameter* param)
{
    // create eigen representation of K, Z, dfhat and alpha
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<MatrixXd> eigen_Z(m_Z.matrix, m_Z.num_rows, m_Z.num_cols);
    Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

    SGVector<float64_t> result;

    if (param->m_datatype.m_ctype==CT_VECTOR ||
        param->m_datatype.m_ctype==CT_SGVECTOR)
    {
        REQUIRE(param->m_datatype.m_length_y,
            "Length of the parameter %s should not be NULL\n", param->m_name)

        result=SGVector<float64_t>(*(param->m_datatype.m_length_y));
    }
    else
    {
        result=SGVector<float64_t>(1);
    }

    for (index_t i=0; i<result.vlen; i++)
    {
        SGVector<float64_t> dmu;

        if (result.vlen==1)
            dmu=m_mean->get_parameter_derivative(m_features, param);
        else
            dmu=m_mean->get_parameter_derivative(m_features, param, i);

        Map<VectorXd> eigen_dmu(dmu.vector, dmu.vlen);

        // compute dnlZ=-alpha'*dm-dfhat'*(dm-K*(Z*dm))
        result[i]=-eigen_alpha.dot(eigen_dmu)-eigen_dfhat.dot(eigen_dmu-
            eigen_K*CMath::sq(m_scale)*(eigen_Z*eigen_dmu));
    }

    return result;
}
}

#endif /* HAVE_EIGEN3 */
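/* A minimal usage sketch under the Shogun 3.0 API, for orientation only.
 * The toy data setup (a feature matrix X and a label vector y) and the
 * particular kernel, mean and likelihood classes chosen here are
 * illustrative assumptions, not part of this file:
 *
 *   #include <shogun/machine/gp/LaplacianInferenceMethod.h>
 *   #include <shogun/kernel/GaussianKernel.h>
 *   #include <shogun/machine/gp/ZeroMean.h>
 *   #include <shogun/machine/gp/LogitLikelihood.h>
 *
 *   CDenseFeatures<float64_t>* feat=new CDenseFeatures<float64_t>(X);
 *   CBinaryLabels* lab=new CBinaryLabels(y);
 *   CGaussianKernel* kernel=new CGaussianKernel(10, 2.0);
 *   CZeroMean* mean=new CZeroMean();
 *   CLogitLikelihood* lik=new CLogitLikelihood();
 *
 *   CLaplacianInferenceMethod* inf=new CLaplacianInferenceMethod(
 *       kernel, feat, mean, lab, lik);
 *   float64_t nlZ=inf->get_negative_marginal_likelihood();
 *   SG_UNREF(inf);
 */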
