SHOGUN  4.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
EPInferenceMethod.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2013 Roman Votyakov
8  *
9  * Based on ideas from GAUSSIAN PROCESS REGRESSION AND CLASSIFICATION Toolbox
10  * Copyright (C) 2005-2013 by Carl Edward Rasmussen & Hannes Nickisch under the
11  * FreeBSD License
12  * http://www.gaussianprocess.org/gpml/code/matlab/doc/
13  */
14 
16 
17 #ifdef HAVE_EIGEN3
18 
24 
26 
27 using namespace shogun;
28 using namespace Eigen;
29 
30 // try to use previously allocated memory for SGVector
31 #define CREATE_SGVECTOR(vec, len, sg_type) \
32  { \
33  if (!vec.vector || vec.vlen!=len) \
34  vec=SGVector<sg_type>(len); \
35  }
36 
37 // try to use previously allocated memory for SGMatrix
38 #define CREATE_SGMATRIX(mat, rows, cols, sg_type) \
39  { \
40  if (!mat.matrix || mat.num_rows!=rows || mat.num_cols!=cols) \
41  mat=SGMatrix<sg_type>(rows, cols); \
42  }
43 
45 {
46  init();
47 }
48 
50  CMeanFunction* mean, CLabels* labels, CLikelihoodModel* model)
51  : CInferenceMethod(kernel, features, mean, labels, model)
52 {
53  init();
54 }
55 
57 {
58 }
59 
60 void CEPInferenceMethod::init()
61 {
62  m_max_sweep=15;
63  m_min_sweep=2;
64  m_tol=1e-4;
65 }
66 
68  CInferenceMethod* inference)
69 {
70  REQUIRE(inference, "Inference pointer not set.\n");
71 
72  if (inference->get_inference_type()!=INF_EP)
73  SG_SERROR("Provided inference is not of type CEPInferenceMethod!\n")
74 
75  SG_REF(inference);
76  return (CEPInferenceMethod*)inference;
77 }
78 
80 {
82  update();
83 
84  return m_nlZ;
85 }
86 
88 {
90  update();
91 
93 }
94 
96 {
98  update();
99 
100  return SGMatrix<float64_t>(m_L);
101 }
102 
104 {
106  update();
107 
108  return SGVector<float64_t>(m_sttau);
109 }
110 
112 {
114  update();
115 
116  return SGVector<float64_t>(m_mu);
117 }
118 
120 {
122  update();
123 
124  return SGMatrix<float64_t>(m_Sigma);
125 }
126 
128 {
129  SG_DEBUG("entering\n");
130 
131  // update kernel and feature matrix
133 
134  // get number of labels (trainig examples)
136 
137  // try to use tilde values from previous call
138  if (m_ttau.vlen==n)
139  {
140  update_chol();
144  }
145 
146  // get mean vector
148 
149  // get and scale diagonal of the kernel matrix
151  ktrtr_diag.scale(CMath::sq(m_scale));
152 
153  // marginal likelihood for ttau = tnu = 0
155  mean, ktrtr_diag, m_labels));
156 
157  // use zero values if we have no better guess or it's better
158  if (m_ttau.vlen!=n || m_nlZ>nlZ0)
159  {
160  CREATE_SGVECTOR(m_ttau, n, float64_t);
161  m_ttau.zero();
162 
163  CREATE_SGVECTOR(m_sttau, n, float64_t);
164  m_sttau.zero();
165 
166  CREATE_SGVECTOR(m_tnu, n, float64_t);
167  m_tnu.zero();
168 
170 
171  // copy data manually, since we don't have appropriate method
172  for (index_t i=0; i<m_ktrtr.num_rows; i++)
173  for (index_t j=0; j<m_ktrtr.num_cols; j++)
174  m_Sigma(i,j)=m_ktrtr(i,j)*CMath::sq(m_scale);
175 
176  CREATE_SGVECTOR(m_mu, n, float64_t);
177  m_mu.zero();
178 
179  // set marginal likelihood
180  m_nlZ=nlZ0;
181  }
182 
183  // create vector of the random permutation
184  SGVector<index_t> v(n);
185  v.range_fill();
186 
187  // cavity tau and nu vectors
188  SGVector<float64_t> tau_n(n);
189  SGVector<float64_t> nu_n(n);
190 
191  // cavity mu and s2 vectors
192  SGVector<float64_t> mu_n(n);
193  SGVector<float64_t> s2_n(n);
194 
195  float64_t nlZ_old=CMath::INFTY;
196  uint32_t sweep=0;
197 
198  while ((CMath::abs(m_nlZ-nlZ_old)>m_tol && sweep<m_max_sweep) ||
199  sweep<m_min_sweep)
200  {
201  nlZ_old=m_nlZ;
202  sweep++;
203 
204  // shuffle random permutation
205  CMath::permute(v);
206 
207  for (index_t j=0; j<n; j++)
208  {
209  index_t i=v[j];
210 
211  // find cavity paramters
212  tau_n[i]=1.0/m_Sigma(i,i)-m_ttau[i];
213  nu_n[i]=m_mu[i]/m_Sigma(i,i)+mean[i]*tau_n[i]-m_tnu[i];
214 
215  // compute cavity mean and variance
216  mu_n[i]=nu_n[i]/tau_n[i];
217  s2_n[i]=1.0/tau_n[i];
218 
219  // get moments
220  float64_t mu=m_model->get_first_moment(mu_n, s2_n, m_labels, i);
221  float64_t s2=m_model->get_second_moment(mu_n, s2_n, m_labels, i);
222 
223  // save old value of ttau
224  float64_t ttau_old=m_ttau[i];
225 
226  // compute ttau and sqrt(ttau)
227  m_ttau[i]=CMath::max(1.0/s2-tau_n[i], 0.0);
228  m_sttau[i]=CMath::sqrt(m_ttau[i]);
229 
230  // compute tnu
231  m_tnu[i]=mu/s2-nu_n[i];
232 
233  // compute difference ds2=ttau_new-ttau_old
234  float64_t ds2=m_ttau[i]-ttau_old;
235 
236  // create eigen representation of Sigma, tnu and mu
237  Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows,
238  m_Sigma.num_cols);
239  Map<VectorXd> eigen_tnu(m_tnu.vector, m_tnu.vlen);
240  Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);
241 
242  VectorXd eigen_si=eigen_Sigma.col(i);
243 
244  // rank-1 update Sigma
245  eigen_Sigma=eigen_Sigma-ds2/(1.0+ds2*eigen_si(i))*eigen_si*
246  eigen_si.adjoint();
247 
248  // update mu
249  eigen_mu=eigen_Sigma*eigen_tnu;
250  }
251 
252  // update upper triangular factor (L^T) of Cholesky decomposition of
253  // matrix B, approximate posterior covariance and mean, negative
254  // marginal likelihood
255  update_chol();
259  }
260 
261  if (sweep==m_max_sweep && CMath::abs(m_nlZ-nlZ_old)>m_tol)
262  {
263  SG_ERROR("Maximum number (%d) of sweeps reached, but tolerance (%f) was "
264  "not yet reached. You can manually set maximum number of sweeps "
265  "or tolerance to fix this problem.\n", m_max_sweep, m_tol);
266  }
267 
268  // update vector alpha
269  update_alpha();
270 
271  // update matrices to compute derivatives
272  update_deriv();
273 
274  // update hash of the parameters
276 
277  SG_DEBUG("leaving\n");
278 }
279 
281 {
282  // create eigen representations kernel matrix, L^T, sqrt(ttau) and tnu
283  Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
284  Map<VectorXd> eigen_tnu(m_tnu.vector, m_tnu.vlen);
285  Map<VectorXd> eigen_sttau(m_sttau.vector, m_sttau.vlen);
286  Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
287 
288  // create shogun and eigen representation of the alpha vector
290  Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
291 
292  // solve LL^T * v = tS^(1/2) * K * tnu
293  VectorXd eigen_v=eigen_L.triangularView<Upper>().adjoint().solve(
294  eigen_sttau.cwiseProduct(eigen_K*CMath::sq(m_scale)*eigen_tnu));
295  eigen_v=eigen_L.triangularView<Upper>().solve(eigen_v);
296 
297  // compute alpha = (I - tS^(1/2) * B^(-1) * tS(1/2) * K) * tnu =
298  // tnu - tS(1/2) * (L^T)^(-1) * L^(-1) * tS^(1/2) * K * tnu =
299  // tnu - tS(1/2) * v
300  eigen_alpha=eigen_tnu-eigen_sttau.cwiseProduct(eigen_v);
301 }
302 
304 {
305  // create eigen representations of kernel matrix and sqrt(ttau)
306  Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
307  Map<VectorXd> eigen_sttau(m_sttau.vector, m_sttau.vlen);
308 
309  // create shogun and eigen representation of the upper triangular factor
310  // (L^T) of the Cholesky decomposition of the matrix B
312  Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
313 
314  // compute upper triangular factor L^T of the Cholesky decomposion of the
315  // matrix: B = tS^(1/2) * K * tS^(1/2) + I
316  LLT<MatrixXd> eigen_chol((eigen_sttau*eigen_sttau.adjoint()).cwiseProduct(
317  eigen_K*CMath::sq(m_scale))+
318  MatrixXd::Identity(m_L.num_rows, m_L.num_cols));
319 
320  eigen_L=eigen_chol.matrixU();
321 }
322 
324 {
325  // create eigen representations of kernel matrix, L^T matrix and sqrt(ttau)
326  Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
327  Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
328  Map<VectorXd> eigen_sttau(m_sttau.vector, m_sttau.vlen);
329 
330  // create shogun and eigen representation of the approximate covariance
331  // matrix
333  Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows, m_Sigma.num_cols);
334 
335  // compute V = L^(-1) * tS^(1/2) * K, using upper triangular factor L^T
336  MatrixXd eigen_V=eigen_L.triangularView<Upper>().adjoint().solve(
337  eigen_sttau.asDiagonal()*eigen_K*CMath::sq(m_scale));
338 
339  // compute covariance matrix of the posterior:
340  // Sigma = K - K * tS^(1/2) * (L * L^T)^(-1) * tS^(1/2) * K =
341  // K - (K * tS^(1/2)) * (L^T)^(-1) * L^(-1) * tS^(1/2) * K =
342  // K - (tS^(1/2) * K)^T * (L^(-1))^T * L^(-1) * tS^(1/2) * K = K - V^T * V
343  eigen_Sigma=eigen_K*CMath::sq(m_scale)-eigen_V.adjoint()*eigen_V;
344 }
345 
347 {
348  // create eigen representation of posterior covariance matrix and tnu
349  Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows, m_Sigma.num_cols);
350  Map<VectorXd> eigen_tnu(m_tnu.vector, m_tnu.vlen);
351 
352  // create shogun and eigen representation of the approximate mean vector
353  CREATE_SGVECTOR(m_mu, m_tnu.vlen, float64_t);
354  Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);
355 
356  // compute mean vector of the approximate posterior: mu = Sigma * tnu
357  eigen_mu=eigen_Sigma*eigen_tnu;
358 }
359 
361 {
362  // create eigen representation of Sigma, L, mu, tnu, ttau
363  Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows, m_Sigma.num_cols);
364  Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
365  Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);
366  Map<VectorXd> eigen_tnu(m_tnu.vector, m_tnu.vlen);
367  Map<VectorXd> eigen_ttau(m_ttau.vector, m_ttau.vlen);
368 
369  // get and create eigen representation of the mean vector
371  Map<VectorXd> eigen_m(m.vector, m.vlen);
372 
373  // compute vector of cavity parameter tau_n
374  VectorXd eigen_tau_n=(VectorXd::Ones(m_ttau.vlen)).cwiseQuotient(
375  eigen_Sigma.diagonal())-eigen_ttau;
376 
377  // compute vector of cavity parameter nu_n
378  VectorXd eigen_nu_n=eigen_mu.cwiseQuotient(eigen_Sigma.diagonal())-
379  eigen_tnu+eigen_m.cwiseProduct(eigen_tau_n);
380 
381  // compute cavity mean: mu_n=nu_n/tau_n
382  SGVector<float64_t> mu_n(m_ttau.vlen);
383  Map<VectorXd> eigen_mu_n(mu_n.vector, mu_n.vlen);
384 
385  eigen_mu_n=eigen_nu_n.cwiseQuotient(eigen_tau_n);
386 
387  // compute cavity variance: s2_n=1.0/tau_n
388  SGVector<float64_t> s2_n(m_ttau.vlen);
389  Map<VectorXd> eigen_s2_n(s2_n.vector, s2_n.vlen);
390 
391  eigen_s2_n=(VectorXd::Ones(m_ttau.vlen)).cwiseQuotient(eigen_tau_n);
392 
394  m_model->get_log_zeroth_moments(mu_n, s2_n, m_labels));
395 
396  // compute nlZ_part1=sum(log(diag(L)))-sum(lZ)-tnu'*Sigma*tnu/2
397  float64_t nlZ_part1=eigen_L.diagonal().array().log().sum()-lZ-
398  (eigen_tnu.adjoint()*eigen_Sigma).dot(eigen_tnu)/2.0;
399 
400  // compute nlZ_part2=sum(tnu.^2./(tau_n+ttau))/2-sum(log(1+ttau./tau_n))/2
401  float64_t nlZ_part2=(eigen_tnu.array().square()/
402  (eigen_tau_n+eigen_ttau).array()).sum()/2.0-(1.0+eigen_ttau.array()/
403  eigen_tau_n.array()).log().sum()/2.0;
404 
405  // compute nlZ_part3=-(nu_n-m.*tau_n)'*((ttau./tau_n.*(nu_n-m.*tau_n)-2*tnu)
406  // ./(ttau+tau_n))/2
407  float64_t nlZ_part3=-(eigen_nu_n-eigen_m.cwiseProduct(eigen_tau_n)).dot(
408  ((eigen_ttau.array()/eigen_tau_n.array()*(eigen_nu_n.array()-
409  eigen_m.array()*eigen_tau_n.array())-2*eigen_tnu.array())/
410  (eigen_ttau.array()+eigen_tau_n.array())).matrix())/2.0;
411 
412  // compute nlZ=nlZ_part1+nlZ_part2+nlZ_part3
413  m_nlZ=nlZ_part1+nlZ_part2+nlZ_part3;
414 }
415 
417 {
418  // create eigen representation of L, sstau, alpha
419  Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
420  Map<VectorXd> eigen_sttau(m_sttau.vector, m_sttau.vlen);
421  Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
422 
423  // create shogun and eigen representation of F
425  Map<MatrixXd> eigen_F(m_F.matrix, m_F.num_rows, m_F.num_cols);
426 
427  // solve L*L^T * V = diag(sqrt(ttau))
428  MatrixXd V=eigen_L.triangularView<Upper>().adjoint().solve(
429  MatrixXd(eigen_sttau.asDiagonal()));
430  V=eigen_L.triangularView<Upper>().solve(V);
431 
432  // compute F=alpha*alpha'-repmat(sW,1,n).*solve_chol(L,diag(sW))
433  eigen_F=eigen_alpha*eigen_alpha.adjoint()-eigen_sttau.asDiagonal()*V;
434 }
435 
437  const TParameter* param)
438 {
439  REQUIRE(!strcmp(param->m_name, "scale"), "Can't compute derivative of "
440  "the nagative log marginal likelihood wrt %s.%s parameter\n",
441  get_name(), param->m_name)
442 
443  Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
444  Map<MatrixXd> eigen_F(m_F.matrix, m_F.num_rows, m_F.num_cols);
445 
446  SGVector<float64_t> result(1);
447 
448  // compute derivative wrt kernel scale: dnlZ=-sum(F.*K*scale*2)/2
449  result[0]=-(eigen_F.cwiseProduct(eigen_K)*m_scale*2.0).sum()/2.0;
450 
451  return result;
452 }
453 
455  const TParameter* param)
456 {
458  return SGVector<float64_t>();
459 }
460 
462  const TParameter* param)
463 {
464  // create eigen representation of the matrix Q
465  Map<MatrixXd> eigen_F(m_F.matrix, m_F.num_rows, m_F.num_cols);
466 
467  SGVector<float64_t> result;
468 
469  if (param->m_datatype.m_ctype==CT_VECTOR ||
470  param->m_datatype.m_ctype==CT_SGVECTOR ||
471  param->m_datatype.m_ctype==CT_MATRIX ||
472  param->m_datatype.m_ctype==CT_SGMATRIX)
473  {
475  "Length of the parameter %s should not be NULL\n", param->m_name)
476  result=SGVector<float64_t>(*(param->m_datatype.m_length_y));
477  }
478  else
479  {
480  result=SGVector<float64_t>(1);
481  }
482 
483  for (index_t i=0; i<result.vlen; i++)
484  {
486 
487  if (result.vlen==1)
488  dK=m_kernel->get_parameter_gradient(param);
489  else
490  dK=m_kernel->get_parameter_gradient(param, i);
491 
492  Map<MatrixXd> eigen_dK(dK.matrix, dK.num_rows, dK.num_cols);
493 
494  // compute derivative wrt kernel parameter: dnlZ=-sum(F.*dK*scale^2)/2.0
495  result[i]=-(eigen_F.cwiseProduct(eigen_dK)*CMath::sq(m_scale)).sum()/2.0;
496  }
497 
498  return result;
499 }
500 
502  const TParameter* param)
503 {
505  return SGVector<float64_t>();
506 }
507 
508 #endif /* HAVE_EIGEN3 */

SHOGUN Machine Learning Toolbox - Documentation