EPInferenceMethod.cpp
/*
 * Copyright (c) The Shogun Machine Learning Toolbox
 * Written (W) 2013 Roman Votyakov
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation are those
 * of the authors and should not be interpreted as representing official policies,
 * either expressed or implied, of the Shogun Development Team.
 *
 * Based on ideas from GAUSSIAN PROCESS REGRESSION AND CLASSIFICATION Toolbox
 * http://www.gaussianprocess.org/gpml/code/matlab/doc/
 */
#include <shogun/machine/gp/EPInferenceMethod.h>

#ifdef HAVE_EIGEN3

#include <shogun/mathematics/eigen3.h>
#include <shogun/mathematics/Math.h>

using namespace shogun;
using namespace Eigen;

// try to use previously allocated memory for SGVector
#define CREATE_SGVECTOR(vec, len, sg_type) \
    { \
        if (!vec.vector || vec.vlen!=len) \
            vec=SGVector<sg_type>(len); \
    }

// try to use previously allocated memory for SGMatrix
#define CREATE_SGMATRIX(mat, rows, cols, sg_type) \
    { \
        if (!mat.matrix || mat.num_rows!=rows || mat.num_cols!=cols) \
            mat=SGMatrix<sg_type>(rows, cols); \
    }

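// For example, CREATE_SGVECTOR(m_mu, n, float64_t) keeps the existing buffer
// when m_mu already holds n elements and only allocates a fresh
// SGVector<float64_t>(n) otherwise, so repeated calls to update() do not
// reallocate the posterior vectors.
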
CEPInferenceMethod::CEPInferenceMethod()
{
    init();
}

CEPInferenceMethod::CEPInferenceMethod(CKernel* kernel, CFeatures* features,
    CMeanFunction* mean, CLabels* labels, CLikelihoodModel* model)
    : CInferenceMethod(kernel, features, mean, labels, model)
{
    init();
}

CEPInferenceMethod::~CEPInferenceMethod()
{
}

void CEPInferenceMethod::init()
{
    m_max_sweep=15;
    m_min_sweep=2;
    m_tol=1e-4;
}

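// EP runs at least m_min_sweep and at most m_max_sweep full passes over the
// training points, stopping once the change of the negative log marginal
// likelihood between two sweeps falls below m_tol (see update() below).
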
CEPInferenceMethod* CEPInferenceMethod::obtain_from_generic(
    CInferenceMethod* inference)
{
    if (inference==NULL)
        return NULL;

    if (inference->get_inference_type()!=INF_EP)
        SG_SERROR("Provided inference is not of type CEPInferenceMethod!\n")

    SG_REF(inference);
    return (CEPInferenceMethod*)inference;
}

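// Example (hypothetical caller code): obtain_from_generic() both casts and
// SG_REF's the instance, so the caller owns one reference and must release it:
//
//   CEPInferenceMethod* ep=CEPInferenceMethod::obtain_from_generic(inf);
//   float64_t nlZ=ep->get_negative_log_marginal_likelihood();
//   SG_UNREF(ep);
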
float64_t CEPInferenceMethod::get_negative_log_marginal_likelihood()
{
    if (parameter_hash_changed())
        update();

    return m_nlZ;
}

SGVector<float64_t> CEPInferenceMethod::get_alpha()
{
    if (parameter_hash_changed())
        update();

    return SGVector<float64_t>(m_alpha);
}

SGMatrix<float64_t> CEPInferenceMethod::get_cholesky()
{
    if (parameter_hash_changed())
        update();

    return SGMatrix<float64_t>(m_L);
}

SGVector<float64_t> CEPInferenceMethod::get_diagonal_vector()
{
    if (parameter_hash_changed())
        update();

    return SGVector<float64_t>(m_sttau);
}

SGVector<float64_t> CEPInferenceMethod::get_posterior_mean()
{
    compute_gradient();

    return SGVector<float64_t>(m_mu);
}

SGMatrix<float64_t> CEPInferenceMethod::get_posterior_covariance()
{
    compute_gradient();

    return SGMatrix<float64_t>(m_Sigma);
}

void CEPInferenceMethod::compute_gradient()
{
    CInferenceMethod::compute_gradient();

    if (!m_gradient_update)
    {
        // update matrices to compute derivatives
        update_deriv();
        m_gradient_update=true;
        update_parameter_hash();
    }
}

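// Note: all getters above are lazy. parameter_hash_changed() only reports
// true when some model parameter was modified since the last update(), so
// repeated queries of the posterior quantities cost nothing extra.
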
void CEPInferenceMethod::update()
{
    SG_DEBUG("entering\n");

    // update kernel and feature matrix
    CInferenceMethod::update();

    // get number of labels (training examples)
    index_t n=m_labels->get_num_labels();

    // try to use tilde values from previous call
    if (m_ttau.vlen==n)
    {
        update_chol();
        update_approx_cov();
        update_approx_mean();
        update_negative_ml();
    }

    // get mean vector
    SGVector<float64_t> mean=m_mean->get_mean_vector(m_features);

    // get and scale diagonal of the kernel matrix
    SGVector<float64_t> ktrtr_diag=m_ktrtr.get_diagonal_vector();
    ktrtr_diag.scale(CMath::exp(m_log_scale*2.0));

    // marginal likelihood for ttau = tnu = 0
    float64_t nlZ0=-SGVector<float64_t>::sum(m_model->get_log_zeroth_moments(
        mean, ktrtr_diag, m_labels));

    // use zero values if we have no better guess or it's better
    if (m_ttau.vlen!=n || m_nlZ>nlZ0)
    {
        CREATE_SGVECTOR(m_ttau, n, float64_t);
        m_ttau.zero();

        CREATE_SGVECTOR(m_sttau, n, float64_t);
        m_sttau.zero();

        CREATE_SGVECTOR(m_tnu, n, float64_t);
        m_tnu.zero();

        CREATE_SGMATRIX(m_Sigma, m_ktrtr.num_rows, m_ktrtr.num_cols, float64_t);

        // copy data manually, since we don't have appropriate method
        for (index_t i=0; i<m_ktrtr.num_rows; i++)
            for (index_t j=0; j<m_ktrtr.num_cols; j++)
                m_Sigma(i,j)=m_ktrtr(i,j)*CMath::exp(m_log_scale*2.0);

        CREATE_SGVECTOR(m_mu, n, float64_t);
        m_mu.zero();

        // set marginal likelihood
        m_nlZ=nlZ0;
    }

    // create vector of the random permutation
    SGVector<index_t> v(n);
    v.range_fill();

    // cavity tau and nu vectors
    SGVector<float64_t> tau_n(n);
    SGVector<float64_t> nu_n(n);

    // cavity mu and s2 vectors
    SGVector<float64_t> mu_n(n);
    SGVector<float64_t> s2_n(n);

    float64_t nlZ_old=CMath::INFTY;
    uint32_t sweep=0;

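    // EP sweeps: visit the sites in random order and refit one site
    // approximation at a time until the negative log marginal likelihood
    // converges. For each site i the cavity distribution is obtained by
    // removing site i from the current approximate posterior marginal:
    //
    //   tau_n[i] = 1/Sigma(i,i) - ttau[i]
    //   nu_n[i]  = mu[i]/Sigma(i,i) + mean[i]*tau_n[i] - tnu[i]
    //
    // and the new site parameters are chosen so that cavity times exact
    // likelihood and cavity times site approximation agree in their zeroth,
    // first and second moments (moment matching).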
    while ((CMath::abs(m_nlZ-nlZ_old)>m_tol && sweep<m_max_sweep) ||
        sweep<m_min_sweep)
    {
        nlZ_old=m_nlZ;
        sweep++;

        // shuffle random permutation
        CMath::permute(v);

        for (index_t j=0; j<n; j++)
        {
            index_t i=v[j];

            // find cavity parameters
            tau_n[i]=1.0/m_Sigma(i,i)-m_ttau[i];
            nu_n[i]=m_mu[i]/m_Sigma(i,i)+mean[i]*tau_n[i]-m_tnu[i];

            // compute cavity mean and variance
            mu_n[i]=nu_n[i]/tau_n[i];
            s2_n[i]=1.0/tau_n[i];

            // get moments
            float64_t mu=m_model->get_first_moment(mu_n, s2_n, m_labels, i);
            float64_t s2=m_model->get_second_moment(mu_n, s2_n, m_labels, i);

            // save old value of ttau
            float64_t ttau_old=m_ttau[i];

            // compute ttau and sqrt(ttau)
            m_ttau[i]=CMath::max(1.0/s2-tau_n[i], 0.0);
            m_sttau[i]=CMath::sqrt(m_ttau[i]);

            // compute tnu
            m_tnu[i]=mu/s2-nu_n[i];

            // compute difference ds2=ttau_new-ttau_old
            float64_t ds2=m_ttau[i]-ttau_old;
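
            // Only Sigma and mu are refreshed after each site update; the
            // Cholesky factor is recomputed once per sweep. Since only site i
            // changed, Sigma^(-1) changes by the rank-1 term ds2*e_i*e_i', so
            // the Sherman-Morrison formula gives the new covariance directly:
            //
            //   Sigma_new = Sigma - ds2/(1+ds2*Sigma(i,i)) * s_i * s_i'
            //
            // where s_i is the i-th column of Sigma.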

            // create eigen representation of Sigma, tnu and mu
            Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows,
                m_Sigma.num_cols);
            Map<VectorXd> eigen_tnu(m_tnu.vector, m_tnu.vlen);
            Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);

            VectorXd eigen_si=eigen_Sigma.col(i);

            // rank-1 update Sigma
            eigen_Sigma=eigen_Sigma-ds2/(1.0+ds2*eigen_si(i))*eigen_si*
                eigen_si.adjoint();

            // update mu
            eigen_mu=eigen_Sigma*eigen_tnu;
        }

        // update upper triangular factor (L^T) of Cholesky decomposition of
        // matrix B, approximate posterior covariance and mean, negative
        // marginal likelihood
        update_chol();
        update_approx_cov();
        update_approx_mean();
        update_negative_ml();
    }

    if (sweep==m_max_sweep && CMath::abs(m_nlZ-nlZ_old)>m_tol)
    {
        SG_ERROR("Maximum number (%d) of sweeps reached, but tolerance (%f) was "
            "not yet reached. You can manually set the maximum number of sweeps "
            "or the tolerance to fix this problem.\n", m_max_sweep, m_tol);
    }

    // update vector alpha
    update_alpha();

    m_gradient_update=false;

    // update hash of the parameters
    update_parameter_hash();

    SG_DEBUG("leaving\n");
}

void CEPInferenceMethod::update_alpha()
{
    // create eigen representations of kernel matrix, L^T, sqrt(ttau) and tnu
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<VectorXd> eigen_tnu(m_tnu.vector, m_tnu.vlen);
    Map<VectorXd> eigen_sttau(m_sttau.vector, m_sttau.vlen);
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);

    // create shogun and eigen representation of the alpha vector
    CREATE_SGVECTOR(m_alpha, m_tnu.vlen, float64_t);
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

    // solve LL^T * v = tS^(1/2) * K * tnu
    VectorXd eigen_v=eigen_L.triangularView<Upper>().adjoint().solve(
        eigen_sttau.cwiseProduct(eigen_K*CMath::exp(m_log_scale*2.0)*eigen_tnu));
    eigen_v=eigen_L.triangularView<Upper>().solve(eigen_v);

    // compute alpha = (I - tS^(1/2) * B^(-1) * tS^(1/2) * K) * tnu =
    //    tnu - tS^(1/2) * (L^T)^(-1) * L^(-1) * tS^(1/2) * K * tnu =
    //    tnu - tS^(1/2) * v
    eigen_alpha=eigen_tnu-eigen_sttau.cwiseProduct(eigen_v);
}

void CEPInferenceMethod::update_chol()
{
    // create eigen representations of kernel matrix and sqrt(ttau)
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<VectorXd> eigen_sttau(m_sttau.vector, m_sttau.vlen);

    // create shogun and eigen representation of the upper triangular factor
    // (L^T) of the Cholesky decomposition of the matrix B
    CREATE_SGMATRIX(m_L, m_ttau.vlen, m_ttau.vlen, float64_t);
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);

    // compute upper triangular factor L^T of the Cholesky decomposition of
    // the matrix: B = tS^(1/2) * K * tS^(1/2) + I
    LLT<MatrixXd> eigen_chol((eigen_sttau*eigen_sttau.adjoint()).cwiseProduct(
        eigen_K*CMath::exp(m_log_scale*2.0))+
        MatrixXd::Identity(m_L.num_rows, m_L.num_cols));

    eigen_L=eigen_chol.matrixU();
}

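// Note: every eigenvalue of B = tS^(1/2)*K*tS^(1/2) + I is at least one,
// since K is positive semi-definite and the site precisions ttau are clamped
// to be non-negative in update(), so the Cholesky factorisation above is
// numerically safe.
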
void CEPInferenceMethod::update_approx_cov()
{
    // create eigen representations of kernel matrix, L^T matrix and sqrt(ttau)
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
    Map<VectorXd> eigen_sttau(m_sttau.vector, m_sttau.vlen);

    // create shogun and eigen representation of the approximate covariance
    // matrix
    CREATE_SGMATRIX(m_Sigma, m_ktrtr.num_rows, m_ktrtr.num_cols, float64_t);
    Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows, m_Sigma.num_cols);

    // compute V = L^(-1) * tS^(1/2) * K, using upper triangular factor L^T
    MatrixXd eigen_V=eigen_L.triangularView<Upper>().adjoint().solve(
        eigen_sttau.asDiagonal()*eigen_K*CMath::exp(m_log_scale*2.0));

    // compute covariance matrix of the posterior:
    // Sigma = K - K * tS^(1/2) * (L * L^T)^(-1) * tS^(1/2) * K =
    //    K - (K * tS^(1/2)) * (L^T)^(-1) * L^(-1) * tS^(1/2) * K =
    //    K - (tS^(1/2) * K)^T * (L^(-1))^T * L^(-1) * tS^(1/2) * K = K - V^T * V
    eigen_Sigma=eigen_K*CMath::exp(m_log_scale*2.0)-eigen_V.adjoint()*eigen_V;
}

void CEPInferenceMethod::update_approx_mean()
{
    // create eigen representation of posterior covariance matrix and tnu
    Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows, m_Sigma.num_cols);
    Map<VectorXd> eigen_tnu(m_tnu.vector, m_tnu.vlen);

    // create shogun and eigen representation of the approximate mean vector
    CREATE_SGVECTOR(m_mu, m_tnu.vlen, float64_t);
    Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);

    // compute mean vector of the approximate posterior: mu = Sigma * tnu
    eigen_mu=eigen_Sigma*eigen_tnu;
}

void CEPInferenceMethod::update_negative_ml()
{
    // create eigen representation of Sigma, L, mu, tnu, ttau
    Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows, m_Sigma.num_cols);
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
    Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);
    Map<VectorXd> eigen_tnu(m_tnu.vector, m_tnu.vlen);
    Map<VectorXd> eigen_ttau(m_ttau.vector, m_ttau.vlen);

    // get and create eigen representation of the mean vector
    SGVector<float64_t> m=m_mean->get_mean_vector(m_features);
    Map<VectorXd> eigen_m(m.vector, m.vlen);

    // compute vector of cavity parameter tau_n
    VectorXd eigen_tau_n=(VectorXd::Ones(m_ttau.vlen)).cwiseQuotient(
        eigen_Sigma.diagonal())-eigen_ttau;

    // compute vector of cavity parameter nu_n
    VectorXd eigen_nu_n=eigen_mu.cwiseQuotient(eigen_Sigma.diagonal())-
        eigen_tnu+eigen_m.cwiseProduct(eigen_tau_n);

    // compute cavity mean: mu_n=nu_n/tau_n
    SGVector<float64_t> mu_n(m_ttau.vlen);
    Map<VectorXd> eigen_mu_n(mu_n.vector, mu_n.vlen);

    eigen_mu_n=eigen_nu_n.cwiseQuotient(eigen_tau_n);

    // compute cavity variance: s2_n=1.0/tau_n
    SGVector<float64_t> s2_n(m_ttau.vlen);
    Map<VectorXd> eigen_s2_n(s2_n.vector, s2_n.vlen);

    eigen_s2_n=(VectorXd::Ones(m_ttau.vlen)).cwiseQuotient(eigen_tau_n);

    // compute sum of the log zeroth moments
    float64_t lZ=SGVector<float64_t>::sum(
        m_model->get_log_zeroth_moments(mu_n, s2_n, m_labels));

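    // The negative log marginal likelihood -log Z_EP is assembled from three
    // parts, following the EP expression used in the GPML toolbox (Rasmussen
    // & Williams, Section 3.6): a determinant plus quadratic term of the
    // approximate posterior, and two correction terms that compare the site
    // approximations with the cavity distributions via lZ.
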
    // compute nlZ_part1=sum(log(diag(L)))-sum(lZ)-tnu'*Sigma*tnu/2
    float64_t nlZ_part1=eigen_L.diagonal().array().log().sum()-lZ-
        (eigen_tnu.adjoint()*eigen_Sigma).dot(eigen_tnu)/2.0;

    // compute nlZ_part2=sum(tnu.^2./(tau_n+ttau))/2-sum(log(1+ttau./tau_n))/2
    float64_t nlZ_part2=(eigen_tnu.array().square()/
        (eigen_tau_n+eigen_ttau).array()).sum()/2.0-(1.0+eigen_ttau.array()/
        eigen_tau_n.array()).log().sum()/2.0;

    // compute nlZ_part3=-(nu_n-m.*tau_n)'*((ttau./tau_n.*(nu_n-m.*tau_n)-2*tnu)
    //    ./(ttau+tau_n))/2
    float64_t nlZ_part3=-(eigen_nu_n-eigen_m.cwiseProduct(eigen_tau_n)).dot(
        ((eigen_ttau.array()/eigen_tau_n.array()*(eigen_nu_n.array()-
        eigen_m.array()*eigen_tau_n.array())-2*eigen_tnu.array())/
        (eigen_ttau.array()+eigen_tau_n.array())).matrix())/2.0;

    // compute nlZ=nlZ_part1+nlZ_part2+nlZ_part3
    m_nlZ=nlZ_part1+nlZ_part2+nlZ_part3;
}

void CEPInferenceMethod::update_deriv()
{
    // create eigen representation of L, sttau, alpha
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
    Map<VectorXd> eigen_sttau(m_sttau.vector, m_sttau.vlen);
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

    // create shogun and eigen representation of F
    CREATE_SGMATRIX(m_F, m_L.num_rows, m_L.num_cols, float64_t);
    Map<MatrixXd> eigen_F(m_F.matrix, m_F.num_rows, m_F.num_cols);

    // solve L*L^T * V = diag(sqrt(ttau))
    MatrixXd V=eigen_L.triangularView<Upper>().adjoint().solve(
        MatrixXd(eigen_sttau.asDiagonal()));
    V=eigen_L.triangularView<Upper>().solve(V);

    // compute F=alpha*alpha'-repmat(sW,1,n).*solve_chol(L,diag(sW))
    eigen_F=eigen_alpha*eigen_alpha.adjoint()-eigen_sttau.asDiagonal()*V;
}

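// With F = alpha*alpha' - tS^(1/2)*B^(-1)*tS^(1/2) precomputed above, the
// derivative of the negative log marginal likelihood w.r.t. any covariance
// parameter theta reduces to dnlZ/dtheta = -sum(F .* dK/dtheta)/2, which is
// exactly what the two kernel derivative functions below evaluate.
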
SGVector<float64_t> CEPInferenceMethod::get_derivative_wrt_inference_method(
    const TParameter* param)
{
    REQUIRE(!strcmp(param->m_name, "log_scale"), "Can't compute derivative of "
        "the negative log marginal likelihood wrt %s.%s parameter\n",
        get_name(), param->m_name)

    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<MatrixXd> eigen_F(m_F.matrix, m_F.num_rows, m_F.num_cols);

    SGVector<float64_t> result(1);

    // compute derivative wrt kernel scale: dnlZ=-sum(F.*K*scale*2)/2
    result[0]=-(eigen_F.cwiseProduct(eigen_K)).sum();
    result[0]*=CMath::exp(m_log_scale*2.0);

    return result;
}

SGVector<float64_t> CEPInferenceMethod::get_derivative_wrt_likelihood_model(
    const TParameter* param)
{
    SG_NOTIMPLEMENTED;
    return SGVector<float64_t>();
}

SGVector<float64_t> CEPInferenceMethod::get_derivative_wrt_kernel(
    const TParameter* param)
{
    // create eigen representation of the matrix F
    Map<MatrixXd> eigen_F(m_F.matrix, m_F.num_rows, m_F.num_cols);

    REQUIRE(param, "Param not set\n");
    SGVector<float64_t> result;
    int64_t len=const_cast<TParameter*>(param)->m_datatype.get_num_elements();
    result=SGVector<float64_t>(len);

    for (index_t i=0; i<result.vlen; i++)
    {
        SGMatrix<float64_t> dK;

        if (result.vlen==1)
            dK=m_kernel->get_parameter_gradient(param);
        else
            dK=m_kernel->get_parameter_gradient(param, i);

        Map<MatrixXd> eigen_dK(dK.matrix, dK.num_rows, dK.num_cols);

        // compute derivative wrt kernel parameter: dnlZ=-sum(F.*dK*scale^2)/2.0
        result[i]=-(eigen_F.cwiseProduct(eigen_dK)).sum();
        result[i]*=CMath::exp(m_log_scale*2.0)/2.0;
    }

    return result;
}

SGVector<float64_t> CEPInferenceMethod::get_derivative_wrt_mean(
    const TParameter* param)
{
    SG_NOTIMPLEMENTED;
    return SGVector<float64_t>();
}

#endif /* HAVE_EIGEN3 */