SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ExactInferenceMethod.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2013 Roman Votyakov
8  * Copyright (C) 2012 Jacob Walker
9  * Copyright (C) 2013 Roman Votyakov
10  *
11  * Code adapted from Gaussian Process Machine Learning Toolbox
12  * http://www.gaussianprocess.org/gpml/code/matlab/doc/
13  */
14 
16 
17 #ifdef HAVE_EIGEN3
18 
23 
24 using namespace shogun;
25 using namespace Eigen;
26 
28 {
29 }
30 
32  CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod) :
33  CInferenceMethod(kern, feat, m, lab, mod)
34 {
35 }
36 
38 {
39 }
40 
42 {
44  update_chol();
45  update_alpha();
46  update_deriv();
47  update_mean();
48  update_cov();
49 }
50 
52 {
54 
56  "Exact inference method can only use Gaussian likelihood function\n")
58  "Labels must be type of CRegressionLabels\n")
59 }
60 
62 {
64  update();
65 
66  // get the sigma variable from the Gaussian likelihood model
68  float64_t sigma=lik->get_sigma();
69  SG_UNREF(lik);
70 
71  // compute diagonal vector: sW=1/sigma
73  result.fill_vector(result.vector, m_features->get_num_vectors(), 1.0/sigma);
74 
75  return result;
76 }
77 
79 {
81  update();
82 
83  // get the sigma variable from the Gaussian likelihood model
85  float64_t sigma=lik->get_sigma();
86  SG_UNREF(lik);
87 
88  // create eigen representation of alpha and L
89  Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
90  Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
91 
92  // get labels and mean vectors and create eigen representation
93  SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels();
94  Map<VectorXd> eigen_y(y.vector, y.vlen);
96  Map<VectorXd> eigen_m(m.vector, m.vlen);
97 
98  // compute negative log of the marginal likelihood:
99  // nlZ=(y-m)'*alpha/2+sum(log(diag(L)))+n*log(2*pi*sigma^2)/2
100  float64_t result=(eigen_y-eigen_m).dot(eigen_alpha)/2.0+
101  eigen_L.diagonal().array().log().sum()+m_L.num_rows*
102  CMath::log(2*CMath::PI*CMath::sq(sigma))/2.0;
103 
104  return result;
105 }
106 
108 {
109  if (update_parameter_hash())
110  update();
111 
113 }
114 
116 {
117  if (update_parameter_hash())
118  update();
119 
120  return SGMatrix<float64_t>(m_L);
121 }
122 
124 {
125  if (update_parameter_hash())
126  update();
127 
128  return SGVector<float64_t>(m_mu);
129 }
130 
132 {
133  if (update_parameter_hash())
134  update();
135 
136  return SGMatrix<float64_t>(m_Sigma);
137 }
138 
140 {
141  // get the sigma variable from the Gaussian likelihood model
143  float64_t sigma=lik->get_sigma();
144  SG_UNREF(lik);
145 
146  /* check whether to allocate cholesky memory */
149 
150  /* creates views on kernel and cholesky matrix and perform cholesky */
151  Map<MatrixXd> K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
152  Map<MatrixXd> L(m_L.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
153  LLT<MatrixXd> llt(K*(CMath::sq(m_scale)/CMath::sq(sigma))+
154  MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols));
155  L=llt.matrixU();
156 }
157 
159 {
160  // get the sigma variable from the Gaussian likelihood model
162  float64_t sigma=lik->get_sigma();
163  SG_UNREF(lik);
164 
165  // get labels and mean vector and create eigen representation
166  SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels();
167  Map<VectorXd> eigen_y(y.vector, y.vlen);
169  Map<VectorXd> eigen_m(m.vector, m.vlen);
170 
172 
173  /* creates views on cholesky matrix and alpha and solve system
174  * (L * L^T) * a = y for a */
175  Map<VectorXd> a(m_alpha.vector, m_alpha.vlen);
176  Map<MatrixXd> L(m_L.matrix, m_L.num_rows, m_L.num_cols);
177 
178  a=L.triangularView<Upper>().adjoint().solve(eigen_y-eigen_m);
179  a=L.triangularView<Upper>().solve(a);
180 
181  a/=CMath::sq(sigma);
182 }
183 
185 {
186  // create eigen representataion of kernel matrix and alpha
187  Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
188  Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
189 
190  // get mean and create eigen representation of it
192  Map<VectorXd> eigen_m(m.vector, m.vlen);
193 
194  m_mu=SGVector<float64_t>(m.vlen);
195  Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);
196 
197  // compute mean: mu=K'*alpha+m
198  eigen_mu=eigen_K*CMath::sq(m_scale)*eigen_alpha+eigen_m;
199 }
200 
202 {
203  // create eigen representataion of upper triangular factor L^T and kernel
204  // matrix
205  Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
206  Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
207 
209  Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows,
210  m_Sigma.num_cols);
211 
212  // compute V = L^(-1) * K, using upper triangular factor L^T
213  MatrixXd eigen_V=eigen_L.triangularView<Upper>().adjoint().solve(
214  eigen_K*CMath::sq(m_scale));
215 
216  // compute covariance matrix of the posterior: Sigma = K - V^T * V
217  eigen_Sigma=eigen_K*CMath::sq(m_scale)-eigen_V.adjoint()*eigen_V;
218 }
219 
221 {
222  // get the sigma variable from the Gaussian likelihood model
224  float64_t sigma=lik->get_sigma();
225  SG_UNREF(lik);
226 
227  // create eigen representation of derivative matrix and cholesky
228  Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
229  Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
230 
232  Map<MatrixXd> eigen_Q(m_Q.matrix, m_Q.num_rows, m_Q.num_cols);
233 
234  // solve L * L' * Q = I
235  eigen_Q=eigen_L.triangularView<Upper>().adjoint().solve(
236  MatrixXd::Identity(m_L.num_rows, m_L.num_cols));
237  eigen_Q=eigen_L.triangularView<Upper>().solve(eigen_Q);
238 
239  // divide Q by sigma^2
240  eigen_Q/=CMath::sq(sigma);
241 
242  // create eigen representation of alpha and compute Q=Q-alpha*alpha'
243  eigen_Q-=eigen_alpha*eigen_alpha.transpose();
244 }
245 
247  const TParameter* param)
248 {
249  REQUIRE(!strcmp(param->m_name, "scale"), "Can't compute derivative of "
250  "the nagative log marginal likelihood wrt %s.%s parameter\n",
251  get_name(), param->m_name)
252 
253  Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
254  Map<MatrixXd> eigen_Q(m_Q.matrix, m_Q.num_rows, m_Q.num_cols);
255 
256  SGVector<float64_t> result(1);
257 
258  // compute derivative wrt kernel scale: dnlZ=sum(Q.*K*scale*2)/2
259  result[0]=(eigen_Q.cwiseProduct(eigen_K)*m_scale*2.0).sum()/2.0;
260 
261  return result;
262 }
263 
265  const TParameter* param)
266 {
267  REQUIRE(!strcmp(param->m_name, "sigma"), "Can't compute derivative of "
268  "the nagative log marginal likelihood wrt %s.%s parameter\n",
269  m_model->get_name(), param->m_name)
270 
271  // get the sigma variable from the Gaussian likelihood model
273  float64_t sigma=lik->get_sigma();
274  SG_UNREF(lik);
275 
276  // create eigen representation of the matrix Q
277  Map<MatrixXd> eigen_Q(m_Q.matrix, m_Q.num_rows, m_Q.num_cols);
278 
279  SGVector<float64_t> result(1);
280 
281  // compute derivative wrt likelihood model parameter sigma:
282  // dnlZ=sigma^2*trace(Q)
283  result[0]=CMath::sq(sigma)*eigen_Q.trace();
284 
285  return result;
286 }
287 
289  const TParameter* param)
290 {
291  // create eigen representation of the matrix Q
292  Map<MatrixXd> eigen_Q(m_Q.matrix, m_Q.num_rows, m_Q.num_cols);
293 
294  SGVector<float64_t> result;
295 
296  if (param->m_datatype.m_ctype==CT_VECTOR ||
297  param->m_datatype.m_ctype==CT_SGVECTOR)
298  {
300  "Length of the parameter %s should not be NULL\n", param->m_name)
301  result=SGVector<float64_t>(*(param->m_datatype.m_length_y));
302  }
303  else
304  {
305  result=SGVector<float64_t>(1);
306  }
307 
308  for (index_t i=0; i<result.vlen; i++)
309  {
311 
312  if (result.vlen==1)
313  dK=m_kernel->get_parameter_gradient(param);
314  else
315  dK=m_kernel->get_parameter_gradient(param, i);
316 
317  Map<MatrixXd> eigen_dK(dK.matrix, dK.num_rows, dK.num_cols);
318 
319  // compute derivative wrt kernel parameter: dnlZ=sum(Q.*dK*scale)/2.0
320  result[i]=(eigen_Q.cwiseProduct(eigen_dK)*CMath::sq(m_scale)).sum()/2.0;
321  }
322 
323  return result;
324 }
325 
327  const TParameter* param)
328 {
329  // create eigen representation of alpha vector
330  Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
331 
332  SGVector<float64_t> result;
333 
334  if (param->m_datatype.m_ctype==CT_VECTOR ||
335  param->m_datatype.m_ctype==CT_SGVECTOR)
336  {
338  "Length of the parameter %s should not be NULL\n", param->m_name)
339 
340  result=SGVector<float64_t>(*(param->m_datatype.m_length_y));
341  }
342  else
343  {
344  result=SGVector<float64_t>(1);
345  }
346 
347  for (index_t i=0; i<result.vlen; i++)
348  {
350 
351  if (result.vlen==1)
353  else
355 
356  Map<VectorXd> eigen_dmu(dmu.vector, dmu.vlen);
357 
358  // compute derivative wrt mean parameter: dnlZ=-dmu'*alpha
359  result[i]=-eigen_dmu.dot(eigen_alpha);
360  }
361 
362  return result;
363 }
364 
365 #endif /* HAVE_EIGEN3 */

SHOGUN Machine Learning Toolbox - Documentation