SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
FITCInferenceMethod.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2013 Roman Votyakov
8  * Copyright (C) 2012 Jacob Walker
9  * Copyright (C) 2013 Roman Votyakov
10  *
11  * Code adapted from Gaussian Process Machine Learning Toolbox
12  * http://www.gaussianprocess.org/gpml/code/matlab/doc/
13  */
14 
16 
17 #ifdef HAVE_EIGEN3
18 
23 
24 using namespace shogun;
25 using namespace Eigen;
26 
// Default constructor: registers parameters and sets member defaults via
// init(). NOTE(review): the constructor signature (original line 27) is not
// visible in this extract.
28 {
29  init();
30 }
31 
// Constructor taking kernel, features, mean function, labels, likelihood
// model and latent (inducing-point) features; forwards the first five to the
// CInferenceMethod base and then calls init().
// NOTE(review): original lines 32 (start of signature) and 37 (presumably the
// assignment of `lat` to m_latent_features) are not visible in this extract.
33  CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod, CFeatures* lat)
34  : CInferenceMethod(kern, feat, m, lab, mod)
35 {
36  init();
38 }
39 
// Registers members with Shogun's parameter framework and sets defaults.
40 void CFITCInferenceMethod::init()
41 {
// register latent features for serialization / model selection
// (the SG_ADD continuation, original line 43, is not visible in this extract)
42  SG_ADD((CSGObject**)&m_latent_features, "latent_features", "Latent features",
44 
45  m_latent_features=NULL;
// small jitter added to the diagonal of Kuu in update_chol() to keep the
// Cholesky factorization numerically stable
46  m_ind_noise=1e-10;
47 }
48 
// Destructor: releases the reference held on the latent features.
// NOTE(review): the destructor signature (original line 49) is not visible in
// this extract.
50 {
51  SG_UNREF(m_latent_features);
52 }
53 
// Checked downcast of a generic CInferenceMethod to CFITCInferenceMethod.
// Errors out if the runtime inference type is not INF_FITC. The returned
// pointer is SG_REF'ed: the caller receives an owning reference and must
// SG_UNREF it when done.
// NOTE(review): the first signature line (original line 54) is not visible in
// this extract.
 55  CInferenceMethod* inference)
 56 {
 57  ASSERT(inference!=NULL);
 58 
 59  if (inference->get_inference_type()!=INF_FITC)
 60  SG_SERROR("Provided inference is not of type CFITCInferenceMethod!\n")
 61 
 62  SG_REF(inference);
 63  return (CFITCInferenceMethod*)inference;
 64 }
65 
// Recomputes all cached quantities needed for inference: Cholesky factors
// (update_chol), the alpha vector (update_alpha) and the derivative helpers
// (update_deriv).
// NOTE(review): original lines 66 (signature), 70 and 74 (likely a base-class
// update and/or update_train_kernel() call) are not visible in this extract.
67 {
68  SG_DEBUG("entering\n");
69 
71  update_chol();
72  update_alpha();
73  update_deriv();
75 
76  SG_DEBUG("leaving\n");
77 }
78 
// Precondition checks for FITC inference: requires regression labels and a
// non-empty set of latent (inducing) features; the truncated REQUIRE above
// (original lines 81/83 missing) enforces a Gaussian likelihood model, per
// its visible error message.
// NOTE(review): the signature (original line 79) is not visible here.
80 {
82 
84  "FITC inference method can only use Gaussian likelihood function\n")
85  REQUIRE(m_labels->get_label_type()==LT_REGRESSION, "Labels must be type "
86  "of CRegressionLabels\n")
87  REQUIRE(m_latent_features, "Latent features should not be NULL\n")
88  REQUIRE(m_latent_features->get_num_vectors(),
89  "Number of latent features must be greater than zero\n")
90 }
91 
// Returns the diagonal vector sW with every entry 1/sigma (sigma being the
// Gaussian likelihood's noise standard deviation), one entry per training
// vector.
// NOTE(review): original lines 92 (signature), 94, 98 (likelihood retrieval)
// and 103 (allocation of `result`) are not visible in this extract.
93 {
95  update();
96 
97  // get the sigma variable from the Gaussian likelihood model
99  float64_t sigma=lik->get_sigma();
100  SG_UNREF(lik);
101 
102  // compute diagonal vector: sW=1/sigma
104  result.fill_vector(result.vector, m_features->get_num_vectors(), 1.0/sigma);
105 
106  return result;
107 }
108 
// Computes the FITC negative log marginal likelihood from the cached
// factors produced by update_chol():
//   nlZ = sum(log(diag(Lu))) + (sum(log(dg)) + r'r - be'be + n*log(2*pi))/2
// NOTE(review): original lines 109 (signature), 111 and 125 (the trailing
// n*log(2*pi) term of the expression) are not visible in this extract.
110 {
112  update();
113 
114  // create eigen representations of chol_utr, dg, r, be
115  Map<MatrixXd> eigen_chol_utr(m_chol_utr.matrix, m_chol_utr.num_rows,
116  m_chol_utr.num_cols);
117  Map<VectorXd> eigen_dg(m_dg.vector, m_dg.vlen);
118  Map<VectorXd> eigen_r(m_r.vector, m_r.vlen);
119  Map<VectorXd> eigen_be(m_be.vector, m_be.vlen);
120 
121  // compute negative log marginal likelihood:
122  // nlZ=sum(log(diag(utr)))+(sum(log(dg))+r'*r-be'*be+n*log(2*pi))/2
123  float64_t result=eigen_chol_utr.diagonal().array().log().sum()+
124  (eigen_dg.array().log().sum()+eigen_r.dot(eigen_r)-eigen_be.dot(eigen_be)+
126 
127  return result;
128 }
129 
// Accessor that triggers a lazy update() and returns a cached quantity
// (presumably a copy of m_alpha — the signature on original line 130 and the
// copy construction on line 135 are not visible in this extract; verify
// against the class header).
131 {
133  update();
134 
136  return result;
137 }
138 
// Returns a copy of the cached posterior Cholesky-related matrix m_L
// (computed in update_chol()), refreshing caches first via update().
// NOTE(review): original lines 139 (signature) and 141 are not visible in
// this extract.
140 {
142  update();
143 
144  SGMatrix<float64_t> result(m_L);
145  return result;
146 }
147 
// Returns an empty (default-constructed) vector — apparently an accessor
// that is not implemented for FITC inference.
// NOTE(review): the signature (original line 148) is not visible in this
// extract, so the method's identity cannot be confirmed from here.
149 {
151  return SGVector<float64_t>();
152 }
153 
// Returns an empty (default-constructed) matrix — apparently an accessor
// that is not implemented for FITC inference.
// NOTE(review): the signature (original line 154) is not visible in this
// extract, so the method's identity cannot be confirmed from here.
155 {
157  return SGMatrix<float64_t>();
158 }
159 
// Recomputes the kernel matrices that depend on the latent features:
//   m_kuu  = K(latent, latent)   — inducing-point covariance
//   m_ktru = K(latent, train)    — cross-covariance to the training data
// NOTE(review): original lines 160 (signature) and 162 (likely a base-class
// call computing m_ktrtr) are not visible in this extract.
161 {
163 
164  // create kernel matrix for latent features
165  m_kernel->init(m_latent_features, m_latent_features);
166  m_kuu=m_kernel->get_kernel_matrix();
167 
168  // create kernel matrix for latent and training features
169  m_kernel->init(m_latent_features, m_features);
170  m_ktru=m_kernel->get_kernel_matrix();
171 }
172 
// Computes the FITC Cholesky factors and intermediates (GPML-style, see the
// file header for the toolbox this is adapted from):
//   Luu = chol(scale^2*Kuu + ind_noise*I)            -> m_chol_uu
//   V   = Luu' \ (scale^2*Ktru)
//   dg  = diag(scale^2*Ktrtr) + sigma^2 - diag(V'V)  -> m_dg
//   Lu  = chol(V*diag(1./dg)*V' + I)                 -> m_chol_utr
//   r   = (y - m)./sqrt(dg)                          -> m_r
//   be  = Lu' \ (V*(r./sqrt(dg)))                    -> m_be
//   m_L = inv((Lu*Luu)'*(Lu*Luu)) - inv(Kuu-ish)     (posterior term)
// NOTE(review): original lines 173 (signature), 176 (likelihood retrieval),
// 203, 223, 246 (allocations of m_dg, mean vector m, m_L) are not visible in
// this extract.
174 {
175  // get the sigma variable from the Gaussian likelihood model
177  float64_t sigma=lik->get_sigma();
178  SG_UNREF(lik);
179 
180  // eigen3 representation of covariance matrix of latent features (m_kuu)
181  // and training features (m_ktru)
182  Map<MatrixXd> eigen_kuu(m_kuu.matrix, m_kuu.num_rows, m_kuu.num_cols);
183  Map<MatrixXd> eigen_ktru(m_ktru.matrix, m_ktru.num_rows, m_ktru.num_cols);
184  Map<MatrixXd> eigen_ktrtr(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
185 
186  // solve Luu' * Luu = Kuu + m_ind_noise * I
187  LLT<MatrixXd> Luu(eigen_kuu*CMath::sq(m_scale)+m_ind_noise*MatrixXd::Identity(
188  m_kuu.num_rows, m_kuu.num_cols));
189 
190  // create shogun and eigen3 representation of cholesky of covariance of
191  // latent features Luu (m_chol_uu and eigen_chol_uu)
192  m_chol_uu=SGMatrix<float64_t>(Luu.rows(), Luu.cols());
193  Map<MatrixXd> eigen_chol_uu(m_chol_uu.matrix, m_chol_uu.num_rows,
194  m_chol_uu.num_cols);
195  eigen_chol_uu=Luu.matrixU();
196 
197  // solve Luu' * V = Ktru
198  MatrixXd V=eigen_chol_uu.triangularView<Upper>().adjoint().solve(eigen_ktru*
199  CMath::sq(m_scale));
200 
201  // create shogun and eigen3 representation of
202  // dg = diag(K) + sn2 - diag(Q)
204  Map<VectorXd> eigen_dg(m_dg.vector, m_dg.vlen);
205 
206  eigen_dg=eigen_ktrtr.diagonal()*CMath::sq(m_scale)+CMath::sq(sigma)*
207  VectorXd::Ones(m_dg.vlen)-(V.cwiseProduct(V)).colwise().sum().adjoint();
208 
209  // solve Lu' * Lu = V * diag(1/dg) * V' + I
210  LLT<MatrixXd> Lu(V*((VectorXd::Ones(m_dg.vlen)).cwiseQuotient(eigen_dg)).asDiagonal()*
211  V.adjoint()+MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols));
212 
213  // create shogun and eigen3 representation of cholesky of covariance of
214  // training features Luu (m_chol_utr and eigen_chol_utr)
215  m_chol_utr=SGMatrix<float64_t>(Lu.rows(), Lu.cols());
216  Map<MatrixXd> eigen_chol_utr(m_chol_utr.matrix, m_chol_utr.num_rows,
217  m_chol_utr.num_cols);
218  eigen_chol_utr=Lu.matrixU();
219 
220  // create eigen representation of labels and mean vectors
221  SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels();
222  Map<VectorXd> eigen_y(y.vector, y.vlen);
224  Map<VectorXd> eigen_m(m.vector, m.vlen);
225 
226  // compute sqrt_dg = sqrt(dg)
227  VectorXd sqrt_dg=eigen_dg.array().sqrt();
228 
229  // create shogun and eigen3 representation of labels adjusted for
230  // noise and means (m_r)
231  m_r=SGVector<float64_t>(y.vlen);
232  Map<VectorXd> eigen_r(m_r.vector, m_r.vlen);
233  eigen_r=(eigen_y-eigen_m).cwiseQuotient(sqrt_dg);
234 
235  // compute be
236  m_be=SGVector<float64_t>(m_chol_utr.num_cols);
237  Map<VectorXd> eigen_be(m_be.vector, m_be.vlen);
238  eigen_be=eigen_chol_utr.triangularView<Upper>().adjoint().solve(
239  V*eigen_r.cwiseQuotient(sqrt_dg));
240 
241  // compute iKuu
242  MatrixXd iKuu=Luu.solve(MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols));
243 
244  // create shogun and eigen3 representation of posterior cholesky
245  MatrixXd eigen_prod=eigen_chol_utr*eigen_chol_uu;
247  Map<MatrixXd> eigen_chol(m_L.matrix, m_L.num_rows, m_L.num_cols);
248 
249  eigen_chol=eigen_prod.triangularView<Upper>().adjoint().solve(
250  MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols));
251  eigen_chol=eigen_prod.triangularView<Upper>().solve(eigen_chol)-iKuu;
252 }
253 
// Computes m_alpha by two triangular back-substitutions:
//   alpha = Luu \ (Lu \ be)
// using the factors m_chol_uu (Luu) and m_chol_utr (Lu) from update_chol().
// NOTE(review): original lines 254 (signature) and 264 (presumably the
// allocation of m_alpha) are not visible in this extract.
255 {
256  Map<MatrixXd> eigen_chol_uu(m_chol_uu.matrix, m_chol_uu.num_rows,
257  m_chol_uu.num_cols);
258  Map<MatrixXd> eigen_chol_utr(m_chol_utr.matrix, m_chol_utr.num_rows,
259  m_chol_utr.num_cols);
260  Map<VectorXd> eigen_be(m_be.vector, m_be.vlen);
261 
262  // create shogun and eigen representations of alpha
263  // and solve Luu * Lu * alpha = be
265  Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
266 
267  eigen_alpha=eigen_chol_utr.triangularView<Upper>().solve(eigen_be);
268  eigen_alpha=eigen_chol_uu.triangularView<Upper>().solve(eigen_alpha);
269 }
270 
// Precomputes the helper quantities used by the derivative methods below
// (GPML-style):
//   al = ((y-m) - V'*(Lu\be)) ./ dg   -> m_al
//   B  = iKuu * (scale^2*Ktru)        -> m_B
//   w  = B * al                       -> m_w
//   W  = Lu' \ (V * diag(1./dg))      -> m_W
// NOTE(review): original lines 271 (signature) and 287 (retrieval of the
// mean vector m) are not visible in this extract.
272 {
273  // create eigen representation of Ktru, Lu, Luu, dg, be
274  Map<MatrixXd> eigen_Ktru(m_ktru.matrix, m_ktru.num_rows, m_ktru.num_cols);
275  Map<MatrixXd> eigen_Lu(m_chol_utr.matrix, m_chol_utr.num_rows,
276  m_chol_utr.num_cols);
277  Map<MatrixXd> eigen_Luu(m_chol_uu.matrix, m_chol_uu.num_rows,
278  m_chol_uu.num_cols);
279  Map<VectorXd> eigen_dg(m_dg.vector, m_dg.vlen);
280  Map<VectorXd> eigen_be(m_be.vector, m_be.vlen);
281 
282  // get and create eigen representation of labels
283  SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels();
284  Map<VectorXd> eigen_y(y.vector, y.vlen);
285 
286  // get and create eigen representation of mean vector
288  Map<VectorXd> eigen_m(m.vector, m.vlen);
289 
290  // compute V=inv(Luu')*Ku
291  MatrixXd V=eigen_Luu.triangularView<Upper>().adjoint().solve(eigen_Ktru*
292  CMath::sq(m_scale));
293 
294  // create shogun and eigen representation of al
295  m_al=SGVector<float64_t>(m.vlen);
296  Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
297 
298  // compute al=(Kt+sn2*eye(n))\y
299  eigen_al=((eigen_y-eigen_m)-(V.adjoint()*
300  eigen_Lu.triangularView<Upper>().solve(eigen_be))).cwiseQuotient(eigen_dg);
301 
302  // compute inv(Kuu+snu2*I)=iKuu
303  MatrixXd iKuu=eigen_Luu.triangularView<Upper>().adjoint().solve(
304  MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols));
305  iKuu=eigen_Luu.triangularView<Upper>().solve(iKuu);
306 
307  // create shogun and eigen representation of B
308  m_B=SGMatrix<float64_t>(iKuu.rows(), eigen_Ktru.cols());
309  Map<MatrixXd> eigen_B(m_B.matrix, m_B.num_rows, m_B.num_cols);
310 
311  eigen_B=iKuu*eigen_Ktru*CMath::sq(m_scale);
312 
313  // create shogun and eigen representation of w
314  m_w=SGVector<float64_t>(m_B.num_rows);
315  Map<VectorXd> eigen_w(m_w.vector, m_w.vlen);
316 
317  eigen_w=eigen_B*eigen_al;
318 
319  // create shogun and eigen representation of W
320  m_W=SGMatrix<float64_t>(m_chol_utr.num_cols, m_dg.vlen);
321  Map<MatrixXd> eigen_W(m_W.matrix, m_W.num_rows, m_W.num_cols);
322 
323  // compute W=Lu'\(V./repmat(g_sn2',nu,1))
324  eigen_W=eigen_Lu.triangularView<Upper>().adjoint().solve(V*VectorXd::Ones(
325  m_dg.vlen).cwiseQuotient(eigen_dg).asDiagonal());
326 }
327 
// Derivative of the negative log marginal likelihood wrt the inference
// method's own "scale" parameter (the only parameter accepted here). Scales
// each kernel-matrix derivative by 2*scale (chain rule on scale^2*K) and
// evaluates the GPML dnlZ expression in the comment below.
// NOTE(review): original lines 328 (start of signature) and 343 (clone of
// the diagonal of m_ktrtr into deriv_trtr) are not visible in this extract.
// NOTE(review): "nagative" in the REQUIRE message is a typo for "negative";
// it is a runtime string, left untouched in this comment-only pass.
329  const TParameter* param)
330 {
331  REQUIRE(!strcmp(param->m_name, "scale"), "Can't compute derivative of "
332  "the nagative log marginal likelihood wrt %s.%s parameter\n",
333  get_name(), param->m_name)
334 
335  // create eigen representation of dg, al, B, W, w
336  Map<VectorXd> eigen_dg(m_dg.vector, m_dg.vlen);
337  Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
338  Map<VectorXd> eigen_w(m_w.vector, m_w.vlen);
339  Map<MatrixXd> eigen_B(m_B.matrix, m_B.num_rows, m_B.num_cols);
340  Map<MatrixXd> eigen_W(m_W.matrix, m_W.num_rows, m_W.num_cols);
341 
342  // clone kernel matrices
344  SGMatrix<float64_t> deriv_uu=m_kuu.clone();
345  SGMatrix<float64_t> deriv_tru=m_ktru.clone();
346 
347  // create eigen representation of kernel matrices
348  Map<VectorXd> ddiagKi(deriv_trtr.vector, deriv_trtr.vlen);
349  Map<MatrixXd> dKuui(deriv_uu.matrix, deriv_uu.num_rows, deriv_uu.num_cols);
350  Map<MatrixXd> dKui(deriv_tru.matrix, deriv_tru.num_rows, deriv_tru.num_cols);
351 
352  // compute derivatives wrt scale for each kernel matrix
353  ddiagKi*=m_scale*2.0;
354  dKuui*=m_scale*2.0;
355  dKui*=m_scale*2.0;
356 
357  // compute R=2*dKui-dKuui*B
358  MatrixXd R=2*dKui-dKuui*eigen_B;
359 
360  // compute v=ddiagKi-sum(R.*B, 1)'
361  VectorXd v=ddiagKi-R.cwiseProduct(eigen_B).colwise().sum().adjoint();
362 
363  SGVector<float64_t> result(1);
364 
365  // compute dnlZ=(ddiagKi'*(1./g_sn2)+w'*(dKuui*w-2*(dKui*al))-al'*(v.*al)-
366  // sum(W.*W,1)*v- sum(sum((R*W').*(B*W'))))/2;
367  result[0]=(ddiagKi.dot(VectorXd::Ones(m_dg.vlen).cwiseQuotient(eigen_dg))+
368  eigen_w.dot(dKuui*eigen_w-2*(dKui*eigen_al))-
369  eigen_al.dot(v.cwiseProduct(eigen_al))-
370  eigen_W.cwiseProduct(eigen_W).colwise().sum().dot(v)-
371  (R*eigen_W.adjoint()).cwiseProduct(eigen_B*eigen_W.adjoint()).sum())/2.0;
372 
373  return result;
374 }
375 
// Derivative of the negative log marginal likelihood wrt the Gaussian
// likelihood's "sigma" (noise std-dev) parameter — the only parameter
// accepted. First term handles the sigma^2 contribution to dg; the second
// accounts for the inducing-noise jitter (dKuui = 2*ind_noise).
// NOTE(review): original lines 376 (start of signature) and 391 (likelihood
// model retrieval into `lik`) are not visible in this extract.
// NOTE(review): "nagative" in the REQUIRE message is a typo for "negative";
// it is a runtime string, left untouched in this comment-only pass.
377  const TParameter* param)
378 {
379  REQUIRE(!strcmp(param->m_name, "sigma"), "Can't compute derivative of "
380  "the nagative log marginal likelihood wrt %s.%s parameter\n",
381  m_model->get_name(), param->m_name)
382 
383  // create eigen representation of dg, al, w, W and B
384  Map<VectorXd> eigen_dg(m_dg.vector, m_dg.vlen);
385  Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
386  Map<VectorXd> eigen_w(m_w.vector, m_w.vlen);
387  Map<MatrixXd> eigen_W(m_W.matrix, m_W.num_rows, m_W.num_cols);
388  Map<MatrixXd> eigen_B(m_B.matrix, m_B.num_rows, m_B.num_cols);
389 
390  // get the sigma variable from the Gaussian likelihood model
392  float64_t sigma=lik->get_sigma();
393  SG_UNREF(lik);
394 
395  SGVector<float64_t> result(1);
396 
397  result[0]=CMath::sq(sigma)*(VectorXd::Ones(m_dg.vlen).cwiseQuotient(
398  eigen_dg).sum()-eigen_W.cwiseProduct(eigen_W).sum()-eigen_al.dot(eigen_al));
399 
400  float64_t dKuui=2.0*m_ind_noise;
401  MatrixXd R=-dKuui*eigen_B;
402  VectorXd v=-R.cwiseProduct(eigen_B).colwise().sum().adjoint();
403 
404  result[0]=result[0]+((eigen_w.dot(dKuui*eigen_w))-eigen_al.dot(
405  v.cwiseProduct(eigen_al))-eigen_W.cwiseProduct(eigen_W).colwise().sum().dot(v)-
406  (R*eigen_W.adjoint()).cwiseProduct(eigen_B*eigen_W.adjoint()).sum())/2.0;
407 
408  return result;
409 }
410 
// Derivative of the negative log marginal likelihood wrt a kernel
// hyperparameter. Handles both scalar and vector-shaped parameters (one
// result entry per component), asking the kernel for gradients of Ktrtr,
// Kuu and Ktru, then evaluating the same GPML dnlZ expression as the
// scale derivative above.
// NOTE(review): original lines 411 (start of signature), 426 (REQUIRE on
// the vector parameter's length), 443-444 and 454 (kernel init + diagonal
// gradient for the scalar branch / loop variants) are not visible in this
// extract.
412  const TParameter* param)
413 {
414  // create eigen representation of dg, al, w, W, B
415  Map<VectorXd> eigen_dg(m_dg.vector, m_dg.vlen);
416  Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
417  Map<VectorXd> eigen_w(m_w.vector, m_w.vlen);
418  Map<MatrixXd> eigen_W(m_W.matrix, m_W.num_rows, m_W.num_cols);
419  Map<MatrixXd> eigen_B(m_B.matrix, m_B.num_rows, m_B.num_cols);
420 
421  SGVector<float64_t> result;
422 
423  if (param->m_datatype.m_ctype==CT_VECTOR ||
424  param->m_datatype.m_ctype==CT_SGVECTOR)
425  {
427  "Length of the parameter %s should not be NULL\n", param->m_name)
428  result=SGVector<float64_t>(*(param->m_datatype.m_length_y));
429  }
430  else
431  {
432  result=SGVector<float64_t>(1);
433  }
434 
435  for (index_t i=0; i<result.vlen; i++)
436  {
437  SGVector<float64_t> deriv_trtr;
438  SGMatrix<float64_t> deriv_uu;
439  SGMatrix<float64_t> deriv_tru;
440 
441  if (result.vlen==1)
442  {
445 
446  m_kernel->init(m_latent_features, m_latent_features);
447  deriv_uu=m_kernel->get_parameter_gradient(param);
448 
449  m_kernel->init(m_latent_features, m_features);
450  deriv_tru=m_kernel->get_parameter_gradient(param);
451  }
452  else
453  {
455  deriv_trtr=m_kernel->get_parameter_gradient(param, i).get_diagonal_vector();
456 
457  m_kernel->init(m_latent_features, m_latent_features);
458  deriv_uu=m_kernel->get_parameter_gradient(param, i);
459 
460  m_kernel->init(m_latent_features, m_features);
461  deriv_tru=m_kernel->get_parameter_gradient(param, i);
462  }
463 
464  // create eigen representation of derivatives
465  Map<VectorXd> ddiagKi(deriv_trtr.vector, deriv_trtr.vlen);
466  Map<MatrixXd> dKuui(deriv_uu.matrix, deriv_uu.num_rows,
467  deriv_uu.num_cols);
468  Map<MatrixXd> dKui(deriv_tru.matrix, deriv_tru.num_rows,
469  deriv_tru.num_cols);
470 
471  ddiagKi*=CMath::sq(m_scale);
472  dKuui*=CMath::sq(m_scale);
473  dKui*=CMath::sq(m_scale);
474 
475  // compute R=2*dKui-dKuui*B
476  MatrixXd R=2*dKui-dKuui*eigen_B;
477 
478  // compute v=ddiagKi-sum(R.*B, 1)'
479  VectorXd v=ddiagKi-R.cwiseProduct(eigen_B).colwise().sum().adjoint();
480 
481  // compute dnlZ=(ddiagKi'*(1./g_sn2)+w'*(dKuui*w-2*(dKui*al))-al'*
482  // (v.*al)-sum(W.*W,1)*v- sum(sum((R*W').*(B*W'))))/2;
483  result[i]=(ddiagKi.dot(VectorXd::Ones(m_dg.vlen).cwiseQuotient(eigen_dg))+
484  eigen_w.dot(dKuui*eigen_w-2*(dKui*eigen_al))-
485  eigen_al.dot(v.cwiseProduct(eigen_al))-
486  eigen_W.cwiseProduct(eigen_W).colwise().sum().dot(v)-
487  (R*eigen_W.adjoint()).cwiseProduct(eigen_B*eigen_W.adjoint()).sum())/2.0;
488  }
489 
490  return result;
491 }
492 
// Derivative of the negative log marginal likelihood wrt a mean-function
// parameter: dnlZ = -dmu'*al for each component, where dmu is the mean's
// parameter gradient and al is the cached helper from update_deriv().
// Handles both scalar and vector-shaped parameters.
// NOTE(review): original lines 493 (start of signature), 504 (REQUIRE on
// vector length) and 516-522 (retrieval of `dmu` from the mean function in
// the scalar/vector branches) are not visible in this extract.
494  const TParameter* param)
495 {
496  // create eigen representation of al vector
497  Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
498 
499  SGVector<float64_t> result;
500 
501  if (param->m_datatype.m_ctype==CT_VECTOR ||
502  param->m_datatype.m_ctype==CT_SGVECTOR)
503  {
505  "Length of the parameter %s should not be NULL\n", param->m_name)
506 
507  result=SGVector<float64_t>(*(param->m_datatype.m_length_y));
508  }
509  else
510  {
511  result=SGVector<float64_t>(1);
512  }
513 
514  for (index_t i=0; i<result.vlen; i++)
515  {
517 
518  if (result.vlen==1)
520  else
522 
523  Map<VectorXd> eigen_dmu(dmu.vector, dmu.vlen);
524 
525  // compute dnlZ=-dm'*al
526  result[i]=-eigen_dmu.dot(eigen_al);
527  }
528 
529  return result;
530 }
531 
532 #endif /* HAVE_EIGEN3 */

SHOGUN Machine Learning Toolbox - Documentation