SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
FITCInferenceMethod.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2013 Roman Votyakov
8  * Copyright (C) 2012 Jacob Walker
9  * Copyright (C) 2013 Roman Votyakov
10  *
11  * Code adapted from Gaussian Process Machine Learning Toolbox
12  * http://www.gaussianprocess.org/gpml/code/matlab/doc/
13  */
14 
16 
17 #ifdef HAVE_EIGEN3
18 
23 
24 using namespace shogun;
25 using namespace Eigen;
26 
// Default constructor: only registers parameters / sets defaults via init().
// NOTE(review): the signature line (original line 27) was dropped by the
// Doxygen extraction -- confirm against the upstream Shogun 3.0.0 source.
28 {
 29  init();
 30 }
31 
// Full constructor: forwards kernel, features, mean function, labels and
// likelihood model to the CInferenceMethod base, then calls init().
// NOTE(review): the first signature line (original line 32) and a body line
// (original line 37, presumably storing the latent features `lat`) were
// dropped by the extraction -- confirm upstream before relying on this text.
33  CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod, CFeatures* lat)
 34  : CInferenceMethod(kern, feat, m, lab, mod)
 35 {
 36  init();
 38 }
39 
// Registers members with the parameter framework and sets defaults:
// no latent features yet, and a tiny inducing-point noise (1e-10) that is
// later added to the diagonal of Kuu to keep its Cholesky well conditioned.
// NOTE(review): the SG_ADD continuation (original line 43) was dropped by
// the extraction -- the call below is syntactically incomplete as shown.
40 void CFITCInferenceMethod::init()
 41 {
 42  SG_ADD((CSGObject**)&m_latent_features, "latent_features", "Latent features",
 44 
 45  m_latent_features=NULL;
 46  m_ind_noise=1e-10;
 47 }
48 
// Destructor: drops the reference held on the latent (inducing) features.
// NOTE(review): signature line (original line 49) dropped by extraction.
50 {
 51  SG_UNREF(m_latent_features);
 52 }
53 
// Checked downcast: verifies the runtime inference-type tag before casting
// a generic CInferenceMethod to CFITCInferenceMethod. On success a new
// reference is taken, so the caller owns one SG_UNREF on the result.
55  CInferenceMethod* inference)
 56 {
 57  ASSERT(inference!=NULL);
 58 
 59  if (inference->get_inference_type()!=INF_FITC)
 60  SG_SERROR("Provided inference is not of type CFITCInferenceMethod!\n")
 61 
 62  SG_REF(inference);
 63  return (CFITCInferenceMethod*)inference;
 64 }
65 
// Recomputes all cached quantities in dependency order: Cholesky factors
// first, then the alpha vector, then the derivative helpers.
// NOTE(review): original line 68 (likely a base-class update or kernel
// refresh call) was dropped by the extraction -- confirm upstream.
67 {
 69  update_chol();
 70  update_alpha();
 71  update_deriv();
 72 }
73 
// Validates the preconditions FITC inference needs: a Gaussian likelihood,
// regression labels, and a non-empty set of latent (inducing) features.
// NOTE(review): original lines 76/78 (presumably a base-class check and the
// opening of the likelihood REQUIRE) were dropped by the extraction; the
// first string below is the tail of that truncated REQUIRE.
75 {
 77 
 79  "FITC inference method can only use Gaussian likelihood function\n")
 80  REQUIRE(m_labels->get_label_type()==LT_REGRESSION, "Labels must be type "
 81  "of CRegressionLabels\n")
 82  REQUIRE(m_latent_features, "Latent features should not be NULL\n")
 83  REQUIRE(m_latent_features->get_num_vectors(),
 84  "Number of latent features must be greater than zero\n")
 85 }
86 
// Returns the vector sW with one entry 1/sigma per training example, where
// sigma comes from the Gaussian likelihood model.
// NOTE(review): originals 89/93/98 (signature, the likelihood fetch that
// defines `lik`, and the declaration of `result`) were dropped by the
// extraction -- the fragment below is not self-contained as shown.
88 {
 90  update();
 91 
 92  // get the sigma variable from the Gaussian likelihood model
 94  float64_t sigma=lik->get_sigma();
 95  SG_UNREF(lik);
 96 
 97  // compute diagonal vector: sW=1/sigma
 99  result.fill_vector(result.vector, m_features->get_num_vectors(), 1.0/sigma);
 100 
 101  return result;
 102 }
103 
// Computes the FITC negative log marginal likelihood from the cached
// factors (chol_utr, dg, r, be), refreshing the cache first if any hashed
// parameter changed. The formula mirrors GPML's infFITC.m:
//   nlZ = sum(log(diag(Lu))) + (sum(log(dg)) + r'r - be'be + n*log(2*pi))/2
105 {
 106  if (update_parameter_hash())
 107  update();
 108 
 109  // create eigen representations of chol_utr, dg, r, be
 110  Map<MatrixXd> eigen_chol_utr(m_chol_utr.matrix, m_chol_utr.num_rows,
 111  m_chol_utr.num_cols);
 112  Map<VectorXd> eigen_dg(m_dg.vector, m_dg.vlen);
 113  Map<VectorXd> eigen_r(m_r.vector, m_r.vlen);
 114  Map<VectorXd> eigen_be(m_be.vector, m_be.vlen);
 115 
 116  // compute negative log marginal likelihood:
 117  // nlZ=sum(log(diag(utr)))+(sum(log(dg))+r'*r-be'*be+n*log(2*pi))/2
 118  float64_t result=eigen_chol_utr.diagonal().array().log().sum()+
 119  (eigen_dg.array().log().sum()+eigen_r.dot(eigen_r)-eigen_be.dot(eigen_be)+
 120  m_chol_utr.num_rows*CMath::log(2*CMath::PI))/2.0;
 121 
 122  return result;
 123 }
124 
// Returns a copy of the cached alpha vector, refreshing all cached
// quantities first if any hashed parameter changed.
// NOTE(review): original line 130, which presumably constructs `result`
// from m_alpha, was dropped by the extraction -- confirm upstream.
126 {
 127  if (update_parameter_hash())
 128  update();
 129 
 131  return result;
 132 }
133 
// Returns a copy of the cached posterior Cholesky-related matrix m_L
// (filled by update_chol()), refreshing the cache first if needed.
135 {
 136  if (update_parameter_hash())
 137  update();
 138 
 139  SGMatrix<float64_t> result(m_L);
 140  return result;
 141 }
142 
// Not supported by this inference method: returns an empty vector.
// NOTE(review): original line 145 (presumably an SG_ERROR/NOTIMPLEMENTED
// report, plus the signature on line 143) was dropped by the extraction.
144 {
 146  return SGVector<float64_t>();
 147 }
148 
// Not supported by this inference method: returns an empty matrix.
// NOTE(review): original line 151 (presumably an SG_ERROR/NOTIMPLEMENTED
// report, plus the signature on line 149) was dropped by the extraction.
150 {
 152  return SGMatrix<float64_t>();
 153 }
154 
// Rebuilds the kernel matrices that involve the latent (inducing) features:
// Kuu (latent x latent) and Ktru (latent x training), each scaled in place
// by m_scale^2. The kernel is cleaned up and re-initialized for each pair.
// NOTE(review): originals 155/157 (signature and, likely, the base-class
// call that refreshes m_ktrtr) were dropped by the extraction.
156 {
 158 
 159  // create kernel matrix for latent features
 160  m_kernel->cleanup();
 161  m_kernel->init(m_latent_features, m_latent_features);
 162  m_kuu=m_kernel->get_kernel_matrix();
 163  Map<MatrixXd> eigen_kuu(m_kuu.matrix, m_kuu.num_rows, m_kuu.num_cols);
 164  eigen_kuu*=CMath::sq(m_scale);
 165 
 166  // create kernel matrix for latent and training features
 167  m_kernel->cleanup();
 168  m_kernel->init(m_latent_features, m_features);
 169  m_ktru=m_kernel->get_kernel_matrix();
 170  Map<MatrixXd> eigen_ktru(m_ktru.matrix, m_ktru.num_rows, m_ktru.num_cols);
 171  eigen_ktru*=CMath::sq(m_scale);
 172 }
173 
// Core FITC factorization (mirrors GPML infFITC.m): factorizes
// Kuu + ind_noise*I as Luu'Luu, forms V = Luu'^-1 * Ktru, the corrected
// diagonal dg = diag(K) + sn2 - diag(Q), the inner factor Lu of
// V*diag(1/dg)*V' + I, the whitened residual r, the projected residual be,
// and finally the posterior matrix m_L used by get_cholesky().
// NOTE(review): originals 174/177/203/227/250 (signature, the likelihood
// fetch defining `lik`, the m_dg allocation, the mean-vector fetch defining
// `m`, and the m_L allocation) were dropped by the extraction.
175 {
 176  // get the sigma variable from the Gaussian likelihood model
 178  float64_t sigma=lik->get_sigma();
 179  SG_UNREF(lik);
 180 
 181  // eigen3 representation of covariance matrix of latent features (m_kuu)
 182  // and training features (m_ktru)
 183  Map<MatrixXd> eigen_kuu(m_kuu.matrix, m_kuu.num_rows, m_kuu.num_cols);
 184  Map<MatrixXd> eigen_ktru(m_ktru.matrix, m_ktru.num_rows, m_ktru.num_cols);
 185 
 186  // solve Luu' * Luu = Kuu + m_ind_noise * I
 187  LLT<MatrixXd> Luu(eigen_kuu+m_ind_noise*MatrixXd::Identity(
 188  m_kuu.num_rows, m_kuu.num_cols));
 189 
 190  // create shogun and eigen3 representation of cholesky of covariance of
 191  // latent features Luu (m_chol_uu and eigen_chol_uu)
 192  m_chol_uu=SGMatrix<float64_t>(Luu.rows(), Luu.cols());
 193  Map<MatrixXd> eigen_chol_uu(m_chol_uu.matrix, m_chol_uu.num_rows,
 194  m_chol_uu.num_cols);
 195  eigen_chol_uu=Luu.matrixU();
 196 
 197  // solve Luu' * V = Ktru, and calculate sV = V.^2
 198  MatrixXd V=eigen_chol_uu.triangularView<Upper>().adjoint().solve(eigen_ktru);
 199  MatrixXd sV=V.cwiseProduct(V);
 200 
 201  // create shogun and eigen3 representation of
 202  // dg = diag(K) + sn2 - diag(Q), and also compute idg = 1/dg
 204  Map<VectorXd> eigen_dg(m_dg.vector, m_dg.vlen);
 205  VectorXd eigen_idg(m_dg.vlen);
 206 
 207  for (index_t i=0; i<m_ktrtr.num_cols; i++)
 208  {
 209  eigen_dg[i]=m_ktrtr(i,i)*m_scale*m_scale+sigma*sigma-sV.col(i).sum();
 210  eigen_idg[i]=1.0/eigen_dg[i];
 211  }
 212 
 213  // solve Lu' * Lu = V * diag(idg) * V' + I
 214  LLT<MatrixXd> Lu(V*eigen_idg.asDiagonal()*V.transpose()+
 215  MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols));
 216 
 217  // create shogun and eigen3 representation of cholesky of covariance of
 218  // training features Luu (m_chol_utr and eigen_chol_utr)
 219  m_chol_utr=SGMatrix<float64_t>(Lu.rows(), Lu.cols());
 220  Map<MatrixXd> eigen_chol_utr(m_chol_utr.matrix, m_chol_utr.num_rows,
// NOTE(review): `num_rows` used as the Map's column count below; benign
// only because m_chol_utr is square -- confirm upstream uses num_cols.
 221  m_chol_utr.num_rows);
 222  eigen_chol_utr = Lu.matrixU();
 223 
 224  // create eigen representation of labels and mean vectors
 225  SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels();
 226  Map<VectorXd> eigen_y(y.vector, y.vlen);
 228  Map<VectorXd> eigen_m(m.vector, m.vlen);
 229 
 230  // compute sgrt_dg = sqrt(dg)
 231  VectorXd sqrt_dg=eigen_dg.array().sqrt();
 232 
 233  // create shogun and eigen3 representation of labels adjusted for
 234  // noise and means (m_r)
 235  m_r=SGVector<float64_t>(y.vlen);
 236  Map<VectorXd> eigen_r(m_r.vector, m_r.vlen);
 237  eigen_r=(eigen_y-eigen_m).cwiseQuotient(sqrt_dg);
 238 
 239  // compute be
 240  m_be=SGVector<float64_t>(m_r.vlen);
 241  Map<VectorXd> eigen_be(m_be.vector, m_be.vlen);
 242  eigen_be=eigen_chol_utr.triangularView<Upper>().adjoint().solve(
 243  V*eigen_r.cwiseQuotient(sqrt_dg));
 244 
 245  // compute iKuu
 246  MatrixXd iKuu=Luu.solve(MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols));
 247 
 248  // create shogun and eigen3 representation of posterior cholesky
 249  MatrixXd eigen_prod=eigen_chol_utr*eigen_chol_uu;
 251  Map<MatrixXd> eigen_chol(m_L.matrix, m_L.num_rows, m_L.num_cols);
 252 
 253  eigen_chol=eigen_prod.triangularView<Upper>().adjoint().solve(
 254  MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols));
 255  eigen_chol=eigen_prod.triangularView<Upper>().solve(eigen_chol)-iKuu;
 256 }
257 
// Solves the stacked triangular system Luu * Lu * alpha = be by two
// back-substitutions against the cached upper-triangular factors.
// NOTE(review): originals 258/268 (signature and, presumably, the m_alpha
// allocation) were dropped by the extraction.
259 {
 260  Map<MatrixXd> eigen_chol_uu(m_chol_uu.matrix, m_chol_uu.num_rows,
 261  m_chol_uu.num_cols);
 262  Map<MatrixXd> eigen_chol_utr(m_chol_utr.matrix, m_chol_utr.num_rows,
 263  m_chol_utr.num_cols);
 264  Map<VectorXd> eigen_be(m_be.vector, m_be.vlen);
 265 
 266  // create shogun and eigen representations of alpha
 267  // and solve Luu * Lu * alpha = be
 269  Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
 270 
 271  eigen_alpha=eigen_chol_utr.triangularView<Upper>().solve(eigen_be);
 272  eigen_alpha=eigen_chol_uu.triangularView<Upper>().solve(eigen_alpha);
 273 }
274 
// Precomputes the quantities shared by all derivative getters (GPML
// infFITC.m notation): al = residual scaled by 1/dg, iKuu, B = iKuu*Ktru,
// w = B*al, and W = Lu'^-1 * (V scaled columnwise by 1/dg).
// NOTE(review): originals 275/291 (signature and the mean-vector fetch
// defining `m`) were dropped by the extraction.
276 {
 277  // create eigen representation of Ktru, Lu, Luu, dg, be
 278  Map<MatrixXd> eigen_Ktru(m_ktru.matrix, m_ktru.num_rows, m_ktru.num_cols);
 279  Map<MatrixXd> eigen_Lu(m_chol_utr.matrix, m_chol_utr.num_rows,
 280  m_chol_utr.num_cols);
 281  Map<MatrixXd> eigen_Luu(m_chol_uu.matrix, m_chol_uu.num_rows,
 282  m_chol_uu.num_cols);
 283  Map<VectorXd> eigen_dg(m_dg.vector, m_dg.vlen);
 284  Map<VectorXd> eigen_be(m_be.vector, m_be.vlen);
 285 
 286  // get and create eigen representation of labels
 287  SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels();
 288  Map<VectorXd> eigen_y(y.vector, y.vlen);
 289 
 290  // get and create eigen representation of mean vector
 292  Map<VectorXd> eigen_m(m.vector, m.vlen);
 293 
 294  // compute V=inv(Luu')*Ku
 295  MatrixXd V=eigen_Luu.triangularView<Upper>().adjoint().solve(eigen_Ktru);
 296 
 297  // create shogun and eigen representation of al
 298  m_al=SGVector<float64_t>(m.vlen);
 299  Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
 300 
 301  // compute al=(Kt+sn2*eye(n))\y
 302  eigen_al=((eigen_y-eigen_m)-(V.adjoint()*
 303  eigen_Lu.triangularView<Upper>().solve(eigen_be))).cwiseQuotient(eigen_dg);
 304 
 305  // compute inv(Kuu+snu2*I)=iKuu
 306  MatrixXd iKuu=eigen_Luu.triangularView<Upper>().adjoint().solve(
 307  MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols));
 308  iKuu=eigen_Luu.triangularView<Upper>().solve(iKuu);
 309 
 310  // create shogun and eigen representation of B
 311  m_B=SGMatrix<float64_t>(m_ktru.num_rows, m_ktru.num_cols);
 312  Map<MatrixXd> eigen_B(m_B.matrix, m_B.num_rows, m_B.num_cols);
 313 
 314  eigen_B=iKuu*eigen_Ktru;
 315 
 316  // create shogun and eigen representation of w
 317  m_w=SGVector<float64_t>(m_al.vlen);
 318  Map<VectorXd> eigen_w(m_w.vector, m_w.vlen);
 319 
 320  eigen_w=eigen_B*eigen_al;
 321 
 322  // create shogun and eigen representation of W
 323  m_W=SGMatrix<float64_t>(m_chol_utr.num_rows, m_chol_utr.num_cols);
 324  Map<MatrixXd> eigen_W(m_W.matrix, m_W.num_rows, m_W.num_cols);
 325 
 326  // compute W=Lu'\(V./repmat(g_sn2',nu,1))
 327  eigen_W=eigen_Lu.triangularView<Upper>().adjoint().solve(V*VectorXd::Ones(
 328  m_dg.vlen).cwiseQuotient(eigen_dg).asDiagonal());
 329 }
330 
// Derivative of the negative log marginal likelihood w.r.t. the kernel
// scale parameter. Since K, Kuu and Ktru all carry a factor m_scale^2,
// each derivative matrix is the corresponding kernel matrix times 2*scale.
// NOTE(review): originals 331/346 (signature first line and the
// deriv_trtr clone) were dropped by the extraction. The "nagative" typo
// below is in a runtime string and so is left untouched here.
332  const TParameter* param)
 333 {
 334  REQUIRE(!strcmp(param->m_name, "scale"), "Can't compute derivative of "
 335  "the nagative log marginal likelihood wrt %s.%s parameter\n",
 336  get_name(), param->m_name)
 337 
 338  // create eigen representation of dg, al, B, W, w
 339  Map<VectorXd> eigen_dg(m_dg.vector, m_dg.vlen);
 340  Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
 341  Map<VectorXd> eigen_w(m_w.vector, m_w.vlen);
 342  Map<MatrixXd> eigen_B(m_B.matrix, m_B.num_rows, m_B.num_cols);
 343  Map<MatrixXd> eigen_W(m_W.matrix, m_W.num_rows, m_W.num_cols);
 344 
 345  // clone kernel matrices
 347  SGMatrix<float64_t> deriv_uu=m_kuu.clone();
 348  SGMatrix<float64_t> deriv_tru=m_ktru.clone();
 349 
 350  // create eigen representation of kernel matrices
 351  Map<VectorXd> ddiagKi(deriv_trtr.vector, deriv_trtr.vlen);
// NOTE(review): the two Maps below pass (num_cols, num_rows) -- i.e. the
// transposed dimensions. Harmless for square dKuui; verify dKui against
// the upstream source before reusing this listing.
 352  Map<MatrixXd> dKuui(deriv_uu.matrix, deriv_uu.num_cols, deriv_uu.num_rows);
 353  Map<MatrixXd> dKui(deriv_tru.matrix, deriv_tru.num_cols, deriv_tru.num_rows);
 354 
 355  // compute derivatives wrt scale for each kernel matrix
 356  ddiagKi*=m_scale*2.0;
 357  dKuui*=m_scale*2.0;
 358  dKui*=m_scale*2.0;
 359 
 360  // compute R=2*dKui-dKuui*B
 361  MatrixXd R=2*dKui-dKuui*eigen_B;
 362 
 363  // compute v=ddiagKi-sum(R.*B, 1)'
 364  VectorXd v=ddiagKi-R.cwiseProduct(eigen_B).colwise().sum().adjoint();
 365 
 366  SGVector<float64_t> result(1);
 367 
 368  // compute dnlZ=(ddiagKi'*(1./g_sn2)+w'*(dKuui*w-2*(dKui*al))-al'*(v.*al)-
 369  // sum(W.*W,1)*v- sum(sum((R*W').*(B*W'))))/2;
 370  result[0]=(ddiagKi.dot(VectorXd::Ones(m_dg.vlen).cwiseQuotient(eigen_dg))+
 371  eigen_w.dot(dKuui*eigen_w-2*(dKui*eigen_al))-
 372  eigen_al.dot(v.cwiseProduct(eigen_al))-
 373  eigen_W.cwiseProduct(eigen_W).colwise().sum().dot(v)-
 374  (R*eigen_W.adjoint()).cwiseProduct(eigen_B*eigen_W.adjoint()).sum())/2.0;
 375 
 376  return result;
 377 }
378 
// Derivative of the negative log marginal likelihood w.r.t. the Gaussian
// likelihood's sigma, plus the contribution of the inducing-point noise
// (dKuui = 2*m_ind_noise), following GPML infFITC.m.
// NOTE(review): originals 379/394 (signature first line and the likelihood
// fetch defining `lik`) were dropped by the extraction. The "nagative"
// typo below is in a runtime string and is left untouched.
380  const TParameter* param)
 381 {
 382  REQUIRE(!strcmp(param->m_name, "sigma"), "Can't compute derivative of "
 383  "the nagative log marginal likelihood wrt %s.%s parameter\n",
 384  m_model->get_name(), param->m_name)
 385 
 386  // create eigen representation of dg, al, w, W and B
 387  Map<VectorXd> eigen_dg(m_dg.vector, m_dg.vlen);
 388  Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
 389  Map<VectorXd> eigen_w(m_w.vector, m_w.vlen);
 390  Map<MatrixXd> eigen_W(m_W.matrix, m_W.num_rows, m_W.num_cols);
 391  Map<MatrixXd> eigen_B(m_B.matrix, m_B.num_rows, m_B.num_cols);
 392 
 393  // get the sigma variable from the Gaussian likelihood model
 395  float64_t sigma=lik->get_sigma();
 396  SG_UNREF(lik);
 397 
 398  SGVector<float64_t> result(1);
 399 
 400  result[0]=CMath::sq(sigma)*(VectorXd::Ones(m_dg.vlen).cwiseQuotient(
 401  eigen_dg).sum()-eigen_W.cwiseProduct(eigen_W).sum()-eigen_al.dot(eigen_al));
 402 
 403  float64_t dKuui=2.0*m_ind_noise;
 404  MatrixXd R=-dKuui*eigen_B;
 405  VectorXd v=-R.cwiseProduct(eigen_B).colwise().sum().adjoint();
 406 
 407  result[0]=result[0]+((eigen_w.dot(dKuui*eigen_w))-eigen_al.dot(
 408  v.cwiseProduct(eigen_al))-eigen_W.cwiseProduct(eigen_W).colwise().sum().dot(v)-
 409  (R*eigen_W.adjoint()).cwiseProduct(eigen_B*eigen_W.adjoint()).sum())/2.0;
 410 
 411  return result;
 412 }
413 
// Derivative of the negative log marginal likelihood w.r.t. a kernel
// parameter. For vector parameters one derivative is computed per element
// (index i); the kernel is re-initialized per feature pair to obtain the
// gradients of Ktrtr (diagonal only), Kuu and Ktru, then the same GPML
// infFITC.m expression as the scale derivative is evaluated.
// NOTE(review): originals 414/429/445-447/457 (signature first line, the
// REQUIRE opening for the length check, and the first gradient fetches in
// each branch) were dropped by the extraction.
415  const TParameter* param)
 416 {
 417  // create eigen representation of dg, al, w, W, B
 418  Map<VectorXd> eigen_dg(m_dg.vector, m_dg.vlen);
 419  Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
 420  Map<VectorXd> eigen_w(m_w.vector, m_w.vlen);
 421  Map<MatrixXd> eigen_W(m_W.matrix, m_W.num_rows, m_W.num_cols);
 422  Map<MatrixXd> eigen_B(m_B.matrix, m_B.num_rows, m_B.num_cols);
 423 
 424  SGVector<float64_t> result;
 425 
 426  if (param->m_datatype.m_ctype==CT_VECTOR ||
 427  param->m_datatype.m_ctype==CT_SGVECTOR)
 428  {
 430  "Length of the parameter %s should not be NULL\n", param->m_name)
 431  result=SGVector<float64_t>(*(param->m_datatype.m_length_y));
 432  }
 433  else
 434  {
 435  result=SGVector<float64_t>(1);
 436  }
 437 
 438  for (index_t i=0; i<result.vlen; i++)
 439  {
 440  SGVector<float64_t> deriv_trtr;
 441  SGMatrix<float64_t> deriv_uu;
 442  SGMatrix<float64_t> deriv_tru;
 443 
 444  if (result.vlen==1)
 445  {
 448 
 449  m_kernel->init(m_latent_features, m_latent_features);
 450  deriv_uu=m_kernel->get_parameter_gradient(param);
 451 
 452  m_kernel->init(m_latent_features, m_features);
 453  deriv_tru=m_kernel->get_parameter_gradient(param);
 454  }
 455  else
 456  {
 458  deriv_trtr=m_kernel->get_parameter_gradient(param, i).get_diagonal_vector();
 459 
 460  m_kernel->init(m_latent_features, m_latent_features);
 461  deriv_uu=m_kernel->get_parameter_gradient(param, i);
 462 
 463  m_kernel->init(m_latent_features, m_features);
 464  deriv_tru=m_kernel->get_parameter_gradient(param, i);
 465  }
 466 
 467  m_kernel->cleanup();
 468 
 469  // create eigen representation of derivatives
 470  Map<VectorXd> ddiagKi(deriv_trtr.vector, deriv_trtr.vlen);
 471  Map<MatrixXd> dKuui(deriv_uu.matrix, deriv_uu.num_rows,
 472  deriv_uu.num_cols);
 473  Map<MatrixXd> dKui(deriv_tru.matrix, deriv_tru.num_rows,
 474  deriv_tru.num_cols);
 475 
 476  // compute R=2*dKui-dKuui*B
 477  MatrixXd R=2*dKui-dKuui*eigen_B;
 478 
 479  // compute v=ddiagKi-sum(R.*B, 1)'
 480  VectorXd v=ddiagKi-R.cwiseProduct(eigen_B).colwise().sum().adjoint();
 481 
 482  // compute dnlZ=(ddiagKi'*(1./g_sn2)+w'*(dKuui*w-2*(dKui*al))-al'*
 483  // (v.*al)-sum(W.*W,1)*v- sum(sum((R*W').*(B*W'))))/2;
 484  result[i]=(ddiagKi.dot(VectorXd::Ones(m_dg.vlen).cwiseQuotient(eigen_dg))+
 485  eigen_w.dot(dKuui*eigen_w-2*(dKui*eigen_al))-
 486  eigen_al.dot(v.cwiseProduct(eigen_al))-
 487  eigen_W.cwiseProduct(eigen_W).colwise().sum().dot(v)-
 488  (R*eigen_W.adjoint()).cwiseProduct(eigen_B*eigen_W.adjoint()).sum())/2.0;
 489  }
 490 
 491  return result;
 492 }
493 
// Derivative of the negative log marginal likelihood w.r.t. a mean-function
// parameter: dnlZ = -dmu' * alpha for each element of a vector parameter.
// NOTE(review): originals 494/505/517/520/522 (signature first line, the
// REQUIRE opening for the length check, the `dmu` declaration, and both
// branch bodies fetching the mean derivative) were dropped by the
// extraction -- the if/else below is incomplete as shown.
495  const TParameter* param)
 496 {
 497  // create eigen representation of al vector
 498  Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
 499 
 500  SGVector<float64_t> result;
 501 
 502  if (param->m_datatype.m_ctype==CT_VECTOR ||
 503  param->m_datatype.m_ctype==CT_SGVECTOR)
 504  {
 506  "Length of the parameter %s should not be NULL\n", param->m_name)
 507 
 508  result=SGVector<float64_t>(*(param->m_datatype.m_length_y));
 509  }
 510  else
 511  {
 512  result=SGVector<float64_t>(1);
 513  }
 514 
 515  for (index_t i=0; i<result.vlen; i++)
 516  {
 518 
 519  if (result.vlen==1)
 521  else
 523 
 524  Map<VectorXd> eigen_dmu(dmu.vector, dmu.vlen);
 525 
 526  // compute dnlZ=-dm'*al
 527  result[i]=-eigen_dmu.dot(eigen_al);
 528  }
 529 
 530  return result;
 531 }
532 
533 #endif /* HAVE_EIGEN3 */

SHOGUN Machine Learning Toolbox - Documentation