FITCInferenceMethod.cpp
/*
 * Copyright (c) The Shogun Machine Learning Toolbox
 * Written (W) 2015 Wu Lin
 * Written (W) 2013 Roman Votyakov
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation are those
 * of the authors and should not be interpreted as representing official policies,
 * either expressed or implied, of the Shogun Development Team.
 *
 */
#include <shogun/machine/gp/FITCInferenceMethod.h>

#ifdef HAVE_EIGEN3

#include <shogun/machine/gp/GaussianLikelihood.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/eigen3.h>

using namespace shogun;
using namespace Eigen;

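/* This file implements FITC (Fully Independent Training Conditional)
 * inference for GP regression with a Gaussian likelihood. With n training
 * points and m inducing points, the training covariance is replaced by
 * Q + diag(dg), where Q = Ktru'*inv(Kuu)*Ktru (up to the m_ind_noise jitter)
 * and dg = diag(Ktrtr) + sn2 - diag(Q), so that inference costs O(m^2*n)
 * instead of O(n^3).
 */
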
CFITCInferenceMethod::CFITCInferenceMethod() : CSingleFITCLaplacianBase()
{
    init();
}

CFITCInferenceMethod::CFITCInferenceMethod(CKernel* kern, CFeatures* feat,
        CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod, CFeatures* lat)
        : CSingleFITCLaplacianBase(kern, feat, m, lab, mod, lat)
{
    init();
}

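/* A minimal usage sketch for the constructor above (illustrative only; the
 * feature/label objects, kernel parameters and the CGaussianProcessRegression
 * wrapper shown here are assumptions about the surrounding Shogun 4.x API,
 * not part of this file):
 *
 *   CDenseFeatures<float64_t>* feats=new CDenseFeatures<float64_t>(X);
 *   CDenseFeatures<float64_t>* inducing=new CDenseFeatures<float64_t>(Xu);
 *   CRegressionLabels* labels=new CRegressionLabels(y);
 *   CGaussianKernel* kernel=new CGaussianKernel(10, 2.0);
 *   CZeroMean* mean=new CZeroMean();
 *   CGaussianLikelihood* lik=new CGaussianLikelihood();
 *
 *   CFITCInferenceMethod* inf=new CFITCInferenceMethod(kernel, feats, mean,
 *       labels, lik, inducing);
 *   CGaussianProcessRegression* gpr=new CGaussianProcessRegression(inf);
 *   gpr->train();
 */
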
void CFITCInferenceMethod::init()
{
}

CFITCInferenceMethod::~CFITCInferenceMethod()
{
}

void CFITCInferenceMethod::update()
{
    SG_DEBUG("entering\n");

    CInferenceMethod::update();
    update_chol();
    update_alpha();
    update_deriv();
    update_parameter_hash();

    SG_DEBUG("leaving\n");
}

CFITCInferenceMethod* CFITCInferenceMethod::obtain_from_generic(
        CInferenceMethod* inference)
{
    if (inference==NULL)
        return NULL;

    if (inference->get_inference_type()!=INF_FITC_REGRESSION)
        SG_SERROR("Provided inference is not of type CFITCInferenceMethod!\n")

    SG_REF(inference);
    return (CFITCInferenceMethod*)inference;
}

void CFITCInferenceMethod::check_members() const
{
    CSingleFITCLaplacianBase::check_members();

    REQUIRE(m_model->get_model_type()==LT_GAUSSIAN,
        "FITC inference method can only use Gaussian likelihood function\n")
    REQUIRE(m_labels->get_label_type()==LT_REGRESSION, "Labels must be type "
        "of CRegressionLabels\n")
}

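/* For a Gaussian likelihood the diagonal vector sW is constant: one entry of
 * 1/sigma per training point, where sigma is the observation noise standard
 * deviation taken from CGaussianLikelihood.
 */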
SGVector<float64_t> CFITCInferenceMethod::get_diagonal_vector()
{
    if (parameter_hash_changed())
        update();

    // get the sigma variable from the Gaussian likelihood model
    CGaussianLikelihood* lik=CGaussianLikelihood::obtain_from_generic(m_model);
    float64_t sigma=lik->get_sigma();
    SG_UNREF(lik);

    // compute diagonal vector: sW=1/sigma
    SGVector<float64_t> result(m_features->get_num_vectors());
    result.fill_vector(result.vector, m_features->get_num_vectors(), 1.0/sigma);

    return result;
}

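/* The negative log marginal likelihood evaluated below is
 *   nlZ = sum(log(diag(Lu))) + (sum(log(dg)) + r'*r - be'*be + n*log(2*pi))/2,
 * i.e. -log N(y | m, Q + diag(dg)) with the log-determinant and the quadratic
 * form expanded through the Cholesky factors computed in update_chol().
 * Since t = 1/dg is what is stored, sum(log(dg)) appears in the code as
 * -sum(log(t)).
 */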
float64_t CFITCInferenceMethod::get_negative_log_marginal_likelihood()
{
    if (parameter_hash_changed())
        update();

    //time complexity of the following operations is O(m*n)

    // create eigen representations of chol_utr, t, r and be
    Map<MatrixXd> eigen_chol_utr(m_chol_utr.matrix, m_chol_utr.num_rows,
        m_chol_utr.num_cols);
    Map<VectorXd> eigen_t(m_t.vector, m_t.vlen);
    Map<VectorXd> eigen_r(m_r.vector, m_r.vlen);
    Map<VectorXd> eigen_be(m_be.vector, m_be.vlen);

    // compute negative log marginal likelihood:
    // nlZ=sum(log(diag(utr)))+(sum(log(dg))+r'*r-be'*be+n*log(2*pi))/2
    float64_t result=eigen_chol_utr.diagonal().array().log().sum()+
        (-eigen_t.array().log().sum()+eigen_r.dot(eigen_r)-eigen_be.dot(eigen_be)+
        m_labels->get_num_labels()*CMath::log(2*CMath::PI))/2.0;

    return result;
}

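/* update_chol() builds the factorizations reused by every other routine:
 *   Luu'*Luu = Kuu + m_ind_noise*I              (m_chol_uu)
 *   V        = Luu'\Ktru, so that V'*V = Q      (m_V)
 *   dg       = diag(Ktrtr) + sn2 - diag(Q),  t = 1/dg   (m_t)
 *   Lu'*Lu   = I + V*diag(t)*V'                 (m_chol_utr)
 *   r        = (y-m).*sqrt(t),  be = Lu'\(V*(r.*sqrt(t)))
 *   post.L   = solve_chol(Lu*Luu, I) - inv(Kuu + m_ind_noise*I)   (m_L)
 * The overall cost is O(m^2*n).
 */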
void CFITCInferenceMethod::update_chol()
{
    //time complexity O(m^2*n)

    // get the sigma variable from the Gaussian likelihood model
    CGaussianLikelihood* lik=CGaussianLikelihood::obtain_from_generic(m_model);
    float64_t sigma=lik->get_sigma();
    SG_UNREF(lik);

    // eigen3 representation of covariance matrix of inducing features (m_kuu)
    // and training features (m_ktru)
    Map<MatrixXd> eigen_kuu(m_kuu.matrix, m_kuu.num_rows, m_kuu.num_cols);
    Map<MatrixXd> eigen_ktru(m_ktru.matrix, m_ktru.num_rows, m_ktru.num_cols);

    Map<VectorXd> eigen_ktrtr_diag(m_ktrtr_diag.vector, m_ktrtr_diag.vlen);

    // solve Luu' * Luu = Kuu + m_ind_noise * I
    //Luu = chol(Kuu+snu2*eye(nu)); % Kuu + snu2*I = Luu'*Luu
    LLT<MatrixXd> Luu(eigen_kuu*CMath::sq(m_scale)+m_ind_noise*MatrixXd::Identity(
        m_kuu.num_rows, m_kuu.num_cols));

    // create shogun and eigen3 representation of cholesky of covariance of
    // inducing features Luu (m_chol_uu and eigen_chol_uu)
    m_chol_uu=SGMatrix<float64_t>(Luu.rows(), Luu.cols());
    Map<MatrixXd> eigen_chol_uu(m_chol_uu.matrix, m_chol_uu.num_rows,
        m_chol_uu.num_cols);
    eigen_chol_uu=Luu.matrixU();

    // solve Luu' * V = Ktru
    //V = Luu'\Ku; % V = inv(Luu')*Ku => V'*V = Q

    m_V=SGMatrix<float64_t>(m_ktru.num_rows, m_ktru.num_cols);
    Map<MatrixXd> V(m_V.matrix, m_V.num_rows, m_V.num_cols);
    V=eigen_chol_uu.triangularView<Upper>().adjoint().solve(eigen_ktru*
        CMath::sq(m_scale));

    // create shogun and eigen3 representation of
    // dg = diag(K) + sn2 - diag(Q)
    // t = 1/dg
    m_t=SGVector<float64_t>(m_ktrtr_diag.vlen);
    Map<VectorXd> eigen_t(m_t.vector, m_t.vlen);

    //g_sn2 = diagK + sn2 - sum(V.*V,1)'; % g + sn2 = diag(K) + sn2 - diag(Q)
    eigen_t=eigen_ktrtr_diag*CMath::sq(m_scale)+CMath::sq(sigma)*
        VectorXd::Ones(m_t.vlen)-(V.cwiseProduct(V)).colwise().sum().adjoint();
    eigen_t=MatrixXd::Ones(eigen_t.rows(),1).cwiseQuotient(eigen_t);

    // solve Lu' * Lu = V * diag(1/dg) * V' + I
    //Lu = chol(eye(nu) + (V./repmat(g_sn2',nu,1))*V'); % Lu'*Lu=I+V*diag(1/g_sn2)*V'
    LLT<MatrixXd> Lu(V*((VectorXd::Ones(m_t.vlen)).cwiseProduct(eigen_t)).asDiagonal()*
        V.adjoint()+MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols));

    // create shogun and eigen3 representation of the cholesky factor Lu
    // (m_chol_utr and eigen_chol_utr)
    m_chol_utr=SGMatrix<float64_t>(Lu.rows(), Lu.cols());
    Map<MatrixXd> eigen_chol_utr(m_chol_utr.matrix, m_chol_utr.num_rows,
        m_chol_utr.num_cols);
    eigen_chol_utr=Lu.matrixU();

    // create eigen representation of labels and mean vectors
    SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels();
    Map<VectorXd> eigen_y(y.vector, y.vlen);
    SGVector<float64_t> m=m_mean->get_mean_vector(m_features);
    Map<VectorXd> eigen_m(m.vector, m.vlen);

    // compute sqrt_t = sqrt(t), i.e. 1/sqrt(dg)
    VectorXd sqrt_t=eigen_t.array().sqrt();

    // create shogun and eigen3 representation of labels adjusted for
    // noise and means (m_r)
    //r = (y-m)./sqrt(g_sn2);
    m_r=SGVector<float64_t>(y.vlen);
    Map<VectorXd> eigen_r(m_r.vector, m_r.vlen);
    eigen_r=(eigen_y-eigen_m).cwiseProduct(sqrt_t);

    // compute be
    //be = Lu'\(V*(r./sqrt(g_sn2)));
    m_be=SGVector<float64_t>(m_chol_utr.num_cols);
    Map<VectorXd> eigen_be(m_be.vector, m_be.vlen);
    eigen_be=eigen_chol_utr.triangularView<Upper>().adjoint().solve(
        V*eigen_r.cwiseProduct(sqrt_t));

    // compute iKuu
    //iKuu = solve_chol(Luu,eye(nu));
    MatrixXd iKuu=Luu.solve(MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols));

    // create shogun and eigen3 representation of posterior cholesky
    MatrixXd eigen_prod=eigen_chol_utr*eigen_chol_uu;
    m_L=SGMatrix<float64_t>(m_kuu.num_rows, m_kuu.num_cols);
    Map<MatrixXd> eigen_chol(m_L.matrix, m_L.num_rows, m_L.num_cols);

    //post.L = solve_chol(Lu*Luu,eye(nu)) - iKuu;
    eigen_chol=eigen_prod.triangularView<Upper>().adjoint().solve(
        MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols));
    eigen_chol=eigen_prod.triangularView<Upper>().solve(eigen_chol)-iKuu;
}

void CFITCInferenceMethod::update_alpha()
{
    //time complexity O(m^2) since triangular.solve is O(m^2)
    Map<MatrixXd> eigen_chol_uu(m_chol_uu.matrix, m_chol_uu.num_rows,
        m_chol_uu.num_cols);
    Map<MatrixXd> eigen_chol_utr(m_chol_utr.matrix, m_chol_utr.num_rows,
        m_chol_utr.num_cols);
    Map<VectorXd> eigen_be(m_be.vector, m_be.vlen);

    // create shogun and eigen representations of alpha
    // and solve Luu * Lu * alpha = be
    m_alpha=SGVector<float64_t>(m_chol_uu.num_rows);
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

    //post.alpha = Luu\(Lu\be);
    eigen_alpha=eigen_chol_utr.triangularView<Upper>().solve(eigen_be);
    eigen_alpha=eigen_chol_uu.triangularView<Upper>().solve(eigen_alpha);
}

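/* update_deriv() precomputes the quantities consumed by the derivative code
 * (see get_derivative_wrt_likelihood_model() below):
 *   al = (Q + diag(dg))\(y-m)            (m_al)
 *   B  = inv(Kuu + m_ind_noise*I)*Ktru   (m_B),  w = B*al  (m_w)
 *   W  = Lu'\(V*diag(t))                 (m_Rvdd)
 */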
void CFITCInferenceMethod::update_deriv()
{
    //time complexity O(m^2*n)

    // create eigen representation of Ktru, Lu, Luu, t, be
    Map<MatrixXd> eigen_Ktru(m_ktru.matrix, m_ktru.num_rows, m_ktru.num_cols);
    Map<MatrixXd> eigen_Lu(m_chol_utr.matrix, m_chol_utr.num_rows,
        m_chol_utr.num_cols);
    Map<MatrixXd> eigen_Luu(m_chol_uu.matrix, m_chol_uu.num_rows,
        m_chol_uu.num_cols);
    Map<VectorXd> eigen_t(m_t.vector, m_t.vlen);
    Map<VectorXd> eigen_be(m_be.vector, m_be.vlen);

    // get and create eigen representation of labels
    SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels();
    Map<VectorXd> eigen_y(y.vector, y.vlen);

    // get and create eigen representation of mean vector
    SGVector<float64_t> m=m_mean->get_mean_vector(m_features);
    Map<VectorXd> eigen_m(m.vector, m.vlen);

    // compute V=inv(Luu')*Ku
    Map<MatrixXd> V(m_V.matrix, m_V.num_rows, m_V.num_cols);

    // create shogun and eigen representation of al
    m_al=SGVector<float64_t>(m.vlen);
    Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);

    // compute al=(Kt+sn2*eye(n))\(y-m)
    //r = (y-m)./sqrt(g_sn2);
    //al = r./sqrt(g_sn2) - (V'*(Lu\be))./g_sn2; % al = (Kt+sn2*eye(n))\(y-m)
    eigen_al=((eigen_y-eigen_m)-(V.adjoint()*
        eigen_Lu.triangularView<Upper>().solve(eigen_be))).cwiseProduct(eigen_t);

    // compute inv(Kuu+snu2*I)=iKuu
    MatrixXd iKuu=eigen_Luu.triangularView<Upper>().adjoint().solve(
        MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols));
    iKuu=eigen_Luu.triangularView<Upper>().solve(iKuu);

    // create shogun and eigen representation of B
    m_B=SGMatrix<float64_t>(iKuu.rows(), eigen_Ktru.cols());
    Map<MatrixXd> eigen_B(m_B.matrix, m_B.num_rows, m_B.num_cols);

    //B = iKuu*Ku; w = B*al;
    eigen_B=iKuu*eigen_Ktru*CMath::sq(m_scale);

    // create shogun and eigen representation of w
    m_w=SGVector<float64_t>(m_B.num_rows);
    Map<VectorXd> eigen_w(m_w.vector, m_w.vlen);

    eigen_w=eigen_B*eigen_al;

    // create shogun and eigen representation of W
    m_Rvdd=SGMatrix<float64_t>(m_V.num_rows, m_V.num_cols);
    Map<MatrixXd> eigen_W(m_Rvdd.matrix, m_Rvdd.num_rows, m_Rvdd.num_cols);

    // compute W=Lu'\(V./repmat(g_sn2',nu,1))
    //W = Lu'\(V./repmat(g_sn2',nu,1));
    eigen_W=eigen_Lu.triangularView<Upper>().adjoint().solve(V*VectorXd::Ones(
        m_t.vlen).cwiseProduct(eigen_t).asDiagonal());
}

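/* The FITC approximate posterior mean returned below is mu = Ktru'*alpha,
 * with alpha = Luu\(Lu\be) from update_alpha(). The commented-out block
 * inside the function keeps the expression for the true posterior mean under
 * the equivalent FITC prior for reference.
 */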
SGVector<float64_t> CFITCInferenceMethod::get_posterior_mean()
{
    if (parameter_hash_changed())
        update();

    m_mu=SGVector<float64_t>(m_ktru.num_cols);
    Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);

    /*
    //true posterior mean with equivalent FITC prior
    //time complexity of the following operations is O(n)
    Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
    SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels();
    Map<VectorXd> eigen_y(y.vector, y.vlen);
    SGVector<float64_t> m=m_mean->get_mean_vector(m_features);
    Map<VectorXd> eigen_m(m.vector, m.vlen);
    CGaussianLikelihood* lik=CGaussianLikelihood::obtain_from_generic(m_model);
    float64_t sigma=lik->get_sigma();
    SG_UNREF(lik);
    eigen_mu=(eigen_y-eigen_m)-eigen_al*CMath::sq(sigma);
    */

    //FITC approximated posterior mean
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
    Map<MatrixXd> eigen_Ktru(m_ktru.matrix, m_ktru.num_rows, m_ktru.num_cols);
    //time complexity of the following operation is O(m*n)
    eigen_mu=CMath::sq(m_scale)*eigen_Ktru.adjoint()*eigen_alpha;

    return SGVector<float64_t>(m_mu);
}

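/* The FITC approximate posterior covariance returned below is
 *   Sigma = V'*inv(Lu)*inv(Lu')*V + diag(diag(Ktrtr) - diag(Q)),
 * a low-rank term plus a diagonal correction. Forming the full n x n matrix
 * costs O(m*n^2), hence the warning inside the function that this call is
 * more expensive than the rest of the FITC pipeline.
 */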
SGMatrix<float64_t> CFITCInferenceMethod::get_posterior_covariance()
{
    if (parameter_hash_changed())
        update();
    //time complexity of the following operations is O(m*n^2)
    //Warning: the time complexity increases from O(m^2*n) to O(n^2*m) if this method is called
    m_Sigma=SGMatrix<float64_t>(m_ktrtr_diag.vlen, m_ktrtr_diag.vlen);
    Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows,
        m_Sigma.num_cols);
    Map<MatrixXd> eigen_V(m_V.matrix, m_V.num_rows, m_V.num_cols);
    Map<MatrixXd> eigen_Lu(m_chol_utr.matrix, m_chol_utr.num_rows,
        m_chol_utr.num_cols);

    /*
    //true posterior covariance with equivalent FITC prior
    CGaussianLikelihood* lik=CGaussianLikelihood::obtain_from_generic(m_model);
    float64_t sigma=lik->get_sigma();
    SG_UNREF(lik);
    Map<VectorXd> eigen_t(m_t.vector, m_t.vlen);
    VectorXd diag_part=CMath::sq(sigma)*eigen_t;
    // diag(sigma2/dg)*V'*(Lu\eye(n))
    MatrixXd part1=diag_part.asDiagonal()*eigen_V.adjoint()*
        (eigen_Lu.triangularView<Upper>().solve(MatrixXd::Identity(
        m_kuu.num_rows, m_kuu.num_cols)));
    eigen_Sigma=part1*part1.adjoint();
    VectorXd part2=(VectorXd::Ones(m_t.vlen)-diag_part)*CMath::sq(sigma);
    eigen_Sigma+=part2.asDiagonal();
    */

    //FITC approximated posterior covariance
    //TODO: we only need the diagonal elements of m_ktrtr
    Map<VectorXd> eigen_ktrtr_diag(m_ktrtr_diag.vector, m_ktrtr_diag.vlen);
    MatrixXd part1=eigen_V.adjoint()*(eigen_Lu.triangularView<Upper>().solve(MatrixXd::Identity(
        m_kuu.num_rows, m_kuu.num_cols)));
    eigen_Sigma=part1*part1.adjoint();
    VectorXd part2=eigen_ktrtr_diag*CMath::sq(m_scale)-(
        eigen_V.cwiseProduct(eigen_V)).colwise().sum().adjoint();
    eigen_Sigma+=part2.asDiagonal();

    return SGMatrix<float64_t>(m_Sigma);
}

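/* The noise variance sn2 enters the FITC training covariance Q + diag(dg)
 * only through the diagonal dg = diag(Ktrtr) + sn2 - diag(Q), so the gradient
 * of nlZ with respect to the likelihood parameter only needs the diagonal of
 * dnlZ/dK,
 *   diag_dK = 1./dg - sum(W.*W,1)' - al.*al,
 * assembled below from the t, W and al quantities prepared in update_chol()
 * and update_deriv().
 */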
SGVector<float64_t> CFITCInferenceMethod::get_derivative_wrt_likelihood_model(
        const TParameter* param)
{
    //time complexity O(m*n)
    REQUIRE(!strcmp(param->m_name, "sigma"), "Can't compute derivative of "
        "the negative log marginal likelihood wrt %s.%s parameter\n",
        m_model->get_name(), param->m_name)

    // create eigen representation of t (=1/dg), al, w, W and B
    Map<VectorXd> eigen_t(m_t.vector, m_t.vlen);
    Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
    Map<VectorXd> eigen_w(m_w.vector, m_w.vlen);
    Map<MatrixXd> eigen_W(m_Rvdd.matrix, m_Rvdd.num_rows, m_Rvdd.num_cols);
    Map<MatrixXd> eigen_B(m_B.matrix, m_B.num_rows, m_B.num_cols);

    // get the sigma variable from the Gaussian likelihood model
    CGaussianLikelihood* lik=CGaussianLikelihood::obtain_from_generic(m_model);
    float64_t sigma=lik->get_sigma();
    SG_UNREF(lik);

    SGVector<float64_t> result(1);

    //diag_dK = 1./g_sn2 - sum(W.*W,1)' - al.*al; % diag(dnlZ/dK)
    //dnlZ.lik = sn2*sum(diag_dK) without noise term
    result[0]=CMath::sq(sigma)*(VectorXd::Ones(m_t.vlen).cwiseProduct(
        eigen_t).sum()-eigen_W.cwiseProduct(eigen_W).sum()-eigen_al.dot(eigen_al));

    return result;
}

#endif /* HAVE_EIGEN3 */
