SHOGUN  4.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
FITCInferenceMethod.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (W) 2015 Wu Lin
4  * Written (W) 2013 Roman Votyakov
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice, this
11  * list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright notice,
13  * this list of conditions and the following disclaimer in the documentation
14  * and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  * The views and conclusions contained in the software and documentation are those
28  * of the authors and should not be interpreted as representing official policies,
29  * either expressed or implied, of the Shogun Development Team.
30  *
31  */
33 
34 #ifdef HAVE_EIGEN3
35 
40 
41 using namespace shogun;
42 using namespace Eigen;
43 
45 {
46  init();
47 }
48 
50  CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod, CFeatures* lat)
51  : CSingleFITCLaplacianBase(kern, feat, m, lab, mod, lat)
52 {
53  init();
54 }
55 
56 void CFITCInferenceMethod::init()
57 {
58 }
59 
61 {
62 }
64 {
66 
67  if (!m_gradient_update)
68  {
69  update_deriv();
70  m_gradient_update=true;
72  }
73 }
74 
76 {
77  SG_DEBUG("entering\n");
78 
80  update_chol();
81  update_alpha();
82  m_gradient_update=false;
84 
85  SG_DEBUG("leaving\n");
86 }
87 
89  CInferenceMethod* inference)
90 {
91  if (inference==NULL)
92  return NULL;
93 
94  if (inference->get_inference_type()!=INF_FITC_REGRESSION)
95  SG_SERROR("Provided inference is not of type CFITCInferenceMethod!\n")
96 
97  SG_REF(inference);
98  return (CFITCInferenceMethod*)inference;
99 }
100 
102 {
104 
106  "FITC inference method can only use Gaussian likelihood function\n")
107  REQUIRE(m_labels->get_label_type()==LT_REGRESSION, "Labels must be type "
108  "of CRegressionLabels\n")
109 }
110 
112 {
114  update();
115 
116  // get the sigma variable from the Gaussian likelihood model
118  float64_t sigma=lik->get_sigma();
119  SG_UNREF(lik);
120 
121  // compute diagonal vector: sW=1/sigma
123  result.fill_vector(result.vector, m_features->get_num_vectors(), 1.0/sigma);
124 
125  return result;
126 }
127 
129 {
131  update();
132 
133  //time complexity of the following operations is O(m*n)
134 
135  // create eigen representations of chol_utr, dg, r, be
138  Map<VectorXd> eigen_t(m_t.vector, m_t.vlen);
139  Map<VectorXd> eigen_r(m_r.vector, m_r.vlen);
140  Map<VectorXd> eigen_be(m_be.vector, m_be.vlen);
141 
142  // compute negative log marginal likelihood:
143  // nlZ=sum(log(diag(utr)))+(sum(log(dg))+r'*r-be'*be+n*log(2*pi))/2
144  float64_t result=eigen_chol_utr.diagonal().array().log().sum()+
145  (-eigen_t.array().log().sum()+eigen_r.dot(eigen_r)-eigen_be.dot(eigen_be)+
147 
148  return result;
149 }
150 
152 {
153  //time complexits O(m^2*n)
154 
155  // get the sigma variable from the Gaussian likelihood model
157  float64_t sigma=lik->get_sigma();
158  SG_UNREF(lik);
159 
160  // eigen3 representation of covariance matrix of inducing features (m_kuu)
161  // and training features (m_ktru)
164 
166 
167  // solve Luu' * Luu = Kuu + m_ind_noise * I
168  //Luu = chol(Kuu+snu2*eye(nu)); % Kuu + snu2*I = Luu'*Luu
169  LLT<MatrixXd> Luu(eigen_kuu*CMath::exp(m_log_scale*2.0)+CMath::exp(m_log_ind_noise)*MatrixXd::Identity(
171 
172  // create shogun and eigen3 representation of cholesky of covariance of
173  // inducing features Luu (m_chol_uu and eigen_chol_uu)
174  m_chol_uu=SGMatrix<float64_t>(Luu.rows(), Luu.cols());
177  eigen_chol_uu=Luu.matrixU();
178 
179  // solve Luu' * V = Ktru
180  //V = Luu'\Ku; % V = inv(Luu')*Ku => V'*V = Q
181 
184  V=eigen_chol_uu.triangularView<Upper>().adjoint().solve(eigen_ktru*
185  CMath::exp(m_log_scale*2.0));
186 
187  // create shogun and eigen3 representation of
188  // dg = diag(K) + sn2 - diag(Q)
189  // t = 1/dg
191  Map<VectorXd> eigen_t(m_t.vector, m_t.vlen);
192 
193  //g_sn2 = diagK + sn2 - sum(V.*V,1)'; % g + sn2 = diag(K) + sn2 - diag(Q)
194  eigen_t=eigen_ktrtr_diag*CMath::exp(m_log_scale*2.0)+CMath::sq(sigma)*
195  VectorXd::Ones(m_t.vlen)-(V.cwiseProduct(V)).colwise().sum().adjoint();
196  eigen_t=MatrixXd::Ones(eigen_t.rows(),1).cwiseQuotient(eigen_t);
197 
198  // solve Lu' * Lu = V * diag(1/dg) * V' + I
199  //Lu = chol(eye(nu) + (V./repmat(g_sn2',nu,1))*V'); % Lu'*Lu=I+V*diag(1/g_sn2)*V'
200  LLT<MatrixXd> Lu(V*((VectorXd::Ones(m_t.vlen)).cwiseProduct(eigen_t)).asDiagonal()*
201  V.adjoint()+MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols));
202 
203  // create shogun and eigen3 representation of cholesky of covariance of
204  // training features Luu (m_chol_utr and eigen_chol_utr)
205  m_chol_utr=SGMatrix<float64_t>(Lu.rows(), Lu.cols());
206  Map<MatrixXd> eigen_chol_utr(m_chol_utr.matrix, m_chol_utr.num_rows,
207  m_chol_utr.num_cols);
208  eigen_chol_utr=Lu.matrixU();
209 
210  // create eigen representation of labels and mean vectors
211  SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels();
212  Map<VectorXd> eigen_y(y.vector, y.vlen);
214  Map<VectorXd> eigen_m(m.vector, m.vlen);
215 
216  // compute sgrt_dg = sqrt(dg)
217  VectorXd sqrt_t=eigen_t.array().sqrt();
218 
219  // create shogun and eigen3 representation of labels adjusted for
220  // noise and means (m_r)
221  //r = (y-m)./sqrt(g_sn2);
223  Map<VectorXd> eigen_r(m_r.vector, m_r.vlen);
224  eigen_r=(eigen_y-eigen_m).cwiseProduct(sqrt_t);
225 
226  // compute be
227  //be = Lu'\(V*(r./sqrt(g_sn2)));
228  m_be=SGVector<float64_t>(m_chol_utr.num_cols);
229  Map<VectorXd> eigen_be(m_be.vector, m_be.vlen);
230  eigen_be=eigen_chol_utr.triangularView<Upper>().adjoint().solve(
231  V*eigen_r.cwiseProduct(sqrt_t));
232 
233  // compute iKuu
234  //iKuu = solve_chol(Luu,eye(nu));
235  MatrixXd iKuu=Luu.solve(MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols));
236 
237  // create shogun and eigen3 representation of posterior cholesky
238  MatrixXd eigen_prod=eigen_chol_utr*eigen_chol_uu;
241 
242  //post.L = solve_chol(Lu*Luu,eye(nu)) - iKuu;
243  eigen_chol=eigen_prod.triangularView<Upper>().adjoint().solve(
244  MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols));
245  eigen_chol=eigen_prod.triangularView<Upper>().solve(eigen_chol)-iKuu;
246 }
247 
249 {
250  //time complexity O(m^2) since triangular.solve is O(m^2)
255  Map<VectorXd> eigen_be(m_be.vector, m_be.vlen);
256 
257  // create shogun and eigen representations of alpha
258  // and solve Luu * Lu * alpha = be
260  Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
261 
262  //post.alpha = Luu\(Lu\be);
263  eigen_alpha=eigen_chol_utr.triangularView<Upper>().solve(eigen_be);
264  eigen_alpha=eigen_chol_uu.triangularView<Upper>().solve(eigen_alpha);
265 }
266 
268 {
269  //time complexits O(m^2*n)
270 
271  // create eigen representation of Ktru, Lu, Luu, dg, be
277  Map<VectorXd> eigen_t(m_t.vector, m_t.vlen);
278  Map<VectorXd> eigen_be(m_be.vector, m_be.vlen);
279 
280  // get and create eigen representation of labels
281  SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels();
282  Map<VectorXd> eigen_y(y.vector, y.vlen);
283 
284  // get and create eigen representation of mean vector
286  Map<VectorXd> eigen_m(m.vector, m.vlen);
287 
288  // compute V=inv(Luu')*Ku
290 
291  // create shogun and eigen representation of al
293  Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
294 
295  // compute al=(Kt+sn2*eye(n))\y
296  //r = (y-m)./sqrt(g_sn2);
297  //al = r./sqrt(g_sn2) - (V'*(Lu\be))./g_sn2; % al = (Kt+sn2*eye(n))\(y-m)
298  eigen_al=((eigen_y-eigen_m)-(V.adjoint()*
299  eigen_Lu.triangularView<Upper>().solve(eigen_be))).cwiseProduct(eigen_t);
300 
301  // compute inv(Kuu+snu2*I)=iKuu
302  MatrixXd iKuu=eigen_Luu.triangularView<Upper>().adjoint().solve(
303  MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols));
304  iKuu=eigen_Luu.triangularView<Upper>().solve(iKuu);
305 
306  // create shogun and eigen representation of B
307  m_B=SGMatrix<float64_t>(iKuu.rows(), eigen_Ktru.cols());
309 
310  //B = iKuu*Ku; w = B*al;
311  eigen_B=iKuu*eigen_Ktru*CMath::exp(m_log_scale*2.0);
312 
313  // create shogun and eigen representation of w
315  Map<VectorXd> eigen_w(m_w.vector, m_w.vlen);
316 
317  eigen_w=eigen_B*eigen_al;
318 
319  // create shogun and eigen representation of W
322 
323  // compute W=Lu'\(V./repmat(g_sn2',nu,1))
324  //W = Lu'\(V./repmat(g_sn2',nu,1));
325  eigen_W=eigen_Lu.triangularView<Upper>().adjoint().solve(V*VectorXd::Ones(
326  m_t.vlen).cwiseProduct(eigen_t).asDiagonal());
327 }
328 
329 
331 {
333 
335  Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);
336 
337  /*
338  //true posterior mean with equivalent FITC prior
339  //time complexity of the following operations is O(n)
340  Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
341  SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels();
342  Map<VectorXd> eigen_y(y.vector, y.vlen);
343  SGVector<float64_t> m=m_mean->get_mean_vector(m_features);
344  Map<VectorXd> eigen_m(m.vector, m.vlen);
345  CGaussianLikelihood* lik=CGaussianLikelihood::obtain_from_generic(m_model);
346  float64_t sigma=lik->get_sigma();
347  SG_UNREF(lik);
348  eigen_mu=(eigen_y-eigen_m)-eigen_al*CMath::sq(sigma);
349  */
350 
351  //FITC approximated posterior mean
352  Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
354  //time complexity of the following operation is O(m*n)
355  eigen_mu=CMath::exp(m_log_scale*2.0)*eigen_Ktru.adjoint()*eigen_alpha;
356 
357  return SGVector<float64_t>(m_mu);
358 }
359 
361 {
363 
364  //time complexity of the following operations is O(m*n^2)
365  //Warning: the the time complexity increases from O(m^2*n) to O(n^2*m) if this method is called
368  m_Sigma.num_cols);
372 
373  /*
374  //true posterior mean with equivalent FITC prior
375  CGaussianLikelihood* lik=CGaussianLikelihood::obtain_from_generic(m_model);
376  float64_t sigma=lik->get_sigma();
377  SG_UNREF(lik);
378  Map<VectorXd> eigen_t(m_t.vector, m_t.vlen);
379  VectorXd diag_part=CMath::sq(sigma)*eigen_t;
380  // diag(sigma2/dg)*V'*(Lu\eye(n))
381  MatrixXd part1=diag_part.asDiagonal()*eigen_V.adjoint()*
382  (eigen_Lu.triangularView<Upper>().solve(MatrixXd::Identity(
383  m_kuu.num_rows, m_kuu.num_cols)));
384  eigen_Sigma=part1*part1.adjoint();
385  VectorXd part2=(VectorXd::Ones(m_t.vlen)-diag_part)*CMath::sq(sigma);
386  eigen_Sigma+=part2.asDiagonal();
387  */
388 
389  //FITC approximated posterior covariance
390  //TO DO we only need tha diagonal elements of m_ktrtr
392  MatrixXd part1=eigen_V.adjoint()*(eigen_Lu.triangularView<Upper>().solve(MatrixXd::Identity(
394  eigen_Sigma=part1*part1.adjoint();
395  VectorXd part2=eigen_ktrtr_diag*CMath::exp(m_log_scale*2.0)-(
396  eigen_V.cwiseProduct(eigen_V)).colwise().sum().adjoint();
397  eigen_Sigma+=part2.asDiagonal();
398 
400 }
401 
403  const TParameter* param)
404 {
405  //time complexity O(m*n)
406  REQUIRE(!strcmp(param->m_name, "log_sigma"), "Can't compute derivative of "
407  "the nagative log marginal likelihood wrt %s.%s parameter\n",
408  m_model->get_name(), param->m_name)
409 
410  // create eigen representation of dg, al, w, W and B
411  Map<VectorXd> eigen_t(m_t.vector, m_t.vlen);
412  Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
413  Map<VectorXd> eigen_w(m_w.vector, m_w.vlen);
416 
417  // get the sigma variable from the Gaussian likelihood model
419  float64_t sigma=lik->get_sigma();
420  SG_UNREF(lik);
421 
422  SGVector<float64_t> result(1);
423 
424  //diag_dK = 1./g_sn2 - sum(W.*W,1)' - al.*al; % diag(dnlZ/dK)
425  //dnlZ.lik = sn2*sum(diag_dK) without noise term
426  result[0]=CMath::sq(sigma)*(VectorXd::Ones(m_t.vlen).cwiseProduct(
427  eigen_t).sum()-eigen_W.cwiseProduct(eigen_W).sum()-eigen_al.dot(eigen_al));
428 
429  return result;
430 }
431 
432 #endif /* HAVE_EIGEN3 */
virtual const char * get_name() const =0
virtual void check_members() const
static void fill_vector(T *vec, int32_t len, T value)
Definition: SGVector.cpp:223
virtual ELabelType get_label_type() const =0
virtual void update_parameter_hash()
Definition: SGObject.cpp:248
Class that models Gaussian likelihood.
SGVector< float64_t > m_ktrtr_diag
Real Labels are real-valued labels.
SGVector< float64_t > m_alpha
The Inference Method base class.
virtual SGVector< float64_t > get_diagonal_vector()
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:43
SGMatrix< float64_t > m_chol_uu
real valued labels (e.g. for regression, classifier outputs)
Definition: LabelTypes.h:22
static T sq(T x)
Definition: Math.h:450
Definition: SGMatrix.h:20
virtual ELikelihoodModelType get_model_type() const
parameter struct
virtual int32_t get_num_vectors() const =0
#define REQUIRE(x,...)
Definition: SGIO.h:206
virtual SGVector< float64_t > get_posterior_mean()
index_t num_cols
Definition: SGMatrix.h:378
virtual SGVector< float64_t > get_mean_vector(const CFeatures *features) const =0
An abstract class of the mean function.
Definition: MeanFunction.h:49
The Fully Independent Conditional Training inference base class for Laplace and regression for 1-D la...
#define SG_REF(x)
Definition: SGObject.h:51
index_t num_rows
Definition: SGMatrix.h:376
SGMatrix< float64_t > m_chol_utr
virtual float64_t get_negative_log_marginal_likelihood()
virtual void check_members() const
index_t vlen
Definition: SGVector.h:494
SGMatrix< float64_t > m_L
double float64_t
Definition: common.h:50
static CGaussianLikelihood * obtain_from_generic(CLikelihoodModel *lik)
#define SG_UNREF(x)
Definition: SGObject.h:52
#define SG_DEBUG(...)
Definition: SGIO.h:107
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
The class Features is the base class of all feature objects.
Definition: Features.h:68
#define SG_SERROR(...)
Definition: SGIO.h:179
The Fully Independent Conditional Training inference method class.
virtual SGVector< float64_t > get_derivative_wrt_likelihood_model(const TParameter *param)
static float64_t exp(float64_t x)
Definition: Math.h:621
static float64_t log(float64_t v)
Definition: Math.h:922
virtual EInferenceType get_inference_type() const
The Kernel base class.
Definition: Kernel.h:158
virtual bool parameter_hash_changed()
Definition: SGObject.cpp:262
The Likelihood model base class.
CLikelihoodModel * m_model
virtual SGMatrix< float64_t > get_posterior_covariance()
static CFITCInferenceMethod * obtain_from_generic(CInferenceMethod *inference)
static const float64_t PI
Definition: Math.h:2055

SHOGUN Machine Learning Toolbox - Documentation