SHOGUN  4.2.0
VarDTCInferenceMethod.cpp
/*
 * Copyright (c) The Shogun Machine Learning Toolbox
 * Written (W) 2015 Wu Lin
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation are those
 * of the authors and should not be interpreted as representing official policies,
 * either expressed or implied, of the Shogun Development Team.
 *
 * This code is specifically adapted from the functions in varsgpLikelihood.m and
 * varsgpPredict.m.
 *
 * The reference paper is
 * Titsias, Michalis K.
 * "Variational learning of inducing variables in sparse Gaussian processes."
 * International Conference on Artificial Intelligence and Statistics. 2009.
 *
 */

#include <shogun/machine/gp/VarDTCInferenceMethod.h>
#include <shogun/machine/gp/GaussianLikelihood.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/eigen3.h>

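/* Summary of the objective (from the reference above): with n training
 * inputs, m inducing inputs and Qnn = Knm*inv(Kmm)*Kmn, the method
 * maximizes Titsias' variational lower bound on the log marginal likelihood
 *
 *   F = log N(y | mean, sigma2*I + Qnn) - (0.5/sigma2)*tr(Knn - Qnn)
 *
 * and the routines below evaluate -F and its derivatives. The trace
 * penalty, stored in m_trk, is what distinguishes this bound from the
 * plain DTC marginal likelihood.
 */
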
using namespace shogun;
using namespace Eigen;

CVarDTCInferenceMethod::CVarDTCInferenceMethod() : CSingleSparseInference()
{
	init();
}

CVarDTCInferenceMethod::CVarDTCInferenceMethod(CKernel* kern, CFeatures* feat,
	CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod, CFeatures* lat)
	: CSingleSparseInference(kern, feat, m, lab, mod, lat)
{
	init();
}
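
/* A minimal construction sketch, assuming the standard Shogun 4.x classes
 * CGaussianKernel, CZeroMean and CGaussianLikelihood, and caller-provided
 * train_feats, lab (CRegressionLabels) and inducing_feats:
 *
 *   CGaussianKernel* kernel=new CGaussianKernel(10, 2.0);
 *   CZeroMean* mean=new CZeroMean();
 *   CGaussianLikelihood* lik=new CGaussianLikelihood();
 *   CVarDTCInferenceMethod* inf=new CVarDTCInferenceMethod(kernel,
 *       train_feats, mean, lab, lik, inducing_feats);
 *   float64_t nlml=inf->get_negative_log_marginal_likelihood();
 *   SG_UNREF(inf);
 *
 * The likelihood must be Gaussian and the labels regression labels; both
 * requirements are enforced in check_members() below.
 */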

void CVarDTCInferenceMethod::init()
{
	m_yy=0.0;
	m_f3=0.0;
	m_sigma2=0.0;
	m_trk=0.0;
	m_Tmm=SGMatrix<float64_t>();
	m_Tnm=SGMatrix<float64_t>();
	m_inv_Lm=SGMatrix<float64_t>();
	m_inv_La=SGMatrix<float64_t>();
	m_Knm_inv_Lm=SGMatrix<float64_t>();

	SG_ADD(&m_yy, "yy", "yy", MS_NOT_AVAILABLE);
	SG_ADD(&m_f3, "f3", "f3", MS_NOT_AVAILABLE);
	SG_ADD(&m_sigma2, "sigma2", "sigma2", MS_NOT_AVAILABLE);
	SG_ADD(&m_trk, "trk", "trk", MS_NOT_AVAILABLE);
	SG_ADD(&m_Tmm, "Tmm", "Tmm", MS_NOT_AVAILABLE);
	SG_ADD(&m_Tnm, "Tnm", "Tnm", MS_NOT_AVAILABLE);
	SG_ADD(&m_inv_Lm, "inv_Lm", "inv_Lm", MS_NOT_AVAILABLE);
	SG_ADD(&m_inv_La, "inv_La", "inv_La", MS_NOT_AVAILABLE);
	SG_ADD(&m_Knm_inv_Lm, "Knm_Inv_Lm", "Knm_Inv_Lm", MS_NOT_AVAILABLE);
}

CVarDTCInferenceMethod::~CVarDTCInferenceMethod()
{
}

void CVarDTCInferenceMethod::compute_gradient()
{
	CInference::compute_gradient();

	if (!m_gradient_update)
	{
		update_deriv();
		m_gradient_update=true;
		update_parameter_hash();
	}
}

void CVarDTCInferenceMethod::update()
{
	SG_DEBUG("entering\n");

	CInference::update();
	update_chol();
	update_alpha();
	m_gradient_update=false;
	update_parameter_hash();

	SG_DEBUG("leaving\n");
}

CVarDTCInferenceMethod* CVarDTCInferenceMethod::obtain_from_generic(
	CInference* inference)
{
	if (inference==NULL)
		return NULL;

	if (inference->get_inference_type()!=INF_KL_SPARSE_REGRESSION)
		SG_SERROR("Provided inference is not of type CVarDTCInferenceMethod!\n")

	SG_REF(inference);
	return (CVarDTCInferenceMethod*)inference;
}

void CVarDTCInferenceMethod::check_members() const
{
	CSingleSparseInference::check_members();

	REQUIRE(m_model->get_model_type()==LT_GAUSSIAN,
		"VarDTC inference method can only use Gaussian likelihood function\n")
	REQUIRE(m_labels->get_label_type()==LT_REGRESSION, "Labels must be type "
		"of CRegressionLabels\n")
}

SGVector<float64_t> CVarDTCInferenceMethod::get_diagonal_vector()
{
	SG_NOTIMPLEMENTED
	//the inference method does not need to use this
	return SGVector<float64_t>();
}
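
/* Reading aid for the bound evaluation below, mirroring varsgpLikelihood.m:
 *   -F012 = (n-m)*log(sigma) + (n/2)*log(2*pi) + yy/(2*sigma2)
 *           + sum(log(diag(La)))
 *   -F3   = -(0.5/sigma2)*||yKnmInvLmInvLa||^2
 *   -TrK  = (0.5/sigma2)*(TrKnn - tr(C))
 * Since invLa is the inverse of the upper-triangular factor La,
 * sum(log(diag(invLa))) = -sum(log(diag(La))), which explains the sign of
 * the log-determinant term in the code.
 */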

float64_t CVarDTCInferenceMethod::get_negative_log_marginal_likelihood()
{
	if (parameter_hash_changed())
		update();

	Map<MatrixXd> eigen_inv_La(m_inv_La.matrix, m_inv_La.num_rows, m_inv_La.num_cols);

	//F012 =-(model.n-model.m)*model.Likelihood.logtheta-0.5*model.n*log(2*pi)-(0.5/sigma2)*(model.yy)-sum(log(diag(La)));
	float64_t neg_f012=0.5*(m_ktru.num_cols-m_ktru.num_rows)*CMath::log(m_sigma2)+
		0.5*m_ktru.num_cols*CMath::log(2.0*CMath::PI)+
		0.5*m_yy/(m_sigma2)-eigen_inv_La.diagonal().array().log().sum();

	//F3 = (0.5/sigma2)*(yKnmInvLmInvLa*yKnmInvLmInvLa');
	float64_t neg_f3=-m_f3;

	//model.TrKnn = sum(model.diagKnn);
	//TrK = - (0.5/sigma2)*(model.TrKnn - sum(diag(C)) );
	float64_t neg_trk=-m_trk;

	//F = F012 + F3 + TrK;
	//F = - F;
	return neg_f012+neg_f3+neg_trk;
}

void CVarDTCInferenceMethod::update_chol()
{
	// get the sigma variable from the Gaussian likelihood model
	CGaussianLikelihood* lik=CGaussianLikelihood::obtain_from_generic(m_model);
	float64_t sigma=lik->get_sigma();
	SG_UNREF(lik);
	m_sigma2=sigma*sigma;

	//m-by-m matrix
	Map<MatrixXd> eigen_kuu(m_kuu.matrix, m_kuu.num_rows, m_kuu.num_cols);

	//m-by-n matrix
	Map<MatrixXd> eigen_ktru(m_ktru.matrix, m_ktru.num_rows, m_ktru.num_cols);

	Map<VectorXd> eigen_ktrtr_diag(m_ktrtr_diag.vector, m_ktrtr_diag.vlen);

	//Lm = chol(model.Kmm + model.jitter*eye(model.m))
	LLT<MatrixXd> Luu(eigen_kuu*CMath::exp(m_log_scale*2.0)+CMath::exp(m_log_ind_noise)*MatrixXd::Identity(
		m_kuu.num_rows, m_kuu.num_cols));
	m_inv_Lm=SGMatrix<float64_t>(Luu.rows(), Luu.cols());
	Map<MatrixXd> eigen_inv_Lm(m_inv_Lm.matrix, m_inv_Lm.num_rows, m_inv_Lm.num_cols);
	//invLm = Lm\eye(model.m);
	eigen_inv_Lm=Luu.matrixU().solve(MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols));

	m_Knm_inv_Lm=SGMatrix<float64_t>(m_ktru.num_cols, m_ktru.num_rows);
	Map<MatrixXd> eigen_Knm_inv_Lm(m_Knm_inv_Lm.matrix, m_Knm_inv_Lm.num_rows, m_Knm_inv_Lm.num_cols);
	//KnmInvLm = model.Knm*invLm;
	eigen_Knm_inv_Lm=(eigen_ktru.transpose()*CMath::exp(m_log_scale*2.0))*eigen_inv_Lm;

	m_Tmm=SGMatrix<float64_t>(m_kuu.num_rows, m_kuu.num_cols);
	Map<MatrixXd> eigen_C(m_Tmm.matrix, m_Tmm.num_rows, m_Tmm.num_cols);
	//C = KnmInvLm'*KnmInvLm;
	eigen_C=eigen_Knm_inv_Lm.transpose()*eigen_Knm_inv_Lm;

	m_inv_La=SGMatrix<float64_t>(m_kuu.num_rows, m_kuu.num_cols);
	Map<MatrixXd> eigen_inv_La(m_inv_La.matrix, m_inv_La.num_rows, m_inv_La.num_cols);
	//A = sigma2*eye(model.m) + C;
	LLT<MatrixXd> chol_A(m_sigma2*MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols)+eigen_C);

	//La = chol(A);
	//invLa = La\eye(model.m);
	eigen_inv_La=chol_A.matrixU().solve(MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols));

	//L=-invLm*invLm' + sigma2*(invLm*invLa*invLa'*invLm');
	m_L=SGMatrix<float64_t>(m_kuu.num_rows, m_kuu.num_cols);
	Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
	eigen_L=eigen_inv_Lm*(
		m_sigma2*eigen_inv_La*eigen_inv_La.transpose()-MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols)
		)*eigen_inv_Lm.transpose();

	//TrK = - (0.5/sigma2)*(model.TrKnn - sum(diag(C)) );
	m_trk=-0.5/(m_sigma2)*(eigen_ktrtr_diag.array().sum()*CMath::exp(m_log_scale*2.0)
		-eigen_C.diagonal().array().sum());
}
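
/* What update_alpha() produces: unrolling the Cholesky factors,
 *   alpha = inv(sigma2*Kmm + Kmn*Knm) * Kmn * (y - mean),
 * with Kmm scaled and jittered as above, i.e. the weight vector of the
 * variational posterior mean over the inducing outputs (a consequence of
 * the factored form below, not a line taken from the MATLAB reference).
 * update_chol() must have run first so that invLm, invLa and KnmInvLm are
 * current.
 */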

void CVarDTCInferenceMethod::update_alpha()
{
	Map<MatrixXd> eigen_Knm_inv_Lm(m_Knm_inv_Lm.matrix, m_Knm_inv_Lm.num_rows, m_Knm_inv_Lm.num_cols);
	Map<MatrixXd> eigen_inv_Lm(m_inv_Lm.matrix, m_inv_Lm.num_rows, m_inv_Lm.num_cols);
	Map<MatrixXd> eigen_inv_La(m_inv_La.matrix, m_inv_La.num_rows, m_inv_La.num_cols);

	SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels();
	Map<VectorXd> eigen_y(y.vector, y.vlen);
	SGVector<float64_t> m=m_mean->get_mean_vector(m_features);
	Map<VectorXd> eigen_m(m.vector, m.vlen);

	//yKnmInvLm = (model.y'*KnmInvLm);
	//yKnmInvLmInvLa = yKnmInvLm*invLa;
	VectorXd y_cor=eigen_y-eigen_m;
	VectorXd eigen_y_Knm_inv_Lm_inv_La_transpose=eigen_inv_La.transpose()*(
		eigen_Knm_inv_Lm.transpose()*y_cor);
	//alpha = invLm*invLa*yKnmInvLmInvLa';
	m_alpha=SGVector<float64_t>(m_kuu.num_rows);
	Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
	eigen_alpha=eigen_inv_Lm*eigen_inv_La*eigen_y_Knm_inv_Lm_inv_La_transpose;

	m_yy=y_cor.dot(y_cor);
	//F3 = (0.5/sigma2)*(yKnmInvLmInvLa*yKnmInvLmInvLa');
	m_f3=0.5*eigen_y_Knm_inv_Lm_inv_La_transpose.dot(eigen_y_Knm_inv_Lm_inv_La_transpose)/m_sigma2;
}

void CVarDTCInferenceMethod::update_deriv()
{
	Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
	Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
	Map<MatrixXd> eigen_inv_Lm(m_inv_Lm.matrix, m_inv_Lm.num_rows, m_inv_Lm.num_cols);
	//m-by-n matrix
	Map<MatrixXd> eigen_ktru(m_ktru.matrix, m_ktru.num_rows, m_ktru.num_cols);
	Map<MatrixXd> eigen_C(m_Tmm.matrix, m_Tmm.num_rows, m_Tmm.num_cols);

	//m_Tmm=SGMatrix<float64_t>(m_kuu.num_rows, m_kuu.num_cols);
	Map<MatrixXd> eigen_Tmm(m_Tmm.matrix, m_Tmm.num_rows, m_Tmm.num_cols);

	m_Tnm=SGMatrix<float64_t>(m_ktru.num_cols, m_ktru.num_rows);
	Map<MatrixXd> eigen_Tnm(m_Tnm.matrix, m_Tnm.num_rows, m_Tnm.num_cols);

	CGaussianLikelihood* lik=CGaussianLikelihood::obtain_from_generic(m_model);
	float64_t sigma=lik->get_sigma();
	SG_UNREF(lik);
	m_sigma2=sigma*sigma;

	//invLmInvLa = invLm*invLa;
	//invA = invLmInvLa*invLmInvLa';
	//yKnmInvA = yKnmInvLmInvLa*invLmInvLa';
	//invKmm = invLm*invLm';
	//Tmm = sigma2*invA + yKnmInvA'*yKnmInvA;
	//Tmm = invKmm - Tmm;
	MatrixXd Tmm=-eigen_L-eigen_alpha*eigen_alpha.transpose();

	//Tnm = model.Knm*Tmm;
	eigen_Tnm=(eigen_ktru.transpose()*CMath::exp(m_log_scale*2.0))*Tmm;

	//Tmm = Tmm - (invLm*(C*invLm'))/sigma2;
	eigen_Tmm = Tmm - (eigen_inv_Lm*eigen_C*eigen_inv_Lm.transpose()/m_sigma2);

	SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels();
	Map<VectorXd> eigen_y(y.vector, y.vlen);
	SGVector<float64_t> m=m_mean->get_mean_vector(m_features);
	Map<VectorXd> eigen_m(m.vector, m.vlen);

	//Tnm = Tnm + (model.y*yKnmInvA);
	eigen_Tnm += (eigen_y-eigen_m)*eigen_alpha.transpose();
}
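
/* After update_deriv(), m_Tmm and m_Tnm hold the moment matrices against
 * which all hyperparameter derivatives are contracted: dKuu-type gradients
 * are traced against Tmm, dKnm-type gradients against Tnm, and diagonal
 * dKnn terms enter through the trace penalty (see
 * get_derivative_related_cov() below).
 */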

SGVector<float64_t> CVarDTCInferenceMethod::get_posterior_mean()
{
	SG_NOTIMPLEMENTED
	//TODO: implement this method once I get time
	return SGVector<float64_t>();
}

SGMatrix<float64_t> CVarDTCInferenceMethod::get_posterior_covariance()
{
	SG_NOTIMPLEMENTED
	//TODO: implement this method once I get time
	return SGMatrix<float64_t>();
}

SGVector<float64_t> CVarDTCInferenceMethod::get_derivative_wrt_likelihood_model(
	const TParameter* param)
{
	REQUIRE(!strcmp(param->m_name, "log_sigma"), "Can't compute derivative of "
		"the negative log marginal likelihood wrt %s.%s parameter\n",
		m_model->get_name(), param->m_name)

	SGVector<float64_t> dlik(1);

	Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
	Map<MatrixXd> eigen_kuu(m_kuu.matrix, m_kuu.num_rows, m_kuu.num_cols);
	Map<MatrixXd> eigen_inv_La(m_inv_La.matrix, m_inv_La.num_rows, m_inv_La.num_cols);

	//yKnmInvLmInvLainvLa = yKnmInvLmInvLa*invLa';
	//sigma2aux = sigma2*sum(sum(invLa.*invLa)) + yKnmInvLmInvLainvLa*yKnmInvLmInvLainvLa';
	float64_t sigma2aux= m_sigma2*eigen_inv_La.cwiseProduct(eigen_inv_La).array().sum()+
		eigen_alpha.transpose()*(eigen_kuu*CMath::exp(m_log_scale*2.0)
		+CMath::exp(m_log_ind_noise)*MatrixXd::Identity(
		m_kuu.num_rows, m_kuu.num_cols))*eigen_alpha;
	//Dlik_neg = - (model.n-model.m) + model.yy/sigma2 - 2*F3 - sigma2aux - 2*TrK;

	dlik[0]=(m_ktru.num_cols-m_ktru.num_rows)-m_yy/m_sigma2+2.0*m_f3+sigma2aux+2.0*m_trk;
	return dlik;
}
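
/* Reading aid for the expression above: sigma2*sum(invLa.*invLa) equals
 * sigma2*tr(inv(A)), and the quadratic form adds
 * alpha'*(scale^2*Kmm + jitter*I)*alpha, so sigma2aux matches the sigma2aux
 * line of the MATLAB reference quoted in the comments; dlik then combines
 * it with the stored bound pieces m_yy, m_f3 and m_trk.
 */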

SGVector<float64_t> CVarDTCInferenceMethod::get_derivative_wrt_inducing_features(
	const TParameter* param)
{
	//[DXu DXunm] = kernelSparseGradInd(model, Tmm, Tnm);
	//DXu_neg = DXu + DXunm/model.sigma2;

	Map<MatrixXd> eigen_Tmm(m_Tmm.matrix, m_Tmm.num_rows, m_Tmm.num_cols);
	Map<MatrixXd> eigen_Tnm(m_Tnm.matrix, m_Tnm.num_rows, m_Tnm.num_cols);

	int32_t dim=m_inducing_features.num_rows;
	int32_t num_samples=m_inducing_features.num_cols;
	SGVector<float64_t> deriv_lat(dim*num_samples);
	deriv_lat.zero();

	m_lock->lock();
	CFeatures *inducing_features=get_inducing_features();
	//asymmetric part (related to xu and x)
	m_kernel->init(inducing_features, m_features);
	for(int32_t lat_idx=0; lat_idx<eigen_Tnm.cols(); lat_idx++)
	{
		Map<VectorXd> deriv_lat_col_vec(deriv_lat.vector+lat_idx*dim,dim);
		//p by n
		SGMatrix<float64_t> deriv_mat=m_kernel->get_parameter_gradient(param, lat_idx);
		Map<MatrixXd> eigen_deriv_mat(deriv_mat.matrix, deriv_mat.num_rows, deriv_mat.num_cols);
		//DXunm/model.sigma2;
		deriv_lat_col_vec+=eigen_deriv_mat*(-CMath::exp(m_log_scale*2.0)/m_sigma2*eigen_Tnm.col(lat_idx));
	}

	//symmetric part (related to xu and xu)
	m_kernel->init(inducing_features, inducing_features);
	for(int32_t lat_lidx=0; lat_lidx<eigen_Tmm.cols(); lat_lidx++)
	{
		Map<VectorXd> deriv_lat_col_vec(deriv_lat.vector+lat_lidx*dim,dim);
		//p by n
		SGMatrix<float64_t> deriv_mat=m_kernel->get_parameter_gradient(param, lat_lidx);
		Map<MatrixXd> eigen_deriv_mat(deriv_mat.matrix, deriv_mat.num_rows, deriv_mat.num_cols);
		//DXu
		deriv_lat_col_vec+=eigen_deriv_mat*(-CMath::exp(m_log_scale*2.0)*eigen_Tmm.col(lat_lidx));
	}
	SG_UNREF(inducing_features);
	m_lock->unlock();
	return deriv_lat;
}

SGVector<float64_t> CVarDTCInferenceMethod::get_derivative_wrt_inducing_noise(
	const TParameter* param)
{
	REQUIRE(param, "Param not set\n");
	REQUIRE(!strcmp(param->m_name, "log_inducing_noise"), "Can't compute derivative of "
		"the negative log marginal likelihood wrt %s.%s parameter\n",
		get_name(), param->m_name)

	Map<MatrixXd> eigen_Tmm(m_Tmm.matrix, m_Tmm.num_rows, m_Tmm.num_cols);
	SGVector<float64_t> result(1);
	result[0]=-0.5*CMath::exp(m_log_ind_noise)*eigen_Tmm.diagonal().array().sum();
	return result;
}

float64_t CVarDTCInferenceMethod::get_derivative_related_cov(SGVector<float64_t> ddiagKi,
	SGMatrix<float64_t> dKuui, SGMatrix<float64_t> dKui)
{
	Map<VectorXd> eigen_ddiagKi(ddiagKi.vector, ddiagKi.vlen);
	Map<MatrixXd> eigen_dKuui(dKuui.matrix, dKuui.num_rows, dKuui.num_cols);
	Map<MatrixXd> eigen_dKui(dKui.matrix, dKui.num_rows, dKui.num_cols);

	Map<MatrixXd> eigen_Tmm(m_Tmm.matrix, m_Tmm.num_rows, m_Tmm.num_cols);
	Map<MatrixXd> eigen_Tnm(m_Tnm.matrix, m_Tnm.num_rows, m_Tnm.num_cols);

	//[Dkern Dkernnm DTrKnn] = kernelSparseGradHyp(model, Tmm, Tnm);
	//Dkern_neg = 0.5*Dkern + Dkernnm/model.sigma2 - (0.5/model.sigma2)*DTrKnn;
	float64_t dkern= -0.5*eigen_dKuui.cwiseProduct(eigen_Tmm).sum()
		-eigen_dKui.cwiseProduct(eigen_Tnm.transpose()).sum()/m_sigma2
		+0.5*eigen_ddiagKi.array().sum()/m_sigma2;
	return dkern;
}

SGVector<float64_t> CVarDTCInferenceMethod::get_derivative_wrt_mean(
	const TParameter* param)
{
	REQUIRE(param, "Param not set\n");
	SGVector<float64_t> result;
	int64_t len=const_cast<TParameter *>(param)->m_datatype.get_num_elements();
	result=SGVector<float64_t>(len);

	SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels();
	Map<VectorXd> eigen_y(y.vector, y.vlen);
	SGVector<float64_t> m=m_mean->get_mean_vector(m_features);
	Map<VectorXd> eigen_m(m.vector, m.vlen);
	Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

	//m-by-n matrix
	Map<MatrixXd> eigen_ktru(m_ktru.matrix, m_ktru.num_rows, m_ktru.num_cols);

	for (index_t i=0; i<result.vlen; i++)
	{
		SGVector<float64_t> dmu=m_mean->get_parameter_derivative(m_features, param, i);
		Map<VectorXd> eigen_dmu(dmu.vector, dmu.vlen);

		result[i]=eigen_dmu.dot(eigen_ktru.transpose()*CMath::exp(m_log_scale*2.0)
			*eigen_alpha+(eigen_m-eigen_y))/m_sigma2;
	}
	return result;
}

void CVarDTCInferenceMethod::register_minimizer(Minimizer* minimizer)
{
	SG_WARNING("The method does not require a minimizer. The provided minimizer will not be used.\n");
}