SHOGUN 4.1.0
SparseVGInferenceMethod.cpp
/*
 * Copyright (c) The Shogun Machine Learning Toolbox
 * Written (W) 2015 Wu Lin
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation are those
 * of the authors and should not be interpreted as representing official policies,
 * either expressed or implied, of the Shogun Development Team.
 *
 * This code is specifically adapted from functions in varsgpLikelihood.m and varsgpPredict.m
 *
 * The reference paper is
 * Titsias, Michalis K.
 * "Variational learning of inducing variables in sparse Gaussian processes."
 * International Conference on Artificial Intelligence and Statistics. 2009.
 *
 */
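
/* Background: given n training inputs, m inducing inputs X_u, a Gaussian
 * likelihood with noise variance sigma2, and the low-rank matrix
 * Q_nn = K_nm*inv(K_mm)*K_mn, the collapsed variational lower bound of the
 * reference paper on the log marginal likelihood is
 *
 *   F_V = log N(y | mean, sigma2*I + Q_nn) - (0.5/sigma2)*tr(K_nn - Q_nn).
 *
 * This file evaluates -F_V (split into the terms F012, F3 and TrK of
 * varsgpLikelihood.m) together with its gradients with respect to the kernel,
 * likelihood, mean and inducing-point parameters.
 */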
#include <shogun/machine/gp/SparseVGInferenceMethod.h>

#ifdef HAVE_EIGEN3

#include <shogun/machine/gp/GaussianLikelihood.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/eigen3.h>

using namespace shogun;
using namespace Eigen;

CSparseVGInferenceMethod::CSparseVGInferenceMethod() : CSingleSparseInferenceBase()
{
    init();
}

CSparseVGInferenceMethod::CSparseVGInferenceMethod(CKernel* kern, CFeatures* feat,
    CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod, CFeatures* lat)
    : CSingleSparseInferenceBase(kern, feat, m, lab, mod, lat)
{
    init();
}

void CSparseVGInferenceMethod::init()
{
    m_yy=0.0;
    m_f3=0.0;
    m_sigma2=0.0;
    m_trk=0.0;
    m_Tmm=SGMatrix<float64_t>();
    m_Tnm=SGMatrix<float64_t>();
    m_inv_Lm=SGMatrix<float64_t>();
    m_inv_La=SGMatrix<float64_t>();
    m_Knm_inv_Lm=SGMatrix<float64_t>();

    SG_ADD(&m_yy, "yy", "yy", MS_NOT_AVAILABLE);
    SG_ADD(&m_f3, "f3", "f3", MS_NOT_AVAILABLE);
    SG_ADD(&m_sigma2, "sigma2", "sigma2", MS_NOT_AVAILABLE);
    SG_ADD(&m_trk, "trk", "trk", MS_NOT_AVAILABLE);
    SG_ADD(&m_Tmm, "Tmm", "Tmm", MS_NOT_AVAILABLE);
    SG_ADD(&m_Tnm, "Tnm", "Tnm", MS_NOT_AVAILABLE);
    SG_ADD(&m_inv_Lm, "inv_Lm", "inv_Lm", MS_NOT_AVAILABLE);
    SG_ADD(&m_inv_La, "inv_La", "inv_La", MS_NOT_AVAILABLE);
    SG_ADD(&m_Knm_inv_Lm, "Knm_Inv_Lm", "Knm_Inv_Lm", MS_NOT_AVAILABLE);
}

CSparseVGInferenceMethod::~CSparseVGInferenceMethod()
{
}

void CSparseVGInferenceMethod::compute_gradient()
{
    CInferenceMethod::compute_gradient();

    if (!m_gradient_update)
    {
        update_deriv();
        m_gradient_update=true;
        update_parameter_hash();
    }
}

void CSparseVGInferenceMethod::update()
{
    SG_DEBUG("entering\n");

    CInferenceMethod::update();
    update_chol();
    update_alpha();
    m_gradient_update=false;
    update_parameter_hash();

    SG_DEBUG("leaving\n");
}

CSparseVGInferenceMethod* CSparseVGInferenceMethod::obtain_from_generic(
    CInferenceMethod* inference)
{
    if (inference==NULL)
        return NULL;

    if (inference->get_inference_type()!=INF_SPARSEVG)
        SG_SERROR("Provided inference is not of type CSparseVGInferenceMethod!\n")

    SG_REF(inference);
    return (CSparseVGInferenceMethod*)inference;
}

void CSparseVGInferenceMethod::check_members() const
{
    CSingleSparseInferenceBase::check_members();

    REQUIRE(m_model->get_model_type()==LT_GAUSSIAN,
        "SparseVG inference method can only use Gaussian likelihood function\n")
    REQUIRE(m_labels->get_label_type()==LT_REGRESSION, "Labels must be type "
        "of CRegressionLabels\n")
}

SGVector<float64_t> CSparseVGInferenceMethod::get_diagonal_vector()
{
    SG_NOTIMPLEMENTED
    //the inference method does not need to use this
    return SGVector<float64_t>();
}

float64_t CSparseVGInferenceMethod::get_negative_log_marginal_likelihood()
{
    if (parameter_hash_changed())
        update();

    Map<MatrixXd> eigen_inv_La(m_inv_La.matrix, m_inv_La.num_rows, m_inv_La.num_cols);

    //F012 =-(model.n-model.m)*model.Likelihood.logtheta-0.5*model.n*log(2*pi)-(0.5/sigma2)*(model.yy)-sum(log(diag(La)));
    float64_t neg_f012=0.5*(m_ktru.num_cols-m_ktru.num_rows)*CMath::log(m_sigma2)+
        0.5*m_ktru.num_cols*CMath::log(2.0*CMath::PI)+
        0.5*m_yy/(m_sigma2)-eigen_inv_La.diagonal().array().log().sum();

    //F3 = (0.5/sigma2)*(yKnmInvLmInvLa*yKnmInvLmInvLa');
    float64_t neg_f3=-m_f3;

    //model.TrKnn = sum(model.diagKnn);
    //TrK = - (0.5/sigma2)*(model.TrKnn - sum(diag(C)) );
    float64_t neg_trk=-m_trk;

    //F = F012 + F3 + TrK;
    //F = - F;
    return neg_f012+neg_f3+neg_trk;
}
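
// Note: with n=m_ktru.num_cols, m=m_ktru.num_rows and La=chol(sigma2*I + C),
// the value returned above decomposes as
//   -F012 = (n-m)*log(sigma) + 0.5*n*log(2*pi) + (0.5/sigma2)*||y-mean||^2
//           + sum(log(diag(La)))
//   -F3   = -(0.5/sigma2)*||yKnmInvLmInvLa||^2
//   -TrK  = (0.5/sigma2)*(tr(Knn) - tr(C))
// i.e. the negative of the variational bound F_V described at the top of the file.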

void CSparseVGInferenceMethod::update_chol()
{
    // get the sigma variable from the Gaussian likelihood model
    CGaussianLikelihood* lik=CGaussianLikelihood::obtain_from_generic(m_model);
    float64_t sigma=lik->get_sigma();
    SG_UNREF(lik);
    m_sigma2=sigma*sigma;

    //m-by-m matrix
    Map<MatrixXd> eigen_kuu(m_kuu.matrix, m_kuu.num_rows, m_kuu.num_cols);
    //m-by-n matrix
    Map<MatrixXd> eigen_ktru(m_ktru.matrix, m_ktru.num_rows, m_ktru.num_cols);
    Map<VectorXd> eigen_ktrtr_diag(m_ktrtr_diag.vector, m_ktrtr_diag.vlen);

    //Lm = chol(model.Kmm + model.jitter*eye(model.m))
    LLT<MatrixXd> Luu(eigen_kuu*CMath::exp(m_log_scale*2.0)+CMath::exp(m_log_ind_noise)*MatrixXd::Identity(
        m_kuu.num_rows, m_kuu.num_cols));
    m_inv_Lm=SGMatrix<float64_t>(Luu.rows(), Luu.cols());
    Map<MatrixXd> eigen_inv_Lm(m_inv_Lm.matrix, m_inv_Lm.num_rows, m_inv_Lm.num_cols);
    //invLm = Lm\eye(model.m);
    eigen_inv_Lm=Luu.matrixU().solve(MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols));

    m_Knm_inv_Lm=SGMatrix<float64_t>(m_ktru.num_cols, m_ktru.num_rows);
    Map<MatrixXd> eigen_Knm_inv_Lm(m_Knm_inv_Lm.matrix, m_Knm_inv_Lm.num_rows, m_Knm_inv_Lm.num_cols);
    //KnmInvLm = model.Knm*invLm;
    eigen_Knm_inv_Lm=(eigen_ktru.transpose()*CMath::exp(m_log_scale*2.0))*eigen_inv_Lm;

    //C = KnmInvLm'*KnmInvLm;
    MatrixXd eigen_C=eigen_Knm_inv_Lm.transpose()*eigen_Knm_inv_Lm;

    m_inv_La=SGMatrix<float64_t>(m_kuu.num_rows, m_kuu.num_cols);
    Map<MatrixXd> eigen_inv_La(m_inv_La.matrix, m_inv_La.num_rows, m_inv_La.num_cols);
    //A = sigma2*eye(model.m) + C;
    LLT<MatrixXd> chol_A(m_sigma2*MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols)+eigen_C);

    //La = chol(A);
    //invLa = La\eye(model.m);
    eigen_inv_La=chol_A.matrixU().solve(MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols));

    //L=-invLm*invLm' + sigma2*(invLm*invLa*invLa'*invLm');
    m_L=SGMatrix<float64_t>(m_kuu.num_rows, m_kuu.num_cols);
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
    eigen_L=eigen_inv_Lm*(
        m_sigma2*eigen_inv_La*eigen_inv_La.transpose()-MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols)
        )*eigen_inv_Lm.transpose();

    //TrK = - (0.5/sigma2)*(model.TrKnn - sum(diag(C)) );
    m_trk=-0.5/(m_sigma2)*(eigen_ktrtr_diag.array().sum()*CMath::exp(m_log_scale*2.0)
        -eigen_C.diagonal().array().sum());
}
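
// After update_chol() the cached members hold (with the kernel scaling
// scale^2 = exp(2*m_log_scale) applied to Kuu and Knm):
//   m_inv_Lm     : inv(Lm), Lm = chol(scale^2*Kuu + ind_noise*I)
//   m_Knm_inv_Lm : scale^2*Knm * inv(Lm)
//   m_inv_La     : inv(La), La = chol(sigma2*I + C), C = m_Knm_inv_Lm'*m_Knm_inv_Lm
//   m_L          : inv(Lm)*(sigma2*inv(La)*inv(La)' - I)*inv(Lm)'
//   m_trk        : TrK = -(0.5/sigma2)*(tr(scale^2*Knn) - tr(C))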

void CSparseVGInferenceMethod::update_alpha()
{
    Map<MatrixXd> eigen_inv_Lm(m_inv_Lm.matrix, m_inv_Lm.num_rows, m_inv_Lm.num_cols);
    Map<MatrixXd> eigen_inv_La(m_inv_La.matrix, m_inv_La.num_rows, m_inv_La.num_cols);
    Map<MatrixXd> eigen_Knm_inv_Lm(m_Knm_inv_Lm.matrix, m_Knm_inv_Lm.num_rows, m_Knm_inv_Lm.num_cols);

    SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels();
    Map<VectorXd> eigen_y(y.vector, y.vlen);
    SGVector<float64_t> m=m_mean->get_mean_vector(m_features);
    Map<VectorXd> eigen_m(m.vector, m.vlen);

    //yKnmInvLm = (model.y'*KnmInvLm);
    //yKnmInvLmInvLa = yKnmInvLm*invLa;
    VectorXd y_cor=eigen_y-eigen_m;
    VectorXd eigen_y_Knm_inv_Lm_inv_La_transpose=eigen_inv_La.transpose()*(
        eigen_Knm_inv_Lm.transpose()*y_cor);
    //alpha = invLm*invLa*yKnmInvLmInvLa';
    m_alpha=SGVector<float64_t>(m_kuu.num_rows);
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
    eigen_alpha=eigen_inv_Lm*eigen_inv_La*eigen_y_Knm_inv_Lm_inv_La_transpose;

    m_yy=y_cor.dot(y_cor);
    //F3 = (0.5/sigma2)*(yKnmInvLmInvLa*yKnmInvLmInvLa');
    m_f3=0.5*eigen_y_Knm_inv_Lm_inv_La_transpose.dot(eigen_y_Knm_inv_Lm_inv_La_transpose)/m_sigma2;
}
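
// Equivalently, writing K~ for the scaled kernel (K~ = scale^2*K) and
// composing the triangular solves above, eigen_alpha solves
//   (sigma2*(K~mm + jitter*I) + K~mn*K~nm) * alpha = K~mn*(y - mean),
// the usual collapsed form of the variational posterior mean, so the
// approximate predictive mean at new inputs X* is K~(X*,Xu)*alpha.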

void CSparseVGInferenceMethod::update_deriv()
{
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
    Map<MatrixXd> eigen_inv_Lm(m_inv_Lm.matrix, m_inv_Lm.num_rows, m_inv_Lm.num_cols);

    //m-by-n matrix
    Map<MatrixXd> eigen_ktru(m_ktru.matrix, m_ktru.num_rows, m_ktru.num_cols);

    //C = KnmInvLm'*KnmInvLm (rebuilt from the cached factor)
    Map<MatrixXd> eigen_Knm_inv_Lm(m_Knm_inv_Lm.matrix, m_Knm_inv_Lm.num_rows, m_Knm_inv_Lm.num_cols);
    MatrixXd eigen_C=eigen_Knm_inv_Lm.transpose()*eigen_Knm_inv_Lm;

    m_Tmm=SGMatrix<float64_t>(m_kuu.num_rows, m_kuu.num_cols);
    Map<MatrixXd> eigen_Tmm(m_Tmm.matrix, m_Tmm.num_rows, m_Tmm.num_cols);

    m_Tnm=SGMatrix<float64_t>(m_ktru.num_cols, m_ktru.num_rows);
    Map<MatrixXd> eigen_Tnm(m_Tnm.matrix, m_Tnm.num_rows, m_Tnm.num_cols);

    CGaussianLikelihood* lik=CGaussianLikelihood::obtain_from_generic(m_model);
    float64_t sigma=lik->get_sigma();
    SG_UNREF(lik);
    m_sigma2=sigma*sigma;

    //invLmInvLa = invLm*invLa;
    //invA = invLmInvLa*invLmInvLa';
    //yKnmInvA = yKnmInvLmInvLa*invLmInvLa';
    //invKmm = invLm*invLm';
    //Tmm = sigma2*invA + yKnmInvA'*yKnmInvA;
    //Tmm = invKmm - Tmm;
    MatrixXd Tmm=-eigen_L-eigen_alpha*eigen_alpha.transpose();

    //Tnm = model.Knm*Tmm;
    eigen_Tnm=(eigen_ktru.transpose()*CMath::exp(m_log_scale*2.0))*Tmm;

    //Tmm = Tmm - (invLm*(C*invLm'))/sigma2;
    eigen_Tmm = Tmm - (eigen_inv_Lm*eigen_C*eigen_inv_Lm.transpose()/m_sigma2);

    SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels();
    Map<VectorXd> eigen_y(y.vector, y.vlen);
    SGVector<float64_t> m=m_mean->get_mean_vector(m_features);
    Map<VectorXd> eigen_m(m.vector, m.vlen);

    //Tnm = Tnm + (model.y*yKnmInvA);
    eigen_Tnm += (eigen_y-eigen_m)*eigen_alpha.transpose();
}
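
// m_Tmm and m_Tnm collect the dependence of -F_V on Kuu and Knm respectively,
// so that each kernel-related gradient below reduces to entry-wise products
// with kernel derivative matrices (see get_derivative_related_cov()).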

SGVector<float64_t> CSparseVGInferenceMethod::get_posterior_mean()
{
    SG_NOTIMPLEMENTED
    //TODO: implement this method once I get time
    return SGVector<float64_t>();
}

SGMatrix<float64_t> CSparseVGInferenceMethod::get_posterior_covariance()
{
    SG_NOTIMPLEMENTED
    //TODO: implement this method once I get time
    return SGMatrix<float64_t>();
}

SGVector<float64_t> CSparseVGInferenceMethod::get_derivative_wrt_likelihood_model(
    const TParameter* param)
{
    REQUIRE(!strcmp(param->m_name, "log_sigma"), "Can't compute derivative of "
        "the negative log marginal likelihood wrt %s.%s parameter\n",
        m_model->get_name(), param->m_name)

    SGVector<float64_t> dlik(1);

    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
    Map<MatrixXd> eigen_inv_La(m_inv_La.matrix, m_inv_La.num_rows, m_inv_La.num_cols);
    Map<MatrixXd> eigen_kuu(m_kuu.matrix, m_kuu.num_rows, m_kuu.num_cols);

    //yKnmInvLmInvLainvLa = yKnmInvLmInvLa*invLa';
    //sigma2aux = sigma2*sum(sum(invLa.*invLa)) + yKnmInvLmInvLainvLa*yKnmInvLmInvLainvLa';
    float64_t sigma2aux= m_sigma2*eigen_inv_La.cwiseProduct(eigen_inv_La).array().sum()+
        eigen_alpha.transpose()*(eigen_kuu*CMath::exp(m_log_scale*2.0)
        +CMath::exp(m_log_ind_noise)*MatrixXd::Identity(
        m_kuu.num_rows, m_kuu.num_cols))*eigen_alpha;
    //Dlik_neg = - (model.n-model.m) + model.yy/sigma2 - 2*F3 - sigma2aux - 2*TrK;

    dlik[0]=(m_ktru.num_cols-m_ktru.num_rows)-m_yy/m_sigma2+2.0*m_f3+sigma2aux+2.0*m_trk;
    return dlik;
}
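
// dlik[0] is the derivative with respect to log(sigma), with n=m_ktru.num_cols
// and m=m_ktru.num_rows; note its sign is flipped relative to the MATLAB
// Dlik_neg formula quoted above, matching the negative-bound convention of
// get_negative_log_marginal_likelihood().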

SGVector<float64_t> CSparseVGInferenceMethod::get_derivative_wrt_inducing_features(
    const TParameter* param)
{
    //[DXu DXunm] = kernelSparseGradInd(model, Tmm, Tnm);
    //DXu_neg = DXu + DXunm/model.sigma2;

    Map<MatrixXd> eigen_Tmm(m_Tmm.matrix, m_Tmm.num_rows, m_Tmm.num_cols);
    Map<MatrixXd> eigen_Tnm(m_Tnm.matrix, m_Tnm.num_rows, m_Tnm.num_cols);

    int32_t dim=m_inducing_features.num_rows;
    int32_t num_samples=m_inducing_features.num_cols;
    SGVector<float64_t> deriv_lat(dim*num_samples);
    deriv_lat.zero();

    m_lock->lock();
    CFeatures *inducing_features=get_inducing_features();
    //asymmetric part (related to xu and x)
    m_kernel->init(inducing_features, m_features);
    for(int32_t lat_idx=0; lat_idx<eigen_Tnm.cols(); lat_idx++)
    {
        Map<VectorXd> deriv_lat_col_vec(deriv_lat.vector+lat_idx*dim,dim);
        //p by n
        SGMatrix<float64_t> deriv_mat=m_kernel->get_parameter_gradient(param, lat_idx);
        Map<MatrixXd> eigen_deriv_mat(deriv_mat.matrix, deriv_mat.num_rows, deriv_mat.num_cols);
        //DXunm/model.sigma2;
        deriv_lat_col_vec+=eigen_deriv_mat*(-CMath::exp(m_log_scale*2.0)/m_sigma2*eigen_Tnm.col(lat_idx));
    }

    //symmetric part (related to xu and xu)
    m_kernel->init(inducing_features, inducing_features);
    for(int32_t lat_lidx=0; lat_lidx<eigen_Tmm.cols(); lat_lidx++)
    {
        Map<VectorXd> deriv_lat_col_vec(deriv_lat.vector+lat_lidx*dim,dim);
        //p by n
        SGMatrix<float64_t> deriv_mat=m_kernel->get_parameter_gradient(param, lat_lidx);
        Map<MatrixXd> eigen_deriv_mat(deriv_mat.matrix, deriv_mat.num_rows, deriv_mat.num_cols);
        //DXu
        deriv_lat_col_vec+=eigen_deriv_mat*(-CMath::exp(m_log_scale*2.0)*eigen_Tmm.col(lat_lidx));
    }
    SG_UNREF(inducing_features);
    m_lock->unlock();
    return deriv_lat;
}
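
// The inducing-input gradient above accumulates two passes of kernel
// derivatives: an asymmetric pass with the kernel initialized on (Xu, X) and
// weighted column-wise by Tnm/sigma2, and a symmetric pass on (Xu, Xu)
// weighted by Tmm, matching DXu_neg = DXu + DXunm/model.sigma2.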

SGVector<float64_t> CSparseVGInferenceMethod::get_derivative_wrt_inducing_noise(
    const TParameter* param)
{
    REQUIRE(param, "Param not set\n");
    REQUIRE(!strcmp(param->m_name, "log_inducing_noise"), "Can't compute derivative of "
        "the negative log marginal likelihood wrt %s.%s parameter\n",
        get_name(), param->m_name)

    Map<MatrixXd> eigen_Tmm(m_Tmm.matrix, m_Tmm.num_rows, m_Tmm.num_cols);
    SGVector<float64_t> result(1);
    result[0]=-0.5*CMath::exp(m_log_ind_noise)*eigen_Tmm.diagonal().array().sum();
    return result;
}

float64_t CSparseVGInferenceMethod::get_derivative_related_cov(SGVector<float64_t> ddiagKi,
    SGMatrix<float64_t> dKuui, SGMatrix<float64_t> dKui)
{
    Map<VectorXd> eigen_ddiagKi(ddiagKi.vector, ddiagKi.vlen);
    Map<MatrixXd> eigen_dKuui(dKuui.matrix, dKuui.num_rows, dKuui.num_cols);
    Map<MatrixXd> eigen_dKui(dKui.matrix, dKui.num_rows, dKui.num_cols);

    Map<MatrixXd> eigen_Tmm(m_Tmm.matrix, m_Tmm.num_rows, m_Tmm.num_cols);
    Map<MatrixXd> eigen_Tnm(m_Tnm.matrix, m_Tnm.num_rows, m_Tnm.num_cols);

    //[Dkern Dkernnm DTrKnn] = kernelSparseGradHyp(model, Tmm, Tnm);
    //Dkern_neg = 0.5*Dkern + Dkernnm/model.sigma2 - (0.5/model.sigma2)*DTrKnn;
    float64_t dkern= -0.5*eigen_dKuui.cwiseProduct(eigen_Tmm).sum()
        -eigen_dKui.cwiseProduct(eigen_Tnm.transpose()).sum()/m_sigma2
        +0.5*eigen_ddiagKi.array().sum()/m_sigma2;
    return dkern;
}
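
// In get_derivative_related_cov() above, ddiagKi is the derivative of
// diag(Knn), dKuui of Kuu, and dKui of Kun with respect to one kernel
// hyperparameter; the three entry-wise sums implement the terms of Dkern_neg
// quoted above using the cached Tmm and Tnm.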

SGVector<float64_t> CSparseVGInferenceMethod::get_derivative_wrt_mean(
    const TParameter* param)
{
    REQUIRE(param, "Param not set\n");
    SGVector<float64_t> result;
    int64_t len=const_cast<TParameter *>(param)->m_datatype.get_num_elements();
    result=SGVector<float64_t>(len);

    SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels();
    Map<VectorXd> eigen_y(y.vector, y.vlen);
    SGVector<float64_t> m=m_mean->get_mean_vector(m_features);
    Map<VectorXd> eigen_m(m.vector, m.vlen);
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

    //m-by-n matrix
    Map<MatrixXd> eigen_ktru(m_ktru.matrix, m_ktru.num_rows, m_ktru.num_cols);

    for (index_t i=0; i<result.vlen; i++)
    {
        SGVector<float64_t> dmu=m_mean->get_parameter_derivative(m_features, param, i);
        Map<VectorXd> eigen_dmu(dmu.vector, dmu.vlen);

        result[i]=eigen_dmu.dot(eigen_ktru.transpose()*CMath::exp(m_log_scale*2.0)
            *eigen_alpha+(eigen_m-eigen_y))/m_sigma2;
    }
    return result;
}
#endif /* HAVE_EIGEN3 */
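
A minimal usage sketch (not part of the file above): it wires the inference method into Shogun's GP regression pipeline, assuming the toolbox's standard classes CDenseFeatures, CRegressionLabels, CGaussianKernel, CZeroMean, CGaussianLikelihood and CGaussianProcessRegression from Shogun 4.1; the toy data and hyperparameter values are arbitrary.

#include <shogun/base/init.h>
#include <shogun/io/SGIO.h>
#include <shogun/mathematics/Math.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/machine/gp/ZeroMean.h>
#include <shogun/machine/gp/GaussianLikelihood.h>
#include <shogun/machine/gp/SparseVGInferenceMethod.h>
#include <shogun/regression/GaussianProcessRegression.h>

using namespace shogun;

int main()
{
    init_shogun_with_defaults();

    // toy 1-D data: n=6 training points, m=3 inducing points (values arbitrary)
    SGMatrix<float64_t> X(1, 6);
    SGVector<float64_t> y(6);
    for (index_t i=0; i<6; i++)
    {
        X(0, i)=i;
        y[i]=CMath::sin(i);
    }
    SGMatrix<float64_t> Xu(1, 3);
    for (index_t i=0; i<3; i++)
        Xu(0, i)=2.0*i;

    CDenseFeatures<float64_t>* feat=new CDenseFeatures<float64_t>(X);
    CDenseFeatures<float64_t>* lat=new CDenseFeatures<float64_t>(Xu);
    CRegressionLabels* lab=new CRegressionLabels(y);
    CGaussianKernel* kernel=new CGaussianKernel(10, 2.0);
    CZeroMean* mean=new CZeroMean();
    CGaussianLikelihood* lik=new CGaussianLikelihood(0.5);

    CSparseVGInferenceMethod* inf=new CSparseVGInferenceMethod(
        kernel, feat, mean, lab, lik, lat);

    // value of the negative variational bound -F_V at the current parameters
    float64_t nlml=inf->get_negative_log_marginal_likelihood();
    SG_SPRINT("-F_V = %f\n", nlml);

    // use the inference method inside a GP regression machine
    CGaussianProcessRegression* gpr=new CGaussianProcessRegression(inf);
    gpr->train();
    CRegressionLabels* pred=gpr->apply_regression(feat);

    SG_UNREF(pred);
    SG_UNREF(gpr);
    exit_shogun();
    return 0;
}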