SHOGUN  4.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
ExactInferenceMethod.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (W) 2013 Roman Votyakov
4  * Written (W) 2012 Jacob Walker
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice, this
11  * list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright notice,
13  * this list of conditions and the following disclaimer in the documentation
14  * and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  * The views and conclusions contained in the software and documentation are those
28  * of the authors and should not be interpreted as representing official policies,
29  * either expressed or implied, of the Shogun Development Team.
30  *
31  * Code adapted from Gaussian Process Machine Learning Toolbox
32  * http://www.gaussianprocess.org/gpml/code/matlab/doc/
33  */
35 
36 
41 
42 using namespace shogun;
43 using namespace Eigen;
44 
46 {
47 }
48 
50  CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod) :
51  CInference(kern, feat, m, lab, mod)
52 {
53 }
54 
56 {
57 }
58 
60 {
61  SG_WARNING("The method does not require a minimizer. The provided minimizer will not be used.\n");
62 }
63 
65 {
67 
68  if (!m_gradient_update)
69  {
70  update_deriv();
71  update_mean();
72  update_cov();
73  m_gradient_update=true;
75  }
76 }
77 
79 {
80  SG_DEBUG("entering\n");
81 
83  update_chol();
84  update_alpha();
85  m_gradient_update=false;
87 
88  SG_DEBUG("leaving\n");
89 }
90 
92 {
94 
96  "Exact inference method can only use Gaussian likelihood function\n")
98  "Labels must be type of CRegressionLabels\n")
99 }
100 
102 {
104  update();
105 
106  // get the sigma variable from the Gaussian likelihood model
108  float64_t sigma=lik->get_sigma();
109  SG_UNREF(lik);
110 
111  // compute diagonal vector: sW=1/sigma
113  result.fill_vector(result.vector, m_features->get_num_vectors(), 1.0/sigma);
114 
115  return result;
116 }
117 
119 {
121  update();
122 
123  // get the sigma variable from the Gaussian likelihood model
125  float64_t sigma=lik->get_sigma();
126  SG_UNREF(lik);
127 
128  // create eigen representation of alpha and L
129  Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
131 
132  // get labels and mean vectors and create eigen representation
133  SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels();
134  Map<VectorXd> eigen_y(y.vector, y.vlen);
136  Map<VectorXd> eigen_m(m.vector, m.vlen);
137 
138  // compute negative log of the marginal likelihood:
139  // nlZ=(y-m)'*alpha/2+sum(log(diag(L)))+n*log(2*pi*sigma^2)/2
140  float64_t result=(eigen_y-eigen_m).dot(eigen_alpha)/2.0+
141  eigen_L.diagonal().array().log().sum()+m_L.num_rows*
142  CMath::log(2*CMath::PI*CMath::sq(sigma))/2.0;
143 
144  return result;
145 }
146 
148 {
150  update();
151 
153 }
154 
156 {
158  update();
159 
160  return SGMatrix<float64_t>(m_L);
161 }
162 
164 {
166 
167  return SGVector<float64_t>(m_mu);
168 }
169 
171 {
173 
174  return SGMatrix<float64_t>(m_Sigma);
175 }
176 
178 {
179  // get the sigma variable from the Gaussian likelihood model
181  float64_t sigma=lik->get_sigma();
182  SG_UNREF(lik);
183 
184  /* check whether to allocate cholesky memory */
187 
188  /* creates views on kernel and cholesky matrix and perform cholesky */
191  LLT<MatrixXd> llt(K*(CMath::exp(m_log_scale*2.0)/CMath::sq(sigma))+
192  MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols));
193  L=llt.matrixU();
194 }
195 
197 {
198  // get the sigma variable from the Gaussian likelihood model
200  float64_t sigma=lik->get_sigma();
201  SG_UNREF(lik);
202 
203  // get labels and mean vector and create eigen representation
204  SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels();
205  Map<VectorXd> eigen_y(y.vector, y.vlen);
207  Map<VectorXd> eigen_m(m.vector, m.vlen);
208 
210 
211  /* creates views on cholesky matrix and alpha and solve system
212  * (L * L^T) * a = y for a */
215 
216  a=L.triangularView<Upper>().adjoint().solve(eigen_y-eigen_m);
217  a=L.triangularView<Upper>().solve(a);
218 
219  a/=CMath::sq(sigma);
220 }
221 
223 {
224  // create eigen representataion of kernel matrix and alpha
226  Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
227 
228  // get mean and create eigen representation of it
230  Map<VectorXd> eigen_m(m.vector, m.vlen);
231 
232  m_mu=SGVector<float64_t>(m.vlen);
233  Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);
234 
235  eigen_mu=eigen_K*CMath::exp(m_log_scale*2.0)*eigen_alpha;
236 }
237 
239 {
240  // create eigen representataion of upper triangular factor L^T and kernel
241  // matrix
244 
246  Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows,
247  m_Sigma.num_cols);
248 
249  // compute V = L^(-1) * K, using upper triangular factor L^T
250  MatrixXd eigen_V=eigen_L.triangularView<Upper>().adjoint().solve(
251  eigen_K*CMath::exp(m_log_scale*2.0));
252 
254  float64_t sigma=lik->get_sigma();
255  SG_UNREF(lik);
256  eigen_V = eigen_V/sigma;
257 
258  // compute covariance matrix of the posterior: Sigma = K - V^T * V
259  eigen_Sigma=eigen_K*CMath::exp(m_log_scale*2.0)-eigen_V.adjoint()*eigen_V;
260 }
261 
263 {
264  // get the sigma variable from the Gaussian likelihood model
266  float64_t sigma=lik->get_sigma();
267  SG_UNREF(lik);
268 
269  // create eigen representation of derivative matrix and cholesky
271  Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
272 
274  Map<MatrixXd> eigen_Q(m_Q.matrix, m_Q.num_rows, m_Q.num_cols);
275 
276  // solve L * L' * Q = I
277  eigen_Q=eigen_L.triangularView<Upper>().adjoint().solve(
278  MatrixXd::Identity(m_L.num_rows, m_L.num_cols));
279  eigen_Q=eigen_L.triangularView<Upper>().solve(eigen_Q);
280 
281  // divide Q by sigma^2
282  eigen_Q/=CMath::sq(sigma);
283 
284  // create eigen representation of alpha and compute Q=Q-alpha*alpha'
285  eigen_Q-=eigen_alpha*eigen_alpha.transpose();
286 }
287 
289  const TParameter* param)
290 {
291  REQUIRE(!strcmp(param->m_name, "log_scale"), "Can't compute derivative of "
292  "the nagative log marginal likelihood wrt %s.%s parameter\n",
293  get_name(), param->m_name)
294 
296  Map<MatrixXd> eigen_Q(m_Q.matrix, m_Q.num_rows, m_Q.num_cols);
297 
298  SGVector<float64_t> result(1);
299 
300  // compute derivative wrt kernel scale: dnlZ=sum(Q.*K*scale*2)/2
301  result[0]=(eigen_Q.cwiseProduct(eigen_K)).sum();
302  result[0]*=CMath::exp(m_log_scale*2.0);
303 
304  return result;
305 }
306 
308  CInference* inference)
309 {
310  if (inference==NULL)
311  return NULL;
312 
313  if (inference->get_inference_type()!=INF_EXACT)
314  SG_SERROR("Provided inference is not of type CExactInferenceMethod!\n")
315 
316  SG_REF(inference);
317  return (CExactInferenceMethod*)inference;
318 }
319 
321  const TParameter* param)
322 {
323  REQUIRE(!strcmp(param->m_name, "log_sigma"), "Can't compute derivative of "
324  "the nagative log marginal likelihood wrt %s.%s parameter\n",
325  m_model->get_name(), param->m_name)
326 
327  // get the sigma variable from the Gaussian likelihood model
329  float64_t sigma=lik->get_sigma();
330  SG_UNREF(lik);
331 
332  // create eigen representation of the matrix Q
333  Map<MatrixXd> eigen_Q(m_Q.matrix, m_Q.num_rows, m_Q.num_cols);
334 
335  SGVector<float64_t> result(1);
336 
337  // compute derivative wrt likelihood model parameter sigma:
338  // dnlZ=sigma^2*trace(Q)
339  result[0]=CMath::sq(sigma)*eigen_Q.trace();
340 
341  return result;
342 }
343 
345  const TParameter* param)
346 {
347  // create eigen representation of the matrix Q
348  Map<MatrixXd> eigen_Q(m_Q.matrix, m_Q.num_rows, m_Q.num_cols);
349 
350  REQUIRE(param, "Param not set\n");
351  SGVector<float64_t> result;
352  int64_t len=const_cast<TParameter *>(param)->m_datatype.get_num_elements();
353  result=SGVector<float64_t>(len);
354 
355  for (index_t i=0; i<result.vlen; i++)
356  {
358 
359  if (result.vlen==1)
360  dK=m_kernel->get_parameter_gradient(param);
361  else
362  dK=m_kernel->get_parameter_gradient(param, i);
363 
364  Map<MatrixXd> eigen_dK(dK.matrix, dK.num_rows, dK.num_cols);
365 
366  // compute derivative wrt kernel parameter: dnlZ=sum(Q.*dK*scale)/2.0
367  result[i]=(eigen_Q.cwiseProduct(eigen_dK)).sum();
368  result[i]*=CMath::exp(m_log_scale*2.0)/2.0;
369  }
370 
371  return result;
372 }
373 
375  const TParameter* param)
376 {
377  // create eigen representation of alpha vector
378  Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
379 
380  REQUIRE(param, "Param not set\n");
381  SGVector<float64_t> result;
382  int64_t len=const_cast<TParameter *>(param)->m_datatype.get_num_elements();
383  result=SGVector<float64_t>(len);
384 
385  for (index_t i=0; i<result.vlen; i++)
386  {
388 
389  if (result.vlen==1)
391  else
393 
394  Map<VectorXd> eigen_dmu(dmu.vector, dmu.vlen);
395 
396  // compute derivative wrt mean parameter: dnlZ=-dmu'*alpha
397  result[i]=-eigen_dmu.dot(eigen_alpha);
398  }
399 
400  return result;
401 }
402 
virtual const char * get_name() const =0
float64_t m_log_scale
Definition: Inference.h:490
virtual void update()
Definition: Inference.cpp:316
static void fill_vector(T *vec, int32_t len, T value)
Definition: SGVector.cpp:221
The Gaussian exact form inference method class.
virtual ELabelType get_label_type() const =0
virtual void update_parameter_hash()
Definition: SGObject.cpp:281
Class that models Gaussian likelihood.
Real Labels are real-valued labels.
virtual SGMatrix< float64_t > get_posterior_covariance()
int32_t index_t
Definition: common.h:62
Vector::Scalar dot(Vector a, Vector b)
Definition: Redux.h:58
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:43
virtual EInferenceType get_inference_type() const
Definition: Inference.h:104
real valued labels (e.g. for regression, classifier outputs)
Definition: LabelTypes.h:22
virtual const char * get_name() const
virtual SGVector< float64_t > get_alpha()
CKernel * m_kernel
Definition: Inference.h:469
static T sq(T x)
Definition: Math.h:450
Definition: SGMatrix.h:20
virtual ELikelihoodModelType get_model_type() const
parameter struct
virtual void register_minimizer(Minimizer *minimizer)
virtual int32_t get_num_vectors() const =0
#define REQUIRE(x,...)
Definition: SGIO.h:206
index_t num_cols
Definition: SGMatrix.h:376
virtual SGVector< float64_t > get_mean_vector(const CFeatures *features) const =0
An abstract class of the mean function.
Definition: MeanFunction.h:49
#define SG_REF(x)
Definition: SGObject.h:54
index_t num_rows
Definition: SGMatrix.h:374
CFeatures * m_features
Definition: Inference.h:478
SGMatrix< float64_t > m_ktrtr
Definition: Inference.h:493
CMeanFunction * m_mean
Definition: Inference.h:472
index_t vlen
Definition: SGVector.h:494
virtual SGMatrix< float64_t > get_cholesky()
CLabels * m_labels
Definition: Inference.h:481
virtual SGVector< float64_t > get_derivative_wrt_mean(const TParameter *param)
double float64_t
Definition: common.h:50
virtual SGVector< float64_t > get_derivative_wrt_inference_method(const TParameter *param)
virtual void compute_gradient()
Definition: Inference.cpp:343
virtual SGVector< float64_t > get_diagonal_vector()
SGMatrix< float64_t > m_L
Definition: Inference.h:487
static CGaussianLikelihood * obtain_from_generic(CLikelihoodModel *lik)
static CExactInferenceMethod * obtain_from_generic(CInference *inference)
virtual SGVector< float64_t > get_parameter_derivative(const CFeatures *features, const TParameter *param, index_t index=-1)
Definition: MeanFunction.h:73
#define SG_UNREF(x)
Definition: SGObject.h:55
#define SG_DEBUG(...)
Definition: SGIO.h:107
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
The Inference Method base class.
Definition: Inference.h:81
virtual float64_t get_negative_log_marginal_likelihood()
The class Features is the base class of all feature objects.
Definition: Features.h:68
#define SG_SERROR(...)
Definition: SGIO.h:179
static float64_t exp(float64_t x)
Definition: Math.h:621
virtual SGVector< float64_t > get_derivative_wrt_kernel(const TParameter *param)
virtual SGMatrix< float64_t > get_parameter_gradient(const TParameter *param, index_t index=-1)
Definition: Kernel.h:851
static float64_t log(float64_t v)
Definition: Math.h:922
virtual SGVector< float64_t > get_derivative_wrt_likelihood_model(const TParameter *param)
The Kernel base class.
Definition: Kernel.h:159
The minimizer base class.
Definition: Minimizer.h:43
#define SG_WARNING(...)
Definition: SGIO.h:128
CLikelihoodModel * m_model
Definition: Inference.h:475
virtual bool parameter_hash_changed()
Definition: SGObject.cpp:295
The Likelihood model base class.
virtual SGVector< float64_t > get_posterior_mean()
SGVector< float64_t > m_alpha
Definition: Inference.h:484
virtual void check_members() const
Definition: Inference.cpp:322
static const float64_t PI
Definition: Math.h:2055

SHOGUN Machine Learning Toolbox - Documentation