SHOGUN  4.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
ExactInferenceMethod.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (W) 2013 Roman Votyakov
4  * Written (W) 2012 Jacob Walker
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice, this
11  * list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright notice,
13  * this list of conditions and the following disclaimer in the documentation
14  * and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  * The views and conclusions contained in the software and documentation are those
28  * of the authors and should not be interpreted as representing official policies,
29  * either expressed or implied, of the Shogun Development Team.
30  *
31  * Code adapted from Gaussian Process Machine Learning Toolbox
32  * http://www.gaussianprocess.org/gpml/code/matlab/doc/
33  */
35 
36 #ifdef HAVE_EIGEN3
37 
42 
43 using namespace shogun;
44 using namespace Eigen;
45 
47 {
48 }
49 
51  CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod) :
52  CInferenceMethod(kern, feat, m, lab, mod)
53 {
54 }
55 
57 {
58 }
59 
61 {
63 
64  if (!m_gradient_update)
65  {
66  update_deriv();
67  update_mean();
68  update_cov();
69  m_gradient_update=true;
71  }
72 }
73 
75 {
76  SG_DEBUG("entering\n");
77 
79  update_chol();
80  update_alpha();
81  m_gradient_update=false;
83 
84  SG_DEBUG("leaving\n");
85 }
86 
88 {
90 
92  "Exact inference method can only use Gaussian likelihood function\n")
94  "Labels must be type of CRegressionLabels\n")
95 }
96 
98 {
100  update();
101 
102  // get the sigma variable from the Gaussian likelihood model
104  float64_t sigma=lik->get_sigma();
105  SG_UNREF(lik);
106 
107  // compute diagonal vector: sW=1/sigma
109  result.fill_vector(result.vector, m_features->get_num_vectors(), 1.0/sigma);
110 
111  return result;
112 }
113 
115 {
117  update();
118 
119  // get the sigma variable from the Gaussian likelihood model
121  float64_t sigma=lik->get_sigma();
122  SG_UNREF(lik);
123 
124  // create eigen representation of alpha and L
125  Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
127 
128  // get labels and mean vectors and create eigen representation
129  SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels();
130  Map<VectorXd> eigen_y(y.vector, y.vlen);
132  Map<VectorXd> eigen_m(m.vector, m.vlen);
133 
134  // compute negative log of the marginal likelihood:
135  // nlZ=(y-m)'*alpha/2+sum(log(diag(L)))+n*log(2*pi*sigma^2)/2
136  float64_t result=(eigen_y-eigen_m).dot(eigen_alpha)/2.0+
137  eigen_L.diagonal().array().log().sum()+m_L.num_rows*
138  CMath::log(2*CMath::PI*CMath::sq(sigma))/2.0;
139 
140  return result;
141 }
142 
144 {
146  update();
147 
149 }
150 
152 {
154  update();
155 
156  return SGMatrix<float64_t>(m_L);
157 }
158 
160 {
162 
163  return SGVector<float64_t>(m_mu);
164 }
165 
167 {
169 
170  return SGMatrix<float64_t>(m_Sigma);
171 }
172 
174 {
175  // get the sigma variable from the Gaussian likelihood model
177  float64_t sigma=lik->get_sigma();
178  SG_UNREF(lik);
179 
180  /* check whether to allocate cholesky memory */
183 
184  /* creates views on kernel and cholesky matrix and perform cholesky */
187  LLT<MatrixXd> llt(K*(CMath::exp(m_log_scale*2.0)/CMath::sq(sigma))+
188  MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols));
189  L=llt.matrixU();
190 }
191 
193 {
194  // get the sigma variable from the Gaussian likelihood model
196  float64_t sigma=lik->get_sigma();
197  SG_UNREF(lik);
198 
199  // get labels and mean vector and create eigen representation
200  SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels();
201  Map<VectorXd> eigen_y(y.vector, y.vlen);
203  Map<VectorXd> eigen_m(m.vector, m.vlen);
204 
206 
207  /* creates views on cholesky matrix and alpha and solve system
208  * (L * L^T) * a = y for a */
211 
212  a=L.triangularView<Upper>().adjoint().solve(eigen_y-eigen_m);
213  a=L.triangularView<Upper>().solve(a);
214 
215  a/=CMath::sq(sigma);
216 }
217 
219 {
220  // create eigen representataion of kernel matrix and alpha
222  Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
223 
224  // get mean and create eigen representation of it
226  Map<VectorXd> eigen_m(m.vector, m.vlen);
227 
228  m_mu=SGVector<float64_t>(m.vlen);
229  Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);
230 
231  eigen_mu=eigen_K*CMath::exp(m_log_scale*2.0)*eigen_alpha;
232 }
233 
235 {
236  // create eigen representataion of upper triangular factor L^T and kernel
237  // matrix
240 
242  Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows,
243  m_Sigma.num_cols);
244 
245  // compute V = L^(-1) * K, using upper triangular factor L^T
246  MatrixXd eigen_V=eigen_L.triangularView<Upper>().adjoint().solve(
247  eigen_K*CMath::exp(m_log_scale*2.0));
248 
250  float64_t sigma=lik->get_sigma();
251  SG_UNREF(lik);
252  eigen_V = eigen_V/sigma;
253 
254  // compute covariance matrix of the posterior: Sigma = K - V^T * V
255  eigen_Sigma=eigen_K*CMath::exp(m_log_scale*2.0)-eigen_V.adjoint()*eigen_V;
256 }
257 
259 {
260  // get the sigma variable from the Gaussian likelihood model
262  float64_t sigma=lik->get_sigma();
263  SG_UNREF(lik);
264 
265  // create eigen representation of derivative matrix and cholesky
267  Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
268 
270  Map<MatrixXd> eigen_Q(m_Q.matrix, m_Q.num_rows, m_Q.num_cols);
271 
272  // solve L * L' * Q = I
273  eigen_Q=eigen_L.triangularView<Upper>().adjoint().solve(
274  MatrixXd::Identity(m_L.num_rows, m_L.num_cols));
275  eigen_Q=eigen_L.triangularView<Upper>().solve(eigen_Q);
276 
277  // divide Q by sigma^2
278  eigen_Q/=CMath::sq(sigma);
279 
280  // create eigen representation of alpha and compute Q=Q-alpha*alpha'
281  eigen_Q-=eigen_alpha*eigen_alpha.transpose();
282 }
283 
285  const TParameter* param)
286 {
287  REQUIRE(!strcmp(param->m_name, "log_scale"), "Can't compute derivative of "
288  "the nagative log marginal likelihood wrt %s.%s parameter\n",
289  get_name(), param->m_name)
290 
292  Map<MatrixXd> eigen_Q(m_Q.matrix, m_Q.num_rows, m_Q.num_cols);
293 
294  SGVector<float64_t> result(1);
295 
296  // compute derivative wrt kernel scale: dnlZ=sum(Q.*K*scale*2)/2
297  result[0]=(eigen_Q.cwiseProduct(eigen_K)).sum();
298  result[0]*=CMath::exp(m_log_scale*2.0);
299 
300  return result;
301 }
302 
304  CInferenceMethod* inference)
305 {
306  if (inference==NULL)
307  return NULL;
308 
309  if (inference->get_inference_type()!=INF_EXACT)
310  SG_SERROR("Provided inference is not of type CExactInferenceMethod!\n")
311 
312  SG_REF(inference);
313  return (CExactInferenceMethod*)inference;
314 }
315 
317  const TParameter* param)
318 {
319  REQUIRE(!strcmp(param->m_name, "log_sigma"), "Can't compute derivative of "
320  "the nagative log marginal likelihood wrt %s.%s parameter\n",
321  m_model->get_name(), param->m_name)
322 
323  // get the sigma variable from the Gaussian likelihood model
325  float64_t sigma=lik->get_sigma();
326  SG_UNREF(lik);
327 
328  // create eigen representation of the matrix Q
329  Map<MatrixXd> eigen_Q(m_Q.matrix, m_Q.num_rows, m_Q.num_cols);
330 
331  SGVector<float64_t> result(1);
332 
333  // compute derivative wrt likelihood model parameter sigma:
334  // dnlZ=sigma^2*trace(Q)
335  result[0]=CMath::sq(sigma)*eigen_Q.trace();
336 
337  return result;
338 }
339 
341  const TParameter* param)
342 {
343  // create eigen representation of the matrix Q
344  Map<MatrixXd> eigen_Q(m_Q.matrix, m_Q.num_rows, m_Q.num_cols);
345 
346  REQUIRE(param, "Param not set\n");
347  SGVector<float64_t> result;
348  int64_t len=const_cast<TParameter *>(param)->m_datatype.get_num_elements();
349  result=SGVector<float64_t>(len);
350 
351  for (index_t i=0; i<result.vlen; i++)
352  {
354 
355  if (result.vlen==1)
356  dK=m_kernel->get_parameter_gradient(param);
357  else
358  dK=m_kernel->get_parameter_gradient(param, i);
359 
360  Map<MatrixXd> eigen_dK(dK.matrix, dK.num_rows, dK.num_cols);
361 
362  // compute derivative wrt kernel parameter: dnlZ=sum(Q.*dK*scale)/2.0
363  result[i]=(eigen_Q.cwiseProduct(eigen_dK)).sum();
364  result[i]*=CMath::exp(m_log_scale*2.0)/2.0;
365  }
366 
367  return result;
368 }
369 
371  const TParameter* param)
372 {
373  // create eigen representation of alpha vector
374  Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
375 
376  REQUIRE(param, "Param not set\n");
377  SGVector<float64_t> result;
378  int64_t len=const_cast<TParameter *>(param)->m_datatype.get_num_elements();
379  result=SGVector<float64_t>(len);
380 
381  for (index_t i=0; i<result.vlen; i++)
382  {
384 
385  if (result.vlen==1)
387  else
389 
390  Map<VectorXd> eigen_dmu(dmu.vector, dmu.vlen);
391 
392  // compute derivative wrt mean parameter: dnlZ=-dmu'*alpha
393  result[i]=-eigen_dmu.dot(eigen_alpha);
394  }
395 
396  return result;
397 }
398 
399 #endif /* HAVE_EIGEN3 */
virtual const char * get_name() const =0
static void fill_vector(T *vec, int32_t len, T value)
Definition: SGVector.cpp:223
The Gaussian exact form inference method class.
virtual ELabelType get_label_type() const =0
virtual void update_parameter_hash()
Definition: SGObject.cpp:250
Class that models Gaussian likelihood.
Real Labels are real-valued labels.
SGVector< float64_t > m_alpha
The Inference Method base class.
virtual SGMatrix< float64_t > get_posterior_covariance()
int32_t index_t
Definition: common.h:62
Vector::Scalar dot(Vector a, Vector b)
Definition: Redux.h:56
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:43
real valued labels (e.g. for regression, classifier outputs)
Definition: LabelTypes.h:22
virtual const char * get_name() const
virtual SGVector< float64_t > get_alpha()
static T sq(T x)
Definition: Math.h:450
Definition: SGMatrix.h:20
virtual ELikelihoodModelType get_model_type() const
parameter struct
virtual int32_t get_num_vectors() const =0
#define REQUIRE(x,...)
Definition: SGIO.h:206
index_t num_cols
Definition: SGMatrix.h:378
virtual SGVector< float64_t > get_mean_vector(const CFeatures *features) const =0
static CExactInferenceMethod * obtain_from_generic(CInferenceMethod *inference)
An abstract class of the mean function.
Definition: MeanFunction.h:49
#define SG_REF(x)
Definition: SGObject.h:51
index_t num_rows
Definition: SGMatrix.h:376
index_t vlen
Definition: SGVector.h:494
SGMatrix< float64_t > m_L
virtual SGMatrix< float64_t > get_cholesky()
virtual SGVector< float64_t > get_derivative_wrt_mean(const TParameter *param)
double float64_t
Definition: common.h:50
virtual SGVector< float64_t > get_derivative_wrt_inference_method(const TParameter *param)
virtual SGVector< float64_t > get_diagonal_vector()
static CGaussianLikelihood * obtain_from_generic(CLikelihoodModel *lik)
virtual SGVector< float64_t > get_parameter_derivative(const CFeatures *features, const TParameter *param, index_t index=-1)
Definition: MeanFunction.h:73
#define SG_UNREF(x)
Definition: SGObject.h:52
#define SG_DEBUG(...)
Definition: SGIO.h:107
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
virtual float64_t get_negative_log_marginal_likelihood()
The class Features is the base class of all feature objects.
Definition: Features.h:68
#define SG_SERROR(...)
Definition: SGIO.h:179
static float64_t exp(float64_t x)
Definition: Math.h:621
virtual SGVector< float64_t > get_derivative_wrt_kernel(const TParameter *param)
virtual SGMatrix< float64_t > get_parameter_gradient(const TParameter *param, index_t index=-1)
Definition: Kernel.h:850
static float64_t log(float64_t v)
Definition: Math.h:922
virtual void check_members() const
virtual SGVector< float64_t > get_derivative_wrt_likelihood_model(const TParameter *param)
virtual EInferenceType get_inference_type() const
The Kernel base class.
Definition: Kernel.h:158
virtual bool parameter_hash_changed()
Definition: SGObject.cpp:264
The Likelihood model base class.
SGMatrix< float64_t > m_ktrtr
CLikelihoodModel * m_model
virtual SGVector< float64_t > get_posterior_mean()
static const float64_t PI
Definition: Math.h:2055

SHOGUN Machine Learning Toolbox - Documentation