SingleLaplaceInferenceMethod.cpp
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2016 Wu Lin
 * Written (W) 2013 Roman Votyakov
 * Copyright (C) 2012 Jacob Walker
 * Copyright (C) 2013 Roman Votyakov
 *
 * Code adapted from Gaussian Process Machine Learning Toolbox
 * http://www.gaussianprocess.org/gpml/code/matlab/doc/
 * This code specifically adapted from infLaplace.m
 */

#include <shogun/machine/gp/SingleLaplaceInferenceMethod.h>

#include <shogun/machine/gp/StudentsTLikelihood.h>
#include <shogun/mathematics/Math.h>
#ifdef USE_GPL_SHOGUN
#include <shogun/lib/external/brent.h>
#endif //USE_GPL_SHOGUN
#include <shogun/mathematics/eigen3.h>

using namespace shogun;
using namespace Eigen;

namespace shogun
{

#ifdef USE_GPL_SHOGUN
#ifndef DOXYGEN_SHOULD_SKIP_THIS

class PsiLine : public func_base
{
public:
    float64_t log_scale;
    MatrixXd K;
    VectorXd dalpha;
    VectorXd start_alpha;
    Map<VectorXd>* alpha;
    SGVector<float64_t>* dlp;
    SGVector<float64_t>* W;
    SGVector<float64_t>* f;
    SGVector<float64_t>* m;
    CLikelihoodModel* lik;
    CLabels* lab;

    virtual double operator() (double x)
    {
        Map<VectorXd> eigen_f(f->vector, f->vlen);
        Map<VectorXd> eigen_m(m->vector, m->vlen);

        // compute alpha=alpha+x*dalpha and f=K*alpha+m
        (*alpha)=start_alpha+x*dalpha;
        eigen_f=K*(*alpha)*CMath::exp(log_scale*2.0)+eigen_m;

        // get first and second derivatives of log likelihood
        (*dlp)=lik->get_log_probability_derivative_f(lab, (*f), 1);

        (*W)=lik->get_log_probability_derivative_f(lab, (*f), 2);
        W->scale(-1.0);

        // compute psi=alpha'*(f-m)/2-lp
        float64_t result = (*alpha).dot(eigen_f-eigen_m)/2.0-
            SGVector<float64_t>::sum(lik->get_log_probability_f(lab, *f));

        return result;
    }
};
#endif //USE_GPL_SHOGUN

class SingleLaplaceInferenceMethodCostFunction: public FirstOrderCostFunction
{
public:
    SingleLaplaceInferenceMethodCostFunction():FirstOrderCostFunction() { init(); }
    virtual ~SingleLaplaceInferenceMethodCostFunction() { SG_UNREF(m_obj); }
    void set_target(CSingleLaplaceInferenceMethod *obj)
    {
        REQUIRE(obj, "Obj must be set\n");
        if(m_obj != obj)
        {
            SG_REF(obj);
            SG_UNREF(m_obj);
            m_obj=obj;
        }
    }
    virtual float64_t get_cost()
    {
        REQUIRE(m_obj,"Object not set\n");
        return m_obj->get_psi_wrt_alpha();
    }
    void unset_target(bool is_unref)
    {
        if(is_unref)
        {
            SG_UNREF(m_obj);
        }
        m_obj=NULL;
    }
    virtual SGVector<float64_t> obtain_variable_reference()
    {
        REQUIRE(m_obj,"Object not set\n");
        m_derivatives = SGVector<float64_t>((m_obj->m_alpha).vlen);
        return m_obj->m_alpha;
    }
    virtual SGVector<float64_t> get_gradient()
    {
        REQUIRE(m_obj,"Object not set\n");
        m_obj->get_gradient_wrt_alpha(m_derivatives);
        return m_derivatives;
    }
    virtual const char* get_name() const { return "SingleLaplaceInferenceMethodCostFunction"; }
private:
    void init()
    {
        m_obj=NULL;
        m_derivatives = SGVector<float64_t>();
        SG_ADD(&m_derivatives, "SingleLaplaceInferenceMethodCostFunction__m_derivatives",
            "derivatives in SingleLaplaceInferenceMethodCostFunction", MS_NOT_AVAILABLE);
        SG_ADD((CSGObject **)&m_obj, "SingleLaplaceInferenceMethodCostFunction__m_obj",
            "obj in SingleLaplaceInferenceMethodCostFunction", MS_NOT_AVAILABLE);
    }

    SGVector<float64_t> m_derivatives;
    CSingleLaplaceInferenceMethod *m_obj;
};
#endif /* DOXYGEN_SHOULD_SKIP_THIS */


void CSingleLaplaceNewtonOptimizer::set_target(CSingleLaplaceInferenceMethod *obj)
{
    REQUIRE(obj, "Obj must be set\n");
    if(m_obj != obj)
    {
        SG_REF(obj);
        SG_UNREF(m_obj);
        m_obj=obj;
    }
}

void CSingleLaplaceNewtonOptimizer::unset_target(bool is_unref)
{
    if(is_unref)
    {
        SG_UNREF(m_obj);
    }
    m_obj=NULL;
}

void CSingleLaplaceNewtonOptimizer::init()
{
    m_obj=NULL;
    m_iter=20;
    m_tolerance=1e-6;
    m_opt_tolerance=1e-6;
    m_opt_max=10;

    SG_ADD((CSGObject **)&m_obj, "CSingleLaplaceNewtonOptimizer__m_obj",
        "obj in CSingleLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
    SG_ADD(&m_iter, "CSingleLaplaceNewtonOptimizer__m_iter",
        "iter in CSingleLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
    SG_ADD(&m_tolerance, "CSingleLaplaceNewtonOptimizer__m_tolerance",
        "tolerance in CSingleLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
    SG_ADD(&m_opt_tolerance, "CSingleLaplaceNewtonOptimizer__m_opt_tolerance",
        "opt_tolerance in CSingleLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
    SG_ADD(&m_opt_max, "CSingleLaplaceNewtonOptimizer__m_opt_max",
        "opt_max in CSingleLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
}

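// The minimize() body below implements the damped Newton iteration of GPML's
// infLaplace.m: with B = I + sW*K*sW (upper Cholesky factor L), the full
// Newton direction is
//   dalpha = b - sW.*(B\(sW.*(K*b))) - alpha,   where b = W.*(f-m) + dlp,
// and the step length x along dalpha is chosen by Brent's method on the
// objective psi(alpha) = alpha'*(f-m)/2 - sum(log p(y|f)) (see PsiLine above).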
float64_t CSingleLaplaceNewtonOptimizer::minimize()
{
    REQUIRE(m_obj,"Object not set\n");
    float64_t Psi_Old=CMath::INFTY;
    float64_t Psi_New=m_obj->m_Psi;

    // get mean vector and create eigen representation of it
    Map<VectorXd> eigen_mean( (m_obj->m_mean_f).vector, (m_obj->m_mean_f).vlen);

    // create eigen representation of kernel matrix
    Map<MatrixXd> eigen_ktrtr( (m_obj->m_ktrtr).matrix, (m_obj->m_ktrtr).num_rows, (m_obj->m_ktrtr).num_cols);

    Map<VectorXd> eigen_mu(m_obj->m_mu, (m_obj->m_mu).vlen);

    // compute W = -d2lp
    m_obj->m_W=m_obj->m_model->get_log_probability_derivative_f(m_obj->m_labels, m_obj->m_mu, 2);
    m_obj->m_W.scale(-1.0);

    Map<VectorXd> eigen_alpha(m_obj->m_alpha.vector, m_obj->m_alpha.vlen);

    // get first derivative of log probability function
    m_obj->m_dlp=m_obj->m_model->get_log_probability_derivative_f(m_obj->m_labels, m_obj->m_mu, 1);

    // create shogun and eigen representation of sW
    m_obj->m_sW=SGVector<float64_t>((m_obj->m_W).vlen);
    Map<VectorXd> eigen_sW((m_obj->m_sW).vector, (m_obj->m_sW).vlen);

    index_t iter=0;

    while (Psi_Old-Psi_New>m_tolerance && iter<m_iter)
    {
        Map<VectorXd> eigen_W( (m_obj->m_W).vector, (m_obj->m_W).vlen);
        Map<VectorXd> eigen_dlp( (m_obj->m_dlp).vector, (m_obj->m_dlp).vlen);

        Psi_Old = Psi_New;
        iter++;

        if (eigen_W.minCoeff() < 0)
        {
            // Suggested by Vanhatalo et al.,
            // Gaussian Process Regression with Student's t likelihood, NIPS 2009
            // Quoted from infLaplace.m
            float64_t df;

            if (m_obj->m_model->get_model_type()==LT_STUDENTST)
            {
                CStudentsTLikelihood* lik=CStudentsTLikelihood::obtain_from_generic(m_obj->m_model);
                df=lik->get_degrees_freedom();
                SG_UNREF(lik);
            }
            else
                df=1;

            eigen_W+=(2.0/df)*eigen_dlp.cwiseProduct(eigen_dlp);
        }

        // compute sW = sqrt(W)
        eigen_sW=eigen_W.cwiseSqrt();

        LLT<MatrixXd> L((eigen_sW*eigen_sW.transpose()).cwiseProduct(eigen_ktrtr*CMath::exp((m_obj->m_log_scale)*2.0))+
            MatrixXd::Identity( (m_obj->m_ktrtr).num_rows, (m_obj->m_ktrtr).num_cols));

        VectorXd b=eigen_W.cwiseProduct(eigen_mu - eigen_mean)+eigen_dlp;

        VectorXd dalpha=b-eigen_sW.cwiseProduct(
            L.solve(eigen_sW.cwiseProduct(eigen_ktrtr*b*CMath::exp((m_obj->m_log_scale)*2.0))))-eigen_alpha;

#ifdef USE_GPL_SHOGUN
        // perform Brent's optimization
        PsiLine func;

        func.log_scale=m_obj->m_log_scale;
        func.K=eigen_ktrtr;
        func.dalpha=dalpha;
        func.start_alpha=eigen_alpha;
        func.alpha=&eigen_alpha;
        func.dlp=&(m_obj->m_dlp);
        func.f=&(m_obj->m_mu);
        func.m=&(m_obj->m_mean_f);
        func.W=&(m_obj->m_W);
        func.lik=m_obj->m_model;
        func.lab=m_obj->m_labels;

        float64_t x;
        Psi_New=local_min(0, m_opt_max, m_opt_tolerance, func, x);
#else
        SG_GPL_ONLY
#endif //USE_GPL_SHOGUN
    }

    if (Psi_Old-Psi_New>m_tolerance && iter>=m_iter)
    {
        SG_SWARNING("Max iterations (%d) reached, but convergence level (%f) is not yet below tolerance (%f)\n", m_iter, Psi_Old-Psi_New, m_tolerance);
    }
    return Psi_New;
}

CSingleLaplaceInferenceMethod::CSingleLaplaceInferenceMethod() : CLaplaceInference()
{
    init();
}

CSingleLaplaceInferenceMethod::CSingleLaplaceInferenceMethod(CKernel* kern,
    CFeatures* feat, CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod)
    : CLaplaceInference(kern, feat, m, lab, mod)
{
    init();
}

void CSingleLaplaceInferenceMethod::init()
{
    m_Psi=0;
    SG_ADD(&m_Psi, "Psi", "posterior log likelihood without constant terms", MS_NOT_AVAILABLE);
    SG_ADD(&m_sW, "sW", "square root of W", MS_NOT_AVAILABLE);
    SG_ADD(&m_d2lp, "d2lp", "second derivative of log likelihood with respect to function location", MS_NOT_AVAILABLE);
    SG_ADD(&m_d3lp, "d3lp", "third derivative of log likelihood with respect to function location", MS_NOT_AVAILABLE);
    register_minimizer(new CSingleLaplaceNewtonOptimizer());
}

SGVector<float64_t> CSingleLaplaceInferenceMethod::get_diagonal_vector()
{
    if (parameter_hash_changed())
        update();

    return SGVector<float64_t>(m_sW);
}

CSingleLaplaceInferenceMethod* CSingleLaplaceInferenceMethod::obtain_from_generic(
    CInference* inference)
{
    if (inference==NULL)
        return NULL;

    if (inference->get_inference_type()!=INF_LAPLACE_SINGLE)
        SG_SERROR("Provided inference is not of type CSingleLaplaceInferenceMethod\n")

    SG_REF(inference);
    return (CSingleLaplaceInferenceMethod*)inference;
}

CSingleLaplaceInferenceMethod::~CSingleLaplaceInferenceMethod()
{
}

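// The value returned below is the Laplace approximation to the negative log
// marginal likelihood:
//   -log q(y|X) = alpha'*(f-m)/2 - sum(log p(y|f)) + (log-determinant term),
// where the log-determinant is taken from the Cholesky factor stored in m_L
// when W > 0, and recomputed via an LU factorization when W has negative
// entries (non-log-concave likelihoods).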
float64_t CSingleLaplaceInferenceMethod::get_negative_log_marginal_likelihood()
{
    if (parameter_hash_changed())
        update();

    // create eigen representations alpha, f, W, L
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
    Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);
    Map<VectorXd> eigen_W(m_W.vector, m_W.vlen);
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);

    // get mean vector and create eigen representation of it
    SGVector<float64_t> mean=m_mean->get_mean_vector(m_features);
    Map<VectorXd> eigen_mean(mean.vector, mean.vlen);

    // get log likelihood
    float64_t lp=SGVector<float64_t>::sum(m_model->get_log_probability_f(m_labels,
        m_mu));

    float64_t result;

    if (eigen_W.minCoeff()<0)
    {
        Map<VectorXd> eigen_sW(m_sW.vector, m_sW.vlen);
        Map<MatrixXd> eigen_ktrtr(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);

        FullPivLU<MatrixXd> lu(MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols)+
            eigen_ktrtr*CMath::exp(m_log_scale*2.0)*eigen_sW.asDiagonal());

        result=(eigen_alpha.dot(eigen_mu-eigen_mean))/2.0-
            lp+log(lu.determinant())/2.0;
    }
    else
    {
        result=eigen_alpha.dot(eigen_mu-eigen_mean)/2.0-lp+
            eigen_L.diagonal().array().log().sum();
    }

    return result;
}

void CSingleLaplaceInferenceMethod::update_approx_cov()
{
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<VectorXd> eigen_sW(m_sW.vector, m_sW.vlen);

    m_Sigma=SGMatrix<float64_t>(m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows, m_Sigma.num_cols);

    // compute V = L^(-1) * W^(1/2) * K, using upper triangular factor L^T
    MatrixXd eigen_V=eigen_L.triangularView<Upper>().adjoint().solve(
        eigen_sW.asDiagonal()*eigen_K*CMath::exp(m_log_scale*2.0));

    // compute covariance matrix of the posterior:
    // Sigma = K - K * W^(1/2) * (L * L^T)^(-1) * W^(1/2) * K =
    // K - (K * W^(1/2)) * (L^T)^(-1) * L^(-1) * W^(1/2) * K =
    // K - (W^(1/2) * K)^T * (L^(-1))^T * L^(-1) * W^(1/2) * K = K - V^T * V
    eigen_Sigma=eigen_K*CMath::exp(m_log_scale*2.0)-eigen_V.adjoint()*eigen_V;
}

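// update_chol() stores in m_L either the upper Cholesky factor of
// B = I + sW*K*sW (the usual case, W > 0), or, when W has negative entries and
// no Cholesky factor exists, the infLaplace.m fallback
// L = -(K + 1/W)^(-1), computed as -diag(W)*(I + K*diag(W))^(-1).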
void CSingleLaplaceInferenceMethod::update_chol()
{
    // get log probability derivatives
    m_dlp=m_model->get_log_probability_derivative_f(m_labels, m_mu, 1);
    m_d2lp=m_model->get_log_probability_derivative_f(m_labels, m_mu, 2);
    m_d3lp=m_model->get_log_probability_derivative_f(m_labels, m_mu, 3);

    // W = -d2lp
    m_W=m_d2lp.clone();
    m_W.scale(-1.0);
    m_sW=SGVector<float64_t>(m_W.vlen);

    // compute sW
    Map<VectorXd> eigen_W(m_W.vector, m_W.vlen);
    Map<VectorXd> eigen_sW(m_sW.vector, m_sW.vlen);

    if (eigen_W.minCoeff()>0)
        eigen_sW=eigen_W.cwiseSqrt();
    else
        //post.sW = sqrt(abs(W)).*sign(W);
        eigen_sW=((eigen_W.array().abs()+eigen_W.array())/2).sqrt()-((eigen_W.array().abs()-eigen_W.array())/2).sqrt();

    // create eigen representation of kernel matrix
    Map<MatrixXd> eigen_ktrtr(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);

    // create shogun and eigen representation of posterior cholesky
    m_L=SGMatrix<float64_t>(m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);

    if (eigen_W.minCoeff() < 0)
    {
        //A = eye(n)+K.*repmat(w',n,1);
        FullPivLU<MatrixXd> lu(
            MatrixXd::Identity(m_ktrtr.num_rows,m_ktrtr.num_cols)+
            eigen_ktrtr*CMath::exp(m_log_scale*2.0)*eigen_W.asDiagonal());
        // compute cholesky: L = -(K + 1/W)^-1
        //-iA = -inv(A)
        eigen_L=-lu.inverse();
        // -repmat(w,1,n).*iA == (-iA'.*repmat(w',n,1))'
        eigen_L=eigen_W.asDiagonal()*eigen_L;
    }
    else
    {
        // compute cholesky: L = chol(sW * sW' .* K + I)
        LLT<MatrixXd> L(
            (eigen_sW*eigen_sW.transpose()).cwiseProduct(eigen_ktrtr*CMath::exp(m_log_scale*2.0))+
            MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols));

        eigen_L = L.matrixU();
    }
}

void CSingleLaplaceInferenceMethod::update()
{
    SG_DEBUG("entering\n");

    CInference::update();
    update_init();
    update_alpha();
    update_chol();
    m_gradient_update=false;
    update_parameter_hash();

    SG_DEBUG("leaving\n");
}


void CSingleLaplaceInferenceMethod::update_init()
{
    float64_t Psi_New;
    float64_t Psi_Def;
    // get mean vector and create eigen representation of it
    SGVector<float64_t> mean=m_mean->get_mean_vector(m_features);
    Map<VectorXd> eigen_mean(mean.vector, mean.vlen);

    // create eigen representation of kernel matrix
    Map<MatrixXd> eigen_ktrtr(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);

    // create shogun and eigen representation of function vector
    m_mu=SGVector<float64_t>(m_ktrtr.num_cols);
    Map<VectorXd> eigen_mu(m_mu, m_mu.vlen);

    if (m_alpha.vlen!=m_labels->get_num_labels())
    {
        // set alpha to a zero vector
        m_alpha=SGVector<float64_t>(m_labels->get_num_labels());
        m_alpha.zero();

        // f = mean, if length of alpha and length of y don't match
        eigen_mu=eigen_mean;

        Psi_New=-SGVector<float64_t>::sum(m_model->get_log_probability_f(
            m_labels, m_mu));
    }
    else
    {
        Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

        // compute f = K * alpha + m
        eigen_mu=eigen_ktrtr*CMath::exp(m_log_scale*2.0)*eigen_alpha+eigen_mean;

        Psi_New=eigen_alpha.dot(eigen_mu-eigen_mean)/2.0-
            SGVector<float64_t>::sum(m_model->get_log_probability_f(m_labels, m_mu));

        Psi_Def=-SGVector<float64_t>::sum(m_model->get_log_probability_f(m_labels, mean));

        // if default is better, then use it
        if (Psi_Def < Psi_New)
        {
            m_alpha.zero();
            eigen_mu=eigen_mean;
            Psi_New=Psi_Def;
        }
    }
    m_Psi=Psi_New;
}


void CSingleLaplaceInferenceMethod::register_minimizer(Minimizer* minimizer)
{
    REQUIRE(minimizer, "Minimizer must be set\n");
    if (!dynamic_cast<CSingleLaplaceNewtonOptimizer*>(minimizer))
    {
        FirstOrderMinimizer* opt= dynamic_cast<FirstOrderMinimizer*>(minimizer);
        REQUIRE(opt, "The provided minimizer is not supported\n")
    }
    CInference::register_minimizer(minimizer);
}

void CSingleLaplaceInferenceMethod::update_alpha()
{
    CSingleLaplaceNewtonOptimizer *opt=dynamic_cast<CSingleLaplaceNewtonOptimizer*>(m_minimizer);
    bool cleanup=false;
    if (opt)
    {
        opt->set_target(this);
        if(this->ref_count()>1)
            cleanup=true;
        opt->minimize();
        opt->unset_target(cleanup);
    }
    else
    {
        FirstOrderMinimizer* minimizer= dynamic_cast<FirstOrderMinimizer*>(m_minimizer);
        REQUIRE(minimizer, "The provided minimizer is not supported\n");
#ifdef USE_GPL_SHOGUN
        SingleLaplaceInferenceMethodCostFunction *cost_fun=new SingleLaplaceInferenceMethodCostFunction();
        cost_fun->set_target(this);
        if(this->ref_count()>1)
            cleanup=true;
        minimizer->set_cost_function(cost_fun);
        minimizer->minimize();
        minimizer->unset_cost_function(false);
        cost_fun->unset_target(cleanup);
        SG_UNREF(cost_fun);
#else
        SG_GPL_ONLY
#endif //USE_GPL_SHOGUN
    }
    // get mean vector and create eigen representation of it
    SGVector<float64_t> mean=m_mean->get_mean_vector(m_features);
    Map<VectorXd> eigen_mean(mean.vector, mean.vlen);

    // create eigen representation of kernel matrix
    Map<MatrixXd> eigen_ktrtr(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);

    Map<VectorXd> eigen_mu(m_mu, m_mu.vlen);

    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

    // compute f = K * alpha + m
    eigen_mu=eigen_ktrtr*CMath::exp(m_log_scale*2.0)*eigen_alpha+eigen_mean;
}

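// update_deriv() precomputes the quantities shared by all gradient terms of the
// negative log marginal likelihood:
//   Z = (K + W^(-1))^(-1), obtained as diag(sW)*B^(-1)*diag(sW) from the
//       Cholesky factor when W > 0, or as -L in the negative-W branch,
//   g = diag(d ln|B| / dW)/2, and
//   dfhat = g.*d3lp, the "implicit" derivative part that accounts for the
//   dependence of the posterior mode f on the hyperparameters.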
void CSingleLaplaceInferenceMethod::update_deriv()
{
    // create eigen representation of W, sW, dlp, d3lp, K, alpha and L
    Map<VectorXd> eigen_W(m_W.vector, m_W.vlen);
    Map<VectorXd> eigen_sW(m_sW.vector, m_sW.vlen);
    Map<VectorXd> eigen_dlp(m_dlp.vector, m_dlp.vlen);
    Map<VectorXd> eigen_d3lp(m_d3lp.vector, m_d3lp.vlen);
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);

    // create shogun and eigen representation of matrix Z
    m_Z=SGMatrix<float64_t>(m_L.num_rows, m_L.num_cols);
    Map<MatrixXd> eigen_Z(m_Z.matrix, m_Z.num_rows, m_Z.num_cols);

    // create shogun and eigen representation of the vector g
    m_g=SGVector<float64_t>(m_Z.num_rows);
    Map<VectorXd> eigen_g(m_g.vector, m_g.vlen);

    if (eigen_W.minCoeff()<0)
    {
        eigen_Z=-eigen_L;

        // compute iA = (I + K * diag(W))^-1
        FullPivLU<MatrixXd> lu(MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols)+
            eigen_K*CMath::exp(m_log_scale*2.0)*eigen_W.asDiagonal());
        MatrixXd iA=lu.inverse();

        // compute derivative ln|L'*L| wrt W: g=sum(iA.*K,2)/2
        eigen_g=(iA.cwiseProduct(eigen_K*CMath::exp(m_log_scale*2.0))).rowwise().sum()/2.0;
    }
    else
    {
        // solve L'*L*Z=diag(sW) and compute Z=diag(sW)*Z
        eigen_Z=eigen_L.triangularView<Upper>().adjoint().solve(
            MatrixXd(eigen_sW.asDiagonal()));
        eigen_Z=eigen_L.triangularView<Upper>().solve(eigen_Z);
        eigen_Z=eigen_sW.asDiagonal()*eigen_Z;

        // solve L'*C=diag(sW)*K
        MatrixXd C=eigen_L.triangularView<Upper>().adjoint().solve(
            eigen_sW.asDiagonal()*eigen_K*CMath::exp(m_log_scale*2.0));

        // compute derivative ln|L'*L| wrt W: g=(diag(K)-sum(C.^2,1)')/2
        eigen_g=(eigen_K.diagonal()*CMath::exp(m_log_scale*2.0)-
            (C.cwiseProduct(C)).colwise().sum().adjoint())/2.0;
    }

    // create shogun and eigen representation of the vector dfhat
    m_dfhat=SGVector<float64_t>(m_g.vlen);
    Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);

    // compute derivative of nlZ wrt fhat
    eigen_dfhat=eigen_g.cwiseProduct(eigen_d3lp);
}

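// For the scale parameter the kernel enters as exp(2*log_scale)*K, so
// dK/d(log_scale) = 2*exp(2*log_scale)*K; the trailing *=CMath::exp(m_log_scale*2.0)*2.0
// below applies exactly this chain-rule factor to the generic dK expression
// evaluated with the unscaled kernel matrix.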
SGVector<float64_t> CSingleLaplaceInferenceMethod::get_derivative_wrt_inference_method(
    const TParameter* param)
{
    REQUIRE(!strcmp(param->m_name, "log_scale"), "Can't compute derivative of "
        "the negative log marginal likelihood wrt %s.%s parameter\n",
        get_name(), param->m_name)

    // create eigen representation of K, Z, dfhat, dlp and alpha
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<MatrixXd> eigen_Z(m_Z.matrix, m_Z.num_rows, m_Z.num_cols);
    Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);
    Map<VectorXd> eigen_dlp(m_dlp.vector, m_dlp.vlen);
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

    SGVector<float64_t> result(1);

    // compute derivative K wrt scale
    // compute dnlZ=sum(sum(Z.*dK))/2-alpha'*dK*alpha/2
    result[0]=(eigen_Z.cwiseProduct(eigen_K)).sum()/2.0-
        (eigen_alpha.adjoint()*eigen_K).dot(eigen_alpha)/2.0;

    // compute b=dK*dlp
    VectorXd b=eigen_K*eigen_dlp;

    // compute dnlZ=dnlZ-dfhat'*(b-K*(Z*b))
    result[0]=result[0]-eigen_dfhat.dot(b-eigen_K*CMath::exp(m_log_scale*2.0)*(eigen_Z*b));
    result[0]*=CMath::exp(m_log_scale*2.0)*2.0;

    return result;
}

SGVector<float64_t> CSingleLaplaceInferenceMethod::get_derivative_wrt_likelihood_model(
    const TParameter* param)
{
    // create eigen representation of K, Z, g and dfhat
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<MatrixXd> eigen_Z(m_Z.matrix, m_Z.num_rows, m_Z.num_cols);
    Map<VectorXd> eigen_g(m_g.vector, m_g.vlen);
    Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);

    // get derivatives wrt likelihood model parameters
    SGVector<float64_t> lp_dhyp=m_model->get_first_derivative(m_labels,
        m_mu, param);
    SGVector<float64_t> dlp_dhyp=m_model->get_second_derivative(m_labels,
        m_mu, param);
    SGVector<float64_t> d2lp_dhyp=m_model->get_third_derivative(m_labels,
        m_mu, param);

    // create eigen representation of the derivatives
    Map<VectorXd> eigen_lp_dhyp(lp_dhyp.vector, lp_dhyp.vlen);
    Map<VectorXd> eigen_dlp_dhyp(dlp_dhyp.vector, dlp_dhyp.vlen);
    Map<VectorXd> eigen_d2lp_dhyp(d2lp_dhyp.vector, d2lp_dhyp.vlen);

    SGVector<float64_t> result(1);

    // compute b vector
    VectorXd b=eigen_K*eigen_dlp_dhyp;

    // compute dnlZ=-g'*d2lp_dhyp-sum(lp_dhyp)-dfhat'*(b-K*(Z*b))
    result[0]=-eigen_g.dot(eigen_d2lp_dhyp)-eigen_lp_dhyp.sum()-
        eigen_dfhat.dot(b-eigen_K*CMath::exp(m_log_scale*2.0)*(eigen_Z*b));

    return result;
}

SGVector<float64_t> CSingleLaplaceInferenceMethod::get_derivative_wrt_kernel(
    const TParameter* param)
{
    // create eigen representation of K, Z, dfhat, dlp and alpha
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<MatrixXd> eigen_Z(m_Z.matrix, m_Z.num_rows, m_Z.num_cols);
    Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);
    Map<VectorXd> eigen_dlp(m_dlp.vector, m_dlp.vlen);
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

    REQUIRE(param, "Param not set\n");
    SGVector<float64_t> result;
    int64_t len=const_cast<TParameter *>(param)->m_datatype.get_num_elements();
    result=SGVector<float64_t>(len);

    for (index_t i=0; i<result.vlen; i++)
    {
        SGMatrix<float64_t> dK;

        if (result.vlen==1)
            dK=m_kernel->get_parameter_gradient(param);
        else
            dK=m_kernel->get_parameter_gradient(param, i);

        Map<MatrixXd> eigen_dK(dK.matrix, dK.num_rows, dK.num_cols);

        // compute dnlZ=sum(sum(Z.*dK))/2-alpha'*dK*alpha/2
        result[i]=(eigen_Z.cwiseProduct(eigen_dK)).sum()/2.0-
            (eigen_alpha.adjoint()*eigen_dK).dot(eigen_alpha)/2.0;

        // compute b=dK*dlp
        VectorXd b=eigen_dK*eigen_dlp;

        // compute dnlZ=dnlZ-dfhat'*(b-K*(Z*b))
        result[i]=result[i]-eigen_dfhat.dot(b-eigen_K*CMath::exp(m_log_scale*2.0)*
            (eigen_Z*b));
        result[i]*=CMath::exp(m_log_scale*2.0);
    }

    return result;
}

SGVector<float64_t> CSingleLaplaceInferenceMethod::get_derivative_wrt_mean(
    const TParameter* param)
{
    // create eigen representation of K, Z, dfhat and alpha
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<MatrixXd> eigen_Z(m_Z.matrix, m_Z.num_rows, m_Z.num_cols);
    Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

    REQUIRE(param, "Param not set\n");
    SGVector<float64_t> result;
    int64_t len=const_cast<TParameter *>(param)->m_datatype.get_num_elements();
    result=SGVector<float64_t>(len);

    for (index_t i=0; i<result.vlen; i++)
    {
        SGVector<float64_t> dmu;

        if (result.vlen==1)
            dmu=m_mean->get_parameter_derivative(m_features, param);
        else
            dmu=m_mean->get_parameter_derivative(m_features, param, i);

        Map<VectorXd> eigen_dmu(dmu.vector, dmu.vlen);

        // compute dnlZ=-alpha'*dm-dfhat'*(dm-K*(Z*dm))
        result[i]=-eigen_alpha.dot(eigen_dmu)-eigen_dfhat.dot(eigen_dmu-
            eigen_K*CMath::exp(m_log_scale*2.0)*(eigen_Z*eigen_dmu));
    }

    return result;
}

SGVector<float64_t> CSingleLaplaceInferenceMethod::get_posterior_mean()
{
    compute_gradient();

    SGVector<float64_t> res(m_mu.vlen);
    Map<VectorXd> eigen_res(res.vector, res.vlen);

    Map<VectorXd> eigen_mu(m_mu, m_mu.vlen);
    SGVector<float64_t> mean=m_mean->get_mean_vector(m_features);
    Map<VectorXd> eigen_mean(mean.vector, mean.vlen);
    eigen_res=eigen_mu-eigen_mean;

    return res;
}


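// get_psi_wrt_alpha() evaluates the objective used by the first-order
// (non-Newton) minimizers: with f = exp(2*log_scale)*K*alpha + m,
//   psi(alpha) = alpha'*(f - m)/2 - sum(log p(y|f)).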
float64_t CSingleLaplaceInferenceMethod::get_psi_wrt_alpha()
{
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
    SGVector<float64_t> f(m_alpha.vlen);
    Map<VectorXd> eigen_f(f.vector, f.vlen);
    Map<MatrixXd> kernel(m_ktrtr.matrix,
        m_ktrtr.num_rows,
        m_ktrtr.num_cols);
    Map<VectorXd> eigen_mean_f(m_mean_f.vector,
        m_mean_f.vlen);
    /* f = K * alpha + mean_f given alpha*/
    eigen_f
        = kernel * ((eigen_alpha) * CMath::exp(m_log_scale*2.0)) + eigen_mean_f;

    /* psi = 0.5 * alpha' * (f - m) - sum(log p(y|f)) */
    float64_t psi = eigen_alpha.dot(eigen_f - eigen_mean_f) * 0.5;
    psi -= SGVector<float64_t>::sum(m_model->get_log_probability_f(m_labels, f));

    return psi;
}

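// Differentiating psi(alpha) above with f = exp(2*log_scale)*K*alpha + m gives
//   dpsi/dalpha = exp(2*log_scale)*K*(alpha - dlog p(y|f)/df),
// which is the gradient assembled below for the first-order cost function.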
void CSingleLaplaceInferenceMethod::get_gradient_wrt_alpha(SGVector<float64_t> gradient)
{
    REQUIRE(gradient.vlen==m_alpha.vlen,
        "The length of gradients (%d) should be the same as the length of parameters (%d)\n",
        gradient.vlen, m_alpha.vlen);

    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
    Eigen::Map<Eigen::VectorXd> eigen_gradient(gradient.vector, gradient.vlen);
    SGVector<float64_t> f(m_alpha.vlen);
    Map<VectorXd> eigen_f(f.vector, f.vlen);
    Map<MatrixXd> kernel(m_ktrtr.matrix,
        m_ktrtr.num_rows,
        m_ktrtr.num_cols);
    Map<VectorXd> eigen_mean_f(m_mean_f.vector,
        m_mean_f.vlen);

    /* f = K * alpha + mean_f given alpha*/
    eigen_f = kernel * ((eigen_alpha) * CMath::exp(m_log_scale*2.0)) + eigen_mean_f;

    SGVector<float64_t> dlp_f =
        m_model->get_log_probability_derivative_f(m_labels, f, 1);

    Eigen::Map<Eigen::VectorXd> eigen_dlp_f(dlp_f.vector, dlp_f.vlen);

    /* g_alpha = K * (alpha - dlp_f)*/
    eigen_gradient = kernel * ((eigen_alpha - eigen_dlp_f) * CMath::exp(m_log_scale*2.0));
}


}

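For context, a minimal usage sketch follows; it is not part of this file and assumes the standard Shogun 6.1 classes CDenseFeatures, CBinaryLabels, CGaussianKernel, CZeroMean, CLogitLikelihood and CGaussianProcessClassification. It shows how a CSingleLaplaceInferenceMethod is typically wired into a GP binary classifier.

// Sketch: binary GP classification with Laplace inference (assumes feat_train
// and lab_train already hold the training data).
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/machine/gp/ZeroMean.h>
#include <shogun/machine/gp/LogitLikelihood.h>
#include <shogun/machine/gp/SingleLaplaceInferenceMethod.h>
#include <shogun/classifier/GaussianProcessClassification.h>

using namespace shogun;

CGaussianProcessClassification* train_gpc(CDenseFeatures<float64_t>* feat_train,
    CBinaryLabels* lab_train)
{
    CGaussianKernel* kernel=new CGaussianKernel(10, 2.0);  // cache size, kernel width
    CZeroMean* mean=new CZeroMean();                       // zero mean function
    CLogitLikelihood* lik=new CLogitLikelihood();          // Bernoulli-logit likelihood

    // Laplace approximation to the non-Gaussian posterior
    CSingleLaplaceInferenceMethod* inf=new CSingleLaplaceInferenceMethod(
        kernel, feat_train, mean, lab_train, lik);

    CGaussianProcessClassification* gpc=new CGaussianProcessClassification(inf);
    gpc->train();
    return gpc;
}

The same inference object also exposes get_negative_log_marginal_likelihood() and the get_derivative_wrt_* methods defined above, which gradient-based model selection can use to tune kernel and likelihood hyperparameters.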