SHOGUN  4.2.0
SingleLaplaceInferenceMethod.cpp
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2016 Wu Lin
 * Written (W) 2013 Roman Votyakov
 * Copyright (C) 2012 Jacob Walker
 * Copyright (C) 2013 Roman Votyakov
 *
 * Code adapted from Gaussian Process Machine Learning Toolbox
 * http://www.gaussianprocess.org/gpml/code/matlab/doc/
 * This code specifically adapted from infLaplace.m
 */

#include <shogun/machine/gp/SingleLaplaceInferenceMethod.h>
#include <shogun/machine/gp/StudentsTLikelihood.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/eigen3.h>
#include <shogun/lib/external/brent.h>

using namespace shogun;
using namespace Eigen;

namespace shogun
{

#ifndef DOXYGEN_SHOULD_SKIP_THIS

class PsiLine : public func_base
{
public:
    float64_t log_scale;
    MatrixXd K;
    VectorXd dalpha;
    VectorXd start_alpha;
    Map<VectorXd>* alpha;
    SGVector<float64_t>* dlp;
    SGVector<float64_t>* W;
    SGVector<float64_t>* f;
    SGVector<float64_t>* m;
    CLikelihoodModel* lik;
    CLabels* lab;

    virtual double operator() (double x)
    {
        Map<VectorXd> eigen_f(f->vector, f->vlen);
        Map<VectorXd> eigen_m(m->vector, m->vlen);

        // compute alpha=alpha+x*dalpha and f=K*alpha+m
        (*alpha)=start_alpha+x*dalpha;
        eigen_f=K*(*alpha)*CMath::exp(log_scale*2.0)+eigen_m;

        // get first and second derivatives of log likelihood
        (*dlp)=lik->get_log_probability_derivative_f(lab, (*f), 1);

        (*W)=lik->get_log_probability_derivative_f(lab, (*f), 2);
        W->scale(-1.0);

        // compute psi=alpha'*(f-m)/2-lp
        float64_t result = (*alpha).dot(eigen_f-eigen_m)/2.0-
            SGVector<float64_t>::sum(lik->get_log_probability_f(lab, *f));

        return result;
    }
};
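
/* Editorial note, not part of the original source: PsiLine evaluates the
 * line-search objective used by the Newton solver below,
 *     psi(x) = alpha(x)'*(f - m)/2 - sum(log p(y|f)),
 * with alpha(x) = start_alpha + x*dalpha and f = exp(2*log_scale)*K*alpha(x) + m.
 * Brent's method (local_min from brent.h) minimises psi over the step size x. */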

class SingleLaplaceInferenceMethodCostFunction: public FirstOrderCostFunction
{
public:
    SingleLaplaceInferenceMethodCostFunction():FirstOrderCostFunction() { init(); }
    virtual ~SingleLaplaceInferenceMethodCostFunction() { SG_UNREF(m_obj); }
    void set_target(CSingleLaplaceInferenceMethod *obj)
    {
        REQUIRE(obj, "Obj must be set\n");
        if(m_obj != obj)
        {
            SG_REF(obj);
            SG_UNREF(m_obj);
            m_obj=obj;
        }
    }
    virtual float64_t get_cost()
    {
        REQUIRE(m_obj, "Object not set\n");
        return m_obj->get_psi_wrt_alpha();
    }
    void unset_target(bool is_unref)
    {
        if(is_unref)
        {
            SG_UNREF(m_obj);
        }
        m_obj=NULL;
    }
    virtual SGVector<float64_t> obtain_variable_reference()
    {
        REQUIRE(m_obj, "Object not set\n");
        m_derivatives = SGVector<float64_t>((m_obj->m_alpha).vlen);
        return m_obj->m_alpha;
    }
    virtual SGVector<float64_t> get_gradient()
    {
        REQUIRE(m_obj, "Object not set\n");
        m_obj->get_gradient_wrt_alpha(m_derivatives);
        return m_derivatives;
    }
    virtual const char* get_name() const { return "SingleLaplaceInferenceMethodCostFunction"; }
private:
    void init()
    {
        m_obj=NULL;
        m_derivatives = SGVector<float64_t>();
        SG_ADD(&m_derivatives, "SingleLaplaceInferenceMethodCostFunction__m_derivatives",
            "derivatives in SingleLaplaceInferenceMethodCostFunction", MS_NOT_AVAILABLE);
        SG_ADD((CSGObject **)&m_obj, "SingleLaplaceInferenceMethodCostFunction__m_obj",
            "obj in SingleLaplaceInferenceMethodCostFunction", MS_NOT_AVAILABLE);
    }

    SGVector<float64_t> m_derivatives;
    CSingleLaplaceInferenceMethod *m_obj;
};
#endif /* DOXYGEN_SHOULD_SKIP_THIS */

void CSingleLaplaceNewtonOptimizer::set_target(CSingleLaplaceInferenceMethod *obj)
{
    REQUIRE(obj, "Obj must be set\n");
    if(m_obj != obj)
    {
        SG_REF(obj);
        SG_UNREF(m_obj);
        m_obj=obj;
    }
}

void CSingleLaplaceNewtonOptimizer::unset_target(bool is_unref)
{
    if(is_unref)
    {
        SG_UNREF(m_obj);
    }
    m_obj=NULL;
}

void CSingleLaplaceNewtonOptimizer::init()
{
    m_obj=NULL;
    m_iter=20;
    m_tolerance=1e-6;
    m_opt_tolerance=1e-6;
    m_opt_max=10;

    SG_ADD((CSGObject **)&m_obj, "CSingleLaplaceNewtonOptimizer__m_obj",
        "obj in CSingleLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
    SG_ADD(&m_iter, "CSingleLaplaceNewtonOptimizer__m_iter",
        "iter in CSingleLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
    SG_ADD(&m_tolerance, "CSingleLaplaceNewtonOptimizer__m_tolerance",
        "tolerance in CSingleLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
    SG_ADD(&m_opt_tolerance, "CSingleLaplaceNewtonOptimizer__m_opt_tolerance",
        "opt_tolerance in CSingleLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
    SG_ADD(&m_opt_max, "CSingleLaplaceNewtonOptimizer__m_opt_max",
        "opt_max in CSingleLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
}
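
/* Editorial note, not part of the original source: each Newton step below follows
 * infLaplace.m from the GPML toolbox. With W = -d2 log p(y|f), sW = sqrt(W) and
 * L = chol(I + sW*K*sW'), the step direction is
 *     b      = W.*(f - m) + dlp
 *     dalpha = b - sW.*(L\(L'\(sW.*(K*b)))) - alpha,
 * and the step size along dalpha is then chosen by Brent's method on PsiLine. */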

float64_t CSingleLaplaceNewtonOptimizer::minimize()
{
    REQUIRE(m_obj, "Object not set\n");
    float64_t Psi_Old=CMath::INFTY;
    float64_t Psi_New=m_obj->m_Psi;

    // get mean vector and create eigen representation of it
    Map<VectorXd> eigen_mean( (m_obj->m_mean_f).vector, (m_obj->m_mean_f).vlen);

    // create eigen representation of kernel matrix
    Map<MatrixXd> eigen_ktrtr( (m_obj->m_ktrtr).matrix, (m_obj->m_ktrtr).num_rows, (m_obj->m_ktrtr).num_cols);

    Map<VectorXd> eigen_mu(m_obj->m_mu, (m_obj->m_mu).vlen);

    // compute W = -d2lp
    m_obj->m_W=m_obj->m_model->get_log_probability_derivative_f(m_obj->m_labels, m_obj->m_mu, 2);
    m_obj->m_W.scale(-1.0);

    Map<VectorXd> eigen_alpha(m_obj->m_alpha.vector, m_obj->m_alpha.vlen);

    // get first derivative of log probability function
    m_obj->m_dlp=m_obj->m_model->get_log_probability_derivative_f(m_obj->m_labels, m_obj->m_mu, 1);

    // create shogun and eigen representation of sW
    m_obj->m_sW=SGVector<float64_t>((m_obj->m_W).vlen);
    Map<VectorXd> eigen_sW((m_obj->m_sW).vector, (m_obj->m_sW).vlen);

    index_t iter=0;

    while (Psi_Old-Psi_New>m_tolerance && iter<m_iter)
    {
        Map<VectorXd> eigen_W( (m_obj->m_W).vector, (m_obj->m_W).vlen);
        Map<VectorXd> eigen_dlp( (m_obj->m_dlp).vector, (m_obj->m_dlp).vlen);

        Psi_Old = Psi_New;
        iter++;

        if (eigen_W.minCoeff() < 0)
        {
            // Suggested by Vanhatalo et al.,
            // Gaussian Process Regression with Student's t likelihood, NIPS 2009
            // Quoted from infLaplace.m
            float64_t df;

            if (m_obj->m_model->get_model_type()==LT_STUDENTST)
            {
                CStudentsTLikelihood* lik=CStudentsTLikelihood::obtain_from_generic(m_obj->m_model);
                df=lik->get_degrees_freedom();
                SG_UNREF(lik);
            }
            else
                df=1;

            eigen_W+=(2.0/df)*eigen_dlp.cwiseProduct(eigen_dlp);
        }

        // compute sW = sqrt(W)
        eigen_sW=eigen_W.cwiseSqrt();

        LLT<MatrixXd> L((eigen_sW*eigen_sW.transpose()).cwiseProduct(eigen_ktrtr*CMath::exp((m_obj->m_log_scale)*2.0))+
            MatrixXd::Identity( (m_obj->m_ktrtr).num_rows, (m_obj->m_ktrtr).num_cols));

        VectorXd b=eigen_W.cwiseProduct(eigen_mu - eigen_mean)+eigen_dlp;

        VectorXd dalpha=b-eigen_sW.cwiseProduct(
            L.solve(eigen_sW.cwiseProduct(eigen_ktrtr*b*CMath::exp((m_obj->m_log_scale)*2.0))))-eigen_alpha;

        // perform Brent's optimization
        PsiLine func;

        func.log_scale=m_obj->m_log_scale;
        func.K=eigen_ktrtr;
        func.dalpha=dalpha;
        func.start_alpha=eigen_alpha;
        func.alpha=&eigen_alpha;
        func.dlp=&(m_obj->m_dlp);
        func.f=&(m_obj->m_mu);
        func.m=&(m_obj->m_mean_f);
        func.W=&(m_obj->m_W);
        func.lik=m_obj->m_model;
        func.lab=m_obj->m_labels;

        float64_t x;
        Psi_New=local_min(0, m_opt_max, m_opt_tolerance, func, x);
    }

    if (Psi_Old-Psi_New>m_tolerance && iter>=m_iter)
    {
        SG_SWARNING("Max iterations (%d) reached, but convergence level (%f) is not yet below tolerance (%f)\n", m_iter, Psi_Old-Psi_New, m_tolerance);
    }
    return Psi_New;
}

CSingleLaplaceInferenceMethod::CSingleLaplaceInferenceMethod() : CLaplaceInference()
{
    init();
}

CSingleLaplaceInferenceMethod::CSingleLaplaceInferenceMethod(CKernel* kern,
        CFeatures* feat, CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod)
        : CLaplaceInference(kern, feat, m, lab, mod)
{
    init();
}

void CSingleLaplaceInferenceMethod::init()
{
    m_Psi=0;
    SG_ADD(&m_Psi, "Psi", "posterior log likelihood without constant terms", MS_NOT_AVAILABLE);
    SG_ADD(&m_sW, "sW", "square root of W", MS_NOT_AVAILABLE);
    SG_ADD(&m_d2lp, "d2lp", "second derivative of log likelihood with respect to function location", MS_NOT_AVAILABLE);
    SG_ADD(&m_d3lp, "d3lp", "third derivative of log likelihood with respect to function location", MS_NOT_AVAILABLE);
}

SGVector<float64_t> CSingleLaplaceInferenceMethod::get_diagonal_vector()
{
    if (parameter_hash_changed())
        update();

    return SGVector<float64_t>(m_sW);
}

CSingleLaplaceInferenceMethod* CSingleLaplaceInferenceMethod::obtain_from_generic(
        CInference* inference)
{
    if (inference==NULL)
        return NULL;

    if (inference->get_inference_type()!=INF_LAPLACE_SINGLE)
        SG_SERROR("Provided inference is not of type CSingleLaplaceInferenceMethod\n")

    SG_REF(inference);
    return (CSingleLaplaceInferenceMethod*)inference;
}

CSingleLaplaceInferenceMethod::~CSingleLaplaceInferenceMethod()
{
}

float64_t CSingleLaplaceInferenceMethod::get_negative_log_marginal_likelihood()
{
    if (parameter_hash_changed())
        update();

    // create eigen representations alpha, f, W, L
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
    Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);
    Map<VectorXd> eigen_W(m_W.vector, m_W.vlen);
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);

    // get mean vector and create eigen representation of it
    SGVector<float64_t> mean=m_mean->get_mean_vector(m_features);
    Map<VectorXd> eigen_mean(mean.vector, mean.vlen);

    // get log likelihood
    float64_t lp=SGVector<float64_t>::sum(m_model->get_log_probability_f(m_labels,
        m_mu));

    float64_t result;

    if (eigen_W.minCoeff()<0)
    {
        Map<VectorXd> eigen_sW(m_sW.vector, m_sW.vlen);
        Map<MatrixXd> eigen_ktrtr(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);

        FullPivLU<MatrixXd> lu(MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols)+
            eigen_ktrtr*CMath::exp(m_log_scale*2.0)*eigen_sW.asDiagonal());

        result=(eigen_alpha.dot(eigen_mu-eigen_mean))/2.0-
            lp+log(lu.determinant())/2.0;
    }
    else
    {
        result=eigen_alpha.dot(eigen_mu-eigen_mean)/2.0-lp+
            eigen_L.diagonal().array().log().sum();
    }

    return result;
}
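
/* Editorial note, not part of the original source: this is the Laplace
 * approximation to the negative log marginal likelihood,
 *     -log Z ~= alpha'*(f_hat - m)/2 - log p(y|f_hat) + sum(log(diag(L))),
 * where L = chol(I + sW*K*sW') (see Rasmussen & Williams, "Gaussian Processes
 * for Machine Learning", ch. 3). The LU branch handles likelihoods whose
 * W = -d2lp can go negative, where no Cholesky factor is available. */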

void CSingleLaplaceInferenceMethod::update_approx_cov()
{
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<VectorXd> eigen_sW(m_sW.vector, m_sW.vlen);

    m_Sigma=SGMatrix<float64_t>(m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows, m_Sigma.num_cols);

    // compute V = L^(-1) * W^(1/2) * K, using upper triangular factor L^T
    MatrixXd eigen_V=eigen_L.triangularView<Upper>().adjoint().solve(
        eigen_sW.asDiagonal()*eigen_K*CMath::exp(m_log_scale*2.0));

    // compute covariance matrix of the posterior:
    // Sigma = K - K * W^(1/2) * (L * L^T)^(-1) * W^(1/2) * K =
    // K - (K * W^(1/2)) * (L^T)^(-1) * L^(-1) * W^(1/2) * K =
    // K - (W^(1/2) * K)^T * (L^(-1))^T * L^(-1) * W^(1/2) * K = K - V^T * V
    eigen_Sigma=eigen_K*CMath::exp(m_log_scale*2.0)-eigen_V.adjoint()*eigen_V;
}

void CSingleLaplaceInferenceMethod::update_chol()
{
    // get log probability derivatives
    m_dlp=m_model->get_log_probability_derivative_f(m_labels, m_mu, 1);
    m_d2lp=m_model->get_log_probability_derivative_f(m_labels, m_mu, 2);
    m_d3lp=m_model->get_log_probability_derivative_f(m_labels, m_mu, 3);

    // W = -d2lp
    m_W=m_d2lp.clone();
    m_W.scale(-1.0);
    m_sW=SGVector<float64_t>(m_W.vlen);

    // compute sW
    Map<VectorXd> eigen_W(m_W.vector, m_W.vlen);
    Map<VectorXd> eigen_sW(m_sW.vector, m_sW.vlen);

    if (eigen_W.minCoeff()>0)
        eigen_sW=eigen_W.cwiseSqrt();
    else
        //post.sW = sqrt(abs(W)).*sign(W);
        eigen_sW=((eigen_W.array().abs()+eigen_W.array())/2).sqrt()-((eigen_W.array().abs()-eigen_W.array())/2).sqrt();

    // create eigen representation of kernel matrix
    Map<MatrixXd> eigen_ktrtr(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);

    // create shogun and eigen representation of posterior cholesky
    m_L=SGMatrix<float64_t>(m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);

    if (eigen_W.minCoeff() < 0)
    {
        //A = eye(n)+K.*repmat(w',n,1);
        FullPivLU<MatrixXd> lu(
            MatrixXd::Identity(m_ktrtr.num_rows,m_ktrtr.num_cols)+
            eigen_ktrtr*CMath::exp(m_log_scale*2.0)*eigen_W.asDiagonal());
        // compute cholesky: L = -(K + 1/W)^-1
        //-iA = -inv(A)
        eigen_L=-lu.inverse();
        // -repmat(w,1,n).*iA == (-iA'.*repmat(w',n,1))'
        eigen_L=eigen_W.asDiagonal()*eigen_L;
    }
    else
    {
        // compute cholesky: L = chol(sW * sW' .* K + I)
        LLT<MatrixXd> L(
            (eigen_sW*eigen_sW.transpose()).cwiseProduct(eigen_ktrtr*CMath::exp(m_log_scale*2.0))+
            MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols));

        eigen_L = L.matrixU();
    }
}
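
/* Editorial note, not part of the original source: for log-concave likelihoods
 * W is positive and m_L holds the upper Cholesky factor of B = I + sW*K*sW'.
 * For non-log-concave likelihoods (e.g. Student's t) W may have negative
 * entries, so the code falls back to an LU-based inverse and stores
 * -diag(W)*inv(I + K*diag(W)) in m_L instead, mirroring the corresponding
 * branch of infLaplace.m. */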

void CSingleLaplaceInferenceMethod::update()
{
    SG_DEBUG("entering\n");

    CInference::update();
    update_init();
    update_alpha();
    update_chol();
    m_gradient_update=false;
    update_parameter_hash();

    SG_DEBUG("leaving\n");
}


void CSingleLaplaceInferenceMethod::update_init()
{
    float64_t Psi_New;
    float64_t Psi_Def;
    // get mean vector and create eigen representation of it
    m_mean_f=m_mean->get_mean_vector(m_features);
    Map<VectorXd> eigen_mean(m_mean_f.vector, m_mean_f.vlen);

    // create eigen representation of kernel matrix
    Map<MatrixXd> eigen_ktrtr(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);

    // create shogun and eigen representation of function vector
    m_mu=SGVector<float64_t>(m_ktrtr.num_cols);
    Map<VectorXd> eigen_mu(m_mu, m_mu.vlen);

    if (m_alpha.vlen!=m_labels->get_num_labels())
    {
        // set alpha a zero vector
        m_alpha=SGVector<float64_t>(m_labels->get_num_labels());
        m_alpha.zero();

        // f = mean, if the lengths of alpha and y don't match
        eigen_mu=eigen_mean;

        Psi_New=-SGVector<float64_t>::sum(m_model->get_log_probability_f(
            m_labels, m_mu));
    }
    else
    {
        Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

        // compute f = K * alpha + m
        eigen_mu=eigen_ktrtr*CMath::exp(m_log_scale*2.0)*eigen_alpha+eigen_mean;

        Psi_New=eigen_alpha.dot(eigen_mu-eigen_mean)/2.0-
            SGVector<float64_t>::sum(m_model->get_log_probability_f(m_labels, m_mu));

        Psi_Def=-SGVector<float64_t>::sum(m_model->get_log_probability_f(m_labels, m_mean_f));

        // if default is better, then use it
        if (Psi_Def < Psi_New)
        {
            m_alpha.zero();
            eigen_mu=eigen_mean;
            Psi_New=Psi_Def;
        }
    }
    m_Psi=Psi_New;
}
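
/* Editorial note, not part of the original source: update_init() warm-starts the
 * mode search from the previous alpha whenever its length still matches the
 * labels, and compares the resulting psi against the default start alpha=0
 * (i.e. f=m), keeping whichever objective value is lower before the Newton
 * iterations begin. */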


void CSingleLaplaceInferenceMethod::register_minimizer(Minimizer* minimizer)
{
    REQUIRE(minimizer, "Minimizer must be set\n");
    if (!dynamic_cast<CSingleLaplaceNewtonOptimizer*>(minimizer))
    {
        FirstOrderMinimizer* opt= dynamic_cast<FirstOrderMinimizer*>(minimizer);
        REQUIRE(opt, "The provided minimizer is not supported\n")
    }
    CInference::register_minimizer(minimizer);
}

void CSingleLaplaceInferenceMethod::update_alpha()
{
    CSingleLaplaceNewtonOptimizer *opt=dynamic_cast<CSingleLaplaceNewtonOptimizer*>(m_minimizer);
    bool cleanup=false;
    if (opt)
    {
        opt->set_target(this);
        if(this->ref_count()>1)
            cleanup=true;
        opt->minimize();
        opt->unset_target(cleanup);
    }
    else
    {
        FirstOrderMinimizer* minimizer= dynamic_cast<FirstOrderMinimizer*>(m_minimizer);
        REQUIRE(minimizer, "The provided minimizer is not supported\n");

        SingleLaplaceInferenceMethodCostFunction *cost_fun=new SingleLaplaceInferenceMethodCostFunction();
        cost_fun->set_target(this);
        if(this->ref_count()>1)
            cleanup=true;
        minimizer->set_cost_function(cost_fun);
        minimizer->minimize();
        minimizer->unset_cost_function(false);
        cost_fun->unset_target(cleanup);
        SG_UNREF(cost_fun);
    }
    // get mean vector and create eigen representation of it
    Map<VectorXd> eigen_mean(m_mean_f.vector, m_mean_f.vlen);

    // create eigen representation of kernel matrix
    Map<MatrixXd> eigen_ktrtr(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);

    Map<VectorXd> eigen_mu(m_mu, m_mu.vlen);

    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

    // compute f = K * alpha + m
    eigen_mu=eigen_ktrtr*CMath::exp(m_log_scale*2.0)*eigen_alpha+eigen_mean;
}

void CSingleLaplaceInferenceMethod::update_deriv()
{
    // create eigen representation of W, sW, dlp, d3lp, K, alpha and L
    Map<VectorXd> eigen_W(m_W.vector, m_W.vlen);
    Map<VectorXd> eigen_sW(m_sW.vector, m_sW.vlen);
    Map<VectorXd> eigen_dlp(m_dlp.vector, m_dlp.vlen);
    Map<VectorXd> eigen_d3lp(m_d3lp.vector, m_d3lp.vlen);
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);

    // create shogun and eigen representation of matrix Z
    m_Z=SGMatrix<float64_t>(m_L.num_rows, m_L.num_cols);
    Map<MatrixXd> eigen_Z(m_Z.matrix, m_Z.num_rows, m_Z.num_cols);

    // create shogun and eigen representation of the vector g
    m_g=SGVector<float64_t>(m_Z.num_rows);
    Map<VectorXd> eigen_g(m_g.vector, m_g.vlen);

    if (eigen_W.minCoeff()<0)
    {
        eigen_Z=-eigen_L;

        // compute iA = (I + K * diag(W))^-1
        FullPivLU<MatrixXd> lu(MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols)+
            eigen_K*CMath::exp(m_log_scale*2.0)*eigen_W.asDiagonal());
        MatrixXd iA=lu.inverse();

        // compute derivative ln|L'*L| wrt W: g=sum(iA.*K,2)/2
        eigen_g=(iA.cwiseProduct(eigen_K*CMath::exp(m_log_scale*2.0))).rowwise().sum()/2.0;
    }
    else
    {
        // solve L'*L*Z=diag(sW) and compute Z=diag(sW)*Z
        eigen_Z=eigen_L.triangularView<Upper>().adjoint().solve(
            MatrixXd(eigen_sW.asDiagonal()));
        eigen_Z=eigen_L.triangularView<Upper>().solve(eigen_Z);
        eigen_Z=eigen_sW.asDiagonal()*eigen_Z;

        // solve L'*C=diag(sW)*K
        MatrixXd C=eigen_L.triangularView<Upper>().adjoint().solve(
            eigen_sW.asDiagonal()*eigen_K*CMath::exp(m_log_scale*2.0));

        // compute derivative ln|L'*L| wrt W: g=(diag(K)-sum(C.^2,1)')/2
        eigen_g=(eigen_K.diagonal()*CMath::exp(m_log_scale*2.0)-
            (C.cwiseProduct(C)).colwise().sum().adjoint())/2.0;
    }

    // create shogun and eigen representation of the vector dfhat
    m_dfhat=SGVector<float64_t>(m_g.vlen);
    Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);

    // compute derivative of nlZ wrt fhat
    eigen_dfhat=eigen_g.cwiseProduct(eigen_d3lp);
}
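
/* Editorial note, not part of the original source: Z, g and dfhat computed here
 * are the reusable pieces of the hyperparameter gradients below. Z plays the
 * role of diag(sW)*inv(B)*diag(sW) (or -diag(W)*inv(I+K*diag(W)) in the
 * non-log-concave case), g is the derivative of log|B|/2 with respect to W, and
 * dfhat = g.*d3lp is the derivative of the approximate -log Z with respect to
 * the mode f_hat, which supplies the "implicit" term of each derivative. */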

SGVector<float64_t> CSingleLaplaceInferenceMethod::get_derivative_wrt_inference_method(
        const TParameter* param)
{
    REQUIRE(!strcmp(param->m_name, "log_scale"), "Can't compute derivative of "
        "the negative log marginal likelihood wrt %s.%s parameter\n",
        get_name(), param->m_name)

    // create eigen representation of K, Z, dfhat, dlp and alpha
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<MatrixXd> eigen_Z(m_Z.matrix, m_Z.num_rows, m_Z.num_cols);
    Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);
    Map<VectorXd> eigen_dlp(m_dlp.vector, m_dlp.vlen);
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

    SGVector<float64_t> result(1);

    // compute derivative K wrt scale
    // compute dnlZ=sum(sum(Z.*dK))/2-alpha'*dK*alpha/2
    result[0]=(eigen_Z.cwiseProduct(eigen_K)).sum()/2.0-
        (eigen_alpha.adjoint()*eigen_K).dot(eigen_alpha)/2.0;

    // compute b=dK*dlp
    VectorXd b=eigen_K*eigen_dlp;

    // compute dnlZ=dnlZ-dfhat'*(b-K*(Z*b))
    result[0]=result[0]-eigen_dfhat.dot(b-eigen_K*CMath::exp(m_log_scale*2.0)*(eigen_Z*b));
    result[0]*=CMath::exp(m_log_scale*2.0)*2.0;

    return result;
}

SGVector<float64_t> CSingleLaplaceInferenceMethod::get_derivative_wrt_likelihood_model(
        const TParameter* param)
{
    // create eigen representation of K, Z, g and dfhat
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<MatrixXd> eigen_Z(m_Z.matrix, m_Z.num_rows, m_Z.num_cols);
    Map<VectorXd> eigen_g(m_g.vector, m_g.vlen);
    Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);

    // get derivatives wrt likelihood model parameters
    SGVector<float64_t> lp_dhyp=m_model->get_first_derivative(m_labels,
        m_mu, param);
    SGVector<float64_t> dlp_dhyp=m_model->get_second_derivative(m_labels,
        m_mu, param);
    SGVector<float64_t> d2lp_dhyp=m_model->get_third_derivative(m_labels,
        m_mu, param);

    // create eigen representation of the derivatives
    Map<VectorXd> eigen_lp_dhyp(lp_dhyp.vector, lp_dhyp.vlen);
    Map<VectorXd> eigen_dlp_dhyp(dlp_dhyp.vector, dlp_dhyp.vlen);
    Map<VectorXd> eigen_d2lp_dhyp(d2lp_dhyp.vector, d2lp_dhyp.vlen);

    SGVector<float64_t> result(1);

    // compute b vector
    VectorXd b=eigen_K*eigen_dlp_dhyp;

    // compute dnlZ=-g'*d2lp_dhyp-sum(lp_dhyp)-dfhat'*(b-K*(Z*b))
    result[0]=-eigen_g.dot(eigen_d2lp_dhyp)-eigen_lp_dhyp.sum()-
        eigen_dfhat.dot(b-eigen_K*CMath::exp(m_log_scale*2.0)*(eigen_Z*b));

    return result;
}

SGVector<float64_t> CSingleLaplaceInferenceMethod::get_derivative_wrt_kernel(
        const TParameter* param)
{
    // create eigen representation of K, Z, dfhat, dlp and alpha
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<MatrixXd> eigen_Z(m_Z.matrix, m_Z.num_rows, m_Z.num_cols);
    Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);
    Map<VectorXd> eigen_dlp(m_dlp.vector, m_dlp.vlen);
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

    REQUIRE(param, "Param not set\n");
    SGVector<float64_t> result;
    int64_t len=const_cast<TParameter *>(param)->m_datatype.get_num_elements();
    result=SGVector<float64_t>(len);

    for (index_t i=0; i<result.vlen; i++)
    {
        SGMatrix<float64_t> dK;

        if (result.vlen==1)
            dK=m_kernel->get_parameter_gradient(param);
        else
            dK=m_kernel->get_parameter_gradient(param, i);

        Map<MatrixXd> eigen_dK(dK.matrix, dK.num_rows, dK.num_cols);

        // compute dnlZ=sum(sum(Z.*dK))/2-alpha'*dK*alpha/2
        result[i]=(eigen_Z.cwiseProduct(eigen_dK)).sum()/2.0-
            (eigen_alpha.adjoint()*eigen_dK).dot(eigen_alpha)/2.0;

        // compute b=dK*dlp
        VectorXd b=eigen_dK*eigen_dlp;

        // compute dnlZ=dnlZ-dfhat'*(b-K*(Z*b))
        result[i]=result[i]-eigen_dfhat.dot(b-eigen_K*CMath::exp(m_log_scale*2.0)*
            (eigen_Z*b));
        result[i]*=CMath::exp(m_log_scale*2.0);
    }

    return result;
}

SGVector<float64_t> CSingleLaplaceInferenceMethod::get_derivative_wrt_mean(
        const TParameter* param)
{
    // create eigen representation of K, Z, dfhat and alpha
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<MatrixXd> eigen_Z(m_Z.matrix, m_Z.num_rows, m_Z.num_cols);
    Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

    REQUIRE(param, "Param not set\n");
    SGVector<float64_t> result;
    int64_t len=const_cast<TParameter *>(param)->m_datatype.get_num_elements();
    result=SGVector<float64_t>(len);

    for (index_t i=0; i<result.vlen; i++)
    {
        SGVector<float64_t> dmu;

        if (result.vlen==1)
            dmu=m_mean->get_parameter_derivative(m_features, param);
        else
            dmu=m_mean->get_parameter_derivative(m_features, param, i);

        Map<VectorXd> eigen_dmu(dmu.vector, dmu.vlen);

        // compute dnlZ=-alpha'*dm-dfhat'*(dm-K*(Z*dm))
        result[i]=-eigen_alpha.dot(eigen_dmu)-eigen_dfhat.dot(eigen_dmu-
            eigen_K*CMath::exp(m_log_scale*2.0)*(eigen_Z*eigen_dmu));
    }

    return result;
}

SGVector<float64_t> CSingleLaplaceInferenceMethod::get_posterior_mean()
{
    compute_gradient();

    SGVector<float64_t> res(m_mu.vlen);
    Map<VectorXd> eigen_res(res.vector, res.vlen);

    Map<VectorXd> eigen_mu(m_mu, m_mu.vlen);
    SGVector<float64_t> mean=m_mean->get_mean_vector(m_features);
    Map<VectorXd> eigen_mean(mean.vector, mean.vlen);
    eigen_res=eigen_mu-eigen_mean;

    return res;
}


float64_t CSingleLaplaceInferenceMethod::get_psi_wrt_alpha()
{
    Eigen::Map<Eigen::VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
    SGVector<float64_t> f(m_alpha.vlen);
    Eigen::Map<Eigen::VectorXd> eigen_f(f.vector, f.vlen);
    Eigen::Map<Eigen::MatrixXd> kernel(m_ktrtr.matrix,
        m_ktrtr.num_rows,
        m_ktrtr.num_cols);
    Eigen::Map<Eigen::VectorXd> eigen_mean_f(m_mean_f.vector,
        m_mean_f.vlen);
    /* f = K * alpha + mean_f given alpha*/
    eigen_f
        = kernel * ((eigen_alpha) * CMath::exp(m_log_scale*2.0)) + eigen_mean_f;

    /* psi = 0.5 * alpha .* (f - m) - sum(lp)*/
    float64_t psi = eigen_alpha.dot(eigen_f - eigen_mean_f) * 0.5;
    psi -= SGVector<float64_t>::sum(m_model->get_log_probability_f(m_labels, f));
    return psi;
}

void CSingleLaplaceInferenceMethod::get_gradient_wrt_alpha(SGVector<float64_t> gradient)
{
    REQUIRE(gradient.vlen==m_alpha.vlen,
        "The length of gradients (%d) should be the same as the length of parameters (%d)\n",
        gradient.vlen, m_alpha.vlen);

    Eigen::Map<Eigen::VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
    Eigen::Map<Eigen::VectorXd> eigen_gradient(gradient.vector, gradient.vlen);
    SGVector<float64_t> f(m_alpha.vlen);
    Eigen::Map<Eigen::VectorXd> eigen_f(f.vector, f.vlen);
    Eigen::Map<Eigen::MatrixXd> kernel(m_ktrtr.matrix,
        m_ktrtr.num_rows,
        m_ktrtr.num_cols);
    Eigen::Map<Eigen::VectorXd> eigen_mean_f(m_mean_f.vector,
        m_mean_f.vlen);

    /* f = K * alpha + mean_f given alpha*/
    eigen_f = kernel * ((eigen_alpha) * CMath::exp(m_log_scale*2.0)) + eigen_mean_f;

    SGVector<float64_t> dlp_f =
        m_model->get_log_probability_derivative_f(m_labels, f, 1);

    Eigen::Map<Eigen::VectorXd> eigen_dlp_f(dlp_f.vector, dlp_f.vlen);

    /* g_alpha = K * (alpha - dlp_f)*/
    eigen_gradient = kernel * ((eigen_alpha - eigen_dlp_f) * CMath::exp(m_log_scale*2.0));
}
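
/* Editorial note, not part of the original source: get_psi_wrt_alpha() and
 * get_gradient_wrt_alpha() expose the Newton objective psi(alpha) and its
 * gradient K*(alpha - dlp), so that any FirstOrderMinimizer registered through
 * register_minimizer() can be used in place of the built-in
 * CSingleLaplaceNewtonOptimizer (see SingleLaplaceInferenceMethodCostFunction). */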


}

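The following is a minimal usage sketch added by the editor; it is not part of this file. It assumes the standard Shogun 4.2 C++ API, so class names such as CGaussianKernel, CZeroMean, CLogitLikelihood, CBinaryLabels and CDenseFeatures (and their header paths) come from the rest of the toolbox and may need adjusting for your installation.

    // Editorial usage sketch (assumed Shogun 4.2 API, not part of the original file)
    #include <shogun/base/init.h>
    #include <shogun/io/SGIO.h>
    #include <shogun/features/DenseFeatures.h>
    #include <shogun/labels/BinaryLabels.h>
    #include <shogun/kernel/GaussianKernel.h>
    #include <shogun/machine/gp/ZeroMean.h>
    #include <shogun/machine/gp/LogitLikelihood.h>
    #include <shogun/machine/gp/SingleLaplaceInferenceMethod.h>

    using namespace shogun;

    int main()
    {
        init_shogun_with_defaults();

        // toy binary classification data: 2 features x 4 examples
        SGMatrix<float64_t> X(2, 4);
        SGVector<float64_t> y(4);
        X(0,0)=-1; X(1,0)=-1; y[0]=-1;
        X(0,1)=-2; X(1,1)=-1; y[1]=-1;
        X(0,2)=1;  X(1,2)=1;  y[2]=1;
        X(0,3)=2;  X(1,3)=1;  y[3]=1;

        CDenseFeatures<float64_t>* feats=new CDenseFeatures<float64_t>(X);
        CBinaryLabels* labels=new CBinaryLabels(y);
        CGaussianKernel* kernel=new CGaussianKernel(10, 2.0);
        CZeroMean* mean=new CZeroMean();
        CLogitLikelihood* lik=new CLogitLikelihood();

        // Laplace approximation of the GP posterior and its marginal likelihood
        CSingleLaplaceInferenceMethod* inf=new CSingleLaplaceInferenceMethod(
            kernel, feats, mean, labels, lik);
        SG_REF(inf);

        float64_t nlZ=inf->get_negative_log_marginal_likelihood();
        SG_SPRINT("negative log marginal likelihood: %f\n", nlZ);

        SG_UNREF(inf);
        exit_shogun();
        return 0;
    }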