SHOGUN 6.1.3
SingleFITCLaplaceInferenceMethod.cpp
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (W) 2015 Wu Lin
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * The views and conclusions contained in the software and documentation are those
27  * of the authors and should not be interpreted as representing official policies,
28  * either expressed or implied, of the Shogun Development Team.
29  *
30  */
31 
32 #include <shogun/machine/gp/SingleFITCLaplaceInferenceMethod.h>
33 
34 #include <shogun/machine/gp/StudentsTLikelihood.h>
35 #include <shogun/mathematics/Math.h>
36 #ifdef USE_GPL_SHOGUN
37 #include <shogun/lib/external/brent.h>
38 #endif //USE_GPL_SHOGUN
39 #include <shogun/mathematics/eigen3.h>
42 
43 using namespace shogun;
44 using namespace Eigen;
45 
46 namespace shogun
47 {
48 
49 #ifndef DOXYGEN_SHOULD_SKIP_THIS
50 #ifdef USE_GPL_SHOGUN
51 
52 class CFITCPsiLine : public func_base
53 {
54 public:
55  float64_t log_scale;
56  VectorXd dalpha;
57  VectorXd start_alpha;
58  SGVector<float64_t>* alpha;
59  SGVector<float64_t>* dlp;
60  SGVector<float64_t>* W;
61  SGVector<float64_t>* f;
62  SGVector<float64_t>* m;
63  CLikelihoodModel* lik;
64  CLabels* lab;
65  CSingleFITCLaplaceInferenceMethod* inf;
66 
67  virtual double operator() (double x)
68  {
69  //time complexity O(m*n)
70  Map<VectorXd> eigen_f(f->vector, f->vlen);
71  Map<VectorXd> eigen_m(m->vector, m->vlen);
72  Map<VectorXd> eigen_alpha(alpha->vector, alpha->vlen);
73 
74  //alpha = alpha + s*dalpha;
75  eigen_alpha=start_alpha+x*dalpha;
76  SGVector<float64_t> tmp=inf->compute_mvmK(*alpha);
77  Map<VectorXd> eigen_tmp(tmp.vector, tmp.vlen);
78  //f = mvmK(alpha,V,d0)+m;
79  eigen_f=eigen_tmp+eigen_m;
80 
81  // get first and second derivatives of log likelihood
82  (*dlp)=lik->get_log_probability_derivative_f(lab, (*f), 1);
83 
84  (*W)=lik->get_log_probability_derivative_f(lab, (*f), 2);
85  W->scale(-1.0);
86 
87  // compute psi=alpha'*(f-m)/2-lp
88  float64_t result = eigen_alpha.dot(eigen_f-eigen_m)/2.0-
89  SGVector<float64_t>::sum(lik->get_log_probability_f(lab, *f));
90 
91  return result;
92  }
93 };
94 #endif //USE_GPL_SHOGUN
95 
96 class SingleFITCLaplaceInferenceMethodCostFunction: public FirstOrderCostFunction
97 {
98 public:
99  SingleFITCLaplaceInferenceMethodCostFunction():FirstOrderCostFunction() { init(); }
100  virtual ~SingleFITCLaplaceInferenceMethodCostFunction() { clean(); }
101  void set_target(CSingleFITCLaplaceInferenceMethod *obj)
102  {
103  REQUIRE(obj, "Obj must set\n");
104  if(m_obj != obj)
105  {
106  SG_REF(obj);
107  SG_UNREF(m_obj);
108  m_obj=obj;
109  }
110  }
111 
112  void clean()
113  {
114  SG_UNREF(m_obj);
115  }
116 
117  virtual float64_t get_cost()
118  {
119  REQUIRE(m_obj,"Object not set\n");
120  return m_obj->get_psi_wrt_alpha();
121  }
122  void unset_target(bool is_unref)
123  {
124  if(is_unref)
125  {
126  SG_UNREF(m_obj);
127  }
128  m_obj=NULL;
129  }
130  virtual SGVector<float64_t> obtain_variable_reference()
131  {
132  REQUIRE(m_obj,"Object not set\n");
133  m_derivatives = SGVector<float64_t>((m_obj->m_al).vlen);
134  return m_obj->m_al;
135  }
136  virtual SGVector<float64_t> get_gradient()
137  {
138  REQUIRE(m_obj,"Object not set\n");
139  m_obj->get_gradient_wrt_alpha(m_derivatives);
140  return m_derivatives;
141  }
142  virtual const char* get_name() const { return "SingleFITCLaplaceInferenceMethodCostFunction"; }
143 private:
144  void init()
145  {
146  m_obj=NULL;
147  m_derivatives = SGVector<float64_t>();
148  SG_ADD(&m_derivatives, "SingleFITCLaplaceInferenceMethodCostFunction__m_derivatives",
149  "derivatives in SingleFITCLaplaceInferenceMethodCostFunction", MS_NOT_AVAILABLE);
150  SG_ADD((CSGObject **)&m_obj, "SingleFITCLaplaceInferenceMethodCostFunction__m_obj",
151  "obj in SingleFITCLaplaceInferenceMethodCostFunction", MS_NOT_AVAILABLE);
152  }
153 
154  SGVector<float64_t> m_derivatives;
155  CSingleFITCLaplaceInferenceMethod* m_obj;
156 };
157 
158 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
159 
160 void CSingleFITCLaplaceNewtonOptimizer::set_target(CSingleFITCLaplaceInferenceMethod *obj)
161 {
162  REQUIRE(obj, "Obj must set\n");
163  if(m_obj != obj)
164  {
165  SG_REF(obj);
166  SG_UNREF(m_obj);
167  m_obj=obj;
168  }
169 }
170 
171 void CSingleFITCLaplaceNewtonOptimizer::unset_target(bool is_unref)
172 {
173  if(is_unref)
174  {
175  SG_UNREF(m_obj);
176  }
177  m_obj=NULL;
178 
179 }
180 
181 void CSingleFITCLaplaceNewtonOptimizer::init()
182 {
183  m_obj=NULL;
184  m_iter=20;
185  m_tolerance=1e-6;
186  m_opt_tolerance=1e-6;
187  m_opt_max=10;
188 
189  SG_ADD((CSGObject **)&m_obj, "CSingleFITCLaplaceNewtonOptimizer__m_obj",
190  "obj in CSingleFITCLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
191  SG_ADD(&m_iter, "CSingleFITCLaplaceNewtonOptimizer__m_iter",
192  "iter in CSingleFITCLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
193  SG_ADD(&m_tolerance, "CSingleFITCLaplaceNewtonOptimizer__m_tolerance",
194  "tolerance in CSingleFITCLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
195  SG_ADD(&m_opt_tolerance, "CSingleFITCLaplaceNewtonOptimizer__m_opt_tolerance",
196  "opt_tolerance in CSingleFITCLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
197  SG_ADD(&m_opt_max, "CSingleFITCLaplaceNewtonOptimizer__m_opt_max",
198  "opt_max in CSingleFITCLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
199 }
200 
201 float64_t CSingleFITCLaplaceNewtonOptimizer::minimize()
202 {
203  REQUIRE(m_obj,"Object not set\n");
204  //time complexity O(m^2*n)
205  Map<MatrixXd> eigen_kuu((m_obj->m_kuu).matrix, (m_obj->m_kuu).num_rows, (m_obj->m_kuu).num_cols);
206  Map<MatrixXd> eigen_V((m_obj->m_V).matrix, (m_obj->m_V).num_rows, (m_obj->m_V).num_cols);
207  Map<VectorXd> eigen_dg((m_obj->m_dg).vector, (m_obj->m_dg).vlen);
208  Map<MatrixXd> eigen_R0((m_obj->m_chol_R0).matrix, (m_obj->m_chol_R0).num_rows, (m_obj->m_chol_R0).num_cols);
209  Map<VectorXd> eigen_mu(m_obj->m_mu, (m_obj->m_mu).vlen);
210 
211  SGVector<float64_t> mean=m_obj->m_mean->get_mean_vector(m_obj->m_features);
212  Map<VectorXd> eigen_mean(mean.vector, mean.vlen);
213 
214  float64_t Psi_Old=CMath::INFTY;
215  float64_t Psi_New=m_obj->m_Psi;
216 
217  // compute W = -d2lp
218  m_obj->m_W=m_obj->m_model->get_log_probability_derivative_f(m_obj->m_labels, m_obj->m_mu, 2);
219  m_obj->m_W.scale(-1.0);
220 
221  //n-by-1 vector
222  Map<VectorXd> eigen_al((m_obj->m_al).vector, (m_obj->m_al).vlen);
223 
224  // get first derivative of log probability function
225  m_obj->m_dlp=m_obj->m_model->get_log_probability_derivative_f(m_obj->m_labels, m_obj->m_mu, 1);
226 
227  index_t iter=0;
228 
229  m_obj->m_Wneg=false;
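 // Newton-with-line-search loop: each pass recomputes W=-d2lp and dlp at the current f,
 // forms the Newton direction dalpha for alpha and (in GPL builds) uses Brent's method to
 // pick the step size along dalpha; iteration stops once Psi decreases by less than
 // m_tolerance or m_iter passes have been made.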
230  while (Psi_Old-Psi_New>m_tolerance && iter<m_iter)
231  {
232  //time complexity O(m^2*n)
233  Map<VectorXd> eigen_W((m_obj->m_W).vector, (m_obj->m_W).vlen);
234  Map<VectorXd> eigen_dlp((m_obj->m_dlp).vector, (m_obj->m_dlp).vlen);
235 
236  Psi_Old = Psi_New;
237  iter++;
238 
239  if (eigen_W.minCoeff() < 0)
240  {
241  // Suggested by Vanhatalo et al.,
242  // Gaussian Process Regression with Student's t likelihood, NIPS 2009
243  // Quoted from infFITC_Laplace.m
244  float64_t df;
245 
246  if (m_obj->m_model->get_model_type()==LT_STUDENTST)
247  {
247  {
248  CStudentsTLikelihood* lik=CStudentsTLikelihood::obtain_from_generic(m_obj->m_model);
249  df=lik->get_degrees_freedom();
250  SG_UNREF(lik);
251  }
252  else
253  df=1;
254  eigen_W+=(2.0/(df+1))*eigen_dlp.cwiseProduct(eigen_dlp);
255  }
256 
257  //b = W.*(f-m) + dlp;
258  VectorXd b=eigen_W.cwiseProduct(eigen_mu-eigen_mean)+eigen_dlp;
259 
260  //dd = 1./(1+W.*d0);
261  VectorXd dd=MatrixXd::Ones(b.rows(),1).cwiseQuotient(eigen_W.cwiseProduct(eigen_dg)+MatrixXd::Ones(b.rows(),1));
262 
263  VectorXd eigen_t=eigen_W.cwiseProduct(dd);
264  //m-by-m matrix
265  SGMatrix<float64_t> tmp( (m_obj->m_V).num_rows, (m_obj->m_V).num_rows);
266  Map<MatrixXd> eigen_tmp(tmp.matrix, tmp.num_rows, tmp.num_cols);
267  //eye(nu)+(V.*repmat((W.*dd)',nu,1))*V'
268  eigen_tmp=eigen_V*eigen_t.asDiagonal()*eigen_V.transpose()+MatrixXd::Identity(tmp.num_rows,tmp.num_rows);
269  tmp=m_obj->get_chol_inv(tmp);
270  //chol_inv(eye(nu)+(V.*repmat((W.*dd)',nu,1))*V')
271  Map<MatrixXd> eigen_tmp2(tmp.matrix, tmp.num_rows, tmp.num_cols);
272  //RV = chol_inv(eye(nu)+(V.*repmat((W.*dd)',nu,1))*V')*V;
273  // m-by-n matrix
274  MatrixXd eigen_RV=eigen_tmp2*eigen_V;
275  //dalpha = dd.*b - (W.*dd).*(RV'*(RV*(dd.*b))) - alpha; % Newt dir + line search
276  VectorXd dalpha=dd.cwiseProduct(b)-eigen_t.cwiseProduct(eigen_RV.transpose()*(eigen_RV*(dd.cwiseProduct(b))))-eigen_al;
277 #ifdef USE_GPL_SHOGUN
278  //perform Brent's optimization
279  CFITCPsiLine func;
280 
281  func.log_scale=m_obj->m_log_scale;
282  func.dalpha=dalpha;
283  func.start_alpha=eigen_al;
284  func.alpha=&(m_obj->m_al);
285  func.dlp=&(m_obj->m_dlp);
286  func.f=&(m_obj->m_mu);
287  func.m=&mean;
288  func.W=&(m_obj->m_W);
289  func.lik=m_obj->m_model;
290  func.lab=m_obj->m_labels;
291  func.inf=m_obj;
292 
293  float64_t x;
294  Psi_New=local_min(0, m_opt_max, m_opt_tolerance, func, x);
295 #else
296  SG_GPL_ONLY
297 #endif //USE_GPL_SHOGUN
298  }
299 
300  if (Psi_Old-Psi_New>m_tolerance && iter>=m_iter)
301  {
302  SG_SWARNING("Max iterations (%d) reached, but convergence level (%f) is not yet below tolerance (%f)\n", m_iter, Psi_Old-Psi_New, m_tolerance);
303  }
304  return Psi_New;
305 }
306 
307 CSingleFITCLaplaceInferenceMethod::CSingleFITCLaplaceInferenceMethod() : CSingleFITCInference()
308 {
309  init();
310 }
311 
312 CSingleFITCLaplaceInferenceMethod::CSingleFITCLaplaceInferenceMethod(CKernel* kern, CFeatures* feat,
313  CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod, CFeatures* lat)
314 : CSingleFITCInference(kern, feat, m, lab, mod, lat)
315 {
316  init();
317 }
318 
319 void CSingleFITCLaplaceInferenceMethod::init()
320 {
321  m_Psi=0;
322  m_Wneg=false;
323 
324  SG_ADD(&m_dlp, "dlp", "derivative of log likelihood with respect to function location", MS_NOT_AVAILABLE);
325  SG_ADD(&m_W, "W", "the noise matrix", MS_NOT_AVAILABLE);
326 
327  SG_ADD(&m_sW, "sW", "square root of W", MS_NOT_AVAILABLE);
328  SG_ADD(&m_d2lp, "d2lp", "second derivative of log likelihood with respect to function location", MS_NOT_AVAILABLE);
329  SG_ADD(&m_d3lp, "d3lp", "third derivative of log likelihood with respect to function location", MS_NOT_AVAILABLE);
330  SG_ADD(&m_chol_R0, "chol_R0", "Cholesky of inverse covariance of inducing features", MS_NOT_AVAILABLE);
331  SG_ADD(&m_dfhat, "dfhat", "derivative of negative log (approximated) marginal likelihood wrt f", MS_NOT_AVAILABLE);
332  SG_ADD(&m_g, "g", "variable g defined in infFITC_Laplace.m", MS_NOT_AVAILABLE);
333  SG_ADD(&m_dg, "dg", "variable d0 defined in infFITC_Laplace.m", MS_NOT_AVAILABLE);
334  SG_ADD(&m_Psi, "Psi", "the negative log likelihood without constant terms used in Newton's method", MS_NOT_AVAILABLE);
335  SG_ADD(&m_Wneg, "Wneg", "whether W contains negative elements", MS_NOT_AVAILABLE);
336 
337  register_minimizer(new CSingleFITCLaplaceNewtonOptimizer());
338 }
339 
340 void CSingleFITCLaplaceInferenceMethod::compute_gradient()
341 {
342  CInference::compute_gradient();
343 
344  if (!m_gradient_update)
345  {
347  update_deriv();
348  m_gradient_update=true;
349  update_parameter_hash();
350  }
351 }
352 
353 void CSingleFITCLaplaceInferenceMethod::update()
354 {
355  SG_DEBUG("entering\n");
356 
357  CInference::update();
358  update_init();
359  update_alpha();
360  update_chol();
361  m_gradient_update=false;
362  update_parameter_hash();
363 
364  SG_DEBUG("leaving\n");
365 }
366 
367 SGVector<float64_t> CSingleFITCLaplaceInferenceMethod::get_diagonal_vector()
368 {
369  if (parameter_hash_changed())
370  update();
371 
372  return SGVector<float64_t>(m_sW);
373 }
374 
375 CSingleFITCLaplaceInferenceMethod::~CSingleFITCLaplaceInferenceMethod()
376 {
377 }
378 
379 SGVector<float64_t> CSingleFITCLaplaceInferenceMethod::compute_mvmZ(SGVector<float64_t> x)
380 {
381  //time complexity O(m*n)
382  Map<MatrixXd> eigen_Rvdd(m_Rvdd.matrix, m_Rvdd.num_rows, m_Rvdd.num_cols);
383  Map<VectorXd> eigen_t(m_t.vector, m_t.vlen);
384  Map<VectorXd> eigen_x(x.vector, x.vlen);
385 
386  SGVector<float64_t> res(x.vlen);
387  Map<VectorXd> eigen_res(res.vector, res.vlen);
388 
389  //Zx = t.*x - RVdd'*(RVdd*x);
390  eigen_res=eigen_x.cwiseProduct(eigen_t)-eigen_Rvdd.transpose()*(eigen_Rvdd*eigen_x);
391  return res;
392 }
393 
394 SGVector<float64_t> CSingleFITCLaplaceInferenceMethod::compute_mvmK(SGVector<float64_t> al)
395 {
396  //time complexity O(m*n)
397  Map<MatrixXd> eigen_V(m_V.matrix, m_V.num_rows, m_V.num_cols);
398  Map<VectorXd> eigen_dg(m_dg.vector, m_dg.vlen);
399  Map<VectorXd> eigen_al(al.vector, al.vlen);
400 
401  SGVector<float64_t> res(al.vlen);
402  Map<VectorXd> eigen_res(res.vector, res.vlen);
403 
404  //Kal = V'*(V*al) + d0.*al;
405  eigen_res= eigen_V.transpose()*(eigen_V*eigen_al)+eigen_dg.cwiseProduct(eigen_al);
406  return res;
407 }
408 
409 CSingleFITCLaplaceInferenceMethod* CSingleFITCLaplaceInferenceMethod::obtain_from_generic(
410  CInference* inference)
411 {
412  REQUIRE(inference!=NULL, "Inference should be not NULL");
413 
414  if (inference->get_inference_type()!=INF_FITC_LAPLACE_SINGLE)
415  SG_SERROR("Provided inference is not of type CSingleFITCLaplaceInferenceMethod!\n")
416 
417  SG_REF(inference);
418  return (CSingleFITCLaplaceInferenceMethod*)inference;
419 }
420 
421 SGMatrix<float64_t> CSingleFITCLaplaceInferenceMethod::get_chol_inv(SGMatrix<float64_t> mtx)
422 {
423  //time complexity O(m^3), where mtx is a m-by-m matrix
424  REQUIRE(mtx.num_rows==mtx.num_cols, "Matrix must be square\n");
425 
426  Map<MatrixXd> eigen_mtx(mtx.matrix, mtx.num_rows, mtx.num_cols);
427  LLT<MatrixXd> chol(eigen_mtx.colwise().reverse().rowwise().reverse().matrix());
428  //tmp=chol(rot180(A))'
429  MatrixXd tmp=chol.matrixL();
430  SGMatrix<float64_t> res(mtx.num_rows, mtx.num_cols);
431  Map<MatrixXd> eigen_res(res.matrix, res.num_rows, res.num_cols);
432  //chol_inv = @(A) rot180(chol(rot180(A))')\eye(nu); % chol(inv(A))
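 // i.e. the returned upper-triangular factor R satisfies R'*R = inv(mtx)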
433  eigen_res=tmp.colwise().reverse().rowwise().reverse().matrix().triangularView<Upper>(
434  ).solve(MatrixXd::Identity(mtx.num_rows, mtx.num_cols));
435  return res;
436 }
437 
438 float64_t CSingleFITCLaplaceInferenceMethod::get_negative_log_marginal_likelihood()
439 {
440  if (parameter_hash_changed())
441  update();
442 
443  if (m_Wneg)
444  {
445  SG_WARNING("nlZ cannot be computed since W is too negative");
446  //nlZ = NaN;
447  return CMath::INFTY;
448  }
449  //time complexity O(m^2*n)
450  Map<VectorXd> eigen_alpha(m_al.vector, m_al.vlen);
451  Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);
452  SGVector<float64_t> mean=m_mean->get_mean_vector(m_features);
453  Map<VectorXd> eigen_mean(mean.vector, mean.vlen);
454  // get log likelihood
455  float64_t lp=SGVector<float64_t>::sum(m_model->get_log_probability_f(m_labels,
456  m_mu));
457 
458  Map<MatrixXd> eigen_V(m_V.matrix, m_V.num_rows, m_V.num_cols);
459  Map<VectorXd>eigen_t(m_t.vector, m_t.vlen);
460  MatrixXd A=eigen_V*eigen_t.asDiagonal()*eigen_V.transpose()+MatrixXd::Identity(m_V.num_rows,m_V.num_rows);
461  LLT<MatrixXd> chol(A);
462  A=chol.matrixU();
463 
464  Map<VectorXd> eigen_dg(m_dg.vector, m_dg.vlen);
465  Map<VectorXd> eigen_W(m_W.vector, m_W.vlen);
466 
467  //nlZ = alpha'*(f-m)/2 - sum(lp) - sum(log(dd))/2 + sum(log(diag(chol(A))));
468  float64_t result=eigen_alpha.dot(eigen_mu-eigen_mean)/2.0-lp+
469  A.diagonal().array().log().sum()+(eigen_W.cwiseProduct(eigen_dg)+MatrixXd::Ones(eigen_dg.rows(),1)).array().log().sum()/2.0;
470 
471  return result;
472 }
473 
475 {
476 }
477 
478 void CSingleFITCLaplaceInferenceMethod::update_init()
479 {
480  //time complexity O(m^2*n)
481  //m-by-m matrix
482  Map<MatrixXd> eigen_kuu(m_kuu.matrix, m_kuu.num_rows, m_kuu.num_cols);
483  //m-by-n matrix
484  Map<MatrixXd> eigen_ktru(m_ktru.matrix, m_ktru.num_rows, m_ktru.num_cols);
485 
486  Map<VectorXd> eigen_ktrtr_diag(m_ktrtr_diag.vector, m_ktrtr_diag.vlen);
487 
488  SGMatrix<float64_t> cor_kuu(m_kuu.num_rows, m_kuu.num_cols);
489  Map<MatrixXd> eigen_cor_kuu(cor_kuu.matrix, cor_kuu.num_rows, cor_kuu.num_cols);
490  eigen_cor_kuu=eigen_kuu*CMath::exp(m_log_scale*2.0)+CMath::exp(m_log_ind_noise)*MatrixXd::Identity(
491  m_kuu.num_rows, m_kuu.num_cols);
492  //R0 = chol_inv(Kuu+snu2*eye(nu)); m-by-m matrix
493  m_chol_R0=get_chol_inv(cor_kuu);
494  Map<MatrixXd> eigen_R0(m_chol_R0.matrix, m_chol_R0.num_rows, m_chol_R0.num_cols);
495 
496  //V = R0*Ku; m-by-n matrix
499 
500  eigen_V=eigen_R0*(eigen_ktru*CMath::exp(m_log_scale*2.0));
502  Map<VectorXd> eigen_dg(m_dg.vector, m_dg.vlen);
503  //d0 = diagK-sum(V.*V,1)';
504  eigen_dg=eigen_ktrtr_diag*CMath::exp(m_log_scale*2.0)-(eigen_V.cwiseProduct(eigen_V)).colwise().sum().adjoint();
505 
506  // get mean vector and create eigen representation of it
507  m_mean_f=m_mean->get_mean_vector(m_features);
508  Map<VectorXd> eigen_mean(m_mean_f.vector, m_mean_f.vlen);
509 
510  // create shogun and eigen representation of function vector
512  Map<VectorXd> eigen_mu(m_mu, m_mu.vlen);
513 
514  float64_t Psi_New;
515  float64_t Psi_Def;
516  if (m_al.vlen!=m_labels->get_num_labels())
517  {
518  // set alpha a zero vector
519  m_al=SGVector<float64_t>(m_labels->get_num_labels());
520  m_al.zero();
521 
522  // f = mean, if length of alpha and length of y doesn't match
523  eigen_mu=eigen_mean;
524 
525  Psi_New=-SGVector<float64_t>::sum(m_model->get_log_probability_f(
526  m_labels, m_mu));
527  }
528  else
529  {
530  Map<VectorXd> eigen_alpha(m_al.vector, m_al.vlen);
531 
532  // compute f = K * alpha + m
533  SGVector<float64_t> tmp=compute_mvmK(m_al);
534  Map<VectorXd> eigen_tmp(tmp.vector, tmp.vlen);
535  eigen_mu=eigen_tmp+eigen_mean;
536 
537  Psi_New=eigen_alpha.dot(eigen_tmp)/2.0-
538  SGVector<float64_t>::sum(m_model->get_log_probability_f(m_labels, m_mu));
539 
541 
542  // if default is better, then use it
543  if (Psi_Def < Psi_New)
544  {
545  m_al.zero();
546  eigen_mu=eigen_mean;
547  Psi_New=Psi_Def;
548  }
549  }
550  m_Psi=Psi_New;
551 }
552 
554 {
555  REQUIRE(minimizer, "Minimizer must set\n");
556  if (!dynamic_cast<CSingleFITCLaplaceNewtonOptimizer*>(minimizer))
557  {
558  FirstOrderMinimizer* opt= dynamic_cast<FirstOrderMinimizer*>(minimizer);
559  REQUIRE(opt, "The provided minimizer is not supported\n")
560  }
561  CInference::register_minimizer(minimizer);
562 }
563 
564 
565 void CSingleFITCLaplaceInferenceMethod::update_alpha()
566 {
567  CSingleFITCLaplaceNewtonOptimizer *opt=dynamic_cast<CSingleFITCLaplaceNewtonOptimizer*>(m_minimizer);
568  bool cleanup=false;
569  if (opt)
570  {
571  opt->set_target(this);
572  if(this->ref_count()>1)
573  cleanup=true;
574  opt->minimize();
575  opt->unset_target(cleanup);
576  }
577  else
578  {
579  FirstOrderMinimizer* minimizer= dynamic_cast<FirstOrderMinimizer*>(m_minimizer);
580  REQUIRE(minimizer, "The provided minimizer is not supported\n");
581 
582  SingleFITCLaplaceInferenceMethodCostFunction *cost_fun=new SingleFITCLaplaceInferenceMethodCostFunction();
583  cost_fun->set_target(this);
584  if(this->ref_count()>1)
585  cleanup=true;
586  minimizer->set_cost_function(cost_fun);
587  minimizer->minimize();
588  minimizer->unset_cost_function(false);
589  cost_fun->unset_target(cleanup);
590  SG_UNREF(cost_fun);
591  }
592 
596  Map<VectorXd> eigen_mu(m_mu, m_mu.vlen);
597  Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
598 
599 
600  // compute f = K * alpha + m
601  SGVector<float64_t> tmp=compute_mvmK(m_al);
602  Map<VectorXd> eigen_tmp(tmp.vector, tmp.vlen);
603  eigen_mu=eigen_tmp+eigen_mean;
604 
606  Map<VectorXd> eigen_post_alpha(m_alpha.vector, m_alpha.vlen);
607  //post.alpha = R0'*(V*alpha);
608  //m-by-1 vector
609  eigen_post_alpha=eigen_R0.transpose()*(eigen_V*eigen_al);
610 }
611 
612 void CSingleFITCLaplaceInferenceMethod::update_chol()
613 {
614  //time complexity O(m^2*n)
615  Map<VectorXd> eigen_dg(m_dg.vector, m_dg.vlen);
616  Map<MatrixXd> eigen_R0(m_chol_R0.matrix, m_chol_R0.num_rows, m_chol_R0.num_cols);
617  Map<MatrixXd> eigen_V(m_V.matrix, m_V.num_rows, m_V.num_cols);
618 
619  // get log probability derivatives
620  m_dlp=m_model->get_log_probability_derivative_f(m_labels, m_mu, 1);
621  m_d2lp=m_model->get_log_probability_derivative_f(m_labels, m_mu, 2);
622  m_d3lp=m_model->get_log_probability_derivative_f(m_labels, m_mu, 3);
623 
624  // W = -d2lp
625  m_W=m_d2lp.clone();
626  m_W.scale(-1.0);
627 
628  Map<VectorXd> eigen_W(m_W.vector, m_W.vlen);
630  Map<VectorXd> eigen_sW(m_sW.vector, m_sW.vlen);
631 
632  VectorXd Wd0_1=eigen_W.cwiseProduct(eigen_dg)+MatrixXd::Ones(eigen_W.rows(),1);
633 
634  // compute sW
635  // post.sW = sqrt(abs(W)).*sign(W); % preserve sign in case of negative
636  if (eigen_W.minCoeff()>0)
637  {
638  eigen_sW=eigen_W.cwiseSqrt();
639  }
640  else
641  {
642  eigen_sW=((eigen_W.array().abs()+eigen_W.array())/2).sqrt()-((eigen_W.array().abs()-eigen_W.array())/2).sqrt();
643  //any(1+d0.*W<0)
644  if (!(Wd0_1.array().abs().matrix()==Wd0_1))
645  m_Wneg=true;
646  }
647 
649  Map<VectorXd>eigen_t(m_t.vector, m_t.vlen);
650 
651  //dd = 1./(1+d0.*W);
652  VectorXd dd=MatrixXd::Ones(Wd0_1.rows(),1).cwiseQuotient(Wd0_1);
653  eigen_t=eigen_W.cwiseProduct(dd);
654 
655  //m-by-m matrix
657  Map<MatrixXd> eigen_A(A.matrix, A.num_rows, A.num_cols);
658  //A = eye(nu)+(V.*repmat((W.*dd)',nu,1))*V';
659  eigen_A=eigen_V*eigen_t.asDiagonal()*eigen_V.transpose()+MatrixXd::Identity(A.num_rows,A.num_rows);
660 
661  //R0tV = R0'*V; m-by-n
662  MatrixXd R0tV=eigen_R0.transpose()*eigen_V;
663 
664  //B = R0tV.*repmat((W.*dd)',nu,1); m-by-n matrix
665  MatrixXd B=R0tV*eigen_t.asDiagonal();
666 
667  //m-by-m matrix
670 
671  //post.L = -B*R0tV';
672  eigen_L=-B*R0tV.transpose();
673 
674  SGMatrix<float64_t> tmp=get_chol_inv(A);
675  Map<MatrixXd> eigen_tmp(tmp.matrix, tmp.num_rows, tmp.num_cols);
676  //RV = chol_inv(A)*V; m-by-n matrix
677  MatrixXd eigen_RV=eigen_tmp*eigen_V;
678  //RVdd m-by-n matrix
681  //RVdd = RV.*repmat((W.*dd)',nu,1);
682  eigen_Rvdd=eigen_RV*eigen_t.asDiagonal();
683 
684  if (!m_Wneg)
685  {
686  //B = B*RV';
687  B=B*eigen_RV.transpose();
688  //post.L = post.L + B*B';
689  eigen_L+=B*B.transpose();
690  }
691  else
692  {
693  //B = B*V';
694  B=B*eigen_V.transpose();
695  //post.L = post.L + (B*inv(A))*B';
696  FullPivLU<MatrixXd> lu(eigen_A);
697  eigen_L+=B*lu.inverse()*B.transpose();
698  }
699 
702  Map<VectorXd> eigen_g(m_g.vector, m_g.vlen);
703  //g = d/2 + sum(((R*R0)*P).^2,1)'/2
704  eigen_g=((eigen_dg.cwiseProduct(dd)).array()+
705  ((eigen_tmp*eigen_R0)*(eigen_ktru*CMath::exp(m_log_scale*2.0))*dd.asDiagonal()
706  ).array().pow(2).colwise().sum().transpose())/2;
707 }
708 
709 void CSingleFITCLaplaceInferenceMethod::update_deriv()
710 {
711  //time complexity O(m^2*n)
712  Map<MatrixXd> eigen_R0(m_chol_R0.matrix, m_chol_R0.num_rows, m_chol_R0.num_cols);
713  Map<MatrixXd> eigen_V(m_V.matrix, m_V.num_rows, m_V.num_cols);
714  Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
716  // create shogun and eigen representation of B
717  // m-by-n matrix
720 
721  //B = (R0'*R0)*Ku
722  eigen_B=eigen_R0.transpose()*eigen_V;
723 
724  // create shogun and eigen representation of w
726  //w = B*al;
727  Map<VectorXd> eigen_w(m_w.vector, m_w.vlen);
728  eigen_w=eigen_B*eigen_al;
729 
730  // create shogun and eigen representation of the vector dfhat
731  Map<VectorXd> eigen_d3lp(m_d3lp.vector, m_d3lp.vlen);
732  Map<VectorXd> eigen_g(m_g.vector, m_g.vlen);
734  Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);
735 
736  // compute derivative of nlZ wrt fhat
737  // dfhat = g.*d3lp;
738  eigen_dfhat=eigen_g.cwiseProduct(eigen_d3lp);
739 }
740 
741 float64_t CSingleFITCLaplaceInferenceMethod::get_derivative_related_cov(SGVector<float64_t> ddiagKi,
742  SGMatrix<float64_t> dKuui, SGMatrix<float64_t> dKui)
743 {
744  //time complexity O(m^2*n)
746  Map<VectorXd> eigen_ddiagKi(ddiagKi.vector, ddiagKi.vlen);
747  //m-by-m matrix
748  Map<MatrixXd> eigen_dKuui(dKuui.matrix, dKuui.num_rows, dKuui.num_cols);
749  //m-by-n matrix
750  Map<MatrixXd> eigen_dKui(dKui.matrix, dKui.num_rows, dKui.num_cols);
751 
752  // compute R=2*dKui-dKuui*B
753  SGMatrix<float64_t> dA(dKui.num_rows, dKui.num_cols);
754  Map<MatrixXd> eigen_dA(dA.matrix, dA.num_rows, dA.num_cols);
755  //dA = 2*dKu'-R0tV'*dKuu;
756  //dA' = 2*dKu-dKuu'*R0tV;
757  eigen_dA=2*eigen_dKui-eigen_dKuui*eigen_R0tV;
758 
759  SGVector<float64_t> v(ddiagKi.vlen);
760  Map<VectorXd> eigen_v(v.vector, v.vlen);
761  //w = sum(dA.*R0tV',2);
762  //w' = sum(dA'.*R0tV,1);
763  //v = ddiagK-w;
764  eigen_v=eigen_ddiagKi-eigen_dA.cwiseProduct(eigen_R0tV).colwise().sum().transpose();
765 
766  //explicit term
767  float64_t result=CSingleFITCInference::get_derivative_related_cov(ddiagKi, dKuui, dKui, v, dA);
768 
769  //implicit term
770  Map<VectorXd> eigen_dlp(m_dlp.vector, m_dlp.vlen);
771  Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);
772 
773  SGVector<float64_t> b(ddiagKi.vlen);
774  Map<VectorXd> eigen_b(b.vector, b.vlen);
775  //b = dA*(R0tV*dlp) + v.*dlp;
776  eigen_b=eigen_dA.transpose()*(eigen_R0tV*eigen_dlp)+eigen_v.cwiseProduct(eigen_dlp);
777  //KZb = mvmK(mvmZ(b,RVdd,t),V,d0);
778  SGVector<float64_t> KZb=compute_mvmK(compute_mvmZ(b));
779  Map<VectorXd> eigen_KZb(KZb.vector, KZb.vlen);
780  //dnlZ.cov(i) = dnlZ.cov(i) - dfhat'*( b-KZb );
781  result-=eigen_dfhat.dot(eigen_b-eigen_KZb);
782  return result;
783 }
784 
785 SGVector<float64_t> CSingleFITCLaplaceInferenceMethod::get_derivative_wrt_inference_method(
786  const TParameter* param)
787 {
788  REQUIRE(param, "Param not set\n");
789  //time complexity O(m^2*n)
790  REQUIRE(!(strcmp(param->m_name, "log_scale")
791  && strcmp(param->m_name, "log_inducing_noise")
792  && strcmp(param->m_name, "inducing_features")),
793  "Can't compute derivative of"
794  " the negative log marginal likelihood wrt %s.%s parameter\n",
795  get_name(), param->m_name)
796 
797  SGVector<float64_t> result;
798  int32_t len;
799  if (!strcmp(param->m_name, "inducing_features"))
800  {
801  if(m_Wneg)
802  {
803  int32_t dim=m_inducing_features.num_rows;
804  int32_t num_samples=m_inducing_features.num_cols;
805  len=dim*num_samples;
806  }
807  else if (!m_fully_sparse)
809  else
811  }
812  else
813  len=1;
814 
815  if (m_Wneg)
816  {
817  result=SGVector<float64_t>(len);
818  return derivative_helper_when_Wneg(result, param);
819  }
820 
821  if (!strcmp(param->m_name, "log_inducing_noise"))
822  // wrt inducing_noise
823  // compute derivative wrt inducing noise
824  return get_derivative_wrt_inducing_noise(param);
825 
826  result=SGVector<float64_t>(len);
827  // wrt scale
828  // clone kernel matrices
829  SGVector<float64_t> deriv_trtr=m_ktrtr_diag.clone();
830  SGMatrix<float64_t> deriv_uu=m_kuu.clone();
831  SGMatrix<float64_t> deriv_tru=m_ktru.clone();
832 
833  // create eigen representation of kernel matrices
834  Map<VectorXd> ddiagKi(deriv_trtr.vector, deriv_trtr.vlen);
835  Map<MatrixXd> dKuui(deriv_uu.matrix, deriv_uu.num_rows, deriv_uu.num_cols);
836  Map<MatrixXd> dKui(deriv_tru.matrix, deriv_tru.num_rows, deriv_tru.num_cols);
837 
838  // compute derivatives wrt scale for each kernel matrix
839  result[0]=get_derivative_related_cov(deriv_trtr, deriv_uu, deriv_tru);
840  result[0]*=CMath::exp(m_log_scale*2.0)*2.0;
841  return result;
842 }
843 
844 SGVector<float64_t> CSingleFITCLaplaceInferenceMethod::get_derivative_wrt_likelihood_model(
845  const TParameter* param)
846 {
847  SGVector<float64_t> result(1);
848  if (m_Wneg)
849  return derivative_helper_when_Wneg(result, param);
850 
851  // get derivatives wrt likelihood model parameters
852  SGVector<float64_t> lp_dhyp=m_model->get_first_derivative(m_labels,
853  m_mu, param);
854  SGVector<float64_t> dlp_dhyp=m_model->get_second_derivative(m_labels,
855  m_mu, param);
856  SGVector<float64_t> d2lp_dhyp=m_model->get_third_derivative(m_labels,
857  m_mu, param);
858 
859  // create eigen representation of the derivatives
860  Map<VectorXd> eigen_lp_dhyp(lp_dhyp.vector, lp_dhyp.vlen);
861  Map<VectorXd> eigen_dlp_dhyp(dlp_dhyp.vector, dlp_dhyp.vlen);
862  Map<VectorXd> eigen_d2lp_dhyp(d2lp_dhyp.vector, d2lp_dhyp.vlen);
863  Map<VectorXd> eigen_g(m_g.vector, m_g.vlen);
864  Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);
865 
866  //explicit term
867  //dnlZ.lik(i) = -g'*d2lp_dhyp - sum(lp_dhyp);
868  result[0]=-eigen_g.dot(eigen_d2lp_dhyp)-eigen_lp_dhyp.sum();
869 
870  //implicit term
871  //b = mvmK(dlp_dhyp,V,d0);
872  SGVector<float64_t> b=compute_mvmK(dlp_dhyp);
873  //dnlZ.lik(i) = dnlZ.lik(i) - dfhat'*(b-mvmK(mvmZ(b,RVdd,t),V,d0));
874  result[0]-=get_derivative_implicit_term_helper(b);
875 
876  return result;
877 }
878 
879 SGVector<float64_t> CSingleFITCLaplaceInferenceMethod::get_derivative_wrt_kernel(
880  const TParameter* param)
881 {
882  REQUIRE(param, "Param not set\n");
883  SGVector<float64_t> result;
884  int64_t len=const_cast<TParameter *>(param)->m_datatype.get_num_elements();
885  result=SGVector<float64_t>(len);
886 
887  if (m_Wneg)
888  return derivative_helper_when_Wneg(result, param);
889 
890  m_lock->lock();
891  CFeatures *inducing_features=get_inducing_features();
892  for (index_t i=0; i<result.vlen; i++)
893  {
894  //time complexity O(m^2*n)
895  SGVector<float64_t> deriv_trtr;
896  SGMatrix<float64_t> deriv_uu;
897  SGMatrix<float64_t> deriv_tru;
898 
899  m_kernel->init(m_features, m_features);
900  deriv_trtr=m_kernel->get_parameter_gradient_diagonal(param, i);
901 
902  m_kernel->init(inducing_features, inducing_features);
903  deriv_uu=m_kernel->get_parameter_gradient(param, i);
904 
905  m_kernel->init(inducing_features, m_features);
906  deriv_tru=m_kernel->get_parameter_gradient(param, i);
907 
908  // create eigen representation of derivatives
909  Map<VectorXd> ddiagKi(deriv_trtr.vector, deriv_trtr.vlen);
910  Map<MatrixXd> dKuui(deriv_uu.matrix, deriv_uu.num_rows,
911  deriv_uu.num_cols);
912  Map<MatrixXd> dKui(deriv_tru.matrix, deriv_tru.num_rows,
913  deriv_tru.num_cols);
914 
915  result[i]=get_derivative_related_cov(deriv_trtr, deriv_uu, deriv_tru);
916  result[i]*=CMath::exp(m_log_scale*2.0);
917  }
918  SG_UNREF(inducing_features);
919  m_lock->unlock();
920 
921  return result;
922 }
923 
924 float64_t CSingleFITCLaplaceInferenceMethod::get_derivative_related_mean(SGVector<float64_t> dmu)
925 {
926  //time complexity O(m*n)
927  //explicit term
928  float64_t result=CSingleFITCInference::get_derivative_related_mean(dmu);
929 
930  //implicit term
931  //Zdm = mvmZ(dm,RVdd,t);
932  //tmp = mvmK(Zdm,V,d0)
933  //dnlZ.mean(i) = dnlZ.mean(i) - dfhat'*(dm-mvmK(Zdm,V,d0));
934  result-=get_derivative_implicit_term_helper(dmu);
935 
936  return result;
937 }
938 
939 float64_t CSingleFITCLaplaceInferenceMethod::get_derivative_implicit_term_helper(SGVector<float64_t> d)
940 {
941  //time complexity O(m*n)
942  Map<VectorXd> eigen_d(d.vector, d.vlen);
943  SGVector<float64_t> tmp=compute_mvmK(compute_mvmZ(d));
944  Map<VectorXd> eigen_tmp(tmp.vector, tmp.vlen);
945  Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);
946  return eigen_dfhat.dot(eigen_d-eigen_tmp);
947 }
948 
949 SGVector<float64_t> CSingleFITCLaplaceInferenceMethod::get_derivative_wrt_mean(
950  const TParameter* param)
951 {
952  //time complexity O(m*n)
953  REQUIRE(param, "Param not set\n");
954  SGVector<float64_t> result;
955  int64_t len=const_cast<TParameter *>(param)->m_datatype.get_num_elements();
956  result=SGVector<float64_t>(len);
957 
958  if (m_Wneg)
959  return derivative_helper_when_Wneg(result, param);
960 
961  for (index_t i=0; i<result.vlen; i++)
962  {
963  SGVector<float64_t> dmu=m_mean->get_parameter_derivative(m_features,
964  param, i);
965  result[i]=get_derivative_related_mean(dmu);
966  }
967 
968  return result;
969 }
970 
971 SGVector<float64_t> CSingleFITCLaplaceInferenceMethod::derivative_helper_when_Wneg(
972  SGVector<float64_t> res, const TParameter *param)
973 {
974  REQUIRE(param, "Param not set\n");
975  SG_WARNING("Derivative wrt %s cannot be computed since W (the Hessian (diagonal) matrix) is too negative\n", param->m_name);
976  //dnlZ = struct('cov',0*hyp.cov, 'mean',0*hyp.mean, 'lik',0*hyp.lik);
977  res.zero();
978  return res;
979 }
980 
981 SGVector<float64_t> CSingleFITCLaplaceInferenceMethod::get_derivative_wrt_inducing_features(
982  const TParameter* param)
983 {
984  //time complexity depends on the implementation of the provided kernel
985  //time complexity is at least O(max((p*n*m),(m^2*n))), where p is the dimension (#) of features
986  //For an ARD kernel with KL_FULL, the time complexity is O(max((p*n*m*d),(m^2*n)))
987  //where the parameter \f$\Lambda\f$ of the ARD kernel is a \f$d\f$-by-\f$p\f$ matrix,
988  //For an ARD kernel with KL_SCALE and KL_DIAG, the time complexity is O(max((p*n*m),(m^2*n)))
989  //efficiently compute the implicit term and explicit term at one shot
990  Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
991  Map<MatrixXd> eigen_B(m_B.matrix, m_B.num_rows, m_B.num_cols);
992  //w=B*al
993  Map<VectorXd> eigen_w(m_w.vector, m_w.vlen);
994  Map<MatrixXd> eigen_Rvdd(m_Rvdd.matrix, m_Rvdd.num_rows, m_Rvdd.num_cols);
995  Map<VectorXd> eigen_dlp(m_dlp.vector, m_dlp.vlen);
996  Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);
997 
998  //q = dfhat - mvmZ(mvmK(dfhat,V,d0),RVdd,t);
999  SGVector<float64_t> q=compute_mvmZ(compute_mvmK(m_dfhat));
1000  Map<VectorXd> eigen_q(q.vector, q.vlen);
1001  eigen_q=eigen_dfhat-eigen_q;
1002 
1003  //explicit term
1004  //diag_dK = alpha.*alpha + sum(RVdd.*RVdd,1)'-t, where t can be cancelled out
1005  //-v_1=get_derivative_related_cov_diagonal= -(alpha.*alpha + sum(RVdd.*RVdd,1)')
1006  //implicit term
1007  //-v_2=-2*dlp.*q
1008  //neg_v = -(diag_dK+ 2*dlp.*q);
1009  SGVector<float64_t> neg_v=get_derivative_related_cov_diagonal();
1010  Map<VectorXd> eigen_neg_v(neg_v.vector, neg_v.vlen);
1011  eigen_neg_v-=2*eigen_dlp.cwiseProduct(eigen_q);
1012 
1013  SGMatrix<float64_t> BdK(m_B.num_rows, m_B.num_cols);
1014  Map<MatrixXd> eigen_BdK(BdK.matrix, BdK.num_rows, BdK.num_cols);
1015  //explicit
1016  //BdK = (B*alpha)*alpha' + (B*RVdd')*RVdd - B.*repmat(v_1',nu,1),
1017  //implicit
1018  //BdK = BdK + (B*dlp)*q' + (B*q)*dlp' - B.*repmat(v_2',nu,1)
1019  //where v_1 is the explicit part of v and v_2 is the implicit part of v
1020  //v=v_1+v_2
1021  eigen_BdK=eigen_B*eigen_neg_v.asDiagonal()+eigen_w*(eigen_al.transpose())+
1022  (eigen_B*eigen_Rvdd.transpose())*eigen_Rvdd+
1023  (eigen_B*eigen_dlp)*eigen_q.transpose()+(eigen_B*eigen_q)*eigen_dlp.transpose();
1024 
1025  return get_derivative_related_inducing_features(BdK, param);
1026 }
1027 
1028 SGVector<float64_t> CSingleFITCLaplaceInferenceMethod::get_derivative_wrt_inducing_noise(
1029  const TParameter* param)
1030 {
1031  //time complexity O(m^2*n)
1032  //explicit term
1033  SGVector<float64_t> result=CSingleFITCInference::get_derivative_wrt_inducing_noise(param);
1034 
1035  //implicit term
1036  Map<MatrixXd> eigen_B(m_B.matrix, m_B.num_rows, m_B.num_cols);
1037  Map<VectorXd> eigen_dlp(m_dlp.vector, m_dlp.vlen);
1038 
1039  //snu = sqrt(snu2);
1040  //T = chol_inv(Kuu + snu2*eye(nu)); T = T'*(T*(snu*Ku));
1041  //t1 = sum(T.*T,1)';
1042  VectorXd eigen_t1=eigen_B.cwiseProduct(eigen_B).colwise().sum().adjoint();
1043 
1044  //b = (t1.*dlp-T'*(T*dlp))*2;
1045  SGVector<float64_t> b(eigen_t1.rows());
1046  Map<VectorXd> eigen_b(b.vector, b.vlen);
1047  float64_t factor=2.0*CMath::exp(m_log_ind_noise);
1048  eigen_b=(eigen_t1.cwiseProduct(eigen_dlp)-eigen_B.transpose()*(eigen_B*eigen_dlp))*factor;
1049 
1050  //KZb = mvmK(mvmZ(b,RVdd,t),V,d0);
1051  //z = z - dfhat'*( b-KZb );
1052  result[0]-=get_derivative_implicit_term_helper(b);
1053 
1054  return result;
1055 }
1056 
1057 SGVector<float64_t> CSingleFITCLaplaceInferenceMethod::get_posterior_mean()
1058 {
1059  compute_gradient();
1060 
1061  SGVector<float64_t> res(m_mu.vlen);
1062  Map<VectorXd> eigen_res(res.vector, res.vlen);
1063 
1064  /*
1065  //true posterior mean with equivalent FITC prior approximated by Newton method
1066  //time complexity O(n)
1067  Map<VectorXd> eigen_mu(m_mu, m_mu.vlen);
1068  SGVector<float64_t> mean=m_mean->get_mean_vector(m_features);
1069  Map<VectorXd> eigen_mean(mean.vector, mean.vlen);
1070  eigen_res=eigen_mu-eigen_mean;
1071  */
1072 
1073  //FITC (further) approximated posterior mean with Newton method
1074  //time complexity of the following operation is O(m*n)
1075  Map<VectorXd> eigen_post_alpha(m_alpha.vector, m_alpha.vlen);
1076  Map<MatrixXd> eigen_Ktru(m_ktru.matrix, m_ktru.num_rows, m_ktru.num_cols);
1077  eigen_res=CMath::exp(m_log_scale*2.0)*eigen_Ktru.adjoint()*eigen_post_alpha;
1078 
1079  return res;
1080 }
1081 
1082 SGMatrix<float64_t> CSingleFITCLaplaceInferenceMethod::get_posterior_covariance()
1083 {
1084  compute_gradient();
1085  //time complexity of the following operations is O(m*n^2)
1086  //Warning: the time complexity increases from O(m^2*n) to O(n^2*m) if this method is called
1087  m_Sigma=SGMatrix<float64_t>(m_ktrtr_diag.vlen, m_ktrtr_diag.vlen);
1088  Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows,
1089  m_Sigma.num_cols);
1090 
1091  //FITC (further) approximated posterior covariance with Newton method
1094  Map<VectorXd> eigen_dg(m_dg.vector, m_dg.vlen);
1096 
1097  MatrixXd diagonal_part=eigen_dg.asDiagonal();
1098  //FITC equivalent prior
1099  MatrixXd prior=eigen_V.transpose()*eigen_V+diagonal_part;
1100 
1101  MatrixXd tmp=CMath::exp(m_log_scale*2.0)*eigen_Ktru;
1102  eigen_Sigma=prior-tmp.adjoint()*eigen_L*tmp;
1103 
1104  /*
1105  //true posterior mean with equivalent FITC prior approximated by Newton method
1106  Map<VectorXd> eigen_t(m_t.vector, m_t.vlen);
1107  Map<MatrixXd> eigen_Rvdd(m_Rvdd.matrix, m_Rvdd.num_rows, m_Rvdd.num_cols);
1108  Map<VectorXd> eigen_W(m_W.vector, m_W.vlen);
1109  MatrixXd tmp1=eigen_Rvdd*prior;
1110  eigen_Sigma=prior+tmp.transpose()*tmp;
1111 
1112  MatrixXd tmp2=((eigen_dg.cwiseProduct(eigen_t)).asDiagonal()*eigen_V.transpose())*eigen_V;
1113  eigen_Sigma-=(tmp2+tmp2.transpose());
1114  eigen_Sigma-=eigen_V.transpose()*(eigen_V*eigen_t.asDiagonal()*eigen_V.transpose())*eigen_V;
1115  MatrixXd tmp3=((eigen_dg.cwiseProduct(eigen_dg)).cwiseProduct(eigen_t)).asDiagonal();
1116  eigen_Sigma-=tmp3;
1117  */
1118 
1119  return SGMatrix<float64_t>(m_Sigma);
1120 }
1121 
1122 float64_t CSingleFITCLaplaceInferenceMethod::get_psi_wrt_alpha()
1123 {
1124  //time complexity O(m*n)
1125  Map<VectorXd> eigen_alpha(m_al, m_al.vlen);
1126  SGVector<float64_t> f(m_al.vlen);
1127  Map<VectorXd> eigen_f(f.vector, f.vlen);
1128  Map<VectorXd> eigen_mean_f(m_mean_f.vector,m_mean_f.vlen);
1129  /* f = K * alpha + mean_f given alpha*/
1130  SGVector<float64_t> tmp=compute_mvmK(m_al);
1131  Map<VectorXd> eigen_tmp(tmp.vector, tmp.vlen);
1132  eigen_f=eigen_tmp+eigen_mean_f;
1133 
1134  /* psi = 0.5 * alpha .* (f - m) - sum(dlp)*/
1135  float64_t psi=eigen_alpha.dot(eigen_tmp) * 0.5;
1136  psi-=SGVector<float64_t>::sum(m_model->get_log_probability_f(m_labels, f));
1137 
1138  return psi;
1139 }
1140 
1142 {
1143  //time complexity O(m*n)
1144  Map<VectorXd> eigen_alpha(m_al, m_al.vlen);
1145  Map<VectorXd> eigen_gradient(gradient.vector, gradient.vlen);
1146  SGVector<float64_t> f(m_al.vlen);
1147  Map<VectorXd> eigen_f(f.vector, f.vlen);
1148  Map<MatrixXd> kernel(m_ktrtr.matrix,
1149  m_ktrtr.num_rows,
1150  m_ktrtr.num_cols);
1151  Map<VectorXd> eigen_mean_f(m_mean_f.vector, m_mean_f.vlen);
1152 
1153  /* f = K * alpha + mean_f given alpha*/
1154  SGVector<float64_t> tmp=compute_mvmK(m_al);
1155  Map<VectorXd> eigen_tmp(tmp.vector, tmp.vlen);
1156  eigen_f=eigen_tmp+eigen_mean_f;
1157 
1158  SGVector<float64_t> dlp_f =
1159  m_model->get_log_probability_derivative_f(m_labels, f, 1);
1160 
1161  Map<VectorXd> eigen_dlp_f(dlp_f.vector, dlp_f.vlen);
1162 
1163  /* g_alpha = K * (alpha - dlp_f)*/
1164  SGVector<float64_t> tmp2(m_al.vlen);
1165  Map<VectorXd> eigen_tmp2(tmp2.vector, tmp2.vlen);
1166  eigen_tmp2=eigen_alpha-eigen_dlp_f;
1167  tmp2=compute_mvmK(tmp2);
1168  Map<VectorXd> eigen_tmp3(tmp2.vector, tmp2.vlen);
1169  eigen_gradient=eigen_tmp3;
1170 }
1171 
1172 }
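
The listing above only defines the inference method; for orientation, here is a minimal usage sketch showing how it is typically wired into a GP binary classifier. The constructor argument order (kernel, features, mean, labels, likelihood, inducing features) follows the constructor in the listing; the header paths, the toy data, and the choice of CGaussianKernel, CConstMean and CLogitLikelihood are illustrative assumptions against the Shogun 6.1 C++ API, and a GPL-enabled build supplies the default Brent line search used by the Newton optimizer.

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/machine/gp/ConstMean.h>
#include <shogun/machine/gp/LogitLikelihood.h>
#include <shogun/machine/gp/SingleFITCLaplaceInferenceMethod.h>
#include <shogun/classifier/GaussianProcessClassification.h>

using namespace shogun;

int main()
{
	init_shogun_with_defaults();

	// toy data: 2-D inputs, n=6 training points, m=2 inducing points
	SGMatrix<float64_t> X(2, 6);
	SGVector<float64_t> y(6);
	for (index_t i=0; i<6; i++)
	{
		X(0, i)=i;
		X(1, i)=(i%2==0) ? 1.0 : -1.0;
		y[i]=(i%2==0) ? 1.0 : -1.0;
	}
	SGMatrix<float64_t> Xu(2, 2);
	Xu(0, 0)=1; Xu(1, 0)=1;
	Xu(0, 1)=4; Xu(1, 1)=-1;

	CDenseFeatures<float64_t>* feats=new CDenseFeatures<float64_t>(X);
	CDenseFeatures<float64_t>* lat=new CDenseFeatures<float64_t>(Xu);
	CBinaryLabels* labels=new CBinaryLabels(y);

	CGaussianKernel* kernel=new CGaussianKernel(10, 2.0);
	CConstMean* mean=new CConstMean(0.0);
	CLogitLikelihood* lik=new CLogitLikelihood();

	// FITC-Laplace inference over the m inducing points
	CSingleFITCLaplaceInferenceMethod* inf=
		new CSingleFITCLaplaceInferenceMethod(kernel, feats, mean, labels, lik, lat);

	CGaussianProcessClassification* gpc=new CGaussianProcessClassification(inf);
	gpc->train();

	CBinaryLabels* pred=gpc->apply_binary(feats);
	pred->get_labels().display_vector("predictions");

	SG_UNREF(pred);
	SG_UNREF(gpc);
	exit_shogun();
	return 0;
}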