SHOGUN  4.2.0
SingleFITCLaplaceInferenceMethod.cpp
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (W) 2015 Wu Lin
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * The views and conclusions contained in the software and documentation are those
27  * of the authors and should not be interpreted as representing official policies,
28  * either expressed or implied, of the Shogun Development Team.
29  *
30  */
31 
32 #include <shogun/machine/gp/SingleFITCLaplaceInferenceMethod.h>
33 
34 #include <shogun/machine/gp/StudentsTLikelihood.h>
35 #include <shogun/mathematics/Math.h>
36 #include <shogun/lib/external/brent.h>
37 #include <shogun/mathematics/eigen3.h>
40 
41 using namespace shogun;
42 using namespace Eigen;
43 
44 namespace shogun
45 {
46 
47 #ifndef DOXYGEN_SHOULD_SKIP_THIS
48 
50 class CFITCPsiLine : public func_base
51 {
52 public:
53  float64_t log_scale;
54  VectorXd dalpha;
55  VectorXd start_alpha;
56  SGVector<float64_t>* alpha;
57  SGVector<float64_t>* dlp;
58  SGVector<float64_t>* W;
59  SGVector<float64_t>* f;
60  SGVector<float64_t>* m;
61  CLikelihoodModel* lik;
62  CLabels* lab;
63  CSingleFITCLaplaceInferenceMethod* inf;
64 
65  virtual double operator() (double x)
66  {
67  //time complexity O(m*n)
68  Map<VectorXd> eigen_f(f->vector, f->vlen);
69  Map<VectorXd> eigen_m(m->vector, m->vlen);
70  Map<VectorXd> eigen_alpha(alpha->vector, alpha->vlen);
71 
72  //alpha = alpha + s*dalpha;
73  eigen_alpha=start_alpha+x*dalpha;
74  SGVector<float64_t> tmp=inf->compute_mvmK(*alpha);
75  Map<VectorXd> eigen_tmp(tmp.vector, tmp.vlen);
76  //f = mvmK(alpha,V,d0)+m;
77  eigen_f=eigen_tmp+eigen_m;
78 
79  // get first and second derivatives of log likelihood
80  (*dlp)=lik->get_log_probability_derivative_f(lab, (*f), 1);
81 
82  (*W)=lik->get_log_probability_derivative_f(lab, (*f), 2);
83  W->scale(-1.0);
84 
85  // compute psi=alpha'*(f-m)/2-lp
86  float64_t result = eigen_alpha.dot(eigen_f-eigen_m)/2.0-
87  SGVector<float64_t>::sum(lik->get_log_probability_f(lab, *f));
88 
89  return result;
90  }
91 };
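// Added note on the line-search objective (paraphrasing the GPML
// infFITC_Laplace.m formulation this functor mirrors): operator() evaluates
//   psi(alpha) = alpha'*(f - m)/2 - log p(y|f),  with f = K*alpha + m,
// at alpha = start_alpha + x*dalpha, so Brent's method can pick the step
// size x along the Newton direction dalpha.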
92 
93 class SingleFITCLaplaceInferenceMethodCostFunction: public FirstOrderCostFunction
94 {
95 public:
96  SingleFITCLaplaceInferenceMethodCostFunction():FirstOrderCostFunction() { init(); }
97  virtual ~SingleFITCLaplaceInferenceMethodCostFunction() { clean(); }
98  void set_target(CSingleFITCLaplaceInferenceMethod *obj)
99  {
100  REQUIRE(obj, "Obj must be set\n");
101  if(m_obj != obj)
102  {
103  SG_REF(obj);
104  SG_UNREF(m_obj);
105  m_obj=obj;
106  }
107  }
108 
109  void clean()
110  {
111  SG_UNREF(m_obj);
112  }
113 
114  virtual float64_t get_cost()
115  {
116  REQUIRE(m_obj,"Object not set\n");
117  return m_obj->get_psi_wrt_alpha();
118  }
119  void unset_target(bool is_unref)
120  {
121  if(is_unref)
122  {
123  SG_UNREF(m_obj);
124  }
125  m_obj=NULL;
126  }
127  virtual SGVector<float64_t> obtain_variable_reference()
128  {
129  REQUIRE(m_obj,"Object not set\n");
130  m_derivatives = SGVector<float64_t>((m_obj->m_al).vlen);
131  return m_obj->m_al;
132  }
133  virtual SGVector<float64_t> get_gradient()
134  {
135  REQUIRE(m_obj,"Object not set\n");
136  m_obj->get_gradient_wrt_alpha(m_derivatives);
137  return m_derivatives;
138  }
139  virtual const char* get_name() const { return "SingleFITCLaplaceInferenceMethodCostFunction"; }
140 private:
141  void init()
142  {
143  m_obj=NULL;
144  m_derivatives = SGVector<float64_t>();
145  SG_ADD(&m_derivatives, "SingleFITCLaplaceInferenceMethodCostFunction__m_derivatives",
146  "derivatives in SingleFITCLaplaceInferenceMethodCostFunction", MS_NOT_AVAILABLE);
147  SG_ADD((CSGObject **)&m_obj, "SingleFITCLaplaceInferenceMethodCostFunction__m_obj",
148  "obj in SingleFITCLaplaceInferenceMethodCostFunction", MS_NOT_AVAILABLE);
149  }
150 
151  SGVector<float64_t> m_derivatives;
152  CSingleFITCLaplaceInferenceMethod *m_obj;
153 };
154 
155 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
156 
157 void CSingleFITCLaplaceNewtonOptimizer::set_target(CSingleFITCLaplaceInferenceMethod *obj)
158 {
159  REQUIRE(obj, "Obj must be set\n");
160  if(m_obj != obj)
161  {
162  SG_REF(obj);
163  SG_UNREF(m_obj);
164  m_obj=obj;
165  }
166 }
167 
168 void CSingleFITCLaplaceNewtonOptimizer::unset_target(bool is_unref)
169 {
170  if(is_unref)
171  {
172  SG_UNREF(m_obj);
173  }
174  m_obj=NULL;
175 
176 }
177 
178 void CSingleFITCLaplaceNewtonOptimizer::init()
179 {
180  m_obj=NULL;
181  m_iter=20;
182  m_tolerance=1e-6;
183  m_opt_tolerance=1e-6;
184  m_opt_max=10;
185 
186  SG_ADD((CSGObject **)&m_obj, "CSingleFITCLaplaceNewtonOptimizer__m_obj",
187  "obj in CSingleFITCLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
188  SG_ADD(&m_iter, "CSingleFITCLaplaceNewtonOptimizer__m_iter",
189  "iter in CSingleFITCLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
190  SG_ADD(&m_tolerance, "CSingleFITCLaplaceNewtonOptimizer__m_tolerance",
191  "tolerance in CSingleFITCLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
192  SG_ADD(&m_opt_tolerance, "CSingleFITCLaplaceNewtonOptimizer__m_opt_tolerance",
193  "opt_tolerance in CSingleFITCLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
194  SG_ADD(&m_opt_max, "CSingleFITCLaplaceNewtonOptimizer__m_opt_max",
195  "opt_max in CSingleFITCLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
196 }
197 
198 float64_t CSingleFITCLaplaceNewtonOptimizer::minimize()
199 {
200  REQUIRE(m_obj,"Object not set\n");
201  //time complexity O(m^2*n)
202  Map<MatrixXd> eigen_kuu((m_obj->m_kuu).matrix, (m_obj->m_kuu).num_rows, (m_obj->m_kuu).num_cols);
203  Map<MatrixXd> eigen_V((m_obj->m_V).matrix, (m_obj->m_V).num_rows, (m_obj->m_V).num_cols);
204  Map<VectorXd> eigen_dg((m_obj->m_dg).vector, (m_obj->m_dg).vlen);
205  Map<MatrixXd> eigen_R0((m_obj->m_chol_R0).matrix, (m_obj->m_chol_R0).num_rows, (m_obj->m_chol_R0).num_cols);
206  Map<VectorXd> eigen_mu(m_obj->m_mu, (m_obj->m_mu).vlen);
207 
208  SGVector<float64_t> mean=m_obj->m_mean->get_mean_vector(m_obj->m_features);
209  Map<VectorXd> eigen_mean(mean.vector, mean.vlen);
210 
211  float64_t Psi_Old=CMath::INFTY;
212  float64_t Psi_New=m_obj->m_Psi;
213 
214  // compute W = -d2lp
215  m_obj->m_W=m_obj->m_model->get_log_probability_derivative_f(m_obj->m_labels, m_obj->m_mu, 2);
216  m_obj->m_W.scale(-1.0);
217 
218  //n-by-1 vector
219  Map<VectorXd> eigen_al((m_obj->m_al).vector, (m_obj->m_al).vlen);
220 
221  // get first derivative of log probability function
222  m_obj->m_dlp=m_obj->m_model->get_log_probability_derivative_f(m_obj->m_labels, m_obj->m_mu, 1);
223 
224  index_t iter=0;
225 
226  m_obj->m_Wneg=false;
227  while (Psi_Old-Psi_New>m_tolerance && iter<m_iter)
228  {
229  //time complexity O(m^2*n)
230  Map<VectorXd> eigen_W((m_obj->m_W).vector, (m_obj->m_W).vlen);
231  Map<VectorXd> eigen_dlp((m_obj->m_dlp).vector, (m_obj->m_dlp).vlen);
232 
233  Psi_Old = Psi_New;
234  iter++;
235 
236  if (eigen_W.minCoeff() < 0)
237  {
238  // Suggested by Vanhatalo et al.,
239  // Gaussian Process Regression with Student's t likelihood, NIPS 2009
240  // Quoted from infFITC_Laplace.m
241  float64_t df;
242 
243  if (m_obj->m_model->get_model_type()==LT_STUDENTST)
244  {
245  CStudentsTLikelihood* lik=CStudentsTLikelihood::obtain_from_generic(m_obj->m_model);
246  df=lik->get_degrees_freedom();
247  SG_UNREF(lik);
248  }
249  else
250  df=1;
251  eigen_W+=(2.0/(df+1))*eigen_dlp.cwiseProduct(eigen_dlp);
252  }
253 
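 // Added note (following the GPML infFITC_Laplace.m derivation this file
 // mirrors; not a statement from the original sources): the Newton step below
 // solves (I + W*K_fitc)*alpha_new = W.*(f-m) + dlp, where
 // K_fitc = V'*V + diag(d0) is the FITC prior covariance. Applying the
 // Woodbury identity with dd = 1./(1+W.*d0) and A = eye(nu)+V*diag(W.*dd)*V'
 // gives the dd/RV expressions used next; dalpha is alpha_new - alpha and the
 // step length along it is chosen by the Brent line search on psi.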
254  //b = W.*(f-m) + dlp;
255  VectorXd b=eigen_W.cwiseProduct(eigen_mu-eigen_mean)+eigen_dlp;
256 
257  //dd = 1./(1+W.*d0);
258  VectorXd dd=MatrixXd::Ones(b.rows(),1).cwiseQuotient(eigen_W.cwiseProduct(eigen_dg)+MatrixXd::Ones(b.rows(),1));
259 
260  VectorXd eigen_t=eigen_W.cwiseProduct(dd);
261  //m-by-m matrix
262  SGMatrix<float64_t> tmp( (m_obj->m_V).num_rows, (m_obj->m_V).num_rows);
263  Map<MatrixXd> eigen_tmp(tmp.matrix, tmp.num_rows, tmp.num_cols);
264  //eye(nu)+(V.*repmat((W.*dd)',nu,1))*V'
265  eigen_tmp=eigen_V*eigen_t.asDiagonal()*eigen_V.transpose()+MatrixXd::Identity(tmp.num_rows,tmp.num_rows);
266  tmp=m_obj->get_chol_inv(tmp);
267  //chol_inv(eye(nu)+(V.*repmat((W.*dd)',nu,1))*V')
268  Map<MatrixXd> eigen_tmp2(tmp.matrix, tmp.num_rows, tmp.num_cols);
269  //RV = chol_inv(eye(nu)+(V.*repmat((W.*dd)',nu,1))*V')*V;
270  // m-by-n matrix
271  MatrixXd eigen_RV=eigen_tmp2*eigen_V;
272  //dalpha = dd.*b - (W.*dd).*(RV'*(RV*(dd.*b))) - alpha; % Newt dir + line search
273  VectorXd dalpha=dd.cwiseProduct(b)-eigen_t.cwiseProduct(eigen_RV.transpose()*(eigen_RV*(dd.cwiseProduct(b))))-eigen_al;
274 
275  //perform Brent's optimization
276  CFITCPsiLine func;
277 
278  func.log_scale=m_obj->m_log_scale;
279  func.dalpha=dalpha;
280  func.start_alpha=eigen_al;
281  func.alpha=&(m_obj->m_al);
282  func.dlp=&(m_obj->m_dlp);
283  func.f=&(m_obj->m_mu);
284  func.m=&mean;
285  func.W=&(m_obj->m_W);
286  func.lik=m_obj->m_model;
287  func.lab=m_obj->m_labels;
288  func.inf=m_obj;
289 
290  float64_t x;
291  Psi_New=local_min(0, m_opt_max, m_opt_tolerance, func, x);
292  }
293 
294  if (Psi_Old-Psi_New>m_tolerance && iter>=m_iter)
295  {
296  SG_SWARNING("Max iterations (%d) reached, but convergence level (%f) is not yet below tolerance (%f)\n", m_iter, Psi_Old-Psi_New, m_tolerance);
297  }
298  return Psi_New;
299 }
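// Hypothetical usage sketch (added; "inf" and the surrounding set-up are
// assumptions, not part of this file): the Newton optimizer is the built-in
// mode finder, but it can also be registered explicitly before inference runs:
//
//   CSingleFITCLaplaceNewtonOptimizer* newton=new CSingleFITCLaplaceNewtonOptimizer();
//   inf->register_minimizer(newton); // inf is a CSingleFITCLaplaceInferenceMethod*
//   inf->get_negative_log_marginal_likelihood(); // triggers update_alpha()/Newton iterations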
300 
301 CSingleFITCLaplaceInferenceMethod::CSingleFITCLaplaceInferenceMethod() : CSingleFITCInference()
302 {
303  init();
304 }
305 
306 CSingleFITCLaplaceInferenceMethod::CSingleFITCLaplaceInferenceMethod(CKernel* kern, CFeatures* feat,
307  CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod, CFeatures* lat)
308 : CSingleFITCInference(kern, feat, m, lab, mod, lat)
309 {
310  init();
311 }
312 
313 void CSingleFITCLaplaceInferenceMethod::init()
314 {
315  m_Psi=0;
316  m_Wneg=false;
317 
318  SG_ADD(&m_dlp, "dlp", "derivative of log likelihood with respect to function location", MS_NOT_AVAILABLE);
319  SG_ADD(&m_W, "W", "the noise matrix", MS_NOT_AVAILABLE);
320 
321  SG_ADD(&m_sW, "sW", "square root of W", MS_NOT_AVAILABLE);
322  SG_ADD(&m_d2lp, "d2lp", "second derivative of log likelihood with respect to function location", MS_NOT_AVAILABLE);
323  SG_ADD(&m_d3lp, "d3lp", "third derivative of log likelihood with respect to function location", MS_NOT_AVAILABLE);
324  SG_ADD(&m_chol_R0, "chol_R0", "Cholesky of inverse covariance of inducing features", MS_NOT_AVAILABLE);
325  SG_ADD(&m_dfhat, "dfhat", "derivative of negative log (approximated) marginal likelihood wrt f", MS_NOT_AVAILABLE);
326  SG_ADD(&m_g, "g", "variable g defined in infFITC_Laplace.m", MS_NOT_AVAILABLE);
327  SG_ADD(&m_dg, "dg", "variable d0 defined in infFITC_Laplace.m", MS_NOT_AVAILABLE);
328  SG_ADD(&m_Psi, "Psi", "the negative log likelihood without constant terms used in Newton's method", MS_NOT_AVAILABLE);
329  SG_ADD(&m_Wneg, "Wneg", "whether W contains negative elements", MS_NOT_AVAILABLE);
330 
332 }
333 
334 void CSingleFITCLaplaceInferenceMethod::compute_gradient()
335 {
336  CInference::compute_gradient();
337 
338  if (!m_gradient_update)
339  {
341  update_deriv();
342  m_gradient_update=true;
344  }
345 }
346 
347 void CSingleFITCLaplaceInferenceMethod::update()
348 {
349  SG_DEBUG("entering\n");
350 
351  CInference::update();
352  update_init();
353  update_alpha();
354  update_chol();
355  m_gradient_update=false;
356  update_parameter_hash();
357 
358  SG_DEBUG("leaving\n");
359 }
360 
361 SGVector<float64_t> CSingleFITCLaplaceInferenceMethod::get_diagonal_vector()
362 {
363  if (parameter_hash_changed())
364  update();
365 
366  return SGVector<float64_t>(m_sW);
367 }
368 
369 CSingleFITCLaplaceInferenceMethod::~CSingleFITCLaplaceInferenceMethod()
370 {
371 }
372 
373 SGVector<float64_t> CSingleFITCLaplaceInferenceMethod::compute_mvmZ(SGVector<float64_t> x)
374 {
375  //time complexity O(m*n)
376  Map<MatrixXd> eigen_Rvdd(m_Rvdd.matrix, m_Rvdd.num_rows, m_Rvdd.num_cols);
377  Map<VectorXd> eigen_t(m_t.vector, m_t.vlen);
378  Map<VectorXd> eigen_x(x.vector, x.vlen);
379 
380  SGVector<float64_t> res(x.vlen);
381  Map<VectorXd> eigen_res(res.vector, res.vlen);
382 
383  //Zx = t.*x - RVdd'*(RVdd*x);
384  eigen_res=eigen_x.cwiseProduct(eigen_t)-eigen_Rvdd.transpose()*(eigen_Rvdd*eigen_x);
385  return res;
386 }
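// Added note (assuming W has no negative entries so that inv(W) exists): with
// t = W.*dd and RVdd = chol_inv(A)*V*diag(W.*dd) as prepared in update_chol(),
// the product above applies
//   Z = diag(t) - RVdd'*RVdd = inv(K_fitc + inv(W))
// by the Woodbury identity, so compute_mvmZ() costs O(m*n) rather than O(n^2).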
387 
388 SGVector<float64_t> CSingleFITCLaplaceInferenceMethod::compute_mvmK(SGVector<float64_t> al)
389 {
390  //time complexity O(m*n)
391  Map<MatrixXd> eigen_V(m_V.matrix, m_V.num_rows, m_V.num_cols);
392  Map<VectorXd> eigen_dg(m_dg.vector, m_dg.vlen);
393  Map<VectorXd> eigen_al(al.vector, al.vlen);
394 
395  SGVector<float64_t> res(al.vlen);
396  Map<VectorXd> eigen_res(res.vector, res.vlen);
397 
398  //Kal = V'*(V*al) + d0.*al;
399  eigen_res= eigen_V.transpose()*(eigen_V*eigen_al)+eigen_dg.cwiseProduct(eigen_al);
400  return res;
401 }
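// Added note: m_V = R0*Ku with R0'*R0 = inv(Kuu + snu2*I) (see get_chol_inv and
// update_init below), so V'*V is the low-rank part Ku'*inv(Kuu + snu2*I)*Ku and
// m_dg holds the residual diagonal d0. compute_mvmK() therefore multiplies by
// the FITC prior covariance K_fitc = V'*V + diag(d0) in O(m*n) time without
// ever forming the n-by-n matrix.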
402 
403 CSingleFITCLaplaceInferenceMethod* CSingleFITCLaplaceInferenceMethod::obtain_from_generic(
404  CInference* inference)
405 {
406  REQUIRE(inference!=NULL, "Inference should not be NULL");
407 
408  if (inference->get_inference_type()!=INF_FITC_LAPLACE_SINGLE)
409  SG_SERROR("Provided inference is not of type CSingleFITCLaplaceInferenceMethod!\n")
410 
411  SG_REF(inference);
412  return (CSingleFITCLaplaceInferenceMethod*)inference;
413 }
414 
415 SGMatrix<float64_t> CSingleFITCLaplaceInferenceMethod::get_chol_inv(SGMatrix<float64_t> mtx)
416 {
417  //time complexity O(m^3), where mtx is a m-by-m matrix
418  REQUIRE(mtx.num_rows==mtx.num_cols, "Matrix must be square\n");
419 
420  Map<MatrixXd> eigen_mtx(mtx.matrix, mtx.num_rows, mtx.num_cols);
421  LLT<MatrixXd> chol(eigen_mtx.colwise().reverse().rowwise().reverse().matrix());
422  //tmp=chol(rot180(A))'
423  MatrixXd tmp=chol.matrixL();
424  SGMatrix<float64_t> res(mtx.num_rows, mtx.num_cols);
425  Map<MatrixXd> eigen_res(res.matrix, res.num_rows, res.num_cols);
426  //chol_inv = @(A) rot180(chol(rot180(A))')\eye(nu); % chol(inv(A))
427  eigen_res=tmp.colwise().reverse().rowwise().reverse().matrix().triangularView<Upper>(
428  ).solve(MatrixXd::Identity(mtx.num_rows, mtx.num_cols));
429  return res;
430 }
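// Added note: the rot180/Cholesky construction above reproduces GPML's
//   chol_inv = @(A) rot180(chol(rot180(A))')\eye(nu);
// the returned factor R is upper triangular and satisfies R'*R = inv(mtx),
// which is the property update_init() and update_chol() rely on when forming
// V = R0*Ku and RV = chol_inv(A)*V.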
431 
432 float64_t CSingleFITCLaplaceInferenceMethod::get_negative_log_marginal_likelihood()
433 {
434  if (parameter_hash_changed())
435  update();
436 
437  if (m_Wneg)
438  {
439  SG_WARNING("nlZ cannot be computed since W is too negative");
440  //nlZ = NaN;
441  return CMath::INFTY;
442  }
443  //time complexity O(m^2*n)
444  Map<VectorXd> eigen_alpha(m_al.vector, m_al.vlen);
445  Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);
446  SGVector<float64_t> mean=m_mean->get_mean_vector(m_features);
447  Map<VectorXd> eigen_mean(mean.vector, mean.vlen);
448  // get log likelihood
449  float64_t lp=SGVector<float64_t>::sum(m_model->get_log_probability_f(m_labels,
450  m_mu));
451 
453  Map<VectorXd>eigen_t(m_t.vector, m_t.vlen);
454  MatrixXd A=eigen_V*eigen_t.asDiagonal()*eigen_V.transpose()+MatrixXd::Identity(m_V.num_rows,m_V.num_rows);
455  LLT<MatrixXd> chol(A);
456  A=chol.matrixU();
457 
458  Map<VectorXd> eigen_dg(m_dg.vector, m_dg.vlen);
459  Map<VectorXd> eigen_W(m_W.vector, m_W.vlen);
460 
461  //nlZ = alpha'*(f-m)/2 - sum(lp) - sum(log(dd))/2 + sum(log(diag(chol(A))));
462  float64_t result=eigen_alpha.dot(eigen_mu-eigen_mean)/2.0-lp+
463  A.diagonal().array().log().sum()+(eigen_W.cwiseProduct(eigen_dg)+MatrixXd::Ones(eigen_dg.rows(),1)).array().log().sum()/2.0;
464 
465  return result;
466 }
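// Added note: the determinant part of the Laplace-approximate evidence,
// log|I + W*K_fitc|/2, is evaluated without n-by-n matrices by splitting it as
//   log|I + W*diag(d0)|/2 + log|eye(nu) + V*diag(W.*dd)*V'|/2
//     = sum(log(1+W.*d0))/2 + sum(log(diag(chol(A)))),
// which are exactly the last two summands of `result` above.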
467 
468 void CSingleFITCLaplaceInferenceMethod::update_approx_cov()
469 {
470 }
471 
472 void CSingleFITCLaplaceInferenceMethod::update_init()
473 {
474  //time complexity O(m^2*n)
475  //m-by-m matrix
476  Map<MatrixXd> eigen_kuu(m_kuu.matrix, m_kuu.num_rows, m_kuu.num_cols);
477  //m-by-n matrix
478  Map<MatrixXd> eigen_ktru(m_ktru.matrix, m_ktru.num_rows, m_ktru.num_cols);
479 
480  Map<VectorXd> eigen_ktrtr_diag(m_ktrtr_diag.vector, m_ktrtr_diag.vlen);
481 
482  SGMatrix<float64_t> cor_kuu(m_kuu.num_rows, m_kuu.num_cols);
483  Map<MatrixXd> eigen_cor_kuu(cor_kuu.matrix, cor_kuu.num_rows, cor_kuu.num_cols);
484  eigen_cor_kuu=eigen_kuu*CMath::exp(m_log_scale*2.0)+CMath::exp(m_log_ind_noise)*MatrixXd::Identity(
485  m_kuu.num_rows, m_kuu.num_cols);
486  //R0 = chol_inv(Kuu+snu2*eye(nu)); m-by-m matrix
487  m_chol_R0=get_chol_inv(cor_kuu);
488  Map<MatrixXd> eigen_R0(m_chol_R0.matrix, m_chol_R0.num_rows, m_chol_R0.num_cols);
489 
490  //V = R0*Ku; m-by-n matrix
491  m_V=SGMatrix<float64_t>(m_kuu.num_rows, m_ktru.num_cols);
492  Map<MatrixXd> eigen_V(m_V.matrix, m_V.num_rows, m_V.num_cols);
493 
494  eigen_V=eigen_R0*(eigen_ktru*CMath::exp(m_log_scale*2.0));
495  m_dg=SGVector<float64_t>(m_ktrtr_diag.vlen);
496  Map<VectorXd> eigen_dg(m_dg.vector, m_dg.vlen);
497  //d0 = diagK-sum(V.*V,1)';
498  eigen_dg=eigen_ktrtr_diag*CMath::exp(m_log_scale*2.0)-(eigen_V.cwiseProduct(eigen_V)).colwise().sum().adjoint();
499 
500  // get mean vector and create eigen representation of it
501  m_mean_f=m_mean->get_mean_vector(m_features);
502  Map<VectorXd> eigen_mean(m_mean_f.vector, m_mean_f.vlen);
503 
504  // create shogun and eigen representation of function vector
505  m_mu=SGVector<float64_t>(m_mean_f.vlen);
506  Map<VectorXd> eigen_mu(m_mu, m_mu.vlen);
507 
508  float64_t Psi_New;
509  float64_t Psi_Def;
510  if (m_al.vlen!=m_labels->get_num_labels())
511  {
512  // set alpha to a zero vector
513  m_al=SGVector<float64_t>(m_labels->get_num_labels());
514  m_al.zero();
515 
516  // f = mean, if the lengths of alpha and y don't match
517  eigen_mu=eigen_mean;
518 
519  Psi_New=-SGVector<float64_t>::sum(m_model->get_log_probability_f(
520  m_labels, m_mu));
521  }
522  else
523  {
524  Map<VectorXd> eigen_alpha(m_al.vector, m_al.vlen);
525 
526  // compute f = K * alpha + m
527  SGVector<float64_t> tmp=compute_mvmK(m_al);
528  Map<VectorXd> eigen_tmp(tmp.vector, tmp.vlen);
529  eigen_mu=eigen_tmp+eigen_mean;
530 
531  Psi_New=eigen_alpha.dot(eigen_tmp)/2.0-
532  SGVector<float64_t>::sum(m_model->get_log_probability_f(m_labels, m_mu));
533 
534  Psi_Def=-SGVector<float64_t>::sum(m_model->get_log_probability_f(m_labels, m_mean_f));
535 
536  // if default is better, then use it
537  if (Psi_Def < Psi_New)
538  {
539  m_al.zero();
540  eigen_mu=eigen_mean;
541  Psi_New=Psi_Def;
542  }
543  }
544  m_Psi=Psi_New;
545 }
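// Added note summarising update_init(): besides building the FITC prior
// factors (R0, V and the residual diagonal d0), it chooses the starting point
// of the Newton iterations; the previous alpha is kept only if its objective
// Psi_New beats the default Psi_Def obtained at alpha = 0 (i.e. f = m),
// otherwise the mode finding restarts from the prior mean.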
546 
547 void CSingleFITCLaplaceInferenceMethod::register_minimizer(Minimizer* minimizer)
548 {
549  REQUIRE(minimizer, "Minimizer must be set\n");
550  if (!dynamic_cast<CSingleFITCLaplaceNewtonOptimizer*>(minimizer))
551  {
552  FirstOrderMinimizer* opt= dynamic_cast<FirstOrderMinimizer*>(minimizer);
553  REQUIRE(opt, "The provided minimizer is not supported\n")
554  }
555  CInference::register_minimizer(minimizer);
556 }
557 
558 
559 void CSingleFITCLaplaceInferenceMethod::update_alpha()
560 {
561  CSingleFITCLaplaceNewtonOptimizer *opt=dynamic_cast<CSingleFITCLaplaceNewtonOptimizer*>(m_minimizer);
562  bool cleanup=false;
563  if (opt)
564  {
565  opt->set_target(this);
566  if(this->ref_count()>1)
567  cleanup=true;
568  opt->minimize();
569  opt->unset_target(cleanup);
570  }
571  else
572  {
573  FirstOrderMinimizer* minimizer= dynamic_cast<FirstOrderMinimizer*>(m_minimizer);
574  REQUIRE(minimizer, "The provided minimizer is not supported\n");
575 
576  SingleFITCLaplaceInferenceMethodCostFunction *cost_fun=new SingleFITCLaplaceInferenceMethodCostFunction();
577  cost_fun->set_target(this);
578  if(this->ref_count()>1)
579  cleanup=true;
580  minimizer->set_cost_function(cost_fun);
581  minimizer->minimize();
582  minimizer->unset_cost_function(false);
583  cost_fun->unset_target(cleanup);
584  SG_UNREF(cost_fun);
585  }
586 
587  Map<VectorXd> eigen_mean(m_mean_f.vector, m_mean_f.vlen);
588  Map<MatrixXd> eigen_R0(m_chol_R0.matrix, m_chol_R0.num_rows, m_chol_R0.num_cols);
589  Map<MatrixXd> eigen_V(m_V.matrix, m_V.num_rows, m_V.num_cols);
590  Map<VectorXd> eigen_mu(m_mu, m_mu.vlen);
591  Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
592 
593 
594  // compute f = K * alpha + m
595  SGVector<float64_t> tmp=compute_mvmK(m_al);
596  Map<VectorXd> eigen_tmp(tmp.vector, tmp.vlen);
597  eigen_mu=eigen_tmp+eigen_mean;
598 
599  m_alpha=SGVector<float64_t>(m_chol_R0.num_rows);
600  Map<VectorXd> eigen_post_alpha(m_alpha.vector, m_alpha.vlen);
601  //post.alpha = R0'*(V*alpha);
602  //m-by-1 vector
603  eigen_post_alpha=eigen_R0.transpose()*(eigen_V*eigen_al);
604 }
605 
606 void CSingleFITCLaplaceInferenceMethod::update_chol()
607 {
608  //time complexity O(m^2*n)
609  Map<VectorXd> eigen_dg(m_dg.vector, m_dg.vlen);
610  Map<MatrixXd> eigen_R0(m_chol_R0.matrix, m_chol_R0.num_rows, m_chol_R0.num_cols);
611  Map<MatrixXd> eigen_V(m_V.matrix, m_V.num_rows, m_V.num_cols);
612 
613  // get log probability derivatives
614  m_dlp=m_model->get_log_probability_derivative_f(m_labels, m_mu, 1);
615  m_d2lp=m_model->get_log_probability_derivative_f(m_labels, m_mu, 2);
616  m_d3lp=m_model->get_log_probability_derivative_f(m_labels, m_mu, 3);
617 
618  // W = -d2lp
619  m_W=m_d2lp.clone();
620  m_W.scale(-1.0);
621 
622  Map<VectorXd> eigen_W(m_W.vector, m_W.vlen);
623  m_sW=SGVector<float64_t>(m_W.vlen);
624  Map<VectorXd> eigen_sW(m_sW.vector, m_sW.vlen);
625 
626  VectorXd Wd0_1=eigen_W.cwiseProduct(eigen_dg)+MatrixXd::Ones(eigen_W.rows(),1);
627 
628  // compute sW
629  // post.sW = sqrt(abs(W)).*sign(W); % preserve sign in case of negative
630  if (eigen_W.minCoeff()>0)
631  {
632  eigen_sW=eigen_W.cwiseSqrt();
633  }
634  else
635  {
636  eigen_sW=((eigen_W.array().abs()+eigen_W.array())/2).sqrt()-((eigen_W.array().abs()-eigen_W.array())/2).sqrt();
637  //any(1+d0.*W<0)
638  if (!(Wd0_1.array().abs().matrix()==Wd0_1))
639  m_Wneg=true;
640  }
641 
642  m_t=SGVector<float64_t>(m_W.vlen);
643  Map<VectorXd> eigen_t(m_t.vector, m_t.vlen);
644 
645  //dd = 1./(1+d0.*W);
646  VectorXd dd=MatrixXd::Ones(Wd0_1.rows(),1).cwiseQuotient(Wd0_1);
647  eigen_t=eigen_W.cwiseProduct(dd);
648 
649  //m-by-m matrix
650  SGMatrix<float64_t> A(m_V.num_rows, m_V.num_rows);
651  Map<MatrixXd> eigen_A(A.matrix, A.num_rows, A.num_cols);
652  //A = eye(nu)+(V.*repmat((W.*dd)',nu,1))*V';
653  eigen_A=eigen_V*eigen_t.asDiagonal()*eigen_V.transpose()+MatrixXd::Identity(A.num_rows,A.num_rows);
654 
655  //R0tV = R0'*V; m-by-n
656  MatrixXd R0tV=eigen_R0.transpose()*eigen_V;
657 
658  //B = R0tV.*repmat((W.*dd)',nu,1); m-by-n matrix
659  MatrixXd B=R0tV*eigen_t.asDiagonal();
660 
661  //m-by-m matrix
662  m_L=SGMatrix<float64_t>(m_V.num_rows, m_V.num_rows);
663  Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
664 
665  //post.L = -B*R0tV';
666  eigen_L=-B*R0tV.transpose();
667 
668  SGMatrix<float64_t> tmp=get_chol_inv(A);
669  Map<MatrixXd> eigen_tmp(tmp.matrix, tmp.num_rows, tmp.num_cols);
670  //RV = chol_inv(A)*V; m-by-n matrix
671  MatrixXd eigen_RV=eigen_tmp*eigen_V;
672  //RVdd m-by-n matrix
673  m_Rvdd=SGMatrix<float64_t>(m_V.num_rows, m_V.num_cols);
674  Map<MatrixXd> eigen_Rvdd(m_Rvdd.matrix, m_Rvdd.num_rows, m_Rvdd.num_cols);
675  //RVdd = RV.*repmat((W.*dd)',nu,1);
676  eigen_Rvdd=eigen_RV*eigen_t.asDiagonal();
677 
678  if (!m_Wneg)
679  {
680  //B = B*RV';
681  B=B*eigen_RV.transpose();
682  //post.L = post.L + B*B';
683  eigen_L+=B*B.transpose();
684  }
685  else
686  {
687  //B = B*V';
688  B=B*eigen_V.transpose();
689  //post.L = post.L + (B*inv(A))*B';
690  FullPivLU<MatrixXd> lu(eigen_A);
691  eigen_L+=B*lu.inverse()*B.transpose();
692  }
693 
694  m_g=SGVector<float64_t>(m_dg.vlen);
695  Map<MatrixXd> eigen_ktru(m_ktru.matrix, m_ktru.num_rows, m_ktru.num_cols);
696  Map<VectorXd> eigen_g(m_g.vector, m_g.vlen);
697  //g = d/2 + sum(((R*R0)*P).^2,1)'/2
698  eigen_g=((eigen_dg.cwiseProduct(dd)).array()+
699  ((eigen_tmp*eigen_R0)*(eigen_ktru*CMath::exp(m_log_scale*2.0))*dd.asDiagonal()
700  ).array().pow(2).colwise().sum().transpose())/2;
701 }
702 
703 void CSingleFITCLaplaceInferenceMethod::update_deriv()
704 {
705  //time complexity O(m^2*n)
706  Map<MatrixXd> eigen_R0(m_chol_R0.matrix, m_chol_R0.num_rows, m_chol_R0.num_cols);
707  Map<MatrixXd> eigen_V(m_V.matrix, m_V.num_rows, m_V.num_cols);
708  Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
709 
710  // create shogun and eigen representation of B
711  // m-by-n matrix
712  m_B=SGMatrix<float64_t>(m_V.num_rows, m_V.num_cols);
713  Map<MatrixXd> eigen_B(m_B.matrix, m_B.num_rows, m_B.num_cols);
714 
715  //B = (R0'*R0)*Ku
716  eigen_B=eigen_R0.transpose()*eigen_V;
717 
718  // create shogun and eigen representation of w
719  m_w=SGVector<float64_t>(m_B.num_rows);
720  //w = B*al;
721  Map<VectorXd> eigen_w(m_w.vector, m_w.vlen);
722  eigen_w=eigen_B*eigen_al;
723 
724  // create shogun and eigen representation of the vector dfhat
725  Map<VectorXd> eigen_d3lp(m_d3lp.vector, m_d3lp.vlen);
726  Map<VectorXd> eigen_g(m_g.vector, m_g.vlen);
727  m_dfhat=SGVector<float64_t>(m_g.vlen);
728  Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);
729 
730  // compute derivative of nlZ wrt fhat
731  // dfhat = g.*d3lp;
732  eigen_dfhat=eigen_g.cwiseProduct(eigen_d3lp);
733 }
734 
735 float64_t CSingleFITCLaplaceInferenceMethod::get_derivative_related_cov(SGVector<float64_t> ddiagKi,
736  SGMatrix<float64_t> dKuui, SGMatrix<float64_t> dKui)
737 {
738  //time complexity O(m^2*n)
739  Map<MatrixXd> eigen_R0tV(m_B.matrix, m_B.num_rows, m_B.num_cols);
740  Map<VectorXd> eigen_ddiagKi(ddiagKi.vector, ddiagKi.vlen);
741  //m-by-m matrix
742  Map<MatrixXd> eigen_dKuui(dKuui.matrix, dKuui.num_rows, dKuui.num_cols);
743  //m-by-n matrix
744  Map<MatrixXd> eigen_dKui(dKui.matrix, dKui.num_rows, dKui.num_cols);
745 
746  // compute R=2*dKui-dKuui*B
747  SGMatrix<float64_t> dA(dKui.num_rows, dKui.num_cols);
748  Map<MatrixXd> eigen_dA(dA.matrix, dA.num_rows, dA.num_cols);
749  //dA = 2*dKu'-R0tV'*dKuu;
750  //dA' = 2*dKu-dKuu'*R0tV;
751  eigen_dA=2*eigen_dKui-eigen_dKuui*eigen_R0tV;
752 
753  SGVector<float64_t> v(ddiagKi.vlen);
754  Map<VectorXd> eigen_v(v.vector, v.vlen);
755  //w = sum(dA.*R0tV',2);
756  //w' = sum(dA'.*R0tV,1);
757  //v = ddiagK-w;
758  eigen_v=eigen_ddiagKi-eigen_dA.cwiseProduct(eigen_R0tV).colwise().sum().transpose();
759 
760  //explicit term
761  float64_t result=CSingleFITCInference::get_derivative_related_cov(ddiagKi, dKuui, dKui, v, dA);
762 
763  //implicit term
764  Map<VectorXd> eigen_dlp(m_dlp.vector, m_dlp.vlen);
765  Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);
766 
767  SGVector<float64_t> b(ddiagKi.vlen);
768  Map<VectorXd> eigen_b(b.vector, b.vlen);
769  //b = dA*(R0tV*dlp) + v.*dlp;
770  eigen_b=eigen_dA.transpose()*(eigen_R0tV*eigen_dlp)+eigen_v.cwiseProduct(eigen_dlp);
771  //KZb = mvmK(mvmZ(b,RVdd,t),V,d0);
772  SGVector<float64_t> KZb=compute_mvmK(compute_mvmZ(b));
773  Map<VectorXd> eigen_KZb(KZb.vector, KZb.vlen);
774  //dnlZ.cov(i) = dnlZ.cov(i) - dfhat'*( b-KZb );
775  result-=eigen_dfhat.dot(eigen_b-eigen_KZb);
776  return result;
777 }
778 
779 SGVector<float64_t> CSingleFITCLaplaceInferenceMethod::get_derivative_wrt_inference_method(
780  const TParameter* param)
781 {
782  REQUIRE(param, "Param not set\n");
783  //time complexity O(m^2*n)
784  REQUIRE(!(strcmp(param->m_name, "log_scale")
785  && strcmp(param->m_name, "log_inducing_noise")
786  && strcmp(param->m_name, "inducing_features")),
787  "Can't compute derivative of"
788  " the negative log marginal likelihood wrt %s.%s parameter\n",
789  get_name(), param->m_name)
790 
791  SGVector<float64_t> result;
792  int32_t len;
793  if (!strcmp(param->m_name, "inducing_features"))
794  {
795  if(m_Wneg)
796  {
797  int32_t dim=m_inducing_features.num_rows;
798  int32_t num_samples=m_inducing_features.num_cols;
799  len=dim*num_samples;
800  }
801  else if (!m_fully_sparse)
802  return CSingleFITCInference::get_derivative_wrt_inducing_features(param);
803  else
804  return get_derivative_wrt_inducing_features(param);
805  }
806  else
807  len=1;
808 
809  if (m_Wneg)
810  {
811  result=SGVector<float64_t>(len);
812  return derivative_helper_when_Wneg(result, param);
813  }
814 
815  if (!strcmp(param->m_name, "log_inducing_noise"))
816  // wrt inducing_noise
817  // compute derivative wrt inducing noise
818  return get_derivative_wrt_inducing_noise(param);
819 
820  result=SGVector<float64_t>(len);
821  // wrt scale
822  // clone kernel matrices
823  SGVector<float64_t> deriv_trtr=m_ktrtr_diag.clone();
824  SGMatrix<float64_t> deriv_uu=m_kuu.clone();
825  SGMatrix<float64_t> deriv_tru=m_ktru.clone();
826 
827  // create eigen representation of kernel matrices
828  Map<VectorXd> ddiagKi(deriv_trtr.vector, deriv_trtr.vlen);
829  Map<MatrixXd> dKuui(deriv_uu.matrix, deriv_uu.num_rows, deriv_uu.num_cols);
830  Map<MatrixXd> dKui(deriv_tru.matrix, deriv_tru.num_rows, deriv_tru.num_cols);
831 
832  // compute derivatives wrt scale for each kernel matrix
833  result[0]=get_derivative_related_cov(deriv_trtr, deriv_uu, deriv_tru);
834  result[0]*=CMath::exp(m_log_scale*2.0)*2.0;
835  return result;
836 }
837 
838 SGVector<float64_t> CSingleFITCLaplaceInferenceMethod::get_derivative_wrt_likelihood_model(
839  const TParameter* param)
840 {
841  SGVector<float64_t> result(1);
842  if (m_Wneg)
843  return derivative_helper_when_Wneg(result, param);
844 
845  // get derivatives wrt likelihood model parameters
846  SGVector<float64_t> lp_dhyp=m_model->get_first_derivative(m_labels,
847  m_mu, param);
848  SGVector<float64_t> dlp_dhyp=m_model->get_second_derivative(m_labels,
849  m_mu, param);
850  SGVector<float64_t> d2lp_dhyp=m_model->get_third_derivative(m_labels,
851  m_mu, param);
852 
853  // create eigen representation of the derivatives
854  Map<VectorXd> eigen_lp_dhyp(lp_dhyp.vector, lp_dhyp.vlen);
855  Map<VectorXd> eigen_dlp_dhyp(dlp_dhyp.vector, dlp_dhyp.vlen);
856  Map<VectorXd> eigen_d2lp_dhyp(d2lp_dhyp.vector, d2lp_dhyp.vlen);
857  Map<VectorXd> eigen_g(m_g.vector, m_g.vlen);
858  Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);
859 
860  //explicit term
861  //dnlZ.lik(i) = -g'*d2lp_dhyp - sum(lp_dhyp);
862  result[0]=-eigen_g.dot(eigen_d2lp_dhyp)-eigen_lp_dhyp.sum();
863 
864  //implicit term
865  //b = mvmK(dlp_dhyp,V,d0);
866  SGVector<float64_t> b=compute_mvmK(dlp_dhyp);
867  //dnlZ.lik(i) = dnlZ.lik(i) - dfhat'*(b-mvmK(mvmZ(b,RVdd,t),V,d0));
868  result[0]-=get_derivative_implicit_term_helper(b);
869 
870  return result;
871 }
872 
873 SGVector<float64_t> CSingleFITCLaplaceInferenceMethod::get_derivative_wrt_kernel(
874  const TParameter* param)
875 {
876  REQUIRE(param, "Param not set\n");
877  SGVector<float64_t> result;
878  int64_t len=const_cast<TParameter *>(param)->m_datatype.get_num_elements();
879  result=SGVector<float64_t>(len);
880 
881  if (m_Wneg)
882  return derivative_helper_when_Wneg(result, param);
883 
884  m_lock->lock();
885  CFeatures *inducing_features=get_inducing_features();
886  for (index_t i=0; i<result.vlen; i++)
887  {
888  //time complexity O(m^2*n)
889  SGVector<float64_t> deriv_trtr;
890  SGMatrix<float64_t> deriv_uu;
891  SGMatrix<float64_t> deriv_tru;
892 
893  m_kernel->init(m_features, m_features);
894  deriv_trtr=m_kernel->get_parameter_gradient_diagonal(param, i);
895 
896  m_kernel->init(inducing_features, inducing_features);
897  deriv_uu=m_kernel->get_parameter_gradient(param, i);
898 
899  m_kernel->init(inducing_features, m_features);
900  deriv_tru=m_kernel->get_parameter_gradient(param, i);
901 
902  // create eigen representation of derivatives
903  Map<VectorXd> ddiagKi(deriv_trtr.vector, deriv_trtr.vlen);
904  Map<MatrixXd> dKuui(deriv_uu.matrix, deriv_uu.num_rows,
905  deriv_uu.num_cols);
906  Map<MatrixXd> dKui(deriv_tru.matrix, deriv_tru.num_rows,
907  deriv_tru.num_cols);
908 
909  result[i]=get_derivative_related_cov(deriv_trtr, deriv_uu, deriv_tru);
910  result[i]*=CMath::exp(m_log_scale*2.0);
911  }
912  SG_UNREF(inducing_features);
913  m_lock->unlock();
914 
915  return result;
916 }
917 
918 float64_t CSingleFITCLaplaceInferenceMethod::get_derivative_related_mean(SGVector<float64_t> dmu)
919 {
920  //time complexity O(m*n)
921  //explicit term
922  float64_t result=CSingleFITCInference::get_derivative_related_mean(dmu);
923 
924  //implicit term
925  //Zdm = mvmZ(dm,RVdd,t);
926  //tmp = mvmK(Zdm,V,d0)
927  //dnlZ.mean(i) = dnlZ.mean(i) - dfhat'*(dm-mvmK(Zdm,V,d0));
928  result-=get_derivative_implicit_term_helper(dmu);
929 
930  return result;
931 }
932 
933 float64_t CSingleFITCLaplaceInferenceMethod::get_derivative_implicit_term_helper(SGVector<float64_t> d)
934 {
935  //time complexity O(m*n)
936  Map<VectorXd> eigen_d(d.vector, d.vlen);
937  SGVector<float64_t> tmp=compute_mvmK(compute_mvmZ(d));
938  Map<VectorXd> eigen_tmp(tmp.vector, tmp.vlen);
939  Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);
940  return eigen_dfhat.dot(eigen_d-eigen_tmp);
941 }
942 
943 SGVector<float64_t> CSingleFITCLaplaceInferenceMethod::get_derivative_wrt_mean(
944  const TParameter* param)
945 {
946  //time complexity O(m*n)
947  REQUIRE(param, "Param not set\n");
948  SGVector<float64_t> result;
949  int64_t len=const_cast<TParameter *>(param)->m_datatype.get_num_elements();
950  result=SGVector<float64_t>(len);
951 
952  if (m_Wneg)
953  return derivative_helper_when_Wneg(result, param);
954 
955  for (index_t i=0; i<result.vlen; i++)
956  {
957  SGVector<float64_t> dmu=m_mean->get_parameter_derivative(m_features, param, i);
958 
959  result[i]=get_derivative_related_mean(dmu);
960  }
961 
962  return result;
963 }
964 
965 SGVector<float64_t> CSingleFITCLaplaceInferenceMethod::derivative_helper_when_Wneg(
966  SGVector<float64_t> res, const TParameter *param)
967 {
968  REQUIRE(param, "Param not set\n");
969  SG_WARNING("Derivative wrt %s cannot be computed since W (the Hessian (diagonal) matrix) is too negative\n", param->m_name);
970  //dnlZ = struct('cov',0*hyp.cov, 'mean',0*hyp.mean, 'lik',0*hyp.lik);
971  res.zero();
972  return res;
973 }
974 
975 SGVector<float64_t> CSingleFITCLaplaceInferenceMethod::get_derivative_wrt_inducing_features(
976  const TParameter* param)
977 {
978  //time complexity depends on the implementation of the provided kernel
979  //time complexity is at least O(max((p*n*m),(m^2*n))), where p is the dimension (#) of features
980  //For an ARD kernel with KL_FULL, the time complexity is O(max((p*n*m*d),(m^2*n)))
981  //where the parameter \f$\Lambda\f$ of the ARD kernel is a \f$d\f$-by-\f$p\f$ matrix,
982  //For an ARD kernel with KL_SCALE and KL_DIAG, the time complexity is O(max((p*n*m),(m^2*n)))
983  //efficiently compute the implicit term and explicit term at one shot
984  Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
985  Map<MatrixXd> eigen_B(m_B.matrix, m_B.num_rows, m_B.num_cols);
986  //w=B*al
987  Map<VectorXd> eigen_w(m_w.vector, m_w.vlen);
988  Map<MatrixXd> eigen_Rvdd(m_Rvdd.matrix, m_Rvdd.num_rows, m_Rvdd.num_cols);
989  Map<VectorXd> eigen_dlp(m_dlp.vector, m_dlp.vlen);
990  Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);
991 
992  //q = dfhat - mvmZ(mvmK(dfhat,V,d0),RVdd,t);
993  SGVector<float64_t> q=compute_mvmZ(compute_mvmK(m_dfhat));
994  Map<VectorXd> eigen_q(q.vector, q.vlen);
995  eigen_q=eigen_dfhat-eigen_q;
996 
997  //explicit term
998  //diag_dK = alpha.*alpha + sum(RVdd.*RVdd,1)'-t, where t can be cancelled out
999  //-v_1=get_derivative_related_cov_diagonal= -(alpha.*alpha + sum(RVdd.*RVdd,1)')
1000  //implicit term
1001  //-v_2=-2*dlp.*q
1002  //neg_v = -(diag_dK+ 2*dlp.*q);
1003  SGVector<float64_t> neg_v=get_derivative_related_cov_diagonal();
1004  Map<VectorXd> eigen_neg_v(neg_v.vector, neg_v.vlen);
1005  eigen_neg_v-=2*eigen_dlp.cwiseProduct(eigen_q);
1006 
1007  SGMatrix<float64_t> BdK(m_B.num_rows, m_B.num_cols);
1008  Map<MatrixXd> eigen_BdK(BdK.matrix, BdK.num_rows, BdK.num_cols);
1009  //explicit
1010  //BdK = (B*alpha)*alpha' + (B*RVdd')*RVdd - B.*repmat(v_1',nu,1),
1011  //implicit
1012  //BdK = BdK + (B*dlp)*q' + (B*q)*dlp' - B.*repmat(v_2',nu,1)
1013  //where v_1 is the explicit part of v and v_2 is the implicit part of v
1014  //v=v_1+v_2
1015  eigen_BdK=eigen_B*eigen_neg_v.asDiagonal()+eigen_w*(eigen_al.transpose())+
1016  (eigen_B*eigen_Rvdd.transpose())*eigen_Rvdd+
1017  (eigen_B*eigen_dlp)*eigen_q.transpose()+(eigen_B*eigen_q)*eigen_dlp.transpose();
1018 
1019  return get_derivative_related_inducing_features(BdK, param);
1020 }
1021 
1022 SGVector<float64_t> CSingleFITCLaplaceInferenceMethod::get_derivative_wrt_inducing_noise(
1023  const TParameter* param)
1024 {
1025  //time complexity O(m^2*n)
1026  //explicit term
1027  SGVector<float64_t> result=CSingleFITCInference::get_derivative_wrt_inducing_noise(param);
1028 
1029  //implicit term
1030  Map<MatrixXd> eigen_B(m_B.matrix, m_B.num_rows, m_B.num_cols);
1031  Map<VectorXd> eigen_dlp(m_dlp.vector, m_dlp.vlen);
1032 
1033  //snu = sqrt(snu2);
1034  //T = chol_inv(Kuu + snu2*eye(nu)); T = T'*(T*(snu*Ku));
1035  //t1 = sum(T.*T,1)';
1036  VectorXd eigen_t1=eigen_B.cwiseProduct(eigen_B).colwise().sum().adjoint();
1037 
1038  //b = (t1.*dlp-T'*(T*dlp))*2;
1039  SGVector<float64_t> b(eigen_t1.rows());
1040  Map<VectorXd> eigen_b(b.vector, b.vlen);
1041  float64_t factor=2.0*CMath::exp(m_log_ind_noise);
1042  eigen_b=(eigen_t1.cwiseProduct(eigen_dlp)-eigen_B.transpose()*(eigen_B*eigen_dlp))*factor;
1043 
1044  //KZb = mvmK(mvmZ(b,RVdd,t),V,d0);
1045  //z = z - dfhat'*( b-KZb );
1046  result[0]-=get_derivative_implicit_term_helper(b);
1047 
1048  return result;
1049 }
1050 
1051 SGVector<float64_t> CSingleFITCLaplaceInferenceMethod::get_posterior_mean()
1052 {
1053  compute_gradient();
1054 
1055  SGVector<float64_t> res(m_mu.vlen);
1056  Map<VectorXd> eigen_res(res.vector, res.vlen);
1057 
1058  /*
1059  //true posterior mean with equivalent FITC prior approximated by Newton method
1060  //time complexity O(n)
1061  Map<VectorXd> eigen_mu(m_mu, m_mu.vlen);
1062  SGVector<float64_t> mean=m_mean->get_mean_vector(m_features);
1063  Map<VectorXd> eigen_mean(mean.vector, mean.vlen);
1064  eigen_res=eigen_mu-eigen_mean;
1065  */
1066 
1067  //FITC (further) approximated posterior mean with Newton method
1068  //time complexity of the following operation is O(m*n)
1069  Map<VectorXd> eigen_post_alpha(m_alpha.vector, m_alpha.vlen);
1070  Map<MatrixXd> eigen_Ktru(m_ktru.matrix, m_ktru.num_rows, m_ktru.num_cols);
1071  eigen_res=CMath::exp(m_log_scale*2.0)*eigen_Ktru.adjoint()*eigen_post_alpha;
1072 
1073  return res;
1074 }
1075 
1076 SGMatrix<float64_t> CSingleFITCLaplaceInferenceMethod::get_posterior_covariance()
1077 {
1078  compute_gradient();
1079  //time complexity of the following operations is O(m*n^2)
1080  //Warning: the time complexity increases from O(m^2*n) to O(n^2*m) if this method is called
1081  m_Sigma=SGMatrix<float64_t>(m_ktrtr_diag.vlen, m_ktrtr_diag.vlen);
1082  Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows,
1083  m_Sigma.num_cols);
1084 
1085  //FITC (further) approximated posterior covariance with Newton method
1086  Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
1087  Map<MatrixXd> eigen_V(m_V.matrix, m_V.num_rows, m_V.num_cols);
1088  Map<VectorXd> eigen_dg(m_dg.vector, m_dg.vlen);
1089  Map<MatrixXd> eigen_Ktru(m_ktru.matrix, m_ktru.num_rows, m_ktru.num_cols);
1090 
1091  MatrixXd diagonal_part=eigen_dg.asDiagonal();
1092  //FITC equivalent prior
1093  MatrixXd prior=eigen_V.transpose()*eigen_V+diagonal_part;
1094 
1095  MatrixXd tmp=CMath::exp(m_log_scale*2.0)*eigen_Ktru;
1096  eigen_Sigma=prior-tmp.adjoint()*eigen_L*tmp;
1097 
1098  /*
1099  //true posterior mean with equivalent FITC prior approximated by Newton method
1100  Map<VectorXd> eigen_t(m_t.vector, m_t.vlen);
1101  Map<MatrixXd> eigen_Rvdd(m_Rvdd.matrix, m_Rvdd.num_rows, m_Rvdd.num_cols);
1102  Map<VectorXd> eigen_W(m_W.vector, m_W.vlen);
1103  MatrixXd tmp1=eigen_Rvdd*prior;
1104  eigen_Sigma=prior+tmp.transpose()*tmp;
1105 
1106  MatrixXd tmp2=((eigen_dg.cwiseProduct(eigen_t)).asDiagonal()*eigen_V.transpose())*eigen_V;
1107  eigen_Sigma-=(tmp2+tmp2.transpose());
1108  eigen_Sigma-=eigen_V.transpose()*(eigen_V*eigen_t.asDiagonal()*eigen_V.transpose())*eigen_V;
1109  MatrixXd tmp3=((eigen_dg.cwiseProduct(eigen_dg)).cwiseProduct(eigen_t)).asDiagonal();
1110  eigen_Sigma-=tmp3;
1111  */
1112 
1113  return SGMatrix<float64_t>(m_Sigma);
1114 }
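// Hypothetical end-to-end sketch (added; the data set-up, kernel and
// likelihood choices below are illustrative assumptions, not part of this
// file):
//
//   CDenseFeatures<float64_t>* feats=new CDenseFeatures<float64_t>(X);          // n training inputs
//   CDenseFeatures<float64_t>* lat=new CDenseFeatures<float64_t>(X_inducing);   // m inducing inputs
//   CRegressionLabels* lab=new CRegressionLabels(y);
//   CGaussianKernel* kernel=new CGaussianKernel(10, 2.0);
//   CZeroMean* mean=new CZeroMean();
//   CStudentsTLikelihood* lik=new CStudentsTLikelihood();
//   CSingleFITCLaplaceInferenceMethod* inf=new CSingleFITCLaplaceInferenceMethod(
//       kernel, feats, mean, lab, lik, lat);
//   float64_t nlZ=inf->get_negative_log_marginal_likelihood();
//   SGVector<float64_t> post_mean=inf->get_posterior_mean();
//   SGMatrix<float64_t> post_cov=inf->get_posterior_covariance();
//   SG_UNREF(inf);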
1115 
1116 float64_t CSingleFITCLaplaceInferenceMethod::get_psi_wrt_alpha()
1117 {
1118  //time complexity O(m*n)
1119  Map<VectorXd> eigen_alpha(m_al, m_al.vlen);
1120  SGVector<float64_t> f(m_al.vlen);
1121  Map<VectorXd> eigen_f(f.vector, f.vlen);
1122  Map<VectorXd> eigen_mean_f(m_mean_f.vector,m_mean_f.vlen);
1123  /* f = K * alpha + mean_f given alpha*/
1124  SGVector<float64_t> tmp=compute_mvmK(m_al);
1125  Map<VectorXd> eigen_tmp(tmp.vector, tmp.vlen);
1126  eigen_f=eigen_tmp+eigen_mean_f;
1127 
1128  /* psi = 0.5 * alpha' * (f - m) - sum(lp) */
1129  float64_t psi=eigen_alpha.dot(eigen_tmp) * 0.5;
1130  psi-=SGVector<float64_t>::sum(m_model->get_log_probability_f(m_labels, f));
1131 
1132  return psi;
1133 }
1134 
1135 void CSingleFITCLaplaceInferenceMethod::get_gradient_wrt_alpha(SGVector<float64_t> gradient)
1136 {
1137  //time complexity O(m*n)
1138  Map<VectorXd> eigen_alpha(m_al, m_al.vlen);
1139  Map<VectorXd> eigen_gradient(gradient.vector, gradient.vlen);
1140  SGVector<float64_t> f(m_al.vlen);
1141  Map<VectorXd> eigen_f(f.vector, f.vlen);
1142  Map<MatrixXd> kernel(m_ktrtr.matrix,
1143  m_ktrtr.num_rows,
1144  m_ktrtr.num_cols);
1145  Map<VectorXd> eigen_mean_f(m_mean_f.vector, m_mean_f.vlen);
1146 
1147  /* f = K * alpha + mean_f given alpha*/
1148  SGVector<float64_t> tmp=compute_mvmK(m_al);
1149  Map<VectorXd> eigen_tmp(tmp.vector, tmp.vlen);
1150  eigen_f=eigen_tmp+eigen_mean_f;
1151 
1152  SGVector<float64_t> dlp_f =
1153  m_model->get_log_probability_derivative_f(m_labels, f, 1);
1154 
1155  Map<VectorXd> eigen_dlp_f(dlp_f.vector, dlp_f.vlen);
1156 
1157  /* g_alpha = K * (alpha - dlp_f)*/
1158  SGVector<float64_t> tmp2(m_al.vlen);
1159  Map<VectorXd> eigen_tmp2(tmp2.vector, tmp2.vlen);
1160  eigen_tmp2=eigen_alpha-eigen_dlp_f;
1161  tmp2=compute_mvmK(tmp2);
1162  Map<VectorXd> eigen_tmp3(tmp2.vector, tmp2.vlen);
1163  eigen_gradient=eigen_tmp3;
1164 }
1165 
1166 }