SHOGUN 3.2.1
KLInferenceMethod.cpp
/*
 * Copyright (c) The Shogun Machine Learning Toolbox
 * Written (w) 2014 Wu Lin
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation are those
 * of the authors and should not be interpreted as representing official policies,
 * either expressed or implied, of the Shogun Development Team.
 *
 * This code is specifically adapted from the functions in approxKL.m and infKL.m
 */

#include <shogun/machine/gp/KLInferenceMethod.h>

#ifdef HAVE_EIGEN3
#include <shogun/machine/gp/VariationalGaussianLikelihood.h>

using namespace Eigen;

namespace shogun
{

CKLInferenceMethod::CKLInferenceMethod() : CInferenceMethod()
{
    init();
}

CKLInferenceMethod::CKLInferenceMethod(CKernel* kern,
    CFeatures* feat, CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod)
    : CInferenceMethod(kern, feat, m, lab, mod)
{
    init();
    check_variational_likelihood(mod);
}

void CKLInferenceMethod::check_variational_likelihood(CLikelihoodModel* mod) const
{
    REQUIRE(mod, "The likelihood model must not be NULL\n")
    CVariationalGaussianLikelihood* lik=dynamic_cast<CVariationalGaussianLikelihood*>(mod);
    REQUIRE(lik,
        "The provided likelihood model (%s) must support variational Gaussian inference. "
        "Please use a variational Gaussian likelihood model\n",
        mod->get_name());
}

void CKLInferenceMethod::set_model(CLikelihoodModel* mod)
{
    check_variational_likelihood(mod);
    CInferenceMethod::set_model(mod);
}

void CKLInferenceMethod::init()
{
    m_noise_factor=1e-10;
    m_max_attempt=0;
    m_exp_factor=2;
    m_min_coeff_kernel=1e-5;
    SG_ADD(&m_noise_factor, "noise_factor",
        "The noise factor used for correcting the kernel matrix",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_exp_factor, "exp_factor",
        "The exponential factor used for increasing noise_factor",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_max_attempt, "max_attempt",
        "The max number of attempts to correct the kernel matrix",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_min_coeff_kernel, "min_coeff_kernel",
        "The minimum coefficient of the kernel matrix in the LDLT factorization used to check whether the kernel matrix is positive definite or not",
        MS_NOT_AVAILABLE);

    set_lbfgs_parameters();
    SG_ADD(&m_m, "m",
        "The number of corrections to approximate the inverse Hessian matrix",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_max_linesearch, "max_linesearch",
        "The maximum number of trials to do line search for each L-BFGS update",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_linesearch, "linesearch",
        "The line search algorithm",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_max_iterations, "max_iterations",
        "The maximum number of iterations for L-BFGS update",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_delta, "delta",
        "Delta for convergence test based on the change of function value",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_past, "past",
        "Distance for delta-based convergence test",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_epsilon, "epsilon",
        "Epsilon for convergence test based on the change of gradient",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_min_step, "min_step",
        "The minimum step of the line search",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_max_step, "max_step",
        "The maximum step of the line search",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_ftol, "ftol",
        "A parameter used in the Armijo condition",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_wolfe, "wolfe",
        "A parameter used in the curvature condition",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_gtol, "gtol",
        "A parameter used in the More-Thuente line search to control accuracy",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_xtol, "xtol",
        "The machine precision for floating-point values",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_orthantwise_c, "orthantwise_c",
        "Coefficient for the L1 norm of variables",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_orthantwise_start, "orthantwise_start",
        "Start index for computing the L1 norm of the variables",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_orthantwise_end, "orthantwise_end",
        "End index for computing the L1 norm of the variables",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_s2, "s2",
        "Variational parameter sigma2",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_mu, "mu",
        "Variational parameter mu and posterior mean",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_Sigma, "Sigma",
        "Posterior covariance matrix Sigma",
        MS_NOT_AVAILABLE);
}

CKLInferenceMethod::~CKLInferenceMethod()
{
}

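// update() runs the full KL inference pipeline: the base-class update refreshes
// the training kernel matrix, update_init() corrects and factorizes it, and the
// remaining update_* hooks (implemented by concrete subclasses) refresh the
// variational parameters, the Cholesky factor, the approximate posterior
// covariance, and the derivative caches.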
void CKLInferenceMethod::update()
{
    SG_DEBUG("entering\n");

    CInferenceMethod::update();
    update_init();
    update_alpha();
    update_chol();
    update_approx_cov();
    update_deriv();
    update_parameter_hash();

    SG_DEBUG("leaving\n");
}

void CKLInferenceMethod::set_noise_factor(float64_t noise_factor)
{
    REQUIRE(noise_factor>=0, "The noise_factor %.20f should be non-negative\n", noise_factor);
    m_noise_factor=noise_factor;
}

void CKLInferenceMethod::set_min_coeff_kernel(float64_t min_coeff_kernel)
{
    REQUIRE(min_coeff_kernel>=0, "The min_coeff_kernel %.20f should be non-negative\n", min_coeff_kernel);
    m_min_coeff_kernel=min_coeff_kernel;
}

void CKLInferenceMethod::set_max_attempt(index_t max_attempt)
{
    REQUIRE(max_attempt>=0, "The max_attempt %d should be non-negative. 0 means infinitely many attempts\n", max_attempt);
    m_max_attempt=max_attempt;
}

void CKLInferenceMethod::set_exp_factor(float64_t exp_factor)
{
    REQUIRE(exp_factor>1.0, "The exp_factor %f should be greater than 1.0\n", exp_factor);
    m_exp_factor=exp_factor;
}

void CKLInferenceMethod::update_init()
{
    update_init_helper();
}

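// update_init_helper() guards the LDLT factorization of the scaled kernel matrix
// K*scale^2 against numerical indefiniteness: starting from m_noise_factor, it
// adds jitter to the diagonal and multiplies the jitter by m_exp_factor until the
// smallest coefficient of D in the LDLT factorization exceeds m_min_coeff_kernel,
// erroring out after m_max_attempt attempts (0 means unlimited attempts).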
Eigen::LDLT<Eigen::MatrixXd> CKLInferenceMethod::update_init_helper()
{
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);

    eigen_K=eigen_K+m_noise_factor*MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols);

    Eigen::LDLT<Eigen::MatrixXd> ldlt;
    ldlt.compute(eigen_K*CMath::sq(m_scale));

    float64_t attempt_count=0;
    MatrixXd Kernel_D=ldlt.vectorD();
    float64_t noise_factor=m_noise_factor;

    while (Kernel_D.minCoeff()<=m_min_coeff_kernel)
    {
        if (m_max_attempt>0 && attempt_count>m_max_attempt)
            SG_ERROR("The kernel matrix is highly non-positive definite since the min_coeff_kernel is less than %.20f"
                " even when adding %.20f noise to the diagonal elements at max %d attempts\n",
                m_min_coeff_kernel, noise_factor, m_max_attempt);
        attempt_count++;
        float64_t pre_noise_factor=noise_factor;
        noise_factor*=m_exp_factor;
        // update the noise: eigen_K=eigen_K+noise_factor*(m_exp_factor^attempt_count)*Identity()
        eigen_K=eigen_K+(noise_factor-pre_noise_factor)*MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols);
        ldlt.compute(eigen_K*CMath::sq(m_scale));
        Kernel_D=ldlt.vectorD();
    }

    return ldlt;
}

SGVector<float64_t> CKLInferenceMethod::get_posterior_mean()
{
    if (parameter_hash_changed())
        update();

    return SGVector<float64_t>(m_mu);
}

SGMatrix<float64_t> CKLInferenceMethod::get_posterior_covariance()
{
    if (parameter_hash_changed())
        update();

    return SGMatrix<float64_t>(m_Sigma);
}

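// evaluate() is the callback handed to the liblbfgs optimizer (see
// lbfgs_optimization() below): "obj" carries the CKLInferenceMethod instance,
// "parameters" and "gradient" point to the current variational parameters and
// the gradient buffer of length "dim" (written in place via a non-owning
// SGVector wrapper), and the return value is the objective, i.e. the negative
// log marginal likelihood (nlml).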
float64_t CKLInferenceMethod::evaluate(void *obj, const float64_t *parameters,
    float64_t *gradient, const int dim, const float64_t step)
{
    /* Note that parameters = parameters_pre_iter - step * gradient_pre_iter */
    CKLInferenceMethod* obj_ptr=static_cast<CKLInferenceMethod*>(obj);

    REQUIRE(obj_ptr, "The instance object passed to the L-BFGS optimizer should not be NULL\n");

    obj_ptr->lbfgs_precompute();
    float64_t nlml=obj_ptr->get_nlml_wrt_parameters();

    SGVector<float64_t> sg_gradient(gradient, dim, false);
    obj_ptr->get_gradient_of_nlml_wrt_parameters(sg_gradient);

    return nlml;
}

CVariationalGaussianLikelihood* CKLInferenceMethod::get_variational_likelihood() const
{
    check_variational_likelihood(m_model);
    CVariationalGaussianLikelihood* lik=dynamic_cast<CVariationalGaussianLikelihood*>(m_model);
    return lik;
}

float64_t CKLInferenceMethod::get_nlml_wrt_parameters()
{
    CVariationalGaussianLikelihood* lik=get_variational_likelihood();
    lik->set_variational_distribution(m_mu, m_s2, m_labels);
    return get_negative_log_marginal_likelihood_helper();
}

void CKLInferenceMethod::set_lbfgs_parameters(
    int m,
    int max_linesearch,
    int linesearch,
    int max_iterations,
    float64_t delta,
    int past,
    float64_t epsilon,
    float64_t min_step,
    float64_t max_step,
    float64_t ftol,
    float64_t wolfe,
    float64_t gtol,
    float64_t xtol,
    float64_t orthantwise_c,
    int orthantwise_start,
    int orthantwise_end)
{
    m_m = m;
    m_max_linesearch = max_linesearch;
    m_linesearch = linesearch;
    m_max_iterations = max_iterations;
    m_delta = delta;
    m_past = past;
    m_epsilon = epsilon;
    m_min_step = min_step;
    m_max_step = max_step;
    m_ftol = ftol;
    m_wolfe = wolfe;
    m_gtol = gtol;
    m_xtol = xtol;
    m_orthantwise_c = orthantwise_c;
    m_orthantwise_start = orthantwise_start;
    m_orthantwise_end = orthantwise_end;
}

float64_t CKLInferenceMethod::get_negative_log_marginal_likelihood()
{
    if (parameter_hash_changed())
        update();

    return get_negative_log_marginal_likelihood_helper();
}

SGVector<float64_t> CKLInferenceMethod::get_derivative_wrt_likelihood_model(const TParameter* param)
{
    CVariationalGaussianLikelihood* lik=get_variational_likelihood();
    if (!lik->supports_derivative_wrt_hyperparameter())
        return SGVector<float64_t>();

    //%lp_dhyp = likKL(v,lik,hyp.lik,y,K*post.alpha+m,[],[],j);
    SGVector<float64_t> lp_dhyp=lik->get_first_derivative_wrt_hyperparameter(param);
    Map<VectorXd> eigen_lp_dhyp(lp_dhyp.vector, lp_dhyp.vlen);
    SGVector<float64_t> result(1);
    //%dnlZ.lik(j) = -sum(lp_dhyp);
    result[0]=-eigen_lp_dhyp.sum();

    return result;
}

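// For vector-valued mean parameters the result has one entry per component and
// the loop below queries the mean function for each component's derivative;
// scalar parameters take the single-argument overload. Each gradient entry is
// -alpha'*dmu, matching the infKL.m expression quoted in the comments.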
SGVector<float64_t> CKLInferenceMethod::get_derivative_wrt_mean(const TParameter* param)
{
    // create eigen representation of alpha
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen/2);

    SGVector<float64_t> result;

    if (param->m_datatype.m_ctype==CT_VECTOR ||
        param->m_datatype.m_ctype==CT_SGVECTOR)
    {
        REQUIRE(param->m_datatype.m_length_y,
            "Length of the parameter %s should not be NULL\n", param->m_name)

        result=SGVector<float64_t>(*(param->m_datatype.m_length_y));
    }
    else
    {
        result=SGVector<float64_t>(1);
    }

    for (index_t i=0; i<result.vlen; i++)
    {
        SGVector<float64_t> dmu;

        //%dm_t = feval(mean{:}, hyp.mean, x, j);
        if (result.vlen==1)
            dmu=m_mean->get_parameter_derivative(m_features, param);
        else
            dmu=m_mean->get_parameter_derivative(m_features, param, i);

        Map<VectorXd> eigen_dmu(dmu.vector, dmu.vlen);

        //%dnlZ.mean(j) = -alpha'*dm_t;
        result[i]=-eigen_alpha.dot(eigen_dmu);
    }

    return result;
}

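// lbfgs_optimization() copies the stored L-BFGS settings into a
// lbfgs_parameter_t and minimizes the negative log marginal likelihood over the
// variational parameters stored in m_alpha, using evaluate() above as the
// objective/gradient callback. The optimal objective value is returned.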
float64_t CKLInferenceMethod::lbfgs_optimization()
{
    lbfgs_parameter_t lbfgs_param;
    lbfgs_param.m = m_m;
    lbfgs_param.max_linesearch = m_max_linesearch;
    lbfgs_param.linesearch = m_linesearch;
    lbfgs_param.max_iterations = m_max_iterations;
    lbfgs_param.delta = m_delta;
    lbfgs_param.past = m_past;
    lbfgs_param.epsilon = m_epsilon;
    lbfgs_param.min_step = m_min_step;
    lbfgs_param.max_step = m_max_step;
    lbfgs_param.ftol = m_ftol;
    lbfgs_param.wolfe = m_wolfe;
    lbfgs_param.gtol = m_gtol;
    lbfgs_param.xtol = m_xtol;
    lbfgs_param.orthantwise_c = m_orthantwise_c;
    lbfgs_param.orthantwise_start = m_orthantwise_start;
    lbfgs_param.orthantwise_end = m_orthantwise_end;

    float64_t nlml_opt=0;
    void* obj_ptr=static_cast<void*>(this);
    lbfgs(m_alpha.vlen, m_alpha.vector, &nlml_opt,
        CKLInferenceMethod::evaluate,
        NULL, obj_ptr, &lbfgs_param);

    return nlml_opt;
}

SGVector<float64_t> CKLInferenceMethod::get_derivative_wrt_inference_method(const TParameter* param)
{
    REQUIRE(!strcmp(param->m_name, "scale"), "Can't compute derivative of "
        "the negative log marginal likelihood wrt %s.%s parameter\n",
        get_name(), param->m_name)

    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    // compute derivative of K wrt scale
    MatrixXd eigen_dK=eigen_K*m_scale*2.0;

    SGVector<float64_t> result(1);

    result[0]=get_derivative_related_cov(eigen_dK);

    return result;
}

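// The kernel case mirrors get_derivative_wrt_mean() above: one result entry per
// component for vector-valued kernel parameters, with each parameter gradient
// dK scaled by scale^2 before being passed to get_derivative_related_cov().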
SGVector<float64_t> CKLInferenceMethod::get_derivative_wrt_kernel(const TParameter* param)
{
    SGVector<float64_t> result;

    if (param->m_datatype.m_ctype==CT_VECTOR ||
        param->m_datatype.m_ctype==CT_SGVECTOR)
    {
        REQUIRE(param->m_datatype.m_length_y,
            "Length of the parameter %s should not be NULL\n", param->m_name)
        result=SGVector<float64_t>(*(param->m_datatype.m_length_y));
    }
    else
    {
        result=SGVector<float64_t>(1);
    }

    for (index_t i=0; i<result.vlen; i++)
    {
        SGMatrix<float64_t> dK;

        //dK = feval(covfunc{:},hyper,x,j);
        if (result.vlen==1)
            dK=m_kernel->get_parameter_gradient(param);
        else
            dK=m_kernel->get_parameter_gradient(param, i);

        // note: rows and columns were swapped here in the original listing;
        // kernel gradient matrices are square, so the behavior is unchanged
        Map<MatrixXd> eigen_dK(dK.matrix, dK.num_rows, dK.num_cols);

        result[i]=get_derivative_related_cov(eigen_dK*CMath::sq(m_scale));
    }

    return result;
}

SGMatrix<float64_t> CKLInferenceMethod::get_cholesky()
{
    if (parameter_hash_changed())
        update();

    return SGMatrix<float64_t>(m_L);
}

} /* namespace shogun */

#endif /* HAVE_EIGEN3 */
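
The methods above are the shared scaffolding of Shogun's KL-based approximate GP
inference; the update_alpha()/update_chol() hooks are pure in this class and live
in concrete subclasses. As orientation, here is a minimal usage sketch. It assumes
a concrete subclass (CKLCholeskyInferenceMethod) and a variational Gaussian
likelihood (CLogitVGLikelihood) from the same development line; these class names
and header paths are assumptions, not shown in this file.

    // Hedged sketch: binary GP classification via KL inference.
    #include <shogun/features/DenseFeatures.h>
    #include <shogun/labels/BinaryLabels.h>
    #include <shogun/kernel/GaussianKernel.h>
    #include <shogun/machine/gp/ZeroMean.h>
    #include <shogun/machine/gp/LogitVGLikelihood.h>          // assumed header
    #include <shogun/machine/gp/KLCholeskyInferenceMethod.h>  // assumed header

    using namespace shogun;

    void kl_inference_example(SGMatrix<float64_t> X, SGVector<float64_t> y)
    {
        CDenseFeatures<float64_t>* feats=new CDenseFeatures<float64_t>(X);
        CGaussianKernel* kernel=new CGaussianKernel(10, 2.0); // cache size, width
        CZeroMean* mean=new CZeroMean();
        CBinaryLabels* labels=new CBinaryLabels(y);
        CLogitVGLikelihood* lik=new CLogitVGLikelihood();

        // The constructor runs check_variational_likelihood(), so a
        // non-variational likelihood model would be rejected here.
        CKLCholeskyInferenceMethod* inf=new CKLCholeskyInferenceMethod(
            kernel, feats, mean, labels, lik);

        // Tune the kernel-correction loop of update_init_helper().
        inf->set_noise_factor(1e-10);
        inf->set_exp_factor(2.0);
        inf->set_max_attempt(10);

        // Each getter triggers update() if the parameters changed.
        SGVector<float64_t> mu=inf->get_posterior_mean();
        SGMatrix<float64_t> Sigma=inf->get_posterior_covariance();
        float64_t nlml=inf->get_negative_log_marginal_likelihood();
        SG_SPRINT("negative log marginal likelihood=%f\n", nlml);

        SG_UNREF(inf);
    }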
