KLInferenceMethod.cpp
/*
 * Copyright (c) The Shogun Machine Learning Toolbox
 * Written (w) 2014 Wu Lin
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation are those
 * of the authors and should not be interpreted as representing official policies,
 * either expressed or implied, of the Shogun Development Team.
 *
 * This code is specifically adapted from the functions in approxKL.m and infKL.m
 */

#include <shogun/machine/gp/KLInferenceMethod.h>

#ifdef HAVE_EIGEN3
#include <shogun/machine/gp/VariationalGaussianLikelihood.h>
#include <shogun/mathematics/eigen3.h>

using namespace Eigen;

namespace shogun
{

CKLInferenceMethod::CKLInferenceMethod() : CInferenceMethod()
{
    init();
}

CKLInferenceMethod::CKLInferenceMethod(CKernel* kern,
    CFeatures* feat, CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod)
    : CInferenceMethod(kern, feat, m, lab, mod)
{
    init();
    check_variational_likelihood(m_model);
}

void CKLInferenceMethod::check_variational_likelihood(CLikelihoodModel* mod) const
{
    REQUIRE(mod, "the likelihood model must not be NULL\n")
    CVariationalGaussianLikelihood* lik=dynamic_cast<CVariationalGaussianLikelihood*>(mod);
    REQUIRE(lik,
        "The provided likelihood model (%s) must support variational Gaussian inference. "
        "Please use a Variational Gaussian Likelihood model\n",
        mod->get_name());
}

void CKLInferenceMethod::set_model(CLikelihoodModel* mod)
{
    check_variational_likelihood(mod);
    CInferenceMethod::set_model(mod);
}

void CKLInferenceMethod::init()
{
    m_noise_factor=1e-10;
    m_max_attempt=0;
    m_exp_factor=2;
    m_min_coeff_kernel=1e-5;
    SG_ADD(&m_noise_factor, "noise_factor",
        "The noise factor used for correcting the kernel matrix",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_exp_factor, "exp_factor",
        "The exponential factor used for increasing noise_factor",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_max_attempt, "max_attempt",
        "The maximum number of attempts to correct the kernel matrix",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_min_coeff_kernel, "min_coeff_kernel",
        "The minimum coefficient of the kernel matrix in its LDLT factorization, used to check whether the kernel matrix is positive definite",
        MS_NOT_AVAILABLE);

    set_lbfgs_parameters();
    SG_ADD(&m_m, "m",
        "The number of corrections to approximate the inverse Hessian matrix",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_max_linesearch, "max_linesearch",
        "The maximum number of trials to do line search for each L-BFGS update",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_linesearch, "linesearch",
        "The line search algorithm",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_max_iterations, "max_iterations",
        "The maximum number of iterations for L-BFGS update",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_delta, "delta",
        "Delta for convergence test based on the change of function value",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_past, "past",
        "Distance for delta-based convergence test",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_epsilon, "epsilon",
        "Epsilon for convergence test based on the change of gradient",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_min_step, "min_step",
        "The minimum step of the line search",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_max_step, "max_step",
        "The maximum step of the line search",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_ftol, "ftol",
        "A parameter used in the Armijo condition",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_wolfe, "wolfe",
        "A parameter used in the curvature condition",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_gtol, "gtol",
        "A parameter used in the More-Thuente line search to control accuracy",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_xtol, "xtol",
        "The machine precision for floating-point values",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_orthantwise_c, "orthantwise_c",
        "Coefficient for the L1 norm of variables",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_orthantwise_start, "orthantwise_start",
        "Start index for computing the L1 norm of the variables",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_orthantwise_end, "orthantwise_end",
        "End index for computing the L1 norm of the variables",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_s2, "s2",
        "Variational parameter sigma2",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_mu, "mu",
        "Variational parameter mu and posterior mean",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_Sigma, "Sigma",
        "Posterior covariance matrix Sigma",
        MS_NOT_AVAILABLE);
}

CKLInferenceMethod::~CKLInferenceMethod()
{
}

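/* Recomputes the posterior approximation: the update_alpha(), update_chol(),
 * update_approx_cov() and update_deriv() steps are provided by the concrete
 * KL inference subclasses. */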
void CKLInferenceMethod::update()
{
    SG_DEBUG("entering\n");

    CInferenceMethod::update();
    update_init();
    update_alpha();
    update_chol();
    update_approx_cov();
    update_deriv();
    update_parameter_hash();

    SG_DEBUG("leaving\n");
}

void CKLInferenceMethod::set_noise_factor(float64_t noise_factor)
{
    REQUIRE(noise_factor>=0, "The noise_factor %.20f should be non-negative\n", noise_factor);
    m_noise_factor=noise_factor;
}

void CKLInferenceMethod::set_min_coeff_kernel(float64_t min_coeff_kernel)
{
    REQUIRE(min_coeff_kernel>=0, "The min_coeff_kernel %.20f should be non-negative\n", min_coeff_kernel);
    m_min_coeff_kernel=min_coeff_kernel;
}

void CKLInferenceMethod::set_max_attempt(index_t max_attempt)
{
    REQUIRE(max_attempt>=0, "The max_attempt %d should be non-negative. 0 means infinite attempts\n", max_attempt);
    m_max_attempt=max_attempt;
}

void CKLInferenceMethod::set_exp_factor(float64_t exp_factor)
{
    REQUIRE(exp_factor>1.0, "The exp_factor %f should be greater than 1.0.\n", exp_factor);
    m_exp_factor=exp_factor;
}

void CKLInferenceMethod::update_init()
{
    update_init_helper();
}

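/* LDLT-factorizes the scaled kernel matrix and, while its smallest diagonal
 * coefficient is at most m_min_coeff_kernel, adds exponentially growing
 * noise (jitter) to the diagonal, up to m_max_attempt corrections, until the
 * matrix is numerically positive definite. */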
Eigen::LDLT<Eigen::MatrixXd> CKLInferenceMethod::update_init_helper()
{
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);

    eigen_K=eigen_K+m_noise_factor*MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols);

    Eigen::LDLT<Eigen::MatrixXd> ldlt;
    ldlt.compute(eigen_K*CMath::sq(m_scale));

    index_t attempt_count=0;
    MatrixXd Kernel_D=ldlt.vectorD();
    float64_t noise_factor=m_noise_factor;

    while (Kernel_D.minCoeff()<=m_min_coeff_kernel)
    {
        if (m_max_attempt>0 && attempt_count>m_max_attempt)
            SG_ERROR("The kernel matrix is highly non-positive definite: the minimum coefficient is less than %.20f"
                " even after adding %.20f noise to the diagonal elements in a maximum of %d attempts\n",
                m_min_coeff_kernel, noise_factor, m_max_attempt);
        attempt_count++;
        float64_t pre_noise_factor=noise_factor;
        noise_factor*=m_exp_factor;
        //update the noise: eigen_K=eigen_K+noise_factor*(m_exp_factor^attempt_count)*Identity()
        eigen_K=eigen_K+(noise_factor-pre_noise_factor)*MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols);
        ldlt.compute(eigen_K*CMath::sq(m_scale));
        Kernel_D=ldlt.vectorD();
    }

    return ldlt;
}

SGVector<float64_t> CKLInferenceMethod::get_posterior_mean()
{
    if (parameter_hash_changed())
        update();

    return SGVector<float64_t>(m_mu);
}

SGMatrix<float64_t> CKLInferenceMethod::get_posterior_covariance()
{
    if (parameter_hash_changed())
        update();

    return SGMatrix<float64_t>(m_Sigma);
}

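/* Objective/gradient callback passed to the L-BFGS optimizer: writes the
 * gradient of the negative log marginal likelihood (nlml) into "gradient"
 * and returns the nlml value, or NaN when the precomputation fails so that
 * the optimizer rejects the current step. */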
float64_t CKLInferenceMethod::evaluate(void *obj, const float64_t *parameters,
    float64_t *gradient, const int dim, const float64_t step)
{
    /* Note that parameters = parameters_pre_iter - step * gradient_pre_iter */
    CKLInferenceMethod * obj_prt
        = static_cast<CKLInferenceMethod *>(obj);

    REQUIRE(obj_prt, "The instance object passed to L-BFGS optimizer should not be NULL\n");

    bool status = obj_prt->lbfgs_precompute();
    if (!status)
        return CMath::NOT_A_NUMBER;

    float64_t nlml=obj_prt->get_nlml_wrt_parameters();

    SGVector<float64_t> sg_gradient(gradient, dim, false);
    obj_prt->get_gradient_of_nlml_wrt_parameters(sg_gradient);

    return nlml;
}

CVariationalGaussianLikelihood* CKLInferenceMethod::get_variational_likelihood() const
{
    check_variational_likelihood(m_model);
    CVariationalGaussianLikelihood* lik=dynamic_cast<CVariationalGaussianLikelihood*>(m_model);
    return lik;
}

float64_t CKLInferenceMethod::get_nlml_wrt_parameters()
{
    CVariationalGaussianLikelihood* lik=get_variational_likelihood();
    lik->set_variational_distribution(m_mu, m_s2, m_labels);
    return get_negative_log_marginal_likelihood_helper();
}

void CKLInferenceMethod::set_lbfgs_parameters(
    int m,
    int max_linesearch,
    int linesearch,
    int max_iterations,
    float64_t delta,
    int past,
    float64_t epsilon,
    float64_t min_step,
    float64_t max_step,
    float64_t ftol,
    float64_t wolfe,
    float64_t gtol,
    float64_t xtol,
    float64_t orthantwise_c,
    int orthantwise_start,
    int orthantwise_end)
{
    m_m = m;
    m_max_linesearch = max_linesearch;
    m_linesearch = linesearch;
    m_max_iterations = max_iterations;
    m_delta = delta;
    m_past = past;
    m_epsilon = epsilon;
    m_min_step = min_step;
    m_max_step = max_step;
    m_ftol = ftol;
    m_wolfe = wolfe;
    m_gtol = gtol;
    m_xtol = xtol;
    m_orthantwise_c = orthantwise_c;
    m_orthantwise_start = orthantwise_start;
    m_orthantwise_end = orthantwise_end;
}

float64_t CKLInferenceMethod::get_negative_log_marginal_likelihood()
{
    if (parameter_hash_changed())
        update();

    return get_negative_log_marginal_likelihood_helper();
}

SGVector<float64_t> CKLInferenceMethod::get_derivative_wrt_likelihood_model(const TParameter* param)
{
    CVariationalGaussianLikelihood* lik=get_variational_likelihood();
    if (!lik->supports_derivative_wrt_hyperparameter())
        return SGVector<float64_t>();

    //%lp_dhyp = likKL(v,lik,hyp.lik,y,K*post.alpha+m,[],[],j);
    SGVector<float64_t> lp_dhyp=lik->get_first_derivative_wrt_hyperparameter(param);
    Map<VectorXd> eigen_lp_dhyp(lp_dhyp.vector, lp_dhyp.vlen);
    SGVector<float64_t> result(1);
    //%dnlZ.lik(j) = -sum(lp_dhyp);
    result[0]=-eigen_lp_dhyp.sum();

    return result;
}

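/* Derivative of the nlml wrt a mean-function parameter, looping over the
 * components of vector-valued parameters one at a time. */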
SGVector<float64_t> CKLInferenceMethod::get_derivative_wrt_mean(const TParameter* param)
{
    // create eigen representation of alpha
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen/2);

    SGVector<float64_t> result;

    if (param->m_datatype.m_ctype==CT_VECTOR ||
        param->m_datatype.m_ctype==CT_SGVECTOR)
    {
        REQUIRE(param->m_datatype.m_length_y,
            "Length of the parameter %s should not be NULL\n", param->m_name)

        result=SGVector<float64_t>(*(param->m_datatype.m_length_y));
    }
    else
    {
        result=SGVector<float64_t>(1);
    }

    for (index_t i=0; i<result.vlen; i++)
    {
        SGVector<float64_t> dmu;

        //%dm_t = feval(mean{:}, hyp.mean, x, j);
        if (result.vlen==1)
            dmu=m_mean->get_parameter_derivative(m_features, param);
        else
            dmu=m_mean->get_parameter_derivative(m_features, param, i);

        Map<VectorXd> eigen_dmu(dmu.vector, dmu.vlen);

        //%dnlZ.mean(j) = -alpha'*dm_t;
        result[i]=-eigen_alpha.dot(eigen_dmu);
    }

    return result;
}

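/* Copies the stored L-BFGS settings into an lbfgs_parameter_t and minimizes
 * the nlml over m_alpha, using evaluate() as the objective/gradient
 * callback. */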
float64_t CKLInferenceMethod::lbfgs_optimization()
{
    lbfgs_parameter_t lbfgs_param;
    lbfgs_param.m = m_m;
    lbfgs_param.max_linesearch = m_max_linesearch;
    lbfgs_param.linesearch = m_linesearch;
    lbfgs_param.max_iterations = m_max_iterations;
    lbfgs_param.delta = m_delta;
    lbfgs_param.past = m_past;
    lbfgs_param.epsilon = m_epsilon;
    lbfgs_param.min_step = m_min_step;
    lbfgs_param.max_step = m_max_step;
    lbfgs_param.ftol = m_ftol;
    lbfgs_param.wolfe = m_wolfe;
    lbfgs_param.gtol = m_gtol;
    lbfgs_param.xtol = m_xtol;
    lbfgs_param.orthantwise_c = m_orthantwise_c;
    lbfgs_param.orthantwise_start = m_orthantwise_start;
    lbfgs_param.orthantwise_end = m_orthantwise_end;

    float64_t nlml_opt=0;
    void * obj_prt = static_cast<void *>(this);
    lbfgs(m_alpha.vlen, m_alpha.vector, &nlml_opt,
        CKLInferenceMethod::evaluate,
        NULL, obj_prt, &lbfgs_param);

    return nlml_opt;
}

SGVector<float64_t> CKLInferenceMethod::get_derivative_wrt_inference_method(const TParameter* param)
{
    REQUIRE(!strcmp(param->m_name, "scale"), "Can't compute derivative of "
        "the negative log marginal likelihood wrt %s.%s parameter\n",
        get_name(), param->m_name)

    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    // compute derivative of K wrt scale
    MatrixXd eigen_dK=eigen_K*m_scale*2.0;

    SGVector<float64_t> result(1);

    result[0]=get_derivative_related_cov(eigen_dK);

    return result;
}

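/* Derivative of the nlml wrt kernel hyperparameters: each component of dK is
 * taken from the kernel's parameter gradient and chained through
 * get_derivative_related_cov(). */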
SGVector<float64_t> CKLInferenceMethod::get_derivative_wrt_kernel(const TParameter* param)
{
    SGVector<float64_t> result;

    if (param->m_datatype.m_ctype==CT_VECTOR ||
        param->m_datatype.m_ctype==CT_SGVECTOR)
    {
        REQUIRE(param->m_datatype.m_length_y,
            "Length of the parameter %s should not be NULL\n", param->m_name)
        result=SGVector<float64_t>(*(param->m_datatype.m_length_y));
    }
    else
    {
        result=SGVector<float64_t>(1);
    }

    for (index_t i=0; i<result.vlen; i++)
    {
        SGMatrix<float64_t> dK;

        //dK = feval(covfunc{:},hyper,x,j);
        if (result.vlen==1)
            dK=m_kernel->get_parameter_gradient(param);
        else
            dK=m_kernel->get_parameter_gradient(param, i);

        Map<MatrixXd> eigen_dK(dK.matrix, dK.num_cols, dK.num_rows);

        result[i]=get_derivative_related_cov(eigen_dK*CMath::sq(m_scale));
    }

    return result;
}

SGMatrix<float64_t> CKLInferenceMethod::get_cholesky()
{
    if (parameter_hash_changed())
        update();

    return SGMatrix<float64_t>(m_L);
}

} /* namespace shogun */

#endif /* HAVE_EIGEN3 */
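
/* A minimal usage sketch (not part of this file), assuming a concrete KL
 * subclass such as CKLCholeskyInferenceMethod and a variational Gaussian
 * likelihood such as CLogitVGLikelihood; both names come from the related
 * Shogun GP code and may differ between versions. Given a kernel, features,
 * a mean function and binary labels:
 *
 *   CKLCholeskyInferenceMethod* inf=new CKLCholeskyInferenceMethod(
 *       kernel, features, mean, labels, new CLogitVGLikelihood());
 *
 *   // optionally tune the kernel-correction loop of update_init_helper()
 *   inf->set_noise_factor(1e-8);
 *   inf->set_exp_factor(2.0);
 *   inf->set_max_attempt(10);
 *
 *   // any getter below triggers update(), which in the concrete subclasses
 *   // runs the L-BFGS optimization of the variational parameters
 *   SGVector<float64_t> mu=inf->get_posterior_mean();
 *   SGMatrix<float64_t> Sigma=inf->get_posterior_covariance();
 *   float64_t nlml=inf->get_negative_log_marginal_likelihood();
 *
 *   SG_UNREF(inf);
 */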
