SHOGUN  4.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
KLInferenceMethod.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (w) 2014 Wu Lin
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * The views and conclusions contained in the software and documentation are those
27  * of the authors and should not be interpreted as representing official policies,
28  * either expressed or implied, of the Shogun Development Team.
29  *
30  * Code adapted from
31  * http://hannes.nickisch.org/code/approxXX.tar.gz
32  * and Gaussian Process Machine Learning Toolbox
33  * http://www.gaussianprocess.org/gpml/code/matlab/doc/
34  * and the reference paper is
35  * Nickisch, Hannes, and Carl Edward Rasmussen.
36  * "Approximations for Binary Gaussian Process Classification."
37  * Journal of Machine Learning Research 9.10 (2008).
38  *
39  */
40 
41 #ifndef _KLINFERENCEMETHOD_H_
42 #define _KLINFERENCEMETHOD_H_
43 
44 #include <shogun/lib/config.h>
45 
46 #ifdef HAVE_EIGEN3
50 
/* Forward declarations of the Eigen class templates Matrix and LDLT, plus a
 * MatrixXd alias for Matrix<float64_t,-1,-1,0,-1,-1>, so that this header can
 * name Eigen::MatrixXd and Eigen::LDLT in declarations.
 * NOTE(review): presumably done to avoid including the full Eigen headers here,
 * and -1 presumably corresponds to Eigen::Dynamic -- confirm against Eigen. */
51 namespace Eigen
52 {
53  template <class, int, int, int, int, int> class Matrix;
54  template <class, int> class LDLT;
55 
56  typedef Matrix<float64_t,-1,-1,0,-1,-1> MatrixXd;
57 }
58 
59 namespace shogun
60 {
61 
79 {
80 public:
83 
/* Constructor taking the five collaborating objects by pointer.
 *
 * @param kernel   kernel object (CKernel)
 * @param features feature object (CFeatures)
 * @param mean     mean function (CMeanFunction)
 * @param labels   labels (CLabels)
 * @param model    likelihood model (CLikelihoodModel)
 *
 * NOTE(review): ownership / reference counting of these pointers is not
 * visible in this header -- check the definition before relying on it. */
92  CKLInferenceMethod(CKernel* kernel, CFeatures* features,
93  CMeanFunction* mean, CLabels* labels, CLikelihoodModel* model);
94 
95  virtual ~CKLInferenceMethod();
96 
/* Identifies this inference method's type.
 *
 * @return the EInferenceType constant INF_KL
 */
99  virtual EInferenceType get_inference_type() const { return INF_KL; }
100 
/* Returns the name of this class.
 *
 * @return the string literal "KLInferenceMethod"
 */
105  virtual const char* get_name() const { return "KLInferenceMethod"; }
106 
119 
132 
152 
/* Reports whether regression is supported.
 *
 * Calls check_members() first, then forwards the question to the likelihood
 * model held in m_model.
 *
 * @return whatever m_model->supports_regression() returns
 */
157  virtual bool supports_regression() const
158  {
159  check_members();
160  return m_model->supports_regression();
161  }
162 
/* Reports whether binary classification is supported.
 *
 * Calls check_members() first, then forwards the question to the likelihood
 * model held in m_model.
 *
 * @return whatever m_model->supports_binary() returns
 */
167  virtual bool supports_binary() const
168  {
169  check_members();
170  return m_model->supports_binary();
171  }
172 
177  virtual void set_model(CLikelihoodModel* mod);
178 
180  virtual void update();
181 
182  /* Set the L-BFGS parameters.
183  * For details please see shogun/optimization/lbfgs/lbfgs.h
184  * @param m The number of corrections to approximate the inverse Hessian matrix.
185  * Default value is 100.
186  * @param max_linesearch The maximum number of trials to do line search for each L-BFGS update.
187  * Default value is 1000.
188  * @param linesearch The line search algorithm.
189  * The default is backtracking line search with the strong Wolfe condition
190  * @param max_iterations The maximum number of iterations for L-BFGS update.
191  * Default value is 1000.
192  * @param delta Delta for convergence test based on the change of function value.
193  * Default value is 0.
194  * @param past Distance for delta-based convergence test.
195  * Default value is 0.
196  * @param epsilon Epsilon for convergence test based on the change of gradient.
197  * Default value is 1e-5
198  * @param min_step The minimum step of the line search.
199  * The default value is 1e-20
200  * @param max_step The maximum step of the line search.
201  * The default value is 1e+20
202  * @param ftol A parameter used in Armijo condition.
203  * Default value is 1e-4
204  * @param wolfe A parameter used in curvature condition.
205  * Default value is 0.9
206  * @param gtol A parameter used in More-Thuente line search to control the accuracy.
207  * Default value is 0.9
208  * @param xtol The machine precision for floating-point values.
209  * Default value is 1e-16.
210  * @param orthantwise_c Coefficient for the L1 norm of variables.
211  * This parameter should be set to zero for standard minimization problems.
212  * Setting this parameter to a positive value activates
213  * Orthant-Wise Limited-memory Quasi-Newton (OWL-QN) method. Default value is 0.
214  * @param orthantwise_start Start index for computing L1 norm of the variables.
215  * This parameter is valid only for OWL-QN method. Default value is 0.
216  * @param orthantwise_end End index for computing L1 norm of the variables.
217  * Default value is 1.
218  */
219  virtual void set_lbfgs_parameters(int m = 100,
220  int max_linesearch = 1000,
221  int linesearch = LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE,
222  int max_iterations = 1000,
223  float64_t delta = 0.0,
224  int past = 0,
225  float64_t epsilon = 1e-5,
226  float64_t min_step = 1e-20,
227  float64_t max_step = 1e+20,
228  float64_t ftol = 1e-4,
229  float64_t wolfe = 0.9,
230  float64_t gtol = 0.9,
231  float64_t xtol = 1e-16,
232  float64_t orthantwise_c = 0.0,
233  int orthantwise_start = 0,
234  int orthantwise_end = 1);
235 
252 
260  virtual void set_noise_factor(float64_t noise_factor);
261 
268  virtual void set_max_attempt(index_t max_attempt);
269 
276  virtual void set_exp_factor(float64_t exp_factor);
277 
284  virtual void set_min_coeff_kernel(float64_t min_coeff_kernel);
285 protected:
286 
288  virtual void compute_gradient();
289 
292 
295 
298 
301 
305  virtual void update_init();
306 
312 
317 
323  virtual void check_variational_likelihood(CLikelihoodModel* mod) const;
324 
/* Pure virtual hook with no arguments and no return value; this class provides
 * no implementation, so every concrete subclass must define it.
 * NOTE(review): by its name it presumably refreshes the approximate posterior
 * covariance -- confirm against a concrete subclass. */
326  virtual void update_approx_cov()=0;
327 
339 
341  virtual float64_t lbfgs_optimization();
342 
351  const TParameter* param);
352 
361  const TParameter* param);
362 
371  const TParameter* param);
372 
381  const TParameter* param);
382 
389 
396 
403 
/* Pure virtual hook returning a bool; this class provides no implementation,
 * so every concrete subclass must define it.
 * NOTE(review): the meaning of the returned flag and when the hook is invoked
 * are not visible in this header -- confirm against the L-BFGS driver code. */
412  virtual bool lbfgs_precompute()=0;
413 
418 
421 
426 
427  /* The number of corrections to approximate the inverse Hessian matrix.*/
428  int m_m;
429 
430  /* The maximum number of trials to do line search for each L-BFGS update.*/
432 
433  /* The line search algorithm.*/
435 
436  /* The maximum number of iterations for L-BFGS update.*/
438 
439  /* Delta for convergence test based on the change of function value.*/
441 
442  /* Distance for delta-based convergence test.*/
443  int m_past;
444 
445  /* Epsilon for convergence test based on the change of gradient.*/
447 
448  /* The minimum step of the line search.*/
450 
451  /* The maximum step of the line search.*/
453 
454  /* A parameter used in Armijo condition.*/
456 
457  /* A parameter used in curvature condition.*/
459 
460  /* A parameter used in More-Thuente line search to control the accuracy.*/
462 
463  /* The machine precision for floating-point values.*/
465 
466  /* Coefficient for the L1 norm of variables.*/
468 
469  /* Start index for computing L1 norm of the variables.*/
471 
472  /* End index for computing L1 norm of the variables.*/
474 
475 private:
476  void init();
477 
/* Private static helper that receives an untyped object pointer, a read-only
 * parameter array, a writable gradient array, a dimension and a step value,
 * and returns a float64_t.
 *
 * NOTE(review): the signature looks like the objective/gradient callback
 * handed to the L-BFGS routine, with obj presumably pointing at the
 * CKLInferenceMethod instance and dim being the length of both arrays --
 * confirm against shogun/optimization/lbfgs/lbfgs.h and the definition. */
481  static float64_t evaluate(void *obj,
482  const float64_t *parameters,
483  float64_t *gradient,
484  const int dim,
485  const float64_t step);
486 
487 };
488 }
489 #endif /* HAVE_EIGEN3 */
490 #endif /* _KLINFERENCEMETHOD_H_ */
virtual void set_lbfgs_parameters(int m=100, int max_linesearch=1000, int linesearch=LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE, int max_iterations=1000, float64_t delta=0.0, int past=0, float64_t epsilon=1e-5, float64_t min_step=1e-20, float64_t max_step=1e+20, float64_t ftol=1e-4, float64_t wolfe=0.9, float64_t gtol=0.9, float64_t xtol=1e-16, float64_t orthantwise_c=0.0, int orthantwise_start=0, int orthantwise_end=1)
virtual bool supports_regression() const
virtual SGVector< float64_t > get_derivative_wrt_likelihood_model(const TParameter *param)
virtual SGMatrix< float64_t > get_cholesky()
The Inference Method base class.
virtual void set_exp_factor(float64_t exp_factor)
int32_t index_t
Definition: common.h:62
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:43
The variational Gaussian Likelihood base class. The variational distribution is Gaussian.
Definition: SGMatrix.h:20
parameter struct
virtual void update_approx_cov()=0
virtual SGVector< float64_t > get_derivative_wrt_mean(const TParameter *param)
virtual bool lbfgs_precompute()=0
An abstract class of the mean function.
Definition: MeanFunction.h:28
virtual const char * get_name() const
static const float64_t epsilon
Definition: libbmrm.cpp:25
SGMatrix< float64_t > m_Sigma
virtual float64_t get_negative_log_marginal_likelihood()
virtual void set_min_coeff_kernel(float64_t min_coeff_kernel)
virtual void check_variational_likelihood(CLikelihoodModel *mod) const
virtual float64_t lbfgs_optimization()
double float64_t
Definition: common.h:50
virtual SGVector< float64_t > get_derivative_wrt_inference_method(const TParameter *param)
virtual bool supports_regression() const
virtual void set_max_attempt(index_t max_attempt)
virtual SGVector< float64_t > get_posterior_mean()
virtual bool supports_binary() const
virtual float64_t get_derivative_related_cov(SGMatrix< float64_t > dK)=0
virtual SGMatrix< float64_t > get_posterior_covariance()
Matrix< float64_t,-1,-1, 0,-1,-1 > MatrixXd
virtual Eigen::LDLT< Eigen::MatrixXd, 0x1 > update_init_helper()
The KL approximation inference method class.
virtual float64_t get_nlml_wrt_parameters()
All classes and functions are contained in the shogun namespace.
Definition: class_list.h:18
virtual void set_noise_factor(float64_t noise_factor)
virtual SGVector< float64_t > get_derivative_wrt_kernel(const TParameter *param)
virtual bool supports_binary() const
The class Features is the base class of all feature objects.
Definition: Features.h:68
SGVector< float64_t > m_mu
SGVector< float64_t > m_s2
virtual void check_members() const
The Kernel base class.
Definition: Kernel.h:158
virtual void get_gradient_of_nlml_wrt_parameters(SGVector< float64_t > gradient)=0
virtual CVariationalGaussianLikelihood * get_variational_likelihood() const
virtual void set_model(CLikelihoodModel *mod)
#define delta
Definition: sfa.cpp:23
The Likelihood model base class.
virtual float64_t get_negative_log_marginal_likelihood_helper()=0
CLikelihoodModel * m_model
virtual EInferenceType get_inference_type() const

SHOGUN Machine Learning Toolbox - Documentation