SHOGUN  4.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
SingleFITCLaplacianInferenceMethodWithLBFGS.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (w) 2015 Wu Lin
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * The views and conclusions contained in the software and documentation are those
27  * of the authors and should not be interpreted as representing official policies,
28  * either expressed or implied, of the Shogun Development Team.
29  *
30  * Code adapted from Gaussian Process Machine Learning Toolbox
31  * http://www.gaussianprocess.org/gpml/code/matlab/doc/
32  */
34 
35 #ifdef HAVE_EIGEN3
39 
40 using namespace Eigen;
41 namespace shogun
42 {
43 
// Default constructor: all member initialization is delegated to init().
// NOTE(review): the extraction dropped line 45 here — presumably the base-class
// initializer ": CSingleFITCLaplacianInferenceMethod()" — confirm against the repo.
44 CSingleFITCLaplacianInferenceMethodWithLBFGS::CSingleFITCLaplacianInferenceMethodWithLBFGS()
46 {
47  init();
48 }
49 
// Constructor wiring kernel, features, mean function, labels, likelihood model and
// the FITC inducing features straight through to the CSingleFITCLaplacianInferenceMethod
// base, then registering this class's own L-BFGS parameters via init().
// NOTE(review): the extraction dropped line 50 (the line carrying the constructor name);
// only the parameter list and initializer survive below.
 51  CKernel* kern, CFeatures* feat, CMeanFunction* m,
 52  CLabels* lab, CLikelihoodModel* mod, CFeatures* inducing_features)
 53  : CSingleFITCLaplacianInferenceMethod(kern, feat, m, lab, mod, inducing_features)
 54 {
 55  init();
 56 }
57 
// Setter controlling whether the original Newton method is used as a fallback
// when L-BFGS optimization fails (consumed in update_alpha).
// NOTE(review): the extraction dropped line 58 — the signature line, presumably
// "void CSingleFITCLaplacianInferenceMethodWithLBFGS::set_newton_method(".
 59  bool enable_newton_if_fail)
 60 {
 61  m_enable_newton_if_fail = enable_newton_if_fail;
 62 }
63 
// Setter copying the full set of libLBFGS hyperparameters into member storage;
// update_alpha() later transfers these members into an lbfgs_parameter_t.
// NOTE(review): the extraction dropped line 64 (the signature line naming the method)
// and lines 69/71 (the "float64_t delta," and "float64_t epsilon," parameters —
// both are clearly assigned in the body below). Confirm against the repo header,
// which shows defaults: m=100, max_linesearch=1000, max_iterations=1000, etc.
 65  int m,
 66  int max_linesearch,
 67  int linesearch,
 68  int max_iterations,
 70  int past,
 72  float64_t min_step,
 73  float64_t max_step,
 74  float64_t ftol,
 75  float64_t wolfe,
 76  float64_t gtol,
 77  float64_t xtol,
 78  float64_t orthantwise_c,
 79  int orthantwise_start,
 80  int orthantwise_end)
 81 {
 82  m_m = m;
 83  m_max_linesearch = max_linesearch;
 84  m_linesearch = linesearch;
 85  m_max_iterations = max_iterations;
 86  m_delta = delta;
 87  m_past = past;
 88  m_epsilon = epsilon;
 89  m_min_step = min_step;
 90  m_max_step = max_step;
 91  m_ftol = ftol;
 92  m_wolfe = wolfe;
 93  m_gtol = gtol;
 94  m_xtol = xtol;
 95  m_orthantwise_c = orthantwise_c;
 96  m_orthantwise_start = orthantwise_start;
 97  m_orthantwise_end = orthantwise_end;
 98 }
99 
// One-time member initialization shared by both constructors: disables the base
// class's Newton update (this class drives alpha via L-BFGS instead), clears the
// mean-vector scratch pointer, and registers every L-BFGS hyperparameter with
// Shogun's parameter framework via SG_ADD so they are serializable/inspectable.
// NOTE(review): the extraction dropped the closing line of every SG_ADD call
// (lines 107, 110, 113, ... 155) — each presumably carried the trailing
// "MS_NOT_AVAILABLE);" argument. Confirm against the repo.
// NOTE(review): line 102 (presumably a default-parameter call such as
// set_lbfgs_parameters()) was also dropped before set_newton_method(false).
// NOTE(review): typo in a registered description string below — "Coeefficient"
// should read "Coefficient"; cannot be corrected in a comment-only pass.
100 void CSingleFITCLaplacianInferenceMethodWithLBFGS::init()
101 {
 103  set_newton_method(false);
 104  m_mean_f = NULL;
 105  SG_ADD(&m_m, "m",
 106  "The number of corrections to approximate the inverse hessian matrix",
 108  SG_ADD(&m_max_linesearch, "max_linesearch",
 109  "The maximum number of trials to do line search for each L-BFGS update",
 111  SG_ADD(&m_linesearch, "linesearch",
 112  "The line search algorithm",
 114  SG_ADD(&m_max_iterations, "max_iterations",
 115  "The maximum number of iterations for L-BFGS update",
 117  SG_ADD(&m_delta, "delta",
 118  "Delta for convergence test based on the change of function value",
 120  SG_ADD(&m_past, "past",
 121  "Distance for delta-based convergence test",
 123  SG_ADD(&m_epsilon, "epsilon",
 124  "Epsilon for convergence test based on the change of gradient",
 126  SG_ADD(&m_min_step, "min_step",
 127  "The minimum step of the line search",
 129  SG_ADD(&m_max_step, "max_step",
 130  "The maximum step of the line search",
 132  SG_ADD(&m_ftol, "ftol",
 133  "A parameter used in Armijo condition",
 135  SG_ADD(&m_wolfe, "wolfe",
 136  "A parameter used in curvature condition",
 138  SG_ADD(&m_gtol, "gtol",
 139  "A parameter used in Morethuente linesearch to control the accuracy",
 141  SG_ADD(&m_xtol, "xtol",
 142  "The machine precision for floating-point values",
 144  SG_ADD(&m_orthantwise_c, "orthantwise_c",
 145  "Coeefficient for the L1 norm of variables",
 147  SG_ADD(&m_orthantwise_start, "orthantwise_start",
 148  "Start index for computing L1 norm of the variables",
 150  SG_ADD(&m_orthantwise_end, "orthantwise_end",
 151  "End index for computing L1 norm of the variables",
 153  SG_ADD(&m_enable_newton_if_fail, "enable_newton_if_fail",
 154  "Enable the original Newton method if the L-BFGS method fails",
 156 }
157 
// Empty destructor body — no resources beyond what the base class manages.
// NOTE(review): the extraction dropped line 158, which carried the destructor
// declaration "CSingleFITCLaplacianInferenceMethodWithLBFGS::~CSingleFITCLaplacianInferenceMethodWithLBFGS()".
 159 {
 160 }
161 
// Static callback handed to the C-style lbfgs() driver (see update_alpha):
// given the current alpha vector, writes the gradient of the objective into
// `gradient` and returns the objective value psi. `obj` is this instance,
// smuggled through the void* instance slot of the lbfgs API.
// NOTE(review): the extraction dropped line 175, presumably
// "CSingleFITCLaplacianInferenceMethodWithLBFGS * obj_prt" — the declaration
// that line 176's "= static_cast<...>(obj);" completes.
162 float64_t CSingleFITCLaplacianInferenceMethodWithLBFGS::evaluate(
163  void *obj,
164  const float64_t *alpha,
165  float64_t *gradient,
166  const int dim,
167  const float64_t step)
168 {
169  //time complexity O(m*n)
170  /* Note that alpha = alpha_pre_iter - step * gradient_pre_iter */
171 
172  /* Unfortunately we can not use dynamic_cast to cast the void * pointer to an
173  * object pointer. Therefore, make sure this method is private.
174  */
 176  = static_cast<CSingleFITCLaplacianInferenceMethodWithLBFGS *>(obj);
 // const_cast is safe here: get_psi_wrt_alpha/get_gradient_wrt_alpha take a
 // non-const pointer but (from their bodies below) only read through it.
 177  float64_t * alpha_cast = const_cast<float64_t *>(alpha);
 178  float64_t psi = 0.0;
 179  obj_prt->get_psi_wrt_alpha(alpha_cast, dim, psi);
 180  obj_prt->get_gradient_wrt_alpha(alpha_cast, gradient, dim);
 181  return psi;
 182 }
183 
// Core of the Laplace approximation: minimizes the objective psi over the dual
// vector alpha (m_al) with libLBFGS, using evaluate() as the function/gradient
// callback, then recomputes m_mu = K*alpha + mean and the posterior alpha.
// If L-BFGS reports an error (nonzero return other than LBFGS_ALREADY_MINIMIZED)
// and m_enable_newton_if_fail is set, it warns and falls back to the base class's
// Newton update.
// NOTE(review): the extraction dropped several lines in this function — 184 (the
// method signature), 190 (the mean-vector fetch that `mean_f` below comes from),
// 245 (presumably the base-class Newton call before the early return), 249
// (presumably "SGVector<float64_t> tmp=compute_mvmK(m_al);"), and 253-255 (the
// declarations of eigen_R0 and eigen_V used at line 258). Confirm against repo.
 185 {
 186  //time complexity O(m*n)
 187  float64_t psi_new=m_Psi;
 188 
 189  /* get mean vector and create eigen representation of it*/
 191  Map<VectorXd> eigen_mean_f(mean_f.vector, mean_f.vlen);
 192 
 193  Map<VectorXd> eigen_mu(m_mu, m_mu.vlen);
 194  Map<VectorXd> eigen_alpha(m_al.vector, m_al.vlen);
 195 
 196 
 // Copy every stored L-BFGS hyperparameter into the libLBFGS parameter struct.
 197  lbfgs_parameter_t lbfgs_param;
 198  lbfgs_param.m = m_m;
 199  lbfgs_param.max_linesearch = m_max_linesearch;
 200  lbfgs_param.linesearch = m_linesearch;
 201  lbfgs_param.max_iterations = m_max_iterations;
 202  lbfgs_param.delta = m_delta;
 203  lbfgs_param.past = m_past;
 204  lbfgs_param.epsilon = m_epsilon;
 205  lbfgs_param.min_step = m_min_step;
 206  lbfgs_param.max_step = m_max_step;
 207  lbfgs_param.ftol = m_ftol;
 208  lbfgs_param.wolfe = m_wolfe;
 209  lbfgs_param.gtol = m_gtol;
 210  lbfgs_param.xtol = m_xtol;
 211  lbfgs_param.orthantwise_c = m_orthantwise_c;
 212  lbfgs_param.orthantwise_start = m_orthantwise_start;
 213  lbfgs_param.orthantwise_end = m_orthantwise_end;
 214 
 215  /* use for passing variables to compute function value and gradient*/
 216  m_mean_f = &mean_f;
 217 
 218  /* In order to use the provided lbfgs function, we have to pass the object via
 219  * void * pointer, which the evaluate method will use static_cast to cast
 220  * the pointer to an object pointer.
 221  * Therefore, make sure the evaluate method is a private method of the class.
 222  * Because the evaluate method is defined in a class, we have to pass the
 223  * method pointer to the lbfgs function via static method
 224  * If we also use the progress method, make sure the method is static and
 225  * private.
 226  */
 227  void * obj_prt = static_cast<void *>(this);
 228 
 // lbfgs() optimizes m_al in place; psi_new receives the final objective value.
 229  int ret = lbfgs(m_al.vlen, m_al.vector, &psi_new,
 230  CSingleFITCLaplacianInferenceMethodWithLBFGS::evaluate,
 231  NULL, obj_prt, &lbfgs_param);
 232  /* clean up*/
 233  m_mean_f = NULL;
 234 
 235  /* Note that ret should be zero if the minimization
 236  * process terminates without an error.
 237  * A non-zero value indicates an error.
 238  */
 239  if (m_enable_newton_if_fail && ret != 0 && ret != LBFGS_ALREADY_MINIMIZED)
 240  {
 241  /* If some error happened during the L-BFGS optimization, we use the original
 242  * Newton method.
 243  */
 244  SG_WARNING("Error during L-BFGS optimization, using original Newton method as fallback\n");
 246  return;
 247  }
 248  /* compute f = K * alpha + m*/
 250  Map<VectorXd> eigen_tmp(tmp.vector, tmp.vlen);
 251  eigen_mu=eigen_tmp+eigen_mean_f;
 252 
 256  Map<VectorXd> eigen_post_alpha(m_alpha.vector, m_alpha.vlen);
 257  //post.alpha = R0'*(V*alpha);
 258  eigen_post_alpha=eigen_R0.transpose()*(eigen_V*eigen_alpha);
 259 }
260 
// Computes the objective value psi for a candidate alpha (output via the `psi`
// reference). It forms f = K*alpha + mean_f, then psi = 0.5*alpha'*(f - mean_f)
// minus the log likelihood (the second term lives on the dropped line — see note).
// `m_mean_f` must have been set by update_alpha before this is called.
// NOTE(review): the extraction dropped line 274 (presumably
// "SGVector<float64_t> tmp=compute_mvmK(al);", matching the intact sibling in
// get_gradient_wrt_alpha) and line 280 (presumably the
// "psi -= SGVector<float64_t>::sum(lp);" style subtraction announced by the
// comment on line 278). Confirm against the repo.
261 void CSingleFITCLaplacianInferenceMethodWithLBFGS::get_psi_wrt_alpha(
262  float64_t *alpha,
263  const int dim,
264  float64_t &psi)
265 {
266  //time complexity O(m*n)
267  Map<VectorXd> eigen_alpha(alpha, dim);
268  SGVector<float64_t> f(dim);
269  Map<VectorXd> eigen_f(f.vector, f.vlen);
270  Map<VectorXd> eigen_mean_f(m_mean_f->vector,
271  m_mean_f->vlen);
272  /* f = K * alpha + mean_f given alpha*/
 // `al` wraps the caller's buffer without taking ownership (ref_counting=false).
273  SGVector<float64_t> al(alpha, dim, false);
 275  Map<VectorXd> eigen_tmp(tmp.vector, tmp.vlen);
 276  eigen_f=eigen_tmp+eigen_mean_f;
 277 
 278  /* psi = 0.5 * alpha .* (f - m) - sum(dlp)*/
 279  psi=eigen_alpha.dot(eigen_tmp) * 0.5;
 281 }
282 
// Computes the gradient of psi with respect to alpha into `gradient`:
// recomputes f = K*alpha + mean_f, obtains dlp = d(log p)/df from the
// likelihood model, and sets gradient = K*(alpha - dlp) via compute_mvmK.
// `m_mean_f` must have been set by update_alpha before this is called.
// NOTE(review): the extraction dropped lines 293-294 (a declaration that line
// 295's "m_ktrtr.num_cols);" completes — presumably an Eigen Map over m_ktrtr)
// and line 306 (presumably the
// "m_model->get_log_probability_derivative_f(m_labels, f, 1);" call that
// initializes dlp_f, matching the trailing cross-reference text). Confirm.
283 void CSingleFITCLaplacianInferenceMethodWithLBFGS::get_gradient_wrt_alpha(
284  float64_t *alpha,
285  float64_t *gradient,
286  const int dim)
287 {
288  //time complexity O(m*n)
289  Map<VectorXd> eigen_alpha(alpha, dim);
290  Map<VectorXd> eigen_gradient(gradient, dim);
291  SGVector<float64_t> f(dim);
292  Map<VectorXd> eigen_f(f.vector, f.vlen);
 295  m_ktrtr.num_cols);
 296  Map<VectorXd> eigen_mean_f(m_mean_f->vector,
 297  m_mean_f->vlen);
 298 
 299  /* f = K * alpha + mean_f given alpha*/
 // `al` wraps the caller's buffer without taking ownership (ref_counting=false).
 300  SGVector<float64_t> al(alpha, dim, false);
 301  SGVector<float64_t> tmp=compute_mvmK(al);
 302  Map<VectorXd> eigen_tmp(tmp.vector, tmp.vlen);
 303  eigen_f=eigen_tmp+eigen_mean_f;
 304 
 305  SGVector<float64_t> dlp_f =
 307 
 308  Map<VectorXd> eigen_dlp_f(dlp_f.vector, dlp_f.vlen);
 309 
 310  /* g_alpha = K * (alpha - dlp_f)*/
 311  SGVector<float64_t> tmp2(al.vlen);
 312  Map<VectorXd> eigen_tmp2(tmp2.vector, tmp2.vlen);
 313  eigen_tmp2=eigen_alpha-eigen_dlp_f;
 // Re-map after compute_mvmK: tmp2 may now own a different underlying buffer.
 314  tmp2=compute_mvmK(tmp2);
 315  Map<VectorXd> eigen_tmp3(tmp2.vector, tmp2.vlen);
 316  eigen_gradient=eigen_tmp3;
 317 }
318 
319 } /* namespace shogun */
320 #endif /* HAVE_EIGEN3 */
virtual SGVector< float64_t > get_log_probability_f(const CLabels *lab, SGVector< float64_t > func) const =0
The Laplace approximation FITC inference method with LBFGS class for regression and binary classifica...
SGVector< float64_t > m_alpha
int32_t lbfgs(int32_t n, float64_t *x, float64_t *ptr_fx, lbfgs_evaluate_t proc_evaluate, lbfgs_progress_t proc_progress, void *instance, lbfgs_parameter_t *_param, lbfgs_adjust_step_t proc_adjust_step)
Definition: lbfgs.cpp:208
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:43
float64_t orthantwise_c
Definition: lbfgs.h:311
Definition: SGMatrix.h:20
index_t num_cols
Definition: SGMatrix.h:378
virtual SGVector< float64_t > get_mean_vector(const CFeatures *features) const =0
The SingleFITCLaplace approximation inference method class for regression and binary Classification...
An abstract class of the mean function.
Definition: MeanFunction.h:49
index_t num_rows
Definition: SGMatrix.h:376
virtual SGVector< float64_t > compute_mvmK(SGVector< float64_t > al)
static const float64_t epsilon
Definition: libbmrm.cpp:25
virtual void set_lbfgs_parameters(int m=100, int max_linesearch=1000, int linesearch=LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE, int max_iterations=1000, float64_t delta=0.0, int past=0, float64_t epsilon=1e-5, float64_t min_step=1e-20, float64_t max_step=1e+20, float64_t ftol=1e-4, float64_t wolfe=0.9, float64_t gtol=0.9, float64_t xtol=1e-16, float64_t orthantwise_c=0.0, int orthantwise_start=0, int orthantwise_end=1)
index_t vlen
Definition: SGVector.h:494
double float64_t
Definition: common.h:50
static T sum(T *vec, int32_t len)
Return sum(vec)
Definition: SGVector.h:354
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
The class Features is the base class of all feature objects.
Definition: Features.h:68
virtual SGVector< float64_t > get_log_probability_derivative_f(const CLabels *lab, SGVector< float64_t > func, index_t i) const =0
The Kernel base class.
Definition: Kernel.h:158
#define SG_WARNING(...)
Definition: SGIO.h:128
#define SG_ADD(...)
Definition: SGObject.h:81
#define delta
Definition: sfa.cpp:23
The Likelihood model base class.
SGMatrix< float64_t > m_ktrtr
CLikelihoodModel * m_model

SHOGUN Machine Learning Toolbox - Documentation