LaplacianInferenceMethodWithLBFGS.cpp
/*
 * Copyright (c) The Shogun Machine Learning Toolbox
 * Written (w) 2014 Wu Lin
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation are those
 * of the authors and should not be interpreted as representing official policies,
 * either expressed or implied, of the Shogun Development Team.
 *
 * Code adapted from Gaussian Process Machine Learning Toolbox
 * http://www.gaussianprocess.org/gpml/code/matlab/doc/
 */

#include <shogun/machine/gp/LaplacianInferenceMethodWithLBFGS.h>

#ifdef HAVE_EIGEN3

#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/eigen3.h>
#include <shogun/optimization/lbfgs/lbfgs.h>

namespace shogun
{

CLaplacianInferenceMethodWithLBFGS::CLaplacianInferenceMethodWithLBFGS()
    : CLaplacianInferenceMethod()
{
    init();
}

CLaplacianInferenceMethodWithLBFGS::CLaplacianInferenceMethodWithLBFGS(
    CKernel* kern,
    CFeatures* feat,
    CMeanFunction* m,
    CLabels* lab,
    CLikelihoodModel* mod)
    : CLaplacianInferenceMethod(kern, feat, m, lab, mod)
{
    init();
}
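
/* Setter for the liblbfgs settings; the values stored here are copied into
 * an lbfgs_parameter_t when update_alpha() runs the optimizer. */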
void CLaplacianInferenceMethodWithLBFGS::set_lbfgs_parameters(
    int m,
    int max_linesearch,
    int linesearch,
    int max_iterations,
    float64_t delta,
    int past,
    float64_t epsilon,
    bool enable_newton_if_fail,
    float64_t min_step,
    float64_t max_step,
    float64_t ftol,
    float64_t wolfe,
    float64_t gtol,
    float64_t xtol,
    float64_t orthantwise_c,
    int orthantwise_start,
    int orthantwise_end)
{
    m_m = m;
    m_max_linesearch = max_linesearch;
    m_linesearch = linesearch;
    m_max_iterations = max_iterations;
    m_delta = delta;
    m_past = past;
    m_epsilon = epsilon;
    m_min_step = min_step;
    m_max_step = max_step;
    m_ftol = ftol;
    m_wolfe = wolfe;
    m_gtol = gtol;
    m_xtol = xtol;
    m_orthantwise_c = orthantwise_c;
    m_orthantwise_start = orthantwise_start;
    m_orthantwise_end = orthantwise_end;
    m_enable_newton_if_fail = enable_newton_if_fail;
}

void CLaplacianInferenceMethodWithLBFGS::init()
{
    set_lbfgs_parameters();
    m_mean_f = NULL;
    SG_ADD(&m_m, "m",
        "The number of corrections to approximate the inverse hessian matrix",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_max_linesearch, "max_linesearch",
        "The maximum number of trials to do line search for each L-BFGS update",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_linesearch, "linesearch",
        "The line search algorithm",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_max_iterations, "max_iterations",
        "The maximum number of iterations for L-BFGS update",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_delta, "delta",
        "Delta for convergence test based on the change of function value",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_past, "past",
        "Distance for delta-based convergence test",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_epsilon, "epsilon",
        "Epsilon for convergence test based on the change of gradient",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_min_step, "min_step",
        "The minimum step of the line search",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_max_step, "max_step",
        "The maximum step of the line search",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_ftol, "ftol",
        "A parameter used in Armijo condition",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_wolfe, "wolfe",
        "A parameter used in curvature condition",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_gtol, "gtol",
        "A parameter used in Morethuente linesearch to control the accuracy",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_xtol, "xtol",
        "The machine precision for floating-point values",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_orthantwise_c, "orthantwise_c",
        "Coefficient for the L1 norm of variables",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_orthantwise_start, "orthantwise_start",
        "Start index for computing L1 norm of the variables",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_orthantwise_end, "orthantwise_end",
        "End index for computing L1 norm of the variables",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_enable_newton_if_fail, "enable_newton_if_fail",
        "Enable the original Newton method if the L-BFGS method fails",
        MS_NOT_AVAILABLE);
}

CLaplacianInferenceMethodWithLBFGS::~CLaplacianInferenceMethodWithLBFGS()
{
}
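
/* Objective callback handed to lbfgs(): computes the objective psi at the
 * current alpha and writes its gradient into the optimizer-supplied buffer.
 * liblbfgs invokes it once per function/gradient evaluation. */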
float64_t CLaplacianInferenceMethodWithLBFGS::evaluate(
    void *obj,
    const float64_t *alpha,
    float64_t *gradient,
    const int dim,
    const float64_t step)
{
    /* Note that alpha = alpha_pre_iter - step * gradient_pre_iter */

    /* Unfortunately we cannot use dynamic_cast to cast the void * pointer to
     * an object pointer. Therefore, make sure this method is private.
     */
    CLaplacianInferenceMethodWithLBFGS * obj_prt
        = static_cast<CLaplacianInferenceMethodWithLBFGS *>(obj);
    float64_t * alpha_cast = const_cast<float64_t *>(alpha);
    Eigen::Map<Eigen::VectorXd> eigen_alpha(alpha_cast, dim);
    float64_t psi = 0.0;
    obj_prt->get_psi_wrt_alpha(&eigen_alpha, &psi);
    Eigen::Map<Eigen::VectorXd> eigen_gradient(gradient, dim);
    obj_prt->get_gradient_wrt_alpha(&eigen_alpha, &eigen_gradient);
    return psi;
}
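
/* Finds the posterior mode by minimizing
 *     psi(alpha) = alpha' * (f - m) / 2 - sum(log p(y|f)),  f = K * alpha + m,
 * using L-BFGS instead of the Newton iterations of the base class. */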
void CLaplacianInferenceMethodWithLBFGS::update_alpha()
{
    float64_t psi_new;
    float64_t psi_def;

    /* get mean vector and create eigen representation of it */
    SGVector<float64_t> mean_f = m_mean->get_mean_vector(m_features);
    Eigen::Map<Eigen::VectorXd> eigen_mean_f(mean_f.vector, mean_f.vlen);

    /* create eigen representation of kernel matrix */
    Eigen::Map<Eigen::MatrixXd> eigen_ktrtr(m_ktrtr.matrix,
        m_ktrtr.num_rows,
        m_ktrtr.num_cols);

    /* create shogun and eigen representation of function vector */
    m_mu = SGVector<float64_t>(mean_f.vlen);
    Eigen::Map<Eigen::VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);

    if (m_alpha.vlen != m_labels->get_num_labels())
    {
        /* set alpha to a zero vector */
        m_alpha = SGVector<float64_t>(m_labels->get_num_labels());
        m_alpha.zero();

        /* f = mean, if the lengths of alpha and y don't match */
        eigen_mu = eigen_mean_f;
        psi_new = -SGVector<float64_t>::sum(
            m_model->get_log_probability_f(m_labels, m_mu));
    }
    else
    {
        /* compute f = K * alpha + m */
        Eigen::Map<Eigen::VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
        eigen_mu = eigen_ktrtr * (eigen_alpha * CMath::sq(m_scale)) + eigen_mean_f;
        psi_new = eigen_alpha.dot(eigen_mu - eigen_mean_f) / 2.0 -
            SGVector<float64_t>::sum(
                m_model->get_log_probability_f(m_labels, m_mu));

        psi_def = -SGVector<float64_t>::sum(
            m_model->get_log_probability_f(m_labels, mean_f));

        /* if the default is better, then use it */
        if (psi_def < psi_new)
        {
            m_alpha.zero();
            eigen_mu = eigen_mean_f;
            psi_new = psi_def;
        }
    }
    Eigen::Map<Eigen::VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
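    /* translate the stored settings into the parameter struct that lbfgs()
     * expects */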
    lbfgs_parameter_t lbfgs_param;
    lbfgs_param.m = m_m;
    lbfgs_param.max_linesearch = m_max_linesearch;
    lbfgs_param.linesearch = m_linesearch;
    lbfgs_param.max_iterations = m_max_iterations;
    lbfgs_param.delta = m_delta;
    lbfgs_param.past = m_past;
    lbfgs_param.epsilon = m_epsilon;
    lbfgs_param.min_step = m_min_step;
    lbfgs_param.max_step = m_max_step;
    lbfgs_param.ftol = m_ftol;
    lbfgs_param.wolfe = m_wolfe;
    lbfgs_param.gtol = m_gtol;
    lbfgs_param.xtol = m_xtol;
    lbfgs_param.orthantwise_c = m_orthantwise_c;
    lbfgs_param.orthantwise_start = m_orthantwise_start;
    lbfgs_param.orthantwise_end = m_orthantwise_end;

    /* used for passing variables to compute function value and gradient */
    m_mean_f = &mean_f;

    /* In order to use the provided lbfgs function, we have to pass the object
     * via a void * pointer, which the evaluate method casts back to an object
     * pointer with static_cast. Therefore, make sure the evaluate method is a
     * private method of the class. Because evaluate is defined inside a class,
     * it must be a static method so that it can be passed to the lbfgs
     * function. If we also use a progress callback, it must likewise be
     * static and private.
     */
    void * obj_prt = static_cast<void *>(this);

    int ret = lbfgs(m_alpha.vlen, m_alpha.vector, &psi_new,
        CLaplacianInferenceMethodWithLBFGS::evaluate,
        NULL, obj_prt, &lbfgs_param);
    /* clean up */
    m_mean_f = NULL;

    /* Note that ret should be zero if the minimization
     * process terminates without an error.
     * A non-zero value indicates an error.
     */
    if (m_enable_newton_if_fail && ret != 0 && ret != LBFGS_ALREADY_MINIMIZED)
    {
        /* If an error occurred during the L-BFGS optimization,
         * fall back to the original Newton method.
         */
        SG_WARNING("Error during L-BFGS optimization, using original Newton method as fallback\n");
        CLaplacianInferenceMethod::update_alpha();
        return;
    }
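
    /* with alpha at the minimizer, cache the quantities the rest of the
     * Laplace approximation needs: f, the log-likelihood derivatives,
     * W = -d2lp, and sW = sqrt(W) */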
    /* compute f = K * alpha + m */
    eigen_mu = eigen_ktrtr * (eigen_alpha * CMath::sq(m_scale)) + eigen_mean_f;

    /* get log probability derivatives */
    dlp = m_model->get_log_probability_derivative_f(m_labels, m_mu, 1);
    d2lp = m_model->get_log_probability_derivative_f(m_labels, m_mu, 2);
    d3lp = m_model->get_log_probability_derivative_f(m_labels, m_mu, 3);

    /* W = -d2lp */
    W = d2lp.clone();
    W.scale(-1.0);

    /* compute sW */
    Eigen::Map<Eigen::VectorXd> eigen_W(W.vector, W.vlen);
    /* create shogun and eigen representation of sW */
    sW = SGVector<float64_t>(W.vlen);
    Eigen::Map<Eigen::VectorXd> eigen_sW(sW.vector, sW.vlen);

    if (eigen_W.minCoeff() > 0)
        eigen_sW = eigen_W.cwiseSqrt();
    else
        eigen_sW.setZero();
}
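
/* Evaluates psi(alpha) = alpha' * (f - m) / 2 - sum(log p(y|f)) for the given
 * alpha; m_mean_f must have been set by update_alpha() beforehand. */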
void CLaplacianInferenceMethodWithLBFGS::get_psi_wrt_alpha(
    Eigen::Map<Eigen::VectorXd>* alpha,
    float64_t* psi)
{
    SGVector<float64_t> f(alpha->rows());
    Eigen::Map<Eigen::VectorXd> eigen_f(f.vector, f.vlen);
    Eigen::Map<Eigen::MatrixXd> kernel(m_ktrtr.matrix,
        m_ktrtr.num_rows,
        m_ktrtr.num_cols);
    Eigen::Map<Eigen::VectorXd> eigen_mean_f(m_mean_f->vector,
        m_mean_f->vlen);
    /* f = K * alpha + mean_f given alpha */
    eigen_f = kernel * ((*alpha) * CMath::sq(m_scale)) + eigen_mean_f;

    /* psi = 0.5 * alpha' * (f - m) - sum(lp) */
    *psi = alpha->dot(eigen_f - eigen_mean_f) * 0.5;
    *psi -= SGVector<float64_t>::sum(
        m_model->get_log_probability_f(m_labels, f));
}
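
/* Evaluates the gradient dpsi/dalpha = K * (alpha - dlp(f)) * scale^2, where
 * dlp is the first derivative of the log likelihood at f. */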
void CLaplacianInferenceMethodWithLBFGS::get_gradient_wrt_alpha(
    Eigen::Map<Eigen::VectorXd>* alpha,
    Eigen::Map<Eigen::VectorXd>* gradient)
{
    SGVector<float64_t> f(alpha->rows());
    Eigen::Map<Eigen::VectorXd> eigen_f(f.vector, f.vlen);
    Eigen::Map<Eigen::MatrixXd> kernel(m_ktrtr.matrix,
        m_ktrtr.num_rows,
        m_ktrtr.num_cols);
    Eigen::Map<Eigen::VectorXd> eigen_mean_f(m_mean_f->vector,
        m_mean_f->vlen);

    /* f = K * alpha + mean_f given alpha */
    eigen_f = kernel * ((*alpha) * CMath::sq(m_scale)) + eigen_mean_f;

    SGVector<float64_t> dlp_f =
        m_model->get_log_probability_derivative_f(m_labels, f, 1);

    Eigen::Map<Eigen::VectorXd> eigen_dlp_f(dlp_f.vector, dlp_f.vlen);

    /* g_alpha = K * (alpha - dlp_f) */
    *gradient = kernel * ((*alpha - eigen_dlp_f) * CMath::sq(m_scale));
}

} /* namespace shogun */
#endif /* HAVE_EIGEN3 */
