SHOGUN 4.1.0
KLDualInferenceMethod.cpp
Go to the documentation of this file.
/*
 * Copyright (c) The Shogun Machine Learning Toolbox
 * Written (w) 2014 Wu Lin
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation are those
 * of the authors and should not be interpreted as representing official policies,
 * either expressed or implied, of the Shogun Development Team.
 *
 * Code adapted from
 * http://hannes.nickisch.org/code/approxXX.tar.gz
 * and the Gaussian Process Machine Learning Toolbox
 * http://www.gaussianprocess.org/gpml/code/matlab/doc/
 * and the reference paper is
 * Mohammad Emtiyaz Khan, Aleksandr Y. Aravkin, Michael P. Friedlander, Matthias Seeger,
 * Fast Dual Variational Inference for Non-Conjugate Latent Gaussian Models. ICML 2013.
 *
 * This code is specifically adapted from the functions in approxKL.m and infKL.m.
 */

#include <shogun/machine/gp/KLDualInferenceMethod.h>

#ifdef HAVE_EIGEN3

#include <shogun/machine/gp/MatrixOperations.h>
#include <shogun/machine/gp/DualVariationalGaussianLikelihood.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/eigen3.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/optimization/lbfgs/lbfgs.h>

using namespace Eigen;

namespace shogun
{

CKLDualInferenceMethod::CKLDualInferenceMethod() : CKLInferenceMethod()
{
    init();
}

CKLDualInferenceMethod::CKLDualInferenceMethod(CKernel* kern,
    CFeatures* feat, CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod)
    : CKLInferenceMethod(kern, feat, m, lab, mod)
{
    init();
}

CKLDualInferenceMethod* CKLDualInferenceMethod::obtain_from_generic(
    CInferenceMethod* inference)
{
    if (inference==NULL)
        return NULL;

    if (inference->get_inference_type()!=INF_KL_DUAL)
        SG_SERROR("Provided inference is not of type CKLDualInferenceMethod!\n")

    SG_REF(inference);
    return (CKLDualInferenceMethod*)inference;
}

SGVector<float64_t> CKLDualInferenceMethod::get_alpha()
{
    if (parameter_hash_changed())
        update();

    SGVector<float64_t> result(m_alpha);
    return result;
}

CKLDualInferenceMethod::~CKLDualInferenceMethod()
{
}

void CKLDualInferenceMethod::check_dual_inference(CLikelihoodModel* mod) const
{
    CDualVariationalGaussianLikelihood* lik=dynamic_cast<CDualVariationalGaussianLikelihood*>(mod);
    REQUIRE(lik,
        "The provided likelihood model is not a dual variational likelihood model.\n");
}

void CKLDualInferenceMethod::set_model(CLikelihoodModel* mod)
{
    check_dual_inference(mod);
    CKLInferenceMethod::set_model(mod);
}

CDualVariationalGaussianLikelihood* CKLDualInferenceMethod::get_dual_variational_likelihood() const
{
    check_dual_inference(m_model);
    CDualVariationalGaussianLikelihood* lik=dynamic_cast<CDualVariationalGaussianLikelihood*>(m_model);
    return lik;
}

void CKLDualInferenceMethod::init()
{
    SG_ADD(&m_W, "W",
        "noise matrix W",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_sW, "sW",
        "Square root of noise matrix W",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_dv, "dv",
        "the gradient of the variational expectation wrt sigma2",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_df, "df",
        "the gradient of the variational expectation wrt mu",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_is_dual_valid, "is_dual_valid",
        "whether the lambda (m_W) is valid or not",
        MS_NOT_AVAILABLE);
    m_is_dual_valid=false;
}

bool CKLDualInferenceMethod::lbfgs_precompute()
{
    CDualVariationalGaussianLikelihood* lik=get_dual_variational_likelihood();
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<VectorXd> eigen_W(m_W.vector, m_W.vlen);

    lik->set_dual_parameters(m_W, m_labels);
    m_is_dual_valid=lik->dual_parameters_valid();

    if (!m_is_dual_valid)
        return false;

    //construct alpha
    m_alpha=lik->get_mu_dual_parameter();
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
    eigen_alpha=-eigen_alpha;

    Map<VectorXd> eigen_sW(m_sW.vector, m_sW.vlen);
    eigen_sW=eigen_W.array().sqrt().matrix();

    m_L=CMatrixOperations::get_choleksy(m_W, m_sW, m_ktrtr, CMath::exp(m_log_scale));
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);

    //solve L'*V=diag(sW)*K
    Map<MatrixXd> eigen_V(m_V.matrix, m_V.num_rows, m_V.num_cols);
    eigen_V=eigen_L.triangularView<Upper>().adjoint().solve(eigen_sW.asDiagonal()*eigen_K*CMath::exp(m_log_scale*2.0));
    Map<VectorXd> eigen_s2(m_s2.vector, m_s2.vlen);
    //Sigma=inv(inv(K)+diag(W))=K-K*diag(sW)*inv(L)'*inv(L)*diag(sW)*K
    //v=abs(diag(Sigma))
    eigen_s2=(eigen_K.diagonal().array()*CMath::exp(m_log_scale*2.0)-(eigen_V.array().pow(2).colwise().sum().transpose())).abs().matrix();

    //construct mu
    SGVector<float64_t> mean=m_mean->get_mean_vector(m_features);
    Map<VectorXd> eigen_mean(mean.vector, mean.vlen);
    Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);
    //mu=K*alpha+m
    eigen_mu=eigen_K*CMath::exp(m_log_scale*2.0)*eigen_alpha+eigen_mean;
    return true;
}

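/* Given the current dual variable lambda (stored in m_W), the function above
 * rebuilds the variational Gaussian q(f)=N(mu,Sigma):
 *   alpha = -(dual mean parameter),  mu = K*alpha + m,
 *   Sigma = inv(inv(K) + diag(W)),   s2 = abs(diag(Sigma)),
 * where K is the scaled kernel and L is the Cholesky factor with
 * L'*L = I + diag(sW)*K*diag(sW), following the GPML convention.
 * It returns false when lambda falls outside the valid domain of the dual
 * likelihood, which lets the line search shrink the step. */
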
float64_t CKLDualInferenceMethod::get_dual_objective_wrt_parameters()
{
    if (!m_is_dual_valid)
        return CMath::INFTY;

    SGVector<float64_t> mean=m_mean->get_mean_vector(m_features);
    Map<VectorXd> eigen_mean(mean.vector, mean.vlen);
    Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);

    CDualVariationalGaussianLikelihood* lik=get_dual_variational_likelihood();

    float64_t a=SGVector<float64_t>::sum(lik->get_dual_objective_value());
    float64_t result=0.5*eigen_alpha.dot(eigen_mu-eigen_mean)+a;
    result+=eigen_mean.dot(eigen_alpha);
    result-=eigen_L.diagonal().array().log().sum();

    return result;
}

void CKLDualInferenceMethod::get_gradient_of_dual_objective_wrt_parameters(SGVector<float64_t> gradient)
{
    REQUIRE(gradient.vlen==m_alpha.vlen,
        "The length of gradients (%d) should be the same as the length of parameters (%d)\n",
        gradient.vlen, m_alpha.vlen);

    if (!m_is_dual_valid)
        return;

    Map<VectorXd> eigen_gradient(gradient.vector, gradient.vlen);

    CDualVariationalGaussianLikelihood* lik=get_dual_variational_likelihood();

    TParameter* lambda_param=lik->m_parameters->get_parameter("lambda");
    SGVector<float64_t> d_lambda=lik->get_dual_first_derivative(lambda_param);
    Map<VectorXd> eigen_d_lambda(d_lambda.vector, d_lambda.vlen);

    Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);
    Map<VectorXd> eigen_s2(m_s2.vector, m_s2.vlen);

    eigen_gradient=-eigen_mu-0.5*eigen_s2+eigen_d_lambda;
}

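/* The two functions above implement the dual objective and its gradient wrt
 * lambda: the objective collects the Gaussian terms
 *   0.5*alpha'*(mu-m) + m'*alpha - sum(log(diag(L)))
 * plus the likelihood's dual contribution a, and the gradient is
 *   -mu - 0.5*s2 + d(dual)/d(lambda),
 * exactly as assembled in eigen_gradient. */
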
float64_t CKLDualInferenceMethod::get_nlml_wrapper(SGVector<float64_t> alpha, SGVector<float64_t> mu, SGMatrix<float64_t> L)
{
    Map<MatrixXd> eigen_L(L.matrix, L.num_rows, L.num_cols);
    Map<VectorXd> eigen_alpha(alpha.vector, alpha.vlen);
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    //get mean vector and create eigen representation of it
    SGVector<float64_t> mean=m_mean->get_mean_vector(m_features);
    Map<VectorXd> eigen_mu(mu.vector, mu.vlen);
    Map<VectorXd> eigen_mean(mean.vector, mean.vlen);

    CDualVariationalGaussianLikelihood* lik=get_dual_variational_likelihood();

    SGVector<float64_t> lab=((CBinaryLabels*)m_labels)->get_labels();
    Map<VectorXd> eigen_lab(lab.vector, lab.vlen);

    float64_t a=SGVector<float64_t>::sum(lik->get_variational_expection());

    float64_t trace=0;
    //L_inv=L\eye(n);
    //trace(L_inv'*L_inv) %V*inv(K)
    MatrixXd eigen_t=eigen_L.triangularView<Upper>().adjoint().solve(MatrixXd::Identity(eigen_L.rows(),eigen_L.cols()));

    for(index_t idx=0; idx<eigen_t.rows(); idx++)
        trace+=(eigen_t.col(idx).array().pow(2)).sum();

    //nlZ = -a -logdet(V*inv(K))/2 -n/2 +(alpha'*K*alpha)/2 +trace(V*inv(K))/2;
    float64_t result=-a+eigen_L.diagonal().array().log().sum();
    result+=0.5*(-eigen_K.rows()+eigen_alpha.dot(eigen_mu-eigen_mean)+trace);
    return result;
}

float64_t CKLDualInferenceMethod::get_negative_log_marginal_likelihood_helper()
{
    CDualVariationalGaussianLikelihood* lik=get_dual_variational_likelihood();
    bool status=lik->set_variational_distribution(m_mu, m_s2, m_labels);
    if (status)
        return get_nlml_wrapper(m_alpha, m_mu, m_L);
    return CMath::NOT_A_NUMBER;
}

float64_t CKLDualInferenceMethod::get_derivative_related_cov(SGMatrix<float64_t> dK)
{
    Map<MatrixXd> eigen_dK(dK.matrix, dK.num_rows, dK.num_cols);
    Map<VectorXd> eigen_W(m_W.vector, m_W.vlen);
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
    Map<VectorXd> eigen_sW(m_sW.vector, m_sW.vlen);
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

    Map<VectorXd> eigen_dv(m_dv.vector, m_dv.vlen);
    Map<VectorXd> eigen_df(m_df.vector, m_df.vlen);

    index_t len=m_W.vlen;
    //U=inv(L')*diag(sW)
    MatrixXd eigen_U=eigen_L.triangularView<Upper>().adjoint().solve(MatrixXd(eigen_sW.asDiagonal()));
    //A=I-K*diag(sW)*inv(L)*inv(L')*diag(sW)
    Map<MatrixXd> eigen_V(m_V.matrix, m_V.num_rows, m_V.num_cols);
    MatrixXd eigen_A=MatrixXd::Identity(len, len)-eigen_V.transpose()*eigen_U;

    //AdK = A*dK;
    MatrixXd AdK=eigen_A*eigen_dK;

    //z = diag(AdK) + sum(A.*AdK,2) - sum(A'.*AdK,1)';
    VectorXd z=AdK.diagonal()+(eigen_A.array()*AdK.array()).rowwise().sum().matrix()
        -(eigen_A.transpose().array()*AdK.array()).colwise().sum().transpose().matrix();

    float64_t result=eigen_alpha.dot(eigen_dK*(eigen_alpha/2.0-eigen_df))-z.dot(eigen_dv);

    return result;
}

void CKLDualInferenceMethod::update_alpha()
{
    float64_t nlml_new=0;
    float64_t nlml_def=0;

    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    CDualVariationalGaussianLikelihood* lik=get_dual_variational_likelihood();

    if (m_alpha.vlen == m_labels->get_num_labels())
    {
        nlml_new=get_negative_log_marginal_likelihood_helper();

        index_t len=m_alpha.vlen;
        SGVector<float64_t> W_tmp(len);
        Map<VectorXd> eigen_W(W_tmp.vector, W_tmp.vlen);
        eigen_W.fill(0.5);
        SGVector<float64_t> sW_tmp(len);
        Map<VectorXd> eigen_sW(sW_tmp.vector, sW_tmp.vlen);
        eigen_sW=eigen_W.array().sqrt().matrix();
        SGMatrix<float64_t> L_tmp=CMatrixOperations::get_choleksy(W_tmp, sW_tmp, m_ktrtr, CMath::exp(m_log_scale));
        Map<MatrixXd> eigen_L(L_tmp.matrix, L_tmp.num_rows, L_tmp.num_cols);

        lik->set_dual_parameters(W_tmp, m_labels);

        //construct alpha
        SGVector<float64_t> alpha_tmp=lik->get_mu_dual_parameter();
        Map<VectorXd> eigen_alpha(alpha_tmp.vector, alpha_tmp.vlen);
        eigen_alpha=-eigen_alpha;
        //construct mu
        SGVector<float64_t> mean=m_mean->get_mean_vector(m_features);
        Map<VectorXd> eigen_mean(mean.vector, mean.vlen);
        SGVector<float64_t> mu_tmp(len);
        Map<VectorXd> eigen_mu(mu_tmp.vector, mu_tmp.vlen);
        //mu=K*alpha+m
        eigen_mu=eigen_K*CMath::exp(m_log_scale*2.0)*eigen_alpha+eigen_mean;
        //construct s2
        MatrixXd eigen_V=eigen_L.triangularView<Upper>().adjoint().solve(eigen_sW.asDiagonal()*eigen_K*CMath::exp(m_log_scale*2.0));
        SGVector<float64_t> s2_tmp(len);
        Map<VectorXd> eigen_s2(s2_tmp.vector, s2_tmp.vlen);
        eigen_s2=(eigen_K.diagonal().array()*CMath::exp(m_log_scale*2.0)-(eigen_V.array().pow(2).colwise().sum().transpose())).abs().matrix();

        lik->set_variational_distribution(mu_tmp, s2_tmp, m_labels);

        nlml_def=get_nlml_wrapper(alpha_tmp, mu_tmp, L_tmp);

        if (nlml_new<=nlml_def)
        {
            lik->set_dual_parameters(m_W, m_labels);
            lik->set_variational_distribution(m_mu, m_s2, m_labels);
        }
    }

    if (m_alpha.vlen != m_labels->get_num_labels() || nlml_def<nlml_new)
    {
        if (m_alpha.vlen != m_labels->get_num_labels())
            m_alpha=SGVector<float64_t>(m_labels->get_num_labels());

        index_t len=m_alpha.vlen;

        m_W=SGVector<float64_t>(len);
        for (index_t i=0; i<m_W.vlen; i++)
            m_W[i]=0.5;

        lik->set_dual_parameters(m_W, m_labels);
        m_sW=SGVector<float64_t>(len);
        m_mu=SGVector<float64_t>(len);
        m_s2=SGVector<float64_t>(len);
        m_Sigma=SGMatrix<float64_t>(len, len);
        m_V=SGMatrix<float64_t>(len, len);
    }

    nlml_new=lbfgs_optimization();
    lik->set_variational_distribution(m_mu, m_s2, m_labels);
    TParameter* s2_param=lik->m_parameters->get_parameter("sigma2");
    m_dv=lik->get_variational_first_derivative(s2_param);
    TParameter* mu_param=lik->m_parameters->get_parameter("mu");
    m_df=lik->get_variational_first_derivative(mu_param);
}

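/* update_alpha() warm-starts the dual optimization: if a previous solution
 * exists, its objective (nlml_new) is compared against the default
 * initialization W=0.5 (nlml_def), and the better of the two is used as the
 * starting point before lbfgs_optimization() refines lambda. */
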
float64_t CKLDualInferenceMethod::adjust_step(void *obj,
    const float64_t *parameters, const float64_t *direction,
    const int dim, const float64_t step)
{
    /* Note that parameters = parameters_pre_iter - step * gradient_pre_iter */
    CKLDualInferenceMethod* obj_prt
        =static_cast<CKLDualInferenceMethod *>(obj);

    ASSERT(obj_prt != NULL);

    float64_t *non_const_direction=const_cast<float64_t *>(direction);
    SGVector<float64_t> sg_direction(non_const_direction, dim, false);

    CDualVariationalGaussianLikelihood* lik=obj_prt->get_dual_variational_likelihood();

    float64_t adjust_stp=lik->adjust_step_wrt_dual_parameter(sg_direction, step);
    return adjust_stp;
}

float64_t CKLDualInferenceMethod::evaluate(void *obj, const float64_t *parameters,
    float64_t *gradient, const int dim, const float64_t step)
{
    /* Note that parameters = parameters_pre_iter - step * gradient_pre_iter */
    CKLDualInferenceMethod* obj_prt
        =static_cast<CKLDualInferenceMethod *>(obj);

    ASSERT(obj_prt != NULL);

    bool status=obj_prt->lbfgs_precompute();
    if (status)
    {
        float64_t nlml=obj_prt->get_dual_objective_wrt_parameters();

        SGVector<float64_t> sg_gradient(gradient, dim, false);
        Map<VectorXd> eigen_g(sg_gradient.vector, sg_gradient.vlen);
        obj_prt->get_gradient_of_dual_objective_wrt_parameters(sg_gradient);

        return nlml;
    }
    return CMath::NOT_A_NUMBER;
}

float64_t CKLDualInferenceMethod::lbfgs_optimization()
{
    lbfgs_parameter_t lbfgs_param;
    lbfgs_param.m = m_m;
    lbfgs_param.max_linesearch = m_max_linesearch;
    lbfgs_param.linesearch = m_linesearch;
    lbfgs_param.max_iterations = m_max_iterations;
    lbfgs_param.delta = m_delta;
    lbfgs_param.past = m_past;
    lbfgs_param.epsilon = m_epsilon;
    lbfgs_param.min_step = m_min_step;
    lbfgs_param.max_step = m_max_step;
    lbfgs_param.ftol = m_ftol;
    lbfgs_param.wolfe = m_wolfe;
    lbfgs_param.gtol = m_gtol;
    lbfgs_param.xtol = m_xtol;
    lbfgs_param.orthantwise_c = m_orthantwise_c;
    lbfgs_param.orthantwise_start = m_orthantwise_start;
    lbfgs_param.orthantwise_end = m_orthantwise_end;

    float64_t nlml_opt=0;
    void * obj_prt = static_cast<void *>(this);

    Map<VectorXd> eigen_W(m_W.vector, m_W.vlen);
    lbfgs(m_W.vlen, m_W.vector, &nlml_opt,
        CKLDualInferenceMethod::evaluate,
        NULL, obj_prt, &lbfgs_param, CKLDualInferenceMethod::adjust_step);
    return nlml_opt;
}

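/* The L-BFGS driver minimizes the dual objective over lambda (stored in
 * m_W): evaluate() recomputes the posterior via lbfgs_precompute() and
 * returns the objective value and gradient, while adjust_step() shrinks
 * proposed steps so that lambda stays inside the valid domain of the dual
 * likelihood. */
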
SGVector<float64_t> CKLDualInferenceMethod::get_diagonal_vector()
{
    if (parameter_hash_changed())
        update();

    return SGVector<float64_t>(m_sW);
}

void CKLDualInferenceMethod::update_deriv()
{
    /* get_derivative_related_cov(SGMatrix<float64_t> dK) does a similar job,
     * therefore this function body is empty
     */
}

void CKLDualInferenceMethod::update_chol()
{
    /* L is automatically updated when update_alpha is called,
     * therefore this function body is empty
     */
}

void CKLDualInferenceMethod::update_approx_cov()
{
    m_Sigma=CMatrixOperations::get_inverse(m_L, m_ktrtr, m_sW, m_V, CMath::exp(m_log_scale));
}

} /* namespace shogun */

#endif /* HAVE_EIGEN3 */
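
For orientation, here is a minimal usage sketch of this inference method for GP binary classification. It is a sketch, not part of this file: it assumes a Gaussian kernel, a zero mean function, and a logit dual variational likelihood (CLogitDVGLikelihood); the toy data and header paths are illustrative and follow the Shogun 4.1 API rather than anything documented on this page.

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/machine/gp/ZeroMean.h>
#include <shogun/machine/gp/LogitDVGLikelihood.h>
#include <shogun/machine/gp/KLDualInferenceMethod.h>

using namespace shogun;

int main()
{
    init_shogun_with_defaults();

    // toy data: 2-dimensional features, 4 examples, labels in {-1,+1}
    SGMatrix<float64_t> X(2, 4);
    SGVector<float64_t> y(4);
    for (index_t i=0; i<4; i++)
    {
        X(0, i)=i;
        X(1, i)=(i%2==0) ? 1.0 : -1.0;
        y[i]=(i<2) ? 1.0 : -1.0;
    }

    CDenseFeatures<float64_t>* feats=new CDenseFeatures<float64_t>(X);
    CBinaryLabels* labels=new CBinaryLabels(y);
    CGaussianKernel* kernel=new CGaussianKernel(10, 2.0);
    CZeroMean* mean=new CZeroMean();
    CLogitDVGLikelihood* lik=new CLogitDVGLikelihood();

    // dual KL approximate inference, as implemented in this file
    CKLDualInferenceMethod* inf=new CKLDualInferenceMethod(
        kernel, feats, mean, labels, lik);
    SG_REF(inf);

    // triggers update_alpha()/lbfgs_optimization() internally
    SGVector<float64_t> alpha=inf->get_alpha();
    float64_t nlz=inf->get_negative_log_marginal_likelihood();
    SG_SPRINT("negative log marginal likelihood: %f\n", nlz);

    SG_UNREF(inf);
    exit_shogun();
    return 0;
}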
SHOGUN Machine Learning Toolbox - Project Documentation