SHOGUN  6.1.3
KLCholeskyInferenceMethod.cpp
Go to the documentation of this file.
1  /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (w) 2014 Wu Lin
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * The views and conclusions contained in the software and documentation are those
27  * of the authors and should not be interpreted as representing official policies,
28  * either expressed or implied, of the Shogun Development Team.
29  *
30  * Code adapted from
31  * http://hannes.nickisch.org/code/approxXX.tar.gz
32  * and Gaussian Process Machine Learning Toolbox
33  * http://www.gaussianprocess.org/gpml/code/matlab/doc/
34  * and the reference paper is
35  * Challis, Edward, and David Barber.
36  * "Concave Gaussian variational approximations for inference in large-scale Bayesian linear models."
37  * International conference on Artificial Intelligence and Statistics. 2011.
38  *
39  * This code specifically adapted from function in approxKL.m and infKL.m
40  */
41 
43 
48 
49 using namespace Eigen;
50 
51 namespace shogun
52 {
53 
54 CKLCholeskyInferenceMethod::CKLCholeskyInferenceMethod() : CKLLowerTriangularInference()
55 {
56  init();
57 }
58 
60  CFeatures* feat, CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod)
61  : CKLLowerTriangularInference(kern, feat, m, lab, mod)
62 {
63  init();
64 }
65 
66 void CKLCholeskyInferenceMethod::init()
67 {
68  SG_ADD(&m_C, "C",
69  "The Cholesky represention of the variational co-variance matrix",
71  SG_ADD(&m_InvK_C, "invK_C",
72  " The K^{-1}C matrix",
74 }
75 
77  CInference* inference)
78 {
79  if (inference==NULL)
80  return NULL;
81 
82  if (inference->get_inference_type()!=INF_KL_CHOLESKY)
83  SG_SERROR("Provided inference is not of type CKLCholeskyInferenceMethod!\n")
84 
85  SG_REF(inference);
86  return (CKLCholeskyInferenceMethod*)inference;
87 }
88 
90 {
99  update();
100 
101  index_t len=m_mu.vlen;
102  SGVector<float64_t> result(len);
103 
104  Map<VectorXd> eigen_result(result.vector, len);
105  Map<VectorXd> eigen_alpha(m_alpha.vector, len);
106 
107  eigen_result=eigen_alpha;
108 
109  return result;
110 }
111 
113 {
114 }
115 
117 {
118  index_t len=m_mean_vec.vlen;
121  Map<VectorXd> eigen_alpha(m_alpha.vector, len);
122 
123  Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);
124  //mu=K*alpha+m
125  eigen_mu=eigen_K*CMath::exp(m_log_scale*2.0)*eigen_alpha+eigen_mean;
126 
127  update_C();
128  Map<MatrixXd> eigen_C(m_C.matrix, m_C.num_rows, m_C.num_cols);
129  Map<VectorXd> eigen_s2(m_s2.vector, m_s2.vlen);
130  //s2=sum(C.*C,2);
131  eigen_s2=(eigen_C.array()*eigen_C.array()).rowwise().sum().matrix();
132 
134  bool status = lik->set_variational_distribution(m_mu, m_s2, m_labels);
135  if (status)
136  {
137  Map<MatrixXd> eigen_InvK_C(m_InvK_C.matrix, m_InvK_C.num_rows, m_InvK_C.num_cols);
138  eigen_InvK_C=solve_inverse(eigen_C);
139  }
140  return status;
141 }
142 
144 {
145  REQUIRE(gradient.vlen==m_alpha.vlen,
146  "The length of gradients (%d) should the same as the length of parameters (%d)\n",
147  gradient.vlen, m_alpha.vlen);
148 
150  Map<MatrixXd> eigen_C(m_C.matrix, m_C.num_rows, m_C.num_cols);
151 
152  index_t len=m_mu.vlen;
153  Map<VectorXd> eigen_alpha(m_alpha.vector, len);
154  Map<VectorXd> eigen_C_seq(m_alpha.vector+len, m_alpha.vlen-len);
155 
157  //[a,df,dV] = a_related2(mu,s2,y,lik);
158  TParameter* s2_param=lik->m_parameters->get_parameter("sigma2");
160  Map<VectorXd> eigen_dv(dv.vector, dv.vlen);
161 
162  TParameter* mu_param=lik->m_parameters->get_parameter("mu");
164  Map<VectorXd> eigen_df(df.vector, df.vlen);
165 
166  Map<VectorXd> eigen_dnlz_alpha(gradient.vector, len);
167  //dnlZ_alpha = -K*(df-alpha);
168  eigen_dnlz_alpha=eigen_K*CMath::exp(m_log_scale*2.0)*(-eigen_df+eigen_alpha);
169 
170  Map<VectorXd> eigen_dnlz_C_seq(gradient.vector+len, gradient.vlen-len);
171 
172  SGVector<float64_t> tmp(eigen_dnlz_C_seq.rows());
173  Map<VectorXd> eigen_tmp(tmp.vector, tmp.vlen);
174 
175  //dnlZ_C=low_matrix_to_vector(invK_C)-convert_diag(1.0./diag(C))-2*(alla(n+1:end,1).*convert_dC(dv));
176  float64_t offset=0;
177  for (index_t i=0; i<len; i++)
178  {
179  eigen_tmp.block(offset, 0, len-i, 1)=VectorXd::Map(eigen_dv.data()+i, len-i);
180  offset+=(len-i);
181  }
182 
183  //-2*(alla(n+1:end,1).*convert_dC(dV))
184  eigen_dnlz_C_seq=(-2.0*(eigen_C_seq.array()*eigen_tmp.array())).matrix();
185  //low_matrix_to_vector(invK_C)
186  get_lower_triangular_vector(m_InvK_C, tmp);
187  eigen_dnlz_C_seq+=eigen_tmp;
188 
189  Map<VectorXd> eigen_tmp2(tmp.vector, eigen_C.rows());
190  //-convert_diag(1.0./diag(C))
191  eigen_tmp2=(1.0/eigen_C.diagonal().array()).matrix();
192 
193  offset=0;
194  for (index_t i=0; i<len; i++)
195  {
196  eigen_dnlz_C_seq.block(offset,0,1,1)-=VectorXd::Map(eigen_tmp2.data()+i,1);
197  offset+=(len-i);
198  }
199 }
200 
202 {
203  Map<VectorXd> eigen_alpha(m_alpha.vector, m_mu.vlen);
204  Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);
206  //get mean vector and create eigen representation of it
208 
209  Map<MatrixXd> eigen_InvK_C(m_InvK_C.matrix, m_InvK_C.num_rows, m_InvK_C.num_cols);
210  Map<MatrixXd> eigen_C(m_C.matrix, m_C.num_rows, m_C.num_cols);
211 
214 
215  //float64_t log_det=2.0*log_det(eigen_C)-m_log_det_Kernel;
216  float64_t log_det=2.0*eigen_C.diagonal().array().abs().log().sum()-m_log_det_Kernel;
217  float64_t trace=(eigen_InvK_C.array()*eigen_C.array()).sum();
218 
219  //nlZ = -a -logdet(V*inv(K))/2 -n/2 +(alpha'*K*alpha)/2 +trace(V*inv(K))/2;
220  float64_t result=-a+0.5*(-eigen_K.rows()+eigen_alpha.dot(eigen_mu-eigen_mean)+trace-log_det);
221  return result;
222 }
223 
225 {
227 
228  float64_t nlml_new=0;
229  float64_t nlml_def=0;
230 
232  index_t total_len=len*(len+3);
233 
234  if (m_alpha.vlen*2 == total_len)
235  {
237 
238  SGVector<float64_t> s2_tmp(m_s2.vlen);
239  Map<VectorXd> eigen_s2(s2_tmp.vector, s2_tmp.vlen);
240  eigen_s2.fill(1.0);
244  MatrixXd inv_K=solve_inverse(MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols));
245  float64_t trace=inv_K.diagonal().array().sum();
246  nlml_def=-a+0.5*(-eigen_K.rows()+trace+m_log_det_Kernel);
247 
248  if (nlml_new<=nlml_def)
250  }
251 
252  if (m_alpha.vlen*2 != total_len || nlml_def<nlml_new)
253  {
254  if(m_alpha.vlen*2 != total_len)
255  m_alpha = SGVector<float64_t>(total_len/2);
256  m_alpha.zero();
257  index_t offset=0;
258  index_t count=0;
259  //init
260  for (index_t i=0; i<m_alpha.vlen; i++)
261  {
262  if (i-len==offset)
263  {
264  m_alpha[i]=1.0;
265  offset+=(len-count);
266  count++;
267  }
268  }
269  m_InvK_C=SGMatrix<float64_t>(len, len);
270  m_C=SGMatrix<float64_t>(len, len);
271  m_C.zero();
274  }
275 
276  nlml_new=optimization();
277 }
278 
279 void CKLCholeskyInferenceMethod::update_C()
280 {
281  ASSERT(m_C.num_rows == m_C.num_cols);
282  index_t len=m_C.num_rows;
283  ASSERT(m_alpha.vlen*2 == len*(len+3));
284 
285  Map<MatrixXd> eigen_C(m_C.matrix, m_C.num_rows, m_C.num_cols);
286  Map<VectorXd> eigen_C_seq(m_alpha.vector+len, m_alpha.vlen-len);
287 
288  index_t offset=0;
289  for (index_t i=0; i<len; i++)
290  {
291  eigen_C.block(i, i, len-i ,1)=VectorXd::Map(eigen_C_seq.data()+offset, len-i);
292  offset+=(len-i);
293  }
294 }
295 
296 void CKLCholeskyInferenceMethod::get_lower_triangular_vector(SGMatrix<float64_t> square_matrix,
297  SGVector<float64_t> target)
298 {
299  ASSERT(square_matrix.num_rows == square_matrix.num_cols);
300  index_t len=m_InvK_C.num_rows;
301  ASSERT(target.vlen*2 == len*(len+1));
302 
303  Map<MatrixXd> eigen_square_matrix(square_matrix.matrix, len, len);
304  Map<VectorXd> eigen_result(target.vector, target.vlen);
305 
306  index_t offset=0;
307  for (index_t i=0; i<len; i++)
308  {
309  eigen_result.block(offset, 0, len-i, 1)=eigen_square_matrix.block(i, i, len-i, 1);
310  offset+=(len-i);
311  }
312 }
313 
315 {
318  Map<MatrixXd> eigen_C(m_C.matrix, m_C.num_rows, m_C.num_cols);
319  eigen_Sigma=eigen_C*(eigen_C.transpose());
320 }
321 
323 {
326  Map<MatrixXd> eigen_InvK_C(m_InvK_C.matrix, m_InvK_C.num_rows, m_InvK_C.num_cols);
327  Map<MatrixXd> eigen_C(m_C.matrix, m_C.num_rows, m_C.num_cols);
328  eigen_InvK_Sigma=eigen_InvK_C*(eigen_C.transpose());
329 }
330 
331 } /* namespace shogun */
332 
float64_t m_log_scale
Definition: Inference.h:485
virtual bool set_variational_distribution(SGVector< float64_t > mu, SGVector< float64_t > s2, const CLabels *lab)
virtual float64_t optimization()
virtual void update()
virtual SGVector< float64_t > get_variational_first_derivative(const TParameter *param) const =0
int32_t index_t
Definition: common.h:72
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:43
virtual EInferenceType get_inference_type() const
Definition: Inference.h:104
virtual CVariationalGaussianLikelihood * get_variational_likelihood() const
virtual float64_t get_negative_log_marginal_likelihood_helper()
virtual int32_t get_num_labels() const =0
static T sum(T *vec, int32_t len)
Return sum(vec)
Definition: SGVector.h:418
The variational Gaussian Likelihood base class. The variational distribution is Gaussian.
TParameter * get_parameter(int32_t idx)
Definition: SGMatrix.h:25
parameter struct
#define REQUIRE(x,...)
Definition: SGIO.h:181
Parameter * m_parameters
Definition: SGObject.h:609
An abstract class of the mean function.
Definition: MeanFunction.h:49
virtual SGVector< float64_t > get_alpha()
#define SG_REF(x)
Definition: SGObject.h:52
T trace(const SGMatrix< T > &A)
virtual void get_gradient_of_nlml_wrt_parameters(SGVector< float64_t > gradient)
SGMatrix< float64_t > m_ktrtr
Definition: Inference.h:488
The KL approximation inference method class.
#define ASSERT(x)
Definition: SGIO.h:176
CLabels * m_labels
Definition: Inference.h:476
double float64_t
Definition: common.h:60
static CKLCholeskyInferenceMethod * obtain_from_generic(CInference *inference)
index_t num_rows
Definition: SGMatrix.h:495
SGVector< float64_t > m_mu
Definition: KLInference.h:367
virtual SGVector< float64_t > get_variational_expection()=0
index_t num_cols
Definition: SGMatrix.h:497
Eigen::MatrixXd solve_inverse(Eigen::MatrixXd A)
SGMatrix< float64_t > m_Sigma
Definition: KLInference.h:370
SGVector< float64_t > m_s2
Definition: KLInference.h:375
The KL approximation inference method class.
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
T sum(const Container< T > &a, bool no_diag=false)
The Inference Method base class.
Definition: Inference.h:81
The class Features is the base class of all feature objects.
Definition: Features.h:69
#define SG_SERROR(...)
Definition: SGIO.h:164
static float64_t exp(float64_t x)
Definition: Math.h:551
The Kernel base class.
#define SG_ADD(...)
Definition: SGObject.h:93
virtual bool parameter_hash_changed()
Definition: SGObject.cpp:296
The Likelihood model base class.
index_t vlen
Definition: SGVector.h:571
SGVector< float64_t > m_alpha
Definition: Inference.h:479

SHOGUN Machine Learning Toolbox - Documentation