SHOGUN  6.1.3
SingleFITCInference.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (W) 2015 Wu Lin
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * The views and conclusions contained in the software and documentation are those
27  * of the authors and should not be interpreted as representing official policies,
28  * either expressed or implied, of the Shogun Development Team.
29  *
30  */
31 
32 
34 
38 
39 using namespace shogun;
40 using namespace Eigen;
41 
43 {
44  init();
45 }
46 
48  CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod, CFeatures* lat)
49  : CSingleSparseInference(kern, feat, m, lab, mod, lat)
50 {
51  init();
52 }
53 
54 void CSingleFITCInference::init()
55 {
56  SG_ADD(&m_al, "al", "alpha", MS_NOT_AVAILABLE);
57  SG_ADD(&m_t, "t", "noise", MS_NOT_AVAILABLE);
58  SG_ADD(&m_B, "B", "B", MS_NOT_AVAILABLE);
59  SG_ADD(&m_w, "w", "B*al", MS_NOT_AVAILABLE);
60  SG_ADD(&m_Rvdd, "Rvdd", "Rvdd", MS_NOT_AVAILABLE);
61  SG_ADD(&m_V, "V", "V", MS_NOT_AVAILABLE);
62 }
63 
65 {
66 }
67 
69 {
70  //time complexity O(m*n)
72  Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
73 
75  Map<VectorXd> eigen_res(res.vector, res.vlen);
76  //-sum(W.*W,1)' - al.*al;
77  eigen_res=-eigen_W.cwiseProduct(eigen_W).colwise().sum().transpose()-eigen_al.array().pow(2).matrix();
78  return res;
79 }
80 
83 {
84  //time complexity O(m^2*n)
85  Map<VectorXd> eigen_w(m_w.vector, m_w.vlen);
88 
89  Map<MatrixXd> eigen_dKuui(dKuui.matrix, dKuui.num_rows, dKuui.num_cols);
90  Map<VectorXd> eigen_v(v.vector, v.vlen);
91  Map<MatrixXd> eigen_R(R.matrix, R.num_rows, R.num_cols);
92 
93  //-al'*(v.*al)-sum(W.*W,1)*v = v'*(-sum(W.*W,1)'-(al.*al))
95  Map<VectorXd> eigen_di(di.vector, di.vlen);
96 
97  //(w'*dKuui*w -al'*(v.*al)- sum(W.*W,1)*v - sum(sum((R*W').*BWt)))/2;
98  float64_t result=(eigen_w.dot(eigen_dKuui*eigen_w)+eigen_v.dot(eigen_di)-
99  (eigen_R*eigen_W.adjoint()).cwiseProduct(eigen_B*eigen_W.adjoint()).sum())/2.0;
100 
101  return result;
102 }
103 
106 {
107  //time complexity O(m^2*n)
109  Map<VectorXd> eigen_ddiagKi(ddiagKi.vector, ddiagKi.vlen);
110  Map<MatrixXd> eigen_dKuui(dKuui.matrix, dKuui.num_rows, dKuui.num_cols);
111  Map<MatrixXd> eigen_dKui(dKui.matrix, dKui.num_rows, dKui.num_cols);
112 
113  // compute R=2*dKui-dKuui*B
114  SGMatrix<float64_t> R(dKui.num_rows, dKui.num_cols);
115  Map<MatrixXd> eigen_R(R.matrix, R.num_rows, R.num_cols);
116  eigen_R=2*eigen_dKui-eigen_dKuui*eigen_B;
117 
118  // compute v=ddiagKi-sum(R.*B, 1)'
119  SGVector<float64_t> v(ddiagKi.vlen);
120  Map<VectorXd> eigen_v(v.vector, v.vlen);
121  eigen_v=eigen_ddiagKi-eigen_R.cwiseProduct(eigen_B).colwise().sum().adjoint();
122 
123  return get_derivative_related_cov(ddiagKi, dKuui, dKui, v, R);
124 }
125 
129 {
130  //time complexity O(m^2*n)
131  Map<VectorXd> eigen_t(m_t.vector, m_t.vlen);
132  Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
133  Map<VectorXd> eigen_w(m_w.vector, m_w.vlen);
135  Map<VectorXd> eigen_ddiagKi(ddiagKi.vector, ddiagKi.vlen);
136  Map<MatrixXd> eigen_dKuui(dKuui.matrix, dKuui.num_rows, dKuui.num_cols);
137  Map<MatrixXd> eigen_dKui(dKui.matrix, dKui.num_rows, dKui.num_cols);
138 
139  //(w'*dKuui*w -al'*(v.*al)- sum(W.*W,1)*v - sum(sum((R*W').*BWt)))/2;
140  float64_t result=get_derivative_related_cov_helper(dKuui, v, R);
141 
142  // compute dnlZ=(ddiagKi'*(1./g_sn2)+w'*(dKuui*w-2*(dKui*al))-al'*(v.*al)-
143  // sum(W.*W,1)*v- sum(sum((R*W').*(B*W'))))/2;
144  result+=(eigen_ddiagKi.dot(eigen_t))/2.0-
145  eigen_w.dot((eigen_dKui*eigen_al));
146  return result;
147 }
148 
150 {
151  //time complexity O(n)
152  Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
153  Map<VectorXd> eigen_dmu(dmu.vector, dmu.vlen);
154  return -eigen_dmu.dot(eigen_al);
155 }
156 
158  const TParameter* param)
159 {
160  //time complexity O(n)
161  REQUIRE(param, "Param not set\n");
162  SGVector<float64_t> result;
163  int64_t len=const_cast<TParameter *>(param)->m_datatype.get_num_elements();
164  result=SGVector<float64_t>(len);
165 
166  for (index_t i=0; i<result.vlen; i++)
167  {
169 
171 
172  // compute dnlZ=-dm'*al
173  result[i]=get_derivative_related_mean(dmu);
174  }
175 
176  return result;
177 }
178 
180  const TParameter* param)
181 {
182  //time complexity O(m^2*n)
183  REQUIRE(param, "Param not set\n");
184  REQUIRE(!strcmp(param->m_name, "log_inducing_noise"), "Can't compute derivative of "
185  "the nagative log marginal likelihood wrt %s.%s parameter\n",
186  get_name(), param->m_name)
187 
189 
191  Map<MatrixXd> eigen_R(R.matrix, R.num_rows, R.num_cols);
192  //dKuui = 2*snu2; R = -dKuui*B;
194  eigen_R=-eigen_B*factor;
195 
197  Map<VectorXd> eigen_v(v.vector, v.vlen);
198  //v = -sum(R.*B,1)';
199  eigen_v=-eigen_R.cwiseProduct(eigen_B).colwise().sum().adjoint();
200 
202 
203  SGVector<float64_t> result(1);
204  //(w'*dKuui*w -al'*(v.*al)- sum(W.*W,1)*v - sum(sum((R*W').*BWt)))/2;
205  result[0]=get_derivative_related_cov_helper(dKuui, v, R);
206 
207  return result;
208 }
209 
211  SGMatrix<float64_t> BdK, const TParameter* param)
212 {
213  //time complexity depends on the implementation of the provided kernel
214  //time complexity is at least O(p*n*m), where p is the dimension (#) of features
215  //For an ARD kernel with KL_FULL, the time complexity is O(p*n*m*d),
216  //where the paramter \f$\Lambda\f$ of the ARD kerenl is a \f$d\f$-by-\f$p\f$ matrix,
217  //For an ARD kernel with KL_SCALAR and KL_DIAG, the time complexity is O(p*n*m)
219  Map<MatrixXd> eigen_BdK(BdK.matrix, BdK.num_rows, BdK.num_cols);
220 
221  int32_t dim=m_inducing_features.num_rows;
222  int32_t num_samples=m_inducing_features.num_cols;
223  SGVector<float64_t>deriv_lat(dim*num_samples);
224  deriv_lat.zero();
225 
226  m_lock->lock();
227  CFeatures *inducing_features=get_inducing_features();
228  //asymtric part (related to xu and x)
229  m_kernel->init(inducing_features, m_features);
230  //A = (Kpu.*BdK)*diag(e);
231  //Kpu=1 in our setting
232  MatrixXd A=CMath::exp(m_log_scale*2.0)*eigen_BdK;
233  for(int32_t lat_idx=0; lat_idx<A.rows(); lat_idx++)
234  {
235  Map<VectorXd> deriv_lat_col_vec(deriv_lat.vector+lat_idx*dim,dim);
236  SGMatrix<float64_t> deriv_mat=m_kernel->get_parameter_gradient(param, lat_idx);
237  Map<MatrixXd> eigen_deriv_mat(deriv_mat.matrix, deriv_mat.num_rows, deriv_mat.num_cols);
238  deriv_lat_col_vec+=eigen_deriv_mat*(-A.row(lat_idx).transpose());
239  }
240 
241  //symtric part (related to xu and xu)
242  m_kernel->init(inducing_features, inducing_features);
243  //C = (Kpuu.*(BdK*B'))*diag(e);
244  //Kpuu=1 in our setting
245  MatrixXd C=CMath::exp(m_log_scale*2.0)*(eigen_BdK*eigen_B.transpose());
246  for(int32_t lat_lidx=0; lat_lidx<C.rows(); lat_lidx++)
247  {
248  Map<VectorXd> deriv_lat_col_vec(deriv_lat.vector+lat_lidx*dim,dim);
249  SGMatrix<float64_t> deriv_mat=m_kernel->get_parameter_gradient(param, lat_lidx);
250  Map<MatrixXd> eigen_deriv_mat(deriv_mat.matrix, deriv_mat.num_rows, deriv_mat.num_cols);
251  deriv_lat_col_vec+=eigen_deriv_mat*(C.row(lat_lidx).transpose());
252  }
253  SG_UNREF(inducing_features);
254  m_lock->unlock();
255  return deriv_lat;
256 }
257 
259 {
260  //time complexity depends on the implementation of the provided kernel
261  //time complexity is at least O(max((p*n*m),(m^2*n))), where p is the dimension (#) of features
262  //For an ARD kernel with KL_FULL, the time complexity is O(max((p*n*m*d),(m^2*n)))
263  //where the paramter \f$\Lambda\f$ of the ARD kerenl is a \f$d\f$-by-\f$p\f$ matrix,
264  //For an ARD kernel with KL_SCALE and KL_DIAG, the time complexity is O(max((p*n*m),(m^2*n)))
265  Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
267  Map<VectorXd> eigen_w(m_w.vector, m_w.vlen);
269 
270  //v = diag_dK-1./g_sn2;
272  Map<VectorXd> eigen_v(v.vector, v.vlen);
273 
274  //BdK = B.*repmat(v',nu,1) + BWt*W + (B*al)*al';
276  Map<MatrixXd> eigen_BdK(BdK.matrix, BdK.num_rows, BdK.num_cols);
277  eigen_BdK=eigen_B*eigen_v.asDiagonal()+eigen_w*(eigen_al.transpose())+
278  eigen_B*eigen_W.transpose()*eigen_W;
279 
280  return get_derivative_related_inducing_features(BdK, param);
281 }
virtual bool init(CFeatures *lhs, CFeatures *rhs)
Definition: Kernel.cpp:97
float64_t m_log_scale
Definition: Inference.h:485
virtual float64_t get_derivative_related_cov_helper(SGMatrix< float64_t > dKuui, SGVector< float64_t > v, SGMatrix< float64_t > R)
int32_t index_t
Definition: common.h:72
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:43
CKernel * m_kernel
Definition: Inference.h:464
Definition: SGMatrix.h:25
parameter struct
#define REQUIRE(x,...)
Definition: SGIO.h:181
virtual SGVector< float64_t > get_derivative_related_cov_diagonal()
virtual float64_t get_derivative_related_cov(SGVector< float64_t > ddiagKi, SGMatrix< float64_t > dKuui, SGMatrix< float64_t > dKui)
An abstract class of the mean function.
Definition: MeanFunction.h:49
CFeatures * m_features
Definition: Inference.h:473
CMeanFunction * m_mean
Definition: Inference.h:467
virtual SGVector< float64_t > get_derivative_wrt_inducing_noise(const TParameter *param)
The sparse inference base class for classification and regression for 1-D labels (1D regression and b...
virtual float64_t get_derivative_related_mean(SGVector< float64_t > dmu)
double float64_t
Definition: common.h:60
static SGMatrix< T > create_identity_matrix(index_t size, T scale)
virtual SGVector< float64_t > get_derivative_wrt_mean(const TParameter *param)
virtual CFeatures * get_inducing_features()
index_t num_rows
Definition: SGMatrix.h:495
index_t num_cols
Definition: SGMatrix.h:497
virtual SGVector< float64_t > get_derivative_related_inducing_features(SGMatrix< float64_t > BdK, const TParameter *param)
SG_FORCED_INLINE void lock()
Definition: Lock.h:23
virtual SGVector< float64_t > get_parameter_derivative(const CFeatures *features, const TParameter *param, index_t index=-1)
Definition: MeanFunction.h:73
#define SG_UNREF(x)
Definition: SGObject.h:53
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
T sum(const Container< T > &a, bool no_diag=false)
SGMatrix< float64_t > m_inducing_features
The class Features is the base class of all feature objects.
Definition: Features.h:69
static float64_t exp(float64_t x)
Definition: Math.h:551
virtual SGMatrix< float64_t > get_parameter_gradient(const TParameter *param, index_t index=-1)
The Kernel base class.
SG_FORCED_INLINE void unlock()
Definition: Lock.h:34
#define SG_ADD(...)
Definition: SGObject.h:93
virtual const char * get_name() const
virtual SGVector< float64_t > get_derivative_wrt_inducing_features(const TParameter *param)
The Likelihood model base class.
index_t vlen
Definition: SGVector.h:571

SHOGUN Machine Learning Toolbox - Documentation