SHOGUN  4.1.0
 全部  命名空间 文件 函数 变量 类型定义 枚举 枚举值 友元 宏定义  
SingleFITCLaplacianBase.cpp
浏览该文件的文档.
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (W) 2015 Wu Lin
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * The views and conclusions contained in the software and documentation are those
27  * of the authors and should not be interpreted as representing official policies,
28  * either expressed or implied, of the Shogun Development Team.
29  *
30  */
31 
33 
34 #ifdef HAVE_NLOPT
35 #include <nlopt.h>
37 #endif
38 
39 #ifdef HAVE_EIGEN3
40 
44 
45 using namespace shogun;
46 using namespace Eigen;
47 
49 {
50  init();
51 }
52 
54  CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod, CFeatures* lat)
55  : CSingleSparseInferenceBase(kern, feat, m, lab, mod, lat)
56 {
57  init();
58 }
59 
60 void CSingleFITCLaplacianBase::init()
61 {
62  SG_ADD(&m_al, "al", "alpha", MS_NOT_AVAILABLE);
63  SG_ADD(&m_t, "t", "noise", MS_NOT_AVAILABLE);
64  SG_ADD(&m_B, "B", "B", MS_NOT_AVAILABLE);
65  SG_ADD(&m_w, "w", "B*al", MS_NOT_AVAILABLE);
66  SG_ADD(&m_Rvdd, "Rvdd", "Rvdd", MS_NOT_AVAILABLE);
67  SG_ADD(&m_V, "V", "V", MS_NOT_AVAILABLE);
68 }
69 
71 {
72 }
73 
75 {
76  //time complexity O(m*n)
78  Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
79 
81  Map<VectorXd> eigen_res(res.vector, res.vlen);
82  //-sum(W.*W,1)' - al.*al;
83  eigen_res=-eigen_W.cwiseProduct(eigen_W).colwise().sum().transpose()-eigen_al.array().pow(2).matrix();
84  return res;
85 }
86 
89 {
90  //time complexity O(m^2*n)
91  Map<VectorXd> eigen_w(m_w.vector, m_w.vlen);
94 
95  Map<MatrixXd> eigen_dKuui(dKuui.matrix, dKuui.num_rows, dKuui.num_cols);
96  Map<VectorXd> eigen_v(v.vector, v.vlen);
97  Map<MatrixXd> eigen_R(R.matrix, R.num_rows, R.num_cols);
98 
99  //-al'*(v.*al)-sum(W.*W,1)*v = v'*(-sum(W.*W,1)'-(al.*al))
101  Map<VectorXd> eigen_di(di.vector, di.vlen);
102 
103  //(w'*dKuui*w -al'*(v.*al)- sum(W.*W,1)*v - sum(sum((R*W').*BWt)))/2;
104  float64_t result=(eigen_w.dot(eigen_dKuui*eigen_w)+eigen_v.dot(eigen_di)-
105  (eigen_R*eigen_W.adjoint()).cwiseProduct(eigen_B*eigen_W.adjoint()).sum())/2.0;
106 
107  return result;
108 }
109 
112 {
113  //time complexity O(m^2*n)
115  Map<VectorXd> eigen_ddiagKi(ddiagKi.vector, ddiagKi.vlen);
116  Map<MatrixXd> eigen_dKuui(dKuui.matrix, dKuui.num_rows, dKuui.num_cols);
117  Map<MatrixXd> eigen_dKui(dKui.matrix, dKui.num_rows, dKui.num_cols);
118 
119  // compute R=2*dKui-dKuui*B
120  SGMatrix<float64_t> R(dKui.num_rows, dKui.num_cols);
121  Map<MatrixXd> eigen_R(R.matrix, R.num_rows, R.num_cols);
122  eigen_R=2*eigen_dKui-eigen_dKuui*eigen_B;
123 
124  // compute v=ddiagKi-sum(R.*B, 1)'
125  SGVector<float64_t> v(ddiagKi.vlen);
126  Map<VectorXd> eigen_v(v.vector, v.vlen);
127  eigen_v=eigen_ddiagKi-eigen_R.cwiseProduct(eigen_B).colwise().sum().adjoint();
128 
129  return get_derivative_related_cov(ddiagKi, dKuui, dKui, v, R);
130 }
131 
135 {
136  //time complexity O(m^2*n)
137  Map<VectorXd> eigen_t(m_t.vector, m_t.vlen);
138  Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
139  Map<VectorXd> eigen_w(m_w.vector, m_w.vlen);
141  Map<VectorXd> eigen_ddiagKi(ddiagKi.vector, ddiagKi.vlen);
142  Map<MatrixXd> eigen_dKuui(dKuui.matrix, dKuui.num_rows, dKuui.num_cols);
143  Map<MatrixXd> eigen_dKui(dKui.matrix, dKui.num_rows, dKui.num_cols);
144 
145  //(w'*dKuui*w -al'*(v.*al)- sum(W.*W,1)*v - sum(sum((R*W').*BWt)))/2;
146  float64_t result=get_derivative_related_cov_helper(dKuui, v, R);
147 
148  // compute dnlZ=(ddiagKi'*(1./g_sn2)+w'*(dKuui*w-2*(dKui*al))-al'*(v.*al)-
149  // sum(W.*W,1)*v- sum(sum((R*W').*(B*W'))))/2;
150  result+=(eigen_ddiagKi.dot(eigen_t))/2.0-
151  eigen_w.dot((eigen_dKui*eigen_al));
152  return result;
153 }
154 
156 {
157  //time complexity O(n)
158  Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
159  Map<VectorXd> eigen_dmu(dmu.vector, dmu.vlen);
160  return -eigen_dmu.dot(eigen_al);
161 }
162 
164  const TParameter* param)
165 {
166  //time complexity O(n)
167  REQUIRE(param, "Param not set\n");
168  SGVector<float64_t> result;
169  int64_t len=const_cast<TParameter *>(param)->m_datatype.get_num_elements();
170  result=SGVector<float64_t>(len);
171 
172  for (index_t i=0; i<result.vlen; i++)
173  {
175 
177 
178  // compute dnlZ=-dm'*al
179  result[i]=get_derivative_related_mean(dmu);
180  }
181 
182  return result;
183 }
184 
186  const TParameter* param)
187 {
188  //time complexity O(m^2*n)
189  REQUIRE(param, "Param not set\n");
190  REQUIRE(!strcmp(param->m_name, "log_inducing_noise"), "Can't compute derivative of "
191  "the nagative log marginal likelihood wrt %s.%s parameter\n",
192  get_name(), param->m_name)
193 
195 
197  Map<MatrixXd> eigen_R(R.matrix, R.num_rows, R.num_cols);
198  //dKuui = 2*snu2; R = -dKuui*B;
200  eigen_R=-eigen_B*factor;
201 
203  Map<VectorXd> eigen_v(v.vector, v.vlen);
204  //v = -sum(R.*B,1)';
205  eigen_v=-eigen_R.cwiseProduct(eigen_B).colwise().sum().adjoint();
206 
208 
209  SGVector<float64_t> result(1);
210  //(w'*dKuui*w -al'*(v.*al)- sum(W.*W,1)*v - sum(sum((R*W').*BWt)))/2;
211  result[0]=get_derivative_related_cov_helper(dKuui, v, R);
212 
213  return result;
214 }
215 
217  SGMatrix<float64_t> BdK, const TParameter* param)
218 {
219  //time complexity depends on the implementation of the provided kernel
220  //time complexity is at least O(p*n*m), where p is the dimension (#) of features
221  //For an ARD kernel with KL_FULL, the time complexity is O(p*n*m*d),
222  //where the paramter \f$\Lambda\f$ of the ARD kerenl is a \f$d\f$-by-\f$p\f$ matrix,
223  //For an ARD kernel with KL_SCALAR and KL_DIAG, the time complexity is O(p*n*m)
225  Map<MatrixXd> eigen_BdK(BdK.matrix, BdK.num_rows, BdK.num_cols);
226 
227  int32_t dim=m_inducing_features.num_rows;
228  int32_t num_samples=m_inducing_features.num_cols;
229  SGVector<float64_t>deriv_lat(dim*num_samples);
230  deriv_lat.zero();
231 
232  m_lock->lock();
233  CFeatures *inducing_features=get_inducing_features();
234  //asymtric part (related to xu and x)
235  m_kernel->init(inducing_features, m_features);
236  //A = (Kpu.*BdK)*diag(e);
237  //Kpu=1 in our setting
238  MatrixXd A=CMath::exp(m_log_scale*2.0)*eigen_BdK;
239  for(int32_t lat_idx=0; lat_idx<A.rows(); lat_idx++)
240  {
241  Map<VectorXd> deriv_lat_col_vec(deriv_lat.vector+lat_idx*dim,dim);
242  SGMatrix<float64_t> deriv_mat=m_kernel->get_parameter_gradient(param, lat_idx);
243  Map<MatrixXd> eigen_deriv_mat(deriv_mat.matrix, deriv_mat.num_rows, deriv_mat.num_cols);
244  deriv_lat_col_vec+=eigen_deriv_mat*(-A.row(lat_idx).transpose());
245  }
246 
247  //symtric part (related to xu and xu)
248  m_kernel->init(inducing_features, inducing_features);
249  //C = (Kpuu.*(BdK*B'))*diag(e);
250  //Kpuu=1 in our setting
251  MatrixXd C=CMath::exp(m_log_scale*2.0)*(eigen_BdK*eigen_B.transpose());
252  for(int32_t lat_lidx=0; lat_lidx<C.rows(); lat_lidx++)
253  {
254  Map<VectorXd> deriv_lat_col_vec(deriv_lat.vector+lat_lidx*dim,dim);
255  SGMatrix<float64_t> deriv_mat=m_kernel->get_parameter_gradient(param, lat_lidx);
256  Map<MatrixXd> eigen_deriv_mat(deriv_mat.matrix, deriv_mat.num_rows, deriv_mat.num_cols);
257  deriv_lat_col_vec+=eigen_deriv_mat*(C.row(lat_lidx).transpose());
258  }
259  SG_UNREF(inducing_features);
260  m_lock->unlock();
261  return deriv_lat;
262 }
263 
265 {
266  //time complexity depends on the implementation of the provided kernel
267  //time complexity is at least O(max((p*n*m),(m^2*n))), where p is the dimension (#) of features
268  //For an ARD kernel with KL_FULL, the time complexity is O(max((p*n*m*d),(m^2*n)))
269  //where the paramter \f$\Lambda\f$ of the ARD kerenl is a \f$d\f$-by-\f$p\f$ matrix,
270  //For an ARD kernel with KL_SCALE and KL_DIAG, the time complexity is O(max((p*n*m),(m^2*n)))
271  Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
273  Map<VectorXd> eigen_w(m_w.vector, m_w.vlen);
275 
276  //v = diag_dK-1./g_sn2;
278  Map<VectorXd> eigen_v(v.vector, v.vlen);
279 
280  //BdK = B.*repmat(v',nu,1) + BWt*W + (B*al)*al';
282  Map<MatrixXd> eigen_BdK(BdK.matrix, BdK.num_rows, BdK.num_cols);
283  eigen_BdK=eigen_B*eigen_v.asDiagonal()+eigen_w*(eigen_al.transpose())+
284  eigen_B*eigen_W.transpose()*eigen_W;
285 
286  return get_derivative_related_inducing_features(BdK, param);
287 }
288 #endif /* HAVE_EIGEN3 */
virtual bool init(CFeatures *lhs, CFeatures *rhs)
Definition: Kernel.cpp:98
int32_t index_t
Definition: common.h:62
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:43
Definition: SGMatrix.h:20
virtual SGVector< float64_t > get_derivative_wrt_mean(const TParameter *param)
parameter struct
#define REQUIRE(x,...)
Definition: SGIO.h:206
void unlock()
Definition: Lock.cpp:64
index_t num_cols
Definition: SGMatrix.h:378
An abstract class of the mean function.
Definition: MeanFunction.h:49
virtual SGVector< float64_t > get_derivative_wrt_inducing_noise(const TParameter *param)
SGMatrix< float64_t > m_inducing_features
index_t num_rows
Definition: SGMatrix.h:376
index_t vlen
Definition: SGVector.h:494
virtual float64_t get_derivative_related_cov(SGVector< float64_t > ddiagKi, SGMatrix< float64_t > dKuui, SGMatrix< float64_t > dKui)
virtual const char * get_name() const
double float64_t
Definition: common.h:50
virtual SGVector< float64_t > get_derivative_related_cov_diagonal()
The sparse inference base class for classification and regression for 1-D labels (1D regression and b...
virtual SGVector< float64_t > get_derivative_related_inducing_features(SGMatrix< float64_t > BdK, const TParameter *param)
virtual float64_t get_derivative_related_mean(SGVector< float64_t > dmu)
virtual SGVector< float64_t > get_parameter_derivative(const CFeatures *features, const TParameter *param, index_t index=-1)
Definition: MeanFunction.h:73
#define SG_UNREF(x)
Definition: SGObject.h:52
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
virtual CFeatures * get_inducing_features()
The class Features is the base class of all feature objects.
Definition: Features.h:68
static float64_t exp(float64_t x)
Definition: Math.h:621
virtual SGMatrix< float64_t > get_parameter_gradient(const TParameter *param, index_t index=-1)
Definition: Kernel.h:850
The Kernel base class.
Definition: Kernel.h:158
#define SG_ADD(...)
Definition: SGObject.h:81
virtual SGVector< float64_t > get_derivative_wrt_inducing_features(const TParameter *param)
static SGMatrix< T > create_identity_matrix(index_t size, T scale)
void lock()
Definition: Lock.cpp:57
The Likelihood model base class.
virtual float64_t get_derivative_related_cov_helper(SGMatrix< float64_t > dKuui, SGVector< float64_t > v, SGMatrix< float64_t > R)

SHOGUN 机器学习工具包 - 项目文档