SHOGUN  4.1.0
 All  Namespaces  Files  Functions  Variables  Typedefs  Enumerations  Enumerator  Friends  Macros  
SingleSparseInferenceBase.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (W) 2015 Wu Lin
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * The views and conclusions contained in the software and documentation are those
27  * of the authors and should not be interpreted as representing official policies,
28  * either expressed or implied, of the Shogun Development Team.
29  *
30  */
31 
33 
34 #ifdef HAVE_NLOPT
35 #include <nlopt.h>
37 #endif
38 
39 #ifdef HAVE_EIGEN3
40 
44 
45 using namespace shogun;
46 using namespace Eigen;
47 
// Default constructor body. NOTE(review): the signature line (Doxygen source
// line 48, presumably CSingleSparseInferenceBase::CSingleSparseInferenceBase())
// was lost in extraction -- confirm against the original source file.
49 {
50  init();
51 }
52 
// Constructor taking kernel, features, mean function, labels, likelihood model
// and latent (inducing) features; forwards everything to the
// CSparseInferenceBase base-class constructor and then registers this class'
// own parameters via init().
// NOTE(review): the first line of the signature (Doxygen source line 53, the
// part declaring kern and feat) was lost in extraction.
54  CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod, CFeatures* lat)
55  : CSparseInferenceBase(kern, feat, m, lab, mod, lat)
56 {
57  init();
59 }
60 
// Registers this class' model-selection parameters (SG_ADD) and sets default
// member values. Called from every constructor.
// NOTE(review): extraction gaps -- Doxygen source lines 76, 78 and 80-82 are
// missing. Line 76 is the first half of the SG_ADD call whose continuation is
// visible below ("opt_inducing_features"); lines 80-82 presumably set the
// remaining defaults (e.g. m_max_ind_iterations, m_opt_inducing_features).
// Confirm against the original source.
// NOTE(review): the registered name "fully_Sparse" has inconsistent
// capitalization and the description string misspells "tolearance"; these are
// runtime strings, left untouched here.
61 void CSingleSparseInferenceBase::init()
62 {
63  m_fully_sparse=false;
64  SG_ADD(&m_fully_sparse, "fully_Sparse",
65  "whether the kernel support sparse inference", MS_NOT_AVAILABLE);
66  m_lock=new CLock();
67 
68  SG_ADD(&m_upper_bound, "upper_bound",
69  "upper bound of inducing features", MS_NOT_AVAILABLE);
70  SG_ADD(&m_lower_bound, "lower_bound",
71  "lower bound of inducing features", MS_NOT_AVAILABLE);
72  SG_ADD(&m_max_ind_iterations, "max_ind_iterations",
73  "max number of iterations used in inducing features optimization", MS_NOT_AVAILABLE);
74  SG_ADD(&m_ind_tolerance, "ind_tolerance",
75  "tolearance used in inducing features optimization", MS_NOT_AVAILABLE);
77  "opt_inducing_features", "whether optimize inducing features", MS_NOT_AVAILABLE);
79  m_ind_tolerance=1e-3;
83 }
84 
// NOTE(review): empty-looking function fragment. The signature (Doxygen source
// line 85) and body lines 87-88 were lost in extraction -- cannot tell from
// here which member this is; confirm against the original source.
86 {
89 }
90 
// Presumably the destructor (signature at Doxygen source line 91 was lost in
// extraction -- confirm). Releases the CLock allocated in init().
92 {
93  delete m_lock;
94 }
95 
// Detects whether the configured kernel supports fully sparse inference by
// checking its class name for the substring "SparseKernel", and records the
// result in m_fully_sparse. Warns when inducing-feature optimization is
// therefore unavailable.
// NOTE(review): the signature (Doxygen source line 96, presumably
// CSingleSparseInferenceBase::check_fully_sparse() or set_kernel) was lost in
// extraction -- confirm against the original source.
97 {
98  REQUIRE(m_kernel, "Kernel must be set first\n")
99  if (strstr(m_kernel->get_name(), "SparseKernel")!=NULL)
100  m_fully_sparse=true;
101  else
102  {
103  SG_WARNING( "The provided kernel does not support to optimize inducing features\n");
104  m_fully_sparse=false;
105  }
106 }
107 
// Derivative of the negative log marginal likelihood wrt an inference-method
// parameter. Accepts only "log_scale", "log_inducing_noise" or
// "inducing_features"; dispatches the first two to dedicated helpers and
// computes the log_scale derivative inline via get_derivative_related_cov.
// NOTE(review): extraction gaps -- Doxygen source lines 108 (first half of the
// signature), 126 (presumably `SGVector<float64_t> res;`), 137 (presumably
// `res=get_derivative_wrt_inducing_features(param);`) and 143 (presumably
// `SGVector<float64_t> deriv_trtr=m_ktrtr_diag.clone();`) are missing;
// confirm against the original source.
109  const TParameter* param)
110 {
111  // the time complexity O(m^2*n) if the TO DO is done
112  REQUIRE(param, "Param not set\n");
113  REQUIRE(!(strcmp(param->m_name, "log_scale")
114  && strcmp(param->m_name, "log_inducing_noise")
115  && strcmp(param->m_name, "inducing_features")),
116  "Can't compute derivative of"
117  " the nagative log marginal likelihood wrt %s.%s parameter\n",
118  get_name(), param->m_name)
119 
120  if (!strcmp(param->m_name, "log_inducing_noise"))
121  // wrt inducing_noise
122  // compute derivative wrt inducing noise
123  return get_derivative_wrt_inducing_noise(param);
124  else if (!strcmp(param->m_name, "inducing_features"))
125  {
// If the kernel is not fully sparse the derivative wrt inducing features
// cannot be computed; return an all-zero vector of matching length.
127  if (!m_fully_sparse)
128  {
129  int32_t dim=m_inducing_features.num_rows;
130  int32_t num_samples=m_inducing_features.num_cols;
131  res=SGVector<float64_t>(dim*num_samples);
132  SG_WARNING("Derivative wrt %s cannot be computed since the kernel does not support fully sparse inference\n",
133  param->m_name);
134  res.zero();
135  return res;
136  }
138  return res;
139  }
140 
141  // wrt scale
142  // clone kernel matrices
144  SGMatrix<float64_t> deriv_uu=m_kuu.clone();
145  SGMatrix<float64_t> deriv_tru=m_ktru.clone();
146 
147  // create eigen representation of kernel matrices
148  Map<VectorXd> ddiagKi(deriv_trtr.vector, deriv_trtr.vlen);
149  Map<MatrixXd> dKuui(deriv_uu.matrix, deriv_uu.num_rows, deriv_uu.num_cols);
150  Map<MatrixXd> dKui(deriv_tru.matrix, deriv_tru.num_rows, deriv_tru.num_cols);
151 
152  // compute derivatives wrt scale for each kernel matrix
153  SGVector<float64_t> result(1);
154 
155  result[0]=get_derivative_related_cov(deriv_trtr, deriv_uu, deriv_tru);
// chain rule for the log-scale parameterization: d/d(log s) = 2*s^2 * d/d(s^2)
156  result[0]*=CMath::exp(m_log_scale*2.0)*2.0;
157  return result;
158 }
159 
// Derivative of the negative log marginal likelihood wrt each element of a
// kernel hyper-parameter. For every element i, re-initializes the kernel on
// the three feature pairings (train/train diagonal, inducing/inducing,
// train/inducing), fetches the corresponding gradients, and combines them via
// get_derivative_related_cov, scaled by exp(2*log_scale).
// NOTE(review): extraction gaps -- Doxygen source lines 160 (first half of the
// signature) and 176 (presumably `m_kernel->init(m_features, m_features);`,
// needed before the diagonal-gradient call) are missing; confirm against the
// original source.
161  const TParameter* param)
162 {
163  REQUIRE(param, "Param not set\n");
164  SGVector<float64_t> result;
165  int64_t len=const_cast<TParameter *>(param)->m_datatype.get_num_elements();
166  result=SGVector<float64_t>(len);
167 
168  CFeatures *inducing_features=get_inducing_features();
169  for (index_t i=0; i<result.vlen; i++)
170  {
171  SGVector<float64_t> deriv_trtr;
172  SGMatrix<float64_t> deriv_uu;
173  SGMatrix<float64_t> deriv_tru;
174 
// The kernel object is shared state; guard the re-init + gradient reads.
175  m_lock->lock();
177  //to reduce the time complexity
178  //the kernel object only computes diagonal elements of gradients wrt hyper-parameter
179  deriv_trtr=m_kernel->get_parameter_gradient_diagonal(param, i);
180 
181  m_kernel->init(inducing_features, inducing_features);
182  deriv_uu=m_kernel->get_parameter_gradient(param, i);
183 
184  m_kernel->init(inducing_features, m_features);
185  deriv_tru=m_kernel->get_parameter_gradient(param, i);
186  m_lock->unlock();
187 
188  // create eigen representation of derivatives
189  Map<VectorXd> ddiagKi(deriv_trtr.vector, deriv_trtr.vlen);
190  Map<MatrixXd> dKuui(deriv_uu.matrix, deriv_uu.num_rows,
191  deriv_uu.num_cols);
192  Map<MatrixXd> dKui(deriv_tru.matrix, deriv_tru.num_rows,
193  deriv_tru.num_cols);
194 
195  result[i]=get_derivative_related_cov(deriv_trtr, deriv_uu, deriv_tru);
196  result[i]*=CMath::exp(m_log_scale*2.0);
197  }
// get_inducing_features() returned a ref-counted object; release it.
198  SG_UNREF(inducing_features);
199  return result;
200 }
201 
// Validates a lower/upper bound vector against the current inducing features:
// a multi-element bound must match the flattened inducing-feature length; a
// single-element bound is broadcast to all features (with a warning).
// NOTE(review): extraction gaps -- Doxygen source lines 202 (signature,
// presumably check_bound(SGVector<float64_t> bound, const char* name)), 207
// and 210 (the head and argument list of the length-check REQUIRE) and 215
// (the SG_WARNING argument list) are missing; confirm against the original
// source.
203 {
204  if (bound.vlen>1)
205  {
206  REQUIRE(m_inducing_features.num_rows>0, "Inducing features must set before this method is called\n");
208  "The length of inducing features (%dx%d)",
209  " and the length of bound constraints (%d) are different\n",
211  }
212  else if(bound.vlen==1)
213  {
214  SG_WARNING("All inducing_features (%dx%d) are constrainted by the single value (%f) in the %s bound\n",
216  }
217 }
218 
// Stores the lower bound used by the inducing-feature optimizer after
// validating it via check_bound. NOTE(review): the signature (Doxygen source
// line 219, set_lower_bound_of_inducing_features per the hover index) was
// lost in extraction.
220 {
221  check_bound(bound,"lower");
222  m_lower_bound=bound;
223 }
// Stores the upper bound used by the inducing-feature optimizer after
// validating it via check_bound. NOTE(review): the signature (Doxygen source
// line 224, set_upper_bound_of_inducing_features per the hover index) was
// lost in extraction.
225 {
226  check_bound(bound, "upper");
227  m_upper_bound=bound;
228 }
229 
// Validates and stores the iteration cap for the inducing-feature optimizer.
// NOTE(review): extraction gaps -- Doxygen source line 230 (signature) and
// line 233 (presumably `m_max_ind_iterations=it;`, without which the REQUIRE
// would have no effect) are missing; confirm against the original source.
231 {
232  REQUIRE(it>0, "Iteration (%d) must be positive\n",it);
234 }
// Validates and stores the convergence tolerance for the inducing-feature
// optimizer. NOTE(review): the signature (Doxygen source line 235,
// set_tolearance_for_inducing_features per the hover index -- the typo
// "tolearance" is part of the public API name) was lost in extraction.
236 {
237 
238  REQUIRE(tol>0, "Tolearance (%f) must be positive\n",tol);
239  m_ind_tolerance=tol;
240 }
241 double CSingleSparseInferenceBase::nlopt_function(unsigned n, const double* x, double* grad, void* func_data)
242 {
243  CSingleSparseInferenceBase* object=static_cast<CSingleSparseInferenceBase *>(func_data);
244  REQUIRE(object,"func_data must be SingleSparseInferenceBase pointer\n");
245 
246  double nlz=object->get_negative_log_marginal_likelihood();
247  object->compute_gradient();
248 
249  TParameter* param=object->m_gradient_parameters->get_parameter("inducing_features");
250  SGVector<float64_t> derivatives=object->get_derivative_wrt_inducing_features(param);
251 
252  std::copy(derivatives.vector,derivatives.vector+n,grad);
253 
254  return nlz;
255 }
256 
// Toggles whether optimize_inducing_features() actually runs the optimizer.
// NOTE(review): the signature (Doxygen source line 257,
// enable_optimizing_inducing_features(bool is_optmization) per the hover
// index) was lost in extraction.
258 {
259  m_opt_inducing_features=is_optmization;
260 }
261 
// Optimizes the locations of the inducing features with NLopt's L-BFGS,
// subject to optional per-element or broadcast lower/upper bounds; falls back
// to a message when Shogun was built without NLOPT. The NLopt variable vector
// x wraps the inducing-feature matrix memory directly (no-copy SGVector), so
// the optimizer mutates the features in place.
// NOTE(review): extraction gaps -- Doxygen source lines 262 (signature), 265
// (presumably the early-return guard testing m_opt_inducing_features), 268,
// 272 (the declaration of lat_m, presumably the inducing-feature matrix), 287
// and 301 (the loop bodies that replicate the scalar bound into each column)
// are missing; confirm against the original source.
263 {
264 #ifdef HAVE_NLOPT
266  return;
267 
269  REQUIRE(m_fully_sparse,"Please use a kernel which supports to optimize inducing features\n");
270 
271  //features by samples
273  SGVector<double> x(lat_m.matrix,lat_m.num_rows*lat_m.num_cols,false);
274 
275  // create nlopt object and choose LBFGS
276  // optimization algorithm
277  nlopt_opt opt=nlopt_create(NLOPT_LD_LBFGS, lat_m.num_rows*lat_m.num_cols);
278 
279  if (m_lower_bound.vlen>0)
280  {
281  if(m_lower_bound.vlen==1)
282  nlopt_set_lower_bounds1(opt, m_lower_bound[0]);
283  else
284  {
285  SGVector<double> lower_bound(lat_m.num_rows*lat_m.num_cols);
286  for(index_t j=0; j<lat_m.num_cols; j++)
288  lower_bound.vector+j*lat_m.num_rows);
289  // set lower bound
290  nlopt_set_lower_bounds(opt, lower_bound.vector);
291  }
292  }
293  if (m_upper_bound.vlen>0)
294  {
295  if(m_upper_bound.vlen==1)
296  nlopt_set_upper_bounds1(opt, m_upper_bound[0]);
297  else
298  {
299  SGVector<double> upper_bound(lat_m.num_rows*lat_m.num_cols);
300  for(index_t j=0; j<lat_m.num_cols; j++)
302  upper_bound.vector+j*lat_m.num_rows);
303  // set upper bound
304  nlopt_set_upper_bounds(opt, upper_bound.vector);
305  }
306  }
307 
308  // set maximum number of evaluations
309  nlopt_set_maxeval(opt, m_max_ind_iterations);
310  // set absolute argument tolearance
311  nlopt_set_xtol_abs1(opt, m_ind_tolerance);
312  nlopt_set_ftol_abs(opt, m_ind_tolerance);
313 
314  nlopt_set_min_objective(opt, CSingleSparseInferenceBase::nlopt_function, this);
315 
316  // the minimum objective value, upon return
317  double minf;
318 
319  // optimize our function
// NOTE(review): result>0 rejects all NLopt error codes (which are negative);
// positive codes are the various success statuses.
320  nlopt_result result=nlopt_optimize(opt, x.vector, &minf);
321  REQUIRE(result>0, "NLopt failed while optimizing objective function!\n");
322 
323  // clean up
324  nlopt_destroy(opt);
325 #else
326  SG_PRINT("For this functionality we require NLOPT library\n");
327 #endif
328 }
329 
330 #endif /* HAVE_EIGEN3 */
virtual const char * get_name() const =0
virtual bool init(CFeatures *lhs, CFeatures *rhs)
Definition: Kernel.cpp:98
SGVector< float64_t > m_ktrtr_diag
virtual SGVector< float64_t > get_derivative_wrt_inducing_noise(const TParameter *param)=0
int32_t index_t
Definition: common.h:62
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:43
Definition: SGMatrix.h:20
SGMatrix< T > clone()
Definition: SGMatrix.cpp:260
parameter struct
#define REQUIRE(x,...)
Definition: SGIO.h:206
void unlock()
Definition: Lock.cpp:64
index_t num_cols
Definition: SGMatrix.h:378
virtual SGVector< float64_t > get_derivative_wrt_inference_method(const TParameter *param)
An abstract class of the mean function.
Definition: MeanFunction.h:49
SGMatrix< float64_t > m_inducing_features
index_t num_rows
Definition: SGMatrix.h:376
virtual void set_tolearance_for_inducing_features(float64_t tol)
index_t vlen
Definition: SGVector.h:494
#define SG_PRINT(...)
Definition: SGIO.h:137
Class Lock used for synchronization in concurrent programs.
Definition: Lock.h:17
virtual void enable_optimizing_inducing_features(bool is_optmization)
virtual void set_upper_bound_of_inducing_features(SGVector< float64_t > bound)
double float64_t
Definition: common.h:50
virtual void set_kernel(CKernel *kern)
The sparse inference base class for classification and regression for 1-D labels (1D regression and b...
virtual void set_max_iterations_for_inducing_features(int32_t it)
#define SG_UNREF(x)
Definition: SGObject.h:52
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
virtual CFeatures * get_inducing_features()
The class Features is the base class of all feature objects.
Definition: Features.h:68
static float64_t exp(float64_t x)
Definition: Math.h:621
virtual SGVector< float64_t > get_derivative_wrt_kernel(const TParameter *param)
virtual const char * get_name() const
virtual SGMatrix< float64_t > get_parameter_gradient(const TParameter *param, index_t index=-1)
Definition: Kernel.h:850
SGVector< T > clone() const
Definition: SGVector.cpp:209
virtual SGVector< float64_t > get_derivative_wrt_inducing_features(const TParameter *param)=0
virtual void check_bound(SGVector< float64_t > bound, const char *name)
The Kernel base class.
Definition: Kernel.h:158
virtual void set_lower_bound_of_inducing_features(SGVector< float64_t > bound)
#define SG_WARNING(...)
Definition: SGIO.h:128
#define SG_ADD(...)
Definition: SGObject.h:81
The Fully Independent Conditional Training inference base class.
virtual float64_t get_derivative_related_cov(SGVector< float64_t > ddiagKi, SGMatrix< float64_t > dKuui, SGMatrix< float64_t > dKui)=0
virtual SGVector< float64_t > get_parameter_gradient_diagonal(const TParameter *param, index_t index=-1)
Definition: Kernel.h:864
void lock()
Definition: Lock.cpp:57
The Likelihood model base class.

SHOGUN Machine Learning Toolkit - Project Documentation