SHOGUN 4.1.0
InferenceMethod.cpp
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2013 Roman Votyakov
 * Written (W) 2013 Heiko Strathmann
 * Copyright (C) 2012 Jacob Walker
 * Copyright (C) 2013 Roman Votyakov
 */

#include <shogun/lib/config.h>

#ifdef HAVE_EIGEN3

#include <shogun/machine/gp/InferenceMethod.h>
#include <shogun/distributions/classical/GaussianDistribution.h>
#include <shogun/mathematics/Math.h>
#include <shogun/lib/Lock.h>

using namespace shogun;

#ifndef DOXYGEN_SHOULD_SKIP_THIS
struct GRADIENT_THREAD_PARAM
{
    CInferenceMethod* inf;
    CMap<TParameter*, SGVector<float64_t> >* grad;
    CSGObject* obj;
    TParameter* param;
    CLock* lock;
};
#endif /* DOXYGEN_SHOULD_SKIP_THIS */

CInferenceMethod::CInferenceMethod()
{
    init();
}

float64_t CInferenceMethod::get_scale() const
{
    return CMath::exp(m_log_scale);
}

void CInferenceMethod::set_scale(float64_t scale)
{
    REQUIRE(scale>0, "Scale (%f) must be positive", scale);
    m_log_scale=CMath::log(scale);
}
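
/* Note: the scale is kept as m_log_scale so that gradient-based model
 * selection can tune it without a positivity constraint; set_scale() only
 * validates and converts. Round-trip sketch (inf is a hypothetical pointer
 * to a concrete subclass):
 *
 *   inf->set_scale(2.0);          // stores m_log_scale=CMath::log(2.0)
 *   float64_t s=inf->get_scale(); // returns CMath::exp(m_log_scale), i.e. 2.0
 */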

SGMatrix<float64_t> CInferenceMethod::get_multiclass_E()
{
    if (parameter_hash_changed())
        update();

    return SGMatrix<float64_t>(m_E);
}

CInferenceMethod::CInferenceMethod(CKernel* kernel, CFeatures* features,
        CMeanFunction* mean, CLabels* labels, CLikelihoodModel* model)
{
    init();

    set_kernel(kernel);
    set_features(features);
    set_labels(labels);
    set_model(model);
    set_mean(mean);
}
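
/* Usage sketch (not part of this file): a concrete subclass such as
 * CExactInferenceMethod takes the same five collaborators. The variables
 * train_feats and train_labs are hypothetical:
 *
 *   CGaussianKernel* kernel=new CGaussianKernel(10, 2.0);
 *   CZeroMean* mean=new CZeroMean();
 *   CGaussianLikelihood* lik=new CGaussianLikelihood();
 *   CInferenceMethod* inf=new CExactInferenceMethod(kernel, train_feats,
 *       mean, train_labs, lik);
 */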

CInferenceMethod::~CInferenceMethod()
{
    SG_UNREF(m_kernel);
    SG_UNREF(m_features);
    SG_UNREF(m_labels);
    SG_UNREF(m_model);
    SG_UNREF(m_mean);
}

void CInferenceMethod::init()
{
    SG_ADD((CSGObject**)&m_kernel, "kernel", "Kernel", MS_AVAILABLE);
    SG_ADD(&m_log_scale, "log_scale", "Kernel log scale", MS_AVAILABLE,
        GRADIENT_AVAILABLE);
    SG_ADD((CSGObject**)&m_model, "likelihood_model", "Likelihood model",
        MS_AVAILABLE);
    SG_ADD((CSGObject**)&m_mean, "mean_function", "Mean function", MS_AVAILABLE);
    SG_ADD((CSGObject**)&m_labels, "labels", "Labels", MS_NOT_AVAILABLE);
    SG_ADD((CSGObject**)&m_features, "features", "Features", MS_NOT_AVAILABLE);
    SG_ADD(&m_gradient_update, "gradient_update",
        "Whether gradients are updated", MS_NOT_AVAILABLE);

    m_kernel=NULL;
    m_model=NULL;
    m_labels=NULL;
    m_features=NULL;
    m_mean=NULL;
    m_log_scale=0.0;
    m_gradient_update=false;

    SG_ADD(&m_alpha, "alpha", "alpha vector used in process mean calculation",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_L, "L", "upper triangular factor of Cholesky decomposition",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_E, "E", "the matrix used for multi classification",
        MS_NOT_AVAILABLE);
}

float64_t CInferenceMethod::get_marginal_likelihood_estimate(
        int32_t num_importance_samples, float64_t ridge_size)
{
    /* sample from Gaussian approximation to q(f|y) */
    SGMatrix<float64_t> cov=get_posterior_covariance();

    /* add ridge */
    for (index_t i=0; i<cov.num_rows; ++i)
        cov(i,i)+=ridge_size;

    SGVector<float64_t> mean=get_posterior_mean();

    CGaussianDistribution* post_approx=new CGaussianDistribution(mean, cov);
    SGMatrix<float64_t> samples=post_approx->sample(num_importance_samples);

    /* evaluate q(f^i|y), p(f^i|\theta), p(y|f^i), i.e.,
     * log pdf of approximation, prior and likelihood */

    /* log pdf q(f^i|y) */
    SGVector<float64_t> log_pdf_post_approx=post_approx->log_pdf_multiple(samples);

    /* don't need the Gaussian anymore, free memory */
    SG_UNREF(post_approx);
    post_approx=NULL;

    /* log pdf p(f^i|\theta) and free memory afterwards. Scale kernel before */
    SGMatrix<float64_t> scaled_kernel(m_ktrtr.num_rows, m_ktrtr.num_cols);
    memcpy(scaled_kernel.matrix, m_ktrtr.matrix,
        sizeof(float64_t)*m_ktrtr.num_rows*m_ktrtr.num_cols);
    for (index_t i=0; i<m_ktrtr.num_rows*m_ktrtr.num_cols; ++i)
        scaled_kernel.matrix[i]*=CMath::exp(m_log_scale*2.0);

    /* add ridge */
    for (index_t i=0; i<cov.num_rows; ++i)
        scaled_kernel(i,i)+=ridge_size;

    CGaussianDistribution* prior=new CGaussianDistribution(
        m_mean->get_mean_vector(m_features), scaled_kernel);
    SGVector<float64_t> log_pdf_prior=prior->log_pdf_multiple(samples);
    SG_UNREF(prior);
    prior=NULL;

    /* p(y|f^i) */
    SGVector<float64_t> log_likelihood=m_model->get_log_probability_fmatrix(
        m_labels, samples);

    /* combine probabilities */
    ASSERT(log_likelihood.vlen==num_importance_samples);
    ASSERT(log_likelihood.vlen==log_pdf_prior.vlen);
    ASSERT(log_likelihood.vlen==log_pdf_post_approx.vlen);
    SGVector<float64_t> sum(log_likelihood);
    for (index_t i=0; i<log_likelihood.vlen; ++i)
        sum[i]=log_likelihood[i]+log_pdf_prior[i]-log_pdf_post_approx[i];

    /* use log-sum-exp (in particular, log-mean-exp) trick to combine values */
    return CMath::log_mean_exp(sum);
}
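
/* The return value above is an importance-sampling estimate computed in log
 * space: with samples f^i drawn from the Gaussian approximation q(f|y), it
 * equals log((1/N)*sum_i exp(log p(y|f^i)+log p(f^i|theta)-log q(f^i|y))).
 * A self-contained sketch of the max-shift trick behind CMath::log_mean_exp
 * (illustrative only, not Shogun's implementation):
 *
 *   #include <algorithm>
 *   #include <cmath>
 *   #include <vector>
 *
 *   double log_mean_exp_sketch(const std::vector<double>& x)
 *   {
 *       // shift by the maximum so every exp() argument is <= 0, which
 *       // prevents overflow; badly underflowing terms contribute ~0
 *       double m=*std::max_element(x.begin(), x.end());
 *       double sum=0.0;
 *       for (double v : x)
 *           sum+=std::exp(v-m);
 *       return m+std::log(sum/x.size());
 *   }
 */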

CMap<TParameter*, SGVector<float64_t> >* CInferenceMethod::
get_negative_log_marginal_likelihood_derivatives(CMap<TParameter*, CSGObject*>* params)
{
    REQUIRE(params->get_num_elements(), "Number of parameters should be greater "
        "than zero\n")

    compute_gradient();

    // get number of derivatives
    const index_t num_deriv=params->get_num_elements();

    // create map of derivatives
    CMap<TParameter*, SGVector<float64_t> >* result=
        new CMap<TParameter*, SGVector<float64_t> >(num_deriv, num_deriv);

    SG_REF(result);

    // create lock object
    CLock lock;

#ifdef HAVE_PTHREAD
    if (num_deriv<2)
    {
#endif /* HAVE_PTHREAD */
        for (index_t i=0; i<num_deriv; i++)
        {
            CMapNode<TParameter*, CSGObject*>* node=params->get_node_ptr(i);

            GRADIENT_THREAD_PARAM thread_params;

            thread_params.inf=this;
            thread_params.obj=node->data;
            thread_params.param=node->key;
            thread_params.grad=result;
            thread_params.lock=&lock;

            get_derivative_helper((void*) &thread_params);
        }
#ifdef HAVE_PTHREAD
    }
    else
    {
        pthread_t* threads=SG_MALLOC(pthread_t, num_deriv);
        GRADIENT_THREAD_PARAM* thread_params=SG_MALLOC(GRADIENT_THREAD_PARAM,
            num_deriv);

        for (index_t t=0; t<num_deriv; t++)
        {
            CMapNode<TParameter*, CSGObject*>* node=params->get_node_ptr(t);

            thread_params[t].inf=this;
            thread_params[t].obj=node->data;
            thread_params[t].param=node->key;
            thread_params[t].grad=result;
            thread_params[t].lock=&lock;

            pthread_create(&threads[t], NULL, CInferenceMethod::get_derivative_helper,
                (void*)&thread_params[t]);
        }

        for (index_t t=0; t<num_deriv; t++)
            pthread_join(threads[t], NULL);

        SG_FREE(thread_params);
        SG_FREE(threads);
    }
#endif /* HAVE_PTHREAD */

    return result;
}
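
/* Caller sketch (hypothetical names): read the derivatives back out of the
 * returned map, which holds one gradient vector per registered parameter:
 *
 *   CMap<TParameter*, SGVector<float64_t> >* result=
 *       inf->get_negative_log_marginal_likelihood_derivatives(params);
 *
 *   for (index_t i=0; i<result->get_num_elements(); ++i)
 *   {
 *       CMapNode<TParameter*, SGVector<float64_t> >* node=result->get_node_ptr(i);
 *       SG_SPRINT("d(-log ml)/d%s=%f\n", node->key->m_name, node->data[0]);
 *   }
 *   SG_UNREF(result);
 */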

void* CInferenceMethod::get_derivative_helper(void* p)
{
    GRADIENT_THREAD_PARAM* thread_param=(GRADIENT_THREAD_PARAM*)p;

    CInferenceMethod* inf=thread_param->inf;
    CSGObject* obj=thread_param->obj;
    CMap<TParameter*, SGVector<float64_t> >* grad=thread_param->grad;
    TParameter* param=thread_param->param;
    CLock* lock=thread_param->lock;

    REQUIRE(param, "Parameter should not be NULL\n");
    REQUIRE(obj, "Object of the parameter should not be NULL\n");

    SGVector<float64_t> gradient;

    if (obj==inf)
    {
        // try to find derivative wrt InferenceMethod.parameter
        gradient=inf->get_derivative_wrt_inference_method(param);
    }
    else if (obj==inf->m_model)
    {
        // try to find derivative wrt LikelihoodModel.parameter
        gradient=inf->get_derivative_wrt_likelihood_model(param);
    }
    else if (obj==inf->m_kernel)
    {
        // try to find derivative wrt Kernel.parameter
        gradient=inf->get_derivative_wrt_kernel(param);
    }
    else if (obj==inf->m_mean)
    {
        // try to find derivative wrt MeanFunction.parameter
        gradient=inf->get_derivative_wrt_mean(param);
    }
    else
    {
        SG_SERROR("Can't compute derivative of negative log marginal "
            "likelihood wrt %s.%s", obj->get_name(), param->m_name);
    }

    lock->lock();
    grad->add(param, gradient);
    lock->unlock();

    return NULL;
}

void CInferenceMethod::update()
{
    check_members();
    update_train_kernel();
}

void CInferenceMethod::check_members() const
{
    REQUIRE(m_features, "Training features should not be NULL\n")
    REQUIRE(m_features->get_num_vectors(),
        "Number of training features must be greater than zero\n")
    REQUIRE(m_labels, "Labels should not be NULL\n")
    REQUIRE(m_labels->get_num_labels(),
        "Number of labels must be greater than zero\n")
    REQUIRE(m_labels->get_num_labels()==m_features->get_num_vectors(),
        "Number of training vectors (%d) must match number of labels (%d)\n",
        m_features->get_num_vectors(), m_labels->get_num_labels())
    REQUIRE(m_kernel, "Kernel should not be NULL\n")
    REQUIRE(m_mean, "Mean function should not be NULL\n")
}

void CInferenceMethod::update_train_kernel()
{
    m_kernel->init(m_features, m_features);
    m_ktrtr=m_kernel->get_kernel_matrix();
}

void CInferenceMethod::compute_gradient()
{
    if (parameter_hash_changed())
        update();
}
#endif /* HAVE_EIGEN3 */