Inference.cpp
/*
 * Copyright (c) The Shogun Machine Learning Toolbox
 * Written (W) 2013 Heiko Strathmann
 * Written (W) 2013 Roman Votyakov
 * Written (W) 2012 Jacob Walker
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation are those
 * of the authors and should not be interpreted as representing official policies,
 * either expressed or implied, of the Shogun Development Team.
 *
 */
#include <shogun/lib/config.h>

#include <shogun/machine/gp/Inference.h>
#include <shogun/mathematics/Math.h>
#include <shogun/distributions/classical/GaussianDistribution.h>
#include <shogun/lib/Lock.h>

using namespace shogun;

#ifndef DOXYGEN_SHOULD_SKIP_THIS
struct GRADIENT_THREAD_PARAM
{
    CInference* inf;
    CMap<TParameter*, SGVector<float64_t> >* grad;
    CSGObject* obj;
    TParameter* param;
    CLock* lock;
};
#endif /* DOXYGEN_SHOULD_SKIP_THIS */
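/* A GRADIENT_THREAD_PARAM bundles everything one worker needs to compute the
 * derivative of the negative log marginal likelihood wrt a single parameter:
 * the inference object, the parameter and the object that owns it, the shared
 * result map, and the lock that guards writes to that map. */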

CInference::CInference()
{
    init();
}

float64_t CInference::get_scale() const
{
    return CMath::exp(m_log_scale);
}

void CInference::set_scale(float64_t scale)
{
    REQUIRE(scale>0, "Scale (%f) must be positive", scale);
    m_log_scale=CMath::log(scale);
}

SGMatrix<float64_t> CInference::get_multiclass_E()
{
    if (parameter_hash_changed())
        update();

    return SGMatrix<float64_t>(m_E);
}

CInference::CInference(CKernel* kernel, CFeatures* features,
        CMeanFunction* mean, CLabels* labels, CLikelihoodModel* model)
{
    init();

    set_kernel(kernel);
    set_features(features);
    set_labels(labels);
    set_model(model);
    set_mean(mean);
}

CInference::~CInference()
{
    SG_UNREF(m_kernel);
    SG_UNREF(m_features);
    SG_UNREF(m_labels);
    SG_UNREF(m_model);
    SG_UNREF(m_mean);
    SG_UNREF(m_minimizer);
}

void CInference::init()
{
    SG_ADD((CSGObject**)&m_kernel, "kernel", "Kernel", MS_AVAILABLE);
    SG_ADD(&m_log_scale, "log_scale", "Kernel log scale", MS_AVAILABLE, GRADIENT_AVAILABLE);
    SG_ADD((CSGObject**)&m_model, "likelihood_model", "Likelihood model",
            MS_AVAILABLE);
    SG_ADD((CSGObject**)&m_mean, "mean_function", "Mean function", MS_AVAILABLE);
    SG_ADD((CSGObject**)&m_labels, "labels", "Labels", MS_NOT_AVAILABLE);
    SG_ADD((CSGObject**)&m_features, "features", "Features", MS_NOT_AVAILABLE);
    SG_ADD(&m_gradient_update, "gradient_update", "Whether gradients are updated", MS_NOT_AVAILABLE);

    m_kernel=NULL;
    m_model=NULL;
    m_labels=NULL;
    m_features=NULL;
    m_mean=NULL;
    m_log_scale=0.0;
    m_gradient_update=false;
    m_minimizer=NULL;

    SG_ADD((CSGObject**)&m_minimizer, "Inference__m_minimizer", "minimizer in Inference", MS_NOT_AVAILABLE);
    SG_ADD(&m_alpha, "alpha", "alpha vector used in process mean calculation", MS_NOT_AVAILABLE);
    SG_ADD(&m_L, "L", "upper triangular factor of Cholesky decomposition", MS_NOT_AVAILABLE);
    SG_ADD(&m_E, "E", "the matrix used for multi classification", MS_NOT_AVAILABLE);
}
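/* Parameters registered with MS_AVAILABLE are the ones exposed to Shogun's
 * model selection machinery; log_scale additionally carries GRADIENT_AVAILABLE,
 * so gradient-based hyperparameter optimization can tune the kernel scale.
 * The scale is stored in log space (see set_scale/get_scale above), which
 * keeps it strictly positive without an explicit constraint. */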

void CInference::register_minimizer(Minimizer* minimizer)
{
    REQUIRE(minimizer, "Minimizer must be set\n");
    if (minimizer!=m_minimizer)
    {
        SG_REF(minimizer);
        SG_UNREF(m_minimizer);
        m_minimizer=minimizer;
    }
}

float64_t CInference::get_marginal_likelihood_estimate(
        int32_t num_importance_samples, float64_t ridge_size)
{
    /* sample from Gaussian approximation to q(f|y) */
    SGMatrix<float64_t> cov=get_posterior_covariance();

    /* add ridge */
    for (index_t i=0; i<cov.num_rows; ++i)
        cov(i,i)+=ridge_size;

    SGVector<float64_t> mean=get_posterior_mean();

    CGaussianDistribution* post_approx=new CGaussianDistribution(mean, cov);
    SGMatrix<float64_t> samples=post_approx->sample(num_importance_samples);

    /* evaluate q(f^i|y), p(f^i|\theta), p(y|f^i), i.e.,
     * log pdf of approximation, prior and likelihood */

    /* log pdf q(f^i|y) */
    SGVector<float64_t> log_pdf_post_approx=post_approx->log_pdf_multiple(samples);

    /* don't need the Gaussian anymore, free memory */
    SG_UNREF(post_approx);
    post_approx=NULL;

    /* log pdf p(f^i|\theta) and free memory afterwards. Scale kernel beforehand */
    SGMatrix<float64_t> scaled_kernel(m_ktrtr.num_rows, m_ktrtr.num_cols);
    memcpy(scaled_kernel.matrix, m_ktrtr.matrix,
            sizeof(float64_t)*m_ktrtr.num_rows*m_ktrtr.num_cols);
    for (index_t i=0; i<m_ktrtr.num_rows*m_ktrtr.num_cols; ++i)
        scaled_kernel.matrix[i]*=CMath::exp(m_log_scale*2.0);

    /* add ridge */
    for (index_t i=0; i<m_ktrtr.num_rows; ++i)
        scaled_kernel(i,i)+=ridge_size;

    CGaussianDistribution* prior=new CGaussianDistribution(
            m_mean->get_mean_vector(m_features), scaled_kernel);
    SGVector<float64_t> log_pdf_prior=prior->log_pdf_multiple(samples);
    SG_UNREF(prior);
    prior=NULL;

    /* p(y|f^i) */
    SGVector<float64_t> log_likelihood=m_model->get_log_probability_fmatrix(
            m_labels, samples);

    /* combine probabilities */
    ASSERT(log_likelihood.vlen==num_importance_samples);
    ASSERT(log_likelihood.vlen==log_pdf_prior.vlen);
    ASSERT(log_likelihood.vlen==log_pdf_post_approx.vlen);
    SGVector<float64_t> sum(log_likelihood);
    for (index_t i=0; i<log_likelihood.vlen; ++i)
        sum[i]=log_likelihood[i]+log_pdf_prior[i]-log_pdf_post_approx[i];

    /* use log-sum-exp (in particular, log-mean-exp) trick to combine values */
    return CMath::log_mean_exp(sum);
}
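/* The estimate above is plain importance sampling in the log domain: with
 * f^1,...,f^N drawn from the Gaussian approximation q(f|y), it returns
 *
 *   log p(y|theta) ~= log( (1/N) * sum_i exp( log p(y|f^i)
 *                                             + log p(f^i|theta)
 *                                             - log q(f^i|y) ) ),
 *
 * evaluated with CMath::log_mean_exp so that the individual exp terms cannot
 * overflow or underflow. */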

CMap<TParameter*, SGVector<float64_t> >* CInference::
get_negative_log_marginal_likelihood_derivatives(CMap<TParameter*, CSGObject*>* params)
{
    REQUIRE(params->get_num_elements(), "Number of parameters should be greater "
            "than zero\n")

    compute_gradient();

    // get number of derivatives
    const index_t num_deriv=params->get_num_elements();

    // create map of derivatives
    CMap<TParameter*, SGVector<float64_t> >* result=
        new CMap<TParameter*, SGVector<float64_t> >(num_deriv, num_deriv);

    SG_REF(result);

    // create lock object
    CLock lock;

#ifdef HAVE_PTHREAD
    if (num_deriv<2)
    {
#endif /* HAVE_PTHREAD */
        for (index_t i=0; i<num_deriv; i++)
        {
            CMapNode<TParameter*, CSGObject*>* node=params->get_node_ptr(i);

            GRADIENT_THREAD_PARAM thread_params;

            thread_params.inf=this;
            thread_params.obj=node->data;
            thread_params.param=node->key;
            thread_params.grad=result;
            thread_params.lock=&lock;

            get_derivative_helper((void*) &thread_params);
        }
#ifdef HAVE_PTHREAD
    }
    else
    {
        pthread_t* threads=SG_MALLOC(pthread_t, num_deriv);
        GRADIENT_THREAD_PARAM* thread_params=SG_MALLOC(GRADIENT_THREAD_PARAM,
                num_deriv);

        for (index_t t=0; t<num_deriv; t++)
        {
            CMapNode<TParameter*, CSGObject*>* node=params->get_node_ptr(t);

            thread_params[t].inf=this;
            thread_params[t].obj=node->data;
            thread_params[t].param=node->key;
            thread_params[t].grad=result;
            thread_params[t].lock=&lock;

            pthread_create(&threads[t], NULL, CInference::get_derivative_helper,
                    (void*)&thread_params[t]);
        }

        for (index_t t=0; t<num_deriv; t++)
            pthread_join(threads[t], NULL);

        SG_FREE(thread_params);
        SG_FREE(threads);
    }
#endif /* HAVE_PTHREAD */

    return result;
}
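/* The derivatives wrt different parameters are independent, so when pthreads
 * are available and more than one parameter is requested, one thread per
 * parameter is spawned and each writes its result into the shared map while
 * holding the lock; with a single parameter (or without pthreads) the helper
 * is simply invoked in the calling thread. */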

void* CInference::get_derivative_helper(void* p)
{
    GRADIENT_THREAD_PARAM* thread_param=(GRADIENT_THREAD_PARAM*)p;

    CInference* inf=thread_param->inf;
    CSGObject* obj=thread_param->obj;
    CMap<TParameter*, SGVector<float64_t> >* grad=thread_param->grad;
    TParameter* param=thread_param->param;
    CLock* lock=thread_param->lock;

    REQUIRE(param, "Parameter should not be NULL\n");
    REQUIRE(obj, "Object of the parameter should not be NULL\n");

    SGVector<float64_t> gradient;

    if (obj==inf)
    {
        // try to find derivative wrt InferenceMethod.parameter
        gradient=inf->get_derivative_wrt_inference_method(param);
    }
    else if (obj==inf->m_model)
    {
        // try to find derivative wrt LikelihoodModel.parameter
        gradient=inf->get_derivative_wrt_likelihood_model(param);
    }
    else if (obj==inf->m_kernel)
    {
        // try to find derivative wrt Kernel.parameter
        gradient=inf->get_derivative_wrt_kernel(param);
    }
    else if (obj==inf->m_mean)
    {
        // try to find derivative wrt MeanFunction.parameter
        gradient=inf->get_derivative_wrt_mean(param);
    }
    else
    {
        SG_SERROR("Can't compute derivative of negative log marginal "
                "likelihood wrt %s.%s", obj->get_name(), param->m_name);
    }

    lock->lock();
    grad->add(param, gradient);
    lock->unlock();

    return NULL;
}

void CInference::update()
{
    check_members();
    update_train_kernel();
}

void CInference::check_members() const
{
    REQUIRE(m_features, "Training features should not be NULL\n")
    REQUIRE(m_features->get_num_vectors(),
            "Number of training features must be greater than zero\n")
    REQUIRE(m_labels, "Labels should not be NULL\n")
    REQUIRE(m_labels->get_num_labels(),
            "Number of labels must be greater than zero\n")
    REQUIRE(m_labels->get_num_labels()==m_features->get_num_vectors(),
            "Number of training vectors (%d) must match number of labels (%d)\n",
            m_labels->get_num_labels(), m_features->get_num_vectors())
    REQUIRE(m_kernel, "Kernel should not be NULL\n")
    REQUIRE(m_mean, "Mean function should not be NULL\n")
}

void CInference::update_train_kernel()
{
    m_kernel->init(m_features, m_features);
    m_ktrtr=m_kernel->get_kernel_matrix();
}

void CInference::compute_gradient()
{
    if (parameter_hash_changed())
        update();
}
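/* Usage note: CInference itself is abstract (the posterior mean/covariance and
 * the parameter derivatives are pure virtual), so it is used through a concrete
 * subclass. A minimal sketch, assuming the exact GP regression classes from
 * shogun/machine/gp (CExactInferenceMethod, CZeroMean, CGaussianLikelihood),
 * with X an SGMatrix<float64_t> of training inputs and y an SGVector<float64_t>
 * of targets:
 *
 *   CDenseFeatures<float64_t>* feats=new CDenseFeatures<float64_t>(X);
 *   CRegressionLabels* labs=new CRegressionLabels(y);
 *   CGaussianKernel* kernel=new CGaussianKernel(10, 2.0);
 *   CExactInferenceMethod* inf=new CExactInferenceMethod(kernel, feats,
 *       new CZeroMean(), labs, new CGaussianLikelihood());
 *   inf->set_scale(1.5);  // stored internally as m_log_scale
 *   float64_t nlml=inf->get_negative_log_marginal_likelihood();
 *   SG_UNREF(inf);
 */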