SHOGUN v3.0.0
InferenceMethod.cpp
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2013 Roman Votyakov
 * Written (W) 2013 Heiko Strathmann
 * Copyright (C) 2012 Jacob Walker
 * Copyright (C) 2013 Roman Votyakov
 */

#include <shogun/lib/config.h>

#ifdef HAVE_EIGEN3

#include <shogun/machine/gp/InferenceMethod.h>
#include <shogun/distributions/classical/GaussianDistribution.h>
#include <shogun/mathematics/Math.h>

#ifdef HAVE_PTHREAD
#include <pthread.h>
#endif

using namespace shogun;

#ifndef DOXYGEN_SHOULD_SKIP_THIS
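/* Everything a single gradient-computation thread needs: the inference
 * method itself, the map to write the derivative into, and the object
 * and parameter the derivative is taken with respect to. */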
struct GRADIENT_THREAD_PARAM
{
    CInferenceMethod* inf;
    CMap<TParameter*, SGVector<float64_t> >* grad;
    CSGObject* obj;
    TParameter* param;
};
#endif /* DOXYGEN_SHOULD_SKIP_THIS */

CInferenceMethod::CInferenceMethod()
{
    init();
}

CInferenceMethod::CInferenceMethod(CKernel* kernel, CFeatures* features,
        CMeanFunction* mean, CLabels* labels, CLikelihoodModel* model)
{
    init();

    set_kernel(kernel);
    set_features(features);
    set_labels(labels);
    set_model(model);
    set_mean(mean);
}
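
/* Usage sketch (illustrative, not part of this file): CInferenceMethod is
 * abstract, so a concrete subclass such as CExactInferenceMethod is
 * instantiated with the five collaborators wired up by this constructor.
 * The kernel parameters, data and label_vector below are placeholders.
 *
 *   CDenseFeatures<float64_t>* feats=new CDenseFeatures<float64_t>(data);
 *   CRegressionLabels* labs=new CRegressionLabels(label_vector);
 *   CInferenceMethod* inf=new CExactInferenceMethod(
 *       new CGaussianKernel(10, 2.0), feats, new CZeroMean(), labs,
 *       new CGaussianLikelihood());
 */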

CInferenceMethod::~CInferenceMethod()
{
    SG_UNREF(m_kernel);
    SG_UNREF(m_features);
    SG_UNREF(m_labels);
    SG_UNREF(m_model);
    SG_UNREF(m_mean);
}

void CInferenceMethod::init()
{
    SG_ADD((CSGObject**)&m_kernel, "kernel", "Kernel", MS_AVAILABLE);
    SG_ADD(&m_scale, "scale", "Kernel scale", MS_AVAILABLE, GRADIENT_AVAILABLE);
    SG_ADD((CSGObject**)&m_model, "likelihood_model", "Likelihood model",
            MS_AVAILABLE);
    SG_ADD((CSGObject**)&m_mean, "mean_function", "Mean function", MS_AVAILABLE);
    SG_ADD((CSGObject**)&m_labels, "labels", "Labels", MS_NOT_AVAILABLE);
    SG_ADD((CSGObject**)&m_features, "features", "Features", MS_NOT_AVAILABLE);

    m_kernel=NULL;
    m_model=NULL;
    m_labels=NULL;
    m_features=NULL;
    m_mean=NULL;
    m_scale=1.0;
}

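/* Importance-sampling estimate of the marginal likelihood: draw
 * f^i ~ q(f|y), the Gaussian approximation to the posterior, and average
 *
 *   p(y|theta) ~= 1/N * sum_i p(y|f^i) p(f^i|theta) / q(f^i|y),
 *
 * computed in the log domain as the log-mean-exp of
 * log p(y|f^i) + log p(f^i|theta) - log q(f^i|y). */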
float64_t CInferenceMethod::get_marginal_likelihood_estimate(
        int32_t num_importance_samples, float64_t ridge_size)
{
    /* sample from Gaussian approximation to q(f|y) */
    SGMatrix<float64_t> cov=get_posterior_covariance();

    /* add ridge */
    for (index_t i=0; i<cov.num_rows; ++i)
        cov(i,i)+=ridge_size;

    SGVector<float64_t> mean=get_posterior_mean();

    CGaussianDistribution* post_approx=new CGaussianDistribution(mean, cov);
    SGMatrix<float64_t> samples=post_approx->sample(num_importance_samples);

    /* evaluate q(f^i|y), p(f^i|\theta), p(y|f^i), i.e.,
     * log pdf of approximation, prior and likelihood */

    /* log pdf q(f^i|y) */
    SGVector<float64_t> log_pdf_post_approx=post_approx->log_pdf_multiple(samples);

    /* don't need the Gaussian anymore, free memory */
    SG_UNREF(post_approx);
    post_approx=NULL;

    /* log pdf p(f^i|\theta); scale the kernel matrix first and free memory
     * afterwards */
    SGMatrix<float64_t> scaled_kernel(m_ktrtr.num_rows, m_ktrtr.num_cols);
    memcpy(scaled_kernel.matrix, m_ktrtr.matrix,
            sizeof(float64_t)*m_ktrtr.num_rows*m_ktrtr.num_cols);
    for (index_t i=0; i<m_ktrtr.num_rows*m_ktrtr.num_cols; ++i)
        scaled_kernel.matrix[i]*=CMath::sq(m_scale);

    /* add ridge */
    for (index_t i=0; i<cov.num_rows; ++i)
        scaled_kernel(i,i)+=ridge_size;

    CGaussianDistribution* prior=new CGaussianDistribution(
            m_mean->get_mean_vector(m_features), scaled_kernel);
    SGVector<float64_t> log_pdf_prior=prior->log_pdf_multiple(samples);
    SG_UNREF(prior);
    prior=NULL;

    /* p(y|f^i) */
    SGVector<float64_t> log_likelihood=m_model->get_log_probability_fmatrix(
            m_labels, samples);

    /* combine probabilities */
    ASSERT(log_likelihood.vlen==num_importance_samples);
    ASSERT(log_likelihood.vlen==log_pdf_prior.vlen);
    ASSERT(log_likelihood.vlen==log_pdf_post_approx.vlen);
    SGVector<float64_t> sum(log_likelihood);
    for (index_t i=0; i<log_likelihood.vlen; ++i)
        sum[i]=log_likelihood[i]+log_pdf_prior[i]-log_pdf_post_approx[i];

    /* use log-sum-exp (in particular, log-mean-exp) trick to combine values */
    return CMath::log_mean_exp(sum);
}
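
/* A typical call (sample count and ridge purely illustrative):
 *
 *   float64_t log_ml=inf->get_marginal_likelihood_estimate(100, 1e-15);
 *
 * More samples reduce the variance of the estimate; the ridge keeps the
 * sampled covariance numerically positive definite. */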

CMap<TParameter*, SGVector<float64_t> >* CInferenceMethod::
get_negative_log_marginal_likelihood_derivatives(CMap<TParameter*, CSGObject*>* params)
{
    REQUIRE(params->get_num_elements(), "Number of parameters should be greater "
            "than zero\n")

    if (update_parameter_hash())
        update();

    // get number of derivatives
    const index_t num_deriv=params->get_num_elements();

    // create map of derivatives
    CMap<TParameter*, SGVector<float64_t> >* result=
        new CMap<TParameter*, SGVector<float64_t> >(num_deriv, num_deriv);

    SG_REF(result);

#ifdef HAVE_PTHREAD
    if (num_deriv<2)
    {
#endif /* HAVE_PTHREAD */
        for (index_t i=0; i<num_deriv; i++)
        {
            CMapNode<TParameter*, CSGObject*>* node=params->get_node_ptr(i);

            GRADIENT_THREAD_PARAM thread_params;

            thread_params.inf=this;
            thread_params.obj=node->data;
            thread_params.param=node->key;
            thread_params.grad=result;

            get_derivative_helper((void*) &thread_params);
        }
#ifdef HAVE_PTHREAD
    }
    else
    {
        pthread_t* threads=SG_MALLOC(pthread_t, num_deriv);
        GRADIENT_THREAD_PARAM* thread_params=SG_MALLOC(GRADIENT_THREAD_PARAM,
                num_deriv);

        for (index_t t=0; t<num_deriv; t++)
        {
            CMapNode<TParameter*, CSGObject*>* node=params->get_node_ptr(t);

            thread_params[t].inf=this;
            thread_params[t].obj=node->data;
            thread_params[t].param=node->key;
            thread_params[t].grad=result;

            pthread_create(&threads[t], NULL, CInferenceMethod::get_derivative_helper,
                    (void*)&thread_params[t]);
        }

        for (index_t t=0; t<num_deriv; t++)
            pthread_join(threads[t], NULL);

        SG_FREE(thread_params);
        SG_FREE(threads);
    }
#endif /* HAVE_PTHREAD */

    return result;
}
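
/* Design note: with fewer than two requested derivatives (or without
 * pthread support) the gradients are computed serially in the calling
 * thread; otherwise one pthread is spawned per parameter, all writing
 * their results into the shared map via get_derivative_helper(). */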

void* CInferenceMethod::get_derivative_helper(void* p)
{
    GRADIENT_THREAD_PARAM* thread_param=(GRADIENT_THREAD_PARAM*)p;

    CInferenceMethod* inf=thread_param->inf;
    CSGObject* obj=thread_param->obj;
    CMap<TParameter*, SGVector<float64_t> >* grad=thread_param->grad;
    TParameter* param=thread_param->param;

    REQUIRE(param, "Parameter should not be NULL\n");
    REQUIRE(obj, "Object of the parameter should not be NULL\n");

    if (obj==inf)
    {
        // try to find derivative wrt InferenceMethod.parameter
        grad->add(param, inf->get_derivative_wrt_inference_method(param));
    }
    else if (obj==inf->m_model)
    {
        // try to find derivative wrt LikelihoodModel.parameter
        grad->add(param, inf->get_derivative_wrt_likelihood_model(param));
    }
    else if (obj==inf->m_kernel)
    {
        // try to find derivative wrt Kernel.parameter
        grad->add(param, inf->get_derivative_wrt_kernel(param));
    }
    else if (obj==inf->m_mean)
    {
        // try to find derivative wrt MeanFunction.parameter
        grad->add(param, inf->get_derivative_wrt_mean(param));
    }
    else
    {
        SG_SERROR("Can't compute derivative of negative log marginal "
                "likelihood wrt %s.%s", obj->get_name(), param->m_name);
    }

    return NULL;
}
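
/* The helper above dispatches on the owner of the parameter: the derivative
 * is delegated to the inference method itself, the likelihood model, the
 * kernel, or the mean function, depending on which object registered the
 * parameter. */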

void CInferenceMethod::update()
{
    check_members();
    update_train_kernel();
}

void CInferenceMethod::check_members() const
{
    REQUIRE(m_features, "Training features should not be NULL\n")
    REQUIRE(m_features->get_num_vectors(),
            "Number of training features must be greater than zero\n")
    REQUIRE(m_labels, "Labels should not be NULL\n")
    REQUIRE(m_labels->get_num_labels(),
            "Number of labels must be greater than zero\n")
    REQUIRE(m_labels->get_num_labels()==m_features->get_num_vectors(),
            "Number of training vectors must match number of labels, which is "
            "%d, but number of training vectors is %d\n",
            m_labels->get_num_labels(), m_features->get_num_vectors())
    REQUIRE(m_kernel, "Kernel should not be NULL\n")
    REQUIRE(m_mean, "Mean function should not be NULL\n")
}

void CInferenceMethod::update_train_kernel()
{
    m_kernel->cleanup();
    m_kernel->init(m_features, m_features);
    m_ktrtr=m_kernel->get_kernel_matrix();
}

#endif /* HAVE_EIGEN3 */
