SHOGUN  4.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
InferenceMethod.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (W) 2013 Heiko Strathmann
4  * Written (W) 2013 Roman Votyakov
5  * Written (W) 2012 Jacob Walker
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright notice, this
12  * list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
21  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * The views and conclusions contained in the software and documentation are those
29  * of the authors and should not be interpreted as representing official policies,
30  * either expressed or implied, of the Shogun Development Team.
31  *
32  */
33 #include <shogun/lib/config.h>
34 
35 #ifdef HAVE_EIGEN3
36 
41 #include <shogun/lib/Lock.h>
42 
43 using namespace shogun;
44 
45 #ifndef DOXYGEN_SHOULD_SKIP_THIS
46 struct GRADIENT_THREAD_PARAM
47 {
48  CInferenceMethod* inf;
50  CSGObject* obj;
51  TParameter* param;
52  CLock* lock;
53 };
54 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
55 
57 {
58  init();
59 }
60 
62 {
63  return CMath::exp(m_log_scale);
64 }
65 
67 {
68  REQUIRE(scale>0, "Scale (%f) must be positive", scale);
69  m_log_scale=CMath::log(scale);
70 }
71 
73 {
75  update();
76 
77  return SGMatrix<float64_t>(m_E);
78 }
79 
81  CMeanFunction* mean, CLabels* labels, CLikelihoodModel* model)
82 {
83  init();
84 
85  set_kernel(kernel);
86  set_features(features);
87  set_labels(labels);
88  set_model(model);
89  set_mean(mean);
90 }
91 
93 {
99 }
100 
101 void CInferenceMethod::init()
102 {
103  SG_ADD((CSGObject**)&m_kernel, "kernel", "Kernel", MS_AVAILABLE);
104  SG_ADD(&m_log_scale, "log_scale", "Kernel log scale", MS_AVAILABLE, GRADIENT_AVAILABLE);
105  SG_ADD((CSGObject**)&m_model, "likelihood_model", "Likelihood model",
106  MS_AVAILABLE);
107  SG_ADD((CSGObject**)&m_mean, "mean_function", "Mean function", MS_AVAILABLE);
108  SG_ADD((CSGObject**)&m_labels, "labels", "Labels", MS_NOT_AVAILABLE);
109  SG_ADD((CSGObject**)&m_features, "features", "Features", MS_NOT_AVAILABLE);
110  SG_ADD(&m_gradient_update, "gradient_update", "Whether gradients are updated", MS_NOT_AVAILABLE);
111 
112 
113  m_kernel=NULL;
114  m_model=NULL;
115  m_labels=NULL;
116  m_features=NULL;
117  m_mean=NULL;
118  m_log_scale=0.0;
119  m_gradient_update=false;
120 
121  SG_ADD(&m_alpha, "alpha", "alpha vector used in process mean calculation", MS_NOT_AVAILABLE);
122  SG_ADD(&m_L, "L", "upper triangular factor of Cholesky decomposition", MS_NOT_AVAILABLE);
123  SG_ADD(&m_E, "E", "the matrix used for multi classification", MS_NOT_AVAILABLE);
124 }
125 
127  int32_t num_importance_samples, float64_t ridge_size)
128 {
129  /* sample from Gaussian approximation to q(f|y) */
131 
132  /* add ridge */
133  for (index_t i=0; i<cov.num_rows; ++i)
134  cov(i,i)+=ridge_size;
135 
137 
138  CGaussianDistribution* post_approx=new CGaussianDistribution(mean, cov);
139  SGMatrix<float64_t> samples=post_approx->sample(num_importance_samples);
140 
141  /* evaluate q(f^i|y), p(f^i|\theta), p(y|f^i), i.e.,
142  * log pdf of approximation, prior and likelihood */
143 
144  /* log pdf q(f^i|y) */
145  SGVector<float64_t> log_pdf_post_approx=post_approx->log_pdf_multiple(samples);
146 
147  /* dont need gaussian anymore, free memory */
148  SG_UNREF(post_approx);
149  post_approx=NULL;
150 
151  /* log pdf p(f^i|\theta) and free memory afterwise. Scale kernel before */
153  memcpy(scaled_kernel.matrix, m_ktrtr.matrix,
155  for (index_t i=0; i<m_ktrtr.num_rows*m_ktrtr.num_cols; ++i)
156  scaled_kernel.matrix[i]*=CMath::exp(m_log_scale*2.0);
157 
158  /* add ridge */
159  for (index_t i=0; i<cov.num_rows; ++i)
160  scaled_kernel(i,i)+=ridge_size;
161 
163  m_mean->get_mean_vector(m_features), scaled_kernel);
164  SGVector<float64_t> log_pdf_prior=prior->log_pdf_multiple(samples);
165  SG_UNREF(prior);
166  prior=NULL;
167 
168  /* p(y|f^i) */
170  m_labels, samples);
171 
172  /* combine probabilities */
173  ASSERT(log_likelihood.vlen==num_importance_samples);
174  ASSERT(log_likelihood.vlen==log_pdf_prior.vlen);
175  ASSERT(log_likelihood.vlen==log_pdf_post_approx.vlen);
176  SGVector<float64_t> sum(log_likelihood);
177  for (index_t i=0; i<log_likelihood.vlen; ++i)
178  sum[i]=log_likelihood[i]+log_pdf_prior[i]-log_pdf_post_approx[i];
179 
180  /* use log-sum-exp (in particular, log-mean-exp) trick to combine values */
181  return CMath::log_mean_exp(sum);
182 }
183 
186 {
187  REQUIRE(params->get_num_elements(), "Number of parameters should be greater "
188  "than zero\n")
189 
191 
192  // get number of derivatives
193  const index_t num_deriv=params->get_num_elements();
194 
195  // create map of derivatives
197  new CMap<TParameter*, SGVector<float64_t> >(num_deriv, num_deriv);
198 
199  SG_REF(result);
200 
201  // create lock object
202  CLock lock;
203 
204 #ifdef HAVE_PTHREAD
205  if (num_deriv<2)
206  {
207 #endif /* HAVE_PTHREAD */
208  for (index_t i=0; i<num_deriv; i++)
209  {
210  CMapNode<TParameter*, CSGObject*>* node=params->get_node_ptr(i);
211 
212  GRADIENT_THREAD_PARAM thread_params;
213 
214  thread_params.inf=this;
215  thread_params.obj=node->data;
216  thread_params.param=node->key;
217  thread_params.grad=result;
218  thread_params.lock=&lock;
219 
220  get_derivative_helper((void*) &thread_params);
221  }
222 #ifdef HAVE_PTHREAD
223  }
224  else
225  {
226  pthread_t* threads=SG_MALLOC(pthread_t, num_deriv);
227  GRADIENT_THREAD_PARAM* thread_params=SG_MALLOC(GRADIENT_THREAD_PARAM,
228  num_deriv);
229 
230  for (index_t t=0; t<num_deriv; t++)
231  {
232  CMapNode<TParameter*, CSGObject*>* node=params->get_node_ptr(t);
233 
234  thread_params[t].inf=this;
235  thread_params[t].obj=node->data;
236  thread_params[t].param=node->key;
237  thread_params[t].grad=result;
238  thread_params[t].lock=&lock;
239 
240  pthread_create(&threads[t], NULL, CInferenceMethod::get_derivative_helper,
241  (void*)&thread_params[t]);
242  }
243 
244  for (index_t t=0; t<num_deriv; t++)
245  pthread_join(threads[t], NULL);
246 
247  SG_FREE(thread_params);
248  SG_FREE(threads);
249  }
250 #endif /* HAVE_PTHREAD */
251 
252  return result;
253 }
254 
256 {
257  GRADIENT_THREAD_PARAM* thread_param=(GRADIENT_THREAD_PARAM*)p;
258 
259  CInferenceMethod* inf=thread_param->inf;
260  CSGObject* obj=thread_param->obj;
261  CMap<TParameter*, SGVector<float64_t> >* grad=thread_param->grad;
262  TParameter* param=thread_param->param;
263  CLock* lock=thread_param->lock;
264 
265  REQUIRE(param, "Parameter should not be NULL\n");
266  REQUIRE(obj, "Object of the parameter should not be NULL\n");
267 
268  SGVector<float64_t> gradient;
269 
270  if (obj==inf)
271  {
272  // try to find dervative wrt InferenceMethod.parameter
273  gradient=inf->get_derivative_wrt_inference_method(param);
274  }
275  else if (obj==inf->m_model)
276  {
277  // try to find derivative wrt LikelihoodModel.parameter
278  gradient=inf->get_derivative_wrt_likelihood_model(param);
279  }
280  else if (obj==inf->m_kernel)
281  {
282  // try to find derivative wrt Kernel.parameter
283  gradient=inf->get_derivative_wrt_kernel(param);
284  }
285  else if (obj==inf->m_mean)
286  {
287  // try to find derivative wrt MeanFunction.parameter
288  gradient=inf->get_derivative_wrt_mean(param);
289  }
290  else
291  {
292  SG_SERROR("Can't compute derivative of negative log marginal "
293  "likelihood wrt %s.%s", obj->get_name(), param->m_name);
294  }
295 
296  lock->lock();
297  grad->add(param, gradient);
298  lock->unlock();
299 
300  return NULL;
301 }
302 
304 {
305  check_members();
307 }
308 
310 {
311  REQUIRE(m_features, "Training features should not be NULL\n")
313  "Number of training features must be greater than zero\n")
314  REQUIRE(m_labels, "Labels should not be NULL\n")
316  "Number of labels must be greater than zero\n")
318  "Number of training vectors (%d) must match number of labels (%d)\n",
320  REQUIRE(m_kernel, "Kernel should not be NULL\n")
321  REQUIRE(m_mean, "Mean function should not be NULL\n")
322 }
323 
325 {
328 }
329 
331 {
333  update();
334 }
335 #endif /* HAVE_EIGEN3 */
virtual void set_labels(CLabels *lab)
virtual const char * get_name() const =0
virtual void set_model(CLikelihoodModel *mod)
virtual bool init(CFeatures *lhs, CFeatures *rhs)
Definition: Kernel.cpp:98
SGVector< float64_t > m_alpha
The Inference Method base class.
virtual void set_features(CFeatures *feat)
int32_t index_t
Definition: common.h:62
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:43
virtual int32_t get_num_labels() const =0
parameter struct
virtual int32_t get_num_vectors() const =0
#define REQUIRE(x,...)
Definition: SGIO.h:206
void unlock()
Definition: Lock.cpp:64
index_t num_cols
Definition: SGMatrix.h:378
virtual SGVector< float64_t > get_mean_vector(const CFeatures *features) const =0
virtual void set_scale(float64_t scale)
An abstract class of the mean function.
Definition: MeanFunction.h:49
virtual SGVector< float64_t > get_derivative_wrt_likelihood_model(const TParameter *param)=0
SGMatrix< float64_t > get_kernel_matrix()
Definition: Kernel.h:219
#define SG_REF(x)
Definition: SGObject.h:51
index_t num_rows
Definition: SGMatrix.h:376
CMapNode< K, T > * get_node_ptr(int32_t index)
Definition: Map.h:247
int32_t add(const K &key, const T &data)
Definition: Map.h:101
index_t vlen
Definition: SGVector.h:494
SGMatrix< float64_t > m_L
virtual SGVector< float64_t > get_log_probability_fmatrix(const CLabels *lab, SGMatrix< float64_t > F) const
#define ASSERT(x)
Definition: SGIO.h:201
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:112
Class Lock used for synchronization in concurrent programs.
Definition: Lock.h:17
virtual SGMatrix< float64_t > get_multiclass_E()
double float64_t
Definition: common.h:50
SGMatrix< float64_t > m_E
virtual SGVector< float64_t > get_derivative_wrt_inference_method(const TParameter *param)=0
virtual void update_train_kernel()
virtual SGVector< float64_t > get_derivative_wrt_kernel(const TParameter *param)=0
virtual SGVector< float64_t > log_pdf_multiple(SGMatrix< float64_t > samples) const
virtual void set_kernel(CKernel *kern)
float64_t get_marginal_likelihood_estimate(int32_t num_importance_samples=1, float64_t ridge_size=1e-15)
#define SG_UNREF(x)
Definition: SGObject.h:52
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
Dense version of the well-known Gaussian probability distribution, defined as .
virtual SGVector< float64_t > get_derivative_wrt_mean(const TParameter *param)=0
virtual void set_mean(CMeanFunction *m)
virtual SGMatrix< float64_t > get_posterior_covariance()=0
The class Features is the base class of all feature objects.
Definition: Features.h:68
#define SG_SERROR(...)
Definition: SGIO.h:179
void scale(Matrix A, Matrix B, typename Matrix::Scalar alpha)
Definition: Core.h:93
static float64_t exp(float64_t x)
Definition: Math.h:621
static float64_t log(float64_t v)
Definition: Math.h:922
virtual void check_members() const
virtual SGVector< float64_t > get_posterior_mean()=0
The Kernel base class.
Definition: Kernel.h:158
static T log_mean_exp(SGVector< T > values)
Definition: Math.h:1285
int32_t get_num_elements() const
Definition: Map.h:211
#define SG_ADD(...)
Definition: SGObject.h:81
virtual bool parameter_hash_changed()
Definition: SGObject.cpp:264
void lock()
Definition: Lock.cpp:57
The Likelihood model base class.
SGMatrix< float64_t > m_ktrtr
virtual CMap< TParameter *, SGVector< float64_t > > * get_negative_log_marginal_likelihood_derivatives(CMap< TParameter *, CSGObject * > *parameters)
CLikelihoodModel * m_model
virtual float64_t get_scale() const
the class CMap, a map based on the hash-table. w: http://en.wikipedia.org/wiki/Hash_table ...
Definition: SGObject.h:36
virtual SGMatrix< float64_t > sample(int32_t num_samples, SGMatrix< float64_t > pre_samples=SGMatrix< float64_t >()) const
static void * get_derivative_helper(void *p)

SHOGUN Machine Learning Toolbox - Documentation