Inference.cpp
/*
 * Copyright (c) The Shogun Machine Learning Toolbox
 * Written (W) 2013 Heiko Strathmann
 * Written (W) 2013 Roman Votyakov
 * Written (W) 2012 Jacob Walker
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation are those
 * of the authors and should not be interpreted as representing official policies,
 * either expressed or implied, of the Shogun Development Team.
 *
 */
#include <shogun/lib/config.h>

#include <shogun/machine/gp/Inference.h>
#include <shogun/mathematics/Math.h>
#include <shogun/distributions/classical/GaussianDistribution.h>
#include <shogun/lib/Lock.h>

using namespace shogun;

#ifndef DOXYGEN_SHOULD_SKIP_THIS
struct GRADIENT_THREAD_PARAM
{
    CInference* inf;
    CMap<TParameter*, SGVector<float64_t> >* grad;
    CSGObject* obj;
    TParameter* param;
    CLock* lock;
};
#endif /* DOXYGEN_SHOULD_SKIP_THIS */
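/* A GRADIENT_THREAD_PARAM bundles everything one worker needs to compute the
 * derivative of the negative log marginal likelihood wrt a single parameter:
 * the inference object, the parameter and the object that owns it, the shared
 * result map, and the lock that guards writes to that map. */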

CInference::CInference()
{
    init();
}

float64_t CInference::get_scale() const
{
    return CMath::exp(m_log_scale);
}

void CInference::set_scale(float64_t scale)
{
    REQUIRE(scale>0, "Scale (%f) must be positive", scale);
    m_log_scale=CMath::log(scale);
}

SGMatrix<float64_t> CInference::get_multiclass_E()
{
    if (parameter_hash_changed())
        update();

    return SGMatrix<float64_t>(m_E);
}

CInference::CInference(CKernel* kernel, CFeatures* features,
        CMeanFunction* mean, CLabels* labels, CLikelihoodModel* model)
{
    init();

    set_kernel(kernel);
    set_features(features);
    set_labels(labels);
    set_model(model);
    set_mean(mean);
}

CInference::~CInference()
{
    SG_UNREF(m_kernel);
    SG_UNREF(m_features);
    SG_UNREF(m_labels);
    SG_UNREF(m_model);
    SG_UNREF(m_mean);
    SG_UNREF(m_minimizer);
}

void CInference::init()
{
    SG_ADD((CSGObject**)&m_kernel, "kernel", "Kernel", MS_AVAILABLE);
    SG_ADD(&m_log_scale, "log_scale", "Kernel log scale", MS_AVAILABLE, GRADIENT_AVAILABLE);
    SG_ADD((CSGObject**)&m_model, "likelihood_model", "Likelihood model",
            MS_AVAILABLE);
    SG_ADD((CSGObject**)&m_mean, "mean_function", "Mean function", MS_AVAILABLE);
    SG_ADD((CSGObject**)&m_labels, "labels", "Labels", MS_NOT_AVAILABLE);
    SG_ADD((CSGObject**)&m_features, "features", "Features", MS_NOT_AVAILABLE);
    SG_ADD(&m_gradient_update, "gradient_update", "Whether gradients are updated", MS_NOT_AVAILABLE);

    m_kernel=NULL;
    m_model=NULL;
    m_labels=NULL;
    m_features=NULL;
    m_mean=NULL;
    m_log_scale=0.0;
    m_gradient_update=false;
    m_minimizer=NULL;

    SG_ADD((CSGObject**)&m_minimizer, "Inference__m_minimizer", "minimizer in Inference", MS_NOT_AVAILABLE);
    SG_ADD(&m_alpha, "alpha", "alpha vector used in process mean calculation", MS_NOT_AVAILABLE);
    SG_ADD(&m_L, "L", "upper triangular factor of Cholesky decomposition", MS_NOT_AVAILABLE);
    SG_ADD(&m_E, "E", "the matrix used for multi classification", MS_NOT_AVAILABLE);
}
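/* Parameters registered with MS_AVAILABLE are the ones exposed to Shogun's
 * model selection machinery; log_scale additionally carries GRADIENT_AVAILABLE,
 * so gradient-based hyperparameter optimization can tune the kernel scale.
 * The scale is stored in log space (see set_scale/get_scale above), which
 * keeps it strictly positive without an explicit constraint. */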

void CInference::register_minimizer(Minimizer* minimizer)
{
    REQUIRE(minimizer, "Minimizer must be set\n");
    if (minimizer!=m_minimizer)
    {
        SG_REF(minimizer);
        SG_UNREF(m_minimizer);
        m_minimizer=minimizer;
    }
}

float64_t CInference::get_marginal_likelihood_estimate(
        int32_t num_importance_samples, float64_t ridge_size)
{
    /* sample from Gaussian approximation to q(f|y) */
    SGMatrix<float64_t> cov=get_posterior_covariance();

    /* add ridge */
    for (index_t i=0; i<cov.num_rows; ++i)
        cov(i,i)+=ridge_size;

    SGVector<float64_t> mean=get_posterior_mean();

    CGaussianDistribution* post_approx=new CGaussianDistribution(mean, cov);
    SGMatrix<float64_t> samples=post_approx->sample(num_importance_samples);

    /* evaluate q(f^i|y), p(f^i|\theta), p(y|f^i), i.e.,
     * log pdf of approximation, prior and likelihood */

    /* log pdf q(f^i|y) */
    SGVector<float64_t> log_pdf_post_approx=post_approx->log_pdf_multiple(samples);

    /* don't need the Gaussian anymore, free memory */
    SG_UNREF(post_approx);
    post_approx=NULL;

    /* log pdf p(f^i|\theta) and free memory afterwards. Scale kernel beforehand */
    SGMatrix<float64_t> scaled_kernel(m_ktrtr.num_rows, m_ktrtr.num_cols);
    memcpy(scaled_kernel.matrix, m_ktrtr.matrix,
            sizeof(float64_t)*m_ktrtr.num_rows*m_ktrtr.num_cols);
    for (index_t i=0; i<m_ktrtr.num_rows*m_ktrtr.num_cols; ++i)
        scaled_kernel.matrix[i]*=CMath::exp(m_log_scale*2.0);

    /* add ridge */
    for (index_t i=0; i<m_ktrtr.num_rows; ++i)
        scaled_kernel(i,i)+=ridge_size;

    CGaussianDistribution* prior=new CGaussianDistribution(
            m_mean->get_mean_vector(m_features), scaled_kernel);
    SGVector<float64_t> log_pdf_prior=prior->log_pdf_multiple(samples);
    SG_UNREF(prior);
    prior=NULL;

    /* p(y|f^i) */
    SGVector<float64_t> log_likelihood=m_model->get_log_probability_fmatrix(
            m_labels, samples);

    /* combine probabilities */
    ASSERT(log_likelihood.vlen==num_importance_samples);
    ASSERT(log_likelihood.vlen==log_pdf_prior.vlen);
    ASSERT(log_likelihood.vlen==log_pdf_post_approx.vlen);
    SGVector<float64_t> sum(log_likelihood);
    for (index_t i=0; i<log_likelihood.vlen; ++i)
        sum[i]=log_likelihood[i]+log_pdf_prior[i]-log_pdf_post_approx[i];

    /* use log-sum-exp (in particular, log-mean-exp) trick to combine values */
    return CMath::log_mean_exp(sum);
}
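/* The estimate above is plain importance sampling in the log domain: with
 * f^1,...,f^N drawn from the Gaussian approximation q(f|y), it returns
 *
 *   log p(y|theta) ~= log( (1/N) * sum_i exp( log p(y|f^i)
 *                                             + log p(f^i|theta)
 *                                             - log q(f^i|y) ) ),
 *
 * evaluated with CMath::log_mean_exp so that the individual exp terms cannot
 * overflow or underflow. */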

CMap<TParameter*, SGVector<float64_t> >* CInference::
get_negative_log_marginal_likelihood_derivatives(CMap<TParameter*, CSGObject*>* params)
{
    REQUIRE(params->get_num_elements(), "Number of parameters should be greater "
            "than zero\n")

    compute_gradient();

    // get number of derivatives
    const index_t num_deriv=params->get_num_elements();

    // create map of derivatives
    CMap<TParameter*, SGVector<float64_t> >* result=
        new CMap<TParameter*, SGVector<float64_t> >(num_deriv, num_deriv);

    SG_REF(result);

    // create lock object
    CLock lock;

#ifdef HAVE_PTHREAD
    if (num_deriv<2)
    {
#endif /* HAVE_PTHREAD */
        for (index_t i=0; i<num_deriv; i++)
        {
            CMapNode<TParameter*, CSGObject*>* node=params->get_node_ptr(i);

            GRADIENT_THREAD_PARAM thread_params;

            thread_params.inf=this;
            thread_params.obj=node->data;
            thread_params.param=node->key;
            thread_params.grad=result;
            thread_params.lock=&lock;

            get_derivative_helper((void*) &thread_params);
        }
#ifdef HAVE_PTHREAD
    }
    else
    {
        pthread_t* threads=SG_MALLOC(pthread_t, num_deriv);
        GRADIENT_THREAD_PARAM* thread_params=SG_MALLOC(GRADIENT_THREAD_PARAM,
                num_deriv);

        for (index_t t=0; t<num_deriv; t++)
        {
            CMapNode<TParameter*, CSGObject*>* node=params->get_node_ptr(t);

            thread_params[t].inf=this;
            thread_params[t].obj=node->data;
            thread_params[t].param=node->key;
            thread_params[t].grad=result;
            thread_params[t].lock=&lock;

            pthread_create(&threads[t], NULL, CInference::get_derivative_helper,
                    (void*)&thread_params[t]);
        }

        for (index_t t=0; t<num_deriv; t++)
            pthread_join(threads[t], NULL);

        SG_FREE(thread_params);
        SG_FREE(threads);
    }
#endif /* HAVE_PTHREAD */

    return result;
}
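/* The derivatives wrt different parameters are independent, so when pthreads
 * are available and more than one parameter is requested, one thread per
 * parameter is spawned and each writes its result into the shared map while
 * holding the lock; with a single parameter (or without pthreads) the helper
 * is simply invoked in the calling thread. */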

void* CInference::get_derivative_helper(void* p)
{
    GRADIENT_THREAD_PARAM* thread_param=(GRADIENT_THREAD_PARAM*)p;

    CInference* inf=thread_param->inf;
    CSGObject* obj=thread_param->obj;
    CMap<TParameter*, SGVector<float64_t> >* grad=thread_param->grad;
    TParameter* param=thread_param->param;
    CLock* lock=thread_param->lock;

    REQUIRE(param, "Parameter should not be NULL\n");
    REQUIRE(obj, "Object of the parameter should not be NULL\n");

    SGVector<float64_t> gradient;

    if (obj==inf)
    {
        // try to find derivative wrt InferenceMethod.parameter
        gradient=inf->get_derivative_wrt_inference_method(param);
    }
    else if (obj==inf->m_model)
    {
        // try to find derivative wrt LikelihoodModel.parameter
        gradient=inf->get_derivative_wrt_likelihood_model(param);
    }
    else if (obj==inf->m_kernel)
    {
        // try to find derivative wrt Kernel.parameter
        gradient=inf->get_derivative_wrt_kernel(param);
    }
    else if (obj==inf->m_mean)
    {
        // try to find derivative wrt MeanFunction.parameter
        gradient=inf->get_derivative_wrt_mean(param);
    }
    else
    {
        SG_SERROR("Can't compute derivative of negative log marginal "
                "likelihood wrt %s.%s", obj->get_name(), param->m_name);
    }

    lock->lock();
    grad->add(param, gradient);
    lock->unlock();

    return NULL;
}

void CInference::update()
{
    check_members();
    update_train_kernel();
}

void CInference::check_members() const
{
    REQUIRE(m_features, "Training features should not be NULL\n")
    REQUIRE(m_features->get_num_vectors(),
            "Number of training features must be greater than zero\n")
    REQUIRE(m_labels, "Labels should not be NULL\n")
    REQUIRE(m_labels->get_num_labels(),
            "Number of labels must be greater than zero\n")
    REQUIRE(m_labels->get_num_labels()==m_features->get_num_vectors(),
            "Number of training vectors (%d) must match number of labels (%d)\n",
            m_labels->get_num_labels(), m_features->get_num_vectors())
    REQUIRE(m_kernel, "Kernel should not be NULL\n")
    REQUIRE(m_mean, "Mean function should not be NULL\n")
}

void CInference::update_train_kernel()
{
    m_kernel->init(m_features, m_features);
    m_ktrtr=m_kernel->get_kernel_matrix();
}

void CInference::compute_gradient()
{
    if (parameter_hash_changed())
        update();
}
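/* Usage note: CInference itself is abstract (the posterior mean/covariance and
 * the parameter derivatives are pure virtual), so it is used through a concrete
 * subclass. A minimal sketch, assuming the exact GP regression classes from
 * shogun/machine/gp (CExactInferenceMethod, CZeroMean, CGaussianLikelihood),
 * with X an SGMatrix<float64_t> of training inputs and y an SGVector<float64_t>
 * of targets:
 *
 *   CDenseFeatures<float64_t>* feats=new CDenseFeatures<float64_t>(X);
 *   CRegressionLabels* labs=new CRegressionLabels(y);
 *   CGaussianKernel* kernel=new CGaussianKernel(10, 2.0);
 *   CExactInferenceMethod* inf=new CExactInferenceMethod(kernel, feats,
 *       new CZeroMean(), labs, new CGaussianLikelihood());
 *   inf->set_scale(1.5);  // stored internally as m_log_scale
 *   float64_t nlml=inf->get_negative_log_marginal_likelihood();
 *   SG_UNREF(inf);
 */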