48 #include <shogun/lib/external/brent.h>
51 using namespace Eigen;
56 #ifndef DOXYGEN_SHOULD_SKIP_THIS
59 class CMultiPsiLine :
public func_base
73 virtual double operator() (
double x)
81 (*alpha)=start_alpha+x*dalpha;
86 eigen_f.block(bl*n,0,n,1)=K*alpha->block(bl*n,0,n,1)*
CMath::exp(log_scale*2.0);
87 result+=alpha->block(bl*n,0,n,1).dot(eigen_f.block(bl*n,0,n,1))/2.0;
88 eigen_f.block(bl*n,0,n,1)+=eigen_m;
114 void CMultiLaplacianInferenceMethod::init()
130 "Labels must be type of CMulticlassLabels\n");
132 "likelihood model should support multi-classification\n");
168 SG_SERROR(
"Provided inference is not of type CMultiLaplacianInferenceMethod!\n")
192 eigen_Sigma.block(bl*n,bl*n,n,n)=(MatrixXd::Identity(n,n)-eigen_U.block(bl*n,0,n,n))*(eigen_K*
CMath::exp(
m_log_scale*2.0));
194 MatrixXd eigen_V=eigen_M.triangularView<Upper>().adjoint().solve(eigen_U.transpose());
195 eigen_Sigma+=eigen_V.transpose()*eigen_V;
202 void CMultiLaplacianInferenceMethod::get_dpi_helper()
212 VectorXd max_coeff=eigen_mu_matrix.rowwise().maxCoeff();
213 eigen_dpi_matrix=eigen_mu_matrix.array().colwise()-max_coeff.array();
214 VectorXd log_sum_exp=((eigen_dpi_matrix.array().exp()).rowwise().sum()).array().log();
215 eigen_dpi_matrix=(eigen_dpi_matrix.array().colwise()-log_sum_exp.array()).exp();
234 VectorXd eigen_mean=eigen_mean_bl.replicate(C,1);
266 Psi_New=alpha.dot(eigen_mu)/2.0;
268 eigen_mu+=eigen_mean;
273 if (Psi_Def < Psi_New)
307 VectorXd eigen_sD=eigen_dpi.block(bl*n,0,n,1).cwiseSqrt();
308 LLT<MatrixXd> chol_tmp((eigen_sD*eigen_sD.transpose()).cwiseProduct(eigen_ktrtr*
CMath::exp(
m_log_scale*2.0))+
310 MatrixXd eigen_L_tmp=chol_tmp.matrixU();
311 MatrixXd eigen_E_bl=eigen_L_tmp.triangularView<Upper>().adjoint().solve(
MatrixXd(eigen_sD.asDiagonal()));
312 eigen_E_bl=eigen_E_bl.transpose()*eigen_E_bl;
313 eigen_E.block(0,bl*n,n,n)=eigen_E_bl;
318 m_nlz+=eigen_L_tmp.diagonal().array().log().sum();
321 LLT<MatrixXd> chol_tmp(eigen_M);
322 eigen_M = chol_tmp.matrixU();
323 m_nlz+=eigen_M.diagonal().array().log().sum();
325 VectorXd eigen_b=eigen_dlp;
327 tmp1=eigen_dpi.array()*(eigen_mu-eigen_mean).array();
329 VectorXd tmp2=m_tmp.array().rowwise().sum();
332 eigen_b.block(bl*n,0,n,1)+=eigen_dpi.block(bl*n,0,n,1).cwiseProduct(eigen_mu.block(bl*n,0,n,1)-eigen_mean_bl-tmp2);
336 eigen_c.block(bl*n,0,n,1)=eigen_E.block(0,bl*n,n,n)*(eigen_ktrtr*
CMath::exp(
m_log_scale*2.0)*eigen_b.block(bl*n,0,n,1));
340 VectorXd tmp3=c_tmp.array().rowwise().sum();
341 VectorXd tmp4=eigen_M.triangularView<Upper>().adjoint().solve(tmp3);
343 VectorXd &eigen_dalpha=eigen_b;
344 eigen_dalpha+=eigen_E.transpose()*(eigen_M.triangularView<Upper>().solve(tmp4))-eigen_c-eigen_alpha;
351 func.dalpha=eigen_dalpha;
352 func.start_alpha=eigen_alpha;
353 func.alpha=&eigen_alpha;
367 SG_WARNING(
"Max iterations (%d) reached, but convergence level (%f) is not yet below tolerance (%f)\n",
m_iter, Psi_Old-Psi_New,
m_tolerance);
379 eigen_U=eigen_M.triangularView<Upper>().adjoint().solve(eigen_E);
395 result+=((eigen_E.block(0,bl*n,n,n)-eigen_U.block(0,bl*n,n,n).transpose()*eigen_U.block(0,bl*n,n,n)).array()
396 *eigen_dK.array()).sum();
397 result-=(eigen_dK*eigen_alpha.block(bl*n,0,n,1)).
dot(eigen_alpha.block(bl*n,0,n,1));
406 REQUIRE(!strcmp(param->
m_name,
"log_scale"),
"Can't compute derivative of "
407 "the nagative log marginal likelihood wrt %s.%s parameter\n",
428 REQUIRE(param,
"Param not set\n");
430 int64_t len=
const_cast<TParameter *
>(param)->m_datatype.get_num_elements();
458 REQUIRE(param,
"Param not set\n");
460 int64_t len=
const_cast<TParameter *
>(param)->m_datatype.get_num_elements();
477 result[i]-=eigen_alpha.block(bl*n,0,n,1).dot(eigen_dmu);
493 VectorXd eigen_mean=eigen_mean_bl.replicate(C,1);
496 eigen_res=eigen_mu-eigen_mean;
virtual SGVector< float64_t > get_log_probability_f(const CLabels *lab, SGVector< float64_t > func) const =0
SGVector< float64_t > m_dlp
virtual bool supports_multiclass() const
virtual ELabelType get_label_type() const =0
SGVector< float64_t > m_alpha
virtual const char * get_name() const
The Inference Method base class.
virtual float64_t get_derivative_helper(SGMatrix< float64_t > dK)
virtual SGVector< float64_t > get_posterior_mean()
Vector::Scalar dot(Vector a, Vector b)
The class Labels models labels, i.e. class assignments of objects.
static const float64_t INFTY
infinity
virtual int32_t get_num_labels() const =0
multi-class labels 0,1,...
virtual SGVector< float64_t > get_derivative_wrt_inference_method(const TParameter *param)
virtual float64_t get_negative_log_marginal_likelihood()
The Laplace approximation inference method base class.
CMultiLaplacianInferenceMethod()
virtual SGVector< float64_t > get_mean_vector(const CFeatures *features) const =0
virtual void check_members() const
An abstract class of the mean function.
virtual void update_alpha()
static CMultiLaplacianInferenceMethod * obtain_from_generic(CInferenceMethod *inference)
Multiclass Labels for multi-class classification.
SGMatrix< float64_t > m_L
SGMatrix< float64_t > m_E
SGVector< float64_t > m_mu
The Laplace approximation inference method class for multi-class classification.
virtual SGVector< float64_t > get_derivative_wrt_likelihood_model(const TParameter *param)
SGMatrix< float64_t > m_U
virtual void update_approx_cov()
virtual ~CMultiLaplacianInferenceMethod()
static T sum(T *vec, int32_t len)
Return sum(vec)
Matrix< float64_t,-1,-1, 0,-1,-1 > MatrixXd
virtual void update_chol()
virtual SGVector< float64_t > get_parameter_derivative(const CFeatures *features, const TParameter *param, index_t index=-1)
all classes and functions are contained in the shogun namespace
virtual SGVector< float64_t > get_derivative_wrt_mean(const TParameter *param)
The class Features is the base class of all feature objects.
float64_t m_opt_tolerance
static float64_t exp(float64_t x)
virtual SGMatrix< float64_t > get_parameter_gradient(const TParameter *param, index_t index=-1)
virtual void check_members() const
virtual EInferenceType get_inference_type() const
virtual SGVector< float64_t > get_log_probability_derivative_f(const CLabels *lab, SGVector< float64_t > func, index_t i) const =0
SGVector< float64_t > m_W
virtual SGVector< float64_t > get_derivative_wrt_kernel(const TParameter *param)
SGMatrix< float64_t > m_Sigma
virtual void compute_gradient()
virtual bool parameter_hash_changed()
virtual void update_deriv()
The Likelihood model base class.
SGMatrix< float64_t > m_ktrtr
CLikelihoodModel * m_model