#include <shogun/lib/external/brent.h>
using namespace Eigen;
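// This file implements the Laplace approximation for single-output FITC
// inference: the full n-by-n covariance K is replaced by the low-rank-plus-
// diagonal form V'*V + diag(dg) built from m inducing points, so the solves
// below run through matrix-vector products instead of dense factorizations.
//
// A minimal usage sketch (an assumption, not taken from this file: the
// Shogun 6-era C-prefixed API; `features`, `labels` and `inducing_features`
// stand for pre-built objects):
//
//   CGaussianKernel* kernel=new CGaussianKernel(10, 2.0);
//   CZeroMean* mean=new CZeroMean();
//   CStudentsTLikelihood* lik=new CStudentsTLikelihood();
//   CSingleFITCLaplaceInferenceMethod* inf=new CSingleFITCLaplaceInferenceMethod(
//       kernel, features, mean, labels, lik, inducing_features);
//   float64_t nlZ=inf->get_negative_log_marginal_likelihood();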
#ifndef DOXYGEN_SHOULD_SKIP_THIS
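// CFITCPsiLine is the 1-D objective handed to Brent's line search: given a
// step size x it forms alpha=start_alpha+x*dalpha, maps it to the latent mean
// f=K*alpha+m, and returns Psi=alpha'*(f-m)/2-sum(log p(y|f)).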
class CFITCPsiLine : public func_base
virtual double operator() (double x)
eigen_alpha=start_alpha+x*dalpha;
eigen_f=eigen_tmp+eigen_m;
float64_t result = eigen_alpha.dot(eigen_f-eigen_m)/2.0-
virtual ~SingleFITCLaplaceInferenceMethodCostFunction() { clean(); }
REQUIRE(obj, "Obj must be set\n");
REQUIRE(m_obj, "Object not set\n");
return m_obj->get_psi_wrt_alpha();
void unset_target(bool is_unref)
REQUIRE(m_obj, "Object not set\n");
REQUIRE(m_obj, "Object not set\n");
m_obj->get_gradient_wrt_alpha(m_derivatives);
return m_derivatives;
virtual const char* get_name() const { return "SingleFITCLaplaceInferenceMethodCostFunction"; }
SG_ADD(&m_derivatives, "SingleFITCLaplaceInferenceMethodCostFunction__m_derivatives",
"derivatives in SingleFITCLaplaceInferenceMethodCostFunction", MS_NOT_AVAILABLE);
SG_ADD((CSGObject **)&m_obj, "SingleFITCLaplaceInferenceMethodCostFunction__m_obj",
REQUIRE(obj, "Obj must be set\n");
void CSingleFITCLaplaceNewtonOptimizer::init()
m_opt_tolerance=1e-6;
SG_ADD((CSGObject **)&m_obj, "CSingleFITCLaplaceNewtonOptimizer__m_obj",
SG_ADD(&m_iter, "CSingleFITCLaplaceNewtonOptimizer__m_iter",
SG_ADD(&m_tolerance, "CSingleFITCLaplaceNewtonOptimizer__m_tolerance",
SG_ADD(&m_opt_tolerance, "CSingleFITCLaplaceNewtonOptimizer__m_opt_tolerance",
SG_ADD(&m_opt_max, "CSingleFITCLaplaceNewtonOptimizer__m_opt_max",
REQUIRE(m_obj, "Object not set\n");
Map<MatrixXd> eigen_kuu((m_obj->m_kuu).matrix, (m_obj->m_kuu).num_rows, (m_obj->m_kuu).num_cols);
Map<MatrixXd> eigen_V((m_obj->m_V).matrix, (m_obj->m_V).num_rows, (m_obj->m_V).num_cols);
Map<VectorXd> eigen_dg((m_obj->m_dg).vector, (m_obj->m_dg).vlen);
Map<MatrixXd> eigen_R0((m_obj->m_chol_R0).matrix, (m_obj->m_chol_R0).num_rows, (m_obj->m_chol_R0).num_cols);
m_obj->m_W=m_obj->m_model->get_log_probability_derivative_f(m_obj->m_labels, m_obj->m_mu, 2);
m_obj->m_W.scale(-1.0);
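// W=-d^2 log p(y|f)/df^2 is the diagonal negative Hessian of the log
// likelihood at the current estimate of the mode; for log-concave likelihoods
// every entry is non-negative.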
Map<VectorXd> eigen_al((m_obj->m_al).vector, (m_obj->m_al).vlen);
m_obj->m_dlp=m_obj->m_model->get_log_probability_derivative_f(m_obj->m_labels, m_obj->m_mu, 1);
while (Psi_Old-Psi_New>m_tolerance && iter<m_iter)
Map<VectorXd> eigen_W((m_obj->m_W).vector, (m_obj->m_W).vlen);
Map<VectorXd> eigen_dlp((m_obj->m_dlp).vector, (m_obj->m_dlp).vlen);
if (eigen_W.minCoeff() < 0)
eigen_W+=(2.0/(df+1))*eigen_dlp.cwiseProduct(eigen_dlp);
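// For a non-log-concave likelihood W can have negative entries; this branch
// appears to follow the GPML stabilization for Student's t, inflating W by
// 2/(df+1)*dlp.^2 (df = degrees of freedom) so the Newton system stays well
// conditioned.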
VectorXd b=eigen_W.cwiseProduct(eigen_mu-eigen_mean)+eigen_dlp;
VectorXd dd=MatrixXd::Ones(b.rows(),1).cwiseQuotient(eigen_W.cwiseProduct(eigen_dg)+MatrixXd::Ones(b.rows(),1));
VectorXd eigen_t=eigen_W.cwiseProduct(dd);
eigen_tmp=eigen_V*eigen_t.asDiagonal()*eigen_V.transpose()+MatrixXd::Identity(tmp.num_rows,tmp.num_rows);
tmp=m_obj->get_chol_inv(tmp);
MatrixXd eigen_RV=eigen_tmp2*eigen_V;
VectorXd dalpha=dd.cwiseProduct(b)-eigen_t.cwiseProduct(eigen_RV.transpose()*(eigen_RV*(dd.cwiseProduct(b))))-eigen_al;
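// Newton step via the Woodbury identity: with K=V'*V+diag(dg),
// dd=1./(W.*dg+1) and t=W.*dd, solving (I+K*W)*alpha_new=b reduces to
// dalpha = dd.*b - t.*(RV'*(RV*(dd.*b))) - alpha,
// where RV is the inverse Cholesky factor of I+V*diag(t)*V' applied to V, so
// only an m-by-m system is ever factorized.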
func.log_scale=m_obj->m_log_scale;
func.start_alpha=eigen_al;
func.alpha=&(m_obj->m_al);
func.dlp=&(m_obj->m_dlp);
func.f=&(m_obj->m_mu);
func.W=&(m_obj->m_W);
func.lik=m_obj->m_model;
func.lab=m_obj->m_labels;
Psi_New=local_min(0, m_opt_max, m_opt_tolerance, func, x);
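// Brent's local_min (from brent.h) searches the step length x in
// [0, m_opt_max] minimizing Psi along the Newton direction; the damped step
// guards against overshooting when the full Newton step would increase Psi.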
if (Psi_Old-Psi_New>m_tolerance && iter>=m_iter)
SG_SWARNING("Max iterations (%d) reached, but convergence level (%f) is not yet below tolerance (%f)\n", m_iter, Psi_Old-Psi_New, m_tolerance);
void CSingleFITCLaplaceInferenceMethod::init()
eigen_res=eigen_x.cwiseProduct(eigen_t)-eigen_Rvdd.transpose()*(eigen_Rvdd*eigen_x);
eigen_res=eigen_V.transpose()*(eigen_V*eigen_al)+eigen_dg.cwiseProduct(eigen_al);
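// The two helpers above express the FITC structure as matrix-vector products:
// compute_mvmK applies K=V'*V+diag(dg) as V'*(V*al)+dg.*al, and compute_mvmZ
// applies Z=diag(t)-Rvdd'*Rvdd, each in O(n*m) time rather than O(n^2).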
REQUIRE(inference!=NULL, "Inference should not be NULL");
SG_SERROR("Provided inference is not of type CSingleFITCLaplaceInferenceMethod!\n")
LLT<MatrixXd> chol(eigen_mtx.colwise().reverse().rowwise().reverse().matrix());
eigen_res=tmp.colwise().reverse().rowwise().reverse().matrix().triangularView<Upper>(
SG_WARNING("nlZ cannot be computed since W is too negative");
LLT<MatrixXd> chol(A);
float64_t result=eigen_alpha.dot(eigen_mu-eigen_mean)/2.0-lp+
A.diagonal().array().log().sum()+(eigen_W.cwiseProduct(eigen_dg)+MatrixXd::Ones(eigen_dg.rows(),1)).array().log().sum()/2.0;
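// Laplace evidence at the mode: nlZ = alpha'*(mu-mean)/2 - log p(y|mu)
// + log|I+K*W|/2. Under the FITC form of K the determinant splits into
// sum(log(1+W.*dg))/2 plus the log-determinant of A=I+V*diag(t)*V',
// accumulated here from the Cholesky factor computed above.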
eigen_dg=eigen_ktrtr_diag*CMath::exp(m_log_scale*2.0)-(eigen_V.cwiseProduct(eigen_V)).colwise().sum().adjoint();
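// dg is the FITC diagonal correction: the scaled kernel diagonal
// diag(K)*exp(2*log_scale) minus the column norms of V, i.e.
// dg = diag(K) - diag(Q), so K is carried implicitly as Q+diag(dg).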
eigen_mu=eigen_tmp+eigen_mean;
Psi_New=eigen_alpha.dot(eigen_tmp)/2.0-
if (Psi_Def < Psi_New)
REQUIRE(minimizer, "Minimizer must be set\n");
if (!dynamic_cast<CSingleFITCLaplaceNewtonOptimizer*>(minimizer))
REQUIRE(opt, "The provided minimizer is not supported\n")
if (this->ref_count()>1)
REQUIRE(minimizer, "The provided minimizer is not supported\n");
cost_fun->set_target(this);
if (this->ref_count()>1)
cost_fun->unset_target(cleanup);
eigen_mu=eigen_tmp+eigen_mean;
eigen_post_alpha=eigen_R0.transpose()*(eigen_V*eigen_al);
VectorXd Wd0_1=eigen_W.cwiseProduct(eigen_dg)+MatrixXd::Ones(eigen_W.rows(),1);
if (eigen_W.minCoeff()>0)
eigen_sW=eigen_W.cwiseSqrt();
eigen_sW=((eigen_W.array().abs()+eigen_W.array())/2).sqrt()-((eigen_W.array().abs()-eigen_W.array())/2).sqrt();
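// Branch-free signed square root: the expression equals sqrt(W) where W>=0
// and -sqrt(-W) where W<0, i.e. sW = sign(W).*sqrt(|W|), so sW stays usable
// when the likelihood is not log-concave.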
if (!(Wd0_1.array().abs().matrix()==Wd0_1))
VectorXd dd=MatrixXd::Ones(Wd0_1.rows(),1).cwiseQuotient(Wd0_1);
eigen_t=eigen_W.cwiseProduct(dd);
eigen_A=eigen_V*eigen_t.asDiagonal()*eigen_V.transpose()+MatrixXd::Identity(A.num_rows,A.num_rows);
MatrixXd R0tV=eigen_R0.transpose()*eigen_V;
MatrixXd B=R0tV*eigen_t.asDiagonal();
eigen_L=-B*R0tV.transpose();
MatrixXd eigen_RV=eigen_tmp*eigen_V;
eigen_Rvdd=eigen_RV*eigen_t.asDiagonal();
B=B*eigen_RV.transpose();
eigen_L+=B*B.transpose();
B=B*eigen_V.transpose();
FullPivLU<MatrixXd> lu(eigen_A);
eigen_L+=B*lu.inverse()*B.transpose();
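// When W has negative entries A=I+V*diag(t)*V' need not be positive definite,
// so a full-pivot LU factorization replaces the Cholesky-based path used in
// the positive-definite branch above.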
eigen_g=((eigen_dg.cwiseProduct(dd)).array()+
).array().pow(2).colwise().sum().transpose())/2;
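// eigen_g appears to accumulate half the diagonal of the approximate
// posterior covariance, g = diag((K^{-1}+W)^{-1})/2 (one summand of the
// expression is elided from this listing); it later enters dfhat = g.*d3lp
// for the implicit derivative terms.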
eigen_B=eigen_R0.transpose()*eigen_V;
eigen_w=eigen_B*eigen_al;
eigen_dfhat=eigen_g.cwiseProduct(eigen_d3lp);
eigen_dA=2*eigen_dKui-eigen_dKuui*eigen_R0tV;
eigen_v=eigen_ddiagKi-eigen_dA.cwiseProduct(eigen_R0tV).colwise().sum().transpose();
eigen_b=eigen_dA.transpose()*(eigen_R0tV*eigen_dlp)+eigen_v.cwiseProduct(eigen_dlp);
result-=eigen_dfhat.dot(eigen_b-eigen_KZb);
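// The derivative wrt a covariance parameter has an explicit part (already in
// result) plus an implicit part through the mode: because the mode f shifts
// with the hyperparameters, dfhat'*(b-K*Z*b) subtracts the chain-rule
// correction, with dfhat=g.*d3lp prepared in update_deriv.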
REQUIRE(param, "Param not set\n");
&& strcmp(param->m_name, "log_inducing_noise")
&& strcmp(param->m_name, "inducing_features")),
"Can't compute derivative of the negative log marginal likelihood wrt %s.%s parameter\n",
if (!strcmp(param->m_name, "inducing_features"))
if (!strcmp(param->m_name, "log_inducing_noise"))
result[0]=-eigen_g.dot(eigen_d2lp_dhyp)-eigen_lp_dhyp.sum();
REQUIRE(param, "Param not set\n");
int64_t len=const_cast<TParameter *>(param)->m_datatype.get_num_elements();
return eigen_dfhat.dot(eigen_d-eigen_tmp);
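// get_derivative_implicit_term_helper returns dfhat'*(d-eigen_tmp), where
// eigen_tmp appears to hold K*Z*d: the implicit chain-rule term shared by
// the mean- and likelihood-parameter derivatives.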
REQUIRE(param, "Param not set\n");
int64_t len=const_cast<TParameter *>(param)->m_datatype.get_num_elements();
REQUIRE(param, "Param not set\n");
SG_WARNING("Derivative wrt %s cannot be computed since W (the diagonal matrix of the negative Hessian) is too negative\n", param->m_name);
eigen_q=eigen_dfhat-eigen_q;
eigen_neg_v-=2*eigen_dlp.cwiseProduct(eigen_q);
eigen_BdK=eigen_B*eigen_neg_v.asDiagonal()+eigen_w*(eigen_al.transpose())+
(eigen_B*eigen_Rvdd.transpose())*eigen_Rvdd+
(eigen_B*eigen_dlp)*eigen_q.transpose()+(eigen_B*eigen_q)*eigen_dlp.transpose();
VectorXd eigen_t1=eigen_B.cwiseProduct(eigen_B).colwise().sum().adjoint();
eigen_b=(eigen_t1.cwiseProduct(eigen_dlp)-eigen_B.transpose()*(eigen_B*eigen_dlp))*factor;
MatrixXd diagonal_part=eigen_dg.asDiagonal();
MatrixXd prior=eigen_V.transpose()*eigen_V+diagonal_part;
eigen_Sigma=prior-tmp.adjoint()*eigen_L*tmp;
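// update_approx_cov: the Laplace posterior covariance is
// (K^{-1}+W)^{-1} = K - K*(K+W^{-1})^{-1}*K; here the FITC prior
// V'*V+diag(dg) plays the role of K and the subtracted term appears to carry
// the factored correction.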
eigen_f=eigen_tmp+eigen_mean_f;
float64_t psi=eigen_alpha.dot(eigen_tmp) * 0.5;
eigen_f=eigen_tmp+eigen_mean_f;
eigen_tmp2=eigen_alpha-eigen_dlp_f;
eigen_gradient=eigen_tmp3;
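// get_psi_wrt_alpha/get_gradient_wrt_alpha expose the objective in the alpha
// parametrization, Psi(alpha)=alpha'*K*alpha/2-log p(y|f) with f=K*alpha+mean,
// whose gradient is K*(alpha-dlp(f)); this is the pair consumed by the
// first-order cost function defined near the top of this file.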