LaplacianInferenceMethod.cpp

/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Copyright (C) 2012 Jacob Walker
 *
 * Code adapted from the Gaussian Process Machine Learning Toolbox
 * http://www.gaussianprocess.org/gpml/code/matlab/doc/
 * and specifically from infLaplace.m
 */
#include <shogun/lib/config.h>

#ifdef HAVE_LAPACK
#ifdef HAVE_EIGEN3

#include <shogun/regression/gp/LaplacianInferenceMethod.h>
#include <shogun/regression/gp/GaussianLikelihood.h>
#include <shogun/regression/gp/StudentsTLikelihood.h>
#include <shogun/mathematics/lapack.h>
#include <shogun/mathematics/Math.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/features/CombinedFeatures.h>
#include <shogun/mathematics/eigen3.h>
#include <shogun/lib/external/brent.h>

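/* Usage sketch (illustrative only): the class is constructed from a
 * kernel, features, mean function, labels, and likelihood, mirroring
 * infLaplace.m. The constructor signature below is taken from this
 * file; the surrounding class names are the Shogun 2.x GP API assumed
 * by the includes above, so treat this as a hypothetical example:
 *
 *   CDenseFeatures<float64_t>* feat = new CDenseFeatures<float64_t>(X);
 *   CRegressionLabels* lab = new CRegressionLabels(y);
 *   CGaussianKernel* kern = new CGaussianKernel(10, 2.0);
 *   CZeroMean* mean = new CZeroMean();
 *   CStudentsTLikelihood* lik = new CStudentsTLikelihood();
 *
 *   CLaplacianInferenceMethod* inf =
 *       new CLaplacianInferenceMethod(kern, feat, mean, lab, lik);
 *   float64_t nlml = inf->get_negative_marginal_likelihood();
 */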
using namespace shogun;
using namespace Eigen;

namespace shogun
{
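    /* The Laplace approximation finds the posterior mode by minimizing,
     * over alpha (where f = K*alpha*scale^2 + m),
     *
     *   psi(alpha) = alpha'*(f - m)/2 - log p(y|f),
     *
     * cf. infLaplace.m. Psi_line evaluates psi at start_alpha + x*dalpha
     * so that Brent's method can choose the step size x along the Newton
     * direction dalpha computed in update_alpha(). */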
    class Psi_line : public func_base
    {
    public:
        Eigen::Map<Eigen::VectorXd>* alpha;
        Eigen::VectorXd* dalpha;
        Eigen::Map<Eigen::MatrixXd>* K;
        float64_t* l1;
        SGVector<float64_t>* dl1;
        Eigen::Map<Eigen::VectorXd>* dl2;
        SGVector<float64_t>* mW;
        SGVector<float64_t>* f;
        SGVector<float64_t>* m;
        float64_t scale;
        CLikelihoodModel* lik;
        CRegressionLabels* lab;

        Eigen::VectorXd start_alpha;

        virtual double operator() (double x)
        {
            Eigen::Map<Eigen::VectorXd> eigen_f(f->vector, f->vlen);
            Eigen::Map<Eigen::VectorXd> eigen_m(m->vector, m->vlen);

            // Step along the search direction and recompute the latent
            // function f = K*alpha*scale^2 + m
            *alpha = start_alpha + x*(*dalpha);
            eigen_f = (*K)*(*alpha)*scale*scale + eigen_m;

            for (index_t i = 0; i < eigen_f.rows(); i++)
                (*f)[i] = eigen_f[i];

            // Refresh the first and (negated) second derivatives of the
            // log-likelihood at the new f
            (*dl1) = lik->get_log_probability_derivative_f(lab, (*f), 1);
            (*mW) = lik->get_log_probability_derivative_f(lab, (*f), 2);

            for (index_t i = 0; i < (*mW).vlen; i++)
                (*mW)[i] = -(*mW)[i];

            // psi(alpha) = alpha'*(f - m)/2 - log p(y|f)
            float64_t result = (*alpha).dot(eigen_f - eigen_m)/2.0;
            result -= lik->get_log_probability_f(lab, *f);

            return result;
        }
    };
}

CLaplacianInferenceMethod::CLaplacianInferenceMethod() : CInferenceMethod()
{
    init();
    update_all();
    update_parameter_hash();
}

CLaplacianInferenceMethod::CLaplacianInferenceMethod(CKernel* kern,
        CFeatures* feat, CMeanFunction* m, CLabels* lab,
        CLikelihoodModel* mod) : CInferenceMethod(kern, feat, m, lab, mod)
{
    init();
    update_all();
}

void CLaplacianInferenceMethod::init()
{
    m_latent_features = NULL;
    m_max_itr = 30;
    m_opt_tolerance = 1e-6;
    m_tolerance = 1e-8;
    m_max = 5;
}

CLaplacianInferenceMethod::~CLaplacianInferenceMethod()
{
}

void CLaplacianInferenceMethod::update_all()
{
    if (m_labels)
        m_label_vector =
                ((CRegressionLabels*) m_labels)->get_labels().clone();

    if (m_features && m_features->has_property(FP_DOT)
            && m_features->get_num_vectors())
    {
        m_feature_matrix =
                ((CDotFeatures*)m_features)->get_computed_dot_feature_matrix();
    }
    else if (m_features && m_features->get_feature_class() == C_COMBINED)
    {
        CDotFeatures* feat =
                (CDotFeatures*)((CCombinedFeatures*)m_features)->
                get_first_feature_obj();

        if (feat->get_num_vectors())
            m_feature_matrix = feat->get_computed_dot_feature_matrix();

        SG_UNREF(feat);
    }

    update_data_means();

    if (m_kernel)
        update_train_kernel();

    if (m_ktrtr.num_cols*m_ktrtr.num_rows)
    {
        update_alpha();
        update_chol();
    }
}

void CLaplacianInferenceMethod::check_members()
{
    if (!m_labels)
        SG_ERROR("No labels set\n");

    if (m_labels->get_label_type() != LT_REGRESSION)
        SG_ERROR("Expected RegressionLabels\n");

    if (!m_features)
        SG_ERROR("No features set!\n");

    if (m_labels->get_num_labels() != m_features->get_num_vectors())
        SG_ERROR("Number of training vectors does not match number of labels\n");

    if (m_features->get_feature_class() == C_COMBINED)
    {
        CDotFeatures* feat =
                (CDotFeatures*)((CCombinedFeatures*)m_features)->
                get_first_feature_obj();

        if (!feat->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n");

        if (feat->get_feature_class() != C_DENSE)
            SG_ERROR("Expected Simple Features\n");

        if (feat->get_feature_type() != F_DREAL)
            SG_ERROR("Expected Real Features\n");

        SG_UNREF(feat);
    }
    else
    {
        if (!m_features->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n");

        if (m_features->get_feature_class() != C_DENSE)
            SG_ERROR("Expected Simple Features\n");

        if (m_features->get_feature_type() != F_DREAL)
            SG_ERROR("Expected Real Features\n");
    }

    if (!m_kernel)
        SG_ERROR("No kernel assigned!\n");

    if (!m_mean)
        SG_ERROR("No mean function assigned!\n");
}

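/* Hyperparameter derivatives of the approximate negative log marginal
 * likelihood, following infLaplace.m. With B = I + sW*K*sW, Z acting as
 * inv(K + inv(W)) (computed differently depending on the sign of W),
 * g = diag(K - K*sW*inv(B)*sW*K)/2, and dfhat = g.*d3lp accounting for
 * the implicit dependence of the mode on the hyperparameters, the
 * derivative for a kernel hyperparameter is
 *
 *   dnlZ = tr(Z*dK)/2 - alpha'*dK*alpha/2 - dfhat'*(b - K*(Z*b)),
 *
 * with b = dK*dlp; mean and likelihood parameters follow analogously. */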
CMap<TParameter*, SGVector<float64_t> > CLaplacianInferenceMethod::
get_marginal_likelihood_derivatives(CMap<TParameter*,
        CSGObject*>& para_dict)
{
    check_members();

    if (update_parameter_hash())
        update_all();

    MatrixXd Z(m_L.num_rows, m_L.num_cols);

    Map<VectorXd> eigen_dlp(dlp.vector, dlp.vlen);

    for (index_t i = 0; i < m_L.num_rows; i++)
    {
        for (index_t j = 0; j < m_L.num_cols; j++)
            Z(i,j) = m_L(i,j);
    }

    MatrixXd sW_temp(sW.vlen, m_ktrtr.num_cols);
    VectorXd sum(1);
    sum[0] = 0;

    for (index_t i = 0; i < sW.vlen; i++)
    {
        for (index_t j = 0; j < m_ktrtr.num_cols; j++)
            sW_temp(i,j) = sW[i];
    }

    // g = diag(posterior covariance)/2, needed for the implicit
    // derivative of the mode with respect to the hyperparameters
    VectorXd g;

    Map<VectorXd> eigen_W(W.vector, W.vlen);

    Map<MatrixXd> eigen_temp_kernel(temp_kernel.matrix,
            temp_kernel.num_rows, temp_kernel.num_cols);

    if (eigen_W.minCoeff() < 0)
    {
        // Non-log-concave case: work with A = I + K*diag(W) directly
        Z = -Z;

        MatrixXd A = MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols);

        MatrixXd temp_diagonal(sW.vlen, sW.vlen);
        temp_diagonal.setZero(sW.vlen, sW.vlen);

        for (index_t s = 0; s < sW.vlen; s++)
            temp_diagonal(s,s) = W[s];

        A = A + eigen_temp_kernel*m_scale*m_scale*temp_diagonal;

        FullPivLU<MatrixXd> lu(A);

        MatrixXd temp_matrix =
                lu.inverse().cwiseProduct(eigen_temp_kernel*m_scale*m_scale);

        VectorXd temp_sum(temp_matrix.rows());
        temp_sum.setZero(temp_matrix.rows());

        for (index_t i = 0; i < temp_matrix.rows(); i++)
        {
            for (index_t j = 0; j < temp_matrix.cols(); j++)
                temp_sum[i] += temp_matrix(j,i);
        }

        g = temp_sum/2.0;
    }
    else
    {
        // Log-concave case: C = L'\(sW.*K), g = diag(K - C'*C)/2
        MatrixXd C = Z.transpose().colPivHouseholderQr().solve(
                sW_temp.cwiseProduct(eigen_temp_kernel*m_scale*m_scale));

        MatrixXd temp_diagonal(sW.vlen, sW.vlen);
        temp_diagonal.setZero(sW.vlen, sW.vlen);

        for (index_t s = 0; s < sW.vlen; s++)
            temp_diagonal(s,s) = sW[s];

        MatrixXd temp = Z.transpose();

        Z = Z.transpose().colPivHouseholderQr().solve(temp_diagonal);
        Z = temp.transpose().colPivHouseholderQr().solve(Z);

        for (index_t s = 0; s < Z.rows(); s++)
        {
            for (index_t r = 0; r < Z.cols(); r++)
                Z(s,r) *= sW[s];
        }

        VectorXd temp_sum(C.rows());
        temp_sum.setZero(C.rows());

        for (index_t i = 0; i < C.rows(); i++)
        {
            for (index_t j = 0; j < C.cols(); j++)
                temp_sum[i] += C(j,i)*C(j,i);
        }

        g = (eigen_temp_kernel.diagonal()*m_scale*m_scale-temp_sum)/2.0;
    }

    Map<VectorXd> eigen_d3lp(d3lp.vector, d3lp.vlen);

    VectorXd dfhat = g.cwiseProduct(eigen_d3lp);

    m_kernel->build_parameter_dictionary(para_dict);
    m_mean->build_parameter_dictionary(para_dict);
    m_model->build_parameter_dictionary(para_dict);

    CMap<TParameter*, SGVector<float64_t> > gradient(
            3+para_dict.get_num_elements(),
            3+para_dict.get_num_elements());

    for (index_t i = 0; i < para_dict.get_num_elements(); i++)
    {
        shogun::CMapNode<TParameter*, CSGObject*>* node =
                para_dict.get_node_ptr(i);

        TParameter* param = node->key;
        CSGObject* obj = node->data;

        index_t length = 1;

        if ((param->m_datatype.m_ctype == CT_VECTOR ||
                param->m_datatype.m_ctype == CT_SGVECTOR) &&
                param->m_datatype.m_length_y != NULL)
            length = *(param->m_datatype.m_length_y);

        SGVector<float64_t> variables(length);

        bool deriv_found = false;

        Map<VectorXd> eigen_temp_alpha(temp_alpha.vector, temp_alpha.vlen);

        for (index_t h = 0; h < length; h++)
        {
            SGMatrix<float64_t> deriv;
            SGVector<float64_t> mean_derivatives;
            VectorXd mean_dev_temp;
            SGVector<float64_t> lik_first_deriv;
            SGVector<float64_t> lik_second_deriv;

            if (param->m_datatype.m_ctype == CT_VECTOR ||
                    param->m_datatype.m_ctype == CT_SGVECTOR)
            {
                deriv = m_kernel->get_parameter_gradient(param, obj);

                lik_first_deriv = m_model->get_first_derivative(
                        (CRegressionLabels*)m_labels, param, obj, function);

                lik_second_deriv = m_model->get_second_derivative(
                        (CRegressionLabels*)m_labels, param, obj, function);

                mean_derivatives = m_mean->get_parameter_derivative(
                        param, obj, m_feature_matrix, h);

                mean_dev_temp.resize(mean_derivatives.vlen);

                for (index_t d = 0; d < mean_derivatives.vlen; d++)
                    mean_dev_temp[d] = mean_derivatives[d];
            }
            else
            {
                mean_derivatives = m_mean->get_parameter_derivative(
                        param, obj, m_feature_matrix);

                mean_dev_temp.resize(mean_derivatives.vlen);

                for (index_t d = 0; d < mean_derivatives.vlen; d++)
                    mean_dev_temp[d] = mean_derivatives[d];

                deriv = m_kernel->get_parameter_gradient(param, obj);

                lik_first_deriv = m_model->get_first_derivative(
                        (CRegressionLabels*)m_labels, param, obj, function);

                lik_second_deriv = m_model->get_second_derivative(
                        (CRegressionLabels*)m_labels, param, obj, function);
            }

            if (deriv.num_cols*deriv.num_rows > 0)
            {
                // Kernel hyperparameter:
                // dnlZ = tr(Z*dK)/2 - alpha'*dK*alpha/2 - dfhat'*(b - K*Z*b)
                MatrixXd dK(deriv.num_rows, deriv.num_cols);

                for (index_t d = 0; d < deriv.num_rows; d++)
                {
                    for (index_t s = 0; s < deriv.num_cols; s++)
                        dK(d,s) = deriv(d,s);
                }

                sum[0] = (Z.cwiseProduct(dK)).sum()/2.0;

                sum = sum - eigen_temp_alpha.transpose()*dK*eigen_temp_alpha/2.0;

                VectorXd b = dK*eigen_dlp;

                sum = sum -
                        dfhat.transpose()*(b-eigen_temp_kernel*(Z*b)*m_scale*m_scale);

                variables[h] = sum[0];

                deriv_found = true;
            }
            else if (mean_derivatives.vlen > 0)
            {
                // Mean hyperparameter
                sum = -eigen_temp_alpha.transpose()*mean_dev_temp;
                sum = sum - dfhat.transpose()*(mean_dev_temp-eigen_temp_kernel*
                        (Z*mean_dev_temp)*m_scale*m_scale);
                variables[h] = sum[0];
                deriv_found = true;
            }
            else if (lik_first_deriv[0]+lik_second_deriv[0] != CMath::INFTY)
            {
                // Likelihood hyperparameter
                Map<VectorXd> eigen_fd(lik_first_deriv.vector, lik_first_deriv.vlen);
                Map<VectorXd> eigen_sd(lik_second_deriv.vector, lik_second_deriv.vlen);

                sum[0] = -g.dot(eigen_sd);
                sum[0] = sum[0] - eigen_fd.sum();
                variables[h] = sum[0];
                deriv_found = true;
            }
        }

        if (deriv_found)
            gradient.add(param, variables);
    }

    // Derivative with respect to the kernel scale parameter:
    // dK/dscale = 2*scale*K
    TParameter* param;
    index_t index = get_modsel_param_index("scale");
    param = m_model_selection_parameters->get_parameter(index);

    MatrixXd dK(m_ktrtr.num_rows, m_ktrtr.num_cols);

    for (index_t d = 0; d < m_ktrtr.num_rows; d++)
    {
        for (index_t s = 0; s < m_ktrtr.num_cols; s++)
            dK(d,s) = m_ktrtr(d,s)*m_scale*2.0;
    }

    Map<VectorXd> eigen_temp_alpha(temp_alpha.vector, temp_alpha.vlen);

    sum[0] = (Z.cwiseProduct(dK)).sum()/2.0;

    sum = sum - eigen_temp_alpha.transpose()*dK*eigen_temp_alpha/2.0;

    VectorXd b = dK*eigen_dlp;

    sum = sum - dfhat.transpose()*(b-eigen_temp_kernel*(Z*b)*m_scale*m_scale);

    SGVector<float64_t> scale(1);

    scale[0] = sum[0];

    gradient.add(param, scale);
    para_dict.add(param, this);

    return gradient;
}

SGVector<float64_t> CLaplacianInferenceMethod::get_diagonal_vector()
{
    SGVector<float64_t> result(sW.vlen);

    for (index_t i = 0; i < sW.vlen; i++)
        result[i] = sW[i];

    return result;
}

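/* Laplace approximation to the negative log marginal likelihood
 * (infLaplace.m):
 *
 *   -log Z ~= alpha'*(f - m)/2 - log p(y|f) + log det(B)/2,
 *
 * with B = I + sW*K*sW, so log det(B)/2 = sum(log(diag(L))) for its
 * Cholesky factor L. When W has negative entries (non-log-concave
 * likelihoods such as Student's t), the determinant term is taken
 * from an LU factorization instead. */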
float64_t CLaplacianInferenceMethod::get_negative_marginal_likelihood()
{
    if (update_parameter_hash())
        update_all();

    Map<VectorXd> eigen_temp_alpha(temp_alpha.vector, temp_alpha.vlen);

    Map<MatrixXd> eigen_temp_kernel(temp_kernel.matrix,
            temp_kernel.num_rows, temp_kernel.num_cols);

    Map<VectorXd> eigen_function(function.vector, function.vlen);

    Map<VectorXd> eigen_W(W.vector, W.vlen);

    Map<VectorXd> eigen_m_means(m_means.vector, m_means.vlen);

    if (eigen_W.minCoeff() < 0)
    {
        MatrixXd A = MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols);

        MatrixXd temp_diagonal(sW.vlen, sW.vlen);
        temp_diagonal.setZero(sW.vlen, sW.vlen);

        for (index_t s = 0; s < sW.vlen; s++)
            temp_diagonal(s,s) = sW[s];

        A = A + eigen_temp_kernel*m_scale*m_scale*temp_diagonal;

        FullPivLU<MatrixXd> lu(A);

        // -log Z = alpha'*(f - m)/2 - log p(y|f) + log|A|/2
        float64_t result =
                (eigen_temp_alpha.transpose()*(eigen_function-eigen_m_means))[0]/2.0 -
                lp + log(lu.determinant())/2.0;

        return result;
    }
    else
    {
        // -log Z = alpha'*(f - m)/2 - log p(y|f) + sum(log(diag(L))),
        // where m_L already holds the Cholesky factor of B computed in
        // update_chol()
        float64_t sum = 0;

        for (index_t i = 0; i < m_L.num_rows; i++)
            sum += log(m_L(i,i));

        float64_t result = eigen_temp_alpha.dot(eigen_function-eigen_m_means)/2.0 -
                lp + sum;

        return result;
    }
}

SGVector<float64_t> CLaplacianInferenceMethod::get_alpha()
{
    if (update_parameter_hash())
        update_all();

    SGVector<float64_t> result(m_alpha);
    return result;
}

SGMatrix<float64_t> CLaplacianInferenceMethod::get_cholesky()
{
    if (update_parameter_hash())
        update_all();

    SGMatrix<float64_t> result(m_L);
    return result;
}

void CLaplacianInferenceMethod::update_train_kernel()
{
    m_kernel->cleanup();
    m_kernel->init(m_features, m_features);

    // K(X, X)
    SGMatrix<float64_t> kernel_matrix = m_kernel->get_kernel_matrix();

    m_ktrtr = kernel_matrix.clone();

    temp_kernel = SGMatrix<float64_t>(kernel_matrix.num_rows,
            kernel_matrix.num_cols);

    for (index_t i = 0; i < kernel_matrix.num_rows; i++)
    {
        for (index_t j = 0; j < kernel_matrix.num_cols; j++)
            temp_kernel(i,j) = kernel_matrix(i,j);
    }
}

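/* Recomputes the stored factorization of the posterior. For log-concave
 * likelihoods (min(W) >= 0) m_L holds the upper Cholesky factor of
 * B = I + sW*K*sW, as in infLaplace.m; otherwise the stabilized
 * alternative -inv(K + inv(W)) is stored instead. */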
void CLaplacianInferenceMethod::update_chol()
{
    check_members();

    Map<VectorXd> eigen_W(W.vector, W.vlen);

    Map<MatrixXd> eigen_temp_kernel(temp_kernel.matrix,
            temp_kernel.num_rows, temp_kernel.num_cols);

    if (eigen_W.minCoeff() < 0)
    {
        // W has negative entries: store -inv(K + inv(W)) instead of a
        // Cholesky factor
        MatrixXd temp_diagonal(sW.vlen, sW.vlen);
        temp_diagonal.setZero(sW.vlen, sW.vlen);

        for (index_t s = 0; s < sW.vlen; s++)
            temp_diagonal(s,s) = 1.0/W[s];

        MatrixXd A = eigen_temp_kernel*m_scale*m_scale + temp_diagonal;

        FullPivLU<MatrixXd> lu(A);

        MatrixXd chol = -lu.inverse();

        m_L = SGMatrix<float64_t>(chol.rows(), chol.cols());

        for (index_t i = 0; i < chol.rows(); i++)
        {
            for (index_t j = 0; j < chol.cols(); j++)
                m_L(i,j) = chol(i,j);
        }
    }
    else
    {
        // Upper Cholesky factor of B = I + sW*K*sW
        Map<VectorXd> eigen_sW(sW.vector, sW.vlen);

        LLT<MatrixXd> L(
                (eigen_sW*eigen_sW.transpose()).cwiseProduct(eigen_temp_kernel*m_scale*m_scale) +
                MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols));

        MatrixXd chol = L.matrixU();

        m_L = SGMatrix<float64_t>(chol.rows(), chol.cols());

        for (index_t i = 0; i < chol.rows(); i++)
        {
            for (index_t j = 0; j < chol.cols(); j++)
                m_L(i,j) = chol(i,j);
        }
    }
}

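/* Mode finding for the Laplace approximation (infLaplace.m): a damped
 * Newton iteration on alpha. At each step, with W = -d2lp and
 * b = W.*(f - m) + dlp, the Newton direction is
 *
 *   dalpha = b - sW.*(B \ (sW.*(K*b))) - alpha,   B = I + sW*K*sW,
 *
 * and Brent's method (Psi_line above) picks the step size. The loop
 * stops once the objective psi improves by less than m_tolerance or
 * after m_max_itr iterations. */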
void CLaplacianInferenceMethod::update_alpha()
{
    float64_t Psi_Old = CMath::INFTY;
    float64_t Psi_New;
    float64_t Psi_Def;

    SGVector<float64_t> temp_mean = m_mean->get_mean_vector(m_feature_matrix);

    m_means = SGVector<float64_t>(temp_mean.vlen);
    temp_kernel = SGMatrix<float64_t>(m_ktrtr.num_rows, m_ktrtr.num_cols);
    temp_alpha = SGVector<float64_t>(m_labels->get_num_labels());

    for (index_t i = 0; i < temp_mean.vlen; i++)
        m_means[i] = temp_mean[i];

    for (index_t i = 0; i < m_alpha.vlen; i++)
        temp_alpha[i] = m_alpha[i];

    for (index_t i = 0; i < m_ktrtr.num_rows; i++)
    {
        for (index_t j = 0; j < m_ktrtr.num_cols; j++)
            temp_kernel(i,j) = m_ktrtr(i,j);
    }

    Map<MatrixXd> eigen_temp_kernel(temp_kernel.matrix,
            temp_kernel.num_rows, temp_kernel.num_cols);

    VectorXd eigen_function;

    Map<VectorXd> eigen_m_means(m_means.vector, m_means.vlen);

    if (m_alpha.vlen != m_labels->get_num_labels())
    {
        // No valid previous solution: start from alpha = 0, i.e. f = m
        temp_alpha = SGVector<float64_t>(m_labels->get_num_labels());

        for (index_t i = 0; i < temp_alpha.vlen; i++)
            temp_alpha[i] = 0.0;

        Map<VectorXd> eigen_temp_alpha2(temp_alpha.vector, temp_alpha.vlen);

        eigen_function =
                eigen_temp_kernel*eigen_temp_alpha2*m_scale*m_scale+eigen_m_means;

        function = SGVector<float64_t>(eigen_function.rows());

        for (index_t i = 0; i < eigen_function.rows(); i++)
            function[i] = eigen_function[i];

        W = m_model->get_log_probability_derivative_f(
                (CRegressionLabels*)m_labels, function, 2);

        for (index_t i = 0; i < W.vlen; i++)
            W[i] = -W[i];

        Psi_New = -m_model->get_log_probability_f(
                (CRegressionLabels*)m_labels, function);
    }
    else
    {
        // Warm start from the previous alpha; fall back to alpha = 0 if
        // that has a lower objective
        Map<VectorXd> eigen_temp_alpha2(temp_alpha.vector, temp_alpha.vlen);

        eigen_function =
                eigen_temp_kernel*m_scale*m_scale*eigen_temp_alpha2+eigen_m_means;

        function = SGVector<float64_t>(eigen_function.rows());

        for (index_t i = 0; i < eigen_function.rows(); i++)
            function[i] = eigen_function[i];

        W = m_model->get_log_probability_derivative_f(
                (CRegressionLabels*)m_labels, function, 2);

        for (index_t i = 0; i < W.vlen; i++)
            W[i] = -W[i];

        Psi_New = (eigen_temp_alpha2.transpose()*
                (eigen_function-eigen_m_means))[0]/2.0;

        Psi_New -= m_model->get_log_probability_f(
                (CRegressionLabels*)m_labels, function);

        Psi_Def = -m_model->get_log_probability_f(
                (CRegressionLabels*)m_labels, m_means);

        if (Psi_Def < Psi_New)
        {
            eigen_temp_alpha2 = VectorXd::Zero(m_labels->get_num_labels());

            for (index_t i = 0; i < m_alpha.vlen; i++)
                temp_alpha[i] = eigen_temp_alpha2[i];

            W = m_model->get_log_probability_derivative_f(
                    (CRegressionLabels*)m_labels, function, 2);

            for (index_t i = 0; i < W.vlen; i++)
                W[i] = -W[i];

            Psi_New = -m_model->get_log_probability_f(
                    (CRegressionLabels*)m_labels, function);
        }
    }

    Map<VectorXd> eigen_temp_alpha(temp_alpha.vector, temp_alpha.vlen);

    index_t itr = 0;

    dlp = m_model->get_log_probability_derivative_f(
            (CRegressionLabels*)m_labels, function, 1);

    d2lp = m_model->get_log_probability_derivative_f(
            (CRegressionLabels*)m_labels, function, 2);

    d3lp = m_model->get_log_probability_derivative_f(
            (CRegressionLabels*)m_labels, function, 3);

    Map<VectorXd> eigen_d2lp(d2lp.vector, d2lp.vlen);

    sW = W.clone();

    while (Psi_Old - Psi_New > m_tolerance && itr < m_max_itr)
    {
        Map<VectorXd> eigen_W(W.vector, W.vlen);

        Psi_Old = Psi_New;
        itr++;

        if (eigen_W.minCoeff() < 0)
        {
            // Suggested by Vanhatalo et. al.,
            // Gaussian Process Regression with Student's t likelihood,
            // NIPS 2009. Quoted from infLaplace.m
            float64_t df = m_model->get_degrees_freedom();

            for (index_t i = 0; i < eigen_W.rows(); i++)
                eigen_W[i] += 2.0/df*dlp[i]*dlp[i];
        }

        for (index_t i = 0; i < eigen_W.rows(); i++)
            W[i] = eigen_W[i];

        for (index_t i = 0; i < W.vlen; i++)
            sW[i] = CMath::sqrt(float64_t(W[i]));

        Map<VectorXd> eigen_sW(sW.vector, sW.vlen);

        // Cholesky factor of B = I + sW*K*sW
        LLT<MatrixXd> L((eigen_sW*eigen_sW.transpose()).cwiseProduct(
                eigen_temp_kernel*m_scale*m_scale) +
                MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols));

        MatrixXd chol = L.matrixL();

        MatrixXd temp = L.matrixL();

        Map<VectorXd> temp_eigen_dlp(dlp.vector, dlp.vlen);

        // Newton direction: dalpha = b - sW.*(B\(sW.*(K*b))) - alpha
        VectorXd b = eigen_W.cwiseProduct(eigen_function - eigen_m_means) +
                temp_eigen_dlp;

        chol = chol.colPivHouseholderQr().solve(eigen_sW.cwiseProduct(
                eigen_temp_kernel*b*m_scale*m_scale));

        chol = temp.transpose().colPivHouseholderQr().solve(chol);

        VectorXd dalpha = b - eigen_sW.cwiseProduct(chol) - eigen_temp_alpha;

        // Brent line search for the step size along dalpha (see Psi_line)
        Psi_line func;

        func.lab = (CRegressionLabels*)m_labels;
        func.K = &eigen_temp_kernel;
        func.scale = m_scale;
        func.alpha = &eigen_temp_alpha;
        func.dalpha = &dalpha;
        func.l1 = &lp;
        func.dl1 = &dlp;
        func.dl2 = &eigen_d2lp;
        func.f = &function;
        func.lik = m_model;
        func.m = &m_means;
        func.mW = &W;
        func.start_alpha = eigen_temp_alpha;
        local_min(0, m_max, m_opt_tolerance, func, Psi_New);
    }

    for (index_t i = 0; i < m_alpha.vlen; i++)
        temp_alpha[i] = eigen_temp_alpha[i];

    Map<VectorXd> eigen_dlp(dlp.vector, dlp.vlen);

    // Final f and log-likelihood derivatives at the converged mode
    eigen_function =
            eigen_temp_kernel*m_scale*m_scale*eigen_temp_alpha+eigen_m_means;

    function = SGVector<float64_t>(eigen_function.rows());

    for (index_t i = 0; i < eigen_function.rows(); i++)
        function[i] = eigen_function[i];

    lp = m_model->get_log_probability_f((CRegressionLabels*)m_labels, function);

    dlp = m_model->get_log_probability_derivative_f(
            (CRegressionLabels*)m_labels, function, 1);

    d2lp = m_model->get_log_probability_derivative_f(
            (CRegressionLabels*)m_labels, function, 2);

    d3lp = m_model->get_log_probability_derivative_f(
            (CRegressionLabels*)m_labels, function, 3);

    W = m_model->get_log_probability_derivative_f(
            (CRegressionLabels*)m_labels, function, 2);

    for (index_t i = 0; i < W.vlen; i++)
        W[i] = -W[i];

    // sW = sqrt(W) is only well defined when W is positive
    for (index_t i = 0; i < sW.vlen; i++)
        sW[i] = 0.0;

    Map<VectorXd> eigen_W2(W.vector, W.vlen);

    if (eigen_W2.minCoeff() > 0)
    {
        for (index_t i = 0; i < sW.vlen; i++)
            sW[i] = CMath::sqrt(float64_t(W[i]));
    }

    m_alpha = SGVector<float64_t>(eigen_temp_alpha.rows());

    for (index_t i = 0; i < m_alpha.vlen; i++)
        m_alpha[i] = eigen_temp_alpha[i];
}

#endif // HAVE_EIGEN3
#endif // HAVE_LAPACK