00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #include <shogun/lib/config.h>
00014 #ifdef HAVE_EIGEN3
00015 #ifdef HAVE_LAPACK
00016
00017 #include <shogun/regression/gp/ExactInferenceMethod.h>
00018 #include <shogun/regression/gp/GaussianLikelihood.h>
00019 #include <shogun/mathematics/lapack.h>
00020 #include <shogun/mathematics/Math.h>
00021 #include <shogun/labels/RegressionLabels.h>
00022 #include <shogun/kernel/GaussianKernel.h>
00023 #include <shogun/features/CombinedFeatures.h>
00024
00025 using namespace shogun;
00026
/** Default constructor. Eagerly refreshes all cached quantities (a no-op
 * while kernel/features/labels are still unset) and records the parameter
 * hash so subsequent getters can detect parameter changes. */
CExactInferenceMethod::CExactInferenceMethod() : CInferenceMethod()
{
	update_all();
	update_parameter_hash();
}
00032
/** Constructor taking all inference ingredients.
 *
 * @param kern covariance (kernel) function
 * @param feat training features
 * @param m mean function
 * @param lab training labels (regression)
 * @param mod likelihood model (must be Gaussian for exact inference)
 *
 * NOTE(review): unlike the default constructor this does not call
 * update_parameter_hash() after update_all() — presumably handled by the
 * base class or first getter; confirm this is intentional.
 */
CExactInferenceMethod::CExactInferenceMethod(CKernel* kern, CFeatures* feat,
		CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod) :
		CInferenceMethod(kern, feat, m, lab, mod)
{
	update_all();
}
00039
/** Destructor. Members are SG-refcounted/value types released by the base
 * class and their own destructors; nothing to do here. */
CExactInferenceMethod::~CExactInferenceMethod()
{
}
00043
/** Refresh every cached quantity derived from the current members:
 * label vector, feature matrix, data means, training kernel matrix,
 * Cholesky factor and alpha vector. Each step is guarded so this is safe
 * to call while members are only partially set. */
void CExactInferenceMethod::update_all()
{
	/* clone so later in-place centering (update_alpha) does not mutate
	 * the user's label object */
	if (m_labels)
		m_label_vector =
			((CRegressionLabels*) m_labels)->get_labels().clone();

	/* cache dense feature matrix; for combined features use the first
	 * sub-feature object */
	if (m_features && m_features->has_property(FP_DOT) && m_features->get_num_vectors())
		m_feature_matrix =
			((CDotFeatures*)m_features)->get_computed_dot_feature_matrix();

	else if (m_features && m_features->get_feature_class() == C_COMBINED)
	{
		CDotFeatures* feat =
			(CDotFeatures*)((CCombinedFeatures*)m_features)->
			get_first_feature_obj();

		if (feat->get_num_vectors())
			m_feature_matrix = feat->get_computed_dot_feature_matrix();

		SG_UNREF(feat);
	}

	update_data_means();

	if (m_kernel)
		update_train_kernel();

	/* Cholesky and alpha only make sense once a non-empty kernel matrix
	 * exists; order matters: update_alpha reads m_kern_with_noise set by
	 * update_chol */
	if (m_ktrtr.num_cols*m_ktrtr.num_rows)
	{
		update_chol();
		update_alpha();
	}
}
00077
00078 void CExactInferenceMethod::check_members()
00079 {
00080 if (!m_labels)
00081 SG_ERROR("No labels set\n");
00082
00083 if (m_labels->get_label_type() != LT_REGRESSION)
00084 SG_ERROR("Expected RegressionLabels\n");
00085
00086 if (!m_features)
00087 SG_ERROR("No features set!\n");
00088
00089 if (m_labels->get_num_labels() != m_features->get_num_vectors())
00090 SG_ERROR("Number of training vectors does not match number of labels\n");
00091
00092 if(m_features->get_feature_class() == C_COMBINED)
00093 {
00094 CDotFeatures* feat =
00095 (CDotFeatures*)((CCombinedFeatures*)m_features)->
00096 get_first_feature_obj();
00097
00098 if (!feat->has_property(FP_DOT))
00099 SG_ERROR("Specified features are not of type CFeatures\n");
00100
00101 if (feat->get_feature_class() != C_DENSE)
00102 SG_ERROR("Expected Simple Features\n");
00103
00104 if (feat->get_feature_type() != F_DREAL)
00105 SG_ERROR("Expected Real Features\n");
00106
00107 SG_UNREF(feat);
00108 }
00109
00110 else
00111 {
00112 if (!m_features->has_property(FP_DOT))
00113 SG_ERROR("Specified features are not of type CFeatures\n");
00114
00115 if (m_features->get_feature_class() != C_DENSE)
00116 SG_ERROR("Expected Simple Features\n");
00117
00118 if (m_features->get_feature_type() != F_DREAL)
00119 SG_ERROR("Expected Real Features\n");
00120 }
00121
00122 if (!m_kernel)
00123 SG_ERROR( "No kernel assigned!\n");
00124
00125 if (!m_mean)
00126 SG_ERROR( "No mean function assigned!\n");
00127
00128 if (m_model->get_model_type() != LT_GAUSSIAN)
00129 {
00130 SG_ERROR("Exact Inference Method can only use " \
00131 "Gaussian Likelihood Function.\n");
00132 }
00133 }
00134
/** Compute partial derivatives of the negative log marginal likelihood
 * w.r.t. the hyperparameters of the kernel, the mean function, the kernel
 * scale and the likelihood's sigma.
 *
 * @param para_dict parameter dictionary; filled with kernel/mean
 *        parameters and extended with "scale" and "sigma"
 * @return map from each parameter to its gradient vector
 */
CMap<TParameter*, SGVector<float64_t> > CExactInferenceMethod::
	get_marginal_likelihood_derivatives(CMap<TParameter*,
			CSGObject*>& para_dict)
{
	check_members();

	if(update_parameter_hash())
		update_all();

	/* noise standard deviation of the Gaussian likelihood */
	float64_t m_sigma =
		dynamic_cast<CGaussianLikelihood*>(m_model)->get_sigma();

	/* scratch matrix, initially I, then receives a copy of L */
	SGMatrix<float64_t> temp1(m_ktrtr.num_rows, m_ktrtr.num_cols);

	/* scratch matrix, accumulates alpha*alpha' */
	SGMatrix<float64_t> temp2(m_alpha.vlen, m_alpha.vlen);

	/* will hold Q = (K+sigma^2*I)^-1/sigma^2 - alpha*alpha' (GPML's Q) */
	SGMatrix<float64_t> Q(m_L.num_rows, m_L.num_cols);

	SGVector<float64_t> diagonal(temp1.num_rows);
	SGVector<float64_t> diagonal2(temp2.num_rows);

	diagonal.fill_vector(diagonal.vector, temp1.num_rows, 1.0);
	diagonal2.fill_vector(diagonal2.vector, temp2.num_rows, 0.0);

	temp1.create_diagonal_matrix(temp1.matrix, diagonal.vector, temp1.num_rows);
	Q.create_diagonal_matrix(Q.matrix, diagonal.vector, temp2.num_rows);
	temp2.create_diagonal_matrix(temp2.matrix, diagonal2.vector, temp2.num_rows);

	/* temp1 <- L (upper-triangular Cholesky factor) */
	memcpy(temp1.matrix, m_L.matrix,
		m_L.num_cols*m_L.num_rows*sizeof(float64_t));

	/* Q <- (L'L)^-1 * I, i.e. the inverse of the factored system */
	clapack_dpotrs(CblasColMajor, CblasUpper,
		temp1.num_rows, Q.num_cols, temp1.matrix, temp1.num_cols,
		Q.matrix, Q.num_cols);

	/* temp2 <- alpha * alpha' (rank-1 update of the zero matrix) */
	cblas_dger(CblasColMajor, m_alpha.vlen, m_alpha.vlen,
		1.0, m_alpha.vector, 1, m_alpha.vector, 1,
		temp2.matrix, m_alpha.vlen);

	/* temp1 <- I again, used as right operand of the symm below */
	temp1.create_diagonal_matrix(temp1.matrix, diagonal.vector, temp1.num_rows);

	/* temp2 <- Q/(sigma^2) - alpha*alpha' */
	cblas_dsymm(CblasColMajor, CblasLeft, CblasUpper,
		temp1.num_rows, temp1.num_cols, 1.0/(m_sigma*m_sigma),
		Q.matrix, temp1.num_cols,
		temp1.matrix, temp1.num_cols, -1.0,
		temp2.matrix, temp2.num_cols);

	/* Q <- final weight matrix used in all trace terms below */
	memcpy(Q.matrix, temp2.matrix,
		temp2.num_cols*temp2.num_rows*sizeof(float64_t));

	float64_t sum = 0;

	m_kernel->build_parameter_dictionary(para_dict);
	m_mean->build_parameter_dictionary(para_dict);

	/* +3 head room for the scale/sigma entries appended below */
	CMap<TParameter*, SGVector<float64_t> > gradient(
			3+para_dict.get_num_elements(),
			3+para_dict.get_num_elements());

	/* one gradient vector per registered kernel/mean parameter */
	for (index_t i = 0; i < para_dict.get_num_elements(); i++)
	{
		shogun::CMapNode<TParameter*, CSGObject*>* node =
				para_dict.get_node_ptr(i);

		TParameter* param = node->key;
		CSGObject* obj = node->data;

		index_t length = 1;

		/* vector parameters get one derivative per component */
		if ((param->m_datatype.m_ctype== CT_VECTOR ||
				param->m_datatype.m_ctype == CT_SGVECTOR) &&
				param->m_datatype.m_length_y != NULL)
			length = *(param->m_datatype.m_length_y);

		SGVector<float64_t> variables(length);

		bool deriv_found = false;

		for (index_t g = 0; g < length; g++)
		{
			/* a parameter belongs to either the kernel (matrix-valued
			 * derivative) or the mean (vector-valued derivative); the
			 * other query returns an empty result */
			SGMatrix<float64_t> deriv;
			SGVector<float64_t> mean_derivatives;

			if (param->m_datatype.m_ctype == CT_VECTOR ||
					param->m_datatype.m_ctype == CT_SGVECTOR)
			{
				deriv = m_kernel->get_parameter_gradient(param, obj, g);
				mean_derivatives = m_mean->get_parameter_derivative(
						param, obj, m_feature_matrix, g);
			}

			else
			{
				mean_derivatives = m_mean->get_parameter_derivative(
						param, obj, m_feature_matrix);

				deriv = m_kernel->get_parameter_gradient(param, obj);
			}

			sum = 0;

			/* kernel parameter: dnlZ = trace(Q * dK/dtheta)/2, with the
			 * scale^2 factor applied elementwise */
			if (deriv.num_cols*deriv.num_rows > 0)
			{
				for (index_t k = 0; k < Q.num_rows; k++)
				{
					for (index_t j = 0; j < Q.num_cols; j++)
						sum += Q(k,j)*deriv(k,j)*m_scale*m_scale;
				}

				sum /= 2.0;
				variables[g] = sum;
				deriv_found = true;
			}

			/* mean parameter: dnlZ = -dm'*alpha (sign convention per
			 * surrounding code) */
			else if (mean_derivatives.vlen > 0)
			{
				sum = mean_derivatives.dot(mean_derivatives.vector,
						m_alpha.vector, m_alpha.vlen);
				variables[g] = sum;
				deriv_found = true;
			}


		}

		if (deriv_found)
			gradient.add(param, variables);

	}

	/* derivative w.r.t. this object's kernel scale parameter:
	 * trace(Q .* K) * scale (dK/dscale = 2*scale*K, halved below) */
	TParameter* param;
	index_t index = get_modsel_param_index("scale");
	param = m_model_selection_parameters->get_parameter(index);

	sum = 0;

	for (index_t i = 0; i < Q.num_rows; i++)
	{
		for (index_t j = 0; j < Q.num_cols; j++)
			sum += Q(i,j)*m_ktrtr(i,j)*m_scale*2.0;
	}

	sum /= 2.0;

	SGVector<float64_t> scale(1);

	scale[0] = sum;

	gradient.add(param, scale);
	para_dict.add(param, this);

	/* derivative w.r.t. the likelihood's sigma: sigma * trace(Q) */
	index = m_model->get_modsel_param_index("sigma");
	param = m_model->m_model_selection_parameters->get_parameter(index);

	sum = m_sigma*Q.trace(Q.matrix, Q.num_rows, Q.num_cols);

	SGVector<float64_t> sigma(1);

	sigma[0] = sum;
	gradient.add(param, sigma);
	para_dict.add(param, m_model);

	return gradient;

}
00311
00312 SGVector<float64_t> CExactInferenceMethod::get_diagonal_vector()
00313 {
00314 if(update_parameter_hash())
00315 update_all();
00316
00317 check_members();
00318
00319 float64_t m_sigma =
00320 dynamic_cast<CGaussianLikelihood*>(m_model)->get_sigma();
00321
00322 SGVector<float64_t> result =
00323 SGVector<float64_t>(m_features->get_num_vectors());
00324
00325 result.fill_vector(result.vector, m_features->get_num_vectors(),
00326 1.0/m_sigma);
00327
00328 return result;
00329 }
00330
00331 float64_t CExactInferenceMethod::get_negative_marginal_likelihood()
00332 {
00333 if(update_parameter_hash())
00334 update_all();
00335
00336 float64_t result;
00337
00338 result = m_label_vector.dot(m_label_vector.vector, m_alpha.vector,
00339 m_label_vector.vlen)/2.0;
00340
00341 float64_t m_sigma =
00342 dynamic_cast<CGaussianLikelihood*>(m_model)->get_sigma();
00343
00344 for (int i = 0; i < m_L.num_rows; i++)
00345 result += CMath::log(m_L(i,i));
00346
00347 result += m_L.num_rows * CMath::log(2*CMath::PI*m_sigma*m_sigma)/2.0;
00348
00349 return result;
00350 }
00351
00352 SGVector<float64_t> CExactInferenceMethod::get_alpha()
00353 {
00354 if(update_parameter_hash())
00355 update_all();
00356
00357 SGVector<float64_t> result(m_alpha);
00358 return result;
00359 }
00360
00361 SGMatrix<float64_t> CExactInferenceMethod::get_cholesky()
00362 {
00363 if(update_parameter_hash())
00364 update_all();
00365
00366 SGMatrix<float64_t> result(m_L);
00367 return result;
00368 }
00369
00370 void CExactInferenceMethod::update_train_kernel()
00371 {
00372 m_kernel->cleanup();
00373
00374 m_kernel->init(m_features, m_features);
00375
00376
00377 SGMatrix<float64_t> kernel_matrix = m_kernel->get_kernel_matrix();
00378
00379 m_ktrtr=kernel_matrix.clone();
00380 }
00381
00382
/** Recompute the upper-triangular Cholesky factor m_L of
 * (scale^2/sigma^2)*K + I, and cache that (pre-factorization) matrix in
 * m_kern_with_noise for use by update_alpha(). */
void CExactInferenceMethod::update_chol()
{
	check_members();

	/* noise standard deviation of the Gaussian likelihood */
	float64_t m_sigma =
		dynamic_cast<CGaussianLikelihood*>(m_model)->get_sigma();

	/* temp1 accumulates (scale^2/sigma^2)*K + I and is factorized */
	SGMatrix<float64_t> temp1(m_ktrtr.num_rows,
		m_ktrtr.num_cols);

	m_kern_with_noise = SGMatrix<float64_t>(m_ktrtr.num_rows,
		m_ktrtr.num_cols);

	/* temp2 is an identity used as the right operand of dsymm */
	SGMatrix<float64_t> temp2(m_ktrtr.num_rows,
		m_ktrtr.num_cols);

	SGVector<float64_t> diagonal(temp1.num_rows);
	diagonal.fill_vector(diagonal.vector, temp1.num_rows, 1.0);

	temp1.create_diagonal_matrix(temp1.matrix, diagonal.vector, temp1.num_rows);
	temp2.create_diagonal_matrix(temp2.matrix, diagonal.vector, temp2.num_rows);

	/* temp1 <- (scale^2/sigma^2) * K * I + 1.0 * temp1(=I) */
	cblas_dsymm(CblasColMajor, CblasLeft, CblasUpper,
		m_ktrtr.num_rows, temp2.num_cols, (m_scale*m_scale)/(m_sigma*m_sigma),
		m_ktrtr.matrix, m_ktrtr.num_cols,
		temp2.matrix, temp2.num_cols, 1.0,
		temp1.matrix, temp1.num_cols);

	/* keep the un-factorized system for update_alpha's dposv call */
	memcpy(m_kern_with_noise.matrix, temp1.matrix,
		temp1.num_cols*temp1.num_rows*sizeof(float64_t));

	/* in-place Cholesky factorization (upper triangle) */
	clapack_dpotrf(CblasColMajor, CblasUpper,
		temp1.num_rows, temp1.matrix, temp1.num_cols);

	m_L = SGMatrix<float64_t>(temp1.num_rows, temp1.num_cols);

	/* dpotrf leaves the strictly-lower triangle untouched; zero it so
	 * m_L is a clean upper-triangular factor */
	for (int i = 0; i < temp1.num_rows; i++)
	{
		for (int j = 0; j < temp1.num_cols; j++)
		{
			if (i > j)
				temp1(i,j) = 0;
		}
	}

	memcpy(m_L.matrix, temp1.matrix,
		temp1.num_cols*temp1.num_rows*sizeof(float64_t));
}
00439
00440 void CExactInferenceMethod::update_alpha()
00441 {
00442 float64_t m_sigma =
00443 dynamic_cast<CGaussianLikelihood*>(m_model)->get_sigma();
00444
00445
00446 SGMatrix<float64_t> temp1(m_L.num_rows,
00447 m_L.num_cols);
00448
00449 for (int i = 0; i < m_label_vector.vlen; i++)
00450 m_label_vector[i] = m_label_vector[i] - m_data_means[i];
00451
00452 m_alpha = SGVector<float64_t>(m_label_vector.vlen);
00453
00454 memcpy(temp1.matrix, m_L.matrix,
00455 m_L.num_cols*m_L.num_rows*sizeof(float64_t));
00456
00457 memcpy(m_alpha.vector, m_label_vector.vector,
00458 m_label_vector.vlen*sizeof(float64_t));
00459
00460
00461 clapack_dposv(CblasColMajor, CblasLower,
00462 m_kern_with_noise.num_cols, 1, m_kern_with_noise.matrix,
00463 m_kern_with_noise.num_cols, m_alpha.vector,
00464 m_kern_with_noise.num_cols);
00465
00466 for (int i = 0; i < m_alpha.vlen; i++)
00467 m_alpha[i] = m_alpha[i]/(m_sigma*m_sigma);
00468
00469 }
00470 #endif
00471 #endif // HAVE_LAPACK