00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #include <shogun/lib/config.h>
00014 #ifdef HAVE_EIGEN3
00015 #ifdef HAVE_LAPACK
00016
00017 #include <shogun/regression/gp/ExactInferenceMethod.h>
00018 #include <shogun/regression/gp/GaussianLikelihood.h>
00019 #include <shogun/mathematics/lapack.h>
00020 #include <shogun/mathematics/Math.h>
00021 #include <shogun/labels/RegressionLabels.h>
00022 #include <shogun/kernel/GaussianKernel.h>
00023 #include <shogun/features/CombinedFeatures.h>
00024
00025 using namespace shogun;
00026
/** Default constructor. Eagerly refreshes all cached quantities (a no-op
 * while kernel/features/labels are still unset) and records the parameter
 * hash so subsequent getters can detect parameter changes. */
CExactInferenceMethod::CExactInferenceMethod() : CInferenceMethod()
{
	update_all();
	update_parameter_hash();
}
00032
/** Constructor taking all inference ingredients.
 *
 * @param kern covariance (kernel) function
 * @param feat training features
 * @param m mean function
 * @param lab training labels (regression)
 * @param mod likelihood model (must be Gaussian for exact inference)
 *
 * NOTE(review): unlike the default constructor this does not call
 * update_parameter_hash() after update_all() — presumably handled by the
 * base class or first getter; confirm this is intentional.
 */
CExactInferenceMethod::CExactInferenceMethod(CKernel* kern, CFeatures* feat,
		CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod) :
		CInferenceMethod(kern, feat, m, lab, mod)
{
	update_all();
}
00039
/** Destructor. Members are SG-refcounted/value types released by the base
 * class and their own destructors; nothing to do here. */
CExactInferenceMethod::~CExactInferenceMethod()
{
}
00043
/** Refresh every cached quantity derived from the current members:
 * label vector, feature matrix, data means, training kernel matrix,
 * Cholesky factor and alpha vector. Each step is guarded so this is safe
 * to call while members are only partially set. */
void CExactInferenceMethod::update_all()
{
	/* clone so later in-place centering (update_alpha) does not mutate
	 * the user's label object */
	if (m_labels)
		m_label_vector =
			((CRegressionLabels*) m_labels)->get_labels().clone();

	/* cache dense feature matrix; for combined features use the first
	 * sub-feature object */
	if (m_features && m_features->has_property(FP_DOT) && m_features->get_num_vectors())
		m_feature_matrix =
			((CDotFeatures*)m_features)->get_computed_dot_feature_matrix();

	else if (m_features && m_features->get_feature_class() == C_COMBINED)
	{
		CDotFeatures* feat =
			(CDotFeatures*)((CCombinedFeatures*)m_features)->
			get_first_feature_obj();

		if (feat->get_num_vectors())
			m_feature_matrix = feat->get_computed_dot_feature_matrix();

		SG_UNREF(feat);
	}

	update_data_means();

	if (m_kernel)
		update_train_kernel();

	/* Cholesky and alpha only make sense once a non-empty kernel matrix
	 * exists; order matters: update_alpha reads m_kern_with_noise set by
	 * update_chol */
	if (m_ktrtr.num_cols*m_ktrtr.num_rows)
	{
		update_chol();
		update_alpha();
	}
}
00077
00078 void CExactInferenceMethod::check_members()
00079 {
00080 if (!m_labels)
00081 SG_ERROR("No labels set\n");
00082
00083 if (m_labels->get_label_type() != LT_REGRESSION)
00084 SG_ERROR("Expected RegressionLabels\n");
00085
00086 if (!m_features)
00087 SG_ERROR("No features set!\n");
00088
00089 if (m_labels->get_num_labels() != m_features->get_num_vectors())
00090 SG_ERROR("Number of training vectors does not match number of labels\n");
00091
00092 if(m_features->get_feature_class() == C_COMBINED)
00093 {
00094 CDotFeatures* feat =
00095 (CDotFeatures*)((CCombinedFeatures*)m_features)->
00096 get_first_feature_obj();
00097
00098 if (!feat->has_property(FP_DOT))
00099 SG_ERROR("Specified features are not of type CFeatures\n");
00100
00101 if (feat->get_feature_class() != C_DENSE)
00102 SG_ERROR("Expected Simple Features\n");
00103
00104 if (feat->get_feature_type() != F_DREAL)
00105 SG_ERROR("Expected Real Features\n");
00106
00107 SG_UNREF(feat);
00108 }
00109
00110 else
00111 {
00112 if (!m_features->has_property(FP_DOT))
00113 SG_ERROR("Specified features are not of type CFeatures\n");
00114
00115 if (m_features->get_feature_class() != C_DENSE)
00116 SG_ERROR("Expected Simple Features\n");
00117
00118 if (m_features->get_feature_type() != F_DREAL)
00119 SG_ERROR("Expected Real Features\n");
00120 }
00121
00122 if (!m_kernel)
00123 SG_ERROR( "No kernel assigned!\n");
00124
00125 if (!m_mean)
00126 SG_ERROR( "No mean function assigned!\n");
00127
00128 if (m_model->get_model_type() != LT_GAUSSIAN)
00129 {
00130 SG_ERROR("Exact Inference Method can only use " \
00131 "Gaussian Likelihood Function.\n");
00132 }
00133 }
00134
/** Compute partial derivatives of the negative log marginal likelihood
 * w.r.t. the hyperparameters of the kernel, the mean function, the kernel
 * scale and the likelihood's sigma.
 *
 * @param para_dict parameter dictionary; filled with kernel/mean
 *        parameters and extended with "scale" and "sigma"
 * @return map from each parameter to its gradient vector
 */
CMap<TParameter*, SGVector<float64_t> > CExactInferenceMethod::
	get_marginal_likelihood_derivatives(CMap<TParameter*,
			CSGObject*>& para_dict)
{
	check_members();

	if(update_parameter_hash())
		update_all();

	/* noise standard deviation of the Gaussian likelihood */
	float64_t m_sigma =
		dynamic_cast<CGaussianLikelihood*>(m_model)->get_sigma();

	/* scratch matrix, initially I, then receives a copy of L */
	SGMatrix<float64_t> temp1(m_ktrtr.num_rows, m_ktrtr.num_cols);

	/* scratch matrix, accumulates alpha*alpha' */
	SGMatrix<float64_t> temp2(m_alpha.vlen, m_alpha.vlen);

	/* will hold Q = (K+sigma^2*I)^-1/sigma^2 - alpha*alpha' (GPML's Q) */
	SGMatrix<float64_t> Q(m_L.num_rows, m_L.num_cols);

	SGVector<float64_t> diagonal(temp1.num_rows);
	SGVector<float64_t> diagonal2(temp2.num_rows);

	diagonal.fill_vector(diagonal.vector, temp1.num_rows, 1.0);
	diagonal2.fill_vector(diagonal2.vector, temp2.num_rows, 0.0);

	temp1.create_diagonal_matrix(temp1.matrix, diagonal.vector, temp1.num_rows);
	Q.create_diagonal_matrix(Q.matrix, diagonal.vector, temp2.num_rows);
	temp2.create_diagonal_matrix(temp2.matrix, diagonal2.vector, temp2.num_rows);

	/* temp1 <- L (upper-triangular Cholesky factor) */
	memcpy(temp1.matrix, m_L.matrix,
		m_L.num_cols*m_L.num_rows*sizeof(float64_t));

	/* Q <- (L'L)^-1 * I, i.e. the inverse of the factored system */
	clapack_dpotrs(CblasColMajor, CblasUpper,
		temp1.num_rows, Q.num_cols, temp1.matrix, temp1.num_cols,
		Q.matrix, Q.num_cols);

	/* temp2 <- alpha * alpha' (rank-1 update of the zero matrix) */
	cblas_dger(CblasColMajor, m_alpha.vlen, m_alpha.vlen,
		1.0, m_alpha.vector, 1, m_alpha.vector, 1,
		temp2.matrix, m_alpha.vlen);

	/* temp1 <- I again, used as right operand of the symm below */
	temp1.create_diagonal_matrix(temp1.matrix, diagonal.vector, temp1.num_rows);

	/* temp2 <- Q/(sigma^2) - alpha*alpha' */
	cblas_dsymm(CblasColMajor, CblasLeft, CblasUpper,
		temp1.num_rows, temp1.num_cols, 1.0/(m_sigma*m_sigma),
		Q.matrix, temp1.num_cols,
		temp1.matrix, temp1.num_cols, -1.0,
		temp2.matrix, temp2.num_cols);

	/* Q <- final weight matrix used in all trace terms below */
	memcpy(Q.matrix, temp2.matrix,
		temp2.num_cols*temp2.num_rows*sizeof(float64_t));

	float64_t sum = 0;

	m_kernel->build_parameter_dictionary(para_dict);
	m_mean->build_parameter_dictionary(para_dict);

	/* +3 head room for the scale/sigma entries appended below */
	CMap<TParameter*, SGVector<float64_t> > gradient(
			3+para_dict.get_num_elements(),
			3+para_dict.get_num_elements());

	/* one gradient vector per registered kernel/mean parameter */
	for (index_t i = 0; i < para_dict.get_num_elements(); i++)
	{
		shogun::CMapNode<TParameter*, CSGObject*>* node =
				para_dict.get_node_ptr(i);

		TParameter* param = node->key;
		CSGObject* obj = node->data;

		index_t length = 1;

		/* vector parameters get one derivative per component */
		if ((param->m_datatype.m_ctype== CT_VECTOR ||
				param->m_datatype.m_ctype == CT_SGVECTOR) &&
				param->m_datatype.m_length_y != NULL)
			length = *(param->m_datatype.m_length_y);

		SGVector<float64_t> variables(length);

		bool deriv_found = false;

		for (index_t g = 0; g < length; g++)
		{
			/* a parameter belongs to either the kernel (matrix-valued
			 * derivative) or the mean (vector-valued derivative); the
			 * other query returns an empty result */
			SGMatrix<float64_t> deriv;
			SGVector<float64_t> mean_derivatives;

			if (param->m_datatype.m_ctype == CT_VECTOR ||
					param->m_datatype.m_ctype == CT_SGVECTOR)
			{
				deriv = m_kernel->get_parameter_gradient(param, obj, g);
				mean_derivatives = m_mean->get_parameter_derivative(
						param, obj, m_feature_matrix, g);
			}

			else
			{
				mean_derivatives = m_mean->get_parameter_derivative(
						param, obj, m_feature_matrix);

				deriv = m_kernel->get_parameter_gradient(param, obj);
			}

			sum = 0;

			/* kernel parameter: dnlZ = trace(Q * dK/dtheta)/2, with the
			 * scale^2 factor applied elementwise */
			if (deriv.num_cols*deriv.num_rows > 0)
			{
				for (index_t k = 0; k < Q.num_rows; k++)
				{
					for (index_t j = 0; j < Q.num_cols; j++)
						sum += Q(k,j)*deriv(k,j)*m_scale*m_scale;
				}

				sum /= 2.0;
				variables[g] = sum;
				deriv_found = true;
			}

			/* mean parameter: dnlZ = -dm'*alpha (sign convention per
			 * surrounding code) */
			else if (mean_derivatives.vlen > 0)
			{
				sum = mean_derivatives.dot(mean_derivatives.vector,
						m_alpha.vector, m_alpha.vlen);
				variables[g] = sum;
				deriv_found = true;
			}


		}

		if (deriv_found)
			gradient.add(param, variables);

	}

	/* derivative w.r.t. this object's kernel scale parameter:
	 * trace(Q .* K) * scale (dK/dscale = 2*scale*K, halved below) */
	TParameter* param;
	index_t index = get_modsel_param_index("scale");
	param = m_model_selection_parameters->get_parameter(index);

	sum = 0;

	for (index_t i = 0; i < Q.num_rows; i++)
	{
		for (index_t j = 0; j < Q.num_cols; j++)
			sum += Q(i,j)*m_ktrtr(i,j)*m_scale*2.0;
	}

	sum /= 2.0;

	SGVector<float64_t> scale(1);

	scale[0] = sum;

	gradient.add(param, scale);
	para_dict.add(param, this);

	/* derivative w.r.t. the likelihood's sigma: sigma * trace(Q) */
	index = m_model->get_modsel_param_index("sigma");
	param = m_model->m_model_selection_parameters->get_parameter(index);

	sum = m_sigma*Q.trace(Q.matrix, Q.num_rows, Q.num_cols);

	SGVector<float64_t> sigma(1);

	sigma[0] = sum;
	gradient.add(param, sigma);
	para_dict.add(param, m_model);

	return gradient;

}
00311
00312 SGVector<float64_t> CExactInferenceMethod::get_diagonal_vector()
00313 {
00314 if(update_parameter_hash())
00315 update_all();
00316
00317 check_members();
00318
00319 float64_t m_sigma =
00320 dynamic_cast<CGaussianLikelihood*>(m_model)->get_sigma();
00321
00322 SGVector<float64_t> result =
00323 SGVector<float64_t>(m_features->get_num_vectors());
00324
00325 result.fill_vector(result.vector, m_features->get_num_vectors(),
00326 1.0/m_sigma);
00327
00328 return result;
00329 }
00330
00331 float64_t CExactInferenceMethod::get_negative_marginal_likelihood()
00332 {
00333 if(update_parameter_hash())
00334 update_all();
00335
00336 float64_t result;
00337
00338 result = m_label_vector.dot(m_label_vector.vector, m_alpha.vector,
00339 m_label_vector.vlen)/2.0;
00340
00341 float64_t m_sigma =
00342 dynamic_cast<CGaussianLikelihood*>(m_model)->get_sigma();
00343
00344 for (int i = 0; i < m_L.num_rows; i++)
00345 result += CMath::log(m_L(i,i));
00346
00347 result += m_L.num_rows * CMath::log(2*CMath::PI*m_sigma*m_sigma)/2.0;
00348
00349 return result;
00350 }
00351
00352 SGVector<float64_t> CExactInferenceMethod::get_alpha()
00353 {
00354 if(update_parameter_hash())
00355 update_all();
00356
00357 SGVector<float64_t> result(m_alpha);
00358 return result;
00359 }
00360
00361 SGMatrix<float64_t> CExactInferenceMethod::get_cholesky()
00362 {
00363 if(update_parameter_hash())
00364 update_all();
00365
00366 SGMatrix<float64_t> result(m_L);
00367 return result;
00368 }
00369
00370 void CExactInferenceMethod::update_train_kernel()
00371 {
00372 m_kernel->cleanup();
00373
00374 m_kernel->init(m_features, m_features);
00375
00376
00377 SGMatrix<float64_t> kernel_matrix = m_kernel->get_kernel_matrix();
00378
00379 m_ktrtr=kernel_matrix.clone();
00380 }
00381
00382
/** Recompute the upper-triangular Cholesky factor m_L of
 * (scale^2/sigma^2)*K + I, and cache that (pre-factorization) matrix in
 * m_kern_with_noise for use by update_alpha(). */
void CExactInferenceMethod::update_chol()
{
	check_members();

	/* noise standard deviation of the Gaussian likelihood */
	float64_t m_sigma =
		dynamic_cast<CGaussianLikelihood*>(m_model)->get_sigma();

	/* temp1 accumulates (scale^2/sigma^2)*K + I and is factorized */
	SGMatrix<float64_t> temp1(m_ktrtr.num_rows,
		m_ktrtr.num_cols);

	m_kern_with_noise = SGMatrix<float64_t>(m_ktrtr.num_rows,
		m_ktrtr.num_cols);

	/* temp2 is an identity used as the right operand of dsymm */
	SGMatrix<float64_t> temp2(m_ktrtr.num_rows,
		m_ktrtr.num_cols);

	SGVector<float64_t> diagonal(temp1.num_rows);
	diagonal.fill_vector(diagonal.vector, temp1.num_rows, 1.0);

	temp1.create_diagonal_matrix(temp1.matrix, diagonal.vector, temp1.num_rows);
	temp2.create_diagonal_matrix(temp2.matrix, diagonal.vector, temp2.num_rows);

	/* temp1 <- (scale^2/sigma^2) * K * I + 1.0 * temp1(=I) */
	cblas_dsymm(CblasColMajor, CblasLeft, CblasUpper,
		m_ktrtr.num_rows, temp2.num_cols, (m_scale*m_scale)/(m_sigma*m_sigma),
		m_ktrtr.matrix, m_ktrtr.num_cols,
		temp2.matrix, temp2.num_cols, 1.0,
		temp1.matrix, temp1.num_cols);

	/* keep the un-factorized system for update_alpha's dposv call */
	memcpy(m_kern_with_noise.matrix, temp1.matrix,
		temp1.num_cols*temp1.num_rows*sizeof(float64_t));

	/* in-place Cholesky factorization (upper triangle) */
	clapack_dpotrf(CblasColMajor, CblasUpper,
		temp1.num_rows, temp1.matrix, temp1.num_cols);

	m_L = SGMatrix<float64_t>(temp1.num_rows, temp1.num_cols);

	/* dpotrf leaves the strictly-lower triangle untouched; zero it so
	 * m_L is a clean upper-triangular factor */
	for (int i = 0; i < temp1.num_rows; i++)
	{
		for (int j = 0; j < temp1.num_cols; j++)
		{
			if (i > j)
				temp1(i,j) = 0;
		}
	}

	memcpy(m_L.matrix, temp1.matrix,
		temp1.num_cols*temp1.num_rows*sizeof(float64_t));
}
00439
00440 void CExactInferenceMethod::update_alpha()
00441 {
00442 float64_t m_sigma =
00443 dynamic_cast<CGaussianLikelihood*>(m_model)->get_sigma();
00444
00445
00446 SGMatrix<float64_t> temp1(m_L.num_rows,
00447 m_L.num_cols);
00448
00449 for (int i = 0; i < m_label_vector.vlen; i++)
00450 m_label_vector[i] = m_label_vector[i] - m_data_means[i];
00451
00452 m_alpha = SGVector<float64_t>(m_label_vector.vlen);
00453
00454 memcpy(temp1.matrix, m_L.matrix,
00455 m_L.num_cols*m_L.num_rows*sizeof(float64_t));
00456
00457 memcpy(m_alpha.vector, m_label_vector.vector,
00458 m_label_vector.vlen*sizeof(float64_t));
00459
00460
00461 clapack_dposv(CblasColMajor, CblasLower,
00462 m_kern_with_noise.num_cols, 1, m_kern_with_noise.matrix,
00463 m_kern_with_noise.num_cols, m_alpha.vector,
00464 m_kern_with_noise.num_cols);
00465
00466 for (int i = 0; i < m_alpha.vlen; i++)
00467 m_alpha[i] = m_alpha[i]/(m_sigma*m_sigma);
00468
00469 }
00470 #endif
00471 #endif // HAVE_LAPACK