#include <shogun/lib/config.h>

#ifdef HAVE_LAPACK
#ifdef HAVE_EIGEN3

#include <shogun/regression/gp/LaplacianInferenceMethod.h>
#include <shogun/regression/gp/GaussianLikelihood.h>
#include <shogun/regression/gp/StudentsTLikelihood.h>
#include <shogun/mathematics/lapack.h>
#include <shogun/mathematics/Math.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/features/CombinedFeatures.h>
#include <shogun/mathematics/eigen3.h>
#include <shogun/lib/external/brent.h>

using namespace shogun;
using namespace Eigen;

namespace shogun
{
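
/* Line-search objective used during Newton mode finding. For a step
 * size x along the search direction dalpha, it evaluates
 *   Psi(x) = alpha'*(f - m)/2 - log p(y|f),
 * with alpha = start_alpha + x*dalpha and f = K*alpha*scale^2 + m,
 * refreshing the likelihood gradient (dl1) and the negated second
 * derivative (mW) as side effects. local_min() from brent.h minimizes
 * it over x.
 */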
class Psi_line : public func_base
{
public:
	Eigen::Map<Eigen::VectorXd>* alpha;
	Eigen::VectorXd* dalpha;
	Eigen::Map<Eigen::MatrixXd>* K;
	float64_t* l1;
	SGVector<float64_t>* dl1;
	Eigen::Map<Eigen::VectorXd>* dl2;
	SGVector<float64_t>* mW;
	SGVector<float64_t>* f;
	SGVector<float64_t>* m;
	float64_t scale;
	CLikelihoodModel* lik;
	CRegressionLabels* lab;

	Eigen::VectorXd start_alpha;

	virtual double operator() (double x)
	{
		Eigen::Map<Eigen::VectorXd> eigen_f(f->vector, f->vlen);
		Eigen::Map<Eigen::VectorXd> eigen_m(m->vector, m->vlen);

		// Step along the search direction and recompute the latent function.
		// eigen_f aliases f's memory, so f is updated in place.
		*alpha = start_alpha + x*(*dalpha);
		eigen_f = (*K)*(*alpha)*scale*scale + eigen_m;

		(*dl1) = lik->get_log_probability_derivative_f(lab, (*f), 1);
		(*mW) = lik->get_log_probability_derivative_f(lab, (*f), 2);

		float64_t result = (alpha->dot(eigen_f - eigen_m))/2.0;

		// Store the negated second derivative, W = -d^2 log p(y|f)/df^2
		for (index_t i = 0; i < (*mW).vlen; i++)
			(*mW)[i] = -(*mW)[i];

		result -= lik->get_log_probability_f(lab, *f);

		return result;
	}
};
}

CLaplacianInferenceMethod::CLaplacianInferenceMethod() : CInferenceMethod()
{
	init();
	update_all();
	update_parameter_hash();
}

CLaplacianInferenceMethod::CLaplacianInferenceMethod(CKernel* kern,
		CFeatures* feat, CMeanFunction* m, CLabels* lab,
		CLikelihoodModel* mod) : CInferenceMethod(kern, feat, m, lab, mod)
{
	init();
	update_all();
}

void CLaplacianInferenceMethod::init()
{
	m_latent_features = NULL;
	m_max_itr = 30;         // maximum Newton iterations for mode finding
	m_opt_tolerance = 1e-6; // tolerance for Brent's line search
	m_tolerance = 1e-8;     // convergence tolerance on the objective Psi
	m_max = 5;              // upper bound of the line-search interval
}

CLaplacianInferenceMethod::~CLaplacianInferenceMethod()
{
}

void CLaplacianInferenceMethod::update_all()
{
	if (m_labels)
		m_label_vector =
				((CRegressionLabels*) m_labels)->get_labels().clone();

	if (m_features && m_features->has_property(FP_DOT) &&
			m_features->get_num_vectors())
	{
		m_feature_matrix =
				((CDotFeatures*)m_features)->get_computed_dot_feature_matrix();
	}
	else if (m_features && m_features->get_feature_class() == C_COMBINED)
	{
		CDotFeatures* feat =
				(CDotFeatures*)((CCombinedFeatures*)m_features)->
				get_first_feature_obj();

		if (feat->get_num_vectors())
			m_feature_matrix = feat->get_computed_dot_feature_matrix();

		SG_UNREF(feat);
	}

	update_data_means();

	if (m_kernel)
		update_train_kernel();

	if (m_ktrtr.num_cols*m_ktrtr.num_rows)
	{
		update_alpha();
		update_chol();
	}
}

void CLaplacianInferenceMethod::check_members()
{
	if (!m_labels)
		SG_ERROR("No labels set\n");

	if (m_labels->get_label_type() != LT_REGRESSION)
		SG_ERROR("Expected RegressionLabels\n");

	if (!m_features)
		SG_ERROR("No features set!\n");

	if (m_labels->get_num_labels() != m_features->get_num_vectors())
		SG_ERROR("Number of training vectors does not match number of labels\n");

	if (m_features->get_feature_class() == C_COMBINED)
	{
		CDotFeatures* feat =
				(CDotFeatures*)((CCombinedFeatures*)m_features)->
				get_first_feature_obj();

		if (!feat->has_property(FP_DOT))
			SG_ERROR("Specified features are not of type CDotFeatures\n");

		if (feat->get_feature_class() != C_DENSE)
			SG_ERROR("Expected Simple Features\n");

		if (feat->get_feature_type() != F_DREAL)
			SG_ERROR("Expected Real Features\n");

		SG_UNREF(feat);
	}
	else
	{
		if (!m_features->has_property(FP_DOT))
			SG_ERROR("Specified features are not of type CDotFeatures\n");

		if (m_features->get_feature_class() != C_DENSE)
			SG_ERROR("Expected Simple Features\n");

		if (m_features->get_feature_type() != F_DREAL)
			SG_ERROR("Expected Real Features\n");
	}

	if (!m_kernel)
		SG_ERROR("No kernel assigned!\n");

	if (!m_mean)
		SG_ERROR("No mean function assigned!\n");
}
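
/* Gradient of the negative log marginal likelihood with respect to all
 * registered kernel, mean and likelihood hyperparameters, by implicit
 * differentiation through the mode (Rasmussen & Williams, "Gaussian
 * Processes for Machine Learning", Section 5.5.1; the structure follows
 * the GPML toolbox). Z and g gather the reusable terms, and
 * dfhat = g .* d3lp captures the dependence of the mode on the
 * hyperparameters.
 */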
CMap<TParameter*, SGVector<float64_t> > CLaplacianInferenceMethod::
get_marginal_likelihood_derivatives(CMap<TParameter*, CSGObject*>& para_dict)
{
	check_members();

	if (update_parameter_hash())
		update_all();

	MatrixXd Z(m_L.num_rows, m_L.num_cols);

	Map<VectorXd> eigen_dlp(dlp.vector, dlp.vlen);

	for (index_t i = 0; i < m_L.num_rows; i++)
	{
		for (index_t j = 0; j < m_L.num_cols; j++)
			Z(i,j) = m_L(i,j);
	}

	MatrixXd sW_temp(sW.vlen, m_ktrtr.num_cols);
	VectorXd sum(1);
	sum[0] = 0;

	for (index_t i = 0; i < sW.vlen; i++)
	{
		for (index_t j = 0; j < m_ktrtr.num_cols; j++)
			sW_temp(i,j) = sW[i];
	}

	VectorXd g;

	Map<VectorXd> eigen_W(W.vector, W.vlen);

	Map<MatrixXd> eigen_temp_kernel(temp_kernel.matrix,
			temp_kernel.num_rows, temp_kernel.num_cols);

	if (eigen_W.minCoeff() < 0)
	{
		// W has negative entries: update_chol() stored
		// m_L = -(K*scale^2 + W^-1)^-1, so flip the sign to get
		// Z = (K*scale^2 + W^-1)^-1
		Z = -Z;

		// A = I + K*diag(W)*scale^2
		MatrixXd A = MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols);

		MatrixXd temp_diagonal(sW.vlen, sW.vlen);
		temp_diagonal.setZero(sW.vlen, sW.vlen);

		for (index_t s = 0; s < temp_diagonal.rows(); s++)
			temp_diagonal(s,s) = W[s];

		A = A + eigen_temp_kernel*m_scale*m_scale*temp_diagonal;

		FullPivLU<MatrixXd> lu(A);

		MatrixXd temp_matrix =
				lu.inverse().cwiseProduct(eigen_temp_kernel*m_scale*m_scale);

		VectorXd temp_sum(temp_matrix.rows());
		temp_sum.setZero();

		for (index_t i = 0; i < temp_matrix.rows(); i++)
		{
			for (index_t j = 0; j < temp_matrix.cols(); j++)
				temp_sum[i] += temp_matrix(j,i);
		}

		g = temp_sum/2.0;
	}
	else
	{
		// Log-concave case: m_L holds the upper Cholesky factor of
		// B = I + sW*K*sW*scale^2
		MatrixXd C = Z.transpose().colPivHouseholderQr().solve(
				sW_temp.cwiseProduct(eigen_temp_kernel*m_scale*m_scale));

		MatrixXd temp_diagonal(sW.vlen, sW.vlen);
		temp_diagonal.setZero(sW.vlen, sW.vlen);

		for (index_t s = 0; s < sW.vlen; s++)
			temp_diagonal(s,s) = sW[s];

		MatrixXd temp = Z.transpose();

		Z = Z.transpose().colPivHouseholderQr().solve(temp_diagonal);
		Z = temp.transpose().colPivHouseholderQr().solve(Z);

		for (index_t s = 0; s < Z.rows(); s++)
		{
			for (index_t r = 0; r < Z.cols(); r++)
				Z(s,r) *= sW[s];
		}

		VectorXd temp_sum(C.rows());
		temp_sum.setZero(C.rows());

		for (index_t i = 0; i < C.rows(); i++)
		{
			for (index_t j = 0; j < C.cols(); j++)
				temp_sum[i] += C(j,i)*C(j,i);
		}

		// g = (diag(K)*scale^2 - sum(C.^2, 1)')/2
		g = (eigen_temp_kernel.diagonal()*m_scale*m_scale-temp_sum)/2.0;
	}

	Map<VectorXd> eigen_d3lp(d3lp.vector, d3lp.vlen);

	// dfhat = g .* d3lp accounts for the implicit dependence of the mode
	// on the hyperparameters
	VectorXd dfhat = g.cwiseProduct(eigen_d3lp);

	m_kernel->build_parameter_dictionary(para_dict);
	m_mean->build_parameter_dictionary(para_dict);
	m_model->build_parameter_dictionary(para_dict);

	CMap<TParameter*, SGVector<float64_t> > gradient(
			3+para_dict.get_num_elements(),
			3+para_dict.get_num_elements());

	for (index_t i = 0; i < para_dict.get_num_elements(); i++)
	{
		shogun::CMapNode<TParameter*, CSGObject*>* node =
				para_dict.get_node_ptr(i);

		TParameter* param = node->key;
		CSGObject* obj = node->data;

		index_t length = 1;

		if ((param->m_datatype.m_ctype == CT_VECTOR ||
				param->m_datatype.m_ctype == CT_SGVECTOR) &&
				param->m_datatype.m_length_y != NULL)
			length = *(param->m_datatype.m_length_y);

		SGVector<float64_t> variables(length);

		bool deriv_found = false;

		Map<VectorXd> eigen_temp_alpha(temp_alpha.vector, temp_alpha.vlen);

		for (index_t h = 0; h < length; h++)
		{
			SGMatrix<float64_t> deriv;
			SGVector<float64_t> mean_derivatives;
			VectorXd mean_dev_temp;
			SGVector<float64_t> lik_first_deriv;
			SGVector<float64_t> lik_second_deriv;

			if (param->m_datatype.m_ctype == CT_VECTOR ||
					param->m_datatype.m_ctype == CT_SGVECTOR)
			{
				deriv = m_kernel->get_parameter_gradient(param, obj);

				lik_first_deriv = m_model->get_first_derivative(
						(CRegressionLabels*)m_labels, param, obj, function);

				lik_second_deriv = m_model->get_second_derivative(
						(CRegressionLabels*)m_labels, param, obj, function);

				mean_derivatives = m_mean->get_parameter_derivative(
						param, obj, m_feature_matrix, h);

				mean_dev_temp.resize(mean_derivatives.vlen);

				for (index_t d = 0; d < mean_derivatives.vlen; d++)
					mean_dev_temp[d] = mean_derivatives[d];
			}
			else
			{
				mean_derivatives = m_mean->get_parameter_derivative(
						param, obj, m_feature_matrix);

				mean_dev_temp.resize(mean_derivatives.vlen);

				for (index_t d = 0; d < mean_derivatives.vlen; d++)
					mean_dev_temp[d] = mean_derivatives[d];

				deriv = m_kernel->get_parameter_gradient(param, obj);

				lik_first_deriv = m_model->get_first_derivative(
						(CRegressionLabels*)m_labels, param, obj, function);

				lik_second_deriv = m_model->get_second_derivative(
						(CRegressionLabels*)m_labels, param, obj, function);
			}

			if (deriv.num_cols*deriv.num_rows > 0)
			{
				// Kernel hyperparameter: explicit term plus the implicit
				// term through the shift of the mode
				MatrixXd dK(deriv.num_rows, deriv.num_cols);

				for (index_t d = 0; d < deriv.num_rows; d++)
				{
					for (index_t s = 0; s < deriv.num_cols; s++)
						dK(d,s) = deriv(d,s);
				}

				sum[0] = (Z.cwiseProduct(dK)).sum()/2.0;

				sum = sum - eigen_temp_alpha.transpose()*dK*eigen_temp_alpha/2.0;

				VectorXd b = dK*eigen_dlp;

				sum = sum -
						dfhat.transpose()*(b-eigen_temp_kernel*(Z*b)*m_scale*m_scale);

				variables[h] = sum[0];

				deriv_found = true;
			}
			else if (mean_derivatives.vlen > 0)
			{
				// Mean hyperparameter
				sum = -eigen_temp_alpha.transpose()*mean_dev_temp;
				sum = sum - dfhat.transpose()*(mean_dev_temp-eigen_temp_kernel*
						(Z*mean_dev_temp)*m_scale*m_scale);
				variables[h] = sum[0];
				deriv_found = true;
			}
			else if (lik_first_deriv[0]+lik_second_deriv[0] != CMath::INFTY)
			{
				// Likelihood hyperparameter
				Map<VectorXd> eigen_fd(lik_first_deriv.vector, lik_first_deriv.vlen);
				Map<VectorXd> eigen_sd(lik_second_deriv.vector, lik_second_deriv.vlen);

				sum[0] = -g.dot(eigen_sd);
				sum[0] = sum[0] - eigen_fd.sum();
				variables[h] = sum[0];
				deriv_found = true;
			}
		}

		if (deriv_found)
			gradient.add(param, variables);
	}

	// Derivative with respect to the kernel scale: dK = 2*scale*K
	TParameter* param;
	index_t index = get_modsel_param_index("scale");
	param = m_model_selection_parameters->get_parameter(index);

	MatrixXd dK(m_ktrtr.num_rows, m_ktrtr.num_cols);

	for (index_t d = 0; d < m_ktrtr.num_rows; d++)
	{
		for (index_t s = 0; s < m_ktrtr.num_cols; s++)
			dK(d,s) = m_ktrtr(d,s)*m_scale*2.0;
	}

	Map<VectorXd> eigen_temp_alpha(temp_alpha.vector, temp_alpha.vlen);

	sum[0] = (Z.cwiseProduct(dK)).sum()/2.0;

	sum = sum - eigen_temp_alpha.transpose()*dK*eigen_temp_alpha/2.0;

	VectorXd b = dK*eigen_dlp;

	sum = sum - dfhat.transpose()*(b-eigen_temp_kernel*(Z*b)*m_scale*m_scale);

	SGVector<float64_t> scale(1);

	scale[0] = sum[0];

	gradient.add(param, scale);
	para_dict.add(param, this);

	return gradient;
}

SGVector<float64_t> CLaplacianInferenceMethod::get_diagonal_vector()
{
	SGVector<float64_t> result(sW.vlen);

	for (index_t i = 0; i < sW.vlen; i++)
		result[i] = sW[i];

	return result;
}
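
/* Negative log marginal likelihood of the Laplace approximation:
 *   -log Z_approx = alpha'*(f - m)/2 - log p(y|f) + log det(B)/2,
 * where B = I + sW*K*sW*scale^2 for log-concave likelihoods; when W has
 * negative entries, log det(I + K*diag(W)*scale^2) is computed through
 * an LU factorization instead of the stored Cholesky factor.
 */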
float64_t CLaplacianInferenceMethod::get_negative_marginal_likelihood()
{
	if (update_parameter_hash())
		update_all();

	Map<VectorXd> eigen_temp_alpha(temp_alpha.vector, temp_alpha.vlen);

	Map<MatrixXd> eigen_temp_kernel(temp_kernel.matrix,
			temp_kernel.num_rows, temp_kernel.num_cols);

	Map<VectorXd> eigen_function(function.vector, function.vlen);

	Map<VectorXd> eigen_W(W.vector, W.vlen);

	Map<VectorXd> eigen_m_means(m_means.vector, m_means.vlen);

	if (eigen_W.minCoeff() < 0)
	{
		// A = I + K*diag(W)*scale^2, log det via LU factorization
		MatrixXd A = MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols);

		MatrixXd temp_diagonal(sW.vlen, sW.vlen);
		temp_diagonal.setZero(sW.vlen, sW.vlen);

		for (index_t s = 0; s < sW.vlen; s++)
			temp_diagonal(s,s) = W[s];

		A = A + eigen_temp_kernel*m_scale*m_scale*temp_diagonal;

		FullPivLU<MatrixXd> lu(A);

		float64_t result =
				(eigen_temp_alpha.transpose()*(eigen_function-eigen_m_means))[0]/2.0 -
				lp + log(lu.determinant())/2.0;

		return result;
	}
	else
	{
		// m_L holds the upper Cholesky factor of B from update_chol(),
		// so log det(B)/2 = sum(log(diag(m_L)))
		float64_t sum = 0;

		for (index_t i = 0; i < m_L.num_rows; i++)
			sum += log(m_L(i,i));

		float64_t result = eigen_temp_alpha.dot(eigen_function-eigen_m_means)/2.0 -
				lp + sum;

		return result;
	}
}

SGVector<float64_t> CLaplacianInferenceMethod::get_alpha()
{
	if (update_parameter_hash())
		update_all();

	SGVector<float64_t> result(m_alpha);
	return result;
}

SGMatrix<float64_t> CLaplacianInferenceMethod::get_cholesky()
{
	if (update_parameter_hash())
		update_all();

	SGMatrix<float64_t> result(m_L);
	return result;
}

void CLaplacianInferenceMethod::update_train_kernel()
{
	m_kernel->cleanup();
	m_kernel->init(m_features, m_features);

	SGMatrix<float64_t> kernel_matrix = m_kernel->get_kernel_matrix();

	m_ktrtr = kernel_matrix.clone();

	temp_kernel = SGMatrix<float64_t>(kernel_matrix.num_rows,
			kernel_matrix.num_cols);

	for (index_t i = 0; i < kernel_matrix.num_rows; i++)
	{
		for (index_t j = 0; j < kernel_matrix.num_cols; j++)
			temp_kernel(i,j) = kernel_matrix(i,j);
	}
}
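
/* Representation of the approximate posterior. For log-concave
 * likelihoods (all W >= 0), m_L holds the upper Cholesky factor of
 * B = I + sW*K*sW*scale^2. Otherwise, following the GPML convention,
 * m_L holds -(K*scale^2 + W^-1)^-1, and downstream code treats it as a
 * precomputed (negated) inverse rather than a Cholesky factor.
 */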
void CLaplacianInferenceMethod::update_chol()
{
	check_members();

	Map<VectorXd> eigen_W(W.vector, W.vlen);

	Map<MatrixXd> eigen_temp_kernel(temp_kernel.matrix,
			temp_kernel.num_rows, temp_kernel.num_cols);

	if (eigen_W.minCoeff() < 0)
	{
		// W has negative entries: store m_L = -(K*scale^2 + W^-1)^-1
		MatrixXd temp_diagonal(sW.vlen, sW.vlen);
		temp_diagonal.setZero(sW.vlen, sW.vlen);

		for (index_t s = 0; s < temp_diagonal.rows(); s++)
			temp_diagonal(s,s) = 1.0/W[s];

		MatrixXd A = eigen_temp_kernel*m_scale*m_scale+temp_diagonal;

		FullPivLU<MatrixXd> lu(A);

		MatrixXd chol = -lu.inverse();

		m_L = SGMatrix<float64_t>(chol.rows(), chol.cols());

		for (index_t i = 0; i < chol.rows(); i++)
		{
			for (index_t j = 0; j < chol.cols(); j++)
				m_L(i,j) = chol(i,j);
		}
	}
	else
	{
		// Standard case: upper Cholesky factor of B = I + sW*K*sW*scale^2
		Map<VectorXd> eigen_sW(sW.vector, sW.vlen);

		LLT<MatrixXd> L(
				(eigen_sW*eigen_sW.transpose()).cwiseProduct(eigen_temp_kernel*m_scale*m_scale) +
				MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols));

		MatrixXd chol = L.matrixU();

		m_L = SGMatrix<float64_t>(chol.rows(), chol.cols());

		for (index_t i = 0; i < chol.rows(); i++)
		{
			for (index_t j = 0; j < chol.cols(); j++)
				m_L(i,j) = chol(i,j);
		}
	}
}
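
/* Newton mode finding for the Laplace approximation (Rasmussen &
 * Williams, Algorithm 3.1, with a line search as in the GPML toolbox).
 * Each iteration computes
 *   W = -d2 log p(y|f),   b = W*(f - m) + d log p(y|f),
 *   dalpha = b - sW.*(B \ (sW.*(K*b*scale^2))) - alpha,
 * with B = I + sW*K*sW*scale^2, then Brent's method picks the step size
 * along dalpha that minimizes the objective Psi (see Psi_line above).
 */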
void CLaplacianInferenceMethod::update_alpha()
{
	float64_t Psi_Old = CMath::INFTY;
	float64_t Psi_New;
	float64_t Psi_Def;

	SGVector<float64_t> temp_mean = m_mean->get_mean_vector(m_feature_matrix);

	m_means = SGVector<float64_t>(temp_mean.vlen);
	temp_kernel = SGMatrix<float64_t>(m_ktrtr.num_rows, m_ktrtr.num_cols);
	temp_alpha = SGVector<float64_t>(m_labels->get_num_labels());

	for (index_t i = 0; i < temp_mean.vlen; i++)
		m_means[i] = temp_mean[i];

	for (index_t i = 0; i < m_ktrtr.num_rows; i++)
	{
		for (index_t j = 0; j < m_ktrtr.num_cols; j++)
			temp_kernel(i,j) = m_ktrtr(i,j);
	}

	Map<MatrixXd> eigen_temp_kernel(temp_kernel.matrix, temp_kernel.num_rows,
			temp_kernel.num_cols);

	VectorXd eigen_function;

	Map<VectorXd> eigen_m_means(m_means.vector, m_means.vlen);

	if (m_alpha.vlen != m_labels->get_num_labels())
	{
		// No valid previous solution: start from alpha = 0, i.e. f = m
		for (index_t i = 0; i < temp_alpha.vlen; i++)
			temp_alpha[i] = 0.0;

		Map<VectorXd> eigen_temp_alpha2(temp_alpha.vector, temp_alpha.vlen);

		eigen_function =
				eigen_temp_kernel*eigen_temp_alpha2*m_scale*m_scale+eigen_m_means;

		function = SGVector<float64_t>(eigen_function.rows());

		for (index_t i = 0; i < eigen_function.rows(); i++)
			function[i] = eigen_function[i];

		W = m_model->get_log_probability_derivative_f(
				(CRegressionLabels*)m_labels, function, 2);

		for (index_t i = 0; i < W.vlen; i++)
			W[i] = -W[i];

		Psi_New = -m_model->get_log_probability_f(
				(CRegressionLabels*)m_labels, function);
	}
	else
	{
		// Warm start from the previous alpha
		for (index_t i = 0; i < m_alpha.vlen; i++)
			temp_alpha[i] = m_alpha[i];

		Map<VectorXd> eigen_temp_alpha2(temp_alpha.vector, temp_alpha.vlen);

		eigen_function =
				eigen_temp_kernel*m_scale*m_scale*eigen_temp_alpha2+eigen_m_means;

		function = SGVector<float64_t>(eigen_function.rows());

		for (index_t i = 0; i < eigen_function.rows(); i++)
			function[i] = eigen_function[i];

		W = m_model->get_log_probability_derivative_f(
				(CRegressionLabels*)m_labels, function, 2);

		for (index_t i = 0; i < W.vlen; i++)
			W[i] = -W[i];

		Psi_New = (eigen_temp_alpha2.transpose()*
				(eigen_function-eigen_m_means))[0]/2.0;

		Psi_New -= m_model->get_log_probability_f(
				(CRegressionLabels*)m_labels, function);

		Psi_Def = -m_model->get_log_probability_f(
				(CRegressionLabels*)m_labels, m_means);

		// If the default f = m scores better, restart from alpha = 0
		if (Psi_Def < Psi_New)
		{
			eigen_temp_alpha2.setZero();

			// alpha = 0 implies f = m
			eigen_function = eigen_m_means;

			for (index_t i = 0; i < eigen_function.rows(); i++)
				function[i] = eigen_function[i];

			W = m_model->get_log_probability_derivative_f(
					(CRegressionLabels*)m_labels, function, 2);

			for (index_t i = 0; i < W.vlen; i++)
				W[i] = -W[i];

			Psi_New = -m_model->get_log_probability_f(
					(CRegressionLabels*)m_labels, function);
		}
	}

	Map<VectorXd> eigen_temp_alpha(temp_alpha.vector, temp_alpha.vlen);

	index_t itr = 0;

	dlp = m_model->get_log_probability_derivative_f(
			(CRegressionLabels*)m_labels, function, 1);

	d2lp = m_model->get_log_probability_derivative_f(
			(CRegressionLabels*)m_labels, function, 2);

	d3lp = m_model->get_log_probability_derivative_f(
			(CRegressionLabels*)m_labels, function, 3);

	Map<VectorXd> eigen_d2lp(d2lp.vector, d2lp.vlen);

	sW = W.clone();
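
	// Newton iterations with line search; stop once Psi decreases by
	// less than m_tolerance or m_max_itr iterations are reached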
	while (Psi_Old - Psi_New > m_tolerance && itr < m_max_itr)
	{
		Map<VectorXd> eigen_W(W.vector, W.vlen);

		Psi_Old = Psi_New;
		itr++;

		if (eigen_W.minCoeff() < 0)
		{
			// Stabilize the Newton step for non-log-concave likelihoods
			// (e.g. Student's t) with a curvature correction to W
			float64_t df = m_model->get_degrees_freedom();

			for (index_t i = 0; i < eigen_W.rows(); i++)
				eigen_W[i] += 2.0/df*dlp[i]*dlp[i];
		}

		for (index_t i = 0; i < W.vlen; i++)
			sW[i] = CMath::sqrt(float64_t(W[i]));

		Map<VectorXd> eigen_sW(sW.vector, sW.vlen);

		// B = I + sW*K*sW*scale^2
		LLT<MatrixXd> L((eigen_sW*eigen_sW.transpose()).cwiseProduct(
				eigen_temp_kernel*m_scale*m_scale) +
				MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols));

		MatrixXd chol = L.matrixL();
		MatrixXd temp = L.matrixL();

		Map<VectorXd> temp_eigen_dlp(dlp.vector, dlp.vlen);

		// b = W*(f - m) + d log p(y|f); function holds the current f,
		// updated in place by the previous line search
		Map<VectorXd> eigen_f(function.vector, function.vlen);

		VectorXd b = eigen_W.cwiseProduct(eigen_f - eigen_m_means) +
				temp_eigen_dlp;

		// Solve B*x = sW.*(K*b*scale^2) via the two triangular factors
		chol = chol.colPivHouseholderQr().solve(eigen_sW.cwiseProduct(
				eigen_temp_kernel*b*m_scale*m_scale));

		chol = temp.transpose().colPivHouseholderQr().solve(chol);

		// Full Newton direction; the step size along it is chosen by
		// Brent's method on the objective Psi
		VectorXd dalpha = b - eigen_sW.cwiseProduct(chol) - eigen_temp_alpha;

		Psi_line func;

		func.lab = (CRegressionLabels*)m_labels;
		func.K = &eigen_temp_kernel;
		func.scale = m_scale;
		func.alpha = &eigen_temp_alpha;
		func.dalpha = &dalpha;
		func.l1 = &lp;
		func.dl1 = &dlp;
		func.dl2 = &eigen_d2lp;
		func.f = &function;
		func.lik = m_model;
		func.m = &m_means;
		func.mW = &W;
		func.start_alpha = eigen_temp_alpha;

		local_min(0, m_max, m_opt_tolerance, func, Psi_New);
	}

	// Recompute the latent function and likelihood derivatives at the mode
	eigen_function =
			eigen_temp_kernel*m_scale*m_scale*eigen_temp_alpha+eigen_m_means;

	function = SGVector<float64_t>(eigen_function.rows());

	for (index_t i = 0; i < eigen_function.rows(); i++)
		function[i] = eigen_function[i];

	lp = m_model->get_log_probability_f((CRegressionLabels*)m_labels, function);

	dlp = m_model->get_log_probability_derivative_f(
			(CRegressionLabels*)m_labels, function, 1);

	d2lp = m_model->get_log_probability_derivative_f(
			(CRegressionLabels*)m_labels, function, 2);

	d3lp = m_model->get_log_probability_derivative_f(
			(CRegressionLabels*)m_labels, function, 3);

	W = m_model->get_log_probability_derivative_f(
			(CRegressionLabels*)m_labels, function, 2);

	for (index_t i = 0; i < W.vlen; i++)
		W[i] = -W[i];

	// sW = sqrt(W) is only well defined when W is positive
	for (index_t i = 0; i < sW.vlen; i++)
		sW[i] = 0.0;

	Map<VectorXd> eigen_W2(W.vector, W.vlen);

	if (eigen_W2.minCoeff() > 0)
	{
		for (index_t i = 0; i < sW.vlen; i++)
			sW[i] = CMath::sqrt(float64_t(W[i]));
	}

	m_alpha = SGVector<float64_t>(eigen_temp_alpha.rows());

	for (index_t i = 0; i < m_alpha.vlen; i++)
		m_alpha[i] = eigen_temp_alpha[i];
}
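
/* A minimal usage sketch (assumed Shogun 2.x GP API; X and y stand for
 * a feature matrix and label vector, and the concrete classes are
 * illustrative, not prescribed by this file):
 *
 *   CDenseFeatures<float64_t>* feat = new CDenseFeatures<float64_t>(X);
 *   CRegressionLabels* lab = new CRegressionLabels(y);
 *   CGaussianKernel* kernel = new CGaussianKernel(10, 2.0);
 *   CZeroMean* mean = new CZeroMean();
 *   CStudentsTLikelihood* lik = new CStudentsTLikelihood();
 *
 *   CLaplacianInferenceMethod* inf = new CLaplacianInferenceMethod(
 *       kernel, feat, mean, lab, lik);
 *   float64_t nlZ = inf->get_negative_marginal_likelihood();
 */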

#endif // HAVE_EIGEN3
#endif // HAVE_LAPACK