SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
FITCInferenceMethod.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Copyright (C) 2012 Jacob Walker
8  *
9  * Code adapted from Gaussian Process Machine Learning Toolbox
10  * http://www.gaussianprocess.org/gpml/code/matlab/doc/
11  *
12  */
13 #include <shogun/lib/config.h>
14 
15 #ifdef HAVE_LAPACK
16 #ifdef HAVE_EIGEN3
17 
26 
27 
28 using namespace shogun;
29 using namespace Eigen;
30 
// Default-constructor body of CFITCInferenceMethod.
// NOTE(review): the signature line (listing line 31) was elided by the
// documentation extraction. Sets member defaults via init(), then refreshes
// all cached matrices via update_all().
32 {
33  init();
34  update_all();
36 }
37 
// Constructor taking kernel, features, mean function, labels, likelihood
// model, and latent (inducing-point) features `lat`.
// NOTE(review): the first signature line (listing line 38) and one statement
// between init() and update_all() (listing line 43) were elided by the
// extraction -- the elided statement presumably stores `lat` as the latent
// features; verify against the original source.
39  CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod, CFeatures* lat) :
40  CInferenceMethod(kern, feat, m, lab, mod)
41 {
42  init();
44  update_all();
45 }
46 
47 void CFITCInferenceMethod::init()
48 {
49  m_latent_features = NULL;
50  m_ind_noise = 1e-10;
51 }
52 
// Empty destructor body. NOTE(review): the signature line (listing line 53)
// was elided by the extraction; reference-counted members are presumably
// released by the base class -- TODO confirm against the original source.
54 {
55 }
56 
// Refresh every cached quantity from the current members: label vector,
// training/latent feature matrices, kernel matrices, then (if the kernel
// matrices are non-empty) the Cholesky factors and alpha vector.
// NOTE(review): the signature (listing line 57) and many statement fragments
// (assignment targets, `if` conditions on elided listing lines 60, 63-66,
// 70, 77, 82-86, 91, 97-98, 103, 106, 108) are missing from this extraction;
// the control flow cannot be fully read here -- consult the original file.
58 {
59  if (m_labels)
61  ((CRegressionLabels*) m_labels)->get_labels().clone();
62
65  {
67  ((CDotFeatures*)m_features)->get_computed_dot_feature_matrix();
68  }
69
71  {
72  CDotFeatures* feat =
74  get_first_feature_obj();
75
76  if (feat->get_num_vectors())
78
79  SG_UNREF(feat);
80  }
81
84  {
87  get_computed_dot_feature_matrix();
88  }
89
90  else if (m_latent_features &&
92  {
93  CDotFeatures* subfeat =
95  get_first_feature_obj();
96
99
100  SG_UNREF(subfeat);
101  }
102
104
105  if (m_kernel)
107
// Recompute factorizations only when both kernel matrices are non-empty.
109  m_kuu.num_rows*m_kuu.num_cols &&
110  m_ktru.num_cols*m_ktru.num_rows)
111  {
112  update_chol();
113  update_alpha();
114  }
115 }
116 
// Validate that all members required for FITC inference are present and of
// the expected types: regression labels, dense real-valued dot features for
// both training and latent sets (matching in dimensionality), a kernel, a
// mean function, and a Gaussian likelihood. Raises SG_ERROR on any violation.
// NOTE(review): several `if` conditions were elided by the extraction
// (listing lines 122, 131, 134, 154, 157, 160, 164, 184, 187, 190, 194,
// 203), so each orphaned SG_ERROR below belongs to a missing check -- consult
// the original source before relying on this listing.
117 void CFITCInferenceMethod::check_members()
118 {
119  if (!m_labels)
120  SG_ERROR("No labels set\n");
121
// Elided condition: labels must be CRegressionLabels.
123  SG_ERROR("Expected RegressionLabels\n");
124
125  if (!m_features)
126  SG_ERROR("No features set!\n");
127
128  if (!m_latent_features)
129  SG_ERROR("No latent features set!\n");
130
// Elided condition: number of feature vectors must equal number of labels.
132  SG_ERROR("Number of training vectors does not match number of labels\n");
133
// Branch for combined/composite training features (condition elided).
135  {
136  CDotFeatures* feat =
138  get_first_feature_obj();
139
140  if (!feat->has_property(FP_DOT))
141  SG_ERROR("Specified features are not of type CFeatures\n");
142
143  if (feat->get_feature_class() != C_DENSE)
144  SG_ERROR("Expected Simple Features\n");
145
146  if (feat->get_feature_type() != F_DREAL)
147  SG_ERROR("Expected Real Features\n");
148
149  SG_UNREF(feat);
150  }
151
152  else
153  {
155  SG_ERROR("Specified features are not of type CFeatures\n");
156
158  SG_ERROR("Expected Simple Features\n");
159
161  SG_ERROR("Expected Real Features\n");
162  }
163
// Same validation repeated for the latent features (condition elided).
165  {
166  CDotFeatures* feat =
168  get_first_feature_obj();
169
170  if (!feat->has_property(FP_DOT))
171  SG_ERROR("Specified features are not of type CFeatures\n");
172
173  if (feat->get_feature_class() != C_DENSE)
174  SG_ERROR("Expected Simple Features\n");
175
176  if (feat->get_feature_type() != F_DREAL)
177  SG_ERROR("Expected Real Features\n");
178
179  SG_UNREF(feat);
180  }
181
182  else
183  {
185  SG_ERROR("Specified features are not of type CFeatures\n");
186
188  SG_ERROR("Expected Simple Features\n");
189
191  SG_ERROR("Expected Real Features\n");
192  }
193
195  SG_ERROR("Regular and Latent Features do not match in dimensionality!\n");
196
197  if (!m_kernel)
198  SG_ERROR( "No kernel assigned!\n");
199
200  if (!m_mean)
201  SG_ERROR( "No mean function assigned!\n");
202
// Elided condition: likelihood model must be CGaussianLikelihood.
204  {
205  SG_ERROR("FITC Inference Method can only use " \
206  "Gaussian Likelihood Function.\n");
207  }
208 }
209 
// Compute derivatives of the negative log marginal likelihood w.r.t. every
// registered hyperparameter (kernel parameters, mean parameters, the kernel
// scale, and the likelihood sigma), following the FITC derivative equations
// of the GPML toolbox. Returns a map from TParameter* to derivative values.
// NOTE(review): the first signature line and the declaration of `gradient`
// (listing lines 210-211, 278) were elided by the extraction, as were the
// kernel re-initialization calls before each get_parameter_gradient /
// get_kernel_matrix (listing lines 313, 316, 319, 339, 342, 345, 438, 441,
// 444) and the lines that fetch `param` from the model-selection index
// (listing lines 430, 512). Consult the original source for those.
212  CSGObject*>& para_dict)
213 {
214  check_members();
215
217  update_all();
218
219  //Get the sigma variable from the likelihood model
220  float64_t m_sigma =
221  dynamic_cast<CGaussianLikelihood*>(m_model)->get_sigma();
222
223  Map<MatrixXd> eigen_ktru(m_ktru.matrix, m_ktru.num_rows, m_ktru.num_cols);
224
225  MatrixXd W = eigen_ktru;
226
227  Map<VectorXd> eigen_dg(m_dg.vector, m_dg.vlen);
228
// NOTE(review): this loop runs j over rows and i over cols but indexes
// W(i,j) -- transposed indexing that is out of bounds unless ktru is
// square. Looks like a latent bug; confirm against the original source.
229  for (index_t j = 0; j < eigen_ktru.rows(); j++)
230  {
231  for (index_t i = 0; i < eigen_ktru.cols(); i++)
232  W(i,j) = eigen_ktru(i,j) / sqrt(eigen_dg[j]);
233  }
234
// Cholesky of (K_uu + W*W' + jitter*I), then W <- chol \ K_ut.
235  Map<MatrixXd> eigen_uu(m_kuu.matrix, m_kuu.num_rows, m_kuu.num_cols);
236  LLT<MatrixXd> CholW(eigen_uu + W*W.transpose() +
237  m_ind_noise*MatrixXd::Identity(eigen_uu.rows(), eigen_uu.cols()));
238  W = CholW.matrixL();
239
240
241  W = W.colPivHouseholderQr().solve(eigen_ktru);
242
// Mean-subtracted labels.
243  VectorXd true_lab(m_data_means.vlen);
244
245  for (index_t j = 0; j < m_data_means.vlen; j++)
246  true_lab[j] = m_label_vector[j] - m_data_means[j];
247
// al = (y - m - W'*(W*(y-m)./dg)) ./ dg
248  VectorXd al = W*true_lab.cwiseQuotient(eigen_dg);
249
250  al = W.transpose()*al;
251
252  al = true_lab - al;
253
254  al = al.cwiseQuotient(eigen_dg);
255
256  MatrixXd iKuu = eigen_uu.selfadjointView<Eigen::Upper>().llt()
257  .solve(MatrixXd::Identity(eigen_uu.rows(), eigen_uu.cols()));
258
259  MatrixXd B = iKuu*eigen_ktru;
260
261  MatrixXd Wdg = W;
262
// NOTE(review): same transposed-indexing pattern as above (Wdg(i,j) with
// i over cols, j over rows) -- only safe for square matrices.
263  for (index_t j = 0; j < eigen_ktru.rows(); j++)
264  {
265  for (index_t i = 0; i < eigen_ktru.cols(); i++)
266  Wdg(i,j) = Wdg(i,j) / eigen_dg[j];
267  }
268
269  VectorXd w = B*al;
270
// 1-element accumulator used throughout for scalar derivative sums.
271  VectorXd sum(1);
272  sum[0] = 0;
273
276
277  //This will be the vector we return
279  3+para_dict.get_num_elements(),
280  3+para_dict.get_num_elements());
281
// Loop over all registered hyperparameters.
282  for (index_t i = 0; i < para_dict.get_num_elements(); i++)
283  {
284  shogun::CMapNode<TParameter*, CSGObject*>* node =
285  para_dict.get_node_ptr(i);
286
287  TParameter* param = node->key;
288  CSGObject* obj = node->data;
289
290  index_t length = 1;
291
292  if ((param->m_datatype.m_ctype== CT_VECTOR ||
293  param->m_datatype.m_ctype == CT_SGVECTOR) &&
294  param->m_datatype.m_length_y != NULL)
295  length = *(param->m_datatype.m_length_y);
296
297  SGVector<float64_t> variables(length);
298
299  bool deriv_found = false;
300
301  for (index_t g = 0; g < length; g++)
302  {
303
304  SGMatrix<float64_t> deriv;
305  SGMatrix<float64_t> derivtru;
306  SGMatrix<float64_t> derivuu;
307  SGVector<float64_t> mean_derivatives;
// NOTE(review): mean_dev_temp is never resized before its elements are
// written below -- writing mean_dev_temp[d] on a default-constructed
// VectorXd is out of bounds. Likely bug; confirm upstream.
308  VectorXd mean_dev_temp;
309
310  if (param->m_datatype.m_ctype == CT_VECTOR ||
311  param->m_datatype.m_ctype == CT_SGVECTOR)
312  {
314  deriv = m_kernel->get_parameter_gradient(param, obj);
315
317  derivtru = m_kernel->get_parameter_gradient(param, obj);
318
320  derivuu = m_kernel->get_parameter_gradient(param, obj);
321
323
324  mean_derivatives = m_mean->get_parameter_derivative(
325  param, obj, m_feature_matrix, g);
326
327  for (index_t d = 0; d < mean_derivatives.vlen; d++)
328  mean_dev_temp[d] = mean_derivatives[d];
329  }
330
331  else
332  {
333  mean_derivatives = m_mean->get_parameter_derivative(
334  param, obj, m_feature_matrix);
335
336  for (index_t d = 0; d < mean_derivatives.vlen; d++)
337  mean_dev_temp[d] = mean_derivatives[d];
338
340  deriv = m_kernel->get_parameter_gradient(param, obj);
341
343  derivtru = m_kernel->get_parameter_gradient(param, obj);
344
346  derivuu = m_kernel->get_parameter_gradient(param, obj);
347
349  }
350
351  sum[0] = 0;
352
353
// Kernel-parameter derivative (GPML infFITC equations).
354  if (deriv.num_cols*deriv.num_rows > 0)
355  {
356  MatrixXd ddiagKi(deriv.num_cols, deriv.num_rows);
357  MatrixXd dKuui(derivuu.num_cols, derivuu.num_rows);
358  MatrixXd dKui(derivtru.num_cols, derivtru.num_rows);
359
// Scale kernel derivatives by scale^2 (kernel is stored scaled).
360  for (index_t d = 0; d < deriv.num_rows; d++)
361  {
362  for (index_t s = 0; s < deriv.num_cols; s++)
363  ddiagKi(d,s) = deriv(d,s)*m_scale*m_scale;
364  }
365
366  for (index_t d = 0; d < derivuu.num_rows; d++)
367  {
368  for (index_t s = 0; s < derivuu.num_cols; s++)
369  dKuui(d,s) = derivuu(d,s)*m_scale*m_scale;
370  }
371
372  for (index_t d = 0; d < derivtru.num_rows; d++)
373  {
374  for (index_t s = 0; s < derivtru.num_cols; s++)
375  dKui(d,s) = derivtru(d,s)*m_scale*m_scale;
376  }
377
378  MatrixXd R = 2*dKui-dKuui*B;
379  MatrixXd v = ddiagKi;
380  MatrixXd temp = R.cwiseProduct(B);
381
382  for (index_t d = 0; d < ddiagKi.rows(); d++)
383  v(d,d) = v(d,d) - temp.col(d).sum();
384
385  sum = sum + ddiagKi.diagonal().transpose()*
386  VectorXd::Ones(eigen_dg.rows()).cwiseQuotient(eigen_dg);
387
388  sum = sum + w.transpose()*(dKuui*w-2*(dKui*al));
389
390  sum = sum - al.transpose()*(v.diagonal().cwiseProduct(al));
391
392  MatrixXd Wdg_temp = Wdg.cwiseProduct(Wdg);
393
394  VectorXd Wdg_sum(Wdg.rows());
395
396  for (index_t d = 0; d < Wdg.rows(); d++)
397  Wdg_sum[d] = Wdg_temp.col(d).sum();
398
399  sum = sum - v.diagonal().transpose()*Wdg_sum;
400
401  Wdg_temp = (R*Wdg.transpose()).cwiseProduct(B*Wdg.transpose());
402
403  sum[0] = sum[0] - Wdg_temp.sum();
404
405  sum /= 2.0;
406
407  variables[g] = sum[0];
408  deriv_found = true;
409  }
410
// Mean-parameter derivative: simply -dm' * al (sign per convention).
411  else if (mean_derivatives.vlen > 0)
412  {
413  sum = mean_dev_temp*al;
414  variables[g] = sum[0];
415  deriv_found = true;
416  }
417
418
419  }
420
421  if (deriv_found)
422  gradient.add(param, variables);
423
424  }
425
426  //Here we take the kernel scale derivative.
427  {
// NOTE(review): `param` is read later but the line fetching it from the
// model-selection parameter list (listing line 430) was elided.
428  TParameter* param;
429  index_t index = get_modsel_param_index("scale");
431
432  SGVector<float64_t> variables(1);
433
434  SGMatrix<float64_t> deriv;
435  SGMatrix<float64_t> derivtru;
436  SGMatrix<float64_t> derivuu;
437
439  deriv = m_kernel->get_kernel_matrix();
440
442  derivtru = m_kernel->get_kernel_matrix();
443
445  derivuu = m_kernel->get_kernel_matrix();
446
448
449  MatrixXd ddiagKi(deriv.num_cols, deriv.num_rows);
450  MatrixXd dKuui(derivuu.num_cols, derivuu.num_rows);
451  MatrixXd dKui(derivtru.num_cols, derivtru.num_rows);
452
// d/dscale of scale^2*K is 2*scale*K.
453  for (index_t d = 0; d < deriv.num_rows; d++)
454  {
455  for (index_t s = 0; s < deriv.num_cols; s++)
456  ddiagKi(d,s) = deriv(d,s)*m_scale*2.0;
457  }
458
459  for (index_t d = 0; d < derivuu.num_rows; d++)
460  {
461  for (index_t s = 0; s < derivuu.num_cols; s++)
462  dKuui(d,s) = derivuu(d,s)*m_scale*2.0;
463  }
464
465  for (index_t d = 0; d < derivtru.num_rows; d++)
466  {
467  for (index_t s = 0; s < derivtru.num_cols; s++)
468  dKui(d,s) = derivtru(d,s)*m_scale*2.0;
469  }
470
// Same derivative recipe as the kernel-parameter branch above.
471  MatrixXd R = 2*dKui-dKuui*B;
472  MatrixXd v = ddiagKi;
473  MatrixXd temp = R.cwiseProduct(B);
474
475  for (index_t d = 0; d < ddiagKi.rows(); d++)
476  v(d,d) = v(d,d) - temp.col(d).sum();
477
478  sum = sum + ddiagKi.diagonal().transpose()*
479
480  VectorXd::Ones(eigen_dg.rows()).cwiseQuotient(eigen_dg);
481
482  sum = sum + w.transpose()*(dKuui*w-2*(dKui*al));
483
484  sum = sum - al.transpose()*(v.diagonal().cwiseProduct(al));
485
486  MatrixXd Wdg_temp = Wdg.cwiseProduct(Wdg);
487
488  VectorXd Wdg_sum(Wdg.rows());
489
490  for (index_t d = 0; d < Wdg.rows(); d++)
491  Wdg_sum[d] = Wdg_temp.col(d).sum();
492
493  sum = sum - v.diagonal().transpose()*Wdg_sum;
494
495  Wdg_temp = (R*Wdg.transpose()).cwiseProduct(B*Wdg.transpose());
496
497  sum[0] = sum[0] - Wdg_temp.sum();
498
499  sum /= 2.0;
500
501  variables[0] = sum[0];
502
503  gradient.add(param, variables);
504  para_dict.add(param, this);
505
506  }
507
// Likelihood sigma derivative (listing line 512, fetching `param`, elided).
508  TParameter* param;
509  index_t index;
510
511  index = m_model->get_modsel_param_index("sigma");
513
514  sum[0] = 0;
515
516  MatrixXd W_temp = W.cwiseProduct(W);
517  VectorXd W_sum(W_temp.rows());
518
519  for (index_t d = 0; d < W_sum.rows(); d++)
520  W_sum[d] = W_temp.col(d).sum();
521
522  W_sum = W_sum.cwiseQuotient(eigen_dg.cwiseProduct(eigen_dg));
523
524  sum[0] = W_sum.sum();
525
526  sum = sum + al.transpose()*al;
527
528  sum[0] = VectorXd::Ones(eigen_dg.rows()).cwiseQuotient(eigen_dg).sum() - sum[0];
529
530  sum = sum*m_sigma*m_sigma;
531  float64_t dKuui = 2.0*m_ind_noise;
532
533  MatrixXd R = -dKuui*B;
534
535  MatrixXd temp = R.cwiseProduct(B);
536  VectorXd v(temp.rows());
537
538  for (index_t d = 0; d < temp.rows(); d++)
539  v[d] = temp.col(d).sum();
540
541  sum = sum + (w.transpose()*dKuui*w)/2.0;
542
543  sum = sum - al.transpose()*(v.cwiseProduct(al))/2.0;
544
545  MatrixXd Wdg_temp = Wdg.cwiseProduct(Wdg);
546  VectorXd Wdg_sum(Wdg.rows());
547
548  for (index_t d = 0; d < Wdg.rows(); d++)
549  Wdg_sum[d] = Wdg_temp.col(d).sum();
550
551  sum = sum - v.transpose()*Wdg_sum/2.0;
552
553
554  Wdg_temp = (R*Wdg.transpose()).cwiseProduct(B*Wdg.transpose());
555
556  sum[0] = sum[0] - Wdg_temp.sum()/2.0;
557
558  SGVector<float64_t> sigma(1);
559
560  sigma[0] = sum[0];
561  gradient.add(param, sigma);
562  para_dict.add(param, m_model);
563
564  return gradient;
565
566 }
567 
// Returns an empty (default-constructed) SGVector -- i.e. this quantity is
// not implemented for FITC. NOTE(review): the signature (listing line 568)
// was elided; presumably this is get_diagonal_vector() -- TODO confirm.
569 {
570  SGVector<float64_t> result;
571
572  return result;
573 }
574 
// Compute the FITC negative log marginal likelihood from the cached
// quantities (dg, r, be, chol_utr):
//   nlZ = [sum(log dg) + r'r - be'be + n*log(2*pi)]/2 + sum(log diag(L_utr))
// NOTE(review): the signature (listing lines 575/577) was elided by the
// extraction -- presumably get_negative_marginal_likelihood(); TODO confirm.
576 {
578  update_all();
579
580  Map<MatrixXd> eigen_chol_utr(m_chol_utr.matrix, m_chol_utr.num_rows, m_chol_utr.num_cols);
581
582  Map<VectorXd> eigen_dg(m_dg.vector, m_dg.vlen);
583
584  Map<VectorXd> eigen_r(m_r.vector, m_r.vlen);
585
586  Map<VectorXd> eigen_be(m_be.vector, m_be.vlen);
587
588  VectorXd temp = eigen_dg;
589  VectorXd temp2(m_chol_utr.num_cols);
590
// Elementwise log of the diagonal noise terms.
591  for (index_t i = 0; i < eigen_dg.rows(); i++)
592  temp[i] = log(eigen_dg[i]);
593
// Log of the Cholesky factor's diagonal (log-determinant contribution).
594  for (index_t j = 0; j < eigen_chol_utr.rows(); j++)
595  temp2[j] = log(eigen_chol_utr(j,j));
596
597  VectorXd sum(1);
598
599  sum[0] = temp.sum();
600  sum = sum + eigen_r.transpose()*eigen_r;
601  sum = sum - eigen_be.transpose()*eigen_be;
602  sum[0] += m_label_vector.vlen*log(2*CMath::PI);
603  sum /= 2.0;
604  sum[0] += temp2.sum();
605
606  return sum[0];
607 }
608 
// Getter that refreshes state and returns a cached result.
// NOTE(review): both the signature (listing line 609) and the declaration of
// `result` (listing line 614) were elided by the extraction -- presumably
// this returns a copy of m_alpha (get_alpha()); TODO confirm.
610 {
612  update_all();
613
615  return result;
616 }
617 
// Refresh state and return a copy of the cached Cholesky-related matrix m_L.
// NOTE(review): the signature (listing line 618) was elided -- presumably
// get_cholesky(); TODO confirm.
619 {
621  update_all();
622
623  SGMatrix<float64_t> result(m_L);
624  return result;
625 }
626 
// Recompute the three kernel matrices the FITC method needs:
//   m_ktrtr = K(X, X)          (unscaled; scaled on use)
//   m_kuu   = scale^2 * K(U, U)  over the latent features
//   m_ktru  = scale^2 * K(U, X)  cross-kernel
// NOTE(review): the signature (listing line 627) and the kernel->init calls
// that switch feature sets between blocks (listing lines 629, 632, 636-637,
// 648, 652) were elided by the extraction; the feature pairing of each
// get_kernel_matrix() call below is inferred from the target member names --
// TODO confirm against the original source.
628 {
630
631  //K(X, X)
633
634  m_ktrtr=kernel_matrix.clone();
635
637
// Presumably K(U, U) over the latent features here (stale "K(X, X)"
// comment in the original listing); the kernel was re-initialized on an
// elided line -- TODO confirm.
638  //K(X, X)
639  kernel_matrix = m_kernel->get_kernel_matrix();
640
641  m_kuu = kernel_matrix.clone();
642  for (index_t i = 0; i < kernel_matrix.num_rows; i++)
643  {
644  for (index_t j = 0; j < kernel_matrix.num_cols; j++)
645  m_kuu(i,j) = kernel_matrix(i,j)*m_scale*m_scale;
646  }
647
649
650  kernel_matrix = m_kernel->get_kernel_matrix();
651
653
654  m_ktru = SGMatrix<float64_t>(kernel_matrix.num_rows, kernel_matrix.num_cols);
655
656  for (index_t i = 0; i < kernel_matrix.num_rows; i++)
657  {
658  for (index_t j = 0; j < kernel_matrix.num_cols; j++)
659  m_ktru(i,j) = kernel_matrix(i,j)*m_scale*m_scale;
660  }
661 }
662 
663 
// Recompute the FITC Cholesky-related quantities following GPML's infFITC:
//   Luu      = chol(K_uu + jitter*I)        -> m_chol_uu
//   V        = Luu \ K_ut                   (then scaled by 1/sqrt(dg))
//   dg       = diag(K_tt) + sigma^2 - sum(V.^2)  -> m_dg
//   Lu       = chol(I + V*V')               -> m_chol_utr
//   r        = (y - m) ./ sqrt(dg)          -> m_r
//   be       = Lu \ (V*r)                   -> m_be
//   m_L      = inv((Lu*Luu)'(Lu*Luu)) - inv(K_uu)   (approximately; see notes)
// NOTE(review): the signature (listing line 664) was elided -- presumably
// update_chol(); TODO confirm.
665 {
666  check_members();
667
668  //Get the sigma variable from the likelihood model
669  float64_t m_sigma =
670  dynamic_cast<CGaussianLikelihood*>(m_model)->get_sigma();
671
672  Map<MatrixXd> eigen_uu(m_kuu.matrix, m_kuu.num_rows, m_kuu.num_cols);
673
674  Map<MatrixXd> eigen_ktru(m_ktru.matrix, m_ktru.num_rows,
675 m_ktru.num_cols);
676
// Cholesky of the jittered inducing-point covariance.
677  LLT<MatrixXd> Luu(eigen_uu +
678  m_ind_noise*MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols));
679
680  MatrixXd eigen_chol_uu = Luu.matrixL();
681
682  MatrixXd V = eigen_chol_uu.colPivHouseholderQr().solve(eigen_ktru);
683
684  m_chol_uu = SGMatrix<float64_t>(eigen_chol_uu.rows(),
685  eigen_chol_uu.cols());
686
687  for (index_t f = 0; f < eigen_chol_uu.rows(); f++)
688  {
689  for (index_t s = 0; s < eigen_chol_uu.cols(); s++)
690  m_chol_uu(f,s) = eigen_chol_uu(f,s);
691  }
692
693  MatrixXd temp_V = V.cwiseProduct(V);
694
695  VectorXd eigen_dg;
696
697  eigen_dg.resize(m_ktrtr.num_cols);
698  VectorXd sqrt_dg(m_ktrtr.num_cols);
699
700  for (index_t i = 0; i < m_ktrtr.num_cols; i++)
701  {
702  eigen_dg[i] = m_ktrtr(i,i)*m_scale*m_scale + m_sigma*m_sigma - temp_V.col(i).sum();
703  sqrt_dg[i] = sqrt(eigen_dg[i]);
704  }
705
706  m_dg = SGVector<float64_t>(eigen_dg.rows());
707
708  for (index_t i = 0; i < eigen_dg.rows(); i++)
709  m_dg[i] = eigen_dg[i];
710
// Column-scale V by 1/sqrt(dg).
711  for (index_t i = 0; i < V.rows(); i++)
712  {
713  for (index_t j = 0; j < V.cols(); j++)
714  V(i,j) /= sqrt_dg[j];
715  }
716
717  LLT<MatrixXd> Lu(V*V.transpose() +
718  MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols));
719
720  MatrixXd eigen_chol_utr = Lu.matrixL();
721
722  m_chol_utr = SGMatrix<float64_t>(eigen_chol_utr.rows(),
723  eigen_chol_utr.cols());
724
725  for (index_t f = 0; f < eigen_chol_utr.rows(); f++)
726  {
727  for (index_t s = 0; s < eigen_chol_utr.cols(); s++)
728  m_chol_utr(f,s) = eigen_chol_utr(f,s);
729  }
730
// Mean-subtracted labels.
731  VectorXd true_lab(m_data_means.vlen);
732
733  for (index_t j = 0; j < m_data_means.vlen; j++)
734  true_lab[j] = m_label_vector[j] - m_data_means[j];
735
736  VectorXd eigen_r;
737
738  eigen_r = true_lab.cwiseQuotient(sqrt_dg);
739
740  m_r = SGVector<float64_t>(eigen_r.rows());
741
742  for (index_t j = 0; j < eigen_r.rows(); j++)
743  m_r[j] = eigen_r[j];
744
745  VectorXd eigen_be = eigen_chol_utr.colPivHouseholderQr().solve(V*eigen_r);
746
747
748  m_be = SGVector<float64_t>(eigen_be.rows());
749
750  for (index_t j = 0; j < eigen_be.rows(); j++)
751  m_be[j] = eigen_be[j];
752
// NOTE(review): this takes an LLT of the triangular factor L_uu itself,
// not of K_uu -- that does not yield inv(K_uu). Looks like a latent bug;
// confirm against the original source / GPML reference.
753  MatrixXd iKuu = eigen_chol_uu.llt().solve(
754  MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols));
755
756  MatrixXd chol = eigen_chol_utr*eigen_chol_uu;
757
758  chol *= chol.transpose();
759
760  chol = chol.llt().solve(MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols));
761
762  chol = chol - iKuu;
763
764  m_L = SGMatrix<float64_t>(chol.rows(), chol.cols());
765
766  for (index_t i = 0; i < chol.rows(); i++)
767  {
768  for (index_t j = 0; j < chol.cols(); j++)
769  m_L(i,j) = chol(i,j);
770  }
771 }
772 
// Recompute the alpha vector from the cached Cholesky factors:
//   alpha = L_uu \ (L_utr \ be)
// i.e. two triangular solves against the stored factors, then copy into
// m_alpha. NOTE(review): the signature (listing line 773) was elided --
// presumably update_alpha(); TODO confirm.
774 {
775  MatrixXd alph;
776
777  Map<MatrixXd> eigen_chol_uu(m_chol_uu.matrix, m_chol_uu.num_rows, m_chol_uu.num_cols);
778
779  Map<MatrixXd> eigen_chol_utr(m_chol_utr.matrix, m_chol_utr.num_rows, m_chol_utr.num_cols);
780
781  Map<VectorXd> eigen_be(m_be.vector, m_be.vlen);
782
783  alph = eigen_chol_utr.colPivHouseholderQr().solve(eigen_be);
784
785  alph = eigen_chol_uu.colPivHouseholderQr().solve(alph);
786
787  m_alpha = SGVector<float64_t>(alph.rows());
788
789  for (index_t i = 0; i < alph.rows(); i++)
790  m_alpha[i] = alph(i,0);
791 }
792 
793 #endif // HAVE_EIGEN3
794 #endif // HAVE_LAPACK

SHOGUN Machine Learning Toolbox - Documentation