SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
StudentsTLikelihood.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2013 Roman Votyakov
8  * Copyright (C) 2012 Jacob Walker
9  * Copyright (C) 2013 Roman Votyakov
10  *
11  * Adapted from the GPML toolbox, specifically likT.m
12  * http://www.gaussianprocess.org/gpml/code/matlab/doc/
13  */
14 
16 
17 #ifdef HAVE_EIGEN3
18 
25 
26 using namespace shogun;
27 using namespace Eigen;
28 
29 namespace shogun
30 {
31 
32 #ifndef DOXYGEN_SHOULD_SKIP_THIS
33 
35 class CNormalPDF : public CFunction
36 {
37 public:
39  CNormalPDF()
40  {
41  m_mu=0.0;
42  m_sigma=1.0;
43  }
44 
50  CNormalPDF(float64_t mu, float64_t sigma)
51  {
52  m_mu=mu;
53  m_sigma=sigma;
54  }
55 
60  void set_mu(float64_t mu) { m_mu=mu; }
61 
66  void set_sigma(float64_t sigma) { m_sigma=sigma; }
67 
74  virtual float64_t operator() (float64_t x)
75  {
76  return (1.0/(CMath::sqrt(2*CMath::PI)*m_sigma))*
77  CMath::exp(-CMath::sq(x-m_mu)/(2.0*CMath::sq(m_sigma)));
78  }
79 
80 private:
81  /* mean */
82  float64_t m_mu;
83 
84  /* standard deviation */
85  float64_t m_sigma;
86 };
87 
91 class CStudentsTPDF : public CFunction
92 {
93 public:
95  CStudentsTPDF()
96  {
97  m_sigma=1.0;
98  m_mu=0.0;
99  m_nu=3.0;
100  }
101 
108  CStudentsTPDF(float64_t sigma, float64_t nu, float64_t mu)
109  {
110  m_sigma=sigma;
111  m_mu=mu;
112  m_nu=nu;
113  }
114 
119  void set_sigma(float64_t sigma) { m_sigma=sigma; }
120 
125  void set_mu(float64_t mu) { m_mu=mu; }
126 
131  void set_nu(float64_t nu) { m_nu=nu; }
132 
140  virtual float64_t operator() (float64_t x)
141  {
142  float64_t lZ=CStatistics::lgamma((m_nu+1.0)/2.0)-CStatistics::lgamma(m_nu/2.0)-
143  CMath::log(m_nu*CMath::PI*CMath::sq(m_sigma))/2.0;
144  return CMath::exp(lZ-((m_nu+1.0)/2.0)*CMath::log(1.0+CMath::sq(x-m_mu)/
145  (m_nu*CMath::sq(m_sigma))));
146  }
147 
148 private:
150  float64_t m_sigma;
151 
153  float64_t m_mu;
154 
156  float64_t m_nu;
157 };
158 
160 class CProductFunction : public CFunction
161 {
162 public:
168  CProductFunction(CFunction* f, CFunction* g)
169  {
170  SG_REF(f);
171  SG_REF(g);
172  m_f=f;
173  m_g=g;
174  }
175 
176  virtual ~CProductFunction()
177  {
178  SG_UNREF(m_f);
179  SG_UNREF(m_g);
180  }
181 
188  virtual float64_t operator() (float64_t x)
189  {
190  return (*m_f)(x)*(*m_g)(x);
191  }
192 
193 private:
195  CFunction* m_f;
197  CFunction* m_g;
198 };
199 
201 class CLinearFunction : public CFunction
202 {
203 public:
205  CLinearFunction() { }
206 
207  virtual ~CLinearFunction() { }
208 
215  virtual float64_t operator() (float64_t x)
216  {
217  return x;
218  }
219 };
220 
222 class CQuadraticFunction : public CFunction
223 {
224 public:
226  CQuadraticFunction() { }
227 
228  virtual ~CQuadraticFunction() { }
229 
236  virtual float64_t operator() (float64_t x)
237  {
238  return CMath::sq(x);
239  }
240 };
241 
242 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
243 
245 {
246  init();
247 }
248 
250  : CLikelihoodModel()
251 {
252  REQUIRE(sigma>0.0, "Scale parameter must be greater than zero\n")
253  REQUIRE(df>1.0, "Number of degrees of freedom must be greater than one\n")
254 
255  init();
256  m_sigma=sigma;
257  m_df=df;
258 }
259 
260 void CStudentsTLikelihood::init()
261 {
262  m_sigma=1.0;
263  m_df=3.0;
264  SG_ADD(&m_df, "df", "Degrees of freedom", MS_AVAILABLE, GRADIENT_AVAILABLE);
265  SG_ADD(&m_sigma, "sigma", "Scale parameter", MS_AVAILABLE, GRADIENT_AVAILABLE);
266 }
267 
269 {
270 }
271 
273  CLikelihoodModel* lik)
274 {
275  ASSERT(lik!=NULL);
276 
277  if (lik->get_model_type()!=LT_STUDENTST)
278  SG_SERROR("Provided likelihood is not of type CStudentsTLikelihood!\n")
279 
280  SG_REF(lik);
281  return (CStudentsTLikelihood*)lik;
282 }
283 
285  SGVector<float64_t> mu, SGVector<float64_t> s2, const CLabels* lab) const
286 {
287  return SGVector<float64_t>(mu);
288 }
289 
291  SGVector<float64_t> mu, SGVector<float64_t> s2, const CLabels* lab) const
292 {
293  SGVector<float64_t> result(s2);
294  Map<VectorXd> eigen_result(result.vector, result.vlen);
295 
296  if (m_df<2.0)
297  eigen_result=CMath::INFTY*VectorXd::Ones(result.vlen);
298  else
299  {
300  eigen_result+=CMath::sq(m_sigma)*m_df/(m_df-2.0)*
301  VectorXd::Ones(result.vlen);
302  }
303 
304  return result;
305 }
306 
308  SGVector<float64_t> func) const
309 {
310  // check the parameters
311  REQUIRE(lab, "Labels are required (lab should not be NULL)\n")
313  "Labels must be type of CRegressionLabels\n")
314  REQUIRE(lab->get_num_labels()==func.vlen, "Number of labels must match "
315  "length of the function vector\n")
316 
317  Map<VectorXd> eigen_f(func.vector, func.vlen);
318 
319  SGVector<float64_t> r(func.vlen);
320  Map<VectorXd> eigen_r(r.vector, r.vlen);
321 
322  SGVector<float64_t> y=((CRegressionLabels*)lab)->get_labels();
323  Map<VectorXd> eigen_y(y.vector, y.vlen);
324 
325  // compute lZ=log(gamma(df/2+1/2))-log(gamma(df/2))-log(df*pi*sigma^2)/2
326  VectorXd eigen_lZ=(CStatistics::lgamma(m_df/2.0+0.5)-
327  CStatistics::lgamma(m_df/2.0)-log(m_df*CMath::PI*CMath::sq(m_sigma))/2.0)*
328  VectorXd::Ones(r.vlen);
329 
330  // compute log probability: lp=lZ-(df+1)*log(1+(y-f).^2./(df*sigma^2))/2
331  eigen_r=eigen_y-eigen_f;
332  eigen_r=eigen_r.cwiseProduct(eigen_r)/(m_df*CMath::sq(m_sigma));
333  eigen_r=eigen_lZ-(m_df+1)*
334  (eigen_r+VectorXd::Ones(r.vlen)).array().log().matrix()/2.0;
335 
336  return r;
337 }
338 
340  const CLabels* lab, SGVector<float64_t> func, index_t i) const
341 {
342  // check the parameters
343  REQUIRE(lab, "Labels are required (lab should not be NULL)\n")
345  "Labels must be type of CRegressionLabels\n")
346  REQUIRE(lab->get_num_labels()==func.vlen, "Number of labels must match "
347  "length of the function vector\n")
348  REQUIRE(i>=1 && i<=3, "Index for derivative should be 1, 2 or 3\n")
349 
350  Map<VectorXd> eigen_f(func.vector, func.vlen);
351 
352  SGVector<float64_t> r(func.vlen);
353  Map<VectorXd> eigen_r(r.vector, r.vlen);
354 
355  SGVector<float64_t> y=((CRegressionLabels*)lab)->get_labels();
356  Map<VectorXd> eigen_y(y.vector, y.vlen);
357 
358  // compute r=y-f, r2=r.^2
359  eigen_r=eigen_y-eigen_f;
360  VectorXd eigen_r2=eigen_r.cwiseProduct(eigen_r);
361 
362  // compute a=(y-f).^2+df*sigma^2
363  VectorXd a=eigen_r2+VectorXd::Ones(r.vlen)*m_df*CMath::sq(m_sigma);
364 
365  if (i==1)
366  {
367  // compute first derivative of log probability wrt f:
368  // dlp=(df+1)*(y-f)./a
369  eigen_r=(m_df+1)*eigen_r.cwiseQuotient(a);
370  }
371  else if (i==2)
372  {
373  // compute second derivative of log probability wrt f:
374  // d2lp=(df+1)*((y-f)^2-df*sigma^2)./a.^2
375  VectorXd b=eigen_r2-VectorXd::Ones(r.vlen)*m_df*CMath::sq(m_sigma);
376 
377  eigen_r=(m_df+1)*b.cwiseQuotient(a.cwiseProduct(a));
378  }
379  else if (i==3)
380  {
381  // compute third derivative of log probability wrt f:
382  // d3lp=(f+1)*2*(y-f).*((y-f)^2-3*f*sigma^2)./a.^3
383  VectorXd c=eigen_r2-VectorXd::Ones(r.vlen)*3*m_df*CMath::sq(m_sigma);
384  VectorXd a2=a.cwiseProduct(a);
385 
386  eigen_r=(m_df+1)*2*eigen_r.cwiseProduct(c).cwiseQuotient(
387  a2.cwiseProduct(a));
388  }
389 
390  return r;
391 }
392 
394  SGVector<float64_t> func, const TParameter* param) const
395 {
396  // check the parameters
397  REQUIRE(lab, "Labels are required (lab should not be NULL)\n")
399  "Labels must be type of CRegressionLabels\n")
400  REQUIRE(lab->get_num_labels()==func.vlen, "Number of labels must match "
401  "length of the function vector\n")
402 
403  SGVector<float64_t> r(func.vlen);
404  Map<VectorXd> eigen_r(r.vector, r.vlen);
405 
406  Map<VectorXd> eigen_f(func.vector, func.vlen);
407 
408  SGVector<float64_t> y=((CRegressionLabels*)lab)->get_labels();
409  Map<VectorXd> eigen_y(y.vector, y.vlen);
410 
411  // compute r=y-f and r2=(y-f).^2
412  eigen_r=eigen_y-eigen_f;
413  VectorXd eigen_r2=eigen_r.cwiseProduct(eigen_r);
414 
415  if (!strcmp(param->m_name, "df"))
416  {
417  // compute derivative of log probability wrt df:
418  // lp_ddf=df*(dloggamma(df/2+1/2)-dloggamma(df/2))/2-1/2-
419  // df*log(1+r2/(df*sigma^2))/2 +(df/2+1/2)*r2./(df*sigma^2+r2)
420  eigen_r=(m_df*(CStatistics::dlgamma(m_df*0.5+0.5)-
421  CStatistics::dlgamma(m_df*0.5))*0.5-0.5)*VectorXd::Ones(r.vlen);
422 
423  eigen_r-=m_df*(VectorXd::Ones(r.vlen)+
424  eigen_r2/(m_df*CMath::sq(m_sigma))).array().log().matrix()/2.0;
425 
426  eigen_r+=(m_df/2.0+0.5)*eigen_r2.cwiseQuotient(
427  eigen_r2+VectorXd::Ones(r.vlen)*(m_df*CMath::sq(m_sigma)));
428 
429  eigen_r*=(1.0-1.0/m_df);
430 
431  return r;
432  }
433  else if (!strcmp(param->m_name, "sigma"))
434  {
435  // compute derivative of log probability wrt sigma:
436  // lp_dsigma=(df+1)*r2./a-1
437  eigen_r=(m_df+1)*eigen_r2.cwiseQuotient(eigen_r2+
438  VectorXd::Ones(r.vlen)*(m_df*CMath::sq(m_sigma)));
439  eigen_r-=VectorXd::Ones(r.vlen);
440 
441  return r;
442  }
443 
444  return SGVector<float64_t>();
445 }
446 
448  SGVector<float64_t> func, const TParameter* param) const
449 {
450  // check the parameters
451  REQUIRE(lab, "Labels are required (lab should not be NULL)\n")
453  "Labels must be type of CRegressionLabels\n")
454  REQUIRE(lab->get_num_labels()==func.vlen, "Number of labels must match "
455  "length of the function vector\n")
456 
457  SGVector<float64_t> r(func.vlen);
458  Map<VectorXd> eigen_r(r.vector, r.vlen);
459 
460  Map<VectorXd> eigen_f(func.vector, func.vlen);
461 
462  SGVector<float64_t> y=((CRegressionLabels*)lab)->get_labels();
463  Map<VectorXd> eigen_y(y.vector, y.vlen);
464 
465  // compute r=y-f and r2=(y-f).^2
466  eigen_r=eigen_y-eigen_f;
467  VectorXd eigen_r2=eigen_r.cwiseProduct(eigen_r);
468 
469  // compute a=r+sigma^2*df and a2=a.^2
470  VectorXd a=eigen_r2+CMath::sq(m_sigma)*m_df*VectorXd::Ones(r.vlen);
471  VectorXd a2=a.cwiseProduct(a);
472 
473  if (!strcmp(param->m_name, "df"))
474  {
475  // compute derivative of first derivative of log probability wrt df:
476  // dlp_ddf=df*r.*(a-sigma^2*(df+1))./a2
477  eigen_r=m_df*eigen_r.cwiseProduct(a-CMath::sq(m_sigma)*(m_df+1.0)*
478  VectorXd::Ones(r.vlen)).cwiseQuotient(a2);
479  eigen_r*=(1.0-1.0/m_df);
480 
481  return r;
482  }
483  else if (!strcmp(param->m_name, "sigma"))
484  {
485  // compute derivative of first derivative of log probability wrt sigma:
486  // dlp_dsigma=-(df+1)*2*df*sigma^2*r./a2
487  eigen_r=-(m_df+1.0)*2*m_df*CMath::sq(m_sigma)*
488  eigen_r.cwiseQuotient(a2);
489 
490  return r;
491  }
492 
493  return SGVector<float64_t>();
494 }
495 
497  SGVector<float64_t> func, const TParameter* param) const
498 {
499  // check the parameters
500  REQUIRE(lab, "Labels are required (lab should not be NULL)\n")
502  "Labels must be type of CRegressionLabels\n")
503  REQUIRE(lab->get_num_labels()==func.vlen, "Number of labels must match "
504  "length of the function vector\n")
505 
506  SGVector<float64_t> r(func.vlen);
507  Map<VectorXd> eigen_r(r.vector, r.vlen);
508 
509  Map<VectorXd> eigen_f(func.vector, func.vlen);
510 
511  SGVector<float64_t> y=((CRegressionLabels*)lab)->get_labels();
512  Map<VectorXd> eigen_y(y.vector, y.vlen);
513 
514  // compute r=y-f and r2=(y-f).^2
515  eigen_r=eigen_y-eigen_f;
516  VectorXd eigen_r2=eigen_r.cwiseProduct(eigen_r);
517 
518  // compute a=r+sigma^2*df and a3=a.^3
519  VectorXd a=eigen_r2+CMath::sq(m_sigma)*m_df*VectorXd::Ones(r.vlen);
520  VectorXd a3=(a.cwiseProduct(a)).cwiseProduct(a);
521 
522  if (!strcmp(param->m_name, "df"))
523  {
524  // compute derivative of second derivative of log probability wrt df:
525  // d2lp_ddf=df*(r2.*(r2-3*sigma^2*(1+df))+df*sigma^4)./a3
526  float64_t sigma2=CMath::sq(m_sigma);
527 
528  eigen_r=m_df*(eigen_r2.cwiseProduct(eigen_r2-3*sigma2*(1.0+m_df)*
529  VectorXd::Ones(r.vlen))+(m_df*CMath::sq(sigma2))*VectorXd::Ones(r.vlen));
530  eigen_r=eigen_r.cwiseQuotient(a3);
531 
532  eigen_r*=(1.0-1.0/m_df);
533 
534  return r;
535  }
536  else if (!strcmp(param->m_name, "sigma"))
537  {
538  // compute derivative of second derivative of log probability wrt sigma:
539  // d2lp_dsigma=(df+1)*2*df*sigma^2*(a-4*r2)./a3
540  eigen_r=(m_df+1.0)*2*m_df*CMath::sq(m_sigma)*
541  (a-4.0*eigen_r2).cwiseQuotient(a3);
542 
543  return r;
544  }
545 
546  return SGVector<float64_t>();
547 }
548 
550  SGVector<float64_t> mu, SGVector<float64_t> s2, const CLabels* lab) const
551 {
553 
554  if (lab)
555  {
556  REQUIRE((mu.vlen==s2.vlen) && (mu.vlen==lab->get_num_labels()),
557  "Length of the vector of means (%d), length of the vector of "
558  "variances (%d) and number of labels (%d) should be the same\n",
559  mu.vlen, s2.vlen, lab->get_num_labels())
561  "Labels must be type of CRegressionLabels\n")
562 
563  y=((CRegressionLabels*)lab)->get_labels();
564  }
565  else
566  {
567  REQUIRE(mu.vlen==s2.vlen, "Length of the vector of means (%d) and "
568  "length of the vector of variances (%d) should be the same\n",
569  mu.vlen, s2.vlen)
570 
572  y.set_const(1.0);
573  }
574 
575  // create an object of normal pdf
576  CNormalPDF* f=new CNormalPDF();
577 
578  // create an object of Student's t pdf
579  CStudentsTPDF* g=new CStudentsTPDF();
580 
581  g->set_nu(m_df);
582  g->set_sigma(m_sigma);
583 
584  // create an object of product of Student's-t pdf and normal pdf
585  CProductFunction* h=new CProductFunction(f, g);
586  SG_REF(h);
587 
588  // compute probabilities using numerical integration
590 
591  for (index_t i=0; i<mu.vlen; i++)
592  {
593  // set normal pdf parameters
594  f->set_mu(mu[i]);
595  f->set_sigma(CMath::sqrt(s2[i]));
596 
597  // set Stundent's-t pdf parameters
598  g->set_mu(y[i]);
599 
600  // evaluate integral on (-inf, inf)
603  }
604 
605  SG_UNREF(h);
606 
607  r.log();
608 
609  return r;
610 }
611 
613  SGVector<float64_t> s2, const CLabels *lab, index_t i) const
614 {
615  // check the parameters
616  REQUIRE(lab, "Labels are required (lab should not be NULL)\n")
617  REQUIRE((mu.vlen==s2.vlen) && (mu.vlen==lab->get_num_labels()),
618  "Length of the vector of means (%d), length of the vector of "
619  "variances (%d) and number of labels (%d) should be the same\n",
620  mu.vlen, s2.vlen, lab->get_num_labels())
621  REQUIRE(i>=0 && i<=mu.vlen, "Index (%d) out of bounds!\n", i)
623  "Labels must be type of CRegressionLabels\n")
624 
625  SGVector<float64_t> y=((CRegressionLabels*)lab)->get_labels();
626 
627  // create an object of normal pdf
628  CNormalPDF* f=new CNormalPDF(mu[i], CMath::sqrt(s2[i]));
629 
630  // create an object of Student's t pdf
631  CStudentsTPDF* g=new CStudentsTPDF(m_sigma, m_df, y[i]);
632 
633  // create an object of h(x)=N(x|mu,sigma)*t(x|mu,sigma,nu)
634  CProductFunction* h=new CProductFunction(f, g);
635 
636  // create an object of k(x)=x*N(x|mu,sigma)*t(x|mu,sigma,nu)
637  CProductFunction* k=new CProductFunction(new CLinearFunction(), h);
638  SG_REF(k);
639 
640  // compute Z = \int N(x|mu,sigma)*t(x|mu,sigma,nu) dx
643 
644  // compute 1st moment:
645  // E[x] = Z^-1 * \int x*N(x|mu,sigma)*t(x|mu,sigma,nu)dx
648 
649  SG_UNREF(k);
650 
651  return Ex;
652 }
653 
655  SGVector<float64_t> s2, const CLabels *lab, index_t i) const
656 {
657  // check the parameters
658  REQUIRE(lab, "Labels are required (lab should not be NULL)\n")
659  REQUIRE((mu.vlen==s2.vlen) && (mu.vlen==lab->get_num_labels()),
660  "Length of the vector of means (%d), length of the vector of "
661  "variances (%d) and number of labels (%d) should be the same\n",
662  mu.vlen, s2.vlen, lab->get_num_labels())
663  REQUIRE(i>=0 && i<=mu.vlen, "Index (%d) out of bounds!\n", i)
665  "Labels must be type of CRegressionLabels\n")
666 
667  SGVector<float64_t> y=((CRegressionLabels*)lab)->get_labels();
668 
669  // create an object of normal pdf
670  CNormalPDF* f=new CNormalPDF(mu[i], CMath::sqrt(s2[i]));
671 
672  // create an object of Student's t pdf
673  CStudentsTPDF* g=new CStudentsTPDF(m_sigma, m_df, y[i]);
674 
675  // create an object of h(x)=N(x|mu,sigma)*t(x|mu,sigma,nu)
676  CProductFunction* h=new CProductFunction(f, g);
677 
678  // create an object of k(x)=x*N(x|mu,sigma)*t(x|mu,sigma,nu)
679  CProductFunction* k=new CProductFunction(new CLinearFunction(), h);
680  SG_REF(k);
681 
682  // create an object of p(x)=x^2*N(x|mu,sigma^2)*t(x|mu,sigma,nu)
683  CProductFunction* p=new CProductFunction(new CQuadraticFunction(), h);
684  SG_REF(p);
685 
686  // compute Z = \int N(x|mu,sigma)*t(x|mu,sigma,nu) dx
689 
690  // compute 1st moment:
691  // E[x] = Z^-1 * \int x*N(x|mu,sigma)*t(x|mu,sigma,nu)dx
694 
695  // compute E[x^2] = Z^-1 * \int x^2*N(x|mu,sigma)*t(x|mu,sigma,nu)dx
698 
699  SG_UNREF(k);
700  SG_UNREF(p);
701 
702  // return 2nd moment: Var[x]=E[x^2]-E[x]^2
703  return Ex2-CMath::sq(Ex);
704 }
705 }
706 
707 #endif /* HAVE_EIGEN3 */

SHOGUN Machine Learning Toolbox - Documentation