Statistics.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2011-2012 Heiko Strathmann
00008  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
00009  *
00010  * ALGLIB Copyright 1984, 1987, 1995, 2000 by Stephen L. Moshier under GPL2+
00011  * http://www.alglib.net/
00012  * See header file for which functions are taken from ALGLIB (with adjustments
00013  * for shogun)
00014  */
00015 
00016 #include <shogun/mathematics/Statistics.h>
00017 #include <shogun/mathematics/Math.h>
00018 #include <shogun/lib/SGMatrix.h>
00019 #include <shogun/lib/SGVector.h>
00020 
00021 #ifdef HAVE_LAPACK
00022 #include <shogun/mathematics/lapack.h>
00023 #endif //HAVE_LAPACK
00024 using namespace shogun;
00025 
00026 float64_t CStatistics::mean(SGVector<float64_t> values)
00027 {
00028     ASSERT(values.vlen>0);
00029     ASSERT(values.vector);
00030 
00031     float64_t sum=0;
00032     for (index_t i=0; i<values.vlen; ++i)
00033         sum+=values.vector[i];
00034 
00035     return sum/values.vlen;
00036 }
00037 
00038 float64_t CStatistics::median(SGVector<float64_t> values, bool modify,
00039             bool in_place)
00040 {
00041     float64_t result;
00042     if (modify)
00043     {
00044         /* use QuickSelect method
00045          * This Quickselect routine is based on the algorithm described in
00046          * "Numerical recipes in C", Second Edition,
00047          * Cambridge University Press, 1992, Section 8.5, ISBN 0-521-43108-5
00048          * This code by Nicolas Devillard - 1998. Public domain.
00049          * Adapted to SHOGUN by Heiko Strathmann
00050          */
00051         int32_t low;
00052         int32_t high;
00053         int32_t median;
00054         int32_t middle;
00055         int32_t l;
00056         int32_t h;
00057 
00058         low=0;
00059         high=values.vlen-1;
00060         median=(low+high)/2;
00061 
00062         while (true)
00063         {
00064             if (high<=low)
00065             {
00066                 result=values[median];
00067                 break;
00068             }
00069 
00070             if (high==low+1)
00071             {
00072                 if (values[low]>values[high])
00073                     CMath::CMath::swap(values[low], values[high]);
00074                 result=values[median];
00075                 break;
00076             }
00077 
00078             middle=(low+high)/2;
00079             if (values[middle]>values[high])
00080                 CMath::swap(values[middle], values[high]);
00081             if (values[low]>values[high])
00082                 CMath::swap(values[low], values[high]);
00083             if (values[middle]>values[low])
00084                 CMath::swap(values[middle], values[low]);
00085 
00086             CMath::swap(values[middle], values[low+1]);
00087 
00088             l=low+1;
00089             h=high;
00090             for (;;)
00091             {
00092                 do
00093                     l++;
00094                 while (values[low]>values[l]);
00095                 do
00096                     h--;
00097                 while (values[h]>values[low]);
00098                 if (h<l)
00099                     break;
00100                 CMath::swap(values[l], values[h]);
00101             }
00102 
00103             CMath::swap(values[low], values[h]);
00104             if (h<=median)
00105                 low=l;
00106             if (h>=median)
00107                 high=h-1;
00108         }
00109 
00110     }
00111     else
00112     {
00113         if (in_place)
00114         {
00115             /* use Torben method
00116              * The following code is public domain.
00117              * Algorithm by Torben Mogensen, implementation by N. Devillard.
00118              * This code in public domain.
00119              * Adapted to SHOGUN by Heiko Strathmann
00120              */
00121             int32_t i;
00122             int32_t less;
00123             int32_t greater;
00124             int32_t equal;
00125             float64_t min;
00126             float64_t max;
00127             float64_t guess;
00128             float64_t maxltguess;
00129             float64_t mingtguess;
00130             min=max=values[0];
00131             for (i=1; i<values.vlen; i++)
00132             {
00133                 if (values[i]<min)
00134                     min=values[i];
00135                 if (values[i]>max)
00136                     max=values[i];
00137             }
00138             while (1)
00139             {
00140                 guess=(min+max)/2;
00141                 less=0;
00142                 greater=0;
00143                 equal=0;
00144                 maxltguess=min;
00145                 mingtguess=max;
00146                 for (i=0; i<values.vlen; i++)
00147                 {
00148                     if (values[i]<guess)
00149                     {
00150                         less++;
00151                         if (values[i]>maxltguess)
00152                             maxltguess=values[i];
00153                     }
00154                     else if (values[i]>guess)
00155                     {
00156                         greater++;
00157                         if (values[i]<mingtguess)
00158                             mingtguess=values[i];
00159                     }
00160                     else
00161                         equal++;
00162                 }
00163                 if (less<=(values.vlen+1)/2&&greater<=(values.vlen+1)/2)
00164                     break;
00165                 else if (less>greater)
00166                     max=maxltguess;
00167                 else
00168                     min=mingtguess;
00169             }
00170 
00171             if (less>=(values.vlen+1)/2)
00172                 result=maxltguess;
00173             else if (less+equal>=(values.vlen+1)/2)
00174                 result=guess;
00175             else
00176                 result=mingtguess;
00177         }
00178         else
00179         {
00180             /* copy vector and do recursive call which modifies copy */
00181             SGVector<float64_t> copy(values.vlen);
00182             memcpy(copy.vector, values.vector, sizeof(float64_t)*values.vlen);
00183             result=median(copy, true);
00184         }
00185     }
00186 
00187     return result;
00188 }
00189 
00190 float64_t CStatistics::matrix_median(SGMatrix<float64_t> values,
00191         bool modify, bool in_place)
00192 {
00193     /* create a vector that uses the matrix data, dont do reference counting */
00194     SGVector<float64_t> as_vector(values.matrix,
00195             values.num_rows*values.num_cols, false);
00196 
00197     /* return vector median method */
00198     return median(as_vector, modify, in_place);
00199 }
00200 
00201 
00202 float64_t CStatistics::variance(SGVector<float64_t> values)
00203 {
00204     ASSERT(values.vlen>1);
00205     ASSERT(values.vector);
00206 
00207     float64_t mean=CStatistics::mean(values);
00208 
00209     float64_t sum_squared_diff=0;
00210     for (index_t i=0; i<values.vlen; ++i)
00211         sum_squared_diff+=CMath::pow(values.vector[i]-mean, 2);
00212 
00213     return sum_squared_diff/(values.vlen-1);
00214 }
00215 
00216 SGVector<float64_t> CStatistics::matrix_mean(SGMatrix<float64_t> values,
00217         bool col_wise)
00218 {
00219     ASSERT(values.num_rows>0);
00220     ASSERT(values.num_cols>0);
00221     ASSERT(values.matrix);
00222 
00223     SGVector<float64_t> result;
00224 
00225     if (col_wise)
00226     {
00227         result=SGVector<float64_t>(values.num_cols);
00228         for (index_t j=0; j<values.num_cols; ++j)
00229         {
00230             result[j]=0;
00231             for (index_t i=0; i<values.num_rows; ++i)
00232                 result[j]+=values(i,j);
00233 
00234             result[j]/=values.num_rows;
00235         }
00236     }
00237     else
00238     {
00239         result=SGVector<float64_t>(values.num_rows);
00240         for (index_t j=0; j<values.num_rows; ++j)
00241         {
00242             result[j]=0;
00243             for (index_t i=0; i<values.num_cols; ++i)
00244                 result[j]+=values(j,i);
00245 
00246             result[j]/=values.num_cols;
00247         }
00248     }
00249 
00250     return result;
00251 }
00252 
00253 SGVector<float64_t> CStatistics::matrix_variance(SGMatrix<float64_t> values,
00254         bool col_wise)
00255 {
00256     ASSERT(values.num_rows>0);
00257     ASSERT(values.num_cols>0);
00258     ASSERT(values.matrix);
00259 
00260     /* first compute mean */
00261     SGVector<float64_t> mean=CStatistics::matrix_mean(values, col_wise);
00262 
00263     SGVector<float64_t> result;
00264 
00265     if (col_wise)
00266     {
00267         result=SGVector<float64_t>(values.num_cols);
00268         for (index_t j=0; j<values.num_cols; ++j)
00269         {
00270             result[j]=0;
00271             for (index_t i=0; i<values.num_rows; ++i)
00272                 result[j]+=CMath::pow(values(i,j)-mean[j], 2);
00273 
00274             result[j]/=(values.num_rows-1);
00275         }
00276     }
00277     else
00278     {
00279         result=SGVector<float64_t>(values.num_rows);
00280         for (index_t j=0; j<values.num_rows; ++j)
00281         {
00282             result[j]=0;
00283             for (index_t i=0; i<values.num_cols; ++i)
00284                 result[j]+=CMath::pow(values(j,i)-mean[j], 2);
00285 
00286             result[j]/=(values.num_cols-1);
00287         }
00288     }
00289 
00290     return result;
00291 }
00292 
00293 float64_t CStatistics::std_deviation(SGVector<float64_t> values)
00294 {
00295     return CMath::sqrt(variance(values));
00296 }
00297 
00298 SGVector<float64_t> CStatistics::matrix_std_deviation(
00299         SGMatrix<float64_t> values, bool col_wise)
00300 {
00301     SGVector<float64_t> var=CStatistics::matrix_variance(values, col_wise);
00302     for (index_t i=0; i<var.vlen; ++i)
00303         var[i]=CMath::sqrt(var[i]);
00304 
00305     return var;
00306 }
00307 
00308 #ifdef HAVE_LAPACK
00309 SGMatrix<float64_t> CStatistics::covariance_matrix(
00310         SGMatrix<float64_t> observations, bool in_place)
00311 {
00312     SGMatrix<float64_t> centered=
00313             in_place ?
00314                     observations :
00315                     SGMatrix<float64_t>(observations.num_rows,
00316                             observations.num_cols);
00317 
00318     if (!in_place)
00319     {
00320         memcpy(centered.matrix, observations.matrix,
00321                 sizeof(float64_t)*observations.num_rows*observations.num_cols);
00322     }
00323     centered.remove_column_mean();
00324 
00325     /* compute 1/(m-1) * X' * X */
00326     SGMatrix<float64_t> cov=SGMatrix<float64_t>::matrix_multiply(centered,
00327             centered, true, false, 1.0/(observations.num_rows-1));
00328 
00329     return cov;
00330 }
00331 #endif //HAVE_LAPACK
00332 
00333 float64_t CStatistics::confidence_intervals_mean(SGVector<float64_t> values,
00334         float64_t alpha, float64_t& conf_int_low, float64_t& conf_int_up)
00335 {
00336     ASSERT(values.vlen>1);
00337     ASSERT(values.vector);
00338 
00339     /* using one sided student t distribution evaluation */
00340     alpha=alpha/2;
00341 
00342     /* degrees of freedom */
00343     int32_t deg=values.vlen-1;
00344 
00345     /* compute absolute value of t-value */
00346     float64_t t=CMath::abs(inverse_student_t(deg, alpha));
00347 
00348     /* values for calculating confidence interval */
00349     float64_t std_dev=CStatistics::std_deviation(values);
00350     float64_t mean=CStatistics::mean(values);
00351 
00352     /* compute confidence interval */
00353     float64_t interval=t*std_dev/CMath::sqrt((float64_t)values.vlen);
00354     conf_int_low=mean-interval;
00355     conf_int_up=mean+interval;
00356 
00357     return mean;
00358 }
00359 
00360 float64_t CStatistics::inverse_student_t(int32_t k, float64_t p)
00361 {
00362     float64_t t;
00363     float64_t rk;
00364     float64_t z;
00365     int32_t rflg;
00366     float64_t result;
00367 
00368     if (!(k>0 && greater(p, 0)) && less(p, 1))
00369     {
00370         SG_SERROR("CStatistics::inverse_student_t_distribution(): "
00371         "Domain error\n");
00372     }
00373     rk=k;
00374     if (greater(p, 0.25) && less(p, 0.75))
00375     {
00376         if (equal(p, 0.5))
00377         {
00378             result=0;
00379             return result;
00380         }
00381         z=1.0-2.0*p;
00382         z=inverse_incomplete_beta(0.5, 0.5*rk, CMath::abs(z));
00383         t=CMath::sqrt(rk*z/(1.0-z));
00384         if (less(p, 0.5))
00385         {
00386             t=-t;
00387         }
00388         result=t;
00389         return result;
00390     }
00391     rflg=-1;
00392     if (greater_equal(p, 0.5))
00393     {
00394         p=1.0-p;
00395         rflg=1;
00396     }
00397     z=inverse_incomplete_beta(0.5*rk, 0.5, 2.0*p);
00398     if (less(CMath::MAX_REAL_NUMBER*z, rk))
00399     {
00400         result=rflg*CMath::MAX_REAL_NUMBER;
00401         return result;
00402     }
00403     t=CMath::sqrt(rk/z-rk);
00404     result=rflg*t;
00405     return result;
00406 }
00407 
00408 float64_t CStatistics::inverse_incomplete_beta(float64_t a, float64_t b,
00409         float64_t y)
00410 {
00411     float64_t aaa;
00412     float64_t bbb;
00413     float64_t y0;
00414     float64_t d;
00415     float64_t yyy;
00416     float64_t x;
00417     float64_t x0;
00418     float64_t x1;
00419     float64_t lgm;
00420     float64_t yp;
00421     float64_t di;
00422     float64_t dithresh;
00423     float64_t yl;
00424     float64_t yh;
00425     float64_t xt;
00426     int32_t i;
00427     int32_t rflg;
00428     int32_t dir;
00429     int32_t nflg;
00430     int32_t mainlooppos;
00431     int32_t ihalve;
00432     int32_t ihalvecycle;
00433     int32_t newt;
00434     int32_t newtcycle;
00435     int32_t breaknewtcycle;
00436     int32_t breakihalvecycle;
00437     float64_t result;
00438 
00439     i=0;
00440     if (!(greater_equal(y, 0) && less_equal(y, 1)))
00441     {
00442         SG_SERROR("CStatistics::inverse_incomplete_beta(): "
00443         "Domain error\n");
00444     }
00445 
00446     /*
00447      * special cases
00448      */
00449     if (equal(y, 0))
00450     {
00451         result=0;
00452         return result;
00453     }
00454     if (equal(y, 1.0))
00455     {
00456         result=1;
00457         return result;
00458     }
00459 
00460     /*
00461      * these initializations are not really necessary,
00462      * but without them compiler complains about 'possibly uninitialized variables'.
00463      */
00464     dithresh=0;
00465     rflg=0;
00466     aaa=0;
00467     bbb=0;
00468     y0=0;
00469     x=0;
00470     yyy=0;
00471     lgm=0;
00472     dir=0;
00473     di=0;
00474 
00475     /*
00476      * normal initializations
00477      */
00478     x0=0.0;
00479     yl=0.0;
00480     x1=1.0;
00481     yh=1.0;
00482     nflg=0;
00483     mainlooppos=0;
00484     ihalve=1;
00485     ihalvecycle=2;
00486     newt=3;
00487     newtcycle=4;
00488     breaknewtcycle=5;
00489     breakihalvecycle=6;
00490 
00491     /*
00492      * main loop
00493      */
00494     for (;;)
00495     {
00496 
00497         /*
00498          * start
00499          */
00500         if (mainlooppos==0)
00501         {
00502             if (less_equal(a, 1.0) || less_equal(b, 1.0))
00503             {
00504                 dithresh=1.0e-6;
00505                 rflg=0;
00506                 aaa=a;
00507                 bbb=b;
00508                 y0=y;
00509                 x=aaa/(aaa+bbb);
00510                 yyy=incomplete_beta(aaa, bbb, x);
00511                 mainlooppos=ihalve;
00512                 continue;
00513             }
00514             else
00515             {
00516                 dithresh=1.0e-4;
00517             }
00518             yp=-inverse_normal_cdf(y);
00519             if (greater(y, 0.5))
00520             {
00521                 rflg=1;
00522                 aaa=b;
00523                 bbb=a;
00524                 y0=1.0-y;
00525                 yp=-yp;
00526             }
00527             else
00528             {
00529                 rflg=0;
00530                 aaa=a;
00531                 bbb=b;
00532                 y0=y;
00533             }
00534             lgm=(yp*yp-3.0)/6.0;
00535             x=2.0/(1.0/(2.0*aaa-1.0)+1.0/(2.0*bbb-1.0));
00536             d=yp*CMath::sqrt(x+lgm)/x
00537                     -(1.0/(2.0*bbb-1.0)-1.0/(2.0*aaa-1.0))
00538                             *(lgm+5.0/6.0-2.0/(3.0*x));
00539             d=2.0*d;
00540             if (less(d, CMath::log(CMath::MIN_REAL_NUMBER)))
00541             {
00542                 x=0;
00543                 break;
00544             }
00545             x=aaa/(aaa+bbb*CMath::exp(d));
00546             yyy=incomplete_beta(aaa, bbb, x);
00547             yp=(yyy-y0)/y0;
00548             if (less(CMath::abs(yp), 0.2))
00549             {
00550                 mainlooppos=newt;
00551                 continue;
00552             }
00553             mainlooppos=ihalve;
00554             continue;
00555         }
00556 
00557         /*
00558          * ihalve
00559          */
00560         if (mainlooppos==ihalve)
00561         {
00562             dir=0;
00563             di=0.5;
00564             i=0;
00565             mainlooppos=ihalvecycle;
00566             continue;
00567         }
00568 
00569         /*
00570          * ihalvecycle
00571          */
00572         if (mainlooppos==ihalvecycle)
00573         {
00574             if (i<=99)
00575             {
00576                 if (i!=0)
00577                 {
00578                     x=x0+di*(x1-x0);
00579                     if (equal(x, 1.0))
00580                     {
00581                         x=1.0-CMath::MACHINE_EPSILON;
00582                     }
00583                     if (equal(x, 0.0))
00584                     {
00585                         di=0.5;
00586                         x=x0+di*(x1-x0);
00587                         if (equal(x, 0.0))
00588                         {
00589                             break;
00590                         }
00591                     }
00592                     yyy=incomplete_beta(aaa, bbb, x);
00593                     yp=(x1-x0)/(x1+x0);
00594                     if (less(CMath::abs(yp), dithresh))
00595                     {
00596                         mainlooppos=newt;
00597                         continue;
00598                     }
00599                     yp=(yyy-y0)/y0;
00600                     if (less(CMath::abs(yp), dithresh))
00601                     {
00602                         mainlooppos=newt;
00603                         continue;
00604                     }
00605                 }
00606                 if (less(yyy, y0))
00607                 {
00608                     x0=x;
00609                     yl=yyy;
00610                     if (dir<0)
00611                     {
00612                         dir=0;
00613                         di=0.5;
00614                     }
00615                     else
00616                     {
00617                         if (dir>3)
00618                         {
00619                             di=1.0-(1.0-di)*(1.0-di);
00620                         }
00621                         else
00622                         {
00623                             if (dir>1)
00624                             {
00625                                 di=0.5*di+0.5;
00626                             }
00627                             else
00628                             {
00629                                 di=(y0-yyy)/(yh-yl);
00630                             }
00631                         }
00632                     }
00633                     dir=dir+1;
00634                     if (greater(x0, 0.75))
00635                     {
00636                         if (rflg==1)
00637                         {
00638                             rflg=0;
00639                             aaa=a;
00640                             bbb=b;
00641                             y0=y;
00642                         }
00643                         else
00644                         {
00645                             rflg=1;
00646                             aaa=b;
00647                             bbb=a;
00648                             y0=1.0-y;
00649                         }
00650                         x=1.0-x;
00651                         yyy=incomplete_beta(aaa, bbb, x);
00652                         x0=0.0;
00653                         yl=0.0;
00654                         x1=1.0;
00655                         yh=1.0;
00656                         mainlooppos=ihalve;
00657                         continue;
00658                     }
00659                 }
00660                 else
00661                 {
00662                     x1=x;
00663                     if (rflg==1 && less(x1, CMath::MACHINE_EPSILON))
00664                     {
00665                         x=0.0;
00666                         break;
00667                     }
00668                     yh=yyy;
00669                     if (dir>0)
00670                     {
00671                         dir=0;
00672                         di=0.5;
00673                     }
00674                     else
00675                     {
00676                         if (dir<-3)
00677                         {
00678                             di=di*di;
00679                         }
00680                         else
00681                         {
00682                             if (dir<-1)
00683                             {
00684                                 di=0.5*di;
00685                             }
00686                             else
00687                             {
00688                                 di=(yyy-y0)/(yh-yl);
00689                             }
00690                         }
00691                     }
00692                     dir=dir-1;
00693                 }
00694                 i=i+1;
00695                 mainlooppos=ihalvecycle;
00696                 continue;
00697             }
00698             else
00699             {
00700                 mainlooppos=breakihalvecycle;
00701                 continue;
00702             }
00703         }
00704 
00705         /*
00706          * breakihalvecycle
00707          */
00708         if (mainlooppos==breakihalvecycle)
00709         {
00710             if (greater_equal(x0, 1.0))
00711             {
00712                 x=1.0-CMath::MACHINE_EPSILON;
00713                 break;
00714             }
00715             if (less_equal(x, 0.0))
00716             {
00717                 x=0.0;
00718                 break;
00719             }
00720             mainlooppos=newt;
00721             continue;
00722         }
00723 
00724         /*
00725          * newt
00726          */
00727         if (mainlooppos==newt)
00728         {
00729             if (nflg!=0)
00730             {
00731                 break;
00732             }
00733             nflg=1;
00734             lgm=lgamma(aaa+bbb)-lgamma(aaa)-lgamma(bbb);
00735             i=0;
00736             mainlooppos=newtcycle;
00737             continue;
00738         }
00739 
00740         /*
00741          * newtcycle
00742          */
00743         if (mainlooppos==newtcycle)
00744         {
00745             if (i<=7)
00746             {
00747                 if (i!=0)
00748                 {
00749                     yyy=incomplete_beta(aaa, bbb, x);
00750                 }
00751                 if (less(yyy, yl))
00752                 {
00753                     x=x0;
00754                     yyy=yl;
00755                 }
00756                 else
00757                 {
00758                     if (greater(yyy, yh))
00759                     {
00760                         x=x1;
00761                         yyy=yh;
00762                     }
00763                     else
00764                     {
00765                         if (less(yyy, y0))
00766                         {
00767                             x0=x;
00768                             yl=yyy;
00769                         }
00770                         else
00771                         {
00772                             x1=x;
00773                             yh=yyy;
00774                         }
00775                     }
00776                 }
00777                 if (equal(x, 1.0) || equal(x, 0.0))
00778                 {
00779                     mainlooppos=breaknewtcycle;
00780                     continue;
00781                 }
00782                 d=(aaa-1.0)*CMath::log(x)+(bbb-1.0)*CMath::log(1.0-x)+lgm;
00783                 if (less(d, CMath::log(CMath::MIN_REAL_NUMBER)))
00784                 {
00785                     break;
00786                 }
00787                 if (greater(d, CMath::log(CMath::MAX_REAL_NUMBER)))
00788                 {
00789                     mainlooppos=breaknewtcycle;
00790                     continue;
00791                 }
00792                 d=CMath::exp(d);
00793                 d=(yyy-y0)/d;
00794                 xt=x-d;
00795                 if (less_equal(xt, x0))
00796                 {
00797                     yyy=(x-x0)/(x1-x0);
00798                     xt=x0+0.5*yyy*(x-x0);
00799                     if (less_equal(xt, 0.0))
00800                     {
00801                         mainlooppos=breaknewtcycle;
00802                         continue;
00803                     }
00804                 }
00805                 if (greater_equal(xt, x1))
00806                 {
00807                     yyy=(x1-x)/(x1-x0);
00808                     xt=x1-0.5*yyy*(x1-x);
00809                     if (greater_equal(xt, 1.0))
00810                     {
00811                         mainlooppos=breaknewtcycle;
00812                         continue;
00813                     }
00814                 }
00815                 x=xt;
00816                 if (less(CMath::abs(d/x), 128.0*CMath::MACHINE_EPSILON))
00817                 {
00818                     break;
00819                 }
00820                 i=i+1;
00821                 mainlooppos=newtcycle;
00822                 continue;
00823             }
00824             else
00825             {
00826                 mainlooppos=breaknewtcycle;
00827                 continue;
00828             }
00829         }
00830 
00831         /*
00832          * breaknewtcycle
00833          */
00834         if (mainlooppos==breaknewtcycle)
00835         {
00836             dithresh=256.0*CMath::MACHINE_EPSILON;
00837             mainlooppos=ihalve;
00838             continue;
00839         }
00840     }
00841 
00842     /*
00843      * done
00844      */
00845     if (rflg!=0)
00846     {
00847         if (less_equal(x, CMath::MACHINE_EPSILON))
00848         {
00849             x=1.0-CMath::MACHINE_EPSILON;
00850         }
00851         else
00852         {
00853             x=1.0-x;
00854         }
00855     }
00856     result=x;
00857     return result;
00858 }
00859 
00860 float64_t CStatistics::incomplete_beta(float64_t a, float64_t b, float64_t x)
00861 {
00862     float64_t t;
00863     float64_t xc;
00864     float64_t w;
00865     float64_t y;
00866     int32_t flag;
00867     float64_t big;
00868     float64_t biginv;
00869     float64_t maxgam;
00870     float64_t minlog;
00871     float64_t maxlog;
00872     float64_t result;
00873 
00874     big=4.503599627370496e15;
00875     biginv=2.22044604925031308085e-16;
00876     maxgam=171.624376956302725;
00877     minlog=CMath::log(CMath::MIN_REAL_NUMBER);
00878     maxlog=CMath::log(CMath::MAX_REAL_NUMBER);
00879 
00880     if (!(greater(a, 0) && greater(b, 0)))
00881     {
00882         SG_SERROR("CStatistics::incomplete_beta(): "
00883         "Domain error\n");
00884     }
00885 
00886     if (!(greater_equal(x, 0) && less_equal(x, 1)))
00887     {
00888         SG_SERROR("CStatistics::incomplete_beta(): "
00889         "Domain error\n");
00890     }
00891 
00892     if (equal(x, 0))
00893     {
00894         result=0;
00895         return result;
00896     }
00897     if (equal(x, 1))
00898     {
00899         result=1;
00900         return result;
00901     }
00902     flag=0;
00903     if (less_equal(b*x, 1.0) && less_equal(x, 0.95))
00904     {
00905         result=ibetaf_incompletebetaps(a, b, x, maxgam);
00906         return result;
00907     }
00908     w=1.0-x;
00909     if (greater(x, a/(a+b)))
00910     {
00911         flag=1;
00912         t=a;
00913         a=b;
00914         b=t;
00915         xc=x;
00916         x=w;
00917     }
00918     else
00919     {
00920         xc=w;
00921     }
00922     if ((flag==1 && less_equal(b*x, 1.0)) && less_equal(x, 0.95))
00923     {
00924         t=ibetaf_incompletebetaps(a, b, x, maxgam);
00925         if (less_equal(t, CMath::MACHINE_EPSILON))
00926         {
00927             result=1.0-CMath::MACHINE_EPSILON;
00928         }
00929         else
00930         {
00931             result=1.0-t;
00932         }
00933         return result;
00934     }
00935     y=x*(a+b-2.0)-(a-1.0);
00936     if (less(y, 0.0))
00937     {
00938         w=ibetaf_incompletebetafe(a, b, x, big, biginv);
00939     }
00940     else
00941     {
00942         w=ibetaf_incompletebetafe2(a, b, x, big, biginv)/xc;
00943     }
00944     y=a*CMath::log(x);
00945     t=b*CMath::log(xc);
00946     if ((less(a+b, maxgam) && less(CMath::abs(y), maxlog))
00947              && less(CMath::abs(t), maxlog))
00948     {
00949         t=CMath::pow(xc, b);
00950         t=t*CMath::pow(x, a);
00951         t=t/a;
00952         t=t*w;
00953         t=t*(tgamma(a+b)/(tgamma(a)*tgamma(b)));
00954         if (flag==1)
00955         {
00956             if (less_equal(t, CMath::MACHINE_EPSILON))
00957             {
00958                 result=1.0-CMath::MACHINE_EPSILON;
00959             }
00960             else
00961             {
00962                 result=1.0-t;
00963             }
00964         }
00965         else
00966         {
00967             result=t;
00968         }
00969         return result;
00970     }
00971     y=y+t+lgamma(a+b)-lgamma(a)-lgamma(b);
00972     y=y+CMath::log(w/a);
00973     if (less(y, minlog))
00974     {
00975         t=0.0;
00976     }
00977     else
00978     {
00979         t=CMath::exp(y);
00980     }
00981     if (flag==1)
00982     {
00983         if (less_equal(t, CMath::MACHINE_EPSILON))
00984         {
00985             t=1.0-CMath::MACHINE_EPSILON;
00986         }
00987         else
00988         {
00989             t=1.0-t;
00990         }
00991     }
00992     result=t;
00993     return result;
00994 }
00995 
00996 float64_t CStatistics::inverse_normal_cdf(float64_t y0, float64_t mean,
00997         float64_t std_dev)
00998 {
00999     return inverse_normal_cdf(y0)*std_dev+mean;
01000 }
01001 
01002 float64_t CStatistics::inverse_normal_cdf(float64_t y0)
01003 {
01004     float64_t expm2;
01005     float64_t s2pi;
01006     float64_t x;
01007     float64_t y;
01008     float64_t z;
01009     float64_t y2;
01010     float64_t x0;
01011     float64_t x1;
01012     int32_t code;
01013     float64_t p0;
01014     float64_t q0;
01015     float64_t p1;
01016     float64_t q1;
01017     float64_t p2;
01018     float64_t q2;
01019     float64_t result;
01020 
01021     expm2=0.13533528323661269189;
01022     s2pi=2.50662827463100050242;
01023     if (less_equal(y0, 0))
01024     {
01025         result=-CMath::MAX_REAL_NUMBER;
01026         return result;
01027     }
01028     if (greater_equal(y0, 1))
01029     {
01030         result=CMath::MAX_REAL_NUMBER;
01031         return result;
01032     }
01033     code=1;
01034     y=y0;
01035     if (greater(y, 1.0-expm2))
01036     {
01037         y=1.0-y;
01038         code=0;
01039     }
01040     if (greater(y, expm2))
01041     {
01042         y=y-0.5;
01043         y2=y*y;
01044         p0=-59.9633501014107895267;
01045         p0=98.0010754185999661536+y2*p0;
01046         p0=-56.6762857469070293439+y2*p0;
01047         p0=13.9312609387279679503+y2*p0;
01048         p0=-1.23916583867381258016+y2*p0;
01049         q0=1;
01050         q0=1.95448858338141759834+y2*q0;
01051         q0=4.67627912898881538453+y2*q0;
01052         q0=86.3602421390890590575+y2*q0;
01053         q0=-225.462687854119370527+y2*q0;
01054         q0=200.260212380060660359+y2*q0;
01055         q0=-82.0372256168333339912+y2*q0;
01056         q0=15.9056225126211695515+y2*q0;
01057         q0=-1.18331621121330003142+y2*q0;
01058         x=y+y*y2*p0/q0;
01059         x=x*s2pi;
01060         result=x;
01061         return result;
01062     }
01063     x=CMath::sqrt(-2.0*CMath::log(y));
01064     x0=x-CMath::log(x)/x;
01065     z=1.0/x;
01066     if (less(x, 8.0))
01067     {
01068         p1=4.05544892305962419923;
01069         p1=31.5251094599893866154+z*p1;
01070         p1=57.1628192246421288162+z*p1;
01071         p1=44.0805073893200834700+z*p1;
01072         p1=14.6849561928858024014+z*p1;
01073         p1=2.18663306850790267539+z*p1;
01074         p1=-1.40256079171354495875*0.1+z*p1;
01075         p1=-3.50424626827848203418*0.01+z*p1;
01076         p1=-8.57456785154685413611*0.0001+z*p1;
01077         q1=1;
01078         q1=15.7799883256466749731+z*q1;
01079         q1=45.3907635128879210584+z*q1;
01080         q1=41.3172038254672030440+z*q1;
01081         q1=15.0425385692907503408+z*q1;
01082         q1=2.50464946208309415979+z*q1;
01083         q1=-1.42182922854787788574*0.1+z*q1;
01084         q1=-3.80806407691578277194*0.01+z*q1;
01085         q1=-9.33259480895457427372*0.0001+z*q1;
01086         x1=z*p1/q1;
01087     }
01088     else
01089     {
01090         p2=3.23774891776946035970;
01091         p2=6.91522889068984211695+z*p2;
01092         p2=3.93881025292474443415+z*p2;
01093         p2=1.33303460815807542389+z*p2;
01094         p2=2.01485389549179081538*0.1+z*p2;
01095         p2=1.23716634817820021358*0.01+z*p2;
01096         p2=3.01581553508235416007*0.0001+z*p2;
01097         p2=2.65806974686737550832*0.000001+z*p2;
01098         p2=6.23974539184983293730*0.000000001+z*p2;
01099         q2=1;
01100         q2=6.02427039364742014255+z*q2;
01101         q2=3.67983563856160859403+z*q2;
01102         q2=1.37702099489081330271+z*q2;
01103         q2=2.16236993594496635890*0.1+z*q2;
01104         q2=1.34204006088543189037*0.01+z*q2;
01105         q2=3.28014464682127739104*0.0001+z*q2;
01106         q2=2.89247864745380683936*0.000001+z*q2;
01107         q2=6.79019408009981274425*0.000000001+z*q2;
01108         x1=z*p2/q2;
01109     }
01110     x=x0-x1;
01111     if (code!=0)
01112     {
01113         x=-x;
01114     }
01115     result=x;
01116     return result;
01117 }
01118 
01119 float64_t CStatistics::ibetaf_incompletebetaps(float64_t a, float64_t b,
01120         float64_t x, float64_t maxgam)
01121 {
01122     float64_t s;
01123     float64_t t;
01124     float64_t u;
01125     float64_t v;
01126     float64_t n;
01127     float64_t t1;
01128     float64_t z;
01129     float64_t ai;
01130     float64_t result;
01131 
01132     ai=1.0/a;
01133     u=(1.0-b)*x;
01134     v=u/(a+1.0);
01135     t1=v;
01136     t=u;
01137     n=2.0;
01138     s=0.0;
01139     z=CMath::MACHINE_EPSILON*ai;
01140     while (greater(CMath::abs(v), z))
01141     {
01142         u=(n-b)*x/n;
01143         t=t*u;
01144         v=t/(a+n);
01145         s=s+v;
01146         n=n+1.0;
01147     }
01148     s=s+t1;
01149     s=s+ai;
01150     u=a*CMath::log(x);
01151     if (less(a+b, maxgam)
01152              && less(CMath::abs(u), CMath::log(CMath::MAX_REAL_NUMBER)))
01153     {
01154         t=tgamma(a+b)/(tgamma(a)*tgamma(b));
01155         s=s*t*CMath::pow(x, a);
01156     }
01157     else
01158     {
01159         t=lgamma(a+b)-lgamma(a)-lgamma(b)+u+CMath::log(s);
01160         if (less(t, CMath::log(CMath::MIN_REAL_NUMBER)))
01161         {
01162             s=0.0;
01163         }
01164         else
01165         {
01166             s=CMath::exp(t);
01167         }
01168     }
01169     result=s;
01170     return result;
01171 }
01172 
01173 float64_t CStatistics::ibetaf_incompletebetafe(float64_t a, float64_t b,
01174         float64_t x, float64_t big, float64_t biginv)
01175 {
01176     float64_t xk;
01177     float64_t pk;
01178     float64_t pkm1;
01179     float64_t pkm2;
01180     float64_t qk;
01181     float64_t qkm1;
01182     float64_t qkm2;
01183     float64_t k1;
01184     float64_t k2;
01185     float64_t k3;
01186     float64_t k4;
01187     float64_t k5;
01188     float64_t k6;
01189     float64_t k7;
01190     float64_t k8;
01191     float64_t r;
01192     float64_t t;
01193     float64_t ans;
01194     float64_t thresh;
01195     int32_t n;
01196     float64_t result;
01197 
01198     k1=a;
01199     k2=a+b;
01200     k3=a;
01201     k4=a+1.0;
01202     k5=1.0;
01203     k6=b-1.0;
01204     k7=k4;
01205     k8=a+2.0;
01206     pkm2=0.0;
01207     qkm2=1.0;
01208     pkm1=1.0;
01209     qkm1=1.0;
01210     ans=1.0;
01211     r=1.0;
01212     n=0;
01213     thresh=3.0*CMath::MACHINE_EPSILON;
01214     do
01215     {
01216         xk=-x*k1*k2/(k3*k4);
01217         pk=pkm1+pkm2*xk;
01218         qk=qkm1+qkm2*xk;
01219         pkm2=pkm1;
01220         pkm1=pk;
01221         qkm2=qkm1;
01222         qkm1=qk;
01223         xk=x*k5*k6/(k7*k8);
01224         pk=pkm1+pkm2*xk;
01225         qk=qkm1+qkm2*xk;
01226         pkm2=pkm1;
01227         pkm1=pk;
01228         qkm2=qkm1;
01229         qkm1=qk;
01230         if (not_equal(qk, 0))
01231         {
01232             r=pk/qk;
01233         }
01234         if (not_equal(r, 0))
01235         {
01236             t=CMath::abs((ans-r)/r);
01237             ans=r;
01238         }
01239         else
01240         {
01241             t=1.0;
01242         }
01243         if (less(t, thresh))
01244         {
01245             break;
01246         }
01247         k1=k1+1.0;
01248         k2=k2+1.0;
01249         k3=k3+2.0;
01250         k4=k4+2.0;
01251         k5=k5+1.0;
01252         k6=k6-1.0;
01253         k7=k7+2.0;
01254         k8=k8+2.0;
01255         if (greater(CMath::abs(qk)+CMath::abs(pk), big))
01256         {
01257             pkm2=pkm2*biginv;
01258             pkm1=pkm1*biginv;
01259             qkm2=qkm2*biginv;
01260             qkm1=qkm1*biginv;
01261         }
01262         if (less(CMath::abs(qk), biginv) || less(CMath::abs(pk), biginv))
01263         {
01264             pkm2=pkm2*big;
01265             pkm1=pkm1*big;
01266             qkm2=qkm2*big;
01267             qkm1=qkm1*big;
01268         }
01269         n=n+1;
01270     }
01271     while (n!=300);
01272     result=ans;
01273     return result;
01274 }
01275 
01276 float64_t CStatistics::ibetaf_incompletebetafe2(float64_t a, float64_t b,
01277         float64_t x, float64_t big, float64_t biginv)
01278 {
01279     float64_t xk;
01280     float64_t pk;
01281     float64_t pkm1;
01282     float64_t pkm2;
01283     float64_t qk;
01284     float64_t qkm1;
01285     float64_t qkm2;
01286     float64_t k1;
01287     float64_t k2;
01288     float64_t k3;
01289     float64_t k4;
01290     float64_t k5;
01291     float64_t k6;
01292     float64_t k7;
01293     float64_t k8;
01294     float64_t r;
01295     float64_t t;
01296     float64_t ans;
01297     float64_t z;
01298     float64_t thresh;
01299     int32_t n;
01300     float64_t result;
01301 
01302     k1=a;
01303     k2=b-1.0;
01304     k3=a;
01305     k4=a+1.0;
01306     k5=1.0;
01307     k6=a+b;
01308     k7=a+1.0;
01309     k8=a+2.0;
01310     pkm2=0.0;
01311     qkm2=1.0;
01312     pkm1=1.0;
01313     qkm1=1.0;
01314     z=x/(1.0-x);
01315     ans=1.0;
01316     r=1.0;
01317     n=0;
01318     thresh=3.0*CMath::MACHINE_EPSILON;
01319     do
01320     {
01321         xk=-z*k1*k2/(k3*k4);
01322         pk=pkm1+pkm2*xk;
01323         qk=qkm1+qkm2*xk;
01324         pkm2=pkm1;
01325         pkm1=pk;
01326         qkm2=qkm1;
01327         qkm1=qk;
01328         xk=z*k5*k6/(k7*k8);
01329         pk=pkm1+pkm2*xk;
01330         qk=qkm1+qkm2*xk;
01331         pkm2=pkm1;
01332         pkm1=pk;
01333         qkm2=qkm1;
01334         qkm1=qk;
01335         if (not_equal(qk, 0))
01336         {
01337             r=pk/qk;
01338         }
01339         if (not_equal(r, 0))
01340         {
01341             t=CMath::abs((ans-r)/r);
01342             ans=r;
01343         }
01344         else
01345         {
01346             t=1.0;
01347         }
01348         if (less(t, thresh))
01349         {
01350             break;
01351         }
01352         k1=k1+1.0;
01353         k2=k2-1.0;
01354         k3=k3+2.0;
01355         k4=k4+2.0;
01356         k5=k5+1.0;
01357         k6=k6+1.0;
01358         k7=k7+2.0;
01359         k8=k8+2.0;
01360         if (greater(CMath::abs(qk)+CMath::abs(pk), big))
01361         {
01362             pkm2=pkm2*biginv;
01363             pkm1=pkm1*biginv;
01364             qkm2=qkm2*biginv;
01365             qkm1=qkm1*biginv;
01366         }
01367         if (less(CMath::abs(qk), biginv) || less(CMath::abs(pk), biginv))
01368         {
01369             pkm2=pkm2*big;
01370             pkm1=pkm1*big;
01371             qkm2=qkm2*big;
01372             qkm1=qkm1*big;
01373         }
01374         n=n+1;
01375     }
01376     while (n!=300);
01377     result=ans;
01378     return result;
01379 }
01380 
01381 float64_t CStatistics::incomplete_gamma(float64_t a, float64_t x)
01382 {
01383     float64_t igammaepsilon;
01384     float64_t ans;
01385     float64_t ax;
01386     float64_t c;
01387     float64_t r;
01388     float64_t result;
01389 
01390     igammaepsilon=0.000000000000001;
01391     if (less_equal(x, 0) || less_equal(a, 0))
01392     {
01393         result=0;
01394         return result;
01395     }
01396     if (greater(x, 1) && greater(x, a))
01397     {
01398         result=1-incomplete_gamma_completed(a, x);
01399         return result;
01400     }
01401     ax=a*CMath::log(x)-x-lgamma(a);
01402     if (less(ax, -709.78271289338399))
01403     {
01404         result=0;
01405         return result;
01406     }
01407     ax=CMath::exp(ax);
01408     r=a;
01409     c=1;
01410     ans=1;
01411     do
01412     {
01413         r=r+1;
01414         c=c*x/r;
01415         ans=ans+c;
01416     }
01417     while (greater(c/ans, igammaepsilon));
01418     result=ans*ax/a;
01419     return result;
01420 }
01421 
01422 float64_t CStatistics::incomplete_gamma_completed(float64_t a, float64_t x)
01423 {
01424     float64_t igammaepsilon;
01425     float64_t igammabignumber;
01426     float64_t igammabignumberinv;
01427     float64_t ans;
01428     float64_t ax;
01429     float64_t c;
01430     float64_t yc;
01431     float64_t r;
01432     float64_t t;
01433     float64_t y;
01434     float64_t z;
01435     float64_t pk;
01436     float64_t pkm1;
01437     float64_t pkm2;
01438     float64_t qk;
01439     float64_t qkm1;
01440     float64_t qkm2;
01441     float64_t result;
01442 
01443     igammaepsilon=0.000000000000001;
01444     igammabignumber=4503599627370496.0;
01445     igammabignumberinv=2.22044604925031308085*0.0000000000000001;
01446     if (less_equal(x, 0) || less_equal(a, 0))
01447     {
01448         result=1;
01449         return result;
01450     }
01451     if (less(x, 1) || less(x, a))
01452     {
01453         result=1-incomplete_gamma(a, x);
01454         return result;
01455     }
01456     ax=a*CMath::log(x)-x-lgamma(a);
01457     if (less(ax, -709.78271289338399))
01458     {
01459         result=0;
01460         return result;
01461     }
01462     ax=CMath::exp(ax);
01463     y=1-a;
01464     z=x+y+1;
01465     c=0;
01466     pkm2=1;
01467     qkm2=x;
01468     pkm1=x+1;
01469     qkm1=z*x;
01470     ans=pkm1/qkm1;
01471     do
01472     {
01473         c=c+1;
01474         y=y+1;
01475         z=z+2;
01476         yc=y*c;
01477         pk=pkm1*z-pkm2*yc;
01478         qk=qkm1*z-qkm2*yc;
01479         if (not_equal(qk, 0))
01480         {
01481             r=pk/qk;
01482             t=CMath::abs((ans-r)/r);
01483             ans=r;
01484         }
01485         else
01486         {
01487             t=1;
01488         }
01489         pkm2=pkm1;
01490         pkm1=pk;
01491         qkm2=qkm1;
01492         qkm1=qk;
01493         if (greater(CMath::abs(pk), igammabignumber))
01494         {
01495             pkm2=pkm2*igammabignumberinv;
01496             pkm1=pkm1*igammabignumberinv;
01497             qkm2=qkm2*igammabignumberinv;
01498             qkm1=qkm1*igammabignumberinv;
01499         }
01500     }
01501     while (greater(t, igammaepsilon));
01502     result=ans*ax;
01503     return result;
01504 }
01505 
01506 float64_t CStatistics::gamma_cdf(float64_t x, float64_t a, float64_t b)
01507 {
01508     /* definition of wikipedia: incomplete gamma devised by true gamma */
01509     return incomplete_gamma(a, x/b);
01510 }
01511 
01512 float64_t CStatistics::inverse_gamma_cdf(float64_t p, float64_t a,
01513         float64_t b)
01514 {
01515     /* inverse of gamma(a,b) CDF is
01516      * inverse_incomplete_gamma_completed(a, 1. - p) * b */
01517     return inverse_incomplete_gamma_completed(a, 1-p)*b;
01518 }
01519 
01520 float64_t CStatistics::inverse_incomplete_gamma_completed(float64_t a,
01521         float64_t y0)
01522 {
01523     float64_t igammaepsilon;
01524     float64_t iinvgammabignumber;
01525     float64_t x0;
01526     float64_t x1;
01527     float64_t x;
01528     float64_t yl;
01529     float64_t yh;
01530     float64_t y;
01531     float64_t d;
01532     float64_t lgm;
01533     float64_t dithresh;
01534     int32_t i;
01535     int32_t dir;
01536     float64_t result;
01537 
01538     igammaepsilon=0.000000000000001;
01539     iinvgammabignumber=4503599627370496.0;
01540     x0=iinvgammabignumber;
01541     yl=0;
01542     x1=0;
01543     yh=1;
01544     dithresh=5*igammaepsilon;
01545     d=1/(9*a);
01546     y=1-d-inverse_normal_cdf(y0)*CMath::sqrt(d);
01547     x=a*y*y*y;
01548     lgm=lgamma(a);
01549     i=0;
01550     while (i<10)
01551     {
01552         if (greater(x, x0) || less(x, x1))
01553         {
01554             d=0.0625;
01555             break;
01556         }
01557         y=incomplete_gamma_completed(a, x);
01558         if (less(y, yl) || greater(y, yh))
01559         {
01560             d=0.0625;
01561             break;
01562         }
01563         if (less(y, y0))
01564         {
01565             x0=x;
01566             yl=y;
01567         }
01568         else
01569         {
01570             x1=x;
01571             yh=y;
01572         }
01573         d=(a-1)*CMath::log(x)-x-lgm;
01574         if (less(d, -709.78271289338399))
01575         {
01576             d=0.0625;
01577             break;
01578         }
01579         d=-CMath::exp(d);
01580         d=(y-y0)/d;
01581         if (less(CMath::abs(d/x), igammaepsilon))
01582         {
01583             result=x;
01584             return result;
01585         }
01586         x=x-d;
01587         i=i+1;
01588     }
01589     if (equal(x0, iinvgammabignumber))
01590     {
01591         if (less_equal(x, 0))
01592         {
01593             x=1;
01594         }
01595         while (equal(x0, iinvgammabignumber))
01596         {
01597             x=(1+d)*x;
01598             y=incomplete_gamma_completed(a, x);
01599             if (less(y, y0))
01600             {
01601                 x0=x;
01602                 yl=y;
01603                 break;
01604             }
01605             d=d+d;
01606         }
01607     }
01608     d=0.5;
01609     dir=0;
01610     i=0;
01611     while (i<400)
01612     {
01613         x=x1+d*(x0-x1);
01614         y=incomplete_gamma_completed(a, x);
01615         lgm=(x0-x1)/(x1+x0);
01616         if (less(CMath::abs(lgm), dithresh))
01617         {
01618             break;
01619         }
01620         lgm=(y-y0)/y0;
01621         if (less(CMath::abs(lgm), dithresh))
01622         {
01623             break;
01624         }
01625         if (less_equal(x, 0.0))
01626         {
01627             break;
01628         }
01629         if (greater_equal(y, y0))
01630         {
01631             x1=x;
01632             yh=y;
01633             if (dir<0)
01634             {
01635                 dir=0;
01636                 d=0.5;
01637             }
01638             else
01639             {
01640                 if (dir>1)
01641                 {
01642                     d=0.5*d+0.5;
01643                 }
01644                 else
01645                 {
01646                     d=(y0-yl)/(yh-yl);
01647                 }
01648             }
01649             dir=dir+1;
01650         }
01651         else
01652         {
01653             x0=x;
01654             yl=y;
01655             if (dir>0)
01656             {
01657                 dir=0;
01658                 d=0.5;
01659             }
01660             else
01661             {
01662                 if (dir<-1)
01663                 {
01664                     d=0.5*d;
01665                 }
01666                 else
01667                 {
01668                     d=(y0-yl)/(yh-yl);
01669                 }
01670             }
01671             dir=dir-1;
01672         }
01673         i=i+1;
01674     }
01675     result=x;
01676     return result;
01677 }
01678 
01679 float64_t CStatistics::normal_cdf(float64_t x, float64_t std_dev)
01680 {
01681     return 0.5*(error_function(x/std_dev/1.41421356237309504880)+1);
01682 }
01683 
01684 float64_t CStatistics::error_function(float64_t x)
01685 {
01686     float64_t xsq;
01687     float64_t s;
01688     float64_t p;
01689     float64_t q;
01690     float64_t result;
01691 
01692     s=CMath::sign(x);
01693     x=CMath::abs(x);
01694     if (less(x, 0.5))
01695     {
01696         xsq=x*x;
01697         p=0.007547728033418631287834;
01698         p=0.288805137207594084924010+xsq*p;
01699         p=14.3383842191748205576712+xsq*p;
01700         p=38.0140318123903008244444+xsq*p;
01701         p=3017.82788536507577809226+xsq*p;
01702         p=7404.07142710151470082064+xsq*p;
01703         p=80437.3630960840172832162+xsq*p;
01704         q=0.0;
01705         q=1.00000000000000000000000+xsq*q;
01706         q=38.0190713951939403753468+xsq*q;
01707         q=658.070155459240506326937+xsq*q;
01708         q=6379.60017324428279487120+xsq*q;
01709         q=34216.5257924628539769006+xsq*q;
01710         q=80437.3630960840172826266+xsq*q;
01711         result=s*1.1283791670955125738961589031*x*p/q;
01712         return result;
01713     }
01714     if (greater_equal(x, 10))
01715     {
01716         result=s;
01717         return result;
01718     }
01719     result=s*(1-error_function_complement(x));
01720     return result;
01721 }
01722 
01723 float64_t CStatistics::error_function_complement(float64_t x)
01724 {
01725     float64_t p;
01726     float64_t q;
01727     float64_t result;
01728 
01729     if (less(x, 0))
01730     {
01731         result=2-error_function_complement(-x);
01732         return result;
01733     }
01734     if (less(x, 0.5))
01735     {
01736         result=1.0-error_function(x);
01737         return result;
01738     }
01739     if (greater_equal(x, 10))
01740     {
01741         result=0;
01742         return result;
01743     }
01744     p=0.0;
01745     p=0.5641877825507397413087057563+x*p;
01746     p=9.675807882987265400604202961+x*p;
01747     p=77.08161730368428609781633646+x*p;
01748     p=368.5196154710010637133875746+x*p;
01749     p=1143.262070703886173606073338+x*p;
01750     p=2320.439590251635247384768711+x*p;
01751     p=2898.0293292167655611275846+x*p;
01752     p=1826.3348842295112592168999+x*p;
01753     q=1.0;
01754     q=17.14980943627607849376131193+x*q;
01755     q=137.1255960500622202878443578+x*q;
01756     q=661.7361207107653469211984771+x*q;
01757     q=2094.384367789539593790281779+x*q;
01758     q=4429.612803883682726711528526+x*q;
01759     q=6089.5424232724435504633068+x*q;
01760     q=4958.82756472114071495438422+x*q;
01761     q=1826.3348842295112595576438+x*q;
01762     result=CMath::exp(-x*x)*p/q;
01763     return result;
01764 }
01765 
01766 SGVector<float64_t> CStatistics::fishers_exact_test_for_multiple_2x3_tables(
01767         SGMatrix<float64_t> tables)
01768 {
01769     SGMatrix<float64_t> table(NULL, 2, 3, false);
01770     int32_t len=tables.num_cols/3;
01771 
01772     SGVector<float64_t> v(len);
01773     for (int32_t i=0; i<len; i++)
01774     {
01775         table.matrix=&tables.matrix[2*3*i];
01776         v.vector[i]=fishers_exact_test_for_2x3_table(table);
01777     }
01778     return v;
01779 }
01780 
01781 float64_t CStatistics::fishers_exact_test_for_2x3_table(
01782         SGMatrix<float64_t> table)
01783 {
01784     ASSERT(table.num_rows==2);
01785     ASSERT(table.num_cols==3);
01786 
01787     int32_t m_len=3+2;
01788     float64_t* m=SG_MALLOC(float64_t, 3+2);
01789     m[0]=table.matrix[0]+table.matrix[2]+table.matrix[4];
01790     m[1]=table.matrix[1]+table.matrix[3]+table.matrix[5];
01791     m[2]=table.matrix[0]+table.matrix[1];
01792     m[3]=table.matrix[2]+table.matrix[3];
01793     m[4]=table.matrix[4]+table.matrix[5];
01794 
01795     float64_t n=SGVector<float64_t>::sum(m, m_len)/2.0;
01796     int32_t x_len=2*3*CMath::sq(SGVector<float64_t>::max(m, m_len));
01797     float64_t* x=SG_MALLOC(float64_t, x_len);
01798     SGVector<float64_t>::fill_vector(x, x_len, 0.0);
01799 
01800     float64_t log_nom=0.0;
01801     for (int32_t i=0; i<3+2; i++)
01802         log_nom+=lgamma(m[i]+1);
01803     log_nom-=lgamma(n+1.0);
01804 
01805     float64_t log_denomf=0;
01806     floatmax_t log_denom=0;
01807 
01808     for (int32_t i=0; i<3*2; i++)
01809     {
01810         log_denom+=lgammal((floatmax_t)table.matrix[i]+1);
01811         log_denomf+=lgammal((floatmax_t)table.matrix[i]+1);
01812     }
01813 
01814     floatmax_t prob_table_log=log_nom-log_denom;
01815 
01816     int32_t dim1=CMath::min(m[0], m[2]);
01817 
01818     //traverse all possible tables with given m
01819     int32_t counter=0;
01820     for (int32_t k=0; k<=dim1; k++)
01821     {
01822         for (int32_t l=CMath::max(0.0, m[0]-m[4]-k);
01823                 l<=CMath::min(m[0]-k, m[3]); l++)
01824         {
01825             x[0+0*2+counter*2*3]=k;
01826             x[0+1*2+counter*2*3]=l;
01827             x[0+2*2+counter*2*3]=m[0]-x[0+0*2+counter*2*3]-x[0+1*2+counter*2*3];
01828             x[1+0*2+counter*2*3]=m[2]-x[0+0*2+counter*2*3];
01829             x[1+1*2+counter*2*3]=m[3]-x[0+1*2+counter*2*3];
01830             x[1+2*2+counter*2*3]=m[4]-x[0+2*2+counter*2*3];
01831 
01832             counter++;
01833         }
01834     }
01835 
01836 //#define DEBUG_FISHER_TABLE
01837 #ifdef DEBUG_FISHER_TABLE
01838     SG_SPRINT("counter=%d\n", counter);
01839     SG_SPRINT("dim1=%d\n", dim1);
01840     SG_SPRINT("l=%g...%g\n", CMath::max(0.0,m[0]-m[4]-0), CMath::min(m[0]-0, m[3]));
01841     SG_SPRINT("n=%g\n", n);
01842     SG_SPRINT("prob_table_log=%.18Lg\n", prob_table_log);
01843     SG_SPRINT("log_denomf=%.18g\n", log_denomf);
01844     SG_SPRINT("log_denom=%.18Lg\n", log_denom);
01845     SG_SPRINT("log_nom=%.18g\n", log_nom);
01846     display_vector(m, m_len, "marginals");
01847     display_vector(x, 2*3*counter, "x");
01848 #endif // DEBUG_FISHER_TABLE
01849 
01850     floatmax_t* log_denom_vec=SG_MALLOC(floatmax_t, counter);
01851     SGVector<floatmax_t>::fill_vector(log_denom_vec, counter, (floatmax_t)0.0);
01852 
01853     for (int32_t k=0; k<counter; k++)
01854     {
01855         for (int32_t j=0; j<3; j++)
01856         {
01857             for (int32_t i=0; i<2; i++)
01858                 log_denom_vec[k]+=lgammal(x[i+j*2+k*2*3]+1.0);
01859         }
01860     }
01861 
01862     for (int32_t i=0; i<counter; i++)
01863         log_denom_vec[i]=log_nom-log_denom_vec[i];
01864 
01865 #ifdef DEBUG_FISHER_TABLE
01866     display_vector(log_denom_vec, counter, "log_denom_vec");
01867 #endif // DEBUG_FISHER_TABLE
01868 
01869     float64_t nonrand_p=-CMath::INFTY;
01870     for (int32_t i=0; i<counter; i++)
01871     {
01872         if (log_denom_vec[i]<=prob_table_log)
01873             nonrand_p=CMath::logarithmic_sum(nonrand_p, log_denom_vec[i]);
01874     }
01875 
01876 #ifdef DEBUG_FISHER_TABLE
01877     SG_SPRINT("nonrand_p=%.18g\n", nonrand_p);
01878     SG_SPRINT("exp_nonrand_p=%.18g\n", CMath::exp(nonrand_p));
01879 #endif // DEBUG_FISHER_TABLE
01880     nonrand_p=CMath::exp(nonrand_p);
01881 
01882     SG_FREE(log_denom_vec);
01883     SG_FREE(x);
01884     SG_FREE(m);
01885 
01886     return nonrand_p;
01887 }
01888 
01889 float64_t CStatistics::mutual_info(float64_t* p1, float64_t* p2, int32_t len)
01890 {
01891     double e=0;
01892 
01893     for (int32_t i=0; i<len; i++)
01894         for (int32_t j=0; j<len; j++)
01895             e+=exp(p2[j*len+i])*(p2[j*len+i]-p1[i]-p1[j]);
01896 
01897     return (float64_t)e;
01898 }
01899 
01900 float64_t CStatistics::relative_entropy(float64_t* p, float64_t* q, int32_t len)
01901 {
01902     double e=0;
01903 
01904     for (int32_t i=0; i<len; i++)
01905         e+=exp(p[i])*(p[i]-q[i]);
01906 
01907     return (float64_t)e;
01908 }
01909 
01910 float64_t CStatistics::entropy(float64_t* p, int32_t len)
01911 {
01912     double e=0;
01913 
01914     for (int32_t i=0; i<len; i++)
01915         e-=exp(p[i])*p[i];
01916 
01917     return (float64_t)e;
01918 }
01919 
01920 SGVector<int32_t> CStatistics::sample_indices(int32_t sample_size, int32_t N)
01921 {
01922     REQUIRE(sample_size<N,
01923             "sample size should be less than number of indices\n");
01924     int32_t* idxs=SG_MALLOC(int32_t,N);
01925     int32_t i, rnd;
01926     int32_t* permuted_idxs=SG_MALLOC(int32_t,sample_size);
01927 
01928     // reservoir sampling
01929     for (i=0; i<N; i++)
01930         idxs[i]=i;
01931     for (i=0; i<sample_size; i++)
01932         permuted_idxs[i]=idxs[i];
01933     for (i=sample_size; i<N; i++)
01934     {
01935         rnd=CMath::random(1, i);
01936         if (rnd<sample_size)
01937             permuted_idxs[rnd]=idxs[i];
01938     }
01939     SG_FREE(idxs);
01940 
01941     CMath::qsort(permuted_idxs, sample_size);
01942     return SGVector<int32_t>(permuted_idxs, sample_size);
01943 }
01944 
01945 float64_t CStatistics::dlgamma(float64_t x)
01946 {
01947     x = x+6.0;
01948         float64_t df = 1./(x*x);
01949         df = (((df/240-0.003968253986254)*df+1/120.0)*df-1/120.0)*df;
01950         df = df+log(x)-0.5/x-1.0/(x-1.0)-1.0/(x-2.0)-1.0/
01951                           (x-3.0)-1.0/(x-4.0)-1.0/(x-5.0)-1.0/(x-6.0);
01952         return df;
01953 }
01954 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation