00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 #include <shogun/mathematics/Statistics.h>
00017 #include <shogun/mathematics/Math.h>
00018 #include <shogun/lib/SGMatrix.h>
00019 #include <shogun/lib/SGVector.h>
00020
00021 #ifdef HAVE_LAPACK
00022 #include <shogun/mathematics/lapack.h>
00023 #endif //HAVE_LAPACK
00024 using namespace shogun;
00025
00026 float64_t CStatistics::mean(SGVector<float64_t> values)
00027 {
00028 ASSERT(values.vlen>0);
00029 ASSERT(values.vector);
00030
00031 float64_t sum=0;
00032 for (index_t i=0; i<values.vlen; ++i)
00033 sum+=values.vector[i];
00034
00035 return sum/values.vlen;
00036 }
00037
00038 float64_t CStatistics::median(SGVector<float64_t> values, bool modify,
00039 bool in_place)
00040 {
00041 float64_t result;
00042 if (modify)
00043 {
00044
00045
00046
00047
00048
00049
00050
00051 int32_t low;
00052 int32_t high;
00053 int32_t median;
00054 int32_t middle;
00055 int32_t l;
00056 int32_t h;
00057
00058 low=0;
00059 high=values.vlen-1;
00060 median=(low+high)/2;
00061
00062 while (true)
00063 {
00064 if (high<=low)
00065 {
00066 result=values[median];
00067 break;
00068 }
00069
00070 if (high==low+1)
00071 {
00072 if (values[low]>values[high])
00073 CMath::CMath::swap(values[low], values[high]);
00074 result=values[median];
00075 break;
00076 }
00077
00078 middle=(low+high)/2;
00079 if (values[middle]>values[high])
00080 CMath::swap(values[middle], values[high]);
00081 if (values[low]>values[high])
00082 CMath::swap(values[low], values[high]);
00083 if (values[middle]>values[low])
00084 CMath::swap(values[middle], values[low]);
00085
00086 CMath::swap(values[middle], values[low+1]);
00087
00088 l=low+1;
00089 h=high;
00090 for (;;)
00091 {
00092 do
00093 l++;
00094 while (values[low]>values[l]);
00095 do
00096 h--;
00097 while (values[h]>values[low]);
00098 if (h<l)
00099 break;
00100 CMath::swap(values[l], values[h]);
00101 }
00102
00103 CMath::swap(values[low], values[h]);
00104 if (h<=median)
00105 low=l;
00106 if (h>=median)
00107 high=h-1;
00108 }
00109
00110 }
00111 else
00112 {
00113 if (in_place)
00114 {
00115
00116
00117
00118
00119
00120
00121 int32_t i;
00122 int32_t less;
00123 int32_t greater;
00124 int32_t equal;
00125 float64_t min;
00126 float64_t max;
00127 float64_t guess;
00128 float64_t maxltguess;
00129 float64_t mingtguess;
00130 min=max=values[0];
00131 for (i=1; i<values.vlen; i++)
00132 {
00133 if (values[i]<min)
00134 min=values[i];
00135 if (values[i]>max)
00136 max=values[i];
00137 }
00138 while (1)
00139 {
00140 guess=(min+max)/2;
00141 less=0;
00142 greater=0;
00143 equal=0;
00144 maxltguess=min;
00145 mingtguess=max;
00146 for (i=0; i<values.vlen; i++)
00147 {
00148 if (values[i]<guess)
00149 {
00150 less++;
00151 if (values[i]>maxltguess)
00152 maxltguess=values[i];
00153 }
00154 else if (values[i]>guess)
00155 {
00156 greater++;
00157 if (values[i]<mingtguess)
00158 mingtguess=values[i];
00159 }
00160 else
00161 equal++;
00162 }
00163 if (less<=(values.vlen+1)/2&&greater<=(values.vlen+1)/2)
00164 break;
00165 else if (less>greater)
00166 max=maxltguess;
00167 else
00168 min=mingtguess;
00169 }
00170
00171 if (less>=(values.vlen+1)/2)
00172 result=maxltguess;
00173 else if (less+equal>=(values.vlen+1)/2)
00174 result=guess;
00175 else
00176 result=mingtguess;
00177 }
00178 else
00179 {
00180
00181 SGVector<float64_t> copy(values.vlen);
00182 memcpy(copy.vector, values.vector, sizeof(float64_t)*values.vlen);
00183 result=median(copy, true);
00184 }
00185 }
00186
00187 return result;
00188 }
00189
00190 float64_t CStatistics::matrix_median(SGMatrix<float64_t> values,
00191 bool modify, bool in_place)
00192 {
00193
00194 SGVector<float64_t> as_vector(values.matrix,
00195 values.num_rows*values.num_cols, false);
00196
00197
00198 return median(as_vector, modify, in_place);
00199 }
00200
00201
00202 float64_t CStatistics::variance(SGVector<float64_t> values)
00203 {
00204 ASSERT(values.vlen>1);
00205 ASSERT(values.vector);
00206
00207 float64_t mean=CStatistics::mean(values);
00208
00209 float64_t sum_squared_diff=0;
00210 for (index_t i=0; i<values.vlen; ++i)
00211 sum_squared_diff+=CMath::pow(values.vector[i]-mean, 2);
00212
00213 return sum_squared_diff/(values.vlen-1);
00214 }
00215
00216 SGVector<float64_t> CStatistics::matrix_mean(SGMatrix<float64_t> values,
00217 bool col_wise)
00218 {
00219 ASSERT(values.num_rows>0);
00220 ASSERT(values.num_cols>0);
00221 ASSERT(values.matrix);
00222
00223 SGVector<float64_t> result;
00224
00225 if (col_wise)
00226 {
00227 result=SGVector<float64_t>(values.num_cols);
00228 for (index_t j=0; j<values.num_cols; ++j)
00229 {
00230 result[j]=0;
00231 for (index_t i=0; i<values.num_rows; ++i)
00232 result[j]+=values(i,j);
00233
00234 result[j]/=values.num_rows;
00235 }
00236 }
00237 else
00238 {
00239 result=SGVector<float64_t>(values.num_rows);
00240 for (index_t j=0; j<values.num_rows; ++j)
00241 {
00242 result[j]=0;
00243 for (index_t i=0; i<values.num_cols; ++i)
00244 result[j]+=values(j,i);
00245
00246 result[j]/=values.num_cols;
00247 }
00248 }
00249
00250 return result;
00251 }
00252
00253 SGVector<float64_t> CStatistics::matrix_variance(SGMatrix<float64_t> values,
00254 bool col_wise)
00255 {
00256 ASSERT(values.num_rows>0);
00257 ASSERT(values.num_cols>0);
00258 ASSERT(values.matrix);
00259
00260
00261 SGVector<float64_t> mean=CStatistics::matrix_mean(values, col_wise);
00262
00263 SGVector<float64_t> result;
00264
00265 if (col_wise)
00266 {
00267 result=SGVector<float64_t>(values.num_cols);
00268 for (index_t j=0; j<values.num_cols; ++j)
00269 {
00270 result[j]=0;
00271 for (index_t i=0; i<values.num_rows; ++i)
00272 result[j]+=CMath::pow(values(i,j)-mean[j], 2);
00273
00274 result[j]/=(values.num_rows-1);
00275 }
00276 }
00277 else
00278 {
00279 result=SGVector<float64_t>(values.num_rows);
00280 for (index_t j=0; j<values.num_rows; ++j)
00281 {
00282 result[j]=0;
00283 for (index_t i=0; i<values.num_cols; ++i)
00284 result[j]+=CMath::pow(values(j,i)-mean[j], 2);
00285
00286 result[j]/=(values.num_cols-1);
00287 }
00288 }
00289
00290 return result;
00291 }
00292
00293 float64_t CStatistics::std_deviation(SGVector<float64_t> values)
00294 {
00295 return CMath::sqrt(variance(values));
00296 }
00297
00298 SGVector<float64_t> CStatistics::matrix_std_deviation(
00299 SGMatrix<float64_t> values, bool col_wise)
00300 {
00301 SGVector<float64_t> var=CStatistics::matrix_variance(values, col_wise);
00302 for (index_t i=0; i<var.vlen; ++i)
00303 var[i]=CMath::sqrt(var[i]);
00304
00305 return var;
00306 }
00307
00308 #ifdef HAVE_LAPACK
00309 SGMatrix<float64_t> CStatistics::covariance_matrix(
00310 SGMatrix<float64_t> observations, bool in_place)
00311 {
00312 SGMatrix<float64_t> centered=
00313 in_place ?
00314 observations :
00315 SGMatrix<float64_t>(observations.num_rows,
00316 observations.num_cols);
00317
00318 if (!in_place)
00319 {
00320 memcpy(centered.matrix, observations.matrix,
00321 sizeof(float64_t)*observations.num_rows*observations.num_cols);
00322 }
00323 centered.remove_column_mean();
00324
00325
00326 SGMatrix<float64_t> cov=SGMatrix<float64_t>::matrix_multiply(centered,
00327 centered, true, false, 1.0/(observations.num_rows-1));
00328
00329 return cov;
00330 }
00331 #endif //HAVE_LAPACK
00332
00333 float64_t CStatistics::confidence_intervals_mean(SGVector<float64_t> values,
00334 float64_t alpha, float64_t& conf_int_low, float64_t& conf_int_up)
00335 {
00336 ASSERT(values.vlen>1);
00337 ASSERT(values.vector);
00338
00339
00340 alpha=alpha/2;
00341
00342
00343 int32_t deg=values.vlen-1;
00344
00345
00346 float64_t t=CMath::abs(inverse_student_t(deg, alpha));
00347
00348
00349 float64_t std_dev=CStatistics::std_deviation(values);
00350 float64_t mean=CStatistics::mean(values);
00351
00352
00353 float64_t interval=t*std_dev/CMath::sqrt((float64_t)values.vlen);
00354 conf_int_low=mean-interval;
00355 conf_int_up=mean+interval;
00356
00357 return mean;
00358 }
00359
00360 float64_t CStatistics::inverse_student_t(int32_t k, float64_t p)
00361 {
00362 float64_t t;
00363 float64_t rk;
00364 float64_t z;
00365 int32_t rflg;
00366 float64_t result;
00367
00368 if (!(k>0 && greater(p, 0)) && less(p, 1))
00369 {
00370 SG_SERROR("CStatistics::inverse_student_t_distribution(): "
00371 "Domain error\n");
00372 }
00373 rk=k;
00374 if (greater(p, 0.25) && less(p, 0.75))
00375 {
00376 if (equal(p, 0.5))
00377 {
00378 result=0;
00379 return result;
00380 }
00381 z=1.0-2.0*p;
00382 z=inverse_incomplete_beta(0.5, 0.5*rk, CMath::abs(z));
00383 t=CMath::sqrt(rk*z/(1.0-z));
00384 if (less(p, 0.5))
00385 {
00386 t=-t;
00387 }
00388 result=t;
00389 return result;
00390 }
00391 rflg=-1;
00392 if (greater_equal(p, 0.5))
00393 {
00394 p=1.0-p;
00395 rflg=1;
00396 }
00397 z=inverse_incomplete_beta(0.5*rk, 0.5, 2.0*p);
00398 if (less(CMath::MAX_REAL_NUMBER*z, rk))
00399 {
00400 result=rflg*CMath::MAX_REAL_NUMBER;
00401 return result;
00402 }
00403 t=CMath::sqrt(rk/z-rk);
00404 result=rflg*t;
00405 return result;
00406 }
00407
00408 float64_t CStatistics::inverse_incomplete_beta(float64_t a, float64_t b,
00409 float64_t y)
00410 {
00411 float64_t aaa;
00412 float64_t bbb;
00413 float64_t y0;
00414 float64_t d;
00415 float64_t yyy;
00416 float64_t x;
00417 float64_t x0;
00418 float64_t x1;
00419 float64_t lgm;
00420 float64_t yp;
00421 float64_t di;
00422 float64_t dithresh;
00423 float64_t yl;
00424 float64_t yh;
00425 float64_t xt;
00426 int32_t i;
00427 int32_t rflg;
00428 int32_t dir;
00429 int32_t nflg;
00430 int32_t mainlooppos;
00431 int32_t ihalve;
00432 int32_t ihalvecycle;
00433 int32_t newt;
00434 int32_t newtcycle;
00435 int32_t breaknewtcycle;
00436 int32_t breakihalvecycle;
00437 float64_t result;
00438
00439 i=0;
00440 if (!(greater_equal(y, 0) && less_equal(y, 1)))
00441 {
00442 SG_SERROR("CStatistics::inverse_incomplete_beta(): "
00443 "Domain error\n");
00444 }
00445
00446
00447
00448
00449 if (equal(y, 0))
00450 {
00451 result=0;
00452 return result;
00453 }
00454 if (equal(y, 1.0))
00455 {
00456 result=1;
00457 return result;
00458 }
00459
00460
00461
00462
00463
00464 dithresh=0;
00465 rflg=0;
00466 aaa=0;
00467 bbb=0;
00468 y0=0;
00469 x=0;
00470 yyy=0;
00471 lgm=0;
00472 dir=0;
00473 di=0;
00474
00475
00476
00477
00478 x0=0.0;
00479 yl=0.0;
00480 x1=1.0;
00481 yh=1.0;
00482 nflg=0;
00483 mainlooppos=0;
00484 ihalve=1;
00485 ihalvecycle=2;
00486 newt=3;
00487 newtcycle=4;
00488 breaknewtcycle=5;
00489 breakihalvecycle=6;
00490
00491
00492
00493
00494 for (;;)
00495 {
00496
00497
00498
00499
00500 if (mainlooppos==0)
00501 {
00502 if (less_equal(a, 1.0) || less_equal(b, 1.0))
00503 {
00504 dithresh=1.0e-6;
00505 rflg=0;
00506 aaa=a;
00507 bbb=b;
00508 y0=y;
00509 x=aaa/(aaa+bbb);
00510 yyy=incomplete_beta(aaa, bbb, x);
00511 mainlooppos=ihalve;
00512 continue;
00513 }
00514 else
00515 {
00516 dithresh=1.0e-4;
00517 }
00518 yp=-inverse_normal_cdf(y);
00519 if (greater(y, 0.5))
00520 {
00521 rflg=1;
00522 aaa=b;
00523 bbb=a;
00524 y0=1.0-y;
00525 yp=-yp;
00526 }
00527 else
00528 {
00529 rflg=0;
00530 aaa=a;
00531 bbb=b;
00532 y0=y;
00533 }
00534 lgm=(yp*yp-3.0)/6.0;
00535 x=2.0/(1.0/(2.0*aaa-1.0)+1.0/(2.0*bbb-1.0));
00536 d=yp*CMath::sqrt(x+lgm)/x
00537 -(1.0/(2.0*bbb-1.0)-1.0/(2.0*aaa-1.0))
00538 *(lgm+5.0/6.0-2.0/(3.0*x));
00539 d=2.0*d;
00540 if (less(d, CMath::log(CMath::MIN_REAL_NUMBER)))
00541 {
00542 x=0;
00543 break;
00544 }
00545 x=aaa/(aaa+bbb*CMath::exp(d));
00546 yyy=incomplete_beta(aaa, bbb, x);
00547 yp=(yyy-y0)/y0;
00548 if (less(CMath::abs(yp), 0.2))
00549 {
00550 mainlooppos=newt;
00551 continue;
00552 }
00553 mainlooppos=ihalve;
00554 continue;
00555 }
00556
00557
00558
00559
00560 if (mainlooppos==ihalve)
00561 {
00562 dir=0;
00563 di=0.5;
00564 i=0;
00565 mainlooppos=ihalvecycle;
00566 continue;
00567 }
00568
00569
00570
00571
00572 if (mainlooppos==ihalvecycle)
00573 {
00574 if (i<=99)
00575 {
00576 if (i!=0)
00577 {
00578 x=x0+di*(x1-x0);
00579 if (equal(x, 1.0))
00580 {
00581 x=1.0-CMath::MACHINE_EPSILON;
00582 }
00583 if (equal(x, 0.0))
00584 {
00585 di=0.5;
00586 x=x0+di*(x1-x0);
00587 if (equal(x, 0.0))
00588 {
00589 break;
00590 }
00591 }
00592 yyy=incomplete_beta(aaa, bbb, x);
00593 yp=(x1-x0)/(x1+x0);
00594 if (less(CMath::abs(yp), dithresh))
00595 {
00596 mainlooppos=newt;
00597 continue;
00598 }
00599 yp=(yyy-y0)/y0;
00600 if (less(CMath::abs(yp), dithresh))
00601 {
00602 mainlooppos=newt;
00603 continue;
00604 }
00605 }
00606 if (less(yyy, y0))
00607 {
00608 x0=x;
00609 yl=yyy;
00610 if (dir<0)
00611 {
00612 dir=0;
00613 di=0.5;
00614 }
00615 else
00616 {
00617 if (dir>3)
00618 {
00619 di=1.0-(1.0-di)*(1.0-di);
00620 }
00621 else
00622 {
00623 if (dir>1)
00624 {
00625 di=0.5*di+0.5;
00626 }
00627 else
00628 {
00629 di=(y0-yyy)/(yh-yl);
00630 }
00631 }
00632 }
00633 dir=dir+1;
00634 if (greater(x0, 0.75))
00635 {
00636 if (rflg==1)
00637 {
00638 rflg=0;
00639 aaa=a;
00640 bbb=b;
00641 y0=y;
00642 }
00643 else
00644 {
00645 rflg=1;
00646 aaa=b;
00647 bbb=a;
00648 y0=1.0-y;
00649 }
00650 x=1.0-x;
00651 yyy=incomplete_beta(aaa, bbb, x);
00652 x0=0.0;
00653 yl=0.0;
00654 x1=1.0;
00655 yh=1.0;
00656 mainlooppos=ihalve;
00657 continue;
00658 }
00659 }
00660 else
00661 {
00662 x1=x;
00663 if (rflg==1 && less(x1, CMath::MACHINE_EPSILON))
00664 {
00665 x=0.0;
00666 break;
00667 }
00668 yh=yyy;
00669 if (dir>0)
00670 {
00671 dir=0;
00672 di=0.5;
00673 }
00674 else
00675 {
00676 if (dir<-3)
00677 {
00678 di=di*di;
00679 }
00680 else
00681 {
00682 if (dir<-1)
00683 {
00684 di=0.5*di;
00685 }
00686 else
00687 {
00688 di=(yyy-y0)/(yh-yl);
00689 }
00690 }
00691 }
00692 dir=dir-1;
00693 }
00694 i=i+1;
00695 mainlooppos=ihalvecycle;
00696 continue;
00697 }
00698 else
00699 {
00700 mainlooppos=breakihalvecycle;
00701 continue;
00702 }
00703 }
00704
00705
00706
00707
00708 if (mainlooppos==breakihalvecycle)
00709 {
00710 if (greater_equal(x0, 1.0))
00711 {
00712 x=1.0-CMath::MACHINE_EPSILON;
00713 break;
00714 }
00715 if (less_equal(x, 0.0))
00716 {
00717 x=0.0;
00718 break;
00719 }
00720 mainlooppos=newt;
00721 continue;
00722 }
00723
00724
00725
00726
00727 if (mainlooppos==newt)
00728 {
00729 if (nflg!=0)
00730 {
00731 break;
00732 }
00733 nflg=1;
00734 lgm=lgamma(aaa+bbb)-lgamma(aaa)-lgamma(bbb);
00735 i=0;
00736 mainlooppos=newtcycle;
00737 continue;
00738 }
00739
00740
00741
00742
00743 if (mainlooppos==newtcycle)
00744 {
00745 if (i<=7)
00746 {
00747 if (i!=0)
00748 {
00749 yyy=incomplete_beta(aaa, bbb, x);
00750 }
00751 if (less(yyy, yl))
00752 {
00753 x=x0;
00754 yyy=yl;
00755 }
00756 else
00757 {
00758 if (greater(yyy, yh))
00759 {
00760 x=x1;
00761 yyy=yh;
00762 }
00763 else
00764 {
00765 if (less(yyy, y0))
00766 {
00767 x0=x;
00768 yl=yyy;
00769 }
00770 else
00771 {
00772 x1=x;
00773 yh=yyy;
00774 }
00775 }
00776 }
00777 if (equal(x, 1.0) || equal(x, 0.0))
00778 {
00779 mainlooppos=breaknewtcycle;
00780 continue;
00781 }
00782 d=(aaa-1.0)*CMath::log(x)+(bbb-1.0)*CMath::log(1.0-x)+lgm;
00783 if (less(d, CMath::log(CMath::MIN_REAL_NUMBER)))
00784 {
00785 break;
00786 }
00787 if (greater(d, CMath::log(CMath::MAX_REAL_NUMBER)))
00788 {
00789 mainlooppos=breaknewtcycle;
00790 continue;
00791 }
00792 d=CMath::exp(d);
00793 d=(yyy-y0)/d;
00794 xt=x-d;
00795 if (less_equal(xt, x0))
00796 {
00797 yyy=(x-x0)/(x1-x0);
00798 xt=x0+0.5*yyy*(x-x0);
00799 if (less_equal(xt, 0.0))
00800 {
00801 mainlooppos=breaknewtcycle;
00802 continue;
00803 }
00804 }
00805 if (greater_equal(xt, x1))
00806 {
00807 yyy=(x1-x)/(x1-x0);
00808 xt=x1-0.5*yyy*(x1-x);
00809 if (greater_equal(xt, 1.0))
00810 {
00811 mainlooppos=breaknewtcycle;
00812 continue;
00813 }
00814 }
00815 x=xt;
00816 if (less(CMath::abs(d/x), 128.0*CMath::MACHINE_EPSILON))
00817 {
00818 break;
00819 }
00820 i=i+1;
00821 mainlooppos=newtcycle;
00822 continue;
00823 }
00824 else
00825 {
00826 mainlooppos=breaknewtcycle;
00827 continue;
00828 }
00829 }
00830
00831
00832
00833
00834 if (mainlooppos==breaknewtcycle)
00835 {
00836 dithresh=256.0*CMath::MACHINE_EPSILON;
00837 mainlooppos=ihalve;
00838 continue;
00839 }
00840 }
00841
00842
00843
00844
00845 if (rflg!=0)
00846 {
00847 if (less_equal(x, CMath::MACHINE_EPSILON))
00848 {
00849 x=1.0-CMath::MACHINE_EPSILON;
00850 }
00851 else
00852 {
00853 x=1.0-x;
00854 }
00855 }
00856 result=x;
00857 return result;
00858 }
00859
00860 float64_t CStatistics::incomplete_beta(float64_t a, float64_t b, float64_t x)
00861 {
00862 float64_t t;
00863 float64_t xc;
00864 float64_t w;
00865 float64_t y;
00866 int32_t flag;
00867 float64_t big;
00868 float64_t biginv;
00869 float64_t maxgam;
00870 float64_t minlog;
00871 float64_t maxlog;
00872 float64_t result;
00873
00874 big=4.503599627370496e15;
00875 biginv=2.22044604925031308085e-16;
00876 maxgam=171.624376956302725;
00877 minlog=CMath::log(CMath::MIN_REAL_NUMBER);
00878 maxlog=CMath::log(CMath::MAX_REAL_NUMBER);
00879
00880 if (!(greater(a, 0) && greater(b, 0)))
00881 {
00882 SG_SERROR("CStatistics::incomplete_beta(): "
00883 "Domain error\n");
00884 }
00885
00886 if (!(greater_equal(x, 0) && less_equal(x, 1)))
00887 {
00888 SG_SERROR("CStatistics::incomplete_beta(): "
00889 "Domain error\n");
00890 }
00891
00892 if (equal(x, 0))
00893 {
00894 result=0;
00895 return result;
00896 }
00897 if (equal(x, 1))
00898 {
00899 result=1;
00900 return result;
00901 }
00902 flag=0;
00903 if (less_equal(b*x, 1.0) && less_equal(x, 0.95))
00904 {
00905 result=ibetaf_incompletebetaps(a, b, x, maxgam);
00906 return result;
00907 }
00908 w=1.0-x;
00909 if (greater(x, a/(a+b)))
00910 {
00911 flag=1;
00912 t=a;
00913 a=b;
00914 b=t;
00915 xc=x;
00916 x=w;
00917 }
00918 else
00919 {
00920 xc=w;
00921 }
00922 if ((flag==1 && less_equal(b*x, 1.0)) && less_equal(x, 0.95))
00923 {
00924 t=ibetaf_incompletebetaps(a, b, x, maxgam);
00925 if (less_equal(t, CMath::MACHINE_EPSILON))
00926 {
00927 result=1.0-CMath::MACHINE_EPSILON;
00928 }
00929 else
00930 {
00931 result=1.0-t;
00932 }
00933 return result;
00934 }
00935 y=x*(a+b-2.0)-(a-1.0);
00936 if (less(y, 0.0))
00937 {
00938 w=ibetaf_incompletebetafe(a, b, x, big, biginv);
00939 }
00940 else
00941 {
00942 w=ibetaf_incompletebetafe2(a, b, x, big, biginv)/xc;
00943 }
00944 y=a*CMath::log(x);
00945 t=b*CMath::log(xc);
00946 if ((less(a+b, maxgam) && less(CMath::abs(y), maxlog))
00947 && less(CMath::abs(t), maxlog))
00948 {
00949 t=CMath::pow(xc, b);
00950 t=t*CMath::pow(x, a);
00951 t=t/a;
00952 t=t*w;
00953 t=t*(tgamma(a+b)/(tgamma(a)*tgamma(b)));
00954 if (flag==1)
00955 {
00956 if (less_equal(t, CMath::MACHINE_EPSILON))
00957 {
00958 result=1.0-CMath::MACHINE_EPSILON;
00959 }
00960 else
00961 {
00962 result=1.0-t;
00963 }
00964 }
00965 else
00966 {
00967 result=t;
00968 }
00969 return result;
00970 }
00971 y=y+t+lgamma(a+b)-lgamma(a)-lgamma(b);
00972 y=y+CMath::log(w/a);
00973 if (less(y, minlog))
00974 {
00975 t=0.0;
00976 }
00977 else
00978 {
00979 t=CMath::exp(y);
00980 }
00981 if (flag==1)
00982 {
00983 if (less_equal(t, CMath::MACHINE_EPSILON))
00984 {
00985 t=1.0-CMath::MACHINE_EPSILON;
00986 }
00987 else
00988 {
00989 t=1.0-t;
00990 }
00991 }
00992 result=t;
00993 return result;
00994 }
00995
00996 float64_t CStatistics::inverse_normal_cdf(float64_t y0, float64_t mean,
00997 float64_t std_dev)
00998 {
00999 return inverse_normal_cdf(y0)*std_dev+mean;
01000 }
01001
01002 float64_t CStatistics::inverse_normal_cdf(float64_t y0)
01003 {
01004 float64_t expm2;
01005 float64_t s2pi;
01006 float64_t x;
01007 float64_t y;
01008 float64_t z;
01009 float64_t y2;
01010 float64_t x0;
01011 float64_t x1;
01012 int32_t code;
01013 float64_t p0;
01014 float64_t q0;
01015 float64_t p1;
01016 float64_t q1;
01017 float64_t p2;
01018 float64_t q2;
01019 float64_t result;
01020
01021 expm2=0.13533528323661269189;
01022 s2pi=2.50662827463100050242;
01023 if (less_equal(y0, 0))
01024 {
01025 result=-CMath::MAX_REAL_NUMBER;
01026 return result;
01027 }
01028 if (greater_equal(y0, 1))
01029 {
01030 result=CMath::MAX_REAL_NUMBER;
01031 return result;
01032 }
01033 code=1;
01034 y=y0;
01035 if (greater(y, 1.0-expm2))
01036 {
01037 y=1.0-y;
01038 code=0;
01039 }
01040 if (greater(y, expm2))
01041 {
01042 y=y-0.5;
01043 y2=y*y;
01044 p0=-59.9633501014107895267;
01045 p0=98.0010754185999661536+y2*p0;
01046 p0=-56.6762857469070293439+y2*p0;
01047 p0=13.9312609387279679503+y2*p0;
01048 p0=-1.23916583867381258016+y2*p0;
01049 q0=1;
01050 q0=1.95448858338141759834+y2*q0;
01051 q0=4.67627912898881538453+y2*q0;
01052 q0=86.3602421390890590575+y2*q0;
01053 q0=-225.462687854119370527+y2*q0;
01054 q0=200.260212380060660359+y2*q0;
01055 q0=-82.0372256168333339912+y2*q0;
01056 q0=15.9056225126211695515+y2*q0;
01057 q0=-1.18331621121330003142+y2*q0;
01058 x=y+y*y2*p0/q0;
01059 x=x*s2pi;
01060 result=x;
01061 return result;
01062 }
01063 x=CMath::sqrt(-2.0*CMath::log(y));
01064 x0=x-CMath::log(x)/x;
01065 z=1.0/x;
01066 if (less(x, 8.0))
01067 {
01068 p1=4.05544892305962419923;
01069 p1=31.5251094599893866154+z*p1;
01070 p1=57.1628192246421288162+z*p1;
01071 p1=44.0805073893200834700+z*p1;
01072 p1=14.6849561928858024014+z*p1;
01073 p1=2.18663306850790267539+z*p1;
01074 p1=-1.40256079171354495875*0.1+z*p1;
01075 p1=-3.50424626827848203418*0.01+z*p1;
01076 p1=-8.57456785154685413611*0.0001+z*p1;
01077 q1=1;
01078 q1=15.7799883256466749731+z*q1;
01079 q1=45.3907635128879210584+z*q1;
01080 q1=41.3172038254672030440+z*q1;
01081 q1=15.0425385692907503408+z*q1;
01082 q1=2.50464946208309415979+z*q1;
01083 q1=-1.42182922854787788574*0.1+z*q1;
01084 q1=-3.80806407691578277194*0.01+z*q1;
01085 q1=-9.33259480895457427372*0.0001+z*q1;
01086 x1=z*p1/q1;
01087 }
01088 else
01089 {
01090 p2=3.23774891776946035970;
01091 p2=6.91522889068984211695+z*p2;
01092 p2=3.93881025292474443415+z*p2;
01093 p2=1.33303460815807542389+z*p2;
01094 p2=2.01485389549179081538*0.1+z*p2;
01095 p2=1.23716634817820021358*0.01+z*p2;
01096 p2=3.01581553508235416007*0.0001+z*p2;
01097 p2=2.65806974686737550832*0.000001+z*p2;
01098 p2=6.23974539184983293730*0.000000001+z*p2;
01099 q2=1;
01100 q2=6.02427039364742014255+z*q2;
01101 q2=3.67983563856160859403+z*q2;
01102 q2=1.37702099489081330271+z*q2;
01103 q2=2.16236993594496635890*0.1+z*q2;
01104 q2=1.34204006088543189037*0.01+z*q2;
01105 q2=3.28014464682127739104*0.0001+z*q2;
01106 q2=2.89247864745380683936*0.000001+z*q2;
01107 q2=6.79019408009981274425*0.000000001+z*q2;
01108 x1=z*p2/q2;
01109 }
01110 x=x0-x1;
01111 if (code!=0)
01112 {
01113 x=-x;
01114 }
01115 result=x;
01116 return result;
01117 }
01118
01119 float64_t CStatistics::ibetaf_incompletebetaps(float64_t a, float64_t b,
01120 float64_t x, float64_t maxgam)
01121 {
01122 float64_t s;
01123 float64_t t;
01124 float64_t u;
01125 float64_t v;
01126 float64_t n;
01127 float64_t t1;
01128 float64_t z;
01129 float64_t ai;
01130 float64_t result;
01131
01132 ai=1.0/a;
01133 u=(1.0-b)*x;
01134 v=u/(a+1.0);
01135 t1=v;
01136 t=u;
01137 n=2.0;
01138 s=0.0;
01139 z=CMath::MACHINE_EPSILON*ai;
01140 while (greater(CMath::abs(v), z))
01141 {
01142 u=(n-b)*x/n;
01143 t=t*u;
01144 v=t/(a+n);
01145 s=s+v;
01146 n=n+1.0;
01147 }
01148 s=s+t1;
01149 s=s+ai;
01150 u=a*CMath::log(x);
01151 if (less(a+b, maxgam)
01152 && less(CMath::abs(u), CMath::log(CMath::MAX_REAL_NUMBER)))
01153 {
01154 t=tgamma(a+b)/(tgamma(a)*tgamma(b));
01155 s=s*t*CMath::pow(x, a);
01156 }
01157 else
01158 {
01159 t=lgamma(a+b)-lgamma(a)-lgamma(b)+u+CMath::log(s);
01160 if (less(t, CMath::log(CMath::MIN_REAL_NUMBER)))
01161 {
01162 s=0.0;
01163 }
01164 else
01165 {
01166 s=CMath::exp(t);
01167 }
01168 }
01169 result=s;
01170 return result;
01171 }
01172
01173 float64_t CStatistics::ibetaf_incompletebetafe(float64_t a, float64_t b,
01174 float64_t x, float64_t big, float64_t biginv)
01175 {
01176 float64_t xk;
01177 float64_t pk;
01178 float64_t pkm1;
01179 float64_t pkm2;
01180 float64_t qk;
01181 float64_t qkm1;
01182 float64_t qkm2;
01183 float64_t k1;
01184 float64_t k2;
01185 float64_t k3;
01186 float64_t k4;
01187 float64_t k5;
01188 float64_t k6;
01189 float64_t k7;
01190 float64_t k8;
01191 float64_t r;
01192 float64_t t;
01193 float64_t ans;
01194 float64_t thresh;
01195 int32_t n;
01196 float64_t result;
01197
01198 k1=a;
01199 k2=a+b;
01200 k3=a;
01201 k4=a+1.0;
01202 k5=1.0;
01203 k6=b-1.0;
01204 k7=k4;
01205 k8=a+2.0;
01206 pkm2=0.0;
01207 qkm2=1.0;
01208 pkm1=1.0;
01209 qkm1=1.0;
01210 ans=1.0;
01211 r=1.0;
01212 n=0;
01213 thresh=3.0*CMath::MACHINE_EPSILON;
01214 do
01215 {
01216 xk=-x*k1*k2/(k3*k4);
01217 pk=pkm1+pkm2*xk;
01218 qk=qkm1+qkm2*xk;
01219 pkm2=pkm1;
01220 pkm1=pk;
01221 qkm2=qkm1;
01222 qkm1=qk;
01223 xk=x*k5*k6/(k7*k8);
01224 pk=pkm1+pkm2*xk;
01225 qk=qkm1+qkm2*xk;
01226 pkm2=pkm1;
01227 pkm1=pk;
01228 qkm2=qkm1;
01229 qkm1=qk;
01230 if (not_equal(qk, 0))
01231 {
01232 r=pk/qk;
01233 }
01234 if (not_equal(r, 0))
01235 {
01236 t=CMath::abs((ans-r)/r);
01237 ans=r;
01238 }
01239 else
01240 {
01241 t=1.0;
01242 }
01243 if (less(t, thresh))
01244 {
01245 break;
01246 }
01247 k1=k1+1.0;
01248 k2=k2+1.0;
01249 k3=k3+2.0;
01250 k4=k4+2.0;
01251 k5=k5+1.0;
01252 k6=k6-1.0;
01253 k7=k7+2.0;
01254 k8=k8+2.0;
01255 if (greater(CMath::abs(qk)+CMath::abs(pk), big))
01256 {
01257 pkm2=pkm2*biginv;
01258 pkm1=pkm1*biginv;
01259 qkm2=qkm2*biginv;
01260 qkm1=qkm1*biginv;
01261 }
01262 if (less(CMath::abs(qk), biginv) || less(CMath::abs(pk), biginv))
01263 {
01264 pkm2=pkm2*big;
01265 pkm1=pkm1*big;
01266 qkm2=qkm2*big;
01267 qkm1=qkm1*big;
01268 }
01269 n=n+1;
01270 }
01271 while (n!=300);
01272 result=ans;
01273 return result;
01274 }
01275
01276 float64_t CStatistics::ibetaf_incompletebetafe2(float64_t a, float64_t b,
01277 float64_t x, float64_t big, float64_t biginv)
01278 {
01279 float64_t xk;
01280 float64_t pk;
01281 float64_t pkm1;
01282 float64_t pkm2;
01283 float64_t qk;
01284 float64_t qkm1;
01285 float64_t qkm2;
01286 float64_t k1;
01287 float64_t k2;
01288 float64_t k3;
01289 float64_t k4;
01290 float64_t k5;
01291 float64_t k6;
01292 float64_t k7;
01293 float64_t k8;
01294 float64_t r;
01295 float64_t t;
01296 float64_t ans;
01297 float64_t z;
01298 float64_t thresh;
01299 int32_t n;
01300 float64_t result;
01301
01302 k1=a;
01303 k2=b-1.0;
01304 k3=a;
01305 k4=a+1.0;
01306 k5=1.0;
01307 k6=a+b;
01308 k7=a+1.0;
01309 k8=a+2.0;
01310 pkm2=0.0;
01311 qkm2=1.0;
01312 pkm1=1.0;
01313 qkm1=1.0;
01314 z=x/(1.0-x);
01315 ans=1.0;
01316 r=1.0;
01317 n=0;
01318 thresh=3.0*CMath::MACHINE_EPSILON;
01319 do
01320 {
01321 xk=-z*k1*k2/(k3*k4);
01322 pk=pkm1+pkm2*xk;
01323 qk=qkm1+qkm2*xk;
01324 pkm2=pkm1;
01325 pkm1=pk;
01326 qkm2=qkm1;
01327 qkm1=qk;
01328 xk=z*k5*k6/(k7*k8);
01329 pk=pkm1+pkm2*xk;
01330 qk=qkm1+qkm2*xk;
01331 pkm2=pkm1;
01332 pkm1=pk;
01333 qkm2=qkm1;
01334 qkm1=qk;
01335 if (not_equal(qk, 0))
01336 {
01337 r=pk/qk;
01338 }
01339 if (not_equal(r, 0))
01340 {
01341 t=CMath::abs((ans-r)/r);
01342 ans=r;
01343 }
01344 else
01345 {
01346 t=1.0;
01347 }
01348 if (less(t, thresh))
01349 {
01350 break;
01351 }
01352 k1=k1+1.0;
01353 k2=k2-1.0;
01354 k3=k3+2.0;
01355 k4=k4+2.0;
01356 k5=k5+1.0;
01357 k6=k6+1.0;
01358 k7=k7+2.0;
01359 k8=k8+2.0;
01360 if (greater(CMath::abs(qk)+CMath::abs(pk), big))
01361 {
01362 pkm2=pkm2*biginv;
01363 pkm1=pkm1*biginv;
01364 qkm2=qkm2*biginv;
01365 qkm1=qkm1*biginv;
01366 }
01367 if (less(CMath::abs(qk), biginv) || less(CMath::abs(pk), biginv))
01368 {
01369 pkm2=pkm2*big;
01370 pkm1=pkm1*big;
01371 qkm2=qkm2*big;
01372 qkm1=qkm1*big;
01373 }
01374 n=n+1;
01375 }
01376 while (n!=300);
01377 result=ans;
01378 return result;
01379 }
01380
01381 float64_t CStatistics::incomplete_gamma(float64_t a, float64_t x)
01382 {
01383 float64_t igammaepsilon;
01384 float64_t ans;
01385 float64_t ax;
01386 float64_t c;
01387 float64_t r;
01388 float64_t result;
01389
01390 igammaepsilon=0.000000000000001;
01391 if (less_equal(x, 0) || less_equal(a, 0))
01392 {
01393 result=0;
01394 return result;
01395 }
01396 if (greater(x, 1) && greater(x, a))
01397 {
01398 result=1-incomplete_gamma_completed(a, x);
01399 return result;
01400 }
01401 ax=a*CMath::log(x)-x-lgamma(a);
01402 if (less(ax, -709.78271289338399))
01403 {
01404 result=0;
01405 return result;
01406 }
01407 ax=CMath::exp(ax);
01408 r=a;
01409 c=1;
01410 ans=1;
01411 do
01412 {
01413 r=r+1;
01414 c=c*x/r;
01415 ans=ans+c;
01416 }
01417 while (greater(c/ans, igammaepsilon));
01418 result=ans*ax/a;
01419 return result;
01420 }
01421
01422 float64_t CStatistics::incomplete_gamma_completed(float64_t a, float64_t x)
01423 {
01424 float64_t igammaepsilon;
01425 float64_t igammabignumber;
01426 float64_t igammabignumberinv;
01427 float64_t ans;
01428 float64_t ax;
01429 float64_t c;
01430 float64_t yc;
01431 float64_t r;
01432 float64_t t;
01433 float64_t y;
01434 float64_t z;
01435 float64_t pk;
01436 float64_t pkm1;
01437 float64_t pkm2;
01438 float64_t qk;
01439 float64_t qkm1;
01440 float64_t qkm2;
01441 float64_t result;
01442
01443 igammaepsilon=0.000000000000001;
01444 igammabignumber=4503599627370496.0;
01445 igammabignumberinv=2.22044604925031308085*0.0000000000000001;
01446 if (less_equal(x, 0) || less_equal(a, 0))
01447 {
01448 result=1;
01449 return result;
01450 }
01451 if (less(x, 1) || less(x, a))
01452 {
01453 result=1-incomplete_gamma(a, x);
01454 return result;
01455 }
01456 ax=a*CMath::log(x)-x-lgamma(a);
01457 if (less(ax, -709.78271289338399))
01458 {
01459 result=0;
01460 return result;
01461 }
01462 ax=CMath::exp(ax);
01463 y=1-a;
01464 z=x+y+1;
01465 c=0;
01466 pkm2=1;
01467 qkm2=x;
01468 pkm1=x+1;
01469 qkm1=z*x;
01470 ans=pkm1/qkm1;
01471 do
01472 {
01473 c=c+1;
01474 y=y+1;
01475 z=z+2;
01476 yc=y*c;
01477 pk=pkm1*z-pkm2*yc;
01478 qk=qkm1*z-qkm2*yc;
01479 if (not_equal(qk, 0))
01480 {
01481 r=pk/qk;
01482 t=CMath::abs((ans-r)/r);
01483 ans=r;
01484 }
01485 else
01486 {
01487 t=1;
01488 }
01489 pkm2=pkm1;
01490 pkm1=pk;
01491 qkm2=qkm1;
01492 qkm1=qk;
01493 if (greater(CMath::abs(pk), igammabignumber))
01494 {
01495 pkm2=pkm2*igammabignumberinv;
01496 pkm1=pkm1*igammabignumberinv;
01497 qkm2=qkm2*igammabignumberinv;
01498 qkm1=qkm1*igammabignumberinv;
01499 }
01500 }
01501 while (greater(t, igammaepsilon));
01502 result=ans*ax;
01503 return result;
01504 }
01505
01506 float64_t CStatistics::gamma_cdf(float64_t x, float64_t a, float64_t b)
01507 {
01508
01509 return incomplete_gamma(a, x/b);
01510 }
01511
01512 float64_t CStatistics::inverse_gamma_cdf(float64_t p, float64_t a,
01513 float64_t b)
01514 {
01515
01516
01517 return inverse_incomplete_gamma_completed(a, 1-p)*b;
01518 }
01519
01520 float64_t CStatistics::inverse_incomplete_gamma_completed(float64_t a,
01521 float64_t y0)
01522 {
01523 float64_t igammaepsilon;
01524 float64_t iinvgammabignumber;
01525 float64_t x0;
01526 float64_t x1;
01527 float64_t x;
01528 float64_t yl;
01529 float64_t yh;
01530 float64_t y;
01531 float64_t d;
01532 float64_t lgm;
01533 float64_t dithresh;
01534 int32_t i;
01535 int32_t dir;
01536 float64_t result;
01537
01538 igammaepsilon=0.000000000000001;
01539 iinvgammabignumber=4503599627370496.0;
01540 x0=iinvgammabignumber;
01541 yl=0;
01542 x1=0;
01543 yh=1;
01544 dithresh=5*igammaepsilon;
01545 d=1/(9*a);
01546 y=1-d-inverse_normal_cdf(y0)*CMath::sqrt(d);
01547 x=a*y*y*y;
01548 lgm=lgamma(a);
01549 i=0;
01550 while (i<10)
01551 {
01552 if (greater(x, x0) || less(x, x1))
01553 {
01554 d=0.0625;
01555 break;
01556 }
01557 y=incomplete_gamma_completed(a, x);
01558 if (less(y, yl) || greater(y, yh))
01559 {
01560 d=0.0625;
01561 break;
01562 }
01563 if (less(y, y0))
01564 {
01565 x0=x;
01566 yl=y;
01567 }
01568 else
01569 {
01570 x1=x;
01571 yh=y;
01572 }
01573 d=(a-1)*CMath::log(x)-x-lgm;
01574 if (less(d, -709.78271289338399))
01575 {
01576 d=0.0625;
01577 break;
01578 }
01579 d=-CMath::exp(d);
01580 d=(y-y0)/d;
01581 if (less(CMath::abs(d/x), igammaepsilon))
01582 {
01583 result=x;
01584 return result;
01585 }
01586 x=x-d;
01587 i=i+1;
01588 }
01589 if (equal(x0, iinvgammabignumber))
01590 {
01591 if (less_equal(x, 0))
01592 {
01593 x=1;
01594 }
01595 while (equal(x0, iinvgammabignumber))
01596 {
01597 x=(1+d)*x;
01598 y=incomplete_gamma_completed(a, x);
01599 if (less(y, y0))
01600 {
01601 x0=x;
01602 yl=y;
01603 break;
01604 }
01605 d=d+d;
01606 }
01607 }
01608 d=0.5;
01609 dir=0;
01610 i=0;
01611 while (i<400)
01612 {
01613 x=x1+d*(x0-x1);
01614 y=incomplete_gamma_completed(a, x);
01615 lgm=(x0-x1)/(x1+x0);
01616 if (less(CMath::abs(lgm), dithresh))
01617 {
01618 break;
01619 }
01620 lgm=(y-y0)/y0;
01621 if (less(CMath::abs(lgm), dithresh))
01622 {
01623 break;
01624 }
01625 if (less_equal(x, 0.0))
01626 {
01627 break;
01628 }
01629 if (greater_equal(y, y0))
01630 {
01631 x1=x;
01632 yh=y;
01633 if (dir<0)
01634 {
01635 dir=0;
01636 d=0.5;
01637 }
01638 else
01639 {
01640 if (dir>1)
01641 {
01642 d=0.5*d+0.5;
01643 }
01644 else
01645 {
01646 d=(y0-yl)/(yh-yl);
01647 }
01648 }
01649 dir=dir+1;
01650 }
01651 else
01652 {
01653 x0=x;
01654 yl=y;
01655 if (dir>0)
01656 {
01657 dir=0;
01658 d=0.5;
01659 }
01660 else
01661 {
01662 if (dir<-1)
01663 {
01664 d=0.5*d;
01665 }
01666 else
01667 {
01668 d=(y0-yl)/(yh-yl);
01669 }
01670 }
01671 dir=dir-1;
01672 }
01673 i=i+1;
01674 }
01675 result=x;
01676 return result;
01677 }
01678
01679 float64_t CStatistics::normal_cdf(float64_t x, float64_t std_dev)
01680 {
01681 return 0.5*(error_function(x/std_dev/1.41421356237309504880)+1);
01682 }
01683
01684 float64_t CStatistics::error_function(float64_t x)
01685 {
01686 float64_t xsq;
01687 float64_t s;
01688 float64_t p;
01689 float64_t q;
01690 float64_t result;
01691
01692 s=CMath::sign(x);
01693 x=CMath::abs(x);
01694 if (less(x, 0.5))
01695 {
01696 xsq=x*x;
01697 p=0.007547728033418631287834;
01698 p=0.288805137207594084924010+xsq*p;
01699 p=14.3383842191748205576712+xsq*p;
01700 p=38.0140318123903008244444+xsq*p;
01701 p=3017.82788536507577809226+xsq*p;
01702 p=7404.07142710151470082064+xsq*p;
01703 p=80437.3630960840172832162+xsq*p;
01704 q=0.0;
01705 q=1.00000000000000000000000+xsq*q;
01706 q=38.0190713951939403753468+xsq*q;
01707 q=658.070155459240506326937+xsq*q;
01708 q=6379.60017324428279487120+xsq*q;
01709 q=34216.5257924628539769006+xsq*q;
01710 q=80437.3630960840172826266+xsq*q;
01711 result=s*1.1283791670955125738961589031*x*p/q;
01712 return result;
01713 }
01714 if (greater_equal(x, 10))
01715 {
01716 result=s;
01717 return result;
01718 }
01719 result=s*(1-error_function_complement(x));
01720 return result;
01721 }
01722
01723 float64_t CStatistics::error_function_complement(float64_t x)
01724 {
01725 float64_t p;
01726 float64_t q;
01727 float64_t result;
01728
01729 if (less(x, 0))
01730 {
01731 result=2-error_function_complement(-x);
01732 return result;
01733 }
01734 if (less(x, 0.5))
01735 {
01736 result=1.0-error_function(x);
01737 return result;
01738 }
01739 if (greater_equal(x, 10))
01740 {
01741 result=0;
01742 return result;
01743 }
01744 p=0.0;
01745 p=0.5641877825507397413087057563+x*p;
01746 p=9.675807882987265400604202961+x*p;
01747 p=77.08161730368428609781633646+x*p;
01748 p=368.5196154710010637133875746+x*p;
01749 p=1143.262070703886173606073338+x*p;
01750 p=2320.439590251635247384768711+x*p;
01751 p=2898.0293292167655611275846+x*p;
01752 p=1826.3348842295112592168999+x*p;
01753 q=1.0;
01754 q=17.14980943627607849376131193+x*q;
01755 q=137.1255960500622202878443578+x*q;
01756 q=661.7361207107653469211984771+x*q;
01757 q=2094.384367789539593790281779+x*q;
01758 q=4429.612803883682726711528526+x*q;
01759 q=6089.5424232724435504633068+x*q;
01760 q=4958.82756472114071495438422+x*q;
01761 q=1826.3348842295112595576438+x*q;
01762 result=CMath::exp(-x*x)*p/q;
01763 return result;
01764 }
01765
01766 SGVector<float64_t> CStatistics::fishers_exact_test_for_multiple_2x3_tables(
01767 SGMatrix<float64_t> tables)
01768 {
01769 SGMatrix<float64_t> table(NULL, 2, 3, false);
01770 int32_t len=tables.num_cols/3;
01771
01772 SGVector<float64_t> v(len);
01773 for (int32_t i=0; i<len; i++)
01774 {
01775 table.matrix=&tables.matrix[2*3*i];
01776 v.vector[i]=fishers_exact_test_for_2x3_table(table);
01777 }
01778 return v;
01779 }
01780
01781 float64_t CStatistics::fishers_exact_test_for_2x3_table(
01782 SGMatrix<float64_t> table)
01783 {
01784 ASSERT(table.num_rows==2);
01785 ASSERT(table.num_cols==3);
01786
01787 int32_t m_len=3+2;
01788 float64_t* m=SG_MALLOC(float64_t, 3+2);
01789 m[0]=table.matrix[0]+table.matrix[2]+table.matrix[4];
01790 m[1]=table.matrix[1]+table.matrix[3]+table.matrix[5];
01791 m[2]=table.matrix[0]+table.matrix[1];
01792 m[3]=table.matrix[2]+table.matrix[3];
01793 m[4]=table.matrix[4]+table.matrix[5];
01794
01795 float64_t n=SGVector<float64_t>::sum(m, m_len)/2.0;
01796 int32_t x_len=2*3*CMath::sq(SGVector<float64_t>::max(m, m_len));
01797 float64_t* x=SG_MALLOC(float64_t, x_len);
01798 SGVector<float64_t>::fill_vector(x, x_len, 0.0);
01799
01800 float64_t log_nom=0.0;
01801 for (int32_t i=0; i<3+2; i++)
01802 log_nom+=lgamma(m[i]+1);
01803 log_nom-=lgamma(n+1.0);
01804
01805 float64_t log_denomf=0;
01806 floatmax_t log_denom=0;
01807
01808 for (int32_t i=0; i<3*2; i++)
01809 {
01810 log_denom+=lgammal((floatmax_t)table.matrix[i]+1);
01811 log_denomf+=lgammal((floatmax_t)table.matrix[i]+1);
01812 }
01813
01814 floatmax_t prob_table_log=log_nom-log_denom;
01815
01816 int32_t dim1=CMath::min(m[0], m[2]);
01817
01818
01819 int32_t counter=0;
01820 for (int32_t k=0; k<=dim1; k++)
01821 {
01822 for (int32_t l=CMath::max(0.0, m[0]-m[4]-k);
01823 l<=CMath::min(m[0]-k, m[3]); l++)
01824 {
01825 x[0+0*2+counter*2*3]=k;
01826 x[0+1*2+counter*2*3]=l;
01827 x[0+2*2+counter*2*3]=m[0]-x[0+0*2+counter*2*3]-x[0+1*2+counter*2*3];
01828 x[1+0*2+counter*2*3]=m[2]-x[0+0*2+counter*2*3];
01829 x[1+1*2+counter*2*3]=m[3]-x[0+1*2+counter*2*3];
01830 x[1+2*2+counter*2*3]=m[4]-x[0+2*2+counter*2*3];
01831
01832 counter++;
01833 }
01834 }
01835
01836
01837 #ifdef DEBUG_FISHER_TABLE
01838 SG_SPRINT("counter=%d\n", counter);
01839 SG_SPRINT("dim1=%d\n", dim1);
01840 SG_SPRINT("l=%g...%g\n", CMath::max(0.0,m[0]-m[4]-0), CMath::min(m[0]-0, m[3]));
01841 SG_SPRINT("n=%g\n", n);
01842 SG_SPRINT("prob_table_log=%.18Lg\n", prob_table_log);
01843 SG_SPRINT("log_denomf=%.18g\n", log_denomf);
01844 SG_SPRINT("log_denom=%.18Lg\n", log_denom);
01845 SG_SPRINT("log_nom=%.18g\n", log_nom);
01846 display_vector(m, m_len, "marginals");
01847 display_vector(x, 2*3*counter, "x");
01848 #endif // DEBUG_FISHER_TABLE
01849
01850 floatmax_t* log_denom_vec=SG_MALLOC(floatmax_t, counter);
01851 SGVector<floatmax_t>::fill_vector(log_denom_vec, counter, (floatmax_t)0.0);
01852
01853 for (int32_t k=0; k<counter; k++)
01854 {
01855 for (int32_t j=0; j<3; j++)
01856 {
01857 for (int32_t i=0; i<2; i++)
01858 log_denom_vec[k]+=lgammal(x[i+j*2+k*2*3]+1.0);
01859 }
01860 }
01861
01862 for (int32_t i=0; i<counter; i++)
01863 log_denom_vec[i]=log_nom-log_denom_vec[i];
01864
01865 #ifdef DEBUG_FISHER_TABLE
01866 display_vector(log_denom_vec, counter, "log_denom_vec");
01867 #endif // DEBUG_FISHER_TABLE
01868
01869 float64_t nonrand_p=-CMath::INFTY;
01870 for (int32_t i=0; i<counter; i++)
01871 {
01872 if (log_denom_vec[i]<=prob_table_log)
01873 nonrand_p=CMath::logarithmic_sum(nonrand_p, log_denom_vec[i]);
01874 }
01875
01876 #ifdef DEBUG_FISHER_TABLE
01877 SG_SPRINT("nonrand_p=%.18g\n", nonrand_p);
01878 SG_SPRINT("exp_nonrand_p=%.18g\n", CMath::exp(nonrand_p));
01879 #endif // DEBUG_FISHER_TABLE
01880 nonrand_p=CMath::exp(nonrand_p);
01881
01882 SG_FREE(log_denom_vec);
01883 SG_FREE(x);
01884 SG_FREE(m);
01885
01886 return nonrand_p;
01887 }
01888
01889 float64_t CStatistics::mutual_info(float64_t* p1, float64_t* p2, int32_t len)
01890 {
01891 double e=0;
01892
01893 for (int32_t i=0; i<len; i++)
01894 for (int32_t j=0; j<len; j++)
01895 e+=exp(p2[j*len+i])*(p2[j*len+i]-p1[i]-p1[j]);
01896
01897 return (float64_t)e;
01898 }
01899
01900 float64_t CStatistics::relative_entropy(float64_t* p, float64_t* q, int32_t len)
01901 {
01902 double e=0;
01903
01904 for (int32_t i=0; i<len; i++)
01905 e+=exp(p[i])*(p[i]-q[i]);
01906
01907 return (float64_t)e;
01908 }
01909
01910 float64_t CStatistics::entropy(float64_t* p, int32_t len)
01911 {
01912 double e=0;
01913
01914 for (int32_t i=0; i<len; i++)
01915 e-=exp(p[i])*p[i];
01916
01917 return (float64_t)e;
01918 }
01919
01920 SGVector<int32_t> CStatistics::sample_indices(int32_t sample_size, int32_t N)
01921 {
01922 REQUIRE(sample_size<N,
01923 "sample size should be less than number of indices\n");
01924 int32_t* idxs=SG_MALLOC(int32_t,N);
01925 int32_t i, rnd;
01926 int32_t* permuted_idxs=SG_MALLOC(int32_t,sample_size);
01927
01928
01929 for (i=0; i<N; i++)
01930 idxs[i]=i;
01931 for (i=0; i<sample_size; i++)
01932 permuted_idxs[i]=idxs[i];
01933 for (i=sample_size; i<N; i++)
01934 {
01935 rnd=CMath::random(1, i);
01936 if (rnd<sample_size)
01937 permuted_idxs[rnd]=idxs[i];
01938 }
01939 SG_FREE(idxs);
01940
01941 CMath::qsort(permuted_idxs, sample_size);
01942 return SGVector<int32_t>(permuted_idxs, sample_size);
01943 }
01944
01945 float64_t CStatistics::dlgamma(float64_t x)
01946 {
01947 x = x+6.0;
01948 float64_t df = 1./(x*x);
01949 df = (((df/240-0.003968253986254)*df+1/120.0)*df-1/120.0)*df;
01950 df = df+log(x)-0.5/x-1.0/(x-1.0)-1.0/(x-2.0)-1.0/
01951 (x-3.0)-1.0/(x-4.0)-1.0/(x-5.0)-1.0/(x-6.0);
01952 return df;
01953 }
01954