22 #include <shogun/lib/external/cdflib.hpp>
25 using namespace Eigen;
31 REQUIRE(values.
vlen>1,
"Number of observations (%d) needs to be at least 1.\n",
38 sum_squared_diff+=CMath::pow(values.
vector[i]-mean, 2);
40 return sum_squared_diff/(values.
vlen-1);
59 result[j]+=values(i,j);
71 result[j]+=values(j,i);
99 result[j]+=CMath::pow(values(i,j)-mean[j], 2);
111 result[j]+=CMath::pow(values(j,i)-mean[j], 2);
122 return CMath::sqrt(variance(values));
130 var[i]=CMath::sqrt(var[i]);
140 SG_SDEBUG(
"%d observations in %d dimensions\n", N, D)
142 REQUIRE(N>1,
"Number of observations (%d) must be at least 2.\n", N);
143 REQUIRE(D>0,
"Number of dimensions (%d) must be at least 1.\n", D);
151 int64_t num_elements = N*D;
157 eigen_centered.colwise() -= eigen_centered.rowwise().mean();
160 SG_SDEBUG(
"Computing squared differences\n");
163 eigen_cov = (eigen_centered * eigen_centered.adjoint()) /
double(N - 1);
175 for (int32_t i=0; i<len; i++)
178 v.
vector[i]=fishers_exact_test_for_2x3_table(table);
183 float64_t CStatistics::fishers_exact_test_for_2x3_table(
198 int32_t x_len=2*3*CMath::sq(
CMath::max(m, m_len));
203 for (int32_t i=0; i<3+2; i++)
204 log_nom+=lgamma(m[i]+1);
205 log_nom-=lgamma(n+1.0);
210 for (int32_t i=0; i<3*2; i++)
218 int32_t dim1=CMath::min(m[0], m[2]);
222 for (int32_t k=0; k<=dim1; k++)
225 l<=CMath::min(m[0]-k, m[3]); l++)
227 x[0+0*2+counter*2*3]=k;
228 x[0+1*2+counter*2*3]=l;
229 x[0+2*2+counter*2*3]=m[0]-x[0+0*2+counter*2*3]-x[0+1*2+counter*2*3];
230 x[1+0*2+counter*2*3]=m[2]-x[0+0*2+counter*2*3];
231 x[1+1*2+counter*2*3]=m[3]-x[0+1*2+counter*2*3];
232 x[1+2*2+counter*2*3]=m[4]-x[0+2*2+counter*2*3];
239 #ifdef DEBUG_FISHER_TABLE
244 SG_SPRINT(
"prob_table_log=%.18Lg\n", prob_table_log)
245 SG_SPRINT(
"log_denomf=%.18g\n", log_denomf)
246 SG_SPRINT(
"log_denom=%.18Lg\n", log_denom)
248 display_vector(m, m_len,
"marginals");
249 display_vector(x, 2*3*counter,
"x");
250 #endif // DEBUG_FISHER_TABLE
255 for (int32_t k=0; k<counter; k++)
257 for (int32_t j=0; j<3; j++)
259 for (int32_t i=0; i<2; i++)
260 log_denom_vec[k]+=lgammal(x[i+j*2+k*2*3]+1.0);
264 for (int32_t i=0; i<counter; i++)
265 log_denom_vec[i]=log_nom-log_denom_vec[i];
267 #ifdef DEBUG_FISHER_TABLE
268 display_vector(log_denom_vec, counter,
"log_denom_vec");
269 #endif // DEBUG_FISHER_TABLE
272 for (int32_t i=0; i<counter; i++)
274 if (log_denom_vec[i]<=prob_table_log)
275 nonrand_p=CMath::logarithmic_sum(nonrand_p, log_denom_vec[i]);
278 #ifdef DEBUG_FISHER_TABLE
279 SG_SPRINT(
"nonrand_p=%.18g\n", nonrand_p)
280 SG_SPRINT(
"exp_nonrand_p=%.18g\n", CMath::exp(nonrand_p))
281 #endif // DEBUG_FISHER_TABLE
282 nonrand_p=CMath::exp(nonrand_p);
284 SG_FREE(log_denom_vec);
295 for (int32_t i=0; i<len; i++)
296 for (int32_t j=0; j<len; j++)
297 e+=exp(p2[j*len+i])*(p2[j*len+i]-p1[i]-p1[j]);
306 for (int32_t i=0; i<len; i++)
307 e+=exp(p[i])*(p[i]-q[i]);
316 for (int32_t i=0; i<len; i++)
325 "sample size should be less than number of indices\n");
326 int32_t* idxs=SG_MALLOC(int32_t,N);
328 int32_t* permuted_idxs=SG_MALLOC(int32_t,sample_size);
333 for (i=0; i<sample_size; i++)
334 permuted_idxs[i]=idxs[i];
335 for (i=sample_size; i<N; i++)
337 rnd=CMath::random(1, i);
339 permuted_idxs[rnd]=idxs[i];
344 CMath::qsort(result);
351 REQUIRE(p>=0,
"p (%f); must be greater or equal to 0.\n", p);
352 REQUIRE(p<=1,
"p (%f); must be greater or equal to 1.\n", p);
353 REQUIRE(std_deviation>0,
"Standard deviation (%f); must be positive\n",
363 cdfnor(&which, &p, &q, &output_x, &mean, &std_deviation, &output_status, &output_bound);
365 if (output_status!=0)
366 SG_SERROR(
"Error %d while calling cdflib::cdfnor\n", output_status);
376 REQUIRE(x>=0,
"x (%f) has to be greater or equal to 0.\n", x);
377 REQUIRE(k>0,
"Degrees of freedom (%f) has to be positive.\n", k);
387 cdfchi(&which, &output_p, &output_q, &x, &df, &output_status, &output_bound);
389 if (output_status!=0)
390 SG_SERROR(
"Error %d while calling cdflib::cdfchi\n", output_status);
397 REQUIRE(x>=0,
"x (%f) has to be greater or equal to 0.\n", x);
398 REQUIRE(a>=0,
"a (%f) has to be greater or equal to 0.\n", a);
399 REQUIRE(b>=0,
"b (%f) has to be greater or equal to 0.\n", b);
408 int output_error_code;
410 cdfgam(&which, &output_p, &output_q, &x, &shape, &scale, &output_error_code, &output_bound);
412 if (output_error_code!=0)
413 SG_SERROR(
"Error %d while calling cdflib::cdfgam\n", output_error_code);
426 const float64_t sqrt_of_2=1.41421356237309514547;
427 const float64_t log_of_2=0.69314718055994528623;
428 const float64_t sqrt_of_pi=1.77245385090551588192;
440 -0.01621575378835404,
442 -0.001829764677455021,
443 2.0*(1.0-CMath::PI/3.0),
452 float64_t lp0 = -x/(sqrt_of_2*sqrt_of_pi);
453 for (
index_t i=0; i<c_len; i++)
454 f=lp0*(c_array[i]+f);
455 return -2.0*f-log_of_2;
457 else if (x<ERFC_CASE2)
458 return CMath::log(erfc8_weighted_sum(x))-log_of_2-x*x*0.5;
461 return CMath::log(normal_cdf(x));
468 const float64_t sqrt_of_2=1.41421356237309514547;
472 0.5641895835477550741253201704,
473 1.275366644729965952479585264,
474 5.019049726784267463450058,
475 6.1602098531096305440906,
476 7.409740605964741794425,
477 2.97886562639399288862
483 2.260528520767326969591866945,
484 9.396034016235054150430579648,
485 12.0489519278551290360340491,
486 17.08144074746600431571095,
487 9.608965327192787870698,
488 3.3690752069827527677
496 num=-x*num/sqrt_of_2+P[i];
502 den=-x*den/sqrt_of_2+Q[i];
510 return 0.5*(erfc(-x*M_SQRT1_2/std_dev));
516 REQUIRE(p>=0,
"p (%f) has to be greater or equal to 0.\n", p);
517 REQUIRE(a>=0,
"a (%f) has to be greater or equal to 0.\n", a);
518 REQUIRE(b>=0,
"b (%f) has to be greater or equal to 0.\n", b);
526 int output_error_code=0;
529 cdfgam(&which, &p, &q, &output_x, &shape, &scale, &output_error_code, &output_bound);
531 if (output_error_code!=0)
532 SG_SERROR(
"Error %d while calling cdflib::beta_inc\n", output_error_code);
539 REQUIRE(x>=0,
"x (%f) has to be greater or equal to 0.\n", x);
540 REQUIRE(d1>0,
"d1 (%f) has to be positive.\n", d1);
541 REQUIRE(d2>0,
"d2 (%f) has to be positive.\n", d2);
548 int output_error_code;
550 cdff(&which, &output_p, &output_q, &x, &d1, &d2, &output_error_code, &output_bound);
552 if (output_error_code!=0)
553 SG_SERROR(
"Error %d while calling cdflib::cdff\n", output_error_code);
566 result=CMath::PI/CMath::tan(CMath::PI*x);
582 0.04166666666666666667,
583 -0.00729166666666666667,
584 0.00384424603174603175,
585 -0.00413411458333333333,
586 0.00756096117424242424,
587 -0.02108249687595390720,
588 0.08332316080729166666,
589 -0.44324627670587277880,
590 3.05393103044765369366,
591 -26.45616165999210241989};
600 result+=coeff[i]*power;
609 REQUIRE(eigen_A.rows()==eigen_A.cols(),
610 "Input matrix should be a sqaure matrix row(%d) col(%d)\n",
611 eigen_A.rows(), eigen_A.cols());
613 PartialPivLU<MatrixXd> lu(eigen_A);
614 VectorXd tmp(eigen_A.rows());
616 for (
index_t idx=0; idx<tmp.rows(); idx++)
619 VectorXd p=lu.permutationP()*tmp;
622 for (
index_t idx=0; idx<p.rows(); idx++)
638 VectorXd u=lu.matrixLU().diagonal();
641 for (
index_t idx=0; idx<u.rows(); idx++)
655 result=u.array().abs().log().sum();
673 VectorXd diag = l.diagonal();
675 for( int32_t i = 0; i < diag.rows(); ++i ) {
676 retval += log(diag(i));
685 typedef SparseMatrix<float64_t> MatrixType;
688 SimplicialLLT<MatrixType> llt;
692 MatrixType L=llt.matrixL();
696 for(
index_t i=0; i<M.rows(); ++i )
697 retval+=log(L.coeff(i,i));
706 REQUIRE(cov.
num_rows>0,
"Number of covariance rows must be positive!\n");
707 REQUIRE(cov.
num_cols>0,
"Number of covariance cols must be positive!\n");
712 int32_t dim=mean.
vlen;
718 for( int32_t j=0; j<N; ++j )
719 for( int32_t i=0; i<dim; ++i )
720 S(i,j)=CMath::randn_double();
727 if( precision_matrix )
738 if( !precision_matrix )
747 for( int32_t i=0; i<N; ++i )
757 "CStatistics::sample_from_gaussian(): \
758 Number of covariance rows must be positive!\n");
760 "CStatistics::sample_from_gaussian(): \
761 Number of covariance cols must be positive!\n");
763 "CStatistics::sample_from_gaussian(): \
764 Covariance is not initialized!\n");
766 "CStatistics::sample_from_gaussian(): \
767 Covariance should be square matrix!\n");
769 "CStatistics::sample_from_gaussian(): \
770 Mean and covariance dimension mismatch!\n");
772 int32_t dim=mean.
vlen;
775 typedef SparseMatrix<float64_t> MatrixType;
778 SimplicialLLT<MatrixType> llt;
782 for( int32_t j=0; j<N; ++j )
783 for( int32_t i=0; i<dim; ++i )
784 S(i,j)=CMath::randn_double();
790 MatrixType LP=llt.matrixL();
791 MatrixType UP=llt.matrixU();
795 if( precision_matrix )
798 SimplicialLLT<MatrixType> lltUP;
809 s=llt.permutationPinv()*s;
814 for( int32_t i=0; i<N; ++i )
823 SG_SDEBUG(
"entering CStatistics::fit_sigmoid()\n")
825 REQUIRE(scores.
vector,
"CStatistics::fit_sigmoid() requires "
831 SG_SDEBUG(
"counting number of positive and negative labels\n")
841 SG_SDEBUG(
"%d pos; %d neg\n", prior1, prior0)
855 float64_t hiTarget=(prior1+1.0)/(prior1+2.0);
860 for (
index_t i=0; i<length; ++i)
871 float64_t b=CMath::log((prior0+1.0)/(prior1+1.0));
874 for (
index_t i=0; i<length; ++i)
878 fval+=t[i]*fApB+CMath::log(1+CMath::exp(-fApB));
880 fval+=(t[i]-1)*fApB+CMath::log(1+CMath::exp(fApB));
886 for (it=0; it<maxiter; ++it)
888 SG_SDEBUG(
"Iteration %d, a=%f, b=%f, fval=%f\n", it, a, b, fval)
897 for (
index_t i=0; i<length; ++i)
904 p=CMath::exp(-fApB)/(1.0+CMath::exp(-fApB));
905 q=1.0/(1.0+CMath::exp(-fApB));
909 p=1.0/(1.0+CMath::exp(fApB));
910 q=CMath::exp(fApB)/(1.0+CMath::exp(fApB));
914 h11+=scores[i]*scores[i]*d2;
923 if (CMath::abs(g1)<eps && CMath::abs(g2)<eps)
935 while (stepsize>=minstep)
942 for (
index_t i=0; i<length; ++i)
946 newf+=t[i]*fApB+CMath::log(1+CMath::exp(-fApB));
948 newf+=(t[i]-1)*fApB+CMath::log(1+CMath::exp(fApB));
952 if (newf<fval+0.0001*stepsize*gd)
960 stepsize=stepsize/2.0;
963 if (stepsize<minstep)
965 SG_SWARNING(
"CStatistics::fit_sigmoid(): line search fails, A=%f, "
966 "B=%f, g1=%f, g2=%f, dA=%f, dB=%f, gd=%f\n",
967 a, b, g1, g2, dA, dB, gd);
973 SG_SWARNING(
"CStatistics::fit_sigmoid(): reaching maximal iterations,"
974 " g1=%f, g2=%f\n", g1, g2);
977 SG_SDEBUG(
"fitted sigmoid: a=%f, b=%f\n", a, b)
983 SG_SDEBUG(
"leaving CStatistics::fit_sigmoid()\n")
987 const float64_t CStatistics::ERFC_CASE1=0.0492;
989 const float64_t CStatistics::ERFC_CASE2=-11.3137;
index_t num_features
total number of features
SGSparseVector< T > * sparse_matrix
array of sparse vectors of size num_vectors
This class contains some utilities for Eigen3 Sparse Matrix integration with shogun. Currently it provides a method for converting SGSparseMatrix to Eigen3 SparseMatrix.
all of classes and functions are contained in the shogun namespace
void scale(Matrix A, Matrix B, typename Matrix::Scalar alpha)
Matrix::Scalar max(Matrix m)
index_t num_vectors
total number of vectors