45 using namespace Eigen;
76 void CQuadraticTimeMMD::init()
79 " for spectrum method null-distribution approximation",
82 " Eigenvalues for spectrum method null-distribution approximation",
107 row_wise_sum_squared_sum_symmetric_block(0, m);
111 row_wise_sum_symmetric_block(m, n);
116 row_col_wise_sum_block(0, m, m, n);
121 std::copy(xx_sum_sq_sum_rowwise.
matrix, xx_sum_sq_sum_rowwise.
matrix+m,
125 std::copy(xy_sum_rowcolwise.
vector, xy_sum_rowcolwise.
vector+m,
129 std::copy(xy_sum_rowcolwise.
vector+m, xy_sum_rowcolwise.
vector+m+n,
134 xx_sq_sum+=xx_sum_sq_sum_rowwise(i, 1);
138 xx_sum+=xx_sum_rowwise[i];
142 yy_sum+=yy_sum_rowwise[i];
146 xy_sum+=xy_sum_rowwise[i];
163 SG_INFO(
"Computed statistic!\n");
164 SG_DEBUG(
"first=%f, second=%f, third=%f\n", first, second, third);
176 kappa_1+=
CMath::sq(xx_sum_rowwise[i]/(m-1));
182 float64_t var_null=2*(kappa_0-2*kappa_1+kappa_2);
184 SG_INFO(
"Computed variance under null!\n");
185 SG_DEBUG(
"kappa_0=%f, kappa_1=%f, kappa_2=%f\n", kappa_0, kappa_1, kappa_2);
196 float64_t term=xx_sum_rowwise[i]/(m-1)-xy_sum_rowwise[i]/n;
210 float64_t term=yy_sum_rowwise[i]/(n-1)-xy_sum_colwise[i]/m;
222 float64_t var_alt=4*rho_x*(alt_var_first-alt_var_second)+
223 4*rho_y*(alt_var_third-alt_var_fourth);
225 SG_INFO(
"Computed variance under alternative!\n");
226 SG_DEBUG(
"first=%f, second=%f, third=%f, fourth=%f\n", alt_var_first,
227 alt_var_second, alt_var_third, alt_var_fourth);
230 results[0]=statistic;
253 row_wise_sum_squared_sum_symmetric_block(0, m,
false);
257 row_wise_sum_symmetric_block(m, n,
false);
262 row_col_wise_sum_block(0, m, m, n);
267 std::copy(xx_sum_sq_sum_rowwise.
matrix, xx_sum_sq_sum_rowwise.
matrix+m,
271 std::copy(xy_sum_rowcolwise.
vector, xy_sum_rowcolwise.
vector+m,
275 std::copy(xy_sum_rowcolwise.
vector+m, xy_sum_rowcolwise.
vector+m+n,
280 xx_sq_sum+=xx_sum_sq_sum_rowwise(i, 1);
284 xx_sum+=xx_sum_rowwise[i];
288 yy_sum+=yy_sum_rowwise[i];
292 xy_sum+=xy_sum_rowwise[i];
309 SG_INFO(
"Computed statistic!\n");
310 SG_DEBUG(
"first=%f, second=%f, third=%f\n", first, second, third);
328 float64_t var_null=2*(kappa_0-2*kappa_1+kappa_2);
330 SG_INFO(
"Computed variance under null!\n");
331 SG_DEBUG(
"kappa_0=%f, kappa_1=%f, kappa_2=%f\n", kappa_0, kappa_1, kappa_2);
342 float64_t term=xx_sum_rowwise[i]/m-xy_sum_rowwise[i]/n;
356 float64_t term=yy_sum_rowwise[i]/n-xy_sum_colwise[i]/m;
368 float64_t var_alt=4*rho_x*(alt_var_first-alt_var_second)+
369 4*rho_y*(alt_var_third-alt_var_fourth);
371 SG_INFO(
"Computed variance under alternative!\n");
372 SG_DEBUG(
"first=%f, second=%f, third=%f, fourth=%f\n", alt_var_first,
373 alt_var_second, alt_var_third, alt_var_fourth);
376 results[0]=statistic;
399 row_wise_sum_squared_sum_symmetric_block(0, n);
403 row_wise_sum_symmetric_block(n, n);
408 row_col_wise_sum_block(0, n, n, n,
true);
413 std::copy(xx_sum_sq_sum_rowwise.
matrix, xx_sum_sq_sum_rowwise.
matrix+n,
417 std::copy(xy_sum_rowcolwise.
vector, xy_sum_rowcolwise.
vector+n,
421 std::copy(xy_sum_rowcolwise.
vector+n, xy_sum_rowcolwise.
vector+2*n,
426 xx_sq_sum+=xx_sum_sq_sum_rowwise(i, 1);
430 xx_sum+=xx_sum_rowwise[i];
434 yy_sum+=yy_sum_rowwise[i];
438 xy_sum+=xy_sum_rowwise[i];
455 SG_INFO(
"Computed statistic!\n");
456 SG_DEBUG(
"first=%f, second=%f, third=%f\n", first, second, third);
468 kappa_1+=
CMath::sq(xx_sum_rowwise[i]/(n-1));
474 float64_t var_null=2*(kappa_0-2*kappa_1+kappa_2);
476 SG_INFO(
"Computed variance under null!\n");
477 SG_DEBUG(
"kappa_0=%f, kappa_1=%f, kappa_2=%f\n", kappa_0, kappa_1, kappa_2);
488 float64_t term=(xx_sum_rowwise[i]-xy_sum_rowwise[i])/(n-1);
502 float64_t term=(yy_sum_rowwise[i]-xy_sum_colwise[i])/(n-1);
514 float64_t var_alt=4*rho_x*(alt_var_first-alt_var_second)+
515 4*rho_y*(alt_var_third-alt_var_fourth);
517 SG_INFO(
"Computed variance under alternative!\n");
518 SG_DEBUG(
"first=%f, second=%f, third=%f, fourth=%f\n", alt_var_first,
519 alt_var_second, alt_var_third, alt_var_fourth);
522 results[0]=statistic;
562 SG_DEBUG(
"Computing MMD with %d samples from p and %d samples from q!\n",
574 result*=m==n ? m : (m+n);
582 result*=m==n? m : (m+n);
585 REQUIRE(m==n,
"Only possible with equal number of samples from both"
591 SG_ERROR(
"Unknown statistic type!\n");
614 SG_DEBUG(
"Computing MMD with %d samples from p and %d samples from q!\n",
638 REQUIRE(m==n,
"Only possible with equal number of samples from both"
646 SG_ERROR(
"Unknown statistic type!\n");
666 if (!multiple_kernels)
676 "multiple kernels specified, but underlying kernel is not of type "
688 CKernel* current=combined->get_kernel(i);
707 if (!multiple_kernels)
712 vars(0, 0)=result[0];
713 vars(0, 1)=result[1];
719 "multiple kernels specified, but underlying kernel is not of type "
731 CKernel* current=combined->get_kernel(i);
735 vars(i, 0)=result[0];
736 vars(i, 1)=result[1];
765 SG_ERROR(
"Only possible if shogun is compiled with EIGEN3 enabled\n");
766 #endif // HAVE_EIGEN3
780 SG_ERROR(
"Only possible if shogun is compiled with EIGEN3 enabled\n");
781 #endif // HAVE_EIGEN3
816 SG_ERROR(
"Only possible if shogun is compiled with EIGEN3 enabled\n");
817 #endif // HAVE_EIGEN3
830 SG_ERROR(
"Only possible if shogun is compiled with EIGEN3 enabled\n");
831 #endif // HAVE_EIGEN3
860 "(%d, %d): No features set and no custom kernel in use!\n",
861 num_samples, num_eigenvalues);
877 SG_ERROR(
"Number of samples has to be at least 2, "
878 "better in the hundreds");
881 if (num_eigenvalues>m+n-1)
882 SG_ERROR(
"Number of Eigenvalues too large\n");
884 if (num_eigenvalues<1)
885 SG_ERROR(
"Number of Eigenvalues too small\n");
898 SelfAdjointEigenSolver<MatrixXd> eigen_solver(c_kernel_matrix);
899 REQUIRE(eigen_solver.info()==Eigen::Success,
900 "Eigendecomposition failed!\n");
901 index_t max_num_eigenvalues=eigen_solver.eigenvalues().rows();
905 for (
index_t i=0; i<num_samples; ++i)
908 for (
index_t j=0; j<num_eigenvalues; ++j)
918 *eigen_solver.eigenvalues()[max_num_eigenvalues-1-j]);
923 SG_DEBUG(
"multiple=%f, eigenvalue=%f\n", multiple,
924 eigenvalue_estimate);
926 null_samples[i]+=eigenvalue_estimate*multiple;
939 "(%d, %d): No features set and no custom kernel in use!\n",
940 num_samples, num_eigenvalues);
956 SG_ERROR(
"Number of samples has to be at least 2, "
957 "better in the hundreds");
960 if (num_eigenvalues>m+n-1)
961 SG_ERROR(
"Number of Eigenvalues too large\n");
963 if (num_eigenvalues<1)
964 SG_ERROR(
"Number of Eigenvalues too small\n");
977 SelfAdjointEigenSolver<MatrixXd> eigen_solver(c_kernel_matrix);
978 REQUIRE(eigen_solver.info()==Eigen::Success,
979 "Eigendecomposition failed!\n");
980 index_t max_num_eigenvalues=eigen_solver.eigenvalues().rows();
992 SG_DEBUG(
"Using Gaussian samples ~ N(0,%f)\n", std_dev*std_dev);
996 for (
index_t i=0; i<num_samples; ++i)
999 for (
index_t j=0; j<num_eigenvalues; ++j)
1012 *eigen_solver.eigenvalues()[max_num_eigenvalues-1-j]);
1015 multiple-=inv_rho_x_y;
1017 SG_DEBUG(
"multiple=%f, eigenvalue=%f\n", multiple,
1018 eigenvalue_estimate);
1020 null_samples[i]+=eigenvalue_estimate*multiple;
1026 null_samples.
scale(0.5);
1028 return null_samples;
1030 #endif // HAVE_EIGEN3
1036 "No features set and no custom kernel in use!\n");
1048 REQUIRE(
m_m==n,
"Only possible with equal number of samples "
1049 "from both distribution!\n")
1057 if (
m_m!=num_data/2)
1058 SG_ERROR(
"Currently, only equal sample sizes are supported\n");
1064 "to be BIASED. Please ensure that! To get rid of warning,"
1065 "call %s::set_statistic_type(BIASED_DEPRECATED)\n",
get_name());
1084 mean_mmd=2.0/m_m*(1.0-1.0/m_m*mean_mmd);
1095 if (i==j || m_m+i==j || m_m+j==i)
1105 var_mmd*=2.0/m_m/(m_m-1)*1.0/m_m/(m_m-1);
1119 num_samples_spectrum)
1125 index_t num_eigenvalues_spectrum)
virtual bool init(CFeatures *lhs, CFeatures *rhs)
virtual float64_t compute_threshold(float64_t alpha)
virtual float64_t compute_p_value(float64_t statistic)
index_t m_num_samples_spectrum
EQuadraticMMDType m_statistic_type
virtual const char * get_name() const
float64_t compute_variance_under_alternative()
SGVector< float64_t > compute_biased_statistic_variance(int m, int n)
virtual ~CQuadraticTimeMMD()
The Custom Kernel allows for custom user provided kernel matrices.
float64_t compute_unbiased_statistic(int m, int n)
virtual float64_t compute_statistic()
void set_statistic_type(EQuadraticMMDType statistic_type)
SGVector< float64_t > fit_null_gamma()
virtual int32_t get_num_vectors() const =0
float64_t compute_incomplete_statistic(int n)
static float64_t randn_double()
index_t m_num_eigenvalues_spectrum
float64_t kernel(int32_t idx_a, int32_t idx_b)
Kernel two sample test base class. Provides an interface for performing a two-sample test using a ker...
void scale(T alpha)
Scale vector inplace.
static float64_t gamma_cdf(float64_t x, float64_t a, float64_t b)
virtual int32_t get_num_vec_lhs()
SGMatrix< float64_t > get_kernel_matrix()
static float64_t floor(float64_t d)
SGVector< float64_t > sample_null_spectrum_DEPRECATED(index_t num_samples, index_t num_eigenvalues)
float64_t compute_variance_under_null()
static void qsort(T *output, int32_t size)
SGVector< float64_t > sample_null_spectrum(index_t num_samples, index_t num_eigenvalues)
void set_num_eigenvalues_spectrum(index_t num_eigenvalues_spectrum)
SGVector< float64_t > compute_unbiased_statistic_variance(int m, int n)
The Combined kernel is used to combine a number of kernels into a single CombinedKernel object by lin...
void set_num_samples_spectrum(index_t num_samples_spectrum)
virtual int32_t get_num_vec_rhs()
index_t find_position_to_insert(T element)
virtual float64_t compute_p_value(float64_t statistic)
all of classes and functions are contained in the shogun namespace
virtual EKernelType get_kernel_type()=0
The class Features is the base class of all feature objects.
virtual SGVector< float64_t > compute_variance()
ENullApproximationMethod m_null_approximation_method
static float64_t inverse_gamma_cdf(float64_t p, float64_t a, float64_t b)
SGVector< float64_t > compute_incomplete_statistic_variance(int n)
static float32_t sqrt(float32_t x)
virtual float64_t compute_threshold(float64_t alpha)
static int32_t pow(bool x, int32_t n)
float64_t compute_biased_statistic(int m, int n)