44 using namespace Eigen;
74 void CQuadraticTimeMMD::init()
77 " for spectrum method null-distribution approximation",
80 " Eigenvalues for spectrum method null-distribution approximation",
105 row_wise_sum_squared_sum_symmetric_block(0, m);
109 row_wise_sum_symmetric_block(m, n);
114 row_col_wise_sum_block(0, m, m, n);
119 std::copy(xx_sum_sq_sum_rowwise.
matrix, xx_sum_sq_sum_rowwise.
matrix+m,
123 std::copy(xy_sum_rowcolwise.
vector, xy_sum_rowcolwise.
vector+m,
127 std::copy(xy_sum_rowcolwise.
vector+m, xy_sum_rowcolwise.
vector+m+n,
132 xx_sq_sum+=xx_sum_sq_sum_rowwise(i, 1);
136 xx_sum+=xx_sum_rowwise[i];
140 yy_sum+=yy_sum_rowwise[i];
144 xy_sum+=xy_sum_rowwise[i];
161 SG_INFO(
"Computed statistic!\n");
162 SG_DEBUG(
"first=%f, second=%f, third=%f\n", first, second, third);
174 kappa_1+=
CMath::sq(xx_sum_rowwise[i]/(m-1));
180 float64_t var_null=2*(kappa_0-2*kappa_1+kappa_2);
182 SG_INFO(
"Computed variance under null!\n");
183 SG_DEBUG(
"kappa_0=%f, kappa_1=%f, kappa_2=%f\n", kappa_0, kappa_1, kappa_2);
194 float64_t term=xx_sum_rowwise[i]/(m-1)-xy_sum_rowwise[i]/n;
208 float64_t term=yy_sum_rowwise[i]/(n-1)-xy_sum_colwise[i]/m;
220 float64_t var_alt=4*rho_x*(alt_var_first-alt_var_second)+
221 4*rho_y*(alt_var_third-alt_var_fourth);
223 SG_INFO(
"Computed variance under alternative!\n");
224 SG_DEBUG(
"first=%f, second=%f, third=%f, fourth=%f\n", alt_var_first,
225 alt_var_second, alt_var_third, alt_var_fourth);
228 results[0]=statistic;
251 row_wise_sum_squared_sum_symmetric_block(0, m,
false);
255 row_wise_sum_symmetric_block(m, n,
false);
260 row_col_wise_sum_block(0, m, m, n);
265 std::copy(xx_sum_sq_sum_rowwise.
matrix, xx_sum_sq_sum_rowwise.
matrix+m,
269 std::copy(xy_sum_rowcolwise.
vector, xy_sum_rowcolwise.
vector+m,
273 std::copy(xy_sum_rowcolwise.
vector+m, xy_sum_rowcolwise.
vector+m+n,
278 xx_sq_sum+=xx_sum_sq_sum_rowwise(i, 1);
282 xx_sum+=xx_sum_rowwise[i];
286 yy_sum+=yy_sum_rowwise[i];
290 xy_sum+=xy_sum_rowwise[i];
307 SG_INFO(
"Computed statistic!\n");
308 SG_DEBUG(
"first=%f, second=%f, third=%f\n", first, second, third);
326 float64_t var_null=2*(kappa_0-2*kappa_1+kappa_2);
328 SG_INFO(
"Computed variance under null!\n");
329 SG_DEBUG(
"kappa_0=%f, kappa_1=%f, kappa_2=%f\n", kappa_0, kappa_1, kappa_2);
340 float64_t term=xx_sum_rowwise[i]/m-xy_sum_rowwise[i]/n;
354 float64_t term=yy_sum_rowwise[i]/n-xy_sum_colwise[i]/m;
366 float64_t var_alt=4*rho_x*(alt_var_first-alt_var_second)+
367 4*rho_y*(alt_var_third-alt_var_fourth);
369 SG_INFO(
"Computed variance under alternative!\n");
370 SG_DEBUG(
"first=%f, second=%f, third=%f, fourth=%f\n", alt_var_first,
371 alt_var_second, alt_var_third, alt_var_fourth);
374 results[0]=statistic;
397 row_wise_sum_squared_sum_symmetric_block(0, n);
401 row_wise_sum_symmetric_block(n, n);
406 row_col_wise_sum_block(0, n, n, n,
true);
411 std::copy(xx_sum_sq_sum_rowwise.
matrix, xx_sum_sq_sum_rowwise.
matrix+n,
415 std::copy(xy_sum_rowcolwise.
vector, xy_sum_rowcolwise.
vector+n,
419 std::copy(xy_sum_rowcolwise.
vector+n, xy_sum_rowcolwise.
vector+2*n,
424 xx_sq_sum+=xx_sum_sq_sum_rowwise(i, 1);
428 xx_sum+=xx_sum_rowwise[i];
432 yy_sum+=yy_sum_rowwise[i];
436 xy_sum+=xy_sum_rowwise[i];
453 SG_INFO(
"Computed statistic!\n");
454 SG_DEBUG(
"first=%f, second=%f, third=%f\n", first, second, third);
466 kappa_1+=
CMath::sq(xx_sum_rowwise[i]/(n-1));
472 float64_t var_null=2*(kappa_0-2*kappa_1+kappa_2);
474 SG_INFO(
"Computed variance under null!\n");
475 SG_DEBUG(
"kappa_0=%f, kappa_1=%f, kappa_2=%f\n", kappa_0, kappa_1, kappa_2);
486 float64_t term=(xx_sum_rowwise[i]-xy_sum_rowwise[i])/(n-1);
500 float64_t term=(yy_sum_rowwise[i]-xy_sum_colwise[i])/(n-1);
512 float64_t var_alt=4*rho_x*(alt_var_first-alt_var_second)+
513 4*rho_y*(alt_var_third-alt_var_fourth);
515 SG_INFO(
"Computed variance under alternative!\n");
516 SG_DEBUG(
"first=%f, second=%f, third=%f, fourth=%f\n", alt_var_first,
517 alt_var_second, alt_var_third, alt_var_fourth);
520 results[0]=statistic;
560 SG_DEBUG(
"Computing MMD with %d samples from p and %d samples from q!\n",
572 result*=m==n ? m : (m+n);
580 result*=m==n? m : (m+n);
583 REQUIRE(m==n,
"Only possible with equal number of samples from both"
589 SG_ERROR(
"Unknown statistic type!\n");
612 SG_DEBUG(
"Computing MMD with %d samples from p and %d samples from q!\n",
636 REQUIRE(m==n,
"Only possible with equal number of samples from both"
644 SG_ERROR(
"Unknown statistic type!\n");
664 if (!multiple_kernels)
674 "multiple kernels specified, but underlying kernel is not of type "
686 CKernel* current=combined->get_kernel(i);
705 if (!multiple_kernels)
710 vars(0, 0)=result[0];
711 vars(0, 1)=result[1];
717 "multiple kernels specified, but underlying kernel is not of type "
729 CKernel* current=combined->get_kernel(i);
733 vars(i, 0)=result[0];
734 vars(i, 1)=result[1];
841 "(%d, %d): No features set and no custom kernel in use!\n",
842 num_samples, num_eigenvalues);
858 SG_ERROR(
"Number of samples has to be at least 2, "
859 "better in the hundreds");
862 if (num_eigenvalues>m+n-1)
863 SG_ERROR(
"Number of Eigenvalues too large\n");
865 if (num_eigenvalues<1)
866 SG_ERROR(
"Number of Eigenvalues too small\n");
879 SelfAdjointEigenSolver<MatrixXd> eigen_solver(c_kernel_matrix);
880 REQUIRE(eigen_solver.info()==Eigen::Success,
881 "Eigendecomposition failed!\n");
882 index_t max_num_eigenvalues=eigen_solver.eigenvalues().rows();
886 for (
index_t i=0; i<num_samples; ++i)
889 for (
index_t j=0; j<num_eigenvalues; ++j)
899 *eigen_solver.eigenvalues()[max_num_eigenvalues-1-j]);
904 SG_DEBUG(
"multiple=%f, eigenvalue=%f\n", multiple,
905 eigenvalue_estimate);
907 null_samples[i]+=eigenvalue_estimate*multiple;
920 "(%d, %d): No features set and no custom kernel in use!\n",
921 num_samples, num_eigenvalues);
937 SG_ERROR(
"Number of samples has to be at least 2, "
938 "better in the hundreds");
941 if (num_eigenvalues>m+n-1)
942 SG_ERROR(
"Number of Eigenvalues too large\n");
944 if (num_eigenvalues<1)
945 SG_ERROR(
"Number of Eigenvalues too small\n");
958 SelfAdjointEigenSolver<MatrixXd> eigen_solver(c_kernel_matrix);
959 REQUIRE(eigen_solver.info()==Eigen::Success,
960 "Eigendecomposition failed!\n");
961 index_t max_num_eigenvalues=eigen_solver.eigenvalues().rows();
973 SG_DEBUG(
"Using Gaussian samples ~ N(0,%f)\n", std_dev*std_dev);
977 for (
index_t i=0; i<num_samples; ++i)
980 for (
index_t j=0; j<num_eigenvalues; ++j)
993 *eigen_solver.eigenvalues()[max_num_eigenvalues-1-j]);
996 multiple-=inv_rho_x_y;
998 SG_DEBUG(
"multiple=%f, eigenvalue=%f\n", multiple,
999 eigenvalue_estimate);
1001 null_samples[i]+=eigenvalue_estimate*multiple;
1007 null_samples.
scale(0.5);
1009 return null_samples;
1016 "No features set and no custom kernel in use!\n");
1028 REQUIRE(
m_m==n,
"Only possible with equal number of samples "
1029 "from both distribution!\n")
1037 if (
m_m!=num_data/2)
1038 SG_ERROR(
"Currently, only equal sample sizes are supported\n");
1044 "to be BIASED. Please ensure that! To get rid of warning,"
1045 "call %s::set_statistic_type(BIASED_DEPRECATED)\n",
get_name());
1064 mean_mmd=2.0/m_m*(1.0-1.0/m_m*mean_mmd);
1075 if (i==j || m_m+i==j || m_m+j==i)
1085 var_mmd*=2.0/m_m/(m_m-1)*1.0/m_m/(m_m-1);
1099 num_samples_spectrum)
1105 index_t num_eigenvalues_spectrum)
virtual bool init(CFeatures *lhs, CFeatures *rhs)
virtual float64_t compute_threshold(float64_t alpha)
virtual float64_t compute_p_value(float64_t statistic)
index_t m_num_samples_spectrum
EQuadraticMMDType m_statistic_type
virtual const char * get_name() const
float64_t compute_variance_under_alternative()
SGVector< float64_t > compute_biased_statistic_variance(int m, int n)
virtual ~CQuadraticTimeMMD()
The Custom Kernel allows for custom user provided kernel matrices.
float64_t compute_unbiased_statistic(int m, int n)
virtual float64_t compute_statistic()
void set_statistic_type(EQuadraticMMDType statistic_type)
SGVector< float64_t > fit_null_gamma()
virtual int32_t get_num_vectors() const =0
float64_t compute_incomplete_statistic(int n)
static float64_t randn_double()
index_t m_num_eigenvalues_spectrum
float64_t kernel(int32_t idx_a, int32_t idx_b)
Kernel two sample test base class. Provides an interface for performing a two-sample test using a ker...
void scale(T alpha)
Scale vector inplace.
static float64_t gamma_cdf(float64_t x, float64_t a, float64_t b)
virtual int32_t get_num_vec_lhs()
SGMatrix< float64_t > get_kernel_matrix()
static float64_t floor(float64_t d)
SGVector< float64_t > sample_null_spectrum_DEPRECATED(index_t num_samples, index_t num_eigenvalues)
float64_t compute_variance_under_null()
static void qsort(T *output, int32_t size)
SGVector< float64_t > sample_null_spectrum(index_t num_samples, index_t num_eigenvalues)
void set_num_eigenvalues_spectrum(index_t num_eigenvalues_spectrum)
SGVector< float64_t > compute_unbiased_statistic_variance(int m, int n)
The Combined kernel is used to combine a number of kernels into a single CombinedKernel object by lin...
static float64_t gamma_inverse_cdf(float64_t p, float64_t a, float64_t b)
void set_num_samples_spectrum(index_t num_samples_spectrum)
virtual int32_t get_num_vec_rhs()
index_t find_position_to_insert(T element)
virtual float64_t compute_p_value(float64_t statistic)
all of classes and functions are contained in the shogun namespace
virtual EKernelType get_kernel_type()=0
The class Features is the base class of all feature objects.
virtual SGVector< float64_t > compute_variance()
ENullApproximationMethod m_null_approximation_method
SGVector< float64_t > compute_incomplete_statistic_variance(int n)
static float32_t sqrt(float32_t x)
virtual float64_t compute_threshold(float64_t alpha)
static int32_t pow(bool x, int32_t n)
float64_t compute_biased_statistic(int m, int n)