QuadraticTimeMMD.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2012 Heiko Strathmann
00008  */
00009 
00010 #ifndef __QUADRACTIMEMMD_H_
00011 #define __QUADRACTIMEMMD_H_
00012 
00013 #include <shogun/statistics/KernelTwoSampleTestStatistic.h>
00014 
00015 namespace shogun
00016 {
00017 
00018 class CFeatures;
00019 class CKernel;
00020 
/** Type of quadratic time MMD statistic; this is the type of
 * CQuadraticTimeMMD::m_statistic_type and the argument type of
 * CQuadraticTimeMMD::set_statistic_type().
 *
 * The explicit values below are the same ones the compiler assigns
 * implicitly, so existing integer representations stay valid.
 */
enum EQuadraticMMDType
{
    /** biased variant (see CQuadraticTimeMMD::compute_biased_statistic()) */
    BIASED = 0,
    /** unbiased variant (see CQuadraticTimeMMD::compute_unbiased_statistic()) */
    UNBIASED = 1
};
00026 
00084 class CQuadraticTimeMMD : public CKernelTwoSampleTestStatistic
00085 {
00086     public:
00087         CQuadraticTimeMMD();
00088 
00099         CQuadraticTimeMMD(CKernel* kernel, CFeatures* p_and_q, index_t m);
00100 
00112         CQuadraticTimeMMD(CKernel* kernel, CFeatures* p, CFeatures* q);
00113 
00114         virtual ~CQuadraticTimeMMD();
00115 
00122         virtual float64_t compute_statistic();
00123 
00135         virtual float64_t compute_p_value(float64_t statistic);
00136 
00147         virtual float64_t compute_threshold(float64_t alpha);
00148 
00149         virtual const char* get_name() const
00150         {
00151             return "QuadraticTimeMMD";
00152         };
00153 
00154 #ifdef HAVE_LAPACK
00155         /* returns a set of samples of an estimate of the null distribution
00156          * using the Eigen-spectrum of the centered kernel matrix of the merged
00157          * samples of p and q. May be used to compute p_value (easy)
00158          *
00159          * kernel matrix needs to be stored in memory
00160          *
00161          * Note that the provided statistic HAS to be the biased version
00162          * (see paper for details). Note that m*Null-distribution is returned,
00163          * which is fine since the statistic is also m*MMD:
00164          *
00165          * Works well if the kernel matrix is NOT diagonal dominant.
00166          * See Gretton, A., Fukumizu, K., & Harchaoui, Z. (2011).
00167          * A fast, consistent kernel two-sample test.
00168          *
00169          * @param num_samples number of samples to draw
00170          * @param num_eigenvalues number of eigenvalues to use to draw samples
00171          * Maximum number of 2m-1 where m is the size of both sets of samples.
00172          * It is usually safe to use a smaller number since they decay very
00173          * fast, however, a conservative approach would be to use all (-1 does
00174          * this). See paper for details.
00175          * @return samples from the estimated null distribution
00176          */
00177         SGVector<float64_t> sample_null_spectrum(index_t num_samples,
00178                 index_t num_eigenvalues);
00179 #endif // HAVE_LAPACK
00180 
00187         void set_num_samples_sepctrum(index_t num_samples_spectrum);
00188 
00195         void set_num_eigenvalues_spectrum(index_t num_eigenvalues_spectrum);
00196 
00198         void set_statistic_type(EQuadraticMMDType statistic_type);
00199 
00220         SGVector<float64_t> fit_null_gamma();
00221 
00222     protected:
00224         virtual float64_t compute_unbiased_statistic();
00225 
00227         virtual float64_t compute_biased_statistic();
00228 
00229     private:
00230         void init();
00231 
00232     protected:
00234         index_t m_num_samples_spectrum;
00235 
00237         index_t m_num_eigenvalues_spectrum;
00238 
00240         EQuadraticMMDType m_statistic_type;
00241 };
00242 
00243 }
00244 
00245 #endif /* __QUADRACTIMEMMD_H_ */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation