SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
QuadraticTimeMMD.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2012 Heiko Strathmann
8  */
9 
10 #ifndef __QUADRACTIMEMMD_H_
11 #define __QUADRACTIMEMMD_H_
12 
14 
15 namespace shogun
16 {
17 
18 class CFeatures;
19 class CKernel;
20 
23 {
25 };
26 
85 {
86  public:
88 
99  CQuadraticTimeMMD(CKernel* kernel, CFeatures* p_and_q, index_t q_start);
100 
112  CQuadraticTimeMMD(CKernel* kernel, CFeatures* p, CFeatures* q);
113 
114  virtual ~CQuadraticTimeMMD();
115 
122  virtual float64_t compute_statistic();
123 
135  virtual float64_t compute_p_value(float64_t statistic);
136 
147  virtual float64_t compute_threshold(float64_t alpha);
148 
/* @return the constant string literal "QuadraticTimeMMD" */
149  inline virtual const char* get_name() const
150  {
151  return "QuadraticTimeMMD";
152  };
153 
154 #ifdef HAVE_LAPACK
155  /* returns a set of samples of an estimate of the null distribution
156  * using the Eigen-spectrum of the centered kernel matrix of the merged
157  * samples of p and q. May be used to compute p_value (easy)
158  *
159  * kernel matrix needs to be stored in memory
160  *
161  * Note that the provided statistic HAS to be the biased version
162  * (see paper for details). Note that m*Null-distribution is returned,
163  * which is fine since the statistic is also m*MMD.
164  *
165  * Works well if the kernel matrix is NOT diagonally dominant.
166  * See Gretton, A., Fukumizu, K., & Harchaoui, Z. (2011).
167  * A fast, consistent kernel two-sample test.
168  *
169  * @param num_samples number of samples to draw
170  * @param num_eigenvalues number of eigenvalues to use to draw samples
171  * The maximum is 2m-1, where m is the size of both sets of samples.
172  * It is usually safe to use a smaller number since they decay very
173  * fast, however, a conservative approach would be to use all (-1 does
174  * this). See paper for details.
175  * @return samples from the estimated null distribution
176  */
178  index_t num_eigenvalues);
179 #endif // HAVE_LAPACK
180 
187  void set_num_samples_sepctrum(index_t num_samples_spectrum);
188 
195  void set_num_eigenvalues_spectrum(index_t num_eigenvalues_spectrum);
196 
198  void set_statistic_type(EQuadraticMMDType statistic_type);
199 
221 
222  protected:
225 
228 
229  private:
230  void init();
231 
232  protected:
235 
238 
241 };
242 
243 }
244 
245 #endif /* __QUADRACTIMEMMD_H_ */

SHOGUN Machine Learning Toolbox - Documentation