SHOGUN
v2.0.0
Main Page
Related Pages
Modules
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
src
shogun
statistics
QuadraticTimeMMD.h
Go to the documentation of this file.
1
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Heiko Strathmann
 */
9
10
#ifndef __QUADRACTIMEMMD_H_
11
#define __QUADRACTIMEMMD_H_
12
13
#include <
shogun/statistics/KernelTwoSampleTestStatistic.h
>
14
15
namespace
shogun
16
{
17
18
class
CFeatures;
19
class
CKernel;
20
22
/** Selector for the statistic variant of CQuadraticTimeMMD.
 *
 * A value of this type is passed to CQuadraticTimeMMD::set_statistic_type()
 * and held in CQuadraticTimeMMD::m_statistic_type.
 *
 * The numeric values are spelled out explicitly; they are the same values
 * the enumerators receive implicitly (first enumerator 0, then +1).
 */
enum EQuadraticMMDType
{
	BIASED = 0,
	UNBIASED = 1
};
26
84
class
CQuadraticTimeMMD
:
public
CKernelTwoSampleTestStatistic
85
{
86
public
:
87
CQuadraticTimeMMD
();
88
99
CQuadraticTimeMMD
(
CKernel
* kernel,
CFeatures
* p_and_q,
index_t
q_start);
100
112
CQuadraticTimeMMD
(
CKernel
* kernel,
CFeatures
* p,
CFeatures
* q);
113
114
virtual
~CQuadraticTimeMMD
();
115
122
virtual
float64_t
compute_statistic
();
123
135
virtual
float64_t
compute_p_value
(
float64_t
statistic);
136
147
virtual
float64_t
compute_threshold
(
float64_t
alpha);
148
149
inline
virtual
const
char
*
get_name
()
const
150
{
151
return
"QuadraticTimeMMD"
;
152
};
153
154
#ifdef HAVE_LAPACK
155
/* returns a set of samples of an estimate of the null distribution
156
* using the Eigen-spectrum of the centered kernel matrix of the merged
157
* samples of p and q. May be used to compute p_value (easy)
158
*
159
* kernel matrix needs to be stored in memory
160
*
161
* Note that the provided statistic HAS to be the biased version
162
* (see paper for details). Note that m*Null-distribution is returned,
163
* which is fine since the statistic is also m*MMD:
164
*
165
* Works well if the kernel matrix is NOT diagonal dominant.
166
* See Gretton, A., Fukumizu, K., & Harchaoui, Z. (2011).
167
* A fast, consistent kernel two-sample test.
168
*
169
* @param num_samples number of samples to draw
170
* @param num_eigenvalues number of eigenvalues to use to draw samples
171
* Maximum number of 2m-1 where m is the size of both sets of samples.
172
* It is usually safe to use a smaller number since they decay very
173
* fast, however, a conservative approach would be to use all (-1 does
174
* this). See paper for details.
175
* @return samples from the estimated null distribution
176
*/
177
SGVector<float64_t>
sample_null_spectrum
(
index_t
num_samples,
178
index_t
num_eigenvalues);
179
#endif // HAVE_LAPACK
180
187
void
set_num_samples_sepctrum
(
index_t
num_samples_spectrum);
188
195
void
set_num_eigenvalues_spectrum
(
index_t
num_eigenvalues_spectrum);
196
198
void
set_statistic_type
(
EQuadraticMMDType
statistic_type);
199
220
SGVector<float64_t>
fit_null_gamma
();
221
222
protected
:
224
virtual
float64_t
compute_unbiased_statistic
();
225
227
virtual
float64_t
compute_biased_statistic
();
228
229
private
:
230
void
init();
231
232
protected
:
234
index_t
m_num_samples_spectrum
;
235
237
index_t
m_num_eigenvalues_spectrum
;
238
240
EQuadraticMMDType
m_statistic_type
;
241
};
242
243
}
244
245
#endif
/* __QUADRACTIMEMMD_H_ */
SHOGUN
Machine Learning Toolbox - Documentation