SHOGUN
v2.0.0
Main Page
Related Pages
Modules
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
src
shogun
statistics
TwoDistributionsTestStatistic.cpp
Go to the documentation of this file.
1
/*
2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 3 of the License, or
5
* (at your option) any later version.
6
*
7
* Written (W) 2012 Heiko Strathmann
8
*/
9
10
#include <
shogun/statistics/TwoDistributionsTestStatistic.h
>
11
#include <
shogun/features/Features.h
>
12
13
using namespace
shogun;
14
15
/** Default constructor.
 *
 * Constructs the CTestStatistic base part and then calls init(), which
 * registers the members of this class and resets them (no samples set,
 * q start index zero).
 */
CTwoDistributionsTestStatistic::CTwoDistributionsTestStatistic()
	: CTestStatistic()
{
	init();
}
20
21
/** Constructor for samples that are already stored in one features object.
 *
 * @param p_and_q features object that is stored in m_p_and_q; a reference
 * to it is taken with SG_REF
 * @param q_start value stored in m_q_start (registered in init() as
 * "Index of first sample of q")
 */
CTwoDistributionsTestStatistic::CTwoDistributionsTestStatistic(
		CFeatures* p_and_q, index_t q_start)
	: CTestStatistic()
{
	/* register parameters and reset members before overwriting them */
	init();

	m_q_start=q_start;

	/* keep the passed features alive for the lifetime of this object */
	m_p_and_q=p_and_q;
	SG_REF(m_p_and_q);
}
32
33
/** Constructor for samples given as two separate features objects.
 *
 * The result of p->create_merged_copy(q) is stored in m_p_and_q (and
 * referenced with SG_REF), and m_q_start is set to the number of vectors
 * of p.
 *
 * @param p first features object; must not be NULL since it is
 * dereferenced here. A NULL pointer is reported via SG_ERROR and the
 * members are left as set by init().
 * @param q second features object, handed to create_merged_copy() of p
 */
CTwoDistributionsTestStatistic::CTwoDistributionsTestStatistic(
		CFeatures* p, CFeatures* q) :
		CTestStatistic()
{
	init();

	if (!p)
	{
		/* report the problem instead of dereferencing a NULL pointer */
		SG_ERROR("CTwoDistributionsTestStatistic::"
				"CTwoDistributionsTestStatistic(): samples from p must "
				"not be NULL!\n");
	}
	else
	{
		m_p_and_q=p->create_merged_copy(q);
		SG_REF(m_p_and_q);

		/* the merged copy is built from p, so the number of vectors of
		 * p is used as start index of q */
		m_q_start=p->get_num_vectors();
	}
}
44
45
/** Destructor: releases the reference on the stored samples (m_p_and_q)
 * via SG_UNREF. */
CTwoDistributionsTestStatistic::~CTwoDistributionsTestStatistic()
{
	SG_UNREF(m_p_and_q);
}
49
50
void
CTwoDistributionsTestStatistic::init()
51
{
52
SG_ADD
((
CSGObject
**)&
m_p_and_q
,
"p_and_q"
,
"Concatenated samples p and q"
,
53
MS_NOT_AVAILABLE
);
54
SG_ADD
(&
m_q_start
,
"q_start"
,
"Index of first sample of q"
,
55
MS_NOT_AVAILABLE
);
56
57
m_p_and_q
=NULL;
58
m_q_start
=0;
59
}
60
61
/** Computes bootstrap samples of the statistic for the null distribution.
 *
 * An index vector covering all vectors of m_p_and_q is added as subset,
 * then permuted m_bootstrap_iterations times; after each permutation
 * compute_statistic() is evaluated and its value stored. The subset is
 * removed again before returning.
 *
 * If no samples are set (m_p_and_q is NULL, as after default
 * construction), this is reported via SG_ERROR instead of dereferencing
 * the NULL pointer.
 *
 * @return vector with m_bootstrap_iterations values of the statistic
 */
SGVector<float64_t> CTwoDistributionsTestStatistic::bootstrap_null()
{
	SG_DEBUG("entering CTwoDistributionsTestStatistic::bootstrap_null()\n");

	if (!m_p_and_q)
	{
		SG_ERROR("CTwoDistributionsTestStatistic::bootstrap_null(): "
				"No samples set, cannot bootstrap null distribution!\n");

		/* only reached if the error above does not abort execution */
		return SGVector<float64_t>();
	}

	/* compute bootstrap statistics for null distribution */
	SGVector<float64_t> results(m_bootstrap_iterations);

	/* memory for index permutations is allocated once, outside the loop
	 * (allocating per iteration would slow down the loop) */
	SGVector<index_t> ind_permutation(m_p_and_q->get_num_vectors());
	ind_permutation.range_fill();
	m_p_and_q->add_subset(ind_permutation);

	for (index_t i=0; i<m_bootstrap_iterations; ++i)
	{
		/* idea: merge features of p and q, shuffle, and compute statistic.
		 * This is done using subsets here */

		/* permute the index vector that was added as subset above. This
		 * will mix samples from p and q.
		 * NOTE(review): relies on the added subset sharing memory with
		 * ind_permutation - confirm against add_subset() */
		SGVector<index_t>::permute_vector(ind_permutation);

		/* compute statistic for this permutation of mixed samples */
		results[i]=compute_statistic();
	}

	/* clean up */
	m_p_and_q->remove_subset();

	SG_DEBUG("leaving CTwoDistributionsTestStatistic::bootstrap_null()\n");
	return results;
}
92
93
/** Computes a p-value for a given value of the statistic.
 *
 * Only the BOOTSTRAP null approximation method is handled here: the null
 * distribution is sampled with bootstrap_null(), the samples are sorted
 * and the position at which the statistic would be inserted is turned
 * into a p-value as 1 - position/number_of_samples.
 *
 * @param statistic value of the statistic to compute the p-value for
 * @return p-value for BOOTSTRAP; for any other method an error is
 * reported via SG_ERROR and the initial value 0 is what would be returned
 */
float64_t CTwoDistributionsTestStatistic::compute_p_value(
		float64_t statistic)
{
	float64_t result=0;

	if (m_null_approximation_method==BOOTSTRAP)
	{
		/* bootstrap a bunch of MMD values from null distribution */
		SGVector<float64_t> values=bootstrap_null();

		/* find out percentile of parameter "statistic" in null
		 * distribution; position is held as float64_t so that the
		 * division below is not an integer division */
		CMath::qsort(values);
		float64_t position=CMath::find_position_to_insert(values, statistic);

		/* return corresponding p-value */
		result=1.0-position/values.vlen;
	}
	else
	{
		SG_ERROR("CTwoDistributionsTestStatistic::compute_p_value(): Unknown "
				"method to approximate null distribution!\n");
	}

	return result;
}
118
119
/** Computes the threshold of the statistic for a given test level.
 *
 * Only the BOOTSTRAP null approximation method is handled here: the null
 * distribution is sampled with bootstrap_null(), the samples are sorted
 * and the element at index floor(number_of_samples*(1-alpha)) is
 * returned. The index is clamped to the valid range, so alpha=0 yields
 * the largest sample rather than reading past the end of the vector.
 *
 * @param alpha test level; the (1-alpha) quantile of the bootstrapped
 * null samples is returned
 * @return threshold for BOOTSTRAP; if there are no bootstrap samples or
 * the method is not BOOTSTRAP, an error is reported via SG_ERROR and the
 * initial value 0 is what would be returned
 */
float64_t CTwoDistributionsTestStatistic::compute_threshold(
		float64_t alpha)
{
	float64_t result=0;

	if (m_null_approximation_method==BOOTSTRAP)
	{
		/* bootstrap a bunch of MMD values from null distribution */
		SGVector<float64_t> values=bootstrap_null();

		if (values.vlen<=0)
		{
			SG_ERROR("CTwoDistributionsTestStatistic::compute_threshold(): "
					"No bootstrap samples available to compute threshold!\n");
		}
		else
		{
			/* a quantile can only be read off sorted values */
			CMath::qsort(values);

			/* index of (1-alpha) quantile, kept inside [0, vlen-1] */
			index_t quantile_index=static_cast<index_t>(
					CMath::floor(values.vlen*(1-alpha)));
			if (quantile_index<0)
				quantile_index=0;
			else if (quantile_index>=values.vlen)
				quantile_index=values.vlen-1;

			/* return value of (1-alpha) quantile */
			result=values[quantile_index];
		}
	}
	else
	{
		SG_ERROR("CTwoDistributionsTestStatistic::compute_threshold(): "
				"Unknown method to approximate null distribution!\n");
	}

	return result;
}
SHOGUN
Machine Learning Toolbox - Documentation