SHOGUN
v3.0.0
Main Page
Related Pages
Modules
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
src
shogun
statistics
TwoDistributionsTestStatistic.cpp
Go to the documentation of this file.
1
/*
2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 3 of the License, or
5
* (at your option) any later version.
6
*
7
* Written (W) 2012-2013 Heiko Strathmann
8
*/
9
10
#include <
shogun/statistics/TwoDistributionsTestStatistic.h
>
11
#include <
shogun/features/Features.h
>
12
13
using namespace
shogun;
14
15
/* Default constructor. Chains to the CTestStatistic base constructor and
 * leaves all member setup to init(). */
CTwoDistributionsTestStatistic::CTwoDistributionsTestStatistic()
	: CTestStatistic()
{
	init();
}
20
21
/* Constructor for samples that are already appended into one features
 * object.
 *
 * p_and_q: features object stored in m_p_and_q; a reference is taken on it
 *          with SG_REF and released again in the destructor
 * m:       stored in m_m (registered in init() as "Index of first sample
 *          of q")
 */
CTwoDistributionsTestStatistic::CTwoDistributionsTestStatistic(
		CFeatures* p_and_q, index_t m)
	: CTestStatistic()
{
	/* register parameters and reset members before overwriting them */
	init();

	m_m=m;

	/* p_and_q and m_p_and_q name the same object, so referencing it through
	 * the argument before storing it is the same as referencing the member
	 * afterwards */
	SG_REF(p_and_q);
	m_p_and_q=p_and_q;
}
32
33
/* Constructor for samples given as two separate features objects.
 *
 * The merged copy returned by p->create_merged_copy(q) is stored in
 * m_p_and_q and referenced with SG_REF; m_m is set to the number of vectors
 * in p.
 *
 * p: samples from p; must not be NULL since it is dereferenced here
 * q: samples from q; handed unchanged to create_merged_copy()
 *    NOTE(review): presumably must be non-NULL and of a type compatible
 *    with p; this depends on the create_merged_copy() implementation,
 *    confirm there
 */
CTwoDistributionsTestStatistic::CTwoDistributionsTestStatistic(
		CFeatures* p, CFeatures* q)
	: CTestStatistic()
{
	init();

	/* fail with a readable error instead of dereferencing a NULL pointer */
	REQUIRE(p, "CTwoDistributionsTestStatistic::"
			"CTwoDistributionsTestStatistic(): No features p!\n");

	m_p_and_q=p->create_merged_copy(q);
	SG_REF(m_p_and_q);

	m_m=p->get_num_vectors();
}
44
45
/* Destructor. Releases the reference this object holds on the appended
 * samples. */
CTwoDistributionsTestStatistic::~CTwoDistributionsTestStatistic()
{
	SG_UNREF(m_p_and_q);
}
49
50
void
CTwoDistributionsTestStatistic::init()
51
{
52
SG_ADD
((
CSGObject
**)&
m_p_and_q
,
"p_and_q"
,
"Concatenated samples p and q"
,
53
MS_NOT_AVAILABLE
);
54
SG_ADD
(&
m_m
,
"m"
,
"Index of first sample of q"
,
55
MS_NOT_AVAILABLE
);
56
57
m_p_and_q
=NULL;
58
m_m
=0;
59
}
60
61
/* Draws m_bootstrap_iterations values of the statistic with the samples of
 * p and q shuffled together.
 *
 * In every iteration a random permutation of all sample indices is added
 * as a subset on m_p_and_q, compute_statistic() is evaluated on the
 * permuted data, and the subset is removed again.
 *
 * Returns a vector of length m_bootstrap_iterations holding the statistics
 * in the order they were computed (not sorted).
 *
 * Raises an error via REQUIRE if no appended features are set.
 */
SGVector<float64_t> CTwoDistributionsTestStatistic::bootstrap_null()
{
	SG_DEBUG("entering CTwoDistributionsTestStatistic::bootstrap_null()\n")

	REQUIRE(m_p_and_q, "CTwoDistributionsTestStatistic::bootstrap_null(): "
			"No appended features p and q!\n");

	/* compute bootstrap statistics for null distribution */
	SGVector<float64_t> results(m_bootstrap_iterations);

	/* memory for index permutations. Adding of subset has to happen
	 * inside the loop since it may be copied if there already is one set.
	 *
	 * The permutation spans every vector currently in m_p_and_q rather
	 * than 2*m_m: with unequal numbers of samples in p and q, 2*m_m either
	 * leaves samples of q out of the shuffle or yields indices past the
	 * end of the features */
	SGVector<index_t> ind_permutation(m_p_and_q->get_num_vectors());
	ind_permutation.range_fill();

	for (index_t i=0; i<m_bootstrap_iterations; ++i)
	{
		/* idea: merge features of p and q, shuffle, and compute statistic.
		 * This is done using subsets here */

		/* create index permutation and add as subset. This will mix samples
		 * from p and q */
		SGVector<index_t>::permute_vector(ind_permutation);

		/* compute statistic for this permutation of mixed samples */
		m_p_and_q->add_subset(ind_permutation);
		results[i]=compute_statistic();
		m_p_and_q->remove_subset();
	}

	SG_DEBUG("leaving CTwoDistributionsTestStatistic::bootstrap_null()\n")
	return results;
}
94
95
/* Computes a p-value for the given statistic from bootstrapped null
 * samples.
 *
 * statistic: value whose position within the sorted null samples is
 *            looked up
 *
 * Returns one minus the fraction given by the insert position of the
 * statistic divided by the number of null samples. For any approximation
 * method other than BOOTSTRAP an error is raised via SG_ERROR and 0 is the
 * fallback return value.
 */
float64_t CTwoDistributionsTestStatistic::compute_p_value(
		float64_t statistic)
{
	/* guard clause: only the bootstrap approximation is handled here */
	if (m_null_approximation_method!=BOOTSTRAP)
	{
		SG_ERROR("CTwoDistributionsTestStatistics::compute_p_value(): Unknown"
				" method to approximate null distribution!\n");
		return 0;
	}

	/* bootstrap a bunch of values from the null distribution and sort them
	 * so that the insert position below corresponds to a percentile */
	SGVector<float64_t> null_samples=bootstrap_null();
	null_samples.qsort();

	/* kept as float64_t so that the division below is done in floating
	 * point */
	float64_t position=null_samples.find_position_to_insert(statistic);

	return 1.0-position/null_samples.vlen;
}
120
121
/* Computes the threshold of the statistic for a given test level from
 * bootstrapped null samples.
 *
 * alpha: test level; the (1-alpha) quantile of the null samples is
 *        returned
 *
 * Returns the element at position floor(n*(1-alpha)) of the sorted null
 * samples, where n is their number; the position is clamped to [0, n-1].
 * For any approximation method other than BOOTSTRAP an error is raised via
 * SG_ERROR and 0 is the fallback return value. An error is also raised via
 * REQUIRE if bootstrapping produced no samples.
 */
float64_t CTwoDistributionsTestStatistic::compute_threshold(
		float64_t alpha)
{
	float64_t result=0;

	if (m_null_approximation_method==BOOTSTRAP)
	{
		/* bootstrap a bunch of MMD values from null distribution */
		SGVector<float64_t> values=bootstrap_null();

		REQUIRE(values.vlen>0,
				"CTwoDistributionsTestStatistic::compute_threshold(): "
				"No bootstrap samples to compute threshold from!\n");

		/* bootstrap_null() returns the samples in the order they were
		 * drawn; a quantile can only be read off a sorted vector */
		values.qsort();

		/* position of the (1-alpha) quantile, clamped so that alpha=0 (or
		 * an alpha outside of [0,1]) cannot index outside of the vector */
		index_t quantile_idx=
				static_cast<index_t>(CMath::floor(values.vlen*(1-alpha)));
		if (quantile_idx<0)
			quantile_idx=0;
		if (quantile_idx>=values.vlen)
			quantile_idx=values.vlen-1;

		/* return value of (1-alpha) quantile */
		result=values[quantile_idx];
	}
	else
	{
		SG_ERROR("CTwoDistributionsTestStatistics::compute_threshold():"
				"Unknown method to approximate null distribution!\n");
	}

	return result;
}
142
143
/* Replaces the stored appended samples.
 *
 * p_and_q: new features object; a reference is taken on it before the
 *          reference on the previously stored object is released, so
 *          passing the currently stored instance is safe
 */
void CTwoDistributionsTestStatistic::set_p_and_q(CFeatures* p_and_q)
{
	CFeatures* previous=m_p_and_q;

	/* ref before unref to avoid problems when instances are equal */
	SG_REF(p_and_q);
	m_p_and_q=p_and_q;
	SG_UNREF(previous);
}
150
151
/* Returns the stored appended samples. SG_REF is applied to the object
 * before it is handed out, so the caller receives its own reference. */
CFeatures* CTwoDistributionsTestStatistic::get_p_and_q()
{
	CFeatures* features=m_p_and_q;
	SG_REF(features);
	return features;
}
156
SHOGUN
Machine Learning Toolbox - Documentation