SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TwoDistributionsTestStatistic.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2012-2013 Heiko Strathmann
8  */
9 
12 
13 using namespace shogun;
14 
/* Body whose signature line is not part of this listing (presumably a
 * constructor of CTwoDistributionsTestStatistic -- confirm against the
 * original source). All it does is call init(). */
17 {
18  init();
19 }
20 
/* Constructor taking one features object plus an index.
 *
 * p_and_q: stored in m_p_and_q.
 * m:       stored in m_m.
 *
 * init() is called first because it resets m_p_and_q to NULL and m_m to 0;
 * the assignments below therefore have to come after it.
 *
 * NOTE(review): the first line of the signature and listing line 28 are
 * absent from this extract. No reference-count handling for p_and_q is
 * visible here; presumably it sits on the missing line -- confirm against
 * the original source.
 */
22  CFeatures* p_and_q,
23  index_t m) : CTestStatistic()
24 {
25  init();
26 
27  m_p_and_q=p_and_q;
29 
30  m_m=m;
31 }
32 
/* Constructor taking two separate features objects p and q.
 *
 * After init(), m_m is set to the number of vectors in p.
 *
 * NOTE(review): listing lines 33, 35, 39 and 40 are absent from this extract.
 * No assignment to m_p_and_q is visible in this body; presumably p and q are
 * combined on the missing lines -- confirm against the original source.
 */
34  CFeatures* p, CFeatures* q) :
36 {
37  init();
38 
41 
42  m_m=p->get_num_vectors();
43 }
44 
46 {
48 }
49 
/* Shared initialisation used by the constructors.
 *
 * Passes the members m_p_and_q (as "p_and_q") and m_m (as "m") to SG_ADD
 * together with a description string, then resets m_p_and_q to NULL and
 * m_m to 0.
 *
 * NOTE(review): the continuation lines of both SG_ADD calls (listing lines
 * 53 and 55) are absent from this extract, so the trailing arguments and
 * closing parentheses are not visible here.
 */
50 void CTwoDistributionsTestStatistic::init()
51 {
52  SG_ADD((CSGObject**)&m_p_and_q, "p_and_q", "Concatenated samples p and q",
54  SG_ADD(&m_m, "m", "Index of first sample of q",
56 
57  m_p_and_q=NULL;
58  m_m=0;
59 }
60 
/* Body of bootstrap_null() (name taken from the debug messages below; the
 * signature line is not part of this listing).
 *
 * Repeats m_bootstrap_iterations times: permute an index vector of length
 * 2*m_m, add it as a subset to m_p_and_q, and store the value returned by
 * compute_statistic() in results[i]. Returns results.
 *
 * Stops via REQUIRE if m_p_and_q is not set.
 *
 * NOTE(review): the declaration of `results` (listing line 69) and listing
 * line 88 inside the loop are absent from this extract. No removal of the
 * added subset is visible; presumably it sits on the missing line -- confirm
 * against the original source.
 * NOTE(review): the index vector has 2*m_m entries, so this appears to assume
 * that m_p_and_q holds m_m samples of p and m_m samples of q -- confirm.
 * NOTE(review): the vector is declared as SGVector<index_t> but permuted via
 * SGVector<int32_t>::permute_vector, which relies on both being the same
 * type -- confirm.
 */
62 {
63  SG_DEBUG("entering CTwoDistributionsTestStatistic::bootstrap_null()\n")
64 
65  REQUIRE(m_p_and_q, "CTwoDistributionsTestStatistic::bootstrap_null(): "
66  "No appended features p and q!\n");
67 
68  /* compute bootstrap statistics for null distribution */
70 
71  /* memory for index permutations. Adding of subset has to happen
72  * inside the loop since it may be copied if there already is one set */
73  SGVector<index_t> ind_permutation(2*m_m);
74  ind_permutation.range_fill();
75 
76  for (index_t i=0; i<m_bootstrap_iterations; ++i)
77  {
78  /* idea: merge features of p and q, shuffle, and compute statistic.
79  * This is done using subsets here */
80 
81  /* create index permutation and add as subset. This will mix samples
82  * from p and q */
83  SGVector<int32_t>::permute_vector(ind_permutation);
84 
85  /* compute statistic for this permutation of mixed samples */
86  m_p_and_q->add_subset(ind_permutation);
87  results[i]=compute_statistic();
89  }
90 
91  SG_DEBUG("leaving CTwoDistributionsTestStatistic::bootstrap_null()\n")
92  return results;
93 }
94 
/* compute_p_value (name taken from the error message below; the first line
 * of the signature is not part of this listing).
 *
 * statistic: value whose position among the sorted `values` is looked up.
 *
 * In the first branch, `values` is sorted, the insert position of
 * `statistic` is found, and the result is 1 - position / values.vlen.
 * The other branch reports an error through SG_ERROR; `result` is
 * initialised to 0 before the branch.
 *
 * NOTE(review): the branch condition (listing line 100) and the declaration
 * of `values` (listing line 103) are absent from this extract.
 * NOTE(review): the error message spells the class name with a trailing "s"
 * ("CTwoDistributionsTestStatistics"), unlike the class name used elsewhere
 * in this file.
 */
96  float64_t statistic)
97 {
98  float64_t result=0;
99 
101  {
102  /* bootstrap a bunch of MMD values from null distribution */
104 
105  /* find out percentile of parameter "statistic" in null distribution */
106  values.qsort();
 /* the position is held in a float64_t so that the division below is a
  * floating-point division */
107  float64_t i=values.find_position_to_insert(statistic);
108 
109  /* return corresponding p-value */
110  result=1.0-i/values.vlen;
111  }
112  else
113  {
114  SG_ERROR("CTwoDistributionsTestStatistics::compute_p_value(): Unknown"
115  " method to approximate null distribution!\n");
116  }
117 
118  return result;
119 }
120 
/* compute_threshold (name taken from the error message below; the first line
 * of the signature is not part of this listing).
 *
 * alpha: used to pick the element of `values` at index
 *        floor(values.vlen * (1 - alpha)).
 *
 * The other branch reports an error through SG_ERROR; `result` is
 * initialised to 0 before the branch.
 *
 * NOTE(review): the branch condition (listing line 126) and listing line 129
 * (where `values` would be declared) are absent from this extract. No sort of
 * `values` is visible here; presumably it happens on the missing line --
 * confirm against the original source.
 * NOTE(review): for alpha == 0 the computed index equals values.vlen, i.e.
 * one past the last element; no guard is visible in this block.
 * NOTE(review): the two adjacent string literals in the error message
 * concatenate to "...compute_threshold():Unknown method..." without a space
 * after the colon.
 */
122  float64_t alpha)
123 {
124  float64_t result=0;
125 
127  {
128  /* bootstrap a bunch of MMD values from null distribution */
130 
131  /* return value of (1-alpha) quantile */
132  result=values[CMath::floor(values.vlen*(1-alpha))];
133  }
134  else
135  {
136  SG_ERROR("CTwoDistributionsTestStatistics::compute_threshold():"
137  "Unknown method to approximate null distribution!\n");
138  }
139 
140  return result;
141 }
142 
/* Body of a setter for m_p_and_q (its signature line is not part of this
 * listing; the parameter is named p_and_q).
 *
 * Applies SG_REF to the incoming object and then stores it in m_p_and_q.
 *
 * NOTE(review): listing line 147 is absent from this extract. Given the
 * "ref before unref" comment below, it presumably releases the previously
 * held object -- confirm against the original source.
 */
144 {
145  /* ref before unref to avoid problems when instances are equal */
146  SG_REF(p_and_q);
148  m_p_and_q=p_and_q;
149 }
150 
/* Body of a getter for m_p_and_q (its signature line is not part of this
 * listing).
 *
 * Applies SG_REF to m_p_and_q and returns it.
 * NOTE(review): presumably the caller is expected to release the returned
 * object again -- confirm against the project's reference-counting rules.
 */
152 {
153  SG_REF(m_p_and_q);
154  return m_p_and_q;
155 }
156 

SHOGUN Machine Learning Toolbox - Documentation