SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TwoDistributionsTestStatistic.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2012 Heiko Strathmann
8  */
9 
12 
13 using namespace shogun;
14 
17 {
18  init();
19 }
20 
22  CFeatures* p_and_q,
23  index_t q_start) : CTestStatistic()
24 {
   /* Constructor body: init() first resets and registers the members,
    * afterwards the passed feature object and the start index of q are
    * stored.
    * NOTE(review): listing lines 21 (start of the signature) and 28 are
    * missing from this extract - confirm against the original file. */
25  init();
26 
   /* registered in init() as "Concatenated samples p and q" */
27  m_p_and_q=p_and_q;
29 
   /* registered in init() as "Index of first sample of q" */
30  m_q_start=q_start;
31 }
32 
34  CFeatures* p, CFeatures* q) :
36 {
37  init();
38 
41 
43 }
44 
46 {
48 }
49 
/**
 * Registers the members m_p_and_q (as "p_and_q") and m_q_start (as
 * "q_start") through SG_ADD and then resets them to NULL and 0.
 * The constructor bodies visible in this file call this before doing
 * anything else.
 *
 * NOTE(review): the trailing arguments of both SG_ADD calls (listing
 * lines 53 and 55) are missing from this extract.
 */
50 void CTwoDistributionsTestStatistic::init()
51 {
52  SG_ADD((CSGObject**)&m_p_and_q, "p_and_q", "Concatenated samples p and q",
54  SG_ADD(&m_q_start, "q_start", "Index of first sample of q",
56 
   /* defaults: no feature object attached, q_start at index 0 */
57  m_p_and_q=NULL;
58  m_q_start=0;
59 }
60 
62 {
   /* Body of the method that logs itself as bootstrap_null(): it calls
    * compute_statistic() m_bootstrap_iterations times, shuffling the sample
    * indices before each call, and returns the collected values in
    * `results`.
    * NOTE(review): the signature (listing line 61), the declaration of
    * `results` (line 66) and the clean-up statement (line 87) are missing
    * from this extract. */
63  SG_DEBUG("entering CTwoDistributionsTestStatistic::bootstrap_null()\n");
64 
65  /* compute bootstrap statistics for null distribution */
67 
68  /* memory for index permutations, allocated once before the loop
   * (allocating it inside the loop would slow the loop down) */
69  SGVector<index_t> ind_permutation(m_p_and_q->get_num_vectors());
70  ind_permutation.range_fill();
   /* the subset is added only once, here; the loop below just reshuffles
    * the vector that was passed in */
71  m_p_and_q->add_subset(ind_permutation);
72 
73  for (index_t i=0; i<m_bootstrap_iterations; ++i)
74  {
75  /* idea: merge features of p and q, shuffle, and compute statistic.
76  * This is done using subsets here */
77 
78  /* shuffle the index vector in place. No subset is added here; the
79  * vector was added as subset before the loop. Presumably the subset
   * shares this vector's memory, so the shuffle mixes samples from p
   * and q - TODO confirm.
   * NOTE(review): permute_vector is called on SGVector<int32_t> while
   * ind_permutation is SGVector<index_t>; this presumably relies on
   * index_t being int32_t - confirm. */
80  SGVector<int32_t>::permute_vector(ind_permutation);
81 
82  /* compute statistic for this permutation of mixed samples */
83  results[i]=compute_statistic();
84  }
85 
86  /* clean up */
88 
89  SG_DEBUG("leaving CTwoDistributionsTestStatistic::bootstrap_null()\n");
90  return results;
91 }
92 
94  float64_t statistic)
95 {
   /* Body of the method that names itself compute_p_value() in its error
    * message: returns 1 - (insert position of `statistic` in the sorted
    * `values`) / values.vlen, or 0 after SG_ERROR in the else branch.
    * NOTE(review): the start of the signature (listing line 93), the
    * condition of the if (line 98) and the declaration of `values`
    * (line 101) are missing from this extract. */
96  float64_t result=0;
97 
99  {
100  /* bootstrap a bunch of MMD values from null distribution */
102 
103  /* find out percentile of parameter "statistic" in null distribution */
104  CMath::qsort(values);
   /* position is kept as float64_t so the division below is carried
    * out in floating point */
105  float64_t i=CMath::find_position_to_insert(values, statistic);
106 
107  /* return corresponding p-value */
108  result=1.0-i/values.vlen;
109  }
110  else
111  {
   /* NOTE(review): the two adjacent literals are concatenated without a
    * separating space, so the message reads "Unknownmethod"; it also
    * names "CTwoDistributionsTestStatistics" whereas the class is
    * CTwoDistributionsTestStatistic. */
112  SG_ERROR("CTwoDistributionsTestStatistics::compute_p_value(): Unknown"
113  "method to approximate null distribution!\n");
114  }
115 
116  return result;
117 }
118 
120  float64_t alpha)
121 {
   /* Body of the method that names itself compute_threshold() in its
    * error message: returns the element of `values` at index
    * floor(values.vlen*(1-alpha)), or 0 after SG_ERROR in the else branch.
    * NOTE(review): the start of the signature (listing line 119), the
    * condition of the if (line 124) and the declaration of `values`
    * (line 127) are missing from this extract. */
122  float64_t result=0;
123 
125  {
126  /* bootstrap a bunch of MMD values from null distribution */
128 
129  /* return value of (1-alpha) quantile.
   * NOTE(review): no sort of `values` is visible in this extract before
   * the lookup (the p-value code calls CMath::qsort first) - confirm the
   * values are sorted, otherwise this is not a quantile.
   * NOTE(review): for alpha==0 the index equals values.vlen, which is
   * one past the last element. */
130  result=values[CMath::floor(values.vlen*(1-alpha))];
131  }
132  else
133  {
   /* NOTE(review): the two adjacent literals are concatenated without a
    * separating space ("compute_threshold():Unknown method"); the
    * message also names "CTwoDistributionsTestStatistics" whereas the
    * class is CTwoDistributionsTestStatistic. */
134  SG_ERROR("CTwoDistributionsTestStatistics::compute_threshold():"
135  "Unknown method to approximate null distribution!\n");
136  }
137 
138  return result;
139 }

SHOGUN Machine Learning Toolbox - Documentation