SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TwoSampleTest.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (w) 2012-2013 Heiko Strathmann
4  * Written (w) 2014 Soumyajit De
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice, this
11  * list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright notice,
13  * this list of conditions and the following disclaimer in the documentation
14  * and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  * The views and conclusions contained in the software and documentation are those
28  * of the authors and should not be interpreted as representing official policies,
29  * either expressed or implied, of the Shogun Development Team.
30  */
31 
35 
36 using namespace shogun;
37 
39 {
    /* NOTE(review): the signature line is missing from this listing;
     * presumably the default constructor - confirm against the header.
     * The body only calls init(), which registers the members and resets
     * m_p_and_q to NULL and m_m to 0. */
40  init();
41 }
42 
45 {
    /* NOTE(review): signature line is missing from this listing; the body
     * reads two names, p_and_q and m, which are presumably its parameters. */

    /* register members and reset them before storing the arguments */
46  init();
47 
    /* keep the given pointer as-is; NOTE(review): listing line 49 is not
     * visible here - presumably it takes a reference on the stored object,
     * confirm */
48  m_p_and_q=p_and_q;
50 
    /* registered in init() as "Index of first sample of q" */
51  m_m=m;
52 }
53 
56 {
    /* NOTE(review): signature line is missing from this listing; the body
     * reads a pointer named p, presumably one of two separate sample sets. */

    /* register members and reset them before filling them in */
57  init();
58 
    /* NOTE(review): listing lines 59-60 are not visible here; presumably
     * they build m_p_and_q from the individual samples - confirm */
61 
    /* the number of vectors in p is stored as m_m, which init() registers
     * as "Index of first sample of q" */
62  m_m=p->get_num_vectors();
63 }
64 
66 {
    /* NOTE(review): signature line and the single body line (listing line
     * 67) are missing from this listing. Presumably this is the destructor
     * and releases the reference held on m_p_and_q - confirm. */
68 }
69 
/* Registers the members m_p_and_q and m_m via SG_ADD and then resets them
 * to NULL and 0 respectively. Called first in each of the bodies above.
 * NOTE(review): the trailing arguments of both SG_ADD calls (listing lines
 * 73 and 75) are missing from this listing. */
70 void CTwoSampleTest::init()
71 {
72  SG_ADD((CSGObject**)&m_p_and_q, "p_and_q", "Concatenated samples p and q",
74  SG_ADD(&m_m, "m", "Index of first sample of q",
76 
    /* start without any samples attached */
77  m_p_and_q=NULL;
78  m_m=0;
79 }
80 
82 {
    /* Fills `results` with m_num_null_samples statistic values, each computed
     * by compute_statistic() after applying a freshly permuted index subset to
     * m_p_and_q, and returns `results`.
     * NOTE(review): the signature line and the declaration of `results`
     * (listing line 88) are missing from this listing. */
83  SG_DEBUG("entering!\n")
84 
    /* fail early when no samples have been set */
85  REQUIRE(m_p_and_q, "No appended features p and q!\n");
86 
87  /* compute sample statistics for null distribution */
89 
90  /* memory for index permutations. Adding of subset has to happen
91  * inside the loop since it may be copied if there already is one set */
92  SGVector<index_t> ind_permutation(m_p_and_q->get_num_vectors());
93  ind_permutation.range_fill();
94 
95  for (index_t i=0; i<m_num_null_samples; ++i)
96  {
97  /* idea: merge features of p and q, shuffle, and compute statistic.
98  * This is done using subsets here */
99 
100  /* create index permutation and add as subset. This will mix samples
101  * from p and q */
    /* the same vector is permuted again on every iteration, so each
     * permutation starts from the previous one rather than from the
     * range-filled order */
102  CMath::permute(ind_permutation);
103 
104  /* compute statistic for this permutation of mixed samples */
105  m_p_and_q->add_subset(ind_permutation);
106  results[i]=compute_statistic();
    /* NOTE(review): listing line 107 is not visible here; a subset is added
     * on every iteration, so presumably that line removes it again -
     * confirm */
108  }
109 
110  SG_DEBUG("leaving!\n")
111  return results;
112 }
113 
115 {
    /* Returns 1 - (insert position of `statistic` in the sorted `values`)
     * divided by values.vlen; reports an error through SG_ERROR in the else
     * branch.
     * NOTE(review): the signature line, the if-condition (listing line 118)
     * and the declaration of `values` (listing line 121) are missing from
     * this listing. */
116  float64_t result=0;
117 
119  {
120  /* sample a bunch of MMD values from null distribution */
122 
123  /* find out percentile of parameter "statistic" in null distribution */
    /* sort first so that the insert position can serve as a rank */
124  values.qsort();
    /* kept as float64_t so that the division below is not an integer
     * division */
125  float64_t i=values.find_position_to_insert(statistic);
126 
127  /* return corresponding p-value */
128  result=1.0-i/values.vlen;
129  }
130  else
131  SG_ERROR("Unknown method to approximate null distribution!\n");
132 
    /* still 0 here if the else branch was taken and SG_ERROR returned */
133  return result;
134 }
135 
137 {
    /* Returns the element of `values` at index floor(values.vlen*(1-alpha));
     * reports an error through SG_ERROR in the else branch.
     * NOTE(review): the signature line, the if-condition (listing line 140)
     * and the declaration of `values` (listing line 143) are missing from
     * this listing. */
138  float64_t result=0;
139 
141  {
142  /* sample a bunch of MMD values from null distribution */
144 
145  /* return value of (1-alpha) quantile */
    /* NOTE(review): unlike the p-value computation above, no qsort() call is
     * visible before this lookup; indexing only yields a quantile if `values`
     * is sorted - confirm whether the missing line sorts. Also, alpha == 0
     * makes the index equal to values.vlen, which looks like one past the
     * last element - confirm how callers restrict alpha. */
146  result=values[index_t(CMath::floor(values.vlen*(1-alpha)))];
147  }
148  else
149  SG_ERROR("Unknown method to approximate null distribution!\n");
150 
    /* still 0 here if the else branch was taken and SG_ERROR returned */
151  return result;
152 }
153 
155 {
    /* Replaces the stored m_p_and_q pointer with p_and_q after taking a
     * reference on the new object.
     * NOTE(review): the signature line and listing line 158 are missing from
     * this listing; going by the comment below, line 158 presumably releases
     * the previously stored object - confirm. */
156  /* ref before unref to avoid problems when instances are equal */
157  SG_REF(p_and_q);
159  m_p_and_q=p_and_q;
160 }
161 
163 {
    /* Stores m in m_m after checking that samples are set and that m is
     * strictly smaller than the number of vectors in m_p_and_q.
     * NOTE(review): the signature line is missing from this listing. */
164  REQUIRE(m_p_and_q, "Samples are not specified!\n");
    /* NOTE(review): the condition also rejects m equal to the total number
     * of vectors, while the message only says "greater than"; there is also
     * no space between "p" and "(%d)" once the two literals are joined */
165  REQUIRE(m_p_and_q->get_num_vectors()>m, "Provided sample size for p"
166  "(%d) is greater than total number of samples (%d)!\n",
167  m, m_p_and_q->get_num_vectors());
168  m_m=m;
169 }
170 
172 {
    /* Returns the stored m_p_and_q pointer after calling SG_REF on it.
     * NOTE(review): the signature line is missing from this listing.
     * Presumably the caller is expected to release the extra reference -
     * confirm against the header documentation. */
173  SG_REF(m_p_and_q);
174  return m_p_and_q;
175 }
176 

SHOGUN Machine Learning Toolbox - Documentation