SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TwoSampleTest.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (w) 2012-2013 Heiko Strathmann
4  * Written (w) 2014 Soumyajit De
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice, this
11  * list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright notice,
13  * this list of conditions and the following disclaimer in the documentation
14  * and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  * The views and conclusions contained in the software and documentation are those
28  * of the authors and should not be interpreted as representing official policies,
29  * either expressed or implied, of the Shogun Development Team.
30  */
31 
34 
35 using namespace shogun;
36 
38 {
    /* NOTE(review): the declarator for this body (listing line 37) is absent
     * from this extract, so the owning function cannot be confirmed here;
     * presumably the default constructor. The body only delegates to init(),
     * which resets m_p_and_q and m_m. */
39  init();
40 }
41 
44 {
    /* NOTE(review): the declarator (listing lines 42-43) is absent from this
     * extract; presumably a constructor taking `p_and_q` and `m`.
     * After init() has reset the members, the supplied values are stored in
     * m_p_and_q and m_m. */
45  init();
46 
47  m_p_and_q=p_and_q;
    /* NOTE(review): listing line 48 is missing here. Whether a reference is
     * taken on the stored pointer cannot be confirmed from this view; compare
     * the body at listing lines 154-159, which calls SG_REF before the same
     * assignment. */
49 
    /* Unlike the body at listing lines 162-168, no range check on `m` is
     * visible here. */
50  m_m=m;
51 }
52 
55 {
    /* NOTE(review): the declarator (listing lines 53-54) is absent from this
     * extract; the body uses a pointer named `p`. */
56  init();
57 
    /* NOTE(review): listing lines 58-59 are missing here, so how m_p_and_q is
     * populated in this overload is not visible - confirm against the full
     * file. */
60 
    /* m_m is set to the number of vectors in `p`. `p` is dereferenced without
     * any null check visible in this body. */
61  m_m=p->get_num_vectors();
62 }
63 
65 {
    /* NOTE(review): both the declarator (listing line 64) and the single
     * statement of this body (listing line 66) are absent from this extract;
     * presumably the destructor - confirm against the full file. */
67 }
68 
/* Registers the members m_p_and_q and m_m through SG_ADD under the names
 * "p_and_q" and "m", then resets them to NULL and 0 respectively.
 *
 * NOTE(review): the closing argument line of each SG_ADD call (listing lines
 * 72 and 74) is absent from this extract, so the last argument of both calls
 * is not visible here. */
69 void CTwoSampleTest::init()
70 {
71  SG_ADD((CSGObject**)&m_p_and_q, "p_and_q", "Concatenated samples p and q",
73  SG_ADD(&m_m, "m", "Index of first sample of q",
75 
    /* Defaults: no samples attached, split index at 0. Every body in this
     * file that calls init() assigns its own values only afterwards. */
76  m_p_and_q=NULL;
77  m_m=0;
78 }
79 
81 {
    /* NOTE(review): the declarator (listing line 80) is absent from this
     * extract. The body fills `results` with m_num_null_samples values of
     * compute_statistic(), each evaluated with a shuffled index subset applied
     * to m_p_and_q, and returns `results`. */
82  SG_DEBUG("entering!\n")
83 
84  REQUIRE(m_p_and_q, "No appended features p and q!\n");
85 
86  /* compute sample statistics for null distribution */
    /* NOTE(review): listing line 87 is missing; `results`, which is indexed
     * and returned below, is presumably declared there. */
88 
89  /* memory for index permutations. Adding of subset has to happen
90  * inside the loop since it may be copied if there already is one set */
91  SGVector<index_t> ind_permutation(m_p_and_q->get_num_vectors());
    /* range_fill() is called once, before the loop; each iteration then
     * permutes the vector left by the previous iteration rather than starting
     * from a freshly filled one. */
92  ind_permutation.range_fill();
93 
94  for (index_t i=0; i<m_num_null_samples; ++i)
95  {
96  /* idea: merge features of p and q, shuffle, and compute statistic.
97  * This is done using subsets here */
98 
99  /* create index permutation and add as subset. This will mix samples
100  * from p and q */
101  SGVector<index_t>::permute_vector(ind_permutation);
102 
103  /* compute statistic for this permutation of mixed samples */
104  m_p_and_q->add_subset(ind_permutation);
105  results[i]=compute_statistic();
    /* NOTE(review): listing line 106 is missing. No removal of the subset
     * added above is visible in this loop; presumably it happens on the
     * missing line - confirm, otherwise subsets would accumulate across
     * iterations. */
107  }
108 
109  SG_DEBUG("leaving!\n")
110  return results;
111 }
112 
114 {
    /* NOTE(review): the declarator (listing line 113) is absent from this
     * extract; the body reads a value named `statistic` and returns a
     * float64_t. `result` stays 0 unless the branch below assigns it. */
115  float64_t result=0;
116 
    /* NOTE(review): the condition selecting this branch (listing line 117) is
     * missing from this extract. */
118  {
119  /* sample a bunch of MMD values from null distribution */
    /* NOTE(review): listing line 120 is missing; `values` is presumably
     * obtained there. */
121 
122  /* find out percentile of parameter "statistic" in null distribution */
123  values.qsort();
    /* `i` is declared float64_t, so the division below is carried out in
     * floating point instead of being truncated as an integer division. */
124  float64_t i=values.find_position_to_insert(statistic);
125 
126  /* return corresponding p-value */
    /* NOTE(review): no guard against values.vlen being 0 is visible here. */
127  result=1.0-i/values.vlen;
128  }
129  else
130  SG_ERROR("Unknown method to approximate null distribution!\n");
131 
132  return result;
133 }
134 
136 {
    /* NOTE(review): the declarator (listing line 135) is absent from this
     * extract; the body reads a value named `alpha` and returns a float64_t.
     * `result` stays 0 unless the branch below assigns it. */
137  float64_t result=0;
138 
    /* NOTE(review): the condition selecting this branch (listing line 139) is
     * missing from this extract. */
140  {
141  /* sample a bunch of MMD values from null distribution */
    /* NOTE(review): listing line 142 is missing; `values` is presumably
     * obtained there. In contrast to the branch at listing lines 118-128, no
     * values.qsort() call is visible before the element is picked below -
     * confirm that `values` is sorted at this point, otherwise the element at
     * this index is not a quantile. */
143 
144  /* return value of (1-alpha) quantile */
    /* NOTE(review): for alpha == 0 the index evaluates to values.vlen, which
     * is one past the last element if indices run from 0 to vlen-1 - confirm
     * that callers never pass 0 or add a clamp. */
145  result=values[index_t(CMath::floor(values.vlen*(1-alpha)))];
146  }
147  else
148  SG_ERROR("Unknown method to approximate null distribution!\n");
149 
150  return result;
151 }
152 
154 {
    /* NOTE(review): the declarator (listing line 153) is absent from this
     * extract. The body replaces m_p_and_q with the supplied `p_and_q`. */
155  /* ref before unref to avoid problems when instances are equal */
156  SG_REF(p_and_q);
    /* NOTE(review): listing line 157 is missing; going by the comment above,
     * the previously held m_p_and_q is presumably released there - confirm. */
158  m_p_and_q=p_and_q;
159 }
160 
162 {
    /* NOTE(review): the declarator (listing line 161) is absent from this
     * extract. The body stores `m` in m_m after two checks: samples must be
     * attached, and `m` must be strictly smaller than their vector count. */
163  REQUIRE(m_p_and_q, "Samples are not specified!\n");
    /* NOTE(review): the check also fails when m equals the number of vectors,
     * while the message only says "greater than". The two adjacent string
     * literals are concatenated without a separating space, so the message
     * reads "...for p(%d)...". */
164  REQUIRE(m_p_and_q->get_num_vectors()>m, "Provided sample size for p"
165  "(%d) is greater than total number of samples (%d)!\n",
166  m, m_p_and_q->get_num_vectors());
167  m_m=m;
168 }
169 
171 {
    /* NOTE(review): the declarator (listing line 170) is absent from this
     * extract. SG_REF is applied to m_p_and_q before it is returned, so the
     * caller presumably receives its own reference and has to release it -
     * confirm against the reference-counting convention of the project. */
172  SG_REF(m_p_and_q);
173  return m_p_and_q;
174 }
175 

SHOGUN Machine Learning Toolbox - Documentation