SHOGUN  6.1.3
MultiKernelQuadraticTimeMMD.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (w) 2012 - 2013 Heiko Strathmann
4  * Written (w) 2014 - 2017 Soumyajit De
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice, this
11  * list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright notice,
13  * this list of conditions and the following disclaimer in the documentation
14  * and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  * The views and conclusions contained in the software and documentation are those
28  * of the authors and should not be interpreted as representing official policies,
29  * either expressed or implied, of the Shogun Development Team.
30  */
31 
32 #include <shogun/io/SGIO.h>
33 #include <shogun/lib/SGVector.h>
43 
44 using namespace shogun;
45 using namespace internal;
46 using namespace mmd;
47 using std::unique_ptr;
48 
50 {
51  Self(CQuadraticTimeMMD* owner);
52  void update_pairwise_distance(CDistance *distance);
53 
55  unique_ptr<CCustomDistance> m_pairwise_distance;
57  KernelManager m_kernel_mgr;
58  ComputeMMD statistic_job;
59  VarianceH1 variance_h1_job;
60  PermutationMMD permutation_job;
61 };
62 
64  m_pairwise_distance(nullptr), m_dtype(D_UNKNOWN)
65 {
66 }
67 
69 {
70  ASSERT(distance);
71  if (m_dtype==distance->get_distance_type())
72  {
73  ASSERT(m_pairwise_distance!=nullptr);
74  SG_SINFO("Precomputed distance exists for %s!\n", distance->get_name());
75  }
76  else
77  {
78  auto precomputed_distance=m_owner->compute_joint_distance(distance);
79  m_pairwise_distance=unique_ptr<CCustomDistance>(precomputed_distance);
80  m_dtype=distance->get_distance_type();
81  }
82 }
83 
84 CMultiKernelQuadraticTimeMMD::CMultiKernelQuadraticTimeMMD() : CSGObject()
85 {
86  self=unique_ptr<Self>(new Self(nullptr));
87 }
88 
90 {
91  self=unique_ptr<Self>(new Self(owner));
92 }
93 
95 {
96  cleanup();
97 }
98 
100 {
101  ASSERT(self->m_owner);
102  REQUIRE(kernel, "Kernel instance cannot be NULL!\n");
103  self->m_kernel_mgr.push_back(kernel);
104 }
105 
107 {
108  self->m_kernel_mgr.clear();
109  invalidate_precomputed_distance();
110 }
111 
112 void CMultiKernelQuadraticTimeMMD::invalidate_precomputed_distance()
113 {
114  self->m_pairwise_distance=nullptr;
115  self->m_dtype=D_UNKNOWN;
116 }
117 
119 {
120  ASSERT(self->m_owner);
121  return statistic(self->m_kernel_mgr);
122 }
123 
125 {
126  ASSERT(self->m_owner);
128  return SGVector<float64_t>();
129 }
130 
132 {
133  ASSERT(self->m_owner);
134  return variance_h1(self->m_kernel_mgr);
135 }
136 
138 {
139  ASSERT(self->m_owner);
140  return test_power(self->m_kernel_mgr);
141 }
142 
144 {
145  ASSERT(self->m_owner);
146  return sample_null(self->m_kernel_mgr);
147 }
148 
150 {
151  ASSERT(self->m_owner);
152  return p_values(self->m_kernel_mgr);
153 }
154 
156 {
158  SGVector<bool> rejections(pvalues.size());
159  for (auto i=0; i<pvalues.size(); ++i)
160  {
161  rejections[i]=pvalues[i]<alpha;
162  }
163  return rejections;
164 }
165 
166 SGVector<float64_t> CMultiKernelQuadraticTimeMMD::statistic(const KernelManager& kernel_mgr)
167 {
168  SG_DEBUG("Entering");
169  REQUIRE(kernel_mgr.num_kernels()>0, "Number of kernels (%d) have to be greater than 0!\n", kernel_mgr.num_kernels());
170 
171  const auto nx=self->m_owner->get_num_samples_p();
172  const auto ny=self->m_owner->get_num_samples_q();
173  const auto stype = self->m_owner->get_statistic_type();
174 
175  CDistance* distance=kernel_mgr.get_distance_instance();
176  self->update_pairwise_distance(distance);
177  kernel_mgr.set_precomputed_distance(self->m_pairwise_distance.get());
178  SG_UNREF(distance);
179 
180  self->statistic_job.m_n_x=nx;
181  self->statistic_job.m_n_y=ny;
182  self->statistic_job.m_stype=stype;
183  SGVector<float64_t> result=self->statistic_job(kernel_mgr);
184 
185  kernel_mgr.unset_precomputed_distance();
186 
187  for (auto i=0; i<result.vlen; ++i)
188  result[i]=self->m_owner->normalize_statistic(result[i]);
189 
190  SG_DEBUG("Leaving");
191  return result;
192 }
193 
194 SGVector<float64_t> CMultiKernelQuadraticTimeMMD::variance_h1(const KernelManager& kernel_mgr)
195 {
196  SG_DEBUG("Entering");
197  REQUIRE(kernel_mgr.num_kernels()>0, "Number of kernels (%d) have to be greater than 0!\n", kernel_mgr.num_kernels());
198 
199  const auto nx=self->m_owner->get_num_samples_p();
200  const auto ny=self->m_owner->get_num_samples_q();
201 
202  CDistance* distance=kernel_mgr.get_distance_instance();
203  self->update_pairwise_distance(distance);
204  kernel_mgr.set_precomputed_distance(self->m_pairwise_distance.get());
205  SG_UNREF(distance);
206 
207  self->variance_h1_job.m_n_x=nx;
208  self->variance_h1_job.m_n_y=ny;
209  SGVector<float64_t> result=self->variance_h1_job(kernel_mgr);
210 
211  kernel_mgr.unset_precomputed_distance();
212 
213  SG_DEBUG("Leaving");
214  return result;
215 }
216 
217 SGVector<float64_t> CMultiKernelQuadraticTimeMMD::test_power(const KernelManager& kernel_mgr)
218 {
219  SG_DEBUG("Entering");
220  REQUIRE(kernel_mgr.num_kernels()>0, "Number of kernels (%d) have to be greater than 0!\n", kernel_mgr.num_kernels());
221  REQUIRE(self->m_owner->get_statistic_type()==ST_UNBIASED_FULL, "Only possible with UNBIASED_FULL!\n");
222 
223  const auto nx=self->m_owner->get_num_samples_p();
224  const auto ny=self->m_owner->get_num_samples_q();
225 
226  CDistance* distance=kernel_mgr.get_distance_instance();
227  self->update_pairwise_distance(distance);
228  kernel_mgr.set_precomputed_distance(self->m_pairwise_distance.get());
229  SG_UNREF(distance);
230 
231  self->variance_h1_job.m_n_x=nx;
232  self->variance_h1_job.m_n_y=ny;
233  SGVector<float64_t> result=self->variance_h1_job.test_power(kernel_mgr);
234 
235  kernel_mgr.unset_precomputed_distance();
236 
237  SG_DEBUG("Leaving");
238  return result;
239 }
240 
241 SGMatrix<float32_t> CMultiKernelQuadraticTimeMMD::sample_null(const KernelManager& kernel_mgr)
242 {
243  SG_DEBUG("Entering");
244  REQUIRE(self->m_owner->get_null_approximation_method()==NAM_PERMUTATION,
245  "Multi-kernel tests requires the H0 approximation method to be PERMUTATION!\n");
246 
247  REQUIRE(kernel_mgr.num_kernels()>0, "Number of kernels (%d) have to be greater than 0!\n", kernel_mgr.num_kernels());
248 
249  const auto nx=self->m_owner->get_num_samples_p();
250  const auto ny=self->m_owner->get_num_samples_q();
251  const auto stype = self->m_owner->get_statistic_type();
252  const auto num_null_samples = self->m_owner->get_num_null_samples();
253 
254  CDistance* distance=kernel_mgr.get_distance_instance();
255  self->update_pairwise_distance(distance);
256  kernel_mgr.set_precomputed_distance(self->m_pairwise_distance.get());
257  SG_UNREF(distance);
258 
259  self->permutation_job.m_n_x=nx;
260  self->permutation_job.m_n_y=ny;
261  self->permutation_job.m_num_null_samples=num_null_samples;
262  self->permutation_job.m_stype=stype;
263  SGMatrix<float32_t> result=self->permutation_job(kernel_mgr);
264 
265  kernel_mgr.unset_precomputed_distance();
266 
267  for (index_t i=0; i<result.size(); ++i)
268  result.matrix[i]=self->m_owner->normalize_statistic(result.matrix[i]);
269 
270  SG_DEBUG("Leaving");
271  return result;
272 }
273 
274 SGVector<float64_t> CMultiKernelQuadraticTimeMMD::p_values(const KernelManager& kernel_mgr)
275 {
276  SG_DEBUG("Entering");
277  REQUIRE(self->m_owner->get_null_approximation_method()==NAM_PERMUTATION,
278  "Multi-kernel tests requires the H0 approximation method to be PERMUTATION!\n");
279 
280  REQUIRE(kernel_mgr.num_kernels()>0, "Number of kernels (%d) have to be greater than 0!\n", kernel_mgr.num_kernels());
281 
282  const auto nx=self->m_owner->get_num_samples_p();
283  const auto ny=self->m_owner->get_num_samples_q();
284  const auto stype = self->m_owner->get_statistic_type();
285  const auto num_null_samples = self->m_owner->get_num_null_samples();
286 
287  CDistance* distance=kernel_mgr.get_distance_instance();
288  self->update_pairwise_distance(distance);
289  kernel_mgr.set_precomputed_distance(self->m_pairwise_distance.get());
290  SG_UNREF(distance);
291 
292  self->permutation_job.m_n_x=nx;
293  self->permutation_job.m_n_y=ny;
294  self->permutation_job.m_num_null_samples=num_null_samples;
295  self->permutation_job.m_stype=stype;
296  SGVector<float64_t> result=self->permutation_job.p_value(kernel_mgr);
297 
298  kernel_mgr.unset_precomputed_distance();
299 
300  SG_DEBUG("Leaving");
301  return result;
302 }
303 
305 {
306  return "MultiKernelQuadraticTimeMMD";
307 }
virtual const char * get_name() const =0
float distance(CJLCoverTreePoint p1, CJLCoverTreePoint p2, float64_t upper_bound)
Base class for the family of kernel functions that only depend on the difference of the inputs...
Class Distance, a base class for all the distances used in the Shogun toolbox.
Definition: Distance.h:87
int32_t index_t
Definition: common.h:72
virtual EDistanceType get_distance_type()=0
#define REQUIRE(x,...)
Definition: SGIO.h:181
#define SG_NOTIMPLEMENTED
Definition: SGIO.h:138
This class implements the quadratic time Maximum Mean Discrepancy (MMD) statistic as described in [1]. The MMD is the distance between two probability distributions $p$ and $q$ in an RKHS $\mathcal{F}$, which we denote by $\mathrm{MMD}[\mathcal{F}, p, q]$.
#define ASSERT(x)
Definition: SGIO.h:176
EDistanceType
Definition: Distance.h:32
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:124
void add_kernel(CShiftInvariantKernel *kernel)
CCustomDistance * compute_joint_distance(CDistance *distance)
int32_t size() const
Definition: SGVector.h:156
double float64_t
Definition: common.h:60
SGVector< bool > perform_test(float64_t alpha)
#define SG_UNREF(x)
Definition: SGObject.h:53
#define SG_DEBUG(...)
Definition: SGIO.h:106
All classes and functions are contained in the shogun namespace.
Definition: class_list.h:18
#define SG_SINFO(...)
Definition: SGIO.h:158
int64_t size() const
Definition: SGMatrix.h:275
index_t vlen
Definition: SGVector.h:571

SHOGUN Machine Learning Toolbox - Documentation