SHOGUN  4.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
NOCCO.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (w) 2014 Soumyajit De
4  * Written (w) 2012-2013 Heiko Strathmann
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice, this
11  * list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright notice,
13  * this list of conditions and the following disclaimer in the documentation
14  * and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  * The views and conclusions contained in the software and documentation are those
28  * of the authors and should not be interpreted as representing official policies,
29  * either expressed or implied, of the Shogun Development Team.
30  */
31 
33 
35 #include <shogun/kernel/Kernel.h>
39 
40 using namespace shogun;
41 using namespace Eigen;
42 
44 {
45  init();
46 }
47 
48 CNOCCO::CNOCCO(CKernel* kernel_p, CKernel* kernel_q, CFeatures* p, CFeatures* q)
49  : CKernelIndependenceTest(kernel_p, kernel_q, p, q)
50 {
51  init();
52 
53  // only equal number of samples are allowed
54  if (p && q)
55  {
57  "Only equal number of samples from both the distributions are "
58  "possible. Provided %d samples from p and %d samples from q!\n",
59  p->get_num_vectors(), q->get_num_vectors());
60 
61  m_num_features=p->get_num_vectors();
62  }
63 }
64 
66 {
67 }
68 
69 void CNOCCO::init()
70 {
71  SG_ADD(&m_num_features, "num_features",
72  "Number of features from each of the distributions",
74  SG_ADD(&m_epsilon, "epsilon", "The regularization constant",
76 
77  m_num_features=0;
78  m_epsilon=0.0;
79 
80  // we need PERMUTATION as the null approximation method here
82 }
83 
85 {
87  REQUIRE(m_p, "Provided feature for p cannot be null!\n");
88  m_num_features=m_p->get_num_vectors();
89 }
90 
92 {
94  REQUIRE(m_q, "Provided feature for q cannot be null!\n");
95  m_num_features=m_q->get_num_vectors();
96 }
97 
99 {
100  m_epsilon=epsilon;
101 }
102 
104 {
105  return m_epsilon;
106 }
107 
109 {
110  SG_DEBUG("Entering!\n");
111 
112  const index_t n=m_num_features;
113  Map<MatrixXd> mat(m.matrix, n, n);
114 
115  // the result matrix res = m * inv(m + n*epsilon*eye(n,n))
116  SGMatrix<float64_t> res(n, n);
117  MatrixXd to_inv=mat+n*m_epsilon*MatrixXd::Identity(n,n);
118 
119  // since the matrix is SPD, instead of directly computing the inverse,
120  // we compute the Cholesky decomposition and solve systems (see class
121  // documentation for details)
122  LLT<MatrixXd> chol(to_inv);
123 
124  // compute the matrix times inverse by solving systems
125  VectorXd e=VectorXd::Zero(n);
126  for (index_t i=0; i<n; ++i)
127  {
128  e(i)=1;
129 
130  // the solution vector corresponds to the i-th column of the inverse
131  const VectorXd& x=chol.solve(e);
132 #pragma omp parallel for shared (res, mat, x, i)
133  for (index_t j=0; j<n; ++j)
134  {
135  // since mat is symmetric we can use mat.col instead of mat.row here
136  // for faster execution since matrices are column-major
137  res(j,i)=x.dot(mat.col(j));
138  }
139  e(i)=0;
140  }
141 
142  SG_DEBUG("Leaving!\n");
143 
144  return res;
145 }
146 
148 {
149  SG_DEBUG("Entering!\n");
150 
151  REQUIRE(m_kernel_p, "Kernel for p is not set! Use set_kernel_p() method to "
152  "set the kernel for use!\n");
153  REQUIRE(m_kernel_q, "Kernel for q is not set! Use set_kernel_q() method to "
154  "set the kernel for use!\n");
155 
156  REQUIRE(m_p && m_q, "features needed!\n")
157 
158  // compute kernel matrices
161 
162  // center the kernel matrices
163  Gx.center();
164  Gy.center();
165 
168 
169  Map<MatrixXd> Rx_map(Rx.matrix, Rx.num_rows, Rx.num_cols);
170  Map<MatrixXd> Ry_map(Ry.matrix, Ry.num_rows, Ry.num_cols);
171 
172  // compute the trace of the matrix multiplication without computing the
173  // off-diagonal entries of the final matrix and just the diagonal entries
174  float64_t result=0.0;
175  for (index_t i=0; i<m_num_features; ++i)
176  {
177  // taking advantange of the symmetry, we can use Ry_map.col here
178  // instead of Ry_map.row for computing the trace for computational
179  // advantage since matrices are stored in column-major format
180  result+=Rx_map.col(i).dot(Ry_map.col(i));
181  }
182 
183  SG_DEBUG("leaving!\n");
184 
185  return result;
186 }
187 
189 {
190  float64_t result=0;
192  {
193  case PERMUTATION:
194  {
195  /* sampling null is handled there */
196  result=CIndependenceTest::compute_p_value(statistic);
197  break;
198  }
199  default:
200  SG_ERROR("Use only PERMUTATION for null-approximation method "
201  "for NOCCO!\n");
202  }
203 
204  return result;
205 }
206 
208 {
209  float64_t result=0;
211  {
212  case PERMUTATION:
213  {
214  /* sampling null is handled there */
216  break;
217  }
218  default:
219  SG_ERROR("Use only PERMUTATION for null-approximation method "
220  "for NOCCO!\n");
221  }
222 
223  return result;
224 }
225 
227 {
228  SG_DEBUG("Entering!\n")
229 
230  /* replace current kernel via precomputed custom kernel and call superclass
231  * method */
232 
233  /* backup references to old kernels */
234  CKernel* kernel_p=m_kernel_p;
235  CKernel* kernel_q=m_kernel_q;
236 
237  /* init kernels before to be sure that everything is fine
238  * kernel function between two samples from different distributions
239  * is never computed - in fact, they may as well have different features */
240  m_kernel_p->init(m_p, m_p);
241  m_kernel_q->init(m_q, m_q);
242 
243  /* precompute kernel matrices */
244  CCustomKernel* precomputed_p=new CCustomKernel(m_kernel_p);
245  CCustomKernel* precomputed_q=new CCustomKernel(m_kernel_q);
246  SG_REF(precomputed_p);
247  SG_REF(precomputed_q);
248 
249  /* temporarily replace own kernels */
250  m_kernel_p=precomputed_p;
251  m_kernel_q=precomputed_q;
252 
253  /* use superclass sample_null which shuffles the entries for one
254  * distribution using index permutation on rows and columns of
255  * kernel matrix from one distribution, while accessing the other
256  * in its original order and then compute statistic */
258 
259  /* restore kernels */
260  m_kernel_p=kernel_p;
261  m_kernel_q=kernel_q;
262 
263  SG_UNREF(precomputed_p);
264  SG_UNREF(precomputed_q);
265 
266  SG_DEBUG("Leaving!\n")
267  return null_samples;
268 }
virtual bool init(CFeatures *lhs, CFeatures *rhs)
Definition: Kernel.cpp:98
virtual float64_t compute_p_value(float64_t statistic)
Definition: NOCCO.cpp:188
virtual float64_t compute_statistic()
Definition: NOCCO.cpp:147
int32_t index_t
Definition: common.h:62
virtual float64_t compute_p_value(float64_t statistic)
The Custom Kernel allows for custom user provided kernel matrices.
Definition: CustomKernel.h:36
SGMatrix< float64_t > get_kernel_matrix_L()
SGMatrix< float64_t > compute_helper(SGMatrix< float64_t > m)
Definition: NOCCO.cpp:108
virtual SGVector< float64_t > sample_null()
Definition: SGMatrix.h:20
virtual int32_t get_num_vectors() const =0
#define SG_ERROR(...)
Definition: SGIO.h:129
#define REQUIRE(x,...)
Definition: SGIO.h:206
index_t num_cols
Definition: SGMatrix.h:376
float64_t get_epsilon() const
Definition: NOCCO.cpp:103
virtual void set_p(CFeatures *p)
Definition: NOCCO.cpp:84
virtual void set_p(CFeatures *p)
#define SG_REF(x)
Definition: SGObject.h:54
virtual float64_t compute_threshold(float64_t alpha)
index_t num_rows
Definition: SGMatrix.h:374
SGMatrix< float64_t > get_kernel_matrix_K()
void set_epsilon(float64_t epsilon)
Definition: NOCCO.cpp:98
virtual ~CNOCCO()
Definition: NOCCO.cpp:65
double float64_t
Definition: common.h:50
virtual float64_t compute_threshold(float64_t alpha)
Definition: NOCCO.cpp:207
#define SG_UNREF(x)
Definition: SGObject.h:55
#define SG_DEBUG(...)
Definition: SGIO.h:107
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
The class Features is the base class of all feature objects.
Definition: Features.h:68
ENullApproximationMethod m_null_approximation_method
Kernel independence test base class. Provides an interface for performing an independence test...
The Kernel base class.
Definition: Kernel.h:159
#define SG_ADD(...)
Definition: SGObject.h:84
virtual SGVector< float64_t > sample_null()
Definition: NOCCO.cpp:226
virtual void set_q(CFeatures *q)
virtual void set_q(CFeatures *q)
Definition: NOCCO.cpp:91

SHOGUN Machine Learning Toolbox - Documentation