SHOGUN  4.1.0
 全部  命名空间 文件 函数 变量 类型定义 枚举 枚举值 友元 宏定义  
NOCCO.cpp
浏览该文件的文档.
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (w) 2014 Soumyajit De
4  * Written (w) 2012-2013 Heiko Strathmann
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice, this
11  * list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright notice,
13  * this list of conditions and the following disclaimer in the documentation
14  * and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  * The views and conclusions contained in the software and documentation are those
28  * of the authors and should not be interpreted as representing official policies,
29  * either expressed or implied, of the Shogun Development Team.
30  */
31 
33 
34 #ifdef HAVE_EIGEN3
36 #include <shogun/kernel/Kernel.h>
40 
41 using namespace shogun;
42 using namespace Eigen;
43 
45 {
46  init();
47 }
48 
49 CNOCCO::CNOCCO(CKernel* kernel_p, CKernel* kernel_q, CFeatures* p, CFeatures* q)
50  : CKernelIndependenceTest(kernel_p, kernel_q, p, q)
51 {
52  init();
53 
54  // only equal number of samples are allowed
55  if (p && q)
56  {
58  "Only equal number of samples from both the distributions are "
59  "possible. Provided %d samples from p and %d samples from q!\n",
60  p->get_num_vectors(), q->get_num_vectors());
61 
62  m_num_features=p->get_num_vectors();
63  }
64 }
65 
67 {
68 }
69 
70 void CNOCCO::init()
71 {
72  SG_ADD(&m_num_features, "num_features",
73  "Number of features from each of the distributions",
75  SG_ADD(&m_epsilon, "epsilon", "The regularization constant",
77 
78  m_num_features=0;
79  m_epsilon=0.0;
80 
81  // we need PERMUTATION as the null approximation method here
83 }
84 
86 {
88  REQUIRE(m_p, "Provided feature for p cannot be null!\n");
89  m_num_features=m_p->get_num_vectors();
90 }
91 
93 {
95  REQUIRE(m_q, "Provided feature for q cannot be null!\n");
96  m_num_features=m_q->get_num_vectors();
97 }
98 
100 {
101  m_epsilon=epsilon;
102 }
103 
105 {
106  return m_epsilon;
107 }
108 
110 {
111  SG_DEBUG("Entering!\n");
112 
113  const index_t n=m_num_features;
114  Map<MatrixXd> mat(m.matrix, n, n);
115 
116  // the result matrix res = m * inv(m + n*epsilon*eye(n,n))
117  SGMatrix<float64_t> res(n, n);
118  MatrixXd to_inv=mat+n*m_epsilon*MatrixXd::Identity(n,n);
119 
120  // since the matrix is SPD, instead of directly computing the inverse,
121  // we compute the Cholesky decomposition and solve systems (see class
122  // documentation for details)
123  LLT<MatrixXd> chol(to_inv);
124 
125  // compute the matrix times inverse by solving systems
126  VectorXd e=VectorXd::Zero(n);
127  for (index_t i=0; i<n; ++i)
128  {
129  e(i)=1;
130 
131  // the solution vector corresponds to the i-th column of the inverse
132  const VectorXd& x=chol.solve(e);
133 #pragma omp parallel for shared (res, mat, x, i)
134  for (index_t j=0; j<n; ++j)
135  {
136  // since mat is symmetric we can use mat.col instead of mat.row here
137  // for faster execution since matrices are column-major
138  res(j,i)=x.dot(mat.col(j));
139  }
140  e(i)=0;
141  }
142 
143  SG_DEBUG("Leaving!\n");
144 
145  return res;
146 }
147 
149 {
150  SG_DEBUG("Entering!\n");
151 
152  REQUIRE(m_kernel_p, "Kernel for p is not set! Use set_kernel_p() method to "
153  "set the kernel for use!\n");
154  REQUIRE(m_kernel_q, "Kernel for q is not set! Use set_kernel_q() method to "
155  "set the kernel for use!\n");
156 
157  REQUIRE(m_p && m_q, "features needed!\n")
158 
159  // compute kernel matrices
162 
163  // center the kernel matrices
164  Gx.center();
165  Gy.center();
166 
169 
170  Map<MatrixXd> Rx_map(Rx.matrix, Rx.num_rows, Rx.num_cols);
171  Map<MatrixXd> Ry_map(Ry.matrix, Ry.num_rows, Ry.num_cols);
172 
173  // compute the trace of the matrix multiplication without computing the
174  // off-diagonal entries of the final matrix and just the diagonal entries
175  float64_t result=0.0;
176  for (index_t i=0; i<m_num_features; ++i)
177  {
178  // taking advantange of the symmetry, we can use Ry_map.col here
179  // instead of Ry_map.row for computing the trace for computational
180  // advantage since matrices are stored in column-major format
181  result+=Rx_map.col(i).dot(Ry_map.col(i));
182  }
183 
184  SG_DEBUG("leaving!\n");
185 
186  return result;
187 }
188 
190 {
191  float64_t result=0;
193  {
194  case PERMUTATION:
195  {
196  /* sampling null is handled there */
197  result=CIndependenceTest::compute_p_value(statistic);
198  break;
199  }
200  default:
201  SG_ERROR("Use only PERMUTATION for null-approximation method "
202  "for NOCCO!\n");
203  }
204 
205  return result;
206 }
207 
209 {
210  float64_t result=0;
212  {
213  case PERMUTATION:
214  {
215  /* sampling null is handled there */
217  break;
218  }
219  default:
220  SG_ERROR("Use only PERMUTATION for null-approximation method "
221  "for NOCCO!\n");
222  }
223 
224  return result;
225 }
226 
228 {
229  SG_DEBUG("Entering!\n")
230 
231  /* replace current kernel via precomputed custom kernel and call superclass
232  * method */
233 
234  /* backup references to old kernels */
235  CKernel* kernel_p=m_kernel_p;
236  CKernel* kernel_q=m_kernel_q;
237 
238  /* init kernels before to be sure that everything is fine
239  * kernel function between two samples from different distributions
240  * is never computed - in fact, they may as well have different features */
241  m_kernel_p->init(m_p, m_p);
242  m_kernel_q->init(m_q, m_q);
243 
244  /* precompute kernel matrices */
245  CCustomKernel* precomputed_p=new CCustomKernel(m_kernel_p);
246  CCustomKernel* precomputed_q=new CCustomKernel(m_kernel_q);
247  SG_REF(precomputed_p);
248  SG_REF(precomputed_q);
249 
250  /* temporarily replace own kernels */
251  m_kernel_p=precomputed_p;
252  m_kernel_q=precomputed_q;
253 
254  /* use superclass sample_null which shuffles the entries for one
255  * distribution using index permutation on rows and columns of
256  * kernel matrix from one distribution, while accessing the other
257  * in its original order and then compute statistic */
259 
260  /* restore kernels */
261  m_kernel_p=kernel_p;
262  m_kernel_q=kernel_q;
263 
264  SG_UNREF(precomputed_p);
265  SG_UNREF(precomputed_q);
266 
267  SG_DEBUG("Leaving!\n")
268  return null_samples;
269 }
270 #endif // HAVE_EIGEN3
virtual bool init(CFeatures *lhs, CFeatures *rhs)
Definition: Kernel.cpp:98
virtual float64_t compute_p_value(float64_t statistic)
Definition: NOCCO.cpp:189
virtual float64_t compute_statistic()
Definition: NOCCO.cpp:148
int32_t index_t
Definition: common.h:62
virtual float64_t compute_p_value(float64_t statistic)
The Custom Kernel allows for custom user provided kernel matrices.
Definition: CustomKernel.h:36
SGMatrix< float64_t > get_kernel_matrix_L()
SGMatrix< float64_t > compute_helper(SGMatrix< float64_t > m)
Definition: NOCCO.cpp:109
virtual SGVector< float64_t > sample_null()
Definition: SGMatrix.h:20
virtual int32_t get_num_vectors() const =0
#define SG_ERROR(...)
Definition: SGIO.h:129
#define REQUIRE(x,...)
Definition: SGIO.h:206
index_t num_cols
Definition: SGMatrix.h:378
float64_t get_epsilon() const
Definition: NOCCO.cpp:104
virtual void set_p(CFeatures *p)
Definition: NOCCO.cpp:85
virtual void set_p(CFeatures *p)
#define SG_REF(x)
Definition: SGObject.h:51
virtual float64_t compute_threshold(float64_t alpha)
index_t num_rows
Definition: SGMatrix.h:376
static const float64_t epsilon
Definition: libbmrm.cpp:25
SGMatrix< float64_t > get_kernel_matrix_K()
void set_epsilon(float64_t epsilon)
Definition: NOCCO.cpp:99
virtual ~CNOCCO()
Definition: NOCCO.cpp:66
double float64_t
Definition: common.h:50
virtual float64_t compute_threshold(float64_t alpha)
Definition: NOCCO.cpp:208
#define SG_UNREF(x)
Definition: SGObject.h:52
#define SG_DEBUG(...)
Definition: SGIO.h:107
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
The class Features is the base class of all feature objects.
Definition: Features.h:68
ENullApproximationMethod m_null_approximation_method
Kernel independence test base class. Provides an interface for performing an independence test...
The Kernel base class.
Definition: Kernel.h:158
#define SG_ADD(...)
Definition: SGObject.h:81
virtual SGVector< float64_t > sample_null()
Definition: NOCCO.cpp:227
virtual void set_q(CFeatures *q)
virtual void set_q(CFeatures *q)
Definition: NOCCO.cpp:92

SHOGUN 机器学习工具包 - 项目文档