SHOGUN  4.1.0
 全部  命名空间 文件 函数 变量 类型定义 枚举 枚举值 友元 宏定义  
MMDKernelSelectionComb.cpp
浏览该文件的文档.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2012-2013 Heiko Strathmann
8  */
9 
13 
14 using namespace shogun;
15 
18 {
19  init();
20 }
21 
24 {
25  init();
26 }
27 
29 {
30 }
31 
32 void CMMDKernelSelectionComb::init()
33 {
34  SG_ADD(&m_opt_max_iterations, "opt_max_iterations", "Maximum number of "
35  "iterations for qp solver", MS_NOT_AVAILABLE);
36  SG_ADD(&m_opt_epsilon, "opt_epsilon", "Stopping criterion for qp solver",
38  SG_ADD(&m_opt_low_cut, "opt_low_cut", "Low cut value for optimization "
39  "kernel weights", MS_NOT_AVAILABLE);
40 
41  /* sensible values for optimization */
43  m_opt_epsilon=10E-15;
44  m_opt_low_cut=10E-7;
45 }
46 
48 {
49  /* cast is safe due to assertion in constructor */
51 
52  /* optimise for kernel weights and set them */
54  combined->set_subkernel_weights(weights);
55 
56  /* note that kernel is SG_REF'ed from getter above */
57  return combined;
58 }
59 
60 /* no reference counting, use the static context constructor of SGMatrix */
62 
64 {
65  return &m_Q[m_Q.num_rows*i];
66 }
67 
69 void CMMDKernelSelectionComb::print_state(libqp_state_T state)
70 {
71  SG_SDEBUG("libqp state: primal=%f\n", state.QP);
72 }
73 
76 {
77  /* readability */
78  index_t num_kernels=mmds.vlen;
79 
80  /* compute sum of mmds to generate feasible point for convex program */
81  float64_t sum_mmds=0;
82  for (index_t i=0; i<mmds.vlen; ++i)
83  sum_mmds+=mmds[i];
84 
85  /* QP: 0.5*x'*Q*x + f'*x
86  * subject to
87  * mmds'*x = b
88  * LB[i] <= x[i] <= UB[i] for all i=1..n */
89  SGVector<float64_t> Q_diag(num_kernels);
90  SGVector<float64_t> f(num_kernels);
91  SGVector<float64_t> lb(num_kernels);
92  SGVector<float64_t> ub(num_kernels);
93  SGVector<float64_t> weights(num_kernels);
94 
95  /* init everything, there are two cases possible: i) at least one mmd is
96  * is positive, ii) all mmds are negative */
97  bool one_pos=false;
98  for (index_t i=0; i<mmds.vlen; ++i)
99  {
100  if (mmds[i]>0)
101  {
102  SG_DEBUG("found at least one positive MMD\n")
103  one_pos=true;
104  break;
105  }
106  }
107 
108  if (!one_pos)
109  {
110  SG_WARNING("All mmd estimates are negative. This is techically possible,"
111  "although extremely rare. Consider using different kernels. "
112  "This combination will lead to a bad two-sample test. Since any"
113  "combination is bad, will now just return equally distributed "
114  "kernel weights\n");
115 
116  /* if no element is positive, we can choose arbritary weights since
117  * the results will be bad anyway */
118  weights.set_const(1.0/num_kernels);
119  }
120  else
121  {
122  SG_DEBUG("one MMD entry is positive, performing optimisation\n")
123  /* do optimisation, init vectors */
124  for (index_t i=0; i<num_kernels; ++i)
125  {
126  Q_diag[i]=m_Q(i,i);
127  f[i]=0;
128  lb[i]=0;
129  ub[i]=CMath::INFTY;
130 
131  /* initial point has to be feasible, i.e. mmds'*x = b */
132  weights[i]=1.0/sum_mmds;
133  }
134 
135  /* start libqp solver with desired parameters */
136  SG_DEBUG("starting libqp optimization\n")
137  libqp_state_T qp_exitflag=libqp_gsmo_solver(&get_Q_col, Q_diag.vector,
138  f.vector, mmds.vector,
139  one_pos ? 1 : -1,
140  lb.vector, ub.vector,
141  weights.vector, num_kernels, m_opt_max_iterations,
143 
144  SG_DEBUG("libqp returns: nIts=%d, exit_flag: %d\n", qp_exitflag.nIter,
145  qp_exitflag.exitflag);
146 
147  /* set really small entries to zero and sum up for normalization */
148  float64_t sum_weights=0;
149  for (index_t i=0; i<weights.vlen; ++i)
150  {
151  if (weights[i]<m_opt_low_cut)
152  {
153  SG_DEBUG("lowcut: weight[%i]=%f<%f setting to zero\n", i, weights[i],
154  m_opt_low_cut);
155  weights[i]=0;
156  }
157 
158  sum_weights+=weights[i];
159  }
160 
161  /* normalize (allowed since problem is scale invariant) */
162  for (index_t i=0; i<weights.vlen; ++i)
163  weights[i]/=sum_weights;
164  }
165 
166  return weights;
167 }
virtual void set_subkernel_weights(SGVector< float64_t > weights)
static SGMatrix< float64_t > m_Q
int32_t index_t
Definition: common.h:62
static const float64_t INFTY
infinity
Definition: Math.h:2048
virtual SGVector< float64_t > solve_optimization(SGVector< float64_t > mmds)
Base class for kernel selection for MMD-based two-sample test statistic implementations. Provides abstract methods for selecting kernels and computing criteria or kernel weights for the implemented method. In order to implement new methods for kernel selection, simply write a new implementation of this class.
Kernel two sample test base class. Provides an interface for performing a two-sample test using a ker...
static void print_state(libqp_state_T state)
index_t num_rows
Definition: SGMatrix.h:376
index_t vlen
Definition: SGVector.h:494
double float64_t
Definition: common.h:50
The Combined kernel is used to combine a number of kernels into a single CombinedKernel object by lin...
virtual SGVector< float64_t > compute_measures()=0
static const float64_t * get_Q_col(uint32_t i)
CKernelTwoSampleTest * m_estimator
#define SG_DEBUG(...)
Definition: SGIO.h:107
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
#define SG_SDEBUG(...)
Definition: SGIO.h:168
The Kernel base class.
Definition: Kernel.h:158
#define SG_WARNING(...)
Definition: SGIO.h:128
#define SG_ADD(...)
Definition: SGObject.h:81
void set_const(T const_elem)
Definition: SGVector.cpp:152

SHOGUN 机器学习工具包 - 项目文档