SHOGUN  4.1.0
SoftMaxLikelihood.cpp
Go to the documentation of this file.
/*
 * Copyright (c) The Shogun Machine Learning Toolbox
 * Written (w) 2014 Wu Lin
 * Written (w) 2014 Parijat Mazumdar
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation are those
 * of the authors and should not be interpreted as representing official policies,
 * either expressed or implied, of the Shogun Development Team.
 *
 * Code adapted from
 * https://gist.github.com/yorkerlin/8a36e8f9b298aa0246a4
 * and
 * GPstuff - Gaussian process models for Bayesian analysis
 * http://becs.aalto.fi/en/research/bayes/gpstuff/
 *
 * The reference pseudo code is Algorithm 3.4 of the GPML textbook.
 */

#include <shogun/machine/gp/SoftMaxLikelihood.h>

#ifdef HAVE_EIGEN3

#include <shogun/labels/MulticlassLabels.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/eigen3.h>
#include <shogun/distributions/classical/GaussianDistribution.h>

using namespace shogun;
using namespace Eigen;

CSoftMaxLikelihood::CSoftMaxLikelihood() : CLikelihoodModel()
{
    init();
}

CSoftMaxLikelihood::~CSoftMaxLikelihood()
{
}

void CSoftMaxLikelihood::init()
{
    m_num_samples=10000;
    SG_ADD(&m_num_samples, "num_samples",
        "Number of samples to be generated",
        MS_NOT_AVAILABLE);
}

SGVector<float64_t> CSoftMaxLikelihood::get_log_probability_f(const CLabels* lab,
    SGVector<float64_t> func) const
{
    REQUIRE(lab, "Labels are required (lab should not be NULL)\n")
    REQUIRE(lab->get_label_type()==LT_MULTICLASS,
        "Labels must be type of CMulticlassLabels\n")

    SGVector<int32_t> labels=((CMulticlassLabels*) lab)->get_int_labels();
    for (int32_t i=0;i<labels.vlen;i++)
        REQUIRE(((labels[i]>-1)&&(labels[i]<func.vlen/labels.vlen)),
            "Labels must be between 0 and C-1 (C=%d here). Currently labels[%d] is "
            "%d\n",func.vlen/labels.vlen,i,labels[i]);

    // labels.vlen=num_rows, func.vlen/num_rows=num_cols
    Map<MatrixXd> eigen_f(func.vector,labels.vlen,func.vlen/labels.vlen);

    // log_sum_exp trick
    VectorXd max_coeff=eigen_f.rowwise().maxCoeff();
    eigen_f=eigen_f.array().colwise()-max_coeff.array();
    VectorXd log_sum_exp=((eigen_f.array().exp()).rowwise().sum()).array().log();
    log_sum_exp=log_sum_exp+max_coeff;

    // restore original matrix
    eigen_f=eigen_f.array().colwise()+max_coeff.array();

    SGVector<float64_t> ret=SGVector<float64_t>(labels.vlen);
    Map<VectorXd> eigen_ret(ret.vector,ret.vlen);

    for (int32_t i=0;i<labels.vlen;i++)
        eigen_ret(i)=eigen_f(i,labels[i]);

    eigen_ret=eigen_ret-log_sum_exp;

    return ret;
}
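
// For sample i with observed class y_i, the value computed above is the
// soft-max (multinomial) log-likelihood
//   log p(y_i|f_i) = f_{i,y_i} - log(sum_c exp(f_{i,c})),
// evaluated with max-subtraction (the log-sum-exp trick) so that the
// exponentials cannot overflow.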

SGVector<float64_t> CSoftMaxLikelihood::get_log_probability_derivative_f(
    const CLabels* lab, SGVector<float64_t> func, index_t i) const
{
    int32_t num_rows=lab->get_num_labels();
    int32_t num_cols=func.vlen/num_rows;
    SGMatrix<float64_t> f=SGMatrix<float64_t>(func.vector,num_rows,num_cols,false);

    if (i==1)
        return get_log_probability_derivative1_f(lab,f);
    else if (i==2)
        return get_log_probability_derivative2_f(f);
    else
        return get_log_probability_derivative3_f(f);
}

SGVector<float64_t> CSoftMaxLikelihood::get_log_probability_derivative1_f(
    const CLabels* lab, SGMatrix<float64_t> func) const
{
    REQUIRE(lab, "Labels are required (lab should not be NULL)\n")
    REQUIRE(lab->get_label_type()==LT_MULTICLASS,
        "Labels must be type of CMulticlassLabels\n")
    REQUIRE(lab->get_num_labels()==func.num_rows, "Number of labels must match "
        "number of vectors in function\n")

    SGVector<int32_t> labels=((CMulticlassLabels*) lab)->get_int_labels();
    for (int32_t i=0;i<labels.vlen;i++)
        REQUIRE(((labels[i]>-1)&&(labels[i]<func.num_cols)),
            "Labels must be between 0 and C-1 (C=%d here). Currently labels[%d] is "
            "%d\n",func.num_cols,i,labels[i]);

    SGVector<float64_t> ret=SGVector<float64_t>(func.num_rows*func.num_cols);
    memcpy(ret.vector,func.matrix,func.num_rows*func.num_cols*sizeof(float64_t));

    // pi
    Map<MatrixXd> eigen_ret(ret.vector,func.num_rows,func.num_cols);

    // with log_sum_exp trick
    VectorXd max_coeff=eigen_ret.rowwise().maxCoeff();
    eigen_ret=eigen_ret.array().colwise()-max_coeff.array();
    VectorXd log_sum_exp=((eigen_ret.array().exp()).rowwise().sum()).array().log();
    eigen_ret=(eigen_ret.array().colwise()-log_sum_exp.array()).exp();

    // without log_sum_exp trick
    //eigen_ret=eigen_ret.array().exp();
    //VectorXd tmp=eigen_ret.rowwise().sum();
    //eigen_ret=eigen_ret.array().colwise()/tmp.array();

    MatrixXd y=MatrixXd::Zero(func.num_rows,func.num_cols);

    for (int32_t i=0;i<labels.vlen;i++)
        y(i,labels[i])=1.;

    eigen_ret=y-eigen_ret;

    return ret;
}
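
// With pi = softmax(f) computed row-wise and y the one-hot encoding of the
// labels, the first derivative returned above is
//   d log p(y|f) / d f_{i,c} = y_{i,c} - pi_{i,c}.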

SGVector<float64_t> CSoftMaxLikelihood::get_log_probability_derivative2_f(SGMatrix<float64_t> func) const
{
    SGVector<float64_t> ret=SGVector<float64_t>(func.num_rows*func.num_cols*func.num_cols);
    Map<MatrixXd> eigen_ret(ret.vector,func.num_rows*func.num_cols,func.num_cols);

    Map<MatrixXd> eigen_f(func.matrix,func.num_rows,func.num_cols);

    MatrixXd f1=eigen_f.array().exp();
    VectorXd tmp=f1.rowwise().sum();
    f1=f1.array().colwise()/tmp.array();

    for (int32_t i=0;i<eigen_f.rows();i++)
    {
        eigen_ret.block(i*eigen_f.cols(),0,eigen_f.cols(),eigen_f.cols())=
            f1.transpose().col(i)*f1.row(i);
        VectorXd D=eigen_ret.block(i*eigen_f.cols(),0,eigen_f.cols(),eigen_f.cols())
            .diagonal().array().sqrt();
        eigen_ret.block(i*eigen_f.cols(),0,eigen_f.cols(),eigen_f.cols())-=
            MatrixXd(D.asDiagonal());
    }

    return ret;
}
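
// Each C x C block stacked above is pi_i pi_i^T - diag(pi_i), the per-sample
// Hessian of the soft-max log-likelihood. Taking the square root of the
// diagonal of pi_i pi_i^T recovers pi_i itself, since its (c,c) entry is
// pi_{i,c}^2.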

SGVector<float64_t> CSoftMaxLikelihood::get_log_probability_derivative3_f(SGMatrix<float64_t>
    func) const
{
    SGVector<float64_t> ret=SGVector<float64_t>(func.num_rows*CMath::pow(func.num_cols,3));

    Map<MatrixXd> eigen_f(func.matrix,func.num_rows,func.num_cols);

    MatrixXd f1=eigen_f.array().exp();
    VectorXd tmp=f1.rowwise().sum();
    f1=f1.array().colwise()/tmp.array();

    for (int32_t i=0;i<func.num_rows;i++)
    {
        for (int32_t c1=0;c1<func.num_cols;c1++)
        {
            for (int32_t c2=0;c2<func.num_cols;c2++)
            {
                for (int32_t c3=0;c3<func.num_cols;c3++)
                {
                    float64_t sum_temp=0;
                    if ((c1==c2) && (c2==c3))
                        sum_temp+=f1(i,c1);
                    if (c1==c2)
                        sum_temp=sum_temp-f1(i,c1)*f1(i,c3);
                    if (c1==c3)
                        sum_temp=sum_temp-f1(i,c1)*f1(i,c2);
                    if (c2==c3)
                        sum_temp=sum_temp-f1(i,c1)*f1(i,c2);
                    sum_temp+=2.0*f1(i,c1)*f1(i,c2)*f1(i,c3);

                    ret[i*CMath::pow(func.num_cols,3)+
                        c1*CMath::pow(func.num_cols,2)+c2*func.num_cols+c3]=sum_temp;
                }
            }
        }
    }

    return ret;
}
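
// The quadruple loop implements the closed form
//   sum_temp = delta(c1,c2,c3)*pi_c1 - delta(c1,c2)*pi_c1*pi_c3
//              - delta(c1,c3)*pi_c1*pi_c2 - delta(c2,c3)*pi_c1*pi_c2
//              + 2*pi_c1*pi_c2*pi_c3
// per sample, where delta is 1 when its arguments all coincide. Note the
// sign: this equals dW_{c1,c2}/df_{c3} for the per-sample negative Hessian
// W = diag(pi) - pi pi^T, i.e. minus the third derivative of log p(y|f).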

void CSoftMaxLikelihood::set_num_samples(index_t num_samples)
{
    REQUIRE(num_samples>0, "Number of samples (%d) should be positive\n",
        num_samples);
    m_num_samples=num_samples;
}

SGVector<float64_t> CSoftMaxLikelihood::predictive_helper(SGVector<float64_t> mu,
    SGVector<float64_t> s2, const CLabels *lab, EMCSamplerType option) const
{
    const index_t C=s2.vlen/mu.vlen;
    const index_t n=mu.vlen/C;

    REQUIRE(n*C==mu.vlen, "Number of labels (%d) times number of classes (%d) must match "
        "number of elements (%d) in mu\n", n, C, mu.vlen);

    REQUIRE(n*C*C==s2.vlen, "Number of labels (%d) times the square of the number of classes (%d*%d) must match "
        "number of elements (%d) in s2\n", n, C, C, s2.vlen);

    SGVector<index_t> y;

    if (lab)
    {
        REQUIRE(lab->get_label_type()==LT_MULTICLASS,
            "Labels must be type of CMulticlassLabels\n");

        const index_t n1=lab->get_num_labels();
        REQUIRE(n==n1, "Number of samples (%d) learned from mu and s2 must match "
            "number of labels (%d) in lab\n", n, n1);

        y=((CMulticlassLabels*) lab)->get_int_labels();
        for (index_t i=0;i<y.vlen;i++)
            REQUIRE(y[i]<C, "Labels must be between 0 and C-1 (C=%d here). Currently lab[%d] is "
                "%d\n", C, i, y[i]);
    }
    else
    {
        y=SGVector<index_t>(n);
        y.set_const(C);
    }

    SGVector<float64_t> ret(mu.vlen);

    for (index_t idx=0; idx<n; idx++)
    {
        SGMatrix<float64_t> Sigma(s2.vector+idx*C*C, C, C, false);
        SGVector<float64_t> mean(mu.vector+idx*C, C, false);
        SGVector<float64_t> label(C);
        if (y[idx]<C)
        {
            label.set_const(0);
            label[y[idx]]=1.0;
        }
        else
        {
            label.set_const(1.0);
        }

        Map<VectorXd> eigen_ret_sub(ret.vector+idx*C, C);
        SGVector<float64_t> tmp=mc_sampler(m_num_samples,mean,Sigma,label);
        Map<VectorXd> eigen_tmp(tmp.vector, tmp.vlen);
        eigen_ret_sub=eigen_tmp;

        if (option==1)
        {
            Map<VectorXd> eigen_label(label.vector, label.vlen);
            eigen_ret_sub=eigen_ret_sub.array()*eigen_label.array()+(1-eigen_ret_sub.array())*(1-eigen_label.array());
        }
    }

    if (option==2)
    {
        Map<VectorXd> eigen_ret(ret.vector, ret.vlen);
        eigen_ret=eigen_ret.array()*(1-eigen_ret.array());
    }

    return ret;
}
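
// predictive_helper computes one Monte Carlo estimate per test point;
// mc_sampler already folds the 0/1 label encoding into its estimate. The
// numeric comparisons above (option==1, option==2) rely on the ordering of
// the EMCSamplerType enum: the option==1 branch re-applies the encoding
// p <- p*y + (1-p)*(1-y), which for y in {0,1} undoes the encoding done in
// mc_sampler and recovers the raw soft-max mean, while the option==2 branch
// maps an estimate p to the Bernoulli variance p*(1-p).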

SGVector<float64_t> CSoftMaxLikelihood::get_predictive_log_probabilities(
    SGVector<float64_t> mu, SGVector<float64_t> s2, const CLabels *lab)
{
    return predictive_helper(mu, s2, lab, MC_Probability);
}

SGVector<float64_t> CSoftMaxLikelihood::mc_sampler(index_t num_samples, SGVector<float64_t> mean,
    SGMatrix<float64_t> Sigma, SGVector<float64_t> y) const
{
    CGaussianDistribution *gen=new CGaussianDistribution(mean, Sigma);

    // category by samples
    SGMatrix<float64_t> samples=gen->sample(num_samples);
    Map<MatrixXd> eigen_samples(samples.matrix, samples.num_rows, samples.num_cols);

    MatrixXd my_samples=eigen_samples.array().exp();
    VectorXd sum_samples=my_samples.array().colwise().sum().transpose();
    MatrixXd normal_samples=(my_samples.array().rowwise()/sum_samples.array().transpose());
    VectorXd mean_samples=normal_samples.rowwise().mean();

    SGVector<float64_t> est(mean.vlen);
    Map<VectorXd> eigen_est(est.vector, est.vlen);

    // 0 and 1 encoding
    Map<VectorXd> eigen_y(y.vector, y.vlen);
    eigen_est=(mean_samples.array()*eigen_y.array())+(1-mean_samples.array())*(1-eigen_y.array());

    SG_UNREF(gen);

    return est;
}
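
// mc_sampler estimates E[softmax(f)] for f ~ N(mean, Sigma): it draws
// num_samples vectors, pushes each through the soft-max column-wise, and
// averages across samples. The last step folds in the 0/1 encoded target y,
//   est_c = p_c if y_c = 1, and est_c = 1 - p_c if y_c = 0,
// so that est_c is the estimated probability of the encoded outcome.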

SGVector<float64_t> CSoftMaxLikelihood::get_predictive_means(
    SGVector<float64_t> mu, SGVector<float64_t> s2, const CLabels* lab) const
{
    return predictive_helper(mu, s2, lab, MC_Mean);
}

SGVector<float64_t> CSoftMaxLikelihood::get_predictive_variances(
    SGVector<float64_t> mu, SGVector<float64_t> s2, const CLabels* lab) const
{
    return predictive_helper(mu, s2, lab, MC_Variance);
}

#endif /* HAVE_EIGEN3 */
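
// ---------------------------------------------------------------------------
// A minimal usage sketch (a separate program, not part of this translation
// unit), assuming the standard Shogun 4.x API for CMulticlassLabels,
// SGVector and the init/exit helpers; the names and values below are
// illustrative only.
// ---------------------------------------------------------------------------

#include <shogun/base/init.h>
#include <shogun/machine/gp/SoftMaxLikelihood.h>
#include <shogun/labels/MulticlassLabels.h>

using namespace shogun;

int main()
{
    init_shogun_with_defaults();

    const index_t n=3, C=4;            // 3 samples, 4 classes
    SGVector<float64_t> y(n);          // class assignments in {0,...,C-1}
    y[0]=0; y[1]=2; y[2]=3;
    CMulticlassLabels* lab=new CMulticlassLabels(y);
    SG_REF(lab);

    SGVector<float64_t> f(n*C);        // latent values, column-major n x C
    f.set_const(0.0);                  // equal score for every class

    CSoftMaxLikelihood* lik=new CSoftMaxLikelihood();
    SG_REF(lik);
    SGVector<float64_t> lp=lik->get_log_probability_f(lab, f);
    lp.display_vector("log p");        // each entry equals -log(C)

    SG_UNREF(lik);
    SG_UNREF(lab);
    exit_shogun();
    return 0;
}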