SoftMaxLikelihood.cpp
/*
 * Copyright (c) The Shogun Machine Learning Toolbox
 * Written (w) 2014 Wu Lin
 * Written (w) 2014 Parijat Mazumdar
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation are those
 * of the authors and should not be interpreted as representing official policies,
 * either expressed or implied, of the Shogun Development Team.
 *
 * Code adapted from
 * https://gist.github.com/yorkerlin/8a36e8f9b298aa0246a4
 * and
 * GPstuff - Gaussian process models for Bayesian analysis
 * http://becs.aalto.fi/en/research/bayes/gpstuff/
 *
 * The reference pseudocode is Algorithm 3.4 of the GPML textbook.
 */

#include <shogun/machine/gp/SoftMaxLikelihood.h>

#include <shogun/labels/MulticlassLabels.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/eigen3.h>
#include <shogun/distributions/classical/GaussianDistribution.h>

using namespace shogun;
using namespace Eigen;

CSoftMaxLikelihood::CSoftMaxLikelihood() : CLikelihoodModel()
{
    init();
}

CSoftMaxLikelihood::~CSoftMaxLikelihood()
{
}

void CSoftMaxLikelihood::init()
{
    m_num_samples=10000;
    SG_ADD(&m_num_samples, "num_samples",
        "Number of samples to be generated",
        MS_NOT_AVAILABLE);
}

SGVector<float64_t> CSoftMaxLikelihood::get_log_probability_f(const CLabels* lab,
    SGVector<float64_t> func) const
{
    REQUIRE(lab, "Labels are required (lab should not be NULL)\n")
    REQUIRE(lab->get_label_type()==LT_MULTICLASS,
        "Labels must be type of CMulticlassLabels\n")

    SGVector<int32_t> labels=((CMulticlassLabels*) lab)->get_int_labels();
    for (int32_t i=0;i<labels.vlen;i++)
        REQUIRE(((labels[i]>-1)&&(labels[i]<func.vlen/labels.vlen)),
            "Labels must be between 0 and C-1 (C=%d here). Currently labels[%d] is "
            "%d\n",func.vlen/labels.vlen,i,labels[i]);

    // labels.vlen=num_rows, func.vlen/labels.vlen=num_cols
    Map<MatrixXd> eigen_f(func.vector,labels.vlen,func.vlen/labels.vlen);

    // log_sum_exp trick
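    // log(sum_c exp(f_c)) is evaluated as m + log(sum_c exp(f_c - m)) with
    // m = max_c f_c, so the exponentials cannot overflow for large f values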
    VectorXd max_coeff=eigen_f.rowwise().maxCoeff();
    eigen_f=eigen_f.array().colwise()-max_coeff.array();
    VectorXd log_sum_exp=((eigen_f.array().exp()).rowwise().sum()).array().log();
    log_sum_exp=log_sum_exp+max_coeff;

    // restore original matrix
    eigen_f=eigen_f.array().colwise()+max_coeff.array();

    SGVector<float64_t> ret=SGVector<float64_t>(labels.vlen);
    Map<VectorXd> eigen_ret(ret.vector,ret.vlen);

    for (int32_t i=0;i<labels.vlen;i++)
        eigen_ret(i)=eigen_f(i,labels[i]);

    eigen_ret=eigen_ret-log_sum_exp;

    return ret;
}

SGVector<float64_t> CSoftMaxLikelihood::get_log_probability_derivative_f(
    const CLabels* lab, SGVector<float64_t> func, index_t i) const
{
    int32_t num_rows=lab->get_num_labels();
    int32_t num_cols=func.vlen/num_rows;
    SGMatrix<float64_t> f=SGMatrix<float64_t>(func.vector,num_rows,num_cols,false);
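
    // i selects the derivative order: 1 returns the gradient, 2 the stacked
    // per-sample Hessian blocks, and anything else the third derivatives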
109 
110  if (i==1)
111  return get_log_probability_derivative1_f(lab,f);
112  else if (i==2)
113  return get_log_probability_derivative2_f(f);
114  else
115  return get_log_probability_derivative3_f(f);
116 }

SGVector<float64_t> CSoftMaxLikelihood::get_log_probability_derivative1_f(
    const CLabels* lab, SGMatrix<float64_t> func) const
{
    REQUIRE(lab, "Labels are required (lab should not be NULL)\n")
    REQUIRE(lab->get_label_type()==LT_MULTICLASS,
        "Labels must be type of CMulticlassLabels\n")
    REQUIRE(lab->get_num_labels()==func.num_rows, "Number of labels must match "
        "number of vectors in function\n")

    SGVector<int32_t> labels=((CMulticlassLabels*) lab)->get_int_labels();
    for (int32_t i=0;i<labels.vlen;i++)
        REQUIRE(((labels[i]>-1)&&(labels[i]<func.num_cols)),
            "Labels must be between 0 and C-1 (C=%d here). Currently labels[%d] is "
            "%d\n",func.num_cols,i,labels[i]);

    SGVector<float64_t> ret=SGVector<float64_t>(func.num_rows*func.num_cols);
    memcpy(ret.vector,func.matrix,func.num_rows*func.num_cols*sizeof(float64_t));

    // pi
    Map<MatrixXd> eigen_ret(ret.vector,func.num_rows,func.num_cols);

    // with log_sum_exp trick
    VectorXd max_coeff=eigen_ret.rowwise().maxCoeff();
    eigen_ret=eigen_ret.array().colwise()-max_coeff.array();
    VectorXd log_sum_exp=((eigen_ret.array().exp()).rowwise().sum()).array().log();
    eigen_ret=(eigen_ret.array().colwise()-log_sum_exp.array()).exp();

    // without log_sum_exp trick
    //eigen_ret=eigen_ret.array().exp();
    //VectorXd tmp=eigen_ret.rowwise().sum();
    //eigen_ret=eigen_ret.array().colwise()/tmp.array();

    MatrixXd y=MatrixXd::Zero(func.num_rows,func.num_cols);

    for (int32_t i=0;i<labels.vlen;i++)
        y(i,labels[i])=1.;

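    // gradient of the log-softmax: d log p(y|f) / d f_c = 1{y=c} - pi_c,
    // i.e. the one-hot label matrix minus the class probabilities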
    eigen_ret=y-eigen_ret;

    return ret;
}

SGVector<float64_t> CSoftMaxLikelihood::get_log_probability_derivative2_f(SGMatrix<float64_t> func) const
{
    SGVector<float64_t> ret=SGVector<float64_t>(func.num_rows*func.num_cols*func.num_cols);
    Map<MatrixXd> eigen_ret(ret.vector,func.num_rows*func.num_cols,func.num_cols);

    Map<MatrixXd> eigen_f(func.matrix,func.num_rows,func.num_cols);

    MatrixXd f1=eigen_f.array().exp();
    VectorXd tmp=f1.rowwise().sum();
    f1=f1.array().colwise()/tmp.array();

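    // per-sample Hessian of the log-softmax:
    // d^2 log p / (df_a df_b) = pi_a*pi_b - delta_ab*pi_a, stored as C x C
    // blocks stacked row-wise; the diagonal of pi*pi^T is pi_a^2, so its
    // square root recovers pi_a for the delta_ab term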
    for (int32_t i=0;i<eigen_f.rows();i++)
    {
        eigen_ret.block(i*eigen_f.cols(),0,eigen_f.cols(),eigen_f.cols())=
            f1.transpose().col(i)*f1.row(i);
        VectorXd D=eigen_ret.block(i*eigen_f.cols(),0,eigen_f.cols(),eigen_f.cols())
            .diagonal().array().sqrt();
        eigen_ret.block(i*eigen_f.cols(),0,eigen_f.cols(),eigen_f.cols())-=
            MatrixXd(D.asDiagonal());
    }

    return ret;
}

SGVector<float64_t> CSoftMaxLikelihood::get_log_probability_derivative3_f(
    SGMatrix<float64_t> func) const
{
    SGVector<float64_t> ret=SGVector<float64_t>(CMath::pow(func.num_cols,3)*func.num_rows);

    Map<MatrixXd> eigen_f(func.matrix,func.num_rows,func.num_cols);

    MatrixXd f1=eigen_f.array().exp();
    VectorXd tmp=f1.rowwise().sum();
    f1=f1.array().colwise()/tmp.array();

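    // third derivative of the log-softmax, one C x C x C tensor per sample:
    // d^3 log p / (df_c1 df_c2 df_c3) = delta_{c1=c2=c3}*pi_c1
    //   - delta_{c1c2}*pi_c1*pi_c3 - delta_{c1c3}*pi_c1*pi_c2
    //   - delta_{c2c3}*pi_c1*pi_c2 + 2*pi_c1*pi_c2*pi_c3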
    for (int32_t i=0;i<func.num_rows;i++)
    {
        for (int32_t c1=0;c1<func.num_cols;c1++)
        {
            for (int32_t c2=0;c2<func.num_cols;c2++)
            {
                for (int32_t c3=0;c3<func.num_cols;c3++)
                {
                    float64_t sum_temp=0;
                    if ((c1==c2) && (c2==c3))
                        sum_temp+=f1(i,c1);
                    if (c1==c2)
                        sum_temp=sum_temp-f1(i,c1)*f1(i,c3);
                    if (c1==c3)
                        sum_temp=sum_temp-f1(i,c1)*f1(i,c2);
                    if (c2==c3)
                        sum_temp=sum_temp-f1(i,c1)*f1(i,c2);
                    sum_temp+=2.0*f1(i,c1)*f1(i,c2)*f1(i,c3);

                    ret[i*CMath::pow(func.num_cols,3)+
                        c1*CMath::pow(func.num_cols,2)+c2*func.num_cols+c3]=sum_temp;
                }
            }
        }
    }

    return ret;
}

void CSoftMaxLikelihood::set_num_samples(index_t num_samples)
{
    REQUIRE(num_samples>0, "Number of samples (%d) should be positive\n",
        num_samples);
    m_num_samples=num_samples;
}

SGVector<float64_t> CSoftMaxLikelihood::predictive_helper(SGVector<float64_t> mu,
    SGVector<float64_t> s2, const CLabels *lab, EMCSamplerType option) const
{
    const index_t C=s2.vlen/mu.vlen;
    const index_t n=mu.vlen/C;

    REQUIRE(n*C==mu.vlen, "Number of labels (%d) times number of classes (%d) must match "
        "number of elements (%d) in mu\n", n, C, mu.vlen);

    REQUIRE(n*C*C==s2.vlen, "Number of labels (%d) times the square of the number of classes (%d*%d) must match "
        "number of elements (%d) in s2\n", n, C, C, s2.vlen);

    SGVector<index_t> y;

    if (lab)
    {
        REQUIRE(lab->get_label_type()==LT_MULTICLASS,
            "Labels must be type of CMulticlassLabels\n");

        const index_t n1=lab->get_num_labels();
        REQUIRE(n==n1, "Number of samples (%d) learned from mu and s2 must match "
            "number of labels (%d) in lab\n", n, n1);

        y=((CMulticlassLabels*) lab)->get_int_labels();
        for (index_t i=0;i<y.vlen;i++)
            REQUIRE(y[i]<C, "Labels must be between 0 and C-1 (C=%d here). Currently lab[%d] is "
                "%d\n", C, i, y[i]);
    }
    else
    {
        y=SGVector<index_t>(n);
        y.set_const(C);
    }

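    // Monte Carlo estimate: for each point, sample the latent f from
    // N(mean, Sigma) and average softmax(f) over the samples; option selects
    // whether probabilities, predictive means, or variances are returned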
    SGVector<float64_t> ret(mu.vlen);

    for (index_t idx=0; idx<n; idx++)
    {
        SGMatrix<float64_t> Sigma(s2.vector+idx*C*C, C, C, false);
        SGVector<float64_t> mean(mu.vector+idx*C, C, false);
        SGVector<float64_t> label(C);
        if (y[idx]<C)
        {
            label.set_const(0);
            label[y[idx]]=1.0;
        }
        else
        {
            label.set_const(1.0);
        }

        Map<VectorXd> eigen_ret_sub(ret.vector+idx*C, C);
        SGVector<float64_t> tmp=mc_sampler(m_num_samples,mean,Sigma,label);
        Map<VectorXd> eigen_tmp(tmp.vector, tmp.vlen);
        eigen_ret_sub=eigen_tmp;

        if (option==1)
        {
            Map<VectorXd> eigen_label(label.vector, label.vlen);
            eigen_ret_sub=eigen_ret_sub.array()*eigen_label.array()+(1-eigen_ret_sub.array())*(1-eigen_label.array());
        }
    }

    if (option==2)
    {
        Map<VectorXd> eigen_ret(ret.vector, ret.vlen);
        eigen_ret=eigen_ret.array()*(1-eigen_ret.array());
    }

    return ret;
}

SGVector<float64_t> CSoftMaxLikelihood::get_predictive_log_probabilities(
    SGVector<float64_t> mu, SGVector<float64_t> s2, const CLabels *lab)
{
    return predictive_helper(mu, s2, lab, MC_Probability);
}

SGVector<float64_t> CSoftMaxLikelihood::mc_sampler(index_t num_samples, SGVector<float64_t> mean,
    SGMatrix<float64_t> Sigma, SGVector<float64_t> y) const
{
    CGaussianDistribution* gen=new CGaussianDistribution(mean, Sigma);

    // category by samples
    SGMatrix<float64_t> samples=gen->sample(num_samples);
    Map<MatrixXd> eigen_samples(samples.matrix, samples.num_rows, samples.num_cols);

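    // each column holds one sample of the latent vector f; softmax-normalize
    // per column, then average across columns to estimate E[softmax(f)]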
    MatrixXd my_samples=eigen_samples.array().exp();
    VectorXd sum_samples=my_samples.array().colwise().sum().transpose();
    MatrixXd normal_samples=(my_samples.array().rowwise()/sum_samples.array().transpose());
    VectorXd mean_samples=normal_samples.rowwise().mean();

    SGVector<float64_t> est(mean.vlen);
    Map<VectorXd> eigen_est(est.vector, est.vlen);

    // 0 and 1 encoding
    Map<VectorXd> eigen_y(y.vector, y.vlen);
    eigen_est=(mean_samples.array()*eigen_y.array())+(1-mean_samples.array())*(1-eigen_y.array());

    SG_UNREF(gen);

    return est;
}

SGVector<float64_t> CSoftMaxLikelihood::get_predictive_means(
    SGVector<float64_t> mu, SGVector<float64_t> s2, const CLabels* lab) const
{
    return predictive_helper(mu, s2, lab, MC_Mean);
}

SGVector<float64_t> CSoftMaxLikelihood::get_predictive_variances(
    SGVector<float64_t> mu, SGVector<float64_t> s2, const CLabels* lab) const
{
    return predictive_helper(mu, s2, lab, MC_Variance);
}
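
/* A minimal usage sketch (not part of the original file): it assumes mu and
 * s2 have already been produced by a multiclass GP inference method, with mu
 * of length n*C and s2 of length n*C*C, as predictive_helper() requires.
 *
 *   CSoftMaxLikelihood* lik=new CSoftMaxLikelihood();
 *   lik->set_num_samples(20000); // more MC samples -> lower estimator variance
 *   SGVector<float64_t> p=lik->get_predictive_log_probabilities(mu, s2);
 *   SGVector<float64_t> m=lik->get_predictive_means(mu, s2);
 *   SGVector<float64_t> v=lik->get_predictive_variances(mu, s2);
 *   SG_UNREF(lik);
 */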